mcli-framework 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (186) hide show
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,394 @@
1
+ """Configuration management for ML system"""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Dict, Any, Optional, List
6
+ from pydantic import Field, field_validator
7
+ from pydantic_settings import BaseSettings, SettingsConfigDict
8
+ import logging
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class DatabaseSettings(BaseSettings):
    """Database connection settings, read from ``DB_``-prefixed environment variables.

    When ``user`` is empty the URLs point at a local SQLite file named
    ``name``; otherwise they point at a PostgreSQL server.
    """

    model_config = SettingsConfigDict(env_prefix="DB_")

    host: str = Field(default="localhost", description="Database host")
    port: int = Field(default=5432, description="Database port")
    name: str = Field(default="ml_system.db", description="Database name")
    user: str = Field(default="", description="Database user")
    password: str = Field(default="", description="Database password")

    # Connection pool settings
    pool_size: int = Field(default=10, description="Connection pool size")
    max_overflow: int = Field(default=20, description="Max connection overflow")
    pool_timeout: int = Field(default=30, description="Pool timeout in seconds")

    def _postgres_url(self, scheme: str) -> str:
        """Build a PostgreSQL URL for *scheme* with percent-encoded credentials."""
        # Imported locally so the class does not depend on a new module-level import.
        from urllib.parse import quote

        # Reserved characters such as "@", ":" or "/" inside the user name or
        # password would otherwise be read as URL delimiters and corrupt the URL.
        user = quote(self.user, safe="")
        password = quote(self.password, safe="")
        return f"{scheme}://{user}:{password}@{self.host}:{self.port}/{self.name}"

    @property
    def url(self) -> str:
        """Return the synchronous database URL."""
        # Use SQLite for local development if no user is specified
        if not self.user:
            return f"sqlite:///{self.name}"
        return self._postgres_url("postgresql")

    @property
    def async_url(self) -> str:
        """Return the asynchronous database URL (aiosqlite / asyncpg drivers)."""
        # Use aiosqlite for local development if no user is specified
        if not self.user:
            return f"sqlite+aiosqlite:///{self.name}"
        return self._postgres_url("postgresql+asyncpg")
43
+
44
+
45
class RedisSettings(BaseSettings):
    """Redis connection settings, read from ``REDIS_``-prefixed environment variables."""

    model_config = SettingsConfigDict(env_prefix="REDIS_")

    host: str = Field(default="localhost", description="Redis host")
    port: int = Field(default=6379, description="Redis port")
    db: int = Field(default=0, description="Redis database number")
    password: Optional[str] = Field(default=None, description="Redis password")

    # Connection settings
    max_connections: int = Field(default=50, description="Max connections")
    socket_timeout: int = Field(default=5, description="Socket timeout")

    @property
    def url(self) -> str:
        """Return the Redis URL, including the password when one is set."""
        # Imported locally so the class does not depend on a new module-level import.
        from urllib.parse import quote

        if self.password:
            # Percent-encode the password: characters such as "@", ":" or "/"
            # would otherwise be read as URL delimiters.
            auth_part = f":{quote(self.password, safe='')}@"
        else:
            auth_part = ""
        return f"redis://{auth_part}{self.host}:{self.port}/{self.db}"
63
+
64
+
65
class MLflowSettings(BaseSettings):
    """MLflow tracking settings, read from ``MLFLOW_``-prefixed environment variables."""

    model_config = SettingsConfigDict(env_prefix="MLFLOW_")

    # --- Tracking server ---
    tracking_uri: str = Field(
        "http://localhost:5000",
        description="MLflow tracking server URI",
    )
    experiment_name: str = Field(
        "politician_trading",
        description="Default experiment name",
    )
    artifact_root: Optional[str] = Field(None, description="Artifact storage root")

    # --- Credentials (both optional) ---
    username: Optional[str] = Field(None, description="MLflow username")
    password: Optional[str] = Field(None, description="MLflow password")
76
+
77
+
78
class ModelSettings(BaseSettings):
    """Model training and serving settings, read from ``MODEL_``-prefixed variables."""

    model_config = SettingsConfigDict(env_prefix="MODEL_")

    # --- Storage locations ---
    model_dir: Path = Field(Path("models"), description="Model storage directory")
    cache_dir: Path = Field(Path("cache"), description="Model cache directory")

    # --- Training hyper-parameters ---
    batch_size: int = Field(32, description="Training batch size")
    learning_rate: float = Field(0.001, description="Learning rate")
    epochs: int = Field(100, description="Training epochs")

    # --- Hardware ---
    device: str = Field("auto", description="Device to use (cpu, cuda, auto)")
    num_workers: int = Field(4, description="Number of worker processes")

    # --- Serving endpoint ---
    serving_host: str = Field("0.0.0.0", description="Model serving host")
    serving_port: int = Field(8000, description="Model serving port")

    @field_validator("model_dir", "cache_dir", mode="before")
    @classmethod
    def validate_paths(cls, v):
        """Coerce the incoming value to ``Path`` unless it already is one."""
        if isinstance(v, Path):
            return v
        return Path(v)
104
+
105
+
106
class DataSettings(BaseSettings):
    """Data storage and processing settings, read from ``DATA_``-prefixed variables."""

    model_config = SettingsConfigDict(env_prefix="DATA_")

    # --- Directory layout ---
    data_dir: Path = Field(Path("data"), description="Data storage directory")
    raw_dir: Path = Field(Path("data/raw"), description="Raw data directory")
    processed_dir: Path = Field(
        Path("data/processed"),
        description="Processed data directory",
    )

    # --- DVC ---
    dvc_remote: str = Field("local", description="DVC remote storage")
    dvc_cache_dir: Path = Field(Path(".dvc/cache"), description="DVC cache directory")

    # --- Processing limits ---
    chunk_size: int = Field(10000, description="Data processing chunk size")
    # 100 MiB expressed in bytes.
    max_file_size: int = Field(100 * 1024 * 1024, description="Max file size in bytes")

    @field_validator("data_dir", "raw_dir", "processed_dir", "dvc_cache_dir", mode="before")
    @classmethod
    def validate_paths(cls, v):
        """Coerce the incoming value to ``Path`` unless it already is one."""
        if isinstance(v, Path):
            return v
        return Path(v)
128
+
129
+
130
class APISettings(BaseSettings):
    """HTTP API settings, read from ``API_``-prefixed environment variables."""

    model_config = SettingsConfigDict(env_prefix="API_")

    # --- Server ---
    host: str = Field("0.0.0.0", description="API host")
    port: int = Field(8000, description="API port")
    workers: int = Field(1, description="Number of workers")

    # --- Token signing ---
    # NOTE(review): the default is a placeholder value; presumably it must be
    # overridden (API_SECRET_KEY) anywhere tokens need to be trusted -- confirm.
    secret_key: str = Field("your-secret-key", description="Secret key for JWT")
    algorithm: str = Field("HS256", description="JWT algorithm")
    access_token_expire_minutes: int = Field(30, description="Token expiry in minutes")

    # --- Rate limiting ---
    rate_limit: int = Field(100, description="Requests per minute")

    # --- Keys for external data providers (all optional) ---
    alpha_vantage_key: Optional[str] = Field(None, description="Alpha Vantage API key")
    polygon_key: Optional[str] = Field(None, description="Polygon.io API key")
    quiver_key: Optional[str] = Field(None, description="QuiverQuant API key")
151
+
152
+
153
class MonitoringSettings(BaseSettings):
    """Metrics, logging, alerting and drift settings (``MONITORING_`` prefix)."""

    model_config = SettingsConfigDict(env_prefix="MONITORING_")

    # --- Metrics ---
    metrics_port: int = Field(9090, description="Prometheus metrics port")
    enable_metrics: bool = Field(True, description="Enable metrics collection")

    # --- Logging ---
    log_level: str = Field("INFO", description="Logging level")
    log_format: str = Field("structured", description="Log format (structured, plain)")

    # --- Alerting ---
    enable_alerts: bool = Field(True, description="Enable alerting")
    alert_webhook_url: Optional[str] = Field(None, description="Webhook URL for alerts")

    # --- Drift detection ---
    drift_check_interval: int = Field(
        3600,
        description="Drift check interval in seconds",
    )
    drift_threshold: float = Field(0.05, description="Drift detection threshold")
172
+
173
+
174
class SecuritySettings(BaseSettings):
    """Authentication, TLS and CORS settings (``SECURITY_`` prefix)."""

    model_config = SettingsConfigDict(env_prefix="SECURITY_")

    # --- Authentication ---
    enable_auth: bool = Field(True, description="Enable authentication")
    admin_username: str = Field("admin", description="Admin username")
    # NOTE(review): placeholder default; presumably meant to be overridden
    # via SECURITY_ADMIN_PASSWORD before real use -- confirm.
    admin_password: str = Field("change_me", description="Admin password")

    # --- TLS material (optional) ---
    ssl_cert_path: Optional[Path] = Field(None, description="SSL certificate path")
    ssl_key_path: Optional[Path] = Field(None, description="SSL key path")

    # --- CORS ---
    cors_origins: List[str] = Field(["*"], description="CORS allowed origins")

    @field_validator("ssl_cert_path", "ssl_key_path", mode="before")
    @classmethod
    def validate_ssl_paths(cls, v):
        """Coerce a provided value to ``Path``; pass empty values and Paths through."""
        if not v or isinstance(v, Path):
            return v
        return Path(v)
195
+
196
+
197
class Settings(BaseSettings):
    """Main application settings aggregating every component's settings.

    Values are loaded from the process environment and from a ``.env`` file.
    Constructing an instance also creates the model and data directories on
    disk (see ``_create_directories``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Environment
    environment: str = Field(
        default="development",
        description="Environment (development, staging, production, testing)",
    )
    debug: bool = Field(default=False, description="Debug mode")

    # Component settings
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    redis: RedisSettings = Field(default_factory=RedisSettings)
    mlflow: MLflowSettings = Field(default_factory=MLflowSettings)
    model: ModelSettings = Field(default_factory=ModelSettings)
    data: DataSettings = Field(default_factory=DataSettings)
    api: APISettings = Field(default_factory=APISettings)
    monitoring: MonitoringSettings = Field(default_factory=MonitoringSettings)
    security: SecuritySettings = Field(default_factory=SecuritySettings)

    @field_validator("environment")
    @classmethod
    def validate_environment(cls, v):
        """Reject environment names outside the supported set.

        Raises:
            ValueError: if *v* is not one of the supported environments.
        """
        # "testing" is accepted because create_settings() builds a dedicated
        # testing configuration; rejecting it made that code path always fail.
        valid_envs = ["development", "staging", "production", "testing"]
        if v not in valid_envs:
            raise ValueError(f"Environment must be one of {valid_envs}")
        return v

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Side effect: make sure the configured directories exist.
        self._create_directories()

    def _create_directories(self):
        """Create the model, cache and data directories if they are missing."""
        directories = [
            self.model.model_dir,
            self.model.cache_dir,
            self.data.data_dir,
            self.data.raw_dir,
            self.data.processed_dir,
        ]

        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)
            logger.debug("Ensured directory exists: %s", directory)

    @property
    def is_production(self) -> bool:
        """Return True when ``environment`` is ``"production"``."""
        return self.environment == "production"

    @property
    def is_development(self) -> bool:
        """Return True when ``environment`` is ``"development"``."""
        return self.environment == "development"

    def get_database_config(self) -> Dict[str, Any]:
        """Return the pool/echo options as a dict (intended for SQLAlchemy)."""
        return {
            "pool_size": self.database.pool_size,
            "max_overflow": self.database.max_overflow,
            "pool_timeout": self.database.pool_timeout,
            "pool_pre_ping": True,
            # SQL echo follows the global debug flag.
            "echo": self.debug,
        }

    def get_redis_config(self) -> Dict[str, Any]:
        """Return the Redis connection options as a dict."""
        return {
            "host": self.redis.host,
            "port": self.redis.port,
            "db": self.redis.db,
            "password": self.redis.password,
            "max_connections": self.redis.max_connections,
            "socket_timeout": self.redis.socket_timeout,
            "decode_responses": True,
        }
278
+
279
+
280
# Module-wide settings singleton, built once at import time.
# Constructing it also creates the configured model/data directories.
settings = Settings()
282
+
283
+
284
def get_settings() -> Settings:
    """Return the module-level ``settings`` instance.

    Provided as a callable so it can be used as an injectable dependency.
    """
    return settings
287
+
288
+
289
def _merge_overrides(base: Dict[str, Any], overrides: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *base* with *overrides* merged in recursively."""
    merged = dict(base)
    for key, value in overrides.items():
        current = merged.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            # Merge nested sections key by key instead of replacing them.
            merged[key] = _merge_overrides(current, value)
        else:
            merged[key] = value
    return merged


def update_settings(**kwargs) -> Settings:
    """Rebuild the global ``settings`` with *kwargs* applied on top.

    Nested sections are merged rather than replaced, so a partial override
    such as ``update_settings(database={"name": "other"})`` keeps the rest of
    the current database configuration. A plain ``dict.update`` would swap
    the whole section for the partial dict and silently drop those values.

    Returns:
        The new global ``Settings`` instance.
    """
    global settings

    # Create new settings instance with updated values
    settings = Settings(**_merge_overrides(settings.model_dump(), kwargs))

    return settings
299
+
300
+
301
+ # Environment-specific configurations
302
def get_development_config() -> Dict[str, Any]:
    """Return the override dict used for the development environment."""
    overrides: Dict[str, Any] = {"debug": True}
    overrides["database"] = {"host": "localhost", "name": "ml_system_dev"}
    overrides["redis"] = {"db": 1}
    overrides["mlflow"] = {"tracking_uri": "http://localhost:5000"}
    overrides["monitoring"] = {"log_level": "DEBUG", "enable_alerts": False}
    overrides["security"] = {"enable_auth": False}
    return overrides
324
+
325
+
326
def get_production_config() -> Dict[str, Any]:
    """Return the override dict used for the production environment."""
    monitoring_overrides = {"log_level": "INFO", "enable_alerts": True}
    security_overrides = {
        "enable_auth": True,
        "cors_origins": ["https://yourdomain.com"],
    }
    return dict(
        debug=False,
        monitoring=monitoring_overrides,
        security=security_overrides,
    )
339
+
340
+
341
def get_testing_config() -> Dict[str, Any]:
    """Return the override dict used for the testing environment."""
    overrides: Dict[str, Any] = {"debug": True}
    overrides["database"] = {"name": "ml_system_test"}
    overrides["redis"] = {"db": 2}
    overrides["monitoring"] = {"enable_alerts": False, "enable_metrics": False}
    return overrides
356
+
357
+
358
+ # Configuration factory
359
def create_settings(environment: str = "development") -> Settings:
    """Build a ``Settings`` instance with the overrides for *environment*.

    Environments without a dedicated override function get no overrides
    beyond the ``environment`` value itself.
    """
    override_factories = {
        "development": get_development_config,
        "production": get_production_config,
        "testing": get_testing_config,
    }

    factory = override_factories.get(environment)
    overrides = {} if factory is None else dict(factory())
    overrides["environment"] = environment
    return Settings(**overrides)
372
+
373
+
374
+ # Example usage and validation
375
if __name__ == "__main__":
    # Manual smoke test: build the settings for each environment and print
    # a few representative values.
    print("Loading settings...")

    for env in ("development", "production", "testing"):
        print(f"\n{env.upper()} Configuration:")
        env_settings = create_settings(env)
        summary = (
            f" Debug: {env_settings.debug}",
            f" Database URL: {env_settings.database.url}",
            f" Redis URL: {env_settings.redis.url}",
            f" Model Dir: {env_settings.model.model_dir}",
        )
        for line in summary:
            print(line)

    # An unknown environment name is expected to be rejected by validation.
    try:
        invalid_settings = Settings(environment="invalid")
    except ValueError as e:
        print(f"\nValidation working: {e}")

    print("\nSettings validation complete!")
@@ -0,0 +1,230 @@
1
+ """DVC Configuration for Data Versioning and Pipeline Management"""
2
+
3
+ import os
4
+ import subprocess
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import yaml
9
+
10
+
11
class DVCConfig:
    """Thin wrapper around the ``dvc`` (and ``git``) command-line tools.

    Every command is executed with ``project_root`` as its working directory.
    Progress and errors are reported with ``print``.
    """

    def __init__(self, project_root: Optional[Path] = None):
        """Store the project root and derive the DVC, data and model paths.

        Args:
            project_root: Root of the DVC project. Defaults to the directory
                five levels above this file.
        """
        self.project_root = project_root or Path(__file__).parent.parent.parent.parent.parent
        self.dvc_dir = self.project_root / ".dvc"
        self.data_dir = self.project_root / "data"
        self.models_dir = self.project_root / "models"

    def _run(self, cmd: List[str]) -> "subprocess.CompletedProcess":
        """Run *cmd* in the project root, capturing stdout/stderr as text."""
        return subprocess.run(cmd, capture_output=True, text=True, cwd=self.project_root)

    @staticmethod
    def _build_stage_command(
        stage_name: str,
        command: str,
        dependencies: List[str],
        outputs: List[str],
        parameters: Optional[Dict[str, Any]] = None,
        metrics: Optional[List[str]] = None,
    ) -> List[str]:
        """Assemble the ``dvc stage add`` argument list for one stage."""
        cmd = ["dvc", "stage", "add", "-n", stage_name]
        # Each dependency/output needs its own flag; a single "-d a b" would
        # flag only "a" and leave "b" as a stray positional argument.
        for dependency in dependencies:
            cmd.extend(["-d", dependency])
        for output in outputs:
            cmd.extend(["-o", output])
        # Iterating the mapping yields its keys, which are passed to "-p".
        for param in parameters or ():
            cmd.extend(["-p", param])
        for metric_file in metrics or ():
            cmd.extend(["-M", metric_file])
        # The stage command goes last: options placed after it would be
        # taken as part of the command itself.
        cmd.append(command)
        return cmd

    def setup_data_directories(self) -> None:
        """Create the data and model sub-directories intended for DVC tracking."""
        directories = [
            self.data_dir / "raw",
            self.data_dir / "processed",
            self.data_dir / "features",
            self.models_dir / "pytorch",
            self.models_dir / "sklearn",
            self.models_dir / "ensemble",
        ]

        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)
            print(f"Created directory: {directory}")

    def add_data_to_dvc(self, data_path: Path, message: Optional[str] = None) -> None:
        """Track *data_path* with DVC and stage the resulting ``.dvc`` file in git.

        Args:
            data_path: File or directory to track.
            message: Currently unused.

        Raises:
            Exception: if ``dvc add`` fails or cannot be executed.
        """
        try:
            result = self._run(["dvc", "add", str(data_path)])

            if result.returncode != 0:
                raise Exception(f"DVC add failed: {result.stderr}")

            print(f"Added to DVC: {data_path}")

            # Add .dvc file to git
            dvc_file = data_path.with_suffix(data_path.suffix + ".dvc")
            if dvc_file.exists():
                git_result = subprocess.run(
                    ["git", "add", str(dvc_file)], cwd=self.project_root
                )
                # Only claim success when git actually staged the file;
                # a git failure stays non-fatal, as before.
                if git_result.returncode == 0:
                    print(f"Added to git: {dvc_file}")
                else:
                    print(f"Warning: git add failed for {dvc_file}")

        except Exception as e:
            print(f"Error adding data to DVC: {e}")
            raise

    def create_pipeline_stage(
        self,
        stage_name: str,
        command: str,
        dependencies: List[str],
        outputs: List[str],
        parameters: Optional[Dict[str, Any]] = None,
        metrics: Optional[List[str]] = None,
    ) -> None:
        """Create a DVC pipeline stage via ``dvc stage add``.

        Args:
            stage_name: Name of the stage (``-n``).
            command: Shell command the stage runs.
            dependencies: Paths the stage depends on (one ``-d`` each).
            outputs: Paths the stage produces (one ``-o`` each).
            parameters: Mapping whose keys are passed as ``-p`` values.
            metrics: Metric files (one ``-M`` each).

        Raises:
            Exception: if the ``dvc`` command fails or cannot be executed.
        """
        try:
            cmd = self._build_stage_command(
                stage_name, command, dependencies, outputs, parameters, metrics
            )
            result = self._run(cmd)

            if result.returncode != 0:
                raise Exception(f"DVC stage creation failed: {result.stderr}")

            print(f"Created DVC pipeline stage: {stage_name}")

        except Exception as e:
            print(f"Error creating pipeline stage: {e}")
            raise

    def run_pipeline(self, stage_name: Optional[str] = None) -> None:
        """Run ``dvc repro`` for the whole pipeline or for *stage_name* only.

        Raises:
            Exception: if the ``dvc`` command fails or cannot be executed.
        """
        try:
            cmd = ["dvc", "repro"]
            if stage_name:
                cmd.append(stage_name)

            result = self._run(cmd)

            if result.returncode != 0:
                raise Exception(f"DVC pipeline run failed: {result.stderr}")

            print("DVC pipeline completed successfully")
            if result.stdout:
                print(result.stdout)

        except Exception as e:
            print(f"Error running pipeline: {e}")
            raise

    def get_data_version(self, data_path: Path) -> Optional[str]:
        """Return the ``md5`` of the first output recorded in the ``.dvc`` file.

        Returns:
            The hash string, or ``None`` when the ``.dvc`` file does not
            exist, has no such entry, or cannot be read or parsed.
        """
        try:
            dvc_file = data_path.with_suffix(data_path.suffix + ".dvc")
            if not dvc_file.exists():
                return None

            with open(dvc_file, "r", encoding="utf-8") as f:
                dvc_data = yaml.safe_load(f)
            return dvc_data.get("outs", [{}])[0].get("md5")

        except Exception as e:
            # Best effort: any read/parse problem is reported and mapped to None.
            print(f"Error getting data version: {e}")
            return None

    def pull_data(self, path: Optional[str] = None) -> None:
        """Run ``dvc pull``, optionally limited to *path*.

        Raises:
            Exception: if the ``dvc`` command fails or cannot be executed.
        """
        try:
            cmd = ["dvc", "pull"]
            if path:
                cmd.append(path)

            result = self._run(cmd)

            if result.returncode != 0:
                raise Exception(f"DVC pull failed: {result.stderr}")

            print("DVC data pull completed successfully")

        except Exception as e:
            print(f"Error pulling data: {e}")
            raise

    def push_data(self, path: Optional[str] = None) -> None:
        """Run ``dvc push``, optionally limited to *path*.

        Failures are reported but never raised, because a remote may simply
        not be configured.
        """
        try:
            cmd = ["dvc", "push"]
            if path:
                cmd.append(path)

            result = self._run(cmd)

            if result.returncode != 0:
                # Don't raise exception for push failures (remote might not be configured)
                print(f"DVC push warning: {result.stderr}")
            else:
                # Report completion only when the push actually succeeded.
                print("DVC data push completed")

        except Exception as e:
            print(f"Note: DVC push failed (remote storage may not be configured): {e}")

    def configure_remote_storage(
        self, remote_name: str, storage_url: str, default: bool = True
    ) -> None:
        """Register a DVC remote and optionally make it the default.

        An "already exists" error from ``dvc remote add`` is tolerated.

        Raises:
            Exception: if adding the remote fails for any other reason.
        """
        try:
            # Add remote
            result = self._run(["dvc", "remote", "add", remote_name, storage_url])

            if result.returncode != 0 and "already exists" not in result.stderr:
                raise Exception(f"DVC remote add failed: {result.stderr}")

            # Set as default if requested
            if default:
                default_result = self._run(["dvc", "remote", "default", remote_name])
                if default_result.returncode != 0:
                    # Stays non-fatal, but is no longer silent.
                    print(f"Warning: could not set default DVC remote: {default_result.stderr}")

            print(f"Configured DVC remote: {remote_name}")

        except Exception as e:
            print(f"Error configuring remote storage: {e}")
            raise

    def get_pipeline_status(self) -> Dict[str, Any]:
        """Run ``dvc status`` and return its exit code and captured output.

        Returns:
            A dict with ``returncode``, ``stdout`` and ``stderr``; or a dict
            with a single ``error`` key when the command could not be run.
        """
        try:
            result = self._run(["dvc", "status"])

            return {
                "returncode": result.returncode,
                "stdout": result.stdout,
                "stderr": result.stderr,
            }

        except Exception as e:
            print(f"Error getting pipeline status: {e}")
            return {"error": str(e)}
210
+
211
+
212
# Module-wide DVCConfig singleton, created at import time with the default
# project root.
dvc_config = DVCConfig()
214
+
215
+
216
def get_dvc_config() -> DVCConfig:
    """Return the module-level ``dvc_config`` instance."""
    return dvc_config
219
+
220
+
221
def setup_dvc() -> None:
    """Create the DVC data directories and print the resolved locations."""
    config = dvc_config
    config.setup_data_directories()

    report = (
        f"DVC project root: {config.project_root}",
        f"Data directory: {config.data_dir}",
        f"Models directory: {config.models_dir}",
    )
    for line in report:
        print(line)
227
+
228
+
229
if __name__ == "__main__":
    # Script entry point: create the directory layout and print its paths.
    setup_dvc()