mcli-framework 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
"""Configuration management for ML system"""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Any, Optional, List
|
|
6
|
+
from pydantic import Field, field_validator
|
|
7
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DatabaseSettings(BaseSettings):
    """Database configuration (DB_* environment variables).

    Falls back to a local SQLite file when no user is configured,
    otherwise builds PostgreSQL connection URLs.
    """
    model_config = SettingsConfigDict(env_prefix="DB_")

    host: str = Field(default="localhost", description="Database host")
    port: int = Field(default=5432, description="Database port")
    name: str = Field(default="ml_system.db", description="Database name")
    user: str = Field(default="", description="Database user")
    password: str = Field(default="", description="Database password")

    # Connection pool settings
    pool_size: int = Field(default=10, description="Connection pool size")
    max_overflow: int = Field(default=20, description="Max connection overflow")
    pool_timeout: int = Field(default=30, description="Pool timeout in seconds")

    @property
    def url(self) -> str:
        """Get database URL (sync driver)."""
        # Use SQLite for local development if no user is specified
        if not self.user:
            return f"sqlite:///{self.name}"
        # Fix: percent-encode credentials so passwords containing reserved
        # characters (@ : / %) cannot corrupt the URL.
        from urllib.parse import quote_plus
        return (
            f"postgresql://{quote_plus(self.user)}:{quote_plus(self.password)}"
            f"@{self.host}:{self.port}/{self.name}"
        )

    @property
    def async_url(self) -> str:
        """Get async database URL."""
        # Use aiosqlite for local development if no user is specified
        if not self.user:
            return f"sqlite+aiosqlite:///{self.name}"
        from urllib.parse import quote_plus
        return (
            f"postgresql+asyncpg://{quote_plus(self.user)}:{quote_plus(self.password)}"
            f"@{self.host}:{self.port}/{self.name}"
        )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RedisSettings(BaseSettings):
    """Redis configuration (REDIS_* environment variables)."""
    model_config = SettingsConfigDict(env_prefix="REDIS_")

    host: str = Field(default="localhost", description="Redis host")
    port: int = Field(default=6379, description="Redis port")
    db: int = Field(default=0, description="Redis database number")
    password: Optional[str] = Field(default=None, description="Redis password")

    # Connection settings
    max_connections: int = Field(default=50, description="Max connections")
    socket_timeout: int = Field(default=5, description="Socket timeout")

    @property
    def url(self) -> str:
        """Get Redis URL."""
        # Fix: percent-encode the password so reserved characters (@ : /)
        # cannot corrupt the URL.
        from urllib.parse import quote_plus
        auth_part = f":{quote_plus(self.password)}@" if self.password else ""
        return f"redis://{auth_part}{self.host}:{self.port}/{self.db}"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class MLflowSettings(BaseSettings):
    """MLflow configuration"""
    # All fields can be overridden via MLFLOW_* environment variables.
    model_config = SettingsConfigDict(env_prefix="MLFLOW_")

    tracking_uri: str = Field(default="http://localhost:5000", description="MLflow tracking server URI")
    experiment_name: str = Field(default="politician_trading", description="Default experiment name")
    artifact_root: Optional[str] = Field(default=None, description="Artifact storage root")

    # Authentication
    # Optional credentials for the tracking server; both default to None
    # (anonymous access).
    username: Optional[str] = Field(default=None, description="MLflow username")
    password: Optional[str] = Field(default=None, description="MLflow password")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ModelSettings(BaseSettings):
    """Model configuration (MODEL_* environment variables)."""
    model_config = SettingsConfigDict(env_prefix="MODEL_")

    # Model paths
    model_dir: Path = Field(default=Path("models"), description="Model storage directory")
    cache_dir: Path = Field(default=Path("cache"), description="Model cache directory")

    # Training settings
    batch_size: int = Field(default=32, description="Training batch size")
    learning_rate: float = Field(default=0.001, description="Learning rate")
    epochs: int = Field(default=100, description="Training epochs")

    # Hardware settings
    device: str = Field(default="auto", description="Device to use (cpu, cuda, auto)")
    num_workers: int = Field(default=4, description="Number of worker processes")

    # Model serving
    serving_host: str = Field(default="0.0.0.0", description="Model serving host")
    serving_port: int = Field(default=8000, description="Model serving port")

    @field_validator("model_dir", "cache_dir", mode="before")
    @classmethod
    def validate_paths(cls, v):
        """Coerce string inputs into Path objects before field validation."""
        if isinstance(v, Path):
            return v
        return Path(v)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class DataSettings(BaseSettings):
    """Data configuration (DATA_* environment variables)."""
    model_config = SettingsConfigDict(env_prefix="DATA_")

    # Data paths
    data_dir: Path = Field(default=Path("data"), description="Data storage directory")
    raw_dir: Path = Field(default=Path("data/raw"), description="Raw data directory")
    processed_dir: Path = Field(default=Path("data/processed"), description="Processed data directory")

    # DVC settings
    dvc_remote: str = Field(default="local", description="DVC remote storage")
    dvc_cache_dir: Path = Field(default=Path(".dvc/cache"), description="DVC cache directory")

    # Data processing
    chunk_size: int = Field(default=10000, description="Data processing chunk size")
    max_file_size: int = Field(default=100 * 1024 * 1024, description="Max file size in bytes")

    @field_validator("data_dir", "raw_dir", "processed_dir", "dvc_cache_dir", mode="before")
    @classmethod
    def validate_paths(cls, v):
        """Coerce string inputs into Path objects before field validation."""
        if isinstance(v, Path):
            return v
        return Path(v)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class APISettings(BaseSettings):
    """API configuration"""
    # All fields can be overridden via API_* environment variables.
    model_config = SettingsConfigDict(env_prefix="API_")

    # Server settings
    host: str = Field(default="0.0.0.0", description="API host")
    port: int = Field(default=8000, description="API port")
    workers: int = Field(default=1, description="Number of workers")

    # Security
    # NOTE(review): placeholder default — must be overridden via
    # API_SECRET_KEY before any non-development deployment.
    secret_key: str = Field(default="your-secret-key", description="Secret key for JWT")
    algorithm: str = Field(default="HS256", description="JWT algorithm")
    access_token_expire_minutes: int = Field(default=30, description="Token expiry in minutes")

    # Rate limiting
    rate_limit: int = Field(default=100, description="Requests per minute")

    # API Keys for external services
    # All optional; features depending on them presumably degrade when
    # unset — confirm against the data-ingestion connectors.
    alpha_vantage_key: Optional[str] = Field(default=None, description="Alpha Vantage API key")
    polygon_key: Optional[str] = Field(default=None, description="Polygon.io API key")
    quiver_key: Optional[str] = Field(default=None, description="QuiverQuant API key")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class MonitoringSettings(BaseSettings):
    """Monitoring configuration"""
    # All fields can be overridden via MONITORING_* environment variables.
    model_config = SettingsConfigDict(env_prefix="MONITORING_")

    # Metrics
    metrics_port: int = Field(default=9090, description="Prometheus metrics port")
    enable_metrics: bool = Field(default=True, description="Enable metrics collection")

    # Logging
    log_level: str = Field(default="INFO", description="Logging level")
    log_format: str = Field(default="structured", description="Log format (structured, plain)")

    # Alerting
    enable_alerts: bool = Field(default=True, description="Enable alerting")
    alert_webhook_url: Optional[str] = Field(default=None, description="Webhook URL for alerts")

    # Drift detection
    # Interval is seconds between checks; the threshold's exact semantics
    # (p-value vs. distance cutoff) are defined by the drift detector that
    # consumes it — confirm against monitoring/drift_detection.py.
    drift_check_interval: int = Field(default=3600, description="Drift check interval in seconds")
    drift_threshold: float = Field(default=0.05, description="Drift detection threshold")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class SecuritySettings(BaseSettings):
    """Security configuration (SECURITY_* environment variables)."""
    model_config = SettingsConfigDict(env_prefix="SECURITY_")

    # Authentication
    enable_auth: bool = Field(default=True, description="Enable authentication")
    admin_username: str = Field(default="admin", description="Admin username")
    admin_password: str = Field(default="change_me", description="Admin password")

    # HTTPS
    ssl_cert_path: Optional[Path] = Field(default=None, description="SSL certificate path")
    ssl_key_path: Optional[Path] = Field(default=None, description="SSL key path")

    # CORS
    cors_origins: List[str] = Field(default=["*"], description="CORS allowed origins")

    @field_validator("ssl_cert_path", "ssl_key_path", mode="before")
    @classmethod
    def validate_ssl_paths(cls, v):
        """Coerce non-empty string inputs into Path objects; pass through
        None/empty values and existing Paths unchanged."""
        if not v or isinstance(v, Path):
            return v
        return Path(v)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class Settings(BaseSettings):
    """Main application settings.

    Aggregates all component settings and loads overrides from a ``.env``
    file or the process environment. Constructing an instance also
    ensures the model/data directories exist on disk.
    """
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore"
    )

    # Environment
    environment: str = Field(default="development", description="Environment (development, staging, production)")
    debug: bool = Field(default=False, description="Debug mode")

    # Component settings
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    redis: RedisSettings = Field(default_factory=RedisSettings)
    mlflow: MLflowSettings = Field(default_factory=MLflowSettings)
    model: ModelSettings = Field(default_factory=ModelSettings)
    data: DataSettings = Field(default_factory=DataSettings)
    api: APISettings = Field(default_factory=APISettings)
    monitoring: MonitoringSettings = Field(default_factory=MonitoringSettings)
    security: SecuritySettings = Field(default_factory=SecuritySettings)

    @field_validator("environment")
    @classmethod
    def validate_environment(cls, v):
        """Reject any environment name outside the supported set."""
        valid_envs = ["development", "staging", "production"]
        if v in valid_envs:
            return v
        raise ValueError(f"Environment must be one of {valid_envs}")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Side effect: every Settings construction materialises the
        # storage directories (see _create_directories).
        self._create_directories()

    def _create_directories(self):
        """Ensure all model/data storage directories exist."""
        for target in (
            self.model.model_dir,
            self.model.cache_dir,
            self.data.data_dir,
            self.data.raw_dir,
            self.data.processed_dir,
        ):
            target.mkdir(parents=True, exist_ok=True)
            logger.debug(f"Ensured directory exists: {target}")

    @property
    def is_production(self) -> bool:
        """True when the environment is "production"."""
        return self.environment == "production"

    @property
    def is_development(self) -> bool:
        """True when the environment is "development"."""
        return self.environment == "development"

    def get_database_config(self) -> Dict[str, Any]:
        """Return keyword arguments for SQLAlchemy engine creation."""
        db = self.database
        return {
            "pool_size": db.pool_size,
            "max_overflow": db.max_overflow,
            "pool_timeout": db.pool_timeout,
            "pool_pre_ping": True,
            "echo": self.debug,
        }

    def get_redis_config(self) -> Dict[str, Any]:
        """Return keyword arguments for a Redis client."""
        r = self.redis
        return {
            "host": r.host,
            "port": r.port,
            "db": r.db,
            "password": r.password,
            "max_connections": r.max_connections,
            "socket_timeout": r.socket_timeout,
            "decode_responses": True,
        }
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# Global settings instance
# NOTE: constructed at import time, so importing this module creates the
# model/data directories as a side effect (Settings.__init__).
settings = Settings()
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def get_settings() -> Settings:
    """Return the module-level Settings singleton (dependency-injection hook)."""
    return settings
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def update_settings(**kwargs) -> Settings:
    """Rebuild the global settings instance with the given overrides.

    The current settings are dumped to a dict, patched with ``kwargs``,
    and re-validated by constructing a fresh Settings object, which is
    then installed as the module-level singleton and returned.
    """
    global settings

    merged = {**settings.model_dump(), **kwargs}
    settings = Settings(**merged)
    return settings
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Environment-specific configurations
|
|
302
|
+
def get_development_config() -> Dict[str, Any]:
    """Return development-specific configuration overrides."""
    overrides: Dict[str, Any] = {"debug": True}
    overrides["database"] = {"host": "localhost", "name": "ml_system_dev"}
    overrides["redis"] = {"db": 1}
    overrides["mlflow"] = {"tracking_uri": "http://localhost:5000"}
    overrides["monitoring"] = {"log_level": "DEBUG", "enable_alerts": False}
    overrides["security"] = {"enable_auth": False}
    return overrides
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def get_production_config() -> Dict[str, Any]:
    """Return production-specific configuration overrides."""
    overrides: Dict[str, Any] = {"debug": False}
    overrides["monitoring"] = {"log_level": "INFO", "enable_alerts": True}
    overrides["security"] = {
        "enable_auth": True,
        "cors_origins": ["https://yourdomain.com"],
    }
    return overrides
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def get_testing_config() -> Dict[str, Any]:
    """Return testing-specific configuration overrides."""
    overrides: Dict[str, Any] = {"debug": True}
    overrides["database"] = {"name": "ml_system_test"}
    overrides["redis"] = {"db": 2}
    overrides["monitoring"] = {"enable_alerts": False, "enable_metrics": False}
    return overrides
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# Configuration factory
|
|
359
|
+
def create_settings(environment: str = "development") -> Settings:
    """Create a Settings instance for a specific environment.

    Unknown environment names get no overrides; Settings validation then
    decides whether the name itself is acceptable.
    """
    factories = {
        "development": get_development_config,
        "production": get_production_config,
        "testing": get_testing_config,
    }
    config = dict(factories[environment]()) if environment in factories else {}
    config["environment"] = environment
    return Settings(**config)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
# Example usage and validation
|
|
375
|
+
# Smoke-test entry point: exercises environment-specific construction and
# the environment-name validator when the module is run directly.
if __name__ == "__main__":
    # Test settings loading
    print("Loading settings...")

    # Test different environments
    for env in ["development", "production", "testing"]:
        print(f"\n{env.upper()} Configuration:")
        env_settings = create_settings(env)
        print(f" Debug: {env_settings.debug}")
        print(f" Database URL: {env_settings.database.url}")
        print(f" Redis URL: {env_settings.redis.url}")
        print(f" Model Dir: {env_settings.model.model_dir}")

    # Test validation: an unsupported environment name should raise.
    # (The bound value is intentionally unused — only the raise matters.)
    try:
        invalid_settings = Settings(environment="invalid")
    except ValueError as e:
        print(f"\nValidation working: {e}")

    print("\nSettings validation complete!")
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""DVC Configuration for Data Versioning and Pipeline Management"""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DVCConfig:
    """Configuration class for DVC data versioning and pipeline management.

    All operations shell out to the ``dvc`` (and occasionally ``git``) CLI
    with the project root as the working directory.
    """

    def __init__(self, project_root: Optional[Path] = None):
        """Initialize paths relative to the project root.

        Args:
            project_root: Repository root. Defaults to five levels above
                this file (the repo root for mcli/ml/configs/dvc_config.py).
        """
        self.project_root = project_root or Path(__file__).parent.parent.parent.parent.parent
        self.dvc_dir = self.project_root / ".dvc"
        self.data_dir = self.project_root / "data"
        self.models_dir = self.project_root / "models"

    def _run(self, cmd: List[str]) -> subprocess.CompletedProcess:
        """Run a CLI command from the project root, capturing text output."""
        return subprocess.run(cmd, capture_output=True, text=True, cwd=self.project_root)

    def setup_data_directories(self) -> None:
        """Create and configure data directories for DVC tracking."""
        directories = [
            self.data_dir / "raw",
            self.data_dir / "processed",
            self.data_dir / "features",
            self.models_dir / "pytorch",
            self.models_dir / "sklearn",
            self.models_dir / "ensemble",
        ]

        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)
            print(f"Created directory: {directory}")

    def add_data_to_dvc(self, data_path: Path, message: Optional[str] = None) -> None:
        """Add a data file or directory to DVC tracking.

        Args:
            data_path: File or directory to track.
            message: Unused; kept for interface compatibility.

        Raises:
            Exception: If ``dvc add`` fails.
        """
        try:
            result = self._run(["dvc", "add", str(data_path)])
            if result.returncode != 0:
                raise Exception(f"DVC add failed: {result.stderr}")

            print(f"Added to DVC: {data_path}")

            # Stage the generated .dvc pointer file in git so the data
            # version travels with the commit.
            dvc_file = data_path.with_suffix(data_path.suffix + ".dvc")
            if dvc_file.exists():
                subprocess.run(["git", "add", str(dvc_file)], cwd=self.project_root)
                print(f"Added to git: {dvc_file}")

        except Exception as e:
            print(f"Error adding data to DVC: {e}")
            raise

    def create_pipeline_stage(
        self,
        stage_name: str,
        command: str,
        dependencies: List[str],
        outputs: List[str],
        parameters: Optional[Dict[str, Any]] = None,
        metrics: Optional[List[str]] = None,
    ) -> None:
        """Create a DVC pipeline stage via ``dvc stage add``.

        Raises:
            Exception: If stage creation fails.
        """
        try:
            cmd = ["dvc", "stage", "add", "-n", stage_name]

            # Bug fix: dvc expects one -d/-o flag per item. The previous
            # code passed a single flag followed by all items, so every
            # item after the first was swallowed into the positional
            # command argument.
            for dep in dependencies:
                cmd.extend(["-d", dep])
            for out in outputs:
                cmd.extend(["-o", out])

            if parameters:
                # Iterating a dict yields its keys, which are used here as
                # parameter specs (matching the original behavior).
                for param_file in parameters:
                    cmd.extend(["-p", param_file])

            if metrics:
                for metric_file in metrics:
                    cmd.extend(["-M", metric_file])

            # The stage command itself is the final positional argument.
            cmd.append(command)

            result = self._run(cmd)
            if result.returncode != 0:
                raise Exception(f"DVC stage creation failed: {result.stderr}")

            print(f"Created DVC pipeline stage: {stage_name}")

        except Exception as e:
            print(f"Error creating pipeline stage: {e}")
            raise

    def run_pipeline(self, stage_name: Optional[str] = None) -> None:
        """Run the DVC pipeline, or a specific stage when given.

        Raises:
            Exception: If ``dvc repro`` fails.
        """
        try:
            cmd = ["dvc", "repro"]
            if stage_name:
                cmd.append(stage_name)

            result = self._run(cmd)
            if result.returncode != 0:
                raise Exception(f"DVC pipeline run failed: {result.stderr}")

            print("DVC pipeline completed successfully")
            if result.stdout:
                print(result.stdout)

        except Exception as e:
            print(f"Error running pipeline: {e}")
            raise

    def get_data_version(self, data_path: Path) -> Optional[str]:
        """Get the current version hash of a data file.

        Returns:
            The md5 recorded in the .dvc pointer file, or None if the file
            is untracked or the pointer cannot be read.
        """
        try:
            dvc_file = data_path.with_suffix(data_path.suffix + ".dvc")
            if not dvc_file.exists():
                return None

            with open(dvc_file, "r") as f:
                dvc_data = yaml.safe_load(f)
            # Guard against an empty pointer file or an empty "outs" list,
            # which previously raised IndexError.
            outs = (dvc_data or {}).get("outs") or [{}]
            return outs[0].get("md5")

        except Exception as e:
            print(f"Error getting data version: {e}")
            return None

    def pull_data(self, path: Optional[str] = None) -> None:
        """Pull data from DVC remote storage.

        Raises:
            Exception: If ``dvc pull`` fails.
        """
        try:
            cmd = ["dvc", "pull"]
            if path:
                cmd.append(path)

            result = self._run(cmd)
            if result.returncode != 0:
                raise Exception(f"DVC pull failed: {result.stderr}")

            print("DVC data pull completed successfully")

        except Exception as e:
            print(f"Error pulling data: {e}")
            raise

    def push_data(self, path: Optional[str] = None) -> None:
        """Push data to DVC remote storage (best-effort).

        Push failures are reported but deliberately not raised, because a
        remote may not be configured yet.
        """
        try:
            cmd = ["dvc", "push"]
            if path:
                cmd.append(path)

            result = self._run(cmd)
            if result.returncode != 0:
                # Don't raise for push failures (remote might not be configured)
                print(f"DVC push warning: {result.stderr}")

            print("DVC data push completed")

        except Exception as e:
            print(f"Note: DVC push failed (remote storage may not be configured): {e}")

    def configure_remote_storage(
        self, remote_name: str, storage_url: str, default: bool = True
    ) -> None:
        """Configure DVC remote storage.

        Raises:
            Exception: If the remote cannot be added (already-existing
                remotes are tolerated).
        """
        try:
            result = self._run(["dvc", "remote", "add", remote_name, storage_url])
            if result.returncode != 0 and "already exists" not in result.stderr:
                raise Exception(f"DVC remote add failed: {result.stderr}")

            # Set as default if requested
            if default:
                self._run(["dvc", "remote", "default", remote_name])

            print(f"Configured DVC remote: {remote_name}")

        except Exception as e:
            print(f"Error configuring remote storage: {e}")
            raise

    def get_pipeline_status(self) -> Dict[str, Any]:
        """Get status of the DVC pipeline.

        Returns:
            Dict with returncode/stdout/stderr from ``dvc status``, or a
            single "error" key on failure.
        """
        try:
            result = self._run(["dvc", "status"])
            return {
                "returncode": result.returncode,
                "stdout": result.stdout,
                "stderr": result.stderr,
            }

        except Exception as e:
            print(f"Error getting pipeline status: {e}")
            return {"error": str(e)}
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# Global configuration instance
# NOTE: instantiated at import time; only path resolution happens here —
# no directories are created until setup_data_directories() is called.
dvc_config = DVCConfig()
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def get_dvc_config() -> DVCConfig:
    """Return the module-level DVCConfig singleton."""
    return dvc_config
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def setup_dvc() -> None:
    """Create the DVC data directories and report the resolved paths."""
    dvc_config.setup_data_directories()
    for label, value in (
        ("DVC project root", dvc_config.project_root),
        ("Data directory", dvc_config.data_dir),
        ("Models directory", dvc_config.models_dir),
    ):
        print(f"{label}: {value}")
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# Script entry point: create the DVC directory layout when run directly.
if __name__ == "__main__":
    setup_dvc()
|