mcli-framework 7.12.1__py3-none-any.whl → 7.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/__init__.py +0 -2
- mcli/app/commands_cmd.py +19 -23
- mcli/app/completion_helpers.py +5 -5
- mcli/app/init_cmd.py +10 -10
- mcli/app/lock_cmd.py +82 -27
- mcli/app/main.py +2 -8
- mcli/app/model/model.py +5 -10
- mcli/app/store_cmd.py +8 -8
- mcli/app/video/__init__.py +0 -2
- mcli/app/video/video.py +1 -14
- mcli/chat/chat.py +90 -108
- mcli/chat/command_rag.py +0 -4
- mcli/chat/enhanced_chat.py +32 -41
- mcli/chat/system_controller.py +37 -37
- mcli/chat/system_integration.py +4 -5
- mcli/cli.py +2 -3
- mcli/lib/api/api.py +4 -9
- mcli/lib/api/daemon_client.py +19 -20
- mcli/lib/api/daemon_client_local.py +1 -3
- mcli/lib/api/daemon_decorator.py +6 -6
- mcli/lib/api/mcli_decorators.py +4 -8
- mcli/lib/auth/__init__.py +0 -1
- mcli/lib/auth/auth.py +4 -5
- mcli/lib/auth/mcli_manager.py +7 -12
- mcli/lib/auth/token_util.py +5 -5
- mcli/lib/config/__init__.py +29 -1
- mcli/lib/config/config.py +0 -1
- mcli/lib/custom_commands.py +1 -1
- mcli/lib/discovery/command_discovery.py +15 -15
- mcli/lib/erd/erd.py +7 -7
- mcli/lib/files/files.py +1 -1
- mcli/lib/fs/__init__.py +31 -1
- mcli/lib/fs/fs.py +12 -13
- mcli/lib/lib.py +0 -1
- mcli/lib/logger/logger.py +7 -10
- mcli/lib/performance/optimizer.py +25 -27
- mcli/lib/performance/rust_bridge.py +22 -27
- mcli/lib/performance/uvloop_config.py +0 -1
- mcli/lib/pickles/__init__.py +0 -1
- mcli/lib/pickles/pickles.py +0 -2
- mcli/lib/secrets/commands.py +0 -2
- mcli/lib/secrets/manager.py +0 -1
- mcli/lib/secrets/repl.py +2 -3
- mcli/lib/secrets/store.py +1 -2
- mcli/lib/services/data_pipeline.py +34 -34
- mcli/lib/services/lsh_client.py +38 -40
- mcli/lib/shell/shell.py +2 -2
- mcli/lib/toml/__init__.py +0 -1
- mcli/lib/ui/styling.py +0 -1
- mcli/lib/ui/visual_effects.py +33 -41
- mcli/lib/watcher/watcher.py +0 -1
- mcli/ml/__init__.py +1 -1
- mcli/ml/api/__init__.py +1 -1
- mcli/ml/api/app.py +8 -9
- mcli/ml/api/middleware.py +10 -10
- mcli/ml/api/routers/__init__.py +1 -1
- mcli/ml/api/routers/admin_router.py +3 -3
- mcli/ml/api/routers/auth_router.py +17 -18
- mcli/ml/api/routers/backtest_router.py +2 -2
- mcli/ml/api/routers/data_router.py +2 -2
- mcli/ml/api/routers/model_router.py +14 -15
- mcli/ml/api/routers/monitoring_router.py +2 -2
- mcli/ml/api/routers/portfolio_router.py +2 -2
- mcli/ml/api/routers/prediction_router.py +10 -9
- mcli/ml/api/routers/trade_router.py +2 -2
- mcli/ml/api/routers/websocket_router.py +6 -7
- mcli/ml/api/schemas.py +2 -2
- mcli/ml/auth/__init__.py +1 -1
- mcli/ml/auth/auth_manager.py +22 -23
- mcli/ml/auth/models.py +17 -17
- mcli/ml/auth/permissions.py +17 -17
- mcli/ml/backtesting/__init__.py +1 -1
- mcli/ml/backtesting/backtest_engine.py +31 -35
- mcli/ml/backtesting/performance_metrics.py +12 -14
- mcli/ml/backtesting/run.py +1 -2
- mcli/ml/cache.py +35 -36
- mcli/ml/cli/__init__.py +1 -1
- mcli/ml/cli/main.py +21 -24
- mcli/ml/config/__init__.py +1 -1
- mcli/ml/config/settings.py +28 -29
- mcli/ml/configs/__init__.py +1 -1
- mcli/ml/configs/dvc_config.py +14 -15
- mcli/ml/configs/mlflow_config.py +12 -13
- mcli/ml/configs/mlops_manager.py +19 -21
- mcli/ml/dashboard/__init__.py +4 -4
- mcli/ml/dashboard/app.py +20 -30
- mcli/ml/dashboard/app_supabase.py +16 -19
- mcli/ml/dashboard/app_training.py +11 -14
- mcli/ml/dashboard/cli.py +2 -2
- mcli/ml/dashboard/common.py +2 -3
- mcli/ml/dashboard/components/__init__.py +1 -1
- mcli/ml/dashboard/components/charts.py +13 -11
- mcli/ml/dashboard/components/metrics.py +7 -7
- mcli/ml/dashboard/components/tables.py +12 -9
- mcli/ml/dashboard/overview.py +2 -2
- mcli/ml/dashboard/pages/__init__.py +1 -1
- mcli/ml/dashboard/pages/cicd.py +15 -18
- mcli/ml/dashboard/pages/debug_dependencies.py +7 -7
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +11 -18
- mcli/ml/dashboard/pages/predictions_enhanced.py +24 -32
- mcli/ml/dashboard/pages/scrapers_and_logs.py +22 -24
- mcli/ml/dashboard/pages/test_portfolio.py +3 -6
- mcli/ml/dashboard/pages/trading.py +16 -18
- mcli/ml/dashboard/pages/workflows.py +20 -30
- mcli/ml/dashboard/utils.py +9 -9
- mcli/ml/dashboard/warning_suppression.py +3 -3
- mcli/ml/data_ingestion/__init__.py +1 -1
- mcli/ml/data_ingestion/api_connectors.py +41 -46
- mcli/ml/data_ingestion/data_pipeline.py +36 -46
- mcli/ml/data_ingestion/stream_processor.py +43 -46
- mcli/ml/database/__init__.py +1 -1
- mcli/ml/database/migrations/env.py +2 -2
- mcli/ml/database/models.py +22 -24
- mcli/ml/database/session.py +14 -14
- mcli/ml/experimentation/__init__.py +1 -1
- mcli/ml/experimentation/ab_testing.py +45 -46
- mcli/ml/features/__init__.py +1 -1
- mcli/ml/features/ensemble_features.py +22 -27
- mcli/ml/features/recommendation_engine.py +30 -30
- mcli/ml/features/stock_features.py +29 -32
- mcli/ml/features/test_feature_engineering.py +10 -11
- mcli/ml/logging.py +4 -4
- mcli/ml/mlops/__init__.py +1 -1
- mcli/ml/mlops/data_versioning.py +29 -30
- mcli/ml/mlops/experiment_tracker.py +24 -24
- mcli/ml/mlops/model_serving.py +31 -34
- mcli/ml/mlops/pipeline_orchestrator.py +27 -35
- mcli/ml/models/__init__.py +5 -6
- mcli/ml/models/base_models.py +23 -23
- mcli/ml/models/ensemble_models.py +31 -31
- mcli/ml/models/recommendation_models.py +18 -19
- mcli/ml/models/test_models.py +14 -16
- mcli/ml/monitoring/__init__.py +1 -1
- mcli/ml/monitoring/drift_detection.py +32 -36
- mcli/ml/monitoring/metrics.py +2 -2
- mcli/ml/optimization/__init__.py +1 -1
- mcli/ml/optimization/optimize.py +1 -2
- mcli/ml/optimization/portfolio_optimizer.py +30 -32
- mcli/ml/predictions/__init__.py +1 -1
- mcli/ml/preprocessing/__init__.py +1 -1
- mcli/ml/preprocessing/data_cleaners.py +22 -23
- mcli/ml/preprocessing/feature_extractors.py +23 -26
- mcli/ml/preprocessing/ml_pipeline.py +23 -23
- mcli/ml/preprocessing/test_preprocessing.py +7 -8
- mcli/ml/scripts/populate_sample_data.py +0 -4
- mcli/ml/serving/serve.py +1 -2
- mcli/ml/tasks.py +17 -17
- mcli/ml/tests/test_integration.py +29 -30
- mcli/ml/tests/test_training_dashboard.py +21 -21
- mcli/ml/trading/__init__.py +1 -1
- mcli/ml/trading/migrations.py +5 -5
- mcli/ml/trading/models.py +21 -23
- mcli/ml/trading/paper_trading.py +16 -13
- mcli/ml/trading/risk_management.py +17 -18
- mcli/ml/trading/trading_service.py +25 -28
- mcli/ml/training/__init__.py +1 -1
- mcli/ml/training/train.py +0 -1
- mcli/public/oi/oi.py +1 -2
- mcli/self/completion_cmd.py +6 -10
- mcli/self/logs_cmd.py +19 -24
- mcli/self/migrate_cmd.py +22 -20
- mcli/self/redis_cmd.py +10 -11
- mcli/self/self_cmd.py +10 -18
- mcli/self/store_cmd.py +10 -12
- mcli/self/visual_cmd.py +9 -14
- mcli/self/zsh_cmd.py +2 -4
- mcli/workflow/daemon/async_command_database.py +23 -24
- mcli/workflow/daemon/async_process_manager.py +27 -29
- mcli/workflow/daemon/client.py +27 -33
- mcli/workflow/daemon/daemon.py +32 -36
- mcli/workflow/daemon/enhanced_daemon.py +24 -33
- mcli/workflow/daemon/process_cli.py +11 -12
- mcli/workflow/daemon/process_manager.py +23 -26
- mcli/workflow/daemon/test_daemon.py +4 -5
- mcli/workflow/dashboard/dashboard_cmd.py +0 -1
- mcli/workflow/doc_convert.py +15 -17
- mcli/workflow/gcloud/__init__.py +0 -1
- mcli/workflow/gcloud/gcloud.py +11 -8
- mcli/workflow/git_commit/ai_service.py +14 -15
- mcli/workflow/lsh_integration.py +9 -11
- mcli/workflow/model_service/client.py +26 -31
- mcli/workflow/model_service/download_and_run_efficient_models.py +10 -14
- mcli/workflow/model_service/lightweight_embedder.py +25 -35
- mcli/workflow/model_service/lightweight_model_server.py +26 -32
- mcli/workflow/model_service/lightweight_test.py +7 -10
- mcli/workflow/model_service/model_service.py +80 -91
- mcli/workflow/model_service/ollama_efficient_runner.py +14 -18
- mcli/workflow/model_service/openai_adapter.py +23 -23
- mcli/workflow/model_service/pdf_processor.py +21 -26
- mcli/workflow/model_service/test_efficient_runner.py +12 -16
- mcli/workflow/model_service/test_example.py +11 -13
- mcli/workflow/model_service/test_integration.py +3 -5
- mcli/workflow/model_service/test_new_features.py +7 -8
- mcli/workflow/notebook/converter.py +1 -1
- mcli/workflow/notebook/notebook_cmd.py +5 -6
- mcli/workflow/notebook/schema.py +0 -1
- mcli/workflow/notebook/validator.py +7 -3
- mcli/workflow/openai/openai.py +1 -2
- mcli/workflow/registry/registry.py +4 -1
- mcli/workflow/repo/repo.py +6 -7
- mcli/workflow/scheduler/cron_parser.py +16 -19
- mcli/workflow/scheduler/job.py +10 -10
- mcli/workflow/scheduler/monitor.py +15 -15
- mcli/workflow/scheduler/persistence.py +17 -18
- mcli/workflow/scheduler/scheduler.py +37 -38
- mcli/workflow/secrets/__init__.py +1 -1
- mcli/workflow/sync/test_cmd.py +0 -1
- mcli/workflow/wakatime/__init__.py +5 -9
- mcli/workflow/wakatime/wakatime.py +1 -2
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/METADATA +1 -1
- mcli_framework-7.12.3.dist-info/RECORD +279 -0
- mcli_framework-7.12.1.dist-info/RECORD +0 -279
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/WHEEL +0 -0
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/top_level.txt +0 -0
mcli/ml/mlops/data_versioning.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
"""DVC integration for data versioning and pipeline management"""
|
|
1
|
+
"""DVC integration for data versioning and pipeline management."""
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
|
-
import os
|
|
7
6
|
import subprocess
|
|
8
7
|
from dataclasses import dataclass
|
|
9
8
|
from datetime import datetime
|
|
@@ -18,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
17
|
|
|
19
18
|
@dataclass
|
|
20
19
|
class DVCConfig:
|
|
21
|
-
"""DVC configuration"""
|
|
20
|
+
"""DVC configuration."""
|
|
22
21
|
|
|
23
22
|
project_root: Path = Path(".")
|
|
24
23
|
remote_storage: str = "s3://my-bucket/dvc-storage" # or local path
|
|
@@ -28,7 +27,7 @@ class DVCConfig:
|
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
class DataVersionControl:
|
|
31
|
-
"""DVC wrapper for data versioning"""
|
|
30
|
+
"""DVC wrapper for data versioning."""
|
|
32
31
|
|
|
33
32
|
def __init__(self, config: DVCConfig):
|
|
34
33
|
self.config = config
|
|
@@ -36,7 +35,7 @@ class DataVersionControl:
|
|
|
36
35
|
self._ensure_dvc_initialized()
|
|
37
36
|
|
|
38
37
|
def _ensure_dvc_initialized(self):
|
|
39
|
-
"""Ensure DVC is initialized in project"""
|
|
38
|
+
"""Ensure DVC is initialized in project."""
|
|
40
39
|
dvc_dir = self.project_root / ".dvc"
|
|
41
40
|
|
|
42
41
|
if not dvc_dir.exists():
|
|
@@ -48,7 +47,7 @@ class DataVersionControl:
|
|
|
48
47
|
self._run_command(f"dvc remote add -d storage {self.config.remote_storage}")
|
|
49
48
|
|
|
50
49
|
def _run_command(self, command: str) -> str:
|
|
51
|
-
"""Run DVC command"""
|
|
50
|
+
"""Run DVC command."""
|
|
52
51
|
try:
|
|
53
52
|
result = subprocess.run(
|
|
54
53
|
command.split(), capture_output=True, text=True, cwd=self.project_root
|
|
@@ -67,7 +66,7 @@ class DataVersionControl:
|
|
|
67
66
|
raise
|
|
68
67
|
|
|
69
68
|
def add_data(self, data_path: Union[str, Path], description: Optional[str] = None) -> str:
|
|
70
|
-
"""Add data file or directory to DVC tracking"""
|
|
69
|
+
"""Add data file or directory to DVC tracking."""
|
|
71
70
|
data_path = Path(data_path)
|
|
72
71
|
|
|
73
72
|
if not data_path.exists():
|
|
@@ -91,17 +90,17 @@ class DataVersionControl:
|
|
|
91
90
|
return str(data_path) + ".dvc"
|
|
92
91
|
|
|
93
92
|
def push_data(self):
|
|
94
|
-
"""Push data to remote storage"""
|
|
93
|
+
"""Push data to remote storage."""
|
|
95
94
|
logger.info("Pushing data to remote...")
|
|
96
95
|
self._run_command("dvc push")
|
|
97
96
|
|
|
98
97
|
def pull_data(self):
|
|
99
|
-
"""Pull data from remote storage"""
|
|
98
|
+
"""Pull data from remote storage."""
|
|
100
99
|
logger.info("Pulling data from remote...")
|
|
101
100
|
self._run_command("dvc pull")
|
|
102
101
|
|
|
103
102
|
def checkout(self, version: Optional[str] = None):
|
|
104
|
-
"""Checkout specific data version"""
|
|
103
|
+
"""Checkout specific data version."""
|
|
105
104
|
if version:
|
|
106
105
|
self._run_command(f"git checkout {version}")
|
|
107
106
|
|
|
@@ -109,7 +108,7 @@ class DataVersionControl:
|
|
|
109
108
|
logger.info(f"Checked out data version: {version or 'latest'}")
|
|
110
109
|
|
|
111
110
|
def get_data_status(self) -> Dict[str, Any]:
|
|
112
|
-
"""Get status of tracked data"""
|
|
111
|
+
"""Get status of tracked data."""
|
|
113
112
|
status_output = self._run_command("dvc status")
|
|
114
113
|
|
|
115
114
|
# Parse status
|
|
@@ -128,7 +127,7 @@ class DataVersionControl:
|
|
|
128
127
|
def _generate_metadata(
|
|
129
128
|
self, data_path: Path, description: Optional[str] = None
|
|
130
129
|
) -> Dict[str, Any]:
|
|
131
|
-
"""Generate metadata for data file"""
|
|
130
|
+
"""Generate metadata for data file."""
|
|
132
131
|
stat = data_path.stat()
|
|
133
132
|
|
|
134
133
|
metadata = {
|
|
@@ -152,13 +151,13 @@ class DataVersionControl:
|
|
|
152
151
|
metadata["rows"] = len(df)
|
|
153
152
|
metadata["columns"] = len(df.columns)
|
|
154
153
|
metadata["column_names"] = df.columns.tolist()
|
|
155
|
-
except:
|
|
154
|
+
except Exception:
|
|
156
155
|
pass
|
|
157
156
|
|
|
158
157
|
return metadata
|
|
159
158
|
|
|
160
159
|
def _calculate_hash(self, file_path: Path) -> str:
|
|
161
|
-
"""Calculate file hash"""
|
|
160
|
+
"""Calculate file hash."""
|
|
162
161
|
if file_path.is_dir():
|
|
163
162
|
return "directory"
|
|
164
163
|
|
|
@@ -169,13 +168,13 @@ class DataVersionControl:
|
|
|
169
168
|
return hash_md5.hexdigest()
|
|
170
169
|
|
|
171
170
|
def _commit_changes(self, message: str):
|
|
172
|
-
"""Commit changes to git"""
|
|
171
|
+
"""Commit changes to git."""
|
|
173
172
|
subprocess.run(["git", "add", "-A"], cwd=self.project_root)
|
|
174
173
|
subprocess.run(["git", "commit", "-m", message], cwd=self.project_root)
|
|
175
174
|
|
|
176
175
|
|
|
177
176
|
class DVCPipeline:
|
|
178
|
-
"""DVC pipeline management"""
|
|
177
|
+
"""DVC pipeline management."""
|
|
179
178
|
|
|
180
179
|
def __init__(self, config: DVCConfig):
|
|
181
180
|
self.config = config
|
|
@@ -184,7 +183,7 @@ class DVCPipeline:
|
|
|
184
183
|
self.params_file = config.project_root / "params.yaml"
|
|
185
184
|
|
|
186
185
|
def create_pipeline(self, stages: List[Dict[str, Any]]):
|
|
187
|
-
"""Create DVC pipeline"""
|
|
186
|
+
"""Create DVC pipeline."""
|
|
188
187
|
pipeline = {"stages": {}}
|
|
189
188
|
|
|
190
189
|
for stage in stages:
|
|
@@ -213,7 +212,7 @@ class DVCPipeline:
|
|
|
213
212
|
outs: Optional[List[str]] = None,
|
|
214
213
|
metrics: Optional[List[str]] = None,
|
|
215
214
|
):
|
|
216
|
-
"""Add stage to pipeline"""
|
|
215
|
+
"""Add stage to pipeline."""
|
|
217
216
|
stage_config = {
|
|
218
217
|
"cmd": cmd,
|
|
219
218
|
"deps": deps or [],
|
|
@@ -239,7 +238,7 @@ class DVCPipeline:
|
|
|
239
238
|
logger.info(f"Added stage '{name}' to pipeline")
|
|
240
239
|
|
|
241
240
|
def run_pipeline(self, stage: Optional[str] = None):
|
|
242
|
-
"""Run DVC pipeline"""
|
|
241
|
+
"""Run DVC pipeline."""
|
|
243
242
|
if stage:
|
|
244
243
|
cmd = f"dvc repro {stage}"
|
|
245
244
|
else:
|
|
@@ -249,7 +248,7 @@ class DVCPipeline:
|
|
|
249
248
|
self.dvc._run_command(cmd)
|
|
250
249
|
|
|
251
250
|
def get_metrics(self) -> Dict[str, Any]:
|
|
252
|
-
"""Get pipeline metrics"""
|
|
251
|
+
"""Get pipeline metrics."""
|
|
253
252
|
metrics_output = self.dvc._run_command("dvc metrics show")
|
|
254
253
|
|
|
255
254
|
# Parse metrics (simplified)
|
|
@@ -259,13 +258,13 @@ class DVCPipeline:
|
|
|
259
258
|
key, value = line.split(":", 1)
|
|
260
259
|
try:
|
|
261
260
|
metrics[key.strip()] = float(value.strip())
|
|
262
|
-
except:
|
|
261
|
+
except Exception:
|
|
263
262
|
metrics[key.strip()] = value.strip()
|
|
264
263
|
|
|
265
264
|
return metrics
|
|
266
265
|
|
|
267
266
|
def create_ml_pipeline(self):
|
|
268
|
-
"""Create standard ML pipeline"""
|
|
267
|
+
"""Create standard ML pipeline."""
|
|
269
268
|
stages = [
|
|
270
269
|
{
|
|
271
270
|
"name": "data_preparation",
|
|
@@ -314,26 +313,26 @@ class DVCPipeline:
|
|
|
314
313
|
|
|
315
314
|
|
|
316
315
|
class DataRegistry:
|
|
317
|
-
"""Central registry for versioned datasets"""
|
|
316
|
+
"""Central registry for versioned datasets."""
|
|
318
317
|
|
|
319
318
|
def __init__(self, registry_path: Path = Path("data_registry.json")):
|
|
320
319
|
self.registry_path = registry_path
|
|
321
320
|
self.registry = self._load_registry()
|
|
322
321
|
|
|
323
322
|
def _load_registry(self) -> Dict[str, Any]:
|
|
324
|
-
"""Load data registry"""
|
|
323
|
+
"""Load data registry."""
|
|
325
324
|
if self.registry_path.exists():
|
|
326
325
|
with open(self.registry_path, "r") as f:
|
|
327
326
|
return json.load(f)
|
|
328
327
|
return {"datasets": {}}
|
|
329
328
|
|
|
330
329
|
def _save_registry(self):
|
|
331
|
-
"""Save data registry"""
|
|
330
|
+
"""Save data registry."""
|
|
332
331
|
with open(self.registry_path, "w") as f:
|
|
333
332
|
json.dump(self.registry, f, indent=2)
|
|
334
333
|
|
|
335
334
|
def register_dataset(self, name: str, path: str, version: str, metadata: Dict[str, Any]):
|
|
336
|
-
"""Register new dataset version"""
|
|
335
|
+
"""Register new dataset version."""
|
|
337
336
|
if name not in self.registry["datasets"]:
|
|
338
337
|
self.registry["datasets"][name] = {"versions": {}}
|
|
339
338
|
|
|
@@ -349,7 +348,7 @@ class DataRegistry:
|
|
|
349
348
|
logger.info(f"Registered dataset '{name}' version '{version}'")
|
|
350
349
|
|
|
351
350
|
def get_dataset(self, name: str, version: Optional[str] = None) -> Dict[str, Any]:
|
|
352
|
-
"""Get dataset information"""
|
|
351
|
+
"""Get dataset information."""
|
|
353
352
|
if name not in self.registry["datasets"]:
|
|
354
353
|
raise ValueError(f"Dataset '{name}' not found")
|
|
355
354
|
|
|
@@ -362,11 +361,11 @@ class DataRegistry:
|
|
|
362
361
|
return dataset["versions"][version]
|
|
363
362
|
|
|
364
363
|
def list_datasets(self) -> List[str]:
|
|
365
|
-
"""List all registered datasets"""
|
|
364
|
+
"""List all registered datasets."""
|
|
366
365
|
return list(self.registry["datasets"].keys())
|
|
367
366
|
|
|
368
367
|
def list_versions(self, name: str) -> List[str]:
|
|
369
|
-
"""List all versions of a dataset"""
|
|
368
|
+
"""List all versions of a dataset."""
|
|
370
369
|
if name not in self.registry["datasets"]:
|
|
371
370
|
raise ValueError(f"Dataset '{name}' not found")
|
|
372
371
|
|
|
@@ -374,7 +373,7 @@ class DataRegistry:
|
|
|
374
373
|
|
|
375
374
|
|
|
376
375
|
def create_dvc_config():
|
|
377
|
-
"""Create DVC configuration files"""
|
|
376
|
+
"""Create DVC configuration files."""
|
|
378
377
|
|
|
379
378
|
# Create .dvc/.gitignore
|
|
380
379
|
dvc_gitignore = """
|
|
mcli/ml/mlops/experiment_tracker.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""MLflow experiment tracking and model registry"""
|
|
1
|
+
"""MLflow experiment tracking and model registry."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
|
|
|
21
21
|
|
|
22
22
|
@dataclass
|
|
23
23
|
class MLflowConfig:
|
|
24
|
-
"""Configuration for MLflow tracking"""
|
|
24
|
+
"""Configuration for MLflow tracking."""
|
|
25
25
|
|
|
26
26
|
tracking_uri: str = "sqlite:///mlruns.db"
|
|
27
27
|
experiment_name: str = "politician-trading-predictions"
|
|
@@ -40,7 +40,7 @@ class MLflowConfig:
|
|
|
40
40
|
|
|
41
41
|
@dataclass
|
|
42
42
|
class ExperimentRun:
|
|
43
|
-
"""Container for experiment run information"""
|
|
43
|
+
"""Container for experiment run information."""
|
|
44
44
|
|
|
45
45
|
run_id: str
|
|
46
46
|
experiment_id: str
|
|
@@ -55,7 +55,7 @@ class ExperimentRun:
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
class ExperimentTracker:
|
|
58
|
-
"""MLflow experiment tracker for ML pipeline"""
|
|
58
|
+
"""MLflow experiment tracker for ML pipeline."""
|
|
59
59
|
|
|
60
60
|
def __init__(self, config: MLflowConfig):
|
|
61
61
|
self.config = config
|
|
@@ -64,7 +64,7 @@ class ExperimentTracker:
|
|
|
64
64
|
self.setup_mlflow()
|
|
65
65
|
|
|
66
66
|
def setup_mlflow(self):
|
|
67
|
-
"""Initialize MLflow tracking"""
|
|
67
|
+
"""Initialize MLflow tracking."""
|
|
68
68
|
mlflow.set_tracking_uri(self.config.tracking_uri)
|
|
69
69
|
|
|
70
70
|
if self.config.registry_uri:
|
|
@@ -89,7 +89,7 @@ class ExperimentTracker:
|
|
|
89
89
|
logger.info(f"Experiment: {self.config.experiment_name} (ID: {experiment_id})")
|
|
90
90
|
|
|
91
91
|
def start_run(self, run_name: str, tags: Optional[Dict[str, str]] = None) -> ExperimentRun:
|
|
92
|
-
"""Start a new MLflow run"""
|
|
92
|
+
"""Start a new MLflow run."""
|
|
93
93
|
if self.current_run:
|
|
94
94
|
self.end_run()
|
|
95
95
|
|
|
@@ -115,7 +115,7 @@ class ExperimentTracker:
|
|
|
115
115
|
return self.current_run
|
|
116
116
|
|
|
117
117
|
def log_params(self, params: Dict[str, Any]):
|
|
118
|
-
"""Log parameters to current run"""
|
|
118
|
+
"""Log parameters to current run."""
|
|
119
119
|
if not self.current_run:
|
|
120
120
|
raise ValueError("No active MLflow run. Call start_run() first.")
|
|
121
121
|
|
|
@@ -132,7 +132,7 @@ class ExperimentTracker:
|
|
|
132
132
|
logger.debug(f"Logged {len(params)} parameters")
|
|
133
133
|
|
|
134
134
|
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None):
|
|
135
|
-
"""Log metrics to current run"""
|
|
135
|
+
"""Log metrics to current run."""
|
|
136
136
|
if not self.current_run:
|
|
137
137
|
raise ValueError("No active MLflow run. Call start_run() first.")
|
|
138
138
|
|
|
@@ -143,7 +143,7 @@ class ExperimentTracker:
|
|
|
143
143
|
logger.debug(f"Logged {len(metrics)} metrics at step {step}")
|
|
144
144
|
|
|
145
145
|
def log_artifact(self, artifact_path: Union[str, Path], artifact_type: Optional[str] = None):
|
|
146
|
-
"""Log artifact to current run"""
|
|
146
|
+
"""Log artifact to current run."""
|
|
147
147
|
if not self.current_run:
|
|
148
148
|
raise ValueError("No active MLflow run. Call start_run() first.")
|
|
149
149
|
|
|
@@ -168,7 +168,7 @@ class ExperimentTracker:
|
|
|
168
168
|
conda_env: Optional[Dict] = None,
|
|
169
169
|
pip_requirements: Optional[List[str]] = None,
|
|
170
170
|
):
|
|
171
|
-
"""Log model to current run"""
|
|
171
|
+
"""Log model to current run."""
|
|
172
172
|
if not self.current_run:
|
|
173
173
|
raise ValueError("No active MLflow run. Call start_run() first.")
|
|
174
174
|
|
|
@@ -224,7 +224,7 @@ class ExperimentTracker:
|
|
|
224
224
|
return self.current_run.model_uri
|
|
225
225
|
|
|
226
226
|
def log_figure(self, figure, artifact_name: str):
|
|
227
|
-
"""Log matplotlib figure"""
|
|
227
|
+
"""Log matplotlib figure."""
|
|
228
228
|
if not self.current_run:
|
|
229
229
|
raise ValueError("No active MLflow run. Call start_run() first.")
|
|
230
230
|
|
|
@@ -233,7 +233,7 @@ class ExperimentTracker:
|
|
|
233
233
|
logger.debug(f"Logged figure: {artifact_name}")
|
|
234
234
|
|
|
235
235
|
def log_dict(self, dictionary: Dict, artifact_name: str):
|
|
236
|
-
"""Log dictionary as JSON artifact"""
|
|
236
|
+
"""Log dictionary as JSON artifact."""
|
|
237
237
|
if not self.current_run:
|
|
238
238
|
raise ValueError("No active MLflow run. Call start_run() first.")
|
|
239
239
|
|
|
@@ -242,7 +242,7 @@ class ExperimentTracker:
|
|
|
242
242
|
logger.debug(f"Logged dictionary: {artifact_name}")
|
|
243
243
|
|
|
244
244
|
def end_run(self, status: str = "FINISHED"):
|
|
245
|
-
"""End current MLflow run"""
|
|
245
|
+
"""End current MLflow run."""
|
|
246
246
|
if not self.current_run:
|
|
247
247
|
return
|
|
248
248
|
|
|
@@ -262,13 +262,13 @@ class ExperimentTracker:
|
|
|
262
262
|
return current_run
|
|
263
263
|
|
|
264
264
|
def get_run(self, run_id: str) -> mlflow.entities.Run:
|
|
265
|
-
"""Get run by ID"""
|
|
265
|
+
"""Get run by ID."""
|
|
266
266
|
return self.client.get_run(run_id)
|
|
267
267
|
|
|
268
268
|
def search_runs(
|
|
269
269
|
self, filter_string: str = "", max_results: int = 100
|
|
270
270
|
) -> List[mlflow.entities.Run]:
|
|
271
|
-
"""Search for runs in experiment"""
|
|
271
|
+
"""Search for runs in experiment."""
|
|
272
272
|
return self.client.search_runs(
|
|
273
273
|
experiment_ids=[self.experiment_id],
|
|
274
274
|
filter_string=filter_string,
|
|
@@ -276,7 +276,7 @@ class ExperimentTracker:
|
|
|
276
276
|
)
|
|
277
277
|
|
|
278
278
|
def compare_runs(self, run_ids: List[str], metrics: Optional[List[str]] = None) -> pd.DataFrame:
|
|
279
|
-
"""Compare multiple runs"""
|
|
279
|
+
"""Compare multiple runs."""
|
|
280
280
|
runs_data = []
|
|
281
281
|
|
|
282
282
|
for run_id in run_ids:
|
|
@@ -306,7 +306,7 @@ class ExperimentTracker:
|
|
|
306
306
|
|
|
307
307
|
|
|
308
308
|
class ModelRegistry:
|
|
309
|
-
"""MLflow model registry for model versioning and deployment"""
|
|
309
|
+
"""MLflow model registry for model versioning and deployment."""
|
|
310
310
|
|
|
311
311
|
def __init__(self, config: MLflowConfig):
|
|
312
312
|
self.config = config
|
|
@@ -319,7 +319,7 @@ class ModelRegistry:
|
|
|
319
319
|
def register_model(
|
|
320
320
|
self, model_uri: str, model_name: str, tags: Optional[Dict[str, str]] = None
|
|
321
321
|
) -> str:
|
|
322
|
-
"""Register model in MLflow registry"""
|
|
322
|
+
"""Register model in MLflow registry."""
|
|
323
323
|
try:
|
|
324
324
|
# Create registered model if it doesn't exist
|
|
325
325
|
self.client.create_registered_model(
|
|
@@ -342,7 +342,7 @@ class ModelRegistry:
|
|
|
342
342
|
def transition_model_stage(
|
|
343
343
|
self, model_name: str, version: int, stage: str, archive_existing: bool = True
|
|
344
344
|
):
|
|
345
|
-
"""Transition model version to new stage"""
|
|
345
|
+
"""Transition model version to new stage."""
|
|
346
346
|
self.client.transition_model_version_stage(
|
|
347
347
|
name=model_name,
|
|
348
348
|
version=version,
|
|
@@ -355,7 +355,7 @@ class ModelRegistry:
|
|
|
355
355
|
def load_model(
|
|
356
356
|
self, model_name: str, version: Optional[int] = None, stage: Optional[str] = None
|
|
357
357
|
) -> Any:
|
|
358
|
-
"""Load model from registry"""
|
|
358
|
+
"""Load model from registry."""
|
|
359
359
|
if version:
|
|
360
360
|
model_uri = f"models:/{model_name}/{version}"
|
|
361
361
|
elif stage:
|
|
@@ -368,18 +368,18 @@ class ModelRegistry:
|
|
|
368
368
|
return model
|
|
369
369
|
|
|
370
370
|
def get_model_version(self, model_name: str, version: int):
|
|
371
|
-
"""Get specific model version details"""
|
|
371
|
+
"""Get specific model version details."""
|
|
372
372
|
return self.client.get_model_version(model_name, version)
|
|
373
373
|
|
|
374
374
|
def get_latest_versions(self, model_name: str, stages: Optional[List[str]] = None):
|
|
375
|
-
"""Get latest model versions for given stages"""
|
|
375
|
+
"""Get latest model versions for given stages."""
|
|
376
376
|
return self.client.get_latest_versions(model_name, stages=stages)
|
|
377
377
|
|
|
378
378
|
def delete_model_version(self, model_name: str, version: int):
|
|
379
|
-
"""Delete model version"""
|
|
379
|
+
"""Delete model version."""
|
|
380
380
|
self.client.delete_model_version(model_name, version)
|
|
381
381
|
logger.info(f"Deleted {model_name} version {version}")
|
|
382
382
|
|
|
383
383
|
def search_models(self, filter_string: str = "") -> List:
|
|
384
|
-
"""Search registered models"""
|
|
384
|
+
"""Search registered models."""
|
|
385
385
|
return self.client.search_registered_models(filter_string=filter_string)
|