mcli-framework 7.12.1__py3-none-any.whl → 7.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mcli-framework might be problematic.
- mcli/app/__init__.py +0 -2
- mcli/app/commands_cmd.py +19 -23
- mcli/app/completion_helpers.py +5 -5
- mcli/app/init_cmd.py +10 -10
- mcli/app/lock_cmd.py +82 -27
- mcli/app/main.py +2 -8
- mcli/app/model/model.py +5 -10
- mcli/app/store_cmd.py +8 -8
- mcli/app/video/__init__.py +0 -2
- mcli/app/video/video.py +1 -14
- mcli/chat/chat.py +90 -108
- mcli/chat/command_rag.py +0 -4
- mcli/chat/enhanced_chat.py +32 -41
- mcli/chat/system_controller.py +37 -37
- mcli/chat/system_integration.py +4 -5
- mcli/cli.py +2 -3
- mcli/lib/api/api.py +4 -9
- mcli/lib/api/daemon_client.py +19 -20
- mcli/lib/api/daemon_client_local.py +1 -3
- mcli/lib/api/daemon_decorator.py +6 -6
- mcli/lib/api/mcli_decorators.py +4 -8
- mcli/lib/auth/__init__.py +0 -1
- mcli/lib/auth/auth.py +4 -5
- mcli/lib/auth/mcli_manager.py +7 -12
- mcli/lib/auth/token_util.py +5 -5
- mcli/lib/config/__init__.py +29 -1
- mcli/lib/config/config.py +0 -1
- mcli/lib/custom_commands.py +1 -1
- mcli/lib/discovery/command_discovery.py +15 -15
- mcli/lib/erd/erd.py +7 -7
- mcli/lib/files/files.py +1 -1
- mcli/lib/fs/__init__.py +31 -1
- mcli/lib/fs/fs.py +12 -13
- mcli/lib/lib.py +0 -1
- mcli/lib/logger/logger.py +7 -10
- mcli/lib/performance/optimizer.py +25 -27
- mcli/lib/performance/rust_bridge.py +22 -27
- mcli/lib/performance/uvloop_config.py +0 -1
- mcli/lib/pickles/__init__.py +0 -1
- mcli/lib/pickles/pickles.py +0 -2
- mcli/lib/secrets/commands.py +0 -2
- mcli/lib/secrets/manager.py +0 -1
- mcli/lib/secrets/repl.py +2 -3
- mcli/lib/secrets/store.py +1 -2
- mcli/lib/services/data_pipeline.py +34 -34
- mcli/lib/services/lsh_client.py +38 -40
- mcli/lib/shell/shell.py +2 -2
- mcli/lib/toml/__init__.py +0 -1
- mcli/lib/ui/styling.py +0 -1
- mcli/lib/ui/visual_effects.py +33 -41
- mcli/lib/watcher/watcher.py +0 -1
- mcli/ml/__init__.py +1 -1
- mcli/ml/api/__init__.py +1 -1
- mcli/ml/api/app.py +8 -9
- mcli/ml/api/middleware.py +10 -10
- mcli/ml/api/routers/__init__.py +1 -1
- mcli/ml/api/routers/admin_router.py +3 -3
- mcli/ml/api/routers/auth_router.py +17 -18
- mcli/ml/api/routers/backtest_router.py +2 -2
- mcli/ml/api/routers/data_router.py +2 -2
- mcli/ml/api/routers/model_router.py +14 -15
- mcli/ml/api/routers/monitoring_router.py +2 -2
- mcli/ml/api/routers/portfolio_router.py +2 -2
- mcli/ml/api/routers/prediction_router.py +10 -9
- mcli/ml/api/routers/trade_router.py +2 -2
- mcli/ml/api/routers/websocket_router.py +6 -7
- mcli/ml/api/schemas.py +2 -2
- mcli/ml/auth/__init__.py +1 -1
- mcli/ml/auth/auth_manager.py +22 -23
- mcli/ml/auth/models.py +17 -17
- mcli/ml/auth/permissions.py +17 -17
- mcli/ml/backtesting/__init__.py +1 -1
- mcli/ml/backtesting/backtest_engine.py +31 -35
- mcli/ml/backtesting/performance_metrics.py +12 -14
- mcli/ml/backtesting/run.py +1 -2
- mcli/ml/cache.py +35 -36
- mcli/ml/cli/__init__.py +1 -1
- mcli/ml/cli/main.py +21 -24
- mcli/ml/config/__init__.py +1 -1
- mcli/ml/config/settings.py +28 -29
- mcli/ml/configs/__init__.py +1 -1
- mcli/ml/configs/dvc_config.py +14 -15
- mcli/ml/configs/mlflow_config.py +12 -13
- mcli/ml/configs/mlops_manager.py +19 -21
- mcli/ml/dashboard/__init__.py +4 -4
- mcli/ml/dashboard/app.py +20 -30
- mcli/ml/dashboard/app_supabase.py +16 -19
- mcli/ml/dashboard/app_training.py +11 -14
- mcli/ml/dashboard/cli.py +2 -2
- mcli/ml/dashboard/common.py +2 -3
- mcli/ml/dashboard/components/__init__.py +1 -1
- mcli/ml/dashboard/components/charts.py +13 -11
- mcli/ml/dashboard/components/metrics.py +7 -7
- mcli/ml/dashboard/components/tables.py +12 -9
- mcli/ml/dashboard/overview.py +2 -2
- mcli/ml/dashboard/pages/__init__.py +1 -1
- mcli/ml/dashboard/pages/cicd.py +15 -18
- mcli/ml/dashboard/pages/debug_dependencies.py +7 -7
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +11 -18
- mcli/ml/dashboard/pages/predictions_enhanced.py +24 -32
- mcli/ml/dashboard/pages/scrapers_and_logs.py +22 -24
- mcli/ml/dashboard/pages/test_portfolio.py +3 -6
- mcli/ml/dashboard/pages/trading.py +16 -18
- mcli/ml/dashboard/pages/workflows.py +20 -30
- mcli/ml/dashboard/utils.py +9 -9
- mcli/ml/dashboard/warning_suppression.py +3 -3
- mcli/ml/data_ingestion/__init__.py +1 -1
- mcli/ml/data_ingestion/api_connectors.py +41 -46
- mcli/ml/data_ingestion/data_pipeline.py +36 -46
- mcli/ml/data_ingestion/stream_processor.py +43 -46
- mcli/ml/database/__init__.py +1 -1
- mcli/ml/database/migrations/env.py +2 -2
- mcli/ml/database/models.py +22 -24
- mcli/ml/database/session.py +14 -14
- mcli/ml/experimentation/__init__.py +1 -1
- mcli/ml/experimentation/ab_testing.py +45 -46
- mcli/ml/features/__init__.py +1 -1
- mcli/ml/features/ensemble_features.py +22 -27
- mcli/ml/features/recommendation_engine.py +30 -30
- mcli/ml/features/stock_features.py +29 -32
- mcli/ml/features/test_feature_engineering.py +10 -11
- mcli/ml/logging.py +4 -4
- mcli/ml/mlops/__init__.py +1 -1
- mcli/ml/mlops/data_versioning.py +29 -30
- mcli/ml/mlops/experiment_tracker.py +24 -24
- mcli/ml/mlops/model_serving.py +31 -34
- mcli/ml/mlops/pipeline_orchestrator.py +27 -35
- mcli/ml/models/__init__.py +5 -6
- mcli/ml/models/base_models.py +23 -23
- mcli/ml/models/ensemble_models.py +31 -31
- mcli/ml/models/recommendation_models.py +18 -19
- mcli/ml/models/test_models.py +14 -16
- mcli/ml/monitoring/__init__.py +1 -1
- mcli/ml/monitoring/drift_detection.py +32 -36
- mcli/ml/monitoring/metrics.py +2 -2
- mcli/ml/optimization/__init__.py +1 -1
- mcli/ml/optimization/optimize.py +1 -2
- mcli/ml/optimization/portfolio_optimizer.py +30 -32
- mcli/ml/predictions/__init__.py +1 -1
- mcli/ml/preprocessing/__init__.py +1 -1
- mcli/ml/preprocessing/data_cleaners.py +22 -23
- mcli/ml/preprocessing/feature_extractors.py +23 -26
- mcli/ml/preprocessing/ml_pipeline.py +23 -23
- mcli/ml/preprocessing/test_preprocessing.py +7 -8
- mcli/ml/scripts/populate_sample_data.py +0 -4
- mcli/ml/serving/serve.py +1 -2
- mcli/ml/tasks.py +17 -17
- mcli/ml/tests/test_integration.py +29 -30
- mcli/ml/tests/test_training_dashboard.py +21 -21
- mcli/ml/trading/__init__.py +1 -1
- mcli/ml/trading/migrations.py +5 -5
- mcli/ml/trading/models.py +21 -23
- mcli/ml/trading/paper_trading.py +16 -13
- mcli/ml/trading/risk_management.py +17 -18
- mcli/ml/trading/trading_service.py +25 -28
- mcli/ml/training/__init__.py +1 -1
- mcli/ml/training/train.py +0 -1
- mcli/public/oi/oi.py +1 -2
- mcli/self/completion_cmd.py +6 -10
- mcli/self/logs_cmd.py +19 -24
- mcli/self/migrate_cmd.py +22 -20
- mcli/self/redis_cmd.py +10 -11
- mcli/self/self_cmd.py +10 -18
- mcli/self/store_cmd.py +10 -12
- mcli/self/visual_cmd.py +9 -14
- mcli/self/zsh_cmd.py +2 -4
- mcli/workflow/daemon/async_command_database.py +23 -24
- mcli/workflow/daemon/async_process_manager.py +27 -29
- mcli/workflow/daemon/client.py +27 -33
- mcli/workflow/daemon/daemon.py +32 -36
- mcli/workflow/daemon/enhanced_daemon.py +24 -33
- mcli/workflow/daemon/process_cli.py +11 -12
- mcli/workflow/daemon/process_manager.py +23 -26
- mcli/workflow/daemon/test_daemon.py +4 -5
- mcli/workflow/dashboard/dashboard_cmd.py +0 -1
- mcli/workflow/doc_convert.py +15 -17
- mcli/workflow/gcloud/__init__.py +0 -1
- mcli/workflow/gcloud/gcloud.py +11 -8
- mcli/workflow/git_commit/ai_service.py +14 -15
- mcli/workflow/lsh_integration.py +9 -11
- mcli/workflow/model_service/client.py +26 -31
- mcli/workflow/model_service/download_and_run_efficient_models.py +10 -14
- mcli/workflow/model_service/lightweight_embedder.py +25 -35
- mcli/workflow/model_service/lightweight_model_server.py +26 -32
- mcli/workflow/model_service/lightweight_test.py +7 -10
- mcli/workflow/model_service/model_service.py +80 -91
- mcli/workflow/model_service/ollama_efficient_runner.py +14 -18
- mcli/workflow/model_service/openai_adapter.py +23 -23
- mcli/workflow/model_service/pdf_processor.py +21 -26
- mcli/workflow/model_service/test_efficient_runner.py +12 -16
- mcli/workflow/model_service/test_example.py +11 -13
- mcli/workflow/model_service/test_integration.py +3 -5
- mcli/workflow/model_service/test_new_features.py +7 -8
- mcli/workflow/notebook/converter.py +1 -1
- mcli/workflow/notebook/notebook_cmd.py +5 -6
- mcli/workflow/notebook/schema.py +0 -1
- mcli/workflow/notebook/validator.py +7 -3
- mcli/workflow/openai/openai.py +1 -2
- mcli/workflow/registry/registry.py +4 -1
- mcli/workflow/repo/repo.py +6 -7
- mcli/workflow/scheduler/cron_parser.py +16 -19
- mcli/workflow/scheduler/job.py +10 -10
- mcli/workflow/scheduler/monitor.py +15 -15
- mcli/workflow/scheduler/persistence.py +17 -18
- mcli/workflow/scheduler/scheduler.py +37 -38
- mcli/workflow/secrets/__init__.py +1 -1
- mcli/workflow/sync/test_cmd.py +0 -1
- mcli/workflow/wakatime/__init__.py +5 -9
- mcli/workflow/wakatime/wakatime.py +1 -2
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/METADATA +1 -1
- mcli_framework-7.12.3.dist-info/RECORD +279 -0
- mcli_framework-7.12.1.dist-info/RECORD +0 -279
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/WHEEL +0 -0
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.12.1.dist-info → mcli_framework-7.12.3.dist-info}/top_level.txt +0 -0
mcli/ml/experimentation/ab_testing.py
CHANGED

@@ -1,16 +1,15 @@
-"""A/B Testing framework for ML model experiments"""
+"""A/B Testing framework for ML model experiments."""
 
-import asyncio
 import hashlib
 import json
 import logging
 import random
 import uuid
 from dataclasses import asdict, dataclass, field
-from datetime import datetime
+from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Any,
+from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -34,7 +33,7 @@ class VariantType(Enum):
 
 @dataclass
 class Variant:
-    """A/B test variant configuration"""
+    """A/B test variant configuration."""
 
     id: str
     name: str
@@ -47,7 +46,7 @@ class Variant:
 
 @dataclass
 class Metric:
-    """A/B test metric definition"""
+    """A/B test metric definition."""
 
     name: str
     type: str  # "binary", "continuous", "count"
@@ -60,7 +59,7 @@ class Metric:
 
 @dataclass
 class ExperimentConfig:
-    """A/B test experiment configuration"""
+    """A/B test experiment configuration."""
 
     id: str
    name: str
@@ -93,7 +92,7 @@ class ExperimentConfig:
 
 @dataclass
 class UserAssignment:
-    """User assignment to experiment variant"""
+    """User assignment to experiment variant."""
 
     user_id: str
     experiment_id: str
@@ -104,7 +103,7 @@ class UserAssignment:
 
 @dataclass
 class ExperimentResult:
-    """Results of an A/B test experiment"""
+    """Results of an A/B test experiment."""
 
     experiment_id: str
     variant_results: Dict[str, Dict[str, Any]]
@@ -121,13 +120,13 @@ class ExperimentResult:
 
 
 class TrafficSplitter:
-    """Handle traffic splitting for A/B tests"""
+    """Handle traffic splitting for A/B tests."""
 
     def __init__(self):
         self.assignments = {}
 
     def assign_variant(self, user_id: str, experiment: ExperimentConfig) -> str:
-        """Assign user to experiment variant"""
+        """Assign user to experiment variant."""
         # Check if user already assigned
         cache_key = f"{user_id}:{experiment.id}"
         if cache_key in self.assignments:
@@ -159,13 +158,13 @@ class TrafficSplitter:
         return control_variant.id
 
     def get_assignment(self, user_id: str, experiment_id: str) -> Optional[str]:
-        """Get existing assignment"""
+        """Get existing assignment."""
         cache_key = f"{user_id}:{experiment_id}"
         return self.assignments.get(cache_key)
 
 
 class MetricsCollector:
-    """Collect and store experiment metrics"""
+    """Collect and store experiment metrics."""
 
     def __init__(self, storage_path: Path = Path("experiments/metrics")):
         self.storage_path = storage_path
@@ -181,7 +180,7 @@ class MetricsCollector:
         value: Union[float, int, bool],
         timestamp: Optional[datetime] = None,
     ):
-        """Record a metric value for a user"""
+        """Record a metric value for a user."""
         if timestamp is None:
             timestamp = datetime.now()
 
@@ -201,7 +200,7 @@ class MetricsCollector:
             self.flush_metrics()
 
     def flush_metrics(self):
-        """Flush metrics buffer to storage"""
+        """Flush metrics buffer to storage."""
         if not self.metrics_buffer:
             return
 
@@ -215,7 +214,7 @@ class MetricsCollector:
         self.metrics_buffer.clear()
 
     def get_experiment_metrics(self, experiment_id: str) -> pd.DataFrame:
-        """Get all metrics for an experiment"""
+        """Get all metrics for an experiment."""
         all_metrics = []
 
         # Load from all metric files
@@ -234,7 +233,7 @@ class MetricsCollector:
 
 
 class StatisticalAnalyzer:
-    """Perform statistical analysis on A/B test results"""
+    """Perform statistical analysis on A/B test results."""
 
     def __init__(self, significance_level: float = 0.05):
         self.significance_level = significance_level
@@ -242,7 +241,7 @@ class StatisticalAnalyzer:
     def analyze_experiment(
         self, experiment: ExperimentConfig, metrics_df: pd.DataFrame
     ) -> ExperimentResult:
-        """Analyze experiment results"""
+        """Analyze experiment results."""
         if metrics_df.empty:
             return self._empty_result(experiment.id)
 
@@ -298,7 +297,7 @@ class StatisticalAnalyzer:
     def _analyze_variant_metrics(
         self, variant_df: pd.DataFrame, metrics_config: List[Metric]
     ) -> Dict[str, Any]:
-        """Analyze metrics for a single variant"""
+        """Analyze metrics for a single variant."""
         if variant_df.empty:
             return {}
 
@@ -341,7 +340,7 @@ class StatisticalAnalyzer:
         treatment_id: str,
         metrics_config: List[Metric],
     ) -> tuple:
-        """Compare treatment variant against control"""
+        """Compare treatment variant against control."""
         tests = {}
         intervals = {}
 
@@ -379,7 +378,7 @@ class StatisticalAnalyzer:
         return tests, intervals
 
     def _binary_test(self, control: pd.Series, treatment: pd.Series) -> Dict[str, Any]:
-        """Perform statistical test for binary metric"""
+        """Perform statistical test for binary metric."""
         control_success = control.sum()
         control_total = len(control)
         treatment_success = treatment.sum()
@@ -409,7 +408,7 @@ class StatisticalAnalyzer:
         }
 
     def _continuous_test(self, control: pd.Series, treatment: pd.Series) -> Dict[str, Any]:
-        """Perform statistical test for continuous metric"""
+        """Perform statistical test for continuous metric."""
         # Two-sample t-test
         statistic, p_value = stats.ttest_ind(treatment, control)
 
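Note: stats.ttest_ind above is SciPy's two-sample t-test. For reference, a minimal standalone sketch of the same call on synthetic data; only the ttest_ind line mirrors the module, the data and the 0.05 threshold are illustrative:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
control = rng.normal(loc=10.0, scale=2.0, size=500)    # control variant metric values
treatment = rng.normal(loc=10.4, scale=2.0, size=500)  # treatment variant metric values

# Same call as in the diff: t statistic and two-sided p-value.
statistic, p_value = stats.ttest_ind(treatment, control)
print(f"t={statistic:.3f}, p={p_value:.4f}, significant={p_value < 0.05}")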
@@ -435,7 +434,7 @@ class StatisticalAnalyzer:
         }
 
     def _count_test(self, control: pd.Series, treatment: pd.Series) -> Dict[str, Any]:
-        """Perform statistical test for count metric"""
+        """Perform statistical test for count metric."""
         # Poisson test (approximated with normal for large samples)
         control_sum = control.sum()
         treatment_sum = treatment.sum()
@@ -461,7 +460,7 @@ class StatisticalAnalyzer:
         }
 
     def _binary_confidence_interval(self, data: pd.Series, confidence: float = 0.95) -> tuple:
-        """Calculate confidence interval for binary metric"""
+        """Calculate confidence interval for binary metric."""
         n = len(data)
         p = data.mean()
         z = stats.norm.ppf(1 - (1 - confidence) / 2)
@@ -469,7 +468,7 @@ class StatisticalAnalyzer:
         return (max(0, p - margin), min(1, p + margin))
 
     def _continuous_confidence_interval(self, data: pd.Series, confidence: float = 0.95) -> tuple:
-        """Calculate confidence interval for continuous metric"""
+        """Calculate confidence interval for continuous metric."""
         n = len(data)
         mean = data.mean()
         sem = data.std() / np.sqrt(n) if n > 0 else 0
@@ -478,7 +477,7 @@ class StatisticalAnalyzer:
         return (mean - margin, mean + margin)
 
     def _binary_effect_interval(self, control: pd.Series, treatment: pd.Series) -> tuple:
-        """Calculate confidence interval for binary effect size"""
+        """Calculate confidence interval for binary effect size."""
         p1 = control.mean()
         p2 = treatment.mean()
         n1 = len(control)
@@ -492,7 +491,7 @@ class StatisticalAnalyzer:
         return (diff - margin, diff + margin)
 
     def _continuous_effect_interval(self, control: pd.Series, treatment: pd.Series) -> tuple:
-        """Calculate confidence interval for continuous effect size"""
+        """Calculate confidence interval for continuous effect size."""
         diff = treatment.mean() - control.mean()
         n1 = len(control)
         n2 = len(treatment)
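Note: _binary_confidence_interval above reads like the standard normal-approximation (Wald) interval, p ± z·sqrt(p(1−p)/n). The margin line itself is not visible in the hunk, so the sqrt term below is an assumption; n, p, z, and the clamping to [0, 1] are taken from the diff:

import numpy as np
from scipy import stats

def binary_confidence_interval(data: np.ndarray, confidence: float = 0.95) -> tuple:
    n = len(data)
    p = data.mean()  # observed success rate
    z = stats.norm.ppf(1 - (1 - confidence) / 2)  # ~1.96 for 95%
    margin = z * np.sqrt(p * (1 - p) / n)  # assumed Wald margin, not shown in the hunk
    return (max(0.0, p - margin), min(1.0, p + margin))

print(binary_confidence_interval(np.array([1, 0, 1, 1, 0, 1, 0, 1])))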
@@ -510,10 +509,10 @@ class StatisticalAnalyzer:
     def _generate_recommendations(
         self, variant_data: Dict, statistical_tests: Dict, metrics_config: List[Metric]
     ) -> List[str]:
-        """Generate recommendations based on results"""
+        """Generate recommendations based on results."""
         recommendations = []
 
-
+        [m for m in metrics_config if m.primary]
 
         for variant_id, tests in statistical_tests.items():
             significant_improvements = []
@@ -559,7 +558,7 @@ class StatisticalAnalyzer:
     def _determine_winner(
         self, statistical_tests: Dict, metrics_config: List[Metric]
     ) -> Optional[str]:
-        """Determine winning variant based on primary metrics"""
+        """Determine winning variant based on primary metrics."""
         primary_metrics = [m for m in metrics_config if m.primary]
 
         if not primary_metrics:
@@ -575,7 +574,7 @@ class StatisticalAnalyzer:
             if test and test.get("significant", False):
                 effect_size = test.get("effect_size", 0)
 
-                if metric.goal == "increase" and effect_size > 0:
+                if metric.goal == "increase" and effect_size > 0:  # noqa: SIM114
                     score += 1
                 elif metric.goal == "decrease" and effect_size < 0:
                     score += 1
@@ -591,7 +590,7 @@ class StatisticalAnalyzer:
         return None
 
     def _empty_result(self, experiment_id: str) -> ExperimentResult:
-        """Return empty result for experiments with no data"""
+        """Return empty result for experiments with no data."""
         return ExperimentResult(
             experiment_id=experiment_id,
             variant_results={},
@@ -603,7 +602,7 @@ class StatisticalAnalyzer:
 
 
 class ABTestingFramework:
-    """Main A/B testing framework orchestrator"""
+    """Main A/B testing framework orchestrator."""
 
     def __init__(self, storage_path: Path = Path("experiments")):
         self.storage_path = storage_path
@@ -617,7 +616,7 @@ class ABTestingFramework:
         self.load_experiments()
 
     def create_experiment(self, config: ExperimentConfig) -> str:
-        """Create new A/B test experiment"""
+        """Create new A/B test experiment."""
         # Validate configuration
         self._validate_experiment_config(config)
 
@@ -637,7 +636,7 @@ class ABTestingFramework:
         return config.id
 
     def start_experiment(self, experiment_id: str):
-        """Start an experiment"""
+        """Start an experiment."""
         if experiment_id not in self.experiments:
             raise ValueError(f"Experiment {experiment_id} not found")
 
@@ -649,7 +648,7 @@ class ABTestingFramework:
         logger.info(f"Started experiment: {experiment.name}")
 
     def stop_experiment(self, experiment_id: str):
-        """Stop an experiment"""
+        """Stop an experiment."""
         if experiment_id not in self.experiments:
             raise ValueError(f"Experiment {experiment_id} not found")
 
@@ -661,7 +660,7 @@ class ABTestingFramework:
         logger.info(f"Stopped experiment: {experiment.name}")
 
     def assign_user(self, user_id: str, experiment_id: str) -> str:
-        """Assign user to experiment variant"""
+        """Assign user to experiment variant."""
         if experiment_id not in self.experiments:
             return "control"
 
@@ -683,7 +682,7 @@ class ABTestingFramework:
     def record_metric(
         self, user_id: str, experiment_id: str, metric_name: str, value: Union[float, int, bool]
     ):
-        """Record metric for user"""
+        """Record metric for user."""
         # Get user's variant assignment
         variant_id = self.traffic_splitter.get_assignment(user_id, experiment_id)
         if not variant_id:
@@ -693,7 +692,7 @@ class ABTestingFramework:
         self.metrics_collector.record_metric(user_id, experiment_id, variant_id, metric_name, value)
 
     def analyze_experiment(self, experiment_id: str) -> ExperimentResult:
-        """Analyze experiment results"""
+        """Analyze experiment results."""
         if experiment_id not in self.experiments:
             raise ValueError(f"Experiment {experiment_id} not found")
 
@@ -703,7 +702,7 @@ class ABTestingFramework:
         return self.analyzer.analyze_experiment(experiment, metrics_df)
 
     def get_experiment_summary(self, experiment_id: str) -> Dict[str, Any]:
-        """Get experiment summary"""
+        """Get experiment summary."""
         if experiment_id not in self.experiments:
             raise ValueError(f"Experiment {experiment_id} not found")
 
@@ -722,7 +721,7 @@ class ABTestingFramework:
         return summary
 
     def list_experiments(self) -> List[Dict[str, Any]]:
-        """List all experiments"""
+        """List all experiments."""
         return [
             {
                 "id": exp.id,
@@ -737,7 +736,7 @@ class ABTestingFramework:
         ]
 
     def save_experiment(self, experiment: ExperimentConfig):
-        """Save experiment to storage"""
+        """Save experiment to storage."""
         experiment_file = self.storage_path / f"experiment_{experiment.id}.json"
 
         # Convert to dict and handle non-serializable types
@@ -760,7 +759,7 @@ class ABTestingFramework:
             json.dump(experiment_dict, f, indent=2)
 
     def load_experiments(self):
-        """Load experiments from storage"""
+        """Load experiments from storage."""
         for experiment_file in self.storage_path.glob("experiment_*.json"):
             try:
                 with open(experiment_file, "r") as f:
@@ -774,7 +773,7 @@ class ABTestingFramework:
                 logger.error(f"Failed to load experiment from {experiment_file}: {e}")
 
     def _dict_to_experiment(self, experiment_dict: Dict) -> ExperimentConfig:
-        """Convert dictionary back to ExperimentConfig"""
+        """Convert dictionary back to ExperimentConfig."""
         # Convert datetime strings back to objects
         if experiment_dict.get("start_date"):
             experiment_dict["start_date"] = datetime.fromisoformat(experiment_dict["start_date"])
@@ -800,7 +799,7 @@ class ABTestingFramework:
         return ExperimentConfig(**experiment_dict)
 
     def _validate_experiment_config(self, config: ExperimentConfig):
-        """Validate experiment configuration"""
+        """Validate experiment configuration."""
         # Check traffic percentages sum to 100%
         total_traffic = sum(v.traffic_percentage for v in config.variants)
         if abs(total_traffic - 100.0) > 0.01:
@@ -886,7 +885,7 @@ if __name__ == "__main__":
     # Analyze results
     results = framework.analyze_experiment(experiment_id)
 
-    print(
+    print("Experiment Results:")
     print(f"Total Users: {results.total_users}")
     print(f"Statistical Significance: {results.statistical_significance}")
     print(f"Winner: {results.winner_variant}")
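Note: the hunks above expose the framework's public flow. A hedged usage sketch follows; the method names, the 100% traffic-split rule, and the printed result fields come from the diff, while the ExperimentConfig construction is left elided because its full field list is not visible in these hunks:

from pathlib import Path

from mcli.ml.experimentation.ab_testing import ABTestingFramework

framework = ABTestingFramework(storage_path=Path("experiments"))

config = ...  # an ExperimentConfig; variant traffic_percentage values must sum to 100.0
experiment_id = framework.create_experiment(config)
framework.start_experiment(experiment_id)

variant_id = framework.assign_user("user-123", experiment_id)  # cached per user by TrafficSplitter
framework.record_metric("user-123", experiment_id, "conversion", True)

results = framework.analyze_experiment(experiment_id)  # ExperimentResult
print(f"Total Users: {results.total_users}")
print(f"Winner: {results.winner_variant}")
framework.stop_experiment(experiment_id)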
mcli/ml/features/ensemble_features.py
CHANGED
@@ -1,16 +1,13 @@
-"""Ensemble feature engineering and feature interaction systems"""
+"""Ensemble feature engineering and feature interaction systems."""
 
 import logging
-import warnings
 from dataclasses import dataclass
-from datetime import datetime, timedelta
 from itertools import combinations
-from typing import Any,
+from typing import Any, Dict, List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
 from sklearn.cluster import KMeans
-from sklearn.decomposition import PCA
 from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression
 from sklearn.preprocessing import PolynomialFeatures
 
@@ -19,7 +16,7 @@ logger = logging.getLogger(__name__)
 
 @dataclass
 class EnsembleFeatureConfig:
-    """Configuration for ensemble feature engineering"""
+    """Configuration for ensemble feature engineering."""
 
     # Feature interaction settings
     max_interaction_degree: int = 2
@@ -62,7 +59,7 @@ class EnsembleFeatureConfig:
 
 
 class EnsembleFeatureBuilder:
-    """Builds comprehensive feature sets for ensemble models"""
+    """Builds comprehensive feature sets for ensemble models."""
 
     def __init__(self, config: Optional[EnsembleFeatureConfig] = None):
         self.config = config or EnsembleFeatureConfig()
@@ -77,7 +74,7 @@ class EnsembleFeatureBuilder:
         include_clustering: bool = True,
         include_rolling: bool = True,
     ) -> pd.DataFrame:
-        """Build comprehensive feature set for ensemble models"""
+        """Build comprehensive feature set for ensemble models."""
 
         logger.info("Building ensemble features")
         df = base_features.copy()
@@ -114,7 +111,7 @@ class EnsembleFeatureBuilder:
         return df
 
     def _get_numerical_features(self, df: pd.DataFrame) -> List[str]:
-        """Get list of numerical feature columns"""
+        """Get list of numerical feature columns."""
         numerical_features = []
         for col in df.columns:
             if (
@@ -130,17 +127,15 @@ class EnsembleFeatureBuilder:
     def _add_rolling_features(
         self, df: pd.DataFrame, numerical_features: List[str]
     ) -> pd.DataFrame:
-        """Add rolling window aggregation features"""
+        """Add rolling window aggregation features."""
         logger.info("Adding rolling aggregation features")
 
         # Ensure we have date column for time-based rolling
         if "transaction_date_dt" not in df.columns:
             # Create synthetic time index if no date column
             df["synthetic_time_index"] = range(len(df))
-            time_col = "synthetic_time_index"
         else:
             df = df.sort_values("transaction_date_dt")
-            time_col = "transaction_date_dt"
 
         # Select top features for rolling (avoid too many features)
         features_for_rolling = numerical_features[:20]
@@ -182,7 +177,7 @@ class EnsembleFeatureBuilder:
     def _add_interaction_features(
         self, df: pd.DataFrame, numerical_features: List[str], target_column: Optional[str]
     ) -> pd.DataFrame:
-        """Add feature interaction terms"""
+        """Add feature interaction terms."""
         logger.info("Adding feature interaction terms")
 
         # Limit features to avoid combinatorial explosion
@@ -194,7 +189,7 @@ class EnsembleFeatureBuilder:
             try:
                 corr = abs(df[feature].corr(df[target_column]))
                 feature_scores.append((feature, corr))
-            except:
+            except Exception:
                 feature_scores.append((feature, 0))
 
             feature_scores.sort(key=lambda x: x[1], reverse=True)
@@ -208,7 +203,7 @@ class EnsembleFeatureBuilder:
             try:
                 var = df[feature].var()
                 feature_vars.append((feature, var))
-            except:
+            except Exception:
                 feature_vars.append((feature, 0))
 
             feature_vars.sort(key=lambda x: x[1], reverse=True)
@@ -259,7 +254,7 @@ class EnsembleFeatureBuilder:
     def _add_polynomial_features(
         self, df: pd.DataFrame, selected_features: List[str]
     ) -> pd.DataFrame:
-        """Add polynomial features for key variables"""
+        """Add polynomial features for key variables."""
         logger.info("Adding polynomial features")
 
         # Limit to top features to avoid memory issues
@@ -297,7 +292,7 @@ class EnsembleFeatureBuilder:
         return df
 
     def _add_clustering_features(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Add clustering-based features"""
+        """Add clustering-based features."""
         logger.info("Adding clustering features")
 
         # Select features for clustering
@@ -353,7 +348,7 @@ class EnsembleFeatureBuilder:
     def _add_statistical_features(
         self, df: pd.DataFrame, numerical_features: List[str]
     ) -> pd.DataFrame:
-        """Add statistical transformation features"""
+        """Add statistical transformation features."""
         logger.info("Adding statistical features")
 
         # Select subset of features for statistical transforms
@@ -392,7 +387,7 @@ class EnsembleFeatureBuilder:
         return df
 
     def _add_rank_features(self, df: pd.DataFrame, numerical_features: List[str]) -> pd.DataFrame:
-        """Add rank-based features"""
+        """Add rank-based features."""
         logger.info("Adding rank features")
 
         # Select subset for ranking
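Note: _add_rolling_features above sorts by transaction_date_dt (or falls back to a synthetic index) and limits itself to the first 20 numerical columns. A standalone sketch of the pandas rolling pattern it relies on; the window sizes and output column names are assumptions, since the loop body is not part of the visible hunks:

import pandas as pd

df = pd.DataFrame({"price": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df["synthetic_time_index"] = range(len(df))  # fallback index, as in the diff

for window in (3, 5):  # assumed window sizes
    df[f"price_rolling_mean_{window}"] = df["price"].rolling(window, min_periods=1).mean()
    df[f"price_rolling_std_{window}"] = df["price"].rolling(window, min_periods=1).std()

print(df)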
@@ -418,7 +413,7 @@ class EnsembleFeatureBuilder:
 
 
 class FeatureInteractionEngine:
-    """Advanced feature interaction discovery and generation"""
+    """Advanced feature interaction discovery and generation."""
 
     def __init__(self, config: Optional[EnsembleFeatureConfig] = None):
         self.config = config or EnsembleFeatureConfig()
@@ -426,7 +421,7 @@ class FeatureInteractionEngine:
     def discover_interactions(
         self, df: pd.DataFrame, target_column: str, max_interactions: int = 50
     ) -> List[Tuple[str, str, float]]:
-        """Discover important feature interactions based on target correlation"""
+        """Discover important feature interactions based on target correlation."""
 
         numerical_features = self._get_numerical_features(df)
         interactions = []
@@ -447,7 +442,7 @@ class FeatureInteractionEngine:
                     if not np.isnan(correlation) and correlation > 0.1:
                         interactions.append((feature1, feature2, correlation))
 
-                except Exception
+                except Exception:
                     continue
 
         # Sort by correlation strength
@@ -457,7 +452,7 @@ class FeatureInteractionEngine:
         return interactions[:max_interactions]
 
     def _get_numerical_features(self, df: pd.DataFrame) -> List[str]:
-        """Get numerical features for interaction discovery"""
+        """Get numerical features for interaction discovery."""
         return [
             col
             for col in df.columns
@@ -469,7 +464,7 @@ class FeatureInteractionEngine:
     def generate_advanced_interactions(
         self, df: pd.DataFrame, feature_pairs: List[Tuple[str, str]]
     ) -> pd.DataFrame:
-        """Generate advanced interaction terms for discovered feature pairs"""
+        """Generate advanced interaction terms for discovered feature pairs."""
 
         df_enhanced = df.copy()
 
@@ -503,7 +498,7 @@ class FeatureInteractionEngine:
 
 
 class DynamicFeatureSelector:
-    """Dynamic feature selection based on multiple criteria"""
+    """Dynamic feature selection based on multiple criteria."""
 
     def __init__(self, config: Optional[EnsembleFeatureConfig] = None):
         self.config = config or EnsembleFeatureConfig()
@@ -514,7 +509,7 @@ class DynamicFeatureSelector:
         target_column: str,
         selection_methods: Optional[List[str]] = None,
    ) -> Tuple[pd.DataFrame, Dict[str, Any]]:
-        """Select features using multiple criteria"""
+        """Select features using multiple criteria."""
 
         if selection_methods is None:
             selection_methods = ["variance", "correlation", "mutual_info"]
@@ -558,7 +553,7 @@ class DynamicFeatureSelector:
         return result_df, selection_info
 
     def _apply_selection_method(self, X: pd.DataFrame, y: pd.Series, method: str) -> List[str]:
-        """Apply specific feature selection method"""
+        """Apply specific feature selection method."""
 
         try:
             if method == "variance":