mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +6 -2
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +68 -57
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +216 -150
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +62 -50
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +31 -16
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
  90. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
  91. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/experimentation/ab_testing.py

@@ -6,14 +6,15 @@ import json
  import logging
  import random
  import uuid
- from dataclasses import dataclass, field, asdict
+ from dataclasses import asdict, dataclass, field
  from datetime import datetime, timedelta
  from enum import Enum
- from typing import Dict, Any, List, Optional, Union, Callable
- import pandas as pd
+ from pathlib import Path
+ from typing import Any, Callable, Dict, List, Optional, Union
+
  import numpy as np
+ import pandas as pd
  from scipy import stats
- from pathlib import Path

  logger = logging.getLogger(__name__)

@@ -34,6 +35,7 @@ class VariantType(Enum):
  @dataclass
  class Variant:
  """A/B test variant configuration"""
+
  id: str
  name: str
  type: VariantType
@@ -46,6 +48,7 @@ class Variant:
  @dataclass
  class Metric:
  """A/B test metric definition"""
+
  name: str
  type: str # "binary", "continuous", "count"
  aggregation: str # "mean", "sum", "count", "rate"
@@ -58,6 +61,7 @@ class Metric:
  @dataclass
  class ExperimentConfig:
  """A/B test experiment configuration"""
+
  id: str
  name: str
  description: str
@@ -90,6 +94,7 @@ class ExperimentConfig:
  @dataclass
  class UserAssignment:
  """User assignment to experiment variant"""
+
  user_id: str
  experiment_id: str
  variant_id: str
@@ -100,6 +105,7 @@ class UserAssignment:
  @dataclass
  class ExperimentResult:
  """Results of an A/B test experiment"""
+
  experiment_id: str
  variant_results: Dict[str, Dict[str, Any]]
  statistical_tests: Dict[str, Dict[str, Any]]
@@ -145,7 +151,10 @@ class TrafficSplitter:
  return variant.id

  # Default to control
- control_variant = next((v for v in experiment.variants if v.type == VariantType.CONTROL), experiment.variants[0])
+ control_variant = next(
+ (v for v in experiment.variants if v.type == VariantType.CONTROL),
+ experiment.variants[0],
+ )
  self.assignments[cache_key] = control_variant.id
  return control_variant.id

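Note on the hunk above: only the fall-back to the control variant is reformatted here; the assignment logic itself is not part of this diff. For readers unfamiliar with the pattern, below is a minimal sketch of deterministic traffic splitting of this general shape. It is an illustration under assumptions, not the package's actual implementation: assign_variant_sketch is hypothetical, while Variant fields (id, type, traffic_percentage) and VariantType are the names visible elsewhere in this file.

    import hashlib

    from mcli.ml.experimentation.ab_testing import VariantType

    def assign_variant_sketch(user_id: str, experiment_id: str, variants) -> str:
        """Hypothetical splitter: stable hash -> bucket in [0, 100) -> variant."""
        digest = hashlib.md5(f"{experiment_id}:{user_id}".encode()).hexdigest()
        bucket = int(digest, 16) % 10000 / 100.0  # deterministic value per (user, experiment)
        cumulative = 0.0
        for variant in variants:
            cumulative += variant.traffic_percentage
            if bucket < cumulative:
                return variant.id
        # Same fall-back as the hunk above: prefer the control variant, else the first variant
        control = next((v for v in variants if v.type == VariantType.CONTROL), variants[0])
        return control.id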
@@ -163,9 +172,15 @@ class MetricsCollector:
  self.storage_path.mkdir(parents=True, exist_ok=True)
  self.metrics_buffer = []

- def record_metric(self, user_id: str, experiment_id: str, variant_id: str,
- metric_name: str, value: Union[float, int, bool],
- timestamp: Optional[datetime] = None):
+ def record_metric(
+ self,
+ user_id: str,
+ experiment_id: str,
+ variant_id: str,
+ metric_name: str,
+ value: Union[float, int, bool],
+ timestamp: Optional[datetime] = None,
+ ):
  """Record a metric value for a user"""
  if timestamp is None:
  timestamp = datetime.now()
@@ -176,7 +191,7 @@ class MetricsCollector:
  "variant_id": variant_id,
  "metric_name": metric_name,
  "value": value,
- "timestamp": timestamp.isoformat()
+ "timestamp": timestamp.isoformat(),
  }

  self.metrics_buffer.append(metric_record)
@@ -193,7 +208,7 @@ class MetricsCollector:
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  filename = self.storage_path / f"metrics_{timestamp}.json"

- with open(filename, 'w') as f:
+ with open(filename, "w") as f:
  json.dump(self.metrics_buffer, f, indent=2)

  logger.info(f"Flushed {len(self.metrics_buffer)} metrics to {filename}")
@@ -205,7 +220,7 @@ class MetricsCollector:

  # Load from all metric files
  for file_path in self.storage_path.glob("metrics_*.json"):
- with open(file_path, 'r') as f:
+ with open(file_path, "r") as f:
  metrics = json.load(f)
  experiment_metrics = [m for m in metrics if m["experiment_id"] == experiment_id]
  all_metrics.extend(experiment_metrics)
@@ -214,7 +229,7 @@ class MetricsCollector:
  return pd.DataFrame()

  df = pd.DataFrame(all_metrics)
- df['timestamp'] = pd.to_datetime(df['timestamp'])
+ df["timestamp"] = pd.to_datetime(df["timestamp"])
  return df


@@ -224,8 +239,9 @@ class StatisticalAnalyzer:
  def __init__(self, significance_level: float = 0.05):
  self.significance_level = significance_level

- def analyze_experiment(self, experiment: ExperimentConfig,
- metrics_df: pd.DataFrame) -> ExperimentResult:
+ def analyze_experiment(
+ self, experiment: ExperimentConfig, metrics_df: pd.DataFrame
+ ) -> ExperimentResult:
  """Analyze experiment results"""
  if metrics_df.empty:
  return self._empty_result(experiment.id)
@@ -233,14 +249,18 @@ class StatisticalAnalyzer:
  # Group metrics by variant
  variant_data = {}
  for variant in experiment.variants:
- variant_metrics = metrics_df[metrics_df['variant_id'] == variant.id]
- variant_data[variant.id] = self._analyze_variant_metrics(variant_metrics, experiment.metrics)
+ variant_metrics = metrics_df[metrics_df["variant_id"] == variant.id]
+ variant_data[variant.id] = self._analyze_variant_metrics(
+ variant_metrics, experiment.metrics
+ )

  # Perform statistical tests
  statistical_tests = {}
  confidence_intervals = {}

- control_variant = next((v for v in experiment.variants if v.type == VariantType.CONTROL), None)
+ control_variant = next(
+ (v for v in experiment.variants if v.type == VariantType.CONTROL), None
+ )
  if control_variant:
  for variant in experiment.variants:
  if variant.type == VariantType.TREATMENT:
@@ -265,21 +285,26 @@ class StatisticalAnalyzer:
  confidence_intervals=confidence_intervals,
  recommendations=recommendations,
  created_at=datetime.now(),
- total_users=len(metrics_df['user_id'].unique()),
- duration_days=(datetime.now() - experiment.start_date).days if experiment.start_date else 0,
- statistical_significance=any(test.get('significant', False) for test in statistical_tests.values()),
- winner_variant=winner
+ total_users=len(metrics_df["user_id"].unique()),
+ duration_days=(
+ (datetime.now() - experiment.start_date).days if experiment.start_date else 0
+ ),
+ statistical_significance=any(
+ test.get("significant", False) for test in statistical_tests.values()
+ ),
+ winner_variant=winner,
  )

- def _analyze_variant_metrics(self, variant_df: pd.DataFrame,
- metrics_config: List[Metric]) -> Dict[str, Any]:
+ def _analyze_variant_metrics(
+ self, variant_df: pd.DataFrame, metrics_config: List[Metric]
+ ) -> Dict[str, Any]:
  """Analyze metrics for a single variant"""
  if variant_df.empty:
  return {}

  results = {}
  for metric in metrics_config:
- metric_data = variant_df[variant_df['metric_name'] == metric.name]['value']
+ metric_data = variant_df[variant_df["metric_name"] == metric.name]["value"]

  if metric_data.empty:
  continue
@@ -289,7 +314,7 @@ class StatisticalAnalyzer:
  "count": len(metric_data),
  "success_rate": metric_data.mean(),
  "std": metric_data.std(),
- "confidence_interval": self._binary_confidence_interval(metric_data)
+ "confidence_interval": self._binary_confidence_interval(metric_data),
  }
  elif metric.type == "continuous":
  results[metric.name] = {
@@ -297,34 +322,39 @@ class StatisticalAnalyzer:
  "mean": metric_data.mean(),
  "std": metric_data.std(),
  "median": metric_data.median(),
- "confidence_interval": self._continuous_confidence_interval(metric_data)
+ "confidence_interval": self._continuous_confidence_interval(metric_data),
  }
  elif metric.type == "count":
  results[metric.name] = {
  "count": len(metric_data),
  "sum": metric_data.sum(),
  "mean": metric_data.mean(),
- "rate_per_user": metric_data.sum() / len(variant_df['user_id'].unique())
+ "rate_per_user": metric_data.sum() / len(variant_df["user_id"].unique()),
  }

  return results

- def _compare_variants(self, metrics_df: pd.DataFrame, control_id: str,
- treatment_id: str, metrics_config: List[Metric]) -> tuple:
+ def _compare_variants(
+ self,
+ metrics_df: pd.DataFrame,
+ control_id: str,
+ treatment_id: str,
+ metrics_config: List[Metric],
+ ) -> tuple:
  """Compare treatment variant against control"""
  tests = {}
  intervals = {}

  for metric in metrics_config:
  control_data = metrics_df[
- (metrics_df['variant_id'] == control_id) &
- (metrics_df['metric_name'] == metric.name)
- ]['value']
+ (metrics_df["variant_id"] == control_id)
+ & (metrics_df["metric_name"] == metric.name)
+ ]["value"]

  treatment_data = metrics_df[
- (metrics_df['variant_id'] == treatment_id) &
- (metrics_df['metric_name'] == metric.name)
- ]['value']
+ (metrics_df["variant_id"] == treatment_id)
+ & (metrics_df["metric_name"] == metric.name)
+ ]["value"]

  if control_data.empty or treatment_data.empty:
  continue
@@ -342,7 +372,9 @@ class StatisticalAnalyzer:
  if metric.type == "binary":
  intervals[metric.name] = self._binary_effect_interval(control_data, treatment_data)
  else:
- intervals[metric.name] = self._continuous_effect_interval(control_data, treatment_data)
+ intervals[metric.name] = self._continuous_effect_interval(
+ control_data, treatment_data
+ )

  return tests, intervals

@@ -354,8 +386,10 @@ class StatisticalAnalyzer:
  treatment_total = len(treatment)

  # Chi-square test
- observed = [[control_success, control_total - control_success],
- [treatment_success, treatment_total - treatment_success]]
+ observed = [
+ [control_success, control_total - control_success],
+ [treatment_success, treatment_total - treatment_success],
+ ]

  chi2, p_value, _, _ = stats.chi2_contingency(observed)

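For context on the test being reformatted above: scipy.stats.chi2_contingency takes the 2x2 table of successes and failures and returns the chi-square statistic, the p-value, the degrees of freedom, and the expected counts. A small standalone check with made-up numbers (not data from the package):

    from scipy import stats

    # 2x2 contingency table: [successes, failures] per variant (illustrative values)
    observed = [
        [120, 880],  # control: 12% success
        [150, 850],  # treatment: 15% success
    ]
    chi2, p_value, dof, expected = stats.chi2_contingency(observed)
    print(f"chi2={chi2:.2f}, p={p_value:.4f}, dof={dof}")  # dof == 1 for a 2x2 table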
@@ -371,7 +405,7 @@ class StatisticalAnalyzer:
  "significant": p_value < self.significance_level,
  "effect_size": effect_size,
  "control_rate": control_rate,
- "treatment_rate": treatment_rate
+ "treatment_rate": treatment_rate,
  }

  def _continuous_test(self, control: pd.Series, treatment: pd.Series) -> Dict[str, Any]:
@@ -380,9 +414,10 @@ class StatisticalAnalyzer:
  statistic, p_value = stats.ttest_ind(treatment, control)

  # Effect size (Cohen's d)
- pooled_std = np.sqrt(((len(control) - 1) * control.std()**2 +
- (len(treatment) - 1) * treatment.std()**2) /
- (len(control) + len(treatment) - 2))
+ pooled_std = np.sqrt(
+ ((len(control) - 1) * control.std() ** 2 + (len(treatment) - 1) * treatment.std() ** 2)
+ / (len(control) + len(treatment) - 2)
+ )

  cohens_d = (treatment.mean() - control.mean()) / pooled_std if pooled_std > 0 else 0

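The expression reformatted above is the standard pooled-standard-deviation form of Cohen's d: d = (mean_treatment - mean_control) / s_pooled, where s_pooled^2 = ((n_c - 1) * s_c^2 + (n_t - 1) * s_t^2) / (n_c + n_t - 2). A quick numeric sanity check with invented samples:

    import numpy as np
    import pandas as pd

    control = pd.Series([0.61, 0.64, 0.66, 0.70, 0.72])
    treatment = pd.Series([0.68, 0.71, 0.74, 0.78, 0.80])

    # pandas .std() uses ddof=1 by default, matching the (n - 1) terms above
    pooled_std = np.sqrt(
        ((len(control) - 1) * control.std() ** 2 + (len(treatment) - 1) * treatment.std() ** 2)
        / (len(control) + len(treatment) - 2)
    )
    cohens_d = (treatment.mean() - control.mean()) / pooled_std
    print(f"pooled std = {pooled_std:.4f}, Cohen's d = {cohens_d:.2f}")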
@@ -394,7 +429,9 @@ class StatisticalAnalyzer:
  "effect_size": cohens_d,
  "control_mean": control.mean(),
  "treatment_mean": treatment.mean(),
- "relative_change": (treatment.mean() - control.mean()) / control.mean() if control.mean() != 0 else 0
+ "relative_change": (
+ (treatment.mean() - control.mean()) / control.mean() if control.mean() != 0 else 0
+ ),
  }

  def _count_test(self, control: pd.Series, treatment: pd.Series) -> Dict[str, Any]:
@@ -420,7 +457,7 @@ class StatisticalAnalyzer:
  "significant": p_value < self.significance_level,
  "control_rate": control_rate,
  "treatment_rate": treatment_rate,
- "rate_ratio": treatment_rate / control_rate if control_rate > 0 else float('inf')
+ "rate_ratio": treatment_rate / control_rate if control_rate > 0 else float("inf"),
  }

  def _binary_confidence_interval(self, data: pd.Series, confidence: float = 0.95) -> tuple:
@@ -462,7 +499,7 @@ class StatisticalAnalyzer:

  if n1 > 1 and n2 > 1:
  pooled_var = ((n1 - 1) * control.var() + (n2 - 1) * treatment.var()) / (n1 + n2 - 2)
- se = np.sqrt(pooled_var * (1/n1 + 1/n2))
+ se = np.sqrt(pooled_var * (1 / n1 + 1 / n2))
  t_value = stats.t.ppf(0.975, n1 + n2 - 2)
  margin = t_value * se
  else:
@@ -470,8 +507,9 @@ class StatisticalAnalyzer:

  return (diff - margin, diff + margin)

- def _generate_recommendations(self, variant_data: Dict, statistical_tests: Dict,
- metrics_config: List[Metric]) -> List[str]:
+ def _generate_recommendations(
+ self, variant_data: Dict, statistical_tests: Dict, metrics_config: List[Metric]
+ ) -> List[str]:
  """Generate recommendations based on results"""
  recommendations = []

@@ -482,17 +520,17 @@ class StatisticalAnalyzer:
  significant_degradations = []

  for metric_name, test in tests.items():
- if test.get('significant', False):
+ if test.get("significant", False):
  metric_config = next((m for m in metrics_config if m.name == metric_name), None)

  if metric_config:
  if metric_config.goal == "increase":
- if test.get('effect_size', 0) > 0:
+ if test.get("effect_size", 0) > 0:
  significant_improvements.append(metric_name)
  else:
  significant_degradations.append(metric_name)
  elif metric_config.goal == "decrease":
- if test.get('effect_size', 0) < 0:
+ if test.get("effect_size", 0) < 0:
  significant_improvements.append(metric_name)
  else:
  significant_degradations.append(metric_name)
@@ -507,12 +545,20 @@ class StatisticalAnalyzer:
  f"Variant {variant_id} shows significant degradation in: {', '.join(significant_degradations)}"
  )

- if not any(test.get('significant', False) for tests in statistical_tests.values() for test in tests.values()):
- recommendations.append("No statistically significant differences detected. Consider running experiment longer.")
+ if not any(
+ test.get("significant", False)
+ for tests in statistical_tests.values()
+ for test in tests.values()
+ ):
+ recommendations.append(
+ "No statistically significant differences detected. Consider running experiment longer."
+ )

  return recommendations

- def _determine_winner(self, statistical_tests: Dict, metrics_config: List[Metric]) -> Optional[str]:
+ def _determine_winner(
+ self, statistical_tests: Dict, metrics_config: List[Metric]
+ ) -> Optional[str]:
  """Determine winning variant based on primary metrics"""
  primary_metrics = [m for m in metrics_config if m.primary]

@@ -526,8 +572,8 @@ class StatisticalAnalyzer:

  for metric in primary_metrics:
  test = tests.get(metric.name)
- if test and test.get('significant', False):
- effect_size = test.get('effect_size', 0)
+ if test and test.get("significant", False):
+ effect_size = test.get("effect_size", 0)

  if metric.goal == "increase" and effect_size > 0:
  score += 1
@@ -552,7 +598,7 @@ class StatisticalAnalyzer:
  statistical_tests={},
  confidence_intervals={},
  recommendations=["No data available for analysis"],
- created_at=datetime.now()
+ created_at=datetime.now(),
  )


@@ -634,8 +680,9 @@ class ABTestingFramework:

  return self.traffic_splitter.assign_variant(user_id, experiment)

- def record_metric(self, user_id: str, experiment_id: str, metric_name: str,
- value: Union[float, int, bool]):
+ def record_metric(
+ self, user_id: str, experiment_id: str, metric_name: str, value: Union[float, int, bool]
+ ):
  """Record metric for user"""
  # Get user's variant assignment
  variant_id = self.traffic_splitter.get_assignment(user_id, experiment_id)
@@ -643,9 +690,7 @@ class ABTestingFramework:
  variant_id = self.assign_user(user_id, experiment_id)

  # Record metric
- self.metrics_collector.record_metric(
- user_id, experiment_id, variant_id, metric_name, value
- )
+ self.metrics_collector.record_metric(user_id, experiment_id, variant_id, metric_name, value)

  def analyze_experiment(self, experiment_id: str) -> ExperimentResult:
  """Analyze experiment results"""
@@ -667,9 +712,11 @@ class ABTestingFramework:

  summary = {
  "experiment": asdict(experiment),
- "total_users": len(metrics_df['user_id'].unique()) if not metrics_df.empty else 0,
+ "total_users": len(metrics_df["user_id"].unique()) if not metrics_df.empty else 0,
  "total_events": len(metrics_df) if not metrics_df.empty else 0,
- "variant_distribution": metrics_df['variant_id'].value_counts().to_dict() if not metrics_df.empty else {}
+ "variant_distribution": (
+ metrics_df["variant_id"].value_counts().to_dict() if not metrics_df.empty else {}
+ ),
  }

  return summary
@@ -684,7 +731,7 @@ class ABTestingFramework:
  "start_date": exp.start_date.isoformat() if exp.start_date else None,
  "end_date": exp.end_date.isoformat() if exp.end_date else None,
  "variants": len(exp.variants),
- "metrics": len(exp.metrics)
+ "metrics": len(exp.metrics),
  }
  for exp in self.experiments.values()
  ]
@@ -697,24 +744,26 @@ class ABTestingFramework:
  experiment_dict = asdict(experiment)

  # Convert datetime objects to ISO strings
- if experiment_dict.get('start_date'):
- experiment_dict['start_date'] = experiment.start_date.isoformat()
- if experiment_dict.get('end_date'):
- experiment_dict['end_date'] = experiment.end_date.isoformat()
+ if experiment_dict.get("start_date"):
+ experiment_dict["start_date"] = experiment.start_date.isoformat()
+ if experiment_dict.get("end_date"):
+ experiment_dict["end_date"] = experiment.end_date.isoformat()

  # Convert enums to strings
- experiment_dict['status'] = experiment.status.value
- for variant in experiment_dict['variants']:
- variant['type'] = variant['type'].value if hasattr(variant['type'], 'value') else variant['type']
+ experiment_dict["status"] = experiment.status.value
+ for variant in experiment_dict["variants"]:
+ variant["type"] = (
+ variant["type"].value if hasattr(variant["type"], "value") else variant["type"]
+ )

- with open(experiment_file, 'w') as f:
+ with open(experiment_file, "w") as f:
  json.dump(experiment_dict, f, indent=2)

  def load_experiments(self):
  """Load experiments from storage"""
  for experiment_file in self.storage_path.glob("experiment_*.json"):
  try:
- with open(experiment_file, 'r') as f:
+ with open(experiment_file, "r") as f:
  experiment_dict = json.load(f)

  # Convert back from dict to objects
@@ -727,26 +776,26 @@ class ABTestingFramework:
  def _dict_to_experiment(self, experiment_dict: Dict) -> ExperimentConfig:
  """Convert dictionary back to ExperimentConfig"""
  # Convert datetime strings back to objects
- if experiment_dict.get('start_date'):
- experiment_dict['start_date'] = datetime.fromisoformat(experiment_dict['start_date'])
- if experiment_dict.get('end_date'):
- experiment_dict['end_date'] = datetime.fromisoformat(experiment_dict['end_date'])
+ if experiment_dict.get("start_date"):
+ experiment_dict["start_date"] = datetime.fromisoformat(experiment_dict["start_date"])
+ if experiment_dict.get("end_date"):
+ experiment_dict["end_date"] = datetime.fromisoformat(experiment_dict["end_date"])

  # Convert status string back to enum
- experiment_dict['status'] = ExperimentStatus(experiment_dict['status'])
+ experiment_dict["status"] = ExperimentStatus(experiment_dict["status"])

  # Convert variants
  variants = []
- for variant_dict in experiment_dict['variants']:
- variant_dict['type'] = VariantType(variant_dict['type'])
+ for variant_dict in experiment_dict["variants"]:
+ variant_dict["type"] = VariantType(variant_dict["type"])
  variants.append(Variant(**variant_dict))
- experiment_dict['variants'] = variants
+ experiment_dict["variants"] = variants

  # Convert metrics
  metrics = []
- for metric_dict in experiment_dict['metrics']:
+ for metric_dict in experiment_dict["metrics"]:
  metrics.append(Metric(**metric_dict))
- experiment_dict['metrics'] = metrics
+ experiment_dict["metrics"] = metrics

  return ExperimentConfig(**experiment_dict)

@@ -784,15 +833,15 @@ if __name__ == "__main__":
  name="Single Model",
  type=VariantType.CONTROL,
  traffic_percentage=50.0,
- model_config={"model_type": "single_mlp"}
+ model_config={"model_type": "single_mlp"},
  ),
  Variant(
  id="treatment",
  name="Ensemble Model",
  type=VariantType.TREATMENT,
  traffic_percentage=50.0,
- model_config={"model_type": "ensemble"}
- )
+ model_config={"model_type": "ensemble"},
+ ),
  ],
  metrics=[
  Metric(
@@ -800,23 +849,18 @@ if __name__ == "__main__":
  type="continuous",
  aggregation="mean",
  goal="increase",
- primary=True
+ primary=True,
  ),
  Metric(
  name="recommendation_click_rate",
  type="binary",
  aggregation="mean",
  goal="increase",
- primary=True
+ primary=True,
  ),
- Metric(
- name="portfolio_return",
- type="continuous",
- aggregation="mean",
- goal="increase"
- )
+ Metric(name="portfolio_return", type="continuous", aggregation="mean", goal="increase"),
  ],
- min_sample_size=1000
+ min_sample_size=1000,
  )

  # Create and start experiment
@@ -829,9 +873,15 @@ if __name__ == "__main__":
  variant = framework.assign_user(user_id, experiment_id)

  # Simulate metrics
- framework.record_metric(user_id, experiment_id, "prediction_accuracy", random.uniform(0.6, 0.9))
- framework.record_metric(user_id, experiment_id, "recommendation_click_rate", random.choice([0, 1]))
- framework.record_metric(user_id, experiment_id, "portfolio_return", random.uniform(-0.1, 0.15))
+ framework.record_metric(
+ user_id, experiment_id, "prediction_accuracy", random.uniform(0.6, 0.9)
+ )
+ framework.record_metric(
+ user_id, experiment_id, "recommendation_click_rate", random.choice([0, 1])
+ )
+ framework.record_metric(
+ user_id, experiment_id, "portfolio_return", random.uniform(-0.1, 0.15)
+ )

  # Analyze results
  results = framework.analyze_experiment(experiment_id)
@@ -842,4 +892,4 @@ if __name__ == "__main__":
  print(f"Winner: {results.winner_variant}")
  print(f"Recommendations: {results.recommendations}")

- logger.info("A/B testing framework demo completed")
+ logger.info("A/B testing framework demo completed")
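All of the ab_testing.py hunks above are formatting-only (line wrapping, trailing commas, double quotes, import ordering, a trailing newline), so the module's public flow is unchanged. Pieced together from the __main__ demo visible in the diff, usage looks roughly like the sketch below; the framework construction and the experiment id are assumptions, since those lines are not shown in the hunks:

    from mcli.ml.experimentation.ab_testing import ABTestingFramework

    framework = ABTestingFramework()      # assumed default construction
    experiment_id = "model_comparison"    # assumed id; experiment setup is not shown in the diff

    for i in range(200):
        user_id = f"user_{i}"
        framework.assign_user(user_id, experiment_id)
        framework.record_metric(user_id, experiment_id, "prediction_accuracy", 0.75)

    results = framework.analyze_experiment(experiment_id)
    print(results.winner_variant, results.recommendations)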
mcli/ml/features/ensemble_features.py

@@ -1,17 +1,18 @@
  """Ensemble feature engineering and feature interaction systems"""

- import numpy as np
- import pandas as pd
- from datetime import datetime, timedelta
- from typing import Any, Dict, List, Optional, Tuple, Union, Callable
- from dataclasses import dataclass
  import logging
- from itertools import combinations
  import warnings
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from itertools import combinations
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.cluster import KMeans
+ from sklearn.decomposition import PCA
  from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression
  from sklearn.preprocessing import PolynomialFeatures
- from sklearn.decomposition import PCA
- from sklearn.cluster import KMeans

  logger = logging.getLogger(__name__)

mcli/ml/features/political_features.py

@@ -1,12 +1,13 @@
  """Political influence features for stock recommendation models"""

- import numpy as np
- import pandas as pd
+ import logging
+ from collections import Counter, defaultdict
+ from dataclasses import dataclass
  from datetime import datetime, timedelta
  from typing import Any, Dict, List, Optional, Tuple, Union
- from dataclasses import dataclass
- import logging
- from collections import defaultdict, Counter
+
+ import numpy as np
+ import pandas as pd

  logger = logging.getLogger(__name__)
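These two import hunks (and the one at the top of ab_testing.py) apply the same convention: standard-library imports first, then a blank line, then third-party imports, with each group alphabetized. This matches isort-style grouping, although the formatter used is not stated anywhere in the diff. In miniature:

    # Standard library, alphabetized
    import logging
    from collections import Counter, defaultdict
    from dataclasses import dataclass

    # Blank line, then third-party packages, alphabetized
    import numpy as np
    import pandas as pd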