mcli-framework 7.10.1__py3-none-any.whl → 7.10.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release: this version of mcli-framework might be problematic.

Files changed (99)
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/workflow/git_commit/ai_service.py +13 -2
  4. mcli/workflow/notebook/converter.py +375 -0
  5. mcli/workflow/notebook/notebook_cmd.py +441 -0
  6. mcli/workflow/notebook/schema.py +402 -0
  7. mcli/workflow/notebook/validator.py +313 -0
  8. mcli/workflow/workflow.py +14 -0
  9. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +36 -2
  10. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +14 -94
  11. mcli/__init__.py +0 -160
  12. mcli/__main__.py +0 -14
  13. mcli/app/__init__.py +0 -23
  14. mcli/app/model/__init__.py +0 -0
  15. mcli/app/video/__init__.py +0 -5
  16. mcli/chat/__init__.py +0 -34
  17. mcli/lib/__init__.py +0 -0
  18. mcli/lib/api/__init__.py +0 -0
  19. mcli/lib/auth/__init__.py +0 -1
  20. mcli/lib/config/__init__.py +0 -1
  21. mcli/lib/erd/__init__.py +0 -25
  22. mcli/lib/files/__init__.py +0 -0
  23. mcli/lib/fs/__init__.py +0 -1
  24. mcli/lib/logger/__init__.py +0 -3
  25. mcli/lib/performance/__init__.py +0 -17
  26. mcli/lib/pickles/__init__.py +0 -1
  27. mcli/lib/secrets/__init__.py +0 -10
  28. mcli/lib/shell/__init__.py +0 -0
  29. mcli/lib/toml/__init__.py +0 -1
  30. mcli/lib/watcher/__init__.py +0 -0
  31. mcli/ml/__init__.py +0 -16
  32. mcli/ml/api/__init__.py +0 -30
  33. mcli/ml/api/routers/__init__.py +0 -27
  34. mcli/ml/auth/__init__.py +0 -41
  35. mcli/ml/backtesting/__init__.py +0 -33
  36. mcli/ml/cli/__init__.py +0 -5
  37. mcli/ml/config/__init__.py +0 -33
  38. mcli/ml/configs/__init__.py +0 -16
  39. mcli/ml/dashboard/__init__.py +0 -12
  40. mcli/ml/dashboard/components/__init__.py +0 -7
  41. mcli/ml/dashboard/pages/__init__.py +0 -6
  42. mcli/ml/data_ingestion/__init__.py +0 -29
  43. mcli/ml/database/__init__.py +0 -40
  44. mcli/ml/experimentation/__init__.py +0 -29
  45. mcli/ml/features/__init__.py +0 -39
  46. mcli/ml/features/political_features.py +0 -677
  47. mcli/ml/mlops/__init__.py +0 -19
  48. mcli/ml/models/__init__.py +0 -90
  49. mcli/ml/monitoring/__init__.py +0 -25
  50. mcli/ml/optimization/__init__.py +0 -27
  51. mcli/ml/predictions/__init__.py +0 -5
  52. mcli/ml/preprocessing/__init__.py +0 -24
  53. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  54. mcli/ml/scripts/__init__.py +0 -1
  55. mcli/ml/serving/__init__.py +0 -1
  56. mcli/ml/trading/__init__.py +0 -63
  57. mcli/ml/training/__init__.py +0 -7
  58. mcli/mygroup/__init__.py +0 -3
  59. mcli/public/__init__.py +0 -1
  60. mcli/public/commands/__init__.py +0 -2
  61. mcli/self/__init__.py +0 -3
  62. mcli/workflow/__init__.py +0 -0
  63. mcli/workflow/daemon/__init__.py +0 -15
  64. mcli/workflow/dashboard/__init__.py +0 -5
  65. mcli/workflow/docker/__init__.py +0 -0
  66. mcli/workflow/file/__init__.py +0 -0
  67. mcli/workflow/gcloud/__init__.py +0 -1
  68. mcli/workflow/git_commit/__init__.py +0 -0
  69. mcli/workflow/interview/__init__.py +0 -0
  70. mcli/workflow/politician_trading/__init__.py +0 -4
  71. mcli/workflow/politician_trading/config.py +0 -134
  72. mcli/workflow/politician_trading/connectivity.py +0 -492
  73. mcli/workflow/politician_trading/data_sources.py +0 -654
  74. mcli/workflow/politician_trading/database.py +0 -412
  75. mcli/workflow/politician_trading/demo.py +0 -249
  76. mcli/workflow/politician_trading/models.py +0 -327
  77. mcli/workflow/politician_trading/monitoring.py +0 -413
  78. mcli/workflow/politician_trading/scrapers.py +0 -1074
  79. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  80. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  81. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  82. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  83. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  84. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  85. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  86. mcli/workflow/politician_trading/seed_database.py +0 -520
  87. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  88. mcli/workflow/politician_trading/workflow.py +0 -879
  89. mcli/workflow/registry/__init__.py +0 -0
  90. mcli/workflow/repo/__init__.py +0 -0
  91. mcli/workflow/scheduler/__init__.py +0 -25
  92. mcli/workflow/search/__init__.py +0 -0
  93. mcli/workflow/sync/__init__.py +0 -5
  94. mcli/workflow/videos/__init__.py +0 -1
  95. mcli/workflow/wakatime/__init__.py +0 -80
  96. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  97. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  98. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  99. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
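
The notable additions are mcli/lib/optional_deps.py and the new notebook workflow; nearly everything else is removal, with the entire mcli.ml and politician_trading trees dropped from the wheel. The diff below records only the deletions and does not show the new helper's contents, but as a purely illustrative sketch (names, signature, and messages assumed, not taken from the package), an optional-dependency helper of this kind usually looks something like:

    # Hypothetical sketch of an optional-dependency guard; the actual
    # contents of mcli/lib/optional_deps.py are not shown in this diff.
    import importlib
    from types import ModuleType
    from typing import Optional

    def try_import(module_name: str, hint: str) -> Optional[ModuleType]:
        """Return the imported module, or None with an install hint."""
        try:
            return importlib.import_module(module_name)
        except ImportError:
            print(f"Optional dependency '{module_name}' is missing ({hint})")
            return None

    pandas = try_import("pandas", hint="pip install pandas")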
mcli/ml/preprocessing/politician_trading_preprocessor.py DELETED
@@ -1,570 +0,0 @@
- """Main preprocessor for politician trading data"""
-
- import logging
- from dataclasses import asdict, dataclass
- from datetime import datetime, timedelta
- from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple, Union
-
- import joblib
- import numpy as np
- import pandas as pd
-
- from .data_cleaners import CleaningStats, MissingValueHandler, OutlierDetector, TradingDataCleaner
- from .feature_extractors import (
-     FeatureExtractionStats,
-     MarketFeatureExtractor,
-     PoliticianFeatureExtractor,
-     SentimentFeatureExtractor,
-     TemporalFeatureExtractor,
- )
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class PreprocessingConfig:
-     """Configuration for preprocessing pipeline"""
-
-     # Data cleaning
-     enable_data_cleaning: bool = True
-     enable_outlier_detection: bool = True
-     enable_missing_value_handling: bool = True
-     outlier_action: str = "flag"  # "flag", "remove", or "cap"
-
-     # Feature extraction
-     enable_politician_features: bool = True
-     enable_market_features: bool = True
-     enable_temporal_features: bool = True
-     enable_sentiment_features: bool = True
-
-     # Temporal settings
-     lookback_periods: List[int] = None
-     include_future_leakage: bool = False
-
-     # Data splitting
-     train_split_ratio: float = 0.7
-     val_split_ratio: float = 0.15
-     test_split_ratio: float = 0.15
-     split_by_time: bool = True
-
-     # Output settings
-     save_preprocessing_artifacts: bool = True
-     artifacts_dir: Optional[Path] = None
-
-     def __post_init__(self):
-         if self.lookback_periods is None:
-             self.lookback_periods = [7, 30, 90, 365]
-
-         if self.artifacts_dir is None:
-             self.artifacts_dir = Path("./data/preprocessing_artifacts")
-
-         # Validate split ratios
-         total_ratio = self.train_split_ratio + self.val_split_ratio + self.test_split_ratio
-         if abs(total_ratio - 1.0) > 0.001:
-             raise ValueError(f"Split ratios must sum to 1.0, got {total_ratio}")
-
-
- @dataclass
- class PreprocessingResults:
-     """Results from preprocessing pipeline"""
-
-     # Processed data
-     train_data: pd.DataFrame
-     val_data: pd.DataFrame
-     test_data: pd.DataFrame
-
-     # Feature information
-     feature_names: List[str]
-     categorical_features: List[str]
-     numerical_features: List[str]
-     target_columns: List[str]
-
-     # Statistics
-     cleaning_stats: CleaningStats
-     original_shape: Tuple[int, int]
-     final_shape: Tuple[int, int]
-     feature_count: int
-
-     # Artifacts paths
-     scaler_path: Optional[Path] = None
-     encoder_path: Optional[Path] = None
-     feature_metadata_path: Optional[Path] = None
-
-
- class PoliticianTradingPreprocessor:
-     """Main preprocessor for politician trading data for ML models"""
-
-     def __init__(self, config: Optional[PreprocessingConfig] = None):
-         self.config = config or PreprocessingConfig()
-
-         # Initialize components
-         self.data_cleaner = TradingDataCleaner()
-         self.outlier_detector = OutlierDetector()
-         self.missing_value_handler = MissingValueHandler()
-
-         self.politician_extractor = PoliticianFeatureExtractor()
-         self.market_extractor = MarketFeatureExtractor()
-         self.temporal_extractor = TemporalFeatureExtractor(
-             config={"lookback_periods": self.config.lookback_periods}
-         )
-         self.sentiment_extractor = SentimentFeatureExtractor()
-
-         # Preprocessing artifacts
-         self.scaler = None
-         self.categorical_encoder = None
-         self.feature_metadata = {}
-
-         # Create artifacts directory
-         self.config.artifacts_dir.mkdir(parents=True, exist_ok=True)
-
-     def preprocess(
-         self, raw_data: Union[List[Dict[str, Any]], pd.DataFrame]
-     ) -> PreprocessingResults:
-         """Main preprocessing pipeline"""
-         logger.info("Starting politician trading data preprocessing")
-
-         # Convert to DataFrame if needed
-         if isinstance(raw_data, list):
-             df = pd.DataFrame(raw_data)
-         else:
-             df = raw_data.copy()
-
-         original_shape = df.shape
-         logger.info(f"Input data shape: {original_shape}")
-
-         # Step 1: Data Cleaning
-         if self.config.enable_data_cleaning:
-             df, cleaning_stats = self._clean_data(df)
-             logger.info(f"After cleaning: {df.shape}")
-         else:
-             cleaning_stats = CleaningStats(
-                 total_records=len(df),
-                 cleaned_records=len(df),
-                 removed_records=0,
-                 cleaning_operations={},
-                 outliers_detected=0,
-                 missing_values_filled=0,
-             )
-
-         # Step 2: Feature Extraction
-         df = self._extract_features(df)
-         logger.info(f"After feature extraction: {df.shape}")
-
-         # Step 3: Handle outliers
-         if self.config.enable_outlier_detection:
-             df = self._handle_outliers(df)
-             logger.info(f"After outlier handling: {df.shape}")
-
-         # Step 4: Handle missing values
-         if self.config.enable_missing_value_handling:
-             df = self._handle_missing_values(df)
-             logger.info(f"After missing value handling: {df.shape}")
-
-         # Step 5: Feature engineering and encoding
-         df = self._engineer_features(df)
-         logger.info(f"After feature engineering: {df.shape}")
-
-         # Step 6: Create target variables
-         df = self._create_target_variables(df)
-         logger.info(f"After target creation: {df.shape}")
-
-         # Step 7: Split data
-         train_data, val_data, test_data = self._split_data(df)
-
-         # Step 8: Scale features
-         train_data, val_data, test_data = self._scale_features(train_data, val_data, test_data)
-
-         # Step 9: Save artifacts
-         if self.config.save_preprocessing_artifacts:
-             self._save_artifacts()
-
-         # Prepare results
-         feature_names = [col for col in df.columns if not col.startswith("target_")]
-         categorical_features = self._identify_categorical_features(df)
-         numerical_features = self._identify_numerical_features(df)
-         target_columns = [col for col in df.columns if col.startswith("target_")]
-
-         results = PreprocessingResults(
-             train_data=train_data,
-             val_data=val_data,
-             test_data=test_data,
-             feature_names=feature_names,
-             categorical_features=categorical_features,
-             numerical_features=numerical_features,
-             target_columns=target_columns,
-             cleaning_stats=cleaning_stats,
-             original_shape=original_shape,
-             final_shape=df.shape,
-             feature_count=len(feature_names),
-             scaler_path=self.config.artifacts_dir / "scaler.joblib",
-             encoder_path=self.config.artifacts_dir / "encoder.joblib",
-             feature_metadata_path=self.config.artifacts_dir / "feature_metadata.joblib",
-         )
-
-         logger.info(f"Preprocessing complete. Final shape: {df.shape}")
-         logger.info(f"Features: {len(feature_names)}, Targets: {len(target_columns)}")
-
-         return results
-
-     def _clean_data(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, CleaningStats]:
-         """Clean the raw data"""
-         logger.info("Cleaning data")
-
-         # Convert to list of records for cleaner
-         records = df.to_dict("records")
-         cleaned_records, cleaning_stats = self.data_cleaner.clean_trading_records(records)
-
-         # Convert back to DataFrame
-         cleaned_df = pd.DataFrame(cleaned_records)
-
-         return cleaned_df, cleaning_stats
-
-     def _extract_features(self, df: pd.DataFrame) -> pd.DataFrame:
-         """Extract all features"""
-         logger.info("Extracting features")
-
-         if self.config.enable_politician_features:
-             df = self.politician_extractor.extract_politician_features(df)
-             logger.info("Politician features extracted")
-
-         if self.config.enable_market_features:
-             df = self.market_extractor.extract_market_features(df)
-             logger.info("Market features extracted")
-
-         if self.config.enable_temporal_features:
-             df = self.temporal_extractor.extract_temporal_features(df)
-             logger.info("Temporal features extracted")
-
-         if self.config.enable_sentiment_features:
-             df = self.sentiment_extractor.extract_sentiment_features(df)
-             logger.info("Sentiment features extracted")
-
-         return df
-
-     def _handle_outliers(self, df: pd.DataFrame) -> pd.DataFrame:
-         """Handle outliers in the data"""
-         logger.info("Handling outliers")
-
-         df_with_outliers, outlier_info = self.outlier_detector.detect_outliers(df)
-
-         if self.config.outlier_action == "remove":
-             df_clean = df_with_outliers[~df_with_outliers["is_outlier"]]
-             logger.info(f"Removed {outlier_info['total_outliers']} outliers")
-         elif self.config.outlier_action == "flag":
-             df_clean = df_with_outliers
-             logger.info(f"Flagged {outlier_info['total_outliers']} outliers")
-         else:  # cap
-             df_clean = self._cap_outliers(df_with_outliers)
-             logger.info(f"Capped {outlier_info['total_outliers']} outliers")
-
-         return df_clean
-
-     def _handle_missing_values(self, df: pd.DataFrame) -> pd.DataFrame:
-         """Handle missing values"""
-         logger.info("Handling missing values")
-
-         df_clean, missing_info = self.missing_value_handler.handle_missing_values(df)
-         logger.info(f"Handled missing values: {missing_info['final_missing_counts']}")
-
-         return df_clean
-
-     def _engineer_features(self, df: pd.DataFrame) -> pd.DataFrame:
-         """Engineer additional features"""
-         logger.info("Engineering features")
-
-         # Transaction amount buckets
-         if "transaction_amount_cleaned" in df.columns:
-             df["amount_bucket"] = pd.cut(
-                 df["transaction_amount_cleaned"],
-                 bins=[0, 1000, 15000, 50000, 500000, float("inf")],
-                 labels=["micro", "small", "medium", "large", "mega"],
-             )
-
-         # Politician activity level
-         if "total_transactions" in df.columns:
-             df["politician_activity_level"] = pd.cut(
-                 df["total_transactions"],
-                 bins=[0, 5, 20, 50, float("inf")],
-                 labels=["low", "medium", "high", "very_high"],
-             )
-
-         # Market timing features
-         if "transaction_date_dt" in df.columns:
-             # Days since start of data
-             min_date = df["transaction_date_dt"].min()
-             df["days_since_start"] = (df["transaction_date_dt"] - min_date).dt.days
-
-             # Market cycle approximation (simplified)
-             df["market_cycle_phase"] = (df["days_since_start"] % 1460) / 1460  # 4-year cycle
-
-         # Interaction features
-         if all(col in df.columns for col in ["buy_ratio", "total_transactions"]):
-             df["buy_volume_interaction"] = df["buy_ratio"] * df["total_transactions"]
-
-         if all(
-             col in df.columns
-             for col in ["transaction_amount_cleaned", "politician_trading_frequency"]
-         ):
-             df["amount_frequency_interaction"] = (
-                 df["transaction_amount_cleaned"] * df["politician_trading_frequency"]
-             )
-
-         return df
-
-     def _create_target_variables(self, df: pd.DataFrame) -> pd.DataFrame:
-         """Create target variables for ML models"""
-         logger.info("Creating target variables")
-
-         # Sort by politician and date for future stock performance calculation
-         df = df.sort_values(["politician_name_cleaned", "transaction_date_dt"])
-
-         # Target 1: Stock performance after politician trade (simplified)
-         # This would typically require external market data
-         # For now, create synthetic targets based on transaction patterns
-
-         # Target: Whether the trade was profitable (binary classification)
-         # Assumption: Larger transactions from frequent traders are more likely profitable
-         if all(
-             col in df.columns
-             for col in ["transaction_amount_cleaned", "politician_trading_frequency"]
-         ):
-             # Probability based on amount and frequency
-             amount_score = np.log1p(df["transaction_amount_cleaned"]) / 10
-             frequency_score = np.log1p(df["politician_trading_frequency"]) / 5
-
-             profit_probability = (amount_score + frequency_score) / 2
-             profit_probability = np.clip(profit_probability, 0.1, 0.9)
-
-             # Binary target with some randomness
-             np.random.seed(42)  # For reproducibility
-             df["target_profitable"] = np.random.binomial(1, profit_probability)
-
-         # Target 2: Stock recommendation score (regression)
-         # Based on politician patterns and market factors
-         if "transaction_type_cleaned" in df.columns:
-             base_score = 0.5  # Neutral
-
-             # Adjust based on transaction type
-             type_adjustment = (
-                 df["transaction_type_cleaned"]
-                 .map({"buy": 0.2, "sell": -0.2, "exchange": 0.0})
-                 .fillna(0)
-             )
-
-             # Adjust based on politician track record
-             if "buy_ratio" in df.columns:
-                 track_record_adjustment = (df["buy_ratio"] - 0.5) * 0.3
-
-             # Adjust based on timing
-             if "is_end_of_quarter" in df.columns:
-                 timing_adjustment = df["is_end_of_quarter"].astype(int) * 0.1
-
-             recommendation_score = (
-                 base_score + type_adjustment + track_record_adjustment + timing_adjustment
-             )
-             df["target_recommendation_score"] = np.clip(recommendation_score, 0, 1)
-
-         # Target 3: Risk level (multi-class classification)
-         if "transaction_volatility" in df.columns:
-             risk_conditions = [
-                 (df["transaction_volatility"] <= 0.2),
-                 (df["transaction_volatility"] <= 0.5),
-                 (df["transaction_volatility"] <= 1.0),
-                 (df["transaction_volatility"] > 1.0),
-             ]
-             risk_choices = ["low", "medium", "high", "very_high"]
-             df["target_risk_level"] = np.select(risk_conditions, risk_choices, default="medium")
-
-         return df
-
-     def _split_data(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-         """Split data into train/val/test sets"""
-         logger.info("Splitting data")
-
-         if self.config.split_by_time and "transaction_date_dt" in df.columns:
-             # Time-based split
-             df_sorted = df.sort_values("transaction_date_dt")
-
-             train_size = int(len(df_sorted) * self.config.train_split_ratio)
-             val_size = int(len(df_sorted) * self.config.val_split_ratio)
-
-             train_data = df_sorted.iloc[:train_size]
-             val_data = df_sorted.iloc[train_size : train_size + val_size]
-             test_data = df_sorted.iloc[train_size + val_size :]
-
-         else:
-             # Random split
-             df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
-
-             train_size = int(len(df_shuffled) * self.config.train_split_ratio)
-             val_size = int(len(df_shuffled) * self.config.val_split_ratio)
-
-             train_data = df_shuffled.iloc[:train_size]
-             val_data = df_shuffled.iloc[train_size : train_size + val_size]
-             test_data = df_shuffled.iloc[train_size + val_size :]
-
-         logger.info(
-             f"Split sizes - Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}"
-         )
-
-         return train_data, val_data, test_data
-
-     def _scale_features(
-         self, train_data: pd.DataFrame, val_data: pd.DataFrame, test_data: pd.DataFrame
-     ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-         """Scale numerical features"""
-         logger.info("Scaling features")
-
-         from sklearn.preprocessing import LabelEncoder, StandardScaler
-
-         numerical_features = self._identify_numerical_features(train_data)
-         categorical_features = self._identify_categorical_features(train_data)
-
-         # Fit scaler on training data
-         self.scaler = StandardScaler()
-         if numerical_features:
-             train_scaled = train_data.copy()
-             val_scaled = val_data.copy()
-             test_scaled = test_data.copy()
-
-             train_scaled[numerical_features] = self.scaler.fit_transform(
-                 train_data[numerical_features]
-             )
-             val_scaled[numerical_features] = self.scaler.transform(val_data[numerical_features])
-             test_scaled[numerical_features] = self.scaler.transform(test_data[numerical_features])
-         else:
-             train_scaled, val_scaled, test_scaled = train_data, val_data, test_data
-
-         # Encode categorical features
-         self.categorical_encoder = {}
-         if categorical_features:
-             for feature in categorical_features:
-                 encoder = LabelEncoder()
-                 # Fit on combined data to handle unseen categories
-                 all_values = pd.concat(
-                     [train_scaled[feature], val_scaled[feature], test_scaled[feature]]
-                 ).astype(str)
-
-                 encoder.fit(all_values)
-                 self.categorical_encoder[feature] = encoder
-
-                 train_scaled[feature] = encoder.transform(train_scaled[feature].astype(str))
-                 val_scaled[feature] = encoder.transform(val_scaled[feature].astype(str))
-                 test_scaled[feature] = encoder.transform(test_scaled[feature].astype(str))
-
-         return train_scaled, val_scaled, test_scaled
-
-     def _identify_numerical_features(self, df: pd.DataFrame) -> List[str]:
-         """Identify numerical features"""
-         numerical_features = []
-         for col in df.columns:
-             if (
-                 df[col].dtype in ["int64", "float64"]
-                 and not col.startswith("target_")
-                 and not col.endswith("_cleaned")
-                 and col not in ["is_outlier"]
-             ):
-                 numerical_features.append(col)
-         return numerical_features
-
-     def _identify_categorical_features(self, df: pd.DataFrame) -> List[str]:
-         """Identify categorical features"""
-         categorical_features = []
-         for col in df.columns:
-             if (
-                 df[col].dtype == "object"
-                 or df[col].dtype.name == "category"
-                 and not col.startswith("target_")
-             ):
-                 categorical_features.append(col)
-         return categorical_features
-
-     def _cap_outliers(self, df: pd.DataFrame) -> pd.DataFrame:
-         """Cap outliers to percentile values"""
-         df_capped = df.copy()
-         numerical_cols = self._identify_numerical_features(df)
-
-         for col in numerical_cols:
-             if col in df_capped.columns:
-                 q1 = df_capped[col].quantile(0.01)
-                 q99 = df_capped[col].quantile(0.99)
-                 df_capped[col] = np.clip(df_capped[col], q1, q99)
-
-         return df_capped
-
-     def _save_artifacts(self):
-         """Save preprocessing artifacts"""
-         logger.info("Saving preprocessing artifacts")
-
-         if self.scaler:
-             joblib.dump(self.scaler, self.config.artifacts_dir / "scaler.joblib")
-
-         if self.categorical_encoder:
-             joblib.dump(self.categorical_encoder, self.config.artifacts_dir / "encoder.joblib")
-
-         # Save feature metadata
-         self.feature_metadata = {
-             "config": asdict(self.config),
-             "preprocessing_timestamp": datetime.now().isoformat(),
-         }
-         joblib.dump(self.feature_metadata, self.config.artifacts_dir / "feature_metadata.joblib")
-
-     def load_artifacts(self, artifacts_dir: Path):
-         """Load preprocessing artifacts"""
-         logger.info(f"Loading preprocessing artifacts from {artifacts_dir}")
-
-         scaler_path = artifacts_dir / "scaler.joblib"
-         if scaler_path.exists():
-             self.scaler = joblib.load(scaler_path)
-
-         encoder_path = artifacts_dir / "encoder.joblib"
-         if encoder_path.exists():
-             self.categorical_encoder = joblib.load(encoder_path)
-
-         metadata_path = artifacts_dir / "feature_metadata.joblib"
-         if metadata_path.exists():
-             self.feature_metadata = joblib.load(metadata_path)
-
-     def transform_new_data(
-         self, new_data: Union[List[Dict[str, Any]], pd.DataFrame]
-     ) -> pd.DataFrame:
-         """Transform new data using fitted preprocessors"""
-         logger.info("Transforming new data with fitted preprocessors")
-
-         if self.scaler is None and self.categorical_encoder is None:
-             raise ValueError("No preprocessing artifacts loaded. Call load_artifacts() first.")
-
-         # Convert to DataFrame if needed
-         if isinstance(new_data, list):
-             df = pd.DataFrame(new_data)
-         else:
-             df = new_data.copy()
-
-         # Apply same preprocessing steps (without fitting)
-         if self.config.enable_data_cleaning:
-             records = df.to_dict("records")
-             cleaned_records, _ = self.data_cleaner.clean_trading_records(records)
-             df = pd.DataFrame(cleaned_records)
-
-         # Extract features
-         df = self._extract_features(df)
-
-         # Engineer features
-         df = self._engineer_features(df)
-
-         # Apply scaling and encoding
-         numerical_features = self._identify_numerical_features(df)
-         categorical_features = self._identify_categorical_features(df)
-
-         if self.scaler and numerical_features:
-             df[numerical_features] = self.scaler.transform(df[numerical_features])
-
-         if self.categorical_encoder and categorical_features:
-             for feature in categorical_features:
-                 if feature in self.categorical_encoder:
-                     df[feature] = self.categorical_encoder[feature].transform(
-                         df[feature].astype(str)
-                     )
-
-         return df
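
With the chronological split enabled (split_by_time=True) and the default 0.7/0.15/0.15 ratios, 1,000 rows yield the first 700 for training, the next 150 for validation, and the final 150 for test, which avoids look-ahead leakage. For reference, a minimal sketch of how this removed preprocessor was driven, assuming a 7.10.1 install where the sibling data_cleaners and feature_extractors modules still exist (the record fields below are illustrative, not the package's schema):

    # Runs only against mcli-framework==7.10.1, where this module exists.
    from pathlib import Path
    from mcli.ml.preprocessing.politician_trading_preprocessor import (
        PoliticianTradingPreprocessor,
        PreprocessingConfig,
    )

    config = PreprocessingConfig(
        outlier_action="cap",               # "flag", "remove", or "cap"
        split_by_time=True,                 # chronological split, no look-ahead
        artifacts_dir=Path("./artifacts"),
    )
    preprocessor = PoliticianTradingPreprocessor(config)

    # Field names are illustrative; the (also removed) cleaners define the schema.
    records = [
        {"politician_name": "Example Member", "transaction_date": "2024-01-15",
         "transaction_amount": 15000, "transaction_type": "buy"},
        # ... enough records to make the 70/15/15 split meaningful
    ]
    results = preprocessor.preprocess(records)  # fits scaler and encoders
    print(results.final_shape, results.feature_count)

    # Score new records later with the saved artifacts:
    preprocessor.load_artifacts(config.artifacts_dir)
    features = preprocessor.transform_new_data(records)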
mcli/ml/scripts/__init__.py DELETED
@@ -1 +0,0 @@
- """ML scripts module."""
mcli/ml/serving/__init__.py DELETED
@@ -1 +0,0 @@
- """Model serving module for MCLI ML system."""
mcli/ml/trading/__init__.py DELETED
@@ -1,63 +0,0 @@
- """Trading module for portfolio management and trade execution"""
-
- from mcli.ml.trading.alpaca_client import (
-     AlpacaTradingClient,
-     create_trading_client,
-     get_alpaca_config_from_env,
- )
- from mcli.ml.trading.models import (  # Enums; Database models; Pydantic models
-     OrderCreate,
-     OrderResponse,
-     OrderSide,
-     OrderStatus,
-     OrderType,
-     Portfolio,
-     PortfolioCreate,
-     PortfolioPerformanceSnapshot,
-     PortfolioResponse,
-     PortfolioType,
-     Position,
-     PositionResponse,
-     PositionSide,
-     RiskLevel,
-     TradingAccount,
-     TradingAccountCreate,
-     TradingOrder,
-     TradingSignal,
-     TradingSignalResponse,
- )
- from mcli.ml.trading.paper_trading import PaperTradingEngine
- from mcli.ml.trading.risk_management import RiskManager
- from mcli.ml.trading.trading_service import TradingService
-
- __all__ = [
-     # Enums
-     "OrderStatus",
-     "OrderType",
-     "OrderSide",
-     "PositionSide",
-     "PortfolioType",
-     "RiskLevel",
-     # Database models
-     "TradingAccount",
-     "Portfolio",
-     "Position",
-     "TradingOrder",
-     "PortfolioPerformanceSnapshot",
-     "TradingSignal",
-     # Pydantic models
-     "TradingAccountCreate",
-     "PortfolioCreate",
-     "OrderCreate",
-     "PositionResponse",
-     "OrderResponse",
-     "PortfolioResponse",
-     "TradingSignalResponse",
-     # Services
-     "TradingService",
-     "AlpacaTradingClient",
-     "create_trading_client",
-     "get_alpaca_config_from_env",
-     "RiskManager",
-     "PaperTradingEngine",
- ]
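
Every name in this export list disappears from the wheel in 7.10.2, so `from mcli.ml.trading import TradingService` and friends now raise ModuleNotFoundError. Code that has to run against both versions can guard the import; a sketch:

    # Degrade cleanly when the trading package is absent (7.10.2 and later).
    try:
        from mcli.ml.trading import PaperTradingEngine, TradingService
        HAS_TRADING = True
    except ImportError:  # removed in 7.10.2
        HAS_TRADING = False

    if not HAS_TRADING:
        raise SystemExit(
            "mcli.ml.trading was removed in 7.10.2; "
            "pin mcli-framework==7.10.1 if you depend on it"
        )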
mcli/ml/training/__init__.py DELETED
@@ -1,7 +0,0 @@
- """ML model training module"""
-
- from .train_model import PoliticianTradingNet, fetch_training_data
- from .train_model import main as train_model
- from .train_model import prepare_dataset
-
- __all__ = ["PoliticianTradingNet", "train_model", "fetch_training_data", "prepare_dataset"]
mcli/mygroup/__init__.py DELETED
@@ -1,3 +0,0 @@
- """
- Mygroup commands for mcli.
- """
mcli/public/__init__.py DELETED
@@ -1 +0,0 @@
- # logger.info("I am in mcli.public.__init__.py")
mcli/public/commands/__init__.py DELETED
@@ -1,2 +0,0 @@
- # User-generated commands directory
- # Commands created through the MCLI chat interface are stored here
mcli/self/__init__.py DELETED
@@ -1,3 +0,0 @@
- """
- Self-management module for mcli.
- """
mcli/workflow/__init__.py DELETED
File without changes
mcli/workflow/daemon/__init__.py DELETED
@@ -1,15 +0,0 @@
- """
- Daemon service for command management and execution.
-
- This module provides a background daemon service that can store, manage, and execute
- commands written in various programming languages (Python, Node.js, Lua, Shell).
- Commands are stored in a SQLite database with embeddings for similarity search and
- hierarchical grouping.
-
- The daemon CLI commands are now loaded from portable JSON files in ~/.mcli/commands/
- """
-
- from .daemon import Command, CommandExecutor, DaemonService
-
- # Export main components
- __all__ = ["Command", "CommandExecutor", "DaemonService"]
mcli/workflow/dashboard/__init__.py DELETED
@@ -1,5 +0,0 @@
- """Dashboard workflow module."""
-
- from .dashboard_cmd import dashboard
-
- __all__ = ["dashboard"]
mcli/workflow/docker/__init__.py DELETED
File without changes