mcli-framework 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (186) hide show
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,809 @@
1
+ """Stock recommendation engine that combines all feature engineering components"""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+ from dataclasses import dataclass, asdict
8
+ import logging
9
+ from pathlib import Path
10
+ import joblib
11
+
12
+ from .stock_features import (
13
+ StockRecommendationFeatures,
14
+ TechnicalIndicatorFeatures,
15
+ MarketRegimeFeatures,
16
+ )
17
+ from .political_features import (
18
+ PoliticalInfluenceFeatures,
19
+ CongressionalTrackingFeatures,
20
+ PolicyImpactFeatures,
21
+ )
22
+ from .ensemble_features import (
23
+ EnsembleFeatureBuilder,
24
+ FeatureInteractionEngine,
25
+ DynamicFeatureSelector,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
@dataclass
class RecommendationConfig:
    """Configuration for stock recommendation engine.

    ``recommendation_weights`` and ``feature_selection_methods`` default to
    ``None`` and are replaced with fresh built-in defaults in
    ``__post_init__``, so instances never share a mutable default.
    """

    # Feature engineering components
    enable_technical_features: bool = True
    enable_political_features: bool = True
    enable_ensemble_features: bool = True
    enable_interaction_features: bool = True

    # Recommendation scoring
    # None means "use the default weights assigned in __post_init__".
    recommendation_weights: Optional[Dict[str, float]] = None
    risk_adjustment_factor: float = 0.1
    confidence_threshold: float = 0.6

    # Time horizons for recommendations
    short_term_days: int = 7
    medium_term_days: int = 30
    long_term_days: int = 90

    # Feature selection
    max_features: int = 200
    # None means "use the default method list assigned in __post_init__".
    feature_selection_methods: Optional[List[str]] = None

    # Output settings
    output_format: str = "detailed"  # "simple", "detailed", "full"
    save_feature_importance: bool = True

    def __post_init__(self) -> None:
        """Fill in the mutable defaults that were left as ``None``."""
        if self.recommendation_weights is None:
            self.recommendation_weights = {
                "technical_score": 0.3,
                "political_influence_score": 0.25,
                "market_regime_score": 0.2,
                "ensemble_score": 0.15,
                "risk_adjustment": 0.1,
            }

        if self.feature_selection_methods is None:
            self.feature_selection_methods = ["correlation", "mutual_info", "variance"]
71
+
72
+
73
@dataclass
class RecommendationResult:
    """One stock's recommendation as produced by the recommendation engine.

    All fields are required and positional order matters for callers that
    construct the result without keywords.
    """

    # --- identity and headline numbers ---
    ticker: str
    company_name: str
    recommendation_score: float
    confidence: float
    risk_level: str

    # --- per-component scores that feed the headline score ---
    technical_score: float
    political_influence_score: float
    market_regime_score: float
    ensemble_score: float

    # --- outlook labels per time horizon ---
    short_term_outlook: str
    medium_term_outlook: str
    long_term_outlook: str

    # --- explanation material ---
    key_features: List[str]
    feature_importance: Dict[str, float]
    recommendation_reason: str
    warnings: List[str]

    # --- provenance ---
    generated_at: datetime
    model_version: str
104
+
105
+
106
+ class StockRecommendationEngine:
107
+ """Comprehensive stock recommendation engine"""
108
+
109
def __init__(self, config: Optional[RecommendationConfig] = None):
    """Set up the engine with its configuration and feature components.

    Args:
        config: Engine settings. A default ``RecommendationConfig`` is
            created when this is ``None`` (or otherwise falsy).
    """
    self.config = config if config else RecommendationConfig()

    # Stock / price based feature extractors
    self.stock_features = StockRecommendationFeatures()
    self.technical_features = TechnicalIndicatorFeatures()
    self.market_regime_features = MarketRegimeFeatures()

    # Politics based feature extractors
    self.political_features = PoliticalInfluenceFeatures()
    self.congressional_features = CongressionalTrackingFeatures()
    self.policy_features = PolicyImpactFeatures()

    # Ensemble construction, interactions and selection
    self.ensemble_builder = EnsembleFeatureBuilder()
    self.interaction_engine = FeatureInteractionEngine()
    self.feature_selector = DynamicFeatureSelector()

    # Empty caches for feature importance and model artifacts
    self.feature_importance_cache = dict()
    self.model_artifacts = dict()
126
+
127
def generate_recommendation(
    self,
    trading_data: pd.DataFrame,
    stock_price_data: Optional[pd.DataFrame] = None,
    politician_metadata: Optional[pd.DataFrame] = None,
    market_data: Optional[Dict[str, pd.DataFrame]] = None,
) -> List[RecommendationResult]:
    """Build one recommendation per distinct ticker in the trading data.

    Features are extracted once for the whole input, then each non-null
    value of the ``ticker_cleaned`` column is scored separately. A ticker
    whose scoring raises is logged and skipped; tickers for which no
    result is produced are left out of the returned list.

    Args:
        trading_data: Trade records to analyse.
        stock_price_data: Optional price data forwarded to feature extraction.
        politician_metadata: Optional metadata forwarded to feature extraction.
        market_data: Optional market frames forwarded to feature extraction.

    Returns:
        The recommendations that could be generated, in ticker order of
        first appearance.
    """
    logger.info("Starting stock recommendation generation")

    # One feature frame for everything; per-ticker slices are taken below.
    features_df = self._extract_all_features(
        trading_data, stock_price_data, politician_metadata, market_data
    )

    unique_tickers = features_df["ticker_cleaned"].dropna().unique()
    logger.info(f"Generating recommendations for {len(unique_tickers)} stocks")

    results = []
    for ticker in unique_tickers:
        try:
            ticker_rows = features_df[features_df["ticker_cleaned"] == ticker].copy()
            if len(ticker_rows) > 0:
                result = self._generate_stock_recommendation(ticker_rows, ticker)
                if result:
                    results.append(result)
        except Exception as e:
            logger.error(f"Failed to generate recommendation for {ticker}: {e}")

    logger.info(f"Generated {len(results)} recommendations")
    return results
164
+
165
def _extract_all_features(
    self,
    trading_data: pd.DataFrame,
    stock_price_data: Optional[pd.DataFrame],
    politician_metadata: Optional[pd.DataFrame],
    market_data: Optional[Dict[str, pd.DataFrame]],
) -> pd.DataFrame:
    """Run every enabled feature stage over a copy of the trading data.

    Stages run in a fixed order: technical, political, market regime,
    ensemble, interactions, and finally feature selection. The technical
    and market regime stages are skipped when no price data is given;
    the market regime stage has no config switch of its own.

    Returns:
        The enriched frame; ``trading_data`` itself is not modified here.
    """
    logger.info("Extracting comprehensive feature set")

    settings = self.config
    prices_available = stock_price_data is not None
    features = trading_data.copy()

    # Price-derived technical indicators
    if settings.enable_technical_features and prices_available:
        features = self._add_technical_features(features, stock_price_data)

    # Political influence / disclosure / policy features
    if settings.enable_political_features:
        features = self._add_political_features(features, politician_metadata)

    # Market regime labels (only needs price data)
    if prices_available:
        features = self._add_market_regime_features(features, stock_price_data, market_data)

    # Ensemble features
    if settings.enable_ensemble_features:
        features = self._add_ensemble_features(features)

    # Pairwise feature interactions
    if settings.enable_interaction_features:
        features = self._add_interaction_features(features)

    # Always finish with feature selection
    features = self._perform_feature_selection(features)

    logger.info(f"Final feature set: {len(features.columns)} features")
    return features
202
+
203
def _add_technical_features(
    self, df: pd.DataFrame, stock_price_data: pd.DataFrame
) -> pd.DataFrame:
    """Attach technical indicator columns to the trades, ticker by ticker.

    For every non-null ``ticker_cleaned`` value the matching price rows
    (``symbol == ticker``) are turned into indicator rows. When both the
    trades' ``transaction_date_dt`` and the indicators' ``date`` columns
    exist, each trade gets the most recent indicator row at or before its
    date (backward as-of merge); otherwise the last indicator row is
    broadcast onto every trade. Tickers without price rows pass through
    unchanged.

    If either join column is missing the input frame is returned as is.
    Note that the rebuilt frame only contains rows with a non-null ticker.
    """
    logger.info("Adding technical features")

    joinable = "ticker_cleaned" in df.columns and "symbol" in stock_price_data.columns
    if not joinable:
        return df

    frames_by_ticker = []
    for ticker in df["ticker_cleaned"].dropna().unique():
        trades = df[df["ticker_cleaned"] == ticker].copy()
        prices = stock_price_data[stock_price_data["symbol"] == ticker].copy()

        # No price history for this ticker: keep the trades untouched.
        if len(prices) == 0:
            frames_by_ticker.append(trades)
            continue

        price_features = self.stock_features.extract_features(prices)
        indicators = self.technical_features.extract_advanced_indicators(price_features)

        dates_available = (
            "transaction_date_dt" in trades.columns and "date" in indicators.columns
        )
        if dates_available:
            # Align each trade with the latest indicator row on/before its date.
            aligned = pd.merge_asof(
                trades.sort_values("transaction_date_dt"),
                indicators.sort_values("date"),
                left_on="transaction_date_dt",
                right_on="date",
                direction="backward",
            )
            frames_by_ticker.append(aligned)
            continue

        # No usable dates: copy the most recent indicator values to all trades.
        newest = indicators.iloc[-1:]
        for col in indicators.columns:
            if col in ["date", "symbol"]:
                continue
            trades[col] = newest[col].iloc[0]
        frames_by_ticker.append(trades)

    if frames_by_ticker:
        df = pd.concat(frames_by_ticker, ignore_index=True)

    return df
252
+
253
def _add_political_features(
    self, df: pd.DataFrame, politician_metadata: Optional[pd.DataFrame]
) -> pd.DataFrame:
    """Chain the political feature extractors over the frame.

    The frame is passed, in order, through the influence extractor (which
    also receives ``politician_metadata``), the congressional disclosure
    and reporting-pattern extractors, and the policy timing extractor.
    Each step receives the previous step's output.
    """
    logger.info("Adding political features")

    # Step 1: political influence
    influenced = self.political_features.extract_influence_features(df, politician_metadata)

    # Steps 2-3: congressional tracking
    tracker = self.congressional_features
    disclosed = tracker.extract_disclosure_features(influenced)
    patterned = tracker.extract_reporting_patterns(disclosed)

    # Step 4: policy impact
    return self.policy_features.extract_policy_timing_features(patterned)
270
+
271
def _add_market_regime_features(
    self,
    df: pd.DataFrame,
    stock_price_data: pd.DataFrame,
    market_data: Optional[Dict[str, pd.DataFrame]],
) -> pd.DataFrame:
    """Left-join regime labels onto the trades by ticker symbol.

    Regime features are computed from ``stock_price_data`` and the
    ``vol_regime``, ``trend_regime`` and ``volume_regime`` columns are
    merged in on ``ticker_cleaned == symbol``. ``market_data`` is accepted
    but not used by this method. If either join column is missing the
    input frame is returned unchanged.
    """
    logger.info("Adding market regime features")

    regimes = self.market_regime_features.extract_regime_features(stock_price_data)

    can_join = "ticker_cleaned" in df.columns and "symbol" in regimes.columns
    if not can_join:
        return df

    # NOTE(review): the join key is the symbol only. If the regime frame holds
    # several rows per symbol, every trade row is repeated once per regime row;
    # confirm the shape returned by extract_regime_features.
    wanted_columns = ["symbol", "vol_regime", "trend_regime", "volume_regime"]
    return df.merge(
        regimes[wanted_columns],
        left_on="ticker_cleaned",
        right_on="symbol",
        how="left",
    )
294
+
295
def _add_ensemble_features(self, df: pd.DataFrame) -> pd.DataFrame:
    """Delegate to the ensemble builder with this engine's fixed options.

    Clustering and rolling features are requested; interactions are left
    out because they are added by a separate stage, and no target column
    is supplied.
    """
    logger.info("Adding ensemble features")

    builder_options = {
        "target_column": None,  # feature generation only, nothing to predict
        "include_interactions": False,  # handled by the interaction stage
        "include_clustering": True,
        "include_rolling": True,
    }
    return self.ensemble_builder.build_ensemble_features(df, **builder_options)
309
+
310
def _add_interaction_features(self, df: pd.DataFrame) -> pd.DataFrame:
    """Generate interaction features for a capped set of numeric column pairs.

    Candidate columns are those of dtype ``int64``/``float64`` whose name
    does not start with ``target_``. Only the first 15 candidates (in
    column order) are paired, and only the first 20 pairs are handed to
    the interaction engine. With fewer than two candidates the frame is
    returned untouched.
    """
    logger.info("Adding interaction features")

    numeric_columns = []
    for col in df.columns:
        if df[col].dtype in ["int64", "float64"] and not col.startswith("target_"):
            numeric_columns.append(col)

    if len(numeric_columns) < 2:
        return df

    # Cap both the candidate columns and the pair count to avoid a blow-up.
    candidates = numeric_columns[:15]
    pairs = []
    for position, first in enumerate(candidates):
        for second in candidates[position + 1 :]:
            pairs.append((first, second))
    pairs = pairs[:20]

    return self.interaction_engine.generate_advanced_interactions(df, pairs)
333
+
334
def _perform_feature_selection(self, df: pd.DataFrame) -> pd.DataFrame:
    """Reduce the feature set when it exceeds ``config.max_features``.

    When the frame has no ``target_recommendation_score`` column, a
    temporary ``synthetic_target`` column is derived from the transaction
    amount, the total influence and random noise, and used as the target
    for the selector. That helper column is removed from whatever frame
    is returned, including the frame built after a successful selection
    (previously it leaked into the output on that path).

    The caller's frame is not modified: the helper column is added to a
    new frame via ``DataFrame.assign``.

    Args:
        df: Frame holding all candidate feature columns.

    Returns:
        The frame restricted to the selected features plus the non-feature
        (target) columns, or the full frame when selection is not needed
        or fails.
    """
    logger.info("Performing feature selection")

    # Create a synthetic target for feature selection if none exists
    if "target_recommendation_score" not in df.columns:
        # NOTE: the random term makes the synthetic target (and therefore the
        # selected features) vary between runs unless NumPy's RNG is seeded.
        synthetic_target = (
            np.log1p(df.get("transaction_amount_cleaned", 0)) * 0.3
            + df.get("total_influence", 0.5) * 0.4
            + np.random.random(len(df)) * 0.3
        )
        df = df.assign(synthetic_target=synthetic_target)
        target_col = "synthetic_target"
    else:
        target_col = "target_recommendation_score"

    feature_cols = [
        col for col in df.columns if col != target_col and not col.startswith("target_")
    ]

    result = df
    # Apply feature selection only if we have more features than allowed
    if len(feature_cols) > self.config.max_features:
        try:
            selected_df, _selection_info = self.feature_selector.select_features(
                df, target_col, self.config.feature_selection_methods
            )

            # Keep original non-feature columns (the target and target_* columns)
            feature_set = set(feature_cols)
            non_feature_cols = [col for col in df.columns if col not in feature_set]

            # Drop from the selector output anything that is re-attached from
            # the original frame, so the concat cannot create duplicate columns.
            selected_features = selected_df.drop(
                columns=[col for col in non_feature_cols if col in selected_df.columns]
            )
            result = pd.concat([df[non_feature_cols], selected_features], axis=1)

            logger.info(
                f"Selected {len(selected_features.columns)} features from {len(feature_cols)}"
            )
        except Exception as e:
            # Best effort: fall back to the unselected frame.
            logger.warning(f"Feature selection failed: {e}")
            result = df

    # Remove the synthetic target on every path, not just the fall-through one
    if "synthetic_target" in result.columns:
        result = result.drop(columns=["synthetic_target"])

    return result
385
+
386
def _generate_stock_recommendation(
    self, stock_data: pd.DataFrame, ticker: str
) -> Optional[RecommendationResult]:
    """Generate recommendation for a specific stock.

    The four component scores are combined with the configured weights,
    scaled down by a risk multiplier, and packaged with confidence,
    outlooks, key features, an explanation and warnings.

    The company name is read from the first ``asset_name_cleaned`` value;
    when that column is absent or the value is null the ticker is used
    instead. (Previously a missing column raised inside this method, so
    no recommendation was produced for such data.)

    Args:
        stock_data: Feature rows belonging to ``ticker``.
        ticker: The stock symbol being scored.

    Returns:
        The populated result, or ``None`` if any step raised (the error is
        logged).
    """
    try:
        # Calculate component scores
        technical_score = self._calculate_technical_score(stock_data)
        political_score = self._calculate_political_score(stock_data)
        regime_score = self._calculate_regime_score(stock_data)
        ensemble_score = self._calculate_ensemble_score(stock_data)

        # Combine scores using weights
        weights = self.config.recommendation_weights
        final_score = (
            technical_score * weights.get("technical_score", 0.3)
            + political_score * weights.get("political_influence_score", 0.25)
            + regime_score * weights.get("market_regime_score", 0.2)
            + ensemble_score * weights.get("ensemble_score", 0.15)
        )

        # Risk adjustment: higher risk shrinks the score
        risk_level = self._assess_risk_level(stock_data)
        risk_multiplier = 1.0 - (
            self.config.risk_adjustment_factor * self._risk_to_numeric(risk_level)
        )
        final_score *= risk_multiplier

        # Calculate confidence
        confidence = self._calculate_confidence(stock_data, final_score)

        # Generate outlooks
        short_outlook, medium_outlook, long_outlook = self._generate_outlooks(
            stock_data, final_score
        )

        # Get key features and explanations
        key_features, feature_importance = self._get_key_features(stock_data)
        recommendation_reason = self._generate_explanation(
            stock_data, final_score, key_features
        )

        # Generate warnings
        warnings = self._generate_warnings(stock_data, final_score)

        # Get company name, falling back to the ticker when it is unavailable
        company_name = ticker
        if "asset_name_cleaned" in stock_data.columns and len(stock_data) > 0:
            first_name = stock_data["asset_name_cleaned"].iloc[0]
            if pd.notna(first_name):
                company_name = first_name

        return RecommendationResult(
            ticker=ticker,
            company_name=str(company_name),
            recommendation_score=round(final_score, 3),
            confidence=round(confidence, 3),
            risk_level=risk_level,
            technical_score=round(technical_score, 3),
            political_influence_score=round(political_score, 3),
            market_regime_score=round(regime_score, 3),
            ensemble_score=round(ensemble_score, 3),
            short_term_outlook=short_outlook,
            medium_term_outlook=medium_outlook,
            long_term_outlook=long_outlook,
            key_features=key_features,
            feature_importance=feature_importance,
            recommendation_reason=recommendation_reason,
            warnings=warnings,
            generated_at=datetime.now(),
            model_version="1.0.0",
        )

    except Exception as e:
        logger.error(f"Failed to generate recommendation for {ticker}: {e}")
        return None
460
+
461
+ def _calculate_technical_score(self, stock_data: pd.DataFrame) -> float:
462
+ """Calculate technical analysis score"""
463
+ try:
464
+ technical_indicators = []
465
+
466
+ # RSI score
467
+ if "rsi" in stock_data.columns:
468
+ rsi = stock_data["rsi"].mean()
469
+ if 30 <= rsi <= 70:
470
+ technical_indicators.append(0.7) # Neutral zone
471
+ elif rsi < 30:
472
+ technical_indicators.append(0.9) # Oversold - buy signal
473
+ else:
474
+ technical_indicators.append(0.3) # Overbought - sell signal
475
+
476
+ # MACD score
477
+ if "macd_bullish" in stock_data.columns:
478
+ macd_bullish = stock_data["macd_bullish"].mean()
479
+ technical_indicators.append(macd_bullish)
480
+
481
+ # Trend score
482
+ if "trend_strength_20" in stock_data.columns:
483
+ trend_strength = stock_data["trend_strength_20"].mean()
484
+ technical_indicators.append(max(0, min(1, trend_strength)))
485
+
486
+ # Volume score
487
+ if "volume_ratio_20" in stock_data.columns:
488
+ volume_ratio = stock_data["volume_ratio_20"].mean()
489
+ volume_score = min(1.0, max(0.0, volume_ratio / 2))
490
+ technical_indicators.append(volume_score)
491
+
492
+ return np.mean(technical_indicators) if technical_indicators else 0.5
493
+
494
+ except Exception as e:
495
+ logger.warning(f"Failed to calculate technical score: {e}")
496
+ return 0.5
497
+
498
+ def _calculate_political_score(self, stock_data: pd.DataFrame) -> float:
499
+ """Calculate political influence score"""
500
+ try:
501
+ political_factors = []
502
+
503
+ # Total influence
504
+ if "total_influence" in stock_data.columns:
505
+ influence = stock_data["total_influence"].mean()
506
+ political_factors.append(min(1.0, influence))
507
+
508
+ # Committee alignment
509
+ if "committee_sector_alignment" in stock_data.columns:
510
+ alignment = stock_data["committee_sector_alignment"].mean()
511
+ political_factors.append(alignment)
512
+
513
+ # Trading frequency score
514
+ if "trading_frequency_score" in stock_data.columns:
515
+ frequency = stock_data["trading_frequency_score"].mean()
516
+ political_factors.append(min(1.0, frequency))
517
+
518
+ # Policy relevance
519
+ if "policy_relevant_trade" in stock_data.columns:
520
+ policy_relevance = stock_data["policy_relevant_trade"].mean()
521
+ political_factors.append(policy_relevance)
522
+
523
+ return np.mean(political_factors) if political_factors else 0.5
524
+
525
+ except Exception as e:
526
+ logger.warning(f"Failed to calculate political score: {e}")
527
+ return 0.5
528
+
529
+ def _calculate_regime_score(self, stock_data: pd.DataFrame) -> float:
530
+ """Calculate market regime score"""
531
+ try:
532
+ regime_factors = []
533
+
534
+ # Volatility regime
535
+ if "vol_regime" in stock_data.columns:
536
+ vol_regime = (
537
+ stock_data["vol_regime"].mode().iloc[0] if len(stock_data) > 0 else "medium_vol"
538
+ )
539
+ vol_score = {"low_vol": 0.8, "medium_vol": 0.6, "high_vol": 0.4}.get(
540
+ vol_regime, 0.5
541
+ )
542
+ regime_factors.append(vol_score)
543
+
544
+ # Trend regime
545
+ if "trend_regime" in stock_data.columns:
546
+ trend_regime = (
547
+ stock_data["trend_regime"].mode().iloc[0] if len(stock_data) > 0 else "sideways"
548
+ )
549
+ trend_score = {"bullish": 0.9, "sideways": 0.5, "bearish": 0.2}.get(
550
+ trend_regime, 0.5
551
+ )
552
+ regime_factors.append(trend_score)
553
+
554
+ # Volume regime
555
+ if "volume_regime" in stock_data.columns:
556
+ volume_regime = (
557
+ stock_data["volume_regime"].mode().iloc[0]
558
+ if len(stock_data) > 0
559
+ else "normal_volume"
560
+ )
561
+ volume_score = {"high_volume": 0.7, "normal_volume": 0.6, "low_volume": 0.4}.get(
562
+ volume_regime, 0.5
563
+ )
564
+ regime_factors.append(volume_score)
565
+
566
+ return np.mean(regime_factors) if regime_factors else 0.5
567
+
568
+ except Exception as e:
569
+ logger.warning(f"Failed to calculate regime score: {e}")
570
+ return 0.5
571
+
572
+ def _calculate_ensemble_score(self, stock_data: pd.DataFrame) -> float:
573
+ """Calculate ensemble model score"""
574
+ try:
575
+ # Use cluster-based scoring as proxy for ensemble
576
+ ensemble_factors = []
577
+
578
+ if "cluster_distance" in stock_data.columns:
579
+ # Lower distance = more typical pattern = higher score
580
+ distance = stock_data["cluster_distance"].mean()
581
+ normalized_distance = min(1.0, distance / 10) # Normalize
582
+ score = 1.0 - normalized_distance
583
+ ensemble_factors.append(score)
584
+
585
+ # Use polynomial features if available
586
+ poly_cols = [col for col in stock_data.columns if col.startswith("poly_")]
587
+ if poly_cols:
588
+ poly_score = abs(stock_data[poly_cols].mean().mean())
589
+ ensemble_factors.append(min(1.0, poly_score))
590
+
591
+ return np.mean(ensemble_factors) if ensemble_factors else 0.5
592
+
593
+ except Exception as e:
594
+ logger.warning(f"Failed to calculate ensemble score: {e}")
595
+ return 0.5
596
+
597
+ def _assess_risk_level(self, stock_data: pd.DataFrame) -> str:
598
+ """Assess risk level for the stock"""
599
+ try:
600
+ risk_factors = []
601
+
602
+ # Volatility risk
603
+ if "volatility_20" in stock_data.columns:
604
+ volatility = stock_data["volatility_20"].mean()
605
+ risk_factors.append(min(1.0, volatility * 10))
606
+
607
+ # Trading concentration risk
608
+ if "total_influence" in stock_data.columns:
609
+ influence = stock_data["total_influence"].mean()
610
+ risk_factors.append(min(1.0, influence))
611
+
612
+ # Policy risk
613
+ if "policy_relevant_trade" in stock_data.columns:
614
+ policy_exposure = stock_data["policy_relevant_trade"].mean()
615
+ risk_factors.append(policy_exposure)
616
+
617
+ avg_risk = np.mean(risk_factors) if risk_factors else 0.5
618
+
619
+ if avg_risk < 0.3:
620
+ return "low"
621
+ elif avg_risk < 0.7:
622
+ return "medium"
623
+ else:
624
+ return "high"
625
+
626
+ except Exception as e:
627
+ logger.warning(f"Failed to assess risk level: {e}")
628
+ return "medium"
629
+
630
+ def _risk_to_numeric(self, risk_level: str) -> float:
631
+ """Convert risk level to numeric value"""
632
+ return {"low": 0.2, "medium": 0.5, "high": 0.8}.get(risk_level, 0.5)
633
+
634
+ def _calculate_confidence(self, stock_data: pd.DataFrame, final_score: float) -> float:
635
+ """Calculate confidence in the recommendation"""
636
+ try:
637
+ confidence_factors = []
638
+
639
+ # Data completeness
640
+ non_null_ratio = stock_data.notna().mean().mean()
641
+ confidence_factors.append(non_null_ratio)
642
+
643
+ # Number of data points
644
+ data_points_factor = min(1.0, len(stock_data) / 10)
645
+ confidence_factors.append(data_points_factor)
646
+
647
+ # Score consistency (how far from neutral)
648
+ score_confidence = abs(final_score - 0.5) * 2
649
+ confidence_factors.append(score_confidence)
650
+
651
+ return np.mean(confidence_factors)
652
+
653
+ except Exception as e:
654
+ logger.warning(f"Failed to calculate confidence: {e}")
655
+ return 0.5
656
+
657
+ def _generate_outlooks(
658
+ self, stock_data: pd.DataFrame, final_score: float
659
+ ) -> Tuple[str, str, str]:
660
+ """Generate short, medium, and long-term outlooks"""
661
+
662
+ def score_to_outlook(score):
663
+ if score >= 0.7:
664
+ return "bullish"
665
+ elif score >= 0.3:
666
+ return "neutral"
667
+ else:
668
+ return "bearish"
669
+
670
+ # Base outlook on final score with some variation
671
+ short_term = score_to_outlook(final_score + np.random.normal(0, 0.1))
672
+ medium_term = score_to_outlook(final_score + np.random.normal(0, 0.05))
673
+ long_term = score_to_outlook(final_score)
674
+
675
+ return short_term, medium_term, long_term
676
+
677
+ def _get_key_features(self, stock_data: pd.DataFrame) -> Tuple[List[str], Dict[str, float]]:
678
+ """Get key features and their importance"""
679
+ try:
680
+ # Get numerical features
681
+ numerical_features = [
682
+ col
683
+ for col in stock_data.columns
684
+ if stock_data[col].dtype in ["int64", "float64"]
685
+ and not col.startswith("target_")
686
+ and stock_data[col].notna().sum() > 0
687
+ ]
688
+
689
+ # Calculate feature importance based on variance and mean values
690
+ feature_importance = {}
691
+ for feature in numerical_features[:10]: # Top 10 features
692
+ try:
693
+ value = abs(stock_data[feature].mean())
694
+ variance = stock_data[feature].var()
695
+ importance = value * (1 + variance)
696
+ feature_importance[feature] = importance
697
+ except:
698
+ feature_importance[feature] = 0
699
+
700
+ # Sort by importance
701
+ sorted_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)
702
+ key_features = [f[0] for f in sorted_features[:5]]
703
+
704
+ # Normalize importance scores
705
+ max_importance = max(feature_importance.values()) if feature_importance else 1
706
+ normalized_importance = {k: v / max_importance for k, v in feature_importance.items()}
707
+
708
+ return key_features, normalized_importance
709
+
710
+ except Exception as e:
711
+ logger.warning(f"Failed to get key features: {e}")
712
+ return [], {}
713
+
714
+ def _generate_explanation(
715
+ self, stock_data: pd.DataFrame, final_score: float, key_features: List[str]
716
+ ) -> str:
717
+ """Generate human-readable explanation for the recommendation"""
718
+
719
+ try:
720
+ if final_score >= 0.7:
721
+ base_sentiment = "Strong buy signal"
722
+ elif final_score >= 0.3:
723
+ base_sentiment = "Neutral outlook"
724
+ else:
725
+ base_sentiment = "Caution advised"
726
+
727
+ # Add key drivers
728
+ drivers = []
729
+ if "total_influence" in key_features:
730
+ drivers.append("high political influence")
731
+ if "rsi" in key_features:
732
+ drivers.append("favorable technical indicators")
733
+ if "committee_sector_alignment" in key_features:
734
+ drivers.append("strong committee-sector alignment")
735
+
736
+ if drivers:
737
+ explanation = f"{base_sentiment} driven by {', '.join(drivers[:2])}."
738
+ else:
739
+ explanation = f"{base_sentiment} based on overall analysis."
740
+
741
+ return explanation
742
+
743
+ except Exception as e:
744
+ logger.warning(f"Failed to generate explanation: {e}")
745
+ return "Recommendation based on comprehensive analysis."
746
+
747
+ def _generate_warnings(self, stock_data: pd.DataFrame, final_score: float) -> List[str]:
748
+ """Generate warnings for the recommendation"""
749
+
750
+ warnings = []
751
+
752
+ try:
753
+ # Data quality warnings
754
+ if len(stock_data) < 5:
755
+ warnings.append("Limited data points available for analysis")
756
+
757
+ # High risk warnings
758
+ if "volatility_20" in stock_data.columns:
759
+ avg_volatility = stock_data["volatility_20"].mean()
760
+ if avg_volatility > 0.3:
761
+ warnings.append("High volatility detected")
762
+
763
+ # Policy risk warnings
764
+ if "policy_relevant_trade" in stock_data.columns:
765
+ policy_exposure = stock_data["policy_relevant_trade"].mean()
766
+ if policy_exposure > 0.8:
767
+ warnings.append("High exposure to policy changes")
768
+
769
+ # Confidence warnings
770
+ if len(stock_data) == 1:
771
+ warnings.append("Single data point - recommendation may be unreliable")
772
+
773
+ except Exception as e:
774
+ logger.warning(f"Failed to generate warnings: {e}")
775
+
776
+ return warnings
777
+
778
def save_model_artifacts(self, artifacts_dir: Path):
    """Persist the configuration and feature-importance cache with joblib.

    Creates ``artifacts_dir`` (including parents) if needed, always writes
    ``recommendation_config.joblib`` from ``self.config``, and writes
    ``feature_importance.joblib`` only when ``self.feature_importance_cache``
    is truthy.
    """
    artifacts_dir.mkdir(parents=True, exist_ok=True)

    # The configuration is written unconditionally.
    joblib.dump(self.config, artifacts_dir / "recommendation_config.joblib")

    # An empty cache is not written.
    if self.feature_importance_cache:
        joblib.dump(self.feature_importance_cache, artifacts_dir / "feature_importance.joblib")

    logger.info(f"Saved model artifacts to {artifacts_dir}")
792
+
793
def load_model_artifacts(self, artifacts_dir: Path):
    """Restore the configuration and feature-importance cache from disk.

    For each artifact file that exists under ``artifacts_dir`` the matching
    attribute is replaced with the loaded object; a missing file leaves the
    attribute untouched. Any exception is logged as a warning and swallowed.
    """
    try:
        # (file name inside artifacts_dir, attribute on self that receives it)
        artifact_targets = (
            ("recommendation_config.joblib", "config"),
            ("feature_importance.joblib", "feature_importance_cache"),
        )
        for file_name, attribute in artifact_targets:
            artifact_path = artifacts_dir / file_name
            if artifact_path.exists():
                # NOTE(review): joblib.load unpickles its input, so this
                # should only be pointed at directories the caller trusts.
                setattr(self, attribute, joblib.load(artifact_path))

        logger.info(f"Loaded model artifacts from {artifacts_dir}")

    except Exception as e:
        logger.warning(f"Failed to load model artifacts: {e}")