mcli-framework 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (186) hide show
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,573 @@
1
+ """Stock-specific feature engineering for recommendation models"""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+ from dataclasses import dataclass
8
+ import logging
9
+ from collections import defaultdict
10
+ import warnings
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@dataclass
class StockFeatureConfig:
    """Tunable parameters shared by the stock feature extractors.

    The list-valued fields default to ``None``; ``__post_init__`` swaps each
    ``None`` for a freshly built default list, so instances never share the
    same list object.
    """

    # Technical indicator periods
    sma_periods: List[int] = None
    ema_periods: List[int] = None
    rsi_period: int = 14
    bollinger_period: int = 20
    bollinger_std: float = 2.0

    # Volatility features
    volatility_windows: List[int] = None
    return_windows: List[int] = None

    # Volume features
    volume_ma_periods: List[int] = None
    enable_volume_profile: bool = True

    # Market regime detection
    regime_lookback: int = 252  # 1 year
    volatility_threshold: float = 0.02

    def __post_init__(self):
        """Fill every list field that was left as ``None`` with its default."""
        # Built inside the method so each instance receives its own lists.
        fallbacks = {
            "sma_periods": [5, 10, 20, 50, 200],
            "ema_periods": [12, 26, 50],
            "volatility_windows": [5, 10, 20, 60],
            "return_windows": [1, 5, 10, 20, 60],
            "volume_ma_periods": [10, 20, 50],
        }
        for field_name, default_value in fallbacks.items():
            if getattr(self, field_name) is None:
                setattr(self, field_name, default_value)
49
+
50
+
51
class StockRecommendationFeatures:
    """Core stock recommendation feature extractor.

    Adds price, volume, volatility, momentum and trend columns to an OHLCV
    frame. Rolling computations leave NaN in the warm-up rows, and ratios
    whose denominator is zero come out as inf/NaN; no clean-up is applied
    here.
    """

    def __init__(self, config: Optional["StockFeatureConfig"] = None):
        """Store ``config``, or a default ``StockFeatureConfig`` if falsy."""
        self.config = config or StockFeatureConfig()

    def extract_features(self, stock_data: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``stock_data`` with every feature group added.

        Args:
            stock_data: Frame with ``open``, ``high``, ``low``, ``close`` and
                ``volume`` columns. Missing columns are synthesised (see
                below). When a ``date`` column is present the rows are sorted
                by it and the index is reset.

        Returns:
            A new frame containing the input columns plus the derived ones.
        """
        logger.info("Extracting stock recommendation features")

        df = stock_data.copy()

        required_cols = ["open", "high", "low", "close", "volume"]
        missing_cols = [col for col in required_cols if col not in df.columns]

        if missing_cols:
            logger.warning("Missing required columns: %s", missing_cols)
            # Best-effort placeholders so the extractors can still run:
            # a constant volume, and the close (or 100.0) for price columns.
            for col in missing_cols:
                if col == "volume":
                    df[col] = 1000000  # Default volume
                else:
                    df[col] = df.get("close", 100.0)  # Use close or default price

        # Rolling windows and shifts assume chronological row order.
        if "date" in df.columns:
            df = df.sort_values("date").reset_index(drop=True)

        df = self._extract_price_features(df)
        df = self._extract_volume_features(df)
        df = self._extract_volatility_features(df)
        df = self._extract_momentum_features(df)
        df = self._extract_trend_features(df)

        logger.info("Extracted %s total features", len(df.columns))
        return df

    def _extract_price_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add intrabar ratios, gaps, SMA/EMA levels and an SMA crossover.

        Mutates and returns ``df``.
        """
        prev_close = df["close"].shift(1)

        # Basic price relationships
        df["hl_ratio"] = (df["high"] - df["low"]) / df["close"]
        df["oc_ratio"] = (df["open"] - df["close"]) / df["close"]
        df["price_range"] = (df["high"] - df["low"]) / df["low"]

        # Price gaps: today's open against the previous close
        df["gap_up"] = (df["open"] > prev_close).astype(int)
        df["gap_down"] = (df["open"] < prev_close).astype(int)
        df["gap_size"] = (df["open"] - prev_close) / prev_close

        # Simple moving averages, price relative to each, and 5-row slope
        for period in self.config.sma_periods:
            sma = df["close"].rolling(window=period).mean()
            sma_lagged = sma.shift(5)
            df[f"sma_{period}"] = sma
            df[f"price_to_sma_{period}"] = df["close"] / sma
            df[f"sma_{period}_slope"] = (sma - sma_lagged) / sma_lagged

        # Exponential moving averages
        for period in self.config.ema_periods:
            ema = df["close"].ewm(span=period).mean()
            df[f"ema_{period}"] = ema
            df[f"price_to_ema_{period}"] = df["close"] / ema

        # Crossover between the first and last configured SMA periods
        if len(self.config.sma_periods) >= 2:
            short_sma = df[f"sma_{self.config.sma_periods[0]}"]
            long_sma = df[f"sma_{self.config.sma_periods[-1]}"]
            df["ma_crossover"] = (short_sma > long_sma).astype(int)
            df["ma_distance"] = (short_sma - long_sma) / long_sma

        return df

    def _extract_volume_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add volume averages, OBV, accumulation and spike flags.

        Mutates and returns ``df``. Also writes the helper columns
        ``price_change`` and ``obv_flow``.
        """
        # Volume moving averages
        for period in self.config.volume_ma_periods:
            volume_ma = df["volume"].rolling(window=period).mean()
            df[f"volume_ma_{period}"] = volume_ma
            df[f"volume_ratio_{period}"] = df["volume"] / volume_ma

        # On-balance volume: +volume on up days, -volume on down days, else 0
        df["price_change"] = df["close"] - df["close"].shift(1)

        # Volume price trend: 10-row sum of price change times volume
        df["volume_price_trend"] = (
            (df["price_change"] * df["volume"]).rolling(window=10).sum()
        )

        df["obv_flow"] = np.where(
            df["price_change"] > 0,
            df["volume"],
            np.where(df["price_change"] < 0, -df["volume"], 0),
        )
        df["obv"] = df["obv_flow"].cumsum()

        # Volume accumulation: volume signed by whether the close sits above
        # the bar midpoint. np.where returns a plain ndarray, which has no
        # .rolling(), so wrap it in a Series on the frame's index first.
        bar_midpoint = (df["high"] + df["low"]) / 2
        signed_volume = pd.Series(
            np.where(df["close"] > bar_midpoint, df["volume"], -df["volume"]),
            index=df.index,
        )
        df["accumulation"] = signed_volume.rolling(window=20).sum()

        # Volume spikes: more than twice the 20-row average volume
        df["volume_spike"] = (
            df["volume"] > df["volume"].rolling(window=20).mean() * 2
        ).astype(int)

        return df

    def _extract_volatility_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add returns, rolling volatility, ATR, Bollinger bands and a regime flag.

        Mutates and returns ``df``.
        """
        prev_close = df["close"].shift(1)

        df["returns"] = df["close"].pct_change()
        df["log_returns"] = np.log(df["close"] / prev_close)

        # Rolling volatility scaled by sqrt(252), plus its percentile rank
        # within the trailing 60 rows
        for window in self.config.volatility_windows:
            vol_col = f"volatility_{window}"
            df[vol_col] = df["returns"].rolling(window=window).std() * np.sqrt(252)
            df[f"{vol_col}_rank"] = df[vol_col].rolling(window=60).rank(pct=True)

        # True Range and Average True Range. np.maximum propagates NaN, so
        # the first row (no previous close) is NaN.
        df["true_range"] = np.maximum(
            np.maximum(df["high"] - df["low"], abs(df["high"] - prev_close)),
            abs(df["low"] - prev_close),
        )
        df["atr_14"] = df["true_range"].rolling(window=14).mean()
        df["atr_ratio"] = df["true_range"] / df["atr_14"]

        # Bollinger Bands
        bb_window = df["close"].rolling(window=self.config.bollinger_period)
        bb_mid = bb_window.mean()
        bb_half_width = bb_window.std() * self.config.bollinger_std
        df["bb_upper"] = bb_mid + bb_half_width
        df["bb_lower"] = bb_mid - bb_half_width
        band_width = df["bb_upper"] - df["bb_lower"]
        df["bb_position"] = (df["close"] - df["bb_lower"]) / band_width
        df["bb_squeeze"] = band_width / bb_mid

        # Volatility regime: 20-row volatility above its own trailing
        # 60-row 70th percentile
        rolling_vol = df["returns"].rolling(window=20).std()
        vol_threshold = rolling_vol.rolling(window=60).quantile(0.7)
        df["high_vol_regime"] = (rolling_vol > vol_threshold).astype(int)

        return df

    def _extract_momentum_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add RSI, MACD, stochastic oscillator, rate of change and momentum.

        Mutates and returns ``df``.
        """
        # RSI from simple rolling means of gains and losses
        delta = df["close"].diff()
        gain = delta.where(delta > 0, 0).rolling(window=self.config.rsi_period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=self.config.rsi_period).mean()
        rs = gain / loss
        df["rsi"] = 100 - (100 / (1 + rs))
        df["rsi_overbought"] = (df["rsi"] > 70).astype(int)
        df["rsi_oversold"] = (df["rsi"] < 30).astype(int)

        # MACD with fixed 12/26 spans and a 9-span signal line
        ema_12 = df["close"].ewm(span=12).mean()
        ema_26 = df["close"].ewm(span=26).mean()
        df["macd"] = ema_12 - ema_26
        df["macd_signal"] = df["macd"].ewm(span=9).mean()
        df["macd_histogram"] = df["macd"] - df["macd_signal"]
        df["macd_bullish"] = (df["macd"] > df["macd_signal"]).astype(int)

        # Stochastic oscillator over 14 rows, smoothed over 3
        lowest_low = df["low"].rolling(window=14).min()
        highest_high = df["high"].rolling(window=14).max()
        df["stoch_k"] = 100 * (df["close"] - lowest_low) / (highest_high - lowest_low)
        df["stoch_d"] = df["stoch_k"].rolling(window=3).mean()

        # Rate of change (percent) and momentum (price ratio)
        for period in [5, 10, 20]:
            lagged_close = df["close"].shift(period)
            df[f"roc_{period}"] = ((df["close"] - lagged_close) / lagged_close) * 100
        for period in [5, 10, 20]:
            df[f"momentum_{period}"] = df["close"] / df["close"].shift(period)

        return df

    @staticmethod
    def _window_slope(prices) -> float:
        """Return the least-squares slope of ``prices`` against 0..n-1."""
        return np.polyfit(np.arange(len(prices)), prices, 1)[0]

    @staticmethod
    def _window_r_squared(prices) -> float:
        """Return R² of a straight-line fit to ``prices``.

        Returns 0 for windows shorter than two points, for a constant
        window (zero total variance), or when the fit itself fails.
        """
        if len(prices) < 2:
            return 0
        x = np.arange(len(prices))
        try:
            slope, intercept = np.polyfit(x, prices, 1)
        except (np.linalg.LinAlgError, ValueError, TypeError):
            # Best-effort feature: a failed fit counts as "no trend".
            return 0
        predicted = slope * x + intercept
        ss_res = np.sum((prices - predicted) ** 2)
        ss_tot = np.sum((prices - np.mean(prices)) ** 2)
        return 1 - (ss_res / ss_tot) if ss_tot != 0 else 0

    def _extract_trend_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add regression slope/R², support/resistance and high/low counts.

        Mutates and returns ``df``.
        """
        # Trend direction (slope) and strength (R²) over several windows.
        # raw=True hands each window to the helper as a plain ndarray.
        for window in [10, 20, 50]:
            close_window = df["close"].rolling(window=window)
            df[f"trend_slope_{window}"] = close_window.apply(self._window_slope, raw=True)
            df[f"trend_strength_{window}"] = close_window.apply(
                self._window_r_squared, raw=True
            )

        # Support and resistance: 20-row low and high
        df["support_level"] = df["low"].rolling(window=20).min()
        df["resistance_level"] = df["high"].rolling(window=20).max()
        df["support_distance"] = (df["close"] - df["support_level"]) / df["close"]
        df["resistance_distance"] = (df["resistance_level"] - df["close"]) / df["close"]

        # Price position within the support/resistance range
        df["range_position"] = (df["close"] - df["support_level"]) / (
            df["resistance_level"] - df["support_level"]
        )

        # Higher highs and lower lows versus the previous row, and their
        # counts over the trailing 10 rows
        df["higher_high"] = (df["high"] > df["high"].shift(1)).astype(int)
        df["lower_low"] = (df["low"] < df["low"].shift(1)).astype(int)
        df["higher_high_count"] = df["higher_high"].rolling(window=10).sum()
        df["lower_low_count"] = df["lower_low"].rolling(window=10).sum()

        return df
282
+
283
+
284
class TechnicalIndicatorFeatures:
    """Advanced technical indicator features.

    Each indicator reads the ``high``/``low``/``close`` (and for MFI,
    ``volume``) columns. Rolling warm-up rows are NaN.
    """

    def __init__(self, config: Optional["StockFeatureConfig"] = None):
        """Store ``config``, or a default ``StockFeatureConfig`` if falsy."""
        self.config = config or StockFeatureConfig()

    def extract_advanced_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with all advanced indicator columns added."""
        df = df.copy()

        df["williams_r"] = self._williams_r(df)
        df["cci"] = self._commodity_channel_index(df)
        df["mfi"] = self._money_flow_index(df)

        df["aroon_up"], df["aroon_down"] = self._aroon_indicator(df)
        df["aroon_oscillator"] = df["aroon_up"] - df["aroon_down"]

        df["psar"] = self._parabolic_sar(df)
        df["psar_bullish"] = (df["close"] > df["psar"]).astype(int)

        df = self._ichimoku_cloud(df)

        return df

    def _williams_r(self, df: pd.DataFrame, period: int = 14) -> pd.Series:
        """Williams %R: close position below the ``period``-row high, times -100."""
        highest_high = df["high"].rolling(window=period).max()
        lowest_low = df["low"].rolling(window=period).min()
        return -100 * (highest_high - df["close"]) / (highest_high - lowest_low)

    def _commodity_channel_index(self, df: pd.DataFrame, period: int = 20) -> pd.Series:
        """Commodity Channel Index using the 0.015 scaling constant."""
        typical_price = (df["high"] + df["low"] + df["close"]) / 3
        tp_window = typical_price.rolling(window=period)
        sma_tp = tp_window.mean()
        # Mean absolute deviation of each window around its own mean.
        mad = tp_window.apply(lambda w: np.mean(np.abs(w - w.mean())), raw=True)
        return (typical_price - sma_tp) / (0.015 * mad)

    def _money_flow_index(self, df: pd.DataFrame, period: int = 14) -> pd.Series:
        """Money Flow Index from typical-price-weighted volume."""
        typical_price = (df["high"] + df["low"] + df["close"]) / 3
        money_flow = typical_price * df["volume"]
        prev_typical = typical_price.shift(1)

        # Flow counts as positive/negative by the direction of typical price.
        positive_flow = money_flow.where(typical_price > prev_typical, 0)
        negative_flow = money_flow.where(typical_price < prev_typical, 0)

        positive_mf = positive_flow.rolling(window=period).sum()
        negative_mf = negative_flow.rolling(window=period).sum()

        return 100 - (100 / (1 + positive_mf / negative_mf))

    def _aroon_indicator(self, df: pd.DataFrame, period: int = 25) -> Tuple[pd.Series, pd.Series]:
        """Aroon Up and Aroon Down indicators.

        Each value is ``100 * (period - rows_since_extreme) / period``, where
        the extreme is the highest high (up) or lowest low (down) among the
        current row and the ``period`` rows before it. A fresh extreme scores
        100 and one that is ``period`` rows old scores 0.

        Returns:
            ``(aroon_up, aroon_down)``; the first ``period`` rows are NaN.
        """
        # argmax/argmin give the extreme's position counted from the oldest
        # row of the window, so with period + 1 rows per window
        # rows_since_extreme == period - position and the formula reduces to
        # 100 * position / period. On ties the oldest occurrence is used.
        window = period + 1
        aroon_up = 100 * df["high"].rolling(window=window).apply(np.argmax, raw=True) / period
        aroon_down = 100 * df["low"].rolling(window=window).apply(np.argmin, raw=True) / period
        return aroon_up, aroon_down

    def _parabolic_sar(self, df: pd.DataFrame) -> pd.Series:
        """Placeholder for Parabolic SAR: the previous row's close.

        This is not a real SAR calculation. Row 0 is seeded with that row's
        low; every later row is the prior close.
        """
        psar = np.zeros(len(df))
        if len(df) > 0:
            psar[0] = df["low"].values[0]
            psar[1:] = df["close"].values[:-1]

        return pd.Series(psar, index=df.index)

    def _ichimoku_cloud(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add Ichimoku components and cloud-position flags.

        Mutates and returns ``df``.
        """

        def midpoint(window: int) -> pd.Series:
            """Midpoint of the rolling high/low range over ``window`` rows."""
            return (
                df["high"].rolling(window=window).max()
                + df["low"].rolling(window=window).min()
            ) / 2

        # Tenkan-sen (Conversion Line) and Kijun-sen (Base Line)
        df["tenkan_sen"] = midpoint(9)
        df["kijun_sen"] = midpoint(26)

        # Senkou Span A / B (Leading Spans), each moved 26 rows forward
        df["senkou_span_a"] = ((df["tenkan_sen"] + df["kijun_sen"]) / 2).shift(26)
        df["senkou_span_b"] = midpoint(52).shift(26)

        # Chikou Span (Lagging Span).
        # NOTE(review): shift(-26) puts the close from 26 rows later on each
        # row, i.e. future data; confirm before using it as a model input.
        df["chikou_span"] = df["close"].shift(-26)

        # Cloud thickness
        df["cloud_thickness"] = abs(df["senkou_span_a"] - df["senkou_span_b"])

        # Price relative to cloud
        cloud_top = np.maximum(df["senkou_span_a"], df["senkou_span_b"])
        cloud_bottom = np.minimum(df["senkou_span_a"], df["senkou_span_b"])

        df["above_cloud"] = (df["close"] > cloud_top).astype(int)
        df["below_cloud"] = (df["close"] < cloud_bottom).astype(int)
        df["in_cloud"] = ((df["close"] >= cloud_bottom) & (df["close"] <= cloud_top)).astype(int)

        return df
407
+
408
+
409
class MarketRegimeFeatures:
    """Market regime detection features.

    Classifies each row by volatility, trend and volume regime and adds a
    few stress indicators. Reads ``open``, ``close`` and ``volume``.
    """

    def __init__(self, config: Optional["StockFeatureConfig"] = None):
        """Store ``config``, or a default ``StockFeatureConfig`` if falsy."""
        self.config = config or StockFeatureConfig()

    def extract_regime_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with all regime columns added."""
        df = df.copy()

        df = self._volatility_regime(df)
        df = self._trend_regime(df)
        df = self._volume_regime(df)
        df = self._market_stress_indicators(df)

        return df

    def _volatility_regime(self, df: pd.DataFrame) -> pd.DataFrame:
        """Label each row ``low_vol`` / ``medium_vol`` / ``high_vol``.

        The 20-row annualised volatility is compared with its own rolling
        33rd and 67th percentiles over ``config.regime_lookback`` rows.
        Rows where either threshold or the volatility is NaN get a missing
        category. Mutates and returns ``df``.
        """
        returns = df["close"].pct_change()
        vol_20 = returns.rolling(window=20).std() * np.sqrt(252)

        # Rolling.quantile takes one scalar quantile per call, and pd.cut
        # needs scalar bin edges, so the row-varying thresholds are computed
        # separately and compared element-wise.
        lookback = vol_20.rolling(window=self.config.regime_lookback)
        low_cut = lookback.quantile(0.33)
        high_cut = lookback.quantile(0.67)

        # Right-inclusive bins: (-inf, low], (low, high], (high, inf).
        # Comparisons with NaN are False, so such rows fall to code -1,
        # which from_codes maps to a missing value.
        has_cuts = (low_cut.notna() & high_cut.notna()).to_numpy()
        codes = np.select(
            [
                has_cuts & (vol_20 <= low_cut).to_numpy(),
                has_cuts & (vol_20 <= high_cut).to_numpy(),
                has_cuts & (vol_20 > high_cut).to_numpy(),
            ],
            [0, 1, 2],
            default=-1,
        )
        df["vol_regime"] = pd.Categorical.from_codes(
            codes, categories=["low_vol", "medium_vol", "high_vol"], ordered=True
        )

        # Volatility clustering: above the trailing 60-row 80th percentile
        df["vol_cluster"] = (vol_20 > vol_20.rolling(window=60).quantile(0.8)).astype(int)

        return df

    def _trend_regime(self, df: pd.DataFrame) -> pd.DataFrame:
        """Score the trend on 20/50/200-row SMAs and bucket it.

        Mutates and returns ``df``.
        """
        # +1 above the SMA, -1 below, 0 when equal or when the SMA is still
        # NaN (both comparisons are False during warm-up).
        for window in [20, 50, 200]:
            sma = df["close"].rolling(window=window).mean()
            df[f"trend_{window}"] = np.where(
                df["close"] > sma, 1, np.where(df["close"] < sma, -1, 0)
            )

        # Composite trend score, weighted towards the short window
        df["trend_score"] = df["trend_20"] * 0.5 + df["trend_50"] * 0.3 + df["trend_200"] * 0.2

        # Trend strength
        df["trend_strength"] = abs(df["trend_score"])

        # Bins are right-inclusive: a score of exactly 0.5 is "sideways",
        # exactly -0.5 is "bearish".
        df["trend_regime"] = pd.cut(
            df["trend_score"],
            bins=[-np.inf, -0.5, 0.5, np.inf],
            labels=["bearish", "sideways", "bullish"],
        )

        return df

    def _volume_regime(self, df: pd.DataFrame) -> pd.DataFrame:
        """Label volume as high/low/normal and add a 10-vs-30-row trend ratio.

        Mutates and returns ``df``.
        """
        volume_ma = df["volume"].rolling(window=20).mean()
        volume_ratio = df["volume"] / volume_ma

        # Thresholds are the trailing 60-row 80th/20th percentiles of the
        # ratio; rows with NaN thresholds end up as "normal_volume".
        ratio_window = volume_ratio.rolling(window=60)
        vol_high = ratio_window.quantile(0.8)
        vol_low = ratio_window.quantile(0.2)

        df["volume_regime"] = np.where(
            volume_ratio > vol_high,
            "high_volume",
            np.where(volume_ratio < vol_low, "low_volume", "normal_volume"),
        )

        # Volume trend
        df["volume_trend"] = (
            df["volume"].rolling(window=10).mean() / df["volume"].rolling(window=30).mean()
        )

        return df

    def _market_stress_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add drawdown, down-day streaks, volatility spikes and gap flags.

        Mutates and returns ``df``.
        """
        returns = df["close"].pct_change()

        # Drawdown from the running peak of cumulative return
        cumulative = (1 + returns).cumprod()
        running_max = cumulative.expanding().max()
        df["drawdown"] = (cumulative - running_max) / running_max

        # Consecutive down days: number each run of equal flags from 1,
        # then zero out the runs that are not down days.
        down_days = (returns < 0).astype(int)
        run_id = (down_days != down_days.shift()).cumsum()
        df["consecutive_down"] = down_days * (down_days.groupby(run_id).cumcount() + 1)

        # Volatility spikes: above the trailing 60-row 95th percentile
        vol_20 = returns.rolling(window=20).std()
        vol_spike_threshold = vol_20.rolling(window=60).quantile(0.95)
        df["vol_spike"] = (vol_20 > vol_spike_threshold).astype(int)

        # Gap analysis: open more than 2% away from the previous close
        prev_close = df["close"].shift(1)
        gaps = (df["open"] - prev_close) / prev_close
        df["gap_stress"] = (abs(gaps) > 0.02).astype(int)

        return df
523
+
524
+
525
class CrossAssetFeatures:
    """Cross-asset and correlation features."""

    def __init__(self):
        pass

    def extract_cross_asset_features(
        self, primary_df: pd.DataFrame, market_data: Dict[str, pd.DataFrame]
    ) -> pd.DataFrame:
        """Add correlation, beta and relative strength against other assets.

        For every entry of ``market_data`` that has a ``close`` column, three
        columns are added to a copy of ``primary_df``:

        * ``corr_<name>_20``: 20-row rolling correlation of daily returns.
        * ``beta_<name>``: 20-row rolling beta, computed from the 20 returns
          *preceding* each row. It is 1.0 where the asset's return variance
          is exactly zero.
        * ``relative_strength_<name>``: ratio of the two 20-row price ratios.

        Series are matched on index labels. Entries without a ``close``
        column are skipped.

        Args:
            primary_df: Frame with a ``close`` column for the primary stock.
            market_data: Mapping from asset name to that asset's frame.

        Returns:
            A new frame with the added columns.
        """
        window = 20
        df = primary_df.copy()
        stock_returns = df["close"].pct_change()

        for asset_name, asset_df in market_data.items():
            if "close" not in asset_df.columns:
                continue

            market_returns = asset_df["close"].pct_change()

            # Price correlation
            df[f"corr_{asset_name}_20"] = stock_returns.rolling(window=window).corr(
                market_returns
            )

            # Rolling beta = cov(stock, market) / var(market). Both come from
            # pandas rolling statistics, so they share the same ddof;
            # mixing np.cov (ddof=1) with np.var (ddof=0) would scale every
            # beta by window / (window - 1).
            covariance = stock_returns.rolling(window=window).cov(market_returns)
            market_variance = market_returns.rolling(window=window).var()
            beta = covariance / market_variance
            # A flat market window has no defined beta; fall back to 1.0.
            # NaN variance compares unequal to 0, so warm-up rows stay NaN.
            flat_market = market_variance.reindex(beta.index) == 0
            beta = beta.mask(flat_market, 1.0)
            # shift(1): each row's beta uses only the returns before that row.
            df[f"beta_{asset_name}"] = beta.shift(1)

            # Relative strength
            df[f"relative_strength_{asset_name}"] = (df["close"] / df["close"].shift(window)) / (
                asset_df["close"] / asset_df["close"].shift(window)
            )

        return df