mcli-framework 7.0.0 (mcli_framework-7.0.0-py3-none-any.whl)
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Note: this release of mcli-framework has been flagged as potentially problematic.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,573 @@
"""Stock-specific feature engineering for recommendation models"""

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple, Union
from dataclasses import dataclass
import logging
from collections import defaultdict
import warnings

logger = logging.getLogger(__name__)


@dataclass
class StockFeatureConfig:
    """Configuration for stock feature extraction"""

    # Technical indicator periods
    sma_periods: List[int] = None
    ema_periods: List[int] = None
    rsi_period: int = 14
    bollinger_period: int = 20
    bollinger_std: float = 2.0

    # Volatility features
    volatility_windows: List[int] = None
    return_windows: List[int] = None

    # Volume features
    volume_ma_periods: List[int] = None
    enable_volume_profile: bool = True

    # Market regime detection
    regime_lookback: int = 252  # 1 year
    volatility_threshold: float = 0.02

    def __post_init__(self):
        if self.sma_periods is None:
            self.sma_periods = [5, 10, 20, 50, 200]
        if self.ema_periods is None:
            self.ema_periods = [12, 26, 50]
        if self.volatility_windows is None:
            self.volatility_windows = [5, 10, 20, 60]
        if self.return_windows is None:
            self.return_windows = [1, 5, 10, 20, 60]
        if self.volume_ma_periods is None:
            self.volume_ma_periods = [10, 20, 50]


class StockRecommendationFeatures:
    """Core stock recommendation feature extractor"""

    def __init__(self, config: Optional[StockFeatureConfig] = None):
        self.config = config or StockFeatureConfig()

    def extract_features(self, stock_data: pd.DataFrame) -> pd.DataFrame:
        """Extract stock recommendation features"""
        logger.info("Extracting stock recommendation features")

        df = stock_data.copy()

        # Ensure required columns exist
        required_cols = ["open", "high", "low", "close", "volume"]
        missing_cols = [col for col in required_cols if col not in df.columns]

        if missing_cols:
            logger.warning(f"Missing required columns: {missing_cols}")
            # Create synthetic data for missing columns if needed
            for col in missing_cols:
                if col == "volume":
                    df[col] = 1000000  # Default volume
                else:
                    df[col] = df.get("close", 100.0)  # Use close or default price

        # Sort by date to ensure chronological order
        if "date" in df.columns:
            df = df.sort_values("date").reset_index(drop=True)

        # Extract all feature categories
        df = self._extract_price_features(df)
        df = self._extract_volume_features(df)
        df = self._extract_volatility_features(df)
        df = self._extract_momentum_features(df)
        df = self._extract_trend_features(df)

        logger.info(f"Extracted {len(df.columns)} total features")
        return df

    def _extract_price_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract price-based features"""
        # Basic price relationships
        df["hl_ratio"] = (df["high"] - df["low"]) / df["close"]
        df["oc_ratio"] = (df["open"] - df["close"]) / df["close"]
        df["price_range"] = (df["high"] - df["low"]) / df["low"]

        # Price gaps
        df["gap_up"] = (df["open"] > df["close"].shift(1)).astype(int)
        df["gap_down"] = (df["open"] < df["close"].shift(1)).astype(int)
        df["gap_size"] = (df["open"] - df["close"].shift(1)) / df["close"].shift(1)

        # Simple moving averages
        for period in self.config.sma_periods:
            df[f"sma_{period}"] = df["close"].rolling(window=period).mean()
            df[f"price_to_sma_{period}"] = df["close"] / df[f"sma_{period}"]
            df[f"sma_{period}_slope"] = (df[f"sma_{period}"] - df[f"sma_{period}"].shift(5)) / df[
                f"sma_{period}"
            ].shift(5)

        # Exponential moving averages
        for period in self.config.ema_periods:
            df[f"ema_{period}"] = df["close"].ewm(span=period).mean()
            df[f"price_to_ema_{period}"] = df["close"] / df[f"ema_{period}"]

        # Moving average crossovers
        if len(self.config.sma_periods) >= 2:
            short_ma = self.config.sma_periods[0]
            long_ma = self.config.sma_periods[-1]
            df["ma_crossover"] = (df[f"sma_{short_ma}"] > df[f"sma_{long_ma}"]).astype(int)
            df["ma_distance"] = (df[f"sma_{short_ma}"] - df[f"sma_{long_ma}"]) / df[
                f"sma_{long_ma}"
            ]

        return df

    def _extract_volume_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract volume-based features"""
        # Volume moving averages
        for period in self.config.volume_ma_periods:
            df[f"volume_ma_{period}"] = df["volume"].rolling(window=period).mean()
            df[f"volume_ratio_{period}"] = df["volume"] / df[f"volume_ma_{period}"]

        # Volume price trend
        df["volume_price_trend"] = (
            ((df["close"] - df["close"].shift(1)) * df["volume"]).rolling(window=10).sum()
        )

        # On-balance volume
        df["price_change"] = df["close"] - df["close"].shift(1)
        df["obv_flow"] = np.where(
            df["price_change"] > 0,
            df["volume"],
            np.where(df["price_change"] < 0, -df["volume"], 0),
        )
        df["obv"] = df["obv_flow"].cumsum()

        # Volume accumulation
        df["accumulation"] = (
            np.where(df["close"] > (df["high"] + df["low"]) / 2, df["volume"], -df["volume"])
            .rolling(window=20)
            .sum()
        )

        # Volume spikes
        df["volume_spike"] = (df["volume"] > df["volume"].rolling(window=20).mean() * 2).astype(int)

        return df

    def _extract_volatility_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract volatility-based features"""
        # Calculate returns
        df["returns"] = df["close"].pct_change()
        df["log_returns"] = np.log(df["close"] / df["close"].shift(1))

        # Rolling volatility
        for window in self.config.volatility_windows:
            df[f"volatility_{window}"] = df["returns"].rolling(window=window).std() * np.sqrt(252)
            df[f"volatility_{window}_rank"] = (
                df[f"volatility_{window}"].rolling(window=60).rank(pct=True)
            )

        # True Range and Average True Range
        df["true_range"] = np.maximum(
            np.maximum(
                df["high"] - df["low"],
                abs(df["high"] - df["close"].shift(1)),
            ),
            abs(df["low"] - df["close"].shift(1)),
        )
        df["atr_14"] = df["true_range"].rolling(window=14).mean()
        df["atr_ratio"] = df["true_range"] / df["atr_14"]

        # Bollinger Bands
        sma_bb = df["close"].rolling(window=self.config.bollinger_period).mean()
        bb_std = df["close"].rolling(window=self.config.bollinger_period).std()
        df["bb_upper"] = sma_bb + (bb_std * self.config.bollinger_std)
        df["bb_lower"] = sma_bb - (bb_std * self.config.bollinger_std)
        df["bb_position"] = (df["close"] - df["bb_lower"]) / (df["bb_upper"] - df["bb_lower"])
        df["bb_squeeze"] = (df["bb_upper"] - df["bb_lower"]) / sma_bb

        # Volatility regime
        rolling_vol = df["returns"].rolling(window=20).std()
        vol_threshold = rolling_vol.rolling(window=60).quantile(0.7)
        df["high_vol_regime"] = (rolling_vol > vol_threshold).astype(int)

        return df

    def _extract_momentum_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract momentum-based features"""
        # RSI (Relative Strength Index)
        delta = df["close"].diff()
        gain = delta.where(delta > 0, 0).rolling(window=self.config.rsi_period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=self.config.rsi_period).mean()
        rs = gain / loss
        df["rsi"] = 100 - (100 / (1 + rs))
        df["rsi_overbought"] = (df["rsi"] > 70).astype(int)
        df["rsi_oversold"] = (df["rsi"] < 30).astype(int)

        # MACD
        ema_12 = df["close"].ewm(span=12).mean()
        ema_26 = df["close"].ewm(span=26).mean()
        df["macd"] = ema_12 - ema_26
        df["macd_signal"] = df["macd"].ewm(span=9).mean()
        df["macd_histogram"] = df["macd"] - df["macd_signal"]
        df["macd_bullish"] = (df["macd"] > df["macd_signal"]).astype(int)

        # Stochastic oscillator
        lowest_low = df["low"].rolling(window=14).min()
        highest_high = df["high"].rolling(window=14).max()
        df["stoch_k"] = 100 * (df["close"] - lowest_low) / (highest_high - lowest_low)
        df["stoch_d"] = df["stoch_k"].rolling(window=3).mean()

        # Rate of Change
        for period in [5, 10, 20]:
            df[f"roc_{period}"] = (
                (df["close"] - df["close"].shift(period)) / df["close"].shift(period)
            ) * 100

        # Momentum
        for period in [5, 10, 20]:
            df[f"momentum_{period}"] = df["close"] / df["close"].shift(period)

        return df

    def _extract_trend_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract trend-based features"""
        # Trend strength
        for window in [10, 20, 50]:
            # Linear regression slope
            df[f"trend_slope_{window}"] = (
                df["close"]
                .rolling(window=window)
                .apply(lambda x: np.polyfit(range(len(x)), x, 1)[0], raw=False)
            )

            # R-squared of trend
            def calculate_r_squared(prices):
                if len(prices) < 2:
                    return 0
                x = np.arange(len(prices))
                try:
                    slope, intercept = np.polyfit(x, prices, 1)
                    predicted = slope * x + intercept
                    ss_res = np.sum((prices - predicted) ** 2)
                    ss_tot = np.sum((prices - np.mean(prices)) ** 2)
                    return 1 - (ss_res / ss_tot) if ss_tot != 0 else 0
                except:
                    return 0

            df[f"trend_strength_{window}"] = (
                df["close"].rolling(window=window).apply(calculate_r_squared, raw=False)
            )

        # Support and resistance levels
        df["support_level"] = df["low"].rolling(window=20).min()
        df["resistance_level"] = df["high"].rolling(window=20).max()
        df["support_distance"] = (df["close"] - df["support_level"]) / df["close"]
        df["resistance_distance"] = (df["resistance_level"] - df["close"]) / df["close"]

        # Price position within recent range
        df["range_position"] = (df["close"] - df["support_level"]) / (
            df["resistance_level"] - df["support_level"]
        )

        # Higher highs and lower lows
        df["higher_high"] = (df["high"] > df["high"].shift(1)).astype(int)
        df["lower_low"] = (df["low"] < df["low"].shift(1)).astype(int)
        df["higher_high_count"] = df["higher_high"].rolling(window=10).sum()
        df["lower_low_count"] = df["lower_low"].rolling(window=10).sum()

        return df


class TechnicalIndicatorFeatures:
    """Advanced technical indicator features"""

    def __init__(self, config: Optional[StockFeatureConfig] = None):
        self.config = config or StockFeatureConfig()

    def extract_advanced_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract advanced technical indicators"""
        df = df.copy()

        # Williams %R
        df["williams_r"] = self._williams_r(df)

        # Commodity Channel Index (CCI)
        df["cci"] = self._commodity_channel_index(df)

        # Money Flow Index (MFI)
        df["mfi"] = self._money_flow_index(df)

        # Aroon indicator
        df["aroon_up"], df["aroon_down"] = self._aroon_indicator(df)
        df["aroon_oscillator"] = df["aroon_up"] - df["aroon_down"]

        # Parabolic SAR
        df["psar"] = self._parabolic_sar(df)
        df["psar_bullish"] = (df["close"] > df["psar"]).astype(int)

        # Ichimoku Cloud components
        df = self._ichimoku_cloud(df)

        return df

    def _williams_r(self, df: pd.DataFrame, period: int = 14) -> pd.Series:
        """Williams %R oscillator"""
        highest_high = df["high"].rolling(window=period).max()
        lowest_low = df["low"].rolling(window=period).min()
        return -100 * (highest_high - df["close"]) / (highest_high - lowest_low)

    def _commodity_channel_index(self, df: pd.DataFrame, period: int = 20) -> pd.Series:
        """Commodity Channel Index"""
        typical_price = (df["high"] + df["low"] + df["close"]) / 3
        sma_tp = typical_price.rolling(window=period).mean()
        mad = typical_price.rolling(window=period).apply(
            lambda x: np.mean(np.abs(x - x.mean())), raw=False
        )
        return (typical_price - sma_tp) / (0.015 * mad)

    def _money_flow_index(self, df: pd.DataFrame, period: int = 14) -> pd.Series:
        """Money Flow Index"""
        typical_price = (df["high"] + df["low"] + df["close"]) / 3
        money_flow = typical_price * df["volume"]

        positive_flow = money_flow.where(typical_price > typical_price.shift(1), 0)
        negative_flow = money_flow.where(typical_price < typical_price.shift(1), 0)

        positive_mf = positive_flow.rolling(window=period).sum()
        negative_mf = negative_flow.rolling(window=period).sum()

        mfi = 100 - (100 / (1 + positive_mf / negative_mf))
        return mfi

    def _aroon_indicator(self, df: pd.DataFrame, period: int = 25) -> Tuple[pd.Series, pd.Series]:
        """Aroon Up and Aroon Down indicators"""
        aroon_up = (
            100 * (period - df["high"].rolling(window=period).apply(np.argmax, raw=False)) / period
        )
        aroon_down = (
            100 * (period - df["low"].rolling(window=period).apply(np.argmin, raw=False)) / period
        )
        return aroon_up, aroon_down

    def _parabolic_sar(self, df: pd.DataFrame) -> pd.Series:
        """Parabolic SAR indicator (simplified version)"""
        # Simplified PSAR implementation
        high = df["high"].values
        low = df["low"].values
        close = df["close"].values

        psar = np.zeros(len(df))
        if len(df) > 0:
            psar[0] = low[0]

        for i in range(1, len(df)):
            # Very simplified version - just use previous close as approximation
            psar[i] = close[i - 1]

        return pd.Series(psar, index=df.index)

    def _ichimoku_cloud(self, df: pd.DataFrame) -> pd.DataFrame:
        """Ichimoku Cloud components"""
        # Tenkan-sen (Conversion Line)
        tenkan_high = df["high"].rolling(window=9).max()
        tenkan_low = df["low"].rolling(window=9).min()
        df["tenkan_sen"] = (tenkan_high + tenkan_low) / 2

        # Kijun-sen (Base Line)
        kijun_high = df["high"].rolling(window=26).max()
        kijun_low = df["low"].rolling(window=26).min()
        df["kijun_sen"] = (kijun_high + kijun_low) / 2

        # Senkou Span A (Leading Span A)
        df["senkou_span_a"] = ((df["tenkan_sen"] + df["kijun_sen"]) / 2).shift(26)

        # Senkou Span B (Leading Span B)
        senkou_high = df["high"].rolling(window=52).max()
        senkou_low = df["low"].rolling(window=52).min()
        df["senkou_span_b"] = ((senkou_high + senkou_low) / 2).shift(26)

        # Chikou Span (Lagging Span)
        df["chikou_span"] = df["close"].shift(-26)

        # Cloud thickness
        df["cloud_thickness"] = abs(df["senkou_span_a"] - df["senkou_span_b"])

        # Price relative to cloud
        cloud_top = np.maximum(df["senkou_span_a"], df["senkou_span_b"])
        cloud_bottom = np.minimum(df["senkou_span_a"], df["senkou_span_b"])

        df["above_cloud"] = (df["close"] > cloud_top).astype(int)
        df["below_cloud"] = (df["close"] < cloud_bottom).astype(int)
        df["in_cloud"] = ((df["close"] >= cloud_bottom) & (df["close"] <= cloud_top)).astype(int)

        return df


class MarketRegimeFeatures:
    """Market regime detection features"""

    def __init__(self, config: Optional[StockFeatureConfig] = None):
        self.config = config or StockFeatureConfig()

    def extract_regime_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract market regime features"""
        df = df.copy()

        # Volatility regime
        df = self._volatility_regime(df)

        # Trend regime
        df = self._trend_regime(df)

        # Volume regime
        df = self._volume_regime(df)

        # Market stress indicators
        df = self._market_stress_indicators(df)

        return df

    def _volatility_regime(self, df: pd.DataFrame) -> pd.DataFrame:
        """Identify volatility regimes"""
        returns = df["close"].pct_change()
        vol_20 = returns.rolling(window=20).std() * np.sqrt(252)

        # Regime classification based on volatility percentiles
        vol_percentiles = vol_20.rolling(window=self.config.regime_lookback).quantile([0.33, 0.67])

        df["vol_regime"] = pd.cut(
            vol_20,
            bins=[-np.inf, vol_percentiles.iloc[:, 0], vol_percentiles.iloc[:, 1], np.inf],
            labels=["low_vol", "medium_vol", "high_vol"],
        )

        # Volatility clustering
        df["vol_cluster"] = (vol_20 > vol_20.rolling(window=60).quantile(0.8)).astype(int)

        return df

    def _trend_regime(self, df: pd.DataFrame) -> pd.DataFrame:
        """Identify trend regimes"""
        # Multiple timeframe trend analysis
        for window in [20, 50, 200]:
            sma = df["close"].rolling(window=window).mean()
            df[f"trend_{window}"] = np.where(
                df["close"] > sma, 1, np.where(df["close"] < sma, -1, 0)
            )

        # Composite trend score
        df["trend_score"] = df["trend_20"] * 0.5 + df["trend_50"] * 0.3 + df["trend_200"] * 0.2

        # Trend strength
        df["trend_strength"] = abs(df["trend_score"])

        # Trend regime classification
        df["trend_regime"] = pd.cut(
            df["trend_score"],
            bins=[-np.inf, -0.5, 0.5, np.inf],
            labels=["bearish", "sideways", "bullish"],
        )

        return df

    def _volume_regime(self, df: pd.DataFrame) -> pd.DataFrame:
        """Identify volume regimes"""
        volume_ma = df["volume"].rolling(window=20).mean()
        volume_ratio = df["volume"] / volume_ma

        # Volume regime classification
        vol_high = volume_ratio.rolling(window=60).quantile(0.8)
        vol_low = volume_ratio.rolling(window=60).quantile(0.2)

        df["volume_regime"] = np.where(
            volume_ratio > vol_high,
            "high_volume",
            np.where(volume_ratio < vol_low, "low_volume", "normal_volume"),
        )

        # Volume trend
        df["volume_trend"] = (
            df["volume"].rolling(window=10).mean() / df["volume"].rolling(window=30).mean()
        )

        return df

    def _market_stress_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Market stress and fear indicators"""
        returns = df["close"].pct_change()

        # Maximum drawdown
        cumulative = (1 + returns).cumprod()
        running_max = cumulative.expanding().max()
        df["drawdown"] = (cumulative - running_max) / running_max

        # Consecutive down days
        down_days = (returns < 0).astype(int)
        df["consecutive_down"] = down_days * (
            down_days.groupby((down_days != down_days.shift()).cumsum()).cumcount() + 1
        )

        # Volatility spikes
        vol_20 = returns.rolling(window=20).std()
        vol_spike_threshold = vol_20.rolling(window=60).quantile(0.95)
        df["vol_spike"] = (vol_20 > vol_spike_threshold).astype(int)

        # Gap analysis
        gaps = (df["open"] - df["close"].shift(1)) / df["close"].shift(1)
        df["gap_stress"] = (abs(gaps) > 0.02).astype(int)

        return df


class CrossAssetFeatures:
    """Cross-asset and correlation features"""

    def __init__(self):
        pass

    def extract_cross_asset_features(
        self, primary_df: pd.DataFrame, market_data: Dict[str, pd.DataFrame]
    ) -> pd.DataFrame:
        """Extract features based on relationships with other assets"""
        df = primary_df.copy()

        # Correlation with market indices
        for asset_name, asset_df in market_data.items():
            if "close" in asset_df.columns:
                # Price correlation
                corr_20 = (
                    df["close"].pct_change().rolling(window=20).corr(asset_df["close"].pct_change())
                )
                df[f"corr_{asset_name}_20"] = corr_20

                # Beta calculation
                market_returns = asset_df["close"].pct_change()
                stock_returns = df["close"].pct_change()

                # Rolling beta
                def calculate_beta(stock_ret, market_ret):
                    try:
                        covariance = np.cov(stock_ret, market_ret)[0][1]
                        market_variance = np.var(market_ret)
                        return covariance / market_variance if market_variance != 0 else 1.0
                    except:
                        return 1.0

                rolling_beta = pd.Series(index=df.index, dtype=float)
                for i in range(20, len(df)):
                    stock_window = stock_returns.iloc[i - 20 : i]
                    market_window = market_returns.iloc[i - 20 : i]
                    if len(stock_window) == 20 and len(market_window) == 20:
                        rolling_beta.iloc[i] = calculate_beta(stock_window, market_window)

                df[f"beta_{asset_name}"] = rolling_beta

                # Relative strength
                df[f"relative_strength_{asset_name}"] = (df["close"] / df["close"].shift(20)) / (
                    asset_df["close"] / asset_df["close"].shift(20)
                )

        return df