mcli-framework 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Test script for the ML preprocessing pipeline"""
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
from politician_trading_preprocessor import PoliticianTradingPreprocessor, PreprocessingConfig
|
|
10
|
+
from ml_pipeline import MLDataPipeline, MLDataPipelineConfig
|
|
11
|
+
|
|
12
|
+
# Setup logging
|
|
13
|
+
logging.basicConfig(level=logging.INFO)
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def generate_sample_data(n_records: int = 100, seed: "int | None" = 42) -> pd.DataFrame:
    """Generate sample politician trading data for testing.

    Args:
        n_records: Number of trade records to generate. Zero (or a negative
            value) yields an empty frame that still carries every column.
        seed: Seed for the private random generator. The default of 42 keeps
            the output reproducible between calls; pass ``None`` to seed
            from OS entropy instead.

    Returns:
        A DataFrame with one row per record and a fixed column layout.
        Roughly 10% of rows have no ``transaction_amount`` and roughly 5%
        have no ``stock_symbol``; those cells are NaN.
    """
    # A private legacy RandomState yields the same stream as
    # ``np.random.seed(seed)`` followed by module-level ``np.random`` calls,
    # but leaves NumPy's global generator untouched for the caller.
    rng = np.random.RandomState(seed)

    # Explicit layout so the frame's columns never depend on which optional
    # keys happen to survive the random deletions below.
    columns = [
        "politician_name",
        "transaction_date",
        "transaction_amount",
        "transaction_type",
        "asset_name",
        "stock_symbol",
        "disclosure_date",
        "transaction_id",
        "source",
    ]

    # Sample politicians
    politicians = [
        "Nancy Pelosi",
        "Mitch McConnell",
        "Chuck Schumer",
        "Kevin McCarthy",
        "Alexandria Ocasio-Cortez",
        "Ted Cruz",
        "Elizabeth Warren",
        "Marco Rubio",
        "Bernie Sanders",
        "Mitt Romney",
    ]

    # Sample stocks; tickers[i] is the symbol for companies[i]
    tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "JPM", "BAC", "XOM"]
    companies = [
        "Apple Inc",
        "Microsoft Corp",
        "Alphabet Inc",
        "Amazon.com Inc",
        "Tesla Inc",
        "Meta Platforms Inc",
        "Nvidia Corp",
        "JPMorgan Chase",
        "Bank of America",
        "Exxon Mobil",
    ]

    # Generate random data
    records = []
    start_date = datetime.now() - timedelta(days=365)

    for i in range(n_records):
        # Random date within the last year (offset forward from start_date)
        day_offset = int(rng.randint(0, 365))
        trade_date = start_date + timedelta(days=day_offset)

        # Random amounts with some outliers
        if rng.random_sample() < 0.1:  # 10% outliers
            amount = rng.uniform(100000, 5000000)
        else:
            amount = rng.uniform(1000, 50000)

        # Random transaction type
        transaction_type = rng.choice(["buy", "sell"], p=[0.6, 0.4])

        # Random stock
        ticker_idx = int(rng.randint(0, len(tickers)))

        # Disclosure follows the trade by 1-44 days
        disclosure_lag = timedelta(days=int(rng.randint(1, 45)))

        record = {
            "politician_name": rng.choice(politicians),
            "transaction_date": trade_date.strftime("%Y-%m-%d"),
            "transaction_amount": amount,
            "transaction_type": transaction_type,
            "asset_name": companies[ticker_idx],
            "stock_symbol": tickers[ticker_idx],
            "disclosure_date": (trade_date + disclosure_lag).strftime("%Y-%m-%d"),
            "transaction_id": f"T{i:06d}",
            "source": "test_data",
        }

        # Add some missing values randomly
        if rng.random_sample() < 0.1:
            del record["transaction_amount"]
        if rng.random_sample() < 0.05:
            del record["stock_symbol"]

        records.append(record)

    return pd.DataFrame(records, columns=columns)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_data_cleaning():
    """Run the data cleaner over generated records plus hand-written dirty rows."""
    logger.info("Testing data cleaning...")

    # Rows with deliberately messy values: padded / mixed-case names,
    # currency- and range-formatted amounts, a slash-formatted date, and an
    # empty amount paired with an unparsable date.
    dirty_rows = pd.DataFrame(
        [
            {
                "politician_name": " john DOE jr. ",
                "transaction_amount": "$15,000.00",
                "transaction_date": "2023-12-01",
            },
            {
                "politician_name": "Jane Smith",
                "transaction_amount": "1K - 15K",
                "transaction_date": "12/01/2023",
            },
            {
                "politician_name": "Bob Johnson",
                "transaction_amount": "",
                "transaction_date": "invalid-date",
            },
        ]
    )

    # 50 generated records followed by the dirty rows
    combined = pd.concat([generate_sample_data(50), dirty_rows], ignore_index=True)

    # Only the cleaner component of the preprocessor is exercised here
    preprocessor = PoliticianTradingPreprocessor(PreprocessingConfig(enable_data_cleaning=True))
    cleaner = preprocessor.data_cleaner

    raw_records = combined.to_dict("records")
    cleaned, stats = cleaner.clean_trading_records(raw_records)

    logger.info(f"Original records: {len(raw_records)}")
    logger.info(f"Cleaned records: {len(cleaned)}")
    logger.info(f"Cleaning operations: {stats.cleaning_operations}")

    assert len(cleaned) > 0, "Should have some cleaned records"
    assert stats.cleaned_records > 0, "Should have cleaned some records"

    logger.info("✅ Data cleaning test passed")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def test_feature_extraction():
    """Check that feature extraction adds columns to cleaned trading data."""
    logger.info("Testing feature extraction...")

    sample = generate_sample_data(100)

    # Switch on every feature family this test inspects
    feature_flags = {
        "enable_politician_features": True,
        "enable_market_features": True,
        "enable_temporal_features": True,
        "enable_sentiment_features": True,
    }
    preprocessor = PoliticianTradingPreprocessor(PreprocessingConfig(**feature_flags))

    # Clean data first; the cleaning statistics are not needed here
    cleaned_records, _ = preprocessor.data_cleaner.clean_trading_records(
        sample.to_dict("records")
    )
    base_df = pd.DataFrame(cleaned_records)

    # Extract features
    enriched_df = preprocessor._extract_features(base_df)

    # Both counts are read after extraction has run
    logger.info(f"Original columns: {len(base_df.columns)}")
    logger.info(f"Columns after feature extraction: {len(enriched_df.columns)}")

    # A missing expected feature is only reported, never fatal
    for name in (
        "politician_name_length",
        "politician_trading_frequency",
        "asset_trading_frequency",
        "transaction_day_of_week",
        "asset_name_sentiment_score",
    ):
        if name not in enriched_df.columns:
            logger.warning(f"⚠️ Missing expected feature: {name}")
            continue
        logger.info(f"✅ Found expected feature: {name}")

    assert len(enriched_df.columns) > len(base_df.columns), "Should have extracted new features"

    logger.info("✅ Feature extraction test passed")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def test_full_preprocessing():
    """Run the whole preprocessing pipeline and validate its outputs."""
    logger.info("Testing full preprocessing pipeline...")

    raw = generate_sample_data(200)

    pipeline_config = PreprocessingConfig(
        enable_data_cleaning=True,
        enable_outlier_detection=True,
        enable_missing_value_handling=True,
        train_split_ratio=0.7,
        val_split_ratio=0.15,
        test_split_ratio=0.15,
        save_preprocessing_artifacts=True,
        artifacts_dir=Path("./test_artifacts"),
    )
    pipeline = PoliticianTradingPreprocessor(pipeline_config)

    # Run full preprocessing
    results = pipeline.preprocess(raw)

    # Report what came back
    logger.info(f"Original shape: {results.original_shape}")
    logger.info(f"Final shape: {results.final_shape}")
    logger.info(f"Features: {results.feature_count}")
    n_train = len(results.train_data)
    logger.info(f"Train size: {n_train}")
    n_val = len(results.val_data)
    logger.info(f"Val size: {n_val}")
    n_test = len(results.test_data)
    logger.info(f"Test size: {n_test}")
    logger.info(f"Target columns: {results.target_columns}")

    # Every output must be non-empty
    assert results.feature_count > 0, "Should have extracted features"
    assert n_train > 0, "Should have training data"
    assert n_val > 0, "Should have validation data"
    assert n_test > 0, "Should have test data"
    assert len(results.target_columns) > 0, "Should have target columns"

    # The three splits together must account for every final row
    assert n_train + n_val + n_test == results.final_shape[0], "Split sizes should sum to final shape"

    # Expected targets are reported when present; absence is not an error
    known_targets = ("target_profitable", "target_recommendation_score", "target_risk_level")
    for target in (t for t in known_targets if t in results.target_columns):
        logger.info(f"✅ Found expected target: {target}")

    logger.info("✅ Full preprocessing test passed")
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def test_transform_new_data():
    """Test transforming unseen data with a preprocessor fitted on other data."""
    logger.info("Testing new data transformation...")

    # generate_sample_data starts from the same fixed seed on every call, so
    # a second, smaller call would just reproduce the first rows of the
    # training set. Draw one batch and split it, so the rows handed to
    # transform_new_data really are ones the preprocessor was not fitted on.
    n_train, n_new = 100, 20
    all_data = generate_sample_data(n_train + n_new)
    train_data = all_data.iloc[:n_train].reset_index(drop=True)
    new_data = all_data.iloc[n_train:].reset_index(drop=True)

    config = PreprocessingConfig(
        save_preprocessing_artifacts=True, artifacts_dir=Path("./test_artifacts")
    )

    # Fit on the training portion
    preprocessor = PoliticianTradingPreprocessor(config)
    results = preprocessor.preprocess(train_data)

    # Transform the held-back rows
    transformed_data = preprocessor.transform_new_data(new_data)

    logger.info(f"New data original shape: {new_data.shape}")
    logger.info(f"Transformed data shape: {transformed_data.shape}")

    # Compare feature (non-target) column counts with training; the counts
    # are only logged because they are allowed to differ (see note below).
    expected_feature_cols = [
        col for col in results.train_data.columns if not col.startswith("target_")
    ]
    actual_feature_cols = [col for col in transformed_data.columns if not col.startswith("target_")]

    logger.info(f"Expected feature columns: {len(expected_feature_cols)}")
    logger.info(f"Actual feature columns: {len(actual_feature_cols)}")

    # Note: Some features might be missing due to different data patterns
    assert len(transformed_data) > 0, "Should have transformed some data"

    logger.info("✅ New data transformation test passed")
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def main():
    """Run each preprocessing test in order; log and re-raise the first failure."""
    logger.info("Starting preprocessing pipeline tests...")

    try:
        # Execution order matches the order listed here
        for run_test in (
            test_data_cleaning,
            test_feature_extraction,
            test_full_preprocessing,
            test_transform_new_data,
        ):
            run_test()

        logger.info("🎉 All preprocessing tests passed!")

    except Exception as e:
        # Record the failure, then let it propagate to the caller
        logger.error(f"❌ Test failed: {e}")
        raise
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
if __name__ == "__main__":
|
|
294
|
+
main()
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Populate database with sample data for dashboard testing."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
import random
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from mcli.ml.database.session import SessionLocal, init_db
|
|
9
|
+
from mcli.ml.database.models import (
|
|
10
|
+
User, Model, Prediction, Portfolio, Trade, StockData,
|
|
11
|
+
BacktestResult, ModelStatus
|
|
12
|
+
)
|
|
13
|
+
from mcli.ml.config import settings
|
|
14
|
+
|
|
15
|
+
def populate_sample_data():
    """Populate the database with randomly generated sample data.

    Deletes the existing rows of every sample table, then inserts:
    5 users (the first is the admin), 5 models (the first three deployed),
    5 portfolios (the first four active), 105 predictions, 30 daily price
    bars for each of 10 tickers, 5-15 trades per active portfolio and one
    backtest result per deployed model. A summary is printed at the end.

    Derived figures are kept consistent with the values they come from:
    returns are computed from the start and end capital, each bar's high
    and low bracket its open and close, and the adjusted close equals the
    close.

    Raises:
        Exception: Any error is printed, the session is rolled back and the
            exception is re-raised. The session is always closed.
    """
    # Initialize database
    init_db()

    db = SessionLocal()

    try:
        # One reference instant so every generated timestamp is relative to
        # the same "now".
        # NOTE(review): datetime.utcnow() returns a naive datetime and is
        # deprecated since Python 3.12; kept because the column types are
        # not visible from here - confirm before switching to aware values.
        now = datetime.utcnow()
        today = now.date()

        # Clear existing data (optional).
        # NOTE(review): order looks chosen so dependent rows go before the
        # rows they reference - confirm against the model definitions.
        print("Clearing existing data...")
        db.query(Prediction).delete()
        db.query(Trade).delete()
        db.query(BacktestResult).delete()
        db.query(Portfolio).delete()
        db.query(Model).delete()
        db.query(User).delete()
        db.query(StockData).delete()
        db.commit()

        # Create sample users
        print("Creating sample users...")
        users = []
        for i in range(5):
            user = User(
                username=f"user_{i+1}",
                email=f"user{i+1}@example.com",
                role="user" if i > 0 else "admin",
                is_active=True,
                last_login_at=now - timedelta(hours=random.randint(1, 48)),
            )
            users.append(user)
            db.add(user)

        # Commit before the ids are read below
        db.commit()

        # Create sample models
        print("Creating sample models...")
        models = []
        model_names = ["LSTM Predictor", "Transformer Model", "Ensemble Model", "CNN Extractor", "Attention Model"]
        for i, name in enumerate(model_names):
            model = Model(
                name=name,
                version=f"v1.{i}",
                model_type="pytorch",
                status=ModelStatus.DEPLOYED if i < 3 else ModelStatus.TRAINING,
                test_accuracy=random.uniform(0.65, 0.95),
                test_sharpe_ratio=random.uniform(1.2, 2.5),
                test_max_drawdown=random.uniform(0.05, 0.15),
                created_at=now - timedelta(days=random.randint(1, 30)),
                updated_at=now - timedelta(hours=random.randint(1, 24)),
                created_by_id=random.choice(users).id,
            )
            models.append(model)
            db.add(model)

        db.commit()

        # Create sample portfolios
        print("Creating sample portfolios...")
        portfolios = []
        portfolio_names = ["Growth Portfolio", "Value Portfolio", "AI Picks", "Risk Parity", "Momentum Strategy"]
        for i, name in enumerate(portfolio_names):
            initial_capital = 100000
            current_value = initial_capital * random.uniform(0.9, 1.3)
            portfolio = Portfolio(
                name=name,
                description=f"Strategy based on {name.lower()}",
                initial_capital=initial_capital,
                current_value=current_value,
                # Derived from the value above so the two always agree
                total_return=(current_value - initial_capital) / initial_capital,
                sharpe_ratio=random.uniform(0.8, 2.0),
                max_drawdown=random.uniform(0.05, 0.20),
                is_active=i < 4,
                created_by_id=random.choice(users).id,
            )
            portfolios.append(portfolio)
            db.add(portfolio)

        db.commit()

        # Create sample predictions
        print("Creating sample predictions...")
        tickers = ["AAPL", "GOOGL", "MSFT", "TSLA", "NVDA", "META", "AMZN", "SPY", "QQQ", "DIA"]

        for _ in range(100):
            prediction = Prediction(
                ticker=random.choice(tickers),
                prediction_date=today - timedelta(days=random.randint(0, 7)),
                target_date=today + timedelta(days=random.randint(1, 30)),
                predicted_return=random.uniform(-0.05, 0.05),
                confidence_score=random.uniform(0.5, 0.95),
                model_id=random.choice(models).id,
            )
            db.add(prediction)

        # Add some predictions for today
        for ticker in tickers[:5]:
            prediction = Prediction(
                ticker=ticker,
                prediction_date=today,
                target_date=today + timedelta(days=7),
                predicted_return=random.uniform(-0.03, 0.03),
                confidence_score=random.uniform(0.6, 0.9),
                model_id=random.choice(models).id,
            )
            db.add(prediction)

        db.commit()

        # Create sample stock data
        print("Creating sample stock data...")
        for ticker in tickers:
            base_price = random.uniform(50, 500)
            for i in range(30):
                open_price = base_price * random.uniform(0.98, 1.02)
                close_price = base_price * random.uniform(0.98, 1.02)
                # High/low are built around open and close so a bar can
                # never report a high below, or a low above, either price.
                high_price = max(open_price, close_price) * random.uniform(1.0, 1.01)
                low_price = min(open_price, close_price) * random.uniform(0.99, 1.0)
                stock_data = StockData(
                    ticker=ticker,
                    date=today - timedelta(days=i),
                    open=open_price,
                    high=high_price,
                    low=low_price,
                    close=close_price,
                    volume=random.randint(1000000, 50000000),
                    # No splits or dividends are simulated, so no adjustment
                    adjusted_close=close_price,
                )
                db.add(stock_data)
                base_price = close_price  # Random walk

        db.commit()

        # Create sample trades
        print("Creating sample trades...")
        for portfolio in portfolios:
            if portfolio.is_active:
                for _ in range(random.randint(5, 15)):
                    trade = Trade(
                        portfolio_id=portfolio.id,
                        ticker=random.choice(tickers),
                        trade_type=random.choice(["buy", "sell"]),
                        quantity=random.randint(10, 100),
                        price=random.uniform(50, 500),
                        executed_at=now - timedelta(days=random.randint(0, 30)),
                    )
                    db.add(trade)

        db.commit()

        # Create sample backtest results
        print("Creating sample backtest results...")
        for model in models:
            if model.status == ModelStatus.DEPLOYED:
                initial_capital = 100000
                final_capital = initial_capital * random.uniform(0.9, 1.4)
                backtest = BacktestResult(
                    model_id=model.id,
                    start_date=today - timedelta(days=180),
                    end_date=today - timedelta(days=1),
                    initial_capital=initial_capital,
                    final_capital=final_capital,
                    # Derived from the capital figures so they always agree
                    total_return=(final_capital - initial_capital) / initial_capital,
                    sharpe_ratio=random.uniform(0.5, 2.5),
                    max_drawdown=random.uniform(0.05, 0.25),
                    win_rate=random.uniform(0.45, 0.65),
                    profit_factor=random.uniform(0.9, 2.0),
                    total_trades=random.randint(50, 200),
                )
                db.add(backtest)

        db.commit()

        print("✅ Sample data populated successfully!")
        print(f" Users: {len(users)}")
        print(f" Models: {len(models)}")
        print(f" Portfolios: {len(portfolios)}")
        print(f" Predictions: {db.query(Prediction).count()}")
        print(f" Stock Data: {db.query(StockData).count()}")
        print(f" Trades: {db.query(Trade).count()}")
        print(f" Backtest Results: {db.query(BacktestResult).count()}")

    except Exception as e:
        print(f"❌ Error populating data: {e}")
        db.rollback()
        raise
    finally:
        db.close()
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
if __name__ == "__main__":
|
|
200
|
+
populate_sample_data()
|