mcli-framework 7.12.0-py3-none-any.whl → 7.12.3-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release: this version of mcli-framework might be problematic.
- mcli/app/__init__.py +0 -2
- mcli/app/commands_cmd.py +19 -23
- mcli/app/completion_helpers.py +5 -5
- mcli/app/init_cmd.py +10 -10
- mcli/app/lock_cmd.py +82 -27
- mcli/app/main.py +4 -50
- mcli/app/model/model.py +5 -10
- mcli/app/store_cmd.py +8 -8
- mcli/app/video/__init__.py +0 -2
- mcli/app/video/video.py +1 -14
- mcli/chat/chat.py +90 -108
- mcli/chat/command_rag.py +0 -4
- mcli/chat/enhanced_chat.py +32 -41
- mcli/chat/system_controller.py +37 -37
- mcli/chat/system_integration.py +4 -5
- mcli/cli.py +2 -3
- mcli/lib/api/api.py +4 -9
- mcli/lib/api/daemon_client.py +19 -20
- mcli/lib/api/daemon_client_local.py +1 -3
- mcli/lib/api/daemon_decorator.py +6 -6
- mcli/lib/api/mcli_decorators.py +4 -8
- mcli/lib/auth/__init__.py +0 -1
- mcli/lib/auth/auth.py +4 -5
- mcli/lib/auth/mcli_manager.py +7 -12
- mcli/lib/auth/token_util.py +5 -5
- mcli/lib/config/__init__.py +29 -1
- mcli/lib/config/config.py +0 -1
- mcli/lib/custom_commands.py +1 -1
- mcli/lib/discovery/command_discovery.py +15 -15
- mcli/lib/erd/erd.py +7 -7
- mcli/lib/files/files.py +1 -1
- mcli/lib/fs/__init__.py +31 -1
- mcli/lib/fs/fs.py +12 -13
- mcli/lib/lib.py +0 -1
- mcli/lib/logger/logger.py +7 -10
- mcli/lib/performance/optimizer.py +25 -27
- mcli/lib/performance/rust_bridge.py +22 -27
- mcli/lib/performance/uvloop_config.py +0 -1
- mcli/lib/pickles/__init__.py +0 -1
- mcli/lib/pickles/pickles.py +0 -2
- mcli/lib/secrets/commands.py +0 -2
- mcli/lib/secrets/manager.py +0 -1
- mcli/lib/secrets/repl.py +2 -3
- mcli/lib/secrets/store.py +1 -2
- mcli/lib/services/data_pipeline.py +34 -34
- mcli/lib/services/lsh_client.py +38 -40
- mcli/lib/shell/shell.py +2 -2
- mcli/lib/toml/__init__.py +0 -1
- mcli/lib/ui/styling.py +0 -1
- mcli/lib/ui/visual_effects.py +33 -41
- mcli/lib/watcher/watcher.py +0 -1
- mcli/ml/__init__.py +1 -1
- mcli/ml/api/__init__.py +1 -1
- mcli/ml/api/app.py +8 -9
- mcli/ml/api/middleware.py +10 -10
- mcli/ml/api/routers/__init__.py +1 -1
- mcli/ml/api/routers/admin_router.py +3 -3
- mcli/ml/api/routers/auth_router.py +17 -18
- mcli/ml/api/routers/backtest_router.py +2 -2
- mcli/ml/api/routers/data_router.py +2 -2
- mcli/ml/api/routers/model_router.py +14 -15
- mcli/ml/api/routers/monitoring_router.py +2 -2
- mcli/ml/api/routers/portfolio_router.py +2 -2
- mcli/ml/api/routers/prediction_router.py +10 -9
- mcli/ml/api/routers/trade_router.py +2 -2
- mcli/ml/api/routers/websocket_router.py +6 -7
- mcli/ml/api/schemas.py +2 -2
- mcli/ml/auth/__init__.py +1 -1
- mcli/ml/auth/auth_manager.py +22 -23
- mcli/ml/auth/models.py +17 -17
- mcli/ml/auth/permissions.py +17 -17
- mcli/ml/backtesting/__init__.py +1 -1
- mcli/ml/backtesting/backtest_engine.py +31 -35
- mcli/ml/backtesting/performance_metrics.py +12 -14
- mcli/ml/backtesting/run.py +1 -2
- mcli/ml/cache.py +35 -36
- mcli/ml/cli/__init__.py +1 -1
- mcli/ml/cli/main.py +21 -24
- mcli/ml/config/__init__.py +1 -1
- mcli/ml/config/settings.py +28 -29
- mcli/ml/configs/__init__.py +1 -1
- mcli/ml/configs/dvc_config.py +14 -15
- mcli/ml/configs/mlflow_config.py +12 -13
- mcli/ml/configs/mlops_manager.py +19 -21
- mcli/ml/dashboard/__init__.py +4 -4
- mcli/ml/dashboard/app.py +20 -30
- mcli/ml/dashboard/app_supabase.py +16 -19
- mcli/ml/dashboard/app_training.py +11 -14
- mcli/ml/dashboard/cli.py +2 -2
- mcli/ml/dashboard/common.py +2 -3
- mcli/ml/dashboard/components/__init__.py +1 -1
- mcli/ml/dashboard/components/charts.py +13 -11
- mcli/ml/dashboard/components/metrics.py +7 -7
- mcli/ml/dashboard/components/tables.py +12 -9
- mcli/ml/dashboard/overview.py +2 -2
- mcli/ml/dashboard/pages/__init__.py +1 -1
- mcli/ml/dashboard/pages/cicd.py +15 -18
- mcli/ml/dashboard/pages/debug_dependencies.py +7 -7
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +11 -18
- mcli/ml/dashboard/pages/predictions_enhanced.py +24 -32
- mcli/ml/dashboard/pages/scrapers_and_logs.py +22 -24
- mcli/ml/dashboard/pages/test_portfolio.py +3 -6
- mcli/ml/dashboard/pages/trading.py +16 -18
- mcli/ml/dashboard/pages/workflows.py +20 -30
- mcli/ml/dashboard/utils.py +9 -9
- mcli/ml/dashboard/warning_suppression.py +3 -3
- mcli/ml/data_ingestion/__init__.py +1 -1
- mcli/ml/data_ingestion/api_connectors.py +41 -46
- mcli/ml/data_ingestion/data_pipeline.py +36 -46
- mcli/ml/data_ingestion/stream_processor.py +43 -46
- mcli/ml/database/__init__.py +1 -1
- mcli/ml/database/migrations/env.py +2 -2
- mcli/ml/database/models.py +22 -24
- mcli/ml/database/session.py +14 -14
- mcli/ml/experimentation/__init__.py +1 -1
- mcli/ml/experimentation/ab_testing.py +45 -46
- mcli/ml/features/__init__.py +1 -1
- mcli/ml/features/ensemble_features.py +22 -27
- mcli/ml/features/recommendation_engine.py +30 -30
- mcli/ml/features/stock_features.py +29 -32
- mcli/ml/features/test_feature_engineering.py +10 -11
- mcli/ml/logging.py +4 -4
- mcli/ml/mlops/__init__.py +1 -1
- mcli/ml/mlops/data_versioning.py +29 -30
- mcli/ml/mlops/experiment_tracker.py +24 -24
- mcli/ml/mlops/model_serving.py +31 -34
- mcli/ml/mlops/pipeline_orchestrator.py +27 -35
- mcli/ml/models/__init__.py +5 -6
- mcli/ml/models/base_models.py +23 -23
- mcli/ml/models/ensemble_models.py +31 -31
- mcli/ml/models/recommendation_models.py +18 -19
- mcli/ml/models/test_models.py +14 -16
- mcli/ml/monitoring/__init__.py +1 -1
- mcli/ml/monitoring/drift_detection.py +32 -36
- mcli/ml/monitoring/metrics.py +2 -2
- mcli/ml/optimization/__init__.py +1 -1
- mcli/ml/optimization/optimize.py +1 -2
- mcli/ml/optimization/portfolio_optimizer.py +30 -32
- mcli/ml/predictions/__init__.py +1 -1
- mcli/ml/preprocessing/__init__.py +1 -1
- mcli/ml/preprocessing/data_cleaners.py +22 -23
- mcli/ml/preprocessing/feature_extractors.py +23 -26
- mcli/ml/preprocessing/ml_pipeline.py +23 -23
- mcli/ml/preprocessing/test_preprocessing.py +7 -8
- mcli/ml/scripts/populate_sample_data.py +0 -4
- mcli/ml/serving/serve.py +1 -2
- mcli/ml/tasks.py +17 -17
- mcli/ml/tests/test_integration.py +29 -30
- mcli/ml/tests/test_training_dashboard.py +21 -21
- mcli/ml/trading/__init__.py +1 -1
- mcli/ml/trading/migrations.py +5 -5
- mcli/ml/trading/models.py +21 -23
- mcli/ml/trading/paper_trading.py +16 -13
- mcli/ml/trading/risk_management.py +17 -18
- mcli/ml/trading/trading_service.py +25 -28
- mcli/ml/training/__init__.py +1 -1
- mcli/ml/training/train.py +0 -1
- mcli/public/oi/oi.py +1 -2
- mcli/self/completion_cmd.py +6 -10
- mcli/self/logs_cmd.py +19 -24
- mcli/self/migrate_cmd.py +22 -20
- mcli/self/redis_cmd.py +10 -11
- mcli/self/self_cmd.py +62 -18
- mcli/self/store_cmd.py +10 -12
- mcli/self/visual_cmd.py +9 -14
- mcli/self/zsh_cmd.py +2 -4
- mcli/workflow/daemon/async_command_database.py +23 -24
- mcli/workflow/daemon/async_process_manager.py +27 -29
- mcli/workflow/daemon/client.py +27 -33
- mcli/workflow/daemon/daemon.py +32 -36
- mcli/workflow/daemon/enhanced_daemon.py +24 -33
- mcli/workflow/daemon/process_cli.py +11 -12
- mcli/workflow/daemon/process_manager.py +23 -26
- mcli/workflow/daemon/test_daemon.py +4 -5
- mcli/workflow/dashboard/dashboard_cmd.py +0 -1
- mcli/workflow/doc_convert.py +15 -17
- mcli/workflow/gcloud/__init__.py +0 -1
- mcli/workflow/gcloud/gcloud.py +11 -8
- mcli/workflow/git_commit/ai_service.py +14 -15
- mcli/workflow/lsh_integration.py +9 -11
- mcli/workflow/model_service/client.py +26 -31
- mcli/workflow/model_service/download_and_run_efficient_models.py +10 -14
- mcli/workflow/model_service/lightweight_embedder.py +25 -35
- mcli/workflow/model_service/lightweight_model_server.py +26 -32
- mcli/workflow/model_service/lightweight_test.py +7 -10
- mcli/workflow/model_service/model_service.py +80 -91
- mcli/workflow/model_service/ollama_efficient_runner.py +14 -18
- mcli/workflow/model_service/openai_adapter.py +23 -23
- mcli/workflow/model_service/pdf_processor.py +21 -26
- mcli/workflow/model_service/test_efficient_runner.py +12 -16
- mcli/workflow/model_service/test_example.py +11 -13
- mcli/workflow/model_service/test_integration.py +3 -5
- mcli/workflow/model_service/test_new_features.py +7 -8
- mcli/workflow/notebook/converter.py +1 -1
- mcli/workflow/notebook/notebook_cmd.py +5 -6
- mcli/workflow/notebook/schema.py +0 -1
- mcli/workflow/notebook/validator.py +7 -3
- mcli/workflow/openai/openai.py +1 -2
- mcli/workflow/registry/registry.py +4 -1
- mcli/workflow/repo/repo.py +6 -7
- mcli/workflow/scheduler/cron_parser.py +16 -19
- mcli/workflow/scheduler/job.py +10 -10
- mcli/workflow/scheduler/monitor.py +15 -15
- mcli/workflow/scheduler/persistence.py +17 -18
- mcli/workflow/scheduler/scheduler.py +37 -38
- mcli/workflow/secrets/__init__.py +1 -1
- mcli/workflow/sync/test_cmd.py +0 -1
- mcli/workflow/wakatime/__init__.py +5 -9
- mcli/workflow/wakatime/wakatime.py +1 -2
- {mcli_framework-7.12.0.dist-info → mcli_framework-7.12.3.dist-info}/METADATA +1 -1
- mcli_framework-7.12.3.dist-info/RECORD +279 -0
- mcli_framework-7.12.0.dist-info/RECORD +0 -279
- {mcli_framework-7.12.0.dist-info → mcli_framework-7.12.3.dist-info}/WHEEL +0 -0
- {mcli_framework-7.12.0.dist-info → mcli_framework-7.12.3.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.12.0.dist-info → mcli_framework-7.12.3.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.12.0.dist-info → mcli_framework-7.12.3.dist-info}/top_level.txt +0 -0
mcli/ml/data_ingestion/api_connectors.py

```diff
@@ -1,4 +1,4 @@
-"""API connectors for real-time data ingestion"""
+"""API connectors for real-time data ingestion."""
 
 import asyncio
 import json
@@ -12,7 +12,6 @@ from urllib.parse import urljoin
 
 import aiohttp
 import pandas as pd
-import requests
 import websockets
 import yfinance as yf
 
@@ -21,7 +20,7 @@ logger = logging.getLogger(__name__)
 
 @dataclass
 class APIConfig:
-    """API configuration"""
+    """API configuration."""
 
     api_key: Optional[str] = None
     base_url: str = ""
@@ -32,7 +31,7 @@ class APIConfig:
 
 
 class BaseAPIConnector(ABC):
-    """Base class for API connectors"""
+    """Base class for API connectors."""
 
     def __init__(self, config: APIConfig):
         self.config = config
@@ -41,11 +40,10 @@ class BaseAPIConnector(ABC):
 
     @abstractmethod
     async def fetch_data(self, **kwargs) -> Dict[str, Any]:
-        """Fetch data from API"""
-        pass
+        """Fetch data from API."""
 
     async def _make_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict[str, Any]:
-        """Make API request with rate limiting and retry logic"""
+        """Make API request with rate limiting and retry logic."""
         await self.rate_limiter.acquire()
 
         url = urljoin(self.config.base_url, endpoint)
@@ -74,13 +72,13 @@ class BaseAPIConnector(ABC):
                     await asyncio.sleep(self.config.retry_delay * retry_count)
 
     async def close(self):
-        """Close session"""
+        """Close session."""
         if self.session:
             await self.session.close()
 
 
 class RateLimiter:
-    """Rate limiter for API requests"""
+    """Rate limiter for API requests."""
 
     def __init__(self, rate_limit: int):
         self.rate_limit = rate_limit
@@ -89,7 +87,7 @@ class RateLimiter:
         self.lock = asyncio.Lock()
 
     async def acquire(self):
-        """Acquire rate limit token"""
+        """Acquire rate limit token."""
         async with self.lock:
             while self.tokens <= 0:
                 now = time.time()
@@ -105,7 +103,7 @@ class RateLimiter:
 
 
 class CongressionalDataAPI(BaseAPIConnector):
-    """Congressional trading data API connector"""
+    """Congressional trading data API connector."""
 
     def __init__(self, config: Optional[APIConfig] = None):
         if not config:
@@ -113,7 +111,7 @@ class CongressionalDataAPI(BaseAPIConnector):
         super().__init__(config)
 
     async def fetch_recent_trades(self, days: int = 30) -> List[Dict[str, Any]]:
-        """Fetch recent congressional trades"""
+        """Fetch recent congressional trades."""
         params = {
             "from_date": (datetime.now() - timedelta(days=days)).isoformat(),
             "to_date": datetime.now().isoformat(),
@@ -128,7 +126,7 @@ class CongressionalDataAPI(BaseAPIConnector):
             return self._generate_mock_trades()
 
     async def fetch_politician_info(self, politician_id: str) -> Dict[str, Any]:
-        """Fetch politician information"""
+        """Fetch politician information."""
         try:
             return await self._make_request(f"politicians/{politician_id}")
         except Exception as e:
@@ -136,7 +134,7 @@ class CongressionalDataAPI(BaseAPIConnector):
             return self._generate_mock_politician_info(politician_id)
 
     def _generate_mock_trades(self) -> List[Dict[str, Any]]:
-        """Generate mock trades for testing"""
+        """Generate mock trades for testing."""
         import random
 
         trades = []
@@ -160,7 +158,7 @@ class CongressionalDataAPI(BaseAPIConnector):
         return trades
 
     def _generate_mock_politician_info(self, politician_id: str) -> Dict[str, Any]:
-        """Generate mock politician info"""
+        """Generate mock politician info."""
         return {
             "id": politician_id,
             "name": "Mock Politician",
@@ -172,23 +170,20 @@ class CongressionalDataAPI(BaseAPIConnector):
 
 
 class StockMarketAPI(BaseAPIConnector):
-    """Base class for stock market APIs"""
+    """Base class for stock market APIs."""
 
     async def fetch_quote(self, symbol: str) -> Dict[str, Any]:
-        """Fetch current stock quote"""
-        pass
+        """Fetch current stock quote."""
 
     async def fetch_historical(self, symbol: str, period: str = "1mo") -> pd.DataFrame:
-        """Fetch historical stock data"""
-        pass
+        """Fetch historical stock data."""
 
     async def stream_quotes(self, symbols: List[str]) -> AsyncIterator[Dict[str, Any]]:
-        """Stream real-time quotes"""
-        pass
+        """Stream real-time quotes."""
 
 
 class AlphaVantageConnector(StockMarketAPI):
-    """Alpha Vantage API connector"""
+    """Alpha Vantage API connector."""
 
     def __init__(self, api_key: str):
         config = APIConfig(
@@ -199,14 +194,14 @@ class AlphaVantageConnector(StockMarketAPI):
         super().__init__(config)
 
     async def fetch_quote(self, symbol: str) -> Dict[str, Any]:
-        """Fetch current quote from Alpha Vantage"""
+        """Fetch current quote from Alpha Vantage."""
         params = {"function": "GLOBAL_QUOTE", "symbol": symbol, "apikey": self.config.api_key}
 
         data = await self._make_request("", params)
         return self._parse_quote(data.get("Global Quote", {}))
 
     async def fetch_historical(self, symbol: str, period: str = "1mo") -> pd.DataFrame:
-        """Fetch historical data from Alpha Vantage"""
+        """Fetch historical data from Alpha Vantage."""
         params = {
             "function": "TIME_SERIES_DAILY",
             "symbol": symbol,
@@ -226,7 +221,7 @@ class AlphaVantageConnector(StockMarketAPI):
         return df.sort_index()
 
     def _parse_quote(self, quote_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Parse Alpha Vantage quote"""
+        """Parse Alpha Vantage quote."""
         return {
             "symbol": quote_data.get("01. symbol", ""),
             "price": float(quote_data.get("05. price", 0)),
@@ -238,14 +233,14 @@ class AlphaVantageConnector(StockMarketAPI):
 
 
 class YahooFinanceConnector(StockMarketAPI):
-    """Yahoo Finance connector using yfinance"""
+    """Yahoo Finance connector using yfinance."""
 
     def __init__(self):
         config = APIConfig(rate_limit=2000)  # Yahoo Finance is generous
         super().__init__(config)
 
     async def fetch_quote(self, symbol: str) -> Dict[str, Any]:
-        """Fetch current quote from Yahoo Finance"""
+        """Fetch current quote from Yahoo Finance."""
         try:
             ticker = yf.Ticker(symbol)
             info = ticker.info
@@ -263,7 +258,7 @@ class YahooFinanceConnector(StockMarketAPI):
             return {}
 
     async def fetch_historical(self, symbol: str, period: str = "1mo") -> pd.DataFrame:
-        """Fetch historical data from Yahoo Finance"""
+        """Fetch historical data from Yahoo Finance."""
         try:
             ticker = yf.Ticker(symbol)
             df = ticker.history(period=period)
@@ -274,14 +269,14 @@ class YahooFinanceConnector(StockMarketAPI):
 
 
 class PolygonIOConnector(StockMarketAPI):
-    """Polygon.io API connector"""
+    """Polygon.io API connector."""
 
     def __init__(self, api_key: str):
         config = APIConfig(api_key=api_key, base_url="https://api.polygon.io/", rate_limit=100)
         super().__init__(config)
 
     async def fetch_quote(self, symbol: str) -> Dict[str, Any]:
-        """Fetch current quote from Polygon.io"""
+        """Fetch current quote from Polygon.io."""
         endpoint = f"v2/last/nbbo/{symbol}"
         params = {"apiKey": self.config.api_key}
 
@@ -291,7 +286,7 @@ class PolygonIOConnector(StockMarketAPI):
     async def fetch_aggregates(
         self, symbol: str, from_date: str, to_date: str, timespan: str = "day"
     ) -> pd.DataFrame:
-        """Fetch aggregate bars from Polygon.io"""
+        """Fetch aggregate bars from Polygon.io."""
         endpoint = f"v2/aggs/ticker/{symbol}/range/1/{timespan}/{from_date}/{to_date}"
         params = {"apiKey": self.config.api_key, "adjusted": "true", "sort": "asc"}
 
@@ -308,7 +303,7 @@ class PolygonIOConnector(StockMarketAPI):
         return df.set_index("timestamp")
 
     def _parse_polygon_quote(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Parse Polygon.io quote"""
+        """Parse Polygon.io quote."""
         results = data.get("results", {})
         return {
             "symbol": results.get("T", ""),
@@ -319,7 +314,7 @@ class PolygonIOConnector(StockMarketAPI):
 
 
 class QuiverQuantConnector(BaseAPIConnector):
-    """QuiverQuant API for congressional trading data"""
+    """QuiverQuant API for congressional trading data."""
 
     def __init__(self, api_key: str):
         config = APIConfig(
@@ -328,7 +323,7 @@ class QuiverQuantConnector(BaseAPIConnector):
         super().__init__(config)
 
     async def fetch_congress_trades(self) -> List[Dict[str, Any]]:
-        """Fetch congressional trading data"""
+        """Fetch congressional trading data."""
         headers = {"Authorization": f"Bearer {self.config.api_key}", "Accept": "application/json"}
 
         try:
@@ -346,7 +341,7 @@ class QuiverQuantConnector(BaseAPIConnector):
             return []
 
     async def fetch_lobbying(self, ticker: str) -> List[Dict[str, Any]]:
-        """Fetch lobbying data for a ticker"""
+        """Fetch lobbying data for a ticker."""
        headers = {"Authorization": f"Bearer {self.config.api_key}", "Accept": "application/json"}
 
         try:
@@ -365,7 +360,7 @@ class QuiverQuantConnector(BaseAPIConnector):
 
 
 class WebSocketDataStream:
-    """WebSocket stream for real-time data"""
+    """WebSocket stream for real-time data."""
 
     def __init__(self, url: str, api_key: Optional[str] = None):
         self.url = url
@@ -374,11 +369,11 @@ class WebSocketDataStream:
         self.handlers = []
 
     def add_handler(self, handler: Callable):
-        """Add message handler"""
+        """Add message handler."""
         self.handlers.append(handler)
 
     async def connect(self):
-        """Connect to WebSocket"""
+        """Connect to WebSocket."""
         headers = {}
         if self.api_key:
             headers["Authorization"] = f"Bearer {self.api_key}"
@@ -387,7 +382,7 @@ class WebSocketDataStream:
         logger.info(f"Connected to WebSocket: {self.url}")
 
     async def subscribe(self, symbols: List[str]):
-        """Subscribe to symbols"""
+        """Subscribe to symbols."""
         if not self.websocket:
             await self.connect()
 
@@ -395,7 +390,7 @@ class WebSocketDataStream:
         await self.websocket.send(json.dumps(message))
 
     async def stream(self):
-        """Stream messages"""
+        """Stream messages."""
         if not self.websocket:
             await self.connect()
 
@@ -413,13 +408,13 @@ class WebSocketDataStream:
                     logger.error(f"Handler error: {e}")
 
     async def close(self):
-        """Close WebSocket connection"""
+        """Close WebSocket connection."""
         if self.websocket:
             await self.websocket.close()
 
 
 class DataAggregator:
-    """Aggregate data from multiple sources"""
+    """Aggregate data from multiple sources."""
 
     def __init__(self):
         self.sources = {}
@@ -427,12 +422,12 @@ class DataAggregator:
         self.cache_ttl = 300  # 5 minutes
 
     def add_source(self, name: str, connector: BaseAPIConnector):
-        """Add data source"""
+        """Add data source."""
         self.sources[name] = connector
         logger.info(f"Added data source: {name}")
 
     async def fetch_all(self, symbol: str) -> Dict[str, Any]:
-        """Fetch data from all sources"""
+        """Fetch data from all sources."""
         results = {}
 
         # Check cache
@@ -466,6 +461,6 @@ class DataAggregator:
         return results
 
     async def _fetch_with_name(self, name: str, coro):
-        """Helper to fetch with source name"""
+        """Helper to fetch with source name."""
         result = await coro
         return name, result
```
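A recurring change in the hunks above is dropping the bare `pass` that followed an abstract method's docstring (for example in `BaseAPIConnector.fetch_data` and the `StockMarketAPI` stubs). The removal is behavior-preserving: a docstring is itself a complete function body, and `@abstractmethod` still forces subclasses to override the method. A minimal, self-contained sketch of the pattern (class names here are illustrative, not taken from the package):

```python
import asyncio
from abc import ABC, abstractmethod
from typing import Any, Dict


class Connector(ABC):
    """Illustrative stand-in for the abstract connector pattern shown in the diff."""

    @abstractmethod
    async def fetch_data(self, **kwargs) -> Dict[str, Any]:
        """Fetch data from the upstream API."""
        # No `pass` needed: the docstring alone is a valid function body.


class DummyConnector(Connector):
    async def fetch_data(self, **kwargs) -> Dict[str, Any]:
        return {"source": "dummy", **kwargs}


# Connector() would raise TypeError (abstract class), but a concrete subclass works:
print(asyncio.run(DummyConnector().fetch_data(symbol="AAPL")))
```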
mcli/ml/data_ingestion/data_pipeline.py

```diff
@@ -1,26 +1,16 @@
-"""Complete data ingestion pipeline with validation and transformation"""
+"""Complete data ingestion pipeline with validation and transformation."""
 
 import asyncio
 import json
 import logging
-from
-from
-from datetime import datetime, timedelta
+from dataclasses import dataclass
+from datetime import datetime
 from pathlib import Path
-from typing import Any,
+from typing import Any, Dict, List, Optional, Union
 
-import numpy as np
 import pandas as pd
 
-from .api_connectors import (
-    AlphaVantageConnector,
-    CongressionalDataAPI,
-    DataAggregator,
-    PolygonIOConnector,
-    QuiverQuantConnector,
-    YahooFinanceConnector,
-)
-from .stream_processor import DataAggregator as StreamAggregator
+from .api_connectors import CongressionalDataAPI, YahooFinanceConnector
 from .stream_processor import StreamConfig, StreamProcessor
 
 logger = logging.getLogger(__name__)
@@ -28,7 +18,7 @@ logger = logging.getLogger(__name__)
 
 @dataclass
 class PipelineConfig:
-    """Data pipeline configuration"""
+    """Data pipeline configuration."""
 
     data_dir: Path = Path("data")
     batch_size: int = 1000
@@ -42,7 +32,7 @@ class PipelineConfig:
 
 
 class DataValidator:
-    """Validate incoming data"""
+    """Validate incoming data."""
 
     def __init__(self):
         self.validation_rules = {
@@ -53,7 +43,7 @@ class DataValidator:
         self.validation_stats = {"total": 0, "valid": 0, "invalid": 0, "errors": []}
 
     def validate(self, data: Dict[str, Any], data_type: str) -> bool:
-        """Validate data based on type"""
+        """Validate data based on type."""
         self.validation_stats["total"] += 1
 
         if data_type not in self.validation_rules:
@@ -74,7 +64,7 @@ class DataValidator:
             return False
 
     def _validate_politician_trade(self, data: Dict[str, Any]) -> bool:
-        """Validate politician trading data"""
+        """Validate politician trading data."""
         required_fields = ["politician", "ticker", "transaction_type", "amount", "transaction_date"]
 
         # Check required fields
@@ -97,14 +87,14 @@ class DataValidator:
         try:
             if isinstance(data["transaction_date"], str):
                 datetime.fromisoformat(data["transaction_date"])
-        except:
+        except Exception:
             logger.warning(f"Invalid date format: {data['transaction_date']}")
             return False
 
         return True
 
     def _validate_stock_quote(self, data: Dict[str, Any]) -> bool:
-        """Validate stock quote data"""
+        """Validate stock quote data."""
         required_fields = ["symbol", "price", "timestamp"]
 
         for field in required_fields:
@@ -118,7 +108,7 @@ class DataValidator:
         return True
 
     def _validate_market_data(self, data: Dict[str, Any]) -> bool:
-        """Validate market data"""
+        """Validate market data."""
         required_fields = ["symbol", "close", "volume", "date"]
 
         for field in required_fields:
@@ -127,7 +117,7 @@ class DataValidator:
 
         # Validate prices
         for price_field in ["close", "open", "high", "low"]:
-            if price_field in data:
+            if price_field in data:  # noqa: SIM102
                 if not isinstance(data[price_field], (int, float)) or data[price_field] <= 0:
                     return False
 
@@ -138,12 +128,12 @@ class DataValidator:
         return True
 
     def get_stats(self) -> Dict[str, Any]:
-        """Get validation statistics"""
+        """Get validation statistics."""
         return self.validation_stats.copy()
 
 
 class DataTransformer:
-    """Transform and normalize data"""
+    """Transform and normalize data."""
 
     def __init__(self):
         self.transformers = {
@@ -155,7 +145,7 @@ class DataTransformer:
     def transform(
         self, data: Union[Dict[str, Any], List[Dict[str, Any]]], data_type: str
     ) -> Union[Dict[str, Any], pd.DataFrame]:
-        """Transform data based on type"""
+        """Transform data based on type."""
         if data_type not in self.transformers:
             return data
 
@@ -166,7 +156,7 @@ class DataTransformer:
         return self.transformers[data_type](data)
 
     def _transform_politician_trade(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Transform politician trading data"""
+        """Transform politician trading data."""
         transformed = data.copy()
 
         # Standardize politician name
@@ -198,7 +188,7 @@ class DataTransformer:
         return transformed
 
     def _transform_stock_quote(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Transform stock quote data"""
+        """Transform stock quote data."""
         transformed = data.copy()
 
         # Normalize symbol
@@ -219,7 +209,7 @@ class DataTransformer:
         return transformed
 
     def _transform_market_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Transform market data"""
+        """Transform market data."""
         transformed = data.copy()
 
         # Normalize symbol
@@ -242,7 +232,7 @@ class DataTransformer:
         return transformed
 
     def _normalize_name(self, name: str) -> str:
-        """Normalize politician name"""
+        """Normalize politician name."""
         # Remove titles
         titles = ["Sen.", "Senator", "Rep.", "Representative", "Hon.", "Dr.", "Mr.", "Mrs.", "Ms."]
         normalized = name
@@ -256,7 +246,7 @@ class DataTransformer:
         return normalized
 
     def _categorize_amount(self, amount: float) -> str:
-        """Categorize transaction amount"""
+        """Categorize transaction amount."""
         if amount < 1000:
             return "micro"
         elif amount < 15000:
@@ -272,7 +262,7 @@ class DataTransformer:
 
 
 class DataLoader:
-    """Load data to storage"""
+    """Load data to storage."""
 
     def __init__(self, data_dir: Path):
         self.data_dir = data_dir
@@ -281,7 +271,7 @@ class DataLoader:
     async def save_batch(
         self, data: pd.DataFrame, data_type: str, timestamp: Optional[datetime] = None
     ):
-        """Save batch of data"""
+        """Save batch of data."""
         if timestamp is None:
             timestamp = datetime.now()
 
@@ -302,7 +292,7 @@ class DataLoader:
     async def save_json(
         self, data: Union[Dict, List], data_type: str, timestamp: Optional[datetime] = None
    ):
-        """Save data as JSON"""
+        """Save data as JSON."""
         if timestamp is None:
             timestamp = datetime.now()
 
@@ -322,7 +312,7 @@ class DataLoader:
         return filepath
 
     def load_latest(self, data_type: str, n_files: int = 1) -> pd.DataFrame:
-        """Load latest data files"""
+        """Load latest data files."""
         type_dir = self.data_dir / data_type
 
         if not type_dir.exists():
@@ -344,7 +334,7 @@ class DataLoader:
 
 
 class IngestionPipeline:
-    """Complete data ingestion pipeline"""
+    """Complete data ingestion pipeline."""
 
     def __init__(self, config: PipelineConfig):
         self.config = config
@@ -368,12 +358,12 @@ class IngestionPipeline:
         }
 
     def add_source(self, name: str, connector):
-        """Add data source"""
+        """Add data source."""
         self.sources[name] = connector
         logger.info(f"Added data source: {name}")
 
     async def initialize_sources(self):
-        """Initialize all data sources"""
+        """Initialize all data sources."""
         # Congressional data
         congress_api = CongressionalDataAPI()
         self.add_source("congress", congress_api)
@@ -386,7 +376,7 @@ class IngestionPipeline:
         logger.info(f"Initialized {len(self.sources)} data sources")
 
     async def process_batch(self, data: List[Dict[str, Any]], data_type: str) -> pd.DataFrame:
-        """Process batch of data through pipeline"""
+        """Process batch of data through pipeline."""
         processed_data = []
 
         for record in data:
@@ -418,7 +408,7 @@ class IngestionPipeline:
         return pd.DataFrame()
 
     async def fetch_politician_trades(self, days: int = 30) -> pd.DataFrame:
-        """Fetch recent politician trades"""
+        """Fetch recent politician trades."""
         congress_api = self.sources.get("congress")
         if not congress_api:
             logger.error("Congressional data source not available")
@@ -436,7 +426,7 @@ class IngestionPipeline:
     async def fetch_stock_data(
         self, tickers: List[str], period: str = "1mo"
     ) -> Dict[str, pd.DataFrame]:
-        """Fetch stock data for multiple tickers"""
+        """Fetch stock data for multiple tickers."""
         stock_data = {}
 
         for ticker in tickers:
@@ -459,7 +449,7 @@ class IngestionPipeline:
         return stock_data
 
     async def start_streaming(self):
-        """Start real-time streaming"""
+        """Start real-time streaming."""
         if not self.config.enable_streaming:
             logger.info("Streaming disabled")
             return
@@ -481,12 +471,12 @@ class IngestionPipeline:
         await self.stream_processor.start()
 
     async def stop_streaming(self):
-        """Stop streaming"""
+        """Stop streaming."""
         if self.stream_processor:
             await self.stream_processor.stop()
 
     async def run(self, mode: str = "batch"):
-        """Run ingestion pipeline"""
+        """Run ingestion pipeline."""
         self.metrics["start_time"] = datetime.now()
 
         try:
@@ -513,7 +503,7 @@ class IngestionPipeline:
         self.metrics["last_update"] = datetime.now()
 
     async def run_batch(self):
-        """Run batch processing"""
+        """Run batch processing."""
         logger.info("Starting batch processing...")
 
         # Fetch politician trades
@@ -529,7 +519,7 @@ class IngestionPipeline:
         logger.info(f"Processed {len(trades_df)} trades and {len(stock_data)} stocks")
 
     def get_metrics(self) -> Dict[str, Any]:
-        """Get pipeline metrics"""
+        """Get pipeline metrics."""
         metrics = self.metrics.copy()
 
         # Calculate throughput
```