mcli-framework 7.0.0 (mcli_framework-7.0.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mcli-framework might be problematic.

Files changed (186):
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/lib/services/data_pipeline.py
@@ -0,0 +1,460 @@
"""
Data Pipeline Service for mcli-LSH Integration

Handles ETL processes for data received from the LSH daemon.
"""

import asyncio
import json
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

from mcli.lib.logger.logger import get_logger
from .lsh_client import LSHClient, LSHEventProcessor

logger = get_logger(__name__)


class DataPipelineConfig:
    """Configuration for the data pipeline."""

    def __init__(self):
        self.batch_size = 100
        self.batch_timeout = 30  # seconds
        self.retry_attempts = 3
        self.retry_delay = 5  # seconds
        self.output_dir = Path("./data/processed")
        self.enable_validation = True
        self.enable_enrichment = True


class DataValidator:
    """Validates incoming data."""

    def __init__(self):
        self.logger = get_logger(f"{__name__}.validator")

    async def validate_trading_record(self, record: Dict[str, Any]) -> bool:
        """Validate a politician trading record."""
        required_fields = [
            "politician_name",
            "transaction_date",
            "transaction_type",
            "asset_name",
        ]

        for field in required_fields:
            if field not in record:
                self.logger.warning(f"Missing required field: {field}")
                return False

        # Validate transaction date (guaranteed present by the check above)
        try:
            datetime.fromisoformat(record["transaction_date"])
        except ValueError:
            self.logger.warning(f"Invalid transaction date: {record['transaction_date']}")
            return False

        # Validate amount if present
        if "transaction_amount" in record:
            try:
                float(record["transaction_amount"])
            except (ValueError, TypeError):
                self.logger.warning(f"Invalid transaction amount: {record['transaction_amount']}")
                return False

        return True

    async def validate_supabase_record(self, table: str, record: Dict[str, Any]) -> bool:
        """Validate a Supabase record based on table schema."""
        if not record:
            return False

        # Basic validation - can be extended with schema validation
        if "id" in record and not record["id"]:
            self.logger.warning("Record missing ID")
            return False

        return True


class DataEnricher:
    """Enriches data with additional information."""

    def __init__(self):
        self.logger = get_logger(f"{__name__}.enricher")

    async def enrich_trading_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Enrich a trading record with additional data."""
        enriched = record.copy()

        # Add processing timestamp
        enriched["processed_at"] = datetime.now(timezone.utc).isoformat()

        # Add amount categorization (guarded, since validation may be disabled)
        if "transaction_amount" in record:
            try:
                amount = float(record["transaction_amount"])
            except (ValueError, TypeError):
                self.logger.warning(
                    f"Skipping amount enrichment for: {record['transaction_amount']}"
                )
            else:
                enriched["amount_category"] = self._categorize_amount(amount)
                enriched["amount_bucket"] = self._bucket_amount(amount)

        # Add politician party enrichment (placeholder)
        if "politician_name" in record:
            enriched["politician_metadata"] = await self._get_politician_metadata(
                record["politician_name"]
            )

        # Add market context (placeholder)
        if "asset_name" in record and "transaction_date" in record:
            enriched["market_context"] = await self._get_market_context(
                record["asset_name"], record["transaction_date"]
            )

        return enriched

    def _categorize_amount(self, amount: float) -> str:
        """Categorize a transaction amount."""
        if amount < 1000:
            return "micro"
        elif amount < 15000:
            return "small"
        elif amount < 50000:
            return "medium"
        elif amount < 500000:
            return "large"
        else:
            return "mega"

    def _bucket_amount(self, amount: float) -> str:
        """Bucket amounts for analysis."""
        if amount < 1000:
            return "0-1K"
        elif amount < 10000:
            return "1K-10K"
        elif amount < 50000:
            return "10K-50K"
        elif amount < 100000:
            return "50K-100K"
        elif amount < 500000:
            return "100K-500K"
        elif amount < 1000000:
            return "500K-1M"
        else:
            return "1M+"

    async def _get_politician_metadata(self, politician_name: str) -> Dict[str, Any]:
        """Get politician metadata (placeholder for an external API)."""
        # This would typically call an external API
        return {
            "enriched_at": datetime.now(timezone.utc).isoformat(),
            "source": "mcli_enricher",
            "name_normalized": politician_name.title(),
        }

    async def _get_market_context(self, asset_name: str, transaction_date: str) -> Dict[str, Any]:
        """Get market context for the transaction (placeholder)."""
        # This would typically call financial APIs
        return {
            "enriched_at": datetime.now(timezone.utc).isoformat(),
            "asset_normalized": asset_name.upper(),
            "transaction_date": transaction_date,
        }


class DataProcessor:
    """Main data processing engine."""

    def __init__(self, config: DataPipelineConfig):
        self.config = config
        self.logger = get_logger(f"{__name__}.processor")
        self.validator = DataValidator()
        self.enricher = DataEnricher()
        self.batch_buffer: List[Dict[str, Any]] = []
        self.last_batch_time = time.time()
        self._processing_lock = asyncio.Lock()

        # Ensure output directory exists
        self.config.output_dir.mkdir(parents=True, exist_ok=True)

    async def process_trading_data(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process politician trading data."""
        processed_records = []

        for record in records:
            try:
                # Validate
                if self.config.enable_validation:
                    if not await self.validator.validate_trading_record(record):
                        self.logger.warning(
                            f"Validation failed for record: {record.get('id', 'unknown')}"
                        )
                        continue

                # Enrich
                if self.config.enable_enrichment:
                    enriched_record = await self.enricher.enrich_trading_record(record)
                else:
                    enriched_record = record.copy()

                # Add processing metadata
                enriched_record["mcli_processed_at"] = datetime.now(timezone.utc).isoformat()
                enriched_record["mcli_pipeline_version"] = "1.0.0"

                processed_records.append(enriched_record)

            except Exception as e:
                self.logger.error(f"Error processing trading record: {e}")
                continue

        self.logger.info(f"Processed {len(processed_records)}/{len(records)} trading records")
        return processed_records

    async def process_supabase_sync(
        self, table: str, operation: str, data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Process Supabase sync data."""
        try:
            # Validate
            if self.config.enable_validation:
                if not await self.validator.validate_supabase_record(table, data):
                    self.logger.warning(f"Validation failed for {table} record")
                    return {}

            # Transform based on table and operation
            processed_data = await self._transform_supabase_data(table, operation, data)

            # Add processing metadata
            processed_data["mcli_processed_at"] = datetime.now(timezone.utc).isoformat()
            processed_data["mcli_source_table"] = table
            processed_data["mcli_operation"] = operation

            return processed_data

        except Exception as e:
            self.logger.error(f"Error processing Supabase sync: {e}")
            return {}

    async def _transform_supabase_data(
        self, table: str, operation: str, data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Transform Supabase data based on table schema."""
        transformed = data.copy()

        # Apply table-specific transformations
        if "politician" in table.lower():
            transformed = await self._transform_politician_table(transformed)
        elif "trading" in table.lower():
            transformed = await self._transform_trading_table(transformed)

        return transformed

    async def _transform_politician_table(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Transform politician table data."""
        # Normalize names
        if "name" in data:
            data["name_normalized"] = data["name"].title()

        # Add derived fields
        if "party" in data:
            data["party_normalized"] = data["party"].upper()

        return data

    async def _transform_trading_table(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Transform trading table data."""
        # Normalize asset names
        if "asset_name" in data:
            data["asset_name_normalized"] = data["asset_name"].upper()

        # Convert amounts to float
        if "amount" in data and isinstance(data["amount"], str):
            try:
                data["amount_float"] = float(data["amount"])
            except ValueError:
                pass

        return data

    async def add_to_batch(self, record: Dict[str, Any]):
        """Add a record to the batch for processing."""
        async with self._processing_lock:
            self.batch_buffer.append(record)

            # Flush when the batch is full or the timeout has elapsed
            time_since_last_batch = time.time() - self.last_batch_time
            if (
                len(self.batch_buffer) >= self.config.batch_size
                or time_since_last_batch >= self.config.batch_timeout
            ):
                await self._process_batch()

    async def _process_batch(self):
        """Process the accumulated batch (caller must hold the processing lock)."""
        if not self.batch_buffer:
            return

        batch = self.batch_buffer.copy()
        self.batch_buffer.clear()
        self.last_batch_time = time.time()

        self.logger.info(f"Processing batch of {len(batch)} records")

        try:
            # Process batch
            processed_batch = await self.process_trading_data(batch)

            # Save to file
            await self._save_batch(processed_batch)

            # Emit completion event
            await self._emit_batch_completed(processed_batch)

        except Exception as e:
            self.logger.error(f"Batch processing failed: {e}")
            # Re-add to buffer for retry (simplified)
            self.batch_buffer.extend(batch)

    async def _save_batch(self, batch: List[Dict[str, Any]]):
        """Save a processed batch to a JSONL file."""
        if not batch:
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"processed_batch_{timestamp}.jsonl"
        filepath = self.config.output_dir / filename

        try:
            with open(filepath, "w") as f:
                for record in batch:
                    f.write(json.dumps(record) + "\n")

            self.logger.info(f"Saved {len(batch)} records to {filepath}")

        except Exception as e:
            self.logger.error(f"Failed to save batch: {e}")

    async def _emit_batch_completed(self, batch: List[Dict[str, Any]]):
        """Emit a batch completion event."""
        self.logger.info(f"Batch processing completed: {len(batch)} records")

    async def flush_batch(self):
        """Force processing of the current batch."""
        async with self._processing_lock:
            if self.batch_buffer:
                await self._process_batch()


class LSHDataPipeline:
    """Main integration service for the LSH-mcli data pipeline."""

    def __init__(self, lsh_client: LSHClient, config: Optional[DataPipelineConfig] = None):
        self.lsh_client = lsh_client
        self.config = config or DataPipelineConfig()
        self.processor = DataProcessor(self.config)
        self.event_processor = LSHEventProcessor(lsh_client)
        self.logger = get_logger(__name__)
        self._is_running = False

        # Set up event handlers
        self._setup_pipeline_handlers()

    def _setup_pipeline_handlers(self):
        """Set up event handlers for pipeline processing."""
        self.lsh_client.on("lsh.job.completed", self._handle_job_completed)
        self.lsh_client.on("lsh.supabase.sync", self._handle_supabase_sync)
        self.lsh_client.on("trading.data.processed", self._handle_trading_data)

    async def _handle_job_completed(self, event_data: Dict[str, Any]):
        """Handle LSH job completion."""
        job_name = event_data.get("job_name", "")
        job_id = event_data.get("job_id", "")

        self.logger.info(f"Processing completed job: {job_name}")

        # Check whether this is a trading-related job
        if "trading" in job_name.lower() or "politician" in job_name.lower():
            stdout = event_data.get("stdout", "")
            if stdout.strip():
                await self._process_job_output(job_id, stdout)

    async def _handle_supabase_sync(self, event_data: Dict[str, Any]):
        """Handle a Supabase sync event."""
        table = event_data.get("table", "")
        operation = event_data.get("operation", "")
        data = event_data.get("data", {})

        self.logger.info(f"Processing Supabase sync: {operation} on {table}")

        processed_data = await self.processor.process_supabase_sync(table, operation, data)
        if processed_data:
            await self.processor.add_to_batch(processed_data)

    async def _handle_trading_data(self, event_data: Dict[str, Any]):
        """Handle processed trading data."""
        records = event_data.get("records", [])

        self.logger.info(f"Received {len(records)} trading records for pipeline processing")

        for record in records:
            await self.processor.add_to_batch(record)

    async def _process_job_output(self, job_id: str, output: str):
        """Process job output data (expects one JSON object per line)."""
        try:
            records = []
            for line in output.strip().split("\n"):
                if line.strip():
                    try:
                        record = json.loads(line)
                        record["source_job_id"] = job_id
                        records.append(record)
                    except json.JSONDecodeError:
                        continue

            if records:
                processed_records = await self.processor.process_trading_data(records)
                for record in processed_records:
                    await self.processor.add_to_batch(record)

        except Exception as e:
            self.logger.error(f"Error processing job output: {e}")

    async def start(self):
        """Start the data pipeline."""
        if self._is_running:
            self.logger.warning("Pipeline already running")
            return

        self.logger.info("Starting LSH data pipeline...")
        self._is_running = True

        try:
            # Start LSH event processing
            await self.event_processor.start_processing()

        except Exception as e:
            self.logger.error(f"Pipeline error: {e}")
            self._is_running = False
            raise

    async def stop(self):
        """Stop the data pipeline."""
        if not self._is_running:
            return

        self.logger.info("Stopping LSH data pipeline...")
        self._is_running = False

        # Flush any remaining batches
        await self.processor.flush_batch()

    async def get_stats(self) -> Dict[str, Any]:
        """Get pipeline statistics."""
        return {
            "is_running": self._is_running,
            "batch_buffer_size": len(self.processor.batch_buffer),
            "last_batch_time": self.processor.last_batch_time,
            "config": {
                "batch_size": self.config.batch_size,
                "batch_timeout": self.config.batch_timeout,
                "output_dir": str(self.config.output_dir),
            },
        }