mcli-framework 7.1.3__py3-none-any.whl → 7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (38)
  1. mcli/app/main.py +10 -0
  2. mcli/lib/custom_commands.py +424 -0
  3. mcli/lib/paths.py +12 -0
  4. mcli/ml/dashboard/app.py +13 -13
  5. mcli/ml/dashboard/app_integrated.py +1292 -148
  6. mcli/ml/dashboard/app_supabase.py +46 -21
  7. mcli/ml/dashboard/app_training.py +14 -14
  8. mcli/ml/dashboard/components/charts.py +258 -0
  9. mcli/ml/dashboard/components/metrics.py +125 -0
  10. mcli/ml/dashboard/components/tables.py +228 -0
  11. mcli/ml/dashboard/pages/cicd.py +382 -0
  12. mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
  13. mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
  14. mcli/ml/dashboard/pages/workflows.py +533 -0
  15. mcli/ml/training/train_model.py +569 -0
  16. mcli/self/self_cmd.py +322 -94
  17. mcli/workflow/politician_trading/data_sources.py +259 -1
  18. mcli/workflow/politician_trading/models.py +159 -1
  19. mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
  20. mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
  21. mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
  22. mcli/workflow/politician_trading/seed_database.py +539 -0
  23. mcli/workflow/workflow.py +8 -27
  24. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
  25. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
  26. mcli/workflow/daemon/api_daemon.py +0 -800
  27. mcli/workflow/daemon/commands.py +0 -1196
  28. mcli/workflow/dashboard/dashboard_cmd.py +0 -120
  29. mcli/workflow/file/file.py +0 -100
  30. mcli/workflow/git_commit/commands.py +0 -430
  31. mcli/workflow/politician_trading/commands.py +0 -1939
  32. mcli/workflow/scheduler/commands.py +0 -493
  33. mcli/workflow/sync/sync_cmd.py +0 -437
  34. mcli/workflow/videos/videos.py +0 -242
  35. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
  36. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
  37. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
  38. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
@@ -1,1939 +0,0 @@
1
- """
2
- CLI commands for politician trading workflow
3
- """
4
-
5
- import asyncio
6
- import json
7
- import os
8
- import re
9
- from datetime import datetime, timedelta
10
- from pathlib import Path
11
- from typing import Any, Dict, List
12
-
13
- import click
14
- from rich.console import Console
15
- from rich.json import JSON
16
- from rich.panel import Panel
17
- from rich.progress import Progress, SpinnerColumn, TextColumn
18
- from rich.table import Table
19
-
20
- from mcli.lib.logger.logger import get_logger
21
-
22
- from .config import WorkflowConfig
23
- from .connectivity import (
24
- SupabaseConnectivityValidator,
25
- run_connectivity_validation,
26
- run_continuous_monitoring,
27
- )
28
- from .database import PoliticianTradingDB
29
- from .monitoring import PoliticianTradingMonitor, run_health_check, run_stats_report
30
- from .workflow import (
31
- PoliticianTradingWorkflow,
32
- check_politician_trading_status,
33
- run_politician_trading_collection,
34
- )
35
-
36
- logger = get_logger(__name__)
37
- console = Console()
38
-
39
-
40
# Root click group; every sub-command in this module attaches to it via
# ``@politician_trading_cli.command(...)``. The docstring is the --help text.
@click.group(name="politician-trading")
def politician_trading_cli():
    """Manage politician trading data collection workflow"""
44
-
45
-
46
@politician_trading_cli.command("run")
@click.option("--full", is_flag=True, help="Run full data collection (default)")
@click.option("--us-only", is_flag=True, help="Only collect US Congress data")
@click.option("--eu-only", is_flag=True, help="Only collect EU Parliament data")
def run_collection(full: bool, us_only: bool, eu_only: bool):
    """Run politician trading data collection"""
    # NOTE: region-specific collection is not implemented; all three modes run
    # the same collection coroutine and differ only in the banner printed.
    console.print("🏛️ Starting Politician Trading Data Collection", style="bold cyan")

    if us_only:
        banner, banner_style = "Collecting US Congress data only...", "yellow"
    elif eu_only:
        banner, banner_style = "Collecting EU Parliament data only...", "yellow"
    else:
        banner, banner_style = "Running full data collection...", "green"

    try:
        console.print(banner, style=banner_style)
        result = asyncio.run(run_politician_trading_collection())

        # Failure path first so the success rendering below stays flat.
        if result.get("status") != "completed":
            console.print("❌ Collection failed!", style="bold red")
            if "error" in result:
                console.print(f"Error: {result['error']}", style="red")
            return

        console.print("✅ Collection completed successfully!", style="bold green")

        # Two-column summary of the whole run.
        summary = result.get("summary", {})
        overview = Table(title="Collection Summary")
        overview.add_column("Metric", style="cyan")
        overview.add_column("Value", style="green")
        overview_rows = (
            ("New Disclosures", str(summary.get("total_new_disclosures", 0))),
            ("Updated Disclosures", str(summary.get("total_updated_disclosures", 0))),
            ("Errors", str(len(summary.get("errors", [])))),
            (
                "Duration",
                _calculate_duration(result.get("started_at"), result.get("completed_at")),
            ),
        )
        for metric, value in overview_rows:
            overview.add_row(metric, value)
        console.print(overview)

        # One panel per job recorded in the result.
        for job_name, job_data in result.get("jobs", {}).items():
            body = (
                f"Status: {job_data.get('status', 'unknown')}\n"
                f"New: {job_data.get('new_disclosures', 0)} | "
                f"Updated: {job_data.get('updated_disclosures', 0)} | "
                f"Errors: {len(job_data.get('errors', []))}"
            )
            console.print(
                Panel(body, title=f"📊 {job_name.upper()} Job", border_style="green")
            )

    except Exception as e:
        # Top-level CLI boundary: report and log rather than show a traceback.
        console.print(f"❌ Command failed: {e}", style="bold red")
        logger.error(f"Collection command failed: {e}")
107
-
108
-
109
@politician_trading_cli.command("status")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def check_status(output_json: bool):
    """Check current status of politician trading data collection"""
    try:
        status = asyncio.run(check_politician_trading_status())

        # Machine-readable mode short-circuits all formatting below.
        if output_json:
            console.print(JSON.from_data(status))
            return

        console.print("🏛️ Politician Trading Data Status", style="bold cyan")

        if "error" in status:
            console.print(f"❌ Status check failed: {status['error']}", style="red")
            return

        # Headline figures, one per line inside a single panel.
        summary_lines = (
            f"Database Connection: {status.get('database_connection', 'unknown')}",
            f"Configuration: {status.get('config_loaded', 'unknown')}",
            f"Total Disclosures: {status.get('total_disclosures', 0):,}",
            f"Today's New Records: {status.get('recent_disclosures_today', 0):,}",
            f"Last Update: {status.get('timestamp', 'unknown')}",
        )
        console.print(
            Panel("\n".join(summary_lines), title="📈 System Status", border_style="blue")
        )

        recent_jobs = status.get("recent_jobs", [])
        if not recent_jobs:
            return

        jobs_table = Table(title="Recent Jobs")
        column_specs = (
            ("Job Type", {"style": "cyan"}),
            ("Status", {"style": "green"}),
            ("Started", {"style": "yellow"}),
            ("Records", {"justify": "right", "style": "magenta"}),
            ("Duration", {"style": "blue"}),
        )
        for header, options in column_specs:
            jobs_table.add_column(header, **options)

        # Only the first five entries of the list are shown.
        for job in recent_jobs[:5]:
            if job.get("status") == "completed":
                colour = "green"
            elif job.get("status") == "failed":
                colour = "red"
            else:
                colour = "yellow"

            jobs_table.add_row(
                job.get("job_type", ""),
                f"[{colour}]{job.get('status', '')}[/{colour}]",
                _format_timestamp(job.get("started_at")),
                str(job.get("records_processed", 0)),
                _calculate_duration(job.get("started_at"), job.get("completed_at")),
            )

        console.print(jobs_table)

    except Exception as e:
        # Top-level CLI boundary: report and log rather than show a traceback.
        console.print(f"❌ Status check failed: {e}", style="bold red")
        logger.error(f"Status command failed: {e}")
168
-
169
-
170
@politician_trading_cli.command("setup")
@click.option("--create-tables", is_flag=True, help="Create database tables")
@click.option("--verify", is_flag=True, help="Verify configuration and connection")
@click.option("--generate-schema", is_flag=True, help="Generate schema SQL file")
@click.option("--output-dir", default=".", help="Directory to save generated files")
def setup_workflow(create_tables: bool, verify: bool, generate_schema: bool, output_dir: str):
    """Setup politician trading workflow"""
    # The three flags are independent and run in this order when combined:
    # --verify, --generate-schema, --create-tables.
    console.print("🔧 Setting up Politician Trading Workflow", style="bold blue")

    try:
        config = WorkflowConfig.default()
        workflow = PoliticianTradingWorkflow(config)

        if verify:
            console.print("Verifying configuration and database connection...")

            # A verification failure is reported but must not abort the other
            # requested steps, hence the nested try.
            try:
                status = asyncio.run(workflow.run_quick_check())
                if "error" not in status:
                    console.print("✅ Database connection successful", style="green")
                    console.print("✅ Configuration loaded", style="green")

                    config_text = f"""Supabase URL: {config.supabase.url}
Request Delay: {config.scraping.request_delay}s
Max Retries: {config.scraping.max_retries}
Timeout: {config.scraping.timeout}s"""

                    config_panel = Panel(config_text, title="🔧 Configuration", border_style="blue")
                    console.print(config_panel)
                else:
                    console.print(f"❌ Verification failed: {status['error']}", style="red")
            except Exception as e:
                console.print(f"❌ Verification failed: {e}", style="red")

        if generate_schema:
            console.print("📄 Generating database schema files...", style="blue")

            output_path = Path(output_dir)
            # parents=True so a nested --output-dir (e.g. "out/sql") works even
            # when intermediate directories do not exist yet.
            output_path.mkdir(parents=True, exist_ok=True)

            # The schema template sits next to this module.
            schema_file = Path(__file__).parent / "schema.sql"
            if schema_file.exists():
                schema_content = schema_file.read_text(encoding="utf-8")

                output_schema_file = output_path / "politician_trading_schema.sql"
                output_schema_file.write_text(schema_content, encoding="utf-8")

                console.print(
                    f"✅ Schema SQL generated: {output_schema_file.absolute()}", style="green"
                )

                # Derive the project ref (first host label) for the dashboard
                # link. partition() never raises, so an empty or scheme-less
                # URL falls back to a placeholder instead of aborting after
                # the schema file has already been written.
                host = (config.supabase.url or "").partition("//")[2]
                project_ref = host.split(".")[0] or "<your-project-ref>"

                instructions = f"""# Politician Trading Database Setup Instructions

## Step 1: Create Database Schema

1. Open your Supabase SQL editor: https://supabase.com/dashboard/project/{project_ref}/sql/new
2. Copy and paste the contents of: {output_schema_file.absolute()}
3. Execute the SQL to create all tables, indexes, and triggers

## Step 2: Verify Setup

Run the following command to verify everything is working:

```bash
politician-trading setup --verify
```

## Step 3: Test Connectivity

```bash
politician-trading connectivity
```

## Step 4: Run First Collection

```bash
politician-trading test-workflow --verbose
```

## Step 5: Setup Automated Collection (Optional)

```bash
politician-trading cron-job --create
```

## Database Tables Created

- **politicians**: Stores politician information (US Congress, EU Parliament)
- **trading_disclosures**: Individual trading transactions/disclosures
- **data_pull_jobs**: Job execution tracking and status
- **data_sources**: Data source configuration and health

## Troubleshooting

If you encounter issues:

1. Check connectivity: `politician-trading connectivity --json`
2. View logs: `politician-trading health`
3. Test workflow: `politician-trading test-workflow --verbose`
"""

                instructions_file = output_path / "SETUP_INSTRUCTIONS.md"
                instructions_file.write_text(instructions, encoding="utf-8")

                console.print(
                    f"✅ Setup instructions generated: {instructions_file.absolute()}",
                    style="green",
                )

                console.print("\n📋 Generated Files:", style="bold")
                console.print(f" 📄 Schema SQL: {output_schema_file.name}")
                console.print(f" 📋 Instructions: {instructions_file.name}")
                console.print(f" 📁 Location: {output_path.absolute()}")

                console.print("\n🚀 Next Steps:", style="bold green")
                console.print("1. Open Supabase SQL editor")
                console.print(f"2. Execute SQL from: {output_schema_file.name}")
                console.print("3. Run: politician-trading setup --verify")
                console.print("4. Run: politician-trading test-workflow --verbose")

            else:
                console.print("❌ Schema template not found", style="red")

        if create_tables:
            console.print("Creating database tables...")
            schema_ok = asyncio.run(workflow.db.ensure_schema())
            if schema_ok:
                console.print("✅ Database schema verified", style="green")
            else:
                console.print("⚠️ Database schema needs to be created manually", style="yellow")
                console.print("💡 Run: politician-trading setup --generate-schema", style="blue")

    except Exception as e:
        # Top-level CLI boundary: report and log rather than show a traceback.
        console.print(f"❌ Setup failed: {e}", style="bold red")
        logger.error(f"Setup command failed: {e}")
315
-
316
-
317
@politician_trading_cli.command("cron-job")
@click.option("--create", is_flag=True, help="Show how to create Supabase cron job")
@click.option("--test", is_flag=True, help="Test the cron job function")
def manage_cron_job(create: bool, test: bool):
    """Manage Supabase cron job for automated data collection"""
    # --create only prints SQL and instructions; nothing is sent to Supabase.
    if create:
        console.print("🕒 Creating Supabase Cron Job", style="bold blue")

        # Template the user pastes into the Supabase SQL editor. The URL and
        # bearer token are placeholders to be edited by hand.
        cron_sql = """
-- Create cron job for politician trading data collection
SELECT cron.schedule(
'politician-trading-collection',
'0 */6 * * *', -- Every 6 hours
$$
SELECT net.http_post(
url := 'https://your-function-url.supabase.co/functions/v1/politician-trading-collect',
headers := '{"Content-Type": "application/json", "Authorization": "Bearer YOUR_ANON_KEY"}'::jsonb,
body := '{}'::jsonb
) as request_id;
$$
);

-- Check cron job status
SELECT * FROM cron.job;
"""

        console.print("Add this SQL to your Supabase SQL editor:", style="green")
        sql_panel = Panel(cron_sql, title="📝 Cron Job SQL", border_style="green")
        console.print(sql_panel)

        console.print("\n📋 Next steps:", style="bold blue")
        follow_ups = (
            "1. Create an Edge Function in Supabase for the collection endpoint",
            "2. Update the URL in the cron job SQL above",
            "3. Execute the SQL in your Supabase dashboard",
            "4. Monitor the job with: SELECT * FROM cron.job_run_details;",
        )
        for step in follow_ups:
            console.print(step)

    # --test runs the same collection coroutine the scheduled job would trigger.
    if not test:
        return

    console.print("🧪 Testing cron job function...", style="yellow")
    try:
        outcome = asyncio.run(run_politician_trading_collection())
        console.print("✅ Cron job function test completed", style="green")
        console.print(JSON.from_data(outcome))
    except Exception as e:
        console.print(f"❌ Cron job test failed: {e}", style="red")
361
-
362
-
363
@politician_trading_cli.command("health")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def check_health(output_json: bool):
    """Check system health and status"""
    try:
        report = asyncio.run(run_health_check())

        # Human-readable rendering is delegated to the monitor; --json dumps
        # the raw report instead.
        if not output_json:
            PoliticianTradingMonitor().display_health_report(report)
            return

        console.print(JSON.from_data(report))

    except Exception as e:
        # Top-level CLI boundary: report and log rather than show a traceback.
        console.print(f"❌ Health check failed: {e}", style="bold red")
        logger.error(f"Health check command failed: {e}")
379
-
380
-
381
@politician_trading_cli.command("stats")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def show_stats(output_json: bool):
    """Show detailed statistics"""
    try:
        report = asyncio.run(run_stats_report())

        # Human-readable rendering is delegated to the monitor; --json dumps
        # the raw report instead.
        if not output_json:
            PoliticianTradingMonitor().display_stats_report(report)
            return

        console.print(JSON.from_data(report))

    except Exception as e:
        # Top-level CLI boundary: report and log rather than show a traceback.
        console.print(f"❌ Stats generation failed: {e}", style="bold red")
        logger.error(f"Stats command failed: {e}")
397
-
398
-
399
@politician_trading_cli.command("monitor")
@click.option("--interval", default=30, help="Check interval in seconds")
@click.option("--count", default=0, help="Number of checks (0 = infinite)")
def continuous_monitor(interval: int, count: int):
    """Continuously monitor system health"""
    console.print(f"🔄 Starting continuous monitoring (interval: {interval}s)", style="bold blue")

    async def monitor_loop():
        # Runs health checks until `count` attempts have been made
        # (count == 0 means run until interrupted).
        monitor = PoliticianTradingMonitor()
        check_count = 0

        while True:
            # Count every attempt, successful or not, so that --count N always
            # terminates even when each check fails (e.g. database down).
            check_count += 1
            try:
                console.clear()
                console.print(
                    f"Check #{check_count} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
                    style="dim",
                )

                health = await monitor.get_system_health()
                monitor.display_health_report(health)
            except Exception as e:
                # A single failed check should not stop the monitor.
                console.print(f"❌ Monitor check failed: {e}", style="red")

            if count > 0 and check_count >= count:
                break

            console.print(
                f"\n⏱️ Next check in {interval} seconds... (Ctrl+C to stop)", style="dim"
            )
            await asyncio.sleep(interval)

    try:
        asyncio.run(monitor_loop())
    except KeyboardInterrupt:
        # Ctrl+C surfaces here, not inside the coroutine: asyncio.run cancels
        # the main task and re-raises KeyboardInterrupt to its caller.
        console.print("\n👋 Monitoring stopped by user", style="yellow")
    except Exception as e:
        console.print(f"❌ Monitoring failed: {e}", style="bold red")
        logger.error(f"Monitor command failed: {e}")
443
-
444
-
445
@politician_trading_cli.command("connectivity")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
@click.option("--continuous", is_flag=True, help="Run continuous monitoring")
@click.option("--interval", default=30, help="Check interval in seconds (continuous mode)")
@click.option("--duration", default=0, help="Duration in minutes (0 = infinite)")
def check_connectivity(output_json: bool, continuous: bool, interval: int, duration: int):
    """Test Supabase connectivity and database operations"""
    # Continuous mode hands control to the monitoring coroutine and ignores --json.
    if continuous:
        console.print("🔄 Starting continuous connectivity monitoring", style="bold blue")
        try:
            asyncio.run(run_continuous_monitoring(interval, duration))
        except Exception as e:
            console.print(f"❌ Continuous monitoring failed: {e}", style="bold red")
            logger.error(f"Continuous monitoring failed: {e}")
        return

    # One-shot validation: raw JSON or a report rendered by the validator.
    try:
        report = asyncio.run(run_connectivity_validation())

        if not output_json:
            SupabaseConnectivityValidator().display_connectivity_report(report)
        else:
            console.print(JSON.from_data(report))

    except Exception as e:
        console.print(f"❌ Connectivity validation failed: {e}", style="bold red")
        logger.error(f"Connectivity validation failed: {e}")
472
-
473
-
474
@politician_trading_cli.command("test-workflow")
@click.option("--verbose", "-v", is_flag=True, help="Verbose output")
@click.option("--validate-writes", is_flag=True, help="Validate database writes")
def test_full_workflow(verbose: bool, validate_writes: bool):
    """Run a complete workflow test with live Supabase connectivity"""
    console.print("🧪 Running Full Politician Trading Workflow Test", style="bold green")

    # Connectivity score (percent) at or above which the link counts as good.
    healthy_score = 75

    async def run_test():
        # Step 1: connectivity before the run.
        console.print("\n🔗 Step 1: Validating Supabase connectivity...", style="blue")
        validator = SupabaseConnectivityValidator()
        initial = await validator.validate_connectivity()

        if not verbose:
            console.print(f"Connectivity Score: {initial['connectivity_score']}%", style="cyan")
        else:
            validator.display_connectivity_report(initial)

        if initial["connectivity_score"] < healthy_score:
            console.print("⚠️ Connectivity issues detected. Workflow may fail.", style="yellow")

        # Step 2: the collection workflow itself.
        console.print("\n🏛️ Step 2: Running politician trading collection workflow...", style="blue")

        try:
            with console.status("[bold blue]Executing workflow..."):
                outcome = await run_politician_trading_collection()

            completed = outcome.get("status") == "completed"

            console.print("\n📊 Workflow Results:", style="bold")

            if not completed:
                console.print("❌ Workflow failed!", style="red")
                if "error" in outcome:
                    console.print(f"Error: {outcome['error']}", style="red")
            else:
                console.print("✅ Workflow completed successfully!", style="green")

                summary = outcome.get("summary", {})
                console.print(f"New Disclosures: {summary.get('total_new_disclosures', 0)}")
                console.print(f"Updated Disclosures: {summary.get('total_updated_disclosures', 0)}")
                console.print(f"Errors: {len(summary.get('errors', []))}")

                if verbose and summary.get("errors"):
                    console.print("\nErrors encountered:", style="red")
                    # Cap the listing at the first five errors.
                    for problem in summary["errors"][:5]:
                        console.print(f" • {problem}", style="dim red")

            # Step 3 (optional): exercise database writes through the validator.
            if validate_writes:
                console.print("\n🔍 Step 3: Validating database writes...", style="blue")
                write_check = await validator._test_write_operations()

                if not write_check["success"]:
                    console.print(
                        f"❌ Database write validation failed: {write_check.get('error', 'Unknown error')}",
                        style="red",
                    )
                else:
                    console.print("✅ Database writes validated successfully", style="green")

            # Step 4: connectivity after the run.
            console.print("\n🔗 Step 4: Post-workflow connectivity check...", style="blue")
            final = await validator.validate_connectivity()

            console.print(
                f"Final Connectivity Score: {final['connectivity_score']}%",
                style="cyan",
            )

            console.print("\n📋 Test Summary:", style="bold")
            if completed:
                workflow_status = "✅ PASSED"
            else:
                workflow_status = "❌ FAILED"
            if final["connectivity_score"] >= healthy_score:
                connectivity_status = "✅ GOOD"
            else:
                connectivity_status = "⚠️ DEGRADED"

            console.print(f"Workflow: {workflow_status}")
            console.print(f"Connectivity: {connectivity_status}")
            console.print(
                f"Duration: {outcome.get('started_at', '')} to {outcome.get('completed_at', '')}"
            )

            # The test passes only when the workflow completed AND the
            # post-run connectivity score is still healthy.
            return {
                "workflow_result": outcome,
                "connectivity_result": final,
                "test_passed": completed and final["connectivity_score"] >= healthy_score,
            }

        except Exception as e:
            console.print(f"❌ Workflow test failed: {e}", style="bold red")
            if verbose:
                console.print_exception()
            return {"error": str(e), "test_passed": False}

    try:
        verdict = asyncio.run(run_test())

        if not verdict.get("test_passed"):
            console.print("\n❌ Full workflow test FAILED!", style="bold red")
        else:
            console.print("\n🎉 Full workflow test PASSED!", style="bold green")

    except Exception as e:
        # Covers failures in step 1, which runs outside run_test's own try.
        console.print(f"❌ Test execution failed: {e}", style="bold red")
        logger.error(f"Test workflow command failed: {e}")
586
-
587
-
588
@politician_trading_cli.command("schema")
@click.option("--show-location", is_flag=True, help="Show schema file location")
@click.option("--generate", is_flag=True, help="Generate schema files")
@click.option("--output-dir", default=".", help="Output directory for generated files")
def manage_schema(show_location: bool, generate: bool, output_dir: str):
    """Manage database schema files"""
    # Modes are mutually exclusive and checked in this order:
    # --show-location, --generate, otherwise a static overview of the tables.

    if show_location:
        console.print("📁 Schema File Locations", style="bold blue")

        schema_file = Path(__file__).parent / "schema.sql"
        builtin_exists = schema_file.exists()

        console.print(f"Built-in Schema: {schema_file.absolute()}", style="cyan")
        # stat() raises FileNotFoundError on a missing file, so only report
        # the size once existence is confirmed.
        if builtin_exists:
            console.print(f"File size: {schema_file.stat().st_size} bytes", style="dim")
        console.print(
            f"Exists: {'✅ Yes' if builtin_exists else '❌ No'}",
            style="green" if builtin_exists else "red",
        )

        # A previously generated copy in the current working directory, if any.
        cwd_schema = Path.cwd() / "politician_trading_schema.sql"
        cwd_exists = cwd_schema.exists()
        console.print(f"\nCurrent directory: {cwd_schema.absolute()}", style="cyan")
        console.print(
            f"Exists: {'✅ Yes' if cwd_exists else '❌ No'}",
            style="green" if cwd_exists else "dim",
        )

        if not cwd_exists:
            console.print("\n💡 To generate schema file here:", style="blue")
            console.print("politician-trading schema --generate", style="yellow")

    elif generate:
        try:
            console.print("📄 Generating database schema files...", style="blue")

            output_path = Path(output_dir)
            # parents=True so a nested --output-dir works when intermediate
            # directories do not exist yet.
            output_path.mkdir(parents=True, exist_ok=True)

            # The schema template sits next to this module.
            schema_file = Path(__file__).parent / "schema.sql"
            if schema_file.exists():
                schema_content = schema_file.read_text()

                output_schema_file = output_path / "politician_trading_schema.sql"
                output_schema_file.write_text(schema_content)

                console.print(
                    f"✅ Schema SQL generated: {output_schema_file.absolute()}", style="green"
                )

                console.print(f"📊 File size: {output_schema_file.stat().st_size:,} bytes")
                console.print(f"📅 Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

                # Rough count: lines that begin with a statement keyword.
                statements = sum(
                    1
                    for line in schema_content.split("\n")
                    if line.strip().startswith(("CREATE", "INSERT", "SELECT"))
                )
                console.print(f"📝 SQL statements: {statements}")

            else:
                console.print("❌ Schema template not found", style="red")

        except Exception as e:
            console.print(f"❌ Schema generation failed: {e}", style="red")

    else:
        console.print("🗂️ Politician Trading Database Schema", style="bold blue")

        # (table name, purpose, key features) — static documentation only.
        schema_info = [
            (
                "politicians",
                "Stores politician information",
                "UUID primary key, bioguide_id, role, party",
            ),
            (
                "trading_disclosures",
                "Individual trading transactions",
                "References politicians, amount ranges, asset details",
            ),
            (
                "data_pull_jobs",
                "Job execution tracking",
                "Status, timing, record counts, error details",
            ),
            (
                "data_sources",
                "Data source configuration",
                "URLs, regions, health status, request config",
            ),
        ]

        schema_table = Table(title="Database Tables")
        schema_table.add_column("Table", style="cyan")
        schema_table.add_column("Purpose", style="white")
        schema_table.add_column("Key Features", style="yellow")

        for table_name, purpose, features in schema_info:
            schema_table.add_row(table_name, purpose, features)

        console.print(schema_table)

        console.print("\n🚀 Commands:", style="bold")
        console.print(" --show-location Show where schema files are located")
        console.print(" --generate Generate schema SQL file")
        console.print(" --generate --output-dir DIR Generate to specific directory")
706
-
707
-
708
- # Helper functions
709
- def _calculate_duration(start_time: str, end_time: str) -> str:
710
- """Calculate duration between timestamps"""
711
- if not start_time or not end_time:
712
- return "Unknown"
713
-
714
- try:
715
- start = datetime.fromisoformat(start_time.replace("Z", "+00:00"))
716
- end = datetime.fromisoformat(end_time.replace("Z", "+00:00"))
717
- duration = end - start
718
-
719
- total_seconds = int(duration.total_seconds())
720
- hours = total_seconds // 3600
721
- minutes = (total_seconds % 3600) // 60
722
- seconds = total_seconds % 60
723
-
724
- if hours > 0:
725
- return f"{hours}h {minutes}m {seconds}s"
726
- elif minutes > 0:
727
- return f"{minutes}m {seconds}s"
728
- else:
729
- return f"{seconds}s"
730
- except Exception:
731
- return "Unknown"
732
-
733
-
734
- def _format_timestamp(timestamp: str) -> str:
735
- """Format timestamp for display"""
736
- if not timestamp:
737
- return "Unknown"
738
-
739
- try:
740
- dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
741
- return dt.strftime("%Y-%m-%d %H:%M")
742
- except Exception:
743
- return timestamp[:16] if len(timestamp) > 16 else timestamp
744
-
745
-
746
- def _format_asset_display(disclosure: Dict[str, Any]) -> str:
747
- """Format asset display with proper ticker/name handling"""
748
- asset_name = disclosure.get("asset_name", "Unknown Asset")
749
- asset_ticker = disclosure.get("asset_ticker")
750
-
751
- # If we have both ticker and name, show ticker first
752
- if asset_ticker and asset_ticker.strip() and asset_ticker.lower() != "none":
753
- return f"{asset_ticker} - {asset_name[:15]}"
754
- # If we only have asset name, show just that
755
- elif asset_name and asset_name.strip():
756
- return asset_name[:20]
757
- # Fallback
758
- else:
759
- return "Unknown Asset"
760
-
761
-
762
- @politician_trading_cli.command("data-sources")
763
- @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
764
- def view_data_sources(output_json: bool):
765
- """View current data sources and their configurations"""
766
- console = Console()
767
-
768
- try:
769
- from .config import WorkflowConfig
770
- from .data_sources import ACTIVE_SOURCES, ALL_DATA_SOURCES, TOTAL_SOURCES
771
-
772
- config = WorkflowConfig.default()
773
- active_sources = config.scraping.get_active_sources()
774
-
775
- # Group sources by category for display
776
- data_sources = {}
777
-
778
- for category, sources in ALL_DATA_SOURCES.items():
779
- active_category_sources = [s for s in sources if s.status == "active"]
780
- if active_category_sources:
781
- data_sources[category] = {
782
- "name": {
783
- "us_federal": "US Federal Government",
784
- "us_states": "US State Governments",
785
- "eu_parliament": "EU Parliament",
786
- "eu_national": "EU National Parliaments",
787
- "third_party": "Third-Party Aggregators",
788
- }[category],
789
- "sources": active_category_sources,
790
- "count": len(active_category_sources),
791
- "status": "active",
792
- "description": {
793
- "us_federal": "Congressional and federal official financial disclosures",
794
- "us_states": "State legislature financial disclosure databases",
795
- "eu_parliament": "MEP financial interest and income declarations",
796
- "eu_national": "National parliament financial disclosure systems",
797
- "third_party": "Commercial aggregators and enhanced analysis platforms",
798
- }[category],
799
- }
800
-
801
- if output_json:
802
- # For JSON output, convert DataSource objects to dictionaries
803
- json_output = {}
804
- for category, info in data_sources.items():
805
- json_output[category] = {
806
- "name": info["name"],
807
- "description": info["description"],
808
- "count": info["count"],
809
- "status": info["status"],
810
- "sources": [
811
- {
812
- "name": source.name,
813
- "jurisdiction": source.jurisdiction,
814
- "institution": source.institution,
815
- "url": source.url,
816
- "disclosure_types": [dt.value for dt in source.disclosure_types],
817
- "access_method": source.access_method.value,
818
- "update_frequency": source.update_frequency,
819
- "threshold_amount": source.threshold_amount,
820
- "data_format": source.data_format,
821
- "notes": source.notes,
822
- }
823
- for source in info["sources"]
824
- ],
825
- }
826
- console.print(JSON.from_data(json_output))
827
- else:
828
- console.print(
829
- f"📊 Comprehensive Political Trading Data Sources ({ACTIVE_SOURCES} active of {TOTAL_SOURCES} total)",
830
- style="bold cyan",
831
- )
832
-
833
- for category_id, source_info in data_sources.items():
834
- console.print(
835
- f"\n[bold blue]{source_info['name']}[/bold blue] ({source_info['count']} sources)"
836
- )
837
- console.print(f" {source_info['description']}", style="dim")
838
-
839
- # Create table for this category's sources
840
- table = Table()
841
- table.add_column("Source", style="cyan")
842
- table.add_column("Jurisdiction", style="green")
843
- table.add_column("Access", style="yellow")
844
- table.add_column("Disclosure Types", style="magenta")
845
- table.add_column("Threshold", style="blue")
846
-
847
- for source in source_info["sources"]:
848
- # Format disclosure types
849
- types_display = ", ".join(
850
- [dt.value.replace("_", " ").title() for dt in source.disclosure_types]
851
- )
852
-
853
- # Format threshold
854
- threshold_display = (
855
- f"${source.threshold_amount:,}" if source.threshold_amount else "None"
856
- )
857
-
858
- table.add_row(
859
- source.name,
860
- source.jurisdiction,
861
- source.access_method.value.replace("_", " ").title(),
862
- types_display[:30] + ("..." if len(types_display) > 30 else ""),
863
- threshold_display,
864
- )
865
-
866
- console.print(table)
867
-
868
- console.print(
869
- f"\n[dim]Total: {ACTIVE_SOURCES} active sources across {len(data_sources)} categories[/dim]"
870
- )
871
-
872
- except Exception as e:
873
- if output_json:
874
- console.print(JSON.from_data({"error": str(e)}))
875
- else:
876
- console.print(f"❌ Failed to load data sources: {e}", style="bold red")
877
-
878
-
879
- @politician_trading_cli.command("jobs")
880
- @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
881
- @click.option("--limit", default=10, help="Number of recent jobs to show")
882
- def view_jobs(output_json: bool, limit: int):
883
- """View current and recent data collection jobs"""
884
- console = Console()
885
-
886
- try:
887
-
888
- async def get_jobs():
889
- from .config import WorkflowConfig
890
- from .database import PoliticianTradingDB
891
-
892
- config = WorkflowConfig.default()
893
- db = PoliticianTradingDB(config)
894
-
895
- # Get recent jobs
896
- jobs_result = (
897
- db.client.table("data_pull_jobs")
898
- .select("*")
899
- .order("started_at", desc=True)
900
- .limit(limit)
901
- .execute()
902
- )
903
-
904
- return jobs_result.data if jobs_result.data else []
905
-
906
- jobs = asyncio.run(get_jobs())
907
-
908
- if output_json:
909
- console.print(JSON.from_data(jobs))
910
- else:
911
- console.print("🔄 Recent Data Collection Jobs", style="bold cyan")
912
-
913
- if not jobs:
914
- console.print("No jobs found", style="yellow")
915
- return
916
-
917
- jobs_table = Table()
918
- jobs_table.add_column("Job ID", style="cyan")
919
- jobs_table.add_column("Type", style="green")
920
- jobs_table.add_column("Status", style="white")
921
- jobs_table.add_column("Started", style="blue")
922
- jobs_table.add_column("Duration", style="magenta")
923
- jobs_table.add_column("Records", style="yellow")
924
-
925
- for job in jobs:
926
- status_color = {
927
- "completed": "green",
928
- "running": "yellow",
929
- "failed": "red",
930
- "pending": "blue",
931
- }.get(job.get("status", "unknown"), "white")
932
-
933
- # Calculate duration
934
- started = job.get("started_at", "")
935
- completed = job.get("completed_at", "")
936
- duration = _format_duration_from_timestamps(started, completed)
937
-
938
- # Format records
939
- records_info = f"{job.get('records_new', 0)}n/{job.get('records_updated', 0)}u/{job.get('records_failed', 0)}f"
940
-
941
- jobs_table.add_row(
942
- job.get("id", "")[:8] + "...",
943
- job.get("job_type", "unknown"),
944
- f"[{status_color}]{job.get('status', 'unknown')}[/{status_color}]",
945
- _format_timestamp(started),
946
- duration,
947
- records_info,
948
- )
949
-
950
- console.print(jobs_table)
951
- console.print("\nLegend: Records = new/updated/failed", style="dim")
952
-
953
- except Exception as e:
954
- if output_json:
955
- console.print(JSON.from_data({"error": str(e)}))
956
- else:
957
- console.print(f"❌ Failed to load jobs: {e}", style="bold red")
958
- logger.error(f"Jobs view failed: {e}")
959
-
960
-
961
- def _format_duration_from_timestamps(started: str, completed: str) -> str:
962
- """Calculate and format duration from timestamps"""
963
- if not started:
964
- return "Unknown"
965
-
966
- try:
967
- start_dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
968
-
969
- if completed:
970
- end_dt = datetime.fromisoformat(completed.replace("Z", "+00:00"))
971
- duration = end_dt - start_dt
972
- else:
973
- # Job still running
974
- from datetime import timezone
975
-
976
- duration = datetime.now(timezone.utc) - start_dt
977
-
978
- return _format_duration_seconds(int(duration.total_seconds()))
979
-
980
- except Exception:
981
- return "Unknown"
982
-
983
-
984
- @politician_trading_cli.command("politicians")
985
- @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
986
- @click.option("--limit", default=20, help="Number of politicians to show")
987
- @click.option(
988
- "--role", type=click.Choice(["us_house_rep", "us_senator", "eu_mep"]), help="Filter by role"
989
- )
990
- @click.option("--party", help="Filter by party")
991
- @click.option("--state", help="Filter by state/country")
992
- @click.option("--search", help="Search by name (first, last, or full name)")
993
- def view_politicians(output_json: bool, limit: int, role: str, party: str, state: str, search: str):
994
- """View and search politicians in the database"""
995
- console = Console()
996
-
997
- try:
998
-
999
- async def get_politicians():
1000
- from .config import WorkflowConfig
1001
- from .database import PoliticianTradingDB
1002
-
1003
- config = WorkflowConfig.default()
1004
- db = PoliticianTradingDB(config)
1005
-
1006
- # Build query
1007
- query = db.client.table("politicians").select("*")
1008
-
1009
- # Apply filters
1010
- if role:
1011
- query = query.eq("role", role)
1012
- if party:
1013
- query = query.ilike("party", f"%{party}%")
1014
- if state:
1015
- query = query.ilike("state_or_country", f"%{state}%")
1016
- if search:
1017
- # Search across name fields
1018
- query = query.or_(
1019
- f"first_name.ilike.%{search}%,last_name.ilike.%{search}%,full_name.ilike.%{search}%"
1020
- )
1021
-
1022
- result = query.order("created_at", desc=True).limit(limit).execute()
1023
- return result.data if result.data else []
1024
-
1025
- politicians = asyncio.run(get_politicians())
1026
-
1027
- if output_json:
1028
- console.print(JSON.from_data(politicians))
1029
- else:
1030
- console.print("👥 Politicians Database", style="bold cyan")
1031
-
1032
- if not politicians:
1033
- console.print("No politicians found", style="yellow")
1034
- return
1035
-
1036
- politicians_table = Table()
1037
- politicians_table.add_column("Name", style="cyan", min_width=25)
1038
- politicians_table.add_column("Role", style="green")
1039
- politicians_table.add_column("Party", style="blue")
1040
- politicians_table.add_column("State/Country", style="magenta")
1041
- politicians_table.add_column("District", style="yellow")
1042
- politicians_table.add_column("Added", style="dim")
1043
-
1044
- for pol in politicians:
1045
- role_display = {
1046
- "us_house_rep": "🏛️ House Rep",
1047
- "us_senator": "🏛️ Senator",
1048
- "eu_mep": "🇪🇺 MEP",
1049
- }.get(pol.get("role", ""), pol.get("role", "Unknown"))
1050
-
1051
- politicians_table.add_row(
1052
- pol.get("full_name")
1053
- or f"{pol.get('first_name', '')} {pol.get('last_name', '')}".strip(),
1054
- role_display,
1055
- pol.get("party", "") or "Independent",
1056
- pol.get("state_or_country", ""),
1057
- pol.get("district", "") or "At-Large",
1058
- _format_timestamp(pol.get("created_at", "")),
1059
- )
1060
-
1061
- console.print(politicians_table)
1062
- console.print(
1063
- f"\nShowing {len(politicians)} of {len(politicians)} politicians", style="dim"
1064
- )
1065
-
1066
- except Exception as e:
1067
- if output_json:
1068
- console.print(JSON.from_data({"error": str(e)}))
1069
- else:
1070
- console.print(f"❌ Failed to load politicians: {e}", style="bold red")
1071
- logger.error(f"Politicians view failed: {e}")
1072
-
1073
-
1074
- @politician_trading_cli.command("disclosures")
1075
- @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1076
- @click.option("--limit", default=20, help="Number of disclosures to show")
1077
- @click.option("--politician", help="Filter by politician name")
1078
- @click.option("--asset", help="Filter by asset name or ticker")
1079
- @click.option(
1080
- "--transaction-type",
1081
- type=click.Choice(["purchase", "sale", "exchange"]),
1082
- help="Filter by transaction type",
1083
- )
1084
- @click.option("--amount-min", type=float, help="Minimum transaction amount")
1085
- @click.option("--amount-max", type=float, help="Maximum transaction amount")
1086
- @click.option("--days", default=30, help="Show disclosures from last N days")
1087
- @click.option("--details", is_flag=True, help="Show detailed information including raw data")
1088
- def view_disclosures(
1089
- output_json: bool,
1090
- limit: int,
1091
- politician: str,
1092
- asset: str,
1093
- transaction_type: str,
1094
- amount_min: float,
1095
- amount_max: float,
1096
- days: int,
1097
- details: bool,
1098
- ):
1099
- """View and search trading disclosures in the database"""
1100
- console = Console()
1101
-
1102
- try:
1103
-
1104
- async def get_disclosures():
1105
- from datetime import datetime, timedelta, timezone
1106
-
1107
- from .config import WorkflowConfig
1108
- from .database import PoliticianTradingDB
1109
-
1110
- config = WorkflowConfig.default()
1111
- db = PoliticianTradingDB(config)
1112
-
1113
- # Build query with join to get politician info
1114
- # Supabase uses foreign key relationships for joins
1115
- query = db.client.table("trading_disclosures").select("*, politicians!inner(*)")
1116
-
1117
- # Date filter
1118
- if days > 0:
1119
- cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
1120
- query = query.gte("created_at", cutoff_date.isoformat())
1121
-
1122
- # Apply filters
1123
- if politician:
1124
- # For nested relationships, we need a different approach
1125
- # Let's use a simpler filter on the main table for now
1126
- query = query.filter("politicians.full_name", "ilike", f"%{politician}%")
1127
-
1128
- if asset:
1129
- query = query.or_(f"asset_name.ilike.%{asset}%,asset_ticker.ilike.%{asset}%")
1130
-
1131
- if transaction_type:
1132
- query = query.eq("transaction_type", transaction_type)
1133
-
1134
- if amount_min is not None:
1135
- query = query.gte("amount_range_min", amount_min)
1136
-
1137
- if amount_max is not None:
1138
- query = query.lte("amount_range_max", amount_max)
1139
-
1140
- result = query.order("transaction_date", desc=True).limit(limit).execute()
1141
- return result.data if result.data else []
1142
-
1143
- disclosures = asyncio.run(get_disclosures())
1144
-
1145
- if output_json:
1146
- console.print(JSON.from_data(disclosures))
1147
- else:
1148
- console.print("💰 Trading Disclosures Database", style="bold cyan")
1149
-
1150
- if not disclosures:
1151
- console.print("No disclosures found", style="yellow")
1152
- return
1153
-
1154
- if details:
1155
- # Detailed view
1156
- for i, disclosure in enumerate(disclosures):
1157
- console.print(f"\n[bold cyan]Disclosure {i+1}[/bold cyan]")
1158
-
1159
- detail_table = Table()
1160
- detail_table.add_column("Field", style="cyan")
1161
- detail_table.add_column("Value", style="white")
1162
-
1163
- politician_info = disclosure.get("politicians", {})
1164
- politician_name = (
1165
- politician_info.get("full_name")
1166
- or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
1167
- )
1168
-
1169
- detail_table.add_row(
1170
- "Politician",
1171
- f"{politician_name} ({politician_info.get('party', 'Unknown')})",
1172
- )
1173
- detail_table.add_row(
1174
- "Asset",
1175
- f"{disclosure.get('asset_name', 'Unknown')} ({disclosure.get('asset_ticker', 'N/A')})",
1176
- )
1177
- detail_table.add_row(
1178
- "Transaction", disclosure.get("transaction_type", "Unknown").title()
1179
- )
1180
- detail_table.add_row(
1181
- "Date", _format_timestamp(disclosure.get("transaction_date", ""))
1182
- )
1183
- detail_table.add_row(
1184
- "Disclosure Date", _format_timestamp(disclosure.get("disclosure_date", ""))
1185
- )
1186
-
1187
- # Amount formatting
1188
- amount_min = disclosure.get("amount_range_min")
1189
- amount_max = disclosure.get("amount_range_max")
1190
- amount_exact = disclosure.get("amount_exact")
1191
-
1192
- if amount_exact:
1193
- amount_str = f"${amount_exact:,.2f}"
1194
- elif amount_min is not None and amount_max is not None:
1195
- amount_str = f"${amount_min:,.0f} - ${amount_max:,.0f}"
1196
- else:
1197
- amount_str = "Unknown"
1198
-
1199
- detail_table.add_row("Amount", amount_str)
1200
- detail_table.add_row("Source URL", disclosure.get("source_url", "N/A"))
1201
- detail_table.add_row(
1202
- "Added", _format_timestamp(disclosure.get("created_at", ""))
1203
- )
1204
-
1205
- console.print(detail_table)
1206
- else:
1207
- # Compact table view
1208
- disclosures_table = Table()
1209
- disclosures_table.add_column("Politician", style="cyan", min_width=25)
1210
- disclosures_table.add_column("Asset", style="green")
1211
- disclosures_table.add_column("Type", style="blue")
1212
- disclosures_table.add_column("Amount", style="yellow")
1213
- disclosures_table.add_column("Date", style="magenta")
1214
- disclosures_table.add_column("Party", style="dim")
1215
-
1216
- for disclosure in disclosures:
1217
- politician_info = disclosure.get("politicians", {})
1218
- politician_name = (
1219
- politician_info.get("full_name")
1220
- or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
1221
- )
1222
-
1223
- # Format amount
1224
- amount_min = disclosure.get("amount_range_min")
1225
- amount_max = disclosure.get("amount_range_max")
1226
- amount_exact = disclosure.get("amount_exact")
1227
-
1228
- if amount_exact:
1229
- amount_str = f"${amount_exact:,.0f}"
1230
- elif amount_min is not None and amount_max is not None:
1231
- amount_str = f"${amount_min:,.0f}-${amount_max:,.0f}"
1232
- else:
1233
- amount_str = "Unknown"
1234
-
1235
- # Transaction type with emoji
1236
- trans_type = disclosure.get("transaction_type", "unknown")
1237
- trans_emoji = {
1238
- "purchase": "🟢 Buy",
1239
- "sale": "🔴 Sell",
1240
- "exchange": "🔄 Exchange",
1241
- }.get(trans_type, "❓ " + trans_type.title())
1242
-
1243
- disclosures_table.add_row(
1244
- politician_name[:35] + ("..." if len(politician_name) > 35 else ""),
1245
- _format_asset_display(disclosure),
1246
- trans_emoji,
1247
- amount_str,
1248
- _format_timestamp(disclosure.get("transaction_date", "")),
1249
- politician_info.get("party", "")[:12],
1250
- )
1251
-
1252
- console.print(disclosures_table)
1253
-
1254
- console.print(
1255
- f"\nShowing {len(disclosures)} disclosures from last {days} days", style="dim"
1256
- )
1257
-
1258
- except Exception as e:
1259
- if output_json:
1260
- console.print(JSON.from_data({"error": str(e)}))
1261
- else:
1262
- console.print(f"❌ Failed to load disclosures: {e}", style="bold red")
1263
- logger.error(f"Disclosures view failed: {e}")
1264
-
1265
-
1266
- @politician_trading_cli.command("verify")
1267
- @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1268
- def verify_database(output_json: bool):
1269
- """Verify database integrity and show summary statistics"""
1270
- console = Console()
1271
-
1272
- try:
1273
-
1274
- async def verify_data():
1275
- from datetime import timedelta
1276
-
1277
- from .config import WorkflowConfig
1278
- from .database import PoliticianTradingDB
1279
-
1280
- config = WorkflowConfig.default()
1281
- db = PoliticianTradingDB(config)
1282
-
1283
- verification = {
1284
- "timestamp": datetime.now().isoformat(),
1285
- "tables": {},
1286
- "integrity": {},
1287
- "summary": {},
1288
- }
1289
-
1290
- # Check each table
1291
- tables_to_check = ["politicians", "trading_disclosures", "data_pull_jobs"]
1292
-
1293
- for table_name in tables_to_check:
1294
- try:
1295
- result = db.client.table(table_name).select("id").execute()
1296
- count = len(result.data) if result.data else 0
1297
- verification["tables"][table_name] = {
1298
- "exists": True,
1299
- "record_count": count,
1300
- "status": "ok",
1301
- }
1302
- except Exception as e:
1303
- verification["tables"][table_name] = {
1304
- "exists": False,
1305
- "error": str(e),
1306
- "status": "error",
1307
- }
1308
-
1309
- # Check referential integrity - simplified approach
1310
- try:
1311
- # Just verify we can query both tables
1312
- disclosures_result = db.client.table("trading_disclosures").select("id").execute()
1313
- politicians_result = db.client.table("politicians").select("id").execute()
1314
-
1315
- disclosures_count = len(disclosures_result.data) if disclosures_result.data else 0
1316
- politicians_count = len(politicians_result.data) if politicians_result.data else 0
1317
-
1318
- verification["integrity"] = {
1319
- "disclosures_with_politicians": disclosures_count,
1320
- "total_politicians": politicians_count,
1321
- "status": "ok",
1322
- }
1323
- except Exception as e:
1324
- verification["integrity"] = {"error": str(e), "status": "error"}
1325
-
1326
- # Summary statistics
1327
- try:
1328
- politicians_count = verification["tables"]["politicians"]["record_count"]
1329
- disclosures_count = verification["tables"]["trading_disclosures"]["record_count"]
1330
- jobs_count = verification["tables"]["data_pull_jobs"]["record_count"]
1331
-
1332
- # Get recent activity
1333
- recent_jobs = (
1334
- db.client.table("data_pull_jobs")
1335
- .select("*")
1336
- .gte("started_at", (datetime.now() - timedelta(days=7)).isoformat())
1337
- .execute()
1338
- )
1339
-
1340
- recent_jobs_count = len(recent_jobs.data) if recent_jobs.data else 0
1341
- successful_jobs = len(
1342
- [j for j in (recent_jobs.data or []) if j.get("status") == "completed"]
1343
- )
1344
-
1345
- verification["summary"] = {
1346
- "total_politicians": politicians_count,
1347
- "total_disclosures": disclosures_count,
1348
- "total_jobs": jobs_count,
1349
- "jobs_last_7_days": recent_jobs_count,
1350
- "successful_jobs_last_7_days": successful_jobs,
1351
- "success_rate_7_days": (
1352
- (successful_jobs / recent_jobs_count * 100) if recent_jobs_count > 0 else 0
1353
- ),
1354
- }
1355
-
1356
- except Exception as e:
1357
- verification["summary"] = {"error": str(e)}
1358
-
1359
- return verification
1360
-
1361
- verification = asyncio.run(verify_data())
1362
-
1363
- if output_json:
1364
- console.print(JSON.from_data(verification))
1365
- else:
1366
- console.print("🔍 Database Verification Report", style="bold cyan")
1367
-
1368
- # Table status
1369
- tables_panel = Table(title="Table Status")
1370
- tables_panel.add_column("Table", style="cyan")
1371
- tables_panel.add_column("Status", style="white")
1372
- tables_panel.add_column("Records", justify="right", style="green")
1373
-
1374
- for table_name, info in verification["tables"].items():
1375
- status_color = "green" if info["status"] == "ok" else "red"
1376
- status_text = f"[{status_color}]{info['status'].upper()}[/{status_color}]"
1377
- record_count = str(info.get("record_count", "N/A"))
1378
-
1379
- tables_panel.add_row(table_name, status_text, record_count)
1380
-
1381
- console.print(tables_panel)
1382
-
1383
- # Integrity check
1384
- integrity_info = verification.get("integrity", {})
1385
- if integrity_info.get("status") == "ok":
1386
- console.print("✅ Data integrity check passed", style="green")
1387
- disc_count = integrity_info.get("disclosures_with_politicians", 0)
1388
- pol_count = integrity_info.get("total_politicians", 0)
1389
- console.print(
1390
- f" Disclosures: {disc_count}, Politicians: {pol_count}", style="dim"
1391
- )
1392
- else:
1393
- console.print("❌ Data integrity check failed", style="red")
1394
-
1395
- # Summary
1396
- summary = verification.get("summary", {})
1397
- if "error" not in summary:
1398
- console.print("\n📊 Summary Statistics", style="bold blue")
1399
- console.print(f"Politicians: {summary.get('total_politicians', 0)}")
1400
- console.print(f"Trading Disclosures: {summary.get('total_disclosures', 0)}")
1401
- console.print(f"Data Collection Jobs: {summary.get('total_jobs', 0)}")
1402
- console.print(
1403
- f"Jobs (7 days): {summary.get('jobs_last_7_days', 0)} ({summary.get('successful_jobs_last_7_days', 0)} successful)"
1404
- )
1405
- console.print(f"Success Rate: {summary.get('success_rate_7_days', 0):.1f}%")
1406
-
1407
- except Exception as e:
1408
- if output_json:
1409
- console.print(JSON.from_data({"error": str(e)}))
1410
- else:
1411
- console.print(f"❌ Verification failed: {e}", style="bold red")
1412
- logger.error(f"Database verification failed: {e}")
1413
-
1414
-
1415
- @politician_trading_cli.group("cron")
1416
- def cron_commands():
1417
- """Manage cron-based automated data collection"""
1418
- pass
1419
-
1420
-
1421
- @cron_commands.command("run")
1422
- @click.option(
1423
- "--type",
1424
- "collection_type",
1425
- default="full",
1426
- type=click.Choice(["full", "us", "eu", "quick"]),
1427
- help="Type of collection to run",
1428
- )
1429
- def cron_run(collection_type: str):
1430
- """Run scheduled data collection (designed for cron jobs)"""
1431
-
1432
- async def run_cron_collection():
1433
- """Run the cron collection"""
1434
- from datetime import datetime
1435
-
1436
- logger.info(f"Starting scheduled collection: {collection_type}")
1437
- console.print(f"🕐 Running {collection_type} data collection...", style="blue")
1438
-
1439
- try:
1440
- workflow = PoliticianTradingWorkflow()
1441
-
1442
- if collection_type == "full":
1443
- results = await run_politician_trading_collection()
1444
- elif collection_type == "us":
1445
- # US-only collection
1446
- us_results = await workflow._collect_us_congress_data()
1447
- ca_results = await workflow._collect_california_data()
1448
- us_states_results = await workflow._collect_us_states_data()
1449
-
1450
- results = {
1451
- "status": "completed",
1452
- "started_at": datetime.utcnow().isoformat(),
1453
- "completed_at": datetime.utcnow().isoformat(),
1454
- "jobs": {
1455
- "us_congress": us_results,
1456
- "california": ca_results,
1457
- "us_states": us_states_results,
1458
- },
1459
- "summary": {
1460
- "total_new_disclosures": sum(
1461
- [
1462
- us_results.get("new_disclosures", 0),
1463
- ca_results.get("new_disclosures", 0),
1464
- us_states_results.get("new_disclosures", 0),
1465
- ]
1466
- )
1467
- },
1468
- }
1469
- elif collection_type == "eu":
1470
- # EU-only collection
1471
- eu_results = await workflow._collect_eu_parliament_data()
1472
- eu_states_results = await workflow._collect_eu_member_states_data()
1473
- uk_results = await workflow._collect_uk_parliament_data()
1474
-
1475
- results = {
1476
- "status": "completed",
1477
- "started_at": datetime.utcnow().isoformat(),
1478
- "completed_at": datetime.utcnow().isoformat(),
1479
- "jobs": {
1480
- "eu_parliament": eu_results,
1481
- "eu_member_states": eu_states_results,
1482
- "uk_parliament": uk_results,
1483
- },
1484
- "summary": {
1485
- "total_new_disclosures": sum(
1486
- [
1487
- eu_results.get("new_disclosures", 0),
1488
- eu_states_results.get("new_disclosures", 0),
1489
- uk_results.get("new_disclosures", 0),
1490
- ]
1491
- )
1492
- },
1493
- }
1494
- elif collection_type == "quick":
1495
- # Quick status check
1496
- status = await workflow.run_quick_check()
1497
- results = {
1498
- "status": "completed",
1499
- "type": "quick_check",
1500
- "results": status,
1501
- "summary": {"total_new_disclosures": 0},
1502
- }
1503
-
1504
- # Log results
1505
- summary = results.get("summary", {})
1506
- logger.info(
1507
- f"Cron collection completed - New: {summary.get('total_new_disclosures', 0)}"
1508
- )
1509
-
1510
- console.print(f"✅ {collection_type.title()} collection completed", style="green")
1511
- console.print(
1512
- f"New disclosures: {summary.get('total_new_disclosures', 0)}", style="cyan"
1513
- )
1514
-
1515
- return results
1516
-
1517
- except Exception as e:
1518
- logger.error(f"Cron collection failed: {e}")
1519
- console.print(f"❌ Collection failed: {e}", style="red")
1520
- return {"status": "failed", "error": str(e)}
1521
-
1522
- asyncio.run(run_cron_collection())
1523
-
1524
-
1525
- @cron_commands.command("setup")
1526
- def cron_setup():
1527
- """Show cron setup instructions"""
1528
- console.print("🕐 CRON SETUP INSTRUCTIONS", style="bold cyan")
1529
- console.print("Add these lines to your crontab (run: crontab -e)", style="dim")
1530
-
1531
- # Get current working directory for the cron commands
1532
- repo_path = Path(__file__).parent.parent.parent.parent.parent
1533
-
1534
- instructions = f"""
1535
- # Full collection every 6 hours
1536
- 0 */6 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type full >> /tmp/politician_cron.log 2>&1
1537
-
1538
- # US collection every 4 hours
1539
- 0 */4 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type us >> /tmp/politician_cron.log 2>&1
1540
-
1541
- # EU collection every 8 hours
1542
- 0 */8 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type eu >> /tmp/politician_cron.log 2>&1
1543
-
1544
- # Quick health check daily at 9 AM
1545
- 0 9 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type quick >> /tmp/politician_cron.log 2>&1
1546
- """
1547
-
1548
- console.print(Panel(instructions, title="Crontab Entries", border_style="blue"))
1549
-
1550
- console.print("\n💡 Tips:", style="bold yellow")
1551
- console.print("• Start with just one cron job to test", style="dim")
1552
- console.print("• Check logs at /tmp/politician_cron.log", style="dim")
1553
- console.print("• Use 'mcli politician-trading monitor' to check results", style="dim")
1554
-
1555
-
1556
- @politician_trading_cli.command("monitor")
1557
- @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1558
- def monitor_system(output_json: bool):
1559
- """Monitor system status, jobs, and database"""
1560
-
1561
- async def run_monitor():
1562
- """Run the monitoring"""
1563
- try:
1564
- config = WorkflowConfig.default()
1565
- db = PoliticianTradingDB(config)
1566
- workflow = PoliticianTradingWorkflow(config)
1567
-
1568
- # Get system health
1569
- await db.ensure_schema()
1570
- quick_status = await workflow.run_quick_check()
1571
-
1572
- # Get job history
1573
- job_status = await db.get_job_status()
1574
- recent_jobs = job_status.get("recent_jobs", [])
1575
-
1576
- # Analyze job statistics
1577
- status_counts = {"completed": 0, "running": 0, "failed": 0, "pending": 0}
1578
- job_types = {}
1579
- latest_by_type = {}
1580
-
1581
- for job in recent_jobs:
1582
- status = job.get("status", "unknown")
1583
- job_type = job.get("job_type", "unknown")
1584
- started_at = job.get("started_at", "")
1585
-
1586
- if status in status_counts:
1587
- status_counts[status] += 1
1588
- job_types[job_type] = job_types.get(job_type, 0) + 1
1589
-
1590
- if job_type not in latest_by_type or started_at > latest_by_type[job_type].get(
1591
- "started_at", ""
1592
- ):
1593
- latest_by_type[job_type] = job
1594
-
1595
- # Get scraper availability
1596
- try:
1597
- from . import scrapers
1598
-
1599
- scraper_status = {
1600
- "UK Parliament API": scrapers.UK_SCRAPER_AVAILABLE,
1601
- "California NetFile": scrapers.CALIFORNIA_SCRAPER_AVAILABLE,
1602
- "EU Member States": scrapers.EU_MEMBER_STATES_SCRAPER_AVAILABLE,
1603
- "US States Ethics": scrapers.US_STATES_SCRAPER_AVAILABLE,
1604
- }
1605
- available_scrapers = sum(scraper_status.values())
1606
- except:
1607
- scraper_status = {}
1608
- available_scrapers = 0
1609
-
1610
- monitor_data = {
1611
- "system_health": {
1612
- "database_connection": quick_status.get("database_connection", "unknown"),
1613
- "config_loaded": quick_status.get("config_loaded", "unknown"),
1614
- "timestamp": quick_status.get("timestamp", datetime.now().isoformat()),
1615
- },
1616
- "job_statistics": {
1617
- "total_recent_jobs": len(recent_jobs),
1618
- "status_counts": status_counts,
1619
- "job_types": job_types,
1620
- },
1621
- "latest_jobs": latest_by_type,
1622
- "scraper_availability": {
1623
- "available_count": available_scrapers,
1624
- "total_count": len(scraper_status),
1625
- "scrapers": scraper_status,
1626
- },
1627
- }
1628
-
1629
- return monitor_data
1630
-
1631
- except Exception as e:
1632
- logger.error(f"Monitoring failed: {e}")
1633
- return {"error": str(e)}
1634
-
1635
- monitor_data = asyncio.run(run_monitor())
1636
-
1637
- if output_json:
1638
- console.print(JSON.from_data(monitor_data))
1639
- else:
1640
- console.print("🔍 SYSTEM MONITOR", style="bold cyan")
1641
-
1642
- # System Health
1643
- health = monitor_data.get("system_health", {})
1644
- health_table = Table(title="System Health")
1645
- health_table.add_column("Component", style="cyan")
1646
- health_table.add_column("Status", style="white")
1647
-
1648
- db_status = health["database_connection"]
1649
- db_color = "green" if db_status == "ok" else "red"
1650
- health_table.add_row("Database", f"[{db_color}]{db_status.upper()}[/{db_color}]")
1651
-
1652
- config_status = health["config_loaded"]
1653
- config_color = "green" if config_status == "ok" else "red"
1654
- health_table.add_row(
1655
- "Configuration", f"[{config_color}]{config_status.upper()}[/{config_color}]"
1656
- )
1657
-
1658
- console.print(health_table)
1659
-
1660
- # Job Statistics
1661
- job_stats = monitor_data.get("job_statistics", {})
1662
- console.print(
1663
- f"\n📊 Job Statistics (Total: {job_stats.get('total_recent_jobs', 0)})",
1664
- style="bold blue",
1665
- )
1666
-
1667
- status_counts = job_stats.get("status_counts", {})
1668
- for status, count in status_counts.items():
1669
- if count > 0:
1670
- icon = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}[status]
1671
- console.print(f"{icon} {status.title()}: {count}")
1672
-
1673
- # Latest Jobs by Type
1674
- console.print(f"\n📋 Latest Jobs by Source", style="bold blue")
1675
- latest_jobs = monitor_data.get("latest_jobs", {})
1676
-
1677
- for job_type, job in sorted(latest_jobs.items()):
1678
- status = job.get("status", "unknown")
1679
- icon = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}.get(
1680
- status, "❓"
1681
- )
1682
-
1683
- source_name = job_type.replace("_", " ").title()
1684
- console.print(f"\n{icon} {source_name}")
1685
- console.print(f" Status: {status}")
1686
- console.print(f" Last run: {job.get('started_at', 'N/A')[:19]}")
1687
- console.print(
1688
- f" Records: {job.get('records_processed', 0)} processed, {job.get('records_new', 0)} new"
1689
- )
1690
-
1691
- # Scraper Availability
1692
- scraper_info = monitor_data.get("scraper_availability", {})
1693
- available = scraper_info.get("available_count", 0)
1694
- total = scraper_info.get("total_count", 0)
1695
-
1696
- console.print(f"\n🌍 Scraper Availability: {available}/{total}", style="bold blue")
1697
-
1698
- scrapers_status = scraper_info.get("scrapers", {})
1699
- for scraper_name, available in scrapers_status.items():
1700
- icon = "✅" if available else "❌"
1701
- status = "Available" if available else "Not Available"
1702
- console.print(f"{icon} {scraper_name}: {status}")
1703
-
1704
-
1705
@politician_trading_cli.command("read-data")
@click.option("--limit", default=50, help="Number of recent records to show")
@click.option("--days", default=7, help="Days back to look for data")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def read_recent_data(limit: int, days: int, output_json: bool):
    """Read recent data from the database"""
    # The docstring above is what click shows as the command help, so it is
    # kept to one line; the details live in these comments.
    #
    # Args:
    #   limit: maximum number of jobs returned in the "recent_jobs" payload.
    #   days: accepted for CLI compatibility.
    #       NOTE(review): nothing in this command applies it to the job query;
    #       confirm whether get_job_status() should receive it.
    #   output_json: print the raw payload as JSON instead of the summary.
    fresh_window = timedelta(hours=24)
    status_icons = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}

    def is_fresh(completed_at) -> bool:
        """Return True when *completed_at* is an ISO timestamp under 24 hours old."""
        if not completed_at:
            return False
        try:
            finished = datetime.fromisoformat(completed_at.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            # Non-string or unparseable timestamps are reported as stale
            # rather than aborting the whole command.
            return False
        # Take "now" in the timestamp's own timezone (naive stays naive) so an
        # aware UTC timestamp is never compared against local wall-clock time.
        return datetime.now(finished.tzinfo) - finished < fresh_window

    async def read_data():
        """Fetch job history and derive the latest successful run per job type."""
        try:
            config = WorkflowConfig.default()
            db = PoliticianTradingDB(config)

            # Get job history
            job_status = await db.get_job_status()
            jobs = job_status.get("recent_jobs") or []

            # Analyze data freshness: keep the newest completed job per type.
            freshness = {}
            for job in jobs:
                if job.get("status") != "completed":
                    continue

                job_type = job.get("job_type") or "unknown"
                completed_at = job.get("completed_at")

                known = freshness.get(job_type)
                if known is not None:
                    last_success = known["last_success"]
                    # A missing timestamp never displaces a recorded run, and
                    # a recorded timestamp is only displaced by a newer one.
                    if not completed_at or (last_success and completed_at <= last_success):
                        continue

                freshness[job_type] = {
                    "last_success": completed_at,
                    "records_collected": job.get("records_new", 0),
                    "status": "fresh" if is_fresh(completed_at) else "stale",
                }

            return {
                "recent_jobs": jobs[:limit],
                "data_freshness": freshness,
                "summary": {
                    "total_jobs": len(jobs),
                    "job_types": len({job.get("job_type") for job in jobs}),
                    "fresh_sources": sum(
                        1 for entry in freshness.values() if entry["status"] == "fresh"
                    ),
                },
            }

        except Exception as e:
            # Top-level boundary for the command: report the failure in the
            # payload so both the JSON and the text output can show it.
            logger.error(f"Failed to read data: {e}")
            return {"error": str(e)}

    data = asyncio.run(read_data())

    if output_json:
        console.print(JSON.from_data(data))
        return

    console.print("📊 RECENT DATA SUMMARY", style="bold cyan")

    if "error" in data:
        console.print(f"❌ Error: {data['error']}", style="red")
        return

    # Summary stats
    summary = data.get("summary", {})
    console.print("\n📈 Summary:", style="bold blue")
    console.print(f"Total recent jobs: {summary.get('total_jobs', 0)}")
    console.print(f"Active job types: {summary.get('job_types', 0)}")
    console.print(f"Fresh data sources: {summary.get('fresh_sources', 0)}")

    # Data freshness
    freshness = data.get("data_freshness", {})
    if freshness:
        console.print("\n🕐 Data Freshness:", style="bold blue")
        for source, info in freshness.items():
            status_icon = "🟢" if info["status"] == "fresh" else "🟡"
            source_name = source.replace("_", " ").title()
            last_success = info["last_success"][:19] if info["last_success"] else "Never"
            console.print(f"{status_icon} {source_name}: {last_success}")

    # Recent jobs
    recent_jobs = data.get("recent_jobs", [])[:10]  # Show top 10
    if recent_jobs:
        console.print(f"\n📋 Recent Jobs (showing {len(recent_jobs)}):", style="bold blue")
        for job in recent_jobs:
            status_icon = status_icons.get(job.get("status"), "❓")
            # `or` also covers keys that are present but hold None.
            job_type = (job.get("job_type") or "unknown").replace("_", " ").title()
            started_at = (job.get("started_at") or "N/A")[:19]
            console.print(f"{status_icon} {job_type}: {started_at}")
1807
-
1808
-
1809
def _show_real_data_status() -> None:
    """Print the data-collection status overview and the available commands."""
    console.print("🔧 DATA COLLECTION CONFIGURATION", style="bold cyan")

    # NOTE(review): these three status lines are fixed text; nothing here
    # inspects the scraper files or their backups to confirm the actual mode.
    console.print("\n📋 Current Status:", style="bold blue")
    console.print("• Sample data mode: Currently DISABLED", style="green")
    console.print("• Real API calls: Currently ACTIVE", style="green")
    console.print("• Database writes: Currently WORKING", style="green")

    console.print("\n🎯 Data Source Readiness:", style="bold blue")
    readiness_info = [
        ("UK Parliament API", "✅ Active - Real API with full transaction data", "green"),
        ("US House/Senate", "✅ Active - Real disclosure database access", "green"),
        ("EU Parliament", "✅ Active - Real MEP profile scraping", "green"),
        ("California NetFile", "⚠️ Limited - Complex forms require careful handling", "yellow"),
        ("EU Member States", "⚠️ Limited - Country-specific implementations needed", "yellow"),
    ]

    for source, info, color in readiness_info:
        # Include the source name; without it the lines cannot be told apart.
        console.print(f"{source}: {info}", style=color)

    console.print("\n💡 Commands:", style="bold blue")
    console.print("mcli politician-trading config-real-data --enable # Enable real data")
    console.print("mcli politician-trading config-real-data --restore # Restore sample mode")


def _restore_sample_mode(src_dir: Path, scraper_files: list) -> None:
    """Copy each ``<scraper>.backup`` file back over its scraper module."""
    console.print("🔄 RESTORING SAMPLE DATA MODE", style="bold yellow")

    restored = 0
    for file_name in scraper_files:
        file_path = src_dir / file_name
        backup_path = file_path.with_name(file_path.name + ".backup")

        if not backup_path.exists():
            console.print(f"ℹ️ No backup found for {file_name}", style="dim")
            continue

        try:
            # Byte-for-byte copy: restoring must not depend on the platform's
            # default text encoding or newline translation.
            file_path.write_bytes(backup_path.read_bytes())
            restored += 1
            console.print(f"✅ Restored {file_name} from backup", style="green")
        except Exception as e:
            console.print(f"❌ Failed to restore {file_name}: {e}", style="red")

    console.print(f"\n🎯 Restored {restored} files to sample mode", style="green")


def _enable_real_data(src_dir: Path, scraper_files: list) -> None:
    """Rewrite the scraper modules in place to turn off their sample flags.

    WARNING: this edits installed source files. A ``.backup`` copy of each
    file is written before it is changed; files that need no change are left
    untouched and get no backup.
    """
    console.print("🚀 ENABLING REAL DATA COLLECTION", style="bold green")

    modifications_made = 0
    with Progress(
        SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console
    ) as progress:
        task = progress.add_task("Configuring scrapers...", total=len(scraper_files))

        for file_name in scraper_files:
            progress.update(task, description=f"Processing {file_name}...")
            file_path = src_dir / file_name

            if file_path.exists():
                try:
                    # Python source is UTF-8 by default; read and write it
                    # explicitly so the result does not depend on the locale.
                    original_content = file_path.read_text(encoding="utf-8")

                    # Remove sample flags
                    content = re.sub(r'"sample":\s*True', '"sample": False', original_content)
                    content = re.sub(r"'sample':\s*True", "'sample': False", content)

                    # Enable actual processing
                    content = re.sub(
                        r"# This would implement actual (.+?) scraping",
                        r'logger.info("Processing real \1 data")',
                        content,
                    )

                    if content != original_content:
                        # Back up the original before overwriting it.
                        backup_path = file_path.with_name(file_path.name + ".backup")
                        backup_path.write_text(original_content, encoding="utf-8")

                        file_path.write_text(content, encoding="utf-8")
                        modifications_made += 1

                except Exception as e:
                    # Report and continue: one bad file should not stop the rest.
                    console.print(f"❌ Error processing {file_name}: {e}", style="red")

            progress.advance(task)

    console.print("\n✅ Real data configuration complete!", style="bold green")
    console.print(f"Modified {modifications_made} scraper files", style="green")

    if modifications_made > 0:
        console.print("\n⚠️ Important Next Steps:", style="bold yellow")
        console.print("1. Test with UK Parliament first (most reliable)", style="dim")
        console.print("2. Monitor API rate limits carefully", style="dim")
        console.print("3. Check logs for parsing errors", style="dim")
        console.print("4. Use --restore flag if issues occur", style="dim")

        console.print("\n🧪 Test Commands:", style="bold blue")
        console.print("mcli politician-trading cron run --type quick # Quick test")
        console.print("mcli politician-trading monitor # Check results")


@politician_trading_cli.command("config-real-data")
@click.option("--enable", is_flag=True, help="Enable real data collection")
@click.option("--restore", is_flag=True, help="Restore sample data mode")
@click.option("--status", is_flag=True, help="Show current configuration status")
def configure_real_data(enable: bool, restore: bool, status: bool):
    """Configure real vs sample data collection"""
    # The docstring above is the click help text, so it stays one line.
    #
    # Args:
    #   enable: rewrite the scraper modules to disable their sample flags.
    #   restore: put the scraper modules back from their .backup copies.
    #   status: show the status overview (also the default with no flags).
    # When both --enable and --restore are given, restore takes precedence.
    if status or not (enable or restore):
        _show_real_data_status()
        return

    # Get scraper files
    src_dir = Path(__file__).parent
    scraper_files = [
        "scrapers_uk.py",
        "scrapers_california.py",
        "scrapers_eu.py",
        "scrapers_us_states.py",
    ]

    if restore:
        _restore_sample_mode(src_dir, scraper_files)
    elif enable:
        _enable_real_data(src_dir, scraper_files)
1936
-
1937
-
1938
# Export the CLI group for registration: `cli` is a module-level alias of
# `politician_trading_cli`, so both names refer to the same group object.
cli = politician_trading_cli