mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +6 -2
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +68 -57
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +216 -150
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +62 -50
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +31 -16
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
  90. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
  91. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
@@ -8,25 +8,30 @@ import os
8
8
  import re
9
9
  from datetime import datetime, timedelta
10
10
  from pathlib import Path
11
- from typing import Dict, Any, List
11
+ from typing import Any, Dict, List
12
12
 
13
13
  import click
14
14
  from rich.console import Console
15
- from rich.table import Table
16
- from rich.panel import Panel
17
15
  from rich.json import JSON
16
+ from rich.panel import Panel
18
17
  from rich.progress import Progress, SpinnerColumn, TextColumn
18
+ from rich.table import Table
19
19
 
20
20
  from mcli.lib.logger.logger import get_logger
21
+
22
+ from .config import WorkflowConfig
23
+ from .connectivity import (
24
+ SupabaseConnectivityValidator,
25
+ run_connectivity_validation,
26
+ run_continuous_monitoring,
27
+ )
28
+ from .database import PoliticianTradingDB
29
+ from .monitoring import PoliticianTradingMonitor, run_health_check, run_stats_report
21
30
  from .workflow import (
22
31
  PoliticianTradingWorkflow,
23
- run_politician_trading_collection,
24
32
  check_politician_trading_status,
33
+ run_politician_trading_collection,
25
34
  )
26
- from .config import WorkflowConfig
27
- from .database import PoliticianTradingDB
28
- from .monitoring import PoliticianTradingMonitor, run_health_check, run_stats_report
29
- from .connectivity import SupabaseConnectivityValidator, run_connectivity_validation, run_continuous_monitoring
30
35
 
31
36
  logger = get_logger(__name__)
32
37
  console = Console()
@@ -200,25 +205,27 @@ Timeout: {config.scraping.timeout}s"""
200
205
 
201
206
  if generate_schema:
202
207
  console.print("๐Ÿ“„ Generating database schema files...", style="blue")
203
-
208
+
204
209
  # Generate schema file
205
210
  import os
206
211
  from pathlib import Path
207
-
212
+
208
213
  output_path = Path(output_dir)
209
214
  output_path.mkdir(exist_ok=True)
210
-
215
+
211
216
  # Read the schema SQL from the module
212
217
  schema_file = Path(__file__).parent / "schema.sql"
213
218
  if schema_file.exists():
214
219
  schema_content = schema_file.read_text()
215
-
220
+
216
221
  # Write to output directory
217
222
  output_schema_file = output_path / "politician_trading_schema.sql"
218
223
  output_schema_file.write_text(schema_content)
219
-
220
- console.print(f"โœ… Schema SQL generated: {output_schema_file.absolute()}", style="green")
221
-
224
+
225
+ console.print(
226
+ f"โœ… Schema SQL generated: {output_schema_file.absolute()}", style="green"
227
+ )
228
+
222
229
  # Also generate a setup instructions file
223
230
  instructions = f"""# Politician Trading Database Setup Instructions
224
231
 
@@ -269,24 +276,27 @@ If you encounter issues:
269
276
  2. View logs: `politician-trading health`
270
277
  3. Test workflow: `politician-trading test-workflow --verbose`
271
278
  """
272
-
279
+
273
280
  instructions_file = output_path / "SETUP_INSTRUCTIONS.md"
274
281
  instructions_file.write_text(instructions)
275
-
276
- console.print(f"โœ… Setup instructions generated: {instructions_file.absolute()}", style="green")
277
-
282
+
283
+ console.print(
284
+ f"โœ… Setup instructions generated: {instructions_file.absolute()}",
285
+ style="green",
286
+ )
287
+
278
288
  # Display summary
279
289
  console.print("\n๐Ÿ“‹ Generated Files:", style="bold")
280
290
  console.print(f" ๐Ÿ“„ Schema SQL: {output_schema_file.name}")
281
291
  console.print(f" ๐Ÿ“‹ Instructions: {instructions_file.name}")
282
292
  console.print(f" ๐Ÿ“ Location: {output_path.absolute()}")
283
-
293
+
284
294
  console.print("\n๐Ÿš€ Next Steps:", style="bold green")
285
295
  console.print("1. Open Supabase SQL editor")
286
296
  console.print(f"2. Execute SQL from: {output_schema_file.name}")
287
297
  console.print("3. Run: politician-trading setup --verify")
288
298
  console.print("4. Run: politician-trading test-workflow --verbose")
289
-
299
+
290
300
  else:
291
301
  console.print("โŒ Schema template not found", style="red")
292
302
 
@@ -449,13 +459,13 @@ def check_connectivity(output_json: bool, continuous: bool, interval: int, durat
449
459
  else:
450
460
  try:
451
461
  validation_result = asyncio.run(run_connectivity_validation())
452
-
462
+
453
463
  if output_json:
454
464
  console.print(JSON.from_data(validation_result))
455
465
  else:
456
466
  validator = SupabaseConnectivityValidator()
457
467
  validator.display_connectivity_report(validation_result)
458
-
468
+
459
469
  except Exception as e:
460
470
  console.print(f"โŒ Connectivity validation failed: {e}", style="bold red")
461
471
  logger.error(f"Connectivity validation failed: {e}")
@@ -467,94 +477,109 @@ def check_connectivity(output_json: bool, continuous: bool, interval: int, durat
467
477
  def test_full_workflow(verbose: bool, validate_writes: bool):
468
478
  """Run a complete workflow test with live Supabase connectivity"""
469
479
  console.print("๐Ÿงช Running Full Politician Trading Workflow Test", style="bold green")
470
-
480
+
471
481
  async def run_test():
472
482
  # First validate connectivity
473
483
  console.print("\n๐Ÿ”— Step 1: Validating Supabase connectivity...", style="blue")
474
484
  validator = SupabaseConnectivityValidator()
475
485
  connectivity_result = await validator.validate_connectivity()
476
-
486
+
477
487
  if verbose:
478
488
  validator.display_connectivity_report(connectivity_result)
479
489
  else:
480
- console.print(f"Connectivity Score: {connectivity_result['connectivity_score']}%", style="cyan")
481
-
482
- if connectivity_result['connectivity_score'] < 75:
490
+ console.print(
491
+ f"Connectivity Score: {connectivity_result['connectivity_score']}%", style="cyan"
492
+ )
493
+
494
+ if connectivity_result["connectivity_score"] < 75:
483
495
  console.print("โš ๏ธ Connectivity issues detected. Workflow may fail.", style="yellow")
484
-
496
+
485
497
  # Run the workflow
486
498
  console.print("\n๐Ÿ›๏ธ Step 2: Running politician trading collection workflow...", style="blue")
487
-
499
+
488
500
  try:
489
501
  with console.status("[bold blue]Executing workflow...") as status:
490
502
  workflow_result = await run_politician_trading_collection()
491
-
503
+
492
504
  # Display workflow results
493
505
  console.print("\n๐Ÿ“Š Workflow Results:", style="bold")
494
-
506
+
495
507
  if workflow_result.get("status") == "completed":
496
508
  console.print("โœ… Workflow completed successfully!", style="green")
497
-
509
+
498
510
  summary = workflow_result.get("summary", {})
499
511
  console.print(f"New Disclosures: {summary.get('total_new_disclosures', 0)}")
500
512
  console.print(f"Updated Disclosures: {summary.get('total_updated_disclosures', 0)}")
501
513
  console.print(f"Errors: {len(summary.get('errors', []))}")
502
-
514
+
503
515
  if verbose and summary.get("errors"):
504
516
  console.print("\nErrors encountered:", style="red")
505
517
  for error in summary["errors"][:5]: # Show first 5 errors
506
518
  console.print(f" โ€ข {error}", style="dim red")
507
-
519
+
508
520
  else:
509
521
  console.print("โŒ Workflow failed!", style="red")
510
522
  if "error" in workflow_result:
511
523
  console.print(f"Error: {workflow_result['error']}", style="red")
512
-
524
+
513
525
  # Validate writes if requested
514
526
  if validate_writes:
515
527
  console.print("\n๐Ÿ” Step 3: Validating database writes...", style="blue")
516
528
  write_validation = await validator._test_write_operations()
517
-
529
+
518
530
  if write_validation["success"]:
519
531
  console.print("โœ… Database writes validated successfully", style="green")
520
532
  else:
521
- console.print(f"โŒ Database write validation failed: {write_validation.get('error', 'Unknown error')}", style="red")
522
-
533
+ console.print(
534
+ f"โŒ Database write validation failed: {write_validation.get('error', 'Unknown error')}",
535
+ style="red",
536
+ )
537
+
523
538
  # Final connectivity check
524
539
  console.print("\n๐Ÿ”— Step 4: Post-workflow connectivity check...", style="blue")
525
540
  final_connectivity = await validator.validate_connectivity()
526
-
527
- console.print(f"Final Connectivity Score: {final_connectivity['connectivity_score']}%", style="cyan")
528
-
541
+
542
+ console.print(
543
+ f"Final Connectivity Score: {final_connectivity['connectivity_score']}%",
544
+ style="cyan",
545
+ )
546
+
529
547
  # Summary
530
548
  console.print("\n๐Ÿ“‹ Test Summary:", style="bold")
531
- workflow_status = "โœ… PASSED" if workflow_result.get("status") == "completed" else "โŒ FAILED"
532
- connectivity_status = "โœ… GOOD" if final_connectivity['connectivity_score'] >= 75 else "โš ๏ธ DEGRADED"
533
-
549
+ workflow_status = (
550
+ "โœ… PASSED" if workflow_result.get("status") == "completed" else "โŒ FAILED"
551
+ )
552
+ connectivity_status = (
553
+ "โœ… GOOD" if final_connectivity["connectivity_score"] >= 75 else "โš ๏ธ DEGRADED"
554
+ )
555
+
534
556
  console.print(f"Workflow: {workflow_status}")
535
557
  console.print(f"Connectivity: {connectivity_status}")
536
- console.print(f"Duration: {workflow_result.get('started_at', '')} to {workflow_result.get('completed_at', '')}")
537
-
558
+ console.print(
559
+ f"Duration: {workflow_result.get('started_at', '')} to {workflow_result.get('completed_at', '')}"
560
+ )
561
+
538
562
  return {
539
563
  "workflow_result": workflow_result,
540
564
  "connectivity_result": final_connectivity,
541
- "test_passed": workflow_result.get("status") == "completed" and final_connectivity['connectivity_score'] >= 75
565
+ "test_passed": workflow_result.get("status") == "completed"
566
+ and final_connectivity["connectivity_score"] >= 75,
542
567
  }
543
-
568
+
544
569
  except Exception as e:
545
570
  console.print(f"โŒ Workflow test failed: {e}", style="bold red")
546
571
  if verbose:
547
572
  console.print_exception()
548
573
  return {"error": str(e), "test_passed": False}
549
-
574
+
550
575
  try:
551
576
  test_result = asyncio.run(run_test())
552
-
577
+
553
578
  if test_result.get("test_passed"):
554
579
  console.print("\n๐ŸŽ‰ Full workflow test PASSED!", style="bold green")
555
580
  else:
556
581
  console.print("\nโŒ Full workflow test FAILED!", style="bold red")
557
-
582
+
558
583
  except Exception as e:
559
584
  console.print(f"โŒ Test execution failed: {e}", style="bold red")
560
585
  logger.error(f"Test workflow command failed: {e}")
@@ -566,83 +591,114 @@ def test_full_workflow(verbose: bool, validate_writes: bool):
566
591
  @click.option("--output-dir", default=".", help="Output directory for generated files")
567
592
  def manage_schema(show_location: bool, generate: bool, output_dir: str):
568
593
  """Manage database schema files"""
569
-
594
+
570
595
  if show_location:
571
596
  console.print("๐Ÿ“ Schema File Locations", style="bold blue")
572
-
597
+
573
598
  from pathlib import Path
599
+
574
600
  schema_file = Path(__file__).parent / "schema.sql"
575
-
601
+
576
602
  console.print(f"Built-in Schema: {schema_file.absolute()}", style="cyan")
577
603
  console.print(f"File size: {schema_file.stat().st_size} bytes", style="dim")
578
- console.print(f"Exists: {'โœ… Yes' if schema_file.exists() else 'โŒ No'}", style="green" if schema_file.exists() else "red")
579
-
604
+ console.print(
605
+ f"Exists: {'โœ… Yes' if schema_file.exists() else 'โŒ No'}",
606
+ style="green" if schema_file.exists() else "red",
607
+ )
608
+
580
609
  # Show current working directory option
581
610
  cwd_schema = Path.cwd() / "politician_trading_schema.sql"
582
611
  console.print(f"\nCurrent directory: {cwd_schema.absolute()}", style="cyan")
583
- console.print(f"Exists: {'โœ… Yes' if cwd_schema.exists() else 'โŒ No'}", style="green" if cwd_schema.exists() else "dim")
584
-
612
+ console.print(
613
+ f"Exists: {'โœ… Yes' if cwd_schema.exists() else 'โŒ No'}",
614
+ style="green" if cwd_schema.exists() else "dim",
615
+ )
616
+
585
617
  if not cwd_schema.exists():
586
618
  console.print("\n๐Ÿ’ก To generate schema file here:", style="blue")
587
619
  console.print("politician-trading schema --generate", style="yellow")
588
-
620
+
589
621
  elif generate:
590
622
  # Reuse the setup command logic
591
623
  try:
592
- from pathlib import Path
593
624
  import os
594
-
625
+ from pathlib import Path
626
+
595
627
  console.print("๐Ÿ“„ Generating database schema files...", style="blue")
596
-
628
+
597
629
  output_path = Path(output_dir)
598
630
  output_path.mkdir(exist_ok=True)
599
-
631
+
600
632
  # Read the schema SQL from the module
601
633
  schema_file = Path(__file__).parent / "schema.sql"
602
634
  if schema_file.exists():
603
635
  schema_content = schema_file.read_text()
604
-
636
+
605
637
  # Write to output directory
606
638
  output_schema_file = output_path / "politician_trading_schema.sql"
607
639
  output_schema_file.write_text(schema_content)
608
-
609
- console.print(f"โœ… Schema SQL generated: {output_schema_file.absolute()}", style="green")
610
-
640
+
641
+ console.print(
642
+ f"โœ… Schema SQL generated: {output_schema_file.absolute()}", style="green"
643
+ )
644
+
611
645
  # Show file info
612
646
  console.print(f"๐Ÿ“Š File size: {output_schema_file.stat().st_size:,} bytes")
613
647
  console.print(f"๐Ÿ“… Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
614
-
648
+
615
649
  # Count SQL statements
616
- statements = len([line for line in schema_content.split('\n') if line.strip().startswith(('CREATE', 'INSERT', 'SELECT'))])
650
+ statements = len(
651
+ [
652
+ line
653
+ for line in schema_content.split("\n")
654
+ if line.strip().startswith(("CREATE", "INSERT", "SELECT"))
655
+ ]
656
+ )
617
657
  console.print(f"๐Ÿ“ SQL statements: {statements}")
618
-
658
+
619
659
  else:
620
660
  console.print("โŒ Schema template not found", style="red")
621
-
661
+
622
662
  except Exception as e:
623
663
  console.print(f"โŒ Schema generation failed: {e}", style="red")
624
-
664
+
625
665
  else:
626
666
  # Show schema information by default
627
667
  console.print("๐Ÿ—‚๏ธ Politician Trading Database Schema", style="bold blue")
628
-
668
+
629
669
  schema_info = [
630
- ("politicians", "Stores politician information", "UUID primary key, bioguide_id, role, party"),
631
- ("trading_disclosures", "Individual trading transactions", "References politicians, amount ranges, asset details"),
632
- ("data_pull_jobs", "Job execution tracking", "Status, timing, record counts, error details"),
633
- ("data_sources", "Data source configuration", "URLs, regions, health status, request config")
670
+ (
671
+ "politicians",
672
+ "Stores politician information",
673
+ "UUID primary key, bioguide_id, role, party",
674
+ ),
675
+ (
676
+ "trading_disclosures",
677
+ "Individual trading transactions",
678
+ "References politicians, amount ranges, asset details",
679
+ ),
680
+ (
681
+ "data_pull_jobs",
682
+ "Job execution tracking",
683
+ "Status, timing, record counts, error details",
684
+ ),
685
+ (
686
+ "data_sources",
687
+ "Data source configuration",
688
+ "URLs, regions, health status, request config",
689
+ ),
634
690
  ]
635
-
691
+
636
692
  schema_table = Table(title="Database Tables")
637
693
  schema_table.add_column("Table", style="cyan")
638
694
  schema_table.add_column("Purpose", style="white")
639
695
  schema_table.add_column("Key Features", style="yellow")
640
-
696
+
641
697
  for table_name, purpose, features in schema_info:
642
698
  schema_table.add_row(table_name, purpose, features)
643
-
699
+
644
700
  console.print(schema_table)
645
-
701
+
646
702
  console.print("\n๐Ÿš€ Commands:", style="bold")
647
703
  console.print(" --show-location Show where schema files are located")
648
704
  console.print(" --generate Generate schema SQL file")
@@ -689,11 +745,11 @@ def _format_timestamp(timestamp: str) -> str:
689
745
 
690
746
  def _format_asset_display(disclosure: Dict[str, Any]) -> str:
691
747
  """Format asset display with proper ticker/name handling"""
692
- asset_name = disclosure.get('asset_name', 'Unknown Asset')
693
- asset_ticker = disclosure.get('asset_ticker')
694
-
748
+ asset_name = disclosure.get("asset_name", "Unknown Asset")
749
+ asset_ticker = disclosure.get("asset_ticker")
750
+
695
751
  # If we have both ticker and name, show ticker first
696
- if asset_ticker and asset_ticker.strip() and asset_ticker.lower() != 'none':
752
+ if asset_ticker and asset_ticker.strip() and asset_ticker.lower() != "none":
697
753
  return f"{asset_ticker} - {asset_name[:15]}"
698
754
  # If we only have asset name, show just that
699
755
  elif asset_name and asset_name.strip():
@@ -708,27 +764,27 @@ def _format_asset_display(disclosure: Dict[str, Any]) -> str:
708
764
  def view_data_sources(output_json: bool):
709
765
  """View current data sources and their configurations"""
710
766
  console = Console()
711
-
767
+
712
768
  try:
713
769
  from .config import WorkflowConfig
714
- from .data_sources import ALL_DATA_SOURCES, TOTAL_SOURCES, ACTIVE_SOURCES
715
-
770
+ from .data_sources import ACTIVE_SOURCES, ALL_DATA_SOURCES, TOTAL_SOURCES
771
+
716
772
  config = WorkflowConfig.default()
717
773
  active_sources = config.scraping.get_active_sources()
718
-
774
+
719
775
  # Group sources by category for display
720
776
  data_sources = {}
721
-
777
+
722
778
  for category, sources in ALL_DATA_SOURCES.items():
723
779
  active_category_sources = [s for s in sources if s.status == "active"]
724
780
  if active_category_sources:
725
781
  data_sources[category] = {
726
782
  "name": {
727
783
  "us_federal": "US Federal Government",
728
- "us_states": "US State Governments",
784
+ "us_states": "US State Governments",
729
785
  "eu_parliament": "EU Parliament",
730
786
  "eu_national": "EU National Parliaments",
731
- "third_party": "Third-Party Aggregators"
787
+ "third_party": "Third-Party Aggregators",
732
788
  }[category],
733
789
  "sources": active_category_sources,
734
790
  "count": len(active_category_sources),
@@ -736,12 +792,12 @@ def view_data_sources(output_json: bool):
736
792
  "description": {
737
793
  "us_federal": "Congressional and federal official financial disclosures",
738
794
  "us_states": "State legislature financial disclosure databases",
739
- "eu_parliament": "MEP financial interest and income declarations",
795
+ "eu_parliament": "MEP financial interest and income declarations",
740
796
  "eu_national": "National parliament financial disclosure systems",
741
- "third_party": "Commercial aggregators and enhanced analysis platforms"
742
- }[category]
797
+ "third_party": "Commercial aggregators and enhanced analysis platforms",
798
+ }[category],
743
799
  }
744
-
800
+
745
801
  if output_json:
746
802
  # For JSON output, convert DataSource objects to dictionaries
747
803
  json_output = {}
@@ -762,19 +818,24 @@ def view_data_sources(output_json: bool):
762
818
  "update_frequency": source.update_frequency,
763
819
  "threshold_amount": source.threshold_amount,
764
820
  "data_format": source.data_format,
765
- "notes": source.notes
821
+ "notes": source.notes,
766
822
  }
767
823
  for source in info["sources"]
768
- ]
824
+ ],
769
825
  }
770
826
  console.print(JSON.from_data(json_output))
771
827
  else:
772
- console.print(f"๐Ÿ“Š Comprehensive Political Trading Data Sources ({ACTIVE_SOURCES} active of {TOTAL_SOURCES} total)", style="bold cyan")
773
-
828
+ console.print(
829
+ f"๐Ÿ“Š Comprehensive Political Trading Data Sources ({ACTIVE_SOURCES} active of {TOTAL_SOURCES} total)",
830
+ style="bold cyan",
831
+ )
832
+
774
833
  for category_id, source_info in data_sources.items():
775
- console.print(f"\n[bold blue]{source_info['name']}[/bold blue] ({source_info['count']} sources)")
834
+ console.print(
835
+ f"\n[bold blue]{source_info['name']}[/bold blue] ({source_info['count']} sources)"
836
+ )
776
837
  console.print(f" {source_info['description']}", style="dim")
777
-
838
+
778
839
  # Create table for this category's sources
779
840
  table = Table()
780
841
  table.add_column("Source", style="cyan")
@@ -782,32 +843,32 @@ def view_data_sources(output_json: bool):
782
843
  table.add_column("Access", style="yellow")
783
844
  table.add_column("Disclosure Types", style="magenta")
784
845
  table.add_column("Threshold", style="blue")
785
-
846
+
786
847
  for source in source_info["sources"]:
787
848
  # Format disclosure types
788
- types_display = ", ".join([
789
- dt.value.replace("_", " ").title()
790
- for dt in source.disclosure_types
791
- ])
792
-
849
+ types_display = ", ".join(
850
+ [dt.value.replace("_", " ").title() for dt in source.disclosure_types]
851
+ )
852
+
793
853
  # Format threshold
794
854
  threshold_display = (
795
- f"${source.threshold_amount:,}" if source.threshold_amount
796
- else "None"
855
+ f"${source.threshold_amount:,}" if source.threshold_amount else "None"
797
856
  )
798
-
857
+
799
858
  table.add_row(
800
859
  source.name,
801
860
  source.jurisdiction,
802
861
  source.access_method.value.replace("_", " ").title(),
803
862
  types_display[:30] + ("..." if len(types_display) > 30 else ""),
804
- threshold_display
863
+ threshold_display,
805
864
  )
806
-
865
+
807
866
  console.print(table)
808
-
809
- console.print(f"\n[dim]Total: {ACTIVE_SOURCES} active sources across {len(data_sources)} categories[/dim]")
810
-
867
+
868
+ console.print(
869
+ f"\n[dim]Total: {ACTIVE_SOURCES} active sources across {len(data_sources)} categories[/dim]"
870
+ )
871
+
811
872
  except Exception as e:
812
873
  if output_json:
813
874
  console.print(JSON.from_data({"error": str(e)}))
@@ -821,15 +882,16 @@ def view_data_sources(output_json: bool):
821
882
  def view_jobs(output_json: bool, limit: int):
822
883
  """View current and recent data collection jobs"""
823
884
  console = Console()
824
-
885
+
825
886
  try:
887
+
826
888
  async def get_jobs():
827
- from .database import PoliticianTradingDB
828
889
  from .config import WorkflowConfig
829
-
890
+ from .database import PoliticianTradingDB
891
+
830
892
  config = WorkflowConfig.default()
831
893
  db = PoliticianTradingDB(config)
832
-
894
+
833
895
  # Get recent jobs
834
896
  jobs_result = (
835
897
  db.client.table("data_pull_jobs")
@@ -838,20 +900,20 @@ def view_jobs(output_json: bool, limit: int):
838
900
  .limit(limit)
839
901
  .execute()
840
902
  )
841
-
903
+
842
904
  return jobs_result.data if jobs_result.data else []
843
-
905
+
844
906
  jobs = asyncio.run(get_jobs())
845
-
907
+
846
908
  if output_json:
847
909
  console.print(JSON.from_data(jobs))
848
910
  else:
849
911
  console.print("๐Ÿ”„ Recent Data Collection Jobs", style="bold cyan")
850
-
912
+
851
913
  if not jobs:
852
914
  console.print("No jobs found", style="yellow")
853
915
  return
854
-
916
+
855
917
  jobs_table = Table()
856
918
  jobs_table.add_column("Job ID", style="cyan")
857
919
  jobs_table.add_column("Type", style="green")
@@ -859,35 +921,35 @@ def view_jobs(output_json: bool, limit: int):
859
921
  jobs_table.add_column("Started", style="blue")
860
922
  jobs_table.add_column("Duration", style="magenta")
861
923
  jobs_table.add_column("Records", style="yellow")
862
-
924
+
863
925
  for job in jobs:
864
926
  status_color = {
865
927
  "completed": "green",
866
- "running": "yellow",
928
+ "running": "yellow",
867
929
  "failed": "red",
868
- "pending": "blue"
930
+ "pending": "blue",
869
931
  }.get(job.get("status", "unknown"), "white")
870
-
932
+
871
933
  # Calculate duration
872
934
  started = job.get("started_at", "")
873
935
  completed = job.get("completed_at", "")
874
936
  duration = _format_duration_from_timestamps(started, completed)
875
-
937
+
876
938
  # Format records
877
939
  records_info = f"{job.get('records_new', 0)}n/{job.get('records_updated', 0)}u/{job.get('records_failed', 0)}f"
878
-
940
+
879
941
  jobs_table.add_row(
880
942
  job.get("id", "")[:8] + "...",
881
943
  job.get("job_type", "unknown"),
882
944
  f"[{status_color}]{job.get('status', 'unknown')}[/{status_color}]",
883
945
  _format_timestamp(started),
884
946
  duration,
885
- records_info
947
+ records_info,
886
948
  )
887
-
949
+
888
950
  console.print(jobs_table)
889
951
  console.print("\nLegend: Records = new/updated/failed", style="dim")
890
-
952
+
891
953
  except Exception as e:
892
954
  if output_json:
893
955
  console.print(JSON.from_data({"error": str(e)}))
@@ -900,20 +962,21 @@ def _format_duration_from_timestamps(started: str, completed: str) -> str:
900
962
  """Calculate and format duration from timestamps"""
901
963
  if not started:
902
964
  return "Unknown"
903
-
965
+
904
966
  try:
905
967
  start_dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
906
-
968
+
907
969
  if completed:
908
970
  end_dt = datetime.fromisoformat(completed.replace("Z", "+00:00"))
909
971
  duration = end_dt - start_dt
910
972
  else:
911
973
  # Job still running
912
974
  from datetime import timezone
975
+
913
976
  duration = datetime.now(timezone.utc) - start_dt
914
-
977
+
915
978
  return _format_duration_seconds(int(duration.total_seconds()))
916
-
979
+
917
980
  except Exception:
918
981
  return "Unknown"
919
982
 
@@ -921,25 +984,28 @@ def _format_duration_from_timestamps(started: str, completed: str) -> str:
921
984
  @politician_trading_cli.command("politicians")
922
985
  @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
923
986
  @click.option("--limit", default=20, help="Number of politicians to show")
924
- @click.option("--role", type=click.Choice(['us_house_rep', 'us_senator', 'eu_mep']), help="Filter by role")
987
+ @click.option(
988
+ "--role", type=click.Choice(["us_house_rep", "us_senator", "eu_mep"]), help="Filter by role"
989
+ )
925
990
  @click.option("--party", help="Filter by party")
926
991
  @click.option("--state", help="Filter by state/country")
927
992
  @click.option("--search", help="Search by name (first, last, or full name)")
928
993
  def view_politicians(output_json: bool, limit: int, role: str, party: str, state: str, search: str):
929
994
  """View and search politicians in the database"""
930
995
  console = Console()
931
-
996
+
932
997
  try:
998
+
933
999
  async def get_politicians():
934
- from .database import PoliticianTradingDB
935
1000
  from .config import WorkflowConfig
936
-
1001
+ from .database import PoliticianTradingDB
1002
+
937
1003
  config = WorkflowConfig.default()
938
1004
  db = PoliticianTradingDB(config)
939
-
1005
+
940
1006
  # Build query
941
1007
  query = db.client.table("politicians").select("*")
942
-
1008
+
943
1009
  # Apply filters
944
1010
  if role:
945
1011
  query = query.eq("role", role)
@@ -949,22 +1015,24 @@ def view_politicians(output_json: bool, limit: int, role: str, party: str, state
949
1015
  query = query.ilike("state_or_country", f"%{state}%")
950
1016
  if search:
951
1017
  # Search across name fields
952
- query = query.or_(f"first_name.ilike.%{search}%,last_name.ilike.%{search}%,full_name.ilike.%{search}%")
953
-
1018
+ query = query.or_(
1019
+ f"first_name.ilike.%{search}%,last_name.ilike.%{search}%,full_name.ilike.%{search}%"
1020
+ )
1021
+
954
1022
  result = query.order("created_at", desc=True).limit(limit).execute()
955
1023
  return result.data if result.data else []
956
-
1024
+
957
1025
  politicians = asyncio.run(get_politicians())
958
-
1026
+
959
1027
  if output_json:
960
1028
  console.print(JSON.from_data(politicians))
961
1029
  else:
962
1030
  console.print("๐Ÿ‘ฅ Politicians Database", style="bold cyan")
963
-
1031
+
964
1032
  if not politicians:
965
1033
  console.print("No politicians found", style="yellow")
966
1034
  return
967
-
1035
+
968
1036
  politicians_table = Table()
969
1037
  politicians_table.add_column("Name", style="cyan", min_width=25)
970
1038
  politicians_table.add_column("Role", style="green")
@@ -972,26 +1040,29 @@ def view_politicians(output_json: bool, limit: int, role: str, party: str, state
972
1040
  politicians_table.add_column("State/Country", style="magenta")
973
1041
  politicians_table.add_column("District", style="yellow")
974
1042
  politicians_table.add_column("Added", style="dim")
975
-
1043
+
976
1044
  for pol in politicians:
977
1045
  role_display = {
978
1046
  "us_house_rep": "๐Ÿ›๏ธ House Rep",
979
- "us_senator": "๐Ÿ›๏ธ Senator",
980
- "eu_mep": "๐Ÿ‡ช๐Ÿ‡บ MEP"
1047
+ "us_senator": "๐Ÿ›๏ธ Senator",
1048
+ "eu_mep": "๐Ÿ‡ช๐Ÿ‡บ MEP",
981
1049
  }.get(pol.get("role", ""), pol.get("role", "Unknown"))
982
-
1050
+
983
1051
  politicians_table.add_row(
984
- pol.get("full_name") or f"{pol.get('first_name', '')} {pol.get('last_name', '')}".strip(),
1052
+ pol.get("full_name")
1053
+ or f"{pol.get('first_name', '')} {pol.get('last_name', '')}".strip(),
985
1054
  role_display,
986
1055
  pol.get("party", "") or "Independent",
987
1056
  pol.get("state_or_country", ""),
988
1057
  pol.get("district", "") or "At-Large",
989
- _format_timestamp(pol.get("created_at", ""))
1058
+ _format_timestamp(pol.get("created_at", "")),
990
1059
  )
991
-
1060
+
992
1061
  console.print(politicians_table)
993
- console.print(f"\nShowing {len(politicians)} of {len(politicians)} politicians", style="dim")
994
-
1062
+ console.print(
1063
+ f"\nShowing {len(politicians)} of {len(politicians)} politicians", style="dim"
1064
+ )
1065
+
995
1066
  except Exception as e:
996
1067
  if output_json:
997
1068
  console.print(JSON.from_data({"error": str(e)}))
@@ -1000,109 +1071,137 @@ def view_politicians(output_json: bool, limit: int, role: str, party: str, state
1000
1071
  logger.error(f"Politicians view failed: {e}")
1001
1072
 
1002
1073
 
1003
- @politician_trading_cli.command("disclosures")
1074
+ @politician_trading_cli.command("disclosures")
1004
1075
  @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1005
1076
  @click.option("--limit", default=20, help="Number of disclosures to show")
1006
1077
  @click.option("--politician", help="Filter by politician name")
1007
1078
  @click.option("--asset", help="Filter by asset name or ticker")
1008
- @click.option("--transaction-type", type=click.Choice(['purchase', 'sale', 'exchange']), help="Filter by transaction type")
1079
+ @click.option(
1080
+ "--transaction-type",
1081
+ type=click.Choice(["purchase", "sale", "exchange"]),
1082
+ help="Filter by transaction type",
1083
+ )
1009
1084
  @click.option("--amount-min", type=float, help="Minimum transaction amount")
1010
1085
  @click.option("--amount-max", type=float, help="Maximum transaction amount")
1011
1086
  @click.option("--days", default=30, help="Show disclosures from last N days")
1012
1087
  @click.option("--details", is_flag=True, help="Show detailed information including raw data")
1013
- def view_disclosures(output_json: bool, limit: int, politician: str, asset: str,
1014
- transaction_type: str, amount_min: float, amount_max: float,
1015
- days: int, details: bool):
1088
+ def view_disclosures(
1089
+ output_json: bool,
1090
+ limit: int,
1091
+ politician: str,
1092
+ asset: str,
1093
+ transaction_type: str,
1094
+ amount_min: float,
1095
+ amount_max: float,
1096
+ days: int,
1097
+ details: bool,
1098
+ ):
1016
1099
  """View and search trading disclosures in the database"""
1017
1100
  console = Console()
1018
-
1101
+
1019
1102
  try:
1103
+
1020
1104
  async def get_disclosures():
1021
- from .database import PoliticianTradingDB
1022
- from .config import WorkflowConfig
1023
1105
  from datetime import datetime, timedelta, timezone
1024
-
1106
+
1107
+ from .config import WorkflowConfig
1108
+ from .database import PoliticianTradingDB
1109
+
1025
1110
  config = WorkflowConfig.default()
1026
1111
  db = PoliticianTradingDB(config)
1027
-
1028
- # Build query with join to get politician info
1112
+
1113
+ # Build query with join to get politician info
1029
1114
  # Supabase uses foreign key relationships for joins
1030
- query = (
1031
- db.client.table("trading_disclosures")
1032
- .select("*, politicians!inner(*)")
1033
- )
1034
-
1115
+ query = db.client.table("trading_disclosures").select("*, politicians!inner(*)")
1116
+
1035
1117
  # Date filter
1036
1118
  if days > 0:
1037
1119
  cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
1038
1120
  query = query.gte("created_at", cutoff_date.isoformat())
1039
-
1121
+
1040
1122
  # Apply filters
1041
1123
  if politician:
1042
1124
  # For nested relationships, we need a different approach
1043
1125
  # Let's use a simpler filter on the main table for now
1044
1126
  query = query.filter("politicians.full_name", "ilike", f"%{politician}%")
1045
-
1127
+
1046
1128
  if asset:
1047
1129
  query = query.or_(f"asset_name.ilike.%{asset}%,asset_ticker.ilike.%{asset}%")
1048
-
1130
+
1049
1131
  if transaction_type:
1050
1132
  query = query.eq("transaction_type", transaction_type)
1051
-
1133
+
1052
1134
  if amount_min is not None:
1053
1135
  query = query.gte("amount_range_min", amount_min)
1054
-
1136
+
1055
1137
  if amount_max is not None:
1056
1138
  query = query.lte("amount_range_max", amount_max)
1057
-
1139
+
1058
1140
  result = query.order("transaction_date", desc=True).limit(limit).execute()
1059
1141
  return result.data if result.data else []
1060
-
1142
+
1061
1143
  disclosures = asyncio.run(get_disclosures())
1062
-
1144
+
1063
1145
  if output_json:
1064
1146
  console.print(JSON.from_data(disclosures))
1065
1147
  else:
1066
1148
  console.print("๐Ÿ’ฐ Trading Disclosures Database", style="bold cyan")
1067
-
1149
+
1068
1150
  if not disclosures:
1069
1151
  console.print("No disclosures found", style="yellow")
1070
1152
  return
1071
-
1153
+
1072
1154
  if details:
1073
1155
  # Detailed view
1074
1156
  for i, disclosure in enumerate(disclosures):
1075
1157
  console.print(f"\n[bold cyan]Disclosure {i+1}[/bold cyan]")
1076
-
1158
+
1077
1159
  detail_table = Table()
1078
1160
  detail_table.add_column("Field", style="cyan")
1079
1161
  detail_table.add_column("Value", style="white")
1080
-
1162
+
1081
1163
  politician_info = disclosure.get("politicians", {})
1082
- politician_name = politician_info.get("full_name") or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
1083
-
1084
- detail_table.add_row("Politician", f"{politician_name} ({politician_info.get('party', 'Unknown')})")
1085
- detail_table.add_row("Asset", f"{disclosure.get('asset_name', 'Unknown')} ({disclosure.get('asset_ticker', 'N/A')})")
1086
- detail_table.add_row("Transaction", disclosure.get('transaction_type', 'Unknown').title())
1087
- detail_table.add_row("Date", _format_timestamp(disclosure.get('transaction_date', '')))
1088
- detail_table.add_row("Disclosure Date", _format_timestamp(disclosure.get('disclosure_date', '')))
1089
-
1164
+ politician_name = (
1165
+ politician_info.get("full_name")
1166
+ or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
1167
+ )
1168
+
1169
+ detail_table.add_row(
1170
+ "Politician",
1171
+ f"{politician_name} ({politician_info.get('party', 'Unknown')})",
1172
+ )
1173
+ detail_table.add_row(
1174
+ "Asset",
1175
+ f"{disclosure.get('asset_name', 'Unknown')} ({disclosure.get('asset_ticker', 'N/A')})",
1176
+ )
1177
+ detail_table.add_row(
1178
+ "Transaction", disclosure.get("transaction_type", "Unknown").title()
1179
+ )
1180
+ detail_table.add_row(
1181
+ "Date", _format_timestamp(disclosure.get("transaction_date", ""))
1182
+ )
1183
+ detail_table.add_row(
1184
+ "Disclosure Date", _format_timestamp(disclosure.get("disclosure_date", ""))
1185
+ )
1186
+
1090
1187
  # Amount formatting
1091
- amount_min = disclosure.get('amount_range_min')
1092
- amount_max = disclosure.get('amount_range_max')
1093
- amount_exact = disclosure.get('amount_exact')
1094
-
1188
+ amount_min = disclosure.get("amount_range_min")
1189
+ amount_max = disclosure.get("amount_range_max")
1190
+ amount_exact = disclosure.get("amount_exact")
1191
+
1095
1192
  if amount_exact:
1096
1193
  amount_str = f"${amount_exact:,.2f}"
1097
1194
  elif amount_min is not None and amount_max is not None:
1098
1195
  amount_str = f"${amount_min:,.0f} - ${amount_max:,.0f}"
1099
1196
  else:
1100
1197
  amount_str = "Unknown"
1101
-
1198
+
1102
1199
  detail_table.add_row("Amount", amount_str)
1103
- detail_table.add_row("Source URL", disclosure.get('source_url', 'N/A'))
1104
- detail_table.add_row("Added", _format_timestamp(disclosure.get('created_at', '')))
1105
-
1200
+ detail_table.add_row("Source URL", disclosure.get("source_url", "N/A"))
1201
+ detail_table.add_row(
1202
+ "Added", _format_timestamp(disclosure.get("created_at", ""))
1203
+ )
1204
+
1106
1205
  console.print(detail_table)
1107
1206
  else:
1108
1207
  # Compact table view
@@ -1113,40 +1212,49 @@ def view_disclosures(output_json: bool, limit: int, politician: str, asset: str,
1113
1212
  disclosures_table.add_column("Amount", style="yellow")
1114
1213
  disclosures_table.add_column("Date", style="magenta")
1115
1214
  disclosures_table.add_column("Party", style="dim")
1116
-
1215
+
1117
1216
  for disclosure in disclosures:
1118
1217
  politician_info = disclosure.get("politicians", {})
1119
- politician_name = politician_info.get("full_name") or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
1120
-
1218
+ politician_name = (
1219
+ politician_info.get("full_name")
1220
+ or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
1221
+ )
1222
+
1121
1223
  # Format amount
1122
- amount_min = disclosure.get('amount_range_min')
1123
- amount_max = disclosure.get('amount_range_max')
1124
- amount_exact = disclosure.get('amount_exact')
1125
-
1224
+ amount_min = disclosure.get("amount_range_min")
1225
+ amount_max = disclosure.get("amount_range_max")
1226
+ amount_exact = disclosure.get("amount_exact")
1227
+
1126
1228
  if amount_exact:
1127
1229
  amount_str = f"${amount_exact:,.0f}"
1128
1230
  elif amount_min is not None and amount_max is not None:
1129
1231
  amount_str = f"${amount_min:,.0f}-${amount_max:,.0f}"
1130
1232
  else:
1131
1233
  amount_str = "Unknown"
1132
-
1234
+
1133
1235
  # Transaction type with emoji
1134
- trans_type = disclosure.get('transaction_type', 'unknown')
1135
- trans_emoji = {"purchase": "๐ŸŸข Buy", "sale": "๐Ÿ”ด Sell", "exchange": "๐Ÿ”„ Exchange"}.get(trans_type, "โ“ " + trans_type.title())
1136
-
1236
+ trans_type = disclosure.get("transaction_type", "unknown")
1237
+ trans_emoji = {
1238
+ "purchase": "๐ŸŸข Buy",
1239
+ "sale": "๐Ÿ”ด Sell",
1240
+ "exchange": "๐Ÿ”„ Exchange",
1241
+ }.get(trans_type, "โ“ " + trans_type.title())
1242
+
1137
1243
  disclosures_table.add_row(
1138
1244
  politician_name[:35] + ("..." if len(politician_name) > 35 else ""),
1139
1245
  _format_asset_display(disclosure),
1140
1246
  trans_emoji,
1141
1247
  amount_str,
1142
- _format_timestamp(disclosure.get('transaction_date', '')),
1143
- politician_info.get('party', '')[:12]
1248
+ _format_timestamp(disclosure.get("transaction_date", "")),
1249
+ politician_info.get("party", "")[:12],
1144
1250
  )
1145
-
1251
+
1146
1252
  console.print(disclosures_table)
1147
-
1148
- console.print(f"\nShowing {len(disclosures)} disclosures from last {days} days", style="dim")
1149
-
1253
+
1254
+ console.print(
1255
+ f"\nShowing {len(disclosures)} disclosures from last {days} days", style="dim"
1256
+ )
1257
+
1150
1258
  except Exception as e:
1151
1259
  if output_json:
1152
1260
  console.print(JSON.from_data({"error": str(e)}))
@@ -1160,26 +1268,28 @@ def view_disclosures(output_json: bool, limit: int, politician: str, asset: str,
1160
1268
  def verify_database(output_json: bool):
1161
1269
  """Verify database integrity and show summary statistics"""
1162
1270
  console = Console()
1163
-
1271
+
1164
1272
  try:
1273
+
1165
1274
  async def verify_data():
1166
- from .database import PoliticianTradingDB
1167
- from .config import WorkflowConfig
1168
1275
  from datetime import timedelta
1169
-
1276
+
1277
+ from .config import WorkflowConfig
1278
+ from .database import PoliticianTradingDB
1279
+
1170
1280
  config = WorkflowConfig.default()
1171
1281
  db = PoliticianTradingDB(config)
1172
-
1282
+
1173
1283
  verification = {
1174
1284
  "timestamp": datetime.now().isoformat(),
1175
1285
  "tables": {},
1176
1286
  "integrity": {},
1177
- "summary": {}
1287
+ "summary": {},
1178
1288
  }
1179
-
1289
+
1180
1290
  # Check each table
1181
1291
  tables_to_check = ["politicians", "trading_disclosures", "data_pull_jobs"]
1182
-
1292
+
1183
1293
  for table_name in tables_to_check:
1184
1294
  try:
1185
1295
  result = db.client.table(table_name).select("id").execute()
@@ -1187,41 +1297,38 @@ def verify_database(output_json: bool):
1187
1297
  verification["tables"][table_name] = {
1188
1298
  "exists": True,
1189
1299
  "record_count": count,
1190
- "status": "ok"
1300
+ "status": "ok",
1191
1301
  }
1192
1302
  except Exception as e:
1193
1303
  verification["tables"][table_name] = {
1194
1304
  "exists": False,
1195
1305
  "error": str(e),
1196
- "status": "error"
1306
+ "status": "error",
1197
1307
  }
1198
-
1308
+
1199
1309
  # Check referential integrity - simplified approach
1200
1310
  try:
1201
1311
  # Just verify we can query both tables
1202
1312
  disclosures_result = db.client.table("trading_disclosures").select("id").execute()
1203
1313
  politicians_result = db.client.table("politicians").select("id").execute()
1204
-
1314
+
1205
1315
  disclosures_count = len(disclosures_result.data) if disclosures_result.data else 0
1206
1316
  politicians_count = len(politicians_result.data) if politicians_result.data else 0
1207
-
1317
+
1208
1318
  verification["integrity"] = {
1209
1319
  "disclosures_with_politicians": disclosures_count,
1210
1320
  "total_politicians": politicians_count,
1211
- "status": "ok"
1321
+ "status": "ok",
1212
1322
  }
1213
1323
  except Exception as e:
1214
- verification["integrity"] = {
1215
- "error": str(e),
1216
- "status": "error"
1217
- }
1218
-
1324
+ verification["integrity"] = {"error": str(e), "status": "error"}
1325
+
1219
1326
  # Summary statistics
1220
1327
  try:
1221
1328
  politicians_count = verification["tables"]["politicians"]["record_count"]
1222
1329
  disclosures_count = verification["tables"]["trading_disclosures"]["record_count"]
1223
1330
  jobs_count = verification["tables"]["data_pull_jobs"]["record_count"]
1224
-
1331
+
1225
1332
  # Get recent activity
1226
1333
  recent_jobs = (
1227
1334
  db.client.table("data_pull_jobs")
@@ -1229,56 +1336,62 @@ def verify_database(output_json: bool):
1229
1336
  .gte("started_at", (datetime.now() - timedelta(days=7)).isoformat())
1230
1337
  .execute()
1231
1338
  )
1232
-
1339
+
1233
1340
  recent_jobs_count = len(recent_jobs.data) if recent_jobs.data else 0
1234
- successful_jobs = len([j for j in (recent_jobs.data or []) if j.get("status") == "completed"])
1235
-
1341
+ successful_jobs = len(
1342
+ [j for j in (recent_jobs.data or []) if j.get("status") == "completed"]
1343
+ )
1344
+
1236
1345
  verification["summary"] = {
1237
1346
  "total_politicians": politicians_count,
1238
1347
  "total_disclosures": disclosures_count,
1239
1348
  "total_jobs": jobs_count,
1240
1349
  "jobs_last_7_days": recent_jobs_count,
1241
1350
  "successful_jobs_last_7_days": successful_jobs,
1242
- "success_rate_7_days": (successful_jobs / recent_jobs_count * 100) if recent_jobs_count > 0 else 0
1351
+ "success_rate_7_days": (
1352
+ (successful_jobs / recent_jobs_count * 100) if recent_jobs_count > 0 else 0
1353
+ ),
1243
1354
  }
1244
-
1355
+
1245
1356
  except Exception as e:
1246
1357
  verification["summary"] = {"error": str(e)}
1247
-
1358
+
1248
1359
  return verification
1249
-
1360
+
1250
1361
  verification = asyncio.run(verify_data())
1251
-
1362
+
1252
1363
  if output_json:
1253
1364
  console.print(JSON.from_data(verification))
1254
1365
  else:
1255
1366
  console.print("๐Ÿ” Database Verification Report", style="bold cyan")
1256
-
1367
+
1257
1368
  # Table status
1258
1369
  tables_panel = Table(title="Table Status")
1259
1370
  tables_panel.add_column("Table", style="cyan")
1260
1371
  tables_panel.add_column("Status", style="white")
1261
1372
  tables_panel.add_column("Records", justify="right", style="green")
1262
-
1373
+
1263
1374
  for table_name, info in verification["tables"].items():
1264
1375
  status_color = "green" if info["status"] == "ok" else "red"
1265
1376
  status_text = f"[{status_color}]{info['status'].upper()}[/{status_color}]"
1266
1377
  record_count = str(info.get("record_count", "N/A"))
1267
-
1378
+
1268
1379
  tables_panel.add_row(table_name, status_text, record_count)
1269
-
1380
+
1270
1381
  console.print(tables_panel)
1271
-
1382
+
1272
1383
  # Integrity check
1273
1384
  integrity_info = verification.get("integrity", {})
1274
1385
  if integrity_info.get("status") == "ok":
1275
1386
  console.print("โœ… Data integrity check passed", style="green")
1276
1387
  disc_count = integrity_info.get("disclosures_with_politicians", 0)
1277
1388
  pol_count = integrity_info.get("total_politicians", 0)
1278
- console.print(f" Disclosures: {disc_count}, Politicians: {pol_count}", style="dim")
1389
+ console.print(
1390
+ f" Disclosures: {disc_count}, Politicians: {pol_count}", style="dim"
1391
+ )
1279
1392
  else:
1280
1393
  console.print("โŒ Data integrity check failed", style="red")
1281
-
1394
+
1282
1395
  # Summary
1283
1396
  summary = verification.get("summary", {})
1284
1397
  if "error" not in summary:
@@ -1286,9 +1399,11 @@ def verify_database(output_json: bool):
1286
1399
  console.print(f"Politicians: {summary.get('total_politicians', 0)}")
1287
1400
  console.print(f"Trading Disclosures: {summary.get('total_disclosures', 0)}")
1288
1401
  console.print(f"Data Collection Jobs: {summary.get('total_jobs', 0)}")
1289
- console.print(f"Jobs (7 days): {summary.get('jobs_last_7_days', 0)} ({summary.get('successful_jobs_last_7_days', 0)} successful)")
1402
+ console.print(
1403
+ f"Jobs (7 days): {summary.get('jobs_last_7_days', 0)} ({summary.get('successful_jobs_last_7_days', 0)} successful)"
1404
+ )
1290
1405
  console.print(f"Success Rate: {summary.get('success_rate_7_days', 0):.1f}%")
1291
-
1406
+
1292
1407
  except Exception as e:
1293
1408
  if output_json:
1294
1409
  console.print(JSON.from_data({"error": str(e)}))
@@ -1304,30 +1419,34 @@ def cron_commands():
1304
1419
 
1305
1420
 
1306
1421
  @cron_commands.command("run")
1307
- @click.option("--type", "collection_type", default="full",
1308
- type=click.Choice(["full", "us", "eu", "quick"]),
1309
- help="Type of collection to run")
1422
+ @click.option(
1423
+ "--type",
1424
+ "collection_type",
1425
+ default="full",
1426
+ type=click.Choice(["full", "us", "eu", "quick"]),
1427
+ help="Type of collection to run",
1428
+ )
1310
1429
  def cron_run(collection_type: str):
1311
1430
  """Run scheduled data collection (designed for cron jobs)"""
1312
-
1431
+
1313
1432
  async def run_cron_collection():
1314
1433
  """Run the cron collection"""
1315
1434
  from datetime import datetime
1316
-
1435
+
1317
1436
  logger.info(f"Starting scheduled collection: {collection_type}")
1318
1437
  console.print(f"๐Ÿ• Running {collection_type} data collection...", style="blue")
1319
-
1438
+
1320
1439
  try:
1321
1440
  workflow = PoliticianTradingWorkflow()
1322
-
1441
+
1323
1442
  if collection_type == "full":
1324
1443
  results = await run_politician_trading_collection()
1325
1444
  elif collection_type == "us":
1326
1445
  # US-only collection
1327
1446
  us_results = await workflow._collect_us_congress_data()
1328
- ca_results = await workflow._collect_california_data()
1447
+ ca_results = await workflow._collect_california_data()
1329
1448
  us_states_results = await workflow._collect_us_states_data()
1330
-
1449
+
1331
1450
  results = {
1332
1451
  "status": "completed",
1333
1452
  "started_at": datetime.utcnow().isoformat(),
@@ -1335,22 +1454,24 @@ def cron_run(collection_type: str):
1335
1454
  "jobs": {
1336
1455
  "us_congress": us_results,
1337
1456
  "california": ca_results,
1338
- "us_states": us_states_results
1457
+ "us_states": us_states_results,
1339
1458
  },
1340
1459
  "summary": {
1341
- "total_new_disclosures": sum([
1342
- us_results.get("new_disclosures", 0),
1343
- ca_results.get("new_disclosures", 0),
1344
- us_states_results.get("new_disclosures", 0)
1345
- ])
1346
- }
1460
+ "total_new_disclosures": sum(
1461
+ [
1462
+ us_results.get("new_disclosures", 0),
1463
+ ca_results.get("new_disclosures", 0),
1464
+ us_states_results.get("new_disclosures", 0),
1465
+ ]
1466
+ )
1467
+ },
1347
1468
  }
1348
1469
  elif collection_type == "eu":
1349
1470
  # EU-only collection
1350
1471
  eu_results = await workflow._collect_eu_parliament_data()
1351
1472
  eu_states_results = await workflow._collect_eu_member_states_data()
1352
1473
  uk_results = await workflow._collect_uk_parliament_data()
1353
-
1474
+
1354
1475
  results = {
1355
1476
  "status": "completed",
1356
1477
  "started_at": datetime.utcnow().isoformat(),
@@ -1358,15 +1479,17 @@ def cron_run(collection_type: str):
1358
1479
  "jobs": {
1359
1480
  "eu_parliament": eu_results,
1360
1481
  "eu_member_states": eu_states_results,
1361
- "uk_parliament": uk_results
1482
+ "uk_parliament": uk_results,
1362
1483
  },
1363
1484
  "summary": {
1364
- "total_new_disclosures": sum([
1365
- eu_results.get("new_disclosures", 0),
1366
- eu_states_results.get("new_disclosures", 0),
1367
- uk_results.get("new_disclosures", 0)
1368
- ])
1369
- }
1485
+ "total_new_disclosures": sum(
1486
+ [
1487
+ eu_results.get("new_disclosures", 0),
1488
+ eu_states_results.get("new_disclosures", 0),
1489
+ uk_results.get("new_disclosures", 0),
1490
+ ]
1491
+ )
1492
+ },
1370
1493
  }
1371
1494
  elif collection_type == "quick":
1372
1495
  # Quick status check
@@ -1375,23 +1498,27 @@ def cron_run(collection_type: str):
1375
1498
  "status": "completed",
1376
1499
  "type": "quick_check",
1377
1500
  "results": status,
1378
- "summary": {"total_new_disclosures": 0}
1501
+ "summary": {"total_new_disclosures": 0},
1379
1502
  }
1380
-
1503
+
1381
1504
  # Log results
1382
- summary = results.get('summary', {})
1383
- logger.info(f"Cron collection completed - New: {summary.get('total_new_disclosures', 0)}")
1384
-
1505
+ summary = results.get("summary", {})
1506
+ logger.info(
1507
+ f"Cron collection completed - New: {summary.get('total_new_disclosures', 0)}"
1508
+ )
1509
+
1385
1510
  console.print(f"โœ… {collection_type.title()} collection completed", style="green")
1386
- console.print(f"New disclosures: {summary.get('total_new_disclosures', 0)}", style="cyan")
1387
-
1511
+ console.print(
1512
+ f"New disclosures: {summary.get('total_new_disclosures', 0)}", style="cyan"
1513
+ )
1514
+
1388
1515
  return results
1389
-
1516
+
1390
1517
  except Exception as e:
1391
1518
  logger.error(f"Cron collection failed: {e}")
1392
1519
  console.print(f"โŒ Collection failed: {e}", style="red")
1393
1520
  return {"status": "failed", "error": str(e)}
1394
-
1521
+
1395
1522
  asyncio.run(run_cron_collection())
1396
1523
 
1397
1524
 
@@ -1400,10 +1527,10 @@ def cron_setup():
1400
1527
  """Show cron setup instructions"""
1401
1528
  console.print("๐Ÿ• CRON SETUP INSTRUCTIONS", style="bold cyan")
1402
1529
  console.print("Add these lines to your crontab (run: crontab -e)", style="dim")
1403
-
1530
+
1404
1531
  # Get current working directory for the cron commands
1405
1532
  repo_path = Path(__file__).parent.parent.parent.parent.parent
1406
-
1533
+
1407
1534
  instructions = f"""
1408
1535
  # Full collection every 6 hours
1409
1536
  0 */6 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type full >> /tmp/politician_cron.log 2>&1
@@ -1417,9 +1544,9 @@ def cron_setup():
1417
1544
  # Quick health check daily at 9 AM
1418
1545
  0 9 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type quick >> /tmp/politician_cron.log 2>&1
1419
1546
  """
1420
-
1547
+
1421
1548
  console.print(Panel(instructions, title="Crontab Entries", border_style="blue"))
1422
-
1549
+
1423
1550
  console.print("\n๐Ÿ’ก Tips:", style="bold yellow")
1424
1551
  console.print("โ€ข Start with just one cron job to test", style="dim")
1425
1552
  console.print("โ€ข Check logs at /tmp/politician_cron.log", style="dim")
@@ -1430,136 +1557,148 @@ def cron_setup():
1430
1557
  @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1431
1558
  def monitor_system(output_json: bool):
1432
1559
  """Monitor system status, jobs, and database"""
1433
-
1560
+
1434
1561
  async def run_monitor():
1435
1562
  """Run the monitoring"""
1436
1563
  try:
1437
1564
  config = WorkflowConfig.default()
1438
1565
  db = PoliticianTradingDB(config)
1439
1566
  workflow = PoliticianTradingWorkflow(config)
1440
-
1567
+
1441
1568
  # Get system health
1442
1569
  await db.ensure_schema()
1443
1570
  quick_status = await workflow.run_quick_check()
1444
-
1571
+
1445
1572
  # Get job history
1446
1573
  job_status = await db.get_job_status()
1447
- recent_jobs = job_status.get('recent_jobs', [])
1448
-
1574
+ recent_jobs = job_status.get("recent_jobs", [])
1575
+
1449
1576
  # Analyze job statistics
1450
- status_counts = {'completed': 0, 'running': 0, 'failed': 0, 'pending': 0}
1577
+ status_counts = {"completed": 0, "running": 0, "failed": 0, "pending": 0}
1451
1578
  job_types = {}
1452
1579
  latest_by_type = {}
1453
-
1580
+
1454
1581
  for job in recent_jobs:
1455
- status = job.get('status', 'unknown')
1456
- job_type = job.get('job_type', 'unknown')
1457
- started_at = job.get('started_at', '')
1458
-
1582
+ status = job.get("status", "unknown")
1583
+ job_type = job.get("job_type", "unknown")
1584
+ started_at = job.get("started_at", "")
1585
+
1459
1586
  if status in status_counts:
1460
1587
  status_counts[status] += 1
1461
1588
  job_types[job_type] = job_types.get(job_type, 0) + 1
1462
-
1463
- if job_type not in latest_by_type or started_at > latest_by_type[job_type].get('started_at', ''):
1589
+
1590
+ if job_type not in latest_by_type or started_at > latest_by_type[job_type].get(
1591
+ "started_at", ""
1592
+ ):
1464
1593
  latest_by_type[job_type] = job
1465
-
1594
+
1466
1595
  # Get scraper availability
1467
1596
  try:
1468
1597
  from . import scrapers
1598
+
1469
1599
  scraper_status = {
1470
- 'UK Parliament API': scrapers.UK_SCRAPER_AVAILABLE,
1471
- 'California NetFile': scrapers.CALIFORNIA_SCRAPER_AVAILABLE,
1472
- 'EU Member States': scrapers.EU_MEMBER_STATES_SCRAPER_AVAILABLE,
1473
- 'US States Ethics': scrapers.US_STATES_SCRAPER_AVAILABLE,
1600
+ "UK Parliament API": scrapers.UK_SCRAPER_AVAILABLE,
1601
+ "California NetFile": scrapers.CALIFORNIA_SCRAPER_AVAILABLE,
1602
+ "EU Member States": scrapers.EU_MEMBER_STATES_SCRAPER_AVAILABLE,
1603
+ "US States Ethics": scrapers.US_STATES_SCRAPER_AVAILABLE,
1474
1604
  }
1475
1605
  available_scrapers = sum(scraper_status.values())
1476
1606
  except:
1477
1607
  scraper_status = {}
1478
1608
  available_scrapers = 0
1479
-
1609
+
1480
1610
  monitor_data = {
1481
1611
  "system_health": {
1482
- "database_connection": quick_status.get('database_connection', 'unknown'),
1483
- "config_loaded": quick_status.get('config_loaded', 'unknown'),
1484
- "timestamp": quick_status.get('timestamp', datetime.now().isoformat())
1612
+ "database_connection": quick_status.get("database_connection", "unknown"),
1613
+ "config_loaded": quick_status.get("config_loaded", "unknown"),
1614
+ "timestamp": quick_status.get("timestamp", datetime.now().isoformat()),
1485
1615
  },
1486
1616
  "job_statistics": {
1487
1617
  "total_recent_jobs": len(recent_jobs),
1488
1618
  "status_counts": status_counts,
1489
- "job_types": job_types
1619
+ "job_types": job_types,
1490
1620
  },
1491
1621
  "latest_jobs": latest_by_type,
1492
1622
  "scraper_availability": {
1493
1623
  "available_count": available_scrapers,
1494
1624
  "total_count": len(scraper_status),
1495
- "scrapers": scraper_status
1496
- }
1625
+ "scrapers": scraper_status,
1626
+ },
1497
1627
  }
1498
-
1628
+
1499
1629
  return monitor_data
1500
-
1630
+
1501
1631
  except Exception as e:
1502
1632
  logger.error(f"Monitoring failed: {e}")
1503
1633
  return {"error": str(e)}
1504
-
1634
+
1505
1635
  monitor_data = asyncio.run(run_monitor())
1506
-
1636
+
1507
1637
  if output_json:
1508
1638
  console.print(JSON.from_data(monitor_data))
1509
1639
  else:
1510
1640
  console.print("๐Ÿ” SYSTEM MONITOR", style="bold cyan")
1511
-
1641
+
1512
1642
  # System Health
1513
- health = monitor_data.get('system_health', {})
1643
+ health = monitor_data.get("system_health", {})
1514
1644
  health_table = Table(title="System Health")
1515
1645
  health_table.add_column("Component", style="cyan")
1516
1646
  health_table.add_column("Status", style="white")
1517
-
1518
- db_status = health['database_connection']
1647
+
1648
+ db_status = health["database_connection"]
1519
1649
  db_color = "green" if db_status == "ok" else "red"
1520
1650
  health_table.add_row("Database", f"[{db_color}]{db_status.upper()}[/{db_color}]")
1521
-
1522
- config_status = health['config_loaded']
1651
+
1652
+ config_status = health["config_loaded"]
1523
1653
  config_color = "green" if config_status == "ok" else "red"
1524
- health_table.add_row("Configuration", f"[{config_color}]{config_status.upper()}[/{config_color}]")
1525
-
1654
+ health_table.add_row(
1655
+ "Configuration", f"[{config_color}]{config_status.upper()}[/{config_color}]"
1656
+ )
1657
+
1526
1658
  console.print(health_table)
1527
-
1659
+
1528
1660
  # Job Statistics
1529
- job_stats = monitor_data.get('job_statistics', {})
1530
- console.print(f"\n๐Ÿ“Š Job Statistics (Total: {job_stats.get('total_recent_jobs', 0)})", style="bold blue")
1531
-
1532
- status_counts = job_stats.get('status_counts', {})
1661
+ job_stats = monitor_data.get("job_statistics", {})
1662
+ console.print(
1663
+ f"\n๐Ÿ“Š Job Statistics (Total: {job_stats.get('total_recent_jobs', 0)})",
1664
+ style="bold blue",
1665
+ )
1666
+
1667
+ status_counts = job_stats.get("status_counts", {})
1533
1668
  for status, count in status_counts.items():
1534
1669
  if count > 0:
1535
- icon = {'completed': 'โœ…', 'running': '๐Ÿ”„', 'failed': 'โŒ', 'pending': 'โณ'}[status]
1670
+ icon = {"completed": "โœ…", "running": "๐Ÿ”„", "failed": "โŒ", "pending": "โณ"}[status]
1536
1671
  console.print(f"{icon} {status.title()}: {count}")
1537
-
1672
+
1538
1673
  # Latest Jobs by Type
1539
1674
  console.print(f"\n๐Ÿ“‹ Latest Jobs by Source", style="bold blue")
1540
- latest_jobs = monitor_data.get('latest_jobs', {})
1541
-
1675
+ latest_jobs = monitor_data.get("latest_jobs", {})
1676
+
1542
1677
  for job_type, job in sorted(latest_jobs.items()):
1543
- status = job.get('status', 'unknown')
1544
- icon = {'completed': 'โœ…', 'running': '๐Ÿ”„', 'failed': 'โŒ', 'pending': 'โณ'}.get(status, 'โ“')
1545
-
1546
- source_name = job_type.replace('_', ' ').title()
1678
+ status = job.get("status", "unknown")
1679
+ icon = {"completed": "โœ…", "running": "๐Ÿ”„", "failed": "โŒ", "pending": "โณ"}.get(
1680
+ status, "โ“"
1681
+ )
1682
+
1683
+ source_name = job_type.replace("_", " ").title()
1547
1684
  console.print(f"\n{icon} {source_name}")
1548
1685
  console.print(f" Status: {status}")
1549
1686
  console.print(f" Last run: {job.get('started_at', 'N/A')[:19]}")
1550
- console.print(f" Records: {job.get('records_processed', 0)} processed, {job.get('records_new', 0)} new")
1551
-
1687
+ console.print(
1688
+ f" Records: {job.get('records_processed', 0)} processed, {job.get('records_new', 0)} new"
1689
+ )
1690
+
1552
1691
  # Scraper Availability
1553
- scraper_info = monitor_data.get('scraper_availability', {})
1554
- available = scraper_info.get('available_count', 0)
1555
- total = scraper_info.get('total_count', 0)
1556
-
1692
+ scraper_info = monitor_data.get("scraper_availability", {})
1693
+ available = scraper_info.get("available_count", 0)
1694
+ total = scraper_info.get("total_count", 0)
1695
+
1557
1696
  console.print(f"\n๐ŸŒ Scraper Availability: {available}/{total}", style="bold blue")
1558
-
1559
- scrapers_status = scraper_info.get('scrapers', {})
1697
+
1698
+ scrapers_status = scraper_info.get("scrapers", {})
1560
1699
  for scraper_name, available in scrapers_status.items():
1561
- icon = 'โœ…' if available else 'โŒ'
1562
- status = 'Available' if available else 'Not Available'
1700
+ icon = "โœ…" if available else "โŒ"
1701
+ status = "Available" if available else "Not Available"
1563
1702
  console.print(f"{icon} {scraper_name}: {status}")
1564
1703
 
1565
1704
 
@@ -1569,143 +1708,155 @@ def monitor_system(output_json: bool):
1569
1708
  @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1570
1709
  def read_recent_data(limit: int, days: int, output_json: bool):
1571
1710
  """Read recent data from the database"""
1572
-
1711
+
1573
1712
  async def read_data():
1574
1713
  """Read recent data from database"""
1575
1714
  try:
1576
1715
  config = WorkflowConfig.default()
1577
1716
  db = PoliticianTradingDB(config)
1578
-
1717
+
1579
1718
  # Get job history
1580
1719
  job_status = await db.get_job_status()
1581
- jobs = job_status.get('recent_jobs', [])
1582
-
1720
+ jobs = job_status.get("recent_jobs", [])
1721
+
1583
1722
  # Analyze data freshness
1584
1723
  freshness = {}
1585
1724
  for job in jobs:
1586
- job_type = job.get('job_type', 'unknown')
1587
- if job.get('status') == 'completed':
1588
- completed_at = job.get('completed_at')
1589
- if job_type not in freshness or completed_at > freshness[job_type]['last_success']:
1725
+ job_type = job.get("job_type", "unknown")
1726
+ if job.get("status") == "completed":
1727
+ completed_at = job.get("completed_at")
1728
+ if (
1729
+ job_type not in freshness
1730
+ or completed_at > freshness[job_type]["last_success"]
1731
+ ):
1590
1732
  # Check if recent (within threshold)
1591
1733
  is_recent = False
1592
1734
  if completed_at:
1593
1735
  try:
1594
- timestamp = datetime.fromisoformat(completed_at.replace('Z', '+00:00'))
1595
- is_recent = (datetime.now() - timestamp.replace(tzinfo=None)) < timedelta(hours=24)
1736
+ timestamp = datetime.fromisoformat(
1737
+ completed_at.replace("Z", "+00:00")
1738
+ )
1739
+ is_recent = (
1740
+ datetime.now() - timestamp.replace(tzinfo=None)
1741
+ ) < timedelta(hours=24)
1596
1742
  except:
1597
1743
  pass
1598
-
1744
+
1599
1745
  freshness[job_type] = {
1600
- 'last_success': completed_at,
1601
- 'records_collected': job.get('records_new', 0),
1602
- 'status': 'fresh' if is_recent else 'stale'
1746
+ "last_success": completed_at,
1747
+ "records_collected": job.get("records_new", 0),
1748
+ "status": "fresh" if is_recent else "stale",
1603
1749
  }
1604
-
1750
+
1605
1751
  return {
1606
1752
  "recent_jobs": jobs[:limit],
1607
1753
  "data_freshness": freshness,
1608
1754
  "summary": {
1609
1755
  "total_jobs": len(jobs),
1610
- "job_types": len(set(job.get('job_type') for job in jobs)),
1611
- "fresh_sources": len([v for v in freshness.values() if v['status'] == 'fresh'])
1612
- }
1756
+ "job_types": len(set(job.get("job_type") for job in jobs)),
1757
+ "fresh_sources": len([v for v in freshness.values() if v["status"] == "fresh"]),
1758
+ },
1613
1759
  }
1614
-
1760
+
1615
1761
  except Exception as e:
1616
1762
  logger.error(f"Failed to read data: {e}")
1617
1763
  return {"error": str(e)}
1618
-
1764
+
1619
1765
  data = asyncio.run(read_data())
1620
-
1766
+
1621
1767
  if output_json:
1622
1768
  console.print(JSON.from_data(data))
1623
1769
  else:
1624
1770
  console.print("๐Ÿ“Š RECENT DATA SUMMARY", style="bold cyan")
1625
-
1771
+
1626
1772
  if "error" in data:
1627
1773
  console.print(f"โŒ Error: {data['error']}", style="red")
1628
1774
  return
1629
-
1775
+
1630
1776
  # Summary stats
1631
- summary = data.get('summary', {})
1777
+ summary = data.get("summary", {})
1632
1778
  console.print(f"\n๐Ÿ“ˆ Summary:", style="bold blue")
1633
1779
  console.print(f"Total recent jobs: {summary.get('total_jobs', 0)}")
1634
1780
  console.print(f"Active job types: {summary.get('job_types', 0)}")
1635
1781
  console.print(f"Fresh data sources: {summary.get('fresh_sources', 0)}")
1636
-
1782
+
1637
1783
  # Data freshness
1638
- freshness = data.get('data_freshness', {})
1784
+ freshness = data.get("data_freshness", {})
1639
1785
  if freshness:
1640
1786
  console.print(f"\n๐Ÿ• Data Freshness:", style="bold blue")
1641
1787
  for source, info in freshness.items():
1642
- status_icon = '๐ŸŸข' if info['status'] == 'fresh' else '๐ŸŸก'
1643
- source_name = source.replace('_', ' ').title()
1644
- last_success = info['last_success'][:19] if info['last_success'] else 'Never'
1788
+ status_icon = "๐ŸŸข" if info["status"] == "fresh" else "๐ŸŸก"
1789
+ source_name = source.replace("_", " ").title()
1790
+ last_success = info["last_success"][:19] if info["last_success"] else "Never"
1645
1791
  console.print(f"{status_icon} {source_name}: {last_success}")
1646
-
1792
+
1647
1793
  # Recent jobs
1648
- recent_jobs = data.get('recent_jobs', [])[:10] # Show top 10
1794
+ recent_jobs = data.get("recent_jobs", [])[:10] # Show top 10
1649
1795
  if recent_jobs:
1650
- console.print(f"\n๐Ÿ“‹ Recent Jobs (showing {len(recent_jobs)}):", style="bold blue")
1796
+ console.print(f"\n๐Ÿ“‹ Recent Jobs (showing {len(recent_jobs)}):", style="bold blue")
1651
1797
  for job in recent_jobs:
1652
- status_icon = {'completed': 'โœ…', 'running': '๐Ÿ”„', 'failed': 'โŒ', 'pending': 'โณ'}.get(job.get('status'), 'โ“')
1653
- job_type = job.get('job_type', 'unknown').replace('_', ' ').title()
1654
- started_at = job.get('started_at', 'N/A')[:19]
1798
+ status_icon = {
1799
+ "completed": "โœ…",
1800
+ "running": "๐Ÿ”„",
1801
+ "failed": "โŒ",
1802
+ "pending": "โณ",
1803
+ }.get(job.get("status"), "โ“")
1804
+ job_type = job.get("job_type", "unknown").replace("_", " ").title()
1805
+ started_at = job.get("started_at", "N/A")[:19]
1655
1806
  console.print(f"{status_icon} {job_type}: {started_at}")
1656
1807
 
1657
1808
 
1658
- @politician_trading_cli.command("config-real-data")
1809
+ @politician_trading_cli.command("config-real-data")
1659
1810
  @click.option("--enable", is_flag=True, help="Enable real data collection")
1660
1811
  @click.option("--restore", is_flag=True, help="Restore sample data mode")
1661
1812
  @click.option("--status", is_flag=True, help="Show current configuration status")
1662
1813
  def configure_real_data(enable: bool, restore: bool, status: bool):
1663
1814
  """Configure real vs sample data collection"""
1664
-
1815
+
1665
1816
  if status or not (enable or restore):
1666
1817
  # Show current status
1667
1818
  console.print("๐Ÿ”ง DATA COLLECTION CONFIGURATION", style="bold cyan")
1668
-
1819
+
1669
1820
  console.print("\n๐Ÿ“‹ Current Status:", style="bold blue")
1670
1821
  console.print("โ€ข Sample data mode: Currently DISABLED", style="green")
1671
1822
  console.print("โ€ข Real API calls: Currently ACTIVE", style="green")
1672
1823
  console.print("โ€ข Database writes: Currently WORKING", style="green")
1673
-
1824
+
1674
1825
  console.print("\n๐ŸŽฏ Data Source Readiness:", style="bold blue")
1675
1826
  readiness_info = [
1676
1827
  ("UK Parliament API", "โœ… Active - Real API with full transaction data", "green"),
1677
- ("US House/Senate", "โœ… Active - Real disclosure database access", "green"),
1828
+ ("US House/Senate", "โœ… Active - Real disclosure database access", "green"),
1678
1829
  ("EU Parliament", "โœ… Active - Real MEP profile scraping", "green"),
1679
1830
  ("California NetFile", "โš ๏ธ Limited - Complex forms require careful handling", "yellow"),
1680
- ("EU Member States", "โš ๏ธ Limited - Country-specific implementations needed", "yellow")
1831
+ ("EU Member States", "โš ๏ธ Limited - Country-specific implementations needed", "yellow"),
1681
1832
  ]
1682
-
1833
+
1683
1834
  for source, info, color in readiness_info:
1684
1835
  console.print(f"{info}", style=color)
1685
-
1836
+
1686
1837
  console.print("\n๐Ÿ’ก Commands:", style="bold blue")
1687
1838
  console.print("mcli politician-trading config-real-data --enable # Enable real data")
1688
1839
  console.print("mcli politician-trading config-real-data --restore # Restore sample mode")
1689
-
1840
+
1690
1841
  return
1691
-
1842
+
1692
1843
  # Get scraper files
1693
1844
  src_dir = Path(__file__).parent
1694
1845
  scraper_files = [
1695
1846
  "scrapers_uk.py",
1696
1847
  "scrapers_california.py",
1697
- "scrapers_eu.py",
1698
- "scrapers_us_states.py"
1848
+ "scrapers_eu.py",
1849
+ "scrapers_us_states.py",
1699
1850
  ]
1700
-
1851
+
1701
1852
  if restore:
1702
1853
  console.print("๐Ÿ”„ RESTORING SAMPLE DATA MODE", style="bold yellow")
1703
-
1854
+
1704
1855
  restored = 0
1705
1856
  for file_name in scraper_files:
1706
1857
  file_path = src_dir / file_name
1707
1858
  backup_path = Path(str(file_path) + ".backup")
1708
-
1859
+
1709
1860
  if backup_path.exists():
1710
1861
  # Restore from backup
1711
1862
  try:
@@ -1717,70 +1868,68 @@ def configure_real_data(enable: bool, restore: bool, status: bool):
1717
1868
  console.print(f"โŒ Failed to restore {file_name}: {e}", style="red")
1718
1869
  else:
1719
1870
  console.print(f"โ„น๏ธ No backup found for {file_name}", style="dim")
1720
-
1871
+
1721
1872
  console.print(f"\n๐ŸŽฏ Restored {restored} files to sample mode", style="green")
1722
-
1873
+
1723
1874
  elif enable:
1724
1875
  console.print("๐Ÿš€ ENABLING REAL DATA COLLECTION", style="bold green")
1725
-
1876
+
1726
1877
  with Progress(
1727
- SpinnerColumn(),
1728
- TextColumn("[progress.description]{task.description}"),
1729
- console=console
1878
+ SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console
1730
1879
  ) as progress:
1731
1880
  task = progress.add_task("Configuring scrapers...", total=len(scraper_files))
1732
-
1881
+
1733
1882
  modifications_made = 0
1734
-
1883
+
1735
1884
  for file_name in scraper_files:
1736
1885
  progress.update(task, description=f"Processing {file_name}...")
1737
-
1886
+
1738
1887
  file_path = src_dir / file_name
1739
-
1888
+
1740
1889
  if not file_path.exists():
1741
1890
  progress.advance(task)
1742
1891
  continue
1743
-
1892
+
1744
1893
  try:
1745
1894
  # Read file content
1746
1895
  content = file_path.read_text()
1747
1896
  original_content = content
1748
-
1897
+
1749
1898
  # Remove sample flags
1750
1899
  content = re.sub(r'"sample":\s*True', '"sample": False', content)
1751
1900
  content = re.sub(r"'sample':\s*True", "'sample': False", content)
1752
-
1901
+
1753
1902
  # Enable actual processing
1754
1903
  content = re.sub(
1755
- r'# This would implement actual (.+?) scraping',
1904
+ r"# This would implement actual (.+?) scraping",
1756
1905
  r'logger.info("Processing real \1 data")',
1757
- content
1906
+ content,
1758
1907
  )
1759
-
1908
+
1760
1909
  if content != original_content:
1761
1910
  # Backup original
1762
1911
  backup_path = str(file_path) + ".backup"
1763
1912
  Path(backup_path).write_text(original_content)
1764
-
1913
+
1765
1914
  # Write modified content
1766
1915
  file_path.write_text(content)
1767
1916
  modifications_made += 1
1768
-
1917
+
1769
1918
  except Exception as e:
1770
1919
  console.print(f"โŒ Error processing {file_name}: {e}", style="red")
1771
-
1920
+
1772
1921
  progress.advance(task)
1773
-
1922
+
1774
1923
  console.print(f"\nโœ… Real data configuration complete!", style="bold green")
1775
1924
  console.print(f"Modified {modifications_made} scraper files", style="green")
1776
-
1925
+
1777
1926
  if modifications_made > 0:
1778
1927
  console.print(f"\nโš ๏ธ Important Next Steps:", style="bold yellow")
1779
1928
  console.print("1. Test with UK Parliament first (most reliable)", style="dim")
1780
1929
  console.print("2. Monitor API rate limits carefully", style="dim")
1781
1930
  console.print("3. Check logs for parsing errors", style="dim")
1782
1931
  console.print("4. Use --restore flag if issues occur", style="dim")
1783
-
1932
+
1784
1933
  console.print(f"\n๐Ÿงช Test Commands:", style="bold blue")
1785
1934
  console.print("mcli politician-trading cron run --type quick # Quick test")
1786
1935
  console.print("mcli politician-trading monitor # Check results")