mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
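A comparison like the one below can be reproduced locally from the two published wheels. The following is a minimal sketch (not part of the release; the wheel file names are assumed from the versions above, e.g. as fetched with `pip download mcli-framework==7.1.0 --no-deps`) that unpacks both archives with the Python standard library and prints a unified diff for each changed file:

import difflib
import zipfile

OLD = "mcli_framework-7.1.0-py3-none-any.whl"  # assumed local file name
NEW = "mcli_framework-7.1.2-py3-none-any.whl"  # assumed local file name

def read_members(path):
    """Map archive member name -> decoded text content (wheels are zip files)."""
    with zipfile.ZipFile(path) as zf:
        return {n: zf.read(n).decode("utf-8", errors="replace") for n in zf.namelist()}

old, new = read_members(OLD), read_members(NEW)
for name in sorted(old.keys() | new.keys()):
    a = old.get(name, "").splitlines(keepends=True)
    b = new.get(name, "").splitlines(keepends=True)
    if a != b:
        # One unified diff per changed file, mirroring the per-file listing below
        print("".join(difflib.unified_diff(a, b, f"7.1.0/{name}", f"7.1.2/{name}")))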
Potentially problematic release.
This version of mcli-framework might be problematic.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +46 -13
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +69 -58
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +283 -152
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +235 -0
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +38 -18
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
|
@@ -8,25 +8,30 @@ import os
|
|
|
8
8
|
import re
|
|
9
9
|
from datetime import datetime, timedelta
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import
|
|
11
|
+
from typing import Any, Dict, List
|
|
12
12
|
|
|
13
13
|
import click
|
|
14
14
|
from rich.console import Console
|
|
15
|
-
from rich.table import Table
|
|
16
|
-
from rich.panel import Panel
|
|
17
15
|
from rich.json import JSON
|
|
16
|
+
from rich.panel import Panel
|
|
18
17
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
18
|
+
from rich.table import Table
|
|
19
19
|
|
|
20
20
|
from mcli.lib.logger.logger import get_logger
|
|
21
|
+
|
|
22
|
+
from .config import WorkflowConfig
|
|
23
|
+
from .connectivity import (
|
|
24
|
+
SupabaseConnectivityValidator,
|
|
25
|
+
run_connectivity_validation,
|
|
26
|
+
run_continuous_monitoring,
|
|
27
|
+
)
|
|
28
|
+
from .database import PoliticianTradingDB
|
|
29
|
+
from .monitoring import PoliticianTradingMonitor, run_health_check, run_stats_report
|
|
21
30
|
from .workflow import (
|
|
22
31
|
PoliticianTradingWorkflow,
|
|
23
|
-
run_politician_trading_collection,
|
|
24
32
|
check_politician_trading_status,
|
|
33
|
+
run_politician_trading_collection,
|
|
25
34
|
)
|
|
26
|
-
from .config import WorkflowConfig
|
|
27
|
-
from .database import PoliticianTradingDB
|
|
28
|
-
from .monitoring import PoliticianTradingMonitor, run_health_check, run_stats_report
|
|
29
|
-
from .connectivity import SupabaseConnectivityValidator, run_connectivity_validation, run_continuous_monitoring
|
|
30
35
|
|
|
31
36
|
logger = get_logger(__name__)
|
|
32
37
|
console = Console()
|
|
@@ -200,25 +205,27 @@ Timeout: {config.scraping.timeout}s"""
|
|
|
200
205
|
|
|
201
206
|
if generate_schema:
|
|
202
207
|
console.print("๐ Generating database schema files...", style="blue")
|
|
203
|
-
|
|
208
|
+
|
|
204
209
|
# Generate schema file
|
|
205
210
|
import os
|
|
206
211
|
from pathlib import Path
|
|
207
|
-
|
|
212
|
+
|
|
208
213
|
output_path = Path(output_dir)
|
|
209
214
|
output_path.mkdir(exist_ok=True)
|
|
210
|
-
|
|
215
|
+
|
|
211
216
|
# Read the schema SQL from the module
|
|
212
217
|
schema_file = Path(__file__).parent / "schema.sql"
|
|
213
218
|
if schema_file.exists():
|
|
214
219
|
schema_content = schema_file.read_text()
|
|
215
|
-
|
|
220
|
+
|
|
216
221
|
# Write to output directory
|
|
217
222
|
output_schema_file = output_path / "politician_trading_schema.sql"
|
|
218
223
|
output_schema_file.write_text(schema_content)
|
|
219
|
-
|
|
220
|
-
console.print(
|
|
221
|
-
|
|
224
|
+
|
|
225
|
+
console.print(
|
|
226
|
+
f"โ
Schema SQL generated: {output_schema_file.absolute()}", style="green"
|
|
227
|
+
)
|
|
228
|
+
|
|
222
229
|
# Also generate a setup instructions file
|
|
223
230
|
instructions = f"""# Politician Trading Database Setup Instructions
|
|
224
231
|
|
|
@@ -269,24 +276,27 @@ If you encounter issues:
|
|
|
269
276
|
2. View logs: `politician-trading health`
|
|
270
277
|
3. Test workflow: `politician-trading test-workflow --verbose`
|
|
271
278
|
"""
|
|
272
|
-
|
|
279
|
+
|
|
273
280
|
instructions_file = output_path / "SETUP_INSTRUCTIONS.md"
|
|
274
281
|
instructions_file.write_text(instructions)
|
|
275
|
-
|
|
276
|
-
console.print(
|
|
277
|
-
|
|
282
|
+
|
|
283
|
+
console.print(
|
|
284
|
+
f"โ
Setup instructions generated: {instructions_file.absolute()}",
|
|
285
|
+
style="green",
|
|
286
|
+
)
|
|
287
|
+
|
|
278
288
|
# Display summary
|
|
279
289
|
console.print("\n๐ Generated Files:", style="bold")
|
|
280
290
|
console.print(f" ๐ Schema SQL: {output_schema_file.name}")
|
|
281
291
|
console.print(f" ๐ Instructions: {instructions_file.name}")
|
|
282
292
|
console.print(f" ๐ Location: {output_path.absolute()}")
|
|
283
|
-
|
|
293
|
+
|
|
284
294
|
console.print("\n๐ Next Steps:", style="bold green")
|
|
285
295
|
console.print("1. Open Supabase SQL editor")
|
|
286
296
|
console.print(f"2. Execute SQL from: {output_schema_file.name}")
|
|
287
297
|
console.print("3. Run: politician-trading setup --verify")
|
|
288
298
|
console.print("4. Run: politician-trading test-workflow --verbose")
|
|
289
|
-
|
|
299
|
+
|
|
290
300
|
else:
|
|
291
301
|
console.print("โ Schema template not found", style="red")
|
|
292
302
|
|
|
@@ -449,13 +459,13 @@ def check_connectivity(output_json: bool, continuous: bool, interval: int, durat
|
|
|
449
459
|
else:
|
|
450
460
|
try:
|
|
451
461
|
validation_result = asyncio.run(run_connectivity_validation())
|
|
452
|
-
|
|
462
|
+
|
|
453
463
|
if output_json:
|
|
454
464
|
console.print(JSON.from_data(validation_result))
|
|
455
465
|
else:
|
|
456
466
|
validator = SupabaseConnectivityValidator()
|
|
457
467
|
validator.display_connectivity_report(validation_result)
|
|
458
|
-
|
|
468
|
+
|
|
459
469
|
except Exception as e:
|
|
460
470
|
console.print(f"โ Connectivity validation failed: {e}", style="bold red")
|
|
461
471
|
logger.error(f"Connectivity validation failed: {e}")
|
|
@@ -467,94 +477,109 @@ def check_connectivity(output_json: bool, continuous: bool, interval: int, durat
|
|
|
467
477
|
def test_full_workflow(verbose: bool, validate_writes: bool):
|
|
468
478
|
"""Run a complete workflow test with live Supabase connectivity"""
|
|
469
479
|
console.print("๐งช Running Full Politician Trading Workflow Test", style="bold green")
|
|
470
|
-
|
|
480
|
+
|
|
471
481
|
async def run_test():
|
|
472
482
|
# First validate connectivity
|
|
473
483
|
console.print("\n๐ Step 1: Validating Supabase connectivity...", style="blue")
|
|
474
484
|
validator = SupabaseConnectivityValidator()
|
|
475
485
|
connectivity_result = await validator.validate_connectivity()
|
|
476
|
-
|
|
486
|
+
|
|
477
487
|
if verbose:
|
|
478
488
|
validator.display_connectivity_report(connectivity_result)
|
|
479
489
|
else:
|
|
480
|
-
console.print(
|
|
481
|
-
|
|
482
|
-
|
|
490
|
+
console.print(
|
|
491
|
+
f"Connectivity Score: {connectivity_result['connectivity_score']}%", style="cyan"
|
|
492
|
+
)
|
|
493
|
+
|
|
494
|
+
if connectivity_result["connectivity_score"] < 75:
|
|
483
495
|
console.print("โ ๏ธ Connectivity issues detected. Workflow may fail.", style="yellow")
|
|
484
|
-
|
|
496
|
+
|
|
485
497
|
# Run the workflow
|
|
486
498
|
console.print("\n๐๏ธ Step 2: Running politician trading collection workflow...", style="blue")
|
|
487
|
-
|
|
499
|
+
|
|
488
500
|
try:
|
|
489
501
|
with console.status("[bold blue]Executing workflow...") as status:
|
|
490
502
|
workflow_result = await run_politician_trading_collection()
|
|
491
|
-
|
|
503
|
+
|
|
492
504
|
# Display workflow results
|
|
493
505
|
console.print("\n๐ Workflow Results:", style="bold")
|
|
494
|
-
|
|
506
|
+
|
|
495
507
|
if workflow_result.get("status") == "completed":
|
|
496
508
|
console.print("โ
Workflow completed successfully!", style="green")
|
|
497
|
-
|
|
509
|
+
|
|
498
510
|
summary = workflow_result.get("summary", {})
|
|
499
511
|
console.print(f"New Disclosures: {summary.get('total_new_disclosures', 0)}")
|
|
500
512
|
console.print(f"Updated Disclosures: {summary.get('total_updated_disclosures', 0)}")
|
|
501
513
|
console.print(f"Errors: {len(summary.get('errors', []))}")
|
|
502
|
-
|
|
514
|
+
|
|
503
515
|
if verbose and summary.get("errors"):
|
|
504
516
|
console.print("\nErrors encountered:", style="red")
|
|
505
517
|
for error in summary["errors"][:5]: # Show first 5 errors
|
|
506
518
|
console.print(f" โข {error}", style="dim red")
|
|
507
|
-
|
|
519
|
+
|
|
508
520
|
else:
|
|
509
521
|
console.print("โ Workflow failed!", style="red")
|
|
510
522
|
if "error" in workflow_result:
|
|
511
523
|
console.print(f"Error: {workflow_result['error']}", style="red")
|
|
512
|
-
|
|
524
|
+
|
|
513
525
|
# Validate writes if requested
|
|
514
526
|
if validate_writes:
|
|
515
527
|
console.print("\n๐ Step 3: Validating database writes...", style="blue")
|
|
516
528
|
write_validation = await validator._test_write_operations()
|
|
517
|
-
|
|
529
|
+
|
|
518
530
|
if write_validation["success"]:
|
|
519
531
|
console.print("โ
Database writes validated successfully", style="green")
|
|
520
532
|
else:
|
|
521
|
-
console.print(
|
|
522
|
-
|
|
533
|
+
console.print(
|
|
534
|
+
f"โ Database write validation failed: {write_validation.get('error', 'Unknown error')}",
|
|
535
|
+
style="red",
|
|
536
|
+
)
|
|
537
|
+
|
|
523
538
|
# Final connectivity check
|
|
524
539
|
console.print("\n๐ Step 4: Post-workflow connectivity check...", style="blue")
|
|
525
540
|
final_connectivity = await validator.validate_connectivity()
|
|
526
|
-
|
|
527
|
-
console.print(
|
|
528
|
-
|
|
541
|
+
|
|
542
|
+
console.print(
|
|
543
|
+
f"Final Connectivity Score: {final_connectivity['connectivity_score']}%",
|
|
544
|
+
style="cyan",
|
|
545
|
+
)
|
|
546
|
+
|
|
529
547
|
# Summary
|
|
530
548
|
console.print("\n๐ Test Summary:", style="bold")
|
|
531
|
-
workflow_status =
|
|
532
|
-
|
|
533
|
-
|
|
549
|
+
workflow_status = (
|
|
550
|
+
"โ
PASSED" if workflow_result.get("status") == "completed" else "โ FAILED"
|
|
551
|
+
)
|
|
552
|
+
connectivity_status = (
|
|
553
|
+
"โ
GOOD" if final_connectivity["connectivity_score"] >= 75 else "โ ๏ธ DEGRADED"
|
|
554
|
+
)
|
|
555
|
+
|
|
534
556
|
console.print(f"Workflow: {workflow_status}")
|
|
535
557
|
console.print(f"Connectivity: {connectivity_status}")
|
|
536
|
-
console.print(
|
|
537
|
-
|
|
558
|
+
console.print(
|
|
559
|
+
f"Duration: {workflow_result.get('started_at', '')} to {workflow_result.get('completed_at', '')}"
|
|
560
|
+
)
|
|
561
|
+
|
|
538
562
|
return {
|
|
539
563
|
"workflow_result": workflow_result,
|
|
540
564
|
"connectivity_result": final_connectivity,
|
|
541
|
-
"test_passed": workflow_result.get("status") == "completed"
|
|
565
|
+
"test_passed": workflow_result.get("status") == "completed"
|
|
566
|
+
and final_connectivity["connectivity_score"] >= 75,
|
|
542
567
|
}
|
|
543
|
-
|
|
568
|
+
|
|
544
569
|
except Exception as e:
|
|
545
570
|
console.print(f"โ Workflow test failed: {e}", style="bold red")
|
|
546
571
|
if verbose:
|
|
547
572
|
console.print_exception()
|
|
548
573
|
return {"error": str(e), "test_passed": False}
|
|
549
|
-
|
|
574
|
+
|
|
550
575
|
try:
|
|
551
576
|
test_result = asyncio.run(run_test())
|
|
552
|
-
|
|
577
|
+
|
|
553
578
|
if test_result.get("test_passed"):
|
|
554
579
|
console.print("\n๐ Full workflow test PASSED!", style="bold green")
|
|
555
580
|
else:
|
|
556
581
|
console.print("\nโ Full workflow test FAILED!", style="bold red")
|
|
557
|
-
|
|
582
|
+
|
|
558
583
|
except Exception as e:
|
|
559
584
|
console.print(f"โ Test execution failed: {e}", style="bold red")
|
|
560
585
|
logger.error(f"Test workflow command failed: {e}")
|
|
@@ -566,83 +591,114 @@ def test_full_workflow(verbose: bool, validate_writes: bool):
|
|
|
566
591
|
@click.option("--output-dir", default=".", help="Output directory for generated files")
|
|
567
592
|
def manage_schema(show_location: bool, generate: bool, output_dir: str):
|
|
568
593
|
"""Manage database schema files"""
|
|
569
|
-
|
|
594
|
+
|
|
570
595
|
if show_location:
|
|
571
596
|
console.print("๐ Schema File Locations", style="bold blue")
|
|
572
|
-
|
|
597
|
+
|
|
573
598
|
from pathlib import Path
|
|
599
|
+
|
|
574
600
|
schema_file = Path(__file__).parent / "schema.sql"
|
|
575
|
-
|
|
601
|
+
|
|
576
602
|
console.print(f"Built-in Schema: {schema_file.absolute()}", style="cyan")
|
|
577
603
|
console.print(f"File size: {schema_file.stat().st_size} bytes", style="dim")
|
|
578
|
-
console.print(
|
|
579
|
-
|
|
604
|
+
console.print(
|
|
605
|
+
f"Exists: {'โ
Yes' if schema_file.exists() else 'โ No'}",
|
|
606
|
+
style="green" if schema_file.exists() else "red",
|
|
607
|
+
)
|
|
608
|
+
|
|
580
609
|
# Show current working directory option
|
|
581
610
|
cwd_schema = Path.cwd() / "politician_trading_schema.sql"
|
|
582
611
|
console.print(f"\nCurrent directory: {cwd_schema.absolute()}", style="cyan")
|
|
583
|
-
console.print(
|
|
584
|
-
|
|
612
|
+
console.print(
|
|
613
|
+
f"Exists: {'โ
Yes' if cwd_schema.exists() else 'โ No'}",
|
|
614
|
+
style="green" if cwd_schema.exists() else "dim",
|
|
615
|
+
)
|
|
616
|
+
|
|
585
617
|
if not cwd_schema.exists():
|
|
586
618
|
console.print("\n๐ก To generate schema file here:", style="blue")
|
|
587
619
|
console.print("politician-trading schema --generate", style="yellow")
|
|
588
|
-
|
|
620
|
+
|
|
589
621
|
elif generate:
|
|
590
622
|
# Reuse the setup command logic
|
|
591
623
|
try:
|
|
592
|
-
from pathlib import Path
|
|
593
624
|
import os
|
|
594
|
-
|
|
625
|
+
from pathlib import Path
|
|
626
|
+
|
|
595
627
|
console.print("๐ Generating database schema files...", style="blue")
|
|
596
|
-
|
|
628
|
+
|
|
597
629
|
output_path = Path(output_dir)
|
|
598
630
|
output_path.mkdir(exist_ok=True)
|
|
599
|
-
|
|
631
|
+
|
|
600
632
|
# Read the schema SQL from the module
|
|
601
633
|
schema_file = Path(__file__).parent / "schema.sql"
|
|
602
634
|
if schema_file.exists():
|
|
603
635
|
schema_content = schema_file.read_text()
|
|
604
|
-
|
|
636
|
+
|
|
605
637
|
# Write to output directory
|
|
606
638
|
output_schema_file = output_path / "politician_trading_schema.sql"
|
|
607
639
|
output_schema_file.write_text(schema_content)
|
|
608
|
-
|
|
609
|
-
console.print(
|
|
610
|
-
|
|
640
|
+
|
|
641
|
+
console.print(
|
|
642
|
+
f"โ
Schema SQL generated: {output_schema_file.absolute()}", style="green"
|
|
643
|
+
)
|
|
644
|
+
|
|
611
645
|
# Show file info
|
|
612
646
|
console.print(f"๐ File size: {output_schema_file.stat().st_size:,} bytes")
|
|
613
647
|
console.print(f"๐
Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
614
|
-
|
|
648
|
+
|
|
615
649
|
# Count SQL statements
|
|
616
|
-
statements = len(
|
|
650
|
+
statements = len(
|
|
651
|
+
[
|
|
652
|
+
line
|
|
653
|
+
for line in schema_content.split("\n")
|
|
654
|
+
if line.strip().startswith(("CREATE", "INSERT", "SELECT"))
|
|
655
|
+
]
|
|
656
|
+
)
|
|
617
657
|
console.print(f"๐ SQL statements: {statements}")
|
|
618
|
-
|
|
658
|
+
|
|
619
659
|
else:
|
|
620
660
|
console.print("โ Schema template not found", style="red")
|
|
621
|
-
|
|
661
|
+
|
|
622
662
|
except Exception as e:
|
|
623
663
|
console.print(f"โ Schema generation failed: {e}", style="red")
|
|
624
|
-
|
|
664
|
+
|
|
625
665
|
else:
|
|
626
666
|
# Show schema information by default
|
|
627
667
|
console.print("๐๏ธ Politician Trading Database Schema", style="bold blue")
|
|
628
|
-
|
|
668
|
+
|
|
629
669
|
schema_info = [
|
|
630
|
-
(
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
670
|
+
(
|
|
671
|
+
"politicians",
|
|
672
|
+
"Stores politician information",
|
|
673
|
+
"UUID primary key, bioguide_id, role, party",
|
|
674
|
+
),
|
|
675
|
+
(
|
|
676
|
+
"trading_disclosures",
|
|
677
|
+
"Individual trading transactions",
|
|
678
|
+
"References politicians, amount ranges, asset details",
|
|
679
|
+
),
|
|
680
|
+
(
|
|
681
|
+
"data_pull_jobs",
|
|
682
|
+
"Job execution tracking",
|
|
683
|
+
"Status, timing, record counts, error details",
|
|
684
|
+
),
|
|
685
|
+
(
|
|
686
|
+
"data_sources",
|
|
687
|
+
"Data source configuration",
|
|
688
|
+
"URLs, regions, health status, request config",
|
|
689
|
+
),
|
|
634
690
|
]
|
|
635
|
-
|
|
691
|
+
|
|
636
692
|
schema_table = Table(title="Database Tables")
|
|
637
693
|
schema_table.add_column("Table", style="cyan")
|
|
638
694
|
schema_table.add_column("Purpose", style="white")
|
|
639
695
|
schema_table.add_column("Key Features", style="yellow")
|
|
640
|
-
|
|
696
|
+
|
|
641
697
|
for table_name, purpose, features in schema_info:
|
|
642
698
|
schema_table.add_row(table_name, purpose, features)
|
|
643
|
-
|
|
699
|
+
|
|
644
700
|
console.print(schema_table)
|
|
645
|
-
|
|
701
|
+
|
|
646
702
|
console.print("\n๐ Commands:", style="bold")
|
|
647
703
|
console.print(" --show-location Show where schema files are located")
|
|
648
704
|
console.print(" --generate Generate schema SQL file")
|
|
@@ -689,11 +745,11 @@ def _format_timestamp(timestamp: str) -> str:
|
|
|
689
745
|
|
|
690
746
|
def _format_asset_display(disclosure: Dict[str, Any]) -> str:
|
|
691
747
|
"""Format asset display with proper ticker/name handling"""
|
|
692
|
-
asset_name = disclosure.get(
|
|
693
|
-
asset_ticker = disclosure.get(
|
|
694
|
-
|
|
748
|
+
asset_name = disclosure.get("asset_name", "Unknown Asset")
|
|
749
|
+
asset_ticker = disclosure.get("asset_ticker")
|
|
750
|
+
|
|
695
751
|
# If we have both ticker and name, show ticker first
|
|
696
|
-
if asset_ticker and asset_ticker.strip() and asset_ticker.lower() !=
|
|
752
|
+
if asset_ticker and asset_ticker.strip() and asset_ticker.lower() != "none":
|
|
697
753
|
return f"{asset_ticker} - {asset_name[:15]}"
|
|
698
754
|
# If we only have asset name, show just that
|
|
699
755
|
elif asset_name and asset_name.strip():
|
|
@@ -708,27 +764,27 @@ def _format_asset_display(disclosure: Dict[str, Any]) -> str:
|
|
|
708
764
|
def view_data_sources(output_json: bool):
|
|
709
765
|
"""View current data sources and their configurations"""
|
|
710
766
|
console = Console()
|
|
711
|
-
|
|
767
|
+
|
|
712
768
|
try:
|
|
713
769
|
from .config import WorkflowConfig
|
|
714
|
-
from .data_sources import ALL_DATA_SOURCES, TOTAL_SOURCES
|
|
715
|
-
|
|
770
|
+
from .data_sources import ACTIVE_SOURCES, ALL_DATA_SOURCES, TOTAL_SOURCES
|
|
771
|
+
|
|
716
772
|
config = WorkflowConfig.default()
|
|
717
773
|
active_sources = config.scraping.get_active_sources()
|
|
718
|
-
|
|
774
|
+
|
|
719
775
|
# Group sources by category for display
|
|
720
776
|
data_sources = {}
|
|
721
|
-
|
|
777
|
+
|
|
722
778
|
for category, sources in ALL_DATA_SOURCES.items():
|
|
723
779
|
active_category_sources = [s for s in sources if s.status == "active"]
|
|
724
780
|
if active_category_sources:
|
|
725
781
|
data_sources[category] = {
|
|
726
782
|
"name": {
|
|
727
783
|
"us_federal": "US Federal Government",
|
|
728
|
-
"us_states": "US State Governments",
|
|
784
|
+
"us_states": "US State Governments",
|
|
729
785
|
"eu_parliament": "EU Parliament",
|
|
730
786
|
"eu_national": "EU National Parliaments",
|
|
731
|
-
"third_party": "Third-Party Aggregators"
|
|
787
|
+
"third_party": "Third-Party Aggregators",
|
|
732
788
|
}[category],
|
|
733
789
|
"sources": active_category_sources,
|
|
734
790
|
"count": len(active_category_sources),
|
|
@@ -736,12 +792,12 @@ def view_data_sources(output_json: bool):
|
|
|
736
792
|
"description": {
|
|
737
793
|
"us_federal": "Congressional and federal official financial disclosures",
|
|
738
794
|
"us_states": "State legislature financial disclosure databases",
|
|
739
|
-
"eu_parliament": "MEP financial interest and income declarations",
|
|
795
|
+
"eu_parliament": "MEP financial interest and income declarations",
|
|
740
796
|
"eu_national": "National parliament financial disclosure systems",
|
|
741
|
-
"third_party": "Commercial aggregators and enhanced analysis platforms"
|
|
742
|
-
}[category]
|
|
797
|
+
"third_party": "Commercial aggregators and enhanced analysis platforms",
|
|
798
|
+
}[category],
|
|
743
799
|
}
|
|
744
|
-
|
|
800
|
+
|
|
745
801
|
if output_json:
|
|
746
802
|
# For JSON output, convert DataSource objects to dictionaries
|
|
747
803
|
json_output = {}
|
|
@@ -762,19 +818,24 @@ def view_data_sources(output_json: bool):
|
|
|
762
818
|
"update_frequency": source.update_frequency,
|
|
763
819
|
"threshold_amount": source.threshold_amount,
|
|
764
820
|
"data_format": source.data_format,
|
|
765
|
-
"notes": source.notes
|
|
821
|
+
"notes": source.notes,
|
|
766
822
|
}
|
|
767
823
|
for source in info["sources"]
|
|
768
|
-
]
|
|
824
|
+
],
|
|
769
825
|
}
|
|
770
826
|
console.print(JSON.from_data(json_output))
|
|
771
827
|
else:
|
|
772
|
-
console.print(
|
|
773
|
-
|
|
828
|
+
console.print(
|
|
829
|
+
f"๐ Comprehensive Political Trading Data Sources ({ACTIVE_SOURCES} active of {TOTAL_SOURCES} total)",
|
|
830
|
+
style="bold cyan",
|
|
831
|
+
)
|
|
832
|
+
|
|
774
833
|
for category_id, source_info in data_sources.items():
|
|
775
|
-
console.print(
|
|
834
|
+
console.print(
|
|
835
|
+
f"\n[bold blue]{source_info['name']}[/bold blue] ({source_info['count']} sources)"
|
|
836
|
+
)
|
|
776
837
|
console.print(f" {source_info['description']}", style="dim")
|
|
777
|
-
|
|
838
|
+
|
|
778
839
|
# Create table for this category's sources
|
|
779
840
|
table = Table()
|
|
780
841
|
table.add_column("Source", style="cyan")
|
|
@@ -782,32 +843,32 @@ def view_data_sources(output_json: bool):
|
|
|
782
843
|
table.add_column("Access", style="yellow")
|
|
783
844
|
table.add_column("Disclosure Types", style="magenta")
|
|
784
845
|
table.add_column("Threshold", style="blue")
|
|
785
|
-
|
|
846
|
+
|
|
786
847
|
for source in source_info["sources"]:
|
|
787
848
|
# Format disclosure types
|
|
788
|
-
types_display = ", ".join(
|
|
789
|
-
dt.value.replace("_", " ").title()
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
849
|
+
types_display = ", ".join(
|
|
850
|
+
[dt.value.replace("_", " ").title() for dt in source.disclosure_types]
|
|
851
|
+
)
|
|
852
|
+
|
|
793
853
|
# Format threshold
|
|
794
854
|
threshold_display = (
|
|
795
|
-
f"${source.threshold_amount:,}" if source.threshold_amount
|
|
796
|
-
else "None"
|
|
855
|
+
f"${source.threshold_amount:,}" if source.threshold_amount else "None"
|
|
797
856
|
)
|
|
798
|
-
|
|
857
|
+
|
|
799
858
|
table.add_row(
|
|
800
859
|
source.name,
|
|
801
860
|
source.jurisdiction,
|
|
802
861
|
source.access_method.value.replace("_", " ").title(),
|
|
803
862
|
types_display[:30] + ("..." if len(types_display) > 30 else ""),
|
|
804
|
-
threshold_display
|
|
863
|
+
threshold_display,
|
|
805
864
|
)
|
|
806
|
-
|
|
865
|
+
|
|
807
866
|
console.print(table)
|
|
808
|
-
|
|
809
|
-
console.print(
|
|
810
|
-
|
|
867
|
+
|
|
868
|
+
console.print(
|
|
869
|
+
f"\n[dim]Total: {ACTIVE_SOURCES} active sources across {len(data_sources)} categories[/dim]"
|
|
870
|
+
)
|
|
871
|
+
|
|
811
872
|
except Exception as e:
|
|
812
873
|
if output_json:
|
|
813
874
|
console.print(JSON.from_data({"error": str(e)}))
|
|
@@ -821,15 +882,16 @@ def view_data_sources(output_json: bool):
|
|
|
821
882
|
def view_jobs(output_json: bool, limit: int):
|
|
822
883
|
"""View current and recent data collection jobs"""
|
|
823
884
|
console = Console()
|
|
824
|
-
|
|
885
|
+
|
|
825
886
|
try:
|
|
887
|
+
|
|
826
888
|
async def get_jobs():
|
|
827
|
-
from .database import PoliticianTradingDB
|
|
828
889
|
from .config import WorkflowConfig
|
|
829
|
-
|
|
890
|
+
from .database import PoliticianTradingDB
|
|
891
|
+
|
|
830
892
|
config = WorkflowConfig.default()
|
|
831
893
|
db = PoliticianTradingDB(config)
|
|
832
|
-
|
|
894
|
+
|
|
833
895
|
# Get recent jobs
|
|
834
896
|
jobs_result = (
|
|
835
897
|
db.client.table("data_pull_jobs")
|
|
@@ -838,20 +900,20 @@ def view_jobs(output_json: bool, limit: int):
|
|
|
838
900
|
.limit(limit)
|
|
839
901
|
.execute()
|
|
840
902
|
)
|
|
841
|
-
|
|
903
|
+
|
|
842
904
|
return jobs_result.data if jobs_result.data else []
|
|
843
|
-
|
|
905
|
+
|
|
844
906
|
jobs = asyncio.run(get_jobs())
|
|
845
|
-
|
|
907
|
+
|
|
846
908
|
if output_json:
|
|
847
909
|
console.print(JSON.from_data(jobs))
|
|
848
910
|
else:
|
|
849
911
|
console.print("๐ Recent Data Collection Jobs", style="bold cyan")
|
|
850
|
-
|
|
912
|
+
|
|
851
913
|
if not jobs:
|
|
852
914
|
console.print("No jobs found", style="yellow")
|
|
853
915
|
return
|
|
854
|
-
|
|
916
|
+
|
|
855
917
|
jobs_table = Table()
|
|
856
918
|
jobs_table.add_column("Job ID", style="cyan")
|
|
857
919
|
jobs_table.add_column("Type", style="green")
|
|
@@ -859,35 +921,35 @@ def view_jobs(output_json: bool, limit: int):
|
|
|
859
921
|
jobs_table.add_column("Started", style="blue")
|
|
860
922
|
jobs_table.add_column("Duration", style="magenta")
|
|
861
923
|
jobs_table.add_column("Records", style="yellow")
|
|
862
|
-
|
|
924
|
+
|
|
863
925
|
for job in jobs:
|
|
864
926
|
status_color = {
|
|
865
927
|
"completed": "green",
|
|
866
|
-
"running": "yellow",
|
|
928
|
+
"running": "yellow",
|
|
867
929
|
"failed": "red",
|
|
868
|
-
"pending": "blue"
|
|
930
|
+
"pending": "blue",
|
|
869
931
|
}.get(job.get("status", "unknown"), "white")
|
|
870
|
-
|
|
932
|
+
|
|
871
933
|
# Calculate duration
|
|
872
934
|
started = job.get("started_at", "")
|
|
873
935
|
completed = job.get("completed_at", "")
|
|
874
936
|
duration = _format_duration_from_timestamps(started, completed)
|
|
875
|
-
|
|
937
|
+
|
|
876
938
|
# Format records
|
|
877
939
|
records_info = f"{job.get('records_new', 0)}n/{job.get('records_updated', 0)}u/{job.get('records_failed', 0)}f"
|
|
878
|
-
|
|
940
|
+
|
|
879
941
|
jobs_table.add_row(
|
|
880
942
|
job.get("id", "")[:8] + "...",
|
|
881
943
|
job.get("job_type", "unknown"),
|
|
882
944
|
f"[{status_color}]{job.get('status', 'unknown')}[/{status_color}]",
|
|
883
945
|
_format_timestamp(started),
|
|
884
946
|
duration,
|
|
885
|
-
records_info
|
|
947
|
+
records_info,
|
|
886
948
|
)
|
|
887
|
-
|
|
949
|
+
|
|
888
950
|
console.print(jobs_table)
|
|
889
951
|
console.print("\nLegend: Records = new/updated/failed", style="dim")
|
|
890
|
-
|
|
952
|
+
|
|
891
953
|
except Exception as e:
|
|
892
954
|
if output_json:
|
|
893
955
|
console.print(JSON.from_data({"error": str(e)}))
|
|
@@ -900,20 +962,21 @@ def _format_duration_from_timestamps(started: str, completed: str) -> str:
|
|
|
900
962
|
"""Calculate and format duration from timestamps"""
|
|
901
963
|
if not started:
|
|
902
964
|
return "Unknown"
|
|
903
|
-
|
|
965
|
+
|
|
904
966
|
try:
|
|
905
967
|
start_dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
|
|
906
|
-
|
|
968
|
+
|
|
907
969
|
if completed:
|
|
908
970
|
end_dt = datetime.fromisoformat(completed.replace("Z", "+00:00"))
|
|
909
971
|
duration = end_dt - start_dt
|
|
910
972
|
else:
|
|
911
973
|
# Job still running
|
|
912
974
|
from datetime import timezone
|
|
975
|
+
|
|
913
976
|
duration = datetime.now(timezone.utc) - start_dt
|
|
914
|
-
|
|
977
|
+
|
|
915
978
|
return _format_duration_seconds(int(duration.total_seconds()))
|
|
916
|
-
|
|
979
|
+
|
|
917
980
|
except Exception:
|
|
918
981
|
return "Unknown"
|
|
919
982
|
|
|
@@ -921,25 +984,28 @@ def _format_duration_from_timestamps(started: str, completed: str) -> str:
|
|
|
921
984
|
@politician_trading_cli.command("politicians")
|
|
922
985
|
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
923
986
|
@click.option("--limit", default=20, help="Number of politicians to show")
|
|
924
|
-
@click.option(
|
|
987
|
+
@click.option(
|
|
988
|
+
"--role", type=click.Choice(["us_house_rep", "us_senator", "eu_mep"]), help="Filter by role"
|
|
989
|
+
)
|
|
925
990
|
@click.option("--party", help="Filter by party")
|
|
926
991
|
@click.option("--state", help="Filter by state/country")
|
|
927
992
|
@click.option("--search", help="Search by name (first, last, or full name)")
|
|
928
993
|
def view_politicians(output_json: bool, limit: int, role: str, party: str, state: str, search: str):
|
|
929
994
|
"""View and search politicians in the database"""
|
|
930
995
|
console = Console()
|
|
931
|
-
|
|
996
|
+
|
|
932
997
|
try:
|
|
998
|
+
|
|
933
999
|
async def get_politicians():
|
|
934
|
-
from .database import PoliticianTradingDB
|
|
935
1000
|
from .config import WorkflowConfig
|
|
936
|
-
|
|
1001
|
+
from .database import PoliticianTradingDB
|
|
1002
|
+
|
|
937
1003
|
config = WorkflowConfig.default()
|
|
938
1004
|
db = PoliticianTradingDB(config)
|
|
939
|
-
|
|
1005
|
+
|
|
940
1006
|
# Build query
|
|
941
1007
|
query = db.client.table("politicians").select("*")
|
|
942
|
-
|
|
1008
|
+
|
|
943
1009
|
# Apply filters
|
|
944
1010
|
if role:
|
|
945
1011
|
query = query.eq("role", role)
|
|
@@ -949,22 +1015,24 @@ def view_politicians(output_json: bool, limit: int, role: str, party: str, state
|
|
|
949
1015
|
query = query.ilike("state_or_country", f"%{state}%")
|
|
950
1016
|
if search:
|
|
951
1017
|
# Search across name fields
|
|
952
|
-
query = query.or_(
|
|
953
|
-
|
|
1018
|
+
query = query.or_(
|
|
1019
|
+
f"first_name.ilike.%{search}%,last_name.ilike.%{search}%,full_name.ilike.%{search}%"
|
|
1020
|
+
)
|
|
1021
|
+
|
|
954
1022
|
result = query.order("created_at", desc=True).limit(limit).execute()
|
|
955
1023
|
return result.data if result.data else []
|
|
956
|
-
|
|
1024
|
+
|
|
957
1025
|
politicians = asyncio.run(get_politicians())
|
|
958
|
-
|
|
1026
|
+
|
|
959
1027
|
if output_json:
|
|
960
1028
|
console.print(JSON.from_data(politicians))
|
|
961
1029
|
else:
|
|
962
1030
|
console.print("๐ฅ Politicians Database", style="bold cyan")
|
|
963
|
-
|
|
1031
|
+
|
|
964
1032
|
if not politicians:
|
|
965
1033
|
console.print("No politicians found", style="yellow")
|
|
966
1034
|
return
|
|
967
|
-
|
|
1035
|
+
|
|
968
1036
|
politicians_table = Table()
|
|
969
1037
|
politicians_table.add_column("Name", style="cyan", min_width=25)
|
|
970
1038
|
politicians_table.add_column("Role", style="green")
|
|
@@ -972,26 +1040,29 @@ def view_politicians(output_json: bool, limit: int, role: str, party: str, state
|
|
|
972
1040
|
politicians_table.add_column("State/Country", style="magenta")
|
|
973
1041
|
politicians_table.add_column("District", style="yellow")
|
|
974
1042
|
politicians_table.add_column("Added", style="dim")
|
|
975
|
-
|
|
1043
|
+
|
|
976
1044
|
for pol in politicians:
|
|
977
1045
|
role_display = {
|
|
978
1046
|
"us_house_rep": "๐๏ธ House Rep",
|
|
979
|
-
"us_senator": "๐๏ธ Senator",
|
|
980
|
-
"eu_mep": "๐ช๐บ MEP"
|
|
1047
|
+
"us_senator": "๐๏ธ Senator",
|
|
1048
|
+
"eu_mep": "๐ช๐บ MEP",
|
|
981
1049
|
}.get(pol.get("role", ""), pol.get("role", "Unknown"))
|
|
982
|
-
|
|
1050
|
+
|
|
983
1051
|
politicians_table.add_row(
|
|
984
|
-
pol.get("full_name")
|
|
1052
|
+
pol.get("full_name")
|
|
1053
|
+
or f"{pol.get('first_name', '')} {pol.get('last_name', '')}".strip(),
|
|
985
1054
|
role_display,
|
|
986
1055
|
pol.get("party", "") or "Independent",
|
|
987
1056
|
pol.get("state_or_country", ""),
|
|
988
1057
|
pol.get("district", "") or "At-Large",
|
|
989
|
-
_format_timestamp(pol.get("created_at", ""))
|
|
1058
|
+
_format_timestamp(pol.get("created_at", "")),
|
|
990
1059
|
)
|
|
991
|
-
|
|
1060
|
+
|
|
992
1061
|
console.print(politicians_table)
|
|
993
|
-
console.print(
|
|
994
|
-
|
|
1062
|
+
console.print(
|
|
1063
|
+
f"\nShowing {len(politicians)} of {len(politicians)} politicians", style="dim"
|
|
1064
|
+
)
|
|
1065
|
+
|
|
995
1066
|
except Exception as e:
|
|
996
1067
|
if output_json:
|
|
997
1068
|
console.print(JSON.from_data({"error": str(e)}))
|
|
@@ -1000,109 +1071,137 @@ def view_politicians(output_json: bool, limit: int, role: str, party: str, state
|
|
|
1000
1071
|
logger.error(f"Politicians view failed: {e}")
|
|
1001
1072
|
|
|
1002
1073
|
|
|
1003
|
-
@politician_trading_cli.command("disclosures")
|
|
1074
|
+
@politician_trading_cli.command("disclosures")
|
|
1004
1075
|
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
1005
1076
|
@click.option("--limit", default=20, help="Number of disclosures to show")
|
|
1006
1077
|
@click.option("--politician", help="Filter by politician name")
|
|
1007
1078
|
@click.option("--asset", help="Filter by asset name or ticker")
|
|
1008
|
-
@click.option(
|
|
1079
|
+
@click.option(
|
|
1080
|
+
"--transaction-type",
|
|
1081
|
+
type=click.Choice(["purchase", "sale", "exchange"]),
|
|
1082
|
+
help="Filter by transaction type",
|
|
1083
|
+
)
|
|
1009
1084
|
@click.option("--amount-min", type=float, help="Minimum transaction amount")
|
|
1010
1085
|
@click.option("--amount-max", type=float, help="Maximum transaction amount")
|
|
1011
1086
|
@click.option("--days", default=30, help="Show disclosures from last N days")
|
|
1012
1087
|
@click.option("--details", is_flag=True, help="Show detailed information including raw data")
|
|
1013
|
-
def view_disclosures(
|
|
1014
|
-
|
|
1015
|
-
|
|
1088
|
+
def view_disclosures(
|
|
1089
|
+
output_json: bool,
|
|
1090
|
+
limit: int,
|
|
1091
|
+
politician: str,
|
|
1092
|
+
asset: str,
|
|
1093
|
+
transaction_type: str,
|
|
1094
|
+
amount_min: float,
|
|
1095
|
+
amount_max: float,
|
|
1096
|
+
days: int,
|
|
1097
|
+
details: bool,
|
|
1098
|
+
):
|
|
1016
1099
|
"""View and search trading disclosures in the database"""
|
|
1017
1100
|
console = Console()
|
|
1018
|
-
|
|
1101
|
+
|
|
1019
1102
|
try:
|
|
1103
|
+
|
|
1020
1104
|
async def get_disclosures():
|
|
1021
|
-
from .database import PoliticianTradingDB
|
|
1022
|
-
from .config import WorkflowConfig
|
|
1023
1105
|
from datetime import datetime, timedelta, timezone
|
|
1024
|
-
|
|
1106
|
+
|
|
1107
|
+
from .config import WorkflowConfig
|
|
1108
|
+
from .database import PoliticianTradingDB
|
|
1109
|
+
|
|
1025
1110
|
config = WorkflowConfig.default()
|
|
1026
1111
|
db = PoliticianTradingDB(config)
|
|
1027
|
-
|
|
1028
|
-
# Build query with join to get politician info
|
|
1112
|
+
|
|
1113
|
+
# Build query with join to get politician info
|
|
1029
1114
|
# Supabase uses foreign key relationships for joins
|
|
1030
|
-
query = (
|
|
1031
|
-
|
|
1032
|
-
.select("*, politicians!inner(*)")
|
|
1033
|
-
)
|
|
1034
|
-
|
|
1115
|
+
query = db.client.table("trading_disclosures").select("*, politicians!inner(*)")
|
|
1116
|
+
|
|
1035
1117
|
# Date filter
|
|
1036
1118
|
if days > 0:
|
|
1037
1119
|
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
|
|
1038
1120
|
query = query.gte("created_at", cutoff_date.isoformat())
|
|
1039
|
-
|
|
1121
|
+
|
|
1040
1122
|
# Apply filters
|
|
1041
1123
|
if politician:
|
|
1042
1124
|
# For nested relationships, we need a different approach
|
|
1043
1125
|
# Let's use a simpler filter on the main table for now
|
|
1044
1126
|
query = query.filter("politicians.full_name", "ilike", f"%{politician}%")
|
|
1045
|
-
|
|
1127
|
+
|
|
1046
1128
|
if asset:
|
|
1047
1129
|
query = query.or_(f"asset_name.ilike.%{asset}%,asset_ticker.ilike.%{asset}%")
|
|
1048
|
-
|
|
1130
|
+
|
|
1049
1131
|
if transaction_type:
|
|
1050
1132
|
query = query.eq("transaction_type", transaction_type)
|
|
1051
|
-
|
|
1133
|
+
|
|
1052
1134
|
if amount_min is not None:
|
|
1053
1135
|
query = query.gte("amount_range_min", amount_min)
|
|
1054
|
-
|
|
1136
|
+
|
|
1055
1137
|
if amount_max is not None:
|
|
1056
1138
|
query = query.lte("amount_range_max", amount_max)
|
|
1057
|
-
|
|
1139
|
+
|
|
1058
1140
|
result = query.order("transaction_date", desc=True).limit(limit).execute()
|
|
1059
1141
|
return result.data if result.data else []
|
|
1060
|
-
|
|
1142
|
+
|
|
1061
1143
|
disclosures = asyncio.run(get_disclosures())
|
|
1062
|
-
|
|
1144
|
+
|
|
1063
1145
|
if output_json:
|
|
1064
1146
|
console.print(JSON.from_data(disclosures))
|
|
1065
1147
|
else:
|
|
1066
1148
|
console.print("๐ฐ Trading Disclosures Database", style="bold cyan")
|
|
1067
|
-
|
|
1149
|
+
|
|
1068
1150
|
if not disclosures:
|
|
1069
1151
|
console.print("No disclosures found", style="yellow")
|
|
1070
1152
|
return
|
|
1071
|
-
|
|
1153
|
+
|
|
1072
1154
|
if details:
|
|
1073
1155
|
# Detailed view
|
|
1074
1156
|
for i, disclosure in enumerate(disclosures):
|
|
1075
1157
|
console.print(f"\n[bold cyan]Disclosure {i+1}[/bold cyan]")
|
|
1076
|
-
|
|
1158
|
+
|
|
1077
1159
|
detail_table = Table()
|
|
1078
1160
|
detail_table.add_column("Field", style="cyan")
|
|
1079
1161
|
detail_table.add_column("Value", style="white")
|
|
1080
|
-
|
|
1162
|
+
|
|
1081
1163
|
politician_info = disclosure.get("politicians", {})
|
|
1082
|
-
politician_name =
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
detail_table.add_row(
|
|
1088
|
-
|
|
1089
|
-
|
|
1164
|
+
politician_name = (
|
|
1165
|
+
politician_info.get("full_name")
|
|
1166
|
+
or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
|
|
1167
|
+
)
|
|
1168
|
+
|
|
1169
|
+
detail_table.add_row(
|
|
1170
|
+
"Politician",
|
|
1171
|
+
f"{politician_name} ({politician_info.get('party', 'Unknown')})",
|
|
1172
|
+
)
|
|
1173
|
+
detail_table.add_row(
|
|
1174
|
+
"Asset",
|
|
1175
|
+
f"{disclosure.get('asset_name', 'Unknown')} ({disclosure.get('asset_ticker', 'N/A')})",
|
|
1176
|
+
)
|
|
1177
|
+
detail_table.add_row(
|
|
1178
|
+
"Transaction", disclosure.get("transaction_type", "Unknown").title()
|
|
1179
|
+
)
|
|
1180
|
+
detail_table.add_row(
|
|
1181
|
+
"Date", _format_timestamp(disclosure.get("transaction_date", ""))
|
|
1182
|
+
)
|
|
1183
|
+
detail_table.add_row(
|
|
1184
|
+
"Disclosure Date", _format_timestamp(disclosure.get("disclosure_date", ""))
|
|
1185
|
+
)
|
|
1186
|
+
|
|
1090
1187
|
# Amount formatting
|
|
1091
|
-
amount_min = disclosure.get(
|
|
1092
|
-
amount_max = disclosure.get(
|
|
1093
|
-
amount_exact = disclosure.get(
|
|
1094
|
-
|
|
1188
|
+
amount_min = disclosure.get("amount_range_min")
|
|
1189
|
+
amount_max = disclosure.get("amount_range_max")
|
|
1190
|
+
amount_exact = disclosure.get("amount_exact")
|
|
1191
|
+
|
|
1095
1192
|
if amount_exact:
|
|
1096
1193
|
amount_str = f"${amount_exact:,.2f}"
|
|
1097
1194
|
elif amount_min is not None and amount_max is not None:
|
|
1098
1195
|
amount_str = f"${amount_min:,.0f} - ${amount_max:,.0f}"
|
|
1099
1196
|
else:
|
|
1100
1197
|
amount_str = "Unknown"
|
|
1101
|
-
|
|
1198
|
+
|
|
1102
1199
|
detail_table.add_row("Amount", amount_str)
|
|
1103
|
-
detail_table.add_row("Source URL", disclosure.get(
|
|
1104
|
-
detail_table.add_row(
|
|
1105
|
-
|
|
1200
|
+
detail_table.add_row("Source URL", disclosure.get("source_url", "N/A"))
|
|
1201
|
+
detail_table.add_row(
|
|
1202
|
+
"Added", _format_timestamp(disclosure.get("created_at", ""))
|
|
1203
|
+
)
|
|
1204
|
+
|
|
1106
1205
|
console.print(detail_table)
|
|
1107
1206
|
else:
|
|
1108
1207
|
# Compact table view
|
|
@@ -1113,40 +1212,49 @@ def view_disclosures(output_json: bool, limit: int, politician: str, asset: str,
|
|
|
1113
1212
|
disclosures_table.add_column("Amount", style="yellow")
|
|
1114
1213
|
disclosures_table.add_column("Date", style="magenta")
|
|
1115
1214
|
disclosures_table.add_column("Party", style="dim")
|
|
1116
|
-
|
|
1215
|
+
|
|
1117
1216
|
for disclosure in disclosures:
|
|
1118
1217
|
politician_info = disclosure.get("politicians", {})
|
|
1119
|
-
politician_name =
|
|
1120
|
-
|
|
1218
|
+
politician_name = (
|
|
1219
|
+
politician_info.get("full_name")
|
|
1220
|
+
or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
|
|
1221
|
+
)
|
|
1222
|
+
|
|
1121
1223
|
# Format amount
|
|
1122
|
-
amount_min = disclosure.get(
|
|
1123
|
-
amount_max = disclosure.get(
|
|
1124
|
-
amount_exact = disclosure.get(
|
|
1125
|
-
|
|
1224
|
+
amount_min = disclosure.get("amount_range_min")
|
|
1225
|
+
amount_max = disclosure.get("amount_range_max")
|
|
1226
|
+
amount_exact = disclosure.get("amount_exact")
|
|
1227
|
+
|
|
1126
1228
|
if amount_exact:
|
|
1127
1229
|
amount_str = f"${amount_exact:,.0f}"
|
|
1128
1230
|
elif amount_min is not None and amount_max is not None:
|
|
1129
1231
|
amount_str = f"${amount_min:,.0f}-${amount_max:,.0f}"
|
|
1130
1232
|
else:
|
|
1131
1233
|
amount_str = "Unknown"
|
|
1132
|
-
|
|
1234
|
+
|
|
1133
1235
|
# Transaction type with emoji
|
|
1134
|
-
trans_type = disclosure.get(
|
|
1135
|
-
trans_emoji = {
|
|
1136
|
-
|
|
1236
|
+
trans_type = disclosure.get("transaction_type", "unknown")
|
|
1237
|
+
trans_emoji = {
|
|
1238
|
+
"purchase": "๐ข Buy",
|
|
1239
|
+
"sale": "๐ด Sell",
|
|
1240
|
+
"exchange": "๐ Exchange",
|
|
1241
|
+
}.get(trans_type, "โ " + trans_type.title())
|
|
1242
|
+
|
|
1137
1243
|
disclosures_table.add_row(
|
|
1138
1244
|
politician_name[:35] + ("..." if len(politician_name) > 35 else ""),
|
|
1139
1245
|
_format_asset_display(disclosure),
|
|
1140
1246
|
trans_emoji,
|
|
1141
1247
|
amount_str,
|
|
1142
|
-
_format_timestamp(disclosure.get(
|
|
1143
|
-
politician_info.get(
|
|
1248
|
+
_format_timestamp(disclosure.get("transaction_date", "")),
|
|
1249
|
+
politician_info.get("party", "")[:12],
|
|
1144
1250
|
)
|
|
1145
|
-
|
|
1251
|
+
|
|
1146
1252
|
console.print(disclosures_table)
|
|
1147
|
-
|
|
1148
|
-
console.print(
|
|
1149
|
-
|
|
1253
|
+
|
|
1254
|
+
console.print(
|
|
1255
|
+
f"\nShowing {len(disclosures)} disclosures from last {days} days", style="dim"
|
|
1256
|
+
)
|
|
1257
|
+
|
|
1150
1258
|
except Exception as e:
|
|
1151
1259
|
if output_json:
|
|
1152
1260
|
console.print(JSON.from_data({"error": str(e)}))
|
|
@@ -1160,26 +1268,28 @@ def view_disclosures(output_json: bool, limit: int, politician: str, asset: str,
|
|
|
1160
1268
|
def verify_database(output_json: bool):
|
|
1161
1269
|
"""Verify database integrity and show summary statistics"""
|
|
1162
1270
|
console = Console()
|
|
1163
|
-
|
|
1271
|
+
|
|
1164
1272
|
try:
|
|
1273
|
+
|
|
1165
1274
|
async def verify_data():
|
|
1166
|
-
from .database import PoliticianTradingDB
|
|
1167
|
-
from .config import WorkflowConfig
|
|
1168
1275
|
from datetime import timedelta
|
|
1169
|
-
|
|
1276
|
+
|
|
1277
|
+
from .config import WorkflowConfig
|
|
1278
|
+
from .database import PoliticianTradingDB
|
|
1279
|
+
|
|
1170
1280
|
config = WorkflowConfig.default()
|
|
1171
1281
|
db = PoliticianTradingDB(config)
|
|
1172
|
-
|
|
1282
|
+
|
|
1173
1283
|
verification = {
|
|
1174
1284
|
"timestamp": datetime.now().isoformat(),
|
|
1175
1285
|
"tables": {},
|
|
1176
1286
|
"integrity": {},
|
|
1177
|
-
"summary": {}
|
|
1287
|
+
"summary": {},
|
|
1178
1288
|
}
|
|
1179
|
-
|
|
1289
|
+
|
|
1180
1290
|
# Check each table
|
|
1181
1291
|
tables_to_check = ["politicians", "trading_disclosures", "data_pull_jobs"]
|
|
1182
|
-
|
|
1292
|
+
|
|
1183
1293
|
for table_name in tables_to_check:
|
|
1184
1294
|
try:
|
|
1185
1295
|
result = db.client.table(table_name).select("id").execute()
|
|
@@ -1187,41 +1297,38 @@ def verify_database(output_json: bool):
|
|
|
1187
1297
|
verification["tables"][table_name] = {
|
|
1188
1298
|
"exists": True,
|
|
1189
1299
|
"record_count": count,
|
|
1190
|
-
"status": "ok"
|
|
1300
|
+
"status": "ok",
|
|
1191
1301
|
}
|
|
1192
1302
|
except Exception as e:
|
|
1193
1303
|
verification["tables"][table_name] = {
|
|
1194
1304
|
"exists": False,
|
|
1195
1305
|
"error": str(e),
|
|
1196
|
-
"status": "error"
|
|
1306
|
+
"status": "error",
|
|
1197
1307
|
}
|
|
1198
|
-
|
|
1308
|
+
|
|
1199
1309
|
# Check referential integrity - simplified approach
|
|
1200
1310
|
try:
|
|
1201
1311
|
# Just verify we can query both tables
|
|
1202
1312
|
disclosures_result = db.client.table("trading_disclosures").select("id").execute()
|
|
1203
1313
|
politicians_result = db.client.table("politicians").select("id").execute()
|
|
1204
|
-
|
|
1314
|
+
|
|
1205
1315
|
disclosures_count = len(disclosures_result.data) if disclosures_result.data else 0
|
|
1206
1316
|
politicians_count = len(politicians_result.data) if politicians_result.data else 0
|
|
1207
|
-
|
|
1317
|
+
|
|
1208
1318
|
verification["integrity"] = {
|
|
1209
1319
|
"disclosures_with_politicians": disclosures_count,
|
|
1210
1320
|
"total_politicians": politicians_count,
|
|
1211
|
-
"status": "ok"
|
|
1321
|
+
"status": "ok",
|
|
1212
1322
|
}
|
|
1213
1323
|
except Exception as e:
|
|
1214
|
-
verification["integrity"] = {
|
|
1215
|
-
|
|
1216
|
-
"status": "error"
|
|
1217
|
-
}
|
|
1218
|
-
|
|
1324
|
+
verification["integrity"] = {"error": str(e), "status": "error"}
|
|
1325
|
+
|
|
1219
1326
|
# Summary statistics
|
|
1220
1327
|
try:
|
|
1221
1328
|
politicians_count = verification["tables"]["politicians"]["record_count"]
|
|
1222
1329
|
disclosures_count = verification["tables"]["trading_disclosures"]["record_count"]
|
|
1223
1330
|
jobs_count = verification["tables"]["data_pull_jobs"]["record_count"]
|
|
1224
|
-
|
|
1331
|
+
|
|
1225
1332
|
# Get recent activity
|
|
1226
1333
|
recent_jobs = (
|
|
1227
1334
|
db.client.table("data_pull_jobs")
|
|
@@ -1229,56 +1336,62 @@ def verify_database(output_json: bool):
|
|
|
1229
1336
|
.gte("started_at", (datetime.now() - timedelta(days=7)).isoformat())
|
|
1230
1337
|
.execute()
|
|
1231
1338
|
)
|
|
1232
|
-
|
|
1339
|
+
|
|
1233
1340
|
recent_jobs_count = len(recent_jobs.data) if recent_jobs.data else 0
|
|
1234
|
-
successful_jobs = len(
|
|
1235
|
-
|
|
1341
|
+
successful_jobs = len(
|
|
1342
|
+
[j for j in (recent_jobs.data or []) if j.get("status") == "completed"]
|
|
1343
|
+
)
|
|
1344
|
+
|
|
1236
1345
|
verification["summary"] = {
|
|
1237
1346
|
"total_politicians": politicians_count,
|
|
1238
1347
|
"total_disclosures": disclosures_count,
|
|
1239
1348
|
"total_jobs": jobs_count,
|
|
1240
1349
|
"jobs_last_7_days": recent_jobs_count,
|
|
1241
1350
|
"successful_jobs_last_7_days": successful_jobs,
|
|
1242
|
-
"success_rate_7_days": (
|
|
1351
|
+
"success_rate_7_days": (
|
|
1352
|
+
(successful_jobs / recent_jobs_count * 100) if recent_jobs_count > 0 else 0
|
|
1353
|
+
),
|
|
1243
1354
|
}
|
|
1244
|
-
|
|
1355
|
+
|
|
1245
1356
|
except Exception as e:
|
|
1246
1357
|
verification["summary"] = {"error": str(e)}
|
|
1247
|
-
|
|
1358
|
+
|
|
1248
1359
|
return verification
|
|
1249
|
-
|
|
1360
|
+
|
|
1250
1361
|
verification = asyncio.run(verify_data())
|
|
1251
|
-
|
|
1362
|
+
|
|
1252
1363
|
if output_json:
|
|
1253
1364
|
console.print(JSON.from_data(verification))
|
|
1254
1365
|
else:
|
|
1255
1366
|
console.print("๐ Database Verification Report", style="bold cyan")
|
|
1256
|
-
|
|
1367
|
+
|
|
1257
1368
|
# Table status
|
|
1258
1369
|
tables_panel = Table(title="Table Status")
|
|
1259
1370
|
tables_panel.add_column("Table", style="cyan")
|
|
1260
1371
|
tables_panel.add_column("Status", style="white")
|
|
1261
1372
|
tables_panel.add_column("Records", justify="right", style="green")
|
|
1262
|
-
|
|
1373
|
+
|
|
1263
1374
|
for table_name, info in verification["tables"].items():
|
|
1264
1375
|
status_color = "green" if info["status"] == "ok" else "red"
|
|
1265
1376
|
status_text = f"[{status_color}]{info['status'].upper()}[/{status_color}]"
|
|
1266
1377
|
record_count = str(info.get("record_count", "N/A"))
|
|
1267
|
-
|
|
1378
|
+
|
|
1268
1379
|
tables_panel.add_row(table_name, status_text, record_count)
|
|
1269
|
-
|
|
1380
|
+
|
|
1270
1381
|
console.print(tables_panel)
|
|
1271
|
-
|
|
1382
|
+
|
|
1272
1383
|
# Integrity check
|
|
1273
1384
|
integrity_info = verification.get("integrity", {})
|
|
1274
1385
|
if integrity_info.get("status") == "ok":
|
|
1275
1386
|
console.print("โ
Data integrity check passed", style="green")
|
|
1276
1387
|
disc_count = integrity_info.get("disclosures_with_politicians", 0)
|
|
1277
1388
|
pol_count = integrity_info.get("total_politicians", 0)
|
|
1278
|
-
console.print(
|
|
1389
|
+
console.print(
|
|
1390
|
+
f" Disclosures: {disc_count}, Politicians: {pol_count}", style="dim"
|
|
1391
|
+
)
|
|
1279
1392
|
else:
|
|
1280
1393
|
console.print("โ Data integrity check failed", style="red")
|
|
1281
|
-
|
|
1394
|
+
|
|
1282
1395
|
# Summary
|
|
1283
1396
|
summary = verification.get("summary", {})
|
|
1284
1397
|
if "error" not in summary:
|
|
@@ -1286,9 +1399,11 @@ def verify_database(output_json: bool):
|
|
|
1286
1399
|
console.print(f"Politicians: {summary.get('total_politicians', 0)}")
|
|
1287
1400
|
console.print(f"Trading Disclosures: {summary.get('total_disclosures', 0)}")
|
|
1288
1401
|
console.print(f"Data Collection Jobs: {summary.get('total_jobs', 0)}")
|
|
1289
|
-
console.print(
|
|
1402
|
+
console.print(
|
|
1403
|
+
f"Jobs (7 days): {summary.get('jobs_last_7_days', 0)} ({summary.get('successful_jobs_last_7_days', 0)} successful)"
|
|
1404
|
+
)
|
|
1290
1405
|
console.print(f"Success Rate: {summary.get('success_rate_7_days', 0):.1f}%")
|
|
1291
|
-
|
|
1406
|
+
|
|
1292
1407
|
except Exception as e:
|
|
1293
1408
|
if output_json:
|
|
1294
1409
|
console.print(JSON.from_data({"error": str(e)}))
|
|
@@ -1304,30 +1419,34 @@ def cron_commands():
|
|
|
1304
1419
|
|
|
1305
1420
|
|
|
1306
1421
|
@cron_commands.command("run")
|
|
1307
|
-
@click.option(
|
|
1308
|
-
|
|
1309
|
-
|
|
1422
|
+
@click.option(
|
|
1423
|
+
"--type",
|
|
1424
|
+
"collection_type",
|
|
1425
|
+
default="full",
|
|
1426
|
+
type=click.Choice(["full", "us", "eu", "quick"]),
|
|
1427
|
+
help="Type of collection to run",
|
|
1428
|
+
)
|
|
1310
1429
|
def cron_run(collection_type: str):
|
|
1311
1430
|
"""Run scheduled data collection (designed for cron jobs)"""
|
|
1312
|
-
|
|
1431
|
+
|
|
1313
1432
|
async def run_cron_collection():
|
|
1314
1433
|
"""Run the cron collection"""
|
|
1315
1434
|
from datetime import datetime
|
|
1316
|
-
|
|
1435
|
+
|
|
1317
1436
|
logger.info(f"Starting scheduled collection: {collection_type}")
|
|
1318
1437
|
console.print(f"๐ Running {collection_type} data collection...", style="blue")
|
|
1319
|
-
|
|
1438
|
+
|
|
1320
1439
|
try:
|
|
1321
1440
|
workflow = PoliticianTradingWorkflow()
|
|
1322
|
-
|
|
1441
|
+
|
|
1323
1442
|
if collection_type == "full":
|
|
1324
1443
|
results = await run_politician_trading_collection()
|
|
1325
1444
|
elif collection_type == "us":
|
|
1326
1445
|
# US-only collection
|
|
1327
1446
|
us_results = await workflow._collect_us_congress_data()
|
|
1328
|
-
ca_results = await workflow._collect_california_data()
|
|
1447
|
+
ca_results = await workflow._collect_california_data()
|
|
1329
1448
|
us_states_results = await workflow._collect_us_states_data()
|
|
1330
|
-
|
|
1449
|
+
|
|
1331
1450
|
results = {
|
|
1332
1451
|
"status": "completed",
|
|
1333
1452
|
"started_at": datetime.utcnow().isoformat(),
|
|
@@ -1335,22 +1454,24 @@ def cron_run(collection_type: str):
|
|
|
1335
1454
|
"jobs": {
|
|
1336
1455
|
"us_congress": us_results,
|
|
1337
1456
|
"california": ca_results,
|
|
1338
|
-
"us_states": us_states_results
|
|
1457
|
+
"us_states": us_states_results,
|
|
1339
1458
|
},
|
|
1340
1459
|
"summary": {
|
|
1341
|
-
"total_new_disclosures": sum(
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1460
|
+
"total_new_disclosures": sum(
|
|
1461
|
+
[
|
|
1462
|
+
us_results.get("new_disclosures", 0),
|
|
1463
|
+
ca_results.get("new_disclosures", 0),
|
|
1464
|
+
us_states_results.get("new_disclosures", 0),
|
|
1465
|
+
]
|
|
1466
|
+
)
|
|
1467
|
+
},
|
|
1347
1468
|
}
|
|
1348
1469
|
elif collection_type == "eu":
|
|
1349
1470
|
# EU-only collection
|
|
1350
1471
|
eu_results = await workflow._collect_eu_parliament_data()
|
|
1351
1472
|
eu_states_results = await workflow._collect_eu_member_states_data()
|
|
1352
1473
|
uk_results = await workflow._collect_uk_parliament_data()
|
|
1353
|
-
|
|
1474
|
+
|
|
1354
1475
|
results = {
|
|
1355
1476
|
"status": "completed",
|
|
1356
1477
|
"started_at": datetime.utcnow().isoformat(),
|
|
@@ -1358,15 +1479,17 @@ def cron_run(collection_type: str):
|
|
|
1358
1479
|
"jobs": {
|
|
1359
1480
|
"eu_parliament": eu_results,
|
|
1360
1481
|
"eu_member_states": eu_states_results,
|
|
1361
|
-
"uk_parliament": uk_results
|
|
1482
|
+
"uk_parliament": uk_results,
|
|
1362
1483
|
},
|
|
1363
1484
|
"summary": {
|
|
1364
|
-
"total_new_disclosures": sum(
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1485
|
+
"total_new_disclosures": sum(
|
|
1486
|
+
[
|
|
1487
|
+
eu_results.get("new_disclosures", 0),
|
|
1488
|
+
eu_states_results.get("new_disclosures", 0),
|
|
1489
|
+
uk_results.get("new_disclosures", 0),
|
|
1490
|
+
]
|
|
1491
|
+
)
|
|
1492
|
+
},
|
|
1370
1493
|
}
|
|
1371
1494
|
elif collection_type == "quick":
|
|
1372
1495
|
# Quick status check
|
|
@@ -1375,23 +1498,27 @@ def cron_run(collection_type: str):
                     "status": "completed",
                     "type": "quick_check",
                     "results": status,
-                    "summary": {"total_new_disclosures": 0}
+                    "summary": {"total_new_disclosures": 0},
                 }
-
+
             # Log results
-            summary = results.get("summary", {})
-            logger.info(
-                f"Cron collection completed - New: {summary.get('total_new_disclosures', 0)}")
+            summary = results.get("summary", {})
+            logger.info(
+                f"Cron collection completed - New: {summary.get('total_new_disclosures', 0)}"
+            )
+
             console.print(f"✅ {collection_type.title()} collection completed", style="green")
-            console.print(
-                f"New disclosures: {summary.get('total_new_disclosures', 0)}", style="cyan")
+            console.print(
+                f"New disclosures: {summary.get('total_new_disclosures', 0)}", style="cyan"
+            )
+
             return results
-
+
         except Exception as e:
             logger.error(f"Cron collection failed: {e}")
             console.print(f"❌ Collection failed: {e}", style="red")
             return {"status": "failed", "error": str(e)}
-
+
     asyncio.run(run_cron_collection())
 
 
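`cron_run`, like the other commands in this file, wraps its work in a local coroutine and hands it to `asyncio.run()`, letting a synchronous Click entry point drive the async scrapers. A minimal sketch of the pattern, with a stand-in coroutine:

import asyncio

def cli_command() -> None:
    # Synchronous entry point (what Click invokes).
    async def run() -> dict:
        # Stand-in for the awaited collection work.
        await asyncio.sleep(0)
        return {"status": "completed"}

    result = asyncio.run(run())  # one event loop per command invocation
    print(result)

cli_command()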
@@ -1400,10 +1527,10 @@ def cron_setup():
     """Show cron setup instructions"""
     console.print("📋 CRON SETUP INSTRUCTIONS", style="bold cyan")
     console.print("Add these lines to your crontab (run: crontab -e)", style="dim")
-
+
     # Get current working directory for the cron commands
     repo_path = Path(__file__).parent.parent.parent.parent.parent
-
+
     instructions = f"""
 # Full collection every 6 hours
 0 */6 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type full >> /tmp/politician_cron.log 2>&1
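The generated entries use standard five-field cron syntax; `0 */6 * * *` fires at minute 0 of every sixth hour. A sketch of how the f-string template resolves, assuming a hypothetical checkout path (the command derives the real one from `__file__`):

from pathlib import Path

repo_path = Path("/home/user/mcli")  # hypothetical repo location
entry = (
    f"0 */6 * * * cd {repo_path} && source .venv/bin/activate && "
    "mcli politician-trading cron run --type full >> /tmp/politician_cron.log 2>&1"
)
print(entry)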
@@ -1417,9 +1544,9 @@ def cron_setup():
 # Quick health check daily at 9 AM
 0 9 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type quick >> /tmp/politician_cron.log 2>&1
 """
-
+
     console.print(Panel(instructions, title="Crontab Entries", border_style="blue"))
-
+
     console.print("\n💡 Tips:", style="bold yellow")
     console.print("• Start with just one cron job to test", style="dim")
     console.print("• Check logs at /tmp/politician_cron.log", style="dim")
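The instructions are rendered through Rich's `Panel`, which draws a titled, bordered box around the text. A minimal sketch of the same call, assuming the usual Rich imports:

from rich.console import Console
from rich.panel import Panel

console = Console()
console.print(Panel("0 9 * * * ...", title="Crontab Entries", border_style="blue"))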
@@ -1430,136 +1557,148 @@ def cron_setup():
 @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
 def monitor_system(output_json: bool):
     """Monitor system status, jobs, and database"""
-
+
     async def run_monitor():
         """Run the monitoring"""
         try:
             config = WorkflowConfig.default()
             db = PoliticianTradingDB(config)
             workflow = PoliticianTradingWorkflow(config)
-
+
             # Get system health
             await db.ensure_schema()
             quick_status = await workflow.run_quick_check()
-
+
             # Get job history
             job_status = await db.get_job_status()
-            recent_jobs = job_status.get('recent_jobs', [])
-
+            recent_jobs = job_status.get("recent_jobs", [])
+
             # Analyze job statistics
-            status_counts = {'completed': 0, 'running': 0, 'failed': 0, 'pending': 0}
+            status_counts = {"completed": 0, "running": 0, "failed": 0, "pending": 0}
             job_types = {}
             latest_by_type = {}
-
+
             for job in recent_jobs:
-                status = job.get('status', 'unknown')
-                job_type = job.get('job_type', 'unknown')
-                started_at = job.get('started_at', '')
-
+                status = job.get("status", "unknown")
+                job_type = job.get("job_type", "unknown")
+                started_at = job.get("started_at", "")
+
                 if status in status_counts:
                     status_counts[status] += 1
                 job_types[job_type] = job_types.get(job_type, 0) + 1
-
-                if job_type not in latest_by_type or started_at > latest_by_type[job_type].get('started_at', ''):
+
+                if job_type not in latest_by_type or started_at > latest_by_type[job_type].get(
+                    "started_at", ""
+                ):
                     latest_by_type[job_type] = job
-
+
             # Get scraper availability
             try:
                 from . import scrapers
+
                 scraper_status = {
-                    'UK Parliament API': scrapers.UK_SCRAPER_AVAILABLE,
-                    'California NetFile': scrapers.CALIFORNIA_SCRAPER_AVAILABLE,
-                    'EU Member States': scrapers.EU_MEMBER_STATES_SCRAPER_AVAILABLE,
-                    'US States Ethics': scrapers.US_STATES_SCRAPER_AVAILABLE,
+                    "UK Parliament API": scrapers.UK_SCRAPER_AVAILABLE,
+                    "California NetFile": scrapers.CALIFORNIA_SCRAPER_AVAILABLE,
+                    "EU Member States": scrapers.EU_MEMBER_STATES_SCRAPER_AVAILABLE,
+                    "US States Ethics": scrapers.US_STATES_SCRAPER_AVAILABLE,
                 }
                 available_scrapers = sum(scraper_status.values())
             except:
                 scraper_status = {}
                 available_scrapers = 0
-
+
             monitor_data = {
                 "system_health": {
-                    'database_connection': quick_status.get('database_connection', 'unknown'),
-                    'config_loaded': quick_status.get('config_loaded', 'unknown'),
-                    'timestamp': quick_status.get('timestamp', datetime.now().isoformat()),
+                    "database_connection": quick_status.get("database_connection", "unknown"),
+                    "config_loaded": quick_status.get("config_loaded", "unknown"),
+                    "timestamp": quick_status.get("timestamp", datetime.now().isoformat()),
                 },
                 "job_statistics": {
                     "total_recent_jobs": len(recent_jobs),
                     "status_counts": status_counts,
-                    "job_types": job_types
+                    "job_types": job_types,
                 },
                 "latest_jobs": latest_by_type,
                 "scraper_availability": {
                     "available_count": available_scrapers,
                     "total_count": len(scraper_status),
-                    "scrapers": scraper_status
-                }
+                    "scrapers": scraper_status,
+                },
             }
-
+
             return monitor_data
-
+
         except Exception as e:
             logger.error(f"Monitoring failed: {e}")
             return {"error": str(e)}
-
+
     monitor_data = asyncio.run(run_monitor())
-
+
     if output_json:
         console.print(JSON.from_data(monitor_data))
     else:
         console.print("📊 SYSTEM MONITOR", style="bold cyan")
-
+
         # System Health
-        health = monitor_data.get('system_health', {})
+        health = monitor_data.get("system_health", {})
         health_table = Table(title="System Health")
         health_table.add_column("Component", style="cyan")
         health_table.add_column("Status", style="white")
-
-        db_status = health['database_connection']
+
+        db_status = health["database_connection"]
         db_color = "green" if db_status == "ok" else "red"
         health_table.add_row("Database", f"[{db_color}]{db_status.upper()}[/{db_color}]")
-
-        config_status = health['config_loaded']
+
+        config_status = health["config_loaded"]
         config_color = "green" if config_status == "ok" else "red"
-        health_table.add_row(
-            "Configuration", f"[{config_color}]{config_status.upper()}[/{config_color}]")
+        health_table.add_row(
+            "Configuration", f"[{config_color}]{config_status.upper()}[/{config_color}]"
+        )
+
         console.print(health_table)
-
+
         # Job Statistics
-        job_stats = monitor_data.get('job_statistics', {})
-        console.print(
-            f"\n📊 Job Statistics (Total: {job_stats.get('total_recent_jobs', 0)})", style="bold blue")
-        status_counts = job_stats.get('status_counts', {})
+        job_stats = monitor_data.get("job_statistics", {})
+        console.print(
+            f"\n📊 Job Statistics (Total: {job_stats.get('total_recent_jobs', 0)})",
+            style="bold blue",
+        )
+
+        status_counts = job_stats.get("status_counts", {})
         for status, count in status_counts.items():
             if count > 0:
-                icon = {'completed': '✅', 'running': '🔄', 'failed': '❌', 'pending': '⏳'}[status]
+                icon = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}[status]
                 console.print(f"{icon} {status.title()}: {count}")
-
+
         # Latest Jobs by Type
         console.print(f"\n📅 Latest Jobs by Source", style="bold blue")
-        latest_jobs = monitor_data.get('latest_jobs', {})
-
+        latest_jobs = monitor_data.get("latest_jobs", {})
+
         for job_type, job in sorted(latest_jobs.items()):
-            status = job.get('status', 'unknown')
-            icon = {'completed': '✅', 'running': '🔄', 'failed': '❌', 'pending': '⏳'}.get(status, '❓')
-            source_name = job_type.replace('_', ' ').title()
+            status = job.get("status", "unknown")
+            icon = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}.get(
+                status, "❓"
+            )
+
+            source_name = job_type.replace("_", " ").title()
             console.print(f"\n{icon} {source_name}")
             console.print(f"  Status: {status}")
             console.print(f"  Last run: {job.get('started_at', 'N/A')[:19]}")
-            console.print(
-                f"  Records: {job.get('records_processed', 0)} processed, {job.get('records_new', 0)} new")
+            console.print(
+                f"  Records: {job.get('records_processed', 0)} processed, {job.get('records_new', 0)} new"
+            )
+
         # Scraper Availability
-        scraper_info = monitor_data.get('scraper_availability', {})
-        available = scraper_info.get('available_count', 0)
-        total = scraper_info.get('total_count', 0)
-
+        scraper_info = monitor_data.get("scraper_availability", {})
+        available = scraper_info.get("available_count", 0)
+        total = scraper_info.get("total_count", 0)
+
         console.print(f"\n🔌 Scraper Availability: {available}/{total}", style="bold blue")
-
-        scrapers_status = scraper_info.get('scrapers', {})
+
+        scrapers_status = scraper_info.get("scrapers", {})
         for scraper_name, available in scrapers_status.items():
-            icon = '✅' if available else '❌'
-            status = 'Available' if available else 'Not Available'
+            icon = "✅" if available else "❌"
+            status = "Available" if available else "Not Available"
             console.print(f"{icon} {scraper_name}: {status}")
 
 
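The monitor picks the most recent job per type by comparing `started_at` strings directly, which works because ISO-8601 timestamps sort lexicographically. A minimal sketch with hypothetical job records:

# Hypothetical job records; ISO-8601 strings compare correctly as plain strings.
recent_jobs = [
    {"job_type": "us_congress", "started_at": "2024-01-02T08:00:00"},
    {"job_type": "us_congress", "started_at": "2024-01-03T08:00:00"},
    {"job_type": "eu_parliament", "started_at": "2024-01-01T09:30:00"},
]

latest_by_type = {}
for job in recent_jobs:
    job_type = job.get("job_type", "unknown")
    started_at = job.get("started_at", "")
    if job_type not in latest_by_type or started_at > latest_by_type[job_type].get(
        "started_at", ""
    ):
        latest_by_type[job_type] = job

assert latest_by_type["us_congress"]["started_at"] == "2024-01-03T08:00:00"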
@@ -1569,143 +1708,155 @@ def monitor_system(output_json: bool):
 @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
 def read_recent_data(limit: int, days: int, output_json: bool):
     """Read recent data from the database"""
-
+
     async def read_data():
         """Read recent data from database"""
         try:
             config = WorkflowConfig.default()
             db = PoliticianTradingDB(config)
-
+
             # Get job history
             job_status = await db.get_job_status()
-            jobs = job_status.get('recent_jobs', [])
-
+            jobs = job_status.get("recent_jobs", [])
+
             # Analyze data freshness
             freshness = {}
             for job in jobs:
-                job_type = job.get('job_type', 'unknown')
-                if job.get('status') == 'completed':
-                    completed_at = job.get('completed_at')
-                    if job_type not in freshness or completed_at > freshness[job_type]['last_success']:
+                job_type = job.get("job_type", "unknown")
+                if job.get("status") == "completed":
+                    completed_at = job.get("completed_at")
+                    if (
+                        job_type not in freshness
+                        or completed_at > freshness[job_type]["last_success"]
+                    ):
                         # Check if recent (within threshold)
                         is_recent = False
                         if completed_at:
                             try:
-                                timestamp = datetime.fromisoformat(completed_at.replace('Z', '+00:00'))
-                                is_recent = (datetime.now() - timestamp.replace(tzinfo=None)) < timedelta(hours=24)
+                                timestamp = datetime.fromisoformat(
+                                    completed_at.replace("Z", "+00:00")
+                                )
+                                is_recent = (
+                                    datetime.now() - timestamp.replace(tzinfo=None)
+                                ) < timedelta(hours=24)
                             except:
                                 pass
-
+
                         freshness[job_type] = {
-                            'last_success': completed_at,
-                            'records_collected': job.get('records_new', 0),
-                            'status': 'fresh' if is_recent else 'stale',
+                            "last_success": completed_at,
+                            "records_collected": job.get("records_new", 0),
+                            "status": "fresh" if is_recent else "stale",
                         }
-
+
             return {
                 "recent_jobs": jobs[:limit],
                 "data_freshness": freshness,
                 "summary": {
                     "total_jobs": len(jobs),
-                    "job_types": len(set(job.get('job_type') for job in jobs)),
-                    "fresh_sources": len([v for v in freshness.values() if v['status'] == 'fresh'])
-                }
+                    "job_types": len(set(job.get("job_type") for job in jobs)),
+                    "fresh_sources": len([v for v in freshness.values() if v["status"] == "fresh"]),
+                },
             }
-
+
         except Exception as e:
             logger.error(f"Failed to read data: {e}")
             return {"error": str(e)}
-
+
     data = asyncio.run(read_data())
-
+
     if output_json:
         console.print(JSON.from_data(data))
     else:
         console.print("📖 RECENT DATA SUMMARY", style="bold cyan")
-
+
         if "error" in data:
             console.print(f"❌ Error: {data['error']}", style="red")
             return
-
+
         # Summary stats
-        summary = data.get('summary', {})
+        summary = data.get("summary", {})
         console.print(f"\n📊 Summary:", style="bold blue")
         console.print(f"Total recent jobs: {summary.get('total_jobs', 0)}")
         console.print(f"Active job types: {summary.get('job_types', 0)}")
         console.print(f"Fresh data sources: {summary.get('fresh_sources', 0)}")
-
+
         # Data freshness
-        freshness = data.get('data_freshness', {})
+        freshness = data.get("data_freshness", {})
         if freshness:
             console.print(f"\n🕐 Data Freshness:", style="bold blue")
             for source, info in freshness.items():
-                status_icon = '🟢' if info['status'] == 'fresh' else '🟡'
-                source_name = source.replace('_', ' ').title()
-                last_success = info['last_success'][:19] if info['last_success'] else 'Never'
+                status_icon = "🟢" if info["status"] == "fresh" else "🟡"
+                source_name = source.replace("_", " ").title()
+                last_success = info["last_success"][:19] if info["last_success"] else "Never"
                 console.print(f"{status_icon} {source_name}: {last_success}")
-
+
         # Recent jobs
-        recent_jobs = data.get('recent_jobs', [])[:10]  # Show top 10
+        recent_jobs = data.get("recent_jobs", [])[:10]  # Show top 10
         if recent_jobs:
-            console.print(f"\n📝 Recent Jobs (showing {len(recent_jobs)}):", style="bold blue")
+            console.print(f"\n📝 Recent Jobs (showing {len(recent_jobs)}):", style="bold blue")
             for job in recent_jobs:
-                status_icon = {'completed': '✅', 'running': '🔄', 'failed': '❌', 'pending': '⏳'}.get(job.get('status'), '❓')
-                job_type = job.get('job_type', 'unknown').replace('_', ' ').title()
-                started_at = job.get('started_at', 'N/A')[:19]
+                status_icon = {
+                    "completed": "✅",
+                    "running": "🔄",
+                    "failed": "❌",
+                    "pending": "⏳",
+                }.get(job.get("status"), "❓")
+                job_type = job.get("job_type", "unknown").replace("_", " ").title()
+                started_at = job.get("started_at", "N/A")[:19]
                 console.print(f"{status_icon} {job_type}: {started_at}")
 
 
-@politician_trading_cli.command("config-real-data")
+@politician_trading_cli.command("config-real-data")
 @click.option("--enable", is_flag=True, help="Enable real data collection")
 @click.option("--restore", is_flag=True, help="Restore sample data mode")
 @click.option("--status", is_flag=True, help="Show current configuration status")
 def configure_real_data(enable: bool, restore: bool, status: bool):
     """Configure real vs sample data collection"""
-
+
     if status or not (enable or restore):
         # Show current status
         console.print("🔧 DATA COLLECTION CONFIGURATION", style="bold cyan")
-
+
         console.print("\n📊 Current Status:", style="bold blue")
         console.print("• Sample data mode: Currently DISABLED", style="green")
         console.print("• Real API calls: Currently ACTIVE", style="green")
         console.print("• Database writes: Currently WORKING", style="green")
-
+
         console.print("\n🎯 Data Source Readiness:", style="bold blue")
         readiness_info = [
             ("UK Parliament API", "✅ Active - Real API with full transaction data", "green"),
-            ("US House/Senate", "✅ Active - Real disclosure database access", "green"),
+            ("US House/Senate", "✅ Active - Real disclosure database access", "green"),
             ("EU Parliament", "✅ Active - Real MEP profile scraping", "green"),
             ("California NetFile", "⚠️ Limited - Complex forms require careful handling", "yellow"),
-            ("EU Member States", "⚠️ Limited - Country-specific implementations needed", "yellow")
+            ("EU Member States", "⚠️ Limited - Country-specific implementations needed", "yellow"),
         ]
-
+
         for source, info, color in readiness_info:
             console.print(f"{info}", style=color)
-
+
         console.print("\n💡 Commands:", style="bold blue")
         console.print("mcli politician-trading config-real-data --enable   # Enable real data")
         console.print("mcli politician-trading config-real-data --restore  # Restore sample mode")
-
+
         return
-
+
     # Get scraper files
     src_dir = Path(__file__).parent
     scraper_files = [
         "scrapers_uk.py",
         "scrapers_california.py",
-        "scrapers_eu.py",
-        "scrapers_us_states.py"
+        "scrapers_eu.py",
+        "scrapers_us_states.py",
     ]
-
+
     if restore:
         console.print("🔄 RESTORING SAMPLE DATA MODE", style="bold yellow")
-
+
         restored = 0
         for file_name in scraper_files:
             file_path = src_dir / file_name
             backup_path = Path(str(file_path) + ".backup")
-
+
             if backup_path.exists():
                 # Restore from backup
                 try:
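The freshness check above classifies a source as fresh when its last successful job completed within 24 hours, normalizing a trailing `Z` before parsing since `datetime.fromisoformat` rejects it on the older Python versions this code targets. A minimal sketch of that check, with a hypothetical timestamp:

from datetime import datetime, timedelta

completed_at = "2024-01-03T08:00:00Z"  # hypothetical job timestamp

# Older datetime.fromisoformat() rejects a trailing "Z", hence the replace().
timestamp = datetime.fromisoformat(completed_at.replace("Z", "+00:00"))
is_recent = (datetime.now() - timestamp.replace(tzinfo=None)) < timedelta(hours=24)
print("fresh" if is_recent else "stale")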
@@ -1717,70 +1868,68 @@ def configure_real_data(enable: bool, restore: bool, status: bool):
                     console.print(f"❌ Failed to restore {file_name}: {e}", style="red")
             else:
                 console.print(f"ℹ️ No backup found for {file_name}", style="dim")
-
+
         console.print(f"\n🎯 Restored {restored} files to sample mode", style="green")
-
+
     elif enable:
         console.print("🚀 ENABLING REAL DATA COLLECTION", style="bold green")
-
+
         with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            console=console
+            SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console
         ) as progress:
             task = progress.add_task("Configuring scrapers...", total=len(scraper_files))
-
+
             modifications_made = 0
-
+
             for file_name in scraper_files:
                 progress.update(task, description=f"Processing {file_name}...")
-
+
                 file_path = src_dir / file_name
-
+
                 if not file_path.exists():
                     progress.advance(task)
                     continue
-
+
                 try:
                     # Read file content
                     content = file_path.read_text()
                     original_content = content
-
+
                     # Remove sample flags
                     content = re.sub(r'"sample":\s*True', '"sample": False', content)
                     content = re.sub(r"'sample':\s*True", "'sample': False", content)
-
+
                     # Enable actual processing
                     content = re.sub(
-                        r'# This would implement actual (.+?) scraping',
+                        r"# This would implement actual (.+?) scraping",
                         r'logger.info("Processing real \1 data")',
-                        content
+                        content,
                     )
-
+
                     if content != original_content:
                         # Backup original
                         backup_path = str(file_path) + ".backup"
                         Path(backup_path).write_text(original_content)
-
+
                         # Write modified content
                         file_path.write_text(content)
                         modifications_made += 1
-
+
                 except Exception as e:
                     console.print(f"❌ Error processing {file_name}: {e}", style="red")
-
+
                 progress.advance(task)
-
+
         console.print(f"\n✅ Real data configuration complete!", style="bold green")
         console.print(f"Modified {modifications_made} scraper files", style="green")
-
+
         if modifications_made > 0:
             console.print(f"\n⚠️ Important Next Steps:", style="bold yellow")
             console.print("1. Test with UK Parliament first (most reliable)", style="dim")
             console.print("2. Monitor API rate limits carefully", style="dim")
             console.print("3. Check logs for parsing errors", style="dim")
             console.print("4. Use --restore flag if issues occur", style="dim")
-
+
         console.print(f"\n🧪 Test Commands:", style="bold blue")
         console.print("mcli politician-trading cron run --type quick  # Quick test")
         console.print("mcli politician-trading monitor                # Check results")
|