mcli-framework 7.1.3__py3-none-any.whl → 7.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/main.py +10 -0
- mcli/lib/custom_commands.py +424 -0
- mcli/lib/paths.py +12 -0
- mcli/ml/dashboard/app.py +13 -13
- mcli/ml/dashboard/app_integrated.py +1292 -148
- mcli/ml/dashboard/app_supabase.py +46 -21
- mcli/ml/dashboard/app_training.py +14 -14
- mcli/ml/dashboard/components/charts.py +258 -0
- mcli/ml/dashboard/components/metrics.py +125 -0
- mcli/ml/dashboard/components/tables.py +228 -0
- mcli/ml/dashboard/pages/cicd.py +382 -0
- mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
- mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
- mcli/ml/dashboard/pages/workflows.py +533 -0
- mcli/ml/training/train_model.py +569 -0
- mcli/self/self_cmd.py +322 -94
- mcli/workflow/politician_trading/data_sources.py +259 -1
- mcli/workflow/politician_trading/models.py +159 -1
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
- mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
- mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
- mcli/workflow/politician_trading/seed_database.py +539 -0
- mcli/workflow/workflow.py +8 -27
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
- mcli/workflow/daemon/api_daemon.py +0 -800
- mcli/workflow/daemon/commands.py +0 -1196
- mcli/workflow/dashboard/dashboard_cmd.py +0 -120
- mcli/workflow/file/file.py +0 -100
- mcli/workflow/git_commit/commands.py +0 -430
- mcli/workflow/politician_trading/commands.py +0 -1939
- mcli/workflow/scheduler/commands.py +0 -493
- mcli/workflow/sync/sync_cmd.py +0 -437
- mcli/workflow/videos/videos.py +0 -242
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,1939 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
CLI commands for politician trading workflow
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
import json
|
|
7
|
-
import os
|
|
8
|
-
import re
|
|
9
|
-
from datetime import datetime, timedelta
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from typing import Any, Dict, List
|
|
12
|
-
|
|
13
|
-
import click
|
|
14
|
-
from rich.console import Console
|
|
15
|
-
from rich.json import JSON
|
|
16
|
-
from rich.panel import Panel
|
|
17
|
-
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
18
|
-
from rich.table import Table
|
|
19
|
-
|
|
20
|
-
from mcli.lib.logger.logger import get_logger
|
|
21
|
-
|
|
22
|
-
from .config import WorkflowConfig
|
|
23
|
-
from .connectivity import (
|
|
24
|
-
SupabaseConnectivityValidator,
|
|
25
|
-
run_connectivity_validation,
|
|
26
|
-
run_continuous_monitoring,
|
|
27
|
-
)
|
|
28
|
-
from .database import PoliticianTradingDB
|
|
29
|
-
from .monitoring import PoliticianTradingMonitor, run_health_check, run_stats_report
|
|
30
|
-
from .workflow import (
|
|
31
|
-
PoliticianTradingWorkflow,
|
|
32
|
-
check_politician_trading_status,
|
|
33
|
-
run_politician_trading_collection,
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
logger = get_logger(__name__)
|
|
37
|
-
console = Console()
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@click.group(name="politician-trading")
|
|
41
|
-
def politician_trading_cli():
|
|
42
|
-
"""Manage politician trading data collection workflow"""
|
|
43
|
-
pass
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
@politician_trading_cli.command("run")
|
|
47
|
-
@click.option("--full", is_flag=True, help="Run full data collection (default)")
|
|
48
|
-
@click.option("--us-only", is_flag=True, help="Only collect US Congress data")
|
|
49
|
-
@click.option("--eu-only", is_flag=True, help="Only collect EU Parliament data")
|
|
50
|
-
def run_collection(full: bool, us_only: bool, eu_only: bool):
|
|
51
|
-
"""Run politician trading data collection"""
|
|
52
|
-
console.print("🏛️ Starting Politician Trading Data Collection", style="bold cyan")
|
|
53
|
-
|
|
54
|
-
try:
|
|
55
|
-
if us_only:
|
|
56
|
-
console.print("Collecting US Congress data only...", style="yellow")
|
|
57
|
-
# Would implement US-only collection
|
|
58
|
-
result = asyncio.run(run_politician_trading_collection())
|
|
59
|
-
elif eu_only:
|
|
60
|
-
console.print("Collecting EU Parliament data only...", style="yellow")
|
|
61
|
-
# Would implement EU-only collection
|
|
62
|
-
result = asyncio.run(run_politician_trading_collection())
|
|
63
|
-
else:
|
|
64
|
-
console.print("Running full data collection...", style="green")
|
|
65
|
-
result = asyncio.run(run_politician_trading_collection())
|
|
66
|
-
|
|
67
|
-
# Display results
|
|
68
|
-
if result.get("status") == "completed":
|
|
69
|
-
console.print("✅ Collection completed successfully!", style="bold green")
|
|
70
|
-
|
|
71
|
-
# Create summary table
|
|
72
|
-
table = Table(title="Collection Summary")
|
|
73
|
-
table.add_column("Metric", style="cyan")
|
|
74
|
-
table.add_column("Value", style="green")
|
|
75
|
-
|
|
76
|
-
summary = result.get("summary", {})
|
|
77
|
-
table.add_row("New Disclosures", str(summary.get("total_new_disclosures", 0)))
|
|
78
|
-
table.add_row("Updated Disclosures", str(summary.get("total_updated_disclosures", 0)))
|
|
79
|
-
table.add_row("Errors", str(len(summary.get("errors", []))))
|
|
80
|
-
table.add_row(
|
|
81
|
-
"Duration",
|
|
82
|
-
_calculate_duration(result.get("started_at"), result.get("completed_at")),
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
console.print(table)
|
|
86
|
-
|
|
87
|
-
# Show job details
|
|
88
|
-
jobs = result.get("jobs", {})
|
|
89
|
-
for job_name, job_data in jobs.items():
|
|
90
|
-
job_panel = Panel(
|
|
91
|
-
f"Status: {job_data.get('status', 'unknown')}\n"
|
|
92
|
-
f"New: {job_data.get('new_disclosures', 0)} | "
|
|
93
|
-
f"Updated: {job_data.get('updated_disclosures', 0)} | "
|
|
94
|
-
f"Errors: {len(job_data.get('errors', []))}",
|
|
95
|
-
title=f"📊 {job_name.upper()} Job",
|
|
96
|
-
border_style="green",
|
|
97
|
-
)
|
|
98
|
-
console.print(job_panel)
|
|
99
|
-
else:
|
|
100
|
-
console.print("❌ Collection failed!", style="bold red")
|
|
101
|
-
if "error" in result:
|
|
102
|
-
console.print(f"Error: {result['error']}", style="red")
|
|
103
|
-
|
|
104
|
-
except Exception as e:
|
|
105
|
-
console.print(f"❌ Command failed: {e}", style="bold red")
|
|
106
|
-
logger.error(f"Collection command failed: {e}")
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
@politician_trading_cli.command("status")
|
|
110
|
-
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
111
|
-
def check_status(output_json: bool):
|
|
112
|
-
"""Check current status of politician trading data collection"""
|
|
113
|
-
try:
|
|
114
|
-
status = asyncio.run(check_politician_trading_status())
|
|
115
|
-
|
|
116
|
-
if output_json:
|
|
117
|
-
console.print(JSON.from_data(status))
|
|
118
|
-
return
|
|
119
|
-
|
|
120
|
-
# Display formatted status
|
|
121
|
-
console.print("🏛️ Politician Trading Data Status", style="bold cyan")
|
|
122
|
-
|
|
123
|
-
# Overall status
|
|
124
|
-
if "error" in status:
|
|
125
|
-
console.print(f"❌ Status check failed: {status['error']}", style="red")
|
|
126
|
-
return
|
|
127
|
-
|
|
128
|
-
# Summary panel
|
|
129
|
-
summary_text = f"""Database Connection: {status.get('database_connection', 'unknown')}
|
|
130
|
-
Configuration: {status.get('config_loaded', 'unknown')}
|
|
131
|
-
Total Disclosures: {status.get('total_disclosures', 0):,}
|
|
132
|
-
Today's New Records: {status.get('recent_disclosures_today', 0):,}
|
|
133
|
-
Last Update: {status.get('timestamp', 'unknown')}"""
|
|
134
|
-
|
|
135
|
-
summary_panel = Panel(summary_text, title="📈 System Status", border_style="blue")
|
|
136
|
-
console.print(summary_panel)
|
|
137
|
-
|
|
138
|
-
# Recent jobs table
|
|
139
|
-
recent_jobs = status.get("recent_jobs", [])
|
|
140
|
-
if recent_jobs:
|
|
141
|
-
jobs_table = Table(title="Recent Jobs")
|
|
142
|
-
jobs_table.add_column("Job Type", style="cyan")
|
|
143
|
-
jobs_table.add_column("Status", style="green")
|
|
144
|
-
jobs_table.add_column("Started", style="yellow")
|
|
145
|
-
jobs_table.add_column("Records", justify="right", style="magenta")
|
|
146
|
-
jobs_table.add_column("Duration", style="blue")
|
|
147
|
-
|
|
148
|
-
for job in recent_jobs[:5]: # Show last 5 jobs
|
|
149
|
-
status_style = (
|
|
150
|
-
"green"
|
|
151
|
-
if job.get("status") == "completed"
|
|
152
|
-
else "red" if job.get("status") == "failed" else "yellow"
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
jobs_table.add_row(
|
|
156
|
-
job.get("job_type", ""),
|
|
157
|
-
f"[{status_style}]{job.get('status', '')}[/{status_style}]",
|
|
158
|
-
_format_timestamp(job.get("started_at")),
|
|
159
|
-
str(job.get("records_processed", 0)),
|
|
160
|
-
_calculate_duration(job.get("started_at"), job.get("completed_at")),
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
console.print(jobs_table)
|
|
164
|
-
|
|
165
|
-
except Exception as e:
|
|
166
|
-
console.print(f"❌ Status check failed: {e}", style="bold red")
|
|
167
|
-
logger.error(f"Status command failed: {e}")
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
@politician_trading_cli.command("setup")
|
|
171
|
-
@click.option("--create-tables", is_flag=True, help="Create database tables")
|
|
172
|
-
@click.option("--verify", is_flag=True, help="Verify configuration and connection")
|
|
173
|
-
@click.option("--generate-schema", is_flag=True, help="Generate schema SQL file")
|
|
174
|
-
@click.option("--output-dir", default=".", help="Directory to save generated files")
|
|
175
|
-
def setup_workflow(create_tables: bool, verify: bool, generate_schema: bool, output_dir: str):
|
|
176
|
-
"""Setup politician trading workflow"""
|
|
177
|
-
console.print("🔧 Setting up Politician Trading Workflow", style="bold blue")
|
|
178
|
-
|
|
179
|
-
try:
|
|
180
|
-
config = WorkflowConfig.default()
|
|
181
|
-
workflow = PoliticianTradingWorkflow(config)
|
|
182
|
-
|
|
183
|
-
if verify:
|
|
184
|
-
console.print("Verifying configuration and database connection...")
|
|
185
|
-
|
|
186
|
-
# Test database connection
|
|
187
|
-
try:
|
|
188
|
-
status = asyncio.run(workflow.run_quick_check())
|
|
189
|
-
if "error" not in status:
|
|
190
|
-
console.print("✅ Database connection successful", style="green")
|
|
191
|
-
console.print("✅ Configuration loaded", style="green")
|
|
192
|
-
|
|
193
|
-
# Display config summary
|
|
194
|
-
config_text = f"""Supabase URL: {config.supabase.url}
|
|
195
|
-
Request Delay: {config.scraping.request_delay}s
|
|
196
|
-
Max Retries: {config.scraping.max_retries}
|
|
197
|
-
Timeout: {config.scraping.timeout}s"""
|
|
198
|
-
|
|
199
|
-
config_panel = Panel(config_text, title="🔧 Configuration", border_style="blue")
|
|
200
|
-
console.print(config_panel)
|
|
201
|
-
else:
|
|
202
|
-
console.print(f"❌ Verification failed: {status['error']}", style="red")
|
|
203
|
-
except Exception as e:
|
|
204
|
-
console.print(f"❌ Verification failed: {e}", style="red")
|
|
205
|
-
|
|
206
|
-
if generate_schema:
|
|
207
|
-
console.print("📄 Generating database schema files...", style="blue")
|
|
208
|
-
|
|
209
|
-
# Generate schema file
|
|
210
|
-
import os
|
|
211
|
-
from pathlib import Path
|
|
212
|
-
|
|
213
|
-
output_path = Path(output_dir)
|
|
214
|
-
output_path.mkdir(exist_ok=True)
|
|
215
|
-
|
|
216
|
-
# Read the schema SQL from the module
|
|
217
|
-
schema_file = Path(__file__).parent / "schema.sql"
|
|
218
|
-
if schema_file.exists():
|
|
219
|
-
schema_content = schema_file.read_text()
|
|
220
|
-
|
|
221
|
-
# Write to output directory
|
|
222
|
-
output_schema_file = output_path / "politician_trading_schema.sql"
|
|
223
|
-
output_schema_file.write_text(schema_content)
|
|
224
|
-
|
|
225
|
-
console.print(
|
|
226
|
-
f"✅ Schema SQL generated: {output_schema_file.absolute()}", style="green"
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
# Also generate a setup instructions file
|
|
230
|
-
instructions = f"""# Politician Trading Database Setup Instructions
|
|
231
|
-
|
|
232
|
-
## Step 1: Create Database Schema
|
|
233
|
-
|
|
234
|
-
1. Open your Supabase SQL editor: https://supabase.com/dashboard/project/{config.supabase.url.split('//')[1].split('.')[0]}/sql/new
|
|
235
|
-
2. Copy and paste the contents of: {output_schema_file.absolute()}
|
|
236
|
-
3. Execute the SQL to create all tables, indexes, and triggers
|
|
237
|
-
|
|
238
|
-
## Step 2: Verify Setup
|
|
239
|
-
|
|
240
|
-
Run the following command to verify everything is working:
|
|
241
|
-
|
|
242
|
-
```bash
|
|
243
|
-
politician-trading setup --verify
|
|
244
|
-
```
|
|
245
|
-
|
|
246
|
-
## Step 3: Test Connectivity
|
|
247
|
-
|
|
248
|
-
```bash
|
|
249
|
-
politician-trading connectivity
|
|
250
|
-
```
|
|
251
|
-
|
|
252
|
-
## Step 4: Run First Collection
|
|
253
|
-
|
|
254
|
-
```bash
|
|
255
|
-
politician-trading test-workflow --verbose
|
|
256
|
-
```
|
|
257
|
-
|
|
258
|
-
## Step 5: Setup Automated Collection (Optional)
|
|
259
|
-
|
|
260
|
-
```bash
|
|
261
|
-
politician-trading cron-job --create
|
|
262
|
-
```
|
|
263
|
-
|
|
264
|
-
## Database Tables Created
|
|
265
|
-
|
|
266
|
-
- **politicians**: Stores politician information (US Congress, EU Parliament)
|
|
267
|
-
- **trading_disclosures**: Individual trading transactions/disclosures
|
|
268
|
-
- **data_pull_jobs**: Job execution tracking and status
|
|
269
|
-
- **data_sources**: Data source configuration and health
|
|
270
|
-
|
|
271
|
-
## Troubleshooting
|
|
272
|
-
|
|
273
|
-
If you encounter issues:
|
|
274
|
-
|
|
275
|
-
1. Check connectivity: `politician-trading connectivity --json`
|
|
276
|
-
2. View logs: `politician-trading health`
|
|
277
|
-
3. Test workflow: `politician-trading test-workflow --verbose`
|
|
278
|
-
"""
|
|
279
|
-
|
|
280
|
-
instructions_file = output_path / "SETUP_INSTRUCTIONS.md"
|
|
281
|
-
instructions_file.write_text(instructions)
|
|
282
|
-
|
|
283
|
-
console.print(
|
|
284
|
-
f"✅ Setup instructions generated: {instructions_file.absolute()}",
|
|
285
|
-
style="green",
|
|
286
|
-
)
|
|
287
|
-
|
|
288
|
-
# Display summary
|
|
289
|
-
console.print("\n📋 Generated Files:", style="bold")
|
|
290
|
-
console.print(f" 📄 Schema SQL: {output_schema_file.name}")
|
|
291
|
-
console.print(f" 📋 Instructions: {instructions_file.name}")
|
|
292
|
-
console.print(f" 📁 Location: {output_path.absolute()}")
|
|
293
|
-
|
|
294
|
-
console.print("\n🚀 Next Steps:", style="bold green")
|
|
295
|
-
console.print("1. Open Supabase SQL editor")
|
|
296
|
-
console.print(f"2. Execute SQL from: {output_schema_file.name}")
|
|
297
|
-
console.print("3. Run: politician-trading setup --verify")
|
|
298
|
-
console.print("4. Run: politician-trading test-workflow --verbose")
|
|
299
|
-
|
|
300
|
-
else:
|
|
301
|
-
console.print("❌ Schema template not found", style="red")
|
|
302
|
-
|
|
303
|
-
if create_tables:
|
|
304
|
-
console.print("Creating database tables...")
|
|
305
|
-
schema_ok = asyncio.run(workflow.db.ensure_schema())
|
|
306
|
-
if schema_ok:
|
|
307
|
-
console.print("✅ Database schema verified", style="green")
|
|
308
|
-
else:
|
|
309
|
-
console.print("⚠️ Database schema needs to be created manually", style="yellow")
|
|
310
|
-
console.print("💡 Run: politician-trading setup --generate-schema", style="blue")
|
|
311
|
-
|
|
312
|
-
except Exception as e:
|
|
313
|
-
console.print(f"❌ Setup failed: {e}", style="bold red")
|
|
314
|
-
logger.error(f"Setup command failed: {e}")
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
@politician_trading_cli.command("cron-job")
|
|
318
|
-
@click.option("--create", is_flag=True, help="Show how to create Supabase cron job")
|
|
319
|
-
@click.option("--test", is_flag=True, help="Test the cron job function")
|
|
320
|
-
def manage_cron_job(create: bool, test: bool):
|
|
321
|
-
"""Manage Supabase cron job for automated data collection"""
|
|
322
|
-
|
|
323
|
-
if create:
|
|
324
|
-
console.print("🕒 Creating Supabase Cron Job", style="bold blue")
|
|
325
|
-
|
|
326
|
-
cron_sql = """
|
|
327
|
-
-- Create cron job for politician trading data collection
|
|
328
|
-
SELECT cron.schedule(
|
|
329
|
-
'politician-trading-collection',
|
|
330
|
-
'0 */6 * * *', -- Every 6 hours
|
|
331
|
-
$$
|
|
332
|
-
SELECT net.http_post(
|
|
333
|
-
url := 'https://your-function-url.supabase.co/functions/v1/politician-trading-collect',
|
|
334
|
-
headers := '{"Content-Type": "application/json", "Authorization": "Bearer YOUR_ANON_KEY"}'::jsonb,
|
|
335
|
-
body := '{}'::jsonb
|
|
336
|
-
) as request_id;
|
|
337
|
-
$$
|
|
338
|
-
);
|
|
339
|
-
|
|
340
|
-
-- Check cron job status
|
|
341
|
-
SELECT * FROM cron.job;
|
|
342
|
-
"""
|
|
343
|
-
|
|
344
|
-
console.print("Add this SQL to your Supabase SQL editor:", style="green")
|
|
345
|
-
console.print(Panel(cron_sql, title="📝 Cron Job SQL", border_style="green"))
|
|
346
|
-
|
|
347
|
-
console.print("\n📋 Next steps:", style="bold blue")
|
|
348
|
-
console.print("1. Create an Edge Function in Supabase for the collection endpoint")
|
|
349
|
-
console.print("2. Update the URL in the cron job SQL above")
|
|
350
|
-
console.print("3. Execute the SQL in your Supabase dashboard")
|
|
351
|
-
console.print("4. Monitor the job with: SELECT * FROM cron.job_run_details;")
|
|
352
|
-
|
|
353
|
-
if test:
|
|
354
|
-
console.print("🧪 Testing cron job function...", style="yellow")
|
|
355
|
-
try:
|
|
356
|
-
result = asyncio.run(run_politician_trading_collection())
|
|
357
|
-
console.print("✅ Cron job function test completed", style="green")
|
|
358
|
-
console.print(JSON.from_data(result))
|
|
359
|
-
except Exception as e:
|
|
360
|
-
console.print(f"❌ Cron job test failed: {e}", style="red")
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
@politician_trading_cli.command("health")
|
|
364
|
-
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
365
|
-
def check_health(output_json: bool):
|
|
366
|
-
"""Check system health and status"""
|
|
367
|
-
try:
|
|
368
|
-
health = asyncio.run(run_health_check())
|
|
369
|
-
|
|
370
|
-
if output_json:
|
|
371
|
-
console.print(JSON.from_data(health))
|
|
372
|
-
else:
|
|
373
|
-
monitor = PoliticianTradingMonitor()
|
|
374
|
-
monitor.display_health_report(health)
|
|
375
|
-
|
|
376
|
-
except Exception as e:
|
|
377
|
-
console.print(f"❌ Health check failed: {e}", style="bold red")
|
|
378
|
-
logger.error(f"Health check command failed: {e}")
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
@politician_trading_cli.command("stats")
|
|
382
|
-
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
383
|
-
def show_stats(output_json: bool):
|
|
384
|
-
"""Show detailed statistics"""
|
|
385
|
-
try:
|
|
386
|
-
stats = asyncio.run(run_stats_report())
|
|
387
|
-
|
|
388
|
-
if output_json:
|
|
389
|
-
console.print(JSON.from_data(stats))
|
|
390
|
-
else:
|
|
391
|
-
monitor = PoliticianTradingMonitor()
|
|
392
|
-
monitor.display_stats_report(stats)
|
|
393
|
-
|
|
394
|
-
except Exception as e:
|
|
395
|
-
console.print(f"❌ Stats generation failed: {e}", style="bold red")
|
|
396
|
-
logger.error(f"Stats command failed: {e}")
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
@politician_trading_cli.command("monitor")
|
|
400
|
-
@click.option("--interval", default=30, help="Check interval in seconds")
|
|
401
|
-
@click.option("--count", default=0, help="Number of checks (0 = infinite)")
|
|
402
|
-
def continuous_monitor(interval: int, count: int):
|
|
403
|
-
"""Continuously monitor system health"""
|
|
404
|
-
console.print(f"🔄 Starting continuous monitoring (interval: {interval}s)", style="bold blue")
|
|
405
|
-
|
|
406
|
-
async def monitor_loop():
|
|
407
|
-
monitor = PoliticianTradingMonitor()
|
|
408
|
-
check_count = 0
|
|
409
|
-
|
|
410
|
-
while True:
|
|
411
|
-
try:
|
|
412
|
-
console.clear()
|
|
413
|
-
console.print(
|
|
414
|
-
f"Check #{check_count + 1} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
|
415
|
-
style="dim",
|
|
416
|
-
)
|
|
417
|
-
|
|
418
|
-
health = await monitor.get_system_health()
|
|
419
|
-
monitor.display_health_report(health)
|
|
420
|
-
|
|
421
|
-
check_count += 1
|
|
422
|
-
if count > 0 and check_count >= count:
|
|
423
|
-
break
|
|
424
|
-
|
|
425
|
-
if count == 0 or check_count < count:
|
|
426
|
-
console.print(
|
|
427
|
-
f"\n⏱️ Next check in {interval} seconds... (Ctrl+C to stop)", style="dim"
|
|
428
|
-
)
|
|
429
|
-
await asyncio.sleep(interval)
|
|
430
|
-
|
|
431
|
-
except KeyboardInterrupt:
|
|
432
|
-
console.print("\n👋 Monitoring stopped by user", style="yellow")
|
|
433
|
-
break
|
|
434
|
-
except Exception as e:
|
|
435
|
-
console.print(f"❌ Monitor check failed: {e}", style="red")
|
|
436
|
-
await asyncio.sleep(interval)
|
|
437
|
-
|
|
438
|
-
try:
|
|
439
|
-
asyncio.run(monitor_loop())
|
|
440
|
-
except Exception as e:
|
|
441
|
-
console.print(f"❌ Monitoring failed: {e}", style="bold red")
|
|
442
|
-
logger.error(f"Monitor command failed: {e}")
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
@politician_trading_cli.command("connectivity")
|
|
446
|
-
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
447
|
-
@click.option("--continuous", is_flag=True, help="Run continuous monitoring")
|
|
448
|
-
@click.option("--interval", default=30, help="Check interval in seconds (continuous mode)")
|
|
449
|
-
@click.option("--duration", default=0, help="Duration in minutes (0 = infinite)")
|
|
450
|
-
def check_connectivity(output_json: bool, continuous: bool, interval: int, duration: int):
|
|
451
|
-
"""Test Supabase connectivity and database operations"""
|
|
452
|
-
if continuous:
|
|
453
|
-
console.print(f"🔄 Starting continuous connectivity monitoring", style="bold blue")
|
|
454
|
-
try:
|
|
455
|
-
asyncio.run(run_continuous_monitoring(interval, duration))
|
|
456
|
-
except Exception as e:
|
|
457
|
-
console.print(f"❌ Continuous monitoring failed: {e}", style="bold red")
|
|
458
|
-
logger.error(f"Continuous monitoring failed: {e}")
|
|
459
|
-
else:
|
|
460
|
-
try:
|
|
461
|
-
validation_result = asyncio.run(run_connectivity_validation())
|
|
462
|
-
|
|
463
|
-
if output_json:
|
|
464
|
-
console.print(JSON.from_data(validation_result))
|
|
465
|
-
else:
|
|
466
|
-
validator = SupabaseConnectivityValidator()
|
|
467
|
-
validator.display_connectivity_report(validation_result)
|
|
468
|
-
|
|
469
|
-
except Exception as e:
|
|
470
|
-
console.print(f"❌ Connectivity validation failed: {e}", style="bold red")
|
|
471
|
-
logger.error(f"Connectivity validation failed: {e}")
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
@politician_trading_cli.command("test-workflow")
|
|
475
|
-
@click.option("--verbose", "-v", is_flag=True, help="Verbose output")
|
|
476
|
-
@click.option("--validate-writes", is_flag=True, help="Validate database writes")
|
|
477
|
-
def test_full_workflow(verbose: bool, validate_writes: bool):
|
|
478
|
-
"""Run a complete workflow test with live Supabase connectivity"""
|
|
479
|
-
console.print("🧪 Running Full Politician Trading Workflow Test", style="bold green")
|
|
480
|
-
|
|
481
|
-
async def run_test():
|
|
482
|
-
# First validate connectivity
|
|
483
|
-
console.print("\n🔗 Step 1: Validating Supabase connectivity...", style="blue")
|
|
484
|
-
validator = SupabaseConnectivityValidator()
|
|
485
|
-
connectivity_result = await validator.validate_connectivity()
|
|
486
|
-
|
|
487
|
-
if verbose:
|
|
488
|
-
validator.display_connectivity_report(connectivity_result)
|
|
489
|
-
else:
|
|
490
|
-
console.print(
|
|
491
|
-
f"Connectivity Score: {connectivity_result['connectivity_score']}%", style="cyan"
|
|
492
|
-
)
|
|
493
|
-
|
|
494
|
-
if connectivity_result["connectivity_score"] < 75:
|
|
495
|
-
console.print("⚠️ Connectivity issues detected. Workflow may fail.", style="yellow")
|
|
496
|
-
|
|
497
|
-
# Run the workflow
|
|
498
|
-
console.print("\n🏛️ Step 2: Running politician trading collection workflow...", style="blue")
|
|
499
|
-
|
|
500
|
-
try:
|
|
501
|
-
with console.status("[bold blue]Executing workflow...") as status:
|
|
502
|
-
workflow_result = await run_politician_trading_collection()
|
|
503
|
-
|
|
504
|
-
# Display workflow results
|
|
505
|
-
console.print("\n📊 Workflow Results:", style="bold")
|
|
506
|
-
|
|
507
|
-
if workflow_result.get("status") == "completed":
|
|
508
|
-
console.print("✅ Workflow completed successfully!", style="green")
|
|
509
|
-
|
|
510
|
-
summary = workflow_result.get("summary", {})
|
|
511
|
-
console.print(f"New Disclosures: {summary.get('total_new_disclosures', 0)}")
|
|
512
|
-
console.print(f"Updated Disclosures: {summary.get('total_updated_disclosures', 0)}")
|
|
513
|
-
console.print(f"Errors: {len(summary.get('errors', []))}")
|
|
514
|
-
|
|
515
|
-
if verbose and summary.get("errors"):
|
|
516
|
-
console.print("\nErrors encountered:", style="red")
|
|
517
|
-
for error in summary["errors"][:5]: # Show first 5 errors
|
|
518
|
-
console.print(f" • {error}", style="dim red")
|
|
519
|
-
|
|
520
|
-
else:
|
|
521
|
-
console.print("❌ Workflow failed!", style="red")
|
|
522
|
-
if "error" in workflow_result:
|
|
523
|
-
console.print(f"Error: {workflow_result['error']}", style="red")
|
|
524
|
-
|
|
525
|
-
# Validate writes if requested
|
|
526
|
-
if validate_writes:
|
|
527
|
-
console.print("\n🔍 Step 3: Validating database writes...", style="blue")
|
|
528
|
-
write_validation = await validator._test_write_operations()
|
|
529
|
-
|
|
530
|
-
if write_validation["success"]:
|
|
531
|
-
console.print("✅ Database writes validated successfully", style="green")
|
|
532
|
-
else:
|
|
533
|
-
console.print(
|
|
534
|
-
f"❌ Database write validation failed: {write_validation.get('error', 'Unknown error')}",
|
|
535
|
-
style="red",
|
|
536
|
-
)
|
|
537
|
-
|
|
538
|
-
# Final connectivity check
|
|
539
|
-
console.print("\n🔗 Step 4: Post-workflow connectivity check...", style="blue")
|
|
540
|
-
final_connectivity = await validator.validate_connectivity()
|
|
541
|
-
|
|
542
|
-
console.print(
|
|
543
|
-
f"Final Connectivity Score: {final_connectivity['connectivity_score']}%",
|
|
544
|
-
style="cyan",
|
|
545
|
-
)
|
|
546
|
-
|
|
547
|
-
# Summary
|
|
548
|
-
console.print("\n📋 Test Summary:", style="bold")
|
|
549
|
-
workflow_status = (
|
|
550
|
-
"✅ PASSED" if workflow_result.get("status") == "completed" else "❌ FAILED"
|
|
551
|
-
)
|
|
552
|
-
connectivity_status = (
|
|
553
|
-
"✅ GOOD" if final_connectivity["connectivity_score"] >= 75 else "⚠️ DEGRADED"
|
|
554
|
-
)
|
|
555
|
-
|
|
556
|
-
console.print(f"Workflow: {workflow_status}")
|
|
557
|
-
console.print(f"Connectivity: {connectivity_status}")
|
|
558
|
-
console.print(
|
|
559
|
-
f"Duration: {workflow_result.get('started_at', '')} to {workflow_result.get('completed_at', '')}"
|
|
560
|
-
)
|
|
561
|
-
|
|
562
|
-
return {
|
|
563
|
-
"workflow_result": workflow_result,
|
|
564
|
-
"connectivity_result": final_connectivity,
|
|
565
|
-
"test_passed": workflow_result.get("status") == "completed"
|
|
566
|
-
and final_connectivity["connectivity_score"] >= 75,
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
except Exception as e:
|
|
570
|
-
console.print(f"❌ Workflow test failed: {e}", style="bold red")
|
|
571
|
-
if verbose:
|
|
572
|
-
console.print_exception()
|
|
573
|
-
return {"error": str(e), "test_passed": False}
|
|
574
|
-
|
|
575
|
-
try:
|
|
576
|
-
test_result = asyncio.run(run_test())
|
|
577
|
-
|
|
578
|
-
if test_result.get("test_passed"):
|
|
579
|
-
console.print("\n🎉 Full workflow test PASSED!", style="bold green")
|
|
580
|
-
else:
|
|
581
|
-
console.print("\n❌ Full workflow test FAILED!", style="bold red")
|
|
582
|
-
|
|
583
|
-
except Exception as e:
|
|
584
|
-
console.print(f"❌ Test execution failed: {e}", style="bold red")
|
|
585
|
-
logger.error(f"Test workflow command failed: {e}")
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
@politician_trading_cli.command("schema")
|
|
589
|
-
@click.option("--show-location", is_flag=True, help="Show schema file location")
|
|
590
|
-
@click.option("--generate", is_flag=True, help="Generate schema files")
|
|
591
|
-
@click.option("--output-dir", default=".", help="Output directory for generated files")
|
|
592
|
-
def manage_schema(show_location: bool, generate: bool, output_dir: str):
|
|
593
|
-
"""Manage database schema files"""
|
|
594
|
-
|
|
595
|
-
if show_location:
|
|
596
|
-
console.print("📁 Schema File Locations", style="bold blue")
|
|
597
|
-
|
|
598
|
-
from pathlib import Path
|
|
599
|
-
|
|
600
|
-
schema_file = Path(__file__).parent / "schema.sql"
|
|
601
|
-
|
|
602
|
-
console.print(f"Built-in Schema: {schema_file.absolute()}", style="cyan")
|
|
603
|
-
console.print(f"File size: {schema_file.stat().st_size} bytes", style="dim")
|
|
604
|
-
console.print(
|
|
605
|
-
f"Exists: {'✅ Yes' if schema_file.exists() else '❌ No'}",
|
|
606
|
-
style="green" if schema_file.exists() else "red",
|
|
607
|
-
)
|
|
608
|
-
|
|
609
|
-
# Show current working directory option
|
|
610
|
-
cwd_schema = Path.cwd() / "politician_trading_schema.sql"
|
|
611
|
-
console.print(f"\nCurrent directory: {cwd_schema.absolute()}", style="cyan")
|
|
612
|
-
console.print(
|
|
613
|
-
f"Exists: {'✅ Yes' if cwd_schema.exists() else '❌ No'}",
|
|
614
|
-
style="green" if cwd_schema.exists() else "dim",
|
|
615
|
-
)
|
|
616
|
-
|
|
617
|
-
if not cwd_schema.exists():
|
|
618
|
-
console.print("\n💡 To generate schema file here:", style="blue")
|
|
619
|
-
console.print("politician-trading schema --generate", style="yellow")
|
|
620
|
-
|
|
621
|
-
elif generate:
|
|
622
|
-
# Reuse the setup command logic
|
|
623
|
-
try:
|
|
624
|
-
import os
|
|
625
|
-
from pathlib import Path
|
|
626
|
-
|
|
627
|
-
console.print("📄 Generating database schema files...", style="blue")
|
|
628
|
-
|
|
629
|
-
output_path = Path(output_dir)
|
|
630
|
-
output_path.mkdir(exist_ok=True)
|
|
631
|
-
|
|
632
|
-
# Read the schema SQL from the module
|
|
633
|
-
schema_file = Path(__file__).parent / "schema.sql"
|
|
634
|
-
if schema_file.exists():
|
|
635
|
-
schema_content = schema_file.read_text()
|
|
636
|
-
|
|
637
|
-
# Write to output directory
|
|
638
|
-
output_schema_file = output_path / "politician_trading_schema.sql"
|
|
639
|
-
output_schema_file.write_text(schema_content)
|
|
640
|
-
|
|
641
|
-
console.print(
|
|
642
|
-
f"✅ Schema SQL generated: {output_schema_file.absolute()}", style="green"
|
|
643
|
-
)
|
|
644
|
-
|
|
645
|
-
# Show file info
|
|
646
|
-
console.print(f"📊 File size: {output_schema_file.stat().st_size:,} bytes")
|
|
647
|
-
console.print(f"📅 Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
648
|
-
|
|
649
|
-
# Count SQL statements
|
|
650
|
-
statements = len(
|
|
651
|
-
[
|
|
652
|
-
line
|
|
653
|
-
for line in schema_content.split("\n")
|
|
654
|
-
if line.strip().startswith(("CREATE", "INSERT", "SELECT"))
|
|
655
|
-
]
|
|
656
|
-
)
|
|
657
|
-
console.print(f"📝 SQL statements: {statements}")
|
|
658
|
-
|
|
659
|
-
else:
|
|
660
|
-
console.print("❌ Schema template not found", style="red")
|
|
661
|
-
|
|
662
|
-
except Exception as e:
|
|
663
|
-
console.print(f"❌ Schema generation failed: {e}", style="red")
|
|
664
|
-
|
|
665
|
-
else:
|
|
666
|
-
# Show schema information by default
|
|
667
|
-
console.print("🗂️ Politician Trading Database Schema", style="bold blue")
|
|
668
|
-
|
|
669
|
-
schema_info = [
|
|
670
|
-
(
|
|
671
|
-
"politicians",
|
|
672
|
-
"Stores politician information",
|
|
673
|
-
"UUID primary key, bioguide_id, role, party",
|
|
674
|
-
),
|
|
675
|
-
(
|
|
676
|
-
"trading_disclosures",
|
|
677
|
-
"Individual trading transactions",
|
|
678
|
-
"References politicians, amount ranges, asset details",
|
|
679
|
-
),
|
|
680
|
-
(
|
|
681
|
-
"data_pull_jobs",
|
|
682
|
-
"Job execution tracking",
|
|
683
|
-
"Status, timing, record counts, error details",
|
|
684
|
-
),
|
|
685
|
-
(
|
|
686
|
-
"data_sources",
|
|
687
|
-
"Data source configuration",
|
|
688
|
-
"URLs, regions, health status, request config",
|
|
689
|
-
),
|
|
690
|
-
]
|
|
691
|
-
|
|
692
|
-
schema_table = Table(title="Database Tables")
|
|
693
|
-
schema_table.add_column("Table", style="cyan")
|
|
694
|
-
schema_table.add_column("Purpose", style="white")
|
|
695
|
-
schema_table.add_column("Key Features", style="yellow")
|
|
696
|
-
|
|
697
|
-
for table_name, purpose, features in schema_info:
|
|
698
|
-
schema_table.add_row(table_name, purpose, features)
|
|
699
|
-
|
|
700
|
-
console.print(schema_table)
|
|
701
|
-
|
|
702
|
-
console.print("\n🚀 Commands:", style="bold")
|
|
703
|
-
console.print(" --show-location Show where schema files are located")
|
|
704
|
-
console.print(" --generate Generate schema SQL file")
|
|
705
|
-
console.print(" --generate --output-dir DIR Generate to specific directory")
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
# Helper functions
|
|
709
|
-
def _calculate_duration(start_time: str, end_time: str) -> str:
|
|
710
|
-
"""Calculate duration between timestamps"""
|
|
711
|
-
if not start_time or not end_time:
|
|
712
|
-
return "Unknown"
|
|
713
|
-
|
|
714
|
-
try:
|
|
715
|
-
start = datetime.fromisoformat(start_time.replace("Z", "+00:00"))
|
|
716
|
-
end = datetime.fromisoformat(end_time.replace("Z", "+00:00"))
|
|
717
|
-
duration = end - start
|
|
718
|
-
|
|
719
|
-
total_seconds = int(duration.total_seconds())
|
|
720
|
-
hours = total_seconds // 3600
|
|
721
|
-
minutes = (total_seconds % 3600) // 60
|
|
722
|
-
seconds = total_seconds % 60
|
|
723
|
-
|
|
724
|
-
if hours > 0:
|
|
725
|
-
return f"{hours}h {minutes}m {seconds}s"
|
|
726
|
-
elif minutes > 0:
|
|
727
|
-
return f"{minutes}m {seconds}s"
|
|
728
|
-
else:
|
|
729
|
-
return f"{seconds}s"
|
|
730
|
-
except Exception:
|
|
731
|
-
return "Unknown"
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
def _format_timestamp(timestamp: str) -> str:
|
|
735
|
-
"""Format timestamp for display"""
|
|
736
|
-
if not timestamp:
|
|
737
|
-
return "Unknown"
|
|
738
|
-
|
|
739
|
-
try:
|
|
740
|
-
dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
|
|
741
|
-
return dt.strftime("%Y-%m-%d %H:%M")
|
|
742
|
-
except Exception:
|
|
743
|
-
return timestamp[:16] if len(timestamp) > 16 else timestamp
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
def _format_asset_display(disclosure: Dict[str, Any]) -> str:
|
|
747
|
-
"""Format asset display with proper ticker/name handling"""
|
|
748
|
-
asset_name = disclosure.get("asset_name", "Unknown Asset")
|
|
749
|
-
asset_ticker = disclosure.get("asset_ticker")
|
|
750
|
-
|
|
751
|
-
# If we have both ticker and name, show ticker first
|
|
752
|
-
if asset_ticker and asset_ticker.strip() and asset_ticker.lower() != "none":
|
|
753
|
-
return f"{asset_ticker} - {asset_name[:15]}"
|
|
754
|
-
# If we only have asset name, show just that
|
|
755
|
-
elif asset_name and asset_name.strip():
|
|
756
|
-
return asset_name[:20]
|
|
757
|
-
# Fallback
|
|
758
|
-
else:
|
|
759
|
-
return "Unknown Asset"
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
@politician_trading_cli.command("data-sources")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def view_data_sources(output_json: bool):
    """View current data sources and their configurations.

    Groups the statically registered sources in ``data_sources.ALL_DATA_SOURCES``
    by category, keeping only those whose ``status`` is ``"active"``, then
    prints either a JSON document (--json) or per-category Rich tables.

    Args:
        output_json: When True, emit machine-readable JSON instead of tables.
    """
    console = Console()

    try:
        from .config import WorkflowConfig
        from .data_sources import ACTIVE_SOURCES, ALL_DATA_SOURCES, TOTAL_SOURCES

        config = WorkflowConfig.default()
        # NOTE(review): active_sources is assigned but never used below — the
        # display is driven entirely by ALL_DATA_SOURCES; confirm intent.
        active_sources = config.scraping.get_active_sources()

        # Group sources by category for display
        data_sources = {}

        for category, sources in ALL_DATA_SOURCES.items():
            active_category_sources = [s for s in sources if s.status == "active"]
            if active_category_sources:
                # Both lookup dicts key on the category id; a category missing
                # from either mapping would raise KeyError here (caught below).
                data_sources[category] = {
                    "name": {
                        "us_federal": "US Federal Government",
                        "us_states": "US State Governments",
                        "eu_parliament": "EU Parliament",
                        "eu_national": "EU National Parliaments",
                        "third_party": "Third-Party Aggregators",
                    }[category],
                    "sources": active_category_sources,
                    "count": len(active_category_sources),
                    "status": "active",
                    "description": {
                        "us_federal": "Congressional and federal official financial disclosures",
                        "us_states": "State legislature financial disclosure databases",
                        "eu_parliament": "MEP financial interest and income declarations",
                        "eu_national": "National parliament financial disclosure systems",
                        "third_party": "Commercial aggregators and enhanced analysis platforms",
                    }[category],
                }

        if output_json:
            # For JSON output, convert DataSource objects to dictionaries
            json_output = {}
            for category, info in data_sources.items():
                json_output[category] = {
                    "name": info["name"],
                    "description": info["description"],
                    "count": info["count"],
                    "status": info["status"],
                    "sources": [
                        {
                            "name": source.name,
                            "jurisdiction": source.jurisdiction,
                            "institution": source.institution,
                            "url": source.url,
                            "disclosure_types": [dt.value for dt in source.disclosure_types],
                            "access_method": source.access_method.value,
                            "update_frequency": source.update_frequency,
                            "threshold_amount": source.threshold_amount,
                            "data_format": source.data_format,
                            "notes": source.notes,
                        }
                        for source in info["sources"]
                    ],
                }
            console.print(JSON.from_data(json_output))
        else:
            console.print(
                f"📊 Comprehensive Political Trading Data Sources ({ACTIVE_SOURCES} active of {TOTAL_SOURCES} total)",
                style="bold cyan",
            )

            for category_id, source_info in data_sources.items():
                console.print(
                    f"\n[bold blue]{source_info['name']}[/bold blue] ({source_info['count']} sources)"
                )
                console.print(f"  {source_info['description']}", style="dim")

                # Create table for this category's sources
                table = Table()
                table.add_column("Source", style="cyan")
                table.add_column("Jurisdiction", style="green")
                table.add_column("Access", style="yellow")
                table.add_column("Disclosure Types", style="magenta")
                table.add_column("Threshold", style="blue")

                for source in source_info["sources"]:
                    # Format disclosure types
                    types_display = ", ".join(
                        [dt.value.replace("_", " ").title() for dt in source.disclosure_types]
                    )

                    # Format threshold
                    threshold_display = (
                        f"${source.threshold_amount:,}" if source.threshold_amount else "None"
                    )

                    table.add_row(
                        source.name,
                        source.jurisdiction,
                        source.access_method.value.replace("_", " ").title(),
                        # Truncate long type lists to keep the table readable.
                        types_display[:30] + ("..." if len(types_display) > 30 else ""),
                        threshold_display,
                    )

                console.print(table)

            console.print(
                f"\n[dim]Total: {ACTIVE_SOURCES} active sources across {len(data_sources)} categories[/dim]"
            )

    except Exception as e:
        # Keep the output channel consistent with the requested format.
        if output_json:
            console.print(JSON.from_data({"error": str(e)}))
        else:
            console.print(f"❌ Failed to load data sources: {e}", style="bold red")
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
@politician_trading_cli.command("jobs")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
@click.option("--limit", default=10, help="Number of recent jobs to show")
def view_jobs(output_json: bool, limit: int):
    """View current and recent data collection jobs.

    Reads the ``data_pull_jobs`` table (newest first, up to ``limit`` rows)
    and renders either JSON or a Rich table with per-job status, duration and
    record counts.

    Args:
        output_json: When True, emit machine-readable JSON instead of a table.
        limit: Maximum number of jobs to fetch.
    """
    console = Console()

    try:

        # NOTE(review): the coroutine only performs synchronous supabase client
        # calls; asyncio.run() wraps it presumably for API symmetry — confirm.
        async def get_jobs():
            from .config import WorkflowConfig
            from .database import PoliticianTradingDB

            config = WorkflowConfig.default()
            db = PoliticianTradingDB(config)

            # Get recent jobs
            jobs_result = (
                db.client.table("data_pull_jobs")
                .select("*")
                .order("started_at", desc=True)
                .limit(limit)
                .execute()
            )

            return jobs_result.data if jobs_result.data else []

        jobs = asyncio.run(get_jobs())

        if output_json:
            console.print(JSON.from_data(jobs))
        else:
            console.print("🔄 Recent Data Collection Jobs", style="bold cyan")

            if not jobs:
                console.print("No jobs found", style="yellow")
                return

            jobs_table = Table()
            jobs_table.add_column("Job ID", style="cyan")
            jobs_table.add_column("Type", style="green")
            jobs_table.add_column("Status", style="white")
            jobs_table.add_column("Started", style="blue")
            jobs_table.add_column("Duration", style="magenta")
            jobs_table.add_column("Records", style="yellow")

            for job in jobs:
                # Map job status to a Rich color; unknown statuses render white.
                status_color = {
                    "completed": "green",
                    "running": "yellow",
                    "failed": "red",
                    "pending": "blue",
                }.get(job.get("status", "unknown"), "white")

                # Calculate duration
                started = job.get("started_at", "")
                completed = job.get("completed_at", "")
                duration = _format_duration_from_timestamps(started, completed)

                # Format records
                records_info = f"{job.get('records_new', 0)}n/{job.get('records_updated', 0)}u/{job.get('records_failed', 0)}f"

                jobs_table.add_row(
                    # Abbreviate the UUID so the table stays narrow.
                    job.get("id", "")[:8] + "...",
                    job.get("job_type", "unknown"),
                    f"[{status_color}]{job.get('status', 'unknown')}[/{status_color}]",
                    _format_timestamp(started),
                    duration,
                    records_info,
                )

            console.print(jobs_table)
            console.print("\nLegend: Records = new/updated/failed", style="dim")

    except Exception as e:
        if output_json:
            console.print(JSON.from_data({"error": str(e)}))
        else:
            console.print(f"❌ Failed to load jobs: {e}", style="bold red")
        logger.error(f"Jobs view failed: {e}")
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
def _format_duration_from_timestamps(started: str, completed: str) -> str:
|
|
962
|
-
"""Calculate and format duration from timestamps"""
|
|
963
|
-
if not started:
|
|
964
|
-
return "Unknown"
|
|
965
|
-
|
|
966
|
-
try:
|
|
967
|
-
start_dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
|
|
968
|
-
|
|
969
|
-
if completed:
|
|
970
|
-
end_dt = datetime.fromisoformat(completed.replace("Z", "+00:00"))
|
|
971
|
-
duration = end_dt - start_dt
|
|
972
|
-
else:
|
|
973
|
-
# Job still running
|
|
974
|
-
from datetime import timezone
|
|
975
|
-
|
|
976
|
-
duration = datetime.now(timezone.utc) - start_dt
|
|
977
|
-
|
|
978
|
-
return _format_duration_seconds(int(duration.total_seconds()))
|
|
979
|
-
|
|
980
|
-
except Exception:
|
|
981
|
-
return "Unknown"
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
@politician_trading_cli.command("politicians")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
@click.option("--limit", default=20, help="Number of politicians to show")
@click.option(
    "--role", type=click.Choice(["us_house_rep", "us_senator", "eu_mep"]), help="Filter by role"
)
@click.option("--party", help="Filter by party")
@click.option("--state", help="Filter by state/country")
@click.option("--search", help="Search by name (first, last, or full name)")
def view_politicians(output_json: bool, limit: int, role: str, party: str, state: str, search: str):
    """View and search politicians in the database.

    Queries the ``politicians`` table with optional role/party/state filters
    and a case-insensitive name search, newest records first, then renders
    either JSON or a Rich table.

    Args:
        output_json: When True, emit machine-readable JSON instead of a table.
        limit: Maximum number of rows to fetch.
        role: Exact-match role filter.
        party: Substring (ilike) party filter.
        state: Substring (ilike) state/country filter.
        search: Substring matched against first, last, or full name.
    """
    console = Console()

    try:

        async def get_politicians():
            from .config import WorkflowConfig
            from .database import PoliticianTradingDB

            config = WorkflowConfig.default()
            db = PoliticianTradingDB(config)

            # Build query
            query = db.client.table("politicians").select("*")

            # Apply filters
            if role:
                query = query.eq("role", role)
            if party:
                query = query.ilike("party", f"%{party}%")
            if state:
                query = query.ilike("state_or_country", f"%{state}%")
            if search:
                # Search across name fields
                query = query.or_(
                    f"first_name.ilike.%{search}%,last_name.ilike.%{search}%,full_name.ilike.%{search}%"
                )

            result = query.order("created_at", desc=True).limit(limit).execute()
            return result.data if result.data else []

        politicians = asyncio.run(get_politicians())

        if output_json:
            console.print(JSON.from_data(politicians))
        else:
            console.print("👥 Politicians Database", style="bold cyan")

            if not politicians:
                console.print("No politicians found", style="yellow")
                return

            politicians_table = Table()
            politicians_table.add_column("Name", style="cyan", min_width=25)
            politicians_table.add_column("Role", style="green")
            politicians_table.add_column("Party", style="blue")
            politicians_table.add_column("State/Country", style="magenta")
            politicians_table.add_column("District", style="yellow")
            politicians_table.add_column("Added", style="dim")

            for pol in politicians:
                # Known roles get an emoji label; anything else shown verbatim.
                role_display = {
                    "us_house_rep": "🏛️ House Rep",
                    "us_senator": "🏛️ Senator",
                    "eu_mep": "🇪🇺 MEP",
                }.get(pol.get("role", ""), pol.get("role", "Unknown"))

                politicians_table.add_row(
                    # Prefer full_name; otherwise assemble first + last.
                    pol.get("full_name")
                    or f"{pol.get('first_name', '')} {pol.get('last_name', '')}".strip(),
                    role_display,
                    pol.get("party", "") or "Independent",
                    pol.get("state_or_country", ""),
                    pol.get("district", "") or "At-Large",
                    _format_timestamp(pol.get("created_at", "")),
                )

            console.print(politicians_table)
            # NOTE(review): both placeholders use len(politicians), so this
            # always prints "N of N"; a true total would need a count query.
            console.print(
                f"\nShowing {len(politicians)} of {len(politicians)} politicians", style="dim"
            )

    except Exception as e:
        if output_json:
            console.print(JSON.from_data({"error": str(e)}))
        else:
            console.print(f"❌ Failed to load politicians: {e}", style="bold red")
        logger.error(f"Politicians view failed: {e}")
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
@politician_trading_cli.command("disclosures")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
@click.option("--limit", default=20, help="Number of disclosures to show")
@click.option("--politician", help="Filter by politician name")
@click.option("--asset", help="Filter by asset name or ticker")
@click.option(
    "--transaction-type",
    type=click.Choice(["purchase", "sale", "exchange"]),
    help="Filter by transaction type",
)
@click.option("--amount-min", type=float, help="Minimum transaction amount")
@click.option("--amount-max", type=float, help="Maximum transaction amount")
@click.option("--days", default=30, help="Show disclosures from last N days")
@click.option("--details", is_flag=True, help="Show detailed information including raw data")
def view_disclosures(
    output_json: bool,
    limit: int,
    politician: str,
    asset: str,
    transaction_type: str,
    amount_min: float,
    amount_max: float,
    days: int,
    details: bool,
):
    """View and search trading disclosures in the database.

    Joins ``trading_disclosures`` with ``politicians`` (inner join via the
    Supabase FK relationship), applies the CLI filters, and renders JSON, a
    compact table, or a per-disclosure detail view (--details).

    Args:
        output_json: When True, emit machine-readable JSON.
        limit: Maximum number of disclosures to fetch.
        politician: Substring filter on the joined politician's full name.
        asset: Substring filter matched against asset name or ticker.
        transaction_type: Exact-match transaction type filter.
        amount_min: Lower bound on the disclosed amount range.
        amount_max: Upper bound on the disclosed amount range.
        days: Only include rows created in the last N days (0 disables).
        details: When True, print one detail table per disclosure.
    """
    console = Console()

    try:

        async def get_disclosures():
            from datetime import datetime, timedelta, timezone

            from .config import WorkflowConfig
            from .database import PoliticianTradingDB

            config = WorkflowConfig.default()
            db = PoliticianTradingDB(config)

            # Build query with join to get politician info
            # Supabase uses foreign key relationships for joins
            query = db.client.table("trading_disclosures").select("*, politicians!inner(*)")

            # Date filter
            if days > 0:
                cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
                query = query.gte("created_at", cutoff_date.isoformat())

            # Apply filters
            if politician:
                # For nested relationships, we need a different approach
                # Let's use a simpler filter on the main table for now
                query = query.filter("politicians.full_name", "ilike", f"%{politician}%")

            if asset:
                query = query.or_(f"asset_name.ilike.%{asset}%,asset_ticker.ilike.%{asset}%")

            if transaction_type:
                query = query.eq("transaction_type", transaction_type)

            if amount_min is not None:
                query = query.gte("amount_range_min", amount_min)

            if amount_max is not None:
                query = query.lte("amount_range_max", amount_max)

            result = query.order("transaction_date", desc=True).limit(limit).execute()
            return result.data if result.data else []

        disclosures = asyncio.run(get_disclosures())

        if output_json:
            console.print(JSON.from_data(disclosures))
        else:
            console.print("💰 Trading Disclosures Database", style="bold cyan")

            if not disclosures:
                console.print("No disclosures found", style="yellow")
                return

            if details:
                # Detailed view
                for i, disclosure in enumerate(disclosures):
                    console.print(f"\n[bold cyan]Disclosure {i+1}[/bold cyan]")

                    detail_table = Table()
                    detail_table.add_column("Field", style="cyan")
                    detail_table.add_column("Value", style="white")

                    politician_info = disclosure.get("politicians", {})
                    politician_name = (
                        politician_info.get("full_name")
                        or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
                    )

                    detail_table.add_row(
                        "Politician",
                        f"{politician_name} ({politician_info.get('party', 'Unknown')})",
                    )
                    detail_table.add_row(
                        "Asset",
                        f"{disclosure.get('asset_name', 'Unknown')} ({disclosure.get('asset_ticker', 'N/A')})",
                    )
                    detail_table.add_row(
                        "Transaction", disclosure.get("transaction_type", "Unknown").title()
                    )
                    detail_table.add_row(
                        "Date", _format_timestamp(disclosure.get("transaction_date", ""))
                    )
                    detail_table.add_row(
                        "Disclosure Date", _format_timestamp(disclosure.get("disclosure_date", ""))
                    )

                    # Amount formatting
                    # NOTE(review): these locals shadow the amount_min/amount_max
                    # CLI parameters; harmless here (filters applied earlier),
                    # but renaming would be clearer.
                    amount_min = disclosure.get("amount_range_min")
                    amount_max = disclosure.get("amount_range_max")
                    amount_exact = disclosure.get("amount_exact")

                    if amount_exact:
                        amount_str = f"${amount_exact:,.2f}"
                    elif amount_min is not None and amount_max is not None:
                        amount_str = f"${amount_min:,.0f} - ${amount_max:,.0f}"
                    else:
                        amount_str = "Unknown"

                    detail_table.add_row("Amount", amount_str)
                    detail_table.add_row("Source URL", disclosure.get("source_url", "N/A"))
                    detail_table.add_row(
                        "Added", _format_timestamp(disclosure.get("created_at", ""))
                    )

                    console.print(detail_table)
            else:
                # Compact table view
                disclosures_table = Table()
                disclosures_table.add_column("Politician", style="cyan", min_width=25)
                disclosures_table.add_column("Asset", style="green")
                disclosures_table.add_column("Type", style="blue")
                disclosures_table.add_column("Amount", style="yellow")
                disclosures_table.add_column("Date", style="magenta")
                disclosures_table.add_column("Party", style="dim")

                for disclosure in disclosures:
                    politician_info = disclosure.get("politicians", {})
                    politician_name = (
                        politician_info.get("full_name")
                        or f"{politician_info.get('first_name', '')} {politician_info.get('last_name', '')}".strip()
                    )

                    # Format amount
                    amount_min = disclosure.get("amount_range_min")
                    amount_max = disclosure.get("amount_range_max")
                    amount_exact = disclosure.get("amount_exact")

                    if amount_exact:
                        amount_str = f"${amount_exact:,.0f}"
                    elif amount_min is not None and amount_max is not None:
                        amount_str = f"${amount_min:,.0f}-${amount_max:,.0f}"
                    else:
                        amount_str = "Unknown"

                    # Transaction type with emoji
                    trans_type = disclosure.get("transaction_type", "unknown")
                    trans_emoji = {
                        "purchase": "🟢 Buy",
                        "sale": "🔴 Sell",
                        "exchange": "🔄 Exchange",
                    }.get(trans_type, "❓ " + trans_type.title())

                    disclosures_table.add_row(
                        politician_name[:35] + ("..." if len(politician_name) > 35 else ""),
                        _format_asset_display(disclosure),
                        trans_emoji,
                        amount_str,
                        _format_timestamp(disclosure.get("transaction_date", "")),
                        politician_info.get("party", "")[:12],
                    )

                console.print(disclosures_table)

            console.print(
                f"\nShowing {len(disclosures)} disclosures from last {days} days", style="dim"
            )

    except Exception as e:
        if output_json:
            console.print(JSON.from_data({"error": str(e)}))
        else:
            console.print(f"❌ Failed to load disclosures: {e}", style="bold red")
        logger.error(f"Disclosures view failed: {e}")
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
@politician_trading_cli.command("verify")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def verify_database(output_json: bool):
    """Verify database integrity and show summary statistics.

    Builds a verification report with three sections — per-table status,
    a simplified integrity check, and 7-day summary statistics — then renders
    it as JSON or Rich console output.

    Args:
        output_json: When True, emit the raw report as JSON.
    """
    console = Console()

    try:

        async def verify_data():
            from datetime import timedelta

            from .config import WorkflowConfig
            from .database import PoliticianTradingDB

            config = WorkflowConfig.default()
            db = PoliticianTradingDB(config)

            verification = {
                # NOTE(review): datetime.now() is naive here while collection
                # code uses UTC-aware timestamps — consider timezone.utc.
                "timestamp": datetime.now().isoformat(),
                "tables": {},
                "integrity": {},
                "summary": {},
            }

            # Check each table
            tables_to_check = ["politicians", "trading_disclosures", "data_pull_jobs"]

            for table_name in tables_to_check:
                try:
                    # Selecting only "id" keeps the existence/count probe cheap.
                    result = db.client.table(table_name).select("id").execute()
                    count = len(result.data) if result.data else 0
                    verification["tables"][table_name] = {
                        "exists": True,
                        "record_count": count,
                        "status": "ok",
                    }
                except Exception as e:
                    verification["tables"][table_name] = {
                        "exists": False,
                        "error": str(e),
                        "status": "error",
                    }

            # Check referential integrity - simplified approach
            try:
                # Just verify we can query both tables
                # NOTE(review): this is only a readability probe — the key name
                # "disclosures_with_politicians" suggests a join check that is
                # not actually performed.
                disclosures_result = db.client.table("trading_disclosures").select("id").execute()
                politicians_result = db.client.table("politicians").select("id").execute()

                disclosures_count = len(disclosures_result.data) if disclosures_result.data else 0
                politicians_count = len(politicians_result.data) if politicians_result.data else 0

                verification["integrity"] = {
                    "disclosures_with_politicians": disclosures_count,
                    "total_politicians": politicians_count,
                    "status": "ok",
                }
            except Exception as e:
                verification["integrity"] = {"error": str(e), "status": "error"}

            # Summary statistics
            try:
                # Raises KeyError (caught below) if any table check failed,
                # since failed entries have no "record_count".
                politicians_count = verification["tables"]["politicians"]["record_count"]
                disclosures_count = verification["tables"]["trading_disclosures"]["record_count"]
                jobs_count = verification["tables"]["data_pull_jobs"]["record_count"]

                # Get recent activity
                recent_jobs = (
                    db.client.table("data_pull_jobs")
                    .select("*")
                    .gte("started_at", (datetime.now() - timedelta(days=7)).isoformat())
                    .execute()
                )

                recent_jobs_count = len(recent_jobs.data) if recent_jobs.data else 0
                successful_jobs = len(
                    [j for j in (recent_jobs.data or []) if j.get("status") == "completed"]
                )

                verification["summary"] = {
                    "total_politicians": politicians_count,
                    "total_disclosures": disclosures_count,
                    "total_jobs": jobs_count,
                    "jobs_last_7_days": recent_jobs_count,
                    "successful_jobs_last_7_days": successful_jobs,
                    "success_rate_7_days": (
                        (successful_jobs / recent_jobs_count * 100) if recent_jobs_count > 0 else 0
                    ),
                }

            except Exception as e:
                verification["summary"] = {"error": str(e)}

            return verification

        verification = asyncio.run(verify_data())

        if output_json:
            console.print(JSON.from_data(verification))
        else:
            console.print("🔍 Database Verification Report", style="bold cyan")

            # Table status
            tables_panel = Table(title="Table Status")
            tables_panel.add_column("Table", style="cyan")
            tables_panel.add_column("Status", style="white")
            tables_panel.add_column("Records", justify="right", style="green")

            for table_name, info in verification["tables"].items():
                status_color = "green" if info["status"] == "ok" else "red"
                status_text = f"[{status_color}]{info['status'].upper()}[/{status_color}]"
                record_count = str(info.get("record_count", "N/A"))

                tables_panel.add_row(table_name, status_text, record_count)

            console.print(tables_panel)

            # Integrity check
            integrity_info = verification.get("integrity", {})
            if integrity_info.get("status") == "ok":
                console.print("✅ Data integrity check passed", style="green")
                disc_count = integrity_info.get("disclosures_with_politicians", 0)
                pol_count = integrity_info.get("total_politicians", 0)
                console.print(
                    f"  Disclosures: {disc_count}, Politicians: {pol_count}", style="dim"
                )
            else:
                console.print("❌ Data integrity check failed", style="red")

            # Summary
            summary = verification.get("summary", {})
            if "error" not in summary:
                console.print("\n📊 Summary Statistics", style="bold blue")
                console.print(f"Politicians: {summary.get('total_politicians', 0)}")
                console.print(f"Trading Disclosures: {summary.get('total_disclosures', 0)}")
                console.print(f"Data Collection Jobs: {summary.get('total_jobs', 0)}")
                console.print(
                    f"Jobs (7 days): {summary.get('jobs_last_7_days', 0)} ({summary.get('successful_jobs_last_7_days', 0)} successful)"
                )
                console.print(f"Success Rate: {summary.get('success_rate_7_days', 0):.1f}%")

    except Exception as e:
        if output_json:
            console.print(JSON.from_data({"error": str(e)}))
        else:
            console.print(f"❌ Verification failed: {e}", style="bold red")
        logger.error(f"Database verification failed: {e}")
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
@politician_trading_cli.group("cron")
def cron_commands():
    """Manage cron-based automated data collection.

    Click command group; subcommands (such as ``run``) are registered on it
    via decorators elsewhere in this module. The group body itself does
    nothing — the docstring doubles as the function body.
    """
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
@cron_commands.command("run")
|
|
1422
|
-
@click.option(
|
|
1423
|
-
"--type",
|
|
1424
|
-
"collection_type",
|
|
1425
|
-
default="full",
|
|
1426
|
-
type=click.Choice(["full", "us", "eu", "quick"]),
|
|
1427
|
-
help="Type of collection to run",
|
|
1428
|
-
)
|
|
1429
|
-
def cron_run(collection_type: str):
|
|
1430
|
-
"""Run scheduled data collection (designed for cron jobs)"""
|
|
1431
|
-
|
|
1432
|
-
async def run_cron_collection():
|
|
1433
|
-
"""Run the cron collection"""
|
|
1434
|
-
from datetime import datetime
|
|
1435
|
-
|
|
1436
|
-
logger.info(f"Starting scheduled collection: {collection_type}")
|
|
1437
|
-
console.print(f"🕐 Running {collection_type} data collection...", style="blue")
|
|
1438
|
-
|
|
1439
|
-
try:
|
|
1440
|
-
workflow = PoliticianTradingWorkflow()
|
|
1441
|
-
|
|
1442
|
-
if collection_type == "full":
|
|
1443
|
-
results = await run_politician_trading_collection()
|
|
1444
|
-
elif collection_type == "us":
|
|
1445
|
-
# US-only collection
|
|
1446
|
-
us_results = await workflow._collect_us_congress_data()
|
|
1447
|
-
ca_results = await workflow._collect_california_data()
|
|
1448
|
-
us_states_results = await workflow._collect_us_states_data()
|
|
1449
|
-
|
|
1450
|
-
results = {
|
|
1451
|
-
"status": "completed",
|
|
1452
|
-
"started_at": datetime.utcnow().isoformat(),
|
|
1453
|
-
"completed_at": datetime.utcnow().isoformat(),
|
|
1454
|
-
"jobs": {
|
|
1455
|
-
"us_congress": us_results,
|
|
1456
|
-
"california": ca_results,
|
|
1457
|
-
"us_states": us_states_results,
|
|
1458
|
-
},
|
|
1459
|
-
"summary": {
|
|
1460
|
-
"total_new_disclosures": sum(
|
|
1461
|
-
[
|
|
1462
|
-
us_results.get("new_disclosures", 0),
|
|
1463
|
-
ca_results.get("new_disclosures", 0),
|
|
1464
|
-
us_states_results.get("new_disclosures", 0),
|
|
1465
|
-
]
|
|
1466
|
-
)
|
|
1467
|
-
},
|
|
1468
|
-
}
|
|
1469
|
-
elif collection_type == "eu":
|
|
1470
|
-
# EU-only collection
|
|
1471
|
-
eu_results = await workflow._collect_eu_parliament_data()
|
|
1472
|
-
eu_states_results = await workflow._collect_eu_member_states_data()
|
|
1473
|
-
uk_results = await workflow._collect_uk_parliament_data()
|
|
1474
|
-
|
|
1475
|
-
results = {
|
|
1476
|
-
"status": "completed",
|
|
1477
|
-
"started_at": datetime.utcnow().isoformat(),
|
|
1478
|
-
"completed_at": datetime.utcnow().isoformat(),
|
|
1479
|
-
"jobs": {
|
|
1480
|
-
"eu_parliament": eu_results,
|
|
1481
|
-
"eu_member_states": eu_states_results,
|
|
1482
|
-
"uk_parliament": uk_results,
|
|
1483
|
-
},
|
|
1484
|
-
"summary": {
|
|
1485
|
-
"total_new_disclosures": sum(
|
|
1486
|
-
[
|
|
1487
|
-
eu_results.get("new_disclosures", 0),
|
|
1488
|
-
eu_states_results.get("new_disclosures", 0),
|
|
1489
|
-
uk_results.get("new_disclosures", 0),
|
|
1490
|
-
]
|
|
1491
|
-
)
|
|
1492
|
-
},
|
|
1493
|
-
}
|
|
1494
|
-
elif collection_type == "quick":
|
|
1495
|
-
# Quick status check
|
|
1496
|
-
status = await workflow.run_quick_check()
|
|
1497
|
-
results = {
|
|
1498
|
-
"status": "completed",
|
|
1499
|
-
"type": "quick_check",
|
|
1500
|
-
"results": status,
|
|
1501
|
-
"summary": {"total_new_disclosures": 0},
|
|
1502
|
-
}
|
|
1503
|
-
|
|
1504
|
-
# Log results
|
|
1505
|
-
summary = results.get("summary", {})
|
|
1506
|
-
logger.info(
|
|
1507
|
-
f"Cron collection completed - New: {summary.get('total_new_disclosures', 0)}"
|
|
1508
|
-
)
|
|
1509
|
-
|
|
1510
|
-
console.print(f"✅ {collection_type.title()} collection completed", style="green")
|
|
1511
|
-
console.print(
|
|
1512
|
-
f"New disclosures: {summary.get('total_new_disclosures', 0)}", style="cyan"
|
|
1513
|
-
)
|
|
1514
|
-
|
|
1515
|
-
return results
|
|
1516
|
-
|
|
1517
|
-
except Exception as e:
|
|
1518
|
-
logger.error(f"Cron collection failed: {e}")
|
|
1519
|
-
console.print(f"❌ Collection failed: {e}", style="red")
|
|
1520
|
-
return {"status": "failed", "error": str(e)}
|
|
1521
|
-
|
|
1522
|
-
asyncio.run(run_cron_collection())
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
@cron_commands.command("setup")
|
|
1526
|
-
def cron_setup():
|
|
1527
|
-
"""Show cron setup instructions"""
|
|
1528
|
-
console.print("🕐 CRON SETUP INSTRUCTIONS", style="bold cyan")
|
|
1529
|
-
console.print("Add these lines to your crontab (run: crontab -e)", style="dim")
|
|
1530
|
-
|
|
1531
|
-
# Get current working directory for the cron commands
|
|
1532
|
-
repo_path = Path(__file__).parent.parent.parent.parent.parent
|
|
1533
|
-
|
|
1534
|
-
instructions = f"""
|
|
1535
|
-
# Full collection every 6 hours
|
|
1536
|
-
0 */6 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type full >> /tmp/politician_cron.log 2>&1
|
|
1537
|
-
|
|
1538
|
-
# US collection every 4 hours
|
|
1539
|
-
0 */4 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type us >> /tmp/politician_cron.log 2>&1
|
|
1540
|
-
|
|
1541
|
-
# EU collection every 8 hours
|
|
1542
|
-
0 */8 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type eu >> /tmp/politician_cron.log 2>&1
|
|
1543
|
-
|
|
1544
|
-
# Quick health check daily at 9 AM
|
|
1545
|
-
0 9 * * * cd {repo_path} && source .venv/bin/activate && mcli politician-trading cron run --type quick >> /tmp/politician_cron.log 2>&1
|
|
1546
|
-
"""
|
|
1547
|
-
|
|
1548
|
-
console.print(Panel(instructions, title="Crontab Entries", border_style="blue"))
|
|
1549
|
-
|
|
1550
|
-
console.print("\n💡 Tips:", style="bold yellow")
|
|
1551
|
-
console.print("• Start with just one cron job to test", style="dim")
|
|
1552
|
-
console.print("• Check logs at /tmp/politician_cron.log", style="dim")
|
|
1553
|
-
console.print("• Use 'mcli politician-trading monitor' to check results", style="dim")
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
@politician_trading_cli.command("monitor")
|
|
1557
|
-
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
1558
|
-
def monitor_system(output_json: bool):
|
|
1559
|
-
"""Monitor system status, jobs, and database"""
|
|
1560
|
-
|
|
1561
|
-
async def run_monitor():
|
|
1562
|
-
"""Run the monitoring"""
|
|
1563
|
-
try:
|
|
1564
|
-
config = WorkflowConfig.default()
|
|
1565
|
-
db = PoliticianTradingDB(config)
|
|
1566
|
-
workflow = PoliticianTradingWorkflow(config)
|
|
1567
|
-
|
|
1568
|
-
# Get system health
|
|
1569
|
-
await db.ensure_schema()
|
|
1570
|
-
quick_status = await workflow.run_quick_check()
|
|
1571
|
-
|
|
1572
|
-
# Get job history
|
|
1573
|
-
job_status = await db.get_job_status()
|
|
1574
|
-
recent_jobs = job_status.get("recent_jobs", [])
|
|
1575
|
-
|
|
1576
|
-
# Analyze job statistics
|
|
1577
|
-
status_counts = {"completed": 0, "running": 0, "failed": 0, "pending": 0}
|
|
1578
|
-
job_types = {}
|
|
1579
|
-
latest_by_type = {}
|
|
1580
|
-
|
|
1581
|
-
for job in recent_jobs:
|
|
1582
|
-
status = job.get("status", "unknown")
|
|
1583
|
-
job_type = job.get("job_type", "unknown")
|
|
1584
|
-
started_at = job.get("started_at", "")
|
|
1585
|
-
|
|
1586
|
-
if status in status_counts:
|
|
1587
|
-
status_counts[status] += 1
|
|
1588
|
-
job_types[job_type] = job_types.get(job_type, 0) + 1
|
|
1589
|
-
|
|
1590
|
-
if job_type not in latest_by_type or started_at > latest_by_type[job_type].get(
|
|
1591
|
-
"started_at", ""
|
|
1592
|
-
):
|
|
1593
|
-
latest_by_type[job_type] = job
|
|
1594
|
-
|
|
1595
|
-
# Get scraper availability
|
|
1596
|
-
try:
|
|
1597
|
-
from . import scrapers
|
|
1598
|
-
|
|
1599
|
-
scraper_status = {
|
|
1600
|
-
"UK Parliament API": scrapers.UK_SCRAPER_AVAILABLE,
|
|
1601
|
-
"California NetFile": scrapers.CALIFORNIA_SCRAPER_AVAILABLE,
|
|
1602
|
-
"EU Member States": scrapers.EU_MEMBER_STATES_SCRAPER_AVAILABLE,
|
|
1603
|
-
"US States Ethics": scrapers.US_STATES_SCRAPER_AVAILABLE,
|
|
1604
|
-
}
|
|
1605
|
-
available_scrapers = sum(scraper_status.values())
|
|
1606
|
-
except:
|
|
1607
|
-
scraper_status = {}
|
|
1608
|
-
available_scrapers = 0
|
|
1609
|
-
|
|
1610
|
-
monitor_data = {
|
|
1611
|
-
"system_health": {
|
|
1612
|
-
"database_connection": quick_status.get("database_connection", "unknown"),
|
|
1613
|
-
"config_loaded": quick_status.get("config_loaded", "unknown"),
|
|
1614
|
-
"timestamp": quick_status.get("timestamp", datetime.now().isoformat()),
|
|
1615
|
-
},
|
|
1616
|
-
"job_statistics": {
|
|
1617
|
-
"total_recent_jobs": len(recent_jobs),
|
|
1618
|
-
"status_counts": status_counts,
|
|
1619
|
-
"job_types": job_types,
|
|
1620
|
-
},
|
|
1621
|
-
"latest_jobs": latest_by_type,
|
|
1622
|
-
"scraper_availability": {
|
|
1623
|
-
"available_count": available_scrapers,
|
|
1624
|
-
"total_count": len(scraper_status),
|
|
1625
|
-
"scrapers": scraper_status,
|
|
1626
|
-
},
|
|
1627
|
-
}
|
|
1628
|
-
|
|
1629
|
-
return monitor_data
|
|
1630
|
-
|
|
1631
|
-
except Exception as e:
|
|
1632
|
-
logger.error(f"Monitoring failed: {e}")
|
|
1633
|
-
return {"error": str(e)}
|
|
1634
|
-
|
|
1635
|
-
monitor_data = asyncio.run(run_monitor())
|
|
1636
|
-
|
|
1637
|
-
if output_json:
|
|
1638
|
-
console.print(JSON.from_data(monitor_data))
|
|
1639
|
-
else:
|
|
1640
|
-
console.print("🔍 SYSTEM MONITOR", style="bold cyan")
|
|
1641
|
-
|
|
1642
|
-
# System Health
|
|
1643
|
-
health = monitor_data.get("system_health", {})
|
|
1644
|
-
health_table = Table(title="System Health")
|
|
1645
|
-
health_table.add_column("Component", style="cyan")
|
|
1646
|
-
health_table.add_column("Status", style="white")
|
|
1647
|
-
|
|
1648
|
-
db_status = health["database_connection"]
|
|
1649
|
-
db_color = "green" if db_status == "ok" else "red"
|
|
1650
|
-
health_table.add_row("Database", f"[{db_color}]{db_status.upper()}[/{db_color}]")
|
|
1651
|
-
|
|
1652
|
-
config_status = health["config_loaded"]
|
|
1653
|
-
config_color = "green" if config_status == "ok" else "red"
|
|
1654
|
-
health_table.add_row(
|
|
1655
|
-
"Configuration", f"[{config_color}]{config_status.upper()}[/{config_color}]"
|
|
1656
|
-
)
|
|
1657
|
-
|
|
1658
|
-
console.print(health_table)
|
|
1659
|
-
|
|
1660
|
-
# Job Statistics
|
|
1661
|
-
job_stats = monitor_data.get("job_statistics", {})
|
|
1662
|
-
console.print(
|
|
1663
|
-
f"\n📊 Job Statistics (Total: {job_stats.get('total_recent_jobs', 0)})",
|
|
1664
|
-
style="bold blue",
|
|
1665
|
-
)
|
|
1666
|
-
|
|
1667
|
-
status_counts = job_stats.get("status_counts", {})
|
|
1668
|
-
for status, count in status_counts.items():
|
|
1669
|
-
if count > 0:
|
|
1670
|
-
icon = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}[status]
|
|
1671
|
-
console.print(f"{icon} {status.title()}: {count}")
|
|
1672
|
-
|
|
1673
|
-
# Latest Jobs by Type
|
|
1674
|
-
console.print(f"\n📋 Latest Jobs by Source", style="bold blue")
|
|
1675
|
-
latest_jobs = monitor_data.get("latest_jobs", {})
|
|
1676
|
-
|
|
1677
|
-
for job_type, job in sorted(latest_jobs.items()):
|
|
1678
|
-
status = job.get("status", "unknown")
|
|
1679
|
-
icon = {"completed": "✅", "running": "🔄", "failed": "❌", "pending": "⏳"}.get(
|
|
1680
|
-
status, "❓"
|
|
1681
|
-
)
|
|
1682
|
-
|
|
1683
|
-
source_name = job_type.replace("_", " ").title()
|
|
1684
|
-
console.print(f"\n{icon} {source_name}")
|
|
1685
|
-
console.print(f" Status: {status}")
|
|
1686
|
-
console.print(f" Last run: {job.get('started_at', 'N/A')[:19]}")
|
|
1687
|
-
console.print(
|
|
1688
|
-
f" Records: {job.get('records_processed', 0)} processed, {job.get('records_new', 0)} new"
|
|
1689
|
-
)
|
|
1690
|
-
|
|
1691
|
-
# Scraper Availability
|
|
1692
|
-
scraper_info = monitor_data.get("scraper_availability", {})
|
|
1693
|
-
available = scraper_info.get("available_count", 0)
|
|
1694
|
-
total = scraper_info.get("total_count", 0)
|
|
1695
|
-
|
|
1696
|
-
console.print(f"\n🌍 Scraper Availability: {available}/{total}", style="bold blue")
|
|
1697
|
-
|
|
1698
|
-
scrapers_status = scraper_info.get("scrapers", {})
|
|
1699
|
-
for scraper_name, available in scrapers_status.items():
|
|
1700
|
-
icon = "✅" if available else "❌"
|
|
1701
|
-
status = "Available" if available else "Not Available"
|
|
1702
|
-
console.print(f"{icon} {scraper_name}: {status}")
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
@politician_trading_cli.command("read-data")
|
|
1706
|
-
@click.option("--limit", default=50, help="Number of recent records to show")
|
|
1707
|
-
@click.option("--days", default=7, help="Days back to look for data")
|
|
1708
|
-
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
1709
|
-
def read_recent_data(limit: int, days: int, output_json: bool):
|
|
1710
|
-
"""Read recent data from the database"""
|
|
1711
|
-
|
|
1712
|
-
async def read_data():
|
|
1713
|
-
"""Read recent data from database"""
|
|
1714
|
-
try:
|
|
1715
|
-
config = WorkflowConfig.default()
|
|
1716
|
-
db = PoliticianTradingDB(config)
|
|
1717
|
-
|
|
1718
|
-
# Get job history
|
|
1719
|
-
job_status = await db.get_job_status()
|
|
1720
|
-
jobs = job_status.get("recent_jobs", [])
|
|
1721
|
-
|
|
1722
|
-
# Analyze data freshness
|
|
1723
|
-
freshness = {}
|
|
1724
|
-
for job in jobs:
|
|
1725
|
-
job_type = job.get("job_type", "unknown")
|
|
1726
|
-
if job.get("status") == "completed":
|
|
1727
|
-
completed_at = job.get("completed_at")
|
|
1728
|
-
if (
|
|
1729
|
-
job_type not in freshness
|
|
1730
|
-
or completed_at > freshness[job_type]["last_success"]
|
|
1731
|
-
):
|
|
1732
|
-
# Check if recent (within threshold)
|
|
1733
|
-
is_recent = False
|
|
1734
|
-
if completed_at:
|
|
1735
|
-
try:
|
|
1736
|
-
timestamp = datetime.fromisoformat(
|
|
1737
|
-
completed_at.replace("Z", "+00:00")
|
|
1738
|
-
)
|
|
1739
|
-
is_recent = (
|
|
1740
|
-
datetime.now() - timestamp.replace(tzinfo=None)
|
|
1741
|
-
) < timedelta(hours=24)
|
|
1742
|
-
except:
|
|
1743
|
-
pass
|
|
1744
|
-
|
|
1745
|
-
freshness[job_type] = {
|
|
1746
|
-
"last_success": completed_at,
|
|
1747
|
-
"records_collected": job.get("records_new", 0),
|
|
1748
|
-
"status": "fresh" if is_recent else "stale",
|
|
1749
|
-
}
|
|
1750
|
-
|
|
1751
|
-
return {
|
|
1752
|
-
"recent_jobs": jobs[:limit],
|
|
1753
|
-
"data_freshness": freshness,
|
|
1754
|
-
"summary": {
|
|
1755
|
-
"total_jobs": len(jobs),
|
|
1756
|
-
"job_types": len(set(job.get("job_type") for job in jobs)),
|
|
1757
|
-
"fresh_sources": len([v for v in freshness.values() if v["status"] == "fresh"]),
|
|
1758
|
-
},
|
|
1759
|
-
}
|
|
1760
|
-
|
|
1761
|
-
except Exception as e:
|
|
1762
|
-
logger.error(f"Failed to read data: {e}")
|
|
1763
|
-
return {"error": str(e)}
|
|
1764
|
-
|
|
1765
|
-
data = asyncio.run(read_data())
|
|
1766
|
-
|
|
1767
|
-
if output_json:
|
|
1768
|
-
console.print(JSON.from_data(data))
|
|
1769
|
-
else:
|
|
1770
|
-
console.print("📊 RECENT DATA SUMMARY", style="bold cyan")
|
|
1771
|
-
|
|
1772
|
-
if "error" in data:
|
|
1773
|
-
console.print(f"❌ Error: {data['error']}", style="red")
|
|
1774
|
-
return
|
|
1775
|
-
|
|
1776
|
-
# Summary stats
|
|
1777
|
-
summary = data.get("summary", {})
|
|
1778
|
-
console.print(f"\n📈 Summary:", style="bold blue")
|
|
1779
|
-
console.print(f"Total recent jobs: {summary.get('total_jobs', 0)}")
|
|
1780
|
-
console.print(f"Active job types: {summary.get('job_types', 0)}")
|
|
1781
|
-
console.print(f"Fresh data sources: {summary.get('fresh_sources', 0)}")
|
|
1782
|
-
|
|
1783
|
-
# Data freshness
|
|
1784
|
-
freshness = data.get("data_freshness", {})
|
|
1785
|
-
if freshness:
|
|
1786
|
-
console.print(f"\n🕐 Data Freshness:", style="bold blue")
|
|
1787
|
-
for source, info in freshness.items():
|
|
1788
|
-
status_icon = "🟢" if info["status"] == "fresh" else "🟡"
|
|
1789
|
-
source_name = source.replace("_", " ").title()
|
|
1790
|
-
last_success = info["last_success"][:19] if info["last_success"] else "Never"
|
|
1791
|
-
console.print(f"{status_icon} {source_name}: {last_success}")
|
|
1792
|
-
|
|
1793
|
-
# Recent jobs
|
|
1794
|
-
recent_jobs = data.get("recent_jobs", [])[:10] # Show top 10
|
|
1795
|
-
if recent_jobs:
|
|
1796
|
-
console.print(f"\n📋 Recent Jobs (showing {len(recent_jobs)}):", style="bold blue")
|
|
1797
|
-
for job in recent_jobs:
|
|
1798
|
-
status_icon = {
|
|
1799
|
-
"completed": "✅",
|
|
1800
|
-
"running": "🔄",
|
|
1801
|
-
"failed": "❌",
|
|
1802
|
-
"pending": "⏳",
|
|
1803
|
-
}.get(job.get("status"), "❓")
|
|
1804
|
-
job_type = job.get("job_type", "unknown").replace("_", " ").title()
|
|
1805
|
-
started_at = job.get("started_at", "N/A")[:19]
|
|
1806
|
-
console.print(f"{status_icon} {job_type}: {started_at}")
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
@politician_trading_cli.command("config-real-data")
|
|
1810
|
-
@click.option("--enable", is_flag=True, help="Enable real data collection")
|
|
1811
|
-
@click.option("--restore", is_flag=True, help="Restore sample data mode")
|
|
1812
|
-
@click.option("--status", is_flag=True, help="Show current configuration status")
|
|
1813
|
-
def configure_real_data(enable: bool, restore: bool, status: bool):
|
|
1814
|
-
"""Configure real vs sample data collection"""
|
|
1815
|
-
|
|
1816
|
-
if status or not (enable or restore):
|
|
1817
|
-
# Show current status
|
|
1818
|
-
console.print("🔧 DATA COLLECTION CONFIGURATION", style="bold cyan")
|
|
1819
|
-
|
|
1820
|
-
console.print("\n📋 Current Status:", style="bold blue")
|
|
1821
|
-
console.print("• Sample data mode: Currently DISABLED", style="green")
|
|
1822
|
-
console.print("• Real API calls: Currently ACTIVE", style="green")
|
|
1823
|
-
console.print("• Database writes: Currently WORKING", style="green")
|
|
1824
|
-
|
|
1825
|
-
console.print("\n🎯 Data Source Readiness:", style="bold blue")
|
|
1826
|
-
readiness_info = [
|
|
1827
|
-
("UK Parliament API", "✅ Active - Real API with full transaction data", "green"),
|
|
1828
|
-
("US House/Senate", "✅ Active - Real disclosure database access", "green"),
|
|
1829
|
-
("EU Parliament", "✅ Active - Real MEP profile scraping", "green"),
|
|
1830
|
-
("California NetFile", "⚠️ Limited - Complex forms require careful handling", "yellow"),
|
|
1831
|
-
("EU Member States", "⚠️ Limited - Country-specific implementations needed", "yellow"),
|
|
1832
|
-
]
|
|
1833
|
-
|
|
1834
|
-
for source, info, color in readiness_info:
|
|
1835
|
-
console.print(f"{info}", style=color)
|
|
1836
|
-
|
|
1837
|
-
console.print("\n💡 Commands:", style="bold blue")
|
|
1838
|
-
console.print("mcli politician-trading config-real-data --enable # Enable real data")
|
|
1839
|
-
console.print("mcli politician-trading config-real-data --restore # Restore sample mode")
|
|
1840
|
-
|
|
1841
|
-
return
|
|
1842
|
-
|
|
1843
|
-
# Get scraper files
|
|
1844
|
-
src_dir = Path(__file__).parent
|
|
1845
|
-
scraper_files = [
|
|
1846
|
-
"scrapers_uk.py",
|
|
1847
|
-
"scrapers_california.py",
|
|
1848
|
-
"scrapers_eu.py",
|
|
1849
|
-
"scrapers_us_states.py",
|
|
1850
|
-
]
|
|
1851
|
-
|
|
1852
|
-
if restore:
|
|
1853
|
-
console.print("🔄 RESTORING SAMPLE DATA MODE", style="bold yellow")
|
|
1854
|
-
|
|
1855
|
-
restored = 0
|
|
1856
|
-
for file_name in scraper_files:
|
|
1857
|
-
file_path = src_dir / file_name
|
|
1858
|
-
backup_path = Path(str(file_path) + ".backup")
|
|
1859
|
-
|
|
1860
|
-
if backup_path.exists():
|
|
1861
|
-
# Restore from backup
|
|
1862
|
-
try:
|
|
1863
|
-
backup_content = backup_path.read_text()
|
|
1864
|
-
file_path.write_text(backup_content)
|
|
1865
|
-
restored += 1
|
|
1866
|
-
console.print(f"✅ Restored {file_name} from backup", style="green")
|
|
1867
|
-
except Exception as e:
|
|
1868
|
-
console.print(f"❌ Failed to restore {file_name}: {e}", style="red")
|
|
1869
|
-
else:
|
|
1870
|
-
console.print(f"ℹ️ No backup found for {file_name}", style="dim")
|
|
1871
|
-
|
|
1872
|
-
console.print(f"\n🎯 Restored {restored} files to sample mode", style="green")
|
|
1873
|
-
|
|
1874
|
-
elif enable:
|
|
1875
|
-
console.print("🚀 ENABLING REAL DATA COLLECTION", style="bold green")
|
|
1876
|
-
|
|
1877
|
-
with Progress(
|
|
1878
|
-
SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console
|
|
1879
|
-
) as progress:
|
|
1880
|
-
task = progress.add_task("Configuring scrapers...", total=len(scraper_files))
|
|
1881
|
-
|
|
1882
|
-
modifications_made = 0
|
|
1883
|
-
|
|
1884
|
-
for file_name in scraper_files:
|
|
1885
|
-
progress.update(task, description=f"Processing {file_name}...")
|
|
1886
|
-
|
|
1887
|
-
file_path = src_dir / file_name
|
|
1888
|
-
|
|
1889
|
-
if not file_path.exists():
|
|
1890
|
-
progress.advance(task)
|
|
1891
|
-
continue
|
|
1892
|
-
|
|
1893
|
-
try:
|
|
1894
|
-
# Read file content
|
|
1895
|
-
content = file_path.read_text()
|
|
1896
|
-
original_content = content
|
|
1897
|
-
|
|
1898
|
-
# Remove sample flags
|
|
1899
|
-
content = re.sub(r'"sample":\s*True', '"sample": False', content)
|
|
1900
|
-
content = re.sub(r"'sample':\s*True", "'sample': False", content)
|
|
1901
|
-
|
|
1902
|
-
# Enable actual processing
|
|
1903
|
-
content = re.sub(
|
|
1904
|
-
r"# This would implement actual (.+?) scraping",
|
|
1905
|
-
r'logger.info("Processing real \1 data")',
|
|
1906
|
-
content,
|
|
1907
|
-
)
|
|
1908
|
-
|
|
1909
|
-
if content != original_content:
|
|
1910
|
-
# Backup original
|
|
1911
|
-
backup_path = str(file_path) + ".backup"
|
|
1912
|
-
Path(backup_path).write_text(original_content)
|
|
1913
|
-
|
|
1914
|
-
# Write modified content
|
|
1915
|
-
file_path.write_text(content)
|
|
1916
|
-
modifications_made += 1
|
|
1917
|
-
|
|
1918
|
-
except Exception as e:
|
|
1919
|
-
console.print(f"❌ Error processing {file_name}: {e}", style="red")
|
|
1920
|
-
|
|
1921
|
-
progress.advance(task)
|
|
1922
|
-
|
|
1923
|
-
console.print(f"\n✅ Real data configuration complete!", style="bold green")
|
|
1924
|
-
console.print(f"Modified {modifications_made} scraper files", style="green")
|
|
1925
|
-
|
|
1926
|
-
if modifications_made > 0:
|
|
1927
|
-
console.print(f"\n⚠️ Important Next Steps:", style="bold yellow")
|
|
1928
|
-
console.print("1. Test with UK Parliament first (most reliable)", style="dim")
|
|
1929
|
-
console.print("2. Monitor API rate limits carefully", style="dim")
|
|
1930
|
-
console.print("3. Check logs for parsing errors", style="dim")
|
|
1931
|
-
console.print("4. Use --restore flag if issues occur", style="dim")
|
|
1932
|
-
|
|
1933
|
-
console.print(f"\n🧪 Test Commands:", style="bold blue")
|
|
1934
|
-
console.print("mcli politician-trading cron run --type quick # Quick test")
|
|
1935
|
-
console.print("mcli politician-trading monitor # Check results")
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
# Export the CLI group for registration
|
|
1939
|
-
cli = politician_trading_cli
|