mcli-framework 7.10.1__py3-none-any.whl → 7.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic; see the registry's advisory page for more details.
- mcli/app/commands_cmd.py +150 -58
- mcli/app/main.py +21 -27
- mcli/lib/custom_commands.py +62 -12
- mcli/lib/optional_deps.py +240 -0
- mcli/lib/paths.py +129 -5
- mcli/self/migrate_cmd.py +261 -0
- mcli/self/self_cmd.py +8 -0
- mcli/workflow/git_commit/ai_service.py +13 -2
- mcli/workflow/notebook/__init__.py +16 -0
- mcli/workflow/notebook/converter.py +375 -0
- mcli/workflow/notebook/notebook_cmd.py +441 -0
- mcli/workflow/notebook/schema.py +402 -0
- mcli/workflow/notebook/validator.py +313 -0
- mcli/workflow/secrets/__init__.py +4 -0
- mcli/workflow/secrets/secrets_cmd.py +192 -0
- mcli/workflow/workflow.py +35 -5
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/METADATA +86 -55
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/RECORD +22 -34
- mcli/ml/features/political_features.py +0 -677
- mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
- mcli/workflow/politician_trading/__init__.py +0 -4
- mcli/workflow/politician_trading/config.py +0 -134
- mcli/workflow/politician_trading/connectivity.py +0 -492
- mcli/workflow/politician_trading/data_sources.py +0 -654
- mcli/workflow/politician_trading/database.py +0 -412
- mcli/workflow/politician_trading/demo.py +0 -249
- mcli/workflow/politician_trading/models.py +0 -327
- mcli/workflow/politician_trading/monitoring.py +0 -413
- mcli/workflow/politician_trading/scrapers.py +0 -1074
- mcli/workflow/politician_trading/scrapers_california.py +0 -434
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
- mcli/workflow/politician_trading/scrapers_eu.py +0 -376
- mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
- mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
- mcli/workflow/politician_trading/scrapers_uk.py +0 -378
- mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
- mcli/workflow/politician_trading/seed_database.py +0 -520
- mcli/workflow/politician_trading/supabase_functions.py +0 -354
- mcli/workflow/politician_trading/workflow.py +0 -879
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/WHEEL +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/top_level.txt +0 -0
|
@@ -1,412 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Database client and schema management for politician trading data
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
import logging
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
-
from uuid import uuid4
|
|
10
|
-
|
|
11
|
-
from postgrest.exceptions import APIError
|
|
12
|
-
from supabase import Client, create_client
|
|
13
|
-
|
|
14
|
-
from .config import WorkflowConfig
|
|
15
|
-
from .models import DataPullJob, DataSource, Politician, TradingDisclosure
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class PoliticianTradingDB:
    """Supabase-backed data access layer for politician trading data.

    Wraps a Supabase client and provides CRUD helpers for the tables used by
    the collection workflow: ``politicians``, ``trading_disclosures``,
    ``data_pull_jobs``, and ``data_sources``.

    NOTE(review): the methods are declared ``async`` for interface symmetry
    with the rest of the workflow, but the underlying supabase-py client is
    synchronous — no ``await`` occurs inside them.
    """

    def __init__(self, config: WorkflowConfig):
        self.config = config
        self.client: Optional[Client] = None
        self._init_client()

    def _init_client(self):
        """Initialize the Supabase client from the workflow configuration.

        Raises:
            Exception: re-raised from ``create_client`` if the URL/key are
                invalid — the DB layer is unusable without a client.
        """
        try:
            self.client = create_client(self.config.supabase.url, self.config.supabase.key)
            logger.info("Supabase client initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Supabase client: {e}")
            raise

    async def ensure_schema(self) -> bool:
        """Verify that all required tables exist.

        Returns:
            True if every table answers a trivial query, False otherwise
            (the schema must then be created manually via schema.sql).
        """
        try:
            # Probe each table with a cheap query; a missing table raises.
            await self._check_table_exists("politicians")
            await self._check_table_exists("trading_disclosures")
            await self._check_table_exists("data_pull_jobs")
            await self._check_table_exists("data_sources")
            logger.info("Database schema verified")
            return True
        except Exception as e:
            logger.error(f"Schema check failed: {e}")
            logger.info("You'll need to create the database schema manually")
            return False

    async def _check_table_exists(self, table_name: str):
        """Probe *table_name* with a LIMIT-1 select; re-raise if it fails."""
        try:
            self.client.table(table_name).select("*").limit(1).execute()
            return True
        except Exception as e:
            logger.warning(f"Table {table_name} may not exist: {e}")
            raise

    # Politician management
    async def get_politician(self, politician_id: str) -> Optional[Politician]:
        """Get politician by ID; returns None on miss or query error."""
        try:
            result = self.client.table("politicians").select("*").eq("id", politician_id).execute()
            if result.data:
                return self._dict_to_politician(result.data[0])
            return None
        except Exception as e:
            logger.error(f"Failed to get politician {politician_id}: {e}")
            return None

    async def find_politician_by_name(
        self, first_name: str, last_name: str
    ) -> Optional[Politician]:
        """Find a politician by exact first/last name match.

        Returns the first match, or None if absent or on query error.
        """
        try:
            result = (
                self.client.table("politicians")
                .select("*")
                .eq("first_name", first_name)
                .eq("last_name", last_name)
                .execute()
            )
            if result.data:
                return self._dict_to_politician(result.data[0])
            return None
        except Exception as e:
            logger.error(f"Failed to find politician {first_name} {last_name}: {e}")
            return None

    async def upsert_politician(self, politician: Politician) -> str:
        """Insert a new politician or update the existing row by name.

        Returns:
            The row id, or "" on failure (deliberately swallowed so one bad
            record does not abort a whole collection run).
        """
        try:
            # Name match decides update-vs-insert; IDs from scrapers are
            # not stable across sources.
            existing = await self.find_politician_by_name(
                politician.first_name, politician.last_name
            )

            if existing:
                # Update existing politician (but don't change ID)
                politician_dict = self._politician_to_dict(politician)
                politician_dict["id"] = existing.id  # Keep existing ID
                politician_dict["updated_at"] = datetime.utcnow().isoformat()

                result = (
                    self.client.table("politicians")
                    .update(politician_dict)
                    .eq("id", existing.id)
                    .execute()
                )

                if result.data:
                    return result.data[0]["id"]
                return existing.id
            else:
                # Insert new politician
                politician_dict = self._politician_to_dict(politician)
                if not politician_dict.get("id"):
                    politician_dict["id"] = str(uuid4())

                politician_dict["created_at"] = datetime.utcnow().isoformat()
                politician_dict["updated_at"] = datetime.utcnow().isoformat()

                result = self.client.table("politicians").insert(politician_dict).execute()

                if result.data:
                    return result.data[0]["id"]
                return politician_dict["id"]

        except Exception as e:
            logger.error(f"Failed to upsert politician: {e}")
            # For debugging: log the politician data that caused the error
            logger.error(f"Politician data: {politician.first_name} {politician.last_name}")
            return ""  # Return empty string instead of raising to prevent cascade failures

    # Trading disclosure management
    async def get_disclosure(self, disclosure_id: str) -> Optional[TradingDisclosure]:
        """Get trading disclosure by ID; returns None on miss or error."""
        try:
            result = (
                self.client.table("trading_disclosures")
                .select("*")
                .eq("id", disclosure_id)
                .execute()
            )
            if result.data:
                return self._dict_to_disclosure(result.data[0])
            return None
        except Exception as e:
            logger.error(f"Failed to get disclosure {disclosure_id}: {e}")
            return None

    async def find_disclosure_by_transaction(
        self, politician_id: str, transaction_date: datetime, asset_name: str, transaction_type: str
    ) -> Optional[TradingDisclosure]:
        """Find an existing disclosure by its identifying transaction fields.

        Used for dedup: (politician, date, asset, type) identifies a trade.
        """
        try:
            result = (
                self.client.table("trading_disclosures")
                .select("*")
                .eq("politician_id", politician_id)
                .eq("transaction_date", transaction_date.isoformat())
                .eq("asset_name", asset_name)
                .eq("transaction_type", transaction_type)
                .execute()
            )
            if result.data:
                return self._dict_to_disclosure(result.data[0])
            return None
        except Exception as e:
            logger.error(f"Failed to find disclosure: {e}")
            return None

    async def insert_disclosure(self, disclosure: TradingDisclosure) -> str:
        """Insert a new trading disclosure and return its id.

        Raises:
            Exception: re-raised on insert failure (callers count failures).
        """
        try:
            disclosure_dict = self._disclosure_to_dict(disclosure)
            if not disclosure_dict.get("id"):
                disclosure_dict["id"] = str(uuid4())

            result = self.client.table("trading_disclosures").insert(disclosure_dict).execute()
            if result.data:
                return result.data[0]["id"]
            return disclosure_dict["id"]
        except Exception as e:
            logger.error(f"Failed to insert disclosure: {e}")
            raise

    async def update_disclosure(self, disclosure: TradingDisclosure) -> bool:
        """Update an existing disclosure; returns True if a row changed."""
        try:
            disclosure_dict = self._disclosure_to_dict(disclosure)
            disclosure_dict["updated_at"] = datetime.utcnow().isoformat()

            result = (
                self.client.table("trading_disclosures")
                .update(disclosure_dict)
                .eq("id", disclosure.id)
                .execute()
            )
            return len(result.data) > 0
        except Exception as e:
            logger.error(f"Failed to update disclosure: {e}")
            return False

    async def get_recent_disclosures(self, limit: int = 100) -> List[TradingDisclosure]:
        """Get the most recent disclosures ordered by disclosure_date desc."""
        try:
            result = (
                self.client.table("trading_disclosures")
                .select("*")
                .order("disclosure_date", desc=True)
                .limit(limit)
                .execute()
            )
            return [self._dict_to_disclosure(d) for d in result.data]
        except Exception as e:
            logger.error(f"Failed to get recent disclosures: {e}")
            return []

    # Data pull job management
    async def create_data_pull_job(self, job_type: str, config_snapshot: Dict[str, Any]) -> str:
        """Create a new pending data-pull job and return its id.

        Raises:
            Exception: re-raised on insert failure — job tracking is required.
        """
        try:
            job = DataPullJob(
                id=str(uuid4()),
                job_type=job_type,
                status="pending",
                config_snapshot=config_snapshot,
                started_at=datetime.utcnow(),
            )

            job_dict = self._job_to_dict(job)
            result = self.client.table("data_pull_jobs").insert(job_dict).execute()
            if result.data:
                return result.data[0]["id"]
            return job.id
        except Exception as e:
            logger.error(f"Failed to create data pull job: {e}")
            raise

    async def update_data_pull_job(self, job: DataPullJob) -> bool:
        """Persist updated job state; returns True if a row changed."""
        try:
            job_dict = self._job_to_dict(job)
            result = self.client.table("data_pull_jobs").update(job_dict).eq("id", job.id).execute()
            return len(result.data) > 0
        except Exception as e:
            logger.error(f"Failed to update data pull job: {e}")
            return False

    async def get_job_status(self) -> Dict[str, Any]:
        """Summarize recent jobs and disclosure counts.

        Returns:
            Dict with total/today disclosure counts, the last 10 jobs, and a
            timestamp — or ``{"error": ...}`` on failure.
        """
        try:
            # Get recent jobs
            result = (
                self.client.table("data_pull_jobs")
                .select("*")
                .order("created_at", desc=True)
                .limit(10)
                .execute()
            )

            jobs = result.data

            # count="exact" asks PostgREST for a server-side row count.
            total_disclosures = (
                self.client.table("trading_disclosures").select("id", count="exact").execute()
            ).count

            recent_disclosures = (
                self.client.table("trading_disclosures")
                .select("id", count="exact")
                .gte(
                    "created_at",
                    # Midnight UTC today (microseconds retained; only h/m/s zeroed).
                    (datetime.utcnow().replace(hour=0, minute=0, second=0)).isoformat(),
                )
                .execute()
            ).count

            return {
                "total_disclosures": total_disclosures,
                "recent_disclosures_today": recent_disclosures,
                "recent_jobs": jobs,
                "last_update": datetime.utcnow().isoformat(),
            }
        except Exception as e:
            logger.error(f"Failed to get job status: {e}")
            return {"error": str(e)}

    # Helper methods for data conversion
    def _politician_to_dict(self, politician: Politician) -> Dict[str, Any]:
        """Convert a Politician model to a row dict (datetimes → ISO strings)."""
        return {
            "id": politician.id,
            "first_name": politician.first_name,
            "last_name": politician.last_name,
            "full_name": politician.full_name,
            "role": politician.role.value if politician.role else None,
            "party": politician.party,
            "state_or_country": politician.state_or_country,
            "district": politician.district,
            "term_start": politician.term_start.isoformat() if politician.term_start else None,
            "term_end": politician.term_end.isoformat() if politician.term_end else None,
            "bioguide_id": politician.bioguide_id,
            "eu_id": politician.eu_id,
            "created_at": politician.created_at.isoformat(),
            "updated_at": politician.updated_at.isoformat(),
        }

    def _dict_to_politician(self, data: Dict[str, Any]) -> Politician:
        """Convert a row dict back to a Politician model."""
        from .models import PoliticianRole

        return Politician(
            id=data.get("id"),
            first_name=data.get("first_name", ""),
            last_name=data.get("last_name", ""),
            full_name=data.get("full_name", ""),
            role=PoliticianRole(data.get("role", "us_house_representative")),
            party=data.get("party", ""),
            state_or_country=data.get("state_or_country", ""),
            district=data.get("district"),
            term_start=(
                datetime.fromisoformat(data["term_start"]) if data.get("term_start") else None
            ),
            term_end=datetime.fromisoformat(data["term_end"]) if data.get("term_end") else None,
            bioguide_id=data.get("bioguide_id"),
            eu_id=data.get("eu_id"),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )

    def _disclosure_to_dict(self, disclosure: TradingDisclosure) -> Dict[str, Any]:
        """Convert a TradingDisclosure model to a row dict.

        Amount fields use explicit ``is not None`` checks so a legitimate
        zero amount (Decimal("0") is falsy) is stored as 0, not NULL.
        """
        return {
            "id": disclosure.id,
            "politician_id": disclosure.politician_id,
            "transaction_date": disclosure.transaction_date.isoformat(),
            "disclosure_date": disclosure.disclosure_date.isoformat(),
            "transaction_type": (
                disclosure.transaction_type.value if disclosure.transaction_type else None
            ),
            "asset_name": disclosure.asset_name,
            "asset_ticker": disclosure.asset_ticker,
            "asset_type": disclosure.asset_type,
            "amount_range_min": (
                float(disclosure.amount_range_min)
                if disclosure.amount_range_min is not None
                else None
            ),
            "amount_range_max": (
                float(disclosure.amount_range_max)
                if disclosure.amount_range_max is not None
                else None
            ),
            "amount_exact": (
                float(disclosure.amount_exact) if disclosure.amount_exact is not None else None
            ),
            "source_url": disclosure.source_url,
            "source_document_id": disclosure.source_document_id,
            "raw_data": disclosure.raw_data,
            "status": disclosure.status.value if disclosure.status else None,
            "processing_notes": disclosure.processing_notes,
            "created_at": disclosure.created_at.isoformat(),
            "updated_at": disclosure.updated_at.isoformat(),
        }

    def _dict_to_disclosure(self, data: Dict[str, Any]) -> TradingDisclosure:
        """Convert a row dict back to a TradingDisclosure model.

        Amounts round-trip through str→Decimal; ``is not None`` checks keep
        zero amounts intact (0 and 0.0 are falsy).
        """
        from decimal import Decimal

        from .models import DisclosureStatus, TransactionType

        return TradingDisclosure(
            id=data.get("id"),
            politician_id=data.get("politician_id", ""),
            transaction_date=datetime.fromisoformat(data["transaction_date"]),
            disclosure_date=datetime.fromisoformat(data["disclosure_date"]),
            transaction_type=TransactionType(data.get("transaction_type", "purchase")),
            asset_name=data.get("asset_name", ""),
            asset_ticker=data.get("asset_ticker"),
            asset_type=data.get("asset_type", ""),
            amount_range_min=(
                Decimal(str(data["amount_range_min"]))
                if data.get("amount_range_min") is not None
                else None
            ),
            amount_range_max=(
                Decimal(str(data["amount_range_max"]))
                if data.get("amount_range_max") is not None
                else None
            ),
            amount_exact=(
                Decimal(str(data["amount_exact"]))
                if data.get("amount_exact") is not None
                else None
            ),
            source_url=data.get("source_url", ""),
            source_document_id=data.get("source_document_id"),
            raw_data=data.get("raw_data", {}),
            status=DisclosureStatus(data.get("status", "pending")),
            processing_notes=data.get("processing_notes", ""),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )

    def _job_to_dict(self, job: DataPullJob) -> Dict[str, Any]:
        """Convert a DataPullJob model to a row dict (datetimes → ISO strings)."""
        return {
            "id": job.id,
            "job_type": job.job_type,
            "status": job.status,
            "started_at": job.started_at.isoformat() if job.started_at else None,
            "completed_at": job.completed_at.isoformat() if job.completed_at else None,
            "records_found": job.records_found,
            "records_processed": job.records_processed,
            "records_new": job.records_new,
            "records_updated": job.records_updated,
            "records_failed": job.records_failed,
            "error_message": job.error_message,
            "error_details": job.error_details,
            "config_snapshot": job.config_snapshot,
            "created_at": job.created_at.isoformat(),
        }
|
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Demonstration script showing politician trading workflow execution and data creation
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
import json
|
|
7
|
-
import uuid
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
|
|
10
|
-
from rich.console import Console
|
|
11
|
-
from rich.json import JSON
|
|
12
|
-
from rich.panel import Panel
|
|
13
|
-
from rich.table import Table
|
|
14
|
-
|
|
15
|
-
from .connectivity import SupabaseConnectivityValidator
|
|
16
|
-
from .monitoring import run_health_check, run_stats_report
|
|
17
|
-
from .workflow import run_politician_trading_collection
|
|
18
|
-
|
|
19
|
-
console = Console()
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
async def demonstrate_workflow_execution():
    """Comprehensive demonstration of the workflow execution.

    Walks through six steps — connectivity validation, schema requirements,
    a (possibly failing) live workflow run, sample data, job tracking, and
    CLI commands — printing everything to the rich console. Output-only;
    returns nothing.
    """
    console.print("🏛️ Politician Trading Data Collection - Full Demonstration", style="bold cyan")
    console.print("=" * 80, style="dim")

    _show_connectivity_step()
    _show_schema_step()
    await _run_workflow_step()
    _show_sample_data_step()
    _show_job_tracking_step()
    _show_cli_commands_step()
    _show_summary_step()


def _show_connectivity_step() -> None:
    """STEP 1: list the connectivity tests the validator would run."""
    console.print("\n📋 STEP 1: Supabase Connectivity Validation", style="bold blue")
    console.print("This step validates database connectivity and operations...")

    # Constructed for parity with a real run; the demo only lists its tests.
    SupabaseConnectivityValidator()

    tests_info = [
        ("Basic Connection", "Tests fundamental database connectivity"),
        ("Read Operations", "Validates ability to query all required tables"),
        ("Write Operations", "Creates, updates, and deletes test records"),
        ("Table Access", "Verifies schema and table structure"),
        ("Job Tracking", "Tests job status and history tracking"),
        ("Real-time Sync", "Validates immediate write/read consistency"),
    ]

    test_table = Table(title="Connectivity Tests")
    test_table.add_column("Test", style="cyan")
    test_table.add_column("Description", style="white")
    for test_name, description in tests_info:
        test_table.add_row(test_name, description)
    console.print(test_table)


def _show_schema_step() -> None:
    """STEP 2: describe the four database tables the workflow needs."""
    console.print("\n📋 STEP 2: Database Schema Requirements", style="bold blue")

    schema_info = [
        (
            "politicians",
            "Stores politician information (US Congress, EU Parliament)",
            "~1000 records",
        ),
        ("trading_disclosures", "Individual trading transactions/disclosures", "~50,000+ records"),
        ("data_pull_jobs", "Job execution tracking and status", "~100 records"),
        ("data_sources", "Data source configuration and health", "~10 records"),
    ]

    schema_table = Table(title="Database Tables")
    schema_table.add_column("Table", style="cyan")
    schema_table.add_column("Purpose", style="white")
    schema_table.add_column("Expected Size", style="yellow")
    for table_name, purpose, size in schema_info:
        schema_table.add_row(table_name, purpose, size)
    console.print(schema_table)


async def _run_workflow_step() -> None:
    """STEP 3: attempt a live collection run; failures are expected pre-schema."""
    console.print("\n📋 STEP 3: Workflow Execution Simulation", style="bold blue")
    console.print("Running the politician trading collection workflow...")

    try:
        # May raise when the database schema has not been created yet.
        workflow_result = await run_politician_trading_collection()

        console.print("\n🔍 Workflow Result Structure:", style="bold")

        # Mock of a successful result, shown for comparison with the real one.
        mock_successful_result = {
            "started_at": "2024-09-02T09:00:00.000Z",
            "completed_at": "2024-09-02T09:05:30.150Z",
            "status": "completed",
            "jobs": {
                "us_congress": {
                    "job_id": "job_12345",
                    "status": "completed",
                    "new_disclosures": 15,
                    "updated_disclosures": 3,
                    "errors": [],
                },
                "eu_parliament": {
                    "job_id": "job_12346",
                    "status": "completed",
                    "new_disclosures": 8,
                    "updated_disclosures": 1,
                    "errors": [],
                },
            },
            "summary": {"total_new_disclosures": 23, "total_updated_disclosures": 4, "errors": []},
        }
        console.print(JSON.from_data(mock_successful_result))

        console.print("\n🔍 Actual Workflow Result:", style="bold")
        console.print(JSON.from_data(workflow_result))

    except Exception as e:
        console.print(f"\n⚠️ Workflow execution encountered expected issues: {e}", style="yellow")
        console.print("This is normal when database schema hasn't been created yet.", style="dim")


def _show_sample_data_step() -> None:
    """STEP 4: print representative politician and disclosure records."""
    console.print("\n📋 STEP 4: Sample Data That Would Be Created", style="bold blue")

    console.print("\n👥 Sample Politician Records:", style="bold")
    sample_politicians = [
        {
            "full_name": "Nancy Pelosi",
            "role": "us_house_representative",
            "party": "Democratic",
            "state_or_country": "CA",
            "district": "5",
            "bioguide_id": "P000197",
        },
        {
            "full_name": "Ted Cruz",
            "role": "us_senator",
            "party": "Republican",
            "state_or_country": "TX",
            "bioguide_id": "C001098",
        },
    ]
    for politician in sample_politicians:
        console.print(JSON.from_data(politician))

    console.print("\n💰 Sample Trading Disclosure Records:", style="bold")
    sample_disclosures = [
        {
            "politician_id": str(uuid.uuid4()),
            "transaction_date": "2024-08-15T00:00:00Z",
            "disclosure_date": "2024-08-20T00:00:00Z",
            "transaction_type": "purchase",
            "asset_name": "Apple Inc.",
            "asset_ticker": "AAPL",
            "asset_type": "stock",
            "amount_range_min": 15001.00,
            "amount_range_max": 50000.00,
            "source_url": "https://disclosures-clerk.house.gov",
            "status": "processed",
        },
        {
            "politician_id": "pol_2",
            "transaction_date": "2024-08-10T00:00:00Z",
            "disclosure_date": "2024-08-25T00:00:00Z",
            "transaction_type": "sale",
            "asset_name": "Microsoft Corporation",
            "asset_ticker": "MSFT",
            "asset_type": "stock",
            "amount_range_min": 1001.00,
            "amount_range_max": 15000.00,
            "source_url": "https://efdsearch.senate.gov",
            "status": "processed",
        },
    ]
    for disclosure in sample_disclosures:
        console.print(JSON.from_data(disclosure))


def _show_job_tracking_step() -> None:
    """STEP 5: show what a completed job-tracking record looks like."""
    console.print("\n📋 STEP 5: Job Tracking and Monitoring", style="bold blue")

    sample_job_record = {
        "id": "job_12345",
        "job_type": "us_congress",
        "status": "completed",
        "started_at": "2024-09-02T09:00:00Z",
        "completed_at": "2024-09-02T09:03:45Z",
        "records_found": 20,
        "records_processed": 18,
        "records_new": 15,
        "records_updated": 3,
        "records_failed": 2,
        "config_snapshot": {
            "supabase_url": "https://uljsqvwkomdrlnofmlad.supabase.co",
            "request_delay": 1.0,
            "max_retries": 3,
        },
    }

    console.print("📊 Sample Job Record:", style="bold")
    console.print(JSON.from_data(sample_job_record))


def _show_cli_commands_step() -> None:
    """STEP 6: tabulate the management CLI commands."""
    console.print("\n📋 STEP 6: CLI Commands for Management", style="bold blue")

    commands_info = [
        ("politician-trading setup --create-tables", "Create database schema"),
        ("politician-trading connectivity", "Test Supabase connectivity"),
        ("politician-trading run", "Execute data collection"),
        ("politician-trading status", "Check system status"),
        ("politician-trading health", "System health monitoring"),
        ("politician-trading stats", "View detailed statistics"),
        ("politician-trading test-workflow -v", "Run full workflow test"),
        ("politician-trading connectivity --continuous", "Continuous monitoring"),
        ("politician-trading cron-job --create", "Setup automated scheduling"),
    ]

    commands_table = Table(title="Available CLI Commands")
    commands_table.add_column("Command", style="cyan")
    commands_table.add_column("Description", style="white")
    for command, description in commands_info:
        commands_table.add_row(command, description)
    console.print(commands_table)


def _show_summary_step() -> None:
    """Closing summary: capabilities recap plus deployment next steps."""
    console.print("\n📋 SUMMARY", style="bold green")
    console.print("✅ Workflow validates Supabase connectivity with 6 comprehensive tests")
    console.print("✅ Creates and manages 4 database tables with proper indexing")
    console.print("✅ Scrapes data from US Congress and EU Parliament sources")
    console.print("✅ Tracks job execution with detailed status and metrics")
    console.print("✅ Provides comprehensive CLI for management and monitoring")
    console.print("✅ Supports automated scheduling via Supabase cron jobs")
    console.print("✅ Includes real-time monitoring and health checks")

    console.print("\n🚀 Next Steps to Deploy:", style="bold blue")
    console.print("1. Execute the schema.sql in your Supabase SQL editor")
    console.print("2. Run: politician-trading setup --verify")
    console.print("3. Run: politician-trading connectivity")
    console.print("4. Run: politician-trading test-workflow --verbose")
    console.print("5. Setup cron job: politician-trading cron-job --create")


if __name__ == "__main__":
    asyncio.run(demonstrate_workflow_execution())
|