mcli-framework 7.10.1__py3-none-any.whl → 7.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (43)
  1. mcli/app/commands_cmd.py +150 -58
  2. mcli/app/main.py +21 -27
  3. mcli/lib/custom_commands.py +62 -12
  4. mcli/lib/optional_deps.py +240 -0
  5. mcli/lib/paths.py +129 -5
  6. mcli/self/migrate_cmd.py +261 -0
  7. mcli/self/self_cmd.py +8 -0
  8. mcli/workflow/git_commit/ai_service.py +13 -2
  9. mcli/workflow/notebook/__init__.py +16 -0
  10. mcli/workflow/notebook/converter.py +375 -0
  11. mcli/workflow/notebook/notebook_cmd.py +441 -0
  12. mcli/workflow/notebook/schema.py +402 -0
  13. mcli/workflow/notebook/validator.py +313 -0
  14. mcli/workflow/secrets/__init__.py +4 -0
  15. mcli/workflow/secrets/secrets_cmd.py +192 -0
  16. mcli/workflow/workflow.py +35 -5
  17. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/METADATA +86 -55
  18. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/RECORD +22 -34
  19. mcli/ml/features/political_features.py +0 -677
  20. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  21. mcli/workflow/politician_trading/__init__.py +0 -4
  22. mcli/workflow/politician_trading/config.py +0 -134
  23. mcli/workflow/politician_trading/connectivity.py +0 -492
  24. mcli/workflow/politician_trading/data_sources.py +0 -654
  25. mcli/workflow/politician_trading/database.py +0 -412
  26. mcli/workflow/politician_trading/demo.py +0 -249
  27. mcli/workflow/politician_trading/models.py +0 -327
  28. mcli/workflow/politician_trading/monitoring.py +0 -413
  29. mcli/workflow/politician_trading/scrapers.py +0 -1074
  30. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  31. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  32. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  33. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  34. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  35. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  36. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  37. mcli/workflow/politician_trading/seed_database.py +0 -520
  38. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  39. mcli/workflow/politician_trading/workflow.py +0 -879
  40. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/WHEEL +0 -0
  41. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/entry_points.txt +0 -0
  42. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/licenses/LICENSE +0 -0
  43. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/top_level.txt +0 -0
@@ -1,520 +0,0 @@
1
- """
2
- Database Seeding Script for Politician Trading Data
3
-
4
- This script provides functionality to seed the Supabase database with politician
5
- trading data from multiple sources, creating a comprehensive data bank that can
6
- be iteratively updated.
7
-
8
- Usage:
9
- python -m mcli.workflow.politician_trading.seed_database --sources all
10
- python -m mcli.workflow.politician_trading.seed_database --sources propublica
11
- python -m mcli.workflow.politician_trading.seed_database --test-run
12
- """
13
-
14
- import argparse
15
- import logging
16
- import os
17
- import sys
18
- from datetime import datetime
19
- from pathlib import Path
20
- from typing import Dict, List, Optional
21
- from uuid import UUID
22
-
23
- from supabase import Client, create_client
24
-
25
- # Load environment variables from .env file
26
- try:
27
- from dotenv import load_dotenv
28
-
29
- # Look for .env in project root
30
- env_path = Path(__file__).parent.parent.parent.parent.parent / ".env"
31
- if env_path.exists():
32
- load_dotenv(env_path)
33
- logger = logging.getLogger(__name__)
34
- logger.info(f"Loaded environment variables from {env_path}")
35
- except ImportError:
36
- # python-dotenv not installed, try loading from .streamlit/secrets.toml
37
- pass
38
-
39
- from .data_sources import ALL_DATA_SOURCES, AccessMethod, DataSource
40
- from .models import Politician, TradingDisclosure
41
- from .scrapers_free_sources import FreeDataFetcher
42
-
43
- # Configure logging
44
- logging.basicConfig(
45
- level=logging.INFO,
46
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
47
- handlers=[logging.StreamHandler(sys.stdout), logging.FileHandler("/tmp/seed_database.log")],
48
- )
49
- logger = logging.getLogger(__name__)
50
-
51
-
52
- # =============================================================================
53
- # Database Connection
54
- # =============================================================================
55
-
56
-
57
def get_supabase_client() -> Client:
    """Build a Supabase client from environment configuration.

    Reads ``SUPABASE_URL`` and ``SUPABASE_SERVICE_ROLE_KEY`` (falling back to
    ``SUPABASE_KEY``).

    Returns:
        A connected Supabase ``Client``.

    Raises:
        ValueError: If either the URL or a key is missing from the environment.
    """
    url = os.getenv("SUPABASE_URL")
    key = os.getenv("SUPABASE_SERVICE_ROLE_KEY") or os.getenv("SUPABASE_KEY")

    # Guard clause: fail fast with a clear message when configuration is absent.
    if url and key:
        return create_client(url, key)

    raise ValueError(
        "SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY (or SUPABASE_KEY) "
        "environment variables must be set"
    )
69
-
70
-
71
- # =============================================================================
72
- # Data Pull Job Tracking
73
- # =============================================================================
74
-
75
-
76
def create_data_pull_job(client: Client, job_type: str, config: Optional[Dict] = None) -> UUID:
    """Insert a new "running" row into ``data_pull_jobs`` and return its id.

    Args:
        client: Supabase client
        job_type: Type of job (e.g., "propublica", "stocknear", "seed_all")
        config: Optional configuration snapshot

    Returns:
        The UUID of the newly created job row.

    Raises:
        Exception: Re-raised after logging if the insert fails.
    """
    # Assemble the row up front; the job starts in the "running" state.
    payload = {
        "job_type": job_type,
        "status": "running",
        "started_at": datetime.now().isoformat(),
        "config_snapshot": config or {},
    }

    try:
        inserted = client.table("data_pull_jobs").insert(payload).execute()
        job_id = inserted.data[0]["id"]
        logger.info(f"Created data pull job: {job_id} (type: {job_type})")
        return UUID(job_id)
    except Exception as e:
        logger.error(f"Error creating data pull job: {e}")
        raise
109
-
110
-
111
def update_data_pull_job(
    client: Client,
    job_id: UUID,
    status: str,
    stats: Optional[Dict] = None,
    error: Optional[str] = None,
):
    """Update a ``data_pull_jobs`` row with its latest status and results.

    Args:
        client: Supabase client
        job_id: Job ID to update
        status: Job status ("completed", "failed", "running")
        stats: Optional statistics (records_found, records_new, etc.)
        error: Optional error message if failed

    Failures here are logged and swallowed on purpose: job bookkeeping must
    never abort the surrounding seeding run.
    """
    try:
        update_data: Dict = {"status": status}

        # Bug fix: only stamp completed_at for terminal states. Previously a
        # "running" heartbeat update recorded a bogus completion timestamp.
        if status in ("completed", "failed"):
            update_data["completed_at"] = datetime.now().isoformat()

        if stats:
            update_data.update(stats)

        if error:
            update_data["error_message"] = error

        client.table("data_pull_jobs").update(update_data).eq("id", str(job_id)).execute()

        logger.info(f"Updated job {job_id}: status={status}")

    except Exception as e:
        logger.error(f"Error updating data pull job: {e}")
143
-
144
-
145
- # =============================================================================
146
- # Politician Upsert Logic
147
- # =============================================================================
148
-
149
-
150
def upsert_politicians(client: Client, politicians: List[Politician]) -> Dict[str, UUID]:
    """Upsert politicians to database, returning a lookup of identifier -> UUID.

    Args:
        client: Supabase client
        politicians: List of Politician objects

    Returns:
        Mapping keyed by ``bioguide_id`` when the politician has one, otherwise
        by ``full_name`` (for sources such as Senate Stock Watcher).
    """
    id_by_key: Dict[str, UUID] = {}
    inserted = 0
    refreshed = 0

    for person in politicians:
        try:
            record = {
                "first_name": person.first_name,
                "last_name": person.last_name,
                "full_name": person.full_name,
                "role": person.role,
                "party": person.party,
                "state_or_country": person.state_or_country,
                "district": person.district,
                "bioguide_id": person.bioguide_id,
            }

            # Look for an existing row: prefer the stable bioguide_id, else
            # fall back to the table's unique-constraint fields.
            lookup = client.table("politicians").select("id")
            if person.bioguide_id:
                lookup = lookup.eq("bioguide_id", person.bioguide_id)
            else:
                lookup = (
                    lookup.eq("first_name", person.first_name)
                    .eq("last_name", person.last_name)
                    .eq("role", person.role)
                    .eq("state_or_country", person.state_or_country)
                )
            found = lookup.execute()

            if found.data:
                # Refresh the existing row in place.
                pol_id = UUID(found.data[0]["id"])
                client.table("politicians").update(record).eq("id", str(pol_id)).execute()
                refreshed += 1
            else:
                # First time we have seen this politician.
                created = client.table("politicians").insert(record).execute()
                pol_id = UUID(created.data[0]["id"])
                inserted += 1

            # Key the map by bioguide_id when present, otherwise full_name.
            if person.bioguide_id:
                id_by_key[person.bioguide_id] = pol_id
            elif person.full_name:
                id_by_key[person.full_name] = pol_id

        except Exception as e:
            # One bad record must not stop the batch.
            logger.error(f"Error upserting politician {person.full_name}: {e}")
            continue

    logger.info(
        f"Upserted {len(politicians)} politicians ({inserted} new, {refreshed} updated)"
    )

    return id_by_key
227
-
228
-
229
- # =============================================================================
230
- # Trading Disclosure Upsert Logic
231
- # =============================================================================
232
-
233
-
234
def upsert_trading_disclosures(
    client: Client, disclosures: List[TradingDisclosure], politician_map: Dict[str, UUID]
) -> Dict[str, int]:
    """Upsert trading disclosures to database.

    Args:
        client: Supabase client
        disclosures: List of TradingDisclosure objects
        politician_map: Mapping of politician identifier to politician UUID

    Returns:
        Statistics dict: records_found / records_new / records_updated /
        records_failed.
    """
    inserted = 0
    refreshed = 0
    skipped = 0

    for item in disclosures:
        try:
            # NOTE(review): lookup uses only politician_bioguide_id, while
            # upsert_politicians also keys some entries by full_name — confirm
            # that sources without bioguide ids store the name in this field.
            pol_id = politician_map.get(item.politician_bioguide_id)
            if not pol_id:
                logger.warning(
                    f"Skipping disclosure - politician not found: "
                    f"{item.politician_bioguide_id}"
                )
                skipped += 1
                continue

            row = {
                "politician_id": str(pol_id),
                "transaction_date": item.transaction_date.isoformat(),
                "disclosure_date": item.disclosure_date.isoformat(),
                "transaction_type": item.transaction_type,
                "asset_name": item.asset_name,
                "asset_ticker": item.asset_ticker,
                "asset_type": item.asset_type,
                "amount_range_min": item.amount_range_min,
                "amount_range_max": item.amount_range_max,
                "amount_exact": item.amount_exact,
                "source_url": item.source_url,
                "raw_data": item.raw_data,
                "status": "processed",
            }

            # Dedupe on the table's unique-constraint columns.
            found = (
                client.table("trading_disclosures")
                .select("id")
                .eq("politician_id", str(pol_id))
                .eq("transaction_date", item.transaction_date.isoformat())
                .eq("asset_name", item.asset_name)
                .eq("transaction_type", item.transaction_type)
                .eq("disclosure_date", item.disclosure_date.isoformat())
                .execute()
            )

            if found.data:
                disc_id = found.data[0]["id"]
                client.table("trading_disclosures").update(row).eq(
                    "id", disc_id
                ).execute()
                refreshed += 1
            else:
                client.table("trading_disclosures").insert(row).execute()
                inserted += 1

        except Exception as e:
            # Count the failure and keep processing the rest of the batch.
            logger.error(f"Error upserting disclosure: {e}")
            skipped += 1
            continue

    logger.info(
        f"Upserted {len(disclosures)} disclosures "
        f"({inserted} new, {refreshed} updated, {skipped} skipped)"
    )

    return {
        "records_found": len(disclosures),
        "records_new": inserted,
        "records_updated": refreshed,
        "records_failed": skipped,
    }
321
-
322
-
323
- # =============================================================================
324
- # Source-Specific Seeding Functions
325
- # =============================================================================
326
-
327
-
328
def seed_from_senate_watcher(
    client: Client, test_run: bool = False, recent_only: bool = False, days: int = 90
) -> Dict[str, int]:
    """Seed the database from the Senate Stock Watcher GitHub dataset.

    Args:
        client: Supabase client
        test_run: If True, only fetch but don't insert to DB
        recent_only: If True, only fetch recent transactions
        days: Number of days to look back if recent_only=True

    Returns:
        Statistics dictionary (upsert stats, or record count on a test run).

    Raises:
        Exception: Re-raised after marking the job record failed.
    """
    logger.info("=" * 80)
    logger.info("SEEDING FROM SENATE STOCK WATCHER (GitHub)")
    logger.info("=" * 80)

    # Track this run in data_pull_jobs so progress/failures are auditable.
    job_id = create_data_pull_job(
        client, "senate_watcher_seed", {"recent_only": recent_only, "days": days}
    )

    try:
        data = FreeDataFetcher().fetch_from_senate_watcher(recent_only=recent_only, days=days)
        politicians = data["politicians"]
        disclosures = data["disclosures"]
        total = len(politicians) + len(disclosures)

        logger.info(f"Fetched {len(politicians)} politicians, {len(disclosures)} disclosures")

        if test_run:
            # Dry run: show samples, close out the job, write nothing.
            logger.info("TEST RUN - Not inserting to database")
            logger.info(f"Sample politician: {politicians[0] if politicians else 'None'}")
            logger.info(f"Sample disclosure: {disclosures[0] if disclosures else 'None'}")
            update_data_pull_job(
                client,
                job_id,
                "completed",
                {
                    "records_found": total,
                    "records_new": 0,
                    "records_updated": 0,
                },
            )
            return {"records_found": total}

        # Politicians first so disclosures can resolve their foreign keys.
        politician_map = upsert_politicians(client, politicians)
        disclosure_stats = upsert_trading_disclosures(client, disclosures, politician_map)

        update_data_pull_job(client, job_id, "completed", disclosure_stats)
        return disclosure_stats

    except Exception as e:
        logger.error(f"Error seeding from Senate Stock Watcher: {e}")
        update_data_pull_job(client, job_id, "failed", error=str(e))
        raise
395
-
396
-
397
def seed_from_all_sources(client: Client, test_run: bool = False) -> Dict[str, Dict[str, int]]:
    """Seed the database from every available source and summarize results.

    Args:
        client: Supabase client
        test_run: If True, only fetch but don't insert to DB

    Returns:
        Mapping of source name to its statistics dict (or {"error": msg}).
    """
    logger.info("=" * 80)
    logger.info("SEEDING FROM ALL SOURCES")
    logger.info("=" * 80)

    results: Dict[str, Dict[str, int]] = {}

    # Senate Stock Watcher (free GitHub dataset - no API key needed!)
    # A failure here is recorded but does not abort other sources.
    try:
        logger.info("\n📡 Senate Stock Watcher (GitHub)")
        results["senate_watcher"] = seed_from_senate_watcher(client, test_run)
    except Exception as e:
        logger.error(f"Senate Stock Watcher seeding failed: {e}")
        results["senate_watcher"] = {"error": str(e)}

    # TODO: Add other sources as implemented
    # - Finnhub (requires free API key from finnhub.io)
    # - SEC Edgar (free, no API key, but need to implement Form 4 parsing)
    # - StockNear (requires JavaScript rendering)
    # - QuiverQuant (requires premium subscription)

    logger.info("\n" + "=" * 80)
    logger.info("SEEDING SUMMARY")
    logger.info("=" * 80)

    for source, stats in results.items():
        logger.info(f"\n{source}:")
        if "error" in stats:
            logger.error(f" ❌ Failed: {stats['error']}")
            continue
        logger.info(f" ✅ Found: {stats.get('records_found', 0)}")
        logger.info(f" ➕ New: {stats.get('records_new', 0)}")
        logger.info(f" 🔄 Updated: {stats.get('records_updated', 0)}")
        logger.info(f" ⚠️ Failed: {stats.get('records_failed', 0)}")

    return results
443
-
444
-
445
- # =============================================================================
446
- # CLI Interface
447
- # =============================================================================
448
-
449
-
450
def main():
    """CLI entry point: parse arguments, connect, and run the seeders."""
    parser = argparse.ArgumentParser(
        description="Seed politician trading database from multiple sources"
    )
    parser.add_argument(
        "--sources",
        choices=["all", "senate", "finnhub", "sec-edgar"],
        default="all",
        help="Which data sources to seed from (default: all)",
    )
    parser.add_argument(
        "--recent-only", action="store_true", help="Only fetch recent transactions (last 90 days)"
    )
    parser.add_argument(
        "--days",
        type=int,
        default=90,
        help="Number of days to look back when using --recent-only (default: 90)",
    )
    parser.add_argument(
        "--test-run",
        action="store_true",
        help="Fetch data but don't insert to database (for testing)",
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Connect before doing any work so misconfiguration fails immediately.
    try:
        client = get_supabase_client()
    except Exception as e:
        logger.error(f"❌ Failed to connect to Supabase: {e}")
        sys.exit(1)
    logger.info("✅ Connected to Supabase")

    try:
        if args.sources == "senate":
            seed_from_senate_watcher(
                client, test_run=args.test_run, recent_only=args.recent_only, days=args.days
            )
        elif args.sources == "all":
            seed_from_all_sources(client, args.test_run)
        else:
            # Choices accepted by argparse but not yet wired up.
            logger.error(f"Source '{args.sources}' not yet implemented")
            logger.info("Available sources: all, senate")
            logger.info("Coming soon: finnhub, sec-edgar")
            sys.exit(1)
        logger.info("\n✅ Seeding completed successfully!")
    except KeyboardInterrupt:
        logger.info("\n⚠️ Seeding interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"\n❌ Seeding failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()