mcli-framework 7.1.3__py3-none-any.whl → 7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (38)
  1. mcli/app/main.py +10 -0
  2. mcli/lib/custom_commands.py +424 -0
  3. mcli/lib/paths.py +12 -0
  4. mcli/ml/dashboard/app.py +13 -13
  5. mcli/ml/dashboard/app_integrated.py +1292 -148
  6. mcli/ml/dashboard/app_supabase.py +46 -21
  7. mcli/ml/dashboard/app_training.py +14 -14
  8. mcli/ml/dashboard/components/charts.py +258 -0
  9. mcli/ml/dashboard/components/metrics.py +125 -0
  10. mcli/ml/dashboard/components/tables.py +228 -0
  11. mcli/ml/dashboard/pages/cicd.py +382 -0
  12. mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
  13. mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
  14. mcli/ml/dashboard/pages/workflows.py +533 -0
  15. mcli/ml/training/train_model.py +569 -0
  16. mcli/self/self_cmd.py +322 -94
  17. mcli/workflow/politician_trading/data_sources.py +259 -1
  18. mcli/workflow/politician_trading/models.py +159 -1
  19. mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
  20. mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
  21. mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
  22. mcli/workflow/politician_trading/seed_database.py +539 -0
  23. mcli/workflow/workflow.py +8 -27
  24. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
  25. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
  26. mcli/workflow/daemon/api_daemon.py +0 -800
  27. mcli/workflow/daemon/commands.py +0 -1196
  28. mcli/workflow/dashboard/dashboard_cmd.py +0 -120
  29. mcli/workflow/file/file.py +0 -100
  30. mcli/workflow/git_commit/commands.py +0 -430
  31. mcli/workflow/politician_trading/commands.py +0 -1939
  32. mcli/workflow/scheduler/commands.py +0 -493
  33. mcli/workflow/sync/sync_cmd.py +0 -437
  34. mcli/workflow/videos/videos.py +0 -242
  35. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
  36. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
  37. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
  38. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,539 @@
1
+ """
2
+ Database Seeding Script for Politician Trading Data
3
+
4
+ This script provides functionality to seed the Supabase database with politician
5
+ trading data from multiple sources, creating a comprehensive data bank that can
6
+ be iteratively updated.
7
+
8
+ Usage:
9
+ python -m mcli.workflow.politician_trading.seed_database --sources all
10
+ python -m mcli.workflow.politician_trading.seed_database --sources propublica
11
+ python -m mcli.workflow.politician_trading.seed_database --test-run
12
+ """
13
+
14
+ import argparse
15
+ import logging
16
+ import os
17
+ import sys
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Dict, List, Optional
21
+ from uuid import UUID
22
+
23
+ from supabase import create_client, Client
24
+
25
+ # Load environment variables from .env file
26
+ try:
27
+ from dotenv import load_dotenv
28
+ # Look for .env in project root
29
+ env_path = Path(__file__).parent.parent.parent.parent.parent / ".env"
30
+ if env_path.exists():
31
+ load_dotenv(env_path)
32
+ logger = logging.getLogger(__name__)
33
+ logger.info(f"Loaded environment variables from {env_path}")
34
+ except ImportError:
35
+ # python-dotenv not installed, try loading from .streamlit/secrets.toml
36
+ pass
37
+
38
+ from .data_sources import ALL_DATA_SOURCES, AccessMethod, DataSource
39
+ from .models import Politician, TradingDisclosure
40
+ from .scrapers_free_sources import FreeDataFetcher
41
+
42
+
43
+ # Configure logging
44
+ logging.basicConfig(
45
+ level=logging.INFO,
46
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
47
+ handlers=[
48
+ logging.StreamHandler(sys.stdout),
49
+ logging.FileHandler("/tmp/seed_database.log")
50
+ ]
51
+ )
52
+ logger = logging.getLogger(__name__)
53
+
54
+
55
+ # =============================================================================
56
+ # Database Connection
57
+ # =============================================================================
58
+
59
+
60
def get_supabase_client() -> Client:
    """Build a Supabase client from environment variables.

    Reads SUPABASE_URL plus SUPABASE_SERVICE_ROLE_KEY (preferred) or
    SUPABASE_KEY as a fallback.

    Returns:
        A configured Supabase ``Client``.

    Raises:
        ValueError: if the URL or key is missing from the environment.
    """
    url = os.getenv("SUPABASE_URL")
    key = os.getenv("SUPABASE_SERVICE_ROLE_KEY") or os.getenv("SUPABASE_KEY")

    if url and key:
        return create_client(url, key)

    raise ValueError(
        "SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY (or SUPABASE_KEY) "
        "environment variables must be set"
    )
72
+
73
+
74
+ # =============================================================================
75
+ # Data Pull Job Tracking
76
+ # =============================================================================
77
+
78
+
79
def create_data_pull_job(
    client: Client,
    job_type: str,
    config: Optional[Dict] = None
) -> UUID:
    """Insert a ``data_pull_jobs`` row marking a pull as started.

    Args:
        client: Supabase client.
        job_type: Kind of job (e.g., "propublica", "stocknear", "seed_all").
        config: Optional configuration snapshot stored with the job.

    Returns:
        UUID of the freshly created job row.

    Raises:
        Exception: re-raised from the underlying insert after logging.
    """
    row = {
        "job_type": job_type,
        "status": "running",
        "started_at": datetime.now().isoformat(),
        "config_snapshot": config or {},
    }

    try:
        inserted = client.table("data_pull_jobs").insert(row).execute()
        job_id = inserted.data[0]["id"]
        logger.info(f"Created data pull job: {job_id} (type: {job_type})")
        return UUID(job_id)
    except Exception as exc:
        logger.error(f"Error creating data pull job: {exc}")
        raise
110
+
111
+
112
def update_data_pull_job(
    client: Client,
    job_id: UUID,
    status: str,
    stats: Optional[Dict] = None,
    error: Optional[str] = None
):
    """Record the outcome of a data pull job.

    Best-effort: any failure here is logged and swallowed so that a
    bookkeeping error never aborts the seeding run itself.

    Args:
        client: Supabase client.
        job_id: Job row to update.
        status: New status ("completed", "failed", "running").
        stats: Optional counters (records_found, records_new, etc.) merged
            into the update payload.
        error: Optional error message stored as ``error_message``.
    """
    try:
        payload = {
            "status": status,
            "completed_at": datetime.now().isoformat(),
        }
        if stats:
            payload.update(stats)
        if error:
            payload["error_message"] = error

        client.table("data_pull_jobs").update(payload).eq("id", str(job_id)).execute()
        logger.info(f"Updated job {job_id}: status={status}")
    except Exception as exc:
        logger.error(f"Error updating data pull job: {exc}")
147
+
148
+
149
+ # =============================================================================
150
+ # Politician Upsert Logic
151
+ # =============================================================================
152
+
153
+
154
def upsert_politicians(
    client: Client,
    politicians: List[Politician]
) -> Dict[str, UUID]:
    """Insert-or-update politicians and return a lookup-key -> UUID map.

    The lookup key is the politician's bioguide_id when present, otherwise
    the full name (some sources, e.g. Senate Stock Watcher, carry no
    bioguide_id).

    Args:
        client: Supabase client.
        politicians: Politician objects to persist.

    Returns:
        Dictionary mapping bioguide_id (or full_name) to politician UUID.
    """
    id_by_key: Dict[str, UUID] = {}
    inserted = 0
    refreshed = 0

    for person in politicians:
        try:
            record = {
                "first_name": person.first_name,
                "last_name": person.last_name,
                "full_name": person.full_name,
                "role": person.role,
                "party": person.party,
                "state_or_country": person.state_or_country,
                "district": person.district,
                "bioguide_id": person.bioguide_id,
            }

            # Locate an existing row: prefer bioguide_id, otherwise fall back
            # to the (first_name, last_name, role, state_or_country) unique
            # constraint fields.
            if person.bioguide_id:
                query = client.table("politicians").select("id").eq(
                    "bioguide_id", person.bioguide_id
                )
            else:
                query = (
                    client.table("politicians")
                    .select("id")
                    .eq("first_name", person.first_name)
                    .eq("last_name", person.last_name)
                    .eq("role", person.role)
                    .eq("state_or_country", person.state_or_country)
                )
            found = query.execute()

            if found.data:
                # Refresh the existing row in place.
                row_id = UUID(found.data[0]["id"])
                client.table("politicians").update(record).eq("id", str(row_id)).execute()
                refreshed += 1
            else:
                # Brand-new politician.
                created = client.table("politicians").insert(record).execute()
                row_id = UUID(created.data[0]["id"])
                inserted += 1

            # Map key: bioguide_id when available, else full_name.
            key = person.bioguide_id or person.full_name
            if key:
                id_by_key[key] = row_id

        except Exception as exc:
            logger.error(f"Error upserting politician {person.full_name}: {exc}")
            continue

    logger.info(f"Upserted {len(politicians)} politicians ({inserted} new, {refreshed} updated)")

    return id_by_key
229
+
230
+
231
+ # =============================================================================
232
+ # Trading Disclosure Upsert Logic
233
+ # =============================================================================
234
+
235
+
236
def upsert_trading_disclosures(
    client: Client,
    disclosures: List[TradingDisclosure],
    politician_map: Dict[str, UUID]
) -> Dict[str, int]:
    """Insert-or-update trading disclosures.

    Args:
        client: Supabase client.
        disclosures: TradingDisclosure objects to persist.
        politician_map: bioguide_id -> politician UUID lookup, as produced by
            upsert_politicians.  NOTE(review): that map may also be keyed by
            full_name for sources without a bioguide_id; such entries only
            match if the disclosure's politician_bioguide_id holds the same
            value — confirm against the scraper.

    Returns:
        Counters: records_found / records_new / records_updated /
        records_failed.
    """
    inserted = 0
    refreshed = 0
    skipped = 0

    for item in disclosures:
        try:
            owner_id = politician_map.get(item.politician_bioguide_id)
            if not owner_id:
                logger.warning(
                    f"Skipping disclosure - politician not found: "
                    f"{item.politician_bioguide_id}"
                )
                skipped += 1
                continue

            record = {
                "politician_id": str(owner_id),
                "transaction_date": item.transaction_date.isoformat(),
                "disclosure_date": item.disclosure_date.isoformat(),
                "transaction_type": item.transaction_type,
                "asset_name": item.asset_name,
                "asset_ticker": item.asset_ticker,
                "asset_type": item.asset_type,
                "amount_range_min": item.amount_range_min,
                "amount_range_max": item.amount_range_max,
                "amount_exact": item.amount_exact,
                "source_url": item.source_url,
                "raw_data": item.raw_data,
                "status": "processed",
            }

            # De-duplicate on the table's unique constraint fields.
            match = (
                client.table("trading_disclosures")
                .select("id")
                .eq("politician_id", str(owner_id))
                .eq("transaction_date", item.transaction_date.isoformat())
                .eq("asset_name", item.asset_name)
                .eq("transaction_type", item.transaction_type)
                .eq("disclosure_date", item.disclosure_date.isoformat())
                .execute()
            )

            if match.data:
                client.table("trading_disclosures").update(record).eq(
                    "id", match.data[0]["id"]
                ).execute()
                refreshed += 1
            else:
                client.table("trading_disclosures").insert(record).execute()
                inserted += 1

        except Exception as exc:
            logger.error(f"Error upserting disclosure: {exc}")
            skipped += 1
            continue

    logger.info(
        f"Upserted {len(disclosures)} disclosures "
        f"({inserted} new, {refreshed} updated, {skipped} skipped)"
    )

    return {
        "records_found": len(disclosures),
        "records_new": inserted,
        "records_updated": refreshed,
        "records_failed": skipped,
    }
326
+
327
+
328
+ # =============================================================================
329
+ # Source-Specific Seeding Functions
330
+ # =============================================================================
331
+
332
+
333
def seed_from_senate_watcher(
    client: Client,
    test_run: bool = False,
    recent_only: bool = False,
    days: int = 90
) -> Dict[str, int]:
    """Seed the database from the Senate Stock Watcher GitHub dataset.

    Args:
        client: Supabase client.
        test_run: If True, fetch only — skip all database writes.
        recent_only: If True, restrict the fetch to recent transactions.
        days: Look-back window in days when recent_only is True.

    Returns:
        Statistics from the disclosure upsert, or just ``records_found``
        on a test run.

    Raises:
        Exception: re-raised after the job row is marked "failed".
    """
    banner = "=" * 80
    logger.info(banner)
    logger.info("SEEDING FROM SENATE STOCK WATCHER (GitHub)")
    logger.info(banner)

    # Record this pull in data_pull_jobs so every run is auditable.
    job_id = create_data_pull_job(
        client,
        "senate_watcher_seed",
        {"recent_only": recent_only, "days": days},
    )

    try:
        payload = FreeDataFetcher().fetch_from_senate_watcher(
            recent_only=recent_only,
            days=days,
        )
        politicians = payload["politicians"]
        disclosures = payload["disclosures"]
        logger.info(f"Fetched {len(politicians)} politicians, {len(disclosures)} disclosures")

        if test_run:
            # Dry run: report what would be written and close out the job.
            logger.info("TEST RUN - Not inserting to database")
            logger.info(f"Sample politician: {politicians[0] if politicians else 'None'}")
            logger.info(f"Sample disclosure: {disclosures[0] if disclosures else 'None'}")
            total = len(politicians) + len(disclosures)
            update_data_pull_job(client, job_id, "completed", {
                "records_found": total,
                "records_new": 0,
                "records_updated": 0,
            })
            return {"records_found": total}

        # Politicians first so disclosures can reference their UUIDs.
        politician_map = upsert_politicians(client, politicians)
        stats = upsert_trading_disclosures(client, disclosures, politician_map)

        update_data_pull_job(client, job_id, "completed", stats)
        return stats

    except Exception as exc:
        logger.error(f"Error seeding from Senate Stock Watcher: {exc}")
        update_data_pull_job(client, job_id, "failed", error=str(exc))
        raise
402
+
403
+
404
def seed_from_all_sources(
    client: Client,
    test_run: bool = False,
    recent_only: bool = False,
    days: int = 90
) -> Dict[str, Dict[str, int]]:
    """
    Seed database from all available sources

    Args:
        client: Supabase client
        test_run: If True, only fetch but don't insert to DB
        recent_only: If True, only fetch recent transactions (forwarded to
            sources that support it; previously this flag was silently
            ignored on the "all" path)
        days: Number of days to look back when recent_only=True

    Returns:
        Dictionary mapping source name to statistics (or {"error": msg}
        when a source fails)
    """
    logger.info("=" * 80)
    logger.info("SEEDING FROM ALL SOURCES")
    logger.info("=" * 80)

    results = {}

    # Senate Stock Watcher (free GitHub dataset - no API key needed!)
    # One source failing must not stop the others, so each source gets its
    # own try/except and failures are recorded in the results dict.
    try:
        logger.info("\n📡 Senate Stock Watcher (GitHub)")
        results["senate_watcher"] = seed_from_senate_watcher(
            client, test_run, recent_only=recent_only, days=days
        )
    except Exception as e:
        logger.error(f"Senate Stock Watcher seeding failed: {e}")
        results["senate_watcher"] = {"error": str(e)}

    # TODO: Add other sources as implemented
    # - Finnhub (requires free API key from finnhub.io)
    # - SEC Edgar (free, no API key, but need to implement Form 4 parsing)
    # - StockNear (requires JavaScript rendering)
    # - QuiverQuant (requires premium subscription)

    logger.info("\n" + "=" * 80)
    logger.info("SEEDING SUMMARY")
    logger.info("=" * 80)

    for source, stats in results.items():
        logger.info(f"\n{source}:")
        if "error" in stats:
            logger.error(f" ❌ Failed: {stats['error']}")
        else:
            logger.info(f" ✅ Found: {stats.get('records_found', 0)}")
            logger.info(f" ➕ New: {stats.get('records_new', 0)}")
            logger.info(f" 🔄 Updated: {stats.get('records_updated', 0)}")
            logger.info(f" ⚠️ Failed: {stats.get('records_failed', 0)}")

    return results
453
+
454
+
455
+ # =============================================================================
456
+ # CLI Interface
457
+ # =============================================================================
458
+
459
+
460
def main():
    """CLI entry point: parse arguments, connect to Supabase, run seeding."""
    parser = argparse.ArgumentParser(
        description="Seed politician trading database from multiple sources"
    )
    parser.add_argument(
        "--sources",
        choices=["all", "senate", "finnhub", "sec-edgar"],
        default="all",
        help="Which data sources to seed from (default: all)",
    )
    parser.add_argument(
        "--recent-only",
        action="store_true",
        help="Only fetch recent transactions (last 90 days)",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=90,
        help="Number of days to look back when using --recent-only (default: 90)",
    )
    parser.add_argument(
        "--test-run",
        action="store_true",
        help="Fetch data but don't insert to database (for testing)",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging",
    )
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Connect first; nothing else is worth doing without a database.
    try:
        client = get_supabase_client()
        logger.info("✅ Connected to Supabase")
    except Exception as exc:
        logger.error(f"❌ Failed to connect to Supabase: {exc}")
        sys.exit(1)

    try:
        if args.sources == "senate":
            seed_from_senate_watcher(
                client,
                test_run=args.test_run,
                recent_only=args.recent_only,
                days=args.days,
            )
        elif args.sources == "all":
            seed_from_all_sources(client, args.test_run)
        else:
            # Listed in --sources choices but not wired up yet.
            logger.error(f"Source '{args.sources}' not yet implemented")
            logger.info("Available sources: all, senate")
            logger.info("Coming soon: finnhub, sec-edgar")
            sys.exit(1)
    except KeyboardInterrupt:
        logger.info("\n⚠️ Seeding interrupted by user")
        sys.exit(1)
    except Exception as exc:
        logger.error(f"\n❌ Seeding failed: {exc}")
        sys.exit(1)
    else:
        logger.info("\n✅ Seeding completed successfully!")


if __name__ == "__main__":
    main()
mcli/workflow/workflow.py CHANGED
@@ -1,37 +1,18 @@
1
- import click
1
+ """
2
+ Workflow command group for mcli.
3
+
4
+ All workflow commands are now loaded from portable JSON files in ~/.mcli/commands/
5
+ This provides a clean, maintainable way to manage workflow commands.
6
+ """
2
7
 
3
- from .daemon.api_daemon import api_daemon
4
- from .daemon.commands import daemon
5
- from .dashboard.dashboard_cmd import dashboard
6
- from .file.file import file
7
- from .git_commit.commands import git_commit_cli
8
- from .politician_trading.commands import politician_trading_cli
9
- from .scheduler.commands import scheduler
10
- from .sync.sync_cmd import sync
11
- from .videos.videos import videos
8
+ import click
12
9
 
13
10
 
14
11
  @click.group(name="workflow")
15
12
  def workflow():
16
- """Workflow commands"""
13
+ """Workflow commands for automation, video processing, and daemon management"""
17
14
  pass
18
15
 
19
16
 
20
- # Add subcommands
21
- def register_workflow_commands():
22
- workflow.add_command(file)
23
- workflow.add_command(videos)
24
- workflow.add_command(daemon)
25
- workflow.add_command(api_daemon)
26
- workflow.add_command(dashboard)
27
- workflow.add_command(git_commit_cli)
28
- workflow.add_command(scheduler)
29
- workflow.add_command(sync)
30
- workflow.add_command(politician_trading_cli)
31
-
32
-
33
- register_workflow_commands()
34
-
35
-
36
17
  if __name__ == "__main__":
37
18
  workflow()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcli-framework
3
- Version: 7.1.3
3
+ Version: 7.2.0
4
4
  Summary: 🚀 High-performance CLI framework with Rust extensions, AI chat, and stunning visuals
5
5
  Author-email: Luis Fernandez de la Vara <luis@lefv.io>
6
6
  Maintainer-email: Luis Fernandez de la Vara <luis@lefv.io>