mcli-framework 7.6.0-py3-none-any.whl → 7.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mcli-framework might be problematic.

Files changed (49)
  1. mcli/app/commands_cmd.py +51 -39
  2. mcli/app/main.py +10 -2
  3. mcli/app/model_cmd.py +1 -1
  4. mcli/lib/custom_commands.py +4 -10
  5. mcli/ml/api/app.py +1 -5
  6. mcli/ml/dashboard/app.py +2 -2
  7. mcli/ml/dashboard/app_integrated.py +168 -116
  8. mcli/ml/dashboard/app_supabase.py +7 -3
  9. mcli/ml/dashboard/app_training.py +3 -6
  10. mcli/ml/dashboard/components/charts.py +74 -115
  11. mcli/ml/dashboard/components/metrics.py +24 -44
  12. mcli/ml/dashboard/components/tables.py +32 -40
  13. mcli/ml/dashboard/overview.py +102 -78
  14. mcli/ml/dashboard/pages/cicd.py +103 -56
  15. mcli/ml/dashboard/pages/debug_dependencies.py +35 -28
  16. mcli/ml/dashboard/pages/gravity_viz.py +374 -313
  17. mcli/ml/dashboard/pages/monte_carlo_predictions.py +50 -48
  18. mcli/ml/dashboard/pages/predictions_enhanced.py +396 -248
  19. mcli/ml/dashboard/pages/scrapers_and_logs.py +299 -273
  20. mcli/ml/dashboard/pages/test_portfolio.py +153 -121
  21. mcli/ml/dashboard/pages/trading.py +238 -169
  22. mcli/ml/dashboard/pages/workflows.py +129 -84
  23. mcli/ml/dashboard/streamlit_extras_utils.py +70 -79
  24. mcli/ml/dashboard/utils.py +24 -21
  25. mcli/ml/dashboard/warning_suppression.py +6 -4
  26. mcli/ml/database/session.py +16 -5
  27. mcli/ml/mlops/pipeline_orchestrator.py +1 -3
  28. mcli/ml/predictions/monte_carlo.py +6 -18
  29. mcli/ml/trading/alpaca_client.py +95 -96
  30. mcli/ml/trading/migrations.py +76 -40
  31. mcli/ml/trading/models.py +78 -60
  32. mcli/ml/trading/paper_trading.py +92 -74
  33. mcli/ml/trading/risk_management.py +106 -85
  34. mcli/ml/trading/trading_service.py +155 -110
  35. mcli/ml/training/train_model.py +1 -3
  36. mcli/self/self_cmd.py +71 -57
  37. mcli/workflow/daemon/daemon.py +2 -0
  38. mcli/workflow/model_service/openai_adapter.py +6 -2
  39. mcli/workflow/politician_trading/models.py +6 -2
  40. mcli/workflow/politician_trading/scrapers_corporate_registry.py +39 -88
  41. mcli/workflow/politician_trading/scrapers_free_sources.py +32 -39
  42. mcli/workflow/politician_trading/scrapers_third_party.py +21 -39
  43. mcli/workflow/politician_trading/seed_database.py +70 -89
  44. {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/METADATA +1 -1
  45. {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/RECORD +49 -49
  46. {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/WHEEL +0 -0
  47. {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/entry_points.txt +0 -0
  48. {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/licenses/LICENSE +0 -0
  49. {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/top_level.txt +0 -0
mcli/ml/dashboard/pages/scrapers_and_logs.py

@@ -32,25 +32,22 @@ logger = logging.getLogger(__name__)
  def show_scrapers_and_logs():
  """Main function for scrapers and logs page"""
  st.header("🔍 Data Scrapers & System Logs")
-
+
  # Add a simple test to ensure the page is rendering
  st.info("📋 Page loaded successfully - Scrapers & Logs functionality is available")

- st.markdown("""
+ st.markdown(
+ """
  **Features:**
  - 🚀 Manual data scraping from corporate registries
  - 📊 Real-time scraper logs and job status
  - 📝 System logs viewer
  - 📈 Job history and statistics
- """)
+ """
+ )

  # Create tabs
- tabs = st.tabs([
- "🚀 Manual Scraping",
- "📊 Scraper Logs",
- "📝 System Logs",
- "📈 Job History"
- ])
+ tabs = st.tabs(["🚀 Manual Scraping", "📊 Scraper Logs", "📝 System Logs", "📈 Job History"])

  with tabs[0]:
  show_manual_scraping()
@@ -69,10 +66,12 @@ def show_manual_scraping():
  """Manual scraping interface"""
  st.subheader("🚀 Manual Data Scraping")

- st.markdown("""
+ st.markdown(
+ """
  Manually trigger data scraping jobs from various sources.
  Select a source, configure parameters, and run the scraper.
- """)
+ """
+ )

  # Source selection
  source_type = st.selectbox(
@@ -85,7 +84,7 @@ def show_manual_scraping():
  "XBRL US",
  "Senate Stock Watcher (GitHub)",
  ],
- help="Choose which data source to scrape"
+ help="Choose which data source to scrape",
  )

  # Source-specific configuration
@@ -108,17 +107,21 @@ def show_uk_companies_house_scraper():
  st.markdown("### UK Companies House Configuration")

  # Check API key
- api_key = os.getenv("UK_COMPANIES_HOUSE_API_KEY") or st.secrets.get("UK_COMPANIES_HOUSE_API_KEY", "")
+ api_key = os.getenv("UK_COMPANIES_HOUSE_API_KEY") or st.secrets.get(
+ "UK_COMPANIES_HOUSE_API_KEY", ""
+ )

  if not api_key:
  st.error("❌ UK Companies House API key not configured")
- st.info("""
+ st.info(
+ """
  To use this scraper, set `UK_COMPANIES_HOUSE_API_KEY` in:
  - Streamlit Cloud: Settings → Secrets
  - Local: .streamlit/secrets.toml or environment variable

  Get free API key: https://developer.company-information.service.gov.uk/
- """)
+ """
+ )
  return

  st.success("✅ API key configured")
@@ -128,16 +131,14 @@ def show_uk_companies_house_scraper():

  with col1:
  company_query = st.text_input(
- "Company Name",
- value="Tesco",
- help="Company name to search for"
+ "Company Name", value="Tesco", help="Company name to search for"
  )
  max_results = st.number_input(
  "Max Results",
  min_value=1,
  max_value=100,
  value=10,
- help="Maximum number of companies to fetch"
+ help="Maximum number of companies to fetch",
  )

  with col2:
@@ -148,24 +149,18 @@ def show_uk_companies_house_scraper():
  # Run scraper
  if st.button("🚀 Run UK Companies House Scraper", type="primary"):
  run_uk_companies_house_scraper(
- company_query,
- max_results,
- fetch_officers,
- fetch_psc,
- save_to_db
+ company_query, max_results, fetch_officers, fetch_psc, save_to_db
  )


  def run_uk_companies_house_scraper(
- query: str,
- max_results: int,
- fetch_officers: bool,
- fetch_psc: bool,
- save_to_db: bool
+ query: str, max_results: int, fetch_officers: bool, fetch_psc: bool, save_to_db: bool
  ):
  """Execute UK Companies House scraper"""
  try:
- from mcli.workflow.politician_trading.scrapers_corporate_registry import UKCompaniesHouseScraper
+ from mcli.workflow.politician_trading.scrapers_corporate_registry import (
+ UKCompaniesHouseScraper,
+ )

  # Create log capture
  log_stream = StringIO()
@@ -174,7 +169,9 @@ def run_uk_companies_house_scraper(
  formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
  handler.setFormatter(formatter)

- scraper_logger = logging.getLogger("mcli.workflow.politician_trading.scrapers_corporate_registry")
+ scraper_logger = logging.getLogger(
+ "mcli.workflow.politician_trading.scrapers_corporate_registry"
+ )
  scraper_logger.addHandler(handler)

  # Create progress containers
@@ -220,11 +217,7 @@ def run_uk_companies_house_scraper(
  progress_bar.progress(30 + int((i + 1) / len(companies) * 50))

  # Display logs
- log_container.text_area(
- "Scraper Logs",
- log_stream.getvalue(),
- height=200
- )
+ log_container.text_area("Scraper Logs", log_stream.getvalue(), height=200)

  # Display results
  with results_container:
@@ -237,40 +230,57 @@ def run_uk_companies_house_scraper(

  # Show companies
  st.markdown("#### Companies Found")
- companies_df = pd.DataFrame([{
- "Number": c.get("company_number"),
- "Name": c.get("title"),
- "Status": c.get("company_status"),
- "Type": c.get("company_type"),
- "Address": c.get("address_snippet", "")[:50]
- } for c in companies])
+ companies_df = pd.DataFrame(
+ [
+ {
+ "Number": c.get("company_number"),
+ "Name": c.get("title"),
+ "Status": c.get("company_status"),
+ "Type": c.get("company_type"),
+ "Address": c.get("address_snippet", "")[:50],
+ }
+ for c in companies
+ ]
+ )
  st.dataframe(companies_df, use_container_width=True)

  # Show officers
  if all_officers:
  st.markdown("#### Officers Found")
- officers_df = pd.DataFrame([{
- "Name": o.get("name"),
- "Role": o.get("officer_role"),
- "Appointed": o.get("appointed_on", ""),
- "Nationality": o.get("nationality", ""),
- "Occupation": o.get("occupation", "")
- } for o in all_officers[:50]]) # Limit to 50 for display
+ officers_df = pd.DataFrame(
+ [
+ {
+ "Name": o.get("name"),
+ "Role": o.get("officer_role"),
+ "Appointed": o.get("appointed_on", ""),
+ "Nationality": o.get("nationality", ""),
+ "Occupation": o.get("occupation", ""),
+ }
+ for o in all_officers[:50]
+ ]
+ ) # Limit to 50 for display
  st.dataframe(officers_df, use_container_width=True)

  # Show PSC
  if all_psc:
  st.markdown("#### Persons with Significant Control")
- psc_df = pd.DataFrame([{
- "Name": p.get("name"),
- "Kind": p.get("kind", "").replace("-", " ").title(),
- "Control": ", ".join(p.get("natures_of_control", [])),
- "Nationality": p.get("nationality", ""),
- } for p in all_psc[:50]])
+ psc_df = pd.DataFrame(
+ [
+ {
+ "Name": p.get("name"),
+ "Kind": p.get("kind", "").replace("-", " ").title(),
+ "Control": ", ".join(p.get("natures_of_control", [])),
+ "Nationality": p.get("nationality", ""),
+ }
+ for p in all_psc[:50]
+ ]
+ )
  st.dataframe(psc_df, use_container_width=True)

  progress_bar.progress(100)
- status_container.success(f"✅ Scraping completed! Found {len(companies)} companies, {len(all_officers)} officers, {len(all_psc)} PSC")
+ status_container.success(
+ f"✅ Scraping completed! Found {len(companies)} companies, {len(all_officers)} officers, {len(all_psc)} PSC"
+ )

  # Save to database if requested
  if save_to_db:
@@ -279,6 +289,7 @@ def run_uk_companies_house_scraper(
  except Exception as e:
  st.error(f"❌ Error: {e}")
  import traceback
+
  st.code(traceback.format_exc())


@@ -293,25 +304,18 @@ def show_info_financiere_scraper():

  with col1:
  query = st.text_input(
- "Search Query (optional)",
- value="",
- help="Company name, ISIN, or leave blank for all"
+ "Search Query (optional)", value="", help="Company name, ISIN, or leave blank for all"
  )
  days_back = st.number_input(
  "Days Back",
  min_value=1,
  max_value=365,
  value=30,
- help="How many days of history to fetch"
+ help="How many days of history to fetch",
  )

  with col2:
- max_results = st.number_input(
- "Max Results",
- min_value=1,
- max_value=100,
- value=20
- )
+ max_results = st.number_input("Max Results", min_value=1, max_value=100, value=20)
  save_to_db = st.checkbox("Save to Database", value=False)

  # Run scraper
@@ -319,15 +323,12 @@ def show_info_financiere_scraper():
  run_info_financiere_scraper(query, days_back, max_results, save_to_db)


- def run_info_financiere_scraper(
- query: str,
- days_back: int,
- max_results: int,
- save_to_db: bool
- ):
+ def run_info_financiere_scraper(query: str, days_back: int, max_results: int, save_to_db: bool):
  """Execute Info-Financière scraper"""
  try:
- from mcli.workflow.politician_trading.scrapers_corporate_registry import InfoFinanciereAPIScraper
+ from mcli.workflow.politician_trading.scrapers_corporate_registry import (
+ InfoFinanciereAPIScraper,
+ )

  status_container = st.empty()
  progress_bar = st.progress(0)
@@ -345,10 +346,7 @@ def run_info_financiere_scraper(
  # Search publications
  status_container.info(f"🔍 Searching publications ({from_date} to {to_date})...")
  publications = scraper.search_publications(
- query=query or None,
- from_date=from_date,
- to_date=to_date,
- per_page=max_results
+ query=query or None, from_date=from_date, to_date=to_date, per_page=max_results
  )
  progress_bar.progress(80)

@@ -363,12 +361,17 @@ def run_info_financiere_scraper(
  st.metric("Publications Found", len(publications))

  # Show publications
- pubs_df = pd.DataFrame([{
- "Date": p.get("publication_date", ""),
- "Title": p.get("title", "")[:100],
- "Type": p.get("publication_type", ""),
- "Issuer": p.get("issuer_name", "")
- } for p in publications])
+ pubs_df = pd.DataFrame(
+ [
+ {
+ "Date": p.get("publication_date", ""),
+ "Title": p.get("title", "")[:100],
+ "Type": p.get("publication_type", ""),
+ "Issuer": p.get("issuer_name", ""),
+ }
+ for p in publications
+ ]
+ )
  st.dataframe(pubs_df, use_container_width=True)

  progress_bar.progress(100)
@@ -380,6 +383,7 @@ def run_info_financiere_scraper(
  except Exception as e:
  st.error(f"❌ Error: {e}")
  import traceback
+
  st.code(traceback.format_exc())


@@ -392,30 +396,23 @@ def show_opencorporates_scraper():
  if api_key:
  st.success("✅ API key configured")
  else:
- st.info("ℹ️ No API key (free tier with rate limits). Get API key for better performance: https://opencorporates.com/api_accounts/new")
+ st.info(
+ "ℹ️ No API key (free tier with rate limits). Get API key for better performance: https://opencorporates.com/api_accounts/new"
+ )

  # Configuration
  col1, col2 = st.columns(2)

  with col1:
- query = st.text_input(
- "Company Name",
- value="Apple",
- help="Company name to search for"
- )
+ query = st.text_input("Company Name", value="Apple", help="Company name to search for")
  jurisdiction = st.selectbox(
  "Jurisdiction (optional)",
  ["", "us_ca", "us_de", "us_ny", "gb", "de", "fr", "nl"],
- help="Filter by jurisdiction code"
+ help="Filter by jurisdiction code",
  )

  with col2:
- max_results = st.number_input(
- "Max Results",
- min_value=1,
- max_value=100,
- value=10
- )
+ max_results = st.number_input("Max Results", min_value=1, max_value=100, value=10)
  save_to_db = st.checkbox("Save to Database", value=False)

  # Run scraper
@@ -423,15 +420,12 @@ def show_opencorporates_scraper():
  run_opencorporates_scraper(query, jurisdiction or None, max_results, save_to_db)


- def run_opencorporates_scraper(
- query: str,
- jurisdiction: str,
- max_results: int,
- save_to_db: bool
- ):
+ def run_opencorporates_scraper(query: str, jurisdiction: str, max_results: int, save_to_db: bool):
  """Execute OpenCorporates scraper"""
  try:
- from mcli.workflow.politician_trading.scrapers_corporate_registry import OpenCorporatesScraper
+ from mcli.workflow.politician_trading.scrapers_corporate_registry import (
+ OpenCorporatesScraper,
+ )

  status_container = st.empty()
  progress_bar = st.progress(0)
@@ -445,9 +439,7 @@ def run_opencorporates_scraper(
  # Search companies
  status_container.info(f"🔍 Searching for '{query}'...")
  companies = scraper.search_companies(
- query,
- jurisdiction_code=jurisdiction,
- per_page=max_results
+ query, jurisdiction_code=jurisdiction, per_page=max_results
  )
  progress_bar.progress(80)

@@ -462,13 +454,18 @@ def run_opencorporates_scraper(
  st.metric("Companies Found", len(companies))

  # Show companies
- companies_df = pd.DataFrame([{
- "Jurisdiction": c.get("company", {}).get("jurisdiction_code", ""),
- "Number": c.get("company", {}).get("company_number", ""),
- "Name": c.get("company", {}).get("name", ""),
- "Status": c.get("company", {}).get("current_status", ""),
- "Type": c.get("company", {}).get("company_type", "")
- } for c in companies])
+ companies_df = pd.DataFrame(
+ [
+ {
+ "Jurisdiction": c.get("company", {}).get("jurisdiction_code", ""),
+ "Number": c.get("company", {}).get("company_number", ""),
+ "Name": c.get("company", {}).get("name", ""),
+ "Status": c.get("company", {}).get("current_status", ""),
+ "Type": c.get("company", {}).get("company_type", ""),
+ }
+ for c in companies
+ ]
+ )
  st.dataframe(companies_df, use_container_width=True)

  progress_bar.progress(100)
@@ -477,6 +474,7 @@ def run_opencorporates_scraper(
  except Exception as e:
  st.error(f"❌ Error: {e}")
  import traceback
+
  st.code(traceback.format_exc())


@@ -493,22 +491,12 @@ def show_xbrl_filings_scraper():
  country = st.selectbox(
  "Country (optional)",
  ["", "GB", "FR", "DE", "ES", "IT", "NL", "BE"],
- help="Filter by country code"
- )
- days_back = st.number_input(
- "Days Back",
- min_value=1,
- max_value=365,
- value=30
+ help="Filter by country code",
  )
+ days_back = st.number_input("Days Back", min_value=1, max_value=365, value=30)

  with col2:
- max_results = st.number_input(
- "Max Results",
- min_value=1,
- max_value=500,
- value=100
- )
+ max_results = st.number_input("Max Results", min_value=1, max_value=500, value=100)
  save_to_db = st.checkbox("Save to Database", value=False)

  # Run scraper
@@ -516,12 +504,7 @@ def show_xbrl_filings_scraper():
  run_xbrl_filings_scraper(country or None, days_back, max_results, save_to_db)


- def run_xbrl_filings_scraper(
- country: str,
- days_back: int,
- max_results: int,
- save_to_db: bool
- ):
+ def run_xbrl_filings_scraper(country: str, days_back: int, max_results: int, save_to_db: bool):
  """Execute XBRL Filings scraper"""
  try:
  from mcli.workflow.politician_trading.scrapers_corporate_registry import XBRLFilingsScraper
@@ -540,11 +523,7 @@ def run_xbrl_filings_scraper(

  # Get filings
  status_container.info(f"🔍 Fetching XBRL filings since {from_date}...")
- filings = scraper.get_filings(
- country=country,
- from_date=from_date,
- page_size=max_results
- )
+ filings = scraper.get_filings(country=country, from_date=from_date, page_size=max_results)
  progress_bar.progress(80)

  # Display results
@@ -558,13 +537,18 @@ def run_xbrl_filings_scraper(
  st.metric("Filings Found", len(filings))

  # Show filings
- filings_df = pd.DataFrame([{
- "ID": f.get("id", ""),
- "Country": f.get("attributes", {}).get("country", ""),
- "Entity": f.get("attributes", {}).get("entity_name", "")[:50],
- "Period": f.get("attributes", {}).get("period_end", ""),
- "Date Added": f.get("attributes", {}).get("date_added", "")
- } for f in filings])
+ filings_df = pd.DataFrame(
+ [
+ {
+ "ID": f.get("id", ""),
+ "Country": f.get("attributes", {}).get("country", ""),
+ "Entity": f.get("attributes", {}).get("entity_name", "")[:50],
+ "Period": f.get("attributes", {}).get("period_end", ""),
+ "Date Added": f.get("attributes", {}).get("date_added", ""),
+ }
+ for f in filings
+ ]
+ )
  st.dataframe(filings_df, use_container_width=True)

  progress_bar.progress(100)
@@ -573,6 +557,7 @@ def run_xbrl_filings_scraper(
  except Exception as e:
  st.error(f"❌ Error: {e}")
  import traceback
+
  st.code(traceback.format_exc())


@@ -584,13 +569,15 @@ def show_xbrl_us_scraper():

  if not api_key:
  st.error("❌ XBRL US API key not configured")
- st.info("""
+ st.info(
+ """
  To use this scraper, set `XBRL_US_API_KEY` in:
  - Streamlit Cloud: Settings → Secrets
  - Local: .streamlit/secrets.toml or environment variable

  Get free API key: https://xbrl.us/home/use/xbrl-api/
- """)
+ """
+ )
  return

  st.success("✅ API key configured")
@@ -600,18 +587,11 @@ def show_xbrl_us_scraper():

  with col1:
  query = st.text_input(
- "Company Name or Ticker",
- value="Tesla",
- help="Search by company name or stock ticker"
+ "Company Name or Ticker", value="Tesla", help="Search by company name or stock ticker"
  )

  with col2:
- max_results = st.number_input(
- "Max Results",
- min_value=1,
- max_value=100,
- value=10
- )
+ max_results = st.number_input("Max Results", min_value=1, max_value=100, value=10)
  save_to_db = st.checkbox("Save to Database", value=False)

  # Run scraper
@@ -619,11 +599,7 @@ def show_xbrl_us_scraper():
  run_xbrl_us_scraper(query, max_results, save_to_db)


- def run_xbrl_us_scraper(
- query: str,
- max_results: int,
- save_to_db: bool
- ):
+ def run_xbrl_us_scraper(query: str, max_results: int, save_to_db: bool):
  """Execute XBRL US scraper"""
  try:
  from mcli.workflow.politician_trading.scrapers_corporate_registry import XBRLUSScraper
@@ -653,12 +629,17 @@ def run_xbrl_us_scraper(
  st.metric("Entities Found", len(entities))

  # Show entities
- entities_df = pd.DataFrame([{
- "ID": e.get("entity", {}).get("id", ""),
- "Name": e.get("entity", {}).get("name", ""),
- "CIK": e.get("entity", {}).get("cik", ""),
- "Ticker": e.get("entity", {}).get("ticker", "")
- } for e in entities])
+ entities_df = pd.DataFrame(
+ [
+ {
+ "ID": e.get("entity", {}).get("id", ""),
+ "Name": e.get("entity", {}).get("name", ""),
+ "CIK": e.get("entity", {}).get("cik", ""),
+ "Ticker": e.get("entity", {}).get("ticker", ""),
+ }
+ for e in entities
+ ]
+ )
  st.dataframe(entities_df, use_container_width=True)

  progress_bar.progress(100)
@@ -667,6 +648,7 @@ def run_xbrl_us_scraper(
  except Exception as e:
  st.error(f"❌ Error: {e}")
  import traceback
+
  st.code(traceback.format_exc())


@@ -682,11 +664,7 @@ def show_senate_watcher_scraper():
  with col1:
  recent_only = st.checkbox("Recent Only", value=True)
  days_back = st.number_input(
- "Days Back (if recent)",
- min_value=1,
- max_value=365,
- value=90,
- disabled=not recent_only
+ "Days Back (if recent)", min_value=1, max_value=365, value=90, disabled=not recent_only
  )

  with col2:
@@ -697,11 +675,7 @@ def show_senate_watcher_scraper():
  run_senate_watcher_scraper(recent_only, days_back, save_to_db)


- def run_senate_watcher_scraper(
- recent_only: bool,
- days_back: int,
- save_to_db: bool
- ):
+ def run_senate_watcher_scraper(recent_only: bool, days_back: int, save_to_db: bool):
  """Execute Senate Stock Watcher scraper"""
  try:
  from mcli.workflow.politician_trading.scrapers_free_sources import FreeDataFetcher
@@ -734,19 +708,30 @@ def run_senate_watcher_scraper(
  # Show disclosures
  if disclosures:
  st.markdown("#### Recent Trading Disclosures")
- disc_df = pd.DataFrame([{
- "Date": d.transaction_date.strftime("%Y-%m-%d") if hasattr(d.transaction_date, 'strftime') else str(d.transaction_date),
- "Ticker": d.asset_ticker or "—",
- "Asset": d.asset_name[:50],
- "Type": d.transaction_type,
- "Politician": d.politician_bioguide_id,
- "Min": f"${d.amount_range_min:,.0f}" if d.amount_range_min else "",
- "Max": f"${d.amount_range_max:,.0f}" if d.amount_range_max else ""
- } for d in disclosures[:100]]) # Limit to 100 for display
+ disc_df = pd.DataFrame(
+ [
+ {
+ "Date": (
+ d.transaction_date.strftime("%Y-%m-%d")
+ if hasattr(d.transaction_date, "strftime")
+ else str(d.transaction_date)
+ ),
+ "Ticker": d.asset_ticker or "—",
+ "Asset": d.asset_name[:50],
+ "Type": d.transaction_type,
+ "Politician": d.politician_bioguide_id,
+ "Min": f"${d.amount_range_min:,.0f}" if d.amount_range_min else "",
+ "Max": f"${d.amount_range_max:,.0f}" if d.amount_range_max else "",
+ }
+ for d in disclosures[:100]
+ ]
+ ) # Limit to 100 for display
  st.dataframe(disc_df, use_container_width=True)

  progress_bar.progress(100)
- status_container.success(f"✅ Scraping completed! Found {len(politicians)} politicians, {len(disclosures)} disclosures")
+ status_container.success(
+ f"✅ Scraping completed! Found {len(politicians)} politicians, {len(disclosures)} disclosures"
+ )

  if save_to_db:
  save_politician_trading_to_db(politicians, disclosures)
@@ -754,6 +739,7 @@ def run_senate_watcher_scraper(
  except Exception as e:
  st.error(f"❌ Error: {e}")
  import traceback
+
  st.code(traceback.format_exc())


@@ -779,9 +765,11 @@ def show_scraper_logs():
  """Display scraper logs"""
  st.subheader("📊 Scraper Logs")

- st.markdown("""
+ st.markdown(
+ """
  View real-time logs from scraping operations and data pull jobs.
- """)
+ """
+ )

  # Get logs from Supabase data_pull_jobs
  try:
@@ -791,7 +779,13 @@ def show_scraper_logs():

  if client:
  # Get recent jobs
- jobs = client.table("data_pull_jobs").select("*").order("created_at", desc=True).limit(50).execute()
+ jobs = (
+ client.table("data_pull_jobs")
+ .select("*")
+ .order("created_at", desc=True)
+ .limit(50)
+ .execute()
+ )

  if jobs.data:
  st.markdown("### Recent Data Pull Jobs")
@@ -799,19 +793,33 @@ def show_scraper_logs():
  jobs_df = pd.DataFrame(jobs.data)

  # Format dates
- for col in ['started_at', 'completed_at', 'created_at']:
+ for col in ["started_at", "completed_at", "created_at"]:
  if col in jobs_df.columns:
- jobs_df[col] = pd.to_datetime(jobs_df[col], format='ISO8601', errors='coerce')
+ jobs_df[col] = pd.to_datetime(
+ jobs_df[col], format="ISO8601", errors="coerce"
+ )

  # Display jobs table
- display_df = jobs_df[[
- 'created_at', 'job_type', 'status', 'records_found',
- 'records_new', 'records_updated', 'records_failed'
- ]].copy()
+ display_df = jobs_df[
+ [
+ "created_at",
+ "job_type",
+ "status",
+ "records_found",
+ "records_new",
+ "records_updated",
+ "records_failed",
+ ]
+ ].copy()

  display_df.columns = [
- 'Timestamp', 'Job Type', 'Status', 'Found',
- 'New', 'Updated', 'Failed'
+ "Timestamp",
+ "Job Type",
+ "Status",
+ "Found",
+ "New",
+ "Updated",
+ "Failed",
  ]

  st.dataframe(display_df, use_container_width=True)
@@ -821,26 +829,26 @@ def show_scraper_logs():

  selected_job = st.selectbox(
  "Select Job",
- jobs_df['id'].tolist(),
- format_func=lambda x: f"{jobs_df[jobs_df['id']==x]['job_type'].values[0]} - {jobs_df[jobs_df['id']==x]['created_at'].values[0]}"
+ jobs_df["id"].tolist(),
+ format_func=lambda x: f"{jobs_df[jobs_df['id']==x]['job_type'].values[0]} - {jobs_df[jobs_df['id']==x]['created_at'].values[0]}",
  )

  if selected_job:
- job = jobs_df[jobs_df['id'] == selected_job].iloc[0]
+ job = jobs_df[jobs_df["id"] == selected_job].iloc[0]

  col1, col2, col3, col4 = st.columns(4)
- col1.metric("Status", job['status'])
- col2.metric("Records Found", job['records_found'])
- col3.metric("New Records", job['records_new'])
- col4.metric("Failed", job['records_failed'])
+ col1.metric("Status", job["status"])
+ col2.metric("Records Found", job["records_found"])
+ col3.metric("New Records", job["records_new"])
+ col4.metric("Failed", job["records_failed"])

- if job.get('error_message'):
+ if job.get("error_message"):
  st.error(f"**Error:** {job['error_message']}")

  # Show config snapshot
- if job.get('config_snapshot'):
+ if job.get("config_snapshot"):
  with st.expander("Configuration Snapshot"):
- st.json(job['config_snapshot'])
+ st.json(job["config_snapshot"])

  else:
  st.info("No jobs found in database")
@@ -856,33 +864,29 @@ def show_system_logs():
  """Display system logs"""
  st.subheader("📝 System Logs")

- st.markdown("""
+ st.markdown(
+ """
  View application logs, errors, and system events.
- """)
+ """
+ )

  # Log file path
  log_file = Path("/tmp/seed_database.log")

  if log_file.exists():
  try:
- with open(log_file, 'r') as f:
+ with open(log_file, "r") as f:
  logs = f.readlines()

  # Filter options
  col1, col2, col3 = st.columns(3)

  with col1:
- log_level = st.selectbox(
- "Log Level",
- ["ALL", "ERROR", "WARNING", "INFO", "DEBUG"]
- )
+ log_level = st.selectbox("Log Level", ["ALL", "ERROR", "WARNING", "INFO", "DEBUG"])

  with col2:
  lines_to_show = st.number_input(
- "Lines to Show",
- min_value=10,
- max_value=1000,
- value=100
+ "Lines to Show", min_value=10, max_value=1000, value=100
  )

  with col3:
@@ -898,51 +902,54 @@ def show_system_logs():
  filtered_logs = [l for l in filtered_logs if search_term.lower() in l.lower()]

  # Display logs
- st.text_area(
- "Log Output",
- "".join(filtered_logs),
- height=400
- )
+ st.text_area("Log Output", "".join(filtered_logs), height=400)

  # Download button
  st.download_button(
  "Download Full Logs",
  "".join(logs),
  file_name=f"system_logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
- mime="text/plain"
+ mime="text/plain",
  )

  except Exception as e:
  st.error(f"Error reading log file: {e}")
  else:
  st.info("📋 **No logs available yet**")
- st.markdown("""
+ st.markdown(
+ """
  System logs will appear here automatically after scraping jobs run.

  **To generate logs:**
  - Use the "Manual Scrapers" section above to run a data pull
  - Wait for automated jobs to execute
  - Logs will be stored in: `/tmp/seed_database.log`
- """)
+ """
+ )

  # Create example logs display
  st.markdown("### 📝 Example Log Output")
- st.code("""
+ st.code(
+ """
  2025-10-07 12:00:00 - INFO - Starting data pull job: senate_watcher_seed
  2025-10-07 12:00:05 - INFO - Fetched 8350 Senate transactions
  2025-10-07 12:00:10 - INFO - Upserted 89 politicians (5 new, 84 updated)
  2025-10-07 12:01:30 - INFO - Upserted 8350 disclosures (6353 new, 1893 updated, 104 failed)
  2025-10-07 12:01:31 - INFO - Job completed successfully
- """, language="log")
+ """,
+ language="log",
+ )


  def show_job_history():
  """Display job history and statistics"""
  st.subheader("📈 Job History & Statistics")

- st.markdown("""
+ st.markdown(
+ """
  View historical data about scraping jobs, success rates, and trends.
- """)
+ """
+ )

  try:
  from mcli.ml.dashboard.app_integrated import get_supabase_client
@@ -951,15 +958,23 @@ def show_job_history():

  if client:
  # Get all jobs
- jobs = client.table("data_pull_jobs").select("*").order("created_at", desc=True).limit(1000).execute()
+ jobs = (
+ client.table("data_pull_jobs")
+ .select("*")
+ .order("created_at", desc=True)
+ .limit(1000)
+ .execute()
+ )

  if jobs.data and len(jobs.data) > 0:
  jobs_df = pd.DataFrame(jobs.data)

  # Format dates
- for col in ['started_at', 'completed_at', 'created_at']:
+ for col in ["started_at", "completed_at", "created_at"]:
  if col in jobs_df.columns:
- jobs_df[col] = pd.to_datetime(jobs_df[col], format='ISO8601', errors='coerce')
+ jobs_df[col] = pd.to_datetime(
+ jobs_df[col], format="ISO8601", errors="coerce"
+ )

  # Statistics
  st.markdown("### Overall Statistics")
@@ -967,8 +982,8 @@ def show_job_history():
  col1, col2, col3, col4 = st.columns(4)

  total_jobs = len(jobs_df)
- completed_jobs = len(jobs_df[jobs_df['status'] == 'completed'])
- failed_jobs = len(jobs_df[jobs_df['status'] == 'failed'])
+ completed_jobs = len(jobs_df[jobs_df["status"] == "completed"])
+ failed_jobs = len(jobs_df[jobs_df["status"] == "failed"])
  success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0

  col1.metric("Total Jobs", total_jobs)
@@ -979,84 +994,94 @@ def show_job_history():
  # Job type breakdown
  st.markdown("### Job Type Breakdown")

- job_type_counts = jobs_df['job_type'].value_counts()
+ job_type_counts = jobs_df["job_type"].value_counts()

  fig = px.pie(
- values=job_type_counts.values,
- names=job_type_counts.index,
- title="Jobs by Type"
+ values=job_type_counts.values, names=job_type_counts.index, title="Jobs by Type"
  )
  st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)

  # Status breakdown
  st.markdown("### Status Breakdown")

- status_counts = jobs_df['status'].value_counts()
+ status_counts = jobs_df["status"].value_counts()

  fig = px.bar(
  x=status_counts.index,
  y=status_counts.values,
- labels={'x': 'Status', 'y': 'Count'},
- title="Jobs by Status"
+ labels={"x": "Status", "y": "Count"},
+ title="Jobs by Status",
  )
  st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)

  # Timeline
  st.markdown("### Job Timeline")

- jobs_df['date'] = jobs_df['created_at'].dt.date
+ jobs_df["date"] = jobs_df["created_at"].dt.date

- timeline_df = jobs_df.groupby(['date', 'status']).size().reset_index(name='count')
+ timeline_df = jobs_df.groupby(["date", "status"]).size().reset_index(name="count")

  fig = px.line(
- timeline_df,
- x='date',
- y='count',
- color='status',
- title="Jobs Over Time"
+ timeline_df, x="date", y="count", color="status", title="Jobs Over Time"
  )
  st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)

  # Records processed
  st.markdown("### Records Processed")

- records_df = jobs_df[jobs_df['status'] == 'completed'][['created_at', 'records_found', 'records_new', 'records_updated', 'records_failed']].copy()
+ records_df = jobs_df[jobs_df["status"] == "completed"][
+ [
+ "created_at",
+ "records_found",
+ "records_new",
+ "records_updated",
+ "records_failed",
+ ]
+ ].copy()

  if not records_df.empty:
  fig = go.Figure()

- fig.add_trace(go.Scatter(
- x=records_df['created_at'],
- y=records_df['records_new'],
- name='New Records',
- mode='lines+markers'
- ))
-
- fig.add_trace(go.Scatter(
- x=records_df['created_at'],
- y=records_df['records_updated'],
- name='Updated Records',
- mode='lines+markers'
- ))
-
- fig.add_trace(go.Scatter(
- x=records_df['created_at'],
- y=records_df['records_failed'],
- name='Failed Records',
- mode='lines+markers'
- ))
+ fig.add_trace(
+ go.Scatter(
+ x=records_df["created_at"],
+ y=records_df["records_new"],
+ name="New Records",
+ mode="lines+markers",
+ )
+ )
+
+ fig.add_trace(
+ go.Scatter(
+ x=records_df["created_at"],
+ y=records_df["records_updated"],
+ name="Updated Records",
+ mode="lines+markers",
+ )
+ )
+
+ fig.add_trace(
+ go.Scatter(
+ x=records_df["created_at"],
+ y=records_df["records_failed"],
+ name="Failed Records",
+ mode="lines+markers",
+ )
+ )

  fig.update_layout(
  title="Records Processed Over Time",
  xaxis_title="Date",
  yaxis_title="Count",
- hovermode='x unified'
+ hovermode="x unified",
  )

  st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)

  else:
- st.info("No job history available yet. Run some scraping jobs to see statistics here.")
+ st.info(
+ "No job history available yet. Run some scraping jobs to see statistics here."
+ )

  else:
  st.warning("Supabase not connected - job history unavailable")
@@ -1064,6 +1089,7 @@ def show_job_history():
  except Exception as e:
  st.error(f"Error loading job history: {e}")
  import traceback
+
  st.code(traceback.format_exc())
