mcli-framework 7.1.3__py3-none-any.whl → 7.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (114)
  1. mcli/__init__.py +160 -0
  2. mcli/__main__.py +14 -0
  3. mcli/app/__init__.py +23 -0
  4. mcli/app/main.py +10 -0
  5. mcli/app/model/__init__.py +0 -0
  6. mcli/app/video/__init__.py +5 -0
  7. mcli/chat/__init__.py +34 -0
  8. mcli/lib/__init__.py +0 -0
  9. mcli/lib/api/__init__.py +0 -0
  10. mcli/lib/auth/__init__.py +1 -0
  11. mcli/lib/config/__init__.py +1 -0
  12. mcli/lib/custom_commands.py +424 -0
  13. mcli/lib/erd/__init__.py +25 -0
  14. mcli/lib/files/__init__.py +0 -0
  15. mcli/lib/fs/__init__.py +1 -0
  16. mcli/lib/logger/__init__.py +3 -0
  17. mcli/lib/paths.py +12 -0
  18. mcli/lib/performance/__init__.py +17 -0
  19. mcli/lib/pickles/__init__.py +1 -0
  20. mcli/lib/shell/__init__.py +0 -0
  21. mcli/lib/toml/__init__.py +1 -0
  22. mcli/lib/watcher/__init__.py +0 -0
  23. mcli/ml/__init__.py +16 -0
  24. mcli/ml/api/__init__.py +30 -0
  25. mcli/ml/api/routers/__init__.py +27 -0
  26. mcli/ml/api/schemas.py +2 -2
  27. mcli/ml/auth/__init__.py +45 -0
  28. mcli/ml/auth/models.py +2 -2
  29. mcli/ml/backtesting/__init__.py +39 -0
  30. mcli/ml/cli/__init__.py +5 -0
  31. mcli/ml/cli/main.py +1 -1
  32. mcli/ml/config/__init__.py +33 -0
  33. mcli/ml/configs/__init__.py +16 -0
  34. mcli/ml/dashboard/__init__.py +12 -0
  35. mcli/ml/dashboard/app.py +13 -13
  36. mcli/ml/dashboard/app_integrated.py +1309 -148
  37. mcli/ml/dashboard/app_supabase.py +46 -21
  38. mcli/ml/dashboard/app_training.py +14 -14
  39. mcli/ml/dashboard/components/__init__.py +7 -0
  40. mcli/ml/dashboard/components/charts.py +258 -0
  41. mcli/ml/dashboard/components/metrics.py +125 -0
  42. mcli/ml/dashboard/components/tables.py +228 -0
  43. mcli/ml/dashboard/pages/__init__.py +6 -0
  44. mcli/ml/dashboard/pages/cicd.py +382 -0
  45. mcli/ml/dashboard/pages/predictions_enhanced.py +834 -0
  46. mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
  47. mcli/ml/dashboard/pages/test_portfolio.py +373 -0
  48. mcli/ml/dashboard/pages/trading.py +714 -0
  49. mcli/ml/dashboard/pages/workflows.py +533 -0
  50. mcli/ml/dashboard/utils.py +154 -0
  51. mcli/ml/data_ingestion/__init__.py +39 -0
  52. mcli/ml/database/__init__.py +47 -0
  53. mcli/ml/experimentation/__init__.py +29 -0
  54. mcli/ml/features/__init__.py +39 -0
  55. mcli/ml/mlops/__init__.py +33 -0
  56. mcli/ml/models/__init__.py +94 -0
  57. mcli/ml/monitoring/__init__.py +25 -0
  58. mcli/ml/optimization/__init__.py +27 -0
  59. mcli/ml/predictions/__init__.py +5 -0
  60. mcli/ml/preprocessing/__init__.py +28 -0
  61. mcli/ml/scripts/__init__.py +1 -0
  62. mcli/ml/trading/__init__.py +60 -0
  63. mcli/ml/trading/alpaca_client.py +353 -0
  64. mcli/ml/trading/migrations.py +164 -0
  65. mcli/ml/trading/models.py +418 -0
  66. mcli/ml/trading/paper_trading.py +326 -0
  67. mcli/ml/trading/risk_management.py +370 -0
  68. mcli/ml/trading/trading_service.py +480 -0
  69. mcli/ml/training/__init__.py +10 -0
  70. mcli/ml/training/train_model.py +569 -0
  71. mcli/mygroup/__init__.py +3 -0
  72. mcli/public/__init__.py +1 -0
  73. mcli/public/commands/__init__.py +2 -0
  74. mcli/self/__init__.py +3 -0
  75. mcli/self/self_cmd.py +579 -91
  76. mcli/workflow/__init__.py +0 -0
  77. mcli/workflow/daemon/__init__.py +15 -0
  78. mcli/workflow/daemon/daemon.py +21 -3
  79. mcli/workflow/dashboard/__init__.py +5 -0
  80. mcli/workflow/docker/__init__.py +0 -0
  81. mcli/workflow/file/__init__.py +0 -0
  82. mcli/workflow/gcloud/__init__.py +1 -0
  83. mcli/workflow/git_commit/__init__.py +0 -0
  84. mcli/workflow/interview/__init__.py +0 -0
  85. mcli/workflow/politician_trading/__init__.py +4 -0
  86. mcli/workflow/politician_trading/data_sources.py +259 -1
  87. mcli/workflow/politician_trading/models.py +159 -1
  88. mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
  89. mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
  90. mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
  91. mcli/workflow/politician_trading/seed_database.py +539 -0
  92. mcli/workflow/registry/__init__.py +0 -0
  93. mcli/workflow/repo/__init__.py +0 -0
  94. mcli/workflow/scheduler/__init__.py +25 -0
  95. mcli/workflow/search/__init__.py +0 -0
  96. mcli/workflow/sync/__init__.py +5 -0
  97. mcli/workflow/videos/__init__.py +1 -0
  98. mcli/workflow/wakatime/__init__.py +80 -0
  99. mcli/workflow/workflow.py +8 -27
  100. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/METADATA +3 -1
  101. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/RECORD +105 -29
  102. mcli/workflow/daemon/api_daemon.py +0 -800
  103. mcli/workflow/daemon/commands.py +0 -1196
  104. mcli/workflow/dashboard/dashboard_cmd.py +0 -120
  105. mcli/workflow/file/file.py +0 -100
  106. mcli/workflow/git_commit/commands.py +0 -430
  107. mcli/workflow/politician_trading/commands.py +0 -1939
  108. mcli/workflow/scheduler/commands.py +0 -493
  109. mcli/workflow/sync/sync_cmd.py +0 -437
  110. mcli/workflow/videos/videos.py +0 -242
  111. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/WHEEL +0 -0
  112. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/entry_points.txt +0 -0
  113. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/licenses/LICENSE +0 -0
  114. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/top_level.txt +0 -0
mcli/ml/dashboard/pages/scrapers_and_logs.py (new file)
@@ -0,0 +1,1060 @@
+"""
+Scrapers and Logs Dashboard Page
+
+This page provides:
+1. Manual scraping interface for corporate registry data
+2. Real-time scraper logs and job status
+3. System logs viewer
+4. Job history and statistics
+"""
+
+import logging
+import os
+import sys
+from datetime import datetime, timedelta
+from io import StringIO
+from pathlib import Path
+
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import streamlit as st
+from plotly.subplots import make_subplots
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+def show_scrapers_and_logs():
+    """Main function for scrapers and logs page"""
+    st.header("🔍 Data Scrapers & System Logs")
+
+    st.markdown("""
+    **Features:**
+    - 🚀 Manual data scraping from corporate registries
+    - 📊 Real-time scraper logs and job status
+    - 📝 System logs viewer
+    - 📈 Job history and statistics
+    """)
+
+    # Create tabs
+    tabs = st.tabs([
+        "🚀 Manual Scraping",
+        "📊 Scraper Logs",
+        "📝 System Logs",
+        "📈 Job History"
+    ])
+
+    with tabs[0]:
+        show_manual_scraping()
+
+    with tabs[1]:
+        show_scraper_logs()
+
+    with tabs[2]:
+        show_system_logs()
+
+    with tabs[3]:
+        show_job_history()
+
+
+def show_manual_scraping():
+    """Manual scraping interface"""
+    st.subheader("🚀 Manual Data Scraping")
+
+    st.markdown("""
+    Manually trigger data scraping jobs from various sources.
+    Select a source, configure parameters, and run the scraper.
+    """)
+
+    # Source selection
+    source_type = st.selectbox(
+        "Select Data Source",
+        [
+            "UK Companies House",
+            "Info-Financière (France)",
+            "OpenCorporates",
+            "XBRL Filings (EU/UK)",
+            "XBRL US",
+            "Senate Stock Watcher (GitHub)",
+        ],
+        help="Choose which data source to scrape"
+    )
+
+    # Source-specific configuration
+    if source_type == "UK Companies House":
+        show_uk_companies_house_scraper()
+    elif source_type == "Info-Financière (France)":
+        show_info_financiere_scraper()
+    elif source_type == "OpenCorporates":
+        show_opencorporates_scraper()
+    elif source_type == "XBRL Filings (EU/UK)":
+        show_xbrl_filings_scraper()
+    elif source_type == "XBRL US":
+        show_xbrl_us_scraper()
+    elif source_type == "Senate Stock Watcher (GitHub)":
+        show_senate_watcher_scraper()
+
+
+def show_uk_companies_house_scraper():
+    """UK Companies House scraper interface"""
+    st.markdown("### UK Companies House Configuration")
+
+    # Check API key
+    api_key = os.getenv("UK_COMPANIES_HOUSE_API_KEY") or st.secrets.get("UK_COMPANIES_HOUSE_API_KEY", "")
+
+    if not api_key:
+        st.error("❌ UK Companies House API key not configured")
+        st.info("""
+        To use this scraper, set `UK_COMPANIES_HOUSE_API_KEY` in:
+        - Streamlit Cloud: Settings → Secrets
+        - Local: .streamlit/secrets.toml or environment variable
+
+        Get free API key: https://developer.company-information.service.gov.uk/
+        """)
+        return
+
+    st.success("✅ API key configured")
+
+    # Configuration
+    col1, col2 = st.columns(2)
+
+    with col1:
+        company_query = st.text_input(
+            "Company Name",
+            value="Tesco",
+            help="Company name to search for"
+        )
+        max_results = st.number_input(
+            "Max Results",
+            min_value=1,
+            max_value=100,
+            value=10,
+            help="Maximum number of companies to fetch"
+        )
+
+    with col2:
+        fetch_officers = st.checkbox("Fetch Officers", value=True)
+        fetch_psc = st.checkbox("Fetch PSC Data", value=True)
+        save_to_db = st.checkbox("Save to Database", value=False)
+
+    # Run scraper
+    if st.button("🚀 Run UK Companies House Scraper", type="primary"):
+        run_uk_companies_house_scraper(
+            company_query,
+            max_results,
+            fetch_officers,
+            fetch_psc,
+            save_to_db
+        )
+
+
+def run_uk_companies_house_scraper(
+    query: str,
+    max_results: int,
+    fetch_officers: bool,
+    fetch_psc: bool,
+    save_to_db: bool
+):
+    """Execute UK Companies House scraper"""
+    try:
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import UKCompaniesHouseScraper
+
+        # Create log capture
+        log_stream = StringIO()
+        handler = logging.StreamHandler(log_stream)
+        handler.setLevel(logging.INFO)
+        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+        handler.setFormatter(formatter)
+
+        scraper_logger = logging.getLogger("mcli.workflow.politician_trading.scrapers_corporate_registry")
+        scraper_logger.addHandler(handler)
+
+        # Create progress containers
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        log_container = st.empty()
+        results_container = st.empty()
+
+        # Initialize scraper
+        status_container.info("🔄 Initializing UK Companies House scraper...")
+        scraper = UKCompaniesHouseScraper()
+        progress_bar.progress(10)
+
+        # Search companies
+        status_container.info(f"🔍 Searching for '{query}'...")
+        companies = scraper.search_companies(query, items_per_page=max_results)
+        progress_bar.progress(30)
+
+        if not companies:
+            status_container.warning(f"⚠️ No companies found matching '{query}'")
+            return
+
+        status_container.success(f"✅ Found {len(companies)} companies")
+
+        # Fetch additional data
+        all_officers = []
+        all_psc = []
+
+        for i, company in enumerate(companies):
+            company_number = company.get("company_number")
+            company_name = company.get("title", "Unknown")
+
+            if fetch_officers:
+                status_container.info(f"👥 Fetching officers for {company_name}...")
+                officers = scraper.get_company_officers(company_number)
+                all_officers.extend(officers)
+
+            if fetch_psc:
+                status_container.info(f"🏢 Fetching PSC for {company_name}...")
+                psc = scraper.get_persons_with_significant_control(company_number)
+                all_psc.extend(psc)
+
+            progress_bar.progress(30 + int((i + 1) / len(companies) * 50))
+
+        # Display logs
+        log_container.text_area(
+            "Scraper Logs",
+            log_stream.getvalue(),
+            height=200
+        )
+
+        # Display results
+        with results_container:
+            st.markdown("### 📊 Scraping Results")
+
+            col1, col2, col3 = st.columns(3)
+            col1.metric("Companies", len(companies))
+            col2.metric("Officers", len(all_officers))
+            col3.metric("PSC", len(all_psc))
+
+            # Show companies
+            st.markdown("#### Companies Found")
+            companies_df = pd.DataFrame([{
+                "Number": c.get("company_number"),
+                "Name": c.get("title"),
+                "Status": c.get("company_status"),
+                "Type": c.get("company_type"),
+                "Address": c.get("address_snippet", "")[:50]
+            } for c in companies])
+            st.dataframe(companies_df, use_container_width=True)
+
+            # Show officers
+            if all_officers:
+                st.markdown("#### Officers Found")
+                officers_df = pd.DataFrame([{
+                    "Name": o.get("name"),
+                    "Role": o.get("officer_role"),
+                    "Appointed": o.get("appointed_on", ""),
+                    "Nationality": o.get("nationality", ""),
+                    "Occupation": o.get("occupation", "")
+                } for o in all_officers[:50]])  # Limit to 50 for display
+                st.dataframe(officers_df, use_container_width=True)
+
+            # Show PSC
+            if all_psc:
+                st.markdown("#### Persons with Significant Control")
+                psc_df = pd.DataFrame([{
+                    "Name": p.get("name"),
+                    "Kind": p.get("kind", "").replace("-", " ").title(),
+                    "Control": ", ".join(p.get("natures_of_control", [])),
+                    "Nationality": p.get("nationality", ""),
+                } for p in all_psc[:50]])
+                st.dataframe(psc_df, use_container_width=True)
+
+        progress_bar.progress(100)
+        status_container.success(f"✅ Scraping completed! Found {len(companies)} companies, {len(all_officers)} officers, {len(all_psc)} PSC")
+
+        # Save to database if requested
+        if save_to_db:
+            save_corporate_data_to_db(companies, all_officers, all_psc, "uk_companies_house")
+
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+
+def show_info_financiere_scraper():
+    """Info-Financière scraper interface"""
+    st.markdown("### Info-Financière (France) Configuration")
+
+    st.success("✅ No API key required (FREE)")
+
+    # Configuration
+    col1, col2 = st.columns(2)
+
+    with col1:
+        query = st.text_input(
+            "Search Query (optional)",
+            value="",
+            help="Company name, ISIN, or leave blank for all"
+        )
+        days_back = st.number_input(
+            "Days Back",
+            min_value=1,
+            max_value=365,
+            value=30,
+            help="How many days of history to fetch"
+        )
+
+    with col2:
+        max_results = st.number_input(
+            "Max Results",
+            min_value=1,
+            max_value=100,
+            value=20
+        )
+        save_to_db = st.checkbox("Save to Database", value=False)
+
+    # Run scraper
+    if st.button("🚀 Run Info-Financière Scraper", type="primary"):
+        run_info_financiere_scraper(query, days_back, max_results, save_to_db)
+
+
+def run_info_financiere_scraper(
+    query: str,
+    days_back: int,
+    max_results: int,
+    save_to_db: bool
+):
+    """Execute Info-Financière scraper"""
+    try:
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import InfoFinanciereAPIScraper
+
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        results_container = st.empty()
+
+        # Initialize scraper
+        status_container.info("🔄 Initializing Info-Financière scraper...")
+        scraper = InfoFinanciereAPIScraper()
+        progress_bar.progress(20)
+
+        # Calculate date range
+        from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
+        to_date = datetime.now().strftime("%Y-%m-%d")
+
+        # Search publications
+        status_container.info(f"🔍 Searching publications ({from_date} to {to_date})...")
+        publications = scraper.search_publications(
+            query=query or None,
+            from_date=from_date,
+            to_date=to_date,
+            per_page=max_results
+        )
+        progress_bar.progress(80)
+
+        # Display results
+        with results_container:
+            st.markdown("### 📊 Scraping Results")
+
+            if not publications:
+                st.warning(f"⚠️ No publications found for the given criteria")
+                return
+
+            st.metric("Publications Found", len(publications))
+
+            # Show publications
+            pubs_df = pd.DataFrame([{
+                "Date": p.get("publication_date", ""),
+                "Title": p.get("title", "")[:100],
+                "Type": p.get("publication_type", ""),
+                "Issuer": p.get("issuer_name", "")
+            } for p in publications])
+            st.dataframe(pubs_df, use_container_width=True)
+
+        progress_bar.progress(100)
+        status_container.success(f"✅ Scraping completed! Found {len(publications)} publications")
+
+        if save_to_db:
+            save_financial_publications_to_db(publications, "info_financiere")
+
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+
+def show_opencorporates_scraper():
+    """OpenCorporates scraper interface"""
+    st.markdown("### OpenCorporates Configuration")
+
+    api_key = os.getenv("OPENCORPORATES_API_KEY") or st.secrets.get("OPENCORPORATES_API_KEY", "")
+
+    if api_key:
+        st.success("✅ API key configured")
+    else:
+        st.info("ℹ️ No API key (free tier with rate limits). Get API key for better performance: https://opencorporates.com/api_accounts/new")
+
+    # Configuration
+    col1, col2 = st.columns(2)
+
+    with col1:
+        query = st.text_input(
+            "Company Name",
+            value="Apple",
+            help="Company name to search for"
+        )
+        jurisdiction = st.selectbox(
+            "Jurisdiction (optional)",
+            ["", "us_ca", "us_de", "us_ny", "gb", "de", "fr", "nl"],
+            help="Filter by jurisdiction code"
+        )
+
+    with col2:
+        max_results = st.number_input(
+            "Max Results",
+            min_value=1,
+            max_value=100,
+            value=10
+        )
+        save_to_db = st.checkbox("Save to Database", value=False)
+
+    # Run scraper
+    if st.button("🚀 Run OpenCorporates Scraper", type="primary"):
+        run_opencorporates_scraper(query, jurisdiction or None, max_results, save_to_db)
+
+
+def run_opencorporates_scraper(
+    query: str,
+    jurisdiction: str,
+    max_results: int,
+    save_to_db: bool
+):
+    """Execute OpenCorporates scraper"""
+    try:
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import OpenCorporatesScraper
+
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        results_container = st.empty()
+
+        # Initialize scraper
+        status_container.info("🔄 Initializing OpenCorporates scraper...")
+        scraper = OpenCorporatesScraper()
+        progress_bar.progress(20)
+
+        # Search companies
+        status_container.info(f"🔍 Searching for '{query}'...")
+        companies = scraper.search_companies(
+            query,
+            jurisdiction_code=jurisdiction,
+            per_page=max_results
+        )
+        progress_bar.progress(80)
+
+        # Display results
+        with results_container:
+            st.markdown("### 📊 Scraping Results")
+
+            if not companies:
+                st.warning(f"⚠️ No companies found matching '{query}'")
+                return
+
+            st.metric("Companies Found", len(companies))
+
+            # Show companies
+            companies_df = pd.DataFrame([{
+                "Jurisdiction": c.get("company", {}).get("jurisdiction_code", ""),
+                "Number": c.get("company", {}).get("company_number", ""),
+                "Name": c.get("company", {}).get("name", ""),
+                "Status": c.get("company", {}).get("current_status", ""),
+                "Type": c.get("company", {}).get("company_type", "")
+            } for c in companies])
+            st.dataframe(companies_df, use_container_width=True)
+
+        progress_bar.progress(100)
+        status_container.success(f"✅ Scraping completed! Found {len(companies)} companies")
+
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+
+def show_xbrl_filings_scraper():
+    """XBRL Filings scraper interface"""
+    st.markdown("### XBRL Filings (EU/UK) Configuration")
+
+    st.success("✅ No API key required (FREE)")
+
+    # Configuration
+    col1, col2 = st.columns(2)
+
+    with col1:
+        country = st.selectbox(
+            "Country (optional)",
+            ["", "GB", "FR", "DE", "ES", "IT", "NL", "BE"],
+            help="Filter by country code"
+        )
+        days_back = st.number_input(
+            "Days Back",
+            min_value=1,
+            max_value=365,
+            value=30
+        )
+
+    with col2:
+        max_results = st.number_input(
+            "Max Results",
+            min_value=1,
+            max_value=500,
+            value=100
+        )
+        save_to_db = st.checkbox("Save to Database", value=False)
+
+    # Run scraper
+    if st.button("🚀 Run XBRL Filings Scraper", type="primary"):
+        run_xbrl_filings_scraper(country or None, days_back, max_results, save_to_db)
+
+
+def run_xbrl_filings_scraper(
+    country: str,
+    days_back: int,
+    max_results: int,
+    save_to_db: bool
+):
+    """Execute XBRL Filings scraper"""
+    try:
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import XBRLFilingsScraper
+
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        results_container = st.empty()
+
+        # Initialize scraper
+        status_container.info("🔄 Initializing XBRL Filings scraper...")
+        scraper = XBRLFilingsScraper()
+        progress_bar.progress(20)
+
+        # Calculate date range
+        from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
+
+        # Get filings
+        status_container.info(f"🔍 Fetching XBRL filings since {from_date}...")
+        filings = scraper.get_filings(
+            country=country,
+            from_date=from_date,
+            page_size=max_results
+        )
+        progress_bar.progress(80)
+
+        # Display results
+        with results_container:
+            st.markdown("### 📊 Scraping Results")
+
+            if not filings:
+                st.warning(f"⚠️ No filings found for the given criteria")
+                return
+
+            st.metric("Filings Found", len(filings))
+
+            # Show filings
+            filings_df = pd.DataFrame([{
+                "ID": f.get("id", ""),
+                "Country": f.get("attributes", {}).get("country", ""),
+                "Entity": f.get("attributes", {}).get("entity_name", "")[:50],
+                "Period": f.get("attributes", {}).get("period_end", ""),
+                "Date Added": f.get("attributes", {}).get("date_added", "")
+            } for f in filings])
+            st.dataframe(filings_df, use_container_width=True)
+
+        progress_bar.progress(100)
+        status_container.success(f"✅ Scraping completed! Found {len(filings)} filings")
+
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+
+def show_xbrl_us_scraper():
+    """XBRL US scraper interface"""
+    st.markdown("### XBRL US Configuration")
+
+    api_key = os.getenv("XBRL_US_API_KEY") or st.secrets.get("XBRL_US_API_KEY", "")
+
+    if not api_key:
+        st.error("❌ XBRL US API key not configured")
+        st.info("""
+        To use this scraper, set `XBRL_US_API_KEY` in:
+        - Streamlit Cloud: Settings → Secrets
+        - Local: .streamlit/secrets.toml or environment variable
+
+        Get free API key: https://xbrl.us/home/use/xbrl-api/
+        """)
+        return
+
+    st.success("✅ API key configured")
+
+    # Configuration
+    col1, col2 = st.columns(2)
+
+    with col1:
+        query = st.text_input(
+            "Company Name or Ticker",
+            value="Tesla",
+            help="Search by company name or stock ticker"
+        )
+
+    with col2:
+        max_results = st.number_input(
+            "Max Results",
+            min_value=1,
+            max_value=100,
+            value=10
+        )
+        save_to_db = st.checkbox("Save to Database", value=False)
+
+    # Run scraper
+    if st.button("🚀 Run XBRL US Scraper", type="primary"):
+        run_xbrl_us_scraper(query, max_results, save_to_db)
+
+
+def run_xbrl_us_scraper(
+    query: str,
+    max_results: int,
+    save_to_db: bool
+):
+    """Execute XBRL US scraper"""
+    try:
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import XBRLUSScraper
+
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        results_container = st.empty()
+
+        # Initialize scraper
+        status_container.info("🔄 Initializing XBRL US scraper...")
+        scraper = XBRLUSScraper()
+        progress_bar.progress(20)
+
+        # Search companies
+        status_container.info(f"🔍 Searching for '{query}'...")
+        entities = scraper.search_companies(query, limit=max_results)
+        progress_bar.progress(80)
+
+        # Display results
+        with results_container:
+            st.markdown("### 📊 Scraping Results")
+
+            if not entities:
+                st.warning(f"⚠️ No entities found matching '{query}'")
+                return
+
+            st.metric("Entities Found", len(entities))
+
+            # Show entities
+            entities_df = pd.DataFrame([{
+                "ID": e.get("entity", {}).get("id", ""),
+                "Name": e.get("entity", {}).get("name", ""),
+                "CIK": e.get("entity", {}).get("cik", ""),
+                "Ticker": e.get("entity", {}).get("ticker", "")
+            } for e in entities])
+            st.dataframe(entities_df, use_container_width=True)
+
+        progress_bar.progress(100)
+        status_container.success(f"✅ Scraping completed! Found {len(entities)} entities")
+
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+
+def show_senate_watcher_scraper():
+    """Senate Stock Watcher scraper interface"""
+    st.markdown("### Senate Stock Watcher (GitHub) Configuration")
+
+    st.success("✅ No API key required (FREE)")
+
+    # Configuration
+    col1, col2 = st.columns(2)
+
+    with col1:
+        recent_only = st.checkbox("Recent Only", value=True)
+        days_back = st.number_input(
+            "Days Back (if recent)",
+            min_value=1,
+            max_value=365,
+            value=90,
+            disabled=not recent_only
+        )
+
+    with col2:
+        save_to_db = st.checkbox("Save to Database", value=True)
+
+    # Run scraper
+    if st.button("🚀 Run Senate Stock Watcher Scraper", type="primary"):
+        run_senate_watcher_scraper(recent_only, days_back, save_to_db)
+
+
+def run_senate_watcher_scraper(
+    recent_only: bool,
+    days_back: int,
+    save_to_db: bool
+):
+    """Execute Senate Stock Watcher scraper"""
+    try:
+        from mcli.workflow.politician_trading.scrapers_free_sources import FreeDataFetcher
+
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        results_container = st.empty()
+
+        # Initialize fetcher
+        status_container.info("🔄 Initializing Senate Stock Watcher scraper...")
+        fetcher = FreeDataFetcher()
+        progress_bar.progress(20)
+
+        # Fetch data
+        status_container.info("🔍 Fetching Senate trading data from GitHub...")
+        data = fetcher.fetch_from_senate_watcher(recent_only=recent_only, days=days_back)
+        progress_bar.progress(80)
+
+        politicians = data.get("politicians", [])
+        disclosures = data.get("disclosures", [])
+
+        # Display results
+        with results_container:
+            st.markdown("### 📊 Scraping Results")
+
+            col1, col2 = st.columns(2)
+            col1.metric("Politicians", len(politicians))
+            col2.metric("Disclosures", len(disclosures))
+
+            # Show disclosures
+            if disclosures:
+                st.markdown("#### Recent Trading Disclosures")
+                disc_df = pd.DataFrame([{
+                    "Date": d.transaction_date.strftime("%Y-%m-%d") if hasattr(d.transaction_date, 'strftime') else str(d.transaction_date),
+                    "Politician": d.politician_bioguide_id,
+                    "Type": d.transaction_type,
+                    "Asset": d.asset_name[:50],
+                    "Ticker": d.asset_ticker or "",
+                    "Min": f"${d.amount_range_min:,.0f}" if d.amount_range_min else "",
+                    "Max": f"${d.amount_range_max:,.0f}" if d.amount_range_max else ""
+                } for d in disclosures[:100]])  # Limit to 100 for display
+                st.dataframe(disc_df, use_container_width=True)
+
+        progress_bar.progress(100)
+        status_container.success(f"✅ Scraping completed! Found {len(politicians)} politicians, {len(disclosures)} disclosures")
+
+        if save_to_db:
+            save_politician_trading_to_db(politicians, disclosures)
+
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+
+def save_corporate_data_to_db(companies, officers, psc, source):
+    """Save corporate data to Supabase"""
+    st.info("⚠️ Database saving not yet implemented. Data displayed above.")
+    # TODO: Implement Supabase upsert logic
+
+
+def save_financial_publications_to_db(publications, source):
+    """Save financial publications to Supabase"""
+    st.info("⚠️ Database saving not yet implemented. Data displayed above.")
+    # TODO: Implement Supabase upsert logic
+
+
+def save_politician_trading_to_db(politicians, disclosures):
+    """Save politician trading data to Supabase"""
+    st.info("⚠️ Using existing seed_database.py logic for this source")
+    # TODO: Call seed_database.py functions
+
+
+def show_scraper_logs():
+    """Display scraper logs"""
+    st.subheader("📊 Scraper Logs")
+
+    st.markdown("""
+    View real-time logs from scraping operations and data pull jobs.
+    """)
+
+    # Get logs from Supabase data_pull_jobs
+    try:
+        from mcli.ml.dashboard.app_integrated import get_supabase_client
+
+        client = get_supabase_client()
+
+        if client:
+            # Get recent jobs
+            jobs = client.table("data_pull_jobs").select("*").order("created_at", desc=True).limit(50).execute()
+
+            if jobs.data:
+                st.markdown("### Recent Data Pull Jobs")
+
+                jobs_df = pd.DataFrame(jobs.data)
+
+                # Format dates
+                for col in ['started_at', 'completed_at', 'created_at']:
+                    if col in jobs_df.columns:
+                        jobs_df[col] = pd.to_datetime(jobs_df[col], format='ISO8601', errors='coerce')
+
+                # Display jobs table
+                display_df = jobs_df[[
+                    'created_at', 'job_type', 'status', 'records_found',
+                    'records_new', 'records_updated', 'records_failed'
+                ]].copy()
+
+                display_df.columns = [
+                    'Timestamp', 'Job Type', 'Status', 'Found',
+                    'New', 'Updated', 'Failed'
+                ]
+
+                st.dataframe(display_df, use_container_width=True)
+
+                # Job details
+                st.markdown("### Job Details")
+
+                selected_job = st.selectbox(
+                    "Select Job",
+                    jobs_df['id'].tolist(),
+                    format_func=lambda x: f"{jobs_df[jobs_df['id']==x]['job_type'].values[0]} - {jobs_df[jobs_df['id']==x]['created_at'].values[0]}"
+                )
+
+                if selected_job:
+                    job = jobs_df[jobs_df['id'] == selected_job].iloc[0]
+
+                    col1, col2, col3, col4 = st.columns(4)
+                    col1.metric("Status", job['status'])
+                    col2.metric("Records Found", job['records_found'])
+                    col3.metric("New Records", job['records_new'])
+                    col4.metric("Failed", job['records_failed'])
+
+                    if job.get('error_message'):
+                        st.error(f"**Error:** {job['error_message']}")
+
+                    # Show config snapshot
+                    if job.get('config_snapshot'):
+                        with st.expander("Configuration Snapshot"):
+                            st.json(job['config_snapshot'])
+
+            else:
+                st.info("No jobs found in database")
+
+        else:
+            st.warning("Supabase not connected - logs unavailable")
+
+    except Exception as e:
+        st.error(f"Error loading scraper logs: {e}")
+
+
+def show_system_logs():
+    """Display system logs"""
+    st.subheader("📝 System Logs")
+
+    st.markdown("""
+    View application logs, errors, and system events.
+    """)
+
+    # Log file path
+    log_file = Path("/tmp/seed_database.log")
+
+    if log_file.exists():
+        try:
+            with open(log_file, 'r') as f:
+                logs = f.readlines()
+
+            # Filter options
+            col1, col2, col3 = st.columns(3)
+
+            with col1:
+                log_level = st.selectbox(
+                    "Log Level",
+                    ["ALL", "ERROR", "WARNING", "INFO", "DEBUG"]
+                )
+
+            with col2:
+                lines_to_show = st.number_input(
+                    "Lines to Show",
+                    min_value=10,
+                    max_value=1000,
+                    value=100
+                )
+
+            with col3:
+                search_term = st.text_input("Search", value="")
+
+            # Filter logs
+            filtered_logs = logs[-lines_to_show:]
+
+            if log_level != "ALL":
+                filtered_logs = [l for l in filtered_logs if log_level in l]
+
+            if search_term:
+                filtered_logs = [l for l in filtered_logs if search_term.lower() in l.lower()]
+
+            # Display logs
+            st.text_area(
+                "Log Output",
+                "".join(filtered_logs),
+                height=400
+            )
+
+            # Download button
+            st.download_button(
+                "Download Full Logs",
+                "".join(logs),
+                file_name=f"system_logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+                mime="text/plain"
+            )
+
+        except Exception as e:
+            st.error(f"Error reading log file: {e}")
+    else:
+        st.info(f"Log file not found at {log_file}. Logs will appear here after scraping jobs run.")
+
+        # Create example logs display
+        st.markdown("### Example Log Output")
+        st.code("""
+2025-10-07 12:00:00 - INFO - Starting data pull job: senate_watcher_seed
+2025-10-07 12:00:05 - INFO - Fetched 8350 Senate transactions
+2025-10-07 12:00:10 - INFO - Upserted 89 politicians (5 new, 84 updated)
+2025-10-07 12:01:30 - INFO - Upserted 8350 disclosures (6353 new, 1893 updated, 104 failed)
+2025-10-07 12:01:31 - INFO - Job completed successfully
+        """, language="log")
+
+
+def show_job_history():
+    """Display job history and statistics"""
+    st.subheader("📈 Job History & Statistics")
+
+    st.markdown("""
+    View historical data about scraping jobs, success rates, and trends.
+    """)
+
+    try:
+        from mcli.ml.dashboard.app_integrated import get_supabase_client
+
+        client = get_supabase_client()
+
+        if client:
+            # Get all jobs
+            jobs = client.table("data_pull_jobs").select("*").order("created_at", desc=True).limit(1000).execute()
+
+            if jobs.data and len(jobs.data) > 0:
+                jobs_df = pd.DataFrame(jobs.data)
+
+                # Format dates
+                for col in ['started_at', 'completed_at', 'created_at']:
+                    if col in jobs_df.columns:
+                        jobs_df[col] = pd.to_datetime(jobs_df[col], format='ISO8601', errors='coerce')
+
+                # Statistics
+                st.markdown("### Overall Statistics")
+
+                col1, col2, col3, col4 = st.columns(4)
+
+                total_jobs = len(jobs_df)
+                completed_jobs = len(jobs_df[jobs_df['status'] == 'completed'])
+                failed_jobs = len(jobs_df[jobs_df['status'] == 'failed'])
+                success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0
+
+                col1.metric("Total Jobs", total_jobs)
+                col2.metric("Completed", completed_jobs)
+                col3.metric("Failed", failed_jobs)
+                col4.metric("Success Rate", f"{success_rate:.1f}%")
+
+                # Job type breakdown
+                st.markdown("### Job Type Breakdown")
+
+                job_type_counts = jobs_df['job_type'].value_counts()
+
+                fig = px.pie(
+                    values=job_type_counts.values,
+                    names=job_type_counts.index,
+                    title="Jobs by Type"
+                )
+                st.plotly_chart(fig, use_container_width=True)
+
+                # Status breakdown
+                st.markdown("### Status Breakdown")
+
+                status_counts = jobs_df['status'].value_counts()
+
+                fig = px.bar(
+                    x=status_counts.index,
+                    y=status_counts.values,
+                    labels={'x': 'Status', 'y': 'Count'},
+                    title="Jobs by Status"
+                )
+                st.plotly_chart(fig, use_container_width=True)
+
+                # Timeline
+                st.markdown("### Job Timeline")
+
+                jobs_df['date'] = jobs_df['created_at'].dt.date
+
+                timeline_df = jobs_df.groupby(['date', 'status']).size().reset_index(name='count')
+
+                fig = px.line(
+                    timeline_df,
+                    x='date',
+                    y='count',
+                    color='status',
+                    title="Jobs Over Time"
+                )
+                st.plotly_chart(fig, use_container_width=True)
+
+                # Records processed
+                st.markdown("### Records Processed")
+
+                records_df = jobs_df[jobs_df['status'] == 'completed'][['created_at', 'records_found', 'records_new', 'records_updated', 'records_failed']].copy()
+
+                if not records_df.empty:
+                    fig = go.Figure()
+
+                    fig.add_trace(go.Scatter(
+                        x=records_df['created_at'],
+                        y=records_df['records_new'],
+                        name='New Records',
+                        mode='lines+markers'
+                    ))
+
+                    fig.add_trace(go.Scatter(
+                        x=records_df['created_at'],
+                        y=records_df['records_updated'],
+                        name='Updated Records',
+                        mode='lines+markers'
+                    ))
+
+                    fig.add_trace(go.Scatter(
+                        x=records_df['created_at'],
+                        y=records_df['records_failed'],
+                        name='Failed Records',
+                        mode='lines+markers'
+                    ))
+
+                    fig.update_layout(
+                        title="Records Processed Over Time",
+                        xaxis_title="Date",
+                        yaxis_title="Count",
+                        hovermode='x unified'
+                    )
+
+                    st.plotly_chart(fig, use_container_width=True)
+
+
+            else:
+                st.info("No job history available yet. Run some scraping jobs to see statistics here.")
+
+        else:
+            st.warning("Supabase not connected - job history unavailable")
+
+    except Exception as e:
+        st.error(f"Error loading job history: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+
+# Export for use in main dashboard
+__all__ = ["show_scrapers_and_logs"]
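Usage note (not part of the published wheel contents): the added module exports only show_scrapers_and_logs, which the integrated dashboard is expected to import and render. A minimal sketch of mounting it in a standalone Streamlit script is shown below; the page title and layout values are illustrative assumptions, not taken from the package.

# hypothetical_scrapers_page.py - minimal usage sketch, assuming mcli-framework 7.3.1 and streamlit are installed
import streamlit as st

from mcli.ml.dashboard.pages.scrapers_and_logs import show_scrapers_and_logs

# Page title and layout are assumptions for this sketch, not values from app_integrated.py.
st.set_page_config(page_title="mcli ML Dashboard", layout="wide")

# Render the new scrapers/logs page exactly as the main dashboard would.
show_scrapers_and_logs()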