mcli-framework 7.6.0__py3-none-any.whl → 7.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic.
- mcli/app/commands_cmd.py +51 -39
- mcli/app/main.py +10 -2
- mcli/app/model_cmd.py +1 -1
- mcli/lib/custom_commands.py +4 -10
- mcli/ml/api/app.py +1 -5
- mcli/ml/dashboard/app.py +2 -2
- mcli/ml/dashboard/app_integrated.py +168 -116
- mcli/ml/dashboard/app_supabase.py +7 -3
- mcli/ml/dashboard/app_training.py +3 -6
- mcli/ml/dashboard/components/charts.py +74 -115
- mcli/ml/dashboard/components/metrics.py +24 -44
- mcli/ml/dashboard/components/tables.py +32 -40
- mcli/ml/dashboard/overview.py +102 -78
- mcli/ml/dashboard/pages/cicd.py +103 -56
- mcli/ml/dashboard/pages/debug_dependencies.py +35 -28
- mcli/ml/dashboard/pages/gravity_viz.py +374 -313
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +50 -48
- mcli/ml/dashboard/pages/predictions_enhanced.py +396 -248
- mcli/ml/dashboard/pages/scrapers_and_logs.py +299 -273
- mcli/ml/dashboard/pages/test_portfolio.py +153 -121
- mcli/ml/dashboard/pages/trading.py +238 -169
- mcli/ml/dashboard/pages/workflows.py +129 -84
- mcli/ml/dashboard/streamlit_extras_utils.py +70 -79
- mcli/ml/dashboard/utils.py +24 -21
- mcli/ml/dashboard/warning_suppression.py +6 -4
- mcli/ml/database/session.py +16 -5
- mcli/ml/mlops/pipeline_orchestrator.py +1 -3
- mcli/ml/predictions/monte_carlo.py +6 -18
- mcli/ml/trading/alpaca_client.py +95 -96
- mcli/ml/trading/migrations.py +76 -40
- mcli/ml/trading/models.py +78 -60
- mcli/ml/trading/paper_trading.py +92 -74
- mcli/ml/trading/risk_management.py +106 -85
- mcli/ml/trading/trading_service.py +155 -110
- mcli/ml/training/train_model.py +1 -3
- mcli/self/self_cmd.py +71 -57
- mcli/workflow/daemon/daemon.py +2 -0
- mcli/workflow/model_service/openai_adapter.py +6 -2
- mcli/workflow/politician_trading/models.py +6 -2
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +39 -88
- mcli/workflow/politician_trading/scrapers_free_sources.py +32 -39
- mcli/workflow/politician_trading/scrapers_third_party.py +21 -39
- mcli/workflow/politician_trading/seed_database.py +70 -89
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/RECORD +49 -49
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.2.dist-info}/top_level.txt +0 -0
mcli/ml/dashboard/pages/scrapers_and_logs.py

@@ -32,25 +32,22 @@ logger = logging.getLogger(__name__)
 def show_scrapers_and_logs():
     """Main function for scrapers and logs page"""
     st.header("🔍 Data Scrapers & System Logs")
-
+
     # Add a simple test to ensure the page is rendering
     st.info("📋 Page loaded successfully - Scrapers & Logs functionality is available")
 
-    st.markdown(
+    st.markdown(
+        """
     **Features:**
     - 🚀 Manual data scraping from corporate registries
     - 📊 Real-time scraper logs and job status
     - 📝 System logs viewer
     - 📈 Job history and statistics
-    """
+    """
+    )
 
     # Create tabs
-    tabs = st.tabs([
-        "🚀 Manual Scraping",
-        "📊 Scraper Logs",
-        "📝 System Logs",
-        "📈 Job History"
-    ])
+    tabs = st.tabs(["🚀 Manual Scraping", "📊 Scraper Logs", "📝 System Logs", "📈 Job History"])
 
     with tabs[0]:
         show_manual_scraping()

@@ -69,10 +66,12 @@ def show_manual_scraping():
     """Manual scraping interface"""
     st.subheader("🚀 Manual Data Scraping")
 
-    st.markdown(
+    st.markdown(
+        """
     Manually trigger data scraping jobs from various sources.
     Select a source, configure parameters, and run the scraper.
-    """
+    """
+    )
 
     # Source selection
     source_type = st.selectbox(

@@ -85,7 +84,7 @@ def show_manual_scraping():
             "XBRL US",
             "Senate Stock Watcher (GitHub)",
         ],
-        help="Choose which data source to scrape"
+        help="Choose which data source to scrape",
     )
 
     # Source-specific configuration
@@ -108,17 +107,21 @@ def show_uk_companies_house_scraper():
     st.markdown("### UK Companies House Configuration")
 
     # Check API key
-    api_key = os.getenv("UK_COMPANIES_HOUSE_API_KEY") or st.secrets.get(
+    api_key = os.getenv("UK_COMPANIES_HOUSE_API_KEY") or st.secrets.get(
+        "UK_COMPANIES_HOUSE_API_KEY", ""
+    )
 
     if not api_key:
         st.error("❌ UK Companies House API key not configured")
-        st.info(
+        st.info(
+            """
         To use this scraper, set `UK_COMPANIES_HOUSE_API_KEY` in:
         - Streamlit Cloud: Settings → Secrets
         - Local: .streamlit/secrets.toml or environment variable
 
         Get free API key: https://developer.company-information.service.gov.uk/
-        """
+        """
+        )
         return
 
     st.success("✅ API key configured")

@@ -128,16 +131,14 @@ def show_uk_companies_house_scraper():
 
     with col1:
         company_query = st.text_input(
-            "Company Name",
-            value="Tesco",
-            help="Company name to search for"
+            "Company Name", value="Tesco", help="Company name to search for"
         )
         max_results = st.number_input(
             "Max Results",
             min_value=1,
             max_value=100,
             value=10,
-            help="Maximum number of companies to fetch"
+            help="Maximum number of companies to fetch",
         )
 
     with col2:
@@ -148,24 +149,18 @@ def show_uk_companies_house_scraper():
     # Run scraper
     if st.button("🚀 Run UK Companies House Scraper", type="primary"):
         run_uk_companies_house_scraper(
-            company_query,
-            max_results,
-            fetch_officers,
-            fetch_psc,
-            save_to_db
+            company_query, max_results, fetch_officers, fetch_psc, save_to_db
         )
 
 
 def run_uk_companies_house_scraper(
-    query: str,
-    max_results: int,
-    fetch_officers: bool,
-    fetch_psc: bool,
-    save_to_db: bool
+    query: str, max_results: int, fetch_officers: bool, fetch_psc: bool, save_to_db: bool
 ):
     """Execute UK Companies House scraper"""
     try:
-        from mcli.workflow.politician_trading.scrapers_corporate_registry import
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import (
+            UKCompaniesHouseScraper,
+        )
 
         # Create log capture
         log_stream = StringIO()

@@ -174,7 +169,9 @@ def run_uk_companies_house_scraper(
         formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
         handler.setFormatter(formatter)
 
-        scraper_logger = logging.getLogger(
+        scraper_logger = logging.getLogger(
+            "mcli.workflow.politician_trading.scrapers_corporate_registry"
+        )
         scraper_logger.addHandler(handler)
 
         # Create progress containers

@@ -220,11 +217,7 @@ def run_uk_companies_house_scraper(
                 progress_bar.progress(30 + int((i + 1) / len(companies) * 50))
 
                 # Display logs
-                log_container.text_area(
-                    "Scraper Logs",
-                    log_stream.getvalue(),
-                    height=200
-                )
+                log_container.text_area("Scraper Logs", log_stream.getvalue(), height=200)
 
         # Display results
         with results_container:

@@ -237,40 +230,57 @@ def run_uk_companies_house_scraper(
 
             # Show companies
             st.markdown("#### Companies Found")
-            companies_df = pd.DataFrame(
+            companies_df = pd.DataFrame(
+                [
+                    {
+                        "Number": c.get("company_number"),
+                        "Name": c.get("title"),
+                        "Status": c.get("company_status"),
+                        "Type": c.get("company_type"),
+                        "Address": c.get("address_snippet", "")[:50],
+                    }
+                    for c in companies
+                ]
+            )
             st.dataframe(companies_df, use_container_width=True)
 
             # Show officers
             if all_officers:
                 st.markdown("#### Officers Found")
-                officers_df = pd.DataFrame(
+                officers_df = pd.DataFrame(
+                    [
+                        {
+                            "Name": o.get("name"),
+                            "Role": o.get("officer_role"),
+                            "Appointed": o.get("appointed_on", ""),
+                            "Nationality": o.get("nationality", ""),
+                            "Occupation": o.get("occupation", ""),
+                        }
+                        for o in all_officers[:50]
+                    ]
+                )  # Limit to 50 for display
                 st.dataframe(officers_df, use_container_width=True)
 
             # Show PSC
             if all_psc:
                 st.markdown("#### Persons with Significant Control")
-                psc_df = pd.DataFrame(
+                psc_df = pd.DataFrame(
+                    [
+                        {
+                            "Name": p.get("name"),
+                            "Kind": p.get("kind", "").replace("-", " ").title(),
+                            "Control": ", ".join(p.get("natures_of_control", [])),
+                            "Nationality": p.get("nationality", ""),
+                        }
+                        for p in all_psc[:50]
+                    ]
+                )
                 st.dataframe(psc_df, use_container_width=True)
 
             progress_bar.progress(100)
-            status_container.success(
+            status_container.success(
+                f"✅ Scraping completed! Found {len(companies)} companies, {len(all_officers)} officers, {len(all_psc)} PSC"
+            )
 
             # Save to database if requested
             if save_to_db:

@@ -279,6 +289,7 @@ def run_uk_companies_house_scraper(
     except Exception as e:
         st.error(f"❌ Error: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 
@@ -293,25 +304,18 @@ def show_info_financiere_scraper():
 
     with col1:
         query = st.text_input(
-            "Search Query (optional)",
-            value="",
-            help="Company name, ISIN, or leave blank for all"
+            "Search Query (optional)", value="", help="Company name, ISIN, or leave blank for all"
         )
         days_back = st.number_input(
             "Days Back",
             min_value=1,
             max_value=365,
             value=30,
-            help="How many days of history to fetch"
+            help="How many days of history to fetch",
         )
 
     with col2:
-        max_results = st.number_input(
-            "Max Results",
-            min_value=1,
-            max_value=100,
-            value=20
-        )
+        max_results = st.number_input("Max Results", min_value=1, max_value=100, value=20)
         save_to_db = st.checkbox("Save to Database", value=False)
 
     # Run scraper

@@ -319,15 +323,12 @@ def show_info_financiere_scraper():
         run_info_financiere_scraper(query, days_back, max_results, save_to_db)
 
 
-def run_info_financiere_scraper(
-    query: str,
-    days_back: int,
-    max_results: int,
-    save_to_db: bool
-):
+def run_info_financiere_scraper(query: str, days_back: int, max_results: int, save_to_db: bool):
     """Execute Info-Financière scraper"""
     try:
-        from mcli.workflow.politician_trading.scrapers_corporate_registry import
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import (
+            InfoFinanciereAPIScraper,
+        )
 
         status_container = st.empty()
         progress_bar = st.progress(0)

@@ -345,10 +346,7 @@ def run_info_financiere_scraper(
         # Search publications
         status_container.info(f"🔍 Searching publications ({from_date} to {to_date})...")
         publications = scraper.search_publications(
-            query=query or None,
-            from_date=from_date,
-            to_date=to_date,
-            per_page=max_results
+            query=query or None, from_date=from_date, to_date=to_date, per_page=max_results
         )
         progress_bar.progress(80)
 

@@ -363,12 +361,17 @@ def run_info_financiere_scraper(
             st.metric("Publications Found", len(publications))
 
             # Show publications
-            pubs_df = pd.DataFrame(
+            pubs_df = pd.DataFrame(
+                [
+                    {
+                        "Date": p.get("publication_date", ""),
+                        "Title": p.get("title", "")[:100],
+                        "Type": p.get("publication_type", ""),
+                        "Issuer": p.get("issuer_name", ""),
+                    }
+                    for p in publications
+                ]
+            )
             st.dataframe(pubs_df, use_container_width=True)
 
             progress_bar.progress(100)

@@ -380,6 +383,7 @@ def run_info_financiere_scraper(
     except Exception as e:
         st.error(f"❌ Error: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 
@@ -392,30 +396,23 @@ def show_opencorporates_scraper():
     if api_key:
         st.success("✅ API key configured")
     else:
-        st.info(
+        st.info(
+            "ℹ️ No API key (free tier with rate limits). Get API key for better performance: https://opencorporates.com/api_accounts/new"
+        )
 
     # Configuration
     col1, col2 = st.columns(2)
 
     with col1:
-        query = st.text_input(
-            "Company Name",
-            value="Apple",
-            help="Company name to search for"
-        )
+        query = st.text_input("Company Name", value="Apple", help="Company name to search for")
         jurisdiction = st.selectbox(
             "Jurisdiction (optional)",
             ["", "us_ca", "us_de", "us_ny", "gb", "de", "fr", "nl"],
-            help="Filter by jurisdiction code"
+            help="Filter by jurisdiction code",
         )
 
     with col2:
-        max_results = st.number_input(
-            "Max Results",
-            min_value=1,
-            max_value=100,
-            value=10
-        )
+        max_results = st.number_input("Max Results", min_value=1, max_value=100, value=10)
        save_to_db = st.checkbox("Save to Database", value=False)
 
     # Run scraper

@@ -423,15 +420,12 @@ def show_opencorporates_scraper():
         run_opencorporates_scraper(query, jurisdiction or None, max_results, save_to_db)
 
 
-def run_opencorporates_scraper(
-    query: str,
-    jurisdiction: str,
-    max_results: int,
-    save_to_db: bool
-):
+def run_opencorporates_scraper(query: str, jurisdiction: str, max_results: int, save_to_db: bool):
     """Execute OpenCorporates scraper"""
     try:
-        from mcli.workflow.politician_trading.scrapers_corporate_registry import
+        from mcli.workflow.politician_trading.scrapers_corporate_registry import (
+            OpenCorporatesScraper,
+        )
 
         status_container = st.empty()
         progress_bar = st.progress(0)

@@ -445,9 +439,7 @@ def run_opencorporates_scraper(
         # Search companies
         status_container.info(f"🔍 Searching for '{query}'...")
         companies = scraper.search_companies(
-            query,
-            jurisdiction_code=jurisdiction,
-            per_page=max_results
+            query, jurisdiction_code=jurisdiction, per_page=max_results
         )
         progress_bar.progress(80)
 

@@ -462,13 +454,18 @@ def run_opencorporates_scraper(
             st.metric("Companies Found", len(companies))
 
             # Show companies
-            companies_df = pd.DataFrame(
+            companies_df = pd.DataFrame(
+                [
+                    {
+                        "Jurisdiction": c.get("company", {}).get("jurisdiction_code", ""),
+                        "Number": c.get("company", {}).get("company_number", ""),
+                        "Name": c.get("company", {}).get("name", ""),
+                        "Status": c.get("company", {}).get("current_status", ""),
+                        "Type": c.get("company", {}).get("company_type", ""),
+                    }
+                    for c in companies
+                ]
+            )
             st.dataframe(companies_df, use_container_width=True)
 
             progress_bar.progress(100)

@@ -477,6 +474,7 @@ def run_opencorporates_scraper(
     except Exception as e:
         st.error(f"❌ Error: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 
@@ -493,22 +491,12 @@ def show_xbrl_filings_scraper():
         country = st.selectbox(
             "Country (optional)",
             ["", "GB", "FR", "DE", "ES", "IT", "NL", "BE"],
-            help="Filter by country code"
-        )
-        days_back = st.number_input(
-            "Days Back",
-            min_value=1,
-            max_value=365,
-            value=30
+            help="Filter by country code",
         )
+        days_back = st.number_input("Days Back", min_value=1, max_value=365, value=30)
 
     with col2:
-        max_results = st.number_input(
-            "Max Results",
-            min_value=1,
-            max_value=500,
-            value=100
-        )
+        max_results = st.number_input("Max Results", min_value=1, max_value=500, value=100)
         save_to_db = st.checkbox("Save to Database", value=False)
 
     # Run scraper

@@ -516,12 +504,7 @@ def show_xbrl_filings_scraper():
         run_xbrl_filings_scraper(country or None, days_back, max_results, save_to_db)
 
 
-def run_xbrl_filings_scraper(
-    country: str,
-    days_back: int,
-    max_results: int,
-    save_to_db: bool
-):
+def run_xbrl_filings_scraper(country: str, days_back: int, max_results: int, save_to_db: bool):
     """Execute XBRL Filings scraper"""
     try:
         from mcli.workflow.politician_trading.scrapers_corporate_registry import XBRLFilingsScraper

@@ -540,11 +523,7 @@ def run_xbrl_filings_scraper(
 
         # Get filings
         status_container.info(f"🔍 Fetching XBRL filings since {from_date}...")
-        filings = scraper.get_filings(
-            country=country,
-            from_date=from_date,
-            page_size=max_results
-        )
+        filings = scraper.get_filings(country=country, from_date=from_date, page_size=max_results)
         progress_bar.progress(80)
 
         # Display results

@@ -558,13 +537,18 @@ def run_xbrl_filings_scraper(
             st.metric("Filings Found", len(filings))
 
             # Show filings
-            filings_df = pd.DataFrame(
+            filings_df = pd.DataFrame(
+                [
+                    {
+                        "ID": f.get("id", ""),
+                        "Country": f.get("attributes", {}).get("country", ""),
+                        "Entity": f.get("attributes", {}).get("entity_name", "")[:50],
+                        "Period": f.get("attributes", {}).get("period_end", ""),
+                        "Date Added": f.get("attributes", {}).get("date_added", ""),
+                    }
+                    for f in filings
+                ]
+            )
             st.dataframe(filings_df, use_container_width=True)
 
             progress_bar.progress(100)

@@ -573,6 +557,7 @@ def run_xbrl_filings_scraper(
     except Exception as e:
         st.error(f"❌ Error: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 
@@ -584,13 +569,15 @@ def show_xbrl_us_scraper():
 
     if not api_key:
         st.error("❌ XBRL US API key not configured")
-        st.info(
+        st.info(
+            """
         To use this scraper, set `XBRL_US_API_KEY` in:
         - Streamlit Cloud: Settings → Secrets
         - Local: .streamlit/secrets.toml or environment variable
 
         Get free API key: https://xbrl.us/home/use/xbrl-api/
-        """
+        """
+        )
         return
 
     st.success("✅ API key configured")

@@ -600,18 +587,11 @@ def show_xbrl_us_scraper():
 
     with col1:
         query = st.text_input(
-            "Company Name or Ticker",
-            value="Tesla",
-            help="Search by company name or stock ticker"
+            "Company Name or Ticker", value="Tesla", help="Search by company name or stock ticker"
         )
 
     with col2:
-        max_results = st.number_input(
-            "Max Results",
-            min_value=1,
-            max_value=100,
-            value=10
-        )
+        max_results = st.number_input("Max Results", min_value=1, max_value=100, value=10)
         save_to_db = st.checkbox("Save to Database", value=False)
 
     # Run scraper

@@ -619,11 +599,7 @@ def show_xbrl_us_scraper():
         run_xbrl_us_scraper(query, max_results, save_to_db)
 
 
-def run_xbrl_us_scraper(
-    query: str,
-    max_results: int,
-    save_to_db: bool
-):
+def run_xbrl_us_scraper(query: str, max_results: int, save_to_db: bool):
     """Execute XBRL US scraper"""
     try:
         from mcli.workflow.politician_trading.scrapers_corporate_registry import XBRLUSScraper

@@ -653,12 +629,17 @@ def run_xbrl_us_scraper(
             st.metric("Entities Found", len(entities))
 
             # Show entities
-            entities_df = pd.DataFrame(
+            entities_df = pd.DataFrame(
+                [
+                    {
+                        "ID": e.get("entity", {}).get("id", ""),
+                        "Name": e.get("entity", {}).get("name", ""),
+                        "CIK": e.get("entity", {}).get("cik", ""),
+                        "Ticker": e.get("entity", {}).get("ticker", ""),
+                    }
+                    for e in entities
+                ]
+            )
             st.dataframe(entities_df, use_container_width=True)
 
             progress_bar.progress(100)

@@ -667,6 +648,7 @@ def run_xbrl_us_scraper(
     except Exception as e:
         st.error(f"❌ Error: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 
@@ -682,11 +664,7 @@ def show_senate_watcher_scraper():
     with col1:
         recent_only = st.checkbox("Recent Only", value=True)
         days_back = st.number_input(
-            "Days Back (if recent)",
-            min_value=1,
-            max_value=365,
-            value=90,
-            disabled=not recent_only
+            "Days Back (if recent)", min_value=1, max_value=365, value=90, disabled=not recent_only
         )
 
     with col2:

@@ -697,11 +675,7 @@ def show_senate_watcher_scraper():
         run_senate_watcher_scraper(recent_only, days_back, save_to_db)
 
 
-def run_senate_watcher_scraper(
-    recent_only: bool,
-    days_back: int,
-    save_to_db: bool
-):
+def run_senate_watcher_scraper(recent_only: bool, days_back: int, save_to_db: bool):
     """Execute Senate Stock Watcher scraper"""
     try:
         from mcli.workflow.politician_trading.scrapers_free_sources import FreeDataFetcher

@@ -734,19 +708,30 @@ def run_senate_watcher_scraper(
         # Show disclosures
         if disclosures:
             st.markdown("#### Recent Trading Disclosures")
-            disc_df = pd.DataFrame(
+            disc_df = pd.DataFrame(
+                [
+                    {
+                        "Date": (
+                            d.transaction_date.strftime("%Y-%m-%d")
+                            if hasattr(d.transaction_date, "strftime")
+                            else str(d.transaction_date)
+                        ),
+                        "Ticker": d.asset_ticker or "—",
+                        "Asset": d.asset_name[:50],
+                        "Type": d.transaction_type,
+                        "Politician": d.politician_bioguide_id,
+                        "Min": f"${d.amount_range_min:,.0f}" if d.amount_range_min else "",
+                        "Max": f"${d.amount_range_max:,.0f}" if d.amount_range_max else "",
+                    }
+                    for d in disclosures[:100]
+                ]
+            )  # Limit to 100 for display
             st.dataframe(disc_df, use_container_width=True)
 
         progress_bar.progress(100)
-        status_container.success(
+        status_container.success(
+            f"✅ Scraping completed! Found {len(politicians)} politicians, {len(disclosures)} disclosures"
+        )
 
         if save_to_db:
             save_politician_trading_to_db(politicians, disclosures)

@@ -754,6 +739,7 @@ def run_senate_watcher_scraper(
     except Exception as e:
         st.error(f"❌ Error: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 
@@ -779,9 +765,11 @@ def show_scraper_logs():
     """Display scraper logs"""
     st.subheader("📊 Scraper Logs")
 
-    st.markdown(
+    st.markdown(
+        """
     View real-time logs from scraping operations and data pull jobs.
-    """
+    """
+    )
 
     # Get logs from Supabase data_pull_jobs
     try:

@@ -791,7 +779,13 @@ def show_scraper_logs():
 
         if client:
             # Get recent jobs
-            jobs =
+            jobs = (
+                client.table("data_pull_jobs")
+                .select("*")
+                .order("created_at", desc=True)
+                .limit(50)
+                .execute()
+            )
 
             if jobs.data:
                 st.markdown("### Recent Data Pull Jobs")

@@ -799,19 +793,33 @@ def show_scraper_logs():
                 jobs_df = pd.DataFrame(jobs.data)
 
                 # Format dates
-                for col in [
+                for col in ["started_at", "completed_at", "created_at"]:
                     if col in jobs_df.columns:
-                        jobs_df[col] = pd.to_datetime(
+                        jobs_df[col] = pd.to_datetime(
+                            jobs_df[col], format="ISO8601", errors="coerce"
+                        )
 
                 # Display jobs table
-                display_df = jobs_df[
+                display_df = jobs_df[
+                    [
+                        "created_at",
+                        "job_type",
+                        "status",
+                        "records_found",
+                        "records_new",
+                        "records_updated",
+                        "records_failed",
+                    ]
+                ].copy()
 
                 display_df.columns = [
+                    "Timestamp",
+                    "Job Type",
+                    "Status",
+                    "Found",
+                    "New",
+                    "Updated",
+                    "Failed",
                 ]
 
                 st.dataframe(display_df, use_container_width=True)

@@ -821,26 +829,26 @@ def show_scraper_logs():
 
                 selected_job = st.selectbox(
                     "Select Job",
-                    jobs_df[
-                    format_func=lambda x: f"{jobs_df[jobs_df['id']==x]['job_type'].values[0]} - {jobs_df[jobs_df['id']==x]['created_at'].values[0]}"
+                    jobs_df["id"].tolist(),
+                    format_func=lambda x: f"{jobs_df[jobs_df['id']==x]['job_type'].values[0]} - {jobs_df[jobs_df['id']==x]['created_at'].values[0]}",
                 )
 
                 if selected_job:
-                    job = jobs_df[jobs_df[
+                    job = jobs_df[jobs_df["id"] == selected_job].iloc[0]
 
                     col1, col2, col3, col4 = st.columns(4)
-                    col1.metric("Status", job[
-                    col2.metric("Records Found", job[
-                    col3.metric("New Records", job[
-                    col4.metric("Failed", job[
+                    col1.metric("Status", job["status"])
+                    col2.metric("Records Found", job["records_found"])
+                    col3.metric("New Records", job["records_new"])
+                    col4.metric("Failed", job["records_failed"])
 
-                    if job.get(
+                    if job.get("error_message"):
                         st.error(f"**Error:** {job['error_message']}")
 
                     # Show config snapshot
-                    if job.get(
+                    if job.get("config_snapshot"):
                         with st.expander("Configuration Snapshot"):
-                            st.json(job[
+                            st.json(job["config_snapshot"])
 
             else:
                 st.info("No jobs found in database")
@@ -856,33 +864,29 @@ def show_system_logs():
     """Display system logs"""
     st.subheader("📝 System Logs")
 
-    st.markdown(
+    st.markdown(
+        """
     View application logs, errors, and system events.
-    """
+    """
+    )
 
     # Log file path
     log_file = Path("/tmp/seed_database.log")
 
     if log_file.exists():
         try:
-            with open(log_file,
+            with open(log_file, "r") as f:
                 logs = f.readlines()
 
             # Filter options
             col1, col2, col3 = st.columns(3)
 
             with col1:
-                log_level = st.selectbox(
-                    "Log Level",
-                    ["ALL", "ERROR", "WARNING", "INFO", "DEBUG"]
-                )
+                log_level = st.selectbox("Log Level", ["ALL", "ERROR", "WARNING", "INFO", "DEBUG"])
 
             with col2:
                 lines_to_show = st.number_input(
-                    "Lines to Show",
-                    min_value=10,
-                    max_value=1000,
-                    value=100
+                    "Lines to Show", min_value=10, max_value=1000, value=100
                 )
 
             with col3:

@@ -898,51 +902,54 @@ def show_system_logs():
                 filtered_logs = [l for l in filtered_logs if search_term.lower() in l.lower()]
 
             # Display logs
-            st.text_area(
-                "Log Output",
-                "".join(filtered_logs),
-                height=400
-            )
+            st.text_area("Log Output", "".join(filtered_logs), height=400)
 
             # Download button
             st.download_button(
                 "Download Full Logs",
                 "".join(logs),
                 file_name=f"system_logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-                mime="text/plain"
+                mime="text/plain",
             )
 
         except Exception as e:
             st.error(f"Error reading log file: {e}")
     else:
         st.info("📋 **No logs available yet**")
-        st.markdown(
+        st.markdown(
+            """
         System logs will appear here automatically after scraping jobs run.
 
         **To generate logs:**
         - Use the "Manual Scrapers" section above to run a data pull
         - Wait for automated jobs to execute
        - Logs will be stored in: `/tmp/seed_database.log`
-        """
+        """
+        )
 
         # Create example logs display
         st.markdown("### 📝 Example Log Output")
-        st.code(
+        st.code(
+            """
 2025-10-07 12:00:00 - INFO - Starting data pull job: senate_watcher_seed
 2025-10-07 12:00:05 - INFO - Fetched 8350 Senate transactions
 2025-10-07 12:00:10 - INFO - Upserted 89 politicians (5 new, 84 updated)
 2025-10-07 12:01:30 - INFO - Upserted 8350 disclosures (6353 new, 1893 updated, 104 failed)
 2025-10-07 12:01:31 - INFO - Job completed successfully
-        """,
+        """,
+            language="log",
+        )
 
 
 def show_job_history():
     """Display job history and statistics"""
     st.subheader("📈 Job History & Statistics")
 
-    st.markdown(
+    st.markdown(
+        """
     View historical data about scraping jobs, success rates, and trends.
-    """
+    """
+    )
 
     try:
         from mcli.ml.dashboard.app_integrated import get_supabase_client
@@ -951,15 +958,23 @@ def show_job_history():
 
         if client:
             # Get all jobs
-            jobs =
+            jobs = (
+                client.table("data_pull_jobs")
+                .select("*")
+                .order("created_at", desc=True)
+                .limit(1000)
+                .execute()
+            )
 
             if jobs.data and len(jobs.data) > 0:
                 jobs_df = pd.DataFrame(jobs.data)
 
                 # Format dates
-                for col in [
+                for col in ["started_at", "completed_at", "created_at"]:
                     if col in jobs_df.columns:
-                        jobs_df[col] = pd.to_datetime(
+                        jobs_df[col] = pd.to_datetime(
+                            jobs_df[col], format="ISO8601", errors="coerce"
+                        )
 
                 # Statistics
                 st.markdown("### Overall Statistics")

@@ -967,8 +982,8 @@ def show_job_history():
                 col1, col2, col3, col4 = st.columns(4)
 
                 total_jobs = len(jobs_df)
-                completed_jobs = len(jobs_df[jobs_df[
-                failed_jobs = len(jobs_df[jobs_df[
+                completed_jobs = len(jobs_df[jobs_df["status"] == "completed"])
+                failed_jobs = len(jobs_df[jobs_df["status"] == "failed"])
                 success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0
 
                 col1.metric("Total Jobs", total_jobs)

@@ -979,84 +994,94 @@ def show_job_history():
                 # Job type breakdown
                 st.markdown("### Job Type Breakdown")
 
-                job_type_counts = jobs_df[
+                job_type_counts = jobs_df["job_type"].value_counts()
 
                 fig = px.pie(
-                    values=job_type_counts.values,
-                    names=job_type_counts.index,
-                    title="Jobs by Type"
+                    values=job_type_counts.values, names=job_type_counts.index, title="Jobs by Type"
                 )
                 st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)
 
                 # Status breakdown
                 st.markdown("### Status Breakdown")
 
-                status_counts = jobs_df[
+                status_counts = jobs_df["status"].value_counts()
 
                 fig = px.bar(
                     x=status_counts.index,
                     y=status_counts.values,
-                    labels={
-                    title="Jobs by Status"
+                    labels={"x": "Status", "y": "Count"},
+                    title="Jobs by Status",
                 )
                 st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)
 
                 # Timeline
                 st.markdown("### Job Timeline")
 
-                jobs_df[
+                jobs_df["date"] = jobs_df["created_at"].dt.date
 
-                timeline_df = jobs_df.groupby([
+                timeline_df = jobs_df.groupby(["date", "status"]).size().reset_index(name="count")
 
                 fig = px.line(
-                    timeline_df,
-                    x='date',
-                    y='count',
-                    color='status',
-                    title="Jobs Over Time"
+                    timeline_df, x="date", y="count", color="status", title="Jobs Over Time"
                 )
                 st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)
 
                 # Records processed
                 st.markdown("### Records Processed")
 
-                records_df = jobs_df[jobs_df[
+                records_df = jobs_df[jobs_df["status"] == "completed"][
+                    [
+                        "created_at",
+                        "records_found",
+                        "records_new",
+                        "records_updated",
+                        "records_failed",
+                    ]
+                ].copy()
 
                 if not records_df.empty:
                     fig = go.Figure()
 
-                    fig.add_trace(
+                    fig.add_trace(
+                        go.Scatter(
+                            x=records_df["created_at"],
+                            y=records_df["records_new"],
+                            name="New Records",
+                            mode="lines+markers",
+                        )
+                    )
+
+                    fig.add_trace(
+                        go.Scatter(
+                            x=records_df["created_at"],
+                            y=records_df["records_updated"],
+                            name="Updated Records",
+                            mode="lines+markers",
+                        )
+                    )
+
+                    fig.add_trace(
+                        go.Scatter(
+                            x=records_df["created_at"],
+                            y=records_df["records_failed"],
+                            name="Failed Records",
+                            mode="lines+markers",
+                        )
+                    )
 
                     fig.update_layout(
                         title="Records Processed Over Time",
                         xaxis_title="Date",
                         yaxis_title="Count",
-                        hovermode=
+                        hovermode="x unified",
                     )
 
                     st.plotly_chart(fig, config={"displayModeBar": True}, use_container_width=True)
 
                 else:
-                    st.info(
+                    st.info(
+                        "No job history available yet. Run some scraping jobs to see statistics here."
+                    )
 
         else:
             st.warning("Supabase not connected - job history unavailable")

@@ -1064,6 +1089,7 @@ def show_job_history():
     except Exception as e:
         st.error(f"Error loading job history: {e}")
         import traceback
+
         st.code(traceback.format_exc())
 
 