mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +46 -13
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +69 -58
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +283 -152
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +235 -0
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +38 -18
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
|
@@ -73,24 +73,24 @@ class ScrapingConfig:
|
|
|
73
73
|
def get_active_sources(self):
|
|
74
74
|
"""Get all active data sources based on configuration"""
|
|
75
75
|
from .data_sources import ALL_DATA_SOURCES
|
|
76
|
-
|
|
76
|
+
|
|
77
77
|
active_sources = []
|
|
78
|
-
|
|
78
|
+
|
|
79
79
|
if self.enable_us_federal:
|
|
80
80
|
active_sources.extend(ALL_DATA_SOURCES["us_federal"])
|
|
81
|
-
|
|
81
|
+
|
|
82
82
|
if self.enable_us_states:
|
|
83
83
|
active_sources.extend(ALL_DATA_SOURCES["us_states"])
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
if self.enable_eu_parliament:
|
|
86
86
|
active_sources.extend(ALL_DATA_SOURCES["eu_parliament"])
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
if self.enable_eu_national:
|
|
89
89
|
active_sources.extend(ALL_DATA_SOURCES["eu_national"])
|
|
90
|
-
|
|
90
|
+
|
|
91
91
|
if self.enable_third_party:
|
|
92
92
|
active_sources.extend(ALL_DATA_SOURCES["third_party"])
|
|
93
|
-
|
|
93
|
+
|
|
94
94
|
# Filter to only active status sources
|
|
95
95
|
return [source for source in active_sources if source.status == "active"]
|
|
96
96
|
|
|
@@ -4,19 +4,19 @@ Continuous Supabase connectivity validation and monitoring
|
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import json
|
|
7
|
+
import logging
|
|
7
8
|
import time
|
|
8
9
|
from datetime import datetime, timedelta
|
|
9
|
-
from typing import
|
|
10
|
-
import logging
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
12
|
from rich.console import Console
|
|
13
13
|
from rich.live import Live
|
|
14
|
-
from rich.table import Table
|
|
15
14
|
from rich.panel import Panel
|
|
16
|
-
from rich.progress import Progress, SpinnerColumn, TextColumn,
|
|
15
|
+
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
|
|
16
|
+
from rich.table import Table
|
|
17
17
|
|
|
18
|
-
from .database import PoliticianTradingDB
|
|
19
18
|
from .config import WorkflowConfig
|
|
19
|
+
from .database import PoliticianTradingDB
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
console = Console()
|
|
@@ -100,13 +100,14 @@ class SupabaseConnectivityValidator:
|
|
|
100
100
|
try:
|
|
101
101
|
# Test basic REST API connectivity instead of RPC
|
|
102
102
|
import httpx
|
|
103
|
+
|
|
103
104
|
async with httpx.AsyncClient() as client:
|
|
104
105
|
response = await client.get(
|
|
105
|
-
self.config.supabase.url +
|
|
106
|
-
headers={
|
|
107
|
-
timeout=30.0
|
|
106
|
+
self.config.supabase.url + "/rest/v1/",
|
|
107
|
+
headers={"apikey": self.config.supabase.key},
|
|
108
|
+
timeout=30.0,
|
|
108
109
|
)
|
|
109
|
-
|
|
110
|
+
|
|
110
111
|
if response.status_code == 200:
|
|
111
112
|
return {
|
|
112
113
|
"success": True,
|
|
@@ -115,8 +116,8 @@ class SupabaseConnectivityValidator:
|
|
|
115
116
|
}
|
|
116
117
|
else:
|
|
117
118
|
return {
|
|
118
|
-
"success": False,
|
|
119
|
-
"error": f"HTTP {response.status_code}: {response.text[:100]}"
|
|
119
|
+
"success": False,
|
|
120
|
+
"error": f"HTTP {response.status_code}: {response.text[:100]}",
|
|
120
121
|
}
|
|
121
122
|
except Exception as e:
|
|
122
123
|
return {"success": False, "error": str(e)}
|
|
@@ -127,7 +128,7 @@ class SupabaseConnectivityValidator:
|
|
|
127
128
|
# Try reading from multiple tables
|
|
128
129
|
tables_to_test = [
|
|
129
130
|
"politicians",
|
|
130
|
-
"trading_disclosures",
|
|
131
|
+
"trading_disclosures",
|
|
131
132
|
"data_pull_jobs",
|
|
132
133
|
"data_sources",
|
|
133
134
|
]
|
|
@@ -148,14 +149,14 @@ class SupabaseConnectivityValidator:
|
|
|
148
149
|
|
|
149
150
|
accessible_count = sum(1 for status in read_results.values() if status == "accessible")
|
|
150
151
|
missing_count = sum(1 for status in read_results.values() if status == "table_missing")
|
|
151
|
-
|
|
152
|
+
|
|
152
153
|
if schema_missing and accessible_count == 0:
|
|
153
154
|
return {
|
|
154
155
|
"success": False,
|
|
155
156
|
"tables_tested": read_results,
|
|
156
157
|
"accessible_tables": accessible_count,
|
|
157
158
|
"missing_tables": missing_count,
|
|
158
|
-
"message": "Database schema not set up. Run 'mcli workflow politician-trading setup --generate-schema' to get setup instructions."
|
|
159
|
+
"message": "Database schema not set up. Run 'mcli workflow politician-trading setup --generate-schema' to get setup instructions.",
|
|
159
160
|
}
|
|
160
161
|
else:
|
|
161
162
|
success = accessible_count > 0
|
|
@@ -195,15 +196,15 @@ class SupabaseConnectivityValidator:
|
|
|
195
196
|
return {
|
|
196
197
|
"success": False,
|
|
197
198
|
"error": "Table 'data_pull_jobs' not found",
|
|
198
|
-
"message": "Database schema not set up. Run schema setup first."
|
|
199
|
+
"message": "Database schema not set up. Run schema setup first.",
|
|
199
200
|
}
|
|
200
201
|
else:
|
|
201
202
|
raise e
|
|
202
203
|
|
|
203
204
|
# Get the inserted record ID
|
|
204
205
|
if insert_result.data and len(insert_result.data) > 0:
|
|
205
|
-
inserted_id = insert_result.data[0][
|
|
206
|
-
|
|
206
|
+
inserted_id = insert_result.data[0]["id"]
|
|
207
|
+
|
|
207
208
|
# Update the record
|
|
208
209
|
update_result = (
|
|
209
210
|
self.db.client.table("data_pull_jobs")
|
|
@@ -322,26 +323,27 @@ class SupabaseConnectivityValidator:
|
|
|
322
323
|
test_source_id = f"rt_test_{int(time.time())}"
|
|
323
324
|
|
|
324
325
|
# Insert
|
|
325
|
-
insert_result =
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
326
|
+
insert_result = (
|
|
327
|
+
self.db.client.table("data_sources")
|
|
328
|
+
.insert(
|
|
329
|
+
{
|
|
330
|
+
"name": "Real-time Test Source",
|
|
331
|
+
"url": "https://test.example.com",
|
|
332
|
+
"source_type": "test",
|
|
333
|
+
"region": "test",
|
|
334
|
+
"is_active": True,
|
|
335
|
+
"created_at": timestamp,
|
|
336
|
+
}
|
|
337
|
+
)
|
|
338
|
+
.execute()
|
|
339
|
+
)
|
|
335
340
|
|
|
336
341
|
if insert_result.data and len(insert_result.data) > 0:
|
|
337
|
-
inserted_id = insert_result.data[0][
|
|
338
|
-
|
|
342
|
+
inserted_id = insert_result.data[0]["id"]
|
|
343
|
+
|
|
339
344
|
# Immediate read-back
|
|
340
345
|
result = (
|
|
341
|
-
self.db.client.table("data_sources")
|
|
342
|
-
.select("*")
|
|
343
|
-
.eq("id", inserted_id)
|
|
344
|
-
.execute()
|
|
346
|
+
self.db.client.table("data_sources").select("*").eq("id", inserted_id).execute()
|
|
345
347
|
)
|
|
346
348
|
|
|
347
349
|
# Clean up
|
|
@@ -8,43 +8,50 @@ Based on 2025 research of available public databases and APIs.
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from dataclasses import dataclass, field
|
|
11
|
-
from typing import Dict, List, Optional, Literal
|
|
12
11
|
from enum import Enum
|
|
12
|
+
from typing import Dict, List, Literal, Optional
|
|
13
|
+
|
|
13
14
|
|
|
14
15
|
class DisclosureType(Enum):
|
|
15
16
|
"""Types of financial disclosures available"""
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
|
|
18
|
+
STOCK_TRANSACTIONS = "stock_transactions" # Individual buy/sell transactions
|
|
19
|
+
FINANCIAL_INTERESTS = "financial_interests" # General financial interests/assets
|
|
20
|
+
ASSET_DECLARATIONS = "asset_declarations" # Property, investments, etc.
|
|
21
|
+
INCOME_SOURCES = "income_sources" # Outside income sources
|
|
22
|
+
CONFLICT_INTERESTS = "conflict_interests" # Potential conflicts of interest
|
|
23
|
+
|
|
21
24
|
|
|
22
25
|
class AccessMethod(Enum):
|
|
23
26
|
"""How data can be accessed"""
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
|
|
28
|
+
WEB_SCRAPING = "web_scraping" # HTML scraping required
|
|
29
|
+
API = "api" # JSON/XML API available
|
|
30
|
+
PDF_PARSING = "pdf_parsing" # PDF documents to parse
|
|
27
31
|
MANUAL_DOWNLOAD = "manual_download" # Manual download required
|
|
28
|
-
DATABASE_QUERY = "database_query"
|
|
32
|
+
DATABASE_QUERY = "database_query" # Direct database access
|
|
33
|
+
|
|
29
34
|
|
|
30
35
|
@dataclass
|
|
31
36
|
class DataSource:
|
|
32
37
|
"""Configuration for a single data source"""
|
|
38
|
+
|
|
33
39
|
name: str
|
|
34
|
-
jurisdiction: str
|
|
35
|
-
institution: str
|
|
40
|
+
jurisdiction: str # e.g., "US-Federal", "US-CA", "EU", "DE"
|
|
41
|
+
institution: str # e.g., "House", "Senate", "Bundestag"
|
|
36
42
|
url: str
|
|
37
43
|
disclosure_types: List[DisclosureType]
|
|
38
44
|
access_method: AccessMethod
|
|
39
|
-
update_frequency: str
|
|
40
|
-
threshold_amount: Optional[int] = None
|
|
41
|
-
data_format: str = "html"
|
|
45
|
+
update_frequency: str # e.g., "daily", "weekly", "monthly"
|
|
46
|
+
threshold_amount: Optional[int] = None # Minimum disclosure amount in USD
|
|
47
|
+
data_format: str = "html" # html, json, xml, pdf
|
|
42
48
|
api_key_required: bool = False
|
|
43
49
|
rate_limits: Optional[str] = None
|
|
44
50
|
historical_data_available: bool = True
|
|
45
51
|
notes: Optional[str] = None
|
|
46
52
|
status: Literal["active", "inactive", "testing", "planned"] = "active"
|
|
47
53
|
|
|
54
|
+
|
|
48
55
|
# =============================================================================
|
|
49
56
|
# US FEDERAL SOURCES
|
|
50
57
|
# =============================================================================
|
|
@@ -62,12 +69,11 @@ US_FEDERAL_SOURCES = [
|
|
|
62
69
|
data_format="html",
|
|
63
70
|
historical_data_available=True,
|
|
64
71
|
notes="STOCK Act requires prompt disclosure of transactions >$1,000. 8-year archive available.",
|
|
65
|
-
status="active"
|
|
72
|
+
status="active",
|
|
66
73
|
),
|
|
67
|
-
|
|
68
74
|
DataSource(
|
|
69
75
|
name="US Senate Financial Disclosures",
|
|
70
|
-
jurisdiction="US-Federal",
|
|
76
|
+
jurisdiction="US-Federal",
|
|
71
77
|
institution="Senate",
|
|
72
78
|
url="https://efd.senate.gov",
|
|
73
79
|
disclosure_types=[DisclosureType.STOCK_TRANSACTIONS, DisclosureType.ASSET_DECLARATIONS],
|
|
@@ -77,9 +83,8 @@ US_FEDERAL_SOURCES = [
|
|
|
77
83
|
data_format="html",
|
|
78
84
|
historical_data_available=True,
|
|
79
85
|
notes="Filing threshold $150,160 for 2025. 6-year retention after leaving office.",
|
|
80
|
-
status="active"
|
|
86
|
+
status="active",
|
|
81
87
|
),
|
|
82
|
-
|
|
83
88
|
DataSource(
|
|
84
89
|
name="Office of Government Ethics",
|
|
85
90
|
jurisdiction="US-Federal",
|
|
@@ -91,8 +96,8 @@ US_FEDERAL_SOURCES = [
|
|
|
91
96
|
data_format="pdf",
|
|
92
97
|
historical_data_available=True,
|
|
93
98
|
notes="Executive branch officials, judges, and senior staff disclosures",
|
|
94
|
-
status="active"
|
|
95
|
-
)
|
|
99
|
+
status="active",
|
|
100
|
+
),
|
|
96
101
|
]
|
|
97
102
|
|
|
98
103
|
# =============================================================================
|
|
@@ -113,23 +118,21 @@ US_STATE_SOURCES = [
|
|
|
113
118
|
data_format="json",
|
|
114
119
|
api_key_required=False,
|
|
115
120
|
notes="Fair Political Practices Commission Form 700. NetFile API available.",
|
|
116
|
-
status="active"
|
|
121
|
+
status="active",
|
|
117
122
|
),
|
|
118
|
-
|
|
119
123
|
# New York
|
|
120
124
|
DataSource(
|
|
121
125
|
name="New York State Financial Disclosure",
|
|
122
126
|
jurisdiction="US-NY",
|
|
123
|
-
institution="State Legislature",
|
|
127
|
+
institution="State Legislature",
|
|
124
128
|
url="https://ethics.ny.gov/financial-disclosure-statements-elected-officials",
|
|
125
129
|
disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
|
|
126
130
|
access_method=AccessMethod.PDF_PARSING,
|
|
127
131
|
update_frequency="Annually (May 15 deadline)",
|
|
128
132
|
data_format="pdf",
|
|
129
133
|
notes="Commission on Ethics and Lobbying in Government",
|
|
130
|
-
status="active"
|
|
134
|
+
status="active",
|
|
131
135
|
),
|
|
132
|
-
|
|
133
136
|
# Florida
|
|
134
137
|
DataSource(
|
|
135
138
|
name="Florida Financial Disclosure",
|
|
@@ -141,22 +144,20 @@ US_STATE_SOURCES = [
|
|
|
141
144
|
update_frequency="Annually (July 1 deadline, grace period until Sept 1)",
|
|
142
145
|
data_format="html",
|
|
143
146
|
notes="All elected state and local public officers required to file",
|
|
144
|
-
status="active"
|
|
147
|
+
status="active",
|
|
145
148
|
),
|
|
146
|
-
|
|
147
149
|
# Texas
|
|
148
150
|
DataSource(
|
|
149
151
|
name="Texas Ethics Commission",
|
|
150
|
-
jurisdiction="US-TX",
|
|
152
|
+
jurisdiction="US-TX",
|
|
151
153
|
institution="State Legislature",
|
|
152
154
|
url="https://www.ethics.state.tx.us/search/cf/",
|
|
153
155
|
disclosure_types=[DisclosureType.FINANCIAL_INTERESTS],
|
|
154
156
|
access_method=AccessMethod.WEB_SCRAPING,
|
|
155
157
|
update_frequency="Annually",
|
|
156
158
|
data_format="html",
|
|
157
|
-
status="active"
|
|
159
|
+
status="active",
|
|
158
160
|
),
|
|
159
|
-
|
|
160
161
|
# Michigan
|
|
161
162
|
DataSource(
|
|
162
163
|
name="Michigan Personal Financial Disclosure",
|
|
@@ -166,10 +167,10 @@ US_STATE_SOURCES = [
|
|
|
166
167
|
disclosure_types=[DisclosureType.FINANCIAL_INTERESTS],
|
|
167
168
|
access_method=AccessMethod.WEB_SCRAPING,
|
|
168
169
|
update_frequency="Annually",
|
|
169
|
-
data_format="html",
|
|
170
|
+
data_format="html",
|
|
170
171
|
notes="Candidates for Governor, Lt. Gov, SoS, AG, and Legislature required",
|
|
171
|
-
status="active"
|
|
172
|
-
)
|
|
172
|
+
status="active",
|
|
173
|
+
),
|
|
173
174
|
]
|
|
174
175
|
|
|
175
176
|
# =============================================================================
|
|
@@ -188,9 +189,8 @@ EU_PARLIAMENT_SOURCES = [
|
|
|
188
189
|
threshold_amount=5000, # €5,000+ outside income must be declared
|
|
189
190
|
data_format="pdf",
|
|
190
191
|
notes="Individual MEP pages have declarations. Third-party aggregation by EU Integrity Watch.",
|
|
191
|
-
status="active"
|
|
192
|
+
status="active",
|
|
192
193
|
),
|
|
193
|
-
|
|
194
194
|
DataSource(
|
|
195
195
|
name="EU Integrity Watch",
|
|
196
196
|
jurisdiction="EU",
|
|
@@ -201,8 +201,8 @@ EU_PARLIAMENT_SOURCES = [
|
|
|
201
201
|
update_frequency="Updated after MEP declarations",
|
|
202
202
|
data_format="html",
|
|
203
203
|
notes="Automated extraction from Parliament PDFs. Interactive database available.",
|
|
204
|
-
status="active"
|
|
205
|
-
)
|
|
204
|
+
status="active",
|
|
205
|
+
),
|
|
206
206
|
]
|
|
207
207
|
|
|
208
208
|
# =============================================================================
|
|
@@ -222,13 +222,12 @@ EU_NATIONAL_SOURCES = [
|
|
|
222
222
|
threshold_amount=None, # 5% company ownership threshold (down from 25% in 2021)
|
|
223
223
|
data_format="html",
|
|
224
224
|
notes="Transparency Act 2021. Company ownership >5%, tougher bribery laws (1-10 years prison).",
|
|
225
|
-
status="active"
|
|
225
|
+
status="active",
|
|
226
226
|
),
|
|
227
|
-
|
|
228
227
|
# France
|
|
229
228
|
DataSource(
|
|
230
229
|
name="French Parliament Financial Declarations",
|
|
231
|
-
jurisdiction="FR",
|
|
230
|
+
jurisdiction="FR",
|
|
232
231
|
institution="National Assembly & Senate",
|
|
233
232
|
url="https://www.hatvp.fr/", # High Authority for Transparency in Public Life
|
|
234
233
|
disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
|
|
@@ -236,12 +235,11 @@ EU_NATIONAL_SOURCES = [
|
|
|
236
235
|
update_frequency="Annually",
|
|
237
236
|
data_format="html",
|
|
238
237
|
notes="HATVP publishes declarations. Asset declarations for MEPs since 2019. Penalties: 3 years prison + €45,000 fine.",
|
|
239
|
-
status="active"
|
|
238
|
+
status="active",
|
|
240
239
|
),
|
|
241
|
-
|
|
242
240
|
# United Kingdom
|
|
243
241
|
DataSource(
|
|
244
|
-
name="UK Parliament Register of Members' Financial Interests",
|
|
242
|
+
name="UK Parliament Register of Members' Financial Interests",
|
|
245
243
|
jurisdiction="UK",
|
|
246
244
|
institution="House of Commons",
|
|
247
245
|
url="https://www.parliament.uk/mps-lords-and-offices/standards-and-financial-interests/parliamentary-commissioner-for-standards/registers-of-interests/register-of-members-financial-interests/",
|
|
@@ -252,22 +250,20 @@ EU_NATIONAL_SOURCES = [
|
|
|
252
250
|
data_format="json",
|
|
253
251
|
api_key_required=False,
|
|
254
252
|
notes="Open Parliament Licence API available. Register updated bi-weekly.",
|
|
255
|
-
status="active"
|
|
253
|
+
status="active",
|
|
256
254
|
),
|
|
257
|
-
|
|
258
255
|
DataSource(
|
|
259
256
|
name="UK House of Lords Register of Interests",
|
|
260
257
|
jurisdiction="UK",
|
|
261
|
-
institution="House of Lords",
|
|
258
|
+
institution="House of Lords",
|
|
262
259
|
url="https://members.parliament.uk/members/lords/interests/register-of-lords-interests",
|
|
263
260
|
disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
|
|
264
261
|
access_method=AccessMethod.WEB_SCRAPING,
|
|
265
262
|
update_frequency="Updated regularly",
|
|
266
263
|
data_format="html",
|
|
267
264
|
notes="More detailed shareholding disclosure than Commons. Searchable database.",
|
|
268
|
-
status="active"
|
|
265
|
+
status="active",
|
|
269
266
|
),
|
|
270
|
-
|
|
271
267
|
# Spain
|
|
272
268
|
DataSource(
|
|
273
269
|
name="Spanish Parliament Transparency Portal",
|
|
@@ -279,9 +275,8 @@ EU_NATIONAL_SOURCES = [
|
|
|
279
275
|
update_frequency="Updated as required",
|
|
280
276
|
data_format="html",
|
|
281
277
|
notes="Deputies and senators publish institutional agendas with interest representatives. No lobbyist register.",
|
|
282
|
-
status="active"
|
|
278
|
+
status="active",
|
|
283
279
|
),
|
|
284
|
-
|
|
285
280
|
# Italy
|
|
286
281
|
DataSource(
|
|
287
282
|
name="Italian Parliament Financial Declarations",
|
|
@@ -293,8 +288,8 @@ EU_NATIONAL_SOURCES = [
|
|
|
293
288
|
update_frequency="Per legislative term",
|
|
294
289
|
data_format="html",
|
|
295
290
|
notes="Individual member pages contain declarations. Limited standardization.",
|
|
296
|
-
status="testing"
|
|
297
|
-
)
|
|
291
|
+
status="testing",
|
|
292
|
+
),
|
|
298
293
|
]
|
|
299
294
|
|
|
300
295
|
# =============================================================================
|
|
@@ -314,12 +309,11 @@ THIRD_PARTY_SOURCES = [
|
|
|
314
309
|
api_key_required=True,
|
|
315
310
|
rate_limits="1000 requests/day",
|
|
316
311
|
notes="Center for Responsive Politics aggregation of federal disclosures.",
|
|
317
|
-
status="active"
|
|
312
|
+
status="active",
|
|
318
313
|
),
|
|
319
|
-
|
|
320
314
|
DataSource(
|
|
321
315
|
name="LegiStorm Financial Disclosures",
|
|
322
|
-
jurisdiction="US-Federal",
|
|
316
|
+
jurisdiction="US-Federal",
|
|
323
317
|
institution="Third-party aggregator",
|
|
324
318
|
url="https://www.legistorm.com/financial_disclosure.html",
|
|
325
319
|
disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.STOCK_TRANSACTIONS],
|
|
@@ -327,13 +321,12 @@ THIRD_PARTY_SOURCES = [
|
|
|
327
321
|
update_frequency="Real-time from government sources",
|
|
328
322
|
data_format="html",
|
|
329
323
|
notes="Subscription service with enhanced search and analysis tools.",
|
|
330
|
-
status="active"
|
|
324
|
+
status="active",
|
|
331
325
|
),
|
|
332
|
-
|
|
333
326
|
DataSource(
|
|
334
327
|
name="QuiverQuant Congressional Trading",
|
|
335
328
|
jurisdiction="US-Federal",
|
|
336
|
-
institution="Third-party aggregator",
|
|
329
|
+
institution="Third-party aggregator",
|
|
337
330
|
url="https://api.quiverquant.com/beta/live/congresstrading",
|
|
338
331
|
disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
|
|
339
332
|
access_method=AccessMethod.API,
|
|
@@ -342,8 +335,8 @@ THIRD_PARTY_SOURCES = [
|
|
|
342
335
|
api_key_required=True,
|
|
343
336
|
rate_limits="Varies by subscription",
|
|
344
337
|
notes="Financial data company focusing on congressional stock trades.",
|
|
345
|
-
status="active"
|
|
346
|
-
)
|
|
338
|
+
status="active",
|
|
339
|
+
),
|
|
347
340
|
]
|
|
348
341
|
|
|
349
342
|
# =============================================================================
|
|
@@ -352,19 +345,19 @@ THIRD_PARTY_SOURCES = [
|
|
|
352
345
|
|
|
353
346
|
ALL_DATA_SOURCES = {
|
|
354
347
|
"us_federal": US_FEDERAL_SOURCES,
|
|
355
|
-
"us_states": US_STATE_SOURCES,
|
|
348
|
+
"us_states": US_STATE_SOURCES,
|
|
356
349
|
"eu_parliament": EU_PARLIAMENT_SOURCES,
|
|
357
350
|
"eu_national": EU_NATIONAL_SOURCES,
|
|
358
|
-
"third_party": THIRD_PARTY_SOURCES
|
|
351
|
+
"third_party": THIRD_PARTY_SOURCES,
|
|
359
352
|
}
|
|
360
353
|
|
|
361
354
|
# Summary statistics
|
|
362
355
|
TOTAL_SOURCES = sum(len(sources) for sources in ALL_DATA_SOURCES.values())
|
|
363
356
|
ACTIVE_SOURCES = sum(
|
|
364
|
-
len([s for s in sources if s.status == "active"])
|
|
365
|
-
for sources in ALL_DATA_SOURCES.values()
|
|
357
|
+
len([s for s in sources if s.status == "active"]) for sources in ALL_DATA_SOURCES.values()
|
|
366
358
|
)
|
|
367
359
|
|
|
360
|
+
|
|
368
361
|
def get_sources_by_jurisdiction(jurisdiction: str) -> List[DataSource]:
|
|
369
362
|
"""Get all sources for a specific jurisdiction (e.g., 'US-CA', 'DE', 'EU')"""
|
|
370
363
|
all_sources = []
|
|
@@ -372,6 +365,7 @@ def get_sources_by_jurisdiction(jurisdiction: str) -> List[DataSource]:
|
|
|
372
365
|
all_sources.extend([s for s in source_group if s.jurisdiction == jurisdiction])
|
|
373
366
|
return all_sources
|
|
374
367
|
|
|
368
|
+
|
|
375
369
|
def get_sources_by_type(disclosure_type: DisclosureType) -> List[DataSource]:
|
|
376
370
|
"""Get all sources that provide a specific type of disclosure"""
|
|
377
371
|
all_sources = []
|
|
@@ -379,6 +373,7 @@ def get_sources_by_type(disclosure_type: DisclosureType) -> List[DataSource]:
|
|
|
379
373
|
all_sources.extend([s for s in source_group if disclosure_type in s.disclosure_types])
|
|
380
374
|
return all_sources
|
|
381
375
|
|
|
376
|
+
|
|
382
377
|
def get_api_sources() -> List[DataSource]:
|
|
383
378
|
"""Get all sources that provide API access"""
|
|
384
379
|
all_sources = []
|
|
@@ -386,10 +381,16 @@ def get_api_sources() -> List[DataSource]:
|
|
|
386
381
|
all_sources.extend([s for s in source_group if s.access_method == AccessMethod.API])
|
|
387
382
|
return all_sources
|
|
388
383
|
|
|
384
|
+
|
|
389
385
|
# Export for use in workflow configuration
|
|
390
386
|
__all__ = [
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
387
|
+
"DataSource",
|
|
388
|
+
"DisclosureType",
|
|
389
|
+
"AccessMethod",
|
|
390
|
+
"ALL_DATA_SOURCES",
|
|
391
|
+
"get_sources_by_jurisdiction",
|
|
392
|
+
"get_sources_by_type",
|
|
393
|
+
"get_api_sources",
|
|
394
|
+
"TOTAL_SOURCES",
|
|
395
|
+
"ACTIVE_SOURCES",
|
|
396
|
+
]
|
|
@@ -5,14 +5,14 @@ Database client and schema management for politician trading data
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import logging
|
|
7
7
|
from datetime import datetime
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
9
|
from uuid import uuid4
|
|
10
10
|
|
|
11
|
-
from supabase import create_client, Client
|
|
12
11
|
from postgrest.exceptions import APIError
|
|
12
|
+
from supabase import Client, create_client
|
|
13
13
|
|
|
14
14
|
from .config import WorkflowConfig
|
|
15
|
-
from .models import
|
|
15
|
+
from .models import DataPullJob, DataSource, Politician, TradingDisclosure
|
|
16
16
|
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
@@ -96,17 +96,20 @@ class PoliticianTradingDB:
|
|
|
96
96
|
existing = await self.find_politician_by_name(
|
|
97
97
|
politician.first_name, politician.last_name
|
|
98
98
|
)
|
|
99
|
-
|
|
99
|
+
|
|
100
100
|
if existing:
|
|
101
101
|
# Update existing politician (but don't change ID)
|
|
102
102
|
politician_dict = self._politician_to_dict(politician)
|
|
103
103
|
politician_dict["id"] = existing.id # Keep existing ID
|
|
104
104
|
politician_dict["updated_at"] = datetime.utcnow().isoformat()
|
|
105
|
-
|
|
106
|
-
result =
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
105
|
+
|
|
106
|
+
result = (
|
|
107
|
+
self.client.table("politicians")
|
|
108
|
+
.update(politician_dict)
|
|
109
|
+
.eq("id", existing.id)
|
|
110
|
+
.execute()
|
|
111
|
+
)
|
|
112
|
+
|
|
110
113
|
if result.data:
|
|
111
114
|
return result.data[0]["id"]
|
|
112
115
|
return existing.id
|
|
@@ -115,18 +118,16 @@ class PoliticianTradingDB:
|
|
|
115
118
|
politician_dict = self._politician_to_dict(politician)
|
|
116
119
|
if not politician_dict.get("id"):
|
|
117
120
|
politician_dict["id"] = str(uuid4())
|
|
118
|
-
|
|
121
|
+
|
|
119
122
|
politician_dict["created_at"] = datetime.utcnow().isoformat()
|
|
120
123
|
politician_dict["updated_at"] = datetime.utcnow().isoformat()
|
|
121
124
|
|
|
122
|
-
result = self.client.table("politicians").insert(
|
|
123
|
-
|
|
124
|
-
).execute()
|
|
125
|
-
|
|
125
|
+
result = self.client.table("politicians").insert(politician_dict).execute()
|
|
126
|
+
|
|
126
127
|
if result.data:
|
|
127
128
|
return result.data[0]["id"]
|
|
128
129
|
return politician_dict["id"]
|
|
129
|
-
|
|
130
|
+
|
|
130
131
|
except Exception as e:
|
|
131
132
|
logger.error(f"Failed to upsert politician: {e}")
|
|
132
133
|
# For debugging: log the politician data that caused the error
|
|
@@ -362,9 +363,10 @@ class PoliticianTradingDB:
|
|
|
362
363
|
|
|
363
364
|
def _dict_to_disclosure(self, data: Dict[str, Any]) -> TradingDisclosure:
|
|
364
365
|
"""Convert dictionary to TradingDisclosure"""
|
|
365
|
-
from .models import TransactionType, DisclosureStatus
|
|
366
366
|
from decimal import Decimal
|
|
367
367
|
|
|
368
|
+
from .models import DisclosureStatus, TransactionType
|
|
369
|
+
|
|
368
370
|
return TradingDisclosure(
|
|
369
371
|
id=data.get("id"),
|
|
370
372
|
politician_id=data.get("politician_id", ""),
|
|
@@ -6,14 +6,15 @@ import asyncio
|
|
|
6
6
|
import json
|
|
7
7
|
import uuid
|
|
8
8
|
from datetime import datetime
|
|
9
|
+
|
|
9
10
|
from rich.console import Console
|
|
10
|
-
from rich.table import Table
|
|
11
|
-
from rich.panel import Panel
|
|
12
11
|
from rich.json import JSON
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
from rich.table import Table
|
|
13
14
|
|
|
14
|
-
from .workflow import run_politician_trading_collection
|
|
15
15
|
from .connectivity import SupabaseConnectivityValidator
|
|
16
16
|
from .monitoring import run_health_check, run_stats_report
|
|
17
|
+
from .workflow import run_politician_trading_collection
|
|
17
18
|
|
|
18
19
|
console = Console()
|
|
19
20
|
|