mcli-framework 7.5.1__py3-none-any.whl → 7.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/commands_cmd.py +51 -39
- mcli/app/completion_helpers.py +4 -13
- mcli/app/main.py +21 -25
- mcli/app/model_cmd.py +119 -9
- mcli/lib/custom_commands.py +16 -11
- mcli/ml/api/app.py +1 -5
- mcli/ml/dashboard/app.py +2 -2
- mcli/ml/dashboard/app_integrated.py +168 -116
- mcli/ml/dashboard/app_supabase.py +7 -3
- mcli/ml/dashboard/app_training.py +3 -6
- mcli/ml/dashboard/components/charts.py +74 -115
- mcli/ml/dashboard/components/metrics.py +24 -44
- mcli/ml/dashboard/components/tables.py +32 -40
- mcli/ml/dashboard/overview.py +102 -78
- mcli/ml/dashboard/pages/cicd.py +103 -56
- mcli/ml/dashboard/pages/debug_dependencies.py +35 -28
- mcli/ml/dashboard/pages/gravity_viz.py +374 -313
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +50 -48
- mcli/ml/dashboard/pages/predictions_enhanced.py +396 -248
- mcli/ml/dashboard/pages/scrapers_and_logs.py +299 -273
- mcli/ml/dashboard/pages/test_portfolio.py +153 -121
- mcli/ml/dashboard/pages/trading.py +238 -169
- mcli/ml/dashboard/pages/workflows.py +129 -84
- mcli/ml/dashboard/streamlit_extras_utils.py +70 -79
- mcli/ml/dashboard/utils.py +24 -21
- mcli/ml/dashboard/warning_suppression.py +6 -4
- mcli/ml/database/session.py +16 -5
- mcli/ml/mlops/pipeline_orchestrator.py +1 -3
- mcli/ml/predictions/monte_carlo.py +6 -18
- mcli/ml/trading/alpaca_client.py +95 -96
- mcli/ml/trading/migrations.py +76 -40
- mcli/ml/trading/models.py +78 -60
- mcli/ml/trading/paper_trading.py +92 -74
- mcli/ml/trading/risk_management.py +106 -85
- mcli/ml/trading/trading_service.py +155 -110
- mcli/ml/training/train_model.py +1 -3
- mcli/{app → self}/completion_cmd.py +6 -6
- mcli/self/self_cmd.py +100 -57
- mcli/test/test_cmd.py +30 -0
- mcli/workflow/daemon/daemon.py +2 -0
- mcli/workflow/model_service/openai_adapter.py +347 -0
- mcli/workflow/politician_trading/models.py +6 -2
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +39 -88
- mcli/workflow/politician_trading/scrapers_free_sources.py +32 -39
- mcli/workflow/politician_trading/scrapers_third_party.py +21 -39
- mcli/workflow/politician_trading/seed_database.py +70 -89
- {mcli_framework-7.5.1.dist-info → mcli_framework-7.6.1.dist-info}/METADATA +1 -1
- {mcli_framework-7.5.1.dist-info → mcli_framework-7.6.1.dist-info}/RECORD +56 -54
- /mcli/{app → self}/logs_cmd.py +0 -0
- /mcli/{app → self}/redis_cmd.py +0 -0
- /mcli/{app → self}/visual_cmd.py +0 -0
- /mcli/{app → test}/cron_test_cmd.py +0 -0
- {mcli_framework-7.5.1.dist-info → mcli_framework-7.6.1.dist-info}/WHEEL +0 -0
- {mcli_framework-7.5.1.dist-info → mcli_framework-7.6.1.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.5.1.dist-info → mcli_framework-7.6.1.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.5.1.dist-info → mcli_framework-7.6.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAI API Adapter for MCLI Model Service
|
|
3
|
+
|
|
4
|
+
Provides OpenAI-compatible endpoints for tools like aider.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import time
|
|
9
|
+
import uuid
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
|
|
14
|
+
from fastapi.responses import StreamingResponse
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
from mcli.lib.logger.logger import get_logger
|
|
18
|
+
|
|
19
|
+
logger = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Message(BaseModel):
    """A single chat message in the OpenAI messages format.

    Used both for request input and for the assistant message embedded
    in completion responses.
    """

    # Author role; _messages_to_prompt recognizes "system", "user",
    # and "assistant" (other roles are dropped when building the prompt).
    role: str
    # Plain-text message body.
    content: str
    # Optional author display name (accepted for OpenAI compatibility;
    # not read anywhere in this adapter).
    name: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ChatCompletionRequest(BaseModel):
    """OpenAI chat completion request body (subset of the official schema).

    Only ``model``, ``messages`` and ``stream`` are read by this adapter;
    the sampling parameters are accepted for wire compatibility but the
    current placeholder inference path does not use them.
    """

    # Model identifier; matched against the model manager's loaded models.
    model: str
    # Conversation history, flattened to a prompt by _messages_to_prompt.
    messages: List[Message]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
    n: Optional[int] = 1
    # When True the response is delivered as a text/event-stream of chunks.
    stream: Optional[bool] = False
    stop: Optional[List[str]] = None
    max_tokens: Optional[int] = 2048
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    user: Optional[str] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ChatCompletionChoice(BaseModel):
    """One completion alternative within a chat completion response."""

    # Position of this choice in the response's choices list (always 0 here,
    # since the adapter returns a single choice).
    index: int
    # The generated assistant message.
    message: Message
    # Why generation ended; this adapter always reports "stop".
    finish_reason: str
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Usage(BaseModel):
    """Token usage accounting for a completion.

    NOTE(review): this adapter fills these fields with whitespace word
    counts (``str.split()``), not real tokenizer counts.
    """

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ChatCompletionResponse(BaseModel):
    """OpenAI chat completion response envelope."""

    # Completion identifier of the form "chatcmpl-<24 hex chars>".
    id: str
    object: str = "chat.completion"
    # Unix timestamp (seconds) when the completion was created.
    created: int
    # Echo of the requested model name.
    model: str
    choices: List[ChatCompletionChoice]
    usage: Usage
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ModelInfo(BaseModel):
    """Metadata describing one available model (OpenAI /v1/models shape)."""

    id: str
    object: str = "model"
    # Unix timestamp; this adapter fills it with the listing time, not the
    # model's actual creation time.
    created: int
    owned_by: str = "mcli"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ModelListResponse(BaseModel):
    """Response envelope for GET /v1/models."""

    object: str = "list"
    data: List[ModelInfo]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class APIKeyManager:
    """In-memory registry of API keys accepted by the OpenAI adapter.

    Keys live only for the process lifetime; each record tracks a label,
    creation time, free-form metadata, and how often the key validated.
    """

    def __init__(self):
        # Maps the raw key string to its metadata record.
        self.valid_keys: Dict[str, Dict[str, Any]] = {}

    def add_key(self, key: str, name: str = "default", metadata: Optional[Dict] = None):
        """Register *key* as valid with an optional label and metadata."""
        record = {
            "name": name,
            "created_at": datetime.now().isoformat(),
            "metadata": metadata or {},
            "usage_count": 0,
        }
        self.valid_keys[key] = record

    def validate_key(self, key: str) -> bool:
        """Return True if *key* is registered, bumping its usage counter."""
        record = self.valid_keys.get(key)
        if record is None:
            return False
        record["usage_count"] += 1
        return True

    def remove_key(self, key: str):
        """Forget *key*; silently does nothing if it was never registered."""
        self.valid_keys.pop(key, None)

    def list_keys(self) -> List[Dict[str, Any]]:
        """Summaries of registered keys; the key strings themselves are omitted."""
        return [
            {"name": rec["name"], "created_at": rec["created_at"], "usage_count": rec["usage_count"]}
            for rec in self.valid_keys.values()
        ]
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OpenAIAdapter:
    """Adapter to make MCLI model service OpenAI-compatible.

    Mounts ``GET /v1/models`` and ``POST /v1/chat/completions`` on a
    FastAPI router so OpenAI clients (e.g. aider) can talk to the local
    model service. Authentication uses Bearer API keys managed by
    ``APIKeyManager`` and can be disabled with ``require_auth=False``.

    Fix over the original: ``HTTPException`` raised inside the handlers
    (401/404/500 with specific details) is now re-raised as-is instead of
    being swallowed by the broad ``except Exception`` and re-wrapped as a
    generic 500.
    """

    def __init__(self, model_manager, require_auth: bool = True):
        # model_manager is duck-typed: only its optional "loaded_models"
        # mapping and download_and_load_model() are used below.
        self.model_manager = model_manager
        self.require_auth = require_auth
        self.api_key_manager = APIKeyManager()
        # All endpoints live under the OpenAI-style /v1 prefix.
        self.router = APIRouter(prefix="/v1")

        # Setup routes
        self._setup_routes()

    def _setup_routes(self):
        """Register the OpenAI-compatible routes on self.router."""

        @self.router.get("/models", response_model=ModelListResponse)
        async def list_models(api_key: str = Depends(self.verify_api_key)):
            """List available models (OpenAI compatible)."""
            models = []

            # Prefer models the manager has actually loaded.
            if hasattr(self.model_manager, "loaded_models"):
                for model_name in self.model_manager.loaded_models.keys():
                    models.append(
                        ModelInfo(
                            id=model_name,
                            object="model",
                            created=int(time.time()),
                            owned_by="mcli",
                        )
                    )

            # If nothing is loaded, advertise the known lightweight models
            # so clients can still pick one (it will be auto-loaded on use).
            if not models:
                from .lightweight_model_server import LIGHTWEIGHT_MODELS

                for model_key in LIGHTWEIGHT_MODELS.keys():
                    models.append(
                        ModelInfo(
                            id=model_key,
                            object="model",
                            created=int(time.time()),
                            owned_by="mcli",
                        )
                    )

            return ModelListResponse(object="list", data=models)

        @self.router.post("/chat/completions")
        async def create_chat_completion(
            request: ChatCompletionRequest, api_key: str = Depends(self.verify_api_key)
        ):
            """Create a chat completion (OpenAI compatible)."""
            try:
                # Flatten the conversation history into a single prompt.
                messages = request.messages
                prompt = self._messages_to_prompt(messages)

                if request.stream:
                    # Server-sent events stream of chat.completion.chunk objects.
                    return StreamingResponse(
                        self._generate_stream(request, prompt), media_type="text/event-stream"
                    )
                else:
                    response_text = await self._generate_response(request, prompt)

                    # Build the OpenAI-compatible response envelope.
                    completion_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
                    response = ChatCompletionResponse(
                        id=completion_id,
                        object="chat.completion",
                        created=int(time.time()),
                        model=request.model,
                        choices=[
                            ChatCompletionChoice(
                                index=0,
                                message=Message(role="assistant", content=response_text),
                                finish_reason="stop",
                            )
                        ],
                        # NOTE(review): token counts are whitespace word
                        # counts, not tokenizer counts.
                        usage=Usage(
                            prompt_tokens=len(prompt.split()),
                            completion_tokens=len(response_text.split()),
                            total_tokens=len(prompt.split()) + len(response_text.split()),
                        ),
                    )

                    return response

            except HTTPException:
                # Preserve deliberate HTTP errors (404 unknown model, etc.)
                # instead of re-wrapping them as a generic 500.
                raise
            except Exception as e:
                logger.error(f"Error in chat completion: {e}")
                raise HTTPException(status_code=500, detail=str(e))

    def _messages_to_prompt(self, messages: List[Message]) -> str:
        """Convert OpenAI messages format to a simple role-prefixed prompt.

        Roles other than system/user/assistant are silently dropped.
        """
        prompt_parts = []

        for message in messages:
            role = message.role
            content = message.content

            if role == "system":
                prompt_parts.append(f"System: {content}")
            elif role == "user":
                prompt_parts.append(f"User: {content}")
            elif role == "assistant":
                prompt_parts.append(f"Assistant: {content}")

        return "\n\n".join(prompt_parts)

    async def _generate_response(self, request: ChatCompletionRequest, prompt: str) -> str:
        """Generate a response string for *prompt*.

        Auto-loads the requested lightweight model if nothing is loaded yet.

        Raises:
            HTTPException: 404 if the model is unknown, 500 if loading or
                generation fails.
        """
        try:
            # Use the lightweight model server if available.
            if hasattr(self.model_manager, "loaded_models"):
                model_name = request.model
                available_models = list(self.model_manager.loaded_models.keys())

                if not available_models:
                    # Try to auto-load the requested model.
                    from .lightweight_model_server import LIGHTWEIGHT_MODELS

                    if model_name in LIGHTWEIGHT_MODELS:
                        logger.info(f"Auto-loading model: {model_name}")
                        success = self.model_manager.download_and_load_model(model_name)
                        if not success:
                            raise HTTPException(
                                status_code=500, detail=f"Failed to load model: {model_name}"
                            )
                    else:
                        raise HTTPException(
                            status_code=404,
                            detail=f"Model {model_name} not found. Available models: {list(LIGHTWEIGHT_MODELS.keys())}",
                        )

                # Generate response (placeholder - would use actual model inference)
                response = f"This is a response from MCLI model service using {model_name}. In a production environment, this would use the actual model for inference.\n\nYour prompt was: {prompt[:100]}..."

                return response
            else:
                return "Model manager not properly initialized"

        except HTTPException:
            # Let the 404/500 raised above reach the client with its
            # original status code and detail.
            raise
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    async def _generate_stream(
        self, request: ChatCompletionRequest, prompt: str
    ) -> AsyncGenerator[str, None]:
        """Yield the response as OpenAI-style SSE chat.completion.chunk events."""
        completion_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

        # Generate the full response first, then stream it word by word.
        response_text = await self._generate_response(request, prompt)

        words = response_text.split()
        for i, word in enumerate(words):
            chunk = {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": request.model,
                "choices": [
                    {
                        # Trailing space between words, none after the last;
                        # the final chunk carries finish_reason "stop".
                        "index": 0,
                        "delta": {"content": word + " " if i < len(words) - 1 else word},
                        "finish_reason": None if i < len(words) - 1 else "stop",
                    }
                ],
            }

            yield f"data: {json.dumps(chunk)}\n\n"

        # OpenAI protocol terminator.
        yield "data: [DONE]\n\n"

    async def verify_api_key(self, authorization: Optional[str] = Header(None)) -> str:
        """FastAPI dependency: validate the Bearer key in the Authorization header.

        Returns the validated key (or a sentinel when auth is disabled).

        Raises:
            HTTPException: 401 for a missing header, bad scheme, malformed
                header, or unknown key.
        """
        if not self.require_auth:
            return "no-auth-required"

        if not authorization:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Missing API key",
                headers={"WWW-Authenticate": "Bearer"},
            )

        # Extract API key from "Bearer <key>" format.
        try:
            scheme, key = authorization.split()
            if scheme.lower() != "bearer":
                # Not a ValueError, so this propagates past the except below.
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid authentication scheme",
                    headers={"WWW-Authenticate": "Bearer"},
                )
        except ValueError:
            # split() did not produce exactly two tokens.
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid authorization header format",
                headers={"WWW-Authenticate": "Bearer"},
            )

        # Validate the API key against the in-memory registry.
        if not self.api_key_manager.validate_key(key):
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid API key",
                headers={"WWW-Authenticate": "Bearer"},
            )

        return key
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def create_openai_adapter(model_manager, require_auth: bool = True) -> OpenAIAdapter:
    """Factory: build an OpenAIAdapter wrapping *model_manager*.

    Set ``require_auth=False`` to disable Bearer-token authentication.
    """
    adapter = OpenAIAdapter(model_manager, require_auth)
    return adapter
|
|
@@ -238,10 +238,14 @@ class PersonWithSignificantControl:
|
|
|
238
238
|
|
|
239
239
|
# PSC details
|
|
240
240
|
name: str = ""
|
|
241
|
-
kind: str =
|
|
241
|
+
kind: str = (
|
|
242
|
+
"" # individual-person-with-significant-control, corporate-entity-person-with-significant-control, etc.
|
|
243
|
+
)
|
|
242
244
|
|
|
243
245
|
# Control nature
|
|
244
|
-
natures_of_control: List[str] = field(
|
|
246
|
+
natures_of_control: List[str] = field(
|
|
247
|
+
default_factory=list
|
|
248
|
+
) # ownership-of-shares-75-to-100-percent, etc.
|
|
245
249
|
notified_on: Optional[datetime] = None
|
|
246
250
|
|
|
247
251
|
# Personal details (may be redacted)
|
|
@@ -15,9 +15,9 @@ politician trading patterns, conflicts of interest, and asset declarations.
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import time
|
|
18
|
+
from base64 import b64encode
|
|
18
19
|
from datetime import datetime, timedelta
|
|
19
20
|
from typing import Dict, List, Optional
|
|
20
|
-
from base64 import b64encode
|
|
21
21
|
|
|
22
22
|
import requests
|
|
23
23
|
|
|
@@ -54,10 +54,9 @@ class UKCompaniesHouseScraper:
|
|
|
54
54
|
# API uses HTTP Basic Auth with API key as username, password empty
|
|
55
55
|
auth_string = f"{self.api_key}:"
|
|
56
56
|
auth_header = b64encode(auth_string.encode()).decode()
|
|
57
|
-
self.session.headers.update(
|
|
58
|
-
"Authorization": f"Basic {auth_header}",
|
|
59
|
-
|
|
60
|
-
})
|
|
57
|
+
self.session.headers.update(
|
|
58
|
+
{"Authorization": f"Basic {auth_header}", "User-Agent": "PoliticianTradingTracker/1.0"}
|
|
59
|
+
)
|
|
61
60
|
|
|
62
61
|
def search_companies(self, query: str, items_per_page: int = 20) -> List[Dict]:
|
|
63
62
|
"""
|
|
@@ -72,10 +71,7 @@ class UKCompaniesHouseScraper:
|
|
|
72
71
|
"""
|
|
73
72
|
try:
|
|
74
73
|
url = f"{self.BASE_URL}/search/companies"
|
|
75
|
-
params = {
|
|
76
|
-
"q": query,
|
|
77
|
-
"items_per_page": min(items_per_page, 100)
|
|
78
|
-
}
|
|
74
|
+
params = {"q": query, "items_per_page": min(items_per_page, 100)}
|
|
79
75
|
|
|
80
76
|
response = self.session.get(url, params=params, timeout=30)
|
|
81
77
|
response.raise_for_status()
|
|
@@ -198,10 +194,9 @@ class InfoFinanciereAPIScraper:
|
|
|
198
194
|
|
|
199
195
|
def __init__(self):
|
|
200
196
|
self.session = requests.Session()
|
|
201
|
-
self.session.headers.update(
|
|
202
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
203
|
-
|
|
204
|
-
})
|
|
197
|
+
self.session.headers.update(
|
|
198
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
|
|
199
|
+
)
|
|
205
200
|
|
|
206
201
|
def search_publications(
|
|
207
202
|
self,
|
|
@@ -209,7 +204,7 @@ class InfoFinanciereAPIScraper:
|
|
|
209
204
|
from_date: Optional[str] = None,
|
|
210
205
|
to_date: Optional[str] = None,
|
|
211
206
|
page: int = 1,
|
|
212
|
-
per_page: int = 20
|
|
207
|
+
per_page: int = 20,
|
|
213
208
|
) -> List[Dict]:
|
|
214
209
|
"""
|
|
215
210
|
Search financial publications
|
|
@@ -226,10 +221,7 @@ class InfoFinanciereAPIScraper:
|
|
|
226
221
|
"""
|
|
227
222
|
try:
|
|
228
223
|
url = f"{self.BASE_URL}/publications"
|
|
229
|
-
params = {
|
|
230
|
-
"page": page,
|
|
231
|
-
"per_page": min(per_page, 100)
|
|
232
|
-
}
|
|
224
|
+
params = {"page": page, "per_page": min(per_page, 100)}
|
|
233
225
|
|
|
234
226
|
if query:
|
|
235
227
|
params["q"] = query
|
|
@@ -298,17 +290,12 @@ class OpenCorporatesScraper:
|
|
|
298
290
|
# API key is optional for free tier, but recommended
|
|
299
291
|
|
|
300
292
|
self.session = requests.Session()
|
|
301
|
-
self.session.headers.update(
|
|
302
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
303
|
-
|
|
304
|
-
})
|
|
293
|
+
self.session.headers.update(
|
|
294
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
|
|
295
|
+
)
|
|
305
296
|
|
|
306
297
|
def search_companies(
|
|
307
|
-
self,
|
|
308
|
-
query: str,
|
|
309
|
-
jurisdiction_code: Optional[str] = None,
|
|
310
|
-
per_page: int = 30,
|
|
311
|
-
page: int = 1
|
|
298
|
+
self, query: str, jurisdiction_code: Optional[str] = None, per_page: int = 30, page: int = 1
|
|
312
299
|
) -> List[Dict]:
|
|
313
300
|
"""
|
|
314
301
|
Search for companies across jurisdictions
|
|
@@ -324,11 +311,7 @@ class OpenCorporatesScraper:
|
|
|
324
311
|
"""
|
|
325
312
|
try:
|
|
326
313
|
url = f"{self.BASE_URL}/companies/search"
|
|
327
|
-
params = {
|
|
328
|
-
"q": query,
|
|
329
|
-
"per_page": min(per_page, 100),
|
|
330
|
-
"page": page
|
|
331
|
-
}
|
|
314
|
+
params = {"q": query, "per_page": min(per_page, 100), "page": page}
|
|
332
315
|
|
|
333
316
|
if jurisdiction_code:
|
|
334
317
|
params["jurisdiction_code"] = jurisdiction_code
|
|
@@ -406,7 +389,9 @@ class OpenCorporatesScraper:
|
|
|
406
389
|
results = data.get("results", {})
|
|
407
390
|
officers = results.get("officers", [])
|
|
408
391
|
|
|
409
|
-
logger.info(
|
|
392
|
+
logger.info(
|
|
393
|
+
f"Found {len(officers)} officers for company {jurisdiction_code}/{company_number}"
|
|
394
|
+
)
|
|
410
395
|
return officers
|
|
411
396
|
|
|
412
397
|
except Exception as e:
|
|
@@ -432,10 +417,9 @@ class XBRLFilingsScraper:
|
|
|
432
417
|
|
|
433
418
|
def __init__(self):
|
|
434
419
|
self.session = requests.Session()
|
|
435
|
-
self.session.headers.update(
|
|
436
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
437
|
-
|
|
438
|
-
})
|
|
420
|
+
self.session.headers.update(
|
|
421
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/vnd.api+json"}
|
|
422
|
+
)
|
|
439
423
|
|
|
440
424
|
def get_filings(
|
|
441
425
|
self,
|
|
@@ -443,7 +427,7 @@ class XBRLFilingsScraper:
|
|
|
443
427
|
from_date: Optional[str] = None,
|
|
444
428
|
to_date: Optional[str] = None,
|
|
445
429
|
page_number: int = 1,
|
|
446
|
-
page_size: int = 100
|
|
430
|
+
page_size: int = 100,
|
|
447
431
|
) -> List[Dict]:
|
|
448
432
|
"""
|
|
449
433
|
Get XBRL filings with filters
|
|
@@ -460,10 +444,7 @@ class XBRLFilingsScraper:
|
|
|
460
444
|
"""
|
|
461
445
|
try:
|
|
462
446
|
url = f"{self.BASE_URL}/filings"
|
|
463
|
-
params = {
|
|
464
|
-
"page[number]": page_number,
|
|
465
|
-
"page[size]": min(page_size, 500)
|
|
466
|
-
}
|
|
447
|
+
params = {"page[number]": page_number, "page[size]": min(page_size, 500)}
|
|
467
448
|
|
|
468
449
|
# Add filters using JSON:API filter syntax
|
|
469
450
|
if country:
|
|
@@ -487,10 +468,7 @@ class XBRLFilingsScraper:
|
|
|
487
468
|
return []
|
|
488
469
|
|
|
489
470
|
def get_entities(
|
|
490
|
-
self,
|
|
491
|
-
country: Optional[str] = None,
|
|
492
|
-
page_number: int = 1,
|
|
493
|
-
page_size: int = 100
|
|
471
|
+
self, country: Optional[str] = None, page_number: int = 1, page_size: int = 100
|
|
494
472
|
) -> List[Dict]:
|
|
495
473
|
"""
|
|
496
474
|
Get filing entities (companies)
|
|
@@ -505,10 +483,7 @@ class XBRLFilingsScraper:
|
|
|
505
483
|
"""
|
|
506
484
|
try:
|
|
507
485
|
url = f"{self.BASE_URL}/entities"
|
|
508
|
-
params = {
|
|
509
|
-
"page[number]": page_number,
|
|
510
|
-
"page[size]": min(page_size, 500)
|
|
511
|
-
}
|
|
486
|
+
params = {"page[number]": page_number, "page[size]": min(page_size, 500)}
|
|
512
487
|
|
|
513
488
|
if country:
|
|
514
489
|
params["filter[country]"] = country
|
|
@@ -553,10 +528,9 @@ class XBRLUSScraper:
|
|
|
553
528
|
)
|
|
554
529
|
|
|
555
530
|
self.session = requests.Session()
|
|
556
|
-
self.session.headers.update(
|
|
557
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
558
|
-
|
|
559
|
-
})
|
|
531
|
+
self.session.headers.update(
|
|
532
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
|
|
533
|
+
)
|
|
560
534
|
|
|
561
535
|
def search_companies(self, query: str, limit: int = 100) -> List[Dict]:
|
|
562
536
|
"""
|
|
@@ -571,11 +545,7 @@ class XBRLUSScraper:
|
|
|
571
545
|
"""
|
|
572
546
|
try:
|
|
573
547
|
url = f"{self.BASE_URL}/entity/search"
|
|
574
|
-
params = {
|
|
575
|
-
"name": query,
|
|
576
|
-
"limit": min(limit, 2000),
|
|
577
|
-
"client_id": self.api_key
|
|
578
|
-
}
|
|
548
|
+
params = {"name": query, "limit": min(limit, 2000), "client_id": self.api_key}
|
|
579
549
|
|
|
580
550
|
response = self.session.get(url, params=params, timeout=30)
|
|
581
551
|
response.raise_for_status()
|
|
@@ -595,7 +565,7 @@ class XBRLUSScraper:
|
|
|
595
565
|
entity_id: int,
|
|
596
566
|
filing_date_from: Optional[str] = None,
|
|
597
567
|
filing_date_to: Optional[str] = None,
|
|
598
|
-
limit: int = 100
|
|
568
|
+
limit: int = 100,
|
|
599
569
|
) -> List[Dict]:
|
|
600
570
|
"""
|
|
601
571
|
Get filings for an entity
|
|
@@ -611,11 +581,7 @@ class XBRLUSScraper:
|
|
|
611
581
|
"""
|
|
612
582
|
try:
|
|
613
583
|
url = f"{self.BASE_URL}/filing/search"
|
|
614
|
-
params = {
|
|
615
|
-
"entity.id": entity_id,
|
|
616
|
-
"limit": min(limit, 2000),
|
|
617
|
-
"client_id": self.api_key
|
|
618
|
-
}
|
|
584
|
+
params = {"entity.id": entity_id, "limit": min(limit, 2000), "client_id": self.api_key}
|
|
619
585
|
|
|
620
586
|
if filing_date_from:
|
|
621
587
|
params["filing_date.from"] = filing_date_from
|
|
@@ -641,7 +607,7 @@ class XBRLUSScraper:
|
|
|
641
607
|
entity_id: Optional[int] = None,
|
|
642
608
|
period_end_from: Optional[str] = None,
|
|
643
609
|
period_end_to: Optional[str] = None,
|
|
644
|
-
limit: int = 100
|
|
610
|
+
limit: int = 100,
|
|
645
611
|
) -> List[Dict]:
|
|
646
612
|
"""
|
|
647
613
|
Get XBRL facts (financial data points)
|
|
@@ -661,7 +627,7 @@ class XBRLUSScraper:
|
|
|
661
627
|
params = {
|
|
662
628
|
"concept.local-name": concept_name,
|
|
663
629
|
"limit": min(limit, 2000),
|
|
664
|
-
"client_id": self.api_key
|
|
630
|
+
"client_id": self.api_key,
|
|
665
631
|
}
|
|
666
632
|
|
|
667
633
|
if entity_id:
|
|
@@ -699,7 +665,7 @@ class CorporateRegistryFetcher:
|
|
|
699
665
|
self,
|
|
700
666
|
uk_companies_house_key: Optional[str] = None,
|
|
701
667
|
opencorporates_key: Optional[str] = None,
|
|
702
|
-
xbrl_us_key: Optional[str] = None
|
|
668
|
+
xbrl_us_key: Optional[str] = None,
|
|
703
669
|
):
|
|
704
670
|
"""
|
|
705
671
|
Initialize fetcher with optional API keys
|
|
@@ -767,16 +733,10 @@ class CorporateRegistryFetcher:
|
|
|
767
733
|
f"{len(all_officers)} officers, {len(all_psc)} PSC records"
|
|
768
734
|
)
|
|
769
735
|
|
|
770
|
-
return {
|
|
771
|
-
"companies": companies,
|
|
772
|
-
"officers": all_officers,
|
|
773
|
-
"psc": all_psc
|
|
774
|
-
}
|
|
736
|
+
return {"companies": companies, "officers": all_officers, "psc": all_psc}
|
|
775
737
|
|
|
776
738
|
def fetch_french_disclosures(
|
|
777
|
-
self,
|
|
778
|
-
query: Optional[str] = None,
|
|
779
|
-
days_back: int = 30
|
|
739
|
+
self, query: Optional[str] = None, days_back: int = 30
|
|
780
740
|
) -> List[Dict]:
|
|
781
741
|
"""
|
|
782
742
|
Fetch French financial disclosures
|
|
@@ -794,19 +754,14 @@ class CorporateRegistryFetcher:
|
|
|
794
754
|
to_date = datetime.now().strftime("%Y-%m-%d")
|
|
795
755
|
|
|
796
756
|
publications = self.info_financiere.search_publications(
|
|
797
|
-
query=query,
|
|
798
|
-
from_date=from_date,
|
|
799
|
-
to_date=to_date,
|
|
800
|
-
per_page=100
|
|
757
|
+
query=query, from_date=from_date, to_date=to_date, per_page=100
|
|
801
758
|
)
|
|
802
759
|
|
|
803
760
|
logger.info(f"Fetched {len(publications)} French publications")
|
|
804
761
|
return publications
|
|
805
762
|
|
|
806
763
|
def fetch_xbrl_eu_filings(
|
|
807
|
-
self,
|
|
808
|
-
country: Optional[str] = None,
|
|
809
|
-
days_back: int = 30
|
|
764
|
+
self, country: Optional[str] = None, days_back: int = 30
|
|
810
765
|
) -> List[Dict]:
|
|
811
766
|
"""
|
|
812
767
|
Fetch EU/UK XBRL filings
|
|
@@ -822,11 +777,7 @@ class CorporateRegistryFetcher:
|
|
|
822
777
|
|
|
823
778
|
from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
|
|
824
779
|
|
|
825
|
-
filings = self.xbrl_filings.get_filings(
|
|
826
|
-
country=country,
|
|
827
|
-
from_date=from_date,
|
|
828
|
-
page_size=100
|
|
829
|
-
)
|
|
780
|
+
filings = self.xbrl_filings.get_filings(country=country, from_date=from_date, page_size=100)
|
|
830
781
|
|
|
831
782
|
logger.info(f"Fetched {len(filings)} XBRL filings")
|
|
832
783
|
return filings
|