mcli-framework 7.6.0__py3-none-any.whl → 7.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/commands_cmd.py +51 -39
- mcli/app/main.py +10 -2
- mcli/app/model_cmd.py +1 -1
- mcli/lib/custom_commands.py +4 -10
- mcli/ml/api/app.py +1 -5
- mcli/ml/dashboard/app.py +2 -2
- mcli/ml/dashboard/app_integrated.py +168 -116
- mcli/ml/dashboard/app_supabase.py +7 -3
- mcli/ml/dashboard/app_training.py +3 -6
- mcli/ml/dashboard/components/charts.py +74 -115
- mcli/ml/dashboard/components/metrics.py +24 -44
- mcli/ml/dashboard/components/tables.py +32 -40
- mcli/ml/dashboard/overview.py +102 -78
- mcli/ml/dashboard/pages/cicd.py +103 -56
- mcli/ml/dashboard/pages/debug_dependencies.py +35 -28
- mcli/ml/dashboard/pages/gravity_viz.py +374 -313
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +50 -48
- mcli/ml/dashboard/pages/predictions_enhanced.py +396 -248
- mcli/ml/dashboard/pages/scrapers_and_logs.py +299 -273
- mcli/ml/dashboard/pages/test_portfolio.py +153 -121
- mcli/ml/dashboard/pages/trading.py +238 -169
- mcli/ml/dashboard/pages/workflows.py +129 -84
- mcli/ml/dashboard/streamlit_extras_utils.py +70 -79
- mcli/ml/dashboard/utils.py +24 -21
- mcli/ml/dashboard/warning_suppression.py +6 -4
- mcli/ml/database/session.py +16 -5
- mcli/ml/mlops/pipeline_orchestrator.py +1 -3
- mcli/ml/predictions/monte_carlo.py +6 -18
- mcli/ml/trading/alpaca_client.py +95 -96
- mcli/ml/trading/migrations.py +76 -40
- mcli/ml/trading/models.py +78 -60
- mcli/ml/trading/paper_trading.py +92 -74
- mcli/ml/trading/risk_management.py +106 -85
- mcli/ml/trading/trading_service.py +155 -110
- mcli/ml/training/train_model.py +1 -3
- mcli/self/self_cmd.py +71 -57
- mcli/workflow/daemon/daemon.py +2 -0
- mcli/workflow/model_service/openai_adapter.py +6 -2
- mcli/workflow/politician_trading/models.py +6 -2
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +39 -88
- mcli/workflow/politician_trading/scrapers_free_sources.py +32 -39
- mcli/workflow/politician_trading/scrapers_third_party.py +21 -39
- mcli/workflow/politician_trading/seed_database.py +70 -89
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.1.dist-info}/METADATA +1 -1
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.1.dist-info}/RECORD +49 -49
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.1.dist-info}/WHEEL +0 -0
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.1.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.1.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.6.0.dist-info → mcli_framework-7.6.1.dist-info}/top_level.txt +0 -0
mcli/self/self_cmd.py
CHANGED
|
@@ -29,8 +29,8 @@ try:
|
|
|
29
29
|
except ImportError:
|
|
30
30
|
process = None
|
|
31
31
|
|
|
32
|
-
from mcli.lib.logger.logger import get_logger
|
|
33
32
|
from mcli.lib.custom_commands import get_command_manager
|
|
33
|
+
from mcli.lib.logger.logger import get_logger
|
|
34
34
|
|
|
35
35
|
logger = get_logger()
|
|
36
36
|
|
|
@@ -345,29 +345,35 @@ def collect_commands() -> List[Dict[str, Any]]:
|
|
|
345
345
|
|
|
346
346
|
try:
|
|
347
347
|
# Suppress Streamlit warnings and logging during module import
|
|
348
|
-
import warnings
|
|
349
348
|
import logging
|
|
350
|
-
import sys
|
|
351
349
|
import os
|
|
350
|
+
import sys
|
|
351
|
+
import warnings
|
|
352
352
|
from contextlib import redirect_stderr
|
|
353
353
|
from io import StringIO
|
|
354
|
-
|
|
354
|
+
|
|
355
355
|
# Suppress Python warnings
|
|
356
356
|
with warnings.catch_warnings():
|
|
357
357
|
warnings.filterwarnings("ignore", message=".*missing ScriptRunContext.*")
|
|
358
358
|
warnings.filterwarnings("ignore", message=".*No runtime found.*")
|
|
359
|
-
warnings.filterwarnings(
|
|
359
|
+
warnings.filterwarnings(
|
|
360
|
+
"ignore", message=".*Session state does not function.*"
|
|
361
|
+
)
|
|
360
362
|
warnings.filterwarnings("ignore", message=".*to view this Streamlit app.*")
|
|
361
|
-
|
|
363
|
+
|
|
362
364
|
# Suppress Streamlit logger warnings
|
|
363
365
|
streamlit_logger = logging.getLogger("streamlit")
|
|
364
366
|
original_level = streamlit_logger.level
|
|
365
367
|
streamlit_logger.setLevel(logging.CRITICAL)
|
|
366
|
-
|
|
368
|
+
|
|
367
369
|
# Also suppress specific Streamlit sub-loggers
|
|
368
|
-
logging.getLogger(
|
|
369
|
-
|
|
370
|
-
|
|
370
|
+
logging.getLogger(
|
|
371
|
+
"streamlit.runtime.scriptrunner_utils.script_run_context"
|
|
372
|
+
).setLevel(logging.CRITICAL)
|
|
373
|
+
logging.getLogger("streamlit.runtime.caching.cache_data_api").setLevel(
|
|
374
|
+
logging.CRITICAL
|
|
375
|
+
)
|
|
376
|
+
|
|
371
377
|
# Redirect stderr to suppress Streamlit warnings
|
|
372
378
|
with redirect_stderr(StringIO()):
|
|
373
379
|
try:
|
|
@@ -417,40 +423,44 @@ def collect_commands() -> List[Dict[str, Any]]:
|
|
|
417
423
|
return commands
|
|
418
424
|
|
|
419
425
|
|
|
420
|
-
def open_editor_for_command(
|
|
426
|
+
def open_editor_for_command(
|
|
427
|
+
command_name: str, command_group: str, description: str
|
|
428
|
+
) -> Optional[str]:
|
|
421
429
|
"""
|
|
422
430
|
Open the user's default editor to allow them to write command logic.
|
|
423
|
-
|
|
431
|
+
|
|
424
432
|
Args:
|
|
425
433
|
command_name: Name of the command
|
|
426
434
|
command_group: Group for the command
|
|
427
435
|
description: Description of the command
|
|
428
|
-
|
|
436
|
+
|
|
429
437
|
Returns:
|
|
430
438
|
The Python code written by the user, or None if cancelled
|
|
431
439
|
"""
|
|
432
|
-
import tempfile
|
|
433
|
-
import subprocess
|
|
434
440
|
import os
|
|
441
|
+
import subprocess
|
|
435
442
|
import sys
|
|
443
|
+
import tempfile
|
|
436
444
|
from pathlib import Path
|
|
437
|
-
|
|
445
|
+
|
|
438
446
|
# Get the user's default editor
|
|
439
|
-
editor = os.environ.get(
|
|
447
|
+
editor = os.environ.get("EDITOR")
|
|
440
448
|
if not editor:
|
|
441
449
|
# Try common editors in order of preference
|
|
442
|
-
for common_editor in [
|
|
443
|
-
if subprocess.run([
|
|
450
|
+
for common_editor in ["vim", "nano", "code", "subl", "atom", "emacs"]:
|
|
451
|
+
if subprocess.run(["which", common_editor], capture_output=True).returncode == 0:
|
|
444
452
|
editor = common_editor
|
|
445
453
|
break
|
|
446
|
-
|
|
454
|
+
|
|
447
455
|
if not editor:
|
|
448
|
-
click.echo(
|
|
456
|
+
click.echo(
|
|
457
|
+
"❌ No editor found. Please set the EDITOR environment variable or install vim/nano."
|
|
458
|
+
)
|
|
449
459
|
return None
|
|
450
|
-
|
|
460
|
+
|
|
451
461
|
# Create a temporary file with the template
|
|
452
462
|
template = get_command_template(command_name, command_group)
|
|
453
|
-
|
|
463
|
+
|
|
454
464
|
# Add helpful comments to the template
|
|
455
465
|
enhanced_template = f'''"""
|
|
456
466
|
{command_name} command for mcli.{command_group}.
|
|
@@ -491,64 +501,66 @@ logger = get_logger()
|
|
|
491
501
|
# logger.info(f"Executing {command_name} command with name: {{name}}")
|
|
492
502
|
# click.echo(f"Hello, {{name}}! This is the {command_name} command.")
|
|
493
503
|
'''
|
|
494
|
-
|
|
504
|
+
|
|
495
505
|
# Create temporary file
|
|
496
|
-
with tempfile.NamedTemporaryFile(mode=
|
|
506
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as temp_file:
|
|
497
507
|
temp_file.write(enhanced_template)
|
|
498
508
|
temp_file_path = temp_file.name
|
|
499
|
-
|
|
509
|
+
|
|
500
510
|
try:
|
|
501
511
|
# Check if we're in an interactive environment
|
|
502
512
|
if not sys.stdin.isatty() or not sys.stdout.isatty():
|
|
503
|
-
click.echo(
|
|
513
|
+
click.echo(
|
|
514
|
+
"❌ Editor requires an interactive terminal. Use --template flag for non-interactive mode."
|
|
515
|
+
)
|
|
504
516
|
return None
|
|
505
|
-
|
|
517
|
+
|
|
506
518
|
# Open editor
|
|
507
519
|
click.echo(f"📝 Opening {editor} to edit command logic...")
|
|
508
520
|
click.echo("💡 Write your Python command logic and save the file to continue.")
|
|
509
521
|
click.echo("💡 Press Ctrl+C to cancel command creation.")
|
|
510
|
-
|
|
522
|
+
|
|
511
523
|
# Run the editor
|
|
512
524
|
result = subprocess.run([editor, temp_file_path], check=False)
|
|
513
|
-
|
|
525
|
+
|
|
514
526
|
if result.returncode != 0:
|
|
515
527
|
click.echo("❌ Editor exited with error. Command creation cancelled.")
|
|
516
528
|
return None
|
|
517
|
-
|
|
529
|
+
|
|
518
530
|
# Read the edited content
|
|
519
|
-
with open(temp_file_path,
|
|
531
|
+
with open(temp_file_path, "r") as f:
|
|
520
532
|
edited_code = f.read()
|
|
521
|
-
|
|
533
|
+
|
|
522
534
|
# Check if the file was actually edited (not just the template)
|
|
523
535
|
if edited_code.strip() == enhanced_template.strip():
|
|
524
536
|
click.echo("⚠️ No changes detected. Command creation cancelled.")
|
|
525
537
|
return None
|
|
526
|
-
|
|
538
|
+
|
|
527
539
|
# Extract the actual command code (remove the instructions)
|
|
528
|
-
lines = edited_code.split(
|
|
540
|
+
lines = edited_code.split("\n")
|
|
529
541
|
code_lines = []
|
|
530
542
|
in_code_section = False
|
|
531
|
-
|
|
543
|
+
|
|
532
544
|
for line in lines:
|
|
533
|
-
if line.strip().startswith(
|
|
545
|
+
if line.strip().startswith("# Your command implementation goes here:"):
|
|
534
546
|
in_code_section = True
|
|
535
547
|
continue
|
|
536
548
|
if in_code_section:
|
|
537
549
|
code_lines.append(line)
|
|
538
|
-
|
|
550
|
+
|
|
539
551
|
if not code_lines or not any(line.strip() for line in code_lines):
|
|
540
552
|
# Fallback: use the entire file content
|
|
541
553
|
code_lines = lines
|
|
542
|
-
|
|
543
|
-
final_code =
|
|
544
|
-
|
|
554
|
+
|
|
555
|
+
final_code = "\n".join(code_lines).strip()
|
|
556
|
+
|
|
545
557
|
if not final_code:
|
|
546
558
|
click.echo("❌ No command code found. Command creation cancelled.")
|
|
547
559
|
return None
|
|
548
|
-
|
|
560
|
+
|
|
549
561
|
click.echo("✅ Command code captured successfully!")
|
|
550
562
|
return final_code
|
|
551
|
-
|
|
563
|
+
|
|
552
564
|
except KeyboardInterrupt:
|
|
553
565
|
click.echo("\n❌ Command creation cancelled by user.")
|
|
554
566
|
return None
|
|
@@ -611,7 +623,9 @@ def extract_workflow_commands(output):
|
|
|
611
623
|
|
|
612
624
|
if isinstance(cmd_obj, click.Group):
|
|
613
625
|
# For groups, create a template
|
|
614
|
-
command_info[
|
|
626
|
+
command_info[
|
|
627
|
+
"code"
|
|
628
|
+
] = f'''"""
|
|
615
629
|
{cmd_name} workflow command.
|
|
616
630
|
"""
|
|
617
631
|
import click
|
|
@@ -625,7 +639,9 @@ def app():
|
|
|
625
639
|
'''
|
|
626
640
|
else:
|
|
627
641
|
# For regular commands, create a template
|
|
628
|
-
command_info[
|
|
642
|
+
command_info[
|
|
643
|
+
"code"
|
|
644
|
+
] = f'''"""
|
|
629
645
|
{cmd_name} workflow command.
|
|
630
646
|
"""
|
|
631
647
|
import click
|
|
@@ -650,9 +666,7 @@ def app():
|
|
|
650
666
|
click.echo(
|
|
651
667
|
f"\n💡 These are templates. Import with: mcli self import-commands {output_file}"
|
|
652
668
|
)
|
|
653
|
-
click.echo(
|
|
654
|
-
" Then customize the code in ~/.mcli/commands/<command>.json"
|
|
655
|
-
)
|
|
669
|
+
click.echo(" Then customize the code in ~/.mcli/commands/<command>.json")
|
|
656
670
|
return 0
|
|
657
671
|
else:
|
|
658
672
|
click.echo("⚠️ No workflow commands found to extract")
|
|
@@ -1229,24 +1243,20 @@ def update(check: bool, pre: bool, yes: bool, skip_ci_check: bool):
|
|
|
1229
1243
|
|
|
1230
1244
|
console.print(f"[dim]{traceback.format_exc()}[/dim]")
|
|
1231
1245
|
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
1246
|
# Validate syntax
|
|
1235
1247
|
try:
|
|
1236
|
-
compile(new_code,
|
|
1248
|
+
compile(new_code, "<string>", "exec")
|
|
1237
1249
|
except SyntaxError as e:
|
|
1238
1250
|
click.echo(f"❌ Syntax error in edited code: {e}", err=True)
|
|
1239
|
-
should_save = Prompt.ask(
|
|
1240
|
-
"Save anyway?", choices=["y", "n"], default="n"
|
|
1241
|
-
)
|
|
1251
|
+
should_save = Prompt.ask("Save anyway?", choices=["y", "n"], default="n")
|
|
1242
1252
|
if should_save.lower() != "y":
|
|
1243
1253
|
return 1
|
|
1244
1254
|
|
|
1245
1255
|
# Update the command
|
|
1246
|
-
command_data[
|
|
1247
|
-
command_data[
|
|
1256
|
+
command_data["code"] = new_code
|
|
1257
|
+
command_data["updated_at"] = datetime.now().isoformat()
|
|
1248
1258
|
|
|
1249
|
-
with open(command_file,
|
|
1259
|
+
with open(command_file, "w") as f:
|
|
1250
1260
|
json.dump(command_data, f, indent=2)
|
|
1251
1261
|
|
|
1252
1262
|
# Update lockfile
|
|
@@ -1268,6 +1278,7 @@ self_app.add_command(plugin)
|
|
|
1268
1278
|
# Import and register new commands that have been moved to self
|
|
1269
1279
|
try:
|
|
1270
1280
|
from mcli.self.completion_cmd import completion
|
|
1281
|
+
|
|
1271
1282
|
self_app.add_command(completion, name="completion")
|
|
1272
1283
|
logger.debug("Added completion command to self group")
|
|
1273
1284
|
except ImportError as e:
|
|
@@ -1275,6 +1286,7 @@ except ImportError as e:
|
|
|
1275
1286
|
|
|
1276
1287
|
try:
|
|
1277
1288
|
from mcli.self.logs_cmd import logs_group
|
|
1289
|
+
|
|
1278
1290
|
self_app.add_command(logs_group, name="logs")
|
|
1279
1291
|
logger.debug("Added logs command to self group")
|
|
1280
1292
|
except ImportError as e:
|
|
@@ -1282,6 +1294,7 @@ except ImportError as e:
|
|
|
1282
1294
|
|
|
1283
1295
|
try:
|
|
1284
1296
|
from mcli.self.redis_cmd import redis_group
|
|
1297
|
+
|
|
1285
1298
|
self_app.add_command(redis_group, name="redis")
|
|
1286
1299
|
logger.debug("Added redis command to self group")
|
|
1287
1300
|
except ImportError as e:
|
|
@@ -1289,6 +1302,7 @@ except ImportError as e:
|
|
|
1289
1302
|
|
|
1290
1303
|
try:
|
|
1291
1304
|
from mcli.self.visual_cmd import visual
|
|
1305
|
+
|
|
1292
1306
|
self_app.add_command(visual, name="visual")
|
|
1293
1307
|
logger.debug("Added visual command to self group")
|
|
1294
1308
|
except ImportError as e:
|
mcli/workflow/daemon/daemon.py
CHANGED
|
@@ -20,6 +20,7 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
|
20
20
|
try:
|
|
21
21
|
from watchdog.events import FileSystemEventHandler
|
|
22
22
|
from watchdog.observers import Observer
|
|
23
|
+
|
|
23
24
|
HAS_WATCHDOG = True
|
|
24
25
|
except ImportError:
|
|
25
26
|
# Watchdog not available, file watching will be disabled
|
|
@@ -40,6 +41,7 @@ class CommandDatabase:
|
|
|
40
41
|
"""Stub database for backward compatibility.
|
|
41
42
|
Commands are now stored as JSON files and loaded via the custom commands system.
|
|
42
43
|
"""
|
|
44
|
+
|
|
43
45
|
def __init__(self, db_path: Optional[str] = None):
|
|
44
46
|
logger.debug("CommandDatabase stub initialized - commands now managed via JSON files")
|
|
45
47
|
pass
|
|
mcli/workflow/model_service/openai_adapter.py
CHANGED
|
@@ -10,7 +10,7 @@ import uuid
|
|
|
10
10
|
from datetime import datetime
|
|
11
11
|
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
12
12
|
|
|
13
|
-
from fastapi import APIRouter, Depends,
|
|
13
|
+
from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
|
|
14
14
|
from fastapi.responses import StreamingResponse
|
|
15
15
|
from pydantic import BaseModel, Field
|
|
16
16
|
|
|
@@ -116,7 +116,11 @@ class APIKeyManager:
|
|
|
116
116
|
def list_keys(self) -> List[Dict[str, Any]]:
|
|
117
117
|
"""List all API keys (without showing the actual key)"""
|
|
118
118
|
return [
|
|
119
|
-
{
|
|
119
|
+
{
|
|
120
|
+
"name": info["name"],
|
|
121
|
+
"created_at": info["created_at"],
|
|
122
|
+
"usage_count": info["usage_count"],
|
|
123
|
+
}
|
|
120
124
|
for info in self.valid_keys.values()
|
|
121
125
|
]
|
|
122
126
|
|
|
mcli/workflow/politician_trading/models.py
CHANGED
|
@@ -238,10 +238,14 @@ class PersonWithSignificantControl:
|
|
|
238
238
|
|
|
239
239
|
# PSC details
|
|
240
240
|
name: str = ""
|
|
241
|
-
kind: str =
|
|
241
|
+
kind: str = (
|
|
242
|
+
"" # individual-person-with-significant-control, corporate-entity-person-with-significant-control, etc.
|
|
243
|
+
)
|
|
242
244
|
|
|
243
245
|
# Control nature
|
|
244
|
-
natures_of_control: List[str] = field(
|
|
246
|
+
natures_of_control: List[str] = field(
|
|
247
|
+
default_factory=list
|
|
248
|
+
) # ownership-of-shares-75-to-100-percent, etc.
|
|
245
249
|
notified_on: Optional[datetime] = None
|
|
246
250
|
|
|
247
251
|
# Personal details (may be redacted)
|
|
mcli/workflow/politician_trading/scrapers_corporate_registry.py
CHANGED
|
@@ -15,9 +15,9 @@ politician trading patterns, conflicts of interest, and asset declarations.
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import time
|
|
18
|
+
from base64 import b64encode
|
|
18
19
|
from datetime import datetime, timedelta
|
|
19
20
|
from typing import Dict, List, Optional
|
|
20
|
-
from base64 import b64encode
|
|
21
21
|
|
|
22
22
|
import requests
|
|
23
23
|
|
|
@@ -54,10 +54,9 @@ class UKCompaniesHouseScraper:
|
|
|
54
54
|
# API uses HTTP Basic Auth with API key as username, password empty
|
|
55
55
|
auth_string = f"{self.api_key}:"
|
|
56
56
|
auth_header = b64encode(auth_string.encode()).decode()
|
|
57
|
-
self.session.headers.update(
|
|
58
|
-
"Authorization": f"Basic {auth_header}",
|
|
59
|
-
|
|
60
|
-
})
|
|
57
|
+
self.session.headers.update(
|
|
58
|
+
{"Authorization": f"Basic {auth_header}", "User-Agent": "PoliticianTradingTracker/1.0"}
|
|
59
|
+
)
|
|
61
60
|
|
|
62
61
|
def search_companies(self, query: str, items_per_page: int = 20) -> List[Dict]:
|
|
63
62
|
"""
|
|
@@ -72,10 +71,7 @@ class UKCompaniesHouseScraper:
|
|
|
72
71
|
"""
|
|
73
72
|
try:
|
|
74
73
|
url = f"{self.BASE_URL}/search/companies"
|
|
75
|
-
params = {
|
|
76
|
-
"q": query,
|
|
77
|
-
"items_per_page": min(items_per_page, 100)
|
|
78
|
-
}
|
|
74
|
+
params = {"q": query, "items_per_page": min(items_per_page, 100)}
|
|
79
75
|
|
|
80
76
|
response = self.session.get(url, params=params, timeout=30)
|
|
81
77
|
response.raise_for_status()
|
|
@@ -198,10 +194,9 @@ class InfoFinanciereAPIScraper:
|
|
|
198
194
|
|
|
199
195
|
def __init__(self):
|
|
200
196
|
self.session = requests.Session()
|
|
201
|
-
self.session.headers.update(
|
|
202
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
203
|
-
|
|
204
|
-
})
|
|
197
|
+
self.session.headers.update(
|
|
198
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
|
|
199
|
+
)
|
|
205
200
|
|
|
206
201
|
def search_publications(
|
|
207
202
|
self,
|
|
@@ -209,7 +204,7 @@ class InfoFinanciereAPIScraper:
|
|
|
209
204
|
from_date: Optional[str] = None,
|
|
210
205
|
to_date: Optional[str] = None,
|
|
211
206
|
page: int = 1,
|
|
212
|
-
per_page: int = 20
|
|
207
|
+
per_page: int = 20,
|
|
213
208
|
) -> List[Dict]:
|
|
214
209
|
"""
|
|
215
210
|
Search financial publications
|
|
@@ -226,10 +221,7 @@ class InfoFinanciereAPIScraper:
|
|
|
226
221
|
"""
|
|
227
222
|
try:
|
|
228
223
|
url = f"{self.BASE_URL}/publications"
|
|
229
|
-
params = {
|
|
230
|
-
"page": page,
|
|
231
|
-
"per_page": min(per_page, 100)
|
|
232
|
-
}
|
|
224
|
+
params = {"page": page, "per_page": min(per_page, 100)}
|
|
233
225
|
|
|
234
226
|
if query:
|
|
235
227
|
params["q"] = query
|
|
@@ -298,17 +290,12 @@ class OpenCorporatesScraper:
|
|
|
298
290
|
# API key is optional for free tier, but recommended
|
|
299
291
|
|
|
300
292
|
self.session = requests.Session()
|
|
301
|
-
self.session.headers.update(
|
|
302
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
303
|
-
|
|
304
|
-
})
|
|
293
|
+
self.session.headers.update(
|
|
294
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
|
|
295
|
+
)
|
|
305
296
|
|
|
306
297
|
def search_companies(
|
|
307
|
-
self,
|
|
308
|
-
query: str,
|
|
309
|
-
jurisdiction_code: Optional[str] = None,
|
|
310
|
-
per_page: int = 30,
|
|
311
|
-
page: int = 1
|
|
298
|
+
self, query: str, jurisdiction_code: Optional[str] = None, per_page: int = 30, page: int = 1
|
|
312
299
|
) -> List[Dict]:
|
|
313
300
|
"""
|
|
314
301
|
Search for companies across jurisdictions
|
|
@@ -324,11 +311,7 @@ class OpenCorporatesScraper:
|
|
|
324
311
|
"""
|
|
325
312
|
try:
|
|
326
313
|
url = f"{self.BASE_URL}/companies/search"
|
|
327
|
-
params = {
|
|
328
|
-
"q": query,
|
|
329
|
-
"per_page": min(per_page, 100),
|
|
330
|
-
"page": page
|
|
331
|
-
}
|
|
314
|
+
params = {"q": query, "per_page": min(per_page, 100), "page": page}
|
|
332
315
|
|
|
333
316
|
if jurisdiction_code:
|
|
334
317
|
params["jurisdiction_code"] = jurisdiction_code
|
|
@@ -406,7 +389,9 @@ class OpenCorporatesScraper:
|
|
|
406
389
|
results = data.get("results", {})
|
|
407
390
|
officers = results.get("officers", [])
|
|
408
391
|
|
|
409
|
-
logger.info(
|
|
392
|
+
logger.info(
|
|
393
|
+
f"Found {len(officers)} officers for company {jurisdiction_code}/{company_number}"
|
|
394
|
+
)
|
|
410
395
|
return officers
|
|
411
396
|
|
|
412
397
|
except Exception as e:
|
|
@@ -432,10 +417,9 @@ class XBRLFilingsScraper:
|
|
|
432
417
|
|
|
433
418
|
def __init__(self):
|
|
434
419
|
self.session = requests.Session()
|
|
435
|
-
self.session.headers.update(
|
|
436
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
437
|
-
|
|
438
|
-
})
|
|
420
|
+
self.session.headers.update(
|
|
421
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/vnd.api+json"}
|
|
422
|
+
)
|
|
439
423
|
|
|
440
424
|
def get_filings(
|
|
441
425
|
self,
|
|
@@ -443,7 +427,7 @@ class XBRLFilingsScraper:
|
|
|
443
427
|
from_date: Optional[str] = None,
|
|
444
428
|
to_date: Optional[str] = None,
|
|
445
429
|
page_number: int = 1,
|
|
446
|
-
page_size: int = 100
|
|
430
|
+
page_size: int = 100,
|
|
447
431
|
) -> List[Dict]:
|
|
448
432
|
"""
|
|
449
433
|
Get XBRL filings with filters
|
|
@@ -460,10 +444,7 @@ class XBRLFilingsScraper:
|
|
|
460
444
|
"""
|
|
461
445
|
try:
|
|
462
446
|
url = f"{self.BASE_URL}/filings"
|
|
463
|
-
params = {
|
|
464
|
-
"page[number]": page_number,
|
|
465
|
-
"page[size]": min(page_size, 500)
|
|
466
|
-
}
|
|
447
|
+
params = {"page[number]": page_number, "page[size]": min(page_size, 500)}
|
|
467
448
|
|
|
468
449
|
# Add filters using JSON:API filter syntax
|
|
469
450
|
if country:
|
|
@@ -487,10 +468,7 @@ class XBRLFilingsScraper:
|
|
|
487
468
|
return []
|
|
488
469
|
|
|
489
470
|
def get_entities(
|
|
490
|
-
self,
|
|
491
|
-
country: Optional[str] = None,
|
|
492
|
-
page_number: int = 1,
|
|
493
|
-
page_size: int = 100
|
|
471
|
+
self, country: Optional[str] = None, page_number: int = 1, page_size: int = 100
|
|
494
472
|
) -> List[Dict]:
|
|
495
473
|
"""
|
|
496
474
|
Get filing entities (companies)
|
|
@@ -505,10 +483,7 @@ class XBRLFilingsScraper:
|
|
|
505
483
|
"""
|
|
506
484
|
try:
|
|
507
485
|
url = f"{self.BASE_URL}/entities"
|
|
508
|
-
params = {
|
|
509
|
-
"page[number]": page_number,
|
|
510
|
-
"page[size]": min(page_size, 500)
|
|
511
|
-
}
|
|
486
|
+
params = {"page[number]": page_number, "page[size]": min(page_size, 500)}
|
|
512
487
|
|
|
513
488
|
if country:
|
|
514
489
|
params["filter[country]"] = country
|
|
@@ -553,10 +528,9 @@ class XBRLUSScraper:
|
|
|
553
528
|
)
|
|
554
529
|
|
|
555
530
|
self.session = requests.Session()
|
|
556
|
-
self.session.headers.update(
|
|
557
|
-
"User-Agent": "PoliticianTradingTracker/1.0",
|
|
558
|
-
|
|
559
|
-
})
|
|
531
|
+
self.session.headers.update(
|
|
532
|
+
{"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
|
|
533
|
+
)
|
|
560
534
|
|
|
561
535
|
def search_companies(self, query: str, limit: int = 100) -> List[Dict]:
|
|
562
536
|
"""
|
|
@@ -571,11 +545,7 @@ class XBRLUSScraper:
|
|
|
571
545
|
"""
|
|
572
546
|
try:
|
|
573
547
|
url = f"{self.BASE_URL}/entity/search"
|
|
574
|
-
params = {
|
|
575
|
-
"name": query,
|
|
576
|
-
"limit": min(limit, 2000),
|
|
577
|
-
"client_id": self.api_key
|
|
578
|
-
}
|
|
548
|
+
params = {"name": query, "limit": min(limit, 2000), "client_id": self.api_key}
|
|
579
549
|
|
|
580
550
|
response = self.session.get(url, params=params, timeout=30)
|
|
581
551
|
response.raise_for_status()
|
|
@@ -595,7 +565,7 @@ class XBRLUSScraper:
|
|
|
595
565
|
entity_id: int,
|
|
596
566
|
filing_date_from: Optional[str] = None,
|
|
597
567
|
filing_date_to: Optional[str] = None,
|
|
598
|
-
limit: int = 100
|
|
568
|
+
limit: int = 100,
|
|
599
569
|
) -> List[Dict]:
|
|
600
570
|
"""
|
|
601
571
|
Get filings for an entity
|
|
@@ -611,11 +581,7 @@ class XBRLUSScraper:
|
|
|
611
581
|
"""
|
|
612
582
|
try:
|
|
613
583
|
url = f"{self.BASE_URL}/filing/search"
|
|
614
|
-
params = {
|
|
615
|
-
"entity.id": entity_id,
|
|
616
|
-
"limit": min(limit, 2000),
|
|
617
|
-
"client_id": self.api_key
|
|
618
|
-
}
|
|
584
|
+
params = {"entity.id": entity_id, "limit": min(limit, 2000), "client_id": self.api_key}
|
|
619
585
|
|
|
620
586
|
if filing_date_from:
|
|
621
587
|
params["filing_date.from"] = filing_date_from
|
|
@@ -641,7 +607,7 @@ class XBRLUSScraper:
|
|
|
641
607
|
entity_id: Optional[int] = None,
|
|
642
608
|
period_end_from: Optional[str] = None,
|
|
643
609
|
period_end_to: Optional[str] = None,
|
|
644
|
-
limit: int = 100
|
|
610
|
+
limit: int = 100,
|
|
645
611
|
) -> List[Dict]:
|
|
646
612
|
"""
|
|
647
613
|
Get XBRL facts (financial data points)
|
|
@@ -661,7 +627,7 @@ class XBRLUSScraper:
|
|
|
661
627
|
params = {
|
|
662
628
|
"concept.local-name": concept_name,
|
|
663
629
|
"limit": min(limit, 2000),
|
|
664
|
-
"client_id": self.api_key
|
|
630
|
+
"client_id": self.api_key,
|
|
665
631
|
}
|
|
666
632
|
|
|
667
633
|
if entity_id:
|
|
@@ -699,7 +665,7 @@ class CorporateRegistryFetcher:
|
|
|
699
665
|
self,
|
|
700
666
|
uk_companies_house_key: Optional[str] = None,
|
|
701
667
|
opencorporates_key: Optional[str] = None,
|
|
702
|
-
xbrl_us_key: Optional[str] = None
|
|
668
|
+
xbrl_us_key: Optional[str] = None,
|
|
703
669
|
):
|
|
704
670
|
"""
|
|
705
671
|
Initialize fetcher with optional API keys
|
|
@@ -767,16 +733,10 @@ class CorporateRegistryFetcher:
|
|
|
767
733
|
f"{len(all_officers)} officers, {len(all_psc)} PSC records"
|
|
768
734
|
)
|
|
769
735
|
|
|
770
|
-
return {
|
|
771
|
-
"companies": companies,
|
|
772
|
-
"officers": all_officers,
|
|
773
|
-
"psc": all_psc
|
|
774
|
-
}
|
|
736
|
+
return {"companies": companies, "officers": all_officers, "psc": all_psc}
|
|
775
737
|
|
|
776
738
|
def fetch_french_disclosures(
|
|
777
|
-
self,
|
|
778
|
-
query: Optional[str] = None,
|
|
779
|
-
days_back: int = 30
|
|
739
|
+
self, query: Optional[str] = None, days_back: int = 30
|
|
780
740
|
) -> List[Dict]:
|
|
781
741
|
"""
|
|
782
742
|
Fetch French financial disclosures
|
|
@@ -794,19 +754,14 @@ class CorporateRegistryFetcher:
|
|
|
794
754
|
to_date = datetime.now().strftime("%Y-%m-%d")
|
|
795
755
|
|
|
796
756
|
publications = self.info_financiere.search_publications(
|
|
797
|
-
query=query,
|
|
798
|
-
from_date=from_date,
|
|
799
|
-
to_date=to_date,
|
|
800
|
-
per_page=100
|
|
757
|
+
query=query, from_date=from_date, to_date=to_date, per_page=100
|
|
801
758
|
)
|
|
802
759
|
|
|
803
760
|
logger.info(f"Fetched {len(publications)} French publications")
|
|
804
761
|
return publications
|
|
805
762
|
|
|
806
763
|
def fetch_xbrl_eu_filings(
|
|
807
|
-
self,
|
|
808
|
-
country: Optional[str] = None,
|
|
809
|
-
days_back: int = 30
|
|
764
|
+
self, country: Optional[str] = None, days_back: int = 30
|
|
810
765
|
) -> List[Dict]:
|
|
811
766
|
"""
|
|
812
767
|
Fetch EU/UK XBRL filings
|
|
@@ -822,11 +777,7 @@ class CorporateRegistryFetcher:
|
|
|
822
777
|
|
|
823
778
|
from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
|
|
824
779
|
|
|
825
|
-
filings = self.xbrl_filings.get_filings(
|
|
826
|
-
country=country,
|
|
827
|
-
from_date=from_date,
|
|
828
|
-
page_size=100
|
|
829
|
-
)
|
|
780
|
+
filings = self.xbrl_filings.get_filings(country=country, from_date=from_date, page_size=100)
|
|
830
781
|
|
|
831
782
|
logger.info(f"Fetched {len(filings)} XBRL filings")
|
|
832
783
|
return filings
|