mailaccess 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. backend/Dockerfile +30 -0
  2. backend/__init__.py +0 -0
  3. backend/api/__init__.py +0 -0
  4. backend/api/middleware/auth.py +49 -0
  5. backend/api/queue_registry.py +13 -0
  6. backend/api/router.py +11 -0
  7. backend/api/routes/__init__.py +0 -0
  8. backend/api/routes/health.py +28 -0
  9. backend/api/routes/investigations.py +167 -0
  10. backend/api/routes/maltego.py +83 -0
  11. backend/api/routes/modules.py +26 -0
  12. backend/api/websocket.py +111 -0
  13. backend/config.py +101 -0
  14. backend/core/__init__.py +24 -0
  15. backend/core/aggregator.py +37 -0
  16. backend/core/engine.py +243 -0
  17. backend/core/http_client.py +53 -0
  18. backend/core/permutator.py +96 -0
  19. backend/core/proxy.py +55 -0
  20. backend/core/rate_limiter.py +61 -0
  21. backend/core/result_aggregator.py +101 -0
  22. backend/core/scheduler.py +29 -0
  23. backend/core/service.py +199 -0
  24. backend/db/__init__.py +12 -0
  25. backend/db/database.py +23 -0
  26. backend/db/models.py +97 -0
  27. backend/exporters/__init__.py +30 -0
  28. backend/exporters/base.py +14 -0
  29. backend/exporters/csv_exporter.py +51 -0
  30. backend/exporters/json_exporter.py +26 -0
  31. backend/exporters/maltego_exporter.py +91 -0
  32. backend/exporters/markdown_exporter.py +140 -0
  33. backend/exporters/pdf_exporter.py +527 -0
  34. backend/exporters/stix_exporter.py +201 -0
  35. backend/integrations/integration_webhook.py +39 -0
  36. backend/integrations/maltego_transform.py +260 -0
  37. backend/integrations/webhooks.py +208 -0
  38. backend/main.py +96 -0
  39. backend/modules/__init__.py +57 -0
  40. backend/modules/account_discovery.py +110 -0
  41. backend/modules/base.py +41 -0
  42. backend/modules/dns_lookup.py +16 -0
  43. backend/modules/domain_intel.py +284 -0
  44. backend/modules/emailrep.py +105 -0
  45. backend/modules/ghunt_module.py +240 -0
  46. backend/modules/google_dork.py +111 -0
  47. backend/modules/google_search.py +12 -0
  48. backend/modules/gravatar.py +88 -0
  49. backend/modules/haveibeenpwned.py +1 -0
  50. backend/modules/hibp.py +154 -0
  51. backend/modules/hudson_rock.py +175 -0
  52. backend/modules/hunter_io.py +17 -0
  53. backend/modules/permutation_discovery.py +194 -0
  54. backend/modules/shodan.py +18 -0
  55. backend/modules/social.py +448 -0
  56. backend/modules/social_links.py +16 -0
  57. backend/modules/whatsmyname.py +151 -0
  58. backend/modules/whois_lookup.py +15 -0
  59. cli/__init__.py +0 -0
  60. cli/main.py +276 -0
  61. mailaccess-0.1.0.dist-info/METADATA +160 -0
  62. mailaccess-0.1.0.dist-info/RECORD +64 -0
  63. mailaccess-0.1.0.dist-info/WHEEL +4 -0
  64. mailaccess-0.1.0.dist-info/entry_points.txt +2 -0
backend/Dockerfile ADDED
@@ -0,0 +1,30 @@
1
+ FROM python:3.12-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Prevent Python from writing pyc files to disc
6
+ ENV PYTHONDONTWRITEBYTECODE=1
7
+ # Prevent Python from buffering stdout and stderr
8
+ ENV PYTHONUNBUFFERED=1
9
+
10
+ # Copy required files for installation
11
+ COPY pyproject.toml README.md ./
12
+ COPY backend/ ./backend/
13
+ COPY cli/ ./cli/
14
+
15
+ # WeasyPrint runtime libraries
16
+ RUN apt-get update && apt-get install -y --no-install-recommends \
17
+ libgobject-2.0-0 libcairo2 libpango-1.0-0 libpangocairo-1.0-0 \
18
+ libgdk-pixbuf-xlib-2.0-0 libffi-dev shared-mime-info \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
+ # Install dependencies (using --no-cache-dir to reduce image size)
22
+ RUN pip install --no-cache-dir .
23
+
24
+ # Create data directory for SQLite persistence
25
+ RUN mkdir -p /app/data
26
+
27
+ EXPOSE 8000
28
+
29
+ # Default command (used in prod, overridden in dev)
30
+ CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
backend/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,49 @@
1
+ import contextvars
2
+ import uuid
3
+ from typing import Awaitable, Callable
4
+
5
+ from fastapi import Request
6
+ from fastapi.responses import JSONResponse
7
+ from starlette.middleware.base import BaseHTTPMiddleware
8
+
9
+ from backend.config import settings
10
+
11
+ request_id_contextvar = contextvars.ContextVar("request_id", default="-")
12
+
13
+
14
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a freshly generated UUID4 identifier to every request.

    The identifier is stored in ``request_id_contextvar`` before the
    downstream handler runs and is echoed back to the client in the
    ``X-Request-ID`` response header.
    """

    async def dispatch(
        self, request: Request, call_next: Callable[[Request], Awaitable]
    ):
        request_id = str(uuid.uuid4())
        # Publish the ID before calling downstream so handlers can read it.
        request_id_contextvar.set(request_id)

        downstream_response = await call_next(request)
        downstream_response.headers["X-Request-ID"] = request_id
        return downstream_response
24
+
25
+
26
class APIKeyMiddleware(BaseHTTPMiddleware):
    """Enforce the ``X-API-Key`` header on ``/api/`` routes.

    Behaviour:
      * Paths starting with ``/health``, ``/ws/`` or ``/maltego/`` are never
        authenticated.
      * When ``settings.mailaccess_api_key`` is unset or empty, every request
        passes through unauthenticated.
      * Otherwise a request under ``/api/`` must carry an ``X-API-Key`` header
        equal to the configured key, or it receives a 401 JSON response
        ``{"error": "unauthorized"}``.
      * Any other path is passed through.
    """

    # NOTE(review): the /ws/ and /maltego/ bypasses expose investigation data
    # and investigation creation without a key — confirm this is intended for
    # deployments that set an API key.
    _PUBLIC_PREFIXES = ("/health", "/ws/", "/maltego/")

    async def dispatch(
        self, request: Request, call_next: Callable[[Request], Awaitable]
    ):
        # Function-scope import keeps this block self-contained.
        import hmac

        path = request.url.path

        # Bypass authentication for /health, websocket, and Maltego transform routes
        if path.startswith(self._PUBLIC_PREFIXES):
            return await call_next(request)

        # If API key is not configured, bypass authentication entirely
        expected_key = settings.mailaccess_api_key
        if not expected_key:
            return await call_next(request)

        # For /api/ routes, enforce the API key header
        if path.startswith("/api/"):
            supplied_key = request.headers.get("X-API-Key") or ""
            # Compare in constant time so response timing does not leak how
            # much of the key matched. Bytes are used because compare_digest
            # rejects non-ASCII str arguments.
            keys_match = hmac.compare_digest(
                supplied_key.encode("utf-8"), expected_key.encode("utf-8")
            )
            if not keys_match:
                return JSONResponse(
                    status_code=401,
                    content={"error": "unauthorized"},
                )

        return await call_next(request)
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+
5
+ _queues: dict[str, asyncio.Queue] = {}
6
+
7
+
8
+ def put(investigation_id: str, queue: asyncio.Queue) -> None:
9
+ _queues[investigation_id] = queue
10
+
11
+
12
+ def pop(investigation_id: str) -> asyncio.Queue | None:
13
+ return _queues.pop(investigation_id, None)
backend/api/router.py ADDED
@@ -0,0 +1,11 @@
1
+ from fastapi import APIRouter
2
+
3
+ from .routes.investigations import router as investigations_router
4
+ from .routes.modules import router as modules_router
5
+ from .websocket import router as ws_router
6
+
7
+ api_router = APIRouter()
8
+ api_router.include_router(investigations_router, tags=["investigations"])
9
+ api_router.include_router(modules_router, prefix="/modules", tags=["modules"])
10
+
11
+ __all__ = ["api_router", "ws_router"]
File without changes
@@ -0,0 +1,28 @@
1
+ from fastapi import APIRouter, Depends
2
+ from sqlalchemy import text
3
+ from sqlalchemy.ext.asyncio import AsyncSession
4
+
5
+ from backend.db.database import get_db
6
+ from backend.modules import get_all_modules
7
+
8
+ router = APIRouter()
9
+
10
+
11
@router.get("/health")
async def health_check(session: AsyncSession = Depends(get_db)):
    """Report service liveness, loaded modules, and database reachability.

    Returns a dict with ``status`` (always ``"ok"``), ``version``,
    ``modules_loaded`` (names from ``get_all_modules()``) and ``db``
    (``"connected"`` when a trivial ``SELECT 1`` succeeds, else ``"error"``).
    """
    # Function-scope import keeps this block self-contained.
    import logging

    db_status = "error"
    try:
        # Simple query to check if the DB is reachable
        await session.execute(text("SELECT 1"))
    except Exception:
        # The probe stays best-effort (the endpoint must still answer), but the
        # failure is logged instead of being silently discarded.
        logging.getLogger("mailaccess.health").warning(
            "Health check database probe failed", exc_info=True
        )
    else:
        db_status = "connected"

    modules_loaded = [mod.name for mod in get_all_modules()]

    return {
        "status": "ok",
        "version": "0.1.0",
        "modules_loaded": modules_loaded,
        "db": db_status,
    }
@@ -0,0 +1,167 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+
5
+ from fastapi import APIRouter, Depends, HTTPException, Query
6
+ from fastapi.responses import Response
7
+ from pydantic import BaseModel
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from ...core.service import InvestigationService, enrich_report
11
+ from ...db.database import get_db
12
+ from ...exporters import EXPORTERS
13
+ from .. import queue_registry
14
+
15
+ router = APIRouter()
16
+
17
async def _cleanup_queue(investigation_id: str, delay: float = 300.0) -> None:
    """Wait *delay* seconds, then drop the investigation's queue from the registry.

    If the queue was already removed in the meantime, the ``pop`` is a no-op.
    """
    await asyncio.sleep(delay)
    # The popped queue (if any) is intentionally discarded.
    _ = queue_registry.pop(investigation_id)
20
+
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Schemas
24
+ # ---------------------------------------------------------------------------
25
+
26
+
27
# Request body for POST /investigate.
class InvestigateRequest(BaseModel):
    # Target email address; passed to InvestigationService.create_investigation.
    email: str
    # Optional module names, forwarded unchanged to the service (None when omitted).
    modules: list[str] | None = None
30
+
31
+
32
# Response body returned by POST /investigate (HTTP 202).
class InvestigateResponse(BaseModel):
    # Identifier of the newly created investigation.
    id: str
    # Set to "pending" by the route when the investigation is created.
    status: str
    # ISO-8601 string produced from the creation timestamp via isoformat().
    created_at: str
36
+
37
+
38
# One row of the paginated investigation listing.
class InvestigationSummary(BaseModel):
    id: str
    email: str
    status: str
    # None when the listing item carries no "exposure_score" value.
    exposure_score: int | None
    created_at: str
    # None when the listing item carries no "completed_at" value.
    completed_at: str | None
45
+
46
+
47
# Response body for GET /investigations; the pagination fields are copied
# from the dict returned by InvestigationService.list_investigations.
class PaginatedInvestigations(BaseModel):
    total: int
    page: int
    page_size: int
    pages: int
    # Summaries for the requested page.
    items: list[InvestigationSummary]
53
+
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Routes
57
+ # ---------------------------------------------------------------------------
58
+
59
+
60
# Strong references to fire-and-forget cleanup tasks. The event loop keeps only
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_background_tasks: set[asyncio.Task] = set()


@router.post("/investigate", response_model=InvestigateResponse, status_code=202)
async def start_investigation(
    body: InvestigateRequest,
    session: AsyncSession = Depends(get_db),
) -> InvestigateResponse:
    """Create a new investigation and register its event queue for streaming.

    The service returns the investigation ID, its creation timestamp and an
    event queue. The queue is stored in ``queue_registry`` so a WebSocket
    consumer can pick it up, and a delayed cleanup task removes it after five
    minutes.

    Returns:
        The new investigation's ID, the status ``"pending"`` and the creation
        time in ISO-8601 format.
    """
    service = InvestigationService(session)
    investigation_id, created_at, queue = await service.create_investigation(
        body.email, body.modules
    )
    queue_registry.put(investigation_id, queue)

    # Release the queue from memory after 5 minutes if no WS consumer arrives.
    cleanup_task = asyncio.create_task(_cleanup_queue(investigation_id, delay=300.0))
    # Hold the task until it completes so it cannot be collected mid-sleep.
    _background_tasks.add(cleanup_task)
    cleanup_task.add_done_callback(_background_tasks.discard)

    return InvestigateResponse(
        id=investigation_id,
        status="pending",
        created_at=created_at.isoformat(),
    )
78
+
79
+
80
@router.get("/report/{investigation_id}")
async def get_report(
    investigation_id: str,
    session: AsyncSession = Depends(get_db),
) -> dict:
    """Look up an investigation and return it after ``enrich_report``.

    Raises:
        HTTPException: 404 when no investigation exists for the given ID.
    """
    record = await InvestigationService(session).get_investigation(investigation_id)

    if record is not None:
        return enrich_report(record)

    raise HTTPException(status_code=404, detail="Investigation not found")
93
+
94
+
95
@router.get("/report/{investigation_id}/export")
async def export_report(
    investigation_id: str,
    format: str = Query("json", pattern="^(json|csv|markdown|pdf|stix|maltego)$"),
    session: AsyncSession = Depends(get_db),
) -> Response:
    """Export the investigation report in the requested format.

    Args:
        investigation_id: ID of the investigation to export.
        format: One of ``json``, ``csv``, ``markdown``, ``pdf``, ``stix`` or
            ``maltego`` (validated by the query pattern).
        session: Database session injected by FastAPI.

    Returns:
        A response carrying the exported content, the exporter's content type
        and an ``attachment`` Content-Disposition header.

    Raises:
        HTTPException: 404 when no investigation exists for the given ID.
    """
    # Function-scope import keeps this block self-contained.
    import re

    service = InvestigationService(session)
    data = await service.get_investigation(investigation_id)
    if data is None:
        raise HTTPException(status_code=404, detail="Investigation not found")

    data = enrich_report(data)

    # The email is user-supplied and ends up inside a quoted header value.
    # Replace anything outside a conservative ASCII set so quotes, control
    # characters or non-latin-1 text cannot break or corrupt the header.
    raw_email = str(data.get("email") or "unknown")
    safe_email = re.sub(r"[^A-Za-z0-9@._+-]", "_", raw_email)
    safe_id = re.sub(r"[^A-Za-z0-9._-]", "_", investigation_id)

    exporter = EXPORTERS[format]()
    if format == "pdf":
        # The PDF exporter is the only one with an async generation entry point.
        from ...exporters.pdf_exporter import PdfExporter
        assert isinstance(exporter, PdfExporter)
        content = await exporter.generate(investigation_id, data)
    else:
        content = exporter.export(investigation_id, data)

    # Formats whose file extension differs from the format name.
    extension = {"stix": "stix.json", "maltego": "maltego.csv"}.get(format, format)
    filename = f"mailaccess_{safe_email}_{safe_id}.{extension}"

    return Response(
        content=content,
        media_type=exporter.content_type,
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
128
+
129
+
130
@router.get("/investigations", response_model=PaginatedInvestigations)
async def list_investigations(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    session: AsyncSession = Depends(get_db),
) -> PaginatedInvestigations:
    """Return one page of stored investigations.

    Ordering is decided by ``InvestigationService.list_investigations``; this
    route only reshapes the service's dict into response models.
    """
    listing = await InvestigationService(session).list_investigations(
        page=page, page_size=page_size
    )

    summaries = []
    for row in listing["items"]:
        summaries.append(
            InvestigationSummary(
                id=row["id"],
                email=row["email"],
                status=row["status"],
                exposure_score=row.get("exposure_score"),
                created_at=row["created_at"],
                completed_at=row.get("completed_at"),
            )
        )

    return PaginatedInvestigations(
        total=listing["total"],
        page=listing["page"],
        page_size=listing["page_size"],
        pages=listing["pages"],
        items=summaries,
    )
156
+
157
+
158
@router.delete("/investigation/{investigation_id}", status_code=204)
async def delete_investigation(
    investigation_id: str,
    session: AsyncSession = Depends(get_db),
) -> None:
    """Delete an investigation through the service layer.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    was_deleted = await InvestigationService(session).delete_investigation(
        investigation_id
    )
    if was_deleted:
        return
    raise HTTPException(status_code=404, detail="Investigation not found")
@@ -0,0 +1,83 @@
1
+ """Maltego local transform server endpoint.
2
+
3
+ Implements the TRX protocol (XML over HTTP POST) so Maltego Desktop can run
4
+ email investigations directly without any API key.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import logging
10
+
11
+ from fastapi import APIRouter, Depends, Request
12
+ from fastapi.responses import Response
13
+ from sqlalchemy.ext.asyncio import AsyncSession
14
+
15
+ from ...core.service import InvestigationService, enrich_report
16
+ from ...db.database import get_db
17
+ from ...integrations.maltego_transform import build_error_response, build_response, parse_request
18
+ from .. import queue_registry
19
+
20
+ logger = logging.getLogger("mailaccess.maltego")
21
+
22
+ router = APIRouter()
23
+
24
+ _TRANSFORM_TIMEOUT = 55.0
25
+ _XML = "application/xml"
26
+
27
+
28
@router.post("/email_investigate")
async def email_investigate(
    request: Request,
    session: AsyncSession = Depends(get_db),
) -> Response:
    """TRX transform: run a full email investigation and return Maltego entities.

    The raw request body is parsed with ``parse_request`` to obtain the email.
    An investigation is created and its event queue is consumed until the
    ``None`` completion sentinel arrives or ``_TRANSFORM_TIMEOUT`` seconds
    elapse. On timeout the response is built with ``partial=True``.

    Every outcome, including failures, is returned as an XML response built by
    the Maltego helpers rather than as an HTTP error.
    """
    body = await request.body()

    try:
        email = parse_request(body)
    except Exception as exc:
        return Response(
            content=build_error_response(f"Invalid TRX request: {exc}"),
            media_type=_XML,
        )

    service = InvestigationService(session)
    try:
        investigation_id, _created_at, queue = await service.create_investigation(email)
        queue_registry.put(investigation_id, queue)
    except Exception as exc:
        logger.exception("Failed to start investigation for %s", email)
        return Response(
            content=build_error_response(f"Failed to start investigation: {exc}"),
            media_type=_XML,
        )

    # Drain the event queue until the engine pushes the None sentinel or we hit 55s.
    partial = False
    loop = asyncio.get_running_loop()
    deadline = loop.time() + _TRANSFORM_TIMEOUT

    try:
        while True:
            remaining = deadline - loop.time()
            if remaining <= 0:
                partial = True
                break
            try:
                event = await asyncio.wait_for(queue.get(), timeout=remaining)
            except asyncio.TimeoutError:
                partial = True
                break
            if event is None:  # completion sentinel from the engine
                break
    finally:
        # This handler consumes the queue itself, so the registry entry must
        # not outlive the request; nothing else on this path removes it and
        # it would otherwise accumulate one entry per transform.
        queue_registry.pop(investigation_id)

    data = await service.get_investigation(investigation_id)
    if data is None:
        return Response(
            content=build_error_response("Investigation record not found"),
            media_type=_XML,
        )

    return Response(
        content=build_response(enrich_report(data), partial=partial),
        media_type=_XML,
    )
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from fastapi import APIRouter
4
+ from pydantic import BaseModel
5
+
6
+ from ...modules import get_all_modules
7
+
8
+ router = APIRouter()
9
+
10
+
11
# Public description of one investigation module, as returned by the
# module listing endpoint.
class ModuleInfo(BaseModel):
    name: str
    description: str
    # Copied from the module's ``requires_key`` attribute.
    requires_key: bool
15
+
16
+
17
@router.get("/", response_model=list[ModuleInfo])
async def list_modules() -> list[ModuleInfo]:
    """Describe every module returned by ``get_all_modules()``."""
    catalogue: list[ModuleInfo] = []
    for module in get_all_modules():
        catalogue.append(
            ModuleInfo(
                name=module.name,
                description=module.description,
                requires_key=module.requires_key,
            )
        )
    return catalogue
@@ -0,0 +1,111 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from datetime import datetime, timezone
5
+
6
+ from fastapi import APIRouter, WebSocket, WebSocketDisconnect
7
+
8
+ from ..core.engine import QueueEvent
9
+ from ..db.database import AsyncSessionLocal
10
+ from ..db.models import Investigation
11
+ from . import queue_registry
12
+
13
+ router = APIRouter()
14
+
15
+
16
+ def _risk_level(score: int | None) -> str:
17
+ if score is None:
18
+ return "unknown"
19
+ if score <= 20:
20
+ return "low"
21
+ if score <= 50:
22
+ return "medium"
23
+ if score <= 80:
24
+ return "high"
25
+ return "critical"
26
+
27
+
28
# Strong references to fire-and-forget drain tasks. The event loop keeps only
# weak references to tasks, so an otherwise unreferenced task may be
# garbage-collected before it finishes draining.
_drain_tasks: set[asyncio.Task] = set()


@router.websocket("/ws/investigate/{investigation_id}")
async def ws_investigate(investigation_id: str, websocket: WebSocket) -> None:
    """
    Stream investigation events in real time.

    Connect immediately after POST /api/investigate. The server pushes one
    event per module as it starts and completes, then a final
    "investigation_complete" frame when all modules are done and the DB is
    persisted.

    The queue is taken out of the registry on connect, so only one consumer
    can stream a given investigation; a second connection (or an unknown ID)
    receives an "error" frame and is closed with code 1008.

    Event frames::

        { "type": "module_start", "module": "hibp", "timestamp": "..." }
        { "type": "module_result", "module": "hibp", "findings": [...], "status": "success" }
        { "type": "module_error", "module": "social", "error": "...", "status": "failed" }
        { "type": "investigation_complete", "exposure_score": 72, "risk_level": "high" }
    """
    await websocket.accept()

    queue = queue_registry.pop(investigation_id)
    if queue is None:
        await websocket.send_json(
            {"type": "error", "error": "investigation not found or already streaming"}
        )
        await websocket.close(code=1008)
        return

    try:
        while True:
            item: QueueEvent | None = await queue.get()

            if item is None:
                # Sentinel: engine finished and persisted — fetch final score from DB.
                async with AsyncSessionLocal() as db:
                    inv = await db.get(Investigation, investigation_id)
                    score = inv.exposure_score if inv else None
                await websocket.send_json(
                    {
                        "type": "investigation_complete",
                        "exposure_score": score,
                        "risk_level": _risk_level(score),
                    }
                )
                break

            if item.type == "module_start":
                await websocket.send_json(
                    {
                        "type": "module_start",
                        "module": item.module_name,
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    }
                )
            elif item.type == "module_result":
                assert item.result is not None
                await websocket.send_json(
                    {
                        "type": "module_result",
                        "module": item.module_name,
                        "findings": item.result.findings,
                        "status": item.result.status.value,
                    }
                )
            elif item.type == "module_error":
                assert item.result is not None
                await websocket.send_json(
                    {
                        "type": "module_error",
                        "module": item.module_name,
                        "error": ", ".join(item.result.errors or ["unknown error"]),
                        "status": "failed",
                    }
                )

    except WebSocketDisconnect:
        # Drain the queue so the engine's background task isn't blocked.
        drain_task = asyncio.create_task(_drain_silently(queue))
        # Hold the task until it completes so it cannot be collected early.
        _drain_tasks.add(drain_task)
        drain_task.add_done_callback(_drain_tasks.discard)
105
+
106
+
107
+ async def _drain_silently(queue: asyncio.Queue) -> None:
108
+ while True:
109
+ item = await queue.get()
110
+ if item is None:
111
+ break
backend/config.py ADDED
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+
5
+ from pydantic import field_validator
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+
9
class Settings(BaseSettings):
    """Application configuration loaded from the environment and ``.env``.

    Unknown keys are ignored (``extra="ignore"``). Mapping-valued options may
    be given as JSON strings and ``cors_origins`` as a comma-separated string;
    the ``mode="before"`` validators below normalise those forms.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Database
    database_url: str = "sqlite+aiosqlite:///./data/mailaccess.db"

    # Application
    debug: bool = False
    log_level: str = "INFO"
    cors_origins: list[str] = ["http://localhost:5173", "http://localhost:3000"]

    # Worker
    max_concurrent_modules: int = 10
    module_timeout_seconds: int = 30
    # Per-module timeout overrides: MODULE_TIMEOUT_OVERRIDES={"whatsmyname": 120}
    module_timeout_overrides: dict[str, int] = {}

    # Account discovery (opt-in — probes 120+ platforms via Holehe, can be noisy)
    enable_account_discovery: bool = False

    # WhatsMyName (opt-in — username enumeration across 700+ platforms, takes 60–90s)
    enable_whatsmyname: bool = False

    # Permutation discovery (opt-in — generates email variations from recovered names,
    # then probes each with HIBP + Hudson Rock; adds 30–60s and up to 120 API calls)
    enable_permutation_discovery: bool = False

    # GHunt (opt-in — requires ghunt>=2.3 installed and a valid creds file from `ghunt login`)
    # Cookies expire periodically and require manual refresh via `ghunt login`.
    enable_ghunt: bool = False
    ghunt_creds_path: str | None = None

    # Webhooks
    slack_webhook_url: str | None = None
    discord_webhook_url: str | None = None
    integration_webhook_url: str | None = None
    integration_webhook_secret: str | None = None

    # API keys (all optional — modules skip themselves when their key is absent)
    mailaccess_api_key: str | None = None
    haveibeenpwned_api_key: str | None = None
    hibp_api_key: str | None = None
    hunter_io_api_key: str | None = None
    emailrep_api_key: str | None = None
    shodan_api_key: str | None = None
    serpapi_key: str | None = None

    # Proxy
    proxy_url: str | None = None
    proxy_enabled: bool = False

    # Rate limiting
    rate_limit_enabled: bool = True
    request_delay_ms: int = 1000
    # Per-domain overrides (ms): RATE_LIMIT_OVERRIDES={"api.github.com": 500}
    rate_limit_overrides: dict[str, int] = {}
    # Legacy per-domain delays (seconds): RATE_LIMIT_DELAYS={"haveibeenpwned.com": 1.5}
    rate_limit_delays: dict[str, float] = {}

    @field_validator(
        "rate_limit_overrides",
        "module_timeout_overrides",
        "rate_limit_delays",
        mode="before",
    )
    @classmethod
    def _parse_json_mapping(cls, v: str | dict) -> dict:
        """Decode a JSON-object string; blank strings become an empty dict.

        One validator serves all three mapping fields because their parsing is
        identical. Non-string input is returned untouched for pydantic to
        validate.
        """
        if isinstance(v, str):
            return json.loads(v) if v.strip() else {}
        return v

    @field_validator("cors_origins", mode="before")
    @classmethod
    def _split_cors(cls, v: str | list[str]) -> list[str]:
        """Turn a comma-separated (or JSON-list) string into a list of origins.

        Empty entries, such as those from a blank value or a trailing comma,
        are dropped instead of becoming ``""`` origins.
        """
        # NOTE(review): pydantic-settings may JSON-decode list-typed env values
        # before this validator runs — confirm a plain comma-separated
        # CORS_ORIGINS value actually reaches this code in the pinned version.
        if isinstance(v, str):
            text = v.strip()
            if text.startswith("["):
                return json.loads(text)
            return [origin.strip() for origin in text.split(",") if origin.strip()]
        return v
99
+
100
+
101
+ settings = Settings()
@@ -0,0 +1,24 @@
1
+ from .aggregator import ResultAggregator
2
+ from .engine import InvestigationEngine
3
+ from .http_client import build_client
4
+ from .proxy import ProxyConnectionError, ProxyConfig, proxy_config
5
+ from .rate_limiter import DomainRateLimiter, RateLimiter, rate_limiter
6
+ from .result_aggregator import ProfileAggregator, UnifiedProfile
7
+ from .scheduler import Scheduler
8
+ from .service import InvestigationService
9
+
10
+ __all__ = [
11
+ "DomainRateLimiter",
12
+ "InvestigationEngine",
13
+ "InvestigationService",
14
+ "ProfileAggregator",
15
+ "ProxyConfig",
16
+ "ProxyConnectionError",
17
+ "RateLimiter",
18
+ "ResultAggregator",
19
+ "Scheduler",
20
+ "UnifiedProfile",
21
+ "build_client",
22
+ "proxy_config",
23
+ "rate_limiter",
24
+ ]