mailaccess 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/Dockerfile +30 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/middleware/auth.py +49 -0
- backend/api/queue_registry.py +13 -0
- backend/api/router.py +11 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/health.py +28 -0
- backend/api/routes/investigations.py +167 -0
- backend/api/routes/maltego.py +83 -0
- backend/api/routes/modules.py +26 -0
- backend/api/websocket.py +111 -0
- backend/config.py +101 -0
- backend/core/__init__.py +24 -0
- backend/core/aggregator.py +37 -0
- backend/core/engine.py +243 -0
- backend/core/http_client.py +53 -0
- backend/core/permutator.py +96 -0
- backend/core/proxy.py +55 -0
- backend/core/rate_limiter.py +61 -0
- backend/core/result_aggregator.py +101 -0
- backend/core/scheduler.py +29 -0
- backend/core/service.py +199 -0
- backend/db/__init__.py +12 -0
- backend/db/database.py +23 -0
- backend/db/models.py +97 -0
- backend/exporters/__init__.py +30 -0
- backend/exporters/base.py +14 -0
- backend/exporters/csv_exporter.py +51 -0
- backend/exporters/json_exporter.py +26 -0
- backend/exporters/maltego_exporter.py +91 -0
- backend/exporters/markdown_exporter.py +140 -0
- backend/exporters/pdf_exporter.py +527 -0
- backend/exporters/stix_exporter.py +201 -0
- backend/integrations/integration_webhook.py +39 -0
- backend/integrations/maltego_transform.py +260 -0
- backend/integrations/webhooks.py +208 -0
- backend/main.py +96 -0
- backend/modules/__init__.py +57 -0
- backend/modules/account_discovery.py +110 -0
- backend/modules/base.py +41 -0
- backend/modules/dns_lookup.py +16 -0
- backend/modules/domain_intel.py +284 -0
- backend/modules/emailrep.py +105 -0
- backend/modules/ghunt_module.py +240 -0
- backend/modules/google_dork.py +111 -0
- backend/modules/google_search.py +12 -0
- backend/modules/gravatar.py +88 -0
- backend/modules/haveibeenpwned.py +1 -0
- backend/modules/hibp.py +154 -0
- backend/modules/hudson_rock.py +175 -0
- backend/modules/hunter_io.py +17 -0
- backend/modules/permutation_discovery.py +194 -0
- backend/modules/shodan.py +18 -0
- backend/modules/social.py +448 -0
- backend/modules/social_links.py +16 -0
- backend/modules/whatsmyname.py +151 -0
- backend/modules/whois_lookup.py +15 -0
- cli/__init__.py +0 -0
- cli/main.py +276 -0
- mailaccess-0.1.0.dist-info/METADATA +160 -0
- mailaccess-0.1.0.dist-info/RECORD +64 -0
- mailaccess-0.1.0.dist-info/WHEEL +4 -0
- mailaccess-0.1.0.dist-info/entry_points.txt +2 -0
backend/Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
FROM python:3.12-slim

WORKDIR /app

# Prevent Python from writing pyc files to disc
ENV PYTHONDONTWRITEBYTECODE=1
# Prevent Python from buffering stdout and stderr
ENV PYTHONUNBUFFERED=1

# WeasyPrint runtime libraries.
# Installed BEFORE the application source is copied so that editing code under
# backend/ or cli/ does not invalidate this slow, rarely-changing layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgobject-2.0-0 libcairo2 libpango-1.0-0 libpangocairo-1.0-0 \
    libgdk-pixbuf-xlib-2.0-0 libffi-dev shared-mime-info \
    && rm -rf /var/lib/apt/lists/*

# Copy required files for installation
COPY pyproject.toml README.md ./
COPY backend/ ./backend/
COPY cli/ ./cli/

# Install dependencies (using --no-cache-dir to reduce image size)
RUN pip install --no-cache-dir .

# Create data directory for SQLite persistence
RUN mkdir -p /app/data

EXPOSE 8000

# Default command (used in prod, overridden in dev)
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
backend/__init__.py
ADDED
|
File without changes
|
backend/api/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import contextvars
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import Awaitable, Callable
|
|
4
|
+
|
|
5
|
+
from fastapi import Request
|
|
6
|
+
from fastapi.responses import JSONResponse
|
|
7
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
8
|
+
|
|
9
|
+
from backend.config import settings
|
|
10
|
+
|
|
11
|
+
request_id_contextvar = contextvars.ContextVar("request_id", default="-")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Tag each request with a freshly generated UUID4 string.

    The identifier is stored in ``request_id_contextvar`` before the request
    is handed downstream, and echoed to the client in the ``X-Request-ID``
    response header.
    """

    async def dispatch(
        self, request: Request, call_next: Callable[[Request], Awaitable]
    ):
        generated_id = str(uuid.uuid4())
        request_id_contextvar.set(generated_id)

        downstream_response = await call_next(request)
        downstream_response.headers["X-Request-ID"] = generated_id
        return downstream_response
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class APIKeyMiddleware(BaseHTTPMiddleware):
    """Require a matching ``X-API-Key`` header on ``/api/`` routes.

    Requests are passed through unauthenticated when:

    * the path starts with ``/health``, ``/ws/`` or ``/maltego/``;
    * ``settings.mailaccess_api_key`` is unset or empty;
    * the path does not start with ``/api/``.

    Otherwise a missing or wrong key yields a 401 JSON response
    ``{"error": "unauthorized"}``.
    """

    async def dispatch(
        self, request: Request, call_next: Callable[[Request], Awaitable]
    ):
        # Local import: this module does not import hmac at file level.
        import hmac

        path = request.url.path

        # Bypass authentication for /health, websocket, and Maltego transform routes
        if path.startswith(("/health", "/ws/", "/maltego/")):
            return await call_next(request)

        # If API key is not configured, bypass authentication entirely
        expected_key = settings.mailaccess_api_key
        if not expected_key:
            return await call_next(request)

        # For /api/ routes, enforce the API key header
        if path.startswith("/api/"):
            supplied_key = request.headers.get("X-API-Key")
            # compare_digest avoids leaking, through response timing, how many
            # leading characters of a guess were right.  Both sides are encoded
            # to bytes because the str form rejects non-ASCII input.
            if not supplied_key or not hmac.compare_digest(
                supplied_key.encode("utf-8"), expected_key.encode("utf-8")
            ):
                return JSONResponse(
                    status_code=401,
                    content={"error": "unauthorized"},
                )

        return await call_next(request)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
|
|
5
|
+
# Process-local map: investigation id -> the asyncio queue registered for it.
_queues: dict[str, asyncio.Queue] = {}


def put(investigation_id: str, queue: asyncio.Queue) -> None:
    """Register ``queue`` under ``investigation_id``, replacing any existing entry."""
    _queues.update({investigation_id: queue})


def pop(investigation_id: str) -> asyncio.Queue | None:
    """Remove and return the queue for ``investigation_id``, or ``None`` if absent."""
    try:
        return _queues.pop(investigation_id)
    except KeyError:
        return None
|
backend/api/router.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from fastapi import APIRouter
|
|
2
|
+
|
|
3
|
+
from .routes.investigations import router as investigations_router
|
|
4
|
+
from .routes.modules import router as modules_router
|
|
5
|
+
from .websocket import router as ws_router
|
|
6
|
+
|
|
7
|
+
# Aggregate router for the REST endpoints.  The websocket router is only
# re-exported from this module; it is not included in ``api_router`` here.
api_router = APIRouter()
for _sub_router, _mount_options in (
    (investigations_router, {"tags": ["investigations"]}),
    (modules_router, {"prefix": "/modules", "tags": ["modules"]}),
):
    api_router.include_router(_sub_router, **_mount_options)

__all__ = ["api_router", "ws_router"]
|
|
File without changes
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from fastapi import APIRouter, Depends
|
|
2
|
+
from sqlalchemy import text
|
|
3
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
|
+
|
|
5
|
+
from backend.db.database import get_db
|
|
6
|
+
from backend.modules import get_all_modules
|
|
7
|
+
|
|
8
|
+
router = APIRouter()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@router.get("/health")
async def health_check(session: AsyncSession = Depends(get_db)):
    # Liveness endpoint: probes the database with ``SELECT 1`` and lists the
    # names of the loaded modules.  A failed probe is reported through the
    # "db" field ("error" instead of "connected"); the top-level "status"
    # value and the HTTP status code are unchanged so existing consumers of
    # this endpoint keep working.
    import logging  # local import: this module has no file-level logging import

    db_status = "error"
    try:
        # Simple query to check if the DB is reachable
        await session.execute(text("SELECT 1"))
    except Exception:
        # Previously swallowed silently; record why the probe failed so an
        # "error" status can be diagnosed from the logs.
        logging.getLogger("mailaccess.health").warning(
            "Health check database probe failed", exc_info=True
        )
    else:
        db_status = "connected"

    modules_loaded = [mod.name for mod in get_all_modules()]

    return {
        "status": "ok",
        "version": "0.1.0",
        "modules_loaded": modules_loaded,
        "db": db_status,
    }
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
6
|
+
from fastapi.responses import Response
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
9
|
+
|
|
10
|
+
from ...core.service import InvestigationService, enrich_report
|
|
11
|
+
from ...db.database import get_db
|
|
12
|
+
from ...exporters import EXPORTERS
|
|
13
|
+
from .. import queue_registry
|
|
14
|
+
|
|
15
|
+
router = APIRouter()
|
|
16
|
+
|
|
17
|
+
async def _cleanup_queue(investigation_id: str, delay: float = 300.0) -> None:
    """Sleep for ``delay`` seconds, then drop the investigation's queue from the registry.

    The popped queue (if any) is discarded; nothing is returned.
    """
    grace_period = delay
    await asyncio.sleep(grace_period)
    _ = queue_registry.pop(investigation_id)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Schemas
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Request body accepted by POST /investigate.
class InvestigateRequest(BaseModel):
    # Target address.  Declared as a plain string, so this model performs no
    # e-mail format validation.
    email: str
    # Optional list of module names, forwarded to the service together with
    # the e-mail; None when the client omits it.
    modules: list[str] | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Response body returned by POST /investigate.
class InvestigateResponse(BaseModel):
    # Identifier of the newly created investigation.
    id: str
    # Lifecycle state; the creating route always sends "pending".
    status: str
    # Creation time, filled from ``created_at.isoformat()`` by the route.
    created_at: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# One row of the paginated investigation listing.
class InvestigationSummary(BaseModel):
    id: str
    email: str
    status: str
    # No default is declared; the listing route passes None when the item has
    # no "exposure_score" key.
    exposure_score: int | None
    created_at: str
    # No default is declared; the listing route passes None when the item has
    # no "completed_at" key.
    completed_at: str | None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Envelope returned by GET /investigations; every field is copied from the
# dict produced by the service's list_investigations call.
class PaginatedInvestigations(BaseModel):
    total: int
    page: int
    page_size: int
    pages: int
    # Summaries for the requested page.
    items: list[InvestigationSummary]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Routes
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Strong references to in-flight queue-cleanup tasks.  The event loop keeps
# only weak references to tasks, so a task whose handle is dropped can be
# garbage-collected before it finishes; each task removes itself when done.
_cleanup_tasks: set[asyncio.Task] = set()


@router.post("/investigate", response_model=InvestigateResponse, status_code=202)
async def start_investigation(
    body: InvestigateRequest,
    session: AsyncSession = Depends(get_db),
) -> InvestigateResponse:
    """Create a new investigation and kick off the engine in the background."""
    service = InvestigationService(session)
    investigation_id, created_at, queue = await service.create_investigation(
        body.email, body.modules
    )
    # Publish the event queue so a websocket consumer can pick it up by id.
    queue_registry.put(investigation_id, queue)

    # Release the queue from memory after 5 minutes if no WS consumer arrives.
    cleanup_task = asyncio.create_task(_cleanup_queue(investigation_id, delay=300.0))
    _cleanup_tasks.add(cleanup_task)
    cleanup_task.add_done_callback(_cleanup_tasks.discard)

    return InvestigateResponse(
        id=investigation_id,
        status="pending",
        created_at=created_at.isoformat(),
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@router.get("/report/{investigation_id}")
async def get_report(
    investigation_id: str,
    session: AsyncSession = Depends(get_db),
) -> dict:
    """Return the full enriched investigation report."""
    # Fetch through the service layer; an unknown id becomes HTTP 404.
    record = await InvestigationService(session).get_investigation(investigation_id)
    if record is not None:
        return enrich_report(record)
    raise HTTPException(status_code=404, detail="Investigation not found")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@router.get("/report/{investigation_id}/export")
async def export_report(
    investigation_id: str,
    format: str = Query("json", pattern="^(json|csv|markdown|pdf|stix|maltego)$"),
    session: AsyncSession = Depends(get_db),
) -> Response:
    """Export the investigation report in the requested format."""
    # Raises HTTP 404 when the investigation does not exist.  The response is
    # an attachment whose media type comes from the selected exporter.
    import re  # local import: this module has no file-level ``re`` import

    service = InvestigationService(session)
    data = await service.get_investigation(investigation_id)
    if data is None:
        raise HTTPException(status_code=404, detail="Investigation not found")

    data = enrich_report(data)
    email = data.get("email", "unknown")

    exporter = EXPORTERS[format]()
    if format == "pdf":
        # The PDF exporter exposes an awaitable ``generate`` instead of ``export``.
        from ...exporters.pdf_exporter import PdfExporter

        assert isinstance(exporter, PdfExporter)  # type narrowing only
        content = await exporter.generate(investigation_id, data)
    else:
        content = exporter.export(investigation_id, data)

    # The e-mail is stored exactly as the client submitted it (a free-form
    # string), so it may contain quotes or control characters that would
    # corrupt or inject into the Content-Disposition header.  Anything outside
    # a conservative filename alphabet is replaced; ordinary addresses and
    # ids pass through unchanged.
    unsafe_chars = re.compile(r"[^A-Za-z0-9@._+-]")
    safe_email = unsafe_chars.sub("_", str(email))
    safe_id = unsafe_chars.sub("_", investigation_id)

    # stix and maltego use compound extensions; other formats use their own name.
    extension = {"stix": "stix.json", "maltego": "maltego.csv"}.get(format, format)
    filename = f"mailaccess_{safe_email}_{safe_id}.{extension}"

    return Response(
        content=content,
        media_type=exporter.content_type,
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@router.get("/investigations", response_model=PaginatedInvestigations)
async def list_investigations(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    session: AsyncSession = Depends(get_db),
) -> PaginatedInvestigations:
    """Paginated list of past investigations, newest first."""
    listing = await InvestigationService(session).list_investigations(
        page=page, page_size=page_size
    )

    # Pull the paging counters first, then convert each raw item dict into its
    # response model; the two nullable fields fall back to None when absent.
    total = listing["total"]
    current_page = listing["page"]
    size = listing["page_size"]
    page_count = listing["pages"]

    summaries: list[InvestigationSummary] = []
    for row in listing["items"]:
        summaries.append(
            InvestigationSummary(
                id=row["id"],
                email=row["email"],
                status=row["status"],
                exposure_score=row.get("exposure_score"),
                created_at=row["created_at"],
                completed_at=row.get("completed_at"),
            )
        )

    return PaginatedInvestigations(
        total=total,
        page=current_page,
        page_size=size,
        pages=page_count,
        items=summaries,
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@router.delete("/investigation/{investigation_id}", status_code=204)
async def delete_investigation(
    investigation_id: str,
    session: AsyncSession = Depends(get_db),
) -> None:
    """Hard-delete an investigation and all associated findings."""
    # The service reports whether anything was deleted; a falsy result is
    # surfaced to the client as HTTP 404.
    was_removed = await InvestigationService(session).delete_investigation(
        investigation_id
    )
    if was_removed:
        return None
    raise HTTPException(status_code=404, detail="Investigation not found")
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Maltego local transform server endpoint.
|
|
2
|
+
|
|
3
|
+
Implements the TRX protocol (XML over HTTP POST) so Maltego Desktop can run
|
|
4
|
+
email investigations directly without any API key.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, Depends, Request
|
|
12
|
+
from fastapi.responses import Response
|
|
13
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
14
|
+
|
|
15
|
+
from ...core.service import InvestigationService, enrich_report
|
|
16
|
+
from ...db.database import get_db
|
|
17
|
+
from ...integrations.maltego_transform import build_error_response, build_response, parse_request
|
|
18
|
+
from .. import queue_registry
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("mailaccess.maltego")
|
|
21
|
+
|
|
22
|
+
router = APIRouter()
|
|
23
|
+
|
|
24
|
+
_TRANSFORM_TIMEOUT = 55.0
|
|
25
|
+
_XML = "application/xml"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@router.post("/email_investigate")
async def email_investigate(
    request: Request,
    session: AsyncSession = Depends(get_db),
) -> Response:
    """TRX transform: run a full email investigation and return Maltego entities."""
    # Every outcome, including failures, is returned as an XML body: errors
    # are wrapped with build_error_response rather than raised.
    body = await request.body()

    try:
        email = parse_request(body)
    except Exception as exc:
        return Response(
            content=build_error_response(f"Invalid TRX request: {exc}"),
            media_type=_XML,
        )

    service = InvestigationService(session)
    try:
        investigation_id, _created_at, queue = await service.create_investigation(email)
        queue_registry.put(investigation_id, queue)
    except Exception as exc:
        logger.exception("Failed to start investigation for %s", email)
        return Response(
            content=build_error_response(f"Failed to start investigation: {exc}"),
            media_type=_XML,
        )

    # Drain the event queue until the engine pushes the None sentinel or the
    # _TRANSFORM_TIMEOUT budget is used up.
    partial = False
    loop = asyncio.get_running_loop()
    deadline = loop.time() + _TRANSFORM_TIMEOUT

    try:
        while True:
            remaining = deadline - loop.time()
            if remaining <= 0:
                partial = True
                break
            try:
                event = await asyncio.wait_for(queue.get(), timeout=remaining)
            except asyncio.TimeoutError:
                partial = True
                break
            if event is None:  # completion sentinel from the engine
                break
    finally:
        # This handler consumes the queue itself, so the registry entry must
        # not outlive the request: leaving it would keep one queue in memory
        # per transform call and let a websocket client take over its events.
        queue_registry.pop(investigation_id)

    data = await service.get_investigation(investigation_id)
    if data is None:
        return Response(
            content=build_error_response("Investigation record not found"),
            media_type=_XML,
        )

    # ``partial`` tells the response builder that the deadline was hit before
    # the completion sentinel arrived.
    return Response(
        content=build_response(enrich_report(data), partial=partial),
        media_type=_XML,
    )
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from ...modules import get_all_modules
|
|
7
|
+
|
|
8
|
+
router = APIRouter()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Public description of one module, as returned by the module listing route.
# Each field is copied from the same-named attribute of the module object.
class ModuleInfo(BaseModel):
    name: str
    description: str
    requires_key: bool
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@router.get("/", response_model=list[ModuleInfo])
async def list_modules() -> list[ModuleInfo]:
    # Describe every module returned by get_all_modules(), in the order given.
    catalogue: list[ModuleInfo] = []
    for module in get_all_modules():
        catalogue.append(
            ModuleInfo(
                name=module.name,
                description=module.description,
                requires_key=module.requires_key,
            )
        )
    return catalogue
|
backend/api/websocket.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
|
7
|
+
|
|
8
|
+
from ..core.engine import QueueEvent
|
|
9
|
+
from ..db.database import AsyncSessionLocal
|
|
10
|
+
from ..db.models import Investigation
|
|
11
|
+
from . import queue_registry
|
|
12
|
+
|
|
13
|
+
router = APIRouter()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _risk_level(score: int | None) -> str:
|
|
17
|
+
if score is None:
|
|
18
|
+
return "unknown"
|
|
19
|
+
if score <= 20:
|
|
20
|
+
return "low"
|
|
21
|
+
if score <= 50:
|
|
22
|
+
return "medium"
|
|
23
|
+
if score <= 80:
|
|
24
|
+
return "high"
|
|
25
|
+
return "critical"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Strong references to background drain tasks.  The event loop keeps only weak
# references to tasks, so a drain task whose handle is dropped could be
# garbage-collected before it finishes; each task removes itself when done.
_drain_tasks: set[asyncio.Task] = set()


@router.websocket("/ws/investigate/{investigation_id}")
async def ws_investigate(investigation_id: str, websocket: WebSocket) -> None:
    """
    Stream investigation events in real time.

    Connect immediately after POST /api/investigate.  The server pushes one
    event per module as it starts and completes, then a final
    "investigation_complete" frame when all modules are done and the DB is
    persisted.

    Event frames::

        { "type": "module_start",  "module": "hibp", "timestamp": "..." }
        { "type": "module_result", "module": "hibp", "findings": [...], "status": "success" }
        { "type": "module_error",  "module": "social", "error": "...", "status": "failed" }
        { "type": "investigation_complete", "exposure_score": 72, "risk_level": "high" }
    """
    await websocket.accept()

    # Popping (rather than reading) the entry makes this connection the only
    # consumer: a second connection for the same id gets the error frame below.
    queue = queue_registry.pop(investigation_id)
    if queue is None:
        await websocket.send_json(
            {"type": "error", "error": "investigation not found or already streaming"}
        )
        await websocket.close(code=1008)
        return

    try:
        while True:
            item: QueueEvent | None = await queue.get()

            if item is None:
                # Sentinel: engine finished and persisted — fetch final score from DB.
                async with AsyncSessionLocal() as db:
                    inv = await db.get(Investigation, investigation_id)
                    score = inv.exposure_score if inv else None
                await websocket.send_json(
                    {
                        "type": "investigation_complete",
                        "exposure_score": score,
                        "risk_level": _risk_level(score),
                    }
                )
                break

            if item.type == "module_start":
                await websocket.send_json(
                    {
                        "type": "module_start",
                        "module": item.module_name,
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    }
                )
            elif item.type == "module_result":
                assert item.result is not None
                await websocket.send_json(
                    {
                        "type": "module_result",
                        "module": item.module_name,
                        "findings": item.result.findings,
                        "status": item.result.status.value,
                    }
                )
            elif item.type == "module_error":
                assert item.result is not None
                await websocket.send_json(
                    {
                        "type": "module_error",
                        "module": item.module_name,
                        "error": ", ".join(item.result.errors or ["unknown error"]),
                        "status": "failed",
                    }
                )

    except WebSocketDisconnect:
        # Drain the queue so the engine's background task isn't blocked.
        drain_task = asyncio.create_task(_drain_silently(queue))
        _drain_tasks.add(drain_task)
        drain_task.add_done_callback(_drain_tasks.discard)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
async def _drain_silently(queue: asyncio.Queue) -> None:
|
|
108
|
+
while True:
|
|
109
|
+
item = await queue.get()
|
|
110
|
+
if item is None:
|
|
111
|
+
break
|
backend/config.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from pydantic import field_validator
|
|
6
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Application configuration, read by pydantic-settings from the process
# environment and an optional UTF-8 ".env" file; unknown keys are ignored.
class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Database
    database_url: str = "sqlite+aiosqlite:///./data/mailaccess.db"

    # Application
    debug: bool = False
    log_level: str = "INFO"
    cors_origins: list[str] = ["http://localhost:5173", "http://localhost:3000"]

    # Worker
    max_concurrent_modules: int = 10
    module_timeout_seconds: int = 30
    # Per-module timeout overrides: MODULE_TIMEOUT_OVERRIDES={"whatsmyname": 120}
    module_timeout_overrides: dict[str, int] = {}

    # Account discovery (opt-in — probes 120+ platforms via Holehe, can be noisy)
    enable_account_discovery: bool = False

    # WhatsMyName (opt-in — username enumeration across 700+ platforms, takes 60–90s)
    enable_whatsmyname: bool = False

    # Permutation discovery (opt-in — generates email variations from recovered names,
    # then probes each with HIBP + Hudson Rock; adds 30–60s and up to 120 API calls)
    enable_permutation_discovery: bool = False

    # GHunt (opt-in — requires ghunt>=2.3 installed and a valid creds file from `ghunt login`)
    # Cookies expire periodically and require manual refresh via `ghunt login`.
    enable_ghunt: bool = False
    ghunt_creds_path: str | None = None

    # Webhooks
    slack_webhook_url: str | None = None
    discord_webhook_url: str | None = None
    integration_webhook_url: str | None = None
    integration_webhook_secret: str | None = None

    # API keys (all optional — modules skip themselves when their key is absent)
    mailaccess_api_key: str | None = None
    haveibeenpwned_api_key: str | None = None
    hibp_api_key: str | None = None
    hunter_io_api_key: str | None = None
    emailrep_api_key: str | None = None
    shodan_api_key: str | None = None
    serpapi_key: str | None = None

    # Proxy
    proxy_url: str | None = None
    proxy_enabled: bool = False

    # Rate limiting
    rate_limit_enabled: bool = True
    request_delay_ms: int = 1000
    # Per-domain overrides (ms): RATE_LIMIT_OVERRIDES={"api.github.com": 500}
    rate_limit_overrides: dict[str, int] = {}
    # Legacy per-domain delays (seconds): RATE_LIMIT_DELAYS={"haveibeenpwned.com": 1.5}
    rate_limit_delays: dict[str, float] = {}

    # One shared pre-validator for the three JSON-mapping settings: a string
    # is decoded as JSON (empty string -> empty dict); any other value is
    # handed to pydantic's normal validation untouched.
    @field_validator(
        "rate_limit_overrides",
        "module_timeout_overrides",
        "rate_limit_delays",
        mode="before",
    )
    @classmethod
    def _parse_json_mapping(cls, v: str | dict) -> dict:
        if isinstance(v, str):
            return json.loads(v) if v else {}
        return v

    # Accept a comma-separated string of origins.  Blank segments (an empty
    # string, a trailing comma, ", ,") are dropped so they never end up as
    # empty-string origins in the CORS allow-list.
    # NOTE(review): pydantic-settings may try to JSON-decode list fields read
    # from the environment before this validator runs — confirm that a plain
    # comma-separated CORS_ORIGINS env value actually reaches this code.
    @field_validator("cors_origins", mode="before")
    @classmethod
    def _split_cors(cls, v: str | list[str]) -> list[str]:
        if isinstance(v, str):
            return [origin.strip() for origin in v.split(",") if origin.strip()]
        return v
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
settings = Settings()
|
backend/core/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from .aggregator import ResultAggregator
|
|
2
|
+
from .engine import InvestigationEngine
|
|
3
|
+
from .http_client import build_client
|
|
4
|
+
from .proxy import ProxyConnectionError, ProxyConfig, proxy_config
|
|
5
|
+
from .rate_limiter import DomainRateLimiter, RateLimiter, rate_limiter
|
|
6
|
+
from .result_aggregator import ProfileAggregator, UnifiedProfile
|
|
7
|
+
from .scheduler import Scheduler
|
|
8
|
+
from .service import InvestigationService
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"DomainRateLimiter",
|
|
12
|
+
"InvestigationEngine",
|
|
13
|
+
"InvestigationService",
|
|
14
|
+
"ProfileAggregator",
|
|
15
|
+
"ProxyConfig",
|
|
16
|
+
"ProxyConnectionError",
|
|
17
|
+
"RateLimiter",
|
|
18
|
+
"ResultAggregator",
|
|
19
|
+
"Scheduler",
|
|
20
|
+
"UnifiedProfile",
|
|
21
|
+
"build_client",
|
|
22
|
+
"proxy_config",
|
|
23
|
+
"rate_limiter",
|
|
24
|
+
]
|