dokeo 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. dokeo-3.0.0/PKG-INFO +111 -0
  2. dokeo-3.0.0/README.md +85 -0
  3. dokeo-3.0.0/api/__init__.py +0 -0
  4. dokeo-3.0.0/api/auth.py +55 -0
  5. dokeo-3.0.0/api/config.py +63 -0
  6. dokeo-3.0.0/api/deps.py +41 -0
  7. dokeo-3.0.0/api/main.py +90 -0
  8. dokeo-3.0.0/api/rate_limit.py +115 -0
  9. dokeo-3.0.0/api/tracing.py +74 -0
  10. dokeo-3.0.0/api/user.py +49 -0
  11. dokeo-3.0.0/dokeo.egg-info/PKG-INFO +111 -0
  12. dokeo-3.0.0/dokeo.egg-info/SOURCES.txt +72 -0
  13. dokeo-3.0.0/dokeo.egg-info/dependency_links.txt +1 -0
  14. dokeo-3.0.0/dokeo.egg-info/entry_points.txt +3 -0
  15. dokeo-3.0.0/dokeo.egg-info/requires.txt +20 -0
  16. dokeo-3.0.0/dokeo.egg-info/top_level.txt +3 -0
  17. dokeo-3.0.0/dokeo_cli.py +300 -0
  18. dokeo-3.0.0/gate/__init__.py +0 -0
  19. dokeo-3.0.0/gate/audit_log.py +202 -0
  20. dokeo-3.0.0/gate/auth.py +147 -0
  21. dokeo-3.0.0/gate/batch.py +212 -0
  22. dokeo-3.0.0/gate/catalog.py +120 -0
  23. dokeo-3.0.0/gate/channels.py +306 -0
  24. dokeo-3.0.0/gate/checks/__init__.py +17 -0
  25. dokeo-3.0.0/gate/checks/aeo.py +69 -0
  26. dokeo-3.0.0/gate/checks/cannibalisation.py +42 -0
  27. dokeo-3.0.0/gate/checks/claims.py +53 -0
  28. dokeo-3.0.0/gate/checks/confidentiality.py +32 -0
  29. dokeo-3.0.0/gate/checks/geo.py +74 -0
  30. dokeo-3.0.0/gate/checks/near_duplicate.py +44 -0
  31. dokeo-3.0.0/gate/checks/readability.py +26 -0
  32. dokeo-3.0.0/gate/checks/structure.py +49 -0
  33. dokeo-3.0.0/gate/competitor.py +282 -0
  34. dokeo-3.0.0/gate/content_type_detect.py +89 -0
  35. dokeo-3.0.0/gate/content_types.py +1265 -0
  36. dokeo-3.0.0/gate/corpus.py +51 -0
  37. dokeo-3.0.0/gate/custom_rules.py +201 -0
  38. dokeo-3.0.0/gate/diff_view.py +198 -0
  39. dokeo-3.0.0/gate/email_report.py +148 -0
  40. dokeo-3.0.0/gate/embeddings.py +35 -0
  41. dokeo-3.0.0/gate/fetcher.py +175 -0
  42. dokeo-3.0.0/gate/gate.py +133 -0
  43. dokeo-3.0.0/gate/knowledge_base.py +200 -0
  44. dokeo-3.0.0/gate/listicle/__init__.py +0 -0
  45. dokeo-3.0.0/gate/listicle/assemble.py +111 -0
  46. dokeo-3.0.0/gate/listicle/formats.py +245 -0
  47. dokeo-3.0.0/gate/listicle/generate.py +52 -0
  48. dokeo-3.0.0/gate/listicle/llm.py +389 -0
  49. dokeo-3.0.0/gate/listicle/qa.py +210 -0
  50. dokeo-3.0.0/gate/listicle/research.py +54 -0
  51. dokeo-3.0.0/gate/listicle/run.py +251 -0
  52. dokeo-3.0.0/gate/listicle/schema.py +74 -0
  53. dokeo-3.0.0/gate/listicle_ui.py +431 -0
  54. dokeo-3.0.0/gate/logging_utils.py +90 -0
  55. dokeo-3.0.0/gate/mcp_server.py +505 -0
  56. dokeo-3.0.0/gate/metrics.py +99 -0
  57. dokeo-3.0.0/gate/notifier.py +140 -0
  58. dokeo-3.0.0/gate/report_generator.py +333 -0
  59. dokeo-3.0.0/gate/scheduler.py +151 -0
  60. dokeo-3.0.0/gate/suggest.py +282 -0
  61. dokeo-3.0.0/gate/trends.py +111 -0
  62. dokeo-3.0.0/gate/voice.py +108 -0
  63. dokeo-3.0.0/gate/webhook.py +498 -0
  64. dokeo-3.0.0/pyproject.toml +40 -0
  65. dokeo-3.0.0/setup.cfg +4 -0
  66. dokeo-3.0.0/tests/test_api.py +247 -0
  67. dokeo-3.0.0/tests/test_checks.py +53 -0
  68. dokeo-3.0.0/tests/test_competitor.py +84 -0
  69. dokeo-3.0.0/tests/test_content_types.py +48 -0
  70. dokeo-3.0.0/tests/test_enhancements.py +93 -0
  71. dokeo-3.0.0/tests/test_listicle.py +76 -0
  72. dokeo-3.0.0/tests/test_logging.py +44 -0
  73. dokeo-3.0.0/tests/test_mcp_server.py +151 -0
  74. dokeo-3.0.0/tests/test_webhook_auth.py +64 -0
dokeo-3.0.0/PKG-INFO ADDED
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.4
2
+ Name: dokeo
3
+ Version: 3.0.0
4
+ Summary: Pre-publish content quality gate (SEO, AEO, GEO) - API, CLI, MCP
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: streamlit>=1.32.0
8
+ Requires-Dist: pyyaml>=6.0
9
+ Requires-Dist: scikit-learn>=1.3.0
10
+ Requires-Dist: nltk>=3.8
11
+ Requires-Dist: textstat>=0.7
12
+ Requires-Dist: numpy>=1.24
13
+ Requires-Dist: trafilatura>=2.0.0
14
+ Requires-Dist: feedparser>=6.0.0
15
+ Requires-Dist: requests>=2.28.0
16
+ Requires-Dist: lxml[html-clean]>=5.0.0
17
+ Requires-Dist: anthropic>=0.40
18
+ Requires-Dist: openai>=1.50
19
+ Requires-Dist: pydantic>=2.0
20
+ Requires-Dist: python-dotenv>=1.0
21
+ Requires-Dist: fastapi>=0.115
22
+ Requires-Dist: uvicorn[standard]>=0.30
23
+ Requires-Dist: httpx>=0.27
24
+ Provides-Extra: mcp
25
+ Requires-Dist: mcp>=1.0; extra == "mcp"
26
+
27
+ # Dokeo — Content Quality Gate
28
+
29
+ Pre-publish content quality gate for SEO, AEO, and GEO. Scores every post,
30
+ email, video script, and AI output against 8 core checks before it ships.
31
+
32
+ ## Surfaces
33
+
34
+ Dokeo ships as five parallel surfaces that all hit the same gate engine:
35
+
36
+ | Surface | Port / Transport | Best for |
37
+ |---|---|---|
38
+ | **FastAPI service** | `http://host:8000/api/v1/*` | Programmatic integrations, webhooks |
39
+ | **Next.js web app** | `https://host/web/*` | End-user UI with Clerk auth |
40
+ | **Streamlit UI** | `https://host/` | Legacy dashboard, single-user password |
41
+ | **MCP server** | stdio | Claude / Cursor / any MCP client |
42
+ | **CLI** | `dokeo` in your shell | Pipelines, n8n, Claude Code |
43
+
44
+ The legacy webhook (port 8502, `/check`, `/check-url`, etc.) is kept for
45
+ backward compatibility with existing n8n / Zapier / Make integrations.
46
+
47
+ ## Quickstart
48
+
49
+ ```bash
50
+ # 1. Install
51
+ pip install -e .
52
+
53
+ # 2. Run the API
54
+ uvicorn api.main:app --reload --port 8000
55
+ # → http://localhost:8000/docs for OpenAPI
56
+
57
+ # 3. Run the web app (separate terminal)
58
+ cd web && bun install && bun run dev
59
+
60
+ # 4. Run the Streamlit UI (separate terminal)
61
+ streamlit run app.py
62
+
63
+ # 5. Try the CLI
64
+ printf '# My post\n\n## Question?\n\n...\n' | dokeo pipe
65
+
66
+ # 6. Try the MCP server (Claude Desktop / Cursor will spawn it for you)
67
+ dokeo-mcp-server
68
+ ```
69
+
70
+ ## Self-hosted with Docker
71
+
72
+ ```bash
73
+ cp .env.example .env
74
+ # Edit .env - set DOKEO_API_KEY, DOKEO_PASSWORD, Clerk keys
75
+ docker compose up -d
76
+ ```
77
+
78
+ Routes (via Caddy at ports 80/443):
79
+
80
+ - `/` → Streamlit (port 8501)
81
+ - `/web/*`, `/sign-in`, `/sign-up` → Next.js (port 3000)
82
+ - `/api/v1/*` → FastAPI (port 8000)
83
+ - `/check*`, `/check-url*`, `/check-blog*`, `/health*`, etc. → legacy webhook (port 8502)
84
+
85
+ ## Auth model
86
+
87
+ | Surface | Auth |
88
+ |---|---|
89
+ | FastAPI (`/api/v1/*`) | Bearer token: `DOKEO_API_KEY` or `DOKEO_API_KEYS=key:tenant,...` |
90
+ | Legacy webhook | Same Bearer token |
91
+ | Streamlit | Cookie session via `DOKEO_PASSWORD` |
92
+ | Next.js | Clerk (hosted) |
93
+ | MCP | Filesystem only; no network exposure |
94
+ | CLI | None (local) |
95
+
96
+ `DOKEO_API_KEY` is required in any non-dev environment. Streamlit refuses
97
+ to boot in production without `DOKEO_PASSWORD`.
98
+
99
+ ## Documentation
100
+
101
+ - API: OpenAPI at `/docs` or `/redoc` when running
102
+ - MCP: [docs/mcp.md](docs/mcp.md)
103
+ - Engine config: `config.yaml`
104
+ - Content-type definitions: `gate/content_types.py`
105
+
106
+ ## Tests
107
+
108
+ ```bash
109
+ pytest # all tests
110
+ pytest tests/test_mcp_server.py # MCP only
111
+ ```
dokeo-3.0.0/README.md ADDED
@@ -0,0 +1,85 @@
1
+ # Dokeo — Content Quality Gate
2
+
3
+ Pre-publish content quality gate for SEO, AEO, and GEO. Scores every post,
4
+ email, video script, and AI output against 8 core checks before it ships.
5
+
6
+ ## Surfaces
7
+
8
+ Dokeo ships as five parallel surfaces that all hit the same gate engine:
9
+
10
+ | Surface | Port / Transport | Best for |
11
+ |---|---|---|
12
+ | **FastAPI service** | `http://host:8000/api/v1/*` | Programmatic integrations, webhooks |
13
+ | **Next.js web app** | `https://host/web/*` | End-user UI with Clerk auth |
14
+ | **Streamlit UI** | `https://host/` | Legacy dashboard, single-user password |
15
+ | **MCP server** | stdio | Claude / Cursor / any MCP client |
16
+ | **CLI** | `dokeo` in your shell | Pipelines, n8n, Claude Code |
17
+
18
+ The legacy webhook (port 8502, `/check`, `/check-url`, etc.) is kept for
19
+ backward compatibility with existing n8n / Zapier / Make integrations.
20
+
21
+ ## Quickstart
22
+
23
+ ```bash
24
+ # 1. Install
25
+ pip install -e .
26
+
27
+ # 2. Run the API
28
+ uvicorn api.main:app --reload --port 8000
29
+ # → http://localhost:8000/docs for OpenAPI
30
+
31
+ # 3. Run the web app (separate terminal)
32
+ cd web && bun install && bun run dev
33
+
34
+ # 4. Run the Streamlit UI (separate terminal)
35
+ streamlit run app.py
36
+
37
+ # 5. Try the CLI
38
+ printf '# My post\n\n## Question?\n\n...\n' | dokeo pipe
39
+
40
+ # 6. Try the MCP server (Claude Desktop / Cursor will spawn it for you)
41
+ dokeo-mcp-server
42
+ ```
43
+
44
+ ## Self-hosted with Docker
45
+
46
+ ```bash
47
+ cp .env.example .env
48
+ # Edit .env - set DOKEO_API_KEY, DOKEO_PASSWORD, Clerk keys
49
+ docker compose up -d
50
+ ```
51
+
52
+ Routes (via Caddy at ports 80/443):
53
+
54
+ - `/` → Streamlit (port 8501)
55
+ - `/web/*`, `/sign-in`, `/sign-up` → Next.js (port 3000)
56
+ - `/api/v1/*` → FastAPI (port 8000)
57
+ - `/check*`, `/check-url*`, `/check-blog*`, `/health*`, etc. → legacy webhook (port 8502)
58
+
59
+ ## Auth model
60
+
61
+ | Surface | Auth |
62
+ |---|---|
63
+ | FastAPI (`/api/v1/*`) | Bearer token: `DOKEO_API_KEY` or `DOKEO_API_KEYS=key:tenant,...` |
64
+ | Legacy webhook | Same Bearer token |
65
+ | Streamlit | Cookie session via `DOKEO_PASSWORD` |
66
+ | Next.js | Clerk (hosted) |
67
+ | MCP | Filesystem only; no network exposure |
68
+ | CLI | None (local) |
69
+
70
+ `DOKEO_API_KEY` is required in any non-dev environment. Streamlit refuses
71
+ to boot in production without `DOKEO_PASSWORD`.
72
+
73
+ ## Documentation
74
+
75
+ - API: OpenAPI at `/docs` or `/redoc` when running
76
+ - MCP: [docs/mcp.md](docs/mcp.md)
77
+ - Engine config: `config.yaml`
78
+ - Content-type definitions: `gate/content_types.py`
79
+
80
+ ## Tests
81
+
82
+ ```bash
83
+ pytest # all tests
84
+ pytest tests/test_mcp_server.py # MCP only
85
+ ```
File without changes
@@ -0,0 +1,55 @@
1
+ """API key auth - FastAPI dependency.
2
+
3
+ Replaces the hand-rolled bearer check in gate/webhook.py with an idiomatic
4
+ FastAPI dependency. Same env vars, same multi-tenant pattern, same
5
+ constant-time compare.
6
+
7
+ Usage in a route:
8
+ @router.get("/stats", dependencies=[Depends(require_api_key)])
9
+ def stats(): ...
10
+
11
+ Or to read the tenant in the handler:
12
+ def stats(tenant: str = Depends(api_key_tenant)): ...
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import hmac
17
+ from typing import Optional
18
+
19
+ from fastapi import Depends, HTTPException, status
20
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
21
+
22
+ from api.config import settings
23
+
24
+ _bearer = HTTPBearer(auto_error=False)
25
+
26
+
27
async def require_api_key(
    creds: Optional[HTTPAuthorizationCredentials] = Depends(_bearer),
) -> str:
    """Validate the Bearer token and return the tenant it maps to.

    When no keys are configured (``settings.auth_enabled`` is false) every
    request is accepted and attributed to the ``"default"`` tenant. Otherwise
    the presented token is compared against each key in ``settings.api_keys``.

    Returns:
        The tenant id stored against the matching key.

    Raises:
        HTTPException: 401 with a ``WWW-Authenticate: Bearer`` header when the
            token is missing, empty, or matches no configured key.
    """
    if not settings.auth_enabled:
        return "default"
    if creds is None or not creds.credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="missing Bearer token",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # hmac.compare_digest() raises TypeError when given a str containing
    # non-ASCII characters. A client can send such a token, and that would
    # surface as a 500 instead of a 401, so compare the UTF-8 bytes instead.
    presented = creds.credentials.encode("utf-8")
    for key, tenant in settings.api_keys.items():
        if hmac.compare_digest(presented, key.encode("utf-8")):
            return tenant
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="invalid API key",
        headers={"WWW-Authenticate": "Bearer"},
    )
52
+
53
+
54
+ # Convenience aliases
55
+ api_key_tenant = require_api_key
@@ -0,0 +1,63 @@
1
+ """Settings - read once at startup from env vars.
2
+
3
+ Mirrors the env vars already used by gate/webhook.py so the two services
4
+ can share the same .env file during the transition:
5
+ DOKEO_API_KEY single bearer token (dev / single-tenant)
6
+ DOKEO_API_KEYS multi-tenant: key1:tenant1,key2:tenant2
7
+ DOKEO_CORS_ORIGIN allowlist for browser clients (the Next.js app)
8
+ DOKEO_RATE_LIMIT requests per minute per IP (default 120)
9
+ DOKEO_LOG_LEVEL INFO / DEBUG / WARNING
10
+ ANTHROPIC_API_KEY live mode for the builder (read by gate.listicle.llm)
11
+ OPENAI_API_KEY live mode for the builder
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ from pathlib import Path
17
+
18
+ ROOT = Path(__file__).resolve().parent.parent
19
+
20
+
21
+ def _parse_keys(raw: str) -> dict[str, str]:
22
+ out: dict[str, str] = {}
23
+ for pair in raw.split(","):
24
+ pair = pair.strip()
25
+ if not pair:
26
+ continue
27
+ if ":" in pair:
28
+ k, t = pair.split(":", 1)
29
+ out[k.strip()] = t.strip()
30
+ else:
31
+ out[pair] = "tenant-" + pair[:6]
32
+ return out
33
+
34
+
35
class Settings:
    """Process-wide configuration, read from the environment when this class
    body executes (i.e. once, at import time).

    All values are class attributes; ``auth_enabled`` is derived from them.
    """

    VERSION = "3.0.0"

    # Auth: the single-tenant key (if set) maps to the "default" tenant, and
    # entries from DOKEO_API_KEYS are layered on top of it.
    api_key: str = os.environ.get("DOKEO_API_KEY", "")
    api_keys: dict[str, str] = {
        **({api_key: "default"} if api_key else {}),
        **_parse_keys(os.environ.get("DOKEO_API_KEYS", "")),
    }

    # CORS: a single allowed origin; empty means none is configured.
    cors_origin: str = os.environ.get("DOKEO_CORS_ORIGIN", "")

    # Rate limit: request ceilings per window, and the window length in seconds.
    rate_limit: int = int(os.environ.get("DOKEO_RATE_LIMIT", "120"))
    rate_limit_tenant: int = int(os.environ.get("DOKEO_RATE_LIMIT_TENANT", "300"))
    rate_window: int = 60

    # Paths, all resolved relative to the package root.
    base_dir: Path = ROOT
    corpus_dir: Path = ROOT / "sample_corpus" / "published"
    config_path: Path = ROOT / "config.yaml"

    # Misc
    log_level: str = os.environ.get("DOKEO_LOG_LEVEL", "INFO")

    @property
    def auth_enabled(self) -> bool:
        """True when at least one API key is configured."""
        return bool(self.api_keys)
61
+
62
+
63
+ settings = Settings()
@@ -0,0 +1,41 @@
1
+ """Shared FastAPI dependencies - singletons for the engine + supporting stores.
2
+
3
+ These wrap the gate/ package's own singletons so the HTTP layer never
4
+ constructs engine objects itself. Keeps a single source of truth for config
5
+ and the corpus index (which is expensive to build).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import yaml
10
+ from functools import lru_cache
11
+
12
+ from fastapi import Request
13
+
14
+ from gate.gate import QualityGate
15
+ from gate.audit_log import AuditLog
16
+ from gate.knowledge_base import KnowledgeBase
17
+
18
+
19
@lru_cache(maxsize=1)
def get_quality_gate() -> QualityGate:
    """Return the core content-quality engine, built once per process.

    The first call loads the YAML file at ``settings.config_path`` and
    constructs a ``QualityGate`` over ``settings.corpus_dir``; ``lru_cache``
    returns that same instance on every later call.
    """
    from api.config import settings

    # Decode explicitly as UTF-8 so the config parses the same way regardless
    # of the host's locale encoding.
    with open(settings.config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    return QualityGate(
        str(settings.corpus_dir),
        cfg,
        custom_rules_path=str(settings.base_dir / "custom_rules.json"),
    )
27
+
28
+
29
@lru_cache(maxsize=1)
def get_audit_log() -> AuditLog:
    """Return the shared ``AuditLog``; it is constructed on first use only."""
    audit_log = AuditLog()
    return audit_log
32
+
33
+
34
@lru_cache(maxsize=1)
def get_knowledge_base() -> KnowledgeBase:
    """Return the shared ``KnowledgeBase`` rooted at ``<base_dir>/knowledge_base``.

    Constructed on the first call; later calls get the cached instance.
    """
    from api.config import settings

    kb_dir = settings.base_dir / "knowledge_base"
    return KnowledgeBase(str(kb_dir))
38
+
39
+
40
def get_request_id(request: Request) -> str:
    """Return the caller-supplied ``x-request-id`` header.

    Yields ``""`` when the header is absent or empty.
    """
    request_id = request.headers.get("x-request-id")
    if request_id:
        return request_id
    return ""
@@ -0,0 +1,90 @@
1
+ """Dokeo FastAPI service - the engine's new HTTP surface.
2
+
3
+ Runs alongside Streamlit (port 8501) and the legacy webhook (port 8502).
4
+ This service lives on port 8000 by default and exposes:
5
+
6
+ · Auto-generated OpenAPI docs at /docs and /redoc
7
+ · Same auth + rate-limit + CORS posture as the legacy webhook
8
+ · The full gate engine (8 checks, 17 content types)
9
+ · The listicle builder pipeline (research → generate → QA)
10
+ · Catalog endpoints (content types, engines, formats, categories)
11
+ · Audit log access
12
+
13
+ Run:
14
+ uvicorn api.main:app --reload --port 8000
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from contextlib import asynccontextmanager
20
+
21
+ from fastapi import FastAPI
22
+ from fastapi.middleware.cors import CORSMiddleware
23
+
24
+ from api.config import settings
25
+ from api.rate_limit import RateLimitMiddleware
26
+ from api.tracing import TraceMiddleware
27
+ from api.routes import admin, audit, batch, builder, catalog, health, knowledge, rules, scan
28
+
29
+ log = logging.getLogger("dokeo-api")
30
+
31
+
32
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log a one-line configuration summary at startup, then yield to the app.

    Nothing runs after the ``yield``; there is no shutdown work.
    """
    auth_state = "ON" if settings.auth_enabled else "OFF"
    cors_desc = settings.cors_origin or "same-origin"
    log.info(
        "dokeo-api v%s - auth=%s, rate_limit=%d/min, cors=%s",
        settings.VERSION,
        auth_state,
        settings.rate_limit,
        cors_desc,
    )
    yield
40
+
41
+
42
def create_app() -> FastAPI:
    """Build the FastAPI application.

    Sets the OpenAPI metadata, registers the CORS, tracing and rate-limit
    middleware, mounts every route module under ``/api/v1``, and adds an
    unlisted ``/`` endpoint that points at the docs and health URLs.
    """
    api_prefix = "/api/v1"
    description = (
        "Pre-publish content quality gate + listicle/comparison builder.\n\n"
        "**Engines:** SEO · AEO · GEO across 17 content types.\n\n"
        "**Auth:** Bearer token via `Authorization: Bearer <key>` header. "
        "Set `DOKEO_API_KEY` in env (dev: open when unset).\n\n"
        "**Rate limit:** per-IP token bucket. See `X-RateLimit-*` headers."
    )
    app = FastAPI(
        title="Dokeo Content Quality API",
        description=description,
        version=settings.VERSION,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
        lifespan=lifespan,
    )

    # ── Middleware (registration order unchanged: CORS, trace, rate limit) ──
    allowed_origins = [settings.cors_origin] if settings.cors_origin else []
    app.add_middleware(
        CORSMiddleware,
        allow_origins=allowed_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    app.add_middleware(TraceMiddleware)
    app.add_middleware(RateLimitMiddleware)

    # ── Routes (mounted in this order) ──────────────────────────────────
    route_modules = (
        health,
        catalog,
        scan,
        builder,
        audit,
        batch,
        rules,
        admin,
        knowledge,
    )
    for module in route_modules:
        app.include_router(module.router, prefix=api_prefix)

    @app.get("/", include_in_schema=False)
    def root():
        """Service banner with pointers to the docs and health endpoints."""
        return {
            "service": "dokeo-api",
            "version": settings.VERSION,
            "docs": "/docs",
            "health": f"{api_prefix}/health",
        }

    return app
88
+
89
+
90
+ app = create_app()
@@ -0,0 +1,115 @@
1
+ """Per-IP + per-tenant token-bucket rate limiter.
2
+
3
+ Two layers:
4
+ 1. Per-IP (60-120 req/min) — same as the old webhook behavior.
5
+ 2. Per-tenant (configurable) — set DOKEO_RATE_LIMIT_TENANT=300 for
6
+ a higher per-tenant ceiling once
7
+ you've authenticated.
8
+
9
+ X-RateLimit-* headers on every response tell clients when they can retry.
10
+ """
11
+ from __future__ import annotations
12
+ import threading
13
+ import time
14
+ from collections import defaultdict
15
+
16
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
17
+ from starlette.requests import Request
18
+ from starlette.responses import Response
19
+
20
+ from api.config import settings
21
+
22
+
23
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Sliding-window rate limiter applied per client IP and per tenant.

    For every key (an IP address or a tenant id) the middleware keeps the
    timestamps of the requests accepted during the last ``window`` seconds.
    A request is rejected with HTTP 429 once that list already holds
    ``limit`` entries. The IP check runs first; the tenant check runs only
    when the request carries a Bearer token that matches a configured key.

    Args:
        app: The wrapped ASGI application.
        ip_limit: Requests allowed per IP per window; a falsy value falls
            back to ``settings.rate_limit``.
        ip_window: IP window length in seconds; falsy falls back to
            ``settings.rate_window``.
        tenant_limit: Requests allowed per tenant per window; falsy falls
            back to ``settings.rate_limit_tenant``.
        tenant_window: Tenant window length in seconds; falsy falls back to
            ``settings.rate_window``.
    """

    def __init__(self, app, ip_limit: int | None = None, ip_window: int | None = None,
                 tenant_limit: int | None = None, tenant_window: int | None = None):
        super().__init__(app)
        self.ip_limit = ip_limit or settings.rate_limit
        self.ip_window = ip_window or settings.rate_window
        self.tenant_limit = tenant_limit or settings.rate_limit_tenant
        self.tenant_window = tenant_window or settings.rate_window
        # key -> ascending list of accepted-request timestamps.
        # NOTE(review): keys are never removed, so a stream of distinct
        # addresses grows these maps without bound - consider pruning.
        self._ip_buckets: dict[str, list[float]] = defaultdict(list)
        self._tenant_buckets: dict[str, list[float]] = defaultdict(list)
        self._lock = threading.Lock()

    def _client_ip(self, request: Request) -> str:
        """Return the address used as the per-IP bucket key.

        Prefers the first hop of ``X-Forwarded-For`` and falls back to the
        socket peer, or ``"unknown"`` when neither is available.
        """
        # NOTE(review): X-Forwarded-For is client-controlled unless a trusted
        # proxy overwrites it; confirm the deployment always fronts this
        # service with one, otherwise the per-IP limit can be sidestepped.
        fwd = request.headers.get("x-forwarded-for", "")
        first_hop = fwd.split(",")[0].strip()
        if first_hop:
            return first_hop
        # A header such as ", 10.0.0.1" has an empty first hop; use the peer
        # address rather than lumping those requests under the key "".
        return request.client.host if request.client else "unknown"

    def _check(self, key: str, buckets: dict, limit: int, window: int) -> tuple[bool, int, int]:
        """Record one request for ``key`` if its window still has room.

        Returns:
            ``(allowed, remaining, reset_seconds)``. When rejected,
            ``remaining`` is 0 and ``reset_seconds`` (at least 1) is the time
            until the oldest recorded request leaves the window. When
            allowed, ``reset_seconds`` is simply ``window``.
        """
        now = time.time()
        cutoff = now - window
        with self._lock:
            bucket = buckets[key]
            # Count the expired prefix and drop it with one slice delete;
            # repeated pop(0) shifts the whole list once per expired entry.
            expired = 0
            for stamp in bucket:
                if stamp >= cutoff:
                    break
                expired += 1
            if expired:
                del bucket[:expired]
            if len(bucket) >= limit:
                reset = int(window - (now - bucket[0])) if bucket else window
                return False, 0, max(reset, 1)
            bucket.append(now)
            return True, limit - len(bucket), window

    def _tenant_key(self, request: Request) -> str | None:
        """Map the request's Bearer token to a tenant id, or ``None``.

        The ``require_api_key`` dependency runs after this middleware, so the
        Authorization header is parsed again here.
        """
        import hmac

        scheme, _, token = request.headers.get("authorization", "").partition(" ")
        # HTTP auth scheme names are case-insensitive; matching only the
        # exact spelling "Bearer" would let "bearer <key>" skip the tenant
        # limit while still being a syntactically valid credential.
        if scheme.lower() != "bearer" or not token:
            return None
        # Compare bytes: compare_digest() raises TypeError for str arguments
        # that contain non-ASCII characters.
        presented = token.encode("utf-8")
        for key, tenant in settings.api_keys.items():
            if hmac.compare_digest(presented, key.encode("utf-8")):
                return tenant
        return None

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Apply both limits, then forward the request and annotate the response.

        Returns a 429 JSON response (with ``Retry-After``) when either limit
        is exceeded; otherwise the downstream response with
        ``X-RateLimit-*`` headers added.
        """
        from fastapi.responses import JSONResponse

        # 1. Per-IP
        ip = self._client_ip(request)
        ip_ok, ip_remaining, ip_reset = self._check(ip, self._ip_buckets, self.ip_limit, self.ip_window)
        if not ip_ok:
            return JSONResponse(
                status_code=429,
                content={"error": "rate limit exceeded (ip)", "limit": self.ip_limit,
                         "reset_in_seconds": ip_reset},
                headers={
                    "X-RateLimit-Limit": str(self.ip_limit),
                    "X-RateLimit-Remaining": "0",
                    "X-RateLimit-Reset": str(ip_reset),
                    "Retry-After": str(ip_reset),
                },
            )

        # 2. Per-tenant (only if we can identify one)
        tenant = self._tenant_key(request)
        tenant_remaining = None
        tenant_limit = None
        if tenant and self.tenant_limit:
            t_ok, t_remaining, t_reset = self._check(tenant, self._tenant_buckets,
                                                     self.tenant_limit, self.tenant_window)
            if not t_ok:
                return JSONResponse(
                    status_code=429,
                    content={"error": "rate limit exceeded (tenant)", "tenant": tenant,
                             "limit": self.tenant_limit, "reset_in_seconds": t_reset},
                    headers={
                        "X-RateLimit-Tenant-Limit": str(self.tenant_limit),
                        "X-RateLimit-Tenant-Remaining": "0",
                        "X-RateLimit-Tenant-Reset": str(t_reset),
                        "Retry-After": str(t_reset),
                    },
                )
            tenant_remaining = t_remaining
            tenant_limit = self.tenant_limit

        response = await call_next(request)
        response.headers["X-RateLimit-Limit"] = str(self.ip_limit)
        response.headers["X-RateLimit-Remaining"] = str(ip_remaining)
        response.headers["X-RateLimit-Reset"] = str(ip_reset)
        if tenant_limit is not None:
            response.headers["X-RateLimit-Tenant-Limit"] = str(tenant_limit)
            response.headers["X-RateLimit-Tenant-Remaining"] = str(tenant_remaining or 0)
        return response
@@ -0,0 +1,74 @@
1
+ """Lightweight distributed tracing.
2
+
3
+ Adds a request-scoped trace_id (8 bytes hex) and span_id (4 bytes hex)
4
+ to every request. Exposed as response headers (X-Trace-Id, X-Span-Id)
5
+ and threaded through the audit log so every scan is correlated.
6
+
7
+ For real OTel export, swap _start_span() for opentelemetry.trace
8
+ calls and add an exporter. The audit-log enrichment stays.
9
+ """
10
+ from __future__ import annotations
11
+ import os
12
+ import secrets
13
+ import time
14
+ import uuid
15
+ from contextvars import ContextVar
16
+
17
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
18
+ from starlette.requests import Request
19
+ from starlette.responses import Response
20
+
21
+
22
+ _current_trace: ContextVar = ContextVar("trace_id", default="")
23
+ _current_span: ContextVar = ContextVar("span_id", default="")
24
+ _span_start: ContextVar = ContextVar("span_start", default=0.0)
25
+
26
+
27
def current_trace_id() -> str:
    """Return the trace id bound to the current context (``""`` if none)."""
    trace_id = _current_trace.get()
    return trace_id
29
+
30
+
31
def current_span_id() -> str:
    """Return the span id bound to the current context (``""`` if none)."""
    span_id = _current_span.get()
    return span_id
33
+
34
+
35
def _start_span() -> tuple[str, str, float]:
    """Return ``(trace_id, span_id, start_time)`` for a new span.

    Reuses the trace id already bound to the context when there is one;
    otherwise generates a fresh 16-hex-character id. The span id is always
    new (8 hex characters) and the start time is ``time.time()``.
    """
    trace = _current_trace.get()
    if not trace:
        trace = uuid.uuid4().hex[:16]
    return trace, secrets.token_hex(4), time.time()
39
+
40
+
41
class TraceMiddleware(BaseHTTPMiddleware):
    """Assigns a trace_id+span_id to every request. Logs duration."""

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Bind trace context for the request, run it, then report on it.

        The trace id is taken from an incoming ``X-Trace-Id`` header when
        present (so an upstream proxy can stitch traces together), otherwise
        generated. After the downstream handler returns, the ids and the
        elapsed time are added as response headers and one JSON line is
        written to stderr.
        """
        import json
        import sys

        trace_id = request.headers.get("x-trace-id") or uuid.uuid4().hex[:16]
        span_id = secrets.token_hex(4)
        start = time.time()

        tok_t = _current_trace.set(trace_id)
        tok_s = _current_span.set(span_id)
        tok_start = _span_start.set(start)
        try:
            response = await call_next(request)
            duration_ms = round((time.time() - start) * 1000, 1)
        finally:
            # Always restore the previous context, including when the
            # downstream handler raises, so ids never leak past this request.
            _current_span.reset(tok_s)
            _current_trace.reset(tok_t)
            _span_start.reset(tok_start)

        response.headers["X-Trace-Id"] = trace_id
        response.headers["X-Span-Id"] = span_id
        response.headers["X-Trace-Duration-Ms"] = str(duration_ms)

        # Structured log line. Serialise with json.dumps so a path or a
        # client-supplied trace id containing quotes or control characters
        # cannot break the record or forge extra fields.
        record = {
            "event": "http.trace",
            "service": "dokeo-api",
            "trace_id": trace_id,
            "span_id": span_id,
            "method": request.method,
            "path": request.url.path,
            "status": response.status_code,
            "duration_ms": duration_ms,
        }
        print(json.dumps(record, separators=(",", ":")), file=sys.stderr)
        return response
@@ -0,0 +1,49 @@
1
+ """Clerk user identity - extracted from headers injected by the Next.js proxy.
2
+
3
+ The Next.js route handler reads the Clerk session server-side and forwards
4
+ identity as headers. This module makes them available to FastAPI handlers
5
+ as a dependency:
6
+
7
+ from api.user import current_user
8
+ def scan(user: User = Depends(current_user)): ...
9
+
10
+ In dev (no headers), falls back to an anonymous user so the API still works
11
+ when called directly without going through Next.js.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Optional
17
+
18
+ from fastapi import Depends, Header
19
+
20
+
21
@dataclass
class User:
    """Identity of the caller.

    ``id`` holds the Clerk user ID, or the placeholder ``"anonymous"`` when
    no identity was supplied.
    """

    id: str = "anonymous"
    email: str = ""
    name: str = ""

    @property
    def is_anonymous(self) -> bool:
        """True when ``id`` is still the ``"anonymous"`` placeholder."""
        return self.id == "anonymous"

    @property
    def display_name(self) -> str:
        """First non-empty value among name and email, else the id."""
        for candidate in (self.name, self.email):
            if candidate:
                return candidate
        return self.id
35
+
36
+
37
def current_user(
    x_dokeo_user_id: Optional[str] = Header(None, alias="X-Dokeo-User-Id"),
    x_dokeo_user_email: Optional[str] = Header(None, alias="X-Dokeo-User-Email"),
    x_dokeo_user_name: Optional[str] = Header(None, alias="X-Dokeo-User-Name"),
) -> User:
    """FastAPI dependency: build the caller's ``User`` from ``X-Dokeo-User-*`` headers.

    Returns an anonymous ``User`` when the id header is missing or empty.
    Missing email and name headers become empty strings.
    """
    # NOTE(review): these headers are taken at face value here - presumably
    # only the Next.js proxy can reach this service; confirm at the edge.
    if x_dokeo_user_id:
        email = x_dokeo_user_email or ""
        name = x_dokeo_user_name or ""
        return User(id=x_dokeo_user_id, email=email, name=name)
    return User()