dokeo 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +0 -0
- api/auth.py +55 -0
- api/config.py +63 -0
- api/deps.py +41 -0
- api/main.py +90 -0
- api/rate_limit.py +115 -0
- api/tracing.py +74 -0
- api/user.py +49 -0
- dokeo-3.0.0.dist-info/METADATA +112 -0
- dokeo-3.0.0.dist-info/RECORD +60 -0
- dokeo-3.0.0.dist-info/WHEEL +5 -0
- dokeo-3.0.0.dist-info/entry_points.txt +3 -0
- dokeo-3.0.0.dist-info/top_level.txt +3 -0
- dokeo_cli.py +300 -0
- gate/__init__.py +0 -0
- gate/audit_log.py +202 -0
- gate/auth.py +147 -0
- gate/batch.py +212 -0
- gate/catalog.py +120 -0
- gate/channels.py +306 -0
- gate/checks/__init__.py +17 -0
- gate/checks/aeo.py +69 -0
- gate/checks/cannibalisation.py +42 -0
- gate/checks/claims.py +53 -0
- gate/checks/confidentiality.py +32 -0
- gate/checks/geo.py +74 -0
- gate/checks/near_duplicate.py +44 -0
- gate/checks/readability.py +26 -0
- gate/checks/structure.py +49 -0
- gate/competitor.py +282 -0
- gate/content_type_detect.py +89 -0
- gate/content_types.py +1265 -0
- gate/corpus.py +51 -0
- gate/custom_rules.py +201 -0
- gate/diff_view.py +198 -0
- gate/email_report.py +148 -0
- gate/embeddings.py +35 -0
- gate/fetcher.py +175 -0
- gate/gate.py +133 -0
- gate/knowledge_base.py +200 -0
- gate/listicle/__init__.py +0 -0
- gate/listicle/assemble.py +111 -0
- gate/listicle/formats.py +245 -0
- gate/listicle/generate.py +52 -0
- gate/listicle/llm.py +389 -0
- gate/listicle/qa.py +210 -0
- gate/listicle/research.py +54 -0
- gate/listicle/run.py +251 -0
- gate/listicle/schema.py +74 -0
- gate/listicle_ui.py +431 -0
- gate/logging_utils.py +90 -0
- gate/mcp_server.py +505 -0
- gate/metrics.py +99 -0
- gate/notifier.py +140 -0
- gate/report_generator.py +333 -0
- gate/scheduler.py +151 -0
- gate/suggest.py +282 -0
- gate/trends.py +111 -0
- gate/voice.py +108 -0
- gate/webhook.py +498 -0
api/__init__.py
ADDED
|
File without changes
|
api/auth.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""API key auth - FastAPI dependency.
|
|
2
|
+
|
|
3
|
+
Replaces the hand-rolled bearer check in gate/webhook.py with an idiomatic
|
|
4
|
+
FastAPI dependency. Same env vars, same multi-tenant pattern, same
|
|
5
|
+
constant-time compare.
|
|
6
|
+
|
|
7
|
+
Usage in a route:
|
|
8
|
+
@router.get("/stats", dependencies=[Depends(require_api_key)])
|
|
9
|
+
def stats(): ...
|
|
10
|
+
|
|
11
|
+
Or to read the tenant in the handler:
|
|
12
|
+
def stats(tenant: str = Depends(api_key_tenant)): ...
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import hmac
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from fastapi import Depends, HTTPException, status
|
|
20
|
+
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
|
21
|
+
|
|
22
|
+
from api.config import settings
|
|
23
|
+
|
|
24
|
+
# Bearer-credential extractor used as a dependency by require_api_key.
# auto_error=False: a missing header is handled by require_api_key itself
# (it checks for creds being None and raises its own 401).
_bearer = HTTPBearer(auto_error=False)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def require_api_key(
    creds: Optional[HTTPAuthorizationCredentials] = Depends(_bearer),
) -> str:
    """Validate the Bearer token and return the tenant id it maps to.

    In dev (no keys configured) auth is open and the tenant is 'default' -
    same behavior as the existing webhook. In prod, set DOKEO_API_KEY or
    DOKEO_API_KEYS.

    Args:
        creds: Parsed ``Authorization`` header, or None when it is absent.

    Returns:
        The tenant id associated with the presented key, or 'default' when
        auth is disabled.

    Raises:
        HTTPException: 401 when auth is enabled and the token is missing or
            does not match any configured key.
    """
    if not settings.auth_enabled:
        return "default"
    if creds is None or not creds.credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="missing Bearer token",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # hmac.compare_digest() raises TypeError when given str arguments that
    # contain non-ASCII characters, which would turn a malformed token into
    # a 500. Comparing UTF-8 bytes keeps the constant-time compare and makes
    # such tokens fall through to the normal 401.
    token = creds.credentials.encode("utf-8")
    for key, tenant in settings.api_keys.items():
        if hmac.compare_digest(token, key.encode("utf-8")):
            return tenant
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="invalid API key",
        headers={"WWW-Authenticate": "Bearer"},
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Convenience aliases
|
|
55
|
+
# Same dependency under a second name, for handlers that want the returned
# tenant id: def stats(tenant: str = Depends(api_key_tenant)): ...
api_key_tenant = require_api_key
|
api/config.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Settings - read once at startup from env vars.
|
|
2
|
+
|
|
3
|
+
Mirrors the env vars already used by gate/webhook.py so the two services
|
|
4
|
+
can share the same .env file during the transition:
|
|
5
|
+
DOKEO_API_KEY single bearer token (dev / single-tenant)
|
|
6
|
+
DOKEO_API_KEYS multi-tenant: key1:tenant1,key2:tenant2
|
|
7
|
+
DOKEO_CORS_ORIGIN allowlist for browser clients (the Next.js app)
|
|
8
|
+
DOKEO_RATE_LIMIT requests per minute per IP (default 120)
|
|
9
|
+
DOKEO_LOG_LEVEL INFO / DEBUG / WARNING
|
|
10
|
+
ANTHROPIC_API_KEY live mode for the builder (read by gate.listicle.llm)
|
|
11
|
+
OPENAI_API_KEY live mode for the builder
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
# Parent of the directory that contains this file; base for the path settings below.
ROOT = Path(__file__).resolve().parent.parent
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _parse_keys(raw: str) -> dict[str, str]:
|
|
22
|
+
out: dict[str, str] = {}
|
|
23
|
+
for pair in raw.split(","):
|
|
24
|
+
pair = pair.strip()
|
|
25
|
+
if not pair:
|
|
26
|
+
continue
|
|
27
|
+
if ":" in pair:
|
|
28
|
+
k, t = pair.split(":", 1)
|
|
29
|
+
out[k.strip()] = t.strip()
|
|
30
|
+
else:
|
|
31
|
+
out[pair] = "tenant-" + pair[:6]
|
|
32
|
+
return out
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Settings:
    """Service configuration, read from environment variables.

    Every attribute below is a class attribute evaluated once, when this
    class body runs at import time; later changes to the environment are not
    picked up.
    """

    VERSION = "3.0.0"

    # Auth. DOKEO_API_KEY (when set) maps to the "default" tenant;
    # DOKEO_API_KEYS entries are merged on top and win on a key collision.
    api_key: str = os.environ.get("DOKEO_API_KEY", "")
    api_keys: dict[str, str] = {
        **({api_key: "default"} if api_key else {}),
        **_parse_keys(os.environ.get("DOKEO_API_KEYS", "")),
    }

    # CORS. Empty string means no cross-origin allowlist is configured.
    cors_origin: str = os.environ.get("DOKEO_CORS_ORIGIN", "")

    # Rate limit. Request counts per `rate_window` seconds.
    rate_limit: int = int(os.environ.get("DOKEO_RATE_LIMIT", "120"))
    rate_limit_tenant: int = int(os.environ.get("DOKEO_RATE_LIMIT_TENANT", "300"))
    rate_window: int = 60

    # Paths, all relative to ROOT.
    base_dir: Path = ROOT
    corpus_dir: Path = ROOT / "sample_corpus" / "published"
    config_path: Path = ROOT / "config.yaml"

    # Misc
    log_level: str = os.environ.get("DOKEO_LOG_LEVEL", "INFO")

    @property
    def auth_enabled(self) -> bool:
        """True when at least one API key is configured."""
        return len(self.api_keys) > 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Shared instance; the other api modules import this rather than Settings.
settings = Settings()
|
api/deps.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Shared FastAPI dependencies - singletons for the engine + supporting stores.
|
|
2
|
+
|
|
3
|
+
These wrap the gate/ package's own singletons so the HTTP layer never
|
|
4
|
+
constructs engine objects itself. Keeps a single source of truth for config
|
|
5
|
+
and the corpus index (which is expensive to build).
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
|
|
12
|
+
from fastapi import Request
|
|
13
|
+
|
|
14
|
+
from gate.gate import QualityGate
|
|
15
|
+
from gate.audit_log import AuditLog
|
|
16
|
+
from gate.knowledge_base import KnowledgeBase
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@lru_cache(maxsize=1)
def get_quality_gate() -> QualityGate:
    """The core content-quality engine. One instance per process.

    Loads the YAML engine config from ``settings.config_path`` and builds a
    QualityGate over ``settings.corpus_dir``, pointing it at
    ``custom_rules.json`` under ``settings.base_dir``. The result is cached,
    so later calls return the same object.

    Raises:
        OSError: if the config file cannot be opened.
        yaml.YAMLError: if the config file is not valid YAML.
    """
    from api.config import settings

    # Be explicit about the encoding: without it open() decodes with the
    # platform's locale encoding, which can mis-read a UTF-8 config file.
    with open(settings.config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    return QualityGate(
        str(settings.corpus_dir),
        cfg,
        custom_rules_path=str(settings.base_dir / "custom_rules.json"),
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@lru_cache(maxsize=1)
def get_audit_log() -> AuditLog:
    """Return the shared AuditLog, constructing it on the first call."""
    audit_log = AuditLog()
    return audit_log
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@lru_cache(maxsize=1)
def get_knowledge_base() -> KnowledgeBase:
    """Return the shared KnowledgeBase, constructing it on the first call.

    The knowledge base is rooted at the ``knowledge_base`` directory under
    ``settings.base_dir``.
    """
    from api.config import settings

    kb_dir = settings.base_dir / "knowledge_base"
    return KnowledgeBase(str(kb_dir))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_request_id(request: Request) -> str:
    """Return the ``x-request-id`` request header, or "" when it is missing or empty."""
    request_id = request.headers.get("x-request-id")
    if request_id:
        return request_id
    return ""
|
api/main.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Dokeo FastAPI service - the engine's new HTTP surface.
|
|
2
|
+
|
|
3
|
+
Runs alongside Streamlit (port 8501) and the legacy webhook (port 8502).
|
|
4
|
+
This service lives on port 8000 by default and exposes:
|
|
5
|
+
|
|
6
|
+
· Auto-generated OpenAPI docs at /docs and /redoc
|
|
7
|
+
· Same auth + rate-limit + CORS posture as the legacy webhook
|
|
8
|
+
· The full gate engine (8 checks, 17 content types)
|
|
9
|
+
· The listicle builder pipeline (research → generate → QA)
|
|
10
|
+
· Catalog endpoints (content types, engines, formats, categories)
|
|
11
|
+
· Audit log access
|
|
12
|
+
|
|
13
|
+
Run:
|
|
14
|
+
uvicorn api.main:app --reload --port 8000
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from contextlib import asynccontextmanager
|
|
20
|
+
|
|
21
|
+
from fastapi import FastAPI
|
|
22
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
23
|
+
|
|
24
|
+
from api.config import settings
|
|
25
|
+
from api.rate_limit import RateLimitMiddleware
|
|
26
|
+
from api.tracing import TraceMiddleware
|
|
27
|
+
from api.routes import admin, audit, batch, builder, catalog, health, knowledge, rules, scan
|
|
28
|
+
|
|
29
|
+
# Service logger; lifespan() writes the startup summary line to it.
log = logging.getLogger("dokeo-api")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook: log the effective configuration, then hand over to the app.

    Only startup work is done here; nothing runs after the ``yield``.
    """
    auth_state = "ON" if settings.auth_enabled else "OFF"
    cors_state = settings.cors_origin or "same-origin"
    log.info(
        "dokeo-api v%s - auth=%s, rate_limit=%d/min, cors=%s",
        settings.VERSION,
        auth_state,
        settings.rate_limit,
        cors_state,
    )
    yield
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def create_app() -> FastAPI:
    """Build and return the configured FastAPI application.

    Registers the CORS, tracing and rate-limit middleware, mounts every
    route module under ``/api/v1``, and adds an unlisted ``/`` endpoint that
    points callers at the docs and health URLs.
    """
    app = FastAPI(
        title="Dokeo Content Quality API",
        description=(
            "Pre-publish content quality gate + listicle/comparison builder.\n\n"
            "**Engines:** SEO · AEO · GEO across 17 content types.\n\n"
            "**Auth:** Bearer token via `Authorization: Bearer <key>` header. "
            "Set `DOKEO_API_KEY` in env (dev: open when unset).\n\n"
            "**Rate limit:** per-IP token bucket. See `X-RateLimit-*` headers."
        ),
        version=settings.VERSION,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
        lifespan=lifespan,
    )

    # -- Middleware ----------------------------------------------------
    # DOKEO_CORS_ORIGIN is an allowlist, so accept several comma-separated
    # origins. A single origin (the previous behavior) still yields a
    # one-element list, and an unset value yields an empty list.
    cors_origins = [
        origin.strip()
        for origin in settings.cors_origin.split(",")
        if origin.strip()
    ]
    app.add_middleware(
        CORSMiddleware,
        allow_origins=cors_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    # NOTE(review): Starlette appears to run the most recently added
    # middleware outermost, which would put RateLimitMiddleware around both
    # TraceMiddleware and CORSMiddleware, so its 429 responses would carry
    # neither trace nor CORS headers - confirm this ordering is intended.
    app.add_middleware(TraceMiddleware)
    app.add_middleware(RateLimitMiddleware)

    # -- Routes --------------------------------------------------------
    api_prefix = "/api/v1"
    app.include_router(health.router, prefix=api_prefix)
    app.include_router(catalog.router, prefix=api_prefix)
    app.include_router(scan.router, prefix=api_prefix)
    app.include_router(builder.router, prefix=api_prefix)
    app.include_router(audit.router, prefix=api_prefix)
    app.include_router(batch.router, prefix=api_prefix)
    app.include_router(rules.router, prefix=api_prefix)
    app.include_router(admin.router, prefix=api_prefix)
    app.include_router(knowledge.router, prefix=api_prefix)

    @app.get("/", include_in_schema=False)
    def root():
        """Service banner with pointers to the docs and health endpoints."""
        return {"service": "dokeo-api", "version": settings.VERSION,
                "docs": "/docs", "health": f"{api_prefix}/health"}

    return app
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Module-level ASGI application, as referenced by `uvicorn api.main:app`.
app = create_app()
|
api/rate_limit.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Per-IP + per-tenant sliding-window rate limiter.
|
|
2
|
+
|
|
3
|
+
Two layers:
|
|
4
|
+
1. Per-IP (60-120 req/min) — same as the old webhook behavior.
|
|
5
|
+
2. Per-tenant (configurable) — set DOKEO_RATE_LIMIT_TENANT=300 for
|
|
6
|
+
a higher per-tenant ceiling once
|
|
7
|
+
you've authenticated.
|
|
8
|
+
|
|
9
|
+
X-RateLimit-* headers on every response tell clients when they can retry.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
|
|
16
|
+
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
|
17
|
+
from starlette.requests import Request
|
|
18
|
+
from starlette.responses import Response
|
|
19
|
+
|
|
20
|
+
from api.config import settings
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Sliding-window rate limiter applied per client IP and per tenant.

    Each key (an IP address or a tenant id) owns a queue holding the
    timestamps of its accepted requests from the last ``window`` seconds. A
    request is rejected with HTTP 429 once that queue already holds
    ``limit`` entries. All state lives in this instance's memory, guarded by
    a single lock.
    """

    def __init__(self, app, ip_limit: int | None = None, ip_window: int | None = None,
                 tenant_limit: int | None = None, tenant_window: int | None = None):
        """Configure the limits; any argument left as None (or 0) falls back to settings."""
        # Imported here because only defaultdict is imported at file level.
        from collections import deque

        super().__init__(app)
        self.ip_limit = ip_limit or settings.rate_limit
        self.ip_window = ip_window or settings.rate_window
        self.tenant_limit = tenant_limit or settings.rate_limit_tenant
        self.tenant_window = tenant_window or settings.rate_window
        # deque gives O(1) removal of expired timestamps from the left end.
        self._ip_buckets: dict[str, deque[float]] = defaultdict(deque)
        self._tenant_buckets: dict[str, deque[float]] = defaultdict(deque)
        self._lock = threading.Lock()
        self._last_sweep = time.monotonic()

    def _client_ip(self, request: Request) -> str:
        """Return the caller's address: first X-Forwarded-For entry, else the socket peer."""
        # NOTE(review): X-Forwarded-For is client-controlled unless a trusted
        # proxy overwrites it; a direct caller could vary it to get a fresh
        # bucket per request. Confirm the deployment always sits behind one.
        fwd = request.headers.get("x-forwarded-for", "")
        if fwd:
            return fwd.split(",")[0].strip()
        return request.client.host if request.client else "unknown"

    def _evict_stale(self, now: float) -> None:
        """Drop keys with no request inside their window. Caller must hold ``self._lock``.

        Without this the bucket dicts keep one entry for every key ever
        seen. The sweep runs at most once per window so its cost is
        amortized across requests.
        """
        if now - self._last_sweep < max(self.ip_window, self.tenant_window):
            return
        self._last_sweep = now
        for buckets, window in ((self._ip_buckets, self.ip_window),
                                (self._tenant_buckets, self.tenant_window)):
            cutoff = now - window
            stale = [k for k, b in buckets.items() if not b or b[-1] < cutoff]
            for k in stale:
                del buckets[k]

    def _check(self, key: str, buckets: dict, limit: int, window: int) -> tuple[bool, int, int]:
        """Record one request for ``key`` if it is within ``limit``.

        Returns:
            ``(allowed, remaining, reset_seconds)``. When allowed, the
            request has been recorded and ``reset_seconds`` is ``window``.
            When rejected, ``remaining`` is 0 and ``reset_seconds`` is the
            time (at least 1) until the oldest recorded request expires.
        """
        # Monotonic clock: these timestamps are only ever subtracted from one
        # another, and must not be affected by wall-clock adjustments.
        now = time.monotonic()
        cutoff = now - window
        with self._lock:
            self._evict_stale(now)
            bucket = buckets[key]
            while bucket and bucket[0] < cutoff:
                bucket.popleft()
            if len(bucket) >= limit:
                reset = int(window - (now - bucket[0])) if bucket else window
                return False, 0, max(reset, 1)
            bucket.append(now)
            return True, limit - len(bucket), window

    def _tenant_key(self, request: Request) -> str | None:
        """Pull tenant from Bearer token (set by the require_api_key dep
        which runs after this middleware, so we re-parse here).

        Returns None when there is no Bearer token or it matches no key.
        """
        import hmac

        scheme, _, credentials = request.headers.get("authorization", "").partition(" ")
        # Match the scheme case-insensitively: a check for the exact prefix
        # "Bearer " would let a "bearer <key>" header skip the tenant limit
        # if the auth dependency accepts it.
        # NOTE(review): assumes HTTPBearer ignores scheme case - confirm.
        if scheme.lower() != "bearer":
            return None
        # compare_digest() raises TypeError on non-ASCII str input, so
        # compare UTF-8 bytes instead.
        token = credentials.encode("utf-8")
        for key, tenant in settings.api_keys.items():
            if hmac.compare_digest(token, key.encode("utf-8")):
                return tenant
        return None

    @staticmethod
    def _rejection(content: dict, header_prefix: str, limit: int, reset: int) -> Response:
        """Build the 429 response, with ``<header_prefix>-*`` and Retry-After headers."""
        from fastapi.responses import JSONResponse

        return JSONResponse(
            status_code=429,
            content=content,
            headers={
                f"{header_prefix}-Limit": str(limit),
                f"{header_prefix}-Remaining": "0",
                f"{header_prefix}-Reset": str(reset),
                "Retry-After": str(reset),
            },
        )

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Apply the per-IP check, then the per-tenant check, then call downstream.

        Rejected requests get a 429 and never reach the application;
        accepted ones have the X-RateLimit-* headers added to the response.
        """
        # 1. Per-IP
        ip = self._client_ip(request)
        ip_ok, ip_remaining, ip_reset = self._check(ip, self._ip_buckets, self.ip_limit, self.ip_window)
        if not ip_ok:
            return self._rejection(
                {"error": "rate limit exceeded (ip)", "limit": self.ip_limit,
                 "reset_in_seconds": ip_reset},
                "X-RateLimit", self.ip_limit, ip_reset,
            )

        # 2. Per-tenant (only if we can identify one)
        tenant = self._tenant_key(request)
        tenant_remaining = None
        tenant_limit = None
        if tenant and self.tenant_limit:
            t_ok, t_remaining, t_reset = self._check(tenant, self._tenant_buckets,
                                                     self.tenant_limit, self.tenant_window)
            if not t_ok:
                return self._rejection(
                    {"error": "rate limit exceeded (tenant)", "tenant": tenant,
                     "limit": self.tenant_limit, "reset_in_seconds": t_reset},
                    "X-RateLimit-Tenant", self.tenant_limit, t_reset,
                )
            tenant_remaining = t_remaining
            tenant_limit = self.tenant_limit

        response = await call_next(request)
        response.headers["X-RateLimit-Limit"] = str(self.ip_limit)
        response.headers["X-RateLimit-Remaining"] = str(ip_remaining)
        response.headers["X-RateLimit-Reset"] = str(ip_reset)
        if tenant_limit is not None:
            response.headers["X-RateLimit-Tenant-Limit"] = str(tenant_limit)
            response.headers["X-RateLimit-Tenant-Remaining"] = str(tenant_remaining or 0)
        return response
|
api/tracing.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Lightweight distributed tracing.
|
|
2
|
+
|
|
3
|
+
Adds a request-scoped trace_id (8 bytes hex) and span_id (4 bytes hex)
|
|
4
|
+
to every request. Exposed as response headers (X-Trace-Id, X-Span-Id)
|
|
5
|
+
and threaded through the audit log so every scan is correlated.
|
|
6
|
+
|
|
7
|
+
For real OTel export, swap _start_span() for opentelemetry.trace
|
|
8
|
+
calls and add an exporter. The audit-log enrichment stays.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
import os
|
|
12
|
+
import secrets
|
|
13
|
+
import time
|
|
14
|
+
import uuid
|
|
15
|
+
from contextvars import ContextVar
|
|
16
|
+
|
|
17
|
+
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
|
18
|
+
from starlette.requests import Request
|
|
19
|
+
from starlette.responses import Response
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Trace id for the current context; "" until TraceMiddleware sets it.
_current_trace: ContextVar = ContextVar("trace_id", default="")
|
|
23
|
+
# Span id for the current context; "" until TraceMiddleware sets it.
_current_span: ContextVar = ContextVar("span_id", default="")
|
|
24
|
+
# time.time() value recorded when the current span began; 0.0 by default.
_span_start: ContextVar = ContextVar("span_start", default=0.0)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def current_trace_id() -> str:
    """Return the trace id bound to the current context ("" when none is set)."""
    trace_id = _current_trace.get()
    return trace_id
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def current_span_id() -> str:
    """Return the span id bound to the current context ("" when none is set)."""
    span_id = _current_span.get()
    return span_id
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _start_span() -> tuple[str, str, float]:
    """Return ``(trace_id, span_id, start_time)`` for a new span.

    The trace id is the one already bound to the current context, or a fresh
    16-hex-character id when none is bound. The span id is 8 random hex
    characters and the start time is ``time.time()``. No context variable is
    modified here.
    """
    trace_id = _current_trace.get()
    if not trace_id:
        trace_id = uuid.uuid4().hex[:16]
    span_id = secrets.token_hex(4)
    started_at = time.time()
    return trace_id, span_id, started_at
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class TraceMiddleware(BaseHTTPMiddleware):
    """Assigns a trace_id+span_id to every request. Logs duration."""

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Bind trace context for the request, call downstream, then report.

        The trace id is taken from an incoming ``x-trace-id`` header when
        present (so a load balancer or proxy can stitch traces together),
        otherwise a new 16-hex-character id is generated. The response gets
        X-Trace-Id, X-Span-Id and X-Trace-Duration-Ms headers, and one JSON
        line describing the request is written to stderr.
        """
        import json
        import sys

        trace_id = request.headers.get("x-trace-id") or uuid.uuid4().hex[:16]
        span_id = secrets.token_hex(4)
        start = time.time()
        tok_t = _current_trace.set(trace_id)
        tok_s = _current_span.set(span_id)
        tok_start = _span_start.set(start)
        try:
            response = await call_next(request)
        finally:
            # Restore the previous context values even when the downstream
            # handler raises, so a failed request cannot leave its ids bound.
            _span_start.reset(tok_start)
            _current_span.reset(tok_s)
            _current_trace.reset(tok_t)
        duration_ms = round((time.time() - start) * 1000, 1)

        response.headers["X-Trace-Id"] = trace_id
        response.headers["X-Span-Id"] = span_id
        response.headers["X-Trace-Duration-Ms"] = str(duration_ms)

        # Structured log line. trace_id and path come from the client, so
        # serialize with json.dumps: a quote or backslash in either must be
        # escaped rather than break (or inject fields into) the JSON record.
        record = {
            "event": "http.trace",
            "service": "dokeo-api",
            "trace_id": trace_id,
            "span_id": span_id,
            "method": request.method,
            "path": request.url.path,
            "status": response.status_code,
            "duration_ms": duration_ms,
        }
        print(json.dumps(record, separators=(",", ":"), ensure_ascii=False), file=sys.stderr)
        return response
|
api/user.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Clerk user identity - extracted from headers injected by the Next.js proxy.
|
|
2
|
+
|
|
3
|
+
The Next.js route handler reads the Clerk session server-side and forwards
|
|
4
|
+
identity as headers. This module makes them available to FastAPI handlers
|
|
5
|
+
as a dependency:
|
|
6
|
+
|
|
7
|
+
from api.user import current_user
|
|
8
|
+
def scan(user: User = Depends(current_user)): ...
|
|
9
|
+
|
|
10
|
+
In dev (no headers), falls back to an anonymous user so the API still works
|
|
11
|
+
when called directly without going through Next.js.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
from fastapi import Depends, Header
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class User:
    """Identity of the caller.

    ``id`` holds the Clerk user ID, or the placeholder 'anonymous' in dev
    when no identity was supplied. ``email`` and ``name`` default to "".
    """

    id: str = "anonymous"
    email: str = ""
    name: str = ""

    @property
    def is_anonymous(self) -> bool:
        """True while ``id`` is still the 'anonymous' placeholder."""
        return "anonymous" == self.id

    @property
    def display_name(self) -> str:
        """Best label for the user: name, else email, else id."""
        for candidate in (self.name, self.email):
            if candidate:
                return candidate
        return self.id
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def current_user(
    x_dokeo_user_id: Optional[str] = Header(None, alias="X-Dokeo-User-Id"),
    x_dokeo_user_email: Optional[str] = Header(None, alias="X-Dokeo-User-Email"),
    x_dokeo_user_name: Optional[str] = Header(None, alias="X-Dokeo-User-Name"),
) -> User:
    """FastAPI dependency: the Clerk user making this request.

    Builds a User from the X-Dokeo-User-* request headers. When the user-id
    header is missing or empty the default (anonymous) User is returned and
    the other two headers are ignored.
    """
    # NOTE(review): these headers are taken at face value here; presumably
    # only the Next.js proxy can set them - verify that direct callers of
    # the API cannot supply their own.
    if x_dokeo_user_id:
        return User(
            id=x_dokeo_user_id,
            email=x_dokeo_user_email or "",
            name=x_dokeo_user_name or "",
        )
    return User()
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dokeo
|
|
3
|
+
Version: 3.0.0
|
|
4
|
+
Summary: Pre-publish content quality gate (SEO, AEO, GEO) - API, CLI, MCP
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: streamlit>=1.32.0
|
|
8
|
+
Requires-Dist: pyyaml>=6.0
|
|
9
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
10
|
+
Requires-Dist: nltk>=3.8
|
|
11
|
+
Requires-Dist: textstat>=0.7
|
|
12
|
+
Requires-Dist: numpy>=1.24
|
|
13
|
+
Requires-Dist: trafilatura>=2.0.0
|
|
14
|
+
Requires-Dist: feedparser>=6.0.0
|
|
15
|
+
Requires-Dist: requests>=2.28.0
|
|
16
|
+
Requires-Dist: lxml[html-clean]>=5.0.0
|
|
17
|
+
Requires-Dist: anthropic>=0.40
|
|
18
|
+
Requires-Dist: openai>=1.50
|
|
19
|
+
Requires-Dist: pydantic>=2.0
|
|
20
|
+
Requires-Dist: python-dotenv>=1.0
|
|
21
|
+
Requires-Dist: fastapi>=0.115
|
|
22
|
+
Requires-Dist: uvicorn[standard]>=0.30
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: eval_type_backport>=0.2; python_version < "3.10"
|
|
25
|
+
Provides-Extra: mcp
|
|
26
|
+
Requires-Dist: mcp>=1.0; extra == "mcp"
|
|
27
|
+
|
|
28
|
+
# Dokeo — Content Quality Gate
|
|
29
|
+
|
|
30
|
+
Pre-publish content quality gate for SEO, AEO, and GEO. Scores every post,
|
|
31
|
+
email, video script, and AI output against 8 core checks before it ships.
|
|
32
|
+
|
|
33
|
+
## Surfaces
|
|
34
|
+
|
|
35
|
+
Dokeo ships as five parallel surfaces that all hit the same gate engine:
|
|
36
|
+
|
|
37
|
+
| Surface | Port / Transport | Best for |
|
|
38
|
+
|---|---|---|
|
|
39
|
+
| **FastAPI service** | `http://host:8000/api/v1/*` | Programmatic integrations, webhooks |
|
|
40
|
+
| **Next.js web app** | `https://host/web/*` | End-user UI with Clerk auth |
|
|
41
|
+
| **Streamlit UI** | `https://host/` | Legacy dashboard, single-user password |
|
|
42
|
+
| **MCP server** | stdio | Claude / Cursor / any MCP client |
|
|
43
|
+
| **CLI** | `dokeo` in your shell | Pipelines, n8n, Claude Code |
|
|
44
|
+
|
|
45
|
+
The legacy webhook (port 8502, `/check`, `/check-url`, etc.) is kept for
|
|
46
|
+
backward compatibility with existing n8n / Zapier / Make integrations.
|
|
47
|
+
|
|
48
|
+
## Quickstart
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# 1. Install
|
|
52
|
+
pip install -e .
|
|
53
|
+
|
|
54
|
+
# 2. Run the API
|
|
55
|
+
uvicorn api.main:app --reload --port 8000
|
|
56
|
+
# → http://localhost:8000/docs for OpenAPI
|
|
57
|
+
|
|
58
|
+
# 3. Run the web app (separate terminal)
|
|
59
|
+
cd web && bun install && bun run dev
|
|
60
|
+
|
|
61
|
+
# 4. Run the Streamlit UI (separate terminal)
|
|
62
|
+
streamlit run app.py
|
|
63
|
+
|
|
64
|
+
# 5. Try the CLI
|
|
65
|
+
echo "# My post\n\n## Question?\n\n..." | dokeo pipe
|
|
66
|
+
|
|
67
|
+
# 6. Try the MCP server (Claude Desktop / Cursor will spawn it for you)
|
|
68
|
+
dokeo-mcp-server
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Self-hosted with Docker
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
cp .env.example .env
|
|
75
|
+
# Edit .env - set DOKEO_API_KEY, DOKEO_PASSWORD, Clerk keys
|
|
76
|
+
docker compose up -d
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Routes (via Caddy at ports 80/443):
|
|
80
|
+
|
|
81
|
+
- `/` → Streamlit (port 8501)
|
|
82
|
+
- `/web/*`, `/sign-in`, `/sign-up` → Next.js (port 3000)
|
|
83
|
+
- `/api/v1/*` → FastAPI (port 8000)
|
|
84
|
+
- `/check*`, `/check-url*`, `/check-blog*`, `/health*`, etc. → legacy webhook (port 8502)
|
|
85
|
+
|
|
86
|
+
## Auth model
|
|
87
|
+
|
|
88
|
+
| Surface | Auth |
|
|
89
|
+
|---|---|
|
|
90
|
+
| FastAPI (`/api/v1/*`) | Bearer token: `DOKEO_API_KEY` or `DOKEO_API_KEYS=key:tenant,...` |
|
|
91
|
+
| Legacy webhook | Same Bearer token |
|
|
92
|
+
| Streamlit | Cookie session via `DOKEO_PASSWORD` |
|
|
93
|
+
| Next.js | Clerk (hosted) |
|
|
94
|
+
| MCP | Filesystem only; no network exposure |
|
|
95
|
+
| CLI | None (local) |
|
|
96
|
+
|
|
97
|
+
`DOKEO_API_KEY` is required in any non-dev environment. Streamlit refuses
|
|
98
|
+
to boot in production without `DOKEO_PASSWORD`.
|
|
99
|
+
|
|
100
|
+
## Documentation
|
|
101
|
+
|
|
102
|
+
- API: OpenAPI at `/docs` or `/redoc` when running
|
|
103
|
+
- MCP: [docs/mcp.md](docs/mcp.md)
|
|
104
|
+
- Engine config: `config.yaml`
|
|
105
|
+
- Content-type definitions: `gate/content_types.py`
|
|
106
|
+
|
|
107
|
+
## Tests
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pytest # all tests
|
|
111
|
+
pytest tests/test_mcp_server.py # MCP only
|
|
112
|
+
```
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
dokeo_cli.py,sha256=iwZfFzHwsUh8Rkk6y2hOheVe4Q411CjgCM7vS2y07sI,10951
|
|
2
|
+
api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
api/auth.py,sha256=YxpdJwBZGSB3wYlQVPHUC1PqCr3dA0NkGEt1jLHwNzg,1673
|
|
4
|
+
api/config.py,sha256=Dl48g8ls59cJmlIuRlUaQNv_5jqZTBWsI4qYjG1TCGE,1979
|
|
5
|
+
api/deps.py,sha256=g2oS_iugY2BWSqiowtFKqwr1dJsfkuFHOGR6OralPSo,1242
|
|
6
|
+
api/main.py,sha256=qcp09ZTimHc86EW-n7FCDNvLGQUs-zCNoVQxnZGdjU4,3424
|
|
7
|
+
api/rate_limit.py,sha256=zYAYBGVvWPykiv1HGkPlf86us8HhCv907TwBVy6cocE,5162
|
|
8
|
+
api/tracing.py,sha256=lRdsHYhjVl_KuQ92LwxIeR3LZQO91rQbDsLIQH-RTmE,2721
|
|
9
|
+
api/user.py,sha256=ItoRWovAV9lPfFm3hBmztLN_QGbWj0MTgp-pfYtYqyU,1483
|
|
10
|
+
gate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
gate/audit_log.py,sha256=ms96nbUlQ4_4_UfasDp_GktNEMcPkEQnP3TLk08Eu04,7936
|
|
12
|
+
gate/auth.py,sha256=kTqcsaJfsfIF9Osy0dEhXaivsPouVK5-tRG9X3F1gQE,4924
|
|
13
|
+
gate/batch.py,sha256=vX-hrEozbFVfb6IG0kwejLpEycEuOojBglZtxwv3sxQ,8059
|
|
14
|
+
gate/catalog.py,sha256=UsSmb8svggFHPqRcFhl88CQk4tks06sHkxTyNKffvew,4934
|
|
15
|
+
gate/channels.py,sha256=DBTgdRkm6ayyxPrep0V0O1ZNYuMO8JWXnp4lTn_fkqo,15738
|
|
16
|
+
gate/competitor.py,sha256=cKD4m4PrvDkhXBuPwKFD3KnLEL1-mpqSGFNdFAcbV5Q,9149
|
|
17
|
+
gate/content_type_detect.py,sha256=OerByyOisx_oO2yKMk8VxbL55Rta8e6lEpOLD9s7Gw4,3185
|
|
18
|
+
gate/content_types.py,sha256=oBMK7Bh4e081IUkbeiQsL0-uUk1zGOKDUx5VAOD4EE4,63618
|
|
19
|
+
gate/corpus.py,sha256=VqbdkY5mH6VESGHqA9Pcm55iwsghl-KooLRoKLjiTgY,1317
|
|
20
|
+
gate/custom_rules.py,sha256=fZO6UPvsqdYrDHP4AwvFOJdRYEp8yaQFVPMNzyrswNc,8344
|
|
21
|
+
gate/diff_view.py,sha256=zVR2lCdrRE3qQte7c2hbepGxQ88iWFR5_zPLFiGeWD8,7990
|
|
22
|
+
gate/email_report.py,sha256=S5WJaVvkyuvXYFEmnBiOWl8bATur2L2Kybk015yW0ys,7361
|
|
23
|
+
gate/embeddings.py,sha256=944qM1rHnXeM2bmCJeJ9kXirDz-MDKOAwmm-re_5nRE,1291
|
|
24
|
+
gate/fetcher.py,sha256=I3zqNVNcI7z7KqVBOBbx-FOeoQoN9UeQal2_W495pCM,5512
|
|
25
|
+
gate/gate.py,sha256=dbBPwdkD-JJ58Apec3cfw8yFsz6p6lPHu3_a9H9yws4,5005
|
|
26
|
+
gate/knowledge_base.py,sha256=iQ1uPCgZjBBikrWQ5Zl_XnNfb2Kj3wRyIqIorX-jFt4,7244
|
|
27
|
+
gate/listicle_ui.py,sha256=T2Ys9eP25Cz8_Yyb_I8byiiCgTb6xDyp55M_TOeQ_Hs,18755
|
|
28
|
+
gate/logging_utils.py,sha256=rpC_LhSLsLhrL9FhdI2k2TxqNtG73i70jn_7M5g5NhA,2950
|
|
29
|
+
gate/mcp_server.py,sha256=lPC5xzNUxaNpHP8pS56dEpf96CchZjjFcp-IprQonlI,17057
|
|
30
|
+
gate/metrics.py,sha256=o34YsaM5m-_CHxV5IZAi-mnm5ENuJHPPQ2ayth_4km8,3632
|
|
31
|
+
gate/notifier.py,sha256=J27qMud6cHJcYRDPcSBnqso-4IvnLczSgXlMvpvDVV0,6377
|
|
32
|
+
gate/report_generator.py,sha256=V2Vo6sPIlTWhpBfQf-PAK1My1NXFmkq9wKJyewHbDRw,11111
|
|
33
|
+
gate/scheduler.py,sha256=9s_tlnP7-2Csb4GTiaSUKSFPmC2jFpVvB5vVpWNdmtU,5563
|
|
34
|
+
gate/suggest.py,sha256=-BPaXyp-_AufZDxIrUPDhaMxaU5ISaIjORqAbICrRhw,10863
|
|
35
|
+
gate/trends.py,sha256=SS4s9y9LgqqZq6GHioECF8nuiRg1Lc7CAvJUWo8jBeA,4698
|
|
36
|
+
gate/voice.py,sha256=k4Ls9NpyyyBa43kFkVHD8KPnxQ9gqQxdwx_sVywmpZI,4510
|
|
37
|
+
gate/webhook.py,sha256=09W_iEi2NSQvOyGxquJ79_ArvTe8-0XZ9-alV52-a7E,21512
|
|
38
|
+
gate/checks/__init__.py,sha256=KEP-bRTi8DSYp6PpzxY4hQVpamX-FL6XHXidCcrrIeI,413
|
|
39
|
+
gate/checks/aeo.py,sha256=aAObDa8yljxRraBFXOkjTVgdTibc5RO_idaIQRb36qY,2196
|
|
40
|
+
gate/checks/cannibalisation.py,sha256=mQwOyRwE2yO4jIys51vUGS9mhhuALVX3KOAbqhdb1kg,1508
|
|
41
|
+
gate/checks/claims.py,sha256=wLGF_v72_dEp1z9J8xzvdfM2yV4tA_qhvQMZJq0nCyY,1901
|
|
42
|
+
gate/checks/confidentiality.py,sha256=bi9b7TJQrU6sZgXNYBVKSZi24Cv5FiJXuEhwvPt2-IA,1037
|
|
43
|
+
gate/checks/geo.py,sha256=bgmrO3wqO_tbQZUzHmg8azTnwGpcHJmtt8eqVM-exqc,2405
|
|
44
|
+
gate/checks/near_duplicate.py,sha256=AKzExEvId-bHUKuMH8_cKCgO58Fu1GRr2GhMpaKxZ3A,1421
|
|
45
|
+
gate/checks/readability.py,sha256=_ePrftn1Vn0UT0RMTUhX8pNyQP5r2UF1jZVXdgMgiGc,847
|
|
46
|
+
gate/checks/structure.py,sha256=6_AYTgZ7MRbjCY0OrDvI9ZRjlXJNfdakZeR0OHIZrzM,1991
|
|
47
|
+
gate/listicle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
+
gate/listicle/assemble.py,sha256=U6MBNmmja545JKO6KKPJ26MZfr7WM0yuLdCS7-LGins,4051
|
|
49
|
+
gate/listicle/formats.py,sha256=dySnQ1Z32okTj6P_Oa1USlCYyTcP6muiZ7D_vzdS-oM,11061
|
|
50
|
+
gate/listicle/generate.py,sha256=uTWXmoiALWmwXdneO_vLpDQnubqr6ht6ieQmBAG8HZ4,2137
|
|
51
|
+
gate/listicle/llm.py,sha256=UaNMizX634D7dt0kDIv5bfYqjErK22dcwYu2hlh7xFc,18973
|
|
52
|
+
gate/listicle/qa.py,sha256=dqUwdsWH29HrZV3mKi0XnBjyaerKUDChAYLQ0_TyuQI,9626
|
|
53
|
+
gate/listicle/research.py,sha256=M0_0F3vnYWfB-hnPszE6BgDh64FqQx4cQJ80nPLPijM,2278
|
|
54
|
+
gate/listicle/run.py,sha256=QFiXyJoUPfJAuiBpnya9MJd7QDqWP6DznQ8t9tApDeo,10806
|
|
55
|
+
gate/listicle/schema.py,sha256=pfXP40lREn6Chv-LxREECjwWpM8Dz5GX49Ao0J27-0o,2742
|
|
56
|
+
dokeo-3.0.0.dist-info/METADATA,sha256=8FLhAISkkOyD66ylq41Sn770cp9DF8HP01v286Nxn5o,3307
|
|
57
|
+
dokeo-3.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
58
|
+
dokeo-3.0.0.dist-info/entry_points.txt,sha256=DUnZjVJrhAV6yBwMwZPVHcxl9DuIH6HAo7nDyuSW0Ek,81
|
|
59
|
+
dokeo-3.0.0.dist-info/top_level.txt,sha256=LG941KagTGke_Nhvv63-oP1jeoTIzpbZwge8ZVLbleU,19
|
|
60
|
+
dokeo-3.0.0.dist-info/RECORD,,
|