dokeo 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. api/__init__.py +0 -0
  2. api/auth.py +55 -0
  3. api/config.py +63 -0
  4. api/deps.py +41 -0
  5. api/main.py +90 -0
  6. api/rate_limit.py +115 -0
  7. api/tracing.py +74 -0
  8. api/user.py +49 -0
  9. dokeo-3.0.0.dist-info/METADATA +112 -0
  10. dokeo-3.0.0.dist-info/RECORD +60 -0
  11. dokeo-3.0.0.dist-info/WHEEL +5 -0
  12. dokeo-3.0.0.dist-info/entry_points.txt +3 -0
  13. dokeo-3.0.0.dist-info/top_level.txt +3 -0
  14. dokeo_cli.py +300 -0
  15. gate/__init__.py +0 -0
  16. gate/audit_log.py +202 -0
  17. gate/auth.py +147 -0
  18. gate/batch.py +212 -0
  19. gate/catalog.py +120 -0
  20. gate/channels.py +306 -0
  21. gate/checks/__init__.py +17 -0
  22. gate/checks/aeo.py +69 -0
  23. gate/checks/cannibalisation.py +42 -0
  24. gate/checks/claims.py +53 -0
  25. gate/checks/confidentiality.py +32 -0
  26. gate/checks/geo.py +74 -0
  27. gate/checks/near_duplicate.py +44 -0
  28. gate/checks/readability.py +26 -0
  29. gate/checks/structure.py +49 -0
  30. gate/competitor.py +282 -0
  31. gate/content_type_detect.py +89 -0
  32. gate/content_types.py +1265 -0
  33. gate/corpus.py +51 -0
  34. gate/custom_rules.py +201 -0
  35. gate/diff_view.py +198 -0
  36. gate/email_report.py +148 -0
  37. gate/embeddings.py +35 -0
  38. gate/fetcher.py +175 -0
  39. gate/gate.py +133 -0
  40. gate/knowledge_base.py +200 -0
  41. gate/listicle/__init__.py +0 -0
  42. gate/listicle/assemble.py +111 -0
  43. gate/listicle/formats.py +245 -0
  44. gate/listicle/generate.py +52 -0
  45. gate/listicle/llm.py +389 -0
  46. gate/listicle/qa.py +210 -0
  47. gate/listicle/research.py +54 -0
  48. gate/listicle/run.py +251 -0
  49. gate/listicle/schema.py +74 -0
  50. gate/listicle_ui.py +431 -0
  51. gate/logging_utils.py +90 -0
  52. gate/mcp_server.py +505 -0
  53. gate/metrics.py +99 -0
  54. gate/notifier.py +140 -0
  55. gate/report_generator.py +333 -0
  56. gate/scheduler.py +151 -0
  57. gate/suggest.py +282 -0
  58. gate/trends.py +111 -0
  59. gate/voice.py +108 -0
  60. gate/webhook.py +498 -0
api/__init__.py ADDED
File without changes
api/auth.py ADDED
@@ -0,0 +1,55 @@
1
+ """API key auth - FastAPI dependency.
2
+
3
+ Replaces the hand-rolled bearer check in gate/webhook.py with an idiomatic
4
+ FastAPI dependency. Same env vars, same multi-tenant pattern, same
5
+ constant-time compare.
6
+
7
+ Usage in a route:
8
+ @router.get("/stats", dependencies=[Depends(require_api_key)])
9
+ def stats(): ...
10
+
11
+ Or to read the tenant in the handler:
12
+ def stats(tenant: str = Depends(api_key_tenant)): ...
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import hmac
17
+ from typing import Optional
18
+
19
+ from fastapi import Depends, HTTPException, status
20
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
21
+
22
+ from api.config import settings
23
+
24
+ _bearer = HTTPBearer(auto_error=False)
25
+
26
+
27
async def require_api_key(
    creds: Optional[HTTPAuthorizationCredentials] = Depends(_bearer),
) -> str:
    """Validate the Bearer token and return the tenant id it maps to.

    When no keys are configured (``settings.auth_enabled`` is false) auth is
    open and every caller is treated as tenant ``'default'``.

    Args:
        creds: Parsed ``Authorization`` header, or ``None`` when the header
            is absent (``_bearer`` is created with ``auto_error=False``).

    Returns:
        The tenant id associated with the presented key.

    Raises:
        HTTPException: 401 when the token is missing or matches no key.
    """
    if not settings.auth_enabled:
        return "default"
    if creds is None or not creds.credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="missing Bearer token",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # Compare as bytes: hmac.compare_digest() raises TypeError when given a
    # str containing non-ASCII characters, which would turn a malformed token
    # into a 500 instead of a 401.
    token = creds.credentials.encode("utf-8")
    matched: Optional[str] = None
    # Every configured key is compared (no early exit) so the response time
    # does not depend on which entry matched.
    for key, tenant in settings.api_keys.items():
        if hmac.compare_digest(token, key.encode("utf-8")) and matched is None:
            matched = tenant
    if matched is not None:
        return matched
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="invalid API key",
        headers={"WWW-Authenticate": "Bearer"},
    )
52
+
53
+
54
+ # Convenience aliases
55
+ api_key_tenant = require_api_key
api/config.py ADDED
@@ -0,0 +1,63 @@
1
+ """Settings - read once at startup from env vars.
2
+
3
+ Mirrors the env vars already used by gate/webhook.py so the two services
4
+ can share the same .env file during the transition:
5
+ DOKEO_API_KEY single bearer token (dev / single-tenant)
6
+ DOKEO_API_KEYS multi-tenant: key1:tenant1,key2:tenant2
7
+ DOKEO_CORS_ORIGIN allowlist for browser clients (the Next.js app)
8
+ DOKEO_RATE_LIMIT requests per minute per IP (default 120)
9
+ DOKEO_LOG_LEVEL INFO / DEBUG / WARNING
10
+ ANTHROPIC_API_KEY live mode for the builder (read by gate.listicle.llm)
11
+ OPENAI_API_KEY live mode for the builder
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ from pathlib import Path
17
+
18
+ ROOT = Path(__file__).resolve().parent.parent
19
+
20
+
21
+ def _parse_keys(raw: str) -> dict[str, str]:
22
+ out: dict[str, str] = {}
23
+ for pair in raw.split(","):
24
+ pair = pair.strip()
25
+ if not pair:
26
+ continue
27
+ if ":" in pair:
28
+ k, t = pair.split(":", 1)
29
+ out[k.strip()] = t.strip()
30
+ else:
31
+ out[pair] = "tenant-" + pair[:6]
32
+ return out
33
+
34
+
35
class Settings:
    """Process-wide configuration, read from environment variables.

    All values are class attributes, so the environment is read once, when
    this class body executes (at import of the module).
    """

    VERSION = "3.0.0"

    # Auth
    # Single-tenant key; empty string when DOKEO_API_KEY is unset.
    api_key: str = os.environ.get("DOKEO_API_KEY", "")
    # key -> tenant id. The single key (if set) maps to "default"; entries
    # from DOKEO_API_KEYS are merged on top and win on a duplicate key.
    api_keys: dict[str, str] = {
        **({api_key: "default"} if api_key else {}),
        **_parse_keys(os.environ.get("DOKEO_API_KEYS", "")),
    }

    # CORS
    cors_origin: str = os.environ.get("DOKEO_CORS_ORIGIN", "")

    # Rate limit
    # int() raises ValueError at import time if the variable is not numeric.
    rate_limit: int = int(os.environ.get("DOKEO_RATE_LIMIT", "120"))
    rate_limit_tenant: int = int(os.environ.get("DOKEO_RATE_LIMIT_TENANT", "300"))
    rate_window: int = 60

    # Paths
    base_dir: Path = ROOT
    corpus_dir: Path = ROOT / "sample_corpus" / "published"
    config_path: Path = ROOT / "config.yaml"

    # Misc
    log_level: str = os.environ.get("DOKEO_LOG_LEVEL", "INFO")

    @property
    def auth_enabled(self) -> bool:
        """True when at least one API key is configured."""
        return bool(self.api_keys)
61
+
62
+
63
+ settings = Settings()
api/deps.py ADDED
@@ -0,0 +1,41 @@
1
+ """Shared FastAPI dependencies - singletons for the engine + supporting stores.
2
+
3
+ These wrap the gate/ package's own singletons so the HTTP layer never
4
+ constructs engine objects itself. Keeps a single source of truth for config
5
+ and the corpus index (which is expensive to build).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import yaml
10
+ from functools import lru_cache
11
+
12
+ from fastapi import Request
13
+
14
+ from gate.gate import QualityGate
15
+ from gate.audit_log import AuditLog
16
+ from gate.knowledge_base import KnowledgeBase
17
+
18
+
19
@lru_cache(maxsize=1)
def get_quality_gate() -> QualityGate:
    """Return the core content-quality engine; one instance per process.

    The result is memoised by ``lru_cache``, so the config file is read and
    the engine constructed only on the first call.

    Raises:
        OSError: if ``settings.config_path`` cannot be opened.
    """
    from api.config import settings

    # Explicit encoding: without it open() uses the platform's default codec,
    # which can mis-decode non-ASCII text in the YAML config.
    with open(settings.config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    return QualityGate(
        str(settings.corpus_dir),
        cfg,
        custom_rules_path=str(settings.base_dir / "custom_rules.json"),
    )
27
+
28
+
29
@lru_cache(maxsize=1)
def get_audit_log() -> AuditLog:
    """Return the shared ``AuditLog``; built on first call, then cached."""
    audit_log = AuditLog()
    return audit_log
32
+
33
+
34
@lru_cache(maxsize=1)
def get_knowledge_base() -> KnowledgeBase:
    """Return the shared ``KnowledgeBase``; built on first call, then cached.

    The store is rooted at ``<settings.base_dir>/knowledge_base``.
    """
    from api.config import settings

    kb_root = settings.base_dir / "knowledge_base"
    return KnowledgeBase(str(kb_root))
38
+
39
+
40
def get_request_id(request: Request) -> str:
    """Return the caller-supplied ``x-request-id`` header, or ``""`` if absent or empty."""
    request_id = request.headers.get("x-request-id")
    if request_id:
        return request_id
    return ""
api/main.py ADDED
@@ -0,0 +1,90 @@
1
+ """Dokeo FastAPI service - the engine's new HTTP surface.
2
+
3
+ Runs alongside Streamlit (port 8501) and the legacy webhook (port 8502).
4
+ This service lives on port 8000 by default and exposes:
5
+
6
+ · Auto-generated OpenAPI docs at /docs and /redoc
7
+ · Same auth + rate-limit + CORS posture as the legacy webhook
8
+ · The full gate engine (8 checks, 17 content types)
9
+ · The listicle builder pipeline (research → generate → QA)
10
+ · Catalog endpoints (content types, engines, formats, categories)
11
+ · Audit log access
12
+
13
+ Run:
14
+ uvicorn api.main:app --reload --port 8000
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from contextlib import asynccontextmanager
20
+
21
+ from fastapi import FastAPI
22
+ from fastapi.middleware.cors import CORSMiddleware
23
+
24
+ from api.config import settings
25
+ from api.rate_limit import RateLimitMiddleware
26
+ from api.tracing import TraceMiddleware
27
+ from api.routes import admin, audit, batch, builder, catalog, health, knowledge, rules, scan
28
+
29
+ log = logging.getLogger("dokeo-api")
30
+
31
+
32
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log the effective configuration once at startup, then yield to the app.

    Nothing runs after the ``yield``, so there is no shutdown work.
    """
    auth_state = "ON" if settings.auth_enabled else "OFF"
    cors_state = settings.cors_origin or "same-origin"
    log.info(
        "dokeo-api v%s - auth=%s, rate_limit=%d/min, cors=%s",
        settings.VERSION,
        auth_state,
        settings.rate_limit,
        cors_state,
    )
    yield
40
+
41
+
42
def create_app() -> FastAPI:
    """Build the FastAPI application: metadata, middleware, routers, root route.

    Returns:
        A configured ``FastAPI`` instance with every router mounted under
        ``/api/v1``.
    """
    app = FastAPI(
        title="Dokeo Content Quality API",
        description=(
            "Pre-publish content quality gate + listicle/comparison builder.\n\n"
            "**Engines:** SEO · AEO · GEO across 17 content types.\n\n"
            "**Auth:** Bearer token via `Authorization: Bearer <key>` header. "
            "Set `DOKEO_API_KEY` in env (dev: open when unset).\n\n"
            "**Rate limit:** per-IP token bucket. See `X-RateLimit-*` headers."
        ),
        version=settings.VERSION,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
        lifespan=lifespan,
    )

    # ── Middleware ────────────────────────────────────────────────────
    # DOKEO_CORS_ORIGIN is an allowlist: accept one origin or several
    # separated by commas. A single origin behaves exactly as before; an
    # unset value yields an empty list (no cross-origin access).
    cors_origins = [
        origin.strip()
        for origin in settings.cors_origin.split(",")
        if origin.strip()
    ]
    app.add_middleware(
        CORSMiddleware,
        allow_origins=cors_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    # NOTE(review): middleware added later wraps middleware added earlier, so
    # the rate limiter presumably runs before tracing and CORS and its 429
    # responses would carry neither trace nor CORS headers - confirm intended.
    app.add_middleware(TraceMiddleware)
    app.add_middleware(RateLimitMiddleware)

    # ── Routes ────────────────────────────────────────────────────────
    api_prefix = "/api/v1"
    for module in (health, catalog, scan, builder, audit, batch, rules, admin, knowledge):
        app.include_router(module.router, prefix=api_prefix)

    @app.get("/", include_in_schema=False)
    def root():
        """Service banner pointing at the docs and health endpoints."""
        return {"service": "dokeo-api", "version": settings.VERSION,
                "docs": "/docs", "health": f"{api_prefix}/health"}

    return app
88
+
89
+
90
+ app = create_app()
api/rate_limit.py ADDED
@@ -0,0 +1,115 @@
1
+ """Per-IP + per-tenant sliding-window rate limiter.
2
+
3
+ Two layers:
4
+ 1. Per-IP (60-120 req/min) — same as the old webhook behavior.
5
+ 2. Per-tenant (configurable) — set DOKEO_RATE_LIMIT_TENANT=300 for
6
+ a higher per-tenant ceiling once
7
+ you've authenticated.
8
+
9
+ X-RateLimit-* headers on every response tell clients when they can retry.
10
+ """
11
+ from __future__ import annotations
12
+ import threading
13
+ import time
14
+ from collections import defaultdict
15
+
16
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
17
+ from starlette.requests import Request
18
+ from starlette.responses import Response
19
+
20
+ from api.config import settings
21
+
22
+
23
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Sliding-window rate limiter keyed by client IP and by tenant.

    Each key owns a list of request timestamps. A request is allowed while
    fewer than ``limit`` timestamps fall inside the last ``window`` seconds.
    The per-IP check always runs; the per-tenant check runs only when the
    request carries a Bearer token that matches a configured API key.
    """

    def __init__(self, app, ip_limit: int | None = None, ip_window: int | None = None,
                 tenant_limit: int | None = None, tenant_window: int | None = None):
        """Store limits, falling back to ``settings`` for any value not given.

        Because the fallback uses ``or``, an explicit ``0`` is treated the
        same as ``None``.
        """
        super().__init__(app)
        self.ip_limit = ip_limit or settings.rate_limit
        self.ip_window = ip_window or settings.rate_window
        self.tenant_limit = tenant_limit or settings.rate_limit_tenant
        self.tenant_window = tenant_window or settings.rate_window
        self._ip_buckets: dict[str, list[float]] = defaultdict(list)
        self._tenant_buckets: dict[str, list[float]] = defaultdict(list)
        self._lock = threading.Lock()
        # Earliest time at which idle buckets are swept again.
        self._next_sweep = 0.0

    def _client_ip(self, request: Request) -> str:
        """Return the first ``X-Forwarded-For`` entry, else the socket peer.

        NOTE(review): the header is used as sent. If the service is reachable
        without a proxy that overwrites it, a client can pick its own
        rate-limit key - confirm the deployment always sets this header.
        """
        fwd = request.headers.get("x-forwarded-for", "")
        if fwd:
            return fwd.split(",")[0].strip()
        return request.client.host if request.client else "unknown"

    def _evict_idle(self, now: float) -> None:
        """Delete buckets with no hit inside their window. Caller holds the lock.

        Without this, every key ever seen keeps an entry for the life of the
        process.
        """
        for buckets, window in ((self._ip_buckets, self.ip_window),
                                (self._tenant_buckets, self.tenant_window)):
            cutoff = now - window
            idle = [k for k, hits in buckets.items() if not hits or hits[-1] < cutoff]
            for k in idle:
                del buckets[k]

    def _check(self, key: str, buckets: dict, limit: int, window: int) -> tuple[bool, int, int]:
        """Record a hit for ``key`` if it is under ``limit``.

        Returns:
            ``(allowed, remaining, reset_seconds)``. When rejected,
            ``remaining`` is 0 and ``reset_seconds`` is the time until the
            oldest recorded hit leaves the window (at least 1). When allowed,
            ``reset_seconds`` is the full window length.
        """
        now = time.time()
        with self._lock:
            # Sweep at most once per window so the cost stays amortised.
            if now >= self._next_sweep:
                self._evict_idle(now)
                self._next_sweep = now + max(self.ip_window, self.tenant_window)
            bucket = buckets[key]
            while bucket and bucket[0] < now - window:
                bucket.pop(0)
            if len(bucket) >= limit:
                reset = int(window - (now - bucket[0])) if bucket else window
                return False, 0, max(reset, 1)
            bucket.append(now)
            return True, limit - len(bucket), window

    def _tenant_key(self, request: Request) -> str | None:
        """Resolve the tenant for the request's Bearer token, if any.

        The auth dependency runs after this middleware, so the header is
        parsed again here. The scheme is matched case-insensitively so that a
        lower-case ``bearer`` scheme cannot skip the tenant limit.

        Returns:
            The tenant id, or ``None`` when there is no Bearer token or it
            matches no configured key.
        """
        import hmac  # local: this module's header does not import hmac

        scheme, _, token = request.headers.get("authorization", "").partition(" ")
        if scheme.lower() != "bearer" or not token:
            return None
        # Bytes comparison: compare_digest() raises TypeError on non-ASCII str.
        presented = token.encode("utf-8")
        for key, tenant in settings.api_keys.items():
            if hmac.compare_digest(presented, key.encode("utf-8")):
                return tenant
        return None

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Apply the IP limit, then the tenant limit, then forward the request.

        A rejected request gets a 429 JSON body with ``Retry-After``. Allowed
        requests have ``X-RateLimit-*`` headers added to the response, plus
        ``X-RateLimit-Tenant-*`` when a tenant was identified.
        """
        from fastapi.responses import JSONResponse

        # 1. Per-IP
        ip = self._client_ip(request)
        ip_ok, ip_remaining, ip_reset = self._check(
            ip, self._ip_buckets, self.ip_limit, self.ip_window)
        if not ip_ok:
            return JSONResponse(
                status_code=429,
                content={"error": "rate limit exceeded (ip)", "limit": self.ip_limit,
                         "reset_in_seconds": ip_reset},
                headers={
                    "X-RateLimit-Limit": str(self.ip_limit),
                    "X-RateLimit-Remaining": "0",
                    "X-RateLimit-Reset": str(ip_reset),
                    "Retry-After": str(ip_reset),
                },
            )

        # 2. Per-tenant (only if we can identify one)
        tenant = self._tenant_key(request)
        tenant_remaining = None
        tenant_limit = None
        if tenant and self.tenant_limit:
            t_ok, t_remaining, t_reset = self._check(
                tenant, self._tenant_buckets, self.tenant_limit, self.tenant_window)
            if not t_ok:
                return JSONResponse(
                    status_code=429,
                    content={"error": "rate limit exceeded (tenant)", "tenant": tenant,
                             "limit": self.tenant_limit, "reset_in_seconds": t_reset},
                    headers={
                        "X-RateLimit-Tenant-Limit": str(self.tenant_limit),
                        "X-RateLimit-Tenant-Remaining": "0",
                        "X-RateLimit-Tenant-Reset": str(t_reset),
                        "Retry-After": str(t_reset),
                    },
                )
            tenant_remaining = t_remaining
            tenant_limit = self.tenant_limit

        response = await call_next(request)
        response.headers["X-RateLimit-Limit"] = str(self.ip_limit)
        response.headers["X-RateLimit-Remaining"] = str(ip_remaining)
        response.headers["X-RateLimit-Reset"] = str(ip_reset)
        if tenant_limit is not None:
            response.headers["X-RateLimit-Tenant-Limit"] = str(tenant_limit)
            response.headers["X-RateLimit-Tenant-Remaining"] = str(tenant_remaining or 0)
        return response
api/tracing.py ADDED
@@ -0,0 +1,74 @@
1
+ """Lightweight distributed tracing.
2
+
3
+ Adds a request-scoped trace_id (8 bytes hex) and span_id (4 bytes hex)
4
+ to every request. Exposed as response headers (X-Trace-Id, X-Span-Id)
5
+ and threaded through the audit log so every scan is correlated.
6
+
7
+ For real OTel export, swap _start_span() for opentelemetry.trace
8
+ calls and add an exporter. The audit-log enrichment stays.
9
+ """
10
+ from __future__ import annotations
11
+ import os
12
+ import secrets
13
+ import time
14
+ import uuid
15
+ from contextvars import ContextVar
16
+
17
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
18
+ from starlette.requests import Request
19
+ from starlette.responses import Response
20
+
21
+
22
+ _current_trace: ContextVar = ContextVar("trace_id", default="")
23
+ _current_span: ContextVar = ContextVar("span_id", default="")
24
+ _span_start: ContextVar = ContextVar("span_start", default=0.0)
25
+
26
+
27
def current_trace_id() -> str:
    """Return the trace id bound to the current context (``""`` if none is set)."""
    trace_id = _current_trace.get()
    return trace_id
29
+
30
+
31
def current_span_id() -> str:
    """Return the span id bound to the current context (``""`` if none is set)."""
    span_id = _current_span.get()
    return span_id
33
+
34
+
35
def _start_span() -> tuple[str, str, float]:
    """Produce ``(trace_id, span_id, start_time)`` for a new span.

    The current context's trace id is reused when one is set; otherwise a
    fresh 16-hex-character id is generated. The span id is always new.
    """
    inherited = _current_trace.get()
    trace = inherited if inherited else uuid.uuid4().hex[:16]
    span = secrets.token_hex(4)
    started = time.time()
    return trace, span, started
39
+
40
+
41
class TraceMiddleware(BaseHTTPMiddleware):
    """Assigns a trace_id+span_id to every request. Logs duration."""

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Bind trace context for the request, call downstream, emit headers and a log line.

        The trace id is taken from an incoming ``x-trace-id`` header when
        present (so an upstream proxy can stitch traces together), otherwise
        generated. The response gains ``X-Trace-Id``, ``X-Span-Id`` and
        ``X-Trace-Duration-Ms``; one JSON line is written to stderr.
        """
        import json
        import sys

        incoming_trace = request.headers.get("x-trace-id")
        trace_id = incoming_trace or uuid.uuid4().hex[:16]
        span_id = secrets.token_hex(4)
        start = time.time()

        tok_t = _current_trace.set(trace_id)
        tok_s = _current_span.set(span_id)
        tok_start = _span_start.set(start)
        try:
            response = await call_next(request)
        finally:
            # Restore the previous context even when the handler raises.
            _span_start.reset(tok_start)
            _current_span.reset(tok_s)
            _current_trace.reset(tok_t)

        duration_ms = round((time.time() - start) * 1000, 1)
        response.headers["X-Trace-Id"] = trace_id
        response.headers["X-Span-Id"] = span_id
        response.headers["X-Trace-Duration-Ms"] = str(duration_ms)

        # Structured log line. json.dumps escapes quotes and control
        # characters in the client-controlled path and trace id, which plain
        # string interpolation would copy straight into the log.
        record = {
            "event": "http.trace",
            "service": "dokeo-api",
            "trace_id": trace_id,
            "span_id": span_id,
            "method": request.method,
            "path": request.url.path,
            "status": response.status_code,
            "duration_ms": duration_ms,
        }
        print(json.dumps(record, separators=(",", ":"), ensure_ascii=False), file=sys.stderr)
        return response
api/user.py ADDED
@@ -0,0 +1,49 @@
1
+ """Clerk user identity - extracted from headers injected by the Next.js proxy.
2
+
3
+ The Next.js route handler reads the Clerk session server-side and forwards
4
+ identity as headers. This module makes them available to FastAPI handlers
5
+ as a dependency:
6
+
7
+ from api.user import current_user
8
+ def scan(user: User = Depends(current_user)): ...
9
+
10
+ In dev (no headers), falls back to an anonymous user so the API still works
11
+ when called directly without going through Next.js.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Optional
17
+
18
+ from fastapi import Depends, Header
19
+
20
+
21
@dataclass
class User:
    """Identity of the caller; every field has a default, so ``User()`` is the anonymous user."""

    # User id; the literal "anonymous" marks an unidentified caller.
    id: str = "anonymous"
    # Email address, or "" when not supplied.
    email: str = ""
    # Human-readable name, or "" when not supplied.
    name: str = ""

    @property
    def display_name(self) -> str:
        """First non-empty value among name and email, otherwise the id."""
        for candidate in (self.name, self.email):
            if candidate:
                return candidate
        return self.id

    @property
    def is_anonymous(self) -> bool:
        """True when ``id`` is the literal ``"anonymous"``."""
        return self.id == "anonymous"
35
+
36
+
37
def current_user(
    x_dokeo_user_id: Optional[str] = Header(None, alias="X-Dokeo-User-Id"),
    x_dokeo_user_email: Optional[str] = Header(None, alias="X-Dokeo-User-Email"),
    x_dokeo_user_name: Optional[str] = Header(None, alias="X-Dokeo-User-Name"),
) -> User:
    """FastAPI dependency: build a ``User`` from the ``X-Dokeo-User-*`` headers.

    Returns the anonymous ``User()`` when the id header is missing or empty;
    the email and name headers are then ignored. Missing email or name
    become ``""``.

    NOTE(review): the headers are read as sent; nothing in this function
    checks that they came from the Next.js proxy - confirm that direct
    callers cannot set them.
    """
    if x_dokeo_user_id:
        email = x_dokeo_user_email or ""
        name = x_dokeo_user_name or ""
        return User(id=x_dokeo_user_id, email=email, name=name)
    return User()
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: dokeo
3
+ Version: 3.0.0
4
+ Summary: Pre-publish content quality gate (SEO, AEO, GEO) - API, CLI, MCP
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: streamlit>=1.32.0
8
+ Requires-Dist: pyyaml>=6.0
9
+ Requires-Dist: scikit-learn>=1.3.0
10
+ Requires-Dist: nltk>=3.8
11
+ Requires-Dist: textstat>=0.7
12
+ Requires-Dist: numpy>=1.24
13
+ Requires-Dist: trafilatura>=2.0.0
14
+ Requires-Dist: feedparser>=6.0.0
15
+ Requires-Dist: requests>=2.28.0
16
+ Requires-Dist: lxml[html-clean]>=5.0.0
17
+ Requires-Dist: anthropic>=0.40
18
+ Requires-Dist: openai>=1.50
19
+ Requires-Dist: pydantic>=2.0
20
+ Requires-Dist: python-dotenv>=1.0
21
+ Requires-Dist: fastapi>=0.115
22
+ Requires-Dist: uvicorn[standard]>=0.30
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: eval_type_backport>=0.2; python_version < "3.10"
25
+ Provides-Extra: mcp
26
+ Requires-Dist: mcp>=1.0; extra == "mcp"
27
+
28
+ # Dokeo — Content Quality Gate
29
+
30
+ Pre-publish content quality gate for SEO, AEO, and GEO. Scores every post,
31
+ email, video script, and AI output against 8 core checks before it ships.
32
+
33
+ ## Surfaces
34
+
35
+ Dokeo ships as five parallel surfaces that all hit the same gate engine:
36
+
37
+ | Surface | Port / Transport | Best for |
38
+ |---|---|---|
39
+ | **FastAPI service** | `http://host:8000/api/v1/*` | Programmatic integrations, webhooks |
40
+ | **Next.js web app** | `https://host/web/*` | End-user UI with Clerk auth |
41
+ | **Streamlit UI** | `https://host/` | Legacy dashboard, single-user password |
42
+ | **MCP server** | stdio | Claude / Cursor / any MCP client |
43
+ | **CLI** | `dokeo` in your shell | Pipelines, n8n, Claude Code |
44
+
45
+ The legacy webhook (port 8502, `/check`, `/check-url`, etc.) is kept for
46
+ backward compatibility with existing n8n / Zapier / Make integrations.
47
+
48
+ ## Quickstart
49
+
50
+ ```bash
51
+ # 1. Install
52
+ pip install -e .
53
+
54
+ # 2. Run the API
55
+ uvicorn api.main:app --reload --port 8000
56
+ # → http://localhost:8000/docs for OpenAPI
57
+
58
+ # 3. Run the web app (separate terminal)
59
+ cd web && bun install && bun run dev
60
+
61
+ # 4. Run the Streamlit UI (separate terminal)
62
+ streamlit run app.py
63
+
64
+ # 5. Try the CLI
65
+ printf '# My post\n\n## Question?\n\n...\n' | dokeo pipe
66
+
67
+ # 6. Try the MCP server (Claude Desktop / Cursor will spawn it for you)
68
+ dokeo-mcp-server
69
+ ```
70
+
71
+ ## Self-hosted with Docker
72
+
73
+ ```bash
74
+ cp .env.example .env
75
+ # Edit .env - set DOKEO_API_KEY, DOKEO_PASSWORD, Clerk keys
76
+ docker compose up -d
77
+ ```
78
+
79
+ Routes (via Caddy at ports 80/443):
80
+
81
+ - `/` → Streamlit (port 8501)
82
+ - `/web/*`, `/sign-in`, `/sign-up` → Next.js (port 3000)
83
+ - `/api/v1/*` → FastAPI (port 8000)
84
+ - `/check*`, `/check-url*`, `/check-blog*`, `/health*`, etc. → legacy webhook (port 8502)
85
+
86
+ ## Auth model
87
+
88
+ | Surface | Auth |
89
+ |---|---|
90
+ | FastAPI (`/api/v1/*`) | Bearer token: `DOKEO_API_KEY` or `DOKEO_API_KEYS=key:tenant,...` |
91
+ | Legacy webhook | Same Bearer token |
92
+ | Streamlit | Cookie session via `DOKEO_PASSWORD` |
93
+ | Next.js | Clerk (hosted) |
94
+ | MCP | Filesystem only; no network exposure |
95
+ | CLI | None (local) |
96
+
97
+ `DOKEO_API_KEY` is required in any non-dev environment. Streamlit refuses
98
+ to boot in production without `DOKEO_PASSWORD`.
99
+
100
+ ## Documentation
101
+
102
+ - API: OpenAPI at `/docs` or `/redoc` when running
103
+ - MCP: [docs/mcp.md](docs/mcp.md)
104
+ - Engine config: `config.yaml`
105
+ - Content-type definitions: `gate/content_types.py`
106
+
107
+ ## Tests
108
+
109
+ ```bash
110
+ pytest # all tests
111
+ pytest tests/test_mcp_server.py # MCP only
112
+ ```
@@ -0,0 +1,60 @@
1
+ dokeo_cli.py,sha256=iwZfFzHwsUh8Rkk6y2hOheVe4Q411CjgCM7vS2y07sI,10951
2
+ api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ api/auth.py,sha256=YxpdJwBZGSB3wYlQVPHUC1PqCr3dA0NkGEt1jLHwNzg,1673
4
+ api/config.py,sha256=Dl48g8ls59cJmlIuRlUaQNv_5jqZTBWsI4qYjG1TCGE,1979
5
+ api/deps.py,sha256=g2oS_iugY2BWSqiowtFKqwr1dJsfkuFHOGR6OralPSo,1242
6
+ api/main.py,sha256=qcp09ZTimHc86EW-n7FCDNvLGQUs-zCNoVQxnZGdjU4,3424
7
+ api/rate_limit.py,sha256=zYAYBGVvWPykiv1HGkPlf86us8HhCv907TwBVy6cocE,5162
8
+ api/tracing.py,sha256=lRdsHYhjVl_KuQ92LwxIeR3LZQO91rQbDsLIQH-RTmE,2721
9
+ api/user.py,sha256=ItoRWovAV9lPfFm3hBmztLN_QGbWj0MTgp-pfYtYqyU,1483
10
+ gate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ gate/audit_log.py,sha256=ms96nbUlQ4_4_UfasDp_GktNEMcPkEQnP3TLk08Eu04,7936
12
+ gate/auth.py,sha256=kTqcsaJfsfIF9Osy0dEhXaivsPouVK5-tRG9X3F1gQE,4924
13
+ gate/batch.py,sha256=vX-hrEozbFVfb6IG0kwejLpEycEuOojBglZtxwv3sxQ,8059
14
+ gate/catalog.py,sha256=UsSmb8svggFHPqRcFhl88CQk4tks06sHkxTyNKffvew,4934
15
+ gate/channels.py,sha256=DBTgdRkm6ayyxPrep0V0O1ZNYuMO8JWXnp4lTn_fkqo,15738
16
+ gate/competitor.py,sha256=cKD4m4PrvDkhXBuPwKFD3KnLEL1-mpqSGFNdFAcbV5Q,9149
17
+ gate/content_type_detect.py,sha256=OerByyOisx_oO2yKMk8VxbL55Rta8e6lEpOLD9s7Gw4,3185
18
+ gate/content_types.py,sha256=oBMK7Bh4e081IUkbeiQsL0-uUk1zGOKDUx5VAOD4EE4,63618
19
+ gate/corpus.py,sha256=VqbdkY5mH6VESGHqA9Pcm55iwsghl-KooLRoKLjiTgY,1317
20
+ gate/custom_rules.py,sha256=fZO6UPvsqdYrDHP4AwvFOJdRYEp8yaQFVPMNzyrswNc,8344
21
+ gate/diff_view.py,sha256=zVR2lCdrRE3qQte7c2hbepGxQ88iWFR5_zPLFiGeWD8,7990
22
+ gate/email_report.py,sha256=S5WJaVvkyuvXYFEmnBiOWl8bATur2L2Kybk015yW0ys,7361
23
+ gate/embeddings.py,sha256=944qM1rHnXeM2bmCJeJ9kXirDz-MDKOAwmm-re_5nRE,1291
24
+ gate/fetcher.py,sha256=I3zqNVNcI7z7KqVBOBbx-FOeoQoN9UeQal2_W495pCM,5512
25
+ gate/gate.py,sha256=dbBPwdkD-JJ58Apec3cfw8yFsz6p6lPHu3_a9H9yws4,5005
26
+ gate/knowledge_base.py,sha256=iQ1uPCgZjBBikrWQ5Zl_XnNfb2Kj3wRyIqIorX-jFt4,7244
27
+ gate/listicle_ui.py,sha256=T2Ys9eP25Cz8_Yyb_I8byiiCgTb6xDyp55M_TOeQ_Hs,18755
28
+ gate/logging_utils.py,sha256=rpC_LhSLsLhrL9FhdI2k2TxqNtG73i70jn_7M5g5NhA,2950
29
+ gate/mcp_server.py,sha256=lPC5xzNUxaNpHP8pS56dEpf96CchZjjFcp-IprQonlI,17057
30
+ gate/metrics.py,sha256=o34YsaM5m-_CHxV5IZAi-mnm5ENuJHPPQ2ayth_4km8,3632
31
+ gate/notifier.py,sha256=J27qMud6cHJcYRDPcSBnqso-4IvnLczSgXlMvpvDVV0,6377
32
+ gate/report_generator.py,sha256=V2Vo6sPIlTWhpBfQf-PAK1My1NXFmkq9wKJyewHbDRw,11111
33
+ gate/scheduler.py,sha256=9s_tlnP7-2Csb4GTiaSUKSFPmC2jFpVvB5vVpWNdmtU,5563
34
+ gate/suggest.py,sha256=-BPaXyp-_AufZDxIrUPDhaMxaU5ISaIjORqAbICrRhw,10863
35
+ gate/trends.py,sha256=SS4s9y9LgqqZq6GHioECF8nuiRg1Lc7CAvJUWo8jBeA,4698
36
+ gate/voice.py,sha256=k4Ls9NpyyyBa43kFkVHD8KPnxQ9gqQxdwx_sVywmpZI,4510
37
+ gate/webhook.py,sha256=09W_iEi2NSQvOyGxquJ79_ArvTe8-0XZ9-alV52-a7E,21512
38
+ gate/checks/__init__.py,sha256=KEP-bRTi8DSYp6PpzxY4hQVpamX-FL6XHXidCcrrIeI,413
39
+ gate/checks/aeo.py,sha256=aAObDa8yljxRraBFXOkjTVgdTibc5RO_idaIQRb36qY,2196
40
+ gate/checks/cannibalisation.py,sha256=mQwOyRwE2yO4jIys51vUGS9mhhuALVX3KOAbqhdb1kg,1508
41
+ gate/checks/claims.py,sha256=wLGF_v72_dEp1z9J8xzvdfM2yV4tA_qhvQMZJq0nCyY,1901
42
+ gate/checks/confidentiality.py,sha256=bi9b7TJQrU6sZgXNYBVKSZi24Cv5FiJXuEhwvPt2-IA,1037
43
+ gate/checks/geo.py,sha256=bgmrO3wqO_tbQZUzHmg8azTnwGpcHJmtt8eqVM-exqc,2405
44
+ gate/checks/near_duplicate.py,sha256=AKzExEvId-bHUKuMH8_cKCgO58Fu1GRr2GhMpaKxZ3A,1421
45
+ gate/checks/readability.py,sha256=_ePrftn1Vn0UT0RMTUhX8pNyQP5r2UF1jZVXdgMgiGc,847
46
+ gate/checks/structure.py,sha256=6_AYTgZ7MRbjCY0OrDvI9ZRjlXJNfdakZeR0OHIZrzM,1991
47
+ gate/listicle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
+ gate/listicle/assemble.py,sha256=U6MBNmmja545JKO6KKPJ26MZfr7WM0yuLdCS7-LGins,4051
49
+ gate/listicle/formats.py,sha256=dySnQ1Z32okTj6P_Oa1USlCYyTcP6muiZ7D_vzdS-oM,11061
50
+ gate/listicle/generate.py,sha256=uTWXmoiALWmwXdneO_vLpDQnubqr6ht6ieQmBAG8HZ4,2137
51
+ gate/listicle/llm.py,sha256=UaNMizX634D7dt0kDIv5bfYqjErK22dcwYu2hlh7xFc,18973
52
+ gate/listicle/qa.py,sha256=dqUwdsWH29HrZV3mKi0XnBjyaerKUDChAYLQ0_TyuQI,9626
53
+ gate/listicle/research.py,sha256=M0_0F3vnYWfB-hnPszE6BgDh64FqQx4cQJ80nPLPijM,2278
54
+ gate/listicle/run.py,sha256=QFiXyJoUPfJAuiBpnya9MJd7QDqWP6DznQ8t9tApDeo,10806
55
+ gate/listicle/schema.py,sha256=pfXP40lREn6Chv-LxREECjwWpM8Dz5GX49Ao0J27-0o,2742
56
+ dokeo-3.0.0.dist-info/METADATA,sha256=8FLhAISkkOyD66ylq41Sn770cp9DF8HP01v286Nxn5o,3307
57
+ dokeo-3.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
58
+ dokeo-3.0.0.dist-info/entry_points.txt,sha256=DUnZjVJrhAV6yBwMwZPVHcxl9DuIH6HAo7nDyuSW0Ek,81
59
+ dokeo-3.0.0.dist-info/top_level.txt,sha256=LG941KagTGke_Nhvv63-oP1jeoTIzpbZwge8ZVLbleU,19
60
+ dokeo-3.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ dokeo = dokeo_cli:main
3
+ dokeo-mcp-server = gate.mcp_server:main
@@ -0,0 +1,3 @@
1
+ api
2
+ dokeo_cli
3
+ gate