npm - arkaos - Versions diffs - 3.78.0 → 4.0.1 - Mend

arkaos 3.78.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/README.md +42 -30
package/VERSION +1 -1
package/arka/SKILL.md +2 -2
package/config/agent-allowlists/laravel.yaml +1 -0
package/config/agent-allowlists/node.yaml +1 -0
package/config/agent-allowlists/nuxt.yaml +1 -0
package/config/agent-allowlists/python.yaml +1 -0
package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
package/core/agents/registry_gen.py +6 -1
package/core/agents/schema.py +4 -0
package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
package/core/cognition/reorganizer.py +37 -7
package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
package/core/knowledge/agent_match.py +114 -0
package/core/knowledge/chunker.py +45 -0
package/core/knowledge/ingest.py +156 -78
package/core/knowledge/sources.py +138 -0
package/core/knowledge/vector_store.py +52 -0
package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
package/core/squads/loader.py +25 -0
package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
package/core/sync/agent_provisioner.py +19 -8
package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
package/dashboard/app/pages/cognition.vue +9 -4
package/dashboard/app/pages/knowledge/[id].vue +669 -0
package/dashboard/app/pages/knowledge/index.vue +1281 -0
package/dashboard/app/types/index.d.ts +1 -1
package/departments/brand/agents/ux-designer.yaml +15 -1
package/departments/brand/agents/ux-researcher.yaml +73 -0
package/departments/brand/agents/ux-strategist.yaml +72 -0
package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
package/departments/dev/agents/architect.yaml +9 -3
package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
package/departments/dev/agents/backend-dev.yaml +10 -4
package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
package/departments/dev/agents/dba.yaml +7 -3
package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
package/departments/ecom/agents/retention-manager.yaml +13 -1
package/departments/leadership/agents/culture-coach.yaml +20 -0
package/departments/leadership/agents/hr-specialist.yaml +18 -0
package/departments/leadership/agents/leadership-director.yaml +10 -0
package/departments/org/agents/chief-of-staff.yaml +76 -0
package/departments/org/agents/coo.yaml +11 -0
package/departments/org/agents/okr-steward.yaml +71 -0
package/departments/org/agents/org-designer.yaml +23 -0
package/departments/org/skills/okr-cadence/SKILL.md +34 -0
package/departments/org/skills/principles-audit/SKILL.md +36 -0
package/departments/pm/agents/pm-director.yaml +21 -8
package/departments/pm/agents/product-owner.yaml +24 -2
package/departments/pm/agents/scrum-master.yaml +21 -0
package/departments/pm/agents/strategic-pm.yaml +72 -0
package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
package/departments/quality/agents/cqo.yaml +8 -0
package/departments/saas/agents/cs-manager.yaml +19 -2
package/departments/saas/agents/growth-engineer.yaml +14 -1
package/departments/saas/agents/metrics-analyst.yaml +17 -1
package/departments/saas/agents/revops-lead.yaml +73 -0
package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
package/departments/saas/skills/voc-loop/SKILL.md +29 -0
package/departments/sales/agents/sales-director.yaml +9 -0
package/departments/sales/agents/sdr.yaml +72 -0
package/departments/strategy/agents/decision-quality.yaml +72 -0
package/departments/strategy/agents/strategy-director.yaml +13 -0
package/departments/strategy/skills/premortem/SKILL.md +33 -0
package/knowledge/agents-registry-v2.json +1218 -78
package/package.json +1 -1
package/pyproject.toml +1 -1
package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
package/scripts/bench/__init__.py +5 -0
package/scripts/bench/__pycache__/__init__.cpython-313.pyc +0 -0
package/scripts/bench/__pycache__/harness.cpython-313.pyc +0 -0
package/scripts/bench/__pycache__/run.cpython-313.pyc +0 -0
package/scripts/bench/harness.py +138 -0
package/scripts/bench/run.py +136 -0
package/scripts/dashboard-api.py +376 -13
package/scripts/tools/__pycache__/docs_stats.cpython-313.pyc +0 -0
package/scripts/tools/docs_stats.py +154 -0
package/dashboard/app/pages/knowledge.vue +0 -918

package/scripts/dashboard-api.py CHANGED Viewed

@@ -22,6 +22,7 @@ sys.path.insert(0, str(ARKAOS_ROOT))
 from fastapi import FastAPI, Query, Request, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, JSONResponse
 app = FastAPI(title="ArkaOS Dashboard API", version="2.2.0")
@@ -66,7 +67,7 @@ async def ws_tasks(websocket: WebSocket):
 app.add_middleware(
     CORSMiddleware,
-    allow_origin_regex=r"http://localhost:\d+",
+    allow_origin_regex=r"^(http://localhost:\d+|chrome-extension://[a-p0-9]{32})$",
     allow_methods=["GET", "POST", "PUT", "DELETE"],
     allow_headers=["*"],
 )
@@ -130,6 +131,23 @@ def _get_vector_store():
     return None
+_source_registry_cache = None
+def _get_source_registry():
+    """Return the process-wide SourceRegistry, creating it on first use.
+    The registry is opened on ``~/.arkaos/knowledge.db``; the ``~/.arkaos``
+    directory is created when missing. Any exception raised while importing
+    or constructing the registry yields None and leaves the cache empty, so
+    a later call tries again.
+    """
+    global _source_registry_cache
+    if _source_registry_cache is not None:
+        return _source_registry_cache
+    try:
+        from core.knowledge.sources import SourceRegistry
+        arkaos_home = Path.home() / ".arkaos"
+        arkaos_home.mkdir(parents=True, exist_ok=True)
+        _source_registry_cache = SourceRegistry(arkaos_home / "knowledge.db")
+    except Exception:
+        return None
+    return _source_registry_cache
 # --- Endpoints ---
 @app.get("/api/overview")
@@ -971,8 +989,10 @@ async def knowledge_upload_file(file: UploadFile):
     media_dir = Path.home() / ".arkaos" / "media"
     media_dir.mkdir(parents=True, exist_ok=True)
-    # Save uploaded file
-    file_path = media_dir / file.filename
+    # Save uploaded file — sanitize filename to block path traversal
+    file_path = _safe_upload_path(media_dir, file.filename)
+    if file_path is None:
+        return {"error": "invalid filename"}
     content = await file.read()
     file_path.write_bytes(content)
@@ -995,13 +1015,14 @@ async def knowledge_upload_file(file: UploadFile):
         from core.jobs.manager import JobManager as _JM
         from core.knowledge.ingest import IngestEngine
         local_mgr = _JM()
-        engine = IngestEngine(store)
         def on_progress(pct, msg):
             status = "embedding" if "embed" in msg.lower() or "index" in msg.lower() else "processing"
             local_mgr.update_progress(job_id, pct, msg, status)
             broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": pct, "message": msg, "status": status})
         try:
             local_mgr.start(job_id)
+            reg = _get_source_registry()
+            engine = IngestEngine(store, registry=reg)
             result = engine.ingest(source, source_type, on_progress=on_progress)
             if result.success:
                 local_mgr.complete(job_id, chunks_created=result.chunks_created)
@@ -1064,7 +1085,6 @@ def knowledge_ingest(body: dict):
         from core.jobs.manager import JobManager as _JM
         local_mgr = _JM()
-        engine = IngestEngine(store)
         def on_progress(pct, msg):
             status = "processing"
             if "phase 2" in msg.lower() or "download" in msg.lower():
@@ -1086,6 +1106,8 @@ def knowledge_ingest(body: dict):
         try:
             local_mgr.start(job_id)
             broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": 0, "message": "Starting...", "status": "processing"})
+            reg = _get_source_registry()
+            engine = IngestEngine(store, registry=reg)
             result = engine.ingest(source, source_type, on_progress=on_progress)
             if result.success:
                 local_mgr.complete(job_id, chunks_created=result.chunks_created)
@@ -1201,20 +1223,69 @@ def knowledge_search(q: str = Query(...), top_k: int = Query(5)):
     return {"results": results, "query": q, "total": len(results)}
+def _merge_source_rows(
+    store_rows: list[dict], registry_rows: list[dict]
+) -> list[dict]:
+    """Combine vector-store rows and registry rows into one list per source.
+    Rows are keyed by their ``source`` string. Store rows contribute the
+    ``chunks`` count; registry rows overlay ``id``, ``title``, ``type``,
+    ``has_media``, ``duration`` and ``status``. A source known only to the
+    registry is emitted with ``chunks`` 0, and a source known only to the
+    store gets a derived ``id`` plus empty defaults for the other keys.
+    The result is ordered by chunk count (largest first), then by source.
+    """
+    from core.knowledge.sources import source_id
+    merged: dict[str, dict] = {}
+    for chunk_row in store_rows:
+        name = chunk_row.get("source", "")
+        merged[name] = {"source": name, "chunks": int(chunk_row.get("chunks", 0) or 0)}
+    for reg_row in registry_rows:
+        name = reg_row.get("source", "")
+        if name not in merged:
+            merged[name] = {"source": name, "chunks": 0}
+        merged[name].update(_registry_fields(reg_row))
+    defaults = {"title": "", "type": "", "has_media": False, "duration": 0, "status": ""}
+    for name, entry in merged.items():
+        entry.setdefault("id", source_id(name))
+        for key, fallback in defaults.items():
+            entry.setdefault(key, fallback)
+    return sorted(merged.values(), key=lambda entry: (-entry["chunks"], entry["source"]))
+def _registry_fields(reg: dict) -> dict:
+    """Extract the list-row metadata keys from one registry row.
+    Missing or falsy values collapse to an empty string (``title``,
+    ``type``, ``status``), 0 (``duration``) or False (``has_media``). The
+    ``id`` falls back to ``source_id`` of the row's source string.
+    """
+    from core.knowledge.sources import source_id
+    row_id = reg.get("id") or source_id(reg.get("source", ""))
+    has_media = bool(reg.get("media_path"))
+    return {
+        "id": row_id,
+        "title": reg.get("title") or "",
+        "type": reg.get("type") or "",
+        "has_media": has_media,
+        "duration": reg.get("duration") or 0,
+        "status": reg.get("status") or "",
+    }
 @app.get("/api/knowledge/sources")
 def knowledge_list_sources():
-    """PR88c v3.25.0 — list every distinct source + chunk count.
+    """List every distinct source merged from vector store + registry.
-    Returns ``{sources: [{source, chunks}], total: N}``. Sorted
-    descending by chunk count.
+    Returns ``{sources: [...], total: N}``. Each row keeps the legacy
+    ``source``/``chunks`` keys and adds ``id``/``title``/``type``/
+    ``has_media``/``duration``/``status`` so the frontend can link each
+    row to ``/knowledge/{id}``. Registry sources with 0 chunks appear too.
     """
     store = _get_vector_store()
-    if not store:
+    registry = _get_source_registry()
+    store_rows: list[dict] = []
+    if store:
+        try:
+            store_rows = store.list_sources()
+        except Exception as exc:  # noqa: BLE001
+            return {"sources": [], "total": 0, "error": str(exc)}
+    registry_rows = registry.list() if registry else []
+    if not store and not registry:
         return {"sources": [], "total": 0, "error": "vector store unavailable"}
-    try:
-        rows = store.list_sources()
-    except Exception as exc:  # noqa: BLE001
-        return {"sources": [], "total": 0, "error": str(exc)}
+    rows = _merge_source_rows(store_rows, registry_rows)
     return {"sources": rows, "total": len(rows)}
@@ -1244,6 +1315,298 @@ def knowledge_delete_source(source: str = Query(...)):
     return {"deleted": int(deleted), "source": clean}
+def _source_str_for_id(source_id_: str) -> Optional[str]:
+    """Map a source id back to the raw source string stored with the chunks.
+    Scans the vector store's distinct sources in order and returns the first
+    one whose ``source_id`` equals ``source_id_`` (linear in the number of
+    sources). Returns None when the store is unavailable or nothing matches.
+    """
+    from core.knowledge.sources import source_id
+    store = _get_vector_store()
+    if store is None:
+        return None
+    for candidate in store.distinct_sources():
+        if source_id(candidate) == source_id_:
+            return candidate
+    return None
+def _resolve_transcript(source_id_: str) -> Optional[str]:
+    """Return the best transcript text available for a source id, or None.
+    A non-blank ``transcript`` stored on the registry row is returned as-is.
+    Otherwise the id is reverse-resolved to a chunk source and the
+    transcript is taken from the vector store. None means that no chunk
+    source matched the id or the vector store is unavailable.
+    """
+    registry = _get_source_registry()
+    if registry:
+        row = registry.get(source_id_)
+    else:
+        row = None
+    if row is not None:
+        stored = row.get("transcript") or ""
+        if stored.strip():
+            return row["transcript"]
+    raw_source = _source_str_for_id(source_id_)
+    if raw_source is None:
+        return None
+    store = _get_vector_store()
+    if not store:
+        return None
+    return store.transcript_for_source(raw_source)
+def _detail_from_store(source_id_: str) -> Optional[dict]:
+    """Build a detail dict for a source that exists only as indexed chunks.
+    The id is reverse-resolved to its raw source string; None is returned
+    when nothing matches. Otherwise the dict carries the same keys as a
+    registry-backed detail: metadata fields are left empty, ``type`` comes
+    from ``detect_source_type``, ``status`` is ``"indexed"``, and the
+    transcript is the one the vector store reconstructs from the chunks.
+    """
+    from core.knowledge.ingest import detect_source_type
+    raw_source = _source_str_for_id(source_id_)
+    if raw_source is None:
+        return None
+    store = _get_vector_store()
+    if store:
+        chunks = store.chunks_for_source(raw_source)
+        transcript = store.transcript_for_source(raw_source)
+    else:
+        chunks, transcript = [], ""
+    return {
+        "id": source_id_,
+        "source": raw_source,
+        "type": detect_source_type(raw_source),
+        "title": "",
+        "duration": 0,
+        "language": "",
+        "thumbnail_path": "",
+        "media_path": "",
+        "transcript": transcript,
+        "transcript_reconstructed": bool(transcript),
+        "chunk_count": len(chunks),
+        "status": "indexed",
+        "error": "",
+        "created_at": "",
+        "updated_at": "",
+        "chunks": chunks,
+    }
+@app.get("/api/knowledge/sources/{source_id}")
+def knowledge_source_detail(source_id: str):
+    """Return one source's metadata together with its indexed chunks.
+    A registry row takes precedence: it is copied, given a ``chunks`` list
+    from the vector store, and — when its stored transcript is blank — a
+    transcript reconstructed from the store, flagged through
+    ``transcript_reconstructed``. Without a registry row the vector store
+    alone is consulted so chunks-only sources still resolve; if that fails
+    too the response is a 404 ``{"error": "not found"}``.
+    """
+    registry = _get_source_registry()
+    row = registry.get(source_id) if registry else None
+    if row is None:
+        fallback = _detail_from_store(source_id)
+        if fallback is None:
+            return JSONResponse({"error": "not found"}, status_code=404)
+        return fallback
+    detail = dict(row)
+    store = _get_vector_store()
+    detail["chunks"] = store.chunks_for_source(detail["source"]) if store else []
+    if (detail.get("transcript") or "").strip():
+        detail["transcript_reconstructed"] = False
+    else:
+        detail["transcript"] = (
+            store.transcript_for_source(detail["source"]) if store else ""
+        )
+        detail["transcript_reconstructed"] = bool(detail["transcript"])
+    return detail
+@app.get("/api/knowledge/sources/{source_id}/transcript")
+def knowledge_source_transcript(source_id: str):
+    """Return the full transcript text for a source.
+    A non-blank transcript stored on the registry row wins and is reported
+    with ``reconstructed`` False. Otherwise the id is reverse-resolved to a
+    chunk source and the transcript is rebuilt from the vector store; the
+    ``reconstructed`` flag is True only when that produced text.
+    Returns 404 only when the id matches neither a registry row nor a chunk
+    source. A known registry source without chunks returns an empty
+    transcript with status 200.
+    """
+    registry = _get_source_registry()
+    row = registry.get(source_id) if registry else None
+    stored = (row.get("transcript") or "").strip() if row else ""
+    if stored:
+        return {"transcript": row["transcript"], "reconstructed": False}
+    match = _source_str_for_id(source_id)
+    if match is None:
+        if row is None:
+            return JSONResponse({"error": "not found"}, status_code=404)
+        # Registry row exists but nothing is indexed for it: empty, not missing.
+        return {"transcript": "", "reconstructed": False}
+    # Reuse the source string resolved above instead of delegating to
+    # _resolve_transcript, which would repeat the registry lookup and the
+    # linear reverse scan over every distinct source.
+    store = _get_vector_store()
+    text = (store.transcript_for_source(match) if store else "") or ""
+    return {"transcript": text, "reconstructed": bool(text)}
+_AGENT_MATCH_TEXT_CAP = 4000  # representative sample for embedding; not full text
+def _source_knowledge_text(source_id_: str) -> str:
+    """Assemble the text used to match a source against agents.
+    The body is the resolved transcript; when that is blank, the text of the
+    first five chunks joined by spaces is used instead. The body is cut to
+    ``_AGENT_MATCH_TEXT_CAP`` characters and, when the registry row has a
+    title, the title is placed on its own line in front of it. Returns ""
+    when neither a title nor any body text exists.
+    """
+    registry = _get_source_registry()
+    row = registry.get(source_id_) if registry else None
+    title = str((row or {}).get("title") or "").strip()
+    body = (_resolve_transcript(source_id_) or "").strip()
+    if not body:
+        raw_source = _source_str_for_id(source_id_)
+        store = _get_vector_store()
+        if raw_source and store:
+            leading = store.chunks_for_source(raw_source)[:5]
+            body = " ".join(str(chunk.get("text") or "") for chunk in leading).strip()
+    sample = body[:_AGENT_MATCH_TEXT_CAP]
+    if not title:
+        return sample
+    return f"{title}\n{sample}".strip()
+@app.get("/api/knowledge/sources/{source_id}/agent-matches")
+def knowledge_source_agent_matches(source_id: str, top_n: int = Query(5)):
+    """Suggest which agents should learn from this source (semantic match).
+    Resolves the source's knowledge text and asks ``agent_match.match_agents``
+    for the best matches among the loaded agents. ``top_n`` is clamped to
+    the range 1..10. Instead of failing, the endpoint returns
+    ``{matches: [], reason}`` when the source has no text, when the embedder
+    reports itself unavailable, or when the matcher returns nothing.
+    """
+    from core.knowledge import agent_match, embedder
+    text = _source_knowledge_text(source_id)
+    if not text:
+        return {"matches": [], "source_id": source_id, "count": 0, "reason": "no source text"}
+    if not embedder.is_available():
+        return {"matches": [], "source_id": source_id, "count": 0, "reason": "embedder unavailable"}
+    # Clamp on both sides: a zero or negative top_n from the query string
+    # must not reach the matcher, where it would not mean "top N".
+    limit = max(1, min(top_n, 10))
+    matches = agent_match.match_agents(text, _load_agents(), top_n=limit)
+    if not matches:
+        return {"matches": [], "source_id": source_id, "count": 0, "reason": "embedder unavailable"}
+    return {"matches": matches, "source_id": source_id, "count": len(matches)}
+def _agent_matches_for_proposal(source_id_: str, text: str, body: Optional[dict]) -> list[dict]:
+    """Pick the agent matches that go into a proposal.
+    The ten best matches for ``text`` are computed first. When ``body`` is a
+    dict with a non-empty ``agent_ids`` list, only matches whose ``id`` is in
+    that list (compared as strings) are kept; otherwise the five best are
+    returned. ``source_id_`` is accepted but not used here.
+    """
+    from core.knowledge import agent_match
+    ranked = agent_match.match_agents(text, _load_agents(), top_n=10)
+    requested = body.get("agent_ids") if isinstance(body, dict) else None
+    if not requested:
+        return ranked[:5]
+    wanted = {str(agent_id) for agent_id in requested}
+    return [match for match in ranked if match["id"] in wanted]
+@app.post("/api/knowledge/sources/{source_id}/agent-proposal")
+def knowledge_source_agent_proposal(source_id: str, body: Optional[dict] = None):
+    """Write a propose-only markdown file listing agents to update.
+    An optional body ``{"agent_ids": [...]}`` narrows the proposal to those
+    agents; without it the top matches are used. The heading is the
+    registry title, or the source id when no title is stored. The only
+    write performed here is the markdown file produced by
+    ``_write_agent_proposal``. Returns ``{"proposal_path", "agents"}``, or
+    ``{"error", "agents": 0}`` when the source has no text or the embedder
+    is unavailable.
+    """
+    from core.knowledge import embedder
+    text = _source_knowledge_text(source_id)
+    if not text:
+        return {"error": "no source text", "agents": 0}
+    if not embedder.is_available():
+        return {"error": "embedder unavailable", "agents": 0}
+    selected = _agent_matches_for_proposal(source_id, text, body)
+    registry = _get_source_registry()
+    row = registry.get(source_id) if registry else None
+    heading = str((row or {}).get("title") or "").strip()
+    if not heading:
+        heading = source_id
+    proposal_path = _write_agent_proposal(
+        source_id, _render_agent_proposal(source_id, heading, selected)
+    )
+    return {"proposal_path": str(proposal_path), "agents": len(selected)}
+def _render_agent_proposal(source_id_: str, title: str, matches: list[dict]) -> str:
+    """Render the propose-only markdown document.
+    The title is passed through ``redact_clients`` and then ``md_escape``
+    before it goes into the heading. One bullet is emitted per match, or a
+    placeholder line when there are none. The assembled document is passed
+    through ``redact_clients`` once more before it is returned.
+    """
+    from core.cognition.reorganizer import md_escape, redact_clients
+    heading = md_escape(redact_clients(title))
+    header = [
+        f"# Agent Attribution Proposal — {heading}",
+        "",
+        "> **PROPOSE-ONLY** — review and apply manually; this never edits agent files.",
+        f"> Source: `{source_id_}`",
+        "",
+        "## Suggested agents",
+        "",
+    ]
+    if matches:
+        bullets = [_agent_proposal_line(m, md_escape) for m in matches]
+    else:
+        bullets = ["_(no agent matches)_"]
+    return redact_clients("\n".join(header + bullets))
+def _agent_proposal_line(m: dict, escape) -> str:
+    """Format one suggested-agent bullet for the proposal.
+    Each entry of ``matched_terms`` is passed through ``escape`` and the
+    results are comma-joined; "n/a" is shown when there are none. Name,
+    department, role and score are inserted as found in ``m``.
+    """
+    escaped_terms = [escape(term) for term in (m.get("matched_terms") or [])]
+    terms = ", ".join(escaped_terms) or "n/a"
+    name = m.get("name", "")
+    department = m.get("department", "")
+    role = m.get("role", "")
+    score = m.get("score", 0)
+    return f"- **{name}** ({department} — {role}) score: {score}; matched: {terms}"
+def _write_agent_proposal(source_id_: str, markdown: str) -> Path:
+    """Write the proposal markdown atomically and return its final path.
+    The file lives in ``~/.arkaos/reorganize-proposals/`` and is named
+    ``agent-attribution-<id>.md``, where every character of the id other
+    than alphanumerics, ``-`` and ``_`` is replaced by ``-`` and the result
+    is cut to 64 characters. The text is first written to a pid-suffixed
+    temp file next to the target and then moved over it with ``os.replace``.
+    If either step fails the temp file is removed and the error re-raised.
+    """
+    safe = "".join(c if c.isalnum() or c in "-_" else "-" for c in source_id_)[:64]
+    out = Path.home() / ".arkaos" / "reorganize-proposals"
+    out.mkdir(parents=True, exist_ok=True)
+    path = out / f"agent-attribution-{safe}.md"
+    tmp = path.with_suffix(f".tmp-{os.getpid()}.md")
+    try:
+        tmp.write_text(markdown, encoding="utf-8")
+        os.replace(tmp, path)
+    except BaseException:
+        # Never leave a partial temp file in the proposals directory.
+        tmp.unlink(missing_ok=True)
+        raise
+    return path
+@app.get("/api/knowledge/sources/{source_id}/media")
+def knowledge_source_media(source_id: str):
+    """Serve a source's media file, or 404 when it has none.
+    The path comes from ``_safe_media_path``, which only returns files that
+    exist under the media root. The file is returned as a ``FileResponse``.
+    NOTE(review): HTTP Range handling depends on the installed
+    Starlette/FastAPI version — confirm before relying on seeking.
+    """
+    media_file = _safe_media_path(source_id)
+    if media_file is not None:
+        return FileResponse(str(media_file))
+    return JSONResponse({"error": "not found"}, status_code=404)
+@app.get("/api/knowledge/sources/{source_id}/download")
+def knowledge_source_download(source_id: str):
+    """Send a source's media file as an attachment, or 404 when it has none.
+    The path is resolved through ``_safe_media_path``; the download name is
+    the file's own base name.
+    """
+    media_file = _safe_media_path(source_id)
+    if media_file is not None:
+        return FileResponse(
+            str(media_file),
+            filename=media_file.name,
+            content_disposition_type="attachment",
+        )
+    return JSONResponse({"error": "not found"}, status_code=404)
+def _safe_upload_path(media_dir: Path, filename: str) -> Optional[Path]:
+    """Resolve an upload target strictly inside ``media_dir``.
+    Only the last path component of ``filename`` is used, so directory parts
+    such as ``../`` are discarded. The candidate is then fully resolved
+    (following symlinks) and accepted only when the resolved media directory
+    is one of its ancestors. Returns None for an empty name, for a name that
+    resolves outside the media directory, and for a name that resolves to
+    the media directory itself, which is not a writable file target.
+    """
+    safe_name = Path(filename or "").name  # strip any path components
+    if not safe_name:
+        return None
+    file_path = (media_dir / safe_name).resolve()
+    media_root = media_dir.resolve()
+    # Strict containment: the root itself is never a valid upload target.
+    if media_root not in file_path.parents:
+        return None
+    return file_path
+def _safe_media_path(source_id: str) -> Optional[Path]:
+    """Return the on-disk media file for a source, or None.
+    The path is read from the registry row's ``media_path``. It is resolved
+    and returned only when it exists and lies under ``~/.arkaos/media``;
+    anything else — no registry, no row, empty path, missing file, or a
+    path outside the media root — yields None.
+    """
+    registry = _get_source_registry()
+    if not registry:
+        return None
+    row = registry.get(source_id)
+    if row is None or not row["media_path"]:
+        return None
+    allowed_root = (Path.home() / ".arkaos" / "media").resolve()
+    candidate = Path(row["media_path"]).resolve()
+    if candidate.exists() and allowed_root in candidate.parents:
+        return candidate
+    return None
 @app.get("/api/health")
 def health():
     """PR70 v2.87.0 — per-check severity + response timestamp.

package/scripts/tools/__pycache__/docs_stats.cpython-313.pyc ADDED Viewed

Binary file

package/scripts/tools/docs_stats.py ADDED Viewed

@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""ArkaOS Docs Stats -- canonical source of truth for documentation numbers.
+Counts agents, departments, skills, ADRs, and tests directly from the
+repository so that every document (README, wiki, CLAUDE.md) consumes generated
+numbers instead of hand-typed ones. This is the antidote to documentation
+drift: no number is ever written by hand.
+Usage:
+    python docs_stats.py                 # human-readable (repo root auto-detected)
+    python docs_stats.py --json
+    python docs_stats.py --root /path/to/arka-os --json
+    python docs_stats.py --with-pytest   # also collect authoritative pytest case count
+"""
+from __future__ import annotations
+import argparse
+import json
+import re
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+_TEST_DEF_RE = re.compile(r"^\s*(?:async\s+)?def\s+test_\w+", re.MULTILINE)
+_COLLECTED_RE = re.compile(r"(\d+)\s+tests?\s+collected")
+def repo_root(start: Optional[Path] = None) -> Path:
+    """Locate the repository root above ``start``.
+    Starting from ``start`` (or this file when omitted), the search walks
+    from the starting directory up through its ancestors and returns the
+    first directory containing both a ``VERSION`` file and a
+    ``departments`` directory. Falls back to the current working directory
+    when no ancestor qualifies.
+    """
+    origin = (start or Path(__file__).resolve()).resolve()
+    first = origin if origin.is_dir() else origin.parent
+    for candidate in (first, *first.parents):
+        has_version = (candidate / "VERSION").is_file()
+        if has_version and (candidate / "departments").is_dir():
+            return candidate
+    return Path.cwd()
+def read_version(root: Path) -> str:
+    """Return the stripped contents of ``root/VERSION``, or "" if absent."""
+    version_file = root / "VERSION"
+    if not version_file.is_file():
+        return ""
+    return version_file.read_text(encoding="utf-8").strip()
+def count_agents(root: Path) -> dict:
+    """Count agent YAML files under ``departments/*/agents/``.
+    Each ``agents`` directory is searched recursively for ``*.yaml`` so
+    nested sub-squad folders are included. Returns ``files`` (every match)
+    and ``unique_slugs`` (the number of distinct file names).
+    """
+    departments = root / "departments"
+    agent_files: list[Path] = []
+    if departments.is_dir():
+        for agents_dir in departments.glob("*/agents"):
+            if agents_dir.is_dir():
+                agent_files.extend(agents_dir.rglob("*.yaml"))
+    distinct_names = {agent_file.name for agent_file in agent_files}
+    return {"files": len(agent_files), "unique_slugs": len(distinct_names)}
+def count_departments(root: Path) -> int:
+    """Return how many directories sit directly under ``departments/``.
+    Plain files are ignored; a missing ``departments`` directory counts as 0.
+    """
+    departments = root / "departments"
+    if not departments.is_dir():
+        return 0
+    return len([entry for entry in departments.iterdir() if entry.is_dir()])
+def count_skills(root: Path) -> dict:
+    """Count ``SKILL.md`` files per area.
+    The areas ``departments``, ``arka`` and ``marketplace`` are each searched
+    recursively; a missing area counts as 0. ``core`` is the sum of the
+    ``departments`` and ``arka`` counts.
+    """
+    def _skill_files(rel: str) -> int:
+        area = root / rel
+        if not area.is_dir():
+            return 0
+        return sum(1 for _ in area.rglob("SKILL.md"))
+    counts = {name: _skill_files(name) for name in ("departments", "arka", "marketplace")}
+    counts["core"] = counts["departments"] + counts["arka"]
+    return counts
+def count_adrs(root: Path) -> int:
+    """Return the number of ``*.md`` files directly inside ``docs/adr/``.
+    Subdirectories are not searched; a missing directory counts as 0.
+    """
+    adr_dir = root / "docs" / "adr"
+    if not adr_dir.is_dir():
+        return 0
+    return sum(1 for _ in adr_dir.glob("*.md"))
+def count_test_functions(root: Path) -> int:
+    """Statically count test function definitions under ``tests/``.
+    Every ``test_*.py`` file is read as UTF-8 (undecodable bytes replaced)
+    and scanned with ``_TEST_DEF_RE``, which matches ``def test_...`` and
+    ``async def test_...`` at the start of a line. Returns 0 when ``tests/``
+    does not exist.
+    """
+    tests_dir = root / "tests"
+    if not tests_dir.is_dir():
+        return 0
+    total = 0
+    for test_file in tests_dir.rglob("test_*.py"):
+        source = test_file.read_text(encoding="utf-8", errors="replace")
+        total += len(_TEST_DEF_RE.findall(source))
+    return total
+def collect_pytest_cases(root: Path) -> Optional[int]:
+    """Ask pytest how many test cases it collects in ``root``.
+    Runs ``python -m pytest --collect-only -q`` with the current interpreter
+    and a 300-second timeout, then scans stdout from the last line upwards
+    for the "N tests collected" summary. Returns None when the process
+    cannot be run, times out, or prints no such line.
+    """
+    command = [sys.executable, "-m", "pytest", "--collect-only", "-q"]
+    try:
+        proc = subprocess.run(
+            command, cwd=root, capture_output=True, text=True,
+            timeout=300, check=False)
+    except (OSError, subprocess.SubprocessError):
+        return None
+    for line in reversed(proc.stdout.splitlines()):
+        found = _COLLECTED_RE.search(line)
+        if found is not None:
+            return int(found.group(1))
+    return None
+def gather(root: Path, with_pytest: bool = False) -> dict:
+    """Collect every documentation statistic into one JSON-serialisable dict.
+    The ``tests`` entry always holds the static ``functions`` count; the
+    ``collected`` count from pytest is added only when ``with_pytest`` is
+    true, and may be None if collection failed.
+    """
+    tests = {"functions": count_test_functions(root)}
+    if with_pytest:
+        tests["collected"] = collect_pytest_cases(root)
+    stats: dict = {}
+    stats["version"] = read_version(root)
+    stats["agents"] = count_agents(root)
+    stats["departments"] = count_departments(root)
+    stats["skills"] = count_skills(root)
+    stats["adrs"] = count_adrs(root)
+    stats["tests"] = tests
+    stats["root"] = str(root)
+    return stats
+def format_text(stats: dict) -> str:
+    """Render the statistics dict from ``gather`` as a plain-text report.
+    The "Test cases" line appears only when ``stats["tests"]`` has a
+    ``collected`` key. A None value there means pytest collection failed and
+    is reported as unavailable instead of printing the literal "None".
+    """
+    a, s, t = stats["agents"], stats["skills"], stats["tests"]
+    lines = [
+        "=" * 52,
+        "ARKAOS DOCS STATS (canonical)",
+        "=" * 52,
+        f"Version:        {stats['version']}",
+        f"Departments:    {stats['departments']}",
+        f"Agents:         {a['files']} files ({a['unique_slugs']} unique slugs)",
+        f"Skills (core):  {s['core']}  (departments {s['departments']} + arka {s['arka']})",
+        f"  marketplace:  {s['marketplace']}",
+        f"ADRs:           {stats['adrs']}",
+        f"Test functions: {t['functions']}",
+    ]
+    if "collected" in t:
+        collected = t["collected"]
+        if collected is None:
+            # collect_pytest_cases returns None on failure; say so explicitly.
+            lines.append("Test cases:     unavailable (pytest collection failed)")
+        else:
+            lines.append(f"Test cases:     {collected} (pytest collected)")
+    lines.append("=" * 52)
+    return "\n".join(lines)
+def main() -> int:
+    """Parse the command line, gather the stats and print them.
+    ``--root`` overrides repo-root auto-detection, ``--json`` switches the
+    output from the text report to indented JSON, and ``--with-pytest`` adds
+    the pytest-collected case count. Always returns exit code 0.
+    """
+    cli = argparse.ArgumentParser(
+        description="ArkaOS docs stats -- canonical documentation counter")
+    cli.add_argument("--root", default=None, help="Repo root (default: auto-detect)")
+    cli.add_argument("--json", action="store_true", help="Output as JSON")
+    cli.add_argument("--with-pytest", action="store_true",
+                     help="Also collect authoritative pytest case count")
+    opts = cli.parse_args()
+    if opts.root:
+        root = Path(opts.root).resolve()
+    else:
+        root = repo_root()
+    stats = gather(root, with_pytest=opts.with_pytest)
+    if opts.json:
+        print(json.dumps(stats, indent=2))
+    else:
+        print(format_text(stats))
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())