arkaos 3.78.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -30
- package/VERSION +1 -1
- package/arka/SKILL.md +2 -2
- package/config/agent-allowlists/laravel.yaml +1 -0
- package/config/agent-allowlists/node.yaml +1 -0
- package/config/agent-allowlists/nuxt.yaml +1 -0
- package/config/agent-allowlists/python.yaml +1 -0
- package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
- package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/agents/registry_gen.py +6 -1
- package/core/agents/schema.py +4 -0
- package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
- package/core/cognition/reorganizer.py +37 -7
- package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
- package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/core/knowledge/agent_match.py +114 -0
- package/core/knowledge/chunker.py +45 -0
- package/core/knowledge/ingest.py +156 -78
- package/core/knowledge/sources.py +138 -0
- package/core/knowledge/vector_store.py +52 -0
- package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
- package/core/squads/loader.py +25 -0
- package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
- package/core/sync/agent_provisioner.py +19 -8
- package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
- package/dashboard/app/pages/cognition.vue +9 -4
- package/dashboard/app/pages/knowledge/[id].vue +669 -0
- package/dashboard/app/pages/knowledge/index.vue +1281 -0
- package/dashboard/app/types/index.d.ts +1 -1
- package/departments/brand/agents/ux-designer.yaml +15 -1
- package/departments/brand/agents/ux-researcher.yaml +73 -0
- package/departments/brand/agents/ux-strategist.yaml +72 -0
- package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
- package/departments/dev/agents/architect.yaml +9 -3
- package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
- package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
- package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
- package/departments/dev/agents/backend-dev.yaml +10 -4
- package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
- package/departments/dev/agents/dba.yaml +7 -3
- package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
- package/departments/ecom/agents/retention-manager.yaml +13 -1
- package/departments/leadership/agents/culture-coach.yaml +20 -0
- package/departments/leadership/agents/hr-specialist.yaml +18 -0
- package/departments/leadership/agents/leadership-director.yaml +10 -0
- package/departments/org/agents/chief-of-staff.yaml +76 -0
- package/departments/org/agents/coo.yaml +11 -0
- package/departments/org/agents/okr-steward.yaml +71 -0
- package/departments/org/agents/org-designer.yaml +23 -0
- package/departments/org/skills/okr-cadence/SKILL.md +34 -0
- package/departments/org/skills/principles-audit/SKILL.md +36 -0
- package/departments/pm/agents/pm-director.yaml +21 -8
- package/departments/pm/agents/product-owner.yaml +24 -2
- package/departments/pm/agents/scrum-master.yaml +21 -0
- package/departments/pm/agents/strategic-pm.yaml +72 -0
- package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
- package/departments/quality/agents/cqo.yaml +8 -0
- package/departments/saas/agents/cs-manager.yaml +19 -2
- package/departments/saas/agents/growth-engineer.yaml +14 -1
- package/departments/saas/agents/metrics-analyst.yaml +17 -1
- package/departments/saas/agents/revops-lead.yaml +73 -0
- package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
- package/departments/saas/skills/voc-loop/SKILL.md +29 -0
- package/departments/sales/agents/sales-director.yaml +9 -0
- package/departments/sales/agents/sdr.yaml +72 -0
- package/departments/strategy/agents/decision-quality.yaml +72 -0
- package/departments/strategy/agents/strategy-director.yaml +13 -0
- package/departments/strategy/skills/premortem/SKILL.md +33 -0
- package/knowledge/agents-registry-v2.json +1218 -78
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
- package/scripts/bench/__init__.py +5 -0
- package/scripts/bench/__pycache__/__init__.cpython-313.pyc +0 -0
- package/scripts/bench/__pycache__/harness.cpython-313.pyc +0 -0
- package/scripts/bench/__pycache__/run.cpython-313.pyc +0 -0
- package/scripts/bench/harness.py +138 -0
- package/scripts/bench/run.py +136 -0
- package/scripts/dashboard-api.py +376 -13
- package/scripts/tools/__pycache__/docs_stats.cpython-313.pyc +0 -0
- package/scripts/tools/docs_stats.py +154 -0
- package/dashboard/app/pages/knowledge.vue +0 -918
package/scripts/dashboard-api.py
CHANGED
|
@@ -22,6 +22,7 @@ sys.path.insert(0, str(ARKAOS_ROOT))
|
|
|
22
22
|
|
|
23
23
|
from fastapi import FastAPI, Query, Request, WebSocket, WebSocketDisconnect
|
|
24
24
|
from fastapi.middleware.cors import CORSMiddleware
|
|
25
|
+
from fastapi.responses import FileResponse, JSONResponse
|
|
25
26
|
|
|
26
27
|
app = FastAPI(title="ArkaOS Dashboard API", version="2.2.0")
|
|
27
28
|
|
|
@@ -66,7 +67,7 @@ async def ws_tasks(websocket: WebSocket):
|
|
|
66
67
|
|
|
67
68
|
app.add_middleware(
|
|
68
69
|
CORSMiddleware,
|
|
69
|
-
allow_origin_regex=r"http://localhost:\d
|
|
70
|
+
allow_origin_regex=r"^(http://localhost:\d+|chrome-extension://[a-p0-9]{32})$",
|
|
70
71
|
allow_methods=["GET", "POST", "PUT", "DELETE"],
|
|
71
72
|
allow_headers=["*"],
|
|
72
73
|
)
|
|
@@ -130,6 +131,23 @@ def _get_vector_store():
|
|
|
130
131
|
return None
|
|
131
132
|
|
|
132
133
|
|
|
134
|
+
_source_registry_cache = None


def _get_source_registry():
    """Return the shared SourceRegistry, building it on first use.

    The registry is opened over ``~/.arkaos/knowledge.db``; the parent
    directory is created when it does not exist yet. Construction is
    best-effort: if anything raises while importing or building the
    registry, ``None`` is returned and the cache stays empty, so a later
    call attempts the construction again.
    """
    global _source_registry_cache
    if _source_registry_cache is not None:
        return _source_registry_cache
    try:
        from core.knowledge.sources import SourceRegistry

        database = Path.home() / ".arkaos" / "knowledge.db"
        database.parent.mkdir(parents=True, exist_ok=True)
        _source_registry_cache = SourceRegistry(database)
    except Exception:
        # Deliberate degrade: callers treat None as "registry unavailable".
        return None
    return _source_registry_cache
|
|
149
|
+
|
|
150
|
+
|
|
133
151
|
# --- Endpoints ---
|
|
134
152
|
|
|
135
153
|
@app.get("/api/overview")
|
|
@@ -971,8 +989,10 @@ async def knowledge_upload_file(file: UploadFile):
|
|
|
971
989
|
media_dir = Path.home() / ".arkaos" / "media"
|
|
972
990
|
media_dir.mkdir(parents=True, exist_ok=True)
|
|
973
991
|
|
|
974
|
-
# Save uploaded file
|
|
975
|
-
file_path = media_dir
|
|
992
|
+
# Save uploaded file — sanitize filename to block path traversal
|
|
993
|
+
file_path = _safe_upload_path(media_dir, file.filename)
|
|
994
|
+
if file_path is None:
|
|
995
|
+
return {"error": "invalid filename"}
|
|
976
996
|
content = await file.read()
|
|
977
997
|
file_path.write_bytes(content)
|
|
978
998
|
|
|
@@ -995,13 +1015,14 @@ async def knowledge_upload_file(file: UploadFile):
|
|
|
995
1015
|
from core.jobs.manager import JobManager as _JM
|
|
996
1016
|
from core.knowledge.ingest import IngestEngine
|
|
997
1017
|
local_mgr = _JM()
|
|
998
|
-
engine = IngestEngine(store)
|
|
999
1018
|
def on_progress(pct, msg):
|
|
1000
1019
|
status = "embedding" if "embed" in msg.lower() or "index" in msg.lower() else "processing"
|
|
1001
1020
|
local_mgr.update_progress(job_id, pct, msg, status)
|
|
1002
1021
|
broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": pct, "message": msg, "status": status})
|
|
1003
1022
|
try:
|
|
1004
1023
|
local_mgr.start(job_id)
|
|
1024
|
+
reg = _get_source_registry()
|
|
1025
|
+
engine = IngestEngine(store, registry=reg)
|
|
1005
1026
|
result = engine.ingest(source, source_type, on_progress=on_progress)
|
|
1006
1027
|
if result.success:
|
|
1007
1028
|
local_mgr.complete(job_id, chunks_created=result.chunks_created)
|
|
@@ -1064,7 +1085,6 @@ def knowledge_ingest(body: dict):
|
|
|
1064
1085
|
from core.jobs.manager import JobManager as _JM
|
|
1065
1086
|
local_mgr = _JM()
|
|
1066
1087
|
|
|
1067
|
-
engine = IngestEngine(store)
|
|
1068
1088
|
def on_progress(pct, msg):
|
|
1069
1089
|
status = "processing"
|
|
1070
1090
|
if "phase 2" in msg.lower() or "download" in msg.lower():
|
|
@@ -1086,6 +1106,8 @@ def knowledge_ingest(body: dict):
|
|
|
1086
1106
|
try:
|
|
1087
1107
|
local_mgr.start(job_id)
|
|
1088
1108
|
broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": 0, "message": "Starting...", "status": "processing"})
|
|
1109
|
+
reg = _get_source_registry()
|
|
1110
|
+
engine = IngestEngine(store, registry=reg)
|
|
1089
1111
|
result = engine.ingest(source, source_type, on_progress=on_progress)
|
|
1090
1112
|
if result.success:
|
|
1091
1113
|
local_mgr.complete(job_id, chunks_created=result.chunks_created)
|
|
@@ -1201,20 +1223,69 @@ def knowledge_search(q: str = Query(...), top_k: int = Query(5)):
|
|
|
1201
1223
|
return {"results": results, "query": q, "total": len(results)}
|
|
1202
1224
|
|
|
1203
1225
|
|
|
1226
|
+
def _merge_source_rows(
    store_rows: list[dict], registry_rows: list[dict]
) -> list[dict]:
    """Merge vector-store rows and registry rows into one list per source.

    Rows are keyed by their ``source`` string. Each result row carries the
    legacy ``source``/``chunks`` keys plus ``id``, ``title``, ``type``,
    ``has_media``, ``duration`` and ``status``. A source known only to the
    registry is emitted with ``chunks`` set to 0. The result is ordered by
    chunk count (descending) and then by source string (ascending).
    """
    from core.knowledge.sources import source_id

    merged: dict[str, dict] = {}

    # Vector-store rows seed the table with chunk counts.
    for store_row in store_rows:
        key = store_row.get("source", "")
        merged[key] = {
            "source": key,
            "chunks": int(store_row.get("chunks", 0) or 0),
        }

    # Registry rows overlay metadata, creating a 0-chunk entry when needed.
    for registry_row in registry_rows:
        key = registry_row.get("source", "")
        entry = merged.setdefault(key, {"source": key, "chunks": 0})
        entry.update(_registry_fields(registry_row))

    # Chunk-only sources still need an id and neutral metadata defaults.
    fallbacks = {
        "title": "",
        "type": "",
        "has_media": False,
        "duration": 0,
        "status": "",
    }
    for key, entry in merged.items():
        entry.setdefault("id", source_id(key))
        for field, fallback in fallbacks.items():
            entry.setdefault(field, fallback)

    def _rank(entry: dict) -> tuple:
        return (-entry["chunks"], entry["source"])

    return sorted(merged.values(), key=_rank)
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def _registry_fields(reg: dict) -> dict:
    """Extract the list-row metadata keys from a registry row.

    Missing or falsy values collapse to neutral defaults (empty string or
    0). ``id`` falls back to ``source_id`` of the row's ``source`` string,
    and ``has_media`` is true when the row has a non-empty ``media_path``.
    """
    from core.knowledge.sources import source_id

    projected = {"id": reg.get("id") or source_id(reg.get("source", ""))}
    projected["title"] = reg.get("title", "") or ""
    projected["type"] = reg.get("type", "") or ""
    projected["has_media"] = bool(reg.get("media_path"))
    projected["duration"] = reg.get("duration", 0) or 0
    projected["status"] = reg.get("status", "") or ""
    return projected
|
|
1266
|
+
|
|
1267
|
+
|
|
1204
1268
|
@app.get("/api/knowledge/sources")
|
|
1205
1269
|
def knowledge_list_sources():
|
|
1206
|
-
"""
|
|
1270
|
+
"""List every distinct source merged from vector store + registry.
|
|
1207
1271
|
|
|
1208
|
-
Returns ``{sources: [
|
|
1209
|
-
|
|
1272
|
+
Returns ``{sources: [...], total: N}``. Each row keeps the legacy
|
|
1273
|
+
``source``/``chunks`` keys and adds ``id``/``title``/``type``/
|
|
1274
|
+
``has_media``/``duration``/``status`` so the frontend can link each
|
|
1275
|
+
row to ``/knowledge/{id}``. Registry sources with 0 chunks appear too.
|
|
1210
1276
|
"""
|
|
1211
1277
|
store = _get_vector_store()
|
|
1212
|
-
|
|
1278
|
+
registry = _get_source_registry()
|
|
1279
|
+
store_rows: list[dict] = []
|
|
1280
|
+
if store:
|
|
1281
|
+
try:
|
|
1282
|
+
store_rows = store.list_sources()
|
|
1283
|
+
except Exception as exc: # noqa: BLE001
|
|
1284
|
+
return {"sources": [], "total": 0, "error": str(exc)}
|
|
1285
|
+
registry_rows = registry.list() if registry else []
|
|
1286
|
+
if not store and not registry:
|
|
1213
1287
|
return {"sources": [], "total": 0, "error": "vector store unavailable"}
|
|
1214
|
-
|
|
1215
|
-
rows = store.list_sources()
|
|
1216
|
-
except Exception as exc: # noqa: BLE001
|
|
1217
|
-
return {"sources": [], "total": 0, "error": str(exc)}
|
|
1288
|
+
rows = _merge_source_rows(store_rows, registry_rows)
|
|
1218
1289
|
return {"sources": rows, "total": len(rows)}
|
|
1219
1290
|
|
|
1220
1291
|
|
|
@@ -1244,6 +1315,298 @@ def knowledge_delete_source(source: str = Query(...)):
|
|
|
1244
1315
|
return {"deleted": int(deleted), "source": clean}
|
|
1245
1316
|
|
|
1246
1317
|
|
|
1318
|
+
def _source_str_for_id(source_id_: str) -> Optional[str]:
    """Find the raw source string whose computed id equals ``source_id_``.

    Walks the vector store's distinct sources and returns the first one
    whose ``source_id`` matches. Returns None when nothing matches or the
    vector store is unavailable.
    """
    from core.knowledge.sources import source_id

    store = _get_vector_store()
    if store is None:
        return None
    for candidate in store.distinct_sources():
        if source_id(candidate) == source_id_:
            return candidate
    return None
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
def _resolve_transcript(source_id_: str) -> Optional[str]:
    """Return the best transcript text available for a source id.

    Order of preference:
      1. the registry row's stored ``transcript`` when it is non-blank;
      2. a transcript rebuilt by the vector store for the matching raw
         source string;
      3. None when the id matches no chunk source or the store is
         unavailable.
    """
    registry = _get_source_registry()
    if registry:
        record = registry.get(source_id_)
        if record is not None and (record.get("transcript") or "").strip():
            return record["transcript"]

    raw_source = _source_str_for_id(source_id_)
    if raw_source is None:
        return None

    store = _get_vector_store()
    if not store:
        return None
    return store.transcript_for_source(raw_source)
|
|
1354
|
+
|
|
1355
|
+
|
|
1356
|
+
def _detail_from_store(source_id_: str) -> Optional[dict]:
    """Build a detail dict for a source that exists only as chunks.

    The raw source string is recovered by reverse lookup on the id. The
    returned dict carries the chunks and a transcript obtained from the
    vector store, with every other metadata field set to a neutral
    default and ``status`` fixed to ``"indexed"``. Returns None when no
    chunk source matches the id.
    """
    from core.knowledge.ingest import detect_source_type

    raw_source = _source_str_for_id(source_id_)
    if raw_source is None:
        return None

    store = _get_vector_store()
    if store:
        chunks = store.chunks_for_source(raw_source)
        transcript = store.transcript_for_source(raw_source)
    else:
        chunks, transcript = [], ""

    detail = {"id": source_id_, "source": raw_source}
    detail["type"] = detect_source_type(raw_source)
    detail.update({
        "title": "",
        "duration": 0,
        "language": "",
        "thumbnail_path": "",
        "media_path": "",
        "transcript": transcript,
        "transcript_reconstructed": bool(transcript),
        "chunk_count": len(chunks),
        "status": "indexed",
        "error": "",
        "created_at": "",
        "updated_at": "",
        "chunks": chunks,
    })
    return detail
|
|
1380
|
+
|
|
1381
|
+
|
|
1382
|
+
@app.get("/api/knowledge/sources/{source_id}")
def knowledge_source_detail(source_id: str):
    """Return one source's metadata together with its indexed chunks.

    A registry row takes precedence and is enriched with the chunks from
    the vector store; when its stored transcript is blank, the transcript
    is taken from the vector store and flagged as reconstructed. Without a
    registry row the chunks-only fallback is used, and a 404 JSON response
    is returned when that finds nothing either.
    """
    registry = _get_source_registry()
    record = registry.get(source_id) if registry else None

    if record is None:
        legacy = _detail_from_store(source_id)
        if legacy is None:
            return JSONResponse({"error": "not found"}, status_code=404)
        return legacy

    detail = dict(record)
    store = _get_vector_store()
    detail["chunks"] = store.chunks_for_source(detail["source"]) if store else []

    if (detail.get("transcript") or "").strip():
        # A stored transcript is served as-is.
        detail["transcript_reconstructed"] = False
        return detail

    detail["transcript"] = (
        store.transcript_for_source(detail["source"]) if store else ""
    )
    detail["transcript_reconstructed"] = bool(detail["transcript"])
    return detail
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
@app.get("/api/knowledge/sources/{source_id}/transcript")
def knowledge_source_transcript(source_id: str):
    """Return the full transcript text for a source.

    A non-blank transcript stored on the registry row is returned with
    ``reconstructed`` set to False. Otherwise the transcript is obtained
    from the vector store for the matching raw source string, and
    ``reconstructed`` reflects whether any text came back.

    Responds 404 only when the id has neither a registry row nor a
    matching chunk source. A known source without transcript text yields
    an empty transcript with status 200.
    """
    registry = _get_source_registry()
    row = registry.get(source_id) if registry else None
    if row and (row.get("transcript") or "").strip():
        return {"transcript": row["transcript"], "reconstructed": False}

    match = _source_str_for_id(source_id)
    if row is None and match is None:
        return JSONResponse({"error": "not found"}, status_code=404)

    # Reuse the reverse lookup done above instead of resolving the id a
    # second time (which would re-read the registry and re-scan sources).
    text = ""
    if match is not None:
        store = _get_vector_store()
        if store:
            text = store.transcript_for_source(match) or ""
    return {"transcript": text, "reconstructed": bool(text)}
|
|
1434
|
+
|
|
1435
|
+
|
|
1436
|
+
_AGENT_MATCH_TEXT_CAP = 4000 # representative sample for embedding; not full text
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
def _source_knowledge_text(source_id_: str) -> str:
    """Assemble the text used to represent a source: title plus a sample.

    The body is the resolved transcript; when that is blank, the text of
    the first five chunks is joined with spaces instead. The body is cut
    to ``_AGENT_MATCH_TEXT_CAP`` characters and, when the registry row has
    a title, the title is placed on its own line in front of it. Returns
    "" when neither a title nor any body text is available.
    """
    registry = _get_source_registry()
    record = registry.get(source_id_) if registry else None
    title = str((record or {}).get("title") or "").strip()

    body = (_resolve_transcript(source_id_) or "").strip()
    if not body:
        raw_source = _source_str_for_id(source_id_)
        store = _get_vector_store()
        if raw_source and store:
            leading = store.chunks_for_source(raw_source)[:5]
            pieces = [str(chunk.get("text") or "") for chunk in leading]
            body = " ".join(pieces).strip()

    excerpt = body[:_AGENT_MATCH_TEXT_CAP]
    if not title:
        return excerpt
    return f"{title}\n{excerpt}".strip()
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
@app.get("/api/knowledge/sources/{source_id}/agent-matches")
def knowledge_source_agent_matches(source_id: str, top_n: int = Query(5)):
    """Suggest which agents should learn from this source.

    Builds the source's knowledge text, checks that the embedder is
    available and asks ``agent_match.match_agents`` for the best matches
    among the loaded agents. ``top_n`` is clamped to the range 1..10 so a
    zero or negative query value cannot reach the matcher.

    Returns ``{matches, source_id, count}``. When there is no source text,
    the embedder is unavailable, or the matcher returns nothing, the
    response is ``{matches: [], source_id, count: 0, reason}`` instead.
    """
    from core.knowledge import agent_match, embedder

    def _empty(reason: str) -> dict:
        # Shared shape for every degraded (no-match) response.
        return {"matches": [], "source_id": source_id, "count": 0, "reason": reason}

    text = _source_knowledge_text(source_id)
    if not text:
        return _empty("no source text")
    if not embedder.is_available():
        return _empty("embedder unavailable")

    limit = max(1, min(top_n, 10))
    matches = agent_match.match_agents(text, _load_agents(), top_n=limit)
    if not matches:
        return _empty("embedder unavailable")
    return {"matches": matches, "source_id": source_id, "count": len(matches)}
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
def _agent_matches_for_proposal(source_id_: str, text: str, body: Optional[dict]) -> list[dict]:
    """Pick the agent matches that go into a proposal.

    The ten best matches for ``text`` are computed first. When ``body`` is
    a dict with a non-empty ``agent_ids`` value, only matches whose ``id``
    is among those ids (compared as strings) are kept; otherwise the first
    five matches are returned. ``source_id_`` is accepted but not used.
    """
    from core.knowledge import agent_match

    ranked = agent_match.match_agents(text, _load_agents(), top_n=10)
    requested = body.get("agent_ids") if isinstance(body, dict) else None
    if not requested:
        return ranked[:5]

    # NOTE(review): filtering happens within the top-10 list only, so a
    # requested agent outside it is dropped — confirm this is intended.
    wanted = {str(agent_id) for agent_id in requested}
    return [candidate for candidate in ranked if candidate["id"] in wanted]
|
|
1494
|
+
|
|
1495
|
+
|
|
1496
|
+
@app.post("/api/knowledge/sources/{source_id}/agent-proposal")
def knowledge_source_agent_proposal(source_id: str, body: Optional[dict] = None):
    """Write a propose-only markdown listing agents suggested for a source.

    ``body`` is optional; ``{"agent_ids": [...]}`` narrows the proposal to
    those agents, otherwise the top matches are used. The proposal title
    is the registry title, falling back to the source id. The rendered
    markdown is written through ``_write_agent_proposal``; this function
    performs no other write.

    Returns ``{proposal_path, agents}`` on success, or
    ``{error, agents: 0}`` when there is no source text or the embedder is
    unavailable.
    """
    from core.knowledge import embedder

    text = _source_knowledge_text(source_id)
    failure = None
    if not text:
        failure = "no source text"
    elif not embedder.is_available():
        failure = "embedder unavailable"
    if failure is not None:
        return {"error": failure, "agents": 0}

    selected = _agent_matches_for_proposal(source_id, text, body)

    registry = _get_source_registry()
    record = registry.get(source_id) if registry else None
    heading = str((record or {}).get("title") or "").strip() or source_id

    document = _render_agent_proposal(source_id, heading, selected)
    written = _write_agent_proposal(source_id, document)
    return {"proposal_path": str(written), "agents": len(selected)}
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
def _render_agent_proposal(source_id_: str, title: str, matches: list[dict]) -> str:
    """Render the propose-only markdown document for a source.

    The title is passed through ``redact_clients`` and then ``md_escape``
    before it is placed in the heading. Each match becomes one bullet; an
    empty match list produces a placeholder line. The assembled document
    is passed through ``redact_clients`` once more before it is returned.
    """
    from core.cognition.reorganizer import md_escape, redact_clients

    heading = md_escape(redact_clients(title))

    if matches:
        bullets = [_agent_proposal_line(match, md_escape) for match in matches]
    else:
        bullets = ["_(no agent matches)_"]

    document = [
        f"# Agent Attribution Proposal — {heading}",
        "",
        "> **PROPOSE-ONLY** — review and apply manually; this never edits agent files.",
        f"> Source: `{source_id_}`",
        "",
        "## Suggested agents",
        "",
        *bullets,
    ]
    return redact_clients("\n".join(document))
|
|
1541
|
+
|
|
1542
|
+
|
|
1543
|
+
def _agent_proposal_line(m: dict, escape) -> str:
    """Format one suggested-agent bullet for the proposal markdown.

    ``escape`` is applied to every entry of ``m["matched_terms"]``; the
    escaped terms are comma-joined, or shown as ``n/a`` when there are
    none. Name, department, role and score are read from ``m`` with empty
    string / 0 defaults and inserted as they are.
    """
    raw_terms = m.get("matched_terms") or []
    escaped_terms = [escape(term) for term in raw_terms]
    terms = ", ".join(escaped_terms) or "n/a"

    name = m.get("name", "")
    department = m.get("department", "")
    role = m.get("role", "")
    score = m.get("score", 0)
    return f"- **{name}** ({department} — {role}) score: {score}; matched: {terms}"
|
|
1550
|
+
|
|
1551
|
+
|
|
1552
|
+
def _write_agent_proposal(source_id_: str, markdown: str) -> Path:
    """Write the proposal markdown under ~/.arkaos/reorganize-proposals/.

    The file name is derived from the source id: every character that is
    not alphanumeric, ``-`` or ``_`` becomes ``-`` and the result is cut to
    64 characters. The text is first written to a per-process temporary
    file next to the target and then moved over it with ``os.replace``.
    Returns the final path.
    """
    slug_chars = []
    for char in source_id_:
        keep = char.isalnum() or char in "-_"
        slug_chars.append(char if keep else "-")
    slug = "".join(slug_chars)[:64]

    directory = Path.home() / ".arkaos" / "reorganize-proposals"
    directory.mkdir(parents=True, exist_ok=True)

    target = directory / f"agent-attribution-{slug}.md"
    scratch = target.with_suffix(f".tmp-{os.getpid()}.md")
    scratch.write_text(markdown, encoding="utf-8")
    os.replace(scratch, target)
    return target
|
|
1562
|
+
|
|
1563
|
+
|
|
1564
|
+
@app.get("/api/knowledge/sources/{source_id}/media")
def knowledge_source_media(source_id: str):
    """Serve a source's media file, or a 404 JSON body when it has none.

    The file location comes from ``_safe_media_path``; the file is
    returned through ``FileResponse``.
    """
    media_file = _safe_media_path(source_id)
    if media_file is not None:
        return FileResponse(str(media_file))
    return JSONResponse({"error": "not found"}, status_code=404)
|
|
1571
|
+
|
|
1572
|
+
|
|
1573
|
+
@app.get("/api/knowledge/sources/{source_id}/download")
def knowledge_source_download(source_id: str):
    """Serve a source's media file as an attachment download.

    The file location comes from ``_safe_media_path``. The response uses
    the file's own name and an ``attachment`` content disposition. A 404
    JSON body is returned when the source has no servable media file.
    """
    media_file = _safe_media_path(source_id)
    if media_file is not None:
        return FileResponse(
            str(media_file),
            filename=media_file.name,
            content_disposition_type="attachment",
        )
    return JSONResponse({"error": "not found"}, status_code=404)
|
|
1583
|
+
|
|
1584
|
+
|
|
1585
|
+
def _safe_upload_path(media_dir: Path, filename: str) -> Optional[Path]:
    """Resolve an upload target inside ``media_dir``, blocking path traversal.

    Only the final component of ``filename`` is used, so any directory
    parts supplied by the client are discarded. The resolved target must
    lie strictly below the resolved media directory.

    Args:
        media_dir: Directory uploads are stored in.
        filename: Client-supplied file name; may be empty or None.

    Returns:
        The resolved target path, or None when the name is empty, is a
        ``.``/``..`` component, contains a NUL byte, or resolves to the
        media directory itself or to a location outside it.
    """
    safe_name = Path(filename or "").name  # strip any path components
    # A NUL byte would make resolve() raise instead of yielding a clean
    # "invalid filename" result, so reject it up front.
    if not safe_name or safe_name in {".", ".."} or "\x00" in safe_name:
        return None
    media_root = media_dir.resolve()
    file_path = (media_dir / safe_name).resolve()
    # The target must be *inside* the root; the root itself is a directory
    # and can never be a valid file to write.
    if media_root not in file_path.parents:
        return None
    return file_path
|
|
1595
|
+
|
|
1596
|
+
|
|
1597
|
+
def _safe_media_path(source_id: str) -> Optional[Path]:
    """Resolve a source's media file, guarding against path traversal.

    Looks up the registry row for ``source_id`` and resolves its
    ``media_path``. The result is only returned when it is an existing
    regular file located below ``~/.arkaos/media``.

    Returns:
        The resolved file path, or None when the registry or row is
        missing, the row has no media path, the path is not a regular
        file, or it lies outside the media directory.
    """
    registry = _get_source_registry()
    row = registry.get(source_id) if registry else None
    # .get keeps a row without the key from raising, matching how the
    # other registry fields are read.
    media_path = row.get("media_path") if row is not None else None
    if not media_path:
        return None
    media_root = (Path.home() / ".arkaos" / "media").resolve()
    path = Path(media_path).resolve()
    # is_file (not exists) so a directory is never handed to FileResponse.
    if media_root not in path.parents or not path.is_file():
        return None
    return path
|
|
1608
|
+
|
|
1609
|
+
|
|
1247
1610
|
@app.get("/api/health")
|
|
1248
1611
|
def health():
|
|
1249
1612
|
"""PR70 v2.87.0 — per-check severity + response timestamp.
|
|
Binary file
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""ArkaOS Docs Stats -- canonical source of truth for documentation numbers.
|
|
3
|
+
|
|
4
|
+
Counts agents, departments, skills, ADRs, and tests directly from the
|
|
5
|
+
repository so that every document (README, wiki, CLAUDE.md) consumes generated
|
|
6
|
+
numbers instead of hand-typed ones. This is the antidote to documentation
|
|
7
|
+
drift: no number is ever written by hand.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python docs_stats.py # human-readable (repo root auto-detected)
|
|
11
|
+
python docs_stats.py --json
|
|
12
|
+
python docs_stats.py --root /path/to/arka-os --json
|
|
13
|
+
python docs_stats.py --with-pytest # also collect authoritative pytest case count
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import re
|
|
20
|
+
import subprocess
|
|
21
|
+
import sys
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Optional
|
|
24
|
+
|
|
25
|
+
_TEST_DEF_RE = re.compile(r"^\s*(?:async\s+)?def\s+test_\w+", re.MULTILINE)
|
|
26
|
+
_COLLECTED_RE = re.compile(r"(\d+)\s+tests?\s+collected")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def repo_root(start: Optional[Path] = None) -> Path:
    """Locate the repository root by walking upwards from ``start``.

    The root is the first directory that contains both a ``VERSION`` file
    and a ``departments`` directory. The search begins at ``start`` (or at
    this file when ``start`` is not given); a file path begins at its
    containing directory. Falls back to the current working directory when
    no ancestor qualifies.
    """
    origin = (start if start else Path(__file__).resolve()).resolve()
    first = origin if origin.is_dir() else origin.parent

    def _is_root(directory: Path) -> bool:
        return (directory / "VERSION").is_file() and (directory / "departments").is_dir()

    found = next(
        (directory for directory in (first, *origin.parents) if _is_root(directory)),
        None,
    )
    return found if found is not None else Path.cwd()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read_version(root: Path) -> str:
    """Return the stripped contents of ``root/VERSION``, or "" if absent."""
    version_file = root / "VERSION"
    if not version_file.is_file():
        return ""
    return version_file.read_text(encoding="utf-8").strip()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def count_agents(root: Path) -> dict:
    """Count agent YAML files below every ``departments/*/agents`` directory.

    The search is recursive so files in nested sub-squad folders are
    included. Returns ``{"files": N, "unique_slugs": M}`` where ``M`` is
    the number of distinct file names among the ``N`` files found.
    """
    departments = root / "departments"
    found = []
    if departments.is_dir():
        for agents_dir in departments.glob("*/agents"):
            if agents_dir.is_dir():
                found.extend(agents_dir.rglob("*.yaml"))
    distinct_names = {path.name for path in found}
    return {"files": len(found), "unique_slugs": len(distinct_names)}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def count_departments(root: Path) -> int:
    """Return how many directories sit directly under ``root/departments``.

    Returns 0 when ``departments`` does not exist or is not a directory.
    """
    departments = root / "departments"
    if not departments.is_dir():
        return 0
    return len([entry for entry in departments.iterdir() if entry.is_dir()])
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def count_skills(root: Path) -> dict:
    """Count ``SKILL.md`` files per area.

    The areas ``departments``, ``arka`` and ``marketplace`` are searched
    recursively; a missing area counts as 0. ``core`` is the sum of the
    ``departments`` and ``arka`` counts.
    """
    counts = {}
    for area in ("departments", "arka", "marketplace"):
        base = root / area
        counts[area] = sum(1 for _ in base.rglob("SKILL.md")) if base.is_dir() else 0
    counts["core"] = counts["departments"] + counts["arka"]
    return counts
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def count_adrs(root: Path) -> int:
    """Return the number of ``*.md`` files directly inside ``root/docs/adr``.

    Returns 0 when that directory does not exist.
    """
    adr_dir = root / "docs" / "adr"
    if not adr_dir.is_dir():
        return 0
    return sum(1 for _ in adr_dir.glob("*.md"))
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def count_test_functions(root: Path) -> int:
    """Statically count test definitions in ``root/tests``.

    Every ``test_*.py`` file found recursively is read as UTF-8 (undecodable
    bytes are replaced) and scanned with ``_TEST_DEF_RE``; the match counts
    are summed. Returns 0 when the ``tests`` directory does not exist.
    """
    tests_dir = root / "tests"
    if not tests_dir.is_dir():
        return 0
    total = 0
    for module in tests_dir.rglob("test_*.py"):
        source = module.read_text(encoding="utf-8", errors="replace")
        total += len(_TEST_DEF_RE.findall(source))
    return total
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def collect_pytest_cases(root: Path) -> Optional[int]:
    """Ask pytest how many test cases it collects in ``root``.

    Runs ``pytest --collect-only -q`` with the current interpreter, a
    300-second timeout and ``root`` as working directory, then scans the
    captured stdout from the last line backwards for the "N tests
    collected" summary. Returns None when the process cannot be run, times
    out, or no summary line is found.
    """
    command = [sys.executable, "-m", "pytest", "--collect-only", "-q"]
    try:
        completed = subprocess.run(
            command,
            cwd=root,
            capture_output=True,
            text=True,
            timeout=300,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        return None

    for line in completed.stdout.splitlines()[::-1]:
        summary = _COLLECTED_RE.search(line)
        if summary is not None:
            return int(summary.group(1))
    return None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def gather(root: Path, with_pytest: bool = False) -> dict:
    """Collect every documentation statistic into one JSON-serialisable dict.

    The static test-function count is always included under ``tests``;
    the pytest-collected count is added as ``tests["collected"]`` only
    when ``with_pytest`` is true. The repository root is reported as a
    string under ``root``.
    """
    test_stats = {"functions": count_test_functions(root)}
    if with_pytest:
        test_stats["collected"] = collect_pytest_cases(root)

    return dict(
        version=read_version(root),
        agents=count_agents(root),
        departments=count_departments(root),
        skills=count_skills(root),
        adrs=count_adrs(root),
        tests=test_stats,
        root=str(root),
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def format_text(stats: dict) -> str:
    """Render the stats dict as a human-readable, newline-joined summary.

    The report is framed by 52-character ``=`` rules. The pytest case
    count line is included only when ``stats["tests"]`` has a
    ``collected`` entry.
    """
    agents = stats["agents"]
    skills = stats["skills"]
    tests = stats["tests"]
    rule = "=" * 52

    report = [rule, "ARKAOS DOCS STATS (canonical)", rule]
    report.append(f"Version: {stats['version']}")
    report.append(f"Departments: {stats['departments']}")
    report.append(f"Agents: {agents['files']} files ({agents['unique_slugs']} unique slugs)")
    report.append(
        f"Skills (core): {skills['core']} (departments {skills['departments']} + arka {skills['arka']})"
    )
    report.append(f" marketplace: {skills['marketplace']}")
    report.append(f"ADRs: {stats['adrs']}")
    report.append(f"Test functions: {tests['functions']}")
    if "collected" in tests:
        report.append(f"Test cases: {tests['collected']} (pytest collected)")
    report.append(rule)
    return "\n".join(report)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def main() -> int:
    """Parse the command line, gather the stats and print them.

    ``--root`` overrides repo-root auto-detection, ``--json`` switches the
    output from the text summary to indented JSON, and ``--with-pytest``
    adds the pytest-collected case count. Always returns 0.
    """
    cli = argparse.ArgumentParser(
        description="ArkaOS docs stats -- canonical documentation counter",
    )
    cli.add_argument("--root", default=None, help="Repo root (default: auto-detect)")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument(
        "--with-pytest",
        action="store_true",
        help="Also collect authoritative pytest case count",
    )
    options = cli.parse_args()

    if options.root:
        root = Path(options.root).resolve()
    else:
        root = repo_root()

    stats = gather(root, with_pytest=options.with_pytest)
    if options.json:
        rendered = json.dumps(stats, indent=2)
    else:
        rendered = format_text(stats)
    print(rendered)
    return 0
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
if __name__ == "__main__":
|
|
154
|
+
sys.exit(main())
|