arkaos 3.78.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/config/agent-allowlists/laravel.yaml +1 -0
- package/config/agent-allowlists/node.yaml +1 -0
- package/config/agent-allowlists/nuxt.yaml +1 -0
- package/config/agent-allowlists/python.yaml +1 -0
- package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
- package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/agents/registry_gen.py +6 -1
- package/core/agents/schema.py +4 -0
- package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
- package/core/cognition/reorganizer.py +37 -7
- package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
- package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/core/knowledge/agent_match.py +114 -0
- package/core/knowledge/chunker.py +45 -0
- package/core/knowledge/ingest.py +156 -78
- package/core/knowledge/sources.py +138 -0
- package/core/knowledge/vector_store.py +52 -0
- package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
- package/core/squads/loader.py +25 -0
- package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
- package/core/sync/agent_provisioner.py +19 -8
- package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
- package/dashboard/app/pages/cognition.vue +9 -4
- package/dashboard/app/pages/knowledge/[id].vue +669 -0
- package/dashboard/app/pages/knowledge/index.vue +1281 -0
- package/dashboard/app/types/index.d.ts +1 -1
- package/departments/brand/agents/ux-designer.yaml +15 -1
- package/departments/brand/agents/ux-researcher.yaml +73 -0
- package/departments/brand/agents/ux-strategist.yaml +72 -0
- package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
- package/departments/dev/agents/architect.yaml +9 -3
- package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
- package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
- package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
- package/departments/dev/agents/backend-dev.yaml +10 -4
- package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
- package/departments/dev/agents/dba.yaml +7 -3
- package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
- package/departments/ecom/agents/retention-manager.yaml +13 -1
- package/departments/leadership/agents/culture-coach.yaml +20 -0
- package/departments/leadership/agents/hr-specialist.yaml +18 -0
- package/departments/leadership/agents/leadership-director.yaml +10 -0
- package/departments/org/agents/chief-of-staff.yaml +76 -0
- package/departments/org/agents/coo.yaml +11 -0
- package/departments/org/agents/okr-steward.yaml +71 -0
- package/departments/org/agents/org-designer.yaml +23 -0
- package/departments/org/skills/okr-cadence/SKILL.md +34 -0
- package/departments/org/skills/principles-audit/SKILL.md +36 -0
- package/departments/pm/agents/pm-director.yaml +21 -8
- package/departments/pm/agents/product-owner.yaml +24 -2
- package/departments/pm/agents/scrum-master.yaml +21 -0
- package/departments/pm/agents/strategic-pm.yaml +72 -0
- package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
- package/departments/quality/agents/cqo.yaml +8 -0
- package/departments/saas/agents/cs-manager.yaml +19 -2
- package/departments/saas/agents/growth-engineer.yaml +14 -1
- package/departments/saas/agents/metrics-analyst.yaml +17 -1
- package/departments/saas/agents/revops-lead.yaml +73 -0
- package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
- package/departments/saas/skills/voc-loop/SKILL.md +29 -0
- package/departments/sales/agents/sales-director.yaml +9 -0
- package/departments/sales/agents/sdr.yaml +72 -0
- package/departments/strategy/agents/decision-quality.yaml +72 -0
- package/departments/strategy/agents/strategy-director.yaml +13 -0
- package/departments/strategy/skills/premortem/SKILL.md +33 -0
- package/knowledge/agents-registry-v2.json +1218 -78
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
- package/scripts/dashboard-api.py +376 -13
- package/dashboard/app/pages/knowledge.vue +0 -918
package/package.json
CHANGED
package/pyproject.toml
CHANGED
|
Binary file
|
package/scripts/dashboard-api.py
CHANGED
|
@@ -22,6 +22,7 @@ sys.path.insert(0, str(ARKAOS_ROOT))
|
|
|
22
22
|
|
|
23
23
|
from fastapi import FastAPI, Query, Request, WebSocket, WebSocketDisconnect
|
|
24
24
|
from fastapi.middleware.cors import CORSMiddleware
|
|
25
|
+
from fastapi.responses import FileResponse, JSONResponse
|
|
25
26
|
|
|
26
27
|
app = FastAPI(title="ArkaOS Dashboard API", version="2.2.0")
|
|
27
28
|
|
|
@@ -66,7 +67,7 @@ async def ws_tasks(websocket: WebSocket):
|
|
|
66
67
|
|
|
67
68
|
app.add_middleware(
|
|
68
69
|
CORSMiddleware,
|
|
69
|
-
allow_origin_regex=r"http://localhost:\d
|
|
70
|
+
allow_origin_regex=r"^(http://localhost:\d+|chrome-extension://[a-p0-9]{32})$",
|
|
70
71
|
allow_methods=["GET", "POST", "PUT", "DELETE"],
|
|
71
72
|
allow_headers=["*"],
|
|
72
73
|
)
|
|
@@ -130,6 +131,23 @@ def _get_vector_store():
|
|
|
130
131
|
return None
|
|
131
132
|
|
|
132
133
|
|
|
134
|
+
_source_registry_cache = None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _get_source_registry():
|
|
138
|
+
"""Lazy singleton SourceRegistry over the shared knowledge.db."""
|
|
139
|
+
global _source_registry_cache
|
|
140
|
+
if _source_registry_cache is None:
|
|
141
|
+
try:
|
|
142
|
+
from core.knowledge.sources import SourceRegistry
|
|
143
|
+
db_path = Path.home() / ".arkaos" / "knowledge.db"
|
|
144
|
+
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
145
|
+
_source_registry_cache = SourceRegistry(db_path)
|
|
146
|
+
except Exception:
|
|
147
|
+
return None
|
|
148
|
+
return _source_registry_cache
|
|
149
|
+
|
|
150
|
+
|
|
133
151
|
# --- Endpoints ---
|
|
134
152
|
|
|
135
153
|
@app.get("/api/overview")
|
|
@@ -971,8 +989,10 @@ async def knowledge_upload_file(file: UploadFile):
|
|
|
971
989
|
media_dir = Path.home() / ".arkaos" / "media"
|
|
972
990
|
media_dir.mkdir(parents=True, exist_ok=True)
|
|
973
991
|
|
|
974
|
-
# Save uploaded file
|
|
975
|
-
file_path = media_dir
|
|
992
|
+
# Save uploaded file — sanitize filename to block path traversal
|
|
993
|
+
file_path = _safe_upload_path(media_dir, file.filename)
|
|
994
|
+
if file_path is None:
|
|
995
|
+
return {"error": "invalid filename"}
|
|
976
996
|
content = await file.read()
|
|
977
997
|
file_path.write_bytes(content)
|
|
978
998
|
|
|
@@ -995,13 +1015,14 @@ async def knowledge_upload_file(file: UploadFile):
|
|
|
995
1015
|
from core.jobs.manager import JobManager as _JM
|
|
996
1016
|
from core.knowledge.ingest import IngestEngine
|
|
997
1017
|
local_mgr = _JM()
|
|
998
|
-
engine = IngestEngine(store)
|
|
999
1018
|
def on_progress(pct, msg):
|
|
1000
1019
|
status = "embedding" if "embed" in msg.lower() or "index" in msg.lower() else "processing"
|
|
1001
1020
|
local_mgr.update_progress(job_id, pct, msg, status)
|
|
1002
1021
|
broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": pct, "message": msg, "status": status})
|
|
1003
1022
|
try:
|
|
1004
1023
|
local_mgr.start(job_id)
|
|
1024
|
+
reg = _get_source_registry()
|
|
1025
|
+
engine = IngestEngine(store, registry=reg)
|
|
1005
1026
|
result = engine.ingest(source, source_type, on_progress=on_progress)
|
|
1006
1027
|
if result.success:
|
|
1007
1028
|
local_mgr.complete(job_id, chunks_created=result.chunks_created)
|
|
@@ -1064,7 +1085,6 @@ def knowledge_ingest(body: dict):
|
|
|
1064
1085
|
from core.jobs.manager import JobManager as _JM
|
|
1065
1086
|
local_mgr = _JM()
|
|
1066
1087
|
|
|
1067
|
-
engine = IngestEngine(store)
|
|
1068
1088
|
def on_progress(pct, msg):
|
|
1069
1089
|
status = "processing"
|
|
1070
1090
|
if "phase 2" in msg.lower() or "download" in msg.lower():
|
|
@@ -1086,6 +1106,8 @@ def knowledge_ingest(body: dict):
|
|
|
1086
1106
|
try:
|
|
1087
1107
|
local_mgr.start(job_id)
|
|
1088
1108
|
broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": 0, "message": "Starting...", "status": "processing"})
|
|
1109
|
+
reg = _get_source_registry()
|
|
1110
|
+
engine = IngestEngine(store, registry=reg)
|
|
1089
1111
|
result = engine.ingest(source, source_type, on_progress=on_progress)
|
|
1090
1112
|
if result.success:
|
|
1091
1113
|
local_mgr.complete(job_id, chunks_created=result.chunks_created)
|
|
@@ -1201,20 +1223,69 @@ def knowledge_search(q: str = Query(...), top_k: int = Query(5)):
|
|
|
1201
1223
|
return {"results": results, "query": q, "total": len(results)}
|
|
1202
1224
|
|
|
1203
1225
|
|
|
1226
|
+
def _merge_source_rows(
    store_rows: list[dict], registry_rows: list[dict]
) -> list[dict]:
    """Merge vector-store rows and registry rows into one list per source.

    Rows are keyed by their ``source`` string. Store rows contribute the
    ``source``/``chunks`` pair; registry rows overlay the metadata produced
    by ``_registry_fields`` and create a 0-chunk entry when the store has
    none. Every row ends up with ``id``, ``title``, ``type``, ``has_media``,
    ``duration`` and ``status`` (defaults filled in when absent).

    Returns:
        The merged rows ordered by ``chunks`` descending, then ``source``
        ascending.
    """
    from core.knowledge.sources import source_id

    merged: dict[str, dict] = {}

    # Pass 1: chunk counts reported by the vector store.
    for store_row in store_rows:
        key = store_row.get("source", "")
        merged[key] = {"source": key, "chunks": int(store_row.get("chunks", 0) or 0)}

    # Pass 2: registry metadata, adding chunk-less sources as needed.
    for registry_row in registry_rows:
        key = registry_row.get("source", "")
        entry = merged.setdefault(key, {"source": key, "chunks": 0})
        entry.update(_registry_fields(registry_row))

    # Pass 3: guarantee the full key set on every row.
    fallbacks = {"title": "", "type": "", "has_media": False, "duration": 0, "status": ""}
    for key, entry in merged.items():
        entry.setdefault("id", source_id(key))
        for field, fallback in fallbacks.items():
            entry.setdefault(field, fallback)

    ordered = list(merged.values())
    ordered.sort(key=lambda entry: (-entry["chunks"], entry["source"]))
    return ordered
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def _registry_fields(reg: dict) -> dict:
    """Extract the list-row metadata keys from a registry row.

    ``id`` falls back to ``source_id`` of the row's ``source`` when the row
    carries no id; ``has_media`` is the truthiness of ``media_path``; the
    remaining keys fall back to an empty string (or 0 for ``duration``) when
    missing or falsy.
    """
    from core.knowledge.sources import source_id

    identifier = reg.get("id")
    if not identifier:
        identifier = source_id(reg.get("source", ""))

    projected = {"id": identifier}
    for text_key in ("title", "type"):
        projected[text_key] = reg.get(text_key, "") or ""
    projected["has_media"] = bool(reg.get("media_path"))
    projected["duration"] = reg.get("duration", 0) or 0
    projected["status"] = reg.get("status", "") or ""
    return projected
|
|
1266
|
+
|
|
1267
|
+
|
|
1204
1268
|
@app.get("/api/knowledge/sources")
def knowledge_list_sources():
    """List every distinct source, merged from the vector store and registry.

    Returns ``{sources: [...], total: N}``. Rows keep the legacy
    ``source``/``chunks`` keys and also carry ``id``/``title``/``type``/
    ``has_media``/``duration``/``status``. Registry sources with 0 chunks
    are included. When neither backend is available, or the vector store
    raises while listing, the response is an empty list plus an ``error``.
    """
    store = _get_vector_store()
    registry = _get_source_registry()

    # Nothing to merge from: report it the same way the legacy endpoint did.
    if not store and not registry:
        return {"sources": [], "total": 0, "error": "vector store unavailable"}

    chunk_rows: list[dict] = []
    if store:
        try:
            chunk_rows = store.list_sources()
        except Exception as exc:  # noqa: BLE001
            return {"sources": [], "total": 0, "error": str(exc)}

    metadata_rows = registry.list() if registry else []
    merged = _merge_source_rows(chunk_rows, metadata_rows)
    return {"sources": merged, "total": len(merged)}
|
|
1219
1290
|
|
|
1220
1291
|
|
|
@@ -1244,6 +1315,298 @@ def knowledge_delete_source(source: str = Query(...)):
|
|
|
1244
1315
|
return {"deleted": int(deleted), "source": clean}
|
|
1245
1316
|
|
|
1246
1317
|
|
|
1318
|
+
def _source_str_for_id(source_id_: str) -> Optional[str]:
    """Find the raw source string whose computed id equals ``source_id_``.

    Walks the vector store's distinct sources and returns the first one for
    which ``source_id(source)`` matches. Linear in the number of sources.

    Returns:
        The matching source string, or ``None`` when the vector store is
        unavailable or no source matches.
    """
    from core.knowledge.sources import source_id

    store = _get_vector_store()
    if store is None:
        return None
    for candidate in store.distinct_sources():
        if source_id(candidate) == source_id_:
            return candidate
    return None
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
def _resolve_transcript(source_id_: str) -> Optional[str]:
    """Return the best transcript available for a source id, or ``None``.

    Lookup order:
      1. A registry row whose stored ``transcript`` is non-blank.
      2. The vector store's transcript for the source string that maps to
         this id.
      3. ``None`` when no source string maps to the id or the store is
         unavailable.
    """
    registry = _get_source_registry()
    if registry:
        entry = registry.get(source_id_)
        if entry is not None and (entry.get("transcript") or "").strip():
            return entry["transcript"]

    raw_source = _source_str_for_id(source_id_)
    if raw_source is None:
        return None

    store = _get_vector_store()
    if not store:
        return None
    return store.transcript_for_source(raw_source)
|
|
1354
|
+
|
|
1355
|
+
|
|
1356
|
+
def _detail_from_store(source_id_: str) -> Optional[dict]:
    """Build a detail dict for a source known only to the vector store.

    Resolves the source string for ``source_id_`` and fills the detail shape
    with the store's chunks and transcript; metadata the store does not hold
    is left empty and ``status`` is set to ``"indexed"``.

    Returns:
        The detail dict, or ``None`` when no source string maps to the id.
    """
    from core.knowledge.ingest import detect_source_type

    raw_source = _source_str_for_id(source_id_)
    if raw_source is None:
        return None

    store = _get_vector_store()
    if store:
        chunks = store.chunks_for_source(raw_source)
        transcript = store.transcript_for_source(raw_source)
    else:
        chunks = []
        transcript = ""

    detail = {"id": source_id_, "source": raw_source}
    detail["type"] = detect_source_type(raw_source)
    detail.update({
        "title": "",
        "duration": 0,
        "language": "",
        "thumbnail_path": "",
        "media_path": "",
        "transcript": transcript,
        "transcript_reconstructed": bool(transcript),
        "chunk_count": len(chunks),
        "status": "indexed",
        "error": "",
        "created_at": "",
        "updated_at": "",
        "chunks": chunks,
    })
    return detail
|
|
1380
|
+
|
|
1381
|
+
|
|
1382
|
+
@app.get("/api/knowledge/sources/{source_id}")
def knowledge_source_detail(source_id: str):
    """Return one source's metadata together with its indexed chunks.

    A registry row is preferred and is enriched with the store's chunks; if
    its stored transcript is blank, the store's transcript is used instead
    and ``transcript_reconstructed`` reflects whether any text was found.
    Without a registry row the vector-store fallback is tried, and a 404 is
    returned only when that also finds nothing.
    """
    registry = _get_source_registry()
    found = registry.get(source_id) if registry else None

    if found is None:
        fallback = _detail_from_store(source_id)
        if fallback is None:
            return JSONResponse({"error": "not found"}, status_code=404)
        return fallback

    detail = dict(found)
    store = _get_vector_store()
    detail["chunks"] = store.chunks_for_source(detail["source"]) if store else []

    if (detail.get("transcript") or "").strip():
        # A stored transcript is authoritative.
        detail["transcript_reconstructed"] = False
        return detail

    detail["transcript"] = store.transcript_for_source(detail["source"]) if store else ""
    detail["transcript_reconstructed"] = bool(detail["transcript"])
    return detail
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
@app.get("/api/knowledge/sources/{source_id}/transcript")
def knowledge_source_transcript(source_id: str):
    """Return the full transcript text for a source.

    A non-blank transcript stored on the registry row wins and is returned
    with ``reconstructed: False``. Otherwise the transcript comes from the
    vector store for the source string that maps to this id, and
    ``reconstructed`` is true when any text was found.

    Responds 404 only when the id matches neither a registry row nor a
    vector-store source. A known source without any text yields an empty
    transcript with status 200.
    """
    registry = _get_source_registry()
    row = registry.get(source_id) if registry else None
    stored = (row.get("transcript") or "").strip() if row else ""
    if stored:
        return {"transcript": row["transcript"], "reconstructed": False}

    match = _source_str_for_id(source_id)
    if row is None and match is None:
        return JSONResponse({"error": "not found"}, status_code=404)

    # Reuse the match resolved above rather than going through
    # _resolve_transcript, which would repeat both the registry read and
    # the linear reverse lookup over every source in the store.
    text = ""
    if match is not None:
        store = _get_vector_store()
        if store:
            text = store.transcript_for_source(match) or ""
    return {"transcript": text, "reconstructed": bool(text)}
|
|
1434
|
+
|
|
1435
|
+
|
|
1436
|
+
_AGENT_MATCH_TEXT_CAP = 4000  # representative sample for embedding; not full text


def _source_knowledge_text(source_id_: str) -> str:
    """Assemble the text used to match a source against agents.

    The text is the registry title (when there is one) followed by at most
    ``_AGENT_MATCH_TEXT_CAP`` characters of the resolved transcript. When no
    transcript resolves, the text of the first five chunks is joined and
    used instead. Returns ``""`` when the source has no text at all.
    """
    registry = _get_source_registry()
    entry = registry.get(source_id_) if registry else None
    title = str((entry or {}).get("title") or "").strip()

    body = (_resolve_transcript(source_id_) or "").strip()
    if not body:
        raw_source = _source_str_for_id(source_id_)
        store = _get_vector_store()
        if raw_source and store:
            leading = store.chunks_for_source(raw_source)[:5]
            pieces = [str(chunk.get("text") or "") for chunk in leading]
            body = " ".join(pieces).strip()

    sample = body[:_AGENT_MATCH_TEXT_CAP]
    if not title:
        return sample
    return f"{title}\n{sample}".strip()
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
@app.get("/api/knowledge/sources/{source_id}/agent-matches")
def knowledge_source_agent_matches(source_id: str, top_n: int = Query(5)):
    """Suggest which agents should learn from this source.

    Resolves the source's knowledge text and asks ``agent_match`` for the
    best-matching agents. ``top_n`` is clamped to the range 1..10.

    Returns:
        ``{matches, source_id, count}`` on success. When there is no source
        text, the embedder is unavailable, or no match comes back, the
        response is ``{matches: [], source_id, count: 0, reason}``.
    """
    from core.knowledge import agent_match, embedder

    def _empty(reason: str) -> dict:
        # Shared shape for every "nothing to suggest" outcome.
        return {"matches": [], "source_id": source_id, "count": 0, "reason": reason}

    text = _source_knowledge_text(source_id)
    if not text:
        return _empty("no source text")
    if not embedder.is_available():
        return _empty("embedder unavailable")

    # The query value is caller-controlled: bound it on both sides so a zero
    # or negative top_n cannot reach match_agents.
    limit = max(1, min(top_n, 10))
    matches = agent_match.match_agents(text, _load_agents(), top_n=limit)
    if not matches:
        return _empty("embedder unavailable")
    return {"matches": matches, "source_id": source_id, "count": len(matches)}
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
def _agent_matches_for_proposal(source_id_: str, text: str, body: Optional[dict]) -> list[dict]:
    """Pick the agents that go into a proposal.

    Ranks up to ten agents for ``text``. When ``body`` is a dict with a
    non-empty ``agent_ids``, only ranked agents whose ``id`` is in that list
    are kept; otherwise the first five ranked agents are returned.
    """
    from core.knowledge import agent_match

    roster = _load_agents()
    ranked = agent_match.match_agents(text, roster, top_n=10)

    requested = body.get("agent_ids") if isinstance(body, dict) else None
    if not requested:
        return ranked[:5]

    wanted = {str(agent_id) for agent_id in requested}
    return [candidate for candidate in ranked if candidate["id"] in wanted]
|
|
1494
|
+
|
|
1495
|
+
|
|
1496
|
+
@app.post("/api/knowledge/sources/{source_id}/agent-proposal")
def knowledge_source_agent_proposal(source_id: str, body: Optional[dict] = None):
    """Write a propose-only markdown file listing agents to update.

    The optional body ``{"agent_ids": [...]}`` narrows the proposal to
    specific agents; without it the top matches are used. The rendered
    markdown is written through ``_write_agent_proposal``; this handler
    performs no other write itself.

    Returns:
        ``{proposal_path, agents}`` on success, or ``{error, agents: 0}``
        when the source has no text or the embedder is unavailable.
    """
    from core.knowledge import embedder

    knowledge_text = _source_knowledge_text(source_id)
    if not knowledge_text:
        return {"error": "no source text", "agents": 0}
    if not embedder.is_available():
        return {"error": "embedder unavailable", "agents": 0}

    selected = _agent_matches_for_proposal(source_id, knowledge_text, body)

    registry = _get_source_registry()
    entry = registry.get(source_id) if registry else None
    heading = str((entry or {}).get("title") or "").strip()
    if not heading:
        # No registry title: fall back to the id so the heading is never empty.
        heading = source_id

    document = _render_agent_proposal(source_id, heading, selected)
    proposal_path = _write_agent_proposal(source_id, document)
    return {"proposal_path": str(proposal_path), "agents": len(selected)}
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
def _render_agent_proposal(source_id_: str, title: str, matches: list[dict]) -> str:
    """Render the propose-only markdown document for a source.

    The title is passed through ``redact_clients`` and then ``md_escape``;
    each match becomes one bullet via ``_agent_proposal_line``; the joined
    document is passed through ``redact_clients`` once more before it is
    returned.
    """
    from core.cognition.reorganizer import md_escape, redact_clients

    heading = md_escape(redact_clients(title))

    if matches:
        bullets = [_agent_proposal_line(match, md_escape) for match in matches]
    else:
        bullets = ["_(no agent matches)_"]

    document = [
        f"# Agent Attribution Proposal — {heading}",
        "",
        "> **PROPOSE-ONLY** — review and apply manually; this never edits agent files.",
        f"> Source: `{source_id_}`",
        "",
        "## Suggested agents",
        "",
        *bullets,
    ]
    return redact_clients("\n".join(document))
|
|
1541
|
+
|
|
1542
|
+
|
|
1543
|
+
def _agent_proposal_line(m: dict, escape) -> str:
|
|
1544
|
+
"""Render one suggested-agent bullet. matched_terms (untrusted) escaped inline."""
|
|
1545
|
+
terms = ", ".join(escape(t) for t in (m.get("matched_terms") or [])) or "n/a"
|
|
1546
|
+
return (
|
|
1547
|
+
f"- **{m.get('name', '')}** ({m.get('department', '')} — "
|
|
1548
|
+
f"{m.get('role', '')}) score: {m.get('score', 0)}; matched: {terms}"
|
|
1549
|
+
)
|
|
1550
|
+
|
|
1551
|
+
|
|
1552
|
+
def _write_agent_proposal(source_id_: str, markdown: str) -> Path:
|
|
1553
|
+
"""Atomic write to ~/.arkaos/reorganize-proposals/ with a stable name."""
|
|
1554
|
+
safe = "".join(c if c.isalnum() or c in "-_" else "-" for c in source_id_)[:64]
|
|
1555
|
+
out = Path.home() / ".arkaos" / "reorganize-proposals"
|
|
1556
|
+
out.mkdir(parents=True, exist_ok=True)
|
|
1557
|
+
path = out / f"agent-attribution-{safe}.md"
|
|
1558
|
+
tmp = path.with_suffix(f".tmp-{os.getpid()}.md")
|
|
1559
|
+
tmp.write_text(markdown, encoding="utf-8")
|
|
1560
|
+
os.replace(tmp, path)
|
|
1561
|
+
return path
|
|
1562
|
+
|
|
1563
|
+
|
|
1564
|
+
@app.get("/api/knowledge/sources/{source_id}/media")
def knowledge_source_media(source_id: str):
    """Serve a source's media file, or 404 when no safe path resolves.

    The file is returned through ``FileResponse``.
    NOTE(review): HTTP Range handling is whatever ``FileResponse`` provides
    in the installed framework version — confirm before relying on seeking.
    """
    media_file = _safe_media_path(source_id)
    if media_file is not None:
        return FileResponse(str(media_file))
    return JSONResponse({"error": "not found"}, status_code=404)
|
|
1571
|
+
|
|
1572
|
+
|
|
1573
|
+
@app.get("/api/knowledge/sources/{source_id}/download")
def knowledge_source_download(source_id: str):
    """Send a source's media file as an attachment named after the file.

    Responds 404 when no safe media path resolves for the source.
    """
    media_file = _safe_media_path(source_id)
    if media_file is not None:
        return FileResponse(
            str(media_file),
            filename=media_file.name,
            content_disposition_type="attachment",
        )
    return JSONResponse({"error": "not found"}, status_code=404)
|
|
1583
|
+
|
|
1584
|
+
|
|
1585
|
+
def _safe_upload_path(media_dir: Path, filename: str) -> Optional[Path]:
|
|
1586
|
+
"""Resolve an upload target inside media_dir, blocking path traversal."""
|
|
1587
|
+
safe_name = Path(filename or "").name # strip any path components
|
|
1588
|
+
if not safe_name:
|
|
1589
|
+
return None
|
|
1590
|
+
file_path = (media_dir / safe_name).resolve()
|
|
1591
|
+
media_root = media_dir.resolve()
|
|
1592
|
+
if media_root not in file_path.parents and file_path != media_root:
|
|
1593
|
+
return None
|
|
1594
|
+
return file_path
|
|
1595
|
+
|
|
1596
|
+
|
|
1597
|
+
def _safe_media_path(source_id: str) -> Optional[Path]:
    """Resolve a source's media file, guarding against path traversal.

    Reads ``media_path`` from the source's registry row and accepts it only
    when it resolves to a regular file below ``~/.arkaos/media``.

    Returns:
        The resolved file path, or ``None`` when the registry or row is
        missing, the row has no media path, the path lies outside the media
        directory, or it is not an existing regular file.
    """
    registry = _get_source_registry()
    row = registry.get(source_id) if registry else None
    # .get tolerates rows that have no media_path key at all.
    media_path = row.get("media_path") if row is not None else None
    if not media_path:
        return None
    media_root = (Path.home() / ".arkaos" / "media").resolve()
    path = Path(media_path).resolve()
    # Check containment first, then require a regular file: a directory
    # under the media root exists but cannot be served as a file.
    if media_root not in path.parents or not path.is_file():
        return None
    return path
|
|
1608
|
+
|
|
1609
|
+
|
|
1247
1610
|
@app.get("/api/health")
|
|
1248
1611
|
def health():
|
|
1249
1612
|
"""PR70 v2.87.0 — per-check severity + response timestamp.
|