arkaos 3.78.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/VERSION +1 -1
  2. package/config/agent-allowlists/laravel.yaml +1 -0
  3. package/config/agent-allowlists/node.yaml +1 -0
  4. package/config/agent-allowlists/nuxt.yaml +1 -0
  5. package/config/agent-allowlists/python.yaml +1 -0
  6. package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
  7. package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
  8. package/core/agents/registry_gen.py +6 -1
  9. package/core/agents/schema.py +4 -0
  10. package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
  11. package/core/cognition/reorganizer.py +37 -7
  12. package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
  13. package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
  14. package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
  15. package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
  16. package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
  17. package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
  18. package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
  19. package/core/knowledge/agent_match.py +114 -0
  20. package/core/knowledge/chunker.py +45 -0
  21. package/core/knowledge/ingest.py +156 -78
  22. package/core/knowledge/sources.py +138 -0
  23. package/core/knowledge/vector_store.py +52 -0
  24. package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
  25. package/core/squads/loader.py +25 -0
  26. package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
  27. package/core/sync/agent_provisioner.py +19 -8
  28. package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
  29. package/dashboard/app/pages/cognition.vue +9 -4
  30. package/dashboard/app/pages/knowledge/[id].vue +669 -0
  31. package/dashboard/app/pages/knowledge/index.vue +1281 -0
  32. package/dashboard/app/types/index.d.ts +1 -1
  33. package/departments/brand/agents/ux-designer.yaml +15 -1
  34. package/departments/brand/agents/ux-researcher.yaml +73 -0
  35. package/departments/brand/agents/ux-strategist.yaml +72 -0
  36. package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
  37. package/departments/dev/agents/architect.yaml +9 -3
  38. package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
  39. package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
  40. package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
  41. package/departments/dev/agents/backend-dev.yaml +10 -4
  42. package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
  43. package/departments/dev/agents/dba.yaml +7 -3
  44. package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
  45. package/departments/ecom/agents/retention-manager.yaml +13 -1
  46. package/departments/leadership/agents/culture-coach.yaml +20 -0
  47. package/departments/leadership/agents/hr-specialist.yaml +18 -0
  48. package/departments/leadership/agents/leadership-director.yaml +10 -0
  49. package/departments/org/agents/chief-of-staff.yaml +76 -0
  50. package/departments/org/agents/coo.yaml +11 -0
  51. package/departments/org/agents/okr-steward.yaml +71 -0
  52. package/departments/org/agents/org-designer.yaml +23 -0
  53. package/departments/org/skills/okr-cadence/SKILL.md +34 -0
  54. package/departments/org/skills/principles-audit/SKILL.md +36 -0
  55. package/departments/pm/agents/pm-director.yaml +21 -8
  56. package/departments/pm/agents/product-owner.yaml +24 -2
  57. package/departments/pm/agents/scrum-master.yaml +21 -0
  58. package/departments/pm/agents/strategic-pm.yaml +72 -0
  59. package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
  60. package/departments/quality/agents/cqo.yaml +8 -0
  61. package/departments/saas/agents/cs-manager.yaml +19 -2
  62. package/departments/saas/agents/growth-engineer.yaml +14 -1
  63. package/departments/saas/agents/metrics-analyst.yaml +17 -1
  64. package/departments/saas/agents/revops-lead.yaml +73 -0
  65. package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
  66. package/departments/saas/skills/voc-loop/SKILL.md +29 -0
  67. package/departments/sales/agents/sales-director.yaml +9 -0
  68. package/departments/sales/agents/sdr.yaml +72 -0
  69. package/departments/strategy/agents/decision-quality.yaml +72 -0
  70. package/departments/strategy/agents/strategy-director.yaml +13 -0
  71. package/departments/strategy/skills/premortem/SKILL.md +33 -0
  72. package/knowledge/agents-registry-v2.json +1218 -78
  73. package/package.json +1 -1
  74. package/pyproject.toml +1 -1
  75. package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
  76. package/scripts/dashboard-api.py +376 -13
  77. package/dashboard/app/pages/knowledge.vue +0 -918
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "3.78.0",
3
+ "version": "4.0.0",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "3.78.0"
3
+ version = "4.0.0"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
package/scripts/dashboard-api.py CHANGED
@@ -22,6 +22,7 @@ sys.path.insert(0, str(ARKAOS_ROOT))
22
22
 
23
23
  from fastapi import FastAPI, Query, Request, WebSocket, WebSocketDisconnect
24
24
  from fastapi.middleware.cors import CORSMiddleware
25
+ from fastapi.responses import FileResponse, JSONResponse
25
26
 
26
27
  app = FastAPI(title="ArkaOS Dashboard API", version="2.2.0")
27
28
 
@@ -66,7 +67,7 @@ async def ws_tasks(websocket: WebSocket):
66
67
 
67
68
  app.add_middleware(
68
69
  CORSMiddleware,
69
- allow_origin_regex=r"http://localhost:\d+",
70
+ allow_origin_regex=r"^(http://localhost:\d+|chrome-extension://[a-p0-9]{32})$",
70
71
  allow_methods=["GET", "POST", "PUT", "DELETE"],
71
72
  allow_headers=["*"],
72
73
  )
@@ -130,6 +131,23 @@ def _get_vector_store():
130
131
  return None
131
132
 
132
133
 
134
+ _source_registry_cache = None
135
+
136
+
137
+ def _get_source_registry():
138
+ """Lazy singleton SourceRegistry over the shared knowledge.db."""
139
+ global _source_registry_cache
140
+ if _source_registry_cache is None:
141
+ try:
142
+ from core.knowledge.sources import SourceRegistry
143
+ db_path = Path.home() / ".arkaos" / "knowledge.db"
144
+ db_path.parent.mkdir(parents=True, exist_ok=True)
145
+ _source_registry_cache = SourceRegistry(db_path)
146
+ except Exception:
147
+ return None
148
+ return _source_registry_cache
149
+
150
+
133
151
  # --- Endpoints ---
134
152
 
135
153
  @app.get("/api/overview")
@@ -971,8 +989,10 @@ async def knowledge_upload_file(file: UploadFile):
971
989
  media_dir = Path.home() / ".arkaos" / "media"
972
990
  media_dir.mkdir(parents=True, exist_ok=True)
973
991
 
974
- # Save uploaded file
975
- file_path = media_dir / file.filename
992
+ # Save uploaded file — sanitize filename to block path traversal
993
+ file_path = _safe_upload_path(media_dir, file.filename)
994
+ if file_path is None:
995
+ return {"error": "invalid filename"}
976
996
  content = await file.read()
977
997
  file_path.write_bytes(content)
978
998
 
@@ -995,13 +1015,14 @@ async def knowledge_upload_file(file: UploadFile):
995
1015
  from core.jobs.manager import JobManager as _JM
996
1016
  from core.knowledge.ingest import IngestEngine
997
1017
  local_mgr = _JM()
998
- engine = IngestEngine(store)
999
1018
  def on_progress(pct, msg):
1000
1019
  status = "embedding" if "embed" in msg.lower() or "index" in msg.lower() else "processing"
1001
1020
  local_mgr.update_progress(job_id, pct, msg, status)
1002
1021
  broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": pct, "message": msg, "status": status})
1003
1022
  try:
1004
1023
  local_mgr.start(job_id)
1024
+ reg = _get_source_registry()
1025
+ engine = IngestEngine(store, registry=reg)
1005
1026
  result = engine.ingest(source, source_type, on_progress=on_progress)
1006
1027
  if result.success:
1007
1028
  local_mgr.complete(job_id, chunks_created=result.chunks_created)
@@ -1064,7 +1085,6 @@ def knowledge_ingest(body: dict):
1064
1085
  from core.jobs.manager import JobManager as _JM
1065
1086
  local_mgr = _JM()
1066
1087
 
1067
- engine = IngestEngine(store)
1068
1088
  def on_progress(pct, msg):
1069
1089
  status = "processing"
1070
1090
  if "phase 2" in msg.lower() or "download" in msg.lower():
@@ -1086,6 +1106,8 @@ def knowledge_ingest(body: dict):
1086
1106
  try:
1087
1107
  local_mgr.start(job_id)
1088
1108
  broadcast_from_thread({"type": "job_progress", "job_id": job_id, "progress": 0, "message": "Starting...", "status": "processing"})
1109
+ reg = _get_source_registry()
1110
+ engine = IngestEngine(store, registry=reg)
1089
1111
  result = engine.ingest(source, source_type, on_progress=on_progress)
1090
1112
  if result.success:
1091
1113
  local_mgr.complete(job_id, chunks_created=result.chunks_created)
@@ -1201,20 +1223,69 @@ def knowledge_search(q: str = Query(...), top_k: int = Query(5)):
1201
1223
  return {"results": results, "query": q, "total": len(results)}
1202
1224
 
1203
1225
 
1226
+ def _merge_source_rows(
1227
+ store_rows: list[dict], registry_rows: list[dict]
1228
+ ) -> list[dict]:
1229
+ """Union chunk-based + registry rows keyed by source string.
1230
+
1231
+ Every emitted row keeps the legacy ``source``/``chunks`` keys and adds
1232
+ ``id`` (always linkable), ``title``, ``type``, ``has_media``,
1233
+ ``duration`` and ``status``. A registry source with 0 chunks still
1234
+ appears. Sorted by chunks desc, then source asc.
1235
+ """
1236
+ from core.knowledge.sources import source_id
1237
+
1238
+ by_source: dict[str, dict] = {}
1239
+ for r in store_rows:
1240
+ src = r.get("source", "")
1241
+ by_source[src] = {"source": src, "chunks": int(r.get("chunks", 0) or 0)}
1242
+ for reg in registry_rows:
1243
+ src = reg.get("source", "")
1244
+ row = by_source.setdefault(src, {"source": src, "chunks": 0})
1245
+ row.update(_registry_fields(reg))
1246
+ for src, row in by_source.items():
1247
+ row.setdefault("id", source_id(src))
1248
+ for key, default in (("title", ""), ("type", ""), ("has_media", False),
1249
+ ("duration", 0), ("status", "")):
1250
+ row.setdefault(key, default)
1251
+ return sorted(by_source.values(), key=lambda r: (-r["chunks"], r["source"]))
1252
+
1253
+
1254
+ def _registry_fields(reg: dict) -> dict:
1255
+ """Project a registry row onto the list-row metadata keys."""
1256
+ from core.knowledge.sources import source_id
1257
+
1258
+ return {
1259
+ "id": reg.get("id") or source_id(reg.get("source", "")),
1260
+ "title": reg.get("title", "") or "",
1261
+ "type": reg.get("type", "") or "",
1262
+ "has_media": bool(reg.get("media_path")),
1263
+ "duration": reg.get("duration", 0) or 0,
1264
+ "status": reg.get("status", "") or "",
1265
+ }
1266
+
1267
+
1204
1268
  @app.get("/api/knowledge/sources")
1205
1269
  def knowledge_list_sources():
1206
- """PR88c v3.25.0 — list every distinct source + chunk count.
1270
+ """List every distinct source merged from vector store + registry.
1207
1271
 
1208
- Returns ``{sources: [{source, chunks}], total: N}``. Sorted
1209
- descending by chunk count.
1272
+ Returns ``{sources: [...], total: N}``. Each row keeps the legacy
1273
+ ``source``/``chunks`` keys and adds ``id``/``title``/``type``/
1274
+ ``has_media``/``duration``/``status`` so the frontend can link each
1275
+ row to ``/knowledge/{id}``. Registry sources with 0 chunks appear too.
1210
1276
  """
1211
1277
  store = _get_vector_store()
1212
- if not store:
1278
+ registry = _get_source_registry()
1279
+ store_rows: list[dict] = []
1280
+ if store:
1281
+ try:
1282
+ store_rows = store.list_sources()
1283
+ except Exception as exc: # noqa: BLE001
1284
+ return {"sources": [], "total": 0, "error": str(exc)}
1285
+ registry_rows = registry.list() if registry else []
1286
+ if not store and not registry:
1213
1287
  return {"sources": [], "total": 0, "error": "vector store unavailable"}
1214
- try:
1215
- rows = store.list_sources()
1216
- except Exception as exc: # noqa: BLE001
1217
- return {"sources": [], "total": 0, "error": str(exc)}
1288
+ rows = _merge_source_rows(store_rows, registry_rows)
1218
1289
  return {"sources": rows, "total": len(rows)}
1219
1290
 
1220
1291
 
@@ -1244,6 +1315,298 @@ def knowledge_delete_source(source: str = Query(...)):
1244
1315
  return {"deleted": int(deleted), "source": clean}
1245
1316
 
1246
1317
 
1318
+ def _source_str_for_id(source_id_: str) -> Optional[str]:
1319
+ """Reverse-resolve the raw source string whose id matches ``source_id_``.
1320
+
1321
+ Cold path, O(n) over the vector store's distinct sources. Returns None
1322
+ when no chunk source matches (or the store is unavailable). Shared by
1323
+ ``_detail_from_store`` and ``_resolve_transcript`` so the reverse-lookup
1324
+ lives in exactly one place.
1325
+ """
1326
+ from core.knowledge.sources import source_id
1327
+
1328
+ store = _get_vector_store()
1329
+ if store is None:
1330
+ return None
1331
+ return next(
1332
+ (s for s in store.distinct_sources() if source_id(s) == source_id_),
1333
+ None,
1334
+ )
1335
+
1336
+
1337
+ def _resolve_transcript(source_id_: str) -> Optional[str]:
1338
+ """Best-available transcript text for a source id, or None.
1339
+
1340
+ Resolution order:
1341
+ 1. Registry row with a non-empty stored ``transcript`` -> that text.
1342
+ 2. Else reconstruct from the vector store's chunks (legacy sources).
1343
+ 3. Else None (no registry row and no chunk source matched the id).
1344
+ """
1345
+ registry = _get_source_registry()
1346
+ row = registry.get(source_id_) if registry else None
1347
+ if row is not None and (row.get("transcript") or "").strip():
1348
+ return row["transcript"]
1349
+ match = _source_str_for_id(source_id_)
1350
+ if match is None:
1351
+ return None
1352
+ store = _get_vector_store()
1353
+ return store.transcript_for_source(match) if store else None
1354
+
1355
+
1356
+ def _detail_from_store(source_id_: str) -> Optional[dict]:
1357
+ """Build a minimal detail dict for a chunks-only (pre-registry) source.
1358
+
1359
+ Reverse-looks-up the raw source string whose ``source_id`` matches the
1360
+ requested id, then returns a dict in the same shape the frontend
1361
+ expects — including a transcript reconstructed from the chunks. Returns
1362
+ None when no chunk source matches the id.
1363
+ """
1364
+ from core.knowledge.ingest import detect_source_type
1365
+
1366
+ match = _source_str_for_id(source_id_)
1367
+ if match is None:
1368
+ return None
1369
+ store = _get_vector_store()
1370
+ chunks = store.chunks_for_source(match) if store else []
1371
+ transcript = store.transcript_for_source(match) if store else ""
1372
+ return {
1373
+ "id": source_id_, "source": match,
1374
+ "type": detect_source_type(match), "title": "", "duration": 0,
1375
+ "language": "", "thumbnail_path": "", "media_path": "",
1376
+ "transcript": transcript, "transcript_reconstructed": bool(transcript),
1377
+ "chunk_count": len(chunks), "status": "indexed",
1378
+ "error": "", "created_at": "", "updated_at": "", "chunks": chunks,
1379
+ }
1380
+
1381
+
1382
+ @app.get("/api/knowledge/sources/{source_id}")
1383
+ def knowledge_source_detail(source_id: str):
1384
+ """Return a single source's metadata plus its indexed chunks.
1385
+
1386
+ Registry row wins (enriched with chunks). When no registry row exists,
1387
+ falls back to the vector store so pre-registry / chunks-only sources
1388
+ still resolve instead of 404ing the list link.
1389
+ """
1390
+ registry = _get_source_registry()
1391
+ row = registry.get(source_id) if registry else None
1392
+ if row is not None:
1393
+ row = dict(row)
1394
+ store = _get_vector_store()
1395
+ row["chunks"] = store.chunks_for_source(row["source"]) if store else []
1396
+ stored = (row.get("transcript") or "").strip()
1397
+ if not stored:
1398
+ row["transcript"] = (
1399
+ store.transcript_for_source(row["source"]) if store else ""
1400
+ )
1401
+ row["transcript_reconstructed"] = bool(row["transcript"])
1402
+ else:
1403
+ row["transcript_reconstructed"] = False
1404
+ return row
1405
+ fallback = _detail_from_store(source_id)
1406
+ if fallback is not None:
1407
+ return fallback
1408
+ return JSONResponse({"error": "not found"}, status_code=404)
1409
+
1410
+
1411
+ @app.get("/api/knowledge/sources/{source_id}/transcript")
1412
+ def knowledge_source_transcript(source_id: str):
1413
+ """Return the full transcript text for a source.
1414
+
1415
+ A stored registry transcript wins. Otherwise the transcript is
1416
+ reconstructed by joining the source's indexed chunks (legacy sources
1417
+ ingested before the registry have chunks but no stored transcript). The
1418
+ response carries ``reconstructed`` so the frontend can badge it.
1419
+
1420
+ 404 only when the id matches nothing at all (no registry row and no
1421
+ chunk source). A known source with genuinely zero chunks returns an
1422
+ empty transcript (200) so the page shows "No transcript available."
1423
+ """
1424
+ registry = _get_source_registry()
1425
+ row = registry.get(source_id) if registry else None
1426
+ stored = (row.get("transcript") or "").strip() if row else ""
1427
+ if stored:
1428
+ return {"transcript": row["transcript"], "reconstructed": False}
1429
+ match = _source_str_for_id(source_id)
1430
+ if row is None and match is None:
1431
+ return JSONResponse({"error": "not found"}, status_code=404)
1432
+ text = _resolve_transcript(source_id) or ""
1433
+ return {"transcript": text, "reconstructed": bool(text)}
1434
+
1435
+
1436
+ _AGENT_MATCH_TEXT_CAP = 4000 # representative sample for embedding; not full text
1437
+
1438
+
1439
+ def _source_knowledge_text(source_id_: str) -> str:
1440
+ """Best-available knowledge text for a source: title + transcript sample.
1441
+
1442
+ Prepends the registry title (when present) to a capped sample of the
1443
+ transcript. Falls back to joining the first few chunks when no
1444
+ transcript resolves. Returns "" when the source has no text at all.
1445
+ Read-only — never writes.
1446
+ """
1447
+ registry = _get_source_registry()
1448
+ row = registry.get(source_id_) if registry else None
1449
+ title = str((row or {}).get("title") or "").strip()
1450
+ body = (_resolve_transcript(source_id_) or "").strip()
1451
+ if not body:
1452
+ match = _source_str_for_id(source_id_)
1453
+ store = _get_vector_store()
1454
+ if match and store:
1455
+ chunks = store.chunks_for_source(match)[:5]
1456
+ body = " ".join(str(c.get("text") or "") for c in chunks).strip()
1457
+ sample = body[:_AGENT_MATCH_TEXT_CAP]
1458
+ return (f"{title}\n{sample}".strip()) if title else sample
1459
+
1460
+
1461
+ @app.get("/api/knowledge/sources/{source_id}/agent-matches")
1462
+ def knowledge_source_agent_matches(source_id: str, top_n: int = Query(5)):
1463
+ """Suggest which agents should learn from this source (semantic match).
1464
+
1465
+ READ-ONLY. Resolves the source's knowledge text (title + transcript
1466
+ sample), embeds it against each agent's expertise profile, and returns
1467
+ the top matches. Degrades to ``{matches: [], reason}`` (200, never 500)
1468
+ when there is no source text or the embedder is unavailable.
1469
+ """
1470
+ from core.knowledge import agent_match, embedder
1471
+
1472
+ text = _source_knowledge_text(source_id)
1473
+ if not text:
1474
+ return {"matches": [], "source_id": source_id, "count": 0, "reason": "no source text"}
1475
+ if not embedder.is_available():
1476
+ return {"matches": [], "source_id": source_id, "count": 0, "reason": "embedder unavailable"}
1477
+ matches = agent_match.match_agents(text, _load_agents(), top_n=min(top_n, 10))
1478
+ if not matches:
1479
+ return {"matches": [], "source_id": source_id, "count": 0, "reason": "embedder unavailable"}
1480
+ return {"matches": matches, "source_id": source_id, "count": len(matches)}
1481
+
1482
+
1483
+ def _agent_matches_for_proposal(source_id_: str, text: str, body: Optional[dict]) -> list[dict]:
1484
+ """Resolve the agents to include in a proposal: scoped ids or top matches."""
1485
+ from core.knowledge import agent_match
1486
+
1487
+ agents = _load_agents()
1488
+ matches = agent_match.match_agents(text, agents, top_n=10)
1489
+ ids = (body or {}).get("agent_ids") if isinstance(body, dict) else None
1490
+ if ids:
1491
+ wanted = {str(i) for i in ids}
1492
+ return [m for m in matches if m["id"] in wanted]
1493
+ return matches[:5]
1494
+
1495
+
1496
+ @app.post("/api/knowledge/sources/{source_id}/agent-proposal")
1497
+ def knowledge_source_agent_proposal(source_id: str, body: Optional[dict] = None):
1498
+ """Generate a PROPOSE-ONLY markdown proposal of agents to update.
1499
+
1500
+ Body optional: ``{"agent_ids": [...]}`` scopes to specific agents;
1501
+ absent → top matches. Client identifiers are redacted via the
1502
+ reorganizer's shared ``redact_clients`` so nothing leaks. The ONLY
1503
+ write is the proposal markdown under
1504
+ ``~/.arkaos/reorganize-proposals/`` — NEVER an agent YAML.
1505
+ """
1506
+ from core.knowledge import embedder
1507
+
1508
+ text = _source_knowledge_text(source_id)
1509
+ if not text:
1510
+ return {"error": "no source text", "agents": 0}
1511
+ if not embedder.is_available():
1512
+ return {"error": "embedder unavailable", "agents": 0}
1513
+ matches = _agent_matches_for_proposal(source_id, text, body)
1514
+ registry = _get_source_registry()
1515
+ row = registry.get(source_id) if registry else None
1516
+ title = str((row or {}).get("title") or "").strip() or source_id
1517
+ markdown = _render_agent_proposal(source_id, title, matches)
1518
+ path = _write_agent_proposal(source_id, markdown)
1519
+ return {"proposal_path": str(path), "agents": len(matches)}
1520
+
1521
+
1522
+ def _render_agent_proposal(source_id_: str, title: str, matches: list[dict]) -> str:
1523
+ """Render the propose-only markdown. Untrusted fields are redacted then escaped."""
1524
+ from core.cognition.reorganizer import md_escape, redact_clients
1525
+
1526
+ safe_title = md_escape(redact_clients(title))
1527
+ lines = [
1528
+ f"# Agent Attribution Proposal — {safe_title}",
1529
+ "",
1530
+ "> **PROPOSE-ONLY** — review and apply manually; this never edits agent files.",
1531
+ f"> Source: `{source_id_}`",
1532
+ "",
1533
+ "## Suggested agents",
1534
+ "",
1535
+ ]
1536
+ if not matches:
1537
+ lines.append("_(no agent matches)_")
1538
+ else:
1539
+ lines.extend(_agent_proposal_line(m, md_escape) for m in matches)
1540
+ return redact_clients("\n".join(lines))
1541
+
1542
+
1543
+ def _agent_proposal_line(m: dict, escape) -> str:
1544
+ """Render one suggested-agent bullet. matched_terms (untrusted) escaped inline."""
1545
+ terms = ", ".join(escape(t) for t in (m.get("matched_terms") or [])) or "n/a"
1546
+ return (
1547
+ f"- **{m.get('name', '')}** ({m.get('department', '')} — "
1548
+ f"{m.get('role', '')}) score: {m.get('score', 0)}; matched: {terms}"
1549
+ )
1550
+
1551
+
1552
+ def _write_agent_proposal(source_id_: str, markdown: str) -> Path:
1553
+ """Atomic write to ~/.arkaos/reorganize-proposals/ with a stable name."""
1554
+ safe = "".join(c if c.isalnum() or c in "-_" else "-" for c in source_id_)[:64]
1555
+ out = Path.home() / ".arkaos" / "reorganize-proposals"
1556
+ out.mkdir(parents=True, exist_ok=True)
1557
+ path = out / f"agent-attribution-{safe}.md"
1558
+ tmp = path.with_suffix(f".tmp-{os.getpid()}.md")
1559
+ tmp.write_text(markdown, encoding="utf-8")
1560
+ os.replace(tmp, path)
1561
+ return path
1562
+
1563
+
1564
+ @app.get("/api/knowledge/sources/{source_id}/media")
1565
+ def knowledge_source_media(source_id: str):
1566
+ """Stream a source's media file with HTTP Range support."""
1567
+ path = _safe_media_path(source_id)
1568
+ if path is None:
1569
+ return JSONResponse({"error": "not found"}, status_code=404)
1570
+ return FileResponse(str(path))
1571
+
1572
+
1573
+ @app.get("/api/knowledge/sources/{source_id}/download")
1574
+ def knowledge_source_download(source_id: str):
1575
+ """Download a source's media file as an attachment."""
1576
+ path = _safe_media_path(source_id)
1577
+ if path is None:
1578
+ return JSONResponse({"error": "not found"}, status_code=404)
1579
+ return FileResponse(
1580
+ str(path), filename=path.name,
1581
+ content_disposition_type="attachment",
1582
+ )
1583
+
1584
+
1585
+ def _safe_upload_path(media_dir: Path, filename: str) -> Optional[Path]:
1586
+ """Resolve an upload target inside media_dir, blocking path traversal."""
1587
+ safe_name = Path(filename or "").name # strip any path components
1588
+ if not safe_name:
1589
+ return None
1590
+ file_path = (media_dir / safe_name).resolve()
1591
+ media_root = media_dir.resolve()
1592
+ if media_root not in file_path.parents and file_path != media_root:
1593
+ return None
1594
+ return file_path
1595
+
1596
+
1597
+ def _safe_media_path(source_id: str) -> Optional[Path]:
1598
+ """Resolve a source's media path, guarding against path traversal."""
1599
+ registry = _get_source_registry()
1600
+ row = registry.get(source_id) if registry else None
1601
+ if row is None or not row["media_path"]:
1602
+ return None
1603
+ media_root = (Path.home() / ".arkaos" / "media").resolve()
1604
+ path = Path(row["media_path"]).resolve()
1605
+ if not path.exists() or media_root not in path.parents:
1606
+ return None
1607
+ return path
1608
+
1609
+
1247
1610
  @app.get("/api/health")
1248
1611
  def health():
1249
1612
  """PR70 v2.87.0 — per-check severity + response timestamp.