agent-devkit 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +66 -13
  2. package/bin/agent.mjs +133 -7
  3. package/package.json +1 -1
  4. package/runtime/README.md +205 -13
  5. package/runtime/agent +31 -5
  6. package/runtime/agents/README.md +18 -0
  7. package/runtime/agents/contribution-reviewer/AGENTS.md +8 -0
  8. package/runtime/agents/contribution-reviewer/README.md +8 -0
  9. package/runtime/agents/contribution-reviewer/agent.yaml +40 -0
  10. package/runtime/agents/contribution-reviewer/capabilities/plan-contribution-pr/capability.yaml +27 -0
  11. package/runtime/agents/contribution-reviewer/capabilities/plan-contribution-pr/decision-rules.md +5 -0
  12. package/runtime/agents/contribution-reviewer/capabilities/plan-contribution-pr/workflow.md +6 -0
  13. package/runtime/agents/contribution-reviewer/capabilities/review-contribution/capability.yaml +25 -0
  14. package/runtime/agents/contribution-reviewer/capabilities/review-contribution/decision-rules.md +5 -0
  15. package/runtime/agents/contribution-reviewer/capabilities/review-contribution/workflow.md +5 -0
  16. package/runtime/agents/contribution-reviewer/capabilities/validate-local-contribution/capability.yaml +26 -0
  17. package/runtime/agents/contribution-reviewer/capabilities/validate-local-contribution/decision-rules.md +5 -0
  18. package/runtime/agents/contribution-reviewer/capabilities/validate-local-contribution/workflow.md +6 -0
  19. package/runtime/agents/contribution-reviewer/infra/README.md +6 -0
  20. package/runtime/agents/contribution-reviewer/knowledge/context.md +8 -0
  21. package/runtime/agents/contribution-reviewer/knowledge/system.md +8 -0
  22. package/runtime/agents/contribution-reviewer/templates/README.md +3 -0
  23. package/runtime/agents/knowledge-author/AGENTS.md +7 -0
  24. package/runtime/agents/knowledge-author/README.md +7 -0
  25. package/runtime/agents/knowledge-author/agent.yaml +37 -0
  26. package/runtime/agents/knowledge-author/capabilities/create-knowledge-snapshot/capability.yaml +30 -0
  27. package/runtime/agents/knowledge-author/capabilities/create-knowledge-snapshot/decision-rules.md +6 -0
  28. package/runtime/agents/knowledge-author/capabilities/create-knowledge-snapshot/workflow.md +7 -0
  29. package/runtime/agents/knowledge-author/infra/.gitkeep +1 -0
  30. package/runtime/agents/knowledge-author/knowledge/context.md +4 -0
  31. package/runtime/agents/knowledge-author/knowledge/system.md +4 -0
  32. package/runtime/agents/knowledge-author/templates/.gitkeep +1 -0
  33. package/runtime/agents/knowledge-curator/AGENTS.md +7 -0
  34. package/runtime/agents/knowledge-curator/README.md +6 -0
  35. package/runtime/agents/knowledge-curator/agent.yaml +37 -0
  36. package/runtime/agents/knowledge-curator/capabilities/curate-knowledge-base/capability.yaml +29 -0
  37. package/runtime/agents/knowledge-curator/capabilities/curate-knowledge-base/decision-rules.md +6 -0
  38. package/runtime/agents/knowledge-curator/capabilities/curate-knowledge-base/workflow.md +7 -0
  39. package/runtime/agents/knowledge-curator/infra/.gitkeep +1 -0
  40. package/runtime/agents/knowledge-curator/knowledge/context.md +4 -0
  41. package/runtime/agents/knowledge-curator/knowledge/system.md +4 -0
  42. package/runtime/agents/knowledge-curator/templates/.gitkeep +1 -0
  43. package/runtime/agents/knowledge-infra-builder/AGENTS.md +8 -0
  44. package/runtime/agents/knowledge-infra-builder/README.md +8 -0
  45. package/runtime/agents/knowledge-infra-builder/agent.yaml +38 -0
  46. package/runtime/agents/knowledge-infra-builder/capabilities/create-knowledge-base/capability.yaml +30 -0
  47. package/runtime/agents/knowledge-infra-builder/capabilities/create-knowledge-base/decision-rules.md +6 -0
  48. package/runtime/agents/knowledge-infra-builder/capabilities/create-knowledge-base/workflow.md +7 -0
  49. package/runtime/agents/knowledge-infra-builder/infra/.gitkeep +1 -0
  50. package/runtime/agents/knowledge-infra-builder/knowledge/context.md +4 -0
  51. package/runtime/agents/knowledge-infra-builder/knowledge/system.md +4 -0
  52. package/runtime/agents/knowledge-infra-builder/templates/.gitkeep +1 -0
  53. package/runtime/agents/knowledge-owner/AGENTS.md +7 -0
  54. package/runtime/agents/knowledge-owner/README.md +6 -0
  55. package/runtime/agents/knowledge-owner/agent.yaml +37 -0
  56. package/runtime/agents/knowledge-owner/capabilities/publish-knowledge-snapshot/capability.yaml +28 -0
  57. package/runtime/agents/knowledge-owner/capabilities/publish-knowledge-snapshot/decision-rules.md +6 -0
  58. package/runtime/agents/knowledge-owner/capabilities/publish-knowledge-snapshot/workflow.md +7 -0
  59. package/runtime/agents/knowledge-owner/infra/.gitkeep +1 -0
  60. package/runtime/agents/knowledge-owner/knowledge/context.md +4 -0
  61. package/runtime/agents/knowledge-owner/knowledge/system.md +4 -0
  62. package/runtime/agents/knowledge-owner/templates/.gitkeep +1 -0
  63. package/runtime/agents/knowledge-reviewer/AGENTS.md +7 -0
  64. package/runtime/agents/knowledge-reviewer/README.md +7 -0
  65. package/runtime/agents/knowledge-reviewer/agent.yaml +36 -0
  66. package/runtime/agents/knowledge-reviewer/capabilities/review-knowledge-snapshot/capability.yaml +26 -0
  67. package/runtime/agents/knowledge-reviewer/capabilities/review-knowledge-snapshot/decision-rules.md +6 -0
  68. package/runtime/agents/knowledge-reviewer/capabilities/review-knowledge-snapshot/workflow.md +7 -0
  69. package/runtime/agents/knowledge-reviewer/infra/.gitkeep +1 -0
  70. package/runtime/agents/knowledge-reviewer/knowledge/context.md +4 -0
  71. package/runtime/agents/knowledge-reviewer/knowledge/system.md +4 -0
  72. package/runtime/agents/knowledge-reviewer/templates/.gitkeep +1 -0
  73. package/runtime/agents/local-memory-manager/AGENTS.md +5 -0
  74. package/runtime/agents/local-memory-manager/README.md +7 -0
  75. package/runtime/agents/local-memory-manager/agent.yaml +38 -0
  76. package/runtime/agents/local-memory-manager/capabilities/curate-local-memory/capability.yaml +19 -0
  77. package/runtime/agents/local-memory-manager/capabilities/curate-local-memory/decision-rules.md +5 -0
  78. package/runtime/agents/local-memory-manager/capabilities/curate-local-memory/workflow.md +6 -0
  79. package/runtime/agents/local-memory-manager/capabilities/inspect-local-memory/capability.yaml +19 -0
  80. package/runtime/agents/local-memory-manager/capabilities/inspect-local-memory/decision-rules.md +5 -0
  81. package/runtime/agents/local-memory-manager/capabilities/inspect-local-memory/workflow.md +5 -0
  82. package/runtime/agents/local-memory-manager/infra/.gitkeep +1 -0
  83. package/runtime/agents/local-memory-manager/knowledge/context.md +4 -0
  84. package/runtime/agents/local-memory-manager/knowledge/system.md +4 -0
  85. package/runtime/agents/local-memory-manager/templates/.gitkeep +1 -0
  86. package/runtime/agents/memory-sync-manager/AGENTS.md +7 -0
  87. package/runtime/agents/memory-sync-manager/README.md +7 -0
  88. package/runtime/agents/memory-sync-manager/agent.yaml +37 -0
  89. package/runtime/agents/memory-sync-manager/capabilities/plan-memory-backup/capability.yaml +29 -0
  90. package/runtime/agents/memory-sync-manager/capabilities/plan-memory-backup/decision-rules.md +6 -0
  91. package/runtime/agents/memory-sync-manager/capabilities/plan-memory-backup/workflow.md +7 -0
  92. package/runtime/agents/memory-sync-manager/infra/.gitkeep +1 -0
  93. package/runtime/agents/memory-sync-manager/knowledge/context.md +4 -0
  94. package/runtime/agents/memory-sync-manager/knowledge/system.md +4 -0
  95. package/runtime/agents/memory-sync-manager/templates/.gitkeep +1 -0
  96. package/runtime/agents/shared-memory-curator/AGENTS.md +5 -0
  97. package/runtime/agents/shared-memory-curator/README.md +6 -0
  98. package/runtime/agents/shared-memory-curator/agent.yaml +38 -0
  99. package/runtime/agents/shared-memory-curator/capabilities/create-shared-memory/capability.yaml +19 -0
  100. package/runtime/agents/shared-memory-curator/capabilities/create-shared-memory/decision-rules.md +5 -0
  101. package/runtime/agents/shared-memory-curator/capabilities/create-shared-memory/workflow.md +5 -0
  102. package/runtime/agents/shared-memory-curator/capabilities/publish-shared-submission/capability.yaml +19 -0
  103. package/runtime/agents/shared-memory-curator/capabilities/publish-shared-submission/decision-rules.md +5 -0
  104. package/runtime/agents/shared-memory-curator/capabilities/publish-shared-submission/workflow.md +5 -0
  105. package/runtime/agents/shared-memory-curator/capabilities/review-shared-submission/capability.yaml +19 -0
  106. package/runtime/agents/shared-memory-curator/capabilities/review-shared-submission/decision-rules.md +5 -0
  107. package/runtime/agents/shared-memory-curator/capabilities/review-shared-submission/workflow.md +5 -0
  108. package/runtime/agents/shared-memory-curator/infra/.gitkeep +1 -0
  109. package/runtime/agents/shared-memory-curator/knowledge/context.md +5 -0
  110. package/runtime/agents/shared-memory-curator/knowledge/system.md +4 -0
  111. package/runtime/agents/shared-memory-curator/templates/.gitkeep +1 -0
  112. package/runtime/cli/README.md +47 -8
  113. package/runtime/cli/aikit/__init__.py +1 -1
  114. package/runtime/cli/aikit/agent_registry.py +4 -2
  115. package/runtime/cli/aikit/agentic_commands.py +158 -0
  116. package/runtime/cli/aikit/app_home.py +2 -0
  117. package/runtime/cli/aikit/audit.py +16 -6
  118. package/runtime/cli/aikit/catalog.py +278 -8
  119. package/runtime/cli/aikit/cli_dispatch.py +489 -13
  120. package/runtime/cli/aikit/cli_parser.py +146 -8
  121. package/runtime/cli/aikit/contribution.py +132 -2
  122. package/runtime/cli/aikit/doctor_runtime.py +85 -0
  123. package/runtime/cli/aikit/embedded_mini_brain.py +351 -0
  124. package/runtime/cli/aikit/eval.py +356 -10
  125. package/runtime/cli/aikit/human_output.py +310 -4
  126. package/runtime/cli/aikit/interactive_wizard.py +146 -0
  127. package/runtime/cli/aikit/knowledge_base.py +1067 -0
  128. package/runtime/cli/aikit/llm.py +40 -6
  129. package/runtime/cli/aikit/local_artifacts.py +444 -0
  130. package/runtime/cli/aikit/local_llm.py +176 -0
  131. package/runtime/cli/aikit/local_llm_operator.py +15 -5
  132. package/runtime/cli/aikit/main.py +15 -0
  133. package/runtime/cli/aikit/mcp_manifest.py +798 -0
  134. package/runtime/cli/aikit/mcp_tools.py +643 -5
  135. package/runtime/cli/aikit/memory.py +405 -0
  136. package/runtime/cli/aikit/mini_brain.py +56 -25
  137. package/runtime/cli/aikit/model_router.py +42 -9
  138. package/runtime/cli/aikit/natural_prompt_runtime.py +194 -2
  139. package/runtime/cli/aikit/ollama.py +64 -15
  140. package/runtime/cli/aikit/onboarding.py +551 -0
  141. package/runtime/cli/aikit/output.py +67 -0
  142. package/runtime/cli/aikit/prompt_injection.py +12 -1
  143. package/runtime/cli/aikit/review_gate.py +14 -2
  144. package/runtime/cli/aikit/roadmap_cli.py +1 -1
  145. package/runtime/cli/aikit/secrets.py +3 -2
  146. package/runtime/cli/aikit/setup_wizard_payload.py +3 -0
  147. package/runtime/cli/aikit/shared_memory.py +415 -0
  148. package/runtime/cli/aikit/specialist_readiness.py +152 -0
  149. package/runtime/cli/aikit/tasks.py +104 -1
  150. package/runtime/cli/aikit/team.py +380 -0
  151. package/runtime/cli/aikit/toolchain.py +7 -2
  152. package/runtime/cli/aikit/workflows.py +115 -14
  153. package/runtime/models/qwen2.5-0.5b-instruct/manifest.json +30 -0
  154. package/runtime/providers/knowledge-github.yaml +40 -0
  155. package/runtime/providers/knowledge-google-drive.yaml +32 -0
  156. package/runtime/providers/knowledge-local.yaml +26 -0
  157. package/runtime/providers/knowledge-notion.yaml +32 -0
  158. package/runtime/providers/knowledge-obsidian.yaml +24 -0
  159. package/runtime/providers/knowledge-onedrive.yaml +36 -0
  160. package/runtime/providers/knowledge-s3.yaml +45 -0
  161. package/runtime/providers/knowledge-sharepoint.yaml +39 -0
  162. package/runtime/providers/knowledge-supabase.yaml +43 -0
  163. package/runtime/providers/knowledge-vector.yaml +39 -0
  164. package/runtime/requirements.txt +6 -0
  165. package/runtime/scripts/docker-cli-qa.sh +453 -0
  166. package/runtime/scripts/release-catalog-snapshot.json +55 -4
  167. package/runtime/scripts/release-gate.py +54 -13
  168. package/runtime/tooling/toolchain.yaml +92 -0
  169. package/runtime/vendor/skills/napkin/napkin.md +21 -7
  170. package/runtime/workflows/azure-card-analysis/README.md +3 -0
  171. package/runtime/workflows/azure-card-analysis/workflow.yaml +30 -0
  172. package/runtime/workflows/daily-pr-review/README.md +3 -0
  173. package/runtime/workflows/daily-pr-review/workflow.yaml +31 -0
  174. package/runtime/workflows/incident-analysis/README.md +3 -0
  175. package/runtime/workflows/incident-analysis/workflow.yaml +33 -0
  176. package/runtime/workflows/release-prep/README.md +3 -0
  177. package/runtime/workflows/release-prep/workflow.yaml +30 -0
@@ -0,0 +1,1067 @@
1
+ """File-first shared knowledge base commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import hashlib
7
+ import re
8
+ import secrets
9
+ import shutil
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from cli.aikit.errors import DevKitError
15
+ from cli.aikit.memory import redact_secrets
16
+ from cli.aikit.prompt_injection import external_content_block
17
+
18
+
# Schema identifiers stamped into the payloads returned by this module.
KNOWLEDGE_SCHEMA_VERSION = "agent-devkit.knowledge/v1"
KNOWLEDGE_BASE_SCHEMA_VERSION = "agent-devkit.knowledge-base/v1"

# Default directory name used for the knowledge base storage location.
DEFAULT_KB_DIR = "knowledge-base"

# Token scopes recognised by the knowledge-base token commands.
TOKEN_SCOPES = ("read", "contribute", "review", "admin", "owner")

# User-facing provider names mapped to canonical provider ids.
KNOWLEDGE_PROVIDER_ALIASES = {
    "local": "knowledge-local",
    "filesystem": "knowledge-local",
    "github": "knowledge-github",
    "s3": "knowledge-s3",
    "supabase": "knowledge-supabase",
    "google-drive": "knowledge-google-drive",
    "drive": "knowledge-google-drive",
    "sharepoint": "knowledge-sharepoint",
    "onedrive": "knowledge-onedrive",
    "notion": "knowledge-notion",
    "obsidian": "knowledge-obsidian",
    "vector": "knowledge-vector",
}

# Distinct canonical provider ids derived from the alias table.
KNOWLEDGE_PROVIDERS = set(KNOWLEDGE_PROVIDER_ALIASES.values())

# Entry category directory names.
ENTRY_DIRS = (
    "runbooks",
    "troubleshooting",
    "api-docs",
    "architecture-decisions",
    "incident-learnings",
    "automation-patterns",
    "provider-configs",
)

# "<keyword> : value" / "<keyword> = value" style assignments that look like credentials.
SECRET_PATTERN = re.compile(r"(token|secret|password|api[_-]?key|pat)\s*[:=]\s*\S+", re.IGNORECASE)

# Either a ddd.ddd.ddd-dd formatted number or an e-mail address.
PII_PATTERN = re.compile(r"\b\d{3}\.\d{3}\.\d{3}-\d{2}\b|\b[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}\b")

# Portuguese phrases that express personal preferences or identity.
PERSONAL_MEMORY_PATTERN = re.compile(
    r"\b(meu nome|minha personalidade|minha prefer[eê]ncia|minhas prefer[eê]ncias|"
    r"prefiro|gosto de ser chamado|me chame de|chame-me de|responda para mim)\b",
    re.IGNORECASE,
)

# Small-talk tokens (Portuguese and English).
CONVERSATIONAL_TOKENS = {
    "bom", "dia", "boa", "tarde", "noite",
    "ok", "obrigado", "obrigada", "valeu", "beleza",
    "perfeito", "entendi", "sim", "nao", "não",
    "thanks", "thank", "you", "hello", "hi",
}

# Tokens associated with reusable, shareable knowledge (accented and unaccented spellings).
REUSABLE_KNOWLEDGE_TOKENS = {
    "api", "arquitetura", "automacao", "automation",
    "comando", "configuracao", "configuração",
    "decisao", "decisão", "deploy",
    "diagnostico", "diagnóstico", "docker", "erro",
    "incident", "incidente", "incidentes",
    "integracao", "integração", "knowledge",
    "padrao", "padrão", "passo", "procedimento", "provider",
    "qa", "reutilizavel", "reutilizável", "reusable", "runbook",
    "solucao", "solução", "support", "teste",
    "troubleshooting", "workflow",
}
114
+
115
+
def knowledge_init(project: Path | None = None, *, force: bool = False) -> dict[str, Any]:
    """Create the knowledge base layout on disk.

    When ``kb.yaml`` already exists and ``force`` is false, nothing is
    written and the ``knowledge_doctor`` report is returned instead.
    Otherwise the structure, default manifest, README and four draft policy
    files are written and the lexical index is rebuilt.
    """
    kb_root = knowledge_root(project)
    manifest_path = kb_root / "kb.yaml"
    if not force and manifest_path.exists():
        return knowledge_doctor(project)

    create_structure(kb_root)
    write_yaml(manifest_path, default_manifest())

    readme_path = kb_root / "README.md"
    readme_path.write_text("# Knowledge Base\n\nFile-first shared Agent DevKit knowledge base.\n", encoding="utf-8")

    policies_dir = kb_root / "policies"
    for policy in ("contribution-policy", "review-policy", "retention-policy", "security-policy"):
        policy_file = policies_dir / f"{policy}.md"
        policy_file.write_text(f"# {policy.replace('-', ' ').title()}\n\nDraft policy.\n", encoding="utf-8")

    rebuild_lexical_index(kb_root)

    result: dict[str, Any] = {
        "kind": "knowledge",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "initialized",
    }
    result["path"] = str(kb_root)
    result["manifest"] = str(manifest_path)
    return result
134
+
135
+
def knowledge_base_create(project: Path | None = None, *, provider: str | None = None, force: bool = False) -> dict[str, Any]:
    """Initialise the knowledge base and stamp its manifest with storage settings.

    The provider is taken from ``provider``, else from the manifest's
    ``storage.provider``, else ``"local"``, and passed through
    ``normalize_knowledge_provider``. The manifest is rewritten with the
    knowledge-base schema version, storage, permissions and a fresh
    ``updated_at``. The returned ``status`` is ``"created"`` only when
    ``knowledge_init`` reported ``"initialized"``; otherwise it is ``"ok"``.
    """
    init_report = knowledge_init(project, force=force)
    kb_root = knowledge_root(project)
    manifest = read_manifest(kb_root)

    stored_storage = manifest.get("storage") or {}
    requested_provider = provider or stored_storage.get("provider") or "local"
    provider_id = normalize_knowledge_provider(requested_provider)

    manifest.update(
        {
            "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
            "storage": {"provider": provider_id, "location": DEFAULT_KB_DIR},
            "permissions": token_permissions(),
            "updated_at": now_iso(),
        }
    )
    write_yaml(kb_root / "kb.yaml", manifest)
    token_refs = ensure_token_refs(kb_root)

    if init_report.get("status") == "initialized":
        status = "created"
    else:
        status = "ok"
    return {
        "kind": "knowledge-base",
        "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
        "status": status,
        "path": str(kb_root),
        "kb": public_manifest(manifest),
        "tokens": public_tokens(token_refs),
        "stored_values": False,
    }
156
+
157
+
def knowledge_base_join(
    kb_id: str | None,
    project: Path | None = None,
    *,
    provider: str | None = None,
    force: bool = False,
) -> dict[str, Any]:
    """Record membership of an existing knowledge base in the local manifest.

    Runs ``knowledge_base_create`` first, then rewrites ``kb.yaml`` with the
    given ``kb_id`` and a ``sync`` section marking the setup as local-only
    (``remote_connected`` is false). The result is the create payload with
    ``status``, ``kb``, ``remote_connected`` and ``next_steps`` overlaid.

    Raises:
        DevKitError: if ``kb_id`` is empty or ``None``.
    """
    if not kb_id:
        raise DevKitError("knowledge-base join requires a kb_id")

    created = knowledge_base_create(project, provider=provider, force=force)
    kb_root = knowledge_root(project)

    manifest = read_manifest(kb_root)
    manifest["kb_id"] = kb_id
    manifest["updated_at"] = now_iso()
    manifest["sync"] = {
        "mode": "local-config-only",
        "remote_connected": False,
        "requires_token": True,
    }
    write_yaml(kb_root / "kb.yaml", manifest)

    # Overlay on a copy so keys from the create payload keep their position.
    joined = dict(created)
    joined.update(
        {
            "status": "joined",
            "kb": public_manifest(manifest),
            "remote_connected": False,
            "next_steps": [
                "Configure provider credentials by reference before remote sync.",
                "Use `agent knowledge-base tokens` to inspect required token refs without exposing values.",
            ],
        }
    )
    return joined
188
+
189
+
def knowledge_base_status(project: Path | None = None) -> dict[str, Any]:
    """Report the manifest, token refs and doctor checks of the knowledge base.

    ``status`` is ``"ok"`` only when the ``knowledge_doctor`` report's status
    is ``"ok"``; any other doctor status is reported as ``"blocked"``.
    """
    kb_root = require_knowledge_root(project)
    manifest = read_manifest(kb_root)
    token_refs = ensure_token_refs(kb_root)
    doctor_report = knowledge_doctor(project)

    healthy = doctor_report.get("status") == "ok"
    checks = doctor_report.get("checks") or []
    return {
        "kind": "knowledge-base",
        "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
        "status": "ok" if healthy else "blocked",
        "path": str(kb_root),
        "kb": public_manifest(manifest),
        "tokens": public_tokens(token_refs),
        "checks": checks,
        "stored_values": False,
    }
205
+
206
+
def knowledge_base_tokens(project: Path | None = None) -> dict[str, Any]:
    """Return the public view of the knowledge base token refs.

    The payload carries the token file path and the output of
    ``public_tokens``; ``stored_values`` is always reported as false.
    """
    kb_root = require_knowledge_root(project)
    token_refs = ensure_token_refs(kb_root)
    token_file = tokens_path(kb_root)
    return {
        "kind": "knowledge-base-tokens",
        "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
        "status": "ok",
        "path": str(token_file),
        "tokens": public_tokens(token_refs),
        "stored_values": False,
    }
218
+
219
+
def knowledge_base_rotate_token(scope: str | None, project: Path | None = None) -> dict[str, Any]:
    """Replace the token ref stored for one scope and persist the token file.

    The scope is validated with ``require_scope`` before the knowledge base
    root is resolved. The new ref comes from ``token_ref`` and the result
    exposes it only through ``public_token``.
    """
    token_scope = require_scope(scope)
    kb_root = require_knowledge_root(project)

    token_refs = ensure_token_refs(kb_root)
    fresh_ref = token_ref(token_scope)
    token_refs["tokens"][token_scope] = fresh_ref
    token_refs["updated_at"] = now_iso()
    write_json(tokens_path(kb_root), token_refs)

    return {
        "kind": "knowledge-base-token",
        "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
        "status": "rotated",
        "scope": token_scope,
        "token": public_token(token_scope, fresh_ref),
        "stored_values": False,
    }
235
+
236
+
def knowledge_doctor(project: Path | None = None) -> dict[str, Any]:
    """Run existence and content checks over the knowledge base tree.

    Each check records whether an expected path exists. A final
    ``no-secret-or-pii`` check passes only when ``scan_tree`` returns no
    findings; the scan is skipped when the root does not exist. ``status``
    is ``"ok"`` when every check passed, else ``"blocked"``. At most the
    first 20 findings are returned.
    """
    kb_root = knowledge_root(project)
    indexes_dir = kb_root / "indexes"

    # (check id, path that must exist), in the order they are reported.
    expected_paths = (
        ("kb-root-exists", kb_root),
        ("manifest-exists", kb_root / "kb.yaml"),
        ("entries-dir-exists", kb_root / "entries"),
        ("snapshots-dir-exists", kb_root / "snapshots" / "pending"),
        ("lexical-index-exists", indexes_dir / "lexical.json"),
        ("semantic-index-manifest-exists", indexes_dir / "semantic.json"),
        ("chunks-index-exists", indexes_dir / "chunks.jsonl"),
    )
    checks = [
        {"id": check_id, "status": "passed" if target.exists() else "failed"}
        for check_id, target in expected_paths
    ]

    findings = scan_tree(kb_root) if kb_root.exists() else []
    checks.append({"id": "no-secret-or-pii", "status": "failed" if findings else "passed"})

    has_failure = any(check["status"] != "passed" for check in checks)
    return {
        "kind": "knowledge-doctor",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "blocked" if has_failure else "ok",
        "path": str(kb_root),
        "checks": checks,
        "findings": findings[:20],
    }
258
+
259
+
def knowledge_search(query: str | None, project: Path | None = None) -> dict[str, Any]:
    """Search the lexical index for entries sharing tokens with ``query``.

    The index is rebuilt first if ``indexes/lexical.json`` is missing. An
    entry's score is the number of query tokens it contains; entries with no
    overlap are dropped. Results are ordered by descending score, then by
    path. ``count`` is the total number of hits, while ``items`` holds at
    most the first 20.

    Raises:
        DevKitError: if ``query`` is empty or ``None``.
    """
    if not query:
        raise DevKitError("knowledge search requires a query")

    kb_root = require_knowledge_root(project)
    lexical_index = kb_root / "indexes" / "lexical.json"
    if not lexical_index.exists():
        rebuild_lexical_index(kb_root)
    indexed = json.loads(lexical_index.read_text(encoding="utf-8"))

    query_tokens = tokenize(query)
    hits = []
    for entry in indexed.get("items") or []:
        overlap = len(query_tokens & set(entry.get("tokens") or []))
        if not overlap:
            continue
        hits.append({"path": entry.get("path"), "title": entry.get("title"), "score": overlap})

    ranked = sorted(hits, key=lambda hit: (-int(hit["score"]), str(hit["path"])))
    return {
        "kind": "knowledge-search",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "ok",
        "query": query,
        "count": len(ranked),
        "items": ranked[:20],
    }
284
+
285
+
def knowledge_index(project: Path | None = None) -> dict[str, Any]:
    """Rebuild the lexical index and report how many items it contains.

    The count is read back from the file that ``rebuild_lexical_index``
    returns, using its ``items`` list.
    """
    kb_root = require_knowledge_root(project)
    index_file = rebuild_lexical_index(kb_root)
    rebuilt = json.loads(index_file.read_text(encoding="utf-8"))
    indexed_items = rebuilt.get("items") or []
    return {
        "kind": "knowledge-index",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "rebuilt",
        "path": str(index_file),
        "count": len(indexed_items),
    }
297
+
298
+
def knowledge_snapshot_list(project: Path | None = None) -> dict[str, Any]:
    """List snapshot markdown files from the pending, accepted and rejected buckets.

    Buckets are visited in that order and files within a bucket are sorted
    by path. Each file is summarised with ``snapshot_item``.
    """
    kb_root = require_knowledge_root(project)
    snapshots_dir = kb_root / "snapshots"
    items = [
        snapshot_item(kb_root, snapshot_file, bucket)
        for bucket in ("pending", "accepted", "rejected")
        for snapshot_file in sorted((snapshots_dir / bucket).glob("*.md"))
    ]
    return {
        "kind": "knowledge-snapshots",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "ok",
        "path": str(snapshots_dir),
        "count": len(items),
        "items": items,
    }
314
+
315
+
def knowledge_snapshot_show(snapshot_id: str | None, project: Path | None = None) -> dict[str, Any]:
    """Return one snapshot's summary and full text.

    The snapshot is located with ``find_snapshot``. The bucket it was found
    in is reported as ``status``. Undecodable bytes in the file are replaced
    rather than raising.
    """
    kb_root = require_knowledge_root(project)
    snapshot_file, bucket = find_snapshot(kb_root, snapshot_id)
    content = snapshot_file.read_text(encoding="utf-8", errors="replace")
    summary = snapshot_item(kb_root, snapshot_file, bucket)
    return {
        "kind": "knowledge-snapshot",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": bucket,
        "snapshot_id": snapshot_file.stem,
        "path": str(snapshot_file),
        "snapshot": summary,
        "content": content,
    }
329
+
330
+
def knowledge_snapshot_score(snapshot_id: str | None, project: Path | None = None) -> dict[str, Any]:
    """Score a snapshot and decide whether it may be submitted.

    Findings are gathered from the stored metadata (when it holds a list),
    ``scan_text``, ``knowledge_policy_findings``,
    ``duplicate_snapshot_findings`` and, when the prompt-injection severity
    is not ``"none"``, one extra prompt-injection finding.

    The score starts at 40, gains one point per token up to 60, loses 25 per
    finding, and is clamped to 0..100. Any finding yields ``"blocked"``;
    otherwise a score below 70 yields ``"review"`` and anything else
    ``"submit"``.
    """
    kb_root = require_knowledge_root(project)
    snapshot_file, bucket = find_snapshot(kb_root, snapshot_id)
    resolved_id = snapshot_file.stem
    text = snapshot_file.read_text(encoding="utf-8", errors="replace")

    metadata = read_snapshot_metadata(kb_root, bucket, resolved_id)
    stored_findings = metadata.get("findings")
    findings = list(stored_findings) if isinstance(stored_findings, list) else []
    findings.extend(scan_text(text))
    findings.extend(knowledge_policy_findings(text))
    findings.extend(duplicate_snapshot_findings(kb_root, resolved_id, text))

    block = external_content_block(f"knowledge-snapshot:{resolved_id}", "markdown", text)
    severity = block["severity"]
    if severity != "none":
        findings.append({"reason": "prompt-injection", "severity": severity, "markers": block["detected_injection_markers"]})

    token_bonus = min(60, len(tokenize(text)))
    finding_penalty = 25 * len(findings)
    score = max(0, min(100, 40 + token_bonus - finding_penalty))

    if findings:
        decision = "blocked"
    elif score < 70:
        decision = "review"
    else:
        decision = "submit"

    return {
        "kind": "knowledge-snapshot-score",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "ok",
        "snapshot_id": resolved_id,
        "bucket": bucket,
        "score": score,
        "decision": decision,
        "findings": findings,
        "prompt_injection": {
            "severity": severity,
            "markers": block["detected_injection_markers"],
        },
    }
365
+
366
+
def knowledge_snapshot_create(
    *,
    title: str | None,
    content: str | None,
    from_file: str | None,
    entry_type: str | None,
    project: Path | None = None,
) -> dict[str, Any]:
    """Write a new pending snapshot together with its metadata record.

    The body comes from ``snapshot_content(content, from_file)``. Findings
    and the prompt-injection assessment are computed on that raw text, while
    the markdown file receives the output of ``sanitize_snapshot_content``.
    The id is the slugified title; if that file already exists in the
    pending bucket, a UTC timestamp (second resolution) is appended.
    ``entry_type`` defaults to ``"runbook"``.

    Raises:
        DevKitError: if ``title`` is empty or ``None``.
    """
    kb_root = require_knowledge_root(project)
    raw_content = snapshot_content(content, from_file)
    if not title:
        raise DevKitError("knowledge snapshot create requires --title")

    findings = scan_text(raw_content)
    block = external_content_block("knowledge-snapshot", "markdown", raw_content)
    severity = block["severity"]
    markers = block["detected_injection_markers"]

    pending_dir = kb_root / "snapshots" / "pending"
    snapshot_id = slugify(title)
    snapshot_file = pending_dir / f"{snapshot_id}.md"
    if snapshot_file.exists():
        # Keep the existing pending snapshot; disambiguate with a timestamp.
        suffix = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')
        snapshot_id = f"{snapshot_id}-{suffix}"
        snapshot_file = pending_dir / f"{snapshot_id}.md"

    resolved_type = entry_type or "runbook"
    created_at = datetime.now(timezone.utc).isoformat()
    frontmatter = {
        "snapshot_id": snapshot_id,
        "title": title,
        "type": resolved_type,
        "created_at": created_at,
        "status": "pending",
        "prompt_injection_severity": severity,
    }
    rendered = render_snapshot(frontmatter, sanitize_snapshot_content(raw_content))
    snapshot_file.write_text(rendered, encoding="utf-8")

    metadata_record = {
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "snapshot_id": snapshot_id,
        "title": title,
        "type": resolved_type,
        "status": "pending",
        "findings": findings,
        "prompt_injection": {"severity": severity, "markers": markers},
        "created_at": frontmatter["created_at"],
    }
    write_json(snapshot_metadata_path(kb_root, "pending", snapshot_id), metadata_record)

    return {
        "kind": "knowledge-snapshot",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "created",
        "snapshot_id": snapshot_id,
        "path": str(snapshot_file),
        "findings": findings,
        "prompt_injection": {"severity": severity, "markers": markers},
        "review_required": True,
    }
426
+
427
+
def knowledge_snapshot_submit(snapshot_id: str | None, project: Path | None = None) -> dict[str, Any]:
    """Gate a pending snapshot on its score before it goes to review.

    Returns a ``"blocked"`` payload when ``knowledge_snapshot_score`` decides
    ``"blocked"``; otherwise a ``"pending-review"`` payload with the next
    step. Nothing is written here, and ``remote_connected`` is reported as
    false.
    """
    kb_root = require_knowledge_root(project)
    snapshot_file = snapshot_path(kb_root, "pending", require_snapshot_id(snapshot_id))
    resolved_id = snapshot_file.stem
    score = knowledge_snapshot_score(resolved_id, project)

    if score["decision"] != "blocked":
        return {
            "kind": "knowledge-snapshot-submit",
            "schema_version": KNOWLEDGE_SCHEMA_VERSION,
            "status": "pending-review",
            "snapshot_id": resolved_id,
            "path": str(snapshot_file),
            "score": score,
            "remote_connected": False,
            "review_required": True,
            "next_steps": [f"Review with `agent knowledge review {resolved_id}`."],
        }

    return {
        "kind": "knowledge-snapshot-submit",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "blocked",
        "snapshot_id": resolved_id,
        "score": score,
        "reason": "snapshot_score_blocked",
    }
452
+
453
+
def knowledge_review_list(project: Path | None = None) -> dict[str, Any]:
    """List review records stored under ``reviews/{pending,approved,rejected}``.

    Files that are not valid JSON, or whose top-level value is not an
    object, are skipped. Each item starts with the file's path relative to
    the root and its bucket, followed by the stored payload, whose keys take
    precedence on collision.
    """
    kb_root = require_knowledge_root(project)
    reviews_dir = kb_root / "reviews"

    items = []
    for bucket in ("pending", "approved", "rejected"):
        for review_file in sorted((reviews_dir / bucket).glob("*.json")):
            try:
                payload = json.loads(review_file.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                # Best effort: one corrupt record must not hide the others.
                continue
            if not isinstance(payload, dict):
                continue
            record = {"path": str(review_file.relative_to(kb_root)), "bucket": bucket}
            record.update(payload)
            items.append(record)

    return {
        "kind": "knowledge-reviews",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "ok",
        "path": str(reviews_dir),
        "count": len(items),
        "items": items,
    }
474
+
475
+
def knowledge_review(snapshot_id: str | None, project: Path | None = None, *, persist: bool = True) -> dict[str, Any]:
    """Review a pending snapshot and optionally persist the verdict.

    Findings are gathered from the stored metadata (when it holds a list),
    ``scan_text``, ``knowledge_policy_findings``,
    ``duplicate_snapshot_findings`` and, when the prompt-injection severity
    is not ``"none"``, one extra prompt-injection finding. The snapshot is
    ``"approved"`` only when there are no findings, otherwise ``"rejected"``.

    Args:
        snapshot_id: Identifier of a snapshot in the pending bucket.
        project: Project location forwarded to ``require_knowledge_root``.
        persist: When true, write the review JSON under
            ``reviews/approved`` or ``reviews/rejected`` and record an audit
            event. When false, nothing is written and ``path`` is ``None``.

    Returns:
        A ``knowledge-review`` payload with the verdict, the review record,
        whether it was persisted, and the review file path.

    Raises:
        DevKitError: if ``snapshot_id`` is empty or ``None``.
    """
    if not snapshot_id:
        raise DevKitError("knowledge review requires a snapshot id")
    root = require_knowledge_root(project)
    path = snapshot_path(root, "pending", snapshot_id)
    # Use the resolved file stem as the single canonical id. The metadata and
    # duplicate lookups are already keyed by it, so naming the review file,
    # payload and audit record from the raw argument could make them disagree
    # with the snapshot they describe.
    canonical_id = path.stem
    text = path.read_text(encoding="utf-8")

    metadata = read_snapshot_metadata(root, "pending", canonical_id)
    stored_findings = metadata.get("findings")
    metadata_findings = stored_findings if isinstance(stored_findings, list) else []
    findings = [
        *metadata_findings,
        *scan_text(text),
        *knowledge_policy_findings(text),
        *duplicate_snapshot_findings(root, canonical_id, text),
    ]
    block = external_content_block(f"knowledge-snapshot:{canonical_id}", "markdown", text)
    if block["severity"] != "none":
        findings.append({"reason": "prompt-injection", "severity": block["severity"], "markers": block["detected_injection_markers"]})

    passed = not findings
    verdict = "approved" if passed else "rejected"
    review_payload = {
        "snapshot_id": canonical_id,
        "status": verdict,
        "findings": findings,
        "prompt_injection": {
            "severity": block["severity"],
            "markers": block["detected_injection_markers"],
        },
        "reviewed_at": datetime.now(timezone.utc).isoformat(),
    }

    review_path = None
    if persist:
        review_dir = root / "reviews" / verdict
        review_dir.mkdir(parents=True, exist_ok=True)
        review_path = review_dir / f"{canonical_id}.json"
        review_path.write_text(json.dumps(review_payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8")
        # The audit record references the review file, so it is only written
        # when that file exists.
        write_knowledge_audit(
            root,
            event="review",
            snapshot_id=canonical_id,
            decision=review_payload["status"],
            actor="knowledge-reviewer",
            content=text,
            findings=findings,
            metadata={
                "review_path": str(review_path.relative_to(root)),
                "prompt_injection": review_payload["prompt_injection"],
            },
        )

    return {
        "kind": "knowledge-review",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": verdict,
        "snapshot_id": canonical_id,
        "review": review_payload,
        "persisted": persist,
        "path": str(review_path) if review_path else None,
    }
532
+
533
+
534
def knowledge_curate(project: Path | None = None) -> dict[str, Any]:
    """Report snapshots that share a title and summarise the index state.

    Snapshots are grouped by their lower-cased, stripped title (falling back
    to the snapshot id); every group with more than one member becomes a
    ``duplicate-title`` finding.
    """
    root = require_knowledge_root(project)
    grouped: dict[str, list[dict[str, Any]]] = {}
    for snapshot in knowledge_snapshot_list(project)["items"]:
        key = str(snapshot.get("title") or snapshot.get("snapshot_id") or "").strip().lower()
        if not key:
            continue
        grouped.setdefault(key, []).append(snapshot)

    findings = []
    for key, members in sorted(grouped.items()):
        if len(members) <= 1:
            continue
        findings.append(
            {
                "reason": "duplicate-title",
                "title": key,
                "paths": [str(member.get("path")) for member in members],
            }
        )

    index = knowledge_index(project)
    index_summary = {"status": index["status"], "count": index["count"]}
    return {
        "kind": "knowledge-curation",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "ok",
        "path": str(root),
        "findings": findings,
        "index": index_summary,
        "next_steps": ["Review duplicate or obsolete entries before publishing curatorial changes."],
    }
561
+
562
+
563
def knowledge_publish(snapshot_id: str | None, project: Path | None = None, *, yes: bool = False, owner_agent: str | None = None) -> dict[str, Any]:
    """Publish a pending snapshot into the accepted bucket after review.

    Without ``yes`` the call is a dry run: the snapshot is reviewed without
    persisting anything and a "planned" payload is returned. With ``yes`` the
    caller must name the owner agent configured in the manifest; the review
    is persisted, the snapshot (and its metadata file, if present) is moved
    to ``snapshots/accepted``, the lexical index is rebuilt and an audit
    event is written.

    Args:
        snapshot_id: Identifier of the pending snapshot.
        project: Project directory holding the knowledge base.
        yes: Confirm the publish; when false only a plan is returned.
        owner_agent: Agent name asserted by the caller; must equal the
            manifest's owner agent when ``yes`` is true.

    Returns:
        A ``knowledge-publish`` payload whose ``status`` is "blocked",
        "planned" or "published".

    Raises:
        DevKitError: If ``snapshot_id`` is empty, the knowledge base is not
            initialized, or the pending snapshot does not exist.
    """
    if not snapshot_id:
        raise DevKitError("knowledge publish requires a snapshot id")
    root = require_knowledge_root(project)
    pending = snapshot_path(root, "pending", snapshot_id)
    owner_required = owner_agent_required(root)
    if yes and owner_agent != owner_required:
        return {
            "kind": "knowledge-publish",
            "schema_version": KNOWLEDGE_SCHEMA_VERSION,
            "status": "blocked",
            "snapshot_id": snapshot_id,
            "reason": "owner_agent_required",
            "owner_agent_required": owner_required,
            "owner_agent": owner_agent,
            "exit_code": 2,
        }
    # Only persist the review when the publish is actually confirmed.
    review = knowledge_review(snapshot_id, project, persist=yes)
    if review["status"] != "approved":
        return {
            "kind": "knowledge-publish",
            "schema_version": KNOWLEDGE_SCHEMA_VERSION,
            "status": "blocked",
            "snapshot_id": snapshot_id,
            "review": review,
            "reason": "snapshot_review_failed",
        }
    if not yes:
        return {
            "kind": "knowledge-publish",
            "schema_version": KNOWLEDGE_SCHEMA_VERSION,
            "status": "planned",
            "snapshot_id": snapshot_id,
            "review": review,
            "owner_agent_required": owner_required,
            # Quote the owner configured in the manifest; a hard-coded name
            # would send the user into the "owner_agent_required" block above
            # whenever the manifest uses a different owner agent.
            "next_steps": [f"Re-run with `--yes --owner-agent {owner_required}` to publish this approved snapshot locally."],
        }
    accepted = root / "snapshots" / "accepted" / pending.name
    accepted.parent.mkdir(parents=True, exist_ok=True)
    # Read the content before moving so the audit record can hash it.
    content = pending.read_text(encoding="utf-8", errors="replace")
    shutil.move(str(pending), accepted)
    pending_metadata = snapshot_metadata_path(root, "pending", snapshot_id)
    if pending_metadata.exists():
        accepted_metadata = snapshot_metadata_path(root, "accepted", snapshot_id)
        accepted_metadata.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(pending_metadata), accepted_metadata)
    rebuild_lexical_index(root)
    write_knowledge_audit(
        root,
        event="publish",
        snapshot_id=snapshot_id,
        decision="published",
        actor=owner_required,
        content=content,
        findings=(review.get("review") or {}).get("findings") or [],
        metadata={
            "accepted_path": str(accepted.relative_to(root)),
            "review_status": review.get("status"),
        },
    )
    return {
        "kind": "knowledge-publish",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "published",
        "snapshot_id": snapshot_id,
        "path": str(accepted),
        "review": review,
    }
631
+
632
+
633
def knowledge_sync(project: Path | None = None) -> dict[str, Any]:
    """Describe the sync state of the knowledge base; never performs a sync.

    Returns a "local-only" payload for the ``knowledge-local`` provider and a
    "planned" payload for any other configured provider.
    """
    root = require_knowledge_root(project)
    storage = read_manifest(root).get("storage")
    if not isinstance(storage, dict):
        storage = {}
    provider = storage.get("provider") or "knowledge-local"

    if provider != "knowledge-local":
        return {
            "kind": "knowledge-sync",
            "schema_version": KNOWLEDGE_SCHEMA_VERSION,
            "status": "planned",
            "executed": False,
            "provider": provider,
            "remote_connected": False,
            "next_steps": ["Configure provider credentials by reference and opt in before remote sync."],
        }

    return {
        "kind": "knowledge-sync",
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "status": "local-only",
        "executed": False,
        "provider": provider,
        "message": "Local file-first knowledge base does not require remote sync.",
    }
656
+
657
+
658
def owner_agent_required(root: Path) -> str:
    """Return the manifest's ``owner_agent``, defaulting to "knowledge-owner"."""
    configured = read_manifest(root).get("owner_agent")
    return str(configured or "knowledge-owner")
661
+
662
+
663
def knowledge_root(project: Path | None = None) -> Path:
    """Return the knowledge-base directory under *project* (or the cwd)."""
    base = project if project else Path.cwd()
    return base.resolve() / DEFAULT_KB_DIR
665
+
666
+
667
def require_knowledge_root(project: Path | None = None) -> Path:
    """Return the knowledge-base root, raising if ``kb.yaml`` is missing."""
    root = knowledge_root(project)
    manifest_file = root / "kb.yaml"
    if manifest_file.exists():
        return root
    raise DevKitError("knowledge base not initialized. Run `agent knowledge init` first.")
672
+
673
+
674
def create_structure(root: Path) -> None:
    """Create the knowledge-base directory layout and placeholder indexes."""
    layout = (
        "policies",
        "entries",
        "snapshots/pending",
        "snapshots/accepted",
        "snapshots/rejected",
        "reviews/pending",
        "reviews/approved",
        "reviews/rejected",
        "indexes",
        "audit",
        "manifests",
    )
    for relative in layout:
        (root / relative).mkdir(parents=True, exist_ok=True)

    entries_root = root / "entries"
    for entry_dir in ENTRY_DIRS:
        (entries_root / entry_dir).mkdir(parents=True, exist_ok=True)

    initialize_derived_indexes(root)
692
+
693
+
694
def default_manifest() -> dict[str, Any]:
    """Build the manifest for a freshly created local knowledge base."""
    timestamp = datetime.now(timezone.utc).isoformat()
    manifest: dict[str, Any] = {
        "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
        "kb_id": new_kb_id(),
        "name": "Local Knowledge Base",
        "description": "File-first shared knowledge base",
        "owner_agent": "knowledge-owner",
    }
    manifest["storage"] = {"provider": "knowledge-local", "location": DEFAULT_KB_DIR}
    manifest["indexes"] = {
        "lexical": {"enabled": True},
        "semantic": {"enabled": False, "provider": None},
    }
    manifest["cache"] = {"local_ttl_minutes": 1440, "remote_ttl_minutes": 240}
    manifest["policies"] = {
        "contribution": "policies/contribution-policy.md",
        "review": "policies/review-policy.md",
        "security": "policies/security-policy.md",
    }
    manifest["permissions"] = token_permissions()
    # Creation and update stamps start out identical.
    manifest["created_at"] = timestamp
    manifest["updated_at"] = timestamp
    return manifest
714
+
715
+
716
def new_kb_id() -> str:
    """Return a new id: ``kb_`` followed by 20 random hex characters."""
    suffix = secrets.token_hex(10)
    return "kb_" + suffix
718
+
719
+
720
def token_permissions() -> dict[str, str]:
    """Map every token scope to its ``secret-ref:`` reference string."""
    permissions: dict[str, str] = {}
    for scope in TOKEN_SCOPES:
        permissions[scope] = f"secret-ref:knowledge-base/{scope}"
    return permissions
722
+
723
+
724
def tokens_path(root: Path) -> Path:
    """Return the location of ``manifests/tokens.json`` under *root*."""
    return root.joinpath("manifests", "tokens.json")
726
+
727
+
728
def ensure_token_refs(root: Path) -> dict[str, Any]:
    """Load the token reference file, filling in any missing scope entries.

    Every scope in ``TOKEN_SCOPES`` that lacks a dict entry gets a fresh one
    from ``token_ref``. The file is (re)written when an entry was added or
    the file did not exist yet.

    Args:
        root: Knowledge-base root directory.

    Returns:
        The token payload (schema version, ``stored_values`` flag, tokens and
        ``updated_at``).
    """
    path = tokens_path(root)
    data: Any = {}
    if path.exists():
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            data = {}
    # Valid JSON that is not an object (a list, a string, ...) is treated like
    # an unreadable file instead of crashing on ``data.get`` below.
    if not isinstance(data, dict):
        data = {}
    tokens = data.get("tokens") if isinstance(data.get("tokens"), dict) else {}
    changed = False
    for scope in TOKEN_SCOPES:
        if not isinstance(tokens.get(scope), dict):
            tokens[scope] = token_ref(scope)
            changed = True
    payload = {
        "schema_version": KNOWLEDGE_BASE_SCHEMA_VERSION,
        "stored_values": False,
        "tokens": tokens,
        "updated_at": data.get("updated_at") or now_iso(),
    }
    if changed or not path.exists():
        write_json(path, payload)
    return payload
752
+
753
+
754
def token_ref(scope: str) -> dict[str, Any]:
    """Create a new token reference record for *scope*.

    The record carries a random fingerprint and a rotation timestamp; it
    always reports ``value_stored`` as false.
    """
    reference = f"secret-ref:knowledge-base/{scope}"
    fingerprint = secrets.token_hex(8)
    rotated_at = now_iso()
    return {
        "scope": scope,
        "ref": reference,
        "fingerprint": fingerprint,
        "value_stored": False,
        "rotated_at": rotated_at,
    }
762
+
763
+
764
def public_tokens(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the public view of every scope's token, in scope order."""
    stored = payload.get("tokens")
    if not isinstance(stored, dict):
        stored = {}
    views = []
    for scope in TOKEN_SCOPES:
        entry = stored.get(scope)
        # Non-dict entries are presented as if the scope had no record.
        views.append(public_token(scope, entry if isinstance(entry, dict) else {}))
    return views
767
+
768
+
769
def public_token(scope: str, token: dict[str, Any]) -> dict[str, Any]:
    """Return the fields of *token* that are exposed to callers.

    ``ref`` falls back to the scope's default reference string and
    ``value_stored`` is always reported as false.
    """
    reference = token.get("ref") or f"secret-ref:knowledge-base/{scope}"
    view: dict[str, Any] = {"scope": scope, "ref": reference}
    view["fingerprint"] = token.get("fingerprint")
    view["value_stored"] = False
    view["rotated_at"] = token.get("rotated_at")
    return view
777
+
778
+
779
def read_manifest(root: Path) -> dict[str, Any]:
    """Load ``kb.yaml`` from *root* as a dict.

    PyYAML is used when importable; otherwise the file is parsed as JSON.
    Content that does not parse to a mapping yields an empty dict.

    Raises:
        DevKitError: If ``kb.yaml`` does not exist.
    """
    manifest_file = root / "kb.yaml"
    if not manifest_file.exists():
        raise DevKitError("knowledge base not initialized. Run `agent knowledge-base create` first.")
    raw = manifest_file.read_text(encoding="utf-8")
    try:
        import yaml  # type: ignore

        parsed = yaml.safe_load(raw) or {}
    except ImportError:
        parsed = json.loads(raw)
    if isinstance(parsed, dict):
        return parsed
    return {}
790
+
791
+
792
def public_manifest(manifest: dict[str, Any]) -> dict[str, Any]:
    """Return the subset of manifest fields that is exposed to callers.

    Scalar fields are copied as-is (``None`` when absent); the mapping
    fields default to an empty dict when absent or falsy.
    """
    view: dict[str, Any] = {}
    for field in ("kb_id", "name", "description", "owner_agent"):
        view[field] = manifest.get(field)
    for field in ("storage", "indexes", "cache", "permissions"):
        view[field] = manifest.get(field) or {}
    for field in ("created_at", "updated_at"):
        view[field] = manifest.get(field)
    return view
805
+
806
+
807
def require_scope(scope: str | None) -> str:
    """Normalise a token scope name, resolving aliases.

    Raises:
        DevKitError: If *scope* is empty or not a member of ``TOKEN_SCOPES``
            after alias resolution.
    """
    if not scope:
        raise DevKitError("knowledge-base rotate-token requires a scope")
    normalized = scope.strip().lower()
    if normalized in ("approve", "owner_key"):
        normalized = "owner"
    elif normalized == "contribution":
        normalized = "contribute"
    if normalized in TOKEN_SCOPES:
        return normalized
    raise DevKitError(f"unsupported knowledge-base token scope: {scope}")
816
+
817
+
818
def normalize_knowledge_provider(provider: str | None) -> str:
    """Resolve a provider name or alias to a supported provider id.

    A missing provider is treated as "local".

    Raises:
        DevKitError: If the resolved id is not in ``KNOWLEDGE_PROVIDERS``.
    """
    requested = (provider or "local").strip().lower()
    resolved = KNOWLEDGE_PROVIDER_ALIASES.get(requested, requested)
    if resolved in KNOWLEDGE_PROVIDERS:
        return resolved
    supported = ", ".join(sorted(KNOWLEDGE_PROVIDER_ALIASES))
    raise DevKitError(f"unsupported knowledge provider: {provider}. Supported: {supported}")
825
+
826
+
827
def rebuild_lexical_index(root: Path) -> Path:
    """Rebuild ``indexes/lexical.json`` from entries and accepted snapshots.

    Every ``.md``/``.json``/``.yaml``/``.yml`` file under ``entries`` and
    ``snapshots/accepted`` contributes its relative path, title and sorted
    token list. Returns the path of the written index file.
    """
    indexed_suffixes = {".md", ".json", ".yaml", ".yml"}
    records = []
    for base in (root / "entries", root / "snapshots" / "accepted"):
        if not base.exists():
            continue
        for candidate in sorted(base.rglob("*")):
            if not candidate.is_file():
                continue
            if candidate.suffix.lower() not in indexed_suffixes:
                continue
            text = candidate.read_text(encoding="utf-8", errors="replace")
            records.append(
                {
                    "path": str(candidate.relative_to(root)),
                    "title": title_for(candidate, text),
                    "tokens": sorted(tokenize(text)),
                }
            )

    payload = {
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "items": records,
        "rebuilt_at": datetime.now(timezone.utc).isoformat(),
    }
    index_path = root / "indexes" / "lexical.json"
    index_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)
    index_path.write_text(serialized + "\n", encoding="utf-8")
    initialize_derived_indexes(root)
    return index_path
842
+
843
+
844
def initialize_derived_indexes(root: Path) -> None:
    """Create placeholder ``semantic.json`` and ``chunks.jsonl`` if missing.

    Existing files are left untouched.
    """
    indexes_dir = root / "indexes"
    indexes_dir.mkdir(parents=True, exist_ok=True)

    semantic_file = indexes_dir / "semantic.json"
    if not semantic_file.exists():
        placeholder = {
            "schema_version": KNOWLEDGE_SCHEMA_VERSION,
            "enabled": False,
            "provider": None,
            "derived": True,
            "items": [],
            "rebuilt_at": None,
        }
        serialized = json.dumps(placeholder, ensure_ascii=False, indent=2, sort_keys=True)
        semantic_file.write_text(serialized + "\n", encoding="utf-8")

    chunks_file = indexes_dir / "chunks.jsonl"
    if not chunks_file.exists():
        chunks_file.write_text("", encoding="utf-8")
861
+
862
+
863
+ def snapshot_content(content: str | None, from_file: str | None) -> str:
864
+ if from_file:
865
+ return Path(from_file).expanduser().resolve().read_text(encoding="utf-8", errors="replace")
866
+ if content:
867
+ return content
868
+ raise DevKitError("knowledge snapshot create requires --content or --from-file")
869
+
870
+
871
def sanitize_snapshot_content(text: str) -> str:
    """Redact secrets and PII-like material from snapshot text.

    Applies ``redact_secrets``, then masks each ``SECRET_PATTERN`` match via
    ``redact_secret_assignment``, then replaces ``PII_PATTERN`` matches.
    """

    def mask(match):
        return redact_secret_assignment(match.group(0))

    without_secrets = SECRET_PATTERN.sub(mask, redact_secrets(text))
    return PII_PATTERN.sub("[REDACTED_PII]", without_secrets)
875
+
876
+
877
def redact_secret_assignment(value: str) -> str:
    """Replace the value side of a ``key=value`` / ``key: value`` string.

    The string is split at whichever of ``=`` or ``:`` occurs first, so that
    a separator character inside the secret itself (for example base64
    ``=`` padding in ``token: dGVzdA==``) is never mistaken for the
    assignment separator and part of the secret left in the output.

    Args:
        value: The matched assignment text.

    Returns:
        The key, the separator and the ``[REDACTED_SECRET]`` placeholder.
    """
    candidates = [(value.index(mark), mark) for mark in ("=", ":") if mark in value]
    if candidates:
        position, separator = min(candidates)
        prefix = value[:position].rstrip()
    else:
        # No separator at all: keep the historical output shape.
        # NOTE(review): this keeps the whole input in the output; confirm
        # SECRET_PATTERN can never match text without a separator.
        separator = ":"
        prefix = value.rstrip()
    return f"{prefix}{separator}[REDACTED_SECRET]"
881
+
882
+
883
def snapshot_path(root: Path, bucket: str, snapshot_id: str) -> Path:
    """Return the existing ``.md`` file of a snapshot in *bucket*.

    The file name is the slug of *snapshot_id*.

    Raises:
        DevKitError: If the file does not exist.
    """
    candidate = root / "snapshots" / bucket / (slugify(snapshot_id) + ".md")
    if candidate.exists():
        return candidate
    raise DevKitError(f"knowledge snapshot not found: {snapshot_id}")
888
+
889
+
890
def snapshot_metadata_path(root: Path, bucket: str, snapshot_id: str) -> Path:
    """Return where a snapshot's ``.json`` metadata lives (may not exist)."""
    file_name = slugify(snapshot_id) + ".json"
    return root / "snapshots" / bucket / file_name
892
+
893
+
894
def read_snapshot_metadata(root: Path, bucket: str, snapshot_id: str) -> dict[str, Any]:
    """Load a snapshot's metadata dict.

    Returns an empty dict when the file is missing, is not valid JSON, or
    does not contain a JSON object.
    """
    metadata_file = snapshot_metadata_path(root, bucket, snapshot_id)
    if not metadata_file.exists():
        return {}
    try:
        loaded = json.loads(metadata_file.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return {}
    if isinstance(loaded, dict):
        return loaded
    return {}
903
+
904
+
905
def find_snapshot(root: Path, snapshot_id: str | None) -> tuple[Path, str]:
    """Locate a snapshot file, searching pending, accepted, then rejected.

    Returns:
        The snapshot path and the name of the bucket it was found in.

    Raises:
        DevKitError: If the id is empty or no bucket holds the snapshot.
    """
    file_name = slugify(require_snapshot_id(snapshot_id)) + ".md"
    snapshots_dir = root / "snapshots"
    for bucket in ("pending", "accepted", "rejected"):
        candidate = snapshots_dir / bucket / file_name
        if candidate.exists():
            return candidate, bucket
    raise DevKitError(f"knowledge snapshot not found: {snapshot_id}")
912
+
913
+
914
+ def require_snapshot_id(snapshot_id: str | None) -> str:
915
+ if not snapshot_id:
916
+ raise DevKitError("knowledge snapshot id is required")
917
+ return snapshot_id
918
+
919
+
920
def snapshot_item(root: Path, path: Path, bucket: str) -> dict[str, Any]:
    """Summarise one snapshot file: id, title, bucket, relative path, size."""
    text = path.read_text(encoding="utf-8", errors="replace")
    title = title_for(path, text)
    relative = str(path.relative_to(root))
    size = path.stat().st_size
    return {
        "snapshot_id": path.stem,
        "title": title,
        "bucket": bucket,
        "path": relative,
        "bytes": size,
    }
929
+
930
+
931
def render_snapshot(frontmatter: dict[str, Any], content: str) -> str:
    """Render a snapshot document: ``key: value`` front matter, then the body.

    The body is stripped of surrounding whitespace and terminated with a
    single newline.
    """
    header_lines = [f"{key}: {value}" for key, value in frontmatter.items()]
    header = "\n".join(header_lines)
    body = content.strip()
    return f"---\n{header}\n---\n\n{body}\n"
933
+
934
+
935
def scan_tree(root: Path) -> list[dict[str, Any]]:
    """Run ``scan_text`` over every text-like file under *root*.

    Each finding is returned with the file's path relative to *root*.
    """
    scanned_suffixes = {".md", ".json", ".yaml", ".yml", ".txt"}
    findings: list[dict[str, Any]] = []
    for candidate in sorted(root.rglob("*")):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in scanned_suffixes:
            continue
        text = candidate.read_text(encoding="utf-8", errors="replace")
        findings.extend(
            {"path": str(candidate.relative_to(root)), **finding}
            for finding in scan_text(text)
        )
    return findings
942
+
943
+
944
def scan_text(text: str) -> list[dict[str, Any]]:
    """Return findings for secret-like, PII-like or prompt-injection content."""
    findings: list[dict[str, Any]] = []
    pattern_checks = (
        (SECRET_PATTERN, "secret-like-material"),
        (PII_PATTERN, "pii-like-material"),
    )
    for pattern, reason in pattern_checks:
        if pattern.search(text):
            findings.append({"reason": reason})

    block = external_content_block("knowledge-scan", "text", text)
    severity = block["severity"]
    if severity != "none":
        findings.append(
            {
                "reason": "prompt-injection",
                "severity": severity,
                "markers": block["detected_injection_markers"],
            }
        )
    return findings
954
+
955
+
956
def knowledge_policy_findings(text: str) -> list[dict[str, Any]]:
    """Check a snapshot body against the contribution policy heuristics.

    Flags empty bodies, personal-memory content, very short bodies and short
    conversational bodies; the last two only apply when the body shares no
    token with ``REUSABLE_KNOWLEDGE_TOKENS``.
    """
    body = snapshot_body(text)
    tokens = tokenize(body)
    if not tokens:
        return [{"reason": "low-recurring-utility", "detail": "empty-content"}]

    reusable = bool(tokens & REUSABLE_KNOWLEDGE_TOKENS)
    token_count = len(tokens)
    findings: list[dict[str, Any]] = []
    if PERSONAL_MEMORY_PATTERN.search(body):
        findings.append({"reason": "personal-memory-content"})
    if not reusable:
        if token_count < 6:
            findings.append({"reason": "low-recurring-utility", "token_count": token_count})
        if token_count <= 14 and bool(tokens & CONVERSATIONAL_TOKENS):
            findings.append({"reason": "purely-conversational-content", "token_count": token_count})
    return findings
970
+
971
+
972
def duplicate_snapshot_findings(root: Path, snapshot_id: str, text: str) -> list[dict[str, Any]]:
    """Find accepted or pending snapshots whose body has the same token set.

    The snapshot identified by *snapshot_id* itself is skipped. Returns a
    single ``duplicate-content`` finding listing the matching relative
    paths, or an empty list.
    """
    fingerprint = content_fingerprint(snapshot_body(text))
    if not fingerprint:
        return []

    own_stem = slugify(snapshot_id)
    matches = []
    for bucket in ("accepted", "pending"):
        for candidate in sorted((root / "snapshots" / bucket).glob("*.md")):
            if candidate.stem == own_stem:
                continue
            other = candidate.read_text(encoding="utf-8", errors="replace")
            if content_fingerprint(snapshot_body(other)) == fingerprint:
                matches.append(str(candidate.relative_to(root)))

    if matches:
        return [{"reason": "duplicate-content", "matches": matches}]
    return []
988
+
989
+
990
def snapshot_body(text: str) -> str:
    """Return the part of a snapshot document that follows its front matter.

    Front matter is recognised when the text (ignoring leading whitespace)
    starts with ``---`` and a later line consists solely of ``---``. The
    closing delimiter must be a line of its own, so a ``---`` sequence inside
    a front-matter value no longer cuts the header short and leaks the
    remaining header lines into the body.

    Args:
        text: Full snapshot text.

    Returns:
        Everything after the closing ``---`` (starting with the rest of that
        line), or *text* unchanged when no complete front matter is present.
    """
    stripped = text.lstrip()
    if not stripped.startswith("---"):
        return text
    offset = 0
    for index, line in enumerate(stripped.splitlines(keepends=True)):
        # index 0 is the opening delimiter; look for the closing one after it.
        if index and line.rstrip() == "---":
            return stripped[offset + 3:]
        offset += len(line)
    return text
998
+
999
+
1000
def content_fingerprint(text: str) -> str:
    """Return the sorted, space-joined unique tokens of *text*."""
    ordered_tokens = sorted(tokenize(text))
    return " ".join(ordered_tokens)
1002
+
1003
+
1004
def tokenize(text: str) -> set[str]:
    """Return the set of lower-cased tokens of two or more characters.

    A token is a run of ASCII letters, digits, ``_``, ``-`` or characters in
    the ``À``-``ÿ`` range.
    """
    # The {2,} quantifier already guarantees non-empty matches.
    return set(re.findall(r"[a-zA-Z0-9_À-ÿ-]{2,}", text.lower()))
1006
+
1007
+
1008
def title_for(path: Path, text: str) -> str:
    """Return the text of the first ``#`` line, or the file stem.

    The stem is also used when that first ``#`` line has no text after the
    hash marks.
    """
    heading = next(
        (line.strip() for line in text.splitlines() if line.strip().startswith("#")),
        None,
    )
    if heading is None:
        return path.stem
    return heading.lstrip("#").strip() or path.stem
1014
+
1015
+
1016
def slugify(value: str) -> str:
    """Lower-case *value* and collapse non-alphanumeric runs into ``-``.

    Leading and trailing dashes are removed; an empty result becomes
    "snapshot".
    """
    lowered = value.strip().lower()
    dashed = re.sub(r"[^a-zA-Z0-9]+", "-", lowered)
    trimmed = dashed.strip("-")
    if trimmed:
        return trimmed
    return "snapshot"
1019
+
1020
+
1021
def write_yaml(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as YAML, or as JSON when PyYAML is missing."""
    try:
        import yaml  # type: ignore

        rendered = yaml.safe_dump(payload, allow_unicode=True, sort_keys=False)
    except ImportError:
        # JSON fallback keeps the file loadable by ``json.loads``.
        rendered = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
    path.write_text(rendered, encoding="utf-8")
1029
+
1030
+
1031
def write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* as indented, key-sorted JSON, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)
    path.write_text(f"{serialized}\n", encoding="utf-8")
1034
+
1035
+
1036
def write_knowledge_audit(
    root: Path,
    *,
    event: str,
    snapshot_id: str,
    decision: str,
    actor: str,
    content: str,
    findings: list[dict[str, Any]],
    metadata: dict[str, Any] | None = None,
) -> Path:
    """Write one audit record under ``audit/`` and return its path.

    The record stores a SHA-256 digest of *content* rather than the content
    itself. The file name is a UTC timestamp followed by the slugified event
    and snapshot id.
    """
    created_at = now_iso()
    event_slug = slugify(event)
    snapshot_slug = slugify(snapshot_id)
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
    audit_file = root / "audit" / f"{stamp}-{event_slug}-{snapshot_slug}.json"
    digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
    record = {
        "schema_version": KNOWLEDGE_SCHEMA_VERSION,
        "event": event,
        "snapshot_id": snapshot_id,
        "decision": decision,
        "actor": actor,
        "content_sha256": digest,
        "findings": findings,
        "metadata": metadata or {},
        "created_at": created_at,
    }
    write_json(audit_file, record)
    return audit_file
1064
+
1065
+
1066
def now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    current = datetime.now(timezone.utc)
    return current.isoformat()