akm-cli 0.7.5 → 0.8.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +113 -2
  2. package/README.md +20 -4
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +133 -0
  8. package/dist/cli.js +1995 -551
  9. package/dist/commands/agent-dispatch.js +110 -0
  10. package/dist/commands/agent-support.js +68 -0
  11. package/dist/commands/completions.js +3 -0
  12. package/dist/commands/config-cli.js +130 -534
  13. package/dist/commands/consolidate.js +1531 -0
  14. package/dist/commands/curate.js +44 -3
  15. package/dist/commands/db-cli.js +23 -0
  16. package/dist/commands/distill-promotion-policy.js +660 -0
  17. package/dist/commands/distill.js +990 -75
  18. package/dist/commands/eval-cases.js +43 -0
  19. package/dist/commands/events.js +5 -23
  20. package/dist/commands/graph.js +477 -0
  21. package/dist/commands/health.js +400 -0
  22. package/dist/commands/help/help-accept.md +9 -0
  23. package/dist/commands/help/help-improve.md +77 -0
  24. package/dist/commands/help/help-proposals.md +15 -0
  25. package/dist/commands/help/help-propose.md +17 -0
  26. package/dist/commands/help/help-reject.md +8 -0
  27. package/dist/commands/history.js +54 -46
  28. package/dist/commands/improve-profiles.js +146 -0
  29. package/dist/commands/improve-result-file.js +103 -0
  30. package/dist/commands/improve.js +2175 -0
  31. package/dist/commands/info.js +5 -2
  32. package/dist/commands/init.js +50 -2
  33. package/dist/commands/installed-stashes.js +102 -139
  34. package/dist/commands/knowledge.js +136 -0
  35. package/dist/commands/lint/agent-linter.js +49 -0
  36. package/dist/commands/lint/base-linter.js +479 -0
  37. package/dist/commands/lint/command-linter.js +49 -0
  38. package/dist/commands/lint/default-linter.js +16 -0
  39. package/dist/commands/lint/index.js +183 -0
  40. package/dist/commands/lint/knowledge-linter.js +16 -0
  41. package/dist/commands/lint/markdown-insertion.js +343 -0
  42. package/dist/commands/lint/memory-linter.js +61 -0
  43. package/dist/commands/lint/registry.js +36 -0
  44. package/dist/commands/lint/skill-linter.js +45 -0
  45. package/dist/commands/lint/task-linter.js +50 -0
  46. package/dist/commands/lint/types.js +4 -0
  47. package/dist/commands/lint/vault-key-rules.js +139 -0
  48. package/dist/commands/lint/workflow-linter.js +56 -0
  49. package/dist/commands/lint.js +4 -0
  50. package/dist/commands/migration-help.js +5 -2
  51. package/dist/commands/proposal.js +66 -12
  52. package/dist/commands/propose.js +86 -31
  53. package/dist/commands/reflect.js +1119 -73
  54. package/dist/commands/registry-search.js +5 -2
  55. package/dist/commands/remember.js +69 -6
  56. package/dist/commands/schema-repair.js +203 -0
  57. package/dist/commands/search.js +115 -14
  58. package/dist/commands/self-update.js +3 -0
  59. package/dist/commands/show.js +144 -25
  60. package/dist/commands/source-add.js +17 -45
  61. package/dist/commands/source-clone.js +3 -0
  62. package/dist/commands/source-manage.js +14 -19
  63. package/dist/commands/tasks.js +438 -0
  64. package/dist/commands/url-checker.js +42 -0
  65. package/dist/commands/vault.js +130 -77
  66. package/dist/core/action-contributors.js +28 -0
  67. package/dist/core/asset-ref.js +7 -0
  68. package/dist/core/asset-registry.js +7 -16
  69. package/dist/core/asset-serialize.js +88 -0
  70. package/dist/core/asset-spec.js +22 -0
  71. package/dist/core/common.js +157 -0
  72. package/dist/core/concurrent.js +25 -0
  73. package/dist/core/config-io.js +347 -0
  74. package/dist/core/config-migration.js +625 -0
  75. package/dist/core/config-schema.js +501 -0
  76. package/dist/core/config-sources.js +108 -0
  77. package/dist/core/config-types.js +4 -0
  78. package/dist/core/config-walker.js +337 -0
  79. package/dist/core/config.js +327 -987
  80. package/dist/core/errors.js +40 -19
  81. package/dist/core/events.js +91 -138
  82. package/dist/core/file-lock.js +104 -0
  83. package/dist/core/frontmatter.js +3 -6
  84. package/dist/core/lesson-lint.js +3 -0
  85. package/dist/core/markdown.js +20 -0
  86. package/dist/core/memory-belief.js +62 -0
  87. package/dist/core/memory-contradiction-detect.js +274 -0
  88. package/dist/core/memory-improve.js +806 -0
  89. package/dist/core/parse.js +158 -0
  90. package/dist/core/paths.js +326 -14
  91. package/dist/core/proposal-quality-validators.js +364 -0
  92. package/dist/core/proposal-validators.js +69 -0
  93. package/dist/core/proposals.js +498 -42
  94. package/dist/core/state-db.js +927 -0
  95. package/dist/core/text-truncation.js +107 -0
  96. package/dist/core/time.js +54 -0
  97. package/dist/core/warn.js +62 -1
  98. package/dist/core/write-source.js +3 -0
  99. package/dist/indexer/db-backup.js +391 -0
  100. package/dist/indexer/db-search.js +152 -253
  101. package/dist/indexer/db.js +933 -103
  102. package/dist/indexer/ensure-index.js +64 -0
  103. package/dist/indexer/file-context.js +3 -0
  104. package/dist/indexer/graph-boost.js +376 -101
  105. package/dist/indexer/graph-db.js +391 -0
  106. package/dist/indexer/graph-dedup.js +95 -0
  107. package/dist/indexer/graph-extraction.js +550 -124
  108. package/dist/indexer/index-context.js +4 -0
  109. package/dist/indexer/indexer.js +506 -291
  110. package/dist/indexer/llm-cache.js +47 -0
  111. package/dist/indexer/manifest.js +3 -0
  112. package/dist/indexer/matchers.js +148 -160
  113. package/dist/indexer/memory-inference.js +99 -74
  114. package/dist/indexer/metadata-contributors.js +29 -0
  115. package/dist/indexer/metadata.js +255 -196
  116. package/dist/indexer/path-resolver.js +92 -0
  117. package/dist/indexer/project-context.js +192 -0
  118. package/dist/indexer/ranking-contributors.js +331 -0
  119. package/dist/indexer/ranking.js +81 -0
  120. package/dist/indexer/search-fields.js +5 -9
  121. package/dist/indexer/search-hit-enrichers.js +111 -0
  122. package/dist/indexer/search-source.js +44 -10
  123. package/dist/indexer/semantic-status.js +5 -16
  124. package/dist/indexer/staleness-detect.js +447 -0
  125. package/dist/indexer/usage-events.js +12 -9
  126. package/dist/indexer/walker.js +28 -0
  127. package/dist/integrations/agent/builders.js +135 -0
  128. package/dist/integrations/agent/config.js +122 -230
  129. package/dist/integrations/agent/detect.js +3 -0
  130. package/dist/integrations/agent/index.js +7 -13
  131. package/dist/integrations/agent/model-aliases.js +55 -0
  132. package/dist/integrations/agent/profiles.js +70 -5
  133. package/dist/integrations/agent/prompts.js +150 -74
  134. package/dist/integrations/agent/runner.js +151 -0
  135. package/dist/integrations/agent/sdk-runner.js +126 -0
  136. package/dist/integrations/agent/spawn.js +118 -23
  137. package/dist/integrations/github.js +3 -0
  138. package/dist/integrations/lockfile.js +32 -69
  139. package/dist/integrations/session-logs/index.js +68 -0
  140. package/dist/integrations/session-logs/providers/claude-code.js +59 -0
  141. package/dist/integrations/session-logs/providers/opencode.js +55 -0
  142. package/dist/integrations/session-logs/types.js +4 -0
  143. package/dist/llm/call-ai.js +62 -0
  144. package/dist/llm/client.js +72 -124
  145. package/dist/llm/embedder.js +3 -19
  146. package/dist/llm/embedders/cache.js +3 -7
  147. package/dist/llm/embedders/local.js +3 -0
  148. package/dist/llm/embedders/remote.js +20 -8
  149. package/dist/llm/embedders/types.js +3 -7
  150. package/dist/llm/feature-gate.js +89 -48
  151. package/dist/llm/graph-extract.js +676 -70
  152. package/dist/llm/index-passes.js +9 -23
  153. package/dist/llm/memory-infer.js +52 -71
  154. package/dist/llm/metadata-enhance.js +42 -29
  155. package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
  156. package/dist/output/cli-hints-full.md +281 -0
  157. package/dist/output/cli-hints-short.md +65 -0
  158. package/dist/output/cli-hints.js +5 -318
  159. package/dist/output/context.js +3 -0
  160. package/dist/output/renderers.js +223 -256
  161. package/dist/output/shapes.js +150 -105
  162. package/dist/output/text.js +318 -30
  163. package/dist/registry/build-index.js +3 -0
  164. package/dist/registry/create-provider-registry.js +3 -0
  165. package/dist/registry/factory.js +3 -0
  166. package/dist/registry/origin-resolve.js +3 -0
  167. package/dist/registry/providers/index.js +3 -0
  168. package/dist/registry/providers/skills-sh.js +70 -49
  169. package/dist/registry/providers/static-index.js +53 -48
  170. package/dist/registry/providers/types.js +3 -24
  171. package/dist/registry/resolve.js +11 -16
  172. package/dist/registry/types.js +3 -0
  173. package/dist/scripts/migrate-storage.js +17307 -0
  174. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8900 -0
  175. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  176. package/dist/setup/detect.js +3 -0
  177. package/dist/setup/ripgrep-install.js +3 -0
  178. package/dist/setup/ripgrep-resolve.js +3 -0
  179. package/dist/setup/setup.js +775 -37
  180. package/dist/setup/steps.js +3 -15
  181. package/dist/sources/include.js +3 -0
  182. package/dist/sources/provider-factory.js +5 -12
  183. package/dist/sources/provider.js +3 -20
  184. package/dist/sources/providers/filesystem.js +19 -23
  185. package/dist/sources/providers/git.js +7 -5
  186. package/dist/sources/providers/index.js +3 -0
  187. package/dist/sources/providers/install-types.js +3 -13
  188. package/dist/sources/providers/npm.js +3 -4
  189. package/dist/sources/providers/provider-utils.js +3 -0
  190. package/dist/sources/providers/sync-from-ref.js +3 -11
  191. package/dist/sources/providers/tar-utils.js +3 -0
  192. package/dist/sources/providers/website.js +18 -22
  193. package/dist/sources/resolve.js +3 -0
  194. package/dist/sources/types.js +3 -0
  195. package/dist/sources/website-ingest.js +7 -0
  196. package/dist/tasks/backends/cron.js +203 -0
  197. package/dist/tasks/backends/exec-utils.js +28 -0
  198. package/dist/tasks/backends/index.js +24 -0
  199. package/dist/tasks/backends/launchd-template.xml +19 -0
  200. package/dist/tasks/backends/launchd.js +187 -0
  201. package/dist/tasks/backends/schtasks-template.xml +29 -0
  202. package/dist/tasks/backends/schtasks.js +215 -0
  203. package/dist/tasks/parser.js +211 -0
  204. package/dist/tasks/resolveAkmBin.js +87 -0
  205. package/dist/tasks/runner.js +458 -0
  206. package/dist/tasks/schedule.js +211 -0
  207. package/dist/tasks/schema.js +15 -0
  208. package/dist/tasks/validator.js +62 -0
  209. package/dist/version.js +3 -0
  210. package/dist/wiki/index-template.md +12 -0
  211. package/dist/wiki/ingest-workflow-template.md +54 -0
  212. package/dist/wiki/log-template.md +8 -0
  213. package/dist/wiki/schema-template.md +61 -0
  214. package/dist/wiki/wiki-templates.js +15 -0
  215. package/dist/wiki/wiki.js +13 -61
  216. package/dist/workflows/authoring.js +8 -25
  217. package/dist/workflows/cli.js +3 -0
  218. package/dist/workflows/db.js +140 -10
  219. package/dist/workflows/document-cache.js +3 -10
  220. package/dist/workflows/parser.js +3 -0
  221. package/dist/workflows/renderer.js +11 -3
  222. package/dist/workflows/runs.js +62 -91
  223. package/dist/workflows/schema.js +3 -0
  224. package/dist/workflows/scope-key.js +3 -0
  225. package/dist/workflows/validator.js +4 -8
  226. package/dist/workflows/workflow-template.md +24 -0
  227. package/docs/README.md +9 -2
  228. package/docs/data-and-telemetry.md +225 -0
  229. package/docs/migration/release-notes/0.7.0.md +1 -1
  230. package/docs/migration/release-notes/0.7.5.md +2 -2
  231. package/docs/migration/release-notes/0.8.0.md +48 -0
  232. package/docs/migration/v0.7-to-v0.8.md +1307 -0
  233. package/package.json +20 -8
  234. package/.github/LICENSE +0 -374
  235. package/dist/commands/install-audit.js +0 -381
  236. package/dist/templates/wiki-templates.js +0 -100
@@ -0,0 +1,391 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import fs from "node:fs";
5
+ import { rethrowIfTestIsolationError } from "../core/errors";
6
+ import { getDbPath } from "../core/paths";
7
+ import { warn } from "../core/warn";
8
+ import { closeDatabase, openExistingDatabase } from "./db";
9
/**
 * Run `fn` against a graph database handle.
 *
 * When the caller supplies `db`, it is passed straight to `fn` and is not
 * closed here. Otherwise the database at getDbPath() is opened for the
 * duration of the call and closed afterwards, even if `fn` throws.
 *
 * @param db Optional, already-open database handle.
 * @param fn Callback receiving the handle; its return value is returned.
 * @throws {Error} "GRAPH_DB_MISSING" when no handle is supplied and the
 *   database file does not exist.
 */
function withReadableGraphDb(db, fn) {
    if (db) {
        return fn(db);
    }
    const location = getDbPath();
    const present = fs.existsSync(location);
    if (!present) {
        throw new Error("GRAPH_DB_MISSING");
    }
    const handle = openExistingDatabase(location);
    try {
        return fn(handle);
    }
    finally {
        closeDatabase(handle);
    }
}
23
/**
 * Return the distinct values of `values`, ordered with String#localeCompare.
 * The input is not modified.
 */
function uniqueSorted(values) {
    const distinct = Array.from(new Set(values));
    distinct.sort((left, right) => left.localeCompare(right));
    return distinct;
}
26
/**
 * Produce the comparison key for an entity name: surrounding whitespace
 * removed, then lowercased.
 */
function normalizeEntity(value) {
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
29
/**
 * Look up the entries.id for a file path inside a stash.
 *
 * The lookup first matches on both stash_dir and file_path. If that finds
 * nothing it retries on file_path alone, because legacy callers may pass a
 * stash root that does not exactly equal entries.stash_dir (for example a
 * trailing-slash difference).
 *
 * @returns The entry id, or null when no entry matches or the query throws.
 */
export function resolveEntryIdForPath(db, stashRoot, filePath) {
    try {
        const scopedStatement = db.prepare("SELECT id FROM entries WHERE stash_dir = ? AND file_path = ? LIMIT 1");
        const scopedMatch = scopedStatement.get(stashRoot, filePath);
        if (scopedMatch) {
            return scopedMatch.id;
        }
        const pathOnlyStatement = db.prepare("SELECT id FROM entries WHERE file_path = ? LIMIT 1");
        const pathOnlyMatch = pathOnlyStatement.get(filePath);
        return pathOnlyMatch?.id ?? null;
    }
    catch {
        // Any database failure is reported as "no entry".
        return null;
    }
}
49
/**
 * Persist (or update) a graph snapshot for a stash root.
 *
 * The write is an incremental upsert keyed on entries.id and runs inside a
 * single transaction:
 *   - the graph_meta row for the stash is upserted;
 *   - a file whose stored body_hash equals the incoming (non-empty) hash only
 *     has its metadata columns refreshed;
 *   - a new or changed file has its rows (and child rows) removed and
 *     re-inserted;
 *   - rows stored for the stash whose entry is absent from the snapshot are
 *     deleted.
 *
 * Files with no resolvable entries row are skipped and reported through a
 * single warn() carrying the count, so the caller sees the magnitude without
 * log spam.
 *
 * @param db Database handle exposing prepare() and transaction().
 * @param graph Snapshot to store. Reads stashRoot, schemaVersion, generatedAt
 *   and files, plus the optional quality, telemetry, entities and relations.
 */
export function replaceStoredGraph(db, graph) {
    const upsertMeta = db.prepare(`INSERT INTO graph_meta (
        stash_root,
        schema_version,
        generated_at,
        considered_files,
        extracted_files,
        entity_count,
        relation_count,
        extraction_coverage,
        density,
        extractor_id,
        extraction_run_id,
        model,
        prompt_version,
        batch_size,
        cache_hits,
        cache_misses,
        truncation_count,
        failure_count
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(stash_root) DO UPDATE SET
        schema_version = excluded.schema_version,
        generated_at = excluded.generated_at,
        considered_files = excluded.considered_files,
        extracted_files = excluded.extracted_files,
        entity_count = excluded.entity_count,
        relation_count = excluded.relation_count,
        extraction_coverage = excluded.extraction_coverage,
        density = excluded.density,
        extractor_id = excluded.extractor_id,
        extraction_run_id = excluded.extraction_run_id,
        model = excluded.model,
        prompt_version = excluded.prompt_version,
        batch_size = excluded.batch_size,
        cache_hits = excluded.cache_hits,
        cache_misses = excluded.cache_misses,
        truncation_count = excluded.truncation_count,
        failure_count = excluded.failure_count`);
    const selectExisting = db.prepare("SELECT entry_id, file_path, body_hash FROM graph_files WHERE stash_root = ?");
    const deleteFile = db.prepare("DELETE FROM graph_files WHERE entry_id = ?");
    const deleteEntities = db.prepare("DELETE FROM graph_file_entities WHERE entry_id = ?");
    const deleteRelations = db.prepare("DELETE FROM graph_file_relations WHERE entry_id = ?");
    const insertFile = db.prepare(`INSERT INTO graph_files (
        entry_id, stash_root, file_path, file_order, file_type, body_hash, confidence, status, reason, extraction_run_id
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
    const updateFileMeta = db.prepare(`UPDATE graph_files
        SET file_order = ?, file_type = ?, confidence = ?, status = ?, reason = ?, extraction_run_id = ?
        WHERE entry_id = ?`);
    const insertEntity = db.prepare(`INSERT INTO graph_file_entities (entry_id, entity_order, stash_root, entity_norm, entity)
        VALUES (?, ?, ?, ?, ?)`);
    const insertRelation = db.prepare(`INSERT INTO graph_file_relations (
        entry_id, relation_order, from_entity_norm, from_entity, to_entity_norm, to_entity, relation_type, confidence
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
    const quality = graph.quality;
    const telemetry = graph.telemetry;
    // Remove one graph_files row together with its child rows. The children
    // are deleted explicitly (rather than left to CASCADE) so the order is
    // deterministic.
    const purgeEntry = (id) => {
        deleteEntities.run(id);
        deleteRelations.run(id);
        deleteFile.run(id);
    };
    db.transaction(() => {
        upsertMeta.run(graph.stashRoot, graph.schemaVersion, graph.generatedAt, quality?.consideredFiles ?? graph.files.length, quality?.extractedFiles ?? graph.files.length, quality?.entityCount ?? graph.entities?.length ?? 0, quality?.relationCount ?? graph.relations?.length ?? 0, quality?.extractionCoverage ?? 0, quality?.density ?? 0, telemetry?.extractorId ?? null, telemetry?.extractionRunId ?? null, telemetry?.model ?? null, telemetry?.promptVersion ?? null, telemetry?.batchSize ?? null, telemetry?.cacheHits ?? 0, telemetry?.cacheMisses ?? 0, telemetry?.truncationCount ?? 0, telemetry?.failureCount ?? 0);
        // Snapshot of the rows currently stored for this stash, for the
        // incremental compare below.
        const existingRows = selectExisting.all(graph.stashRoot);
        const existingByPath = new Map();
        for (const row of existingRows) {
            existingByPath.set(row.file_path, row);
        }
        let orphanCount = 0;
        const presentEntryIds = new Set();
        for (const [fileOrder, node] of graph.files.entries()) {
            // Inputs that carry no hash (test fixtures, legacy imports) are
            // stored with "" as an "unknown body" sentinel.
            const bodyHash = node.bodyHash && node.bodyHash.length > 0 ? node.bodyHash : "";
            const entryId = resolveEntryIdForPath(db, graph.stashRoot, node.path);
            if (entryId == null) {
                orphanCount += 1;
                continue;
            }
            presentEntryIds.add(entryId);
            const status = node.status ?? (node.entities.length > 0 ? "extracted" : "empty");
            const reason = node.reason ?? (node.entities.length > 0 ? "none" : "no_graph_content");
            const runId = node.extractionRunId ?? telemetry?.extractionRunId ?? null;
            const existing = existingByPath.get(node.path);
            // The sentinel must never count as a match: two unknown bodies are
            // not known to be equal, so such files are always rewritten.
            const unchanged = existing !== undefined &&
                existing.entry_id === entryId &&
                bodyHash !== "" &&
                existing.body_hash === bodyHash;
            if (unchanged) {
                // Body unchanged — only refresh the metadata columns in case
                // they drifted.
                updateFileMeta.run(fileOrder, node.type, node.confidence ?? null, status, reason, runId, entryId);
                continue;
            }
            if (existing) {
                // Stale row for this path (different body_hash, or the path now
                // resolves to a different entry).
                purgeEntry(existing.entry_id);
            }
            if (existing?.entry_id !== entryId) {
                // A row may already be keyed on this entry under another path;
                // clear it so the INSERT below cannot collide.
                // NOTE(review): assumes graph_files.entry_id is unique — confirm
                // against the schema.
                purgeEntry(entryId);
            }
            insertFile.run(entryId, graph.stashRoot, node.path, fileOrder, node.type, bodyHash, node.confidence ?? null, status, reason, runId);
            for (const [entityOrder, entity] of node.entities.entries()) {
                insertEntity.run(entryId, entityOrder, graph.stashRoot, normalizeEntity(entity), entity);
            }
            for (const [relationOrder, relation] of node.relations.entries()) {
                insertRelation.run(entryId, relationOrder, normalizeEntity(relation.from), relation.from, normalizeEntity(relation.to), relation.to, relation.type ?? null, relation.confidence ?? null);
            }
        }
        // Delete rows stored for the stash whose entry is absent from the new
        // snapshot.
        for (const row of existingRows) {
            if (!presentEntryIds.has(row.entry_id)) {
                purgeEntry(row.entry_id);
            }
        }
        if (orphanCount > 0) {
            warn(`[graph] replaceStoredGraph: skipped ${orphanCount} file(s) with no resolvable entry under ${graph.stashRoot}.`);
        }
    })();
}
175
/**
 * Remove everything stored for a stash root: entity rows, relation rows,
 * graph_files rows and the graph_meta row, in one transaction.
 *
 * Child rows are deleted explicitly before their graph_files parents. SQLite
 * only applies ON DELETE CASCADE when foreign-key enforcement is enabled on
 * the connection, so relying on the cascade alone could leave orphaned
 * entity/relation rows behind. The explicit deletes are harmless when the
 * cascade is active.
 *
 * @param db Database handle exposing prepare() and transaction().
 * @param stashPath Stash root whose graph rows are removed.
 */
export function deleteStoredGraph(db, stashPath) {
    db.transaction(() => {
        // Children first: the subqueries need the graph_files rows to still exist.
        db.prepare("DELETE FROM graph_file_entities WHERE entry_id IN (SELECT entry_id FROM graph_files WHERE stash_root = ?)").run(stashPath);
        db.prepare("DELETE FROM graph_file_relations WHERE entry_id IN (SELECT entry_id FROM graph_files WHERE stash_root = ?)").run(stashPath);
        db.prepare("DELETE FROM graph_files WHERE stash_root = ?").run(stashPath);
        db.prepare("DELETE FROM graph_meta WHERE stash_root = ?").run(stashPath);
    })();
}
182
/**
 * Scoped loader that returns only the graph_meta summary for a stash, for
 * callers that need the summary numbers and nothing else (e.g.
 * `akm graph summary`). Delegates to loadStoredGraphMeta.
 */
export function loadGraphMetaOnly(stashPath, db) {
    const meta = loadStoredGraphMeta(stashPath, db);
    return meta;
}
189
/**
 * Scoped loader that returns the graph_files rows of a stash, in file_order,
 * without their entities or relations. Used for orphan detection and entity
 * overview commands.
 *
 * Each result carries entryId, path, type and bodyHash; confidence is
 * included only when it is a number, status and reason only when truthy.
 *
 * @returns The mapped rows, or [] when the query fails or the database
 *   cannot be read. The test-isolation guard error is rethrown, not masked.
 */
export function loadGraphFilesOnly(stashPath, db) {
    const toFile = (row) => {
        const file = {
            entryId: row.entry_id,
            path: row.file_path,
            type: row.file_type,
            bodyHash: row.body_hash,
        };
        if (typeof row.confidence === "number") {
            file.confidence = row.confidence;
        }
        if (row.status) {
            file.status = row.status;
        }
        if (row.reason) {
            file.reason = row.reason;
        }
        return file;
    };
    const readFiles = (readDb) => {
        try {
            const statement = readDb.prepare(`SELECT entry_id, file_path, file_type, body_hash, confidence, status, reason
                FROM graph_files
                WHERE stash_root = ?
                ORDER BY file_order`);
            const rows = statement.all(stashPath);
            return rows.map((row) => toFile(row));
        }
        catch {
            return [];
        }
    };
    try {
        return withReadableGraphDb(db, readFiles);
    }
    catch (err) {
        // Never mask the bun-test isolation guard as "no stored graph files".
        rethrowIfTestIsolationError(err);
        return [];
    }
}
224
/**
 * Scoped loader that returns the entity names stored for one entry_id, in
 * entity_order. Used by per-asset lookups.
 *
 * @returns The entity strings, or [] when the query throws.
 */
export function loadGraphEntitiesByEntry(db, entryId) {
    try {
        const statement = db.prepare("SELECT entity FROM graph_file_entities WHERE entry_id = ? ORDER BY entity_order");
        const matches = statement.all(entryId);
        return matches.map(({ entity }) => entity);
    }
    catch {
        return [];
    }
}
238
/**
 * Load the graph_meta row for a stash and reshape it into a summary object.
 *
 * The result holds stashPath, graphPath (from getDbPath()), schemaVersion,
 * generatedAt, a `quality` object and a `telemetry` object. Within telemetry,
 * extractorId, extractionRunId, model and promptVersion appear only when the
 * stored value is truthy, and batchSize only when it is a number; the four
 * counters are always present.
 *
 * @returns The summary, or null when no row exists, the query fails, or the
 *   database cannot be read. The test-isolation guard error is rethrown,
 *   not masked.
 */
export function loadStoredGraphMeta(stashPath, db) {
    const readMeta = (readDb) => {
        try {
            const statement = readDb.prepare(`SELECT
                stash_root,
                schema_version,
                generated_at,
                considered_files,
                extracted_files,
                entity_count,
                relation_count,
                extraction_coverage,
                density,
                extractor_id,
                extraction_run_id,
                model,
                prompt_version,
                batch_size,
                cache_hits,
                cache_misses,
                truncation_count,
                failure_count
                FROM graph_meta
                WHERE stash_root = ?`);
            const stored = statement.get(stashPath);
            if (!stored) {
                return null;
            }
            const telemetry = {};
            if (stored.extractor_id) {
                telemetry.extractorId = stored.extractor_id;
            }
            if (stored.extraction_run_id) {
                telemetry.extractionRunId = stored.extraction_run_id;
            }
            if (stored.model) {
                telemetry.model = stored.model;
            }
            if (stored.prompt_version) {
                telemetry.promptVersion = stored.prompt_version;
            }
            if (typeof stored.batch_size === "number") {
                telemetry.batchSize = stored.batch_size;
            }
            telemetry.cacheHits = stored.cache_hits;
            telemetry.cacheMisses = stored.cache_misses;
            telemetry.truncationCount = stored.truncation_count;
            telemetry.failureCount = stored.failure_count;
            const quality = {
                consideredFiles: stored.considered_files,
                extractedFiles: stored.extracted_files,
                entityCount: stored.entity_count,
                relationCount: stored.relation_count,
                extractionCoverage: stored.extraction_coverage,
                density: stored.density,
            };
            return {
                stashPath: stored.stash_root,
                graphPath: getDbPath(),
                schemaVersion: stored.schema_version,
                generatedAt: stored.generated_at,
                quality,
                telemetry,
            };
        }
        catch {
            return null;
        }
    };
    try {
        return withReadableGraphDb(db, readMeta);
    }
    catch (err) {
        // Never mask the bun-test isolation guard as "no stored graph meta".
        rethrowIfTestIsolationError(err);
        return null;
    }
}
304
/**
 * Load the full stored graph for a stash: the summary from
 * loadStoredGraphMeta plus every file with its entities and relations.
 *
 * Files come back in file_order; entities and relations are attached to
 * their file by file_path, in entity_order / relation_order. The top-level
 * `entities` list is the de-duplicated, sorted union of all file entities,
 * and `relations` is the concatenation of all file relations.
 *
 * @returns The snapshot, or null when there is no meta row, a query fails,
 *   or the database cannot be read. The test-isolation guard error is
 *   rethrown, not masked.
 */
export function loadStoredGraphSnapshot(stashPath, db) {
    // Append `item` to the list stored under `key`, creating it on first use.
    const appendTo = (buckets, key, item) => {
        const list = buckets.get(key);
        if (list) {
            list.push(item);
        }
        else {
            buckets.set(key, [item]);
        }
    };
    const buildSnapshot = (readDb) => {
        const meta = loadStoredGraphMeta(stashPath, readDb);
        if (!meta) {
            return null;
        }
        try {
            const fileRows = readDb
                .prepare(`SELECT entry_id, file_path, file_type, body_hash, confidence, status, reason, extraction_run_id
                    FROM graph_files
                    WHERE stash_root = ?
                    ORDER BY file_order`)
                .all(stashPath);
            const entityRows = readDb
                .prepare(`SELECT gfe.entry_id AS entry_id, gf.file_path AS file_path, gfe.entity AS entity
                    FROM graph_file_entities gfe
                    JOIN graph_files gf ON gf.entry_id = gfe.entry_id
                    WHERE gf.stash_root = ?
                    ORDER BY gf.file_order, gfe.entity_order`)
                .all(stashPath);
            const relationRows = readDb
                .prepare(`SELECT gfr.entry_id AS entry_id,
                    gf.file_path AS file_path,
                    gfr.from_entity AS from_entity,
                    gfr.to_entity AS to_entity,
                    gfr.relation_type AS relation_type,
                    gfr.confidence AS confidence
                    FROM graph_file_relations gfr
                    JOIN graph_files gf ON gf.entry_id = gfr.entry_id
                    WHERE gf.stash_root = ?
                    ORDER BY gf.file_order, gfr.relation_order`)
                .all(stashPath);
            const entitiesByPath = new Map();
            for (const entityRow of entityRows) {
                appendTo(entitiesByPath, entityRow.file_path, entityRow.entity);
            }
            const relationsByPath = new Map();
            for (const relationRow of relationRows) {
                const relation = { from: relationRow.from_entity, to: relationRow.to_entity };
                if (relationRow.relation_type) {
                    relation.type = relationRow.relation_type;
                }
                if (typeof relationRow.confidence === "number") {
                    relation.confidence = relationRow.confidence;
                }
                appendTo(relationsByPath, relationRow.file_path, relation);
            }
            const files = fileRows.map((fileRow) => {
                const file = { path: fileRow.file_path, type: fileRow.file_type };
                if (fileRow.body_hash) {
                    file.bodyHash = fileRow.body_hash;
                }
                file.entities = entitiesByPath.get(fileRow.file_path) ?? [];
                file.relations = relationsByPath.get(fileRow.file_path) ?? [];
                if (typeof fileRow.confidence === "number") {
                    file.confidence = fileRow.confidence;
                }
                if (fileRow.status) {
                    file.status = fileRow.status;
                }
                if (fileRow.reason) {
                    file.reason = fileRow.reason;
                }
                if (fileRow.extraction_run_id) {
                    file.extractionRunId = fileRow.extraction_run_id;
                }
                return file;
            });
            const snapshot = {
                stashPath: meta.stashPath,
                graphPath: meta.graphPath,
                schemaVersion: meta.schemaVersion,
                generatedAt: meta.generatedAt,
            };
            if (meta.quality) {
                snapshot.quality = meta.quality;
            }
            if (meta.telemetry) {
                snapshot.telemetry = meta.telemetry;
            }
            snapshot.files = files;
            snapshot.entities = uniqueSorted(files.flatMap((file) => file.entities));
            snapshot.relations = files.flatMap((file) => file.relations);
            return snapshot;
        }
        catch {
            return null;
        }
    };
    try {
        return withReadableGraphDb(db, buildSnapshot);
    }
    catch (err) {
        // Never mask the bun-test isolation guard as "no stored graph snapshot".
        rethrowIfTestIsolationError(err);
        return null;
    }
}
@@ -0,0 +1,95 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
/**
 * Canonicalise a relation label: trim, lowercase, collapse runs of
 * whitespace to one space, then fold a few known variants onto a single
 * spelling ("uses", "depends on", "integrates with").
 *
 * @returns The canonical label, or "" when `raw` is null/undefined or blank.
 */
function normalizeRelationType(raw) {
    const collapsed = raw?.trim().toLowerCase().replace(/\s+/g, " ") ?? "";
    switch (collapsed) {
        case "":
            return "";
        case "use":
        case "utilizes":
            return "uses";
        case "depend on":
        case "depends":
            return "depends on";
        case "integrates":
        case "integration with":
            return "integrates with";
        default:
            return collapsed;
    }
}
16
/**
 * Clamp a confidence score into the range [0, 1].
 *
 * @returns The clamped number, or undefined when `raw` is not a finite number.
 */
function normalizeConfidence(raw) {
    const usable = typeof raw === "number" && Number.isFinite(raw);
    if (!usable) {
        return undefined;
    }
    const capped = Math.min(1, raw);
    return Math.max(0, capped);
}
21
/**
 * Merge and deduplicate entities and relations from multiple per-asset
 * extraction results into one canonical graph.
 *
 * Entities are keyed on their trimmed, lowercased form; the first-seen
 * casing is kept as canonical and blank entities are skipped. Relations are
 * keyed on `(from, to, type)` after normalisation. Dangling relations —
 * those whose `from` or `to` is absent from the deduplicated entity set —
 * are dropped.
 *
 * When the same relation is seen more than once, the highest normalised
 * confidence wins. A relation that so far has no confidence adopts the first
 * explicit value reported for it, including 0, so that "zero confidence" is
 * not confused with "unknown".
 *
 * @param extractions Array of results, each with `entities` (strings) and
 *   `relations` (objects with from, to and optional type/confidence).
 * @param assetRefs Optional array parallel to `extractions` naming the source
 *   of each result; missing entries are recorded as "unknown".
 * @returns `{ entities, relations, entitySources, relationSources }`, where
 *   the two source maps go from normalised key to the list of refs that
 *   contributed it.
 */
export function deduplicateGraph(extractions, assetRefs) {
    const entityCanonical = new Map();
    const entitySources = new Map();
    for (let i = 0; i < extractions.length; i++) {
        const ref = assetRefs?.[i] ?? "unknown";
        for (const raw of extractions[i].entities) {
            const trimmed = raw.trim();
            if (!trimmed) {
                continue;
            }
            const normalized = trimmed.toLowerCase();
            const sources = entitySources.get(normalized);
            if (!entityCanonical.has(normalized)) {
                entityCanonical.set(normalized, trimmed);
                entitySources.set(normalized, [ref]);
            }
            else if (sources && !sources.includes(ref)) {
                sources.push(ref);
            }
        }
    }
    const entities = Array.from(entityCanonical.values());
    const sourcesByRelationKey = new Map();
    const relationByKey = new Map();
    const relations = [];
    for (let i = 0; i < extractions.length; i++) {
        const ref = assetRefs?.[i] ?? "unknown";
        for (const rel of extractions[i].relations) {
            const fromNorm = rel.from.trim().toLowerCase();
            const toNorm = rel.to.trim().toLowerCase();
            const typeNorm = normalizeRelationType(rel.type);
            // Drop relations that point at an entity nobody reported.
            if (!entityCanonical.has(fromNorm) || !entityCanonical.has(toNorm)) {
                continue;
            }
            // NUL cannot appear in a trimmed label boundary, so it is a safe separator.
            const key = `${fromNorm}\0${toNorm}\0${typeNorm}`;
            const confidence = normalizeConfidence(rel.confidence);
            const known = relationByKey.get(key);
            if (!known) {
                const canonical = {
                    from: entityCanonical.get(fromNorm) ?? rel.from,
                    to: entityCanonical.get(toNorm) ?? rel.to,
                };
                if (typeNorm) {
                    canonical.type = typeNorm;
                }
                if (confidence !== undefined) {
                    canonical.confidence = confidence;
                }
                relationByKey.set(key, canonical);
                sourcesByRelationKey.set(key, [ref]);
                relations.push(canonical);
                continue;
            }
            const sources = sourcesByRelationKey.get(key);
            if (sources && !sources.includes(ref)) {
                sources.push(ref);
            }
            // Missing confidence means "unknown", so any explicit value
            // (even 0) replaces it; otherwise keep the larger value.
            if (confidence !== undefined && (known.confidence === undefined || confidence > known.confidence)) {
                known.confidence = confidence;
            }
        }
    }
    const relationSources = new Map(sourcesByRelationKey);
    return { entities, relations, entitySources, relationSources };
}