akm-cli 0.8.0-rc1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +191 -3
  2. package/README.md +22 -6
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +93 -3
  8. package/dist/cli/shared.js +129 -0
  9. package/dist/cli.js +2162 -1258
  10. package/dist/commands/add-cli.js +279 -0
  11. package/dist/commands/agent-dispatch.js +20 -12
  12. package/dist/commands/agent-support.js +11 -5
  13. package/dist/commands/completions.js +3 -0
  14. package/dist/commands/config-cli.js +129 -517
  15. package/dist/commands/consolidate.js +1533 -144
  16. package/dist/commands/curate.js +44 -3
  17. package/dist/commands/db-cli.js +23 -0
  18. package/dist/commands/distill-promotion-policy.js +5 -3
  19. package/dist/commands/distill.js +906 -100
  20. package/dist/commands/env.js +213 -0
  21. package/dist/commands/eval-cases.js +3 -0
  22. package/dist/commands/events.js +3 -0
  23. package/dist/commands/extract-cli.js +127 -0
  24. package/dist/commands/extract-prompt.js +204 -0
  25. package/dist/commands/extract.js +477 -0
  26. package/dist/commands/feedback-cli.js +331 -0
  27. package/dist/commands/graph.js +260 -5
  28. package/dist/commands/health.js +977 -51
  29. package/dist/commands/help/help-accept.md +6 -3
  30. package/dist/commands/help/help-improve.md +36 -8
  31. package/dist/commands/help/help-proposals.md +7 -4
  32. package/dist/commands/help/help-reject.md +5 -2
  33. package/dist/commands/history.js +51 -16
  34. package/dist/commands/improve-auto-accept.js +97 -0
  35. package/dist/commands/improve-cli.js +236 -0
  36. package/dist/commands/improve-profiles.js +184 -0
  37. package/dist/commands/improve-result-file.js +167 -0
  38. package/dist/commands/improve.js +1725 -332
  39. package/dist/commands/info.js +3 -0
  40. package/dist/commands/init.js +49 -1
  41. package/dist/commands/installed-stashes.js +6 -23
  42. package/dist/commands/knowledge.js +3 -0
  43. package/dist/commands/lint/agent-linter.js +3 -0
  44. package/dist/commands/lint/base-linter.js +233 -5
  45. package/dist/commands/lint/command-linter.js +3 -0
  46. package/dist/commands/lint/default-linter.js +3 -0
  47. package/dist/commands/lint/env-key-rules.js +154 -0
  48. package/dist/commands/lint/index.js +92 -3
  49. package/dist/commands/lint/knowledge-linter.js +3 -0
  50. package/dist/commands/lint/markdown-insertion.js +343 -0
  51. package/dist/commands/lint/memory-linter.js +3 -0
  52. package/dist/commands/lint/registry.js +3 -0
  53. package/dist/commands/lint/skill-linter.js +3 -0
  54. package/dist/commands/lint/task-linter.js +15 -12
  55. package/dist/commands/lint/types.js +3 -0
  56. package/dist/commands/lint/workflow-linter.js +3 -0
  57. package/dist/commands/lint.js +3 -0
  58. package/dist/commands/migration-help.js +5 -2
  59. package/dist/commands/proposal-drain-policies.js +128 -0
  60. package/dist/commands/proposal-drain.js +477 -0
  61. package/dist/commands/proposal.js +60 -6
  62. package/dist/commands/propose.js +24 -19
  63. package/dist/commands/reflect.js +1004 -94
  64. package/dist/commands/registry-cli.js +150 -0
  65. package/dist/commands/registry-search.js +3 -0
  66. package/dist/commands/remember-cli.js +257 -0
  67. package/dist/commands/remember.js +15 -6
  68. package/dist/commands/schema-repair.js +88 -15
  69. package/dist/commands/search.js +99 -14
  70. package/dist/commands/secret.js +173 -0
  71. package/dist/commands/self-update.js +3 -0
  72. package/dist/commands/show.js +32 -13
  73. package/dist/commands/source-add.js +7 -35
  74. package/dist/commands/source-clone.js +3 -0
  75. package/dist/commands/source-manage.js +3 -0
  76. package/dist/commands/tasks.js +161 -95
  77. package/dist/commands/url-checker.js +3 -0
  78. package/dist/core/action-contributors.js +3 -0
  79. package/dist/core/asset-ref.js +17 -2
  80. package/dist/core/asset-registry.js +9 -2
  81. package/dist/core/asset-serialize.js +88 -0
  82. package/dist/core/asset-spec.js +61 -5
  83. package/dist/core/common.js +93 -5
  84. package/dist/core/concurrent.js +3 -0
  85. package/dist/core/config-io.js +347 -0
  86. package/dist/core/config-migration.js +622 -0
  87. package/dist/core/config-schema.js +558 -0
  88. package/dist/core/config-sources.js +108 -0
  89. package/dist/core/config-types.js +4 -0
  90. package/dist/core/config-walker.js +337 -0
  91. package/dist/core/config.js +366 -1077
  92. package/dist/core/errors.js +42 -20
  93. package/dist/core/events.js +31 -25
  94. package/dist/core/file-lock.js +104 -0
  95. package/dist/core/frontmatter.js +75 -10
  96. package/dist/core/lesson-lint.js +3 -0
  97. package/dist/core/markdown.js +3 -0
  98. package/dist/core/memory-belief.js +62 -0
  99. package/dist/core/memory-contradiction-detect.js +274 -0
  100. package/dist/core/memory-improve.js +142 -14
  101. package/dist/core/parse.js +3 -0
  102. package/dist/core/paths.js +218 -50
  103. package/dist/core/proposal-quality-validators.js +380 -0
  104. package/dist/core/proposal-validators.js +11 -3
  105. package/dist/core/proposals.js +464 -5
  106. package/dist/core/state-db.js +349 -56
  107. package/dist/core/text-truncation.js +107 -0
  108. package/dist/core/time.js +3 -0
  109. package/dist/core/tty.js +59 -0
  110. package/dist/core/warn.js +7 -2
  111. package/dist/core/write-source.js +12 -0
  112. package/dist/indexer/db-backup.js +391 -0
  113. package/dist/indexer/db-search.js +136 -28
  114. package/dist/indexer/db.js +662 -166
  115. package/dist/indexer/ensure-index.js +3 -0
  116. package/dist/indexer/file-context.js +3 -0
  117. package/dist/indexer/graph-boost.js +162 -40
  118. package/dist/indexer/graph-db.js +241 -51
  119. package/dist/indexer/graph-dedup.js +3 -7
  120. package/dist/indexer/graph-extraction.js +242 -149
  121. package/dist/indexer/index-context.js +3 -9
  122. package/dist/indexer/indexer.js +84 -14
  123. package/dist/indexer/llm-cache.js +24 -19
  124. package/dist/indexer/manifest.js +3 -0
  125. package/dist/indexer/matchers.js +184 -11
  126. package/dist/indexer/memory-inference.js +94 -50
  127. package/dist/indexer/metadata-contributors.js +3 -0
  128. package/dist/indexer/metadata.js +114 -48
  129. package/dist/indexer/path-resolver.js +3 -0
  130. package/dist/indexer/project-context.js +192 -0
  131. package/dist/indexer/ranking-contributors.js +134 -7
  132. package/dist/indexer/ranking.js +8 -1
  133. package/dist/indexer/search-fields.js +5 -9
  134. package/dist/indexer/search-hit-enrichers.js +91 -2
  135. package/dist/indexer/search-source.js +20 -1
  136. package/dist/indexer/semantic-status.js +4 -1
  137. package/dist/indexer/staleness-detect.js +447 -0
  138. package/dist/indexer/usage-events.js +12 -9
  139. package/dist/indexer/walker.js +3 -0
  140. package/dist/integrations/agent/builders.js +135 -0
  141. package/dist/integrations/agent/config.js +121 -401
  142. package/dist/integrations/agent/detect.js +3 -0
  143. package/dist/integrations/agent/index.js +6 -14
  144. package/dist/integrations/agent/model-aliases.js +55 -0
  145. package/dist/integrations/agent/profiles.js +3 -0
  146. package/dist/integrations/agent/prompts.js +137 -8
  147. package/dist/integrations/agent/runner.js +208 -0
  148. package/dist/integrations/agent/sdk-runner.js +8 -2
  149. package/dist/integrations/agent/spawn.js +54 -14
  150. package/dist/integrations/github.js +3 -0
  151. package/dist/integrations/lockfile.js +22 -51
  152. package/dist/integrations/session-logs/index.js +4 -0
  153. package/dist/integrations/session-logs/inline-refs.js +35 -0
  154. package/dist/integrations/session-logs/pre-filter.js +152 -0
  155. package/dist/integrations/session-logs/providers/claude-code.js +226 -0
  156. package/dist/integrations/session-logs/providers/opencode.js +231 -25
  157. package/dist/integrations/session-logs/types.js +3 -0
  158. package/dist/llm/call-ai.js +14 -26
  159. package/dist/llm/client.js +16 -2
  160. package/dist/llm/embedder.js +20 -29
  161. package/dist/llm/embedders/cache.js +3 -7
  162. package/dist/llm/embedders/local.js +42 -1
  163. package/dist/llm/embedders/remote.js +20 -8
  164. package/dist/llm/embedders/types.js +3 -7
  165. package/dist/llm/feature-gate.js +92 -56
  166. package/dist/llm/graph-extract.js +401 -30
  167. package/dist/llm/index-passes.js +44 -29
  168. package/dist/llm/memory-infer.js +30 -2
  169. package/dist/llm/metadata-enhance.js +3 -7
  170. package/dist/llm/prompts/extract-session.md +80 -0
  171. package/dist/llm/prompts/graph-extract-user-prompt.md +24 -1
  172. package/dist/output/cli-hints-full.md +60 -32
  173. package/dist/output/cli-hints-short.md +10 -7
  174. package/dist/output/cli-hints.js +5 -2
  175. package/dist/output/context.js +60 -8
  176. package/dist/output/renderers.js +170 -194
  177. package/dist/output/shapes/curate.js +56 -0
  178. package/dist/output/shapes/distill.js +10 -0
  179. package/dist/output/shapes/env-list.js +19 -0
  180. package/dist/output/shapes/events.js +11 -0
  181. package/dist/output/shapes/helpers.js +424 -0
  182. package/dist/output/shapes/history.js +7 -0
  183. package/dist/output/shapes/passthrough.js +105 -0
  184. package/dist/output/shapes/proposal-accept.js +7 -0
  185. package/dist/output/shapes/proposal-diff.js +7 -0
  186. package/dist/output/shapes/proposal-list.js +7 -0
  187. package/dist/output/shapes/proposal-producer.js +11 -0
  188. package/dist/output/shapes/proposal-reject.js +7 -0
  189. package/dist/output/shapes/proposal-show.js +7 -0
  190. package/dist/output/shapes/registry-search.js +6 -0
  191. package/dist/output/shapes/registry.js +30 -0
  192. package/dist/output/shapes/search.js +6 -0
  193. package/dist/output/shapes/secret-list.js +19 -0
  194. package/dist/output/shapes/show.js +6 -0
  195. package/dist/output/shapes/vault-list.js +19 -0
  196. package/dist/output/shapes.js +51 -549
  197. package/dist/output/text/add.js +6 -0
  198. package/dist/output/text/clone.js +6 -0
  199. package/dist/output/text/config.js +6 -0
  200. package/dist/output/text/curate.js +6 -0
  201. package/dist/output/text/distill.js +7 -0
  202. package/dist/output/text/enable-disable.js +7 -0
  203. package/dist/output/text/events.js +10 -0
  204. package/dist/output/text/feedback.js +6 -0
  205. package/dist/output/text/helpers.js +1059 -0
  206. package/dist/output/text/history.js +7 -0
  207. package/dist/output/text/import.js +6 -0
  208. package/dist/output/text/index.js +6 -0
  209. package/dist/output/text/info.js +6 -0
  210. package/dist/output/text/init.js +6 -0
  211. package/dist/output/text/list.js +6 -0
  212. package/dist/output/text/proposal-producer.js +8 -0
  213. package/dist/output/text/proposal.js +12 -0
  214. package/dist/output/text/registry-commands.js +11 -0
  215. package/dist/output/text/registry.js +30 -0
  216. package/dist/output/text/remember.js +6 -0
  217. package/dist/output/text/remove.js +6 -0
  218. package/dist/output/text/save.js +6 -0
  219. package/dist/output/text/search.js +6 -0
  220. package/dist/output/text/show.js +6 -0
  221. package/dist/output/text/update.js +6 -0
  222. package/dist/output/text/upgrade.js +6 -0
  223. package/dist/output/text/vault.js +16 -0
  224. package/dist/output/text/wiki.js +15 -0
  225. package/dist/output/text/workflow.js +14 -0
  226. package/dist/output/text.js +44 -1329
  227. package/dist/registry/build-index.js +3 -0
  228. package/dist/registry/create-provider-registry.js +3 -0
  229. package/dist/registry/factory.js +4 -1
  230. package/dist/registry/origin-resolve.js +3 -0
  231. package/dist/registry/providers/index.js +3 -0
  232. package/dist/registry/providers/skills-sh.js +11 -2
  233. package/dist/registry/providers/static-index.js +10 -1
  234. package/dist/registry/providers/types.js +3 -24
  235. package/dist/registry/resolve.js +11 -16
  236. package/dist/registry/types.js +3 -0
  237. package/dist/scripts/migrate-storage.js +17767 -0
  238. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
  239. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  240. package/dist/setup/detect.js +3 -0
  241. package/dist/setup/ripgrep-install.js +3 -0
  242. package/dist/setup/ripgrep-resolve.js +3 -0
  243. package/dist/setup/setup.js +306 -67
  244. package/dist/setup/steps.js +3 -15
  245. package/dist/sources/include.js +3 -0
  246. package/dist/sources/provider-factory.js +3 -11
  247. package/dist/sources/provider.js +3 -20
  248. package/dist/sources/providers/filesystem.js +19 -23
  249. package/dist/sources/providers/git.js +171 -21
  250. package/dist/sources/providers/index.js +3 -0
  251. package/dist/sources/providers/install-types.js +3 -13
  252. package/dist/sources/providers/npm.js +3 -4
  253. package/dist/sources/providers/provider-utils.js +3 -0
  254. package/dist/sources/providers/sync-from-ref.js +3 -11
  255. package/dist/sources/providers/tar-utils.js +3 -0
  256. package/dist/sources/providers/website.js +18 -22
  257. package/dist/sources/resolve.js +3 -0
  258. package/dist/sources/types.js +3 -0
  259. package/dist/sources/website-ingest.js +3 -0
  260. package/dist/tasks/backends/cron.js +3 -0
  261. package/dist/tasks/backends/exec-utils.js +3 -0
  262. package/dist/tasks/backends/index.js +3 -11
  263. package/dist/tasks/backends/launchd.js +3 -0
  264. package/dist/tasks/backends/schtasks.js +3 -0
  265. package/dist/tasks/parser.js +51 -38
  266. package/dist/tasks/resolveAkmBin.js +3 -0
  267. package/dist/tasks/runner.js +35 -9
  268. package/dist/tasks/schedule.js +20 -1
  269. package/dist/tasks/schema.js +5 -3
  270. package/dist/tasks/validator.js +6 -3
  271. package/dist/version.js +3 -0
  272. package/dist/wiki/wiki-templates.js +3 -0
  273. package/dist/wiki/wiki.js +3 -0
  274. package/dist/workflows/authoring.js +3 -0
  275. package/dist/workflows/cli.js +3 -0
  276. package/dist/workflows/db.js +140 -10
  277. package/dist/workflows/document-cache.js +3 -10
  278. package/dist/workflows/parser.js +3 -0
  279. package/dist/workflows/renderer.js +3 -0
  280. package/dist/workflows/runs.js +18 -1
  281. package/dist/workflows/schema.js +3 -0
  282. package/dist/workflows/scope-key.js +3 -0
  283. package/dist/workflows/validator.js +5 -9
  284. package/docs/README.md +7 -2
  285. package/docs/data-and-telemetry.md +225 -0
  286. package/docs/migration/release-notes/0.7.5.md +2 -2
  287. package/docs/migration/release-notes/0.8.0.md +57 -5
  288. package/docs/migration/v0.7-to-v0.8.md +1378 -0
  289. package/package.json +28 -11
  290. package/.github/LICENSE +0 -374
  291. package/dist/commands/install-audit.js +0 -385
  292. package/dist/commands/vault.js +0 -307
  293. package/dist/indexer/match-contributors.js +0 -141
  294. package/dist/integrations/agent/pipeline.js +0 -39
  295. package/dist/integrations/agent/runners.js +0 -31
@@ -1,48 +1,19 @@
1
- /**
2
- * Graph-extraction pass for `akm index` (#207).
3
- *
4
- * Walks the primary stash for `memory:` and `knowledge:` assets, asks the
5
- * configured LLM to extract entities and relations from each one, and
6
- * persists the result to stash-local SQLite graph tables keyed by stash root.
7
- * The artifact is consumed by the search
8
- * pipeline (see `src/indexer/graph-boost.ts`) as a single boost component
9
- * inside the existing FTS5+boosts loop — there is NO second SearchHit
10
- * scorer and no parallel ranking track.
11
- *
12
- * Disabling — three preconditions must ALL hold for the pass to run:
13
- * 1. `akm.llm` must be configured (no provider = no extraction). When
14
- * absent, `resolveIndexPassLLM("graph", config)` returns `undefined`
15
- * and the pass short-circuits.
16
- * 2. `llm.features.graph_extraction !== false` — the locked v1 spec §14
17
- * feature-flag layer. Set to `false` to block the pass at the
18
- * feature-gate layer (no network call may ever issue).
19
- * 3. `index.graph.llm !== false` — the per-pass opt-out layer (#208).
20
- * Set to `false` to skip just this pass while leaving other passes
21
- * that share the same `llm` block enabled.
22
- * Toggling any one off does NOT delete the existing persisted graph — the
23
- * user keeps the boost component they already have, it just stops
24
- * refreshing.
25
- *
26
- * Locked v1 contract:
27
- * - LLM access is exclusively via `resolveIndexPassLLM("graph", config)`.
28
- * - The graph rows are an indexer artifact, NOT a user-visible
29
- * asset. It does not have an asset ref, does not appear in search
30
- * hits, and is not addressable via `akm show`. Direct `fs.writeFile`
31
- * is therefore the correct primitive — `writeAssetToSource` is
32
- * reserved for asset writes (CLAUDE.md / spec §10 step 5).
33
- */
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
34
4
  import fs from "node:fs";
35
5
  import path from "node:path";
36
6
  import { TYPE_DIRS } from "../core/asset-spec";
37
7
  import { concurrentMap } from "../core/concurrent";
8
+ import { getIndexPassConfig, resolveBatchSize } from "../core/config";
38
9
  import { parseFrontmatter } from "../core/frontmatter";
39
- import { warn } from "../core/warn";
10
+ import { warn, warnVerbose } from "../core/warn";
11
+ import { isProcessEnabled } from "../llm/feature-gate";
40
12
  import * as graphExtract from "../llm/graph-extract";
41
13
  import { resolveIndexPassLLM } from "../llm/index-passes";
42
- import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntry, upsertLlmCacheEntry } from "./db";
14
+ import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "./db";
43
15
  import { loadStoredGraphSnapshot, replaceStoredGraph } from "./graph-db";
44
16
  import { deduplicateGraph } from "./graph-dedup";
45
- import { withLlmCache } from "./llm-cache";
46
17
  import { walkMarkdownFiles } from "./walker";
47
18
  /** Schema version for the persisted artifact — bumps trigger a full rebuild. */
48
19
  export const GRAPH_FILE_SCHEMA_VERSION = GRAPH_SCHEMA_VERSION;
@@ -61,6 +32,13 @@ const EMPTY_RESULT = {
61
32
  totalRelations: 0,
62
33
  written: false,
63
34
  quality: { ...EMPTY_QUALITY },
35
+ telemetry: {
36
+ cacheHits: 0,
37
+ cacheMisses: 0,
38
+ truncationCount: 0,
39
+ failureCount: 0,
40
+ },
41
+ warnings: [],
64
42
  };
65
43
  function roundMetric(value) {
66
44
  return Number(value.toFixed(4));
@@ -90,13 +68,38 @@ const SUPPORTED_GRAPH_EXTRACTION_INCLUDE_TYPES = new Set([
90
68
  "task",
91
69
  "wiki",
92
70
  ]);
71
+ const GRAPH_CACHE_VARIANT_PREFIX = "graph-extraction";
93
72
  function normalizeConfidence(raw) {
94
73
  if (typeof raw !== "number" || !Number.isFinite(raw))
95
74
  return undefined;
96
75
  return Math.max(0, Math.min(1, raw));
97
76
  }
77
+ function getGraphExtractorId(config) {
78
+ const fingerprint = computeBodyHash(JSON.stringify({
79
+ promptVersion: graphExtract.GRAPH_EXTRACT_PROMPT_VERSION,
80
+ model: config.model,
81
+ batchSize: config.batchSize,
82
+ includeTypes: config.includeTypes,
83
+ maxChunkBodyChars: 1600,
84
+ maxBatchBodyChars: 1600,
85
+ })).slice(0, 16);
86
+ return `${GRAPH_CACHE_VARIANT_PREFIX}:${graphExtract.GRAPH_EXTRACT_PROMPT_VERSION}:${config.model}:${fingerprint}`;
87
+ }
88
+ function buildLowQualityWarnings(quality, telemetry) {
89
+ const warnings = [];
90
+ if (quality.consideredFiles >= 5 && quality.extractionCoverage < 0.3) {
91
+ warnings.push(`Low graph extraction coverage (${quality.extractedFiles}/${quality.consideredFiles}, ${quality.extractionCoverage}).`);
92
+ }
93
+ if (quality.entityCount >= 8 && quality.relationCount === 0) {
94
+ warnings.push("Graph extraction produced many entities but no relations.");
95
+ }
96
+ if (telemetry.failureCount > 0) {
97
+ warnings.push(`Graph extraction encountered ${telemetry.failureCount} failed file extraction(s).`);
98
+ }
99
+ return warnings;
100
+ }
98
101
  export function getGraphExtractionIncludeTypes(config) {
99
- const configured = config.index?.graph?.graphExtractionIncludeTypes;
102
+ const configured = getIndexPassConfig(config.index, "graph")?.graphExtractionIncludeTypes;
100
103
  if (!configured || configured.length === 0)
101
104
  return [...DEFAULT_GRAPH_EXTRACTION_INCLUDE_TYPES];
102
105
  const out = [];
@@ -139,6 +142,8 @@ function validateGraphCacheShape(raw) {
139
142
  entities: obj.entities,
140
143
  relations: Array.isArray(obj.relations) ? obj.relations : [],
141
144
  confidence: normalizeConfidence(obj.confidence),
145
+ ...(typeof obj.status === "string" ? { status: obj.status } : {}),
146
+ ...(typeof obj.reason === "string" ? { reason: obj.reason } : {}),
142
147
  };
143
148
  }
144
149
  function loadGraphFile(stashRoot, db) {
@@ -159,9 +164,15 @@ function loadGraphFile(stashRoot, db) {
159
164
  entities: cacheShape.entities,
160
165
  relations: cacheShape.relations,
161
166
  confidence: normalizeConfidence(node.confidence),
167
+ ...(node.status ? { status: node.status } : {}),
168
+ ...(node.reason ? { reason: node.reason } : {}),
169
+ ...(node.extractionRunId ? { extractionRunId: node.extractionRunId } : {}),
162
170
  });
163
171
  }
164
- return { files: out };
172
+ return {
173
+ files: out,
174
+ ...(graph.telemetry ? { telemetry: graph.telemetry } : {}),
175
+ };
165
176
  }
166
177
  function mergeGraphNodes(previousNodes, refreshedNodes, candidatePaths) {
167
178
  if (!candidatePaths)
@@ -194,6 +205,8 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
194
205
  entities: validated.entities,
195
206
  relations: validated.relations,
196
207
  confidence: normalizeConfidence(node.confidence),
208
+ ...(node.status ? { status: node.status } : {}),
209
+ ...(node.reason ? { reason: node.reason } : {}),
197
210
  };
198
211
  }
199
212
  /**
@@ -201,12 +214,12 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
201
214
  *
202
215
  * Three preconditions — ALL must hold for the pass to run:
203
216
  *
204
- * 1. **Provider configured** — `akm.llm` must be present. Without a
217
+ * 1. **Provider configured** — an LLM profile must be selectable. Without a
205
218
  * configured provider, `resolveIndexPassLLM("graph", config)` returns
206
219
  * `undefined` (the pass cannot run because there is no model to call).
207
- * 2. **Feature gate** — `llm.features.graph_extraction` (defaults to
208
- * `true`). When `false`, no network call may issue regardless of
209
- * per-pass settings. This is the locked spec-§14 gate.
220
+ * 2. **Feature gate** — `profiles.improve.default.processes.graphExtraction.enabled`
221
+ * (defaults to `true`). When `false`, no network call may issue regardless
222
+ * of per-pass settings.
210
223
  * 3. **Per-pass gate** — `index.graph.llm` (defaults to `true`). When
211
224
  * `false`, the indexer simply skips this pass for the current run.
212
225
  *
@@ -219,25 +232,38 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
219
232
  * preserves existing behaviour, fully opt-in).
220
233
  */
221
234
  export async function runGraphExtractionPass(config, sources, signal, db, reEnrich, onProgress, options = {}) {
222
- // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
223
- // explicit `false` disables the pass entirely.
224
- if (config.llm?.features?.graph_extraction === false)
235
+ // Gate 1 — feature gate via isProcessEnabled, which reads the 0.8.0 path
236
+ // (profiles.improve.default.processes.graphExtraction.enabled). Defaults to
237
+ // enabled when the key is absent.
238
+ if (!isProcessEnabled("index", "graph_extraction", config))
225
239
  return { ...EMPTY_RESULT };
226
240
  // Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
227
241
  // `undefined` when the pass should not run.
228
242
  const llmConfig = resolveIndexPassLLM("graph", config);
229
- if (!llmConfig)
243
+ if (!llmConfig) {
244
+ const reason = getIndexPassConfig(config.index, "graph")?.llm === false
245
+ ? "index.graph.llm is false"
246
+ : "no default LLM profile is configured";
247
+ warnVerbose(`graph extraction: skipped because ${reason}.`);
230
248
  return { ...EMPTY_RESULT };
249
+ }
231
250
  // The pass only writes to the primary (working) stash. Read-only caches
232
251
  // (git, npm, website) are deliberately untouched — the graph artifact for
233
252
  // those sources would be clobbered by the next sync().
234
253
  const primary = sources[0];
235
- if (!primary)
254
+ if (!primary) {
255
+ warnVerbose("graph extraction: skipped because no primary stash source is available.");
236
256
  return { ...EMPTY_RESULT };
237
- const eligible = collectEligibleFiles(primary.path, getGraphExtractionIncludeTypes(config)).filter((candidate) => !options.candidatePaths || options.candidatePaths.has(candidate.absPath));
257
+ }
258
+ const includeTypes = getGraphExtractionIncludeTypes(config);
259
+ const eligible = collectEligibleFiles(primary.path, includeTypes).filter((candidate) => !options.candidatePaths || options.candidatePaths.has(candidate.absPath));
238
260
  const considered = eligible.length;
239
- if (considered === 0)
261
+ if (considered === 0) {
262
+ const scoped = options.candidatePaths ? ` matching ${options.candidatePaths.size} candidate path(s)` : "";
263
+ warnVerbose(`graph extraction: skipped because no eligible files${scoped} were found under ${primary.path}. ` +
264
+ `includeTypes=${includeTypes.join(",")}`);
240
265
  return { ...EMPTY_RESULT };
266
+ }
241
267
  const previousGraph = loadGraphFile(primary.path, db);
242
268
  const previousNodes = new Map(previousGraph.files.map((node) => [node.path, node]));
243
269
  const nodes = [];
@@ -246,9 +272,58 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
246
272
  let processed = 0;
247
273
  let extracted = 0;
248
274
  onProgress?.({ processed, total: considered, extracted, totalEntities, totalRelations });
249
- // Read the configured batch size. Default of 1 preserves the existing
250
- // per-asset behaviour and is fully opt-in.
251
- const batchSize = config.index?.graph?.graphExtractionBatchSize ?? 1;
275
+ const reportProgress = (currentPath, result) => {
276
+ processed += 1;
277
+ if (result) {
278
+ if (result.entities.length > 0)
279
+ extracted += 1;
280
+ totalEntities += result.entities.length;
281
+ totalRelations += result.relations.length;
282
+ }
283
+ onProgress?.({
284
+ processed,
285
+ total: considered,
286
+ extracted,
287
+ totalEntities,
288
+ totalRelations,
289
+ currentPath,
290
+ });
291
+ };
292
+ // Resolve the effective batch size. Falls back to
293
+ // DEFAULT_GRAPH_EXTRACTION_BATCH_SIZE (4) when unset, and clamps against
294
+ // `llm.contextLength` if the model's context window is configured.
295
+ const batchSize = resolveBatchSize(getIndexPassConfig(config.index, "graph")?.graphExtractionBatchSize, llmConfig.contextLength);
296
+ const extractionRunId = crypto.randomUUID();
297
+ const extractorId = getGraphExtractorId({ model: llmConfig.model, batchSize, includeTypes });
298
+ const cacheVariant = extractorId;
299
+ const telemetry = {
300
+ extractorId,
301
+ extractionRunId,
302
+ model: llmConfig.model,
303
+ promptVersion: graphExtract.GRAPH_EXTRACT_PROMPT_VERSION,
304
+ batchSize,
305
+ cacheHits: 0,
306
+ cacheMisses: 0,
307
+ truncationCount: 0,
308
+ failureCount: 0,
309
+ };
310
+ const canReusePreviousGraph = previousGraph.telemetry?.extractorId === extractorId;
311
+ const runtimeTelemetry = {
312
+ truncationCount: 0,
313
+ failureCount: 0,
314
+ filteredGenericEntities: 0,
315
+ filteredInvalidRelations: 0,
316
+ filteredLowConfidenceRelations: 0,
317
+ contextBatchRetries: 0,
318
+ nonArrayBatchFailures: 0,
319
+ };
320
+ const batchState = {
321
+ batchingDisabled: false,
322
+ nonArrayBatchFailures: 0,
323
+ };
324
+ warnVerbose(`graph extraction: starting for ${considered} eligible file(s) under ${primary.path}; ` +
325
+ `includeTypes=${includeTypes.join(",")}, batchSize=${batchSize}, concurrency=${llmConfig.concurrency ?? 1}, ` +
326
+ `reEnrich=${reEnrich === true}, candidateScoped=${options.candidatePaths ? "true" : "false"}.`);
252
327
  const onFallback = (evt) => {
253
328
  warn(`[akm] LLM fallback for ${evt.feature}: ${evt.reason}`);
254
329
  };
@@ -256,49 +331,67 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
256
331
  if (batchSize <= 1) {
257
332
  // ── Original per-asset path (with incremental cache) ─────────────────
258
333
  extractionResults = await concurrentMap(eligible, async (candidate) => {
259
- if (signal?.aborted)
334
+ if (signal?.aborted) {
335
+ reportProgress(candidate.absPath, undefined);
260
336
  return undefined;
337
+ }
261
338
  const bodyHash = computeBodyHash(candidate.body);
262
339
  let cached;
263
340
  if (db) {
264
- // withLlmCache handles hash computation, cache lookup, LLM call, and cache write.
265
- // When cache misses and this run is not forced, attempt graph-node reuse before LLM.
266
- cached = await withLlmCache(db, candidate.absPath, candidate.body, reEnrich ?? false, async () => {
267
- if (!(reEnrich ?? false)) {
268
- const reused = reuseGraphNode(previousNodes, candidate, bodyHash);
269
- if (reused)
270
- return reused;
341
+ if (!(reEnrich ?? false)) {
342
+ const cacheEntry = getLlmCacheEntry(db, candidate.absPath, bodyHash, cacheVariant);
343
+ if (cacheEntry) {
344
+ try {
345
+ cached = validateGraphCacheShape(JSON.parse(cacheEntry.resultJson));
346
+ if (cached)
347
+ telemetry.cacheHits += 1;
348
+ }
349
+ catch {
350
+ cached = undefined;
351
+ }
271
352
  }
272
- const extraction = await graphExtract.extractGraphFromBody(llmConfig, candidate.body, signal, config, onFallback);
273
- // Cache empty results too so we skip on next run.
274
- return {
275
- entities: extraction.entities,
276
- relations: extraction.relations,
277
- ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
278
- };
279
- }, validateGraphCacheShape);
353
+ }
280
354
  }
281
355
  else if (!(reEnrich ?? false)) {
356
+ // No DB — best-effort reuse from the previous in-memory graph.
282
357
  cached = reuseGraphNode(previousNodes, candidate, bodyHash);
283
358
  }
359
+ if (!cached && !(reEnrich ?? false) && canReusePreviousGraph) {
360
+ const reused = reuseGraphNode(previousNodes, candidate, bodyHash);
361
+ if (reused) {
362
+ cached = reused;
363
+ if (db) {
364
+ upsertLlmCacheEntry(db, candidate.absPath, bodyHash, JSON.stringify(reused), cacheVariant);
365
+ }
366
+ telemetry.cacheHits += 1;
367
+ }
368
+ }
284
369
  if (!cached) {
285
- const extraction = await graphExtract.extractGraphFromBody(llmConfig, candidate.body, signal, config, onFallback);
370
+ telemetry.cacheMisses += 1;
371
+ const extraction = await graphExtract.extractGraphFromBody(llmConfig, candidate.body, signal, config, onFallback, { batchState, telemetry: runtimeTelemetry });
286
372
  cached = {
287
373
  entities: extraction.entities,
288
374
  relations: extraction.relations,
289
375
  ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
376
+ ...(extraction.status ? { status: extraction.status } : {}),
377
+ ...(extraction.reason ? { reason: extraction.reason } : {}),
290
378
  };
379
+ if (db) {
380
+ upsertLlmCacheEntry(db, candidate.absPath, bodyHash, JSON.stringify(cached), cacheVariant);
381
+ }
291
382
  }
292
- if (!cached || cached.entities.length === 0)
293
- return undefined;
294
- return {
383
+ const result = {
295
384
  absPath: candidate.absPath,
296
385
  type: candidate.type,
297
386
  bodyHash,
298
387
  entities: cached.entities,
299
388
  relations: cached.relations,
300
389
  ...(cached.confidence !== undefined ? { confidence: cached.confidence } : {}),
390
+ ...(cached.status ? { status: cached.status } : {}),
391
+ ...(cached.reason ? { reason: cached.reason } : {}),
301
392
  };
393
+ reportProgress(candidate.absPath, result);
394
+ return result;
302
395
  },
303
396
  // Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
304
397
  // in config.json for local model servers (LM Studio, Ollama).
@@ -317,30 +410,44 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
317
410
  if (signal?.aborted)
318
411
  return;
319
412
  const chunk = eligible.slice(start, start + batchSize);
413
+ const reportChunkProgress = () => {
414
+ for (let j = 0; j < chunk.length; j++) {
415
+ const candidate = chunk[j];
416
+ if (!candidate)
417
+ continue;
418
+ reportProgress(candidate.absPath, rawResults[start + j]);
419
+ }
420
+ };
320
421
  // Pre-resolve cache hits for this chunk; track which positions need LLM.
321
422
  const bodyHashes = chunk.map((c) => computeBodyHash(c.body));
423
+ // Batch the cache lookup: one IN(...) query for the whole chunk instead
424
+ // of N individual SELECTs. The map covers every ref in this chunk that
425
+ // has any cached row; the per-position hash check happens below.
426
+ const chunkCache = db && !reEnrich
427
+ ? getLlmCacheEntriesByRefs(db, chunk.map((c) => c.absPath), cacheVariant)
428
+ : new Map();
322
429
  const needsLlm = chunk.map((c, j) => {
323
430
  if (!db || reEnrich)
324
431
  return true;
325
- const cached = getLlmCacheEntry(db, c.absPath, bodyHashes[j] ?? "");
326
- if (!cached)
432
+ const cached = chunkCache.get(c.absPath);
433
+ // Hash mismatch → body changed, treat as cache miss.
434
+ if (!cached || cached.bodyHash !== (bodyHashes[j] ?? ""))
327
435
  return true;
328
436
  try {
329
437
  const parsed = validateGraphCacheShape(JSON.parse(cached.resultJson));
330
438
  if (!parsed)
331
439
  return true;
332
- const entities = parsed.entities;
333
- rawResults[start + j] =
334
- entities.length > 0
335
- ? {
336
- absPath: c.absPath,
337
- type: c.type,
338
- bodyHash: bodyHashes[j] ?? "",
339
- entities,
340
- relations: parsed.relations,
341
- ...(parsed.confidence !== undefined ? { confidence: parsed.confidence } : {}),
342
- }
343
- : undefined;
440
+ telemetry.cacheHits += 1;
441
+ rawResults[start + j] = {
442
+ absPath: c.absPath,
443
+ type: c.type,
444
+ bodyHash: bodyHashes[j] ?? "",
445
+ entities: parsed.entities,
446
+ relations: parsed.relations,
447
+ ...(parsed.confidence !== undefined ? { confidence: parsed.confidence } : {}),
448
+ ...(parsed.status ? { status: parsed.status } : {}),
449
+ ...(parsed.reason ? { reason: parsed.reason } : {}),
450
+ };
344
451
  return false;
345
452
  }
346
453
  catch {
@@ -349,7 +456,7 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
349
456
  });
350
457
  // Secondary incremental path: reuse previous graph nodes when the body hash
351
458
  // still matches and DB cache is missing/stale/unavailable.
352
- if (!(reEnrich ?? false)) {
459
+ if (!(reEnrich ?? false) && canReusePreviousGraph) {
353
460
  for (let j = 0; j < chunk.length; j++) {
354
461
  if (!needsLlm[j])
355
462
  continue;
@@ -359,30 +466,33 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
359
466
  const reused = reuseGraphNode(previousNodes, candidate, bodyHashes[j] ?? "");
360
467
  if (!reused)
361
468
  continue;
362
- rawResults[start + j] =
363
- reused.entities.length > 0
364
- ? {
365
- absPath: candidate.absPath,
366
- type: candidate.type,
367
- bodyHash: bodyHashes[j] ?? "",
368
- entities: reused.entities,
369
- relations: reused.relations,
370
- ...(reused.confidence !== undefined ? { confidence: reused.confidence } : {}),
371
- }
372
- : undefined;
469
+ telemetry.cacheHits += 1;
470
+ rawResults[start + j] = {
471
+ absPath: candidate.absPath,
472
+ type: candidate.type,
473
+ bodyHash: bodyHashes[j] ?? "",
474
+ entities: reused.entities,
475
+ relations: reused.relations,
476
+ ...(reused.confidence !== undefined ? { confidence: reused.confidence } : {}),
477
+ ...(reused.status ? { status: reused.status } : {}),
478
+ ...(reused.reason ? { reason: reused.reason } : {}),
479
+ };
373
480
  if (db) {
374
- upsertLlmCacheEntry(db, candidate.absPath, bodyHashes[j] ?? "", JSON.stringify(reused));
481
+ upsertLlmCacheEntry(db, candidate.absPath, bodyHashes[j] ?? "", JSON.stringify(reused), cacheVariant);
375
482
  }
376
483
  needsLlm[j] = false;
377
484
  }
378
485
  }
379
486
  const uncachedChunk = chunk.filter((_, j) => needsLlm[j]);
380
- if (uncachedChunk.length === 0)
487
+ if (uncachedChunk.length === 0) {
488
+ reportChunkProgress();
381
489
  return;
490
+ }
382
491
  const bodies = uncachedChunk.map((c) => c.body);
492
+ telemetry.cacheMisses += uncachedChunk.length;
383
493
  // extractGraphFromBodies always returns an array of the same length
384
494
  // as bodies (it falls back per-asset for any missing indices).
385
- const batchExtractions = await graphExtract.extractGraphFromBodies(llmConfig, bodies, signal, config, onFallback);
495
+ const batchExtractions = await graphExtract.extractGraphFromBodies(llmConfig, bodies, signal, config, onFallback, { batchState, telemetry: runtimeTelemetry });
386
496
  // Map LLM results back to original positions and write cache entries.
387
497
  let llmIdx = 0;
388
498
  for (let j = 0; j < chunk.length; j++) {
@@ -397,22 +507,22 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
397
507
  entities: extraction.entities,
398
508
  relations: extraction.relations,
399
509
  ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
400
- }));
401
- }
402
- if (extraction.entities.length === 0) {
403
- rawResults[start + j] = undefined;
404
- }
405
- else {
406
- rawResults[start + j] = {
407
- absPath: candidate.absPath,
408
- type: candidate.type,
409
- bodyHash: bodyHashes[j] ?? "",
410
- entities: extraction.entities,
411
- relations: extraction.relations,
412
- ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
413
- };
510
+ ...(extraction.status ? { status: extraction.status } : {}),
511
+ ...(extraction.reason ? { reason: extraction.reason } : {}),
512
+ }), cacheVariant);
414
513
  }
514
+ rawResults[start + j] = {
515
+ absPath: candidate.absPath,
516
+ type: candidate.type,
517
+ bodyHash: bodyHashes[j] ?? "",
518
+ entities: extraction.entities,
519
+ relations: extraction.relations,
520
+ ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
521
+ ...(extraction.status ? { status: extraction.status } : {}),
522
+ ...(extraction.reason ? { reason: extraction.reason } : {}),
523
+ };
415
524
  }
525
+ reportChunkProgress();
416
526
  }, llmConfig.concurrency ?? 1);
417
527
  extractionResults = rawResults;
418
528
  }
@@ -435,45 +545,22 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
435
545
  ...(normalizeConfidence(result.confidence) !== undefined
436
546
  ? { confidence: normalizeConfidence(result.confidence) }
437
547
  : {}),
438
- });
439
- }
440
- processed = 0;
441
- extracted = 0;
442
- totalEntities = 0;
443
- totalRelations = 0;
444
- for (let i = 0; i < extractionResults.length; i++) {
445
- const result = extractionResults[i];
446
- processed += 1;
447
- if (result) {
448
- extracted += 1;
449
- totalEntities += result.entities.length;
450
- totalRelations += result.relations.length;
451
- }
452
- onProgress?.({
453
- processed,
454
- total: considered,
455
- extracted,
456
- totalEntities,
457
- totalRelations,
458
- currentPath: eligible[i]?.absPath,
548
+ status: result.status ?? (result.entities.length > 0 ? "extracted" : "empty"),
549
+ reason: result.reason ?? (result.entities.length > 0 ? "none" : "no_graph_content"),
550
+ extractionRunId,
459
551
  });
460
552
  }
461
553
  const mergedNodes = mergeGraphNodes(previousGraph.files, nodes, options.candidatePaths);
462
554
  const assetRefs = mergedNodes.map((node) => node.path);
463
555
  const deduped = deduplicateGraph(mergedNodes.map((node) => ({ entities: node.entities, relations: node.relations })), assetRefs);
464
- if (mergedNodes.length === 0) {
465
- warn("graph extraction: all extractions failed or returned no entities; leaving existing graph rows untouched.");
466
- return {
467
- considered,
468
- extracted: 0,
469
- totalEntities: 0,
470
- totalRelations: 0,
471
- written: false,
472
- quality: computeGraphQualityTelemetry(considered, 0, 0, 0),
473
- };
474
- }
475
- const qualityConsidered = options.candidatePaths ? mergedNodes.length : considered;
476
- const quality = computeGraphQualityTelemetry(qualityConsidered, mergedNodes.length, deduped.entities.length, deduped.relations.length);
556
+ telemetry.truncationCount = runtimeTelemetry.truncationCount ?? 0;
557
+ telemetry.failureCount = runtimeTelemetry.failureCount ?? 0;
558
+ const qualityConsidered = mergedNodes.length;
559
+ const qualityExtracted = mergedNodes.filter((node) => node.status === "extracted" && node.entities.length > 0).length;
560
+ const quality = computeGraphQualityTelemetry(qualityConsidered, qualityExtracted, deduped.entities.length, deduped.relations.length);
561
+ const warnings = buildLowQualityWarnings(quality, telemetry);
562
+ for (const warning of warnings)
563
+ warnVerbose(`graph extraction quality: ${warning}`);
477
564
  const graph = {
478
565
  schemaVersion: GRAPH_FILE_SCHEMA_VERSION,
479
566
  generatedAt: new Date().toISOString(),
@@ -482,8 +569,12 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
482
569
  entities: deduped.entities,
483
570
  relations: deduped.relations,
484
571
  quality,
572
+ telemetry,
485
573
  };
486
574
  const written = writeGraphFile(primary.path, graph, db);
575
+ warnVerbose(`graph extraction: ${written ? "persisted" : "did not persist"} graph for ${primary.path}; ` +
576
+ `considered=${considered}, extractedThisRun=${extracted}, storedFiles=${mergedNodes.length}, ` +
577
+ `entities=${deduped.entities.length}, relations=${deduped.relations.length}, coverage=${quality.extractionCoverage}.`);
487
578
  return {
488
579
  considered,
489
580
  extracted,
@@ -491,6 +582,8 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
491
582
  totalRelations,
492
583
  written,
493
584
  quality,
585
+ telemetry,
586
+ warnings,
494
587
  };
495
588
  }
496
589
  /**
@@ -1,10 +1,4 @@
1
- /**
2
- * IndexRunContext shared state threaded through every phase of `akmIndex()`.
3
- *
4
- * Extracted from `src/indexer/indexer.ts` so each named phase function
5
- * (`runSourceCachePhase`, `runMemoryInferencePhase`, …) can receive a single
6
- * typed argument rather than a long positional parameter list. The context is
7
- * assembled once at the top of `akmIndex()` and passed to each phase in
8
- * sequence.
9
- */
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
10
4
  export {};