akm-cli 0.6.1 → 0.7.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/dist/{cli.js → src/cli.js} +620 -26
- package/dist/{commands → src/commands}/config-cli.js +5 -4
- package/dist/src/commands/distill.js +283 -0
- package/dist/src/commands/events.js +108 -0
- package/dist/src/commands/history.js +120 -0
- package/dist/{commands → src/commands}/installed-stashes.js +1 -1
- package/dist/src/commands/proposal.js +119 -0
- package/dist/src/commands/propose.js +171 -0
- package/dist/src/commands/reflect.js +193 -0
- package/dist/{commands → src/commands}/registry-search.js +2 -1
- package/dist/{commands → src/commands}/remember.js +12 -0
- package/dist/{commands → src/commands}/search.js +74 -1
- package/dist/{commands → src/commands}/self-update.js +4 -3
- package/dist/{commands → src/commands}/show.js +44 -0
- package/dist/{core → src/core}/asset-ref.js +5 -5
- package/dist/{core → src/core}/asset-spec.js +12 -0
- package/dist/{core → src/core}/common.js +1 -1
- package/dist/{core → src/core}/config.js +175 -121
- package/dist/{core → src/core}/errors.js +4 -0
- package/dist/src/core/events.js +239 -0
- package/dist/src/core/lesson-lint.js +86 -0
- package/dist/src/core/proposals.js +406 -0
- package/dist/src/core/warn.js +72 -0
- package/dist/{core → src/core}/write-source.js +80 -5
- package/dist/{indexer → src/indexer}/db-search.js +113 -24
- package/dist/{indexer → src/indexer}/db.js +76 -23
- package/dist/{indexer → src/indexer}/file-context.js +0 -3
- package/dist/src/indexer/graph-boost.js +179 -0
- package/dist/src/indexer/graph-extraction.js +212 -0
- package/dist/{indexer → src/indexer}/indexer.js +73 -6
- package/dist/src/indexer/memory-inference.js +263 -0
- package/dist/{indexer → src/indexer}/metadata.js +111 -3
- package/dist/src/integrations/agent/config.js +292 -0
- package/dist/src/integrations/agent/detect.js +94 -0
- package/dist/src/integrations/agent/index.js +17 -0
- package/dist/src/integrations/agent/profiles.js +65 -0
- package/dist/src/integrations/agent/prompts.js +167 -0
- package/dist/src/integrations/agent/spawn.js +221 -0
- package/dist/{integrations → src/integrations}/lockfile.js +0 -26
- package/dist/{llm → src/llm}/client.js +33 -2
- package/dist/src/llm/feature-gate.js +108 -0
- package/dist/src/llm/graph-extract.js +107 -0
- package/dist/src/llm/index-passes.js +35 -0
- package/dist/src/llm/memory-infer.js +86 -0
- package/dist/{output → src/output}/renderers.js +60 -1
- package/dist/src/output/shapes.js +516 -0
- package/dist/{output → src/output}/text.js +447 -4
- package/dist/{registry → src/registry}/build-index.js +14 -4
- package/dist/{registry → src/registry}/factory.js +0 -8
- package/dist/{registry → src/registry}/providers/static-index.js +3 -2
- package/dist/{registry → src/registry}/resolve.js +68 -2
- package/dist/{setup → src/setup}/setup.js +43 -5
- package/dist/{sources → src/sources}/providers/git.js +7 -15
- package/dist/tests/add-website-source.test.js +119 -0
- package/dist/tests/agent/agent-config-loader.test.js +70 -0
- package/dist/tests/agent/agent-config.test.js +221 -0
- package/dist/tests/agent/agent-detect.test.js +100 -0
- package/dist/tests/agent/agent-spawn.test.js +234 -0
- package/dist/tests/agent-output.test.js +186 -0
- package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
- package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
- package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
- package/dist/tests/asset-ref.test.js +192 -0
- package/dist/tests/asset-registry.test.js +103 -0
- package/dist/tests/asset-spec.test.js +241 -0
- package/dist/tests/bench/attribution.test.js +995 -0
- package/dist/tests/bench/cleanup-sigint.test.js +83 -0
- package/dist/tests/bench/cleanup.js +203 -0
- package/dist/tests/bench/cleanup.test.js +166 -0
- package/dist/tests/bench/cli.js +683 -0
- package/dist/tests/bench/cli.test.js +177 -0
- package/dist/tests/bench/compare.test.js +556 -0
- package/dist/tests/bench/corpus.js +314 -0
- package/dist/tests/bench/corpus.test.js +258 -0
- package/dist/tests/bench/driver.js +346 -0
- package/dist/tests/bench/driver.test.js +443 -0
- package/dist/tests/bench/evolve-metrics.js +179 -0
- package/dist/tests/bench/evolve-metrics.test.js +187 -0
- package/dist/tests/bench/evolve.js +580 -0
- package/dist/tests/bench/evolve.test.js +616 -0
- package/dist/tests/bench/failure-modes.test.js +300 -0
- package/dist/tests/bench/feedback-integrity.test.js +456 -0
- package/dist/tests/bench/leakage.test.js +125 -0
- package/dist/tests/bench/learning-curve.test.js +133 -0
- package/dist/tests/bench/metrics.js +2319 -0
- package/dist/tests/bench/metrics.test.js +1144 -0
- package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
- package/dist/tests/bench/report.js +1821 -0
- package/dist/tests/bench/report.test.js +989 -0
- package/dist/tests/bench/runner.js +536 -0
- package/dist/tests/bench/runner.test.js +958 -0
- package/dist/tests/bench/search-bridge.test.js +331 -0
- package/dist/tests/bench/tmp.js +41 -0
- package/dist/tests/bench/trajectory.js +116 -0
- package/dist/tests/bench/trajectory.test.js +127 -0
- package/dist/tests/bench/verifier.js +109 -0
- package/dist/tests/bench/verifier.test.js +118 -0
- package/dist/tests/bench/workflow-evaluator.js +557 -0
- package/dist/tests/bench/workflow-evaluator.test.js +421 -0
- package/dist/tests/bench/workflow-spec.js +358 -0
- package/dist/tests/bench/workflow-spec.test.js +363 -0
- package/dist/tests/bench/workflow-trace.js +438 -0
- package/dist/tests/bench/workflow-trace.test.js +254 -0
- package/dist/tests/benchmark-search-quality.js +536 -0
- package/dist/tests/benchmark-suite.js +1441 -0
- package/dist/tests/capture-cli.test.js +112 -0
- package/dist/tests/cli-errors.test.js +203 -0
- package/dist/tests/commands/events.test.js +370 -0
- package/dist/tests/commands/history.test.js +223 -0
- package/dist/tests/commands/import.test.js +103 -0
- package/dist/tests/commands/proposal-cli.test.js +209 -0
- package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
- package/dist/tests/commands/remember.test.js +97 -0
- package/dist/tests/commands/scope-flags.test.js +300 -0
- package/dist/tests/commands/search.test.js +537 -0
- package/dist/tests/commands/show-indexer-parity.test.js +117 -0
- package/dist/tests/commands/show.test.js +294 -0
- package/dist/tests/common.test.js +266 -0
- package/dist/tests/completions.test.js +142 -0
- package/dist/tests/config-cli.test.js +193 -0
- package/dist/tests/config-llm-features.test.js +139 -0
- package/dist/tests/config.test.js +544 -0
- package/dist/tests/contracts/migration-baseline.test.js +43 -0
- package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
- package/dist/tests/contracts/spec-helpers.js +46 -0
- package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
- package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
- package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
- package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
- package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
- package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
- package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
- package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
- package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
- package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
- package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
- package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
- package/dist/tests/core/write-source.test.js +366 -0
- package/dist/tests/curate-command.test.js +87 -0
- package/dist/tests/db-scoring.test.js +201 -0
- package/dist/tests/db.test.js +654 -0
- package/dist/tests/distill-cli-flag.test.js +208 -0
- package/dist/tests/distill.test.js +515 -0
- package/dist/tests/docker-install.test.js +120 -0
- package/dist/tests/e2e.test.js +1398 -0
- package/dist/tests/embedder.test.js +340 -0
- package/dist/tests/embedding-model-config.test.js +379 -0
- package/dist/tests/feedback-command.test.js +172 -0
- package/dist/tests/file-context.test.js +552 -0
- package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
- package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
- package/dist/tests/fixtures/stashes/load.js +166 -0
- package/dist/tests/fixtures/stashes/load.test.js +88 -0
- package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
- package/dist/tests/frontmatter.test.js +190 -0
- package/dist/tests/fts-field-weighting.test.js +254 -0
- package/dist/tests/fuzzy-search.test.js +230 -0
- package/dist/tests/git-provider-clone.test.js +45 -0
- package/dist/tests/github.test.js +161 -0
- package/dist/tests/graph-boost-ranking.test.js +305 -0
- package/dist/tests/graph-extraction.test.js +282 -0
- package/dist/tests/helpers/usage-events.js +8 -0
- package/dist/tests/index-pass-llm.test.js +161 -0
- package/dist/tests/indexer.test.js +559 -0
- package/dist/tests/info-command.test.js +166 -0
- package/dist/tests/init.test.js +69 -0
- package/dist/tests/install-script.test.js +246 -0
- package/dist/tests/integration/agent-real-profile.test.js +94 -0
- package/dist/tests/issue-36-repro.test.js +304 -0
- package/dist/tests/issues-191-194.test.js +160 -0
- package/dist/tests/lesson-lint.test.js +111 -0
- package/dist/tests/llm-client.test.js +115 -0
- package/dist/tests/llm-feature-gate.test.js +151 -0
- package/dist/tests/llm.test.js +139 -0
- package/dist/tests/lockfile.test.js +216 -0
- package/dist/tests/manifest.test.js +205 -0
- package/dist/tests/markdown.test.js +126 -0
- package/dist/tests/matchers-unit.test.js +189 -0
- package/dist/tests/memory-inference.test.js +299 -0
- package/dist/tests/merge-scoring.test.js +136 -0
- package/dist/tests/metadata.test.js +313 -0
- package/dist/tests/migration-help.test.js +89 -0
- package/dist/tests/origin-resolve.test.js +124 -0
- package/dist/tests/output-baseline.test.js +217 -0
- package/dist/tests/output-shapes-unit.test.js +476 -0
- package/dist/tests/parallel-search.test.js +272 -0
- package/dist/tests/parameter-metadata.test.js +365 -0
- package/dist/tests/paths.test.js +177 -0
- package/dist/tests/progressive-disclosure.test.js +280 -0
- package/dist/tests/proposals.test.js +279 -0
- package/dist/tests/proposed-quality.test.js +271 -0
- package/dist/tests/provider-registry.test.js +32 -0
- package/dist/tests/ranking-regression.test.js +548 -0
- package/dist/tests/reflect-propose.test.js +455 -0
- package/dist/tests/registry-build-index.test.js +378 -0
- package/dist/tests/registry-cli.test.js +290 -0
- package/dist/tests/registry-index-v2.test.js +430 -0
- package/dist/tests/registry-install.test.js +728 -0
- package/dist/tests/registry-providers/parity.test.js +189 -0
- package/dist/tests/registry-providers/skills-sh.test.js +309 -0
- package/dist/tests/registry-providers/static-index.test.js +204 -0
- package/dist/tests/registry-resolve.test.js +126 -0
- package/dist/tests/registry-search.test.js +723 -0
- package/dist/tests/remember-frontmatter.test.js +380 -0
- package/dist/tests/remember-unit.test.js +123 -0
- package/dist/tests/ripgrep-install.test.js +251 -0
- package/dist/tests/ripgrep-resolve.test.js +108 -0
- package/dist/tests/ripgrep.test.js +163 -0
- package/dist/tests/save-command.test.js +94 -0
- package/dist/tests/save-trust-qa-fixes.test.js +270 -0
- package/dist/tests/scoring-pipeline.test.js +648 -0
- package/dist/tests/search-include-proposed-cli.test.js +118 -0
- package/dist/tests/self-update.test.js +442 -0
- package/dist/tests/semantic-search-e2e.test.js +512 -0
- package/dist/tests/semantic-status.test.js +471 -0
- package/dist/tests/setup-run.integration.js +877 -0
- package/dist/tests/setup-wizard.test.js +198 -0
- package/dist/tests/setup.test.js +131 -0
- package/dist/tests/source-add.test.js +11 -0
- package/dist/tests/source-clone.test.js +254 -0
- package/dist/tests/source-manage.test.js +366 -0
- package/dist/tests/source-providers/filesystem.test.js +82 -0
- package/dist/tests/source-providers/git.test.js +252 -0
- package/dist/tests/source-providers/website.test.js +128 -0
- package/dist/tests/source-qa-fixes.test.js +268 -0
- package/dist/tests/source-registry.test.js +350 -0
- package/dist/tests/source-resolve.test.js +100 -0
- package/dist/tests/source-source.test.js +221 -0
- package/dist/tests/source.test.js +533 -0
- package/dist/tests/tar-utils-scan.test.js +73 -0
- package/dist/tests/toggle-components.test.js +73 -0
- package/dist/tests/usage-telemetry.test.js +265 -0
- package/dist/tests/utility-scoring.test.js +558 -0
- package/dist/tests/vault-load-error.test.js +78 -0
- package/dist/tests/vault-qa-fixes.test.js +194 -0
- package/dist/tests/vault.test.js +429 -0
- package/dist/tests/vector-search.test.js +608 -0
- package/dist/tests/walker.test.js +252 -0
- package/dist/tests/wave2-cluster-bc.test.js +228 -0
- package/dist/tests/wave2-cluster-d.test.js +180 -0
- package/dist/tests/wave2-cluster-e.test.js +179 -0
- package/dist/tests/wiki-qa-fixes.test.js +270 -0
- package/dist/tests/wiki.test.js +529 -0
- package/dist/tests/workflow-cli.test.js +271 -0
- package/dist/tests/workflow-markdown.test.js +171 -0
- package/dist/tests/workflow-path-escape.test.js +132 -0
- package/dist/tests/workflow-qa-fixes.test.js +377 -0
- package/dist/tests/workflows/indexer-rejection.test.js +213 -0
- package/docs/README.md +8 -0
- package/docs/migration/release-notes/0.7.0.md +244 -0
- package/package.json +2 -2
- package/dist/core/warn.js +0 -27
- package/dist/output/shapes.js +0 -212
- /package/dist/{commands → src/commands}/completions.js +0 -0
- /package/dist/{commands → src/commands}/curate.js +0 -0
- /package/dist/{commands → src/commands}/info.js +0 -0
- /package/dist/{commands → src/commands}/init.js +0 -0
- /package/dist/{commands → src/commands}/install-audit.js +0 -0
- /package/dist/{commands → src/commands}/migration-help.js +0 -0
- /package/dist/{commands → src/commands}/source-add.js +0 -0
- /package/dist/{commands → src/commands}/source-clone.js +0 -0
- /package/dist/{commands → src/commands}/source-manage.js +0 -0
- /package/dist/{commands → src/commands}/vault.js +0 -0
- /package/dist/{core → src/core}/asset-registry.js +0 -0
- /package/dist/{core → src/core}/frontmatter.js +0 -0
- /package/dist/{core → src/core}/markdown.js +0 -0
- /package/dist/{core → src/core}/paths.js +0 -0
- /package/dist/{indexer → src/indexer}/manifest.js +0 -0
- /package/dist/{indexer → src/indexer}/matchers.js +0 -0
- /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
- /package/dist/{indexer → src/indexer}/search-source.js +0 -0
- /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
- /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
- /package/dist/{indexer → src/indexer}/walker.js +0 -0
- /package/dist/{integrations → src/integrations}/github.js +0 -0
- /package/dist/{llm → src/llm}/embedder.js +0 -0
- /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
- /package/dist/{llm → src/llm}/embedders/local.js +0 -0
- /package/dist/{llm → src/llm}/embedders/remote.js +0 -0
- /package/dist/{llm → src/llm}/embedders/types.js +0 -0
- /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
- /package/dist/{output → src/output}/cli-hints.js +0 -0
- /package/dist/{output → src/output}/context.js +0 -0
- /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
- /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
- /package/dist/{registry → src/registry}/providers/index.js +0 -0
- /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
- /package/dist/{registry → src/registry}/providers/types.js +0 -0
- /package/dist/{registry → src/registry}/types.js +0 -0
- /package/dist/{setup → src/setup}/detect.js +0 -0
- /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
- /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
- /package/dist/{setup → src/setup}/steps.js +0 -0
- /package/dist/{sources → src/sources}/include.js +0 -0
- /package/dist/{sources → src/sources}/provider-factory.js +0 -0
- /package/dist/{sources → src/sources}/provider.js +0 -0
- /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
- /package/dist/{sources → src/sources}/providers/index.js +0 -0
- /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
- /package/dist/{sources → src/sources}/providers/npm.js +0 -0
- /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
- /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
- /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
- /package/dist/{sources → src/sources}/providers/website.js +0 -0
- /package/dist/{sources → src/sources}/resolve.js +0 -0
- /package/dist/{sources → src/sources}/types.js +0 -0
- /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
- /package/dist/{version.js → src/version.js} +0 -0
- /package/dist/{wiki → src/wiki}/wiki.js +0 -0
- /package/dist/{workflows → src/workflows}/authoring.js +0 -0
- /package/dist/{workflows → src/workflows}/cli.js +0 -0
- /package/dist/{workflows → src/workflows}/db.js +0 -0
- /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
- /package/dist/{workflows → src/workflows}/parser.js +0 -0
- /package/dist/{workflows → src/workflows}/renderer.js +0 -0
- /package/dist/{workflows → src/workflows}/runs.js +0 -0
- /package/dist/{workflows → src/workflows}/schema.js +0 -0
- /package/dist/{workflows → src/workflows}/validator.js +0 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph-extraction pass for `akm index` (#207).
|
|
3
|
+
*
|
|
4
|
+
* Walks the primary stash for `memory:` and `knowledge:` assets, asks the
|
|
5
|
+
* configured LLM to extract entities and relations from each one, and
|
|
6
|
+
* persists the result to a single stash-local artifact at
|
|
7
|
+
* `<stashRoot>/.akm/graph.json`. The artifact is consumed by the search
|
|
8
|
+
* pipeline (see `src/indexer/graph-boost.ts`) as a single boost component
|
|
9
|
+
* inside the existing FTS5+boosts loop — there is NO second SearchHit
|
|
10
|
+
* scorer and no parallel ranking track.
|
|
11
|
+
*
|
|
12
|
+
* Disabling — three preconditions must ALL hold for the pass to run:
|
|
13
|
+
* 1. `akm.llm` must be configured (no provider = no extraction). When
|
|
14
|
+
* absent, `resolveIndexPassLLM("graph", config)` returns `undefined`
|
|
15
|
+
* and the pass short-circuits.
|
|
16
|
+
* 2. `llm.features.graph_extraction !== false` — the locked v1 spec §14
|
|
17
|
+
* feature-flag layer. Set to `false` to block the pass at the
|
|
18
|
+
* feature-gate layer (no network call may ever issue).
|
|
19
|
+
* 3. `index.graph.llm !== false` — the per-pass opt-out layer (#208).
|
|
20
|
+
* Set to `false` to skip just this pass while leaving other passes
|
|
21
|
+
* that share the same `llm` block enabled.
|
|
22
|
+
* Toggling any one off does NOT delete the existing `graph.json` — the
|
|
23
|
+
* user keeps the boost component they already have, it just stops
|
|
24
|
+
* refreshing.
|
|
25
|
+
*
|
|
26
|
+
* Locked v1 contract:
|
|
27
|
+
* - LLM access is exclusively via `resolveIndexPassLLM("graph", config)`.
|
|
28
|
+
* - The `graph.json` file is an indexer artifact, NOT a user-visible
|
|
29
|
+
* asset. It does not have an asset ref, does not appear in search
|
|
30
|
+
* hits, and is not addressable via `akm show`. Direct `fs.writeFile`
|
|
31
|
+
* is therefore the correct primitive — `writeAssetToSource` is
|
|
32
|
+
* reserved for asset writes (CLAUDE.md / spec §10 step 5).
|
|
33
|
+
*/
|
|
34
|
+
import fs from "node:fs";
|
|
35
|
+
import path from "node:path";
|
|
36
|
+
import { parseFrontmatter } from "../core/frontmatter";
|
|
37
|
+
import { warn } from "../core/warn";
|
|
38
|
+
import { extractGraphFromBody } from "../llm/graph-extract";
|
|
39
|
+
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
40
|
+
/** Version stamp stored in the persisted artifact; bumping it triggers a full rebuild. */
export const GRAPH_FILE_SCHEMA_VERSION = 1;
/** Stash-relative location of the artifact, held stable so the search-time boost can find it. */
export const GRAPH_FILE_RELATIVE_PATH = path.join(".akm", "graph.json");
/**
 * Resolve where the graph artifact lives for a given stash.
 *
 * Exported so the search-side reader and the tests derive the location from
 * a single rule.
 *
 * @param {string} stashRoot - Root directory of the stash.
 * @returns {string} `stashRoot` joined with `GRAPH_FILE_RELATIVE_PATH`.
 */
export function getGraphFilePath(stashRoot) {
    const graphFile = path.join(stashRoot, GRAPH_FILE_RELATIVE_PATH);
    return graphFile;
}
|
|
48
|
+
/**
 * Template for the zeroed result handed back when the pass does not run.
 *
 * Frozen so that an accidental write to this shared object throws (module
 * code is strict) instead of silently leaking into every later no-op
 * result. Consumers return a spread copy, which is an ordinary mutable
 * object.
 */
const EMPTY_RESULT = Object.freeze({
    considered: 0,
    extracted: 0,
    totalEntities: 0,
    totalRelations: 0,
    written: false,
});
|
|
55
|
+
/**
 * Run the graph-extraction index pass against the primary stash.
 *
 * The pass is skipped — returning a zeroed result and leaving any existing
 * `graph.json` untouched on disk — when any of the following holds:
 *
 *   - `config.llm.features.graph_extraction` is explicitly `false` (the
 *     spec-§14 feature gate; every other value counts as enabled);
 *   - `resolveIndexPassLLM("graph", config)` returns nothing usable (no
 *     provider configured, or the per-pass `index.graph.llm` opt-out);
 *   - `sources` has no first entry;
 *   - the primary stash contains no eligible files.
 *
 * Otherwise every eligible file is sent to the LLM, one at a time. Files
 * that yield at least one entity become nodes, and the resulting graph is
 * persisted via `writeGraphFile`.
 *
 * @param config - akm configuration object.
 * @param sources - Source entries; only `sources[0]` (the primary, working
 *   stash) is read. Read-only caches are deliberately left alone because
 *   their artifact would be clobbered by the next sync().
 * @returns Counts for this run plus `written`, which reports whether the
 *   artifact was saved.
 */
export async function runGraphExtractionPass(config, sources) {
    // Feature gate: only an explicit `false` switches the pass off.
    const featureDisabled = config.llm?.features?.graph_extraction === false;
    if (featureDisabled) {
        return { ...EMPTY_RESULT };
    }
    // Provider / per-pass gate: no resolved LLM config means nothing to call.
    const llmConfig = resolveIndexPassLLM("graph", config);
    if (!llmConfig) {
        return { ...EMPTY_RESULT };
    }
    const primary = sources[0];
    if (!primary) {
        return { ...EMPTY_RESULT };
    }
    const candidates = collectEligibleFiles(primary.path);
    if (candidates.length === 0) {
        return { ...EMPTY_RESULT };
    }
    // Names are case-folded once here, at write time, so the search-time
    // boost can compare them directly on every query.
    const lower = (text) => text.toLowerCase();
    const toEdge = (relation) => {
        const edge = { from: lower(relation.from), to: lower(relation.to) };
        if (relation.type) {
            edge.type = lower(relation.type);
        }
        return edge;
    };
    const files = [];
    const totals = { entities: 0, relations: 0 };
    for (const { absPath, type, body } of candidates) {
        const { entities, relations } = await extractGraphFromBody(llmConfig, body);
        if (entities.length === 0) {
            // Nothing extracted for this file: it contributes no node.
            continue;
        }
        files.push({
            path: absPath,
            type,
            entities: entities.map((entity) => lower(entity)),
            relations: relations.map((relation) => toEdge(relation)),
        });
        totals.entities += entities.length;
        totals.relations += relations.length;
    }
    const written = writeGraphFile(primary.path, {
        schemaVersion: GRAPH_FILE_SCHEMA_VERSION,
        generatedAt: new Date().toISOString(),
        stashRoot: primary.path,
        files,
    });
    return {
        considered: candidates.length,
        extracted: files.length,
        totalEntities: totals.entities,
        totalRelations: totals.relations,
        written,
    };
}
|
|
131
|
+
/**
 * Gather the markdown files in a stash that graph extraction should process.
 *
 * Two asset types are scanned: `memory` assets under `<stashRoot>/memories`
 * and `knowledge` assets under `<stashRoot>/knowledge`. A file is left out
 * when:
 *
 *   - its type directory does not exist;
 *   - it cannot be read;
 *   - it is a memory whose frontmatter has `inferred: true` (an inferred
 *     child is already an atomic fact, so there is no graph to extract);
 *   - its body is empty once the frontmatter is removed and it is trimmed.
 *
 * Exported for direct unit testing.
 *
 * @param {string} stashRoot - Root directory of the stash to scan.
 * @returns {Array<{absPath: string, type: string, body: string}>} One record
 *   per eligible file, with the trimmed body text.
 */
export function collectEligibleFiles(stashRoot) {
    // Asset type -> directory name inside the stash.
    const typeDirs = [
        ["memory", "memories"],
        ["knowledge", "knowledge"],
    ];
    const eligible = [];
    for (const [type, dirName] of typeDirs) {
        const typeDir = path.join(stashRoot, dirName);
        if (!fs.existsSync(typeDir)) {
            continue;
        }
        for (const absPath of walkMarkdownFiles(typeDir)) {
            let text;
            try {
                text = fs.readFileSync(absPath, "utf8");
            }
            catch {
                // Unreadable file: skip it and keep scanning.
                continue;
            }
            const { data, content } = parseFrontmatter(text);
            const isInferredChild = type === "memory" && data.inferred === true;
            if (isInferredChild) {
                continue;
            }
            const body = content.trim();
            if (body) {
                eligible.push({ absPath, type, body });
            }
        }
    }
    return eligible;
}
|
|
169
|
+
/**
 * Recursively yield the path of every `.md` file (extension matched
 * case-insensitively) beneath `root`.
 *
 * Entries are visited in name order. The Node.js API does not specify the
 * order `readdirSync` returns, so sorting keeps the walk — and therefore the
 * order of files in the persisted graph — the same across runs and
 * platforms.
 *
 * A directory that cannot be listed (missing, unreadable) yields nothing.
 * Entries that are neither directories nor regular files are ignored.
 *
 * @param {string} root - Directory to walk.
 * @yields {string} `root` joined with the relative path of each markdown file.
 */
function* walkMarkdownFiles(root) {
    let entries;
    try {
        entries = fs.readdirSync(root, { withFileTypes: true });
    }
    catch {
        // Best-effort walk: an unlistable directory contributes no files.
        return;
    }
    // Plain code-unit comparison keeps the order locale-independent.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
    for (const entry of entries) {
        const full = path.join(root, entry.name);
        if (entry.isDirectory()) {
            yield* walkMarkdownFiles(full);
        }
        else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) {
            yield full;
        }
    }
}
|
|
187
|
+
// ── Persistence ─────────────────────────────────────────────────────────────
|
|
188
|
+
/**
 * Write `graph.json` atomically to `<stashRoot>/.akm/graph.json`.
 *
 * The payload is first written to a uniquely named temp file next to the
 * target and then renamed over it, so a reader never sees a half-written
 * file. If any step fails, the temp file is removed (best-effort) so failed
 * runs do not pile up `graph.json.tmp.*` files, a warning is emitted, and
 * `false` is returned; the function never throws for a write failure.
 *
 * Direct `fs.writeFile` is intentional. The graph artifact is an indexer
 * cache — not a user-visible asset — so it does not have an asset ref and
 * `writeAssetToSource` (which routes through the asset-spec rendering
 * layer) is the wrong primitive here. See CLAUDE.md / spec §10 step 5 for
 * the carve-out: kind-branching writes for asset content live in
 * `src/core/write-source.ts`; opaque indexer artifacts may write directly.
 *
 * @param {string} stashRoot - Root directory of the stash being indexed.
 * @param graph - JSON-serialisable graph payload.
 * @returns {boolean} `true` when the artifact was written, `false` otherwise.
 */
function writeGraphFile(stashRoot, graph) {
    const target = getGraphFilePath(stashRoot);
    // pid + random suffix keeps concurrent writers from sharing a temp file.
    const tmp = `${target}.tmp.${process.pid}.${Math.random().toString(36).slice(2)}`;
    try {
        fs.mkdirSync(path.dirname(target), { recursive: true });
        fs.writeFileSync(tmp, `${JSON.stringify(graph, null, 2)}\n`, "utf8");
        fs.renameSync(tmp, target);
        return true;
    }
    catch (err) {
        try {
            fs.unlinkSync(tmp);
        }
        catch {
            // Best-effort cleanup: the temp file may never have been created,
            // and the original failure below is the one worth reporting.
        }
        warn(`graph extraction: failed to write ${target}: ${err instanceof Error ? err.message : String(err)}`);
        return false;
    }
}
|
|
@@ -2,10 +2,13 @@ import fs from "node:fs";
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { isHttpUrl, resolveStashDir, toErrorMessage } from "../core/common";
|
|
4
4
|
import { getDbPath } from "../core/paths";
|
|
5
|
-
import { warn } from "../core/warn";
|
|
5
|
+
import { isVerbose, warn } from "../core/warn";
|
|
6
|
+
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
6
7
|
import { takeWorkflowDocument } from "../workflows/document-cache";
|
|
7
8
|
import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
|
|
8
|
-
import {
|
|
9
|
+
import { runGraphExtractionPass } from "./graph-extraction";
|
|
10
|
+
import { runMemoryInferencePass } from "./memory-inference";
|
|
11
|
+
import { generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
|
|
9
12
|
import { buildSearchText } from "./search-fields";
|
|
10
13
|
import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
|
|
11
14
|
import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
|
|
@@ -41,7 +44,10 @@ export async function akmIndex(options) {
|
|
|
41
44
|
sourcesCount: allSourceDirs.length,
|
|
42
45
|
semanticSearchMode: config.semanticSearchMode,
|
|
43
46
|
embeddingProvider: getEmbeddingProvider(config.embedding),
|
|
44
|
-
|
|
47
|
+
// Surface "llm enabled" only when at least one pass would actually
|
|
48
|
+
// run. Today that means the enrichment pass; future passes plug in
|
|
49
|
+
// via `resolveIndexPassLLM`.
|
|
50
|
+
llmEnabled: !!resolveIndexPassLLM("enrichment", config),
|
|
45
51
|
vecAvailable: isVecAvailable(db),
|
|
46
52
|
}),
|
|
47
53
|
});
|
|
@@ -76,6 +82,49 @@ export async function akmIndex(options) {
|
|
|
76
82
|
}
|
|
77
83
|
}
|
|
78
84
|
}
|
|
85
|
+
// Memory inference pass (#201). Runs before the walk so any atomic-fact
|
|
86
|
+
// children that get written are picked up by the walker in this same run
|
|
87
|
+
// and don't have to wait for the next `akm index`. Gated entirely by
|
|
88
|
+
// `resolveIndexPassLLM("memory", config)` — when the user has no
|
|
89
|
+
// `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
|
|
90
|
+
// and existing inferred children are left in place.
|
|
91
|
+
try {
|
|
92
|
+
const inferenceResult = await runMemoryInferencePass(config, allSourceEntries);
|
|
93
|
+
if (inferenceResult.writtenFacts > 0) {
|
|
94
|
+
onProgress({
|
|
95
|
+
phase: "llm",
|
|
96
|
+
message: `Memory inference wrote ${inferenceResult.writtenFacts} atomic fact${inferenceResult.writtenFacts === 1 ? "" : "s"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
catch (err) {
|
|
101
|
+
// Defensive — runMemoryInferencePass swallows per-memory failures.
|
|
102
|
+
// A thrown error here would only come from an unexpected programming
|
|
103
|
+
// bug; surface it as a warning rather than aborting the index run.
|
|
104
|
+
warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
105
|
+
}
|
|
106
|
+
// Graph extraction pass (#207). Runs after memory inference so any
|
|
107
|
+
// atomic-fact children that just got written are visible to the graph
|
|
108
|
+
// walk. Persists `<stashRoot>/.akm/graph.json` — an indexer artifact,
|
|
109
|
+
// NOT a user-visible asset, so it is not routed through
|
|
110
|
+
// writeAssetToSource. The artifact feeds the existing FTS5+boosts
|
|
111
|
+
// pipeline as a single boost component (see graph-boost.ts); there is
|
|
112
|
+
// no parallel scoring track. Disabled when either gate (the locked
|
|
113
|
+
// `llm.features.graph_extraction` feature flag or the per-pass
|
|
114
|
+
// `index.graph.llm` toggle) is off; the existing graph file is
|
|
115
|
+
// preserved on disk in that case.
|
|
116
|
+
try {
|
|
117
|
+
const graphResult = await runGraphExtractionPass(config, allSourceEntries);
|
|
118
|
+
if (graphResult.written) {
|
|
119
|
+
onProgress({
|
|
120
|
+
phase: "llm",
|
|
121
|
+
message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
catch (err) {
|
|
126
|
+
warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
127
|
+
}
|
|
79
128
|
const tWalkStart = Date.now();
|
|
80
129
|
// Walk stash dirs and index entries.
|
|
81
130
|
// doFullDelete=true merges the wipe into the same transaction as the
|
|
@@ -86,12 +135,26 @@ export async function akmIndex(options) {
|
|
|
86
135
|
phase: "scan",
|
|
87
136
|
message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
|
|
88
137
|
});
|
|
138
|
+
// Workflow validation noise gate (issue #273): per-spec stderr lines from
|
|
139
|
+
// `buildMetadataSkipWarning` are suppressed at default verbosity in
|
|
140
|
+
// `metadata.ts`. Replace them with a single summary line so operators
|
|
141
|
+
// running a cold-start search against a fresh registry-cloned source
|
|
142
|
+
// don't get the impression akm is broken. Verbose mode keeps the
|
|
143
|
+
// per-spec output instead of (not in addition to) the summary.
|
|
144
|
+
if (!isVerbose()) {
|
|
145
|
+
const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
|
|
146
|
+
if (skippedWorkflowCount > 0) {
|
|
147
|
+
const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
|
|
148
|
+
warn(`${skippedWorkflowCount} ${noun} skipped due to validation errors; ` +
|
|
149
|
+
"rerun with --verbose (or AKM_VERBOSE=1) to see details.");
|
|
150
|
+
}
|
|
151
|
+
}
|
|
89
152
|
const tWalkEnd = Date.now();
|
|
90
153
|
// Enhance entries with LLM if configured
|
|
91
154
|
await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
|
|
92
155
|
onProgress({
|
|
93
156
|
phase: "llm",
|
|
94
|
-
message: config
|
|
157
|
+
message: resolveIndexPassLLM("enrichment", config)
|
|
95
158
|
? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
|
|
96
159
|
: "LLM enhancement disabled.",
|
|
97
160
|
});
|
|
@@ -373,7 +436,11 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
373
436
|
return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
|
|
374
437
|
}
|
|
375
438
|
async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
|
|
376
|
-
|
|
439
|
+
// Resolve per-pass LLM config via the unified shim. Returns undefined when
|
|
440
|
+
// either no `akm.llm` is configured or the user opted this pass out via
|
|
441
|
+
// `index.enrichment.llm = false`. (#208)
|
|
442
|
+
const llmConfig = resolveIndexPassLLM("enrichment", config);
|
|
443
|
+
if (!llmConfig || dirsNeedingLlm.length === 0)
|
|
377
444
|
return;
|
|
378
445
|
// Aggregate per-entry failures so a misconfigured LLM endpoint surfaces
|
|
379
446
|
// as a single visible warning instead of silently degrading every entry
|
|
@@ -385,7 +452,7 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
|
|
|
385
452
|
if (generatedEntries.length === 0)
|
|
386
453
|
continue;
|
|
387
454
|
const generatedStash = { entries: generatedEntries };
|
|
388
|
-
const enhanced = await enhanceStashWithLlm(
|
|
455
|
+
const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary);
|
|
389
456
|
// Re-upsert the enhanced entries in a single transaction so a crash
|
|
390
457
|
// cannot leave half the entries updated and the rest stale.
|
|
391
458
|
db.transaction(() => {
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory inference pass for `akm index` (#201).
|
|
3
|
+
*
|
|
4
|
+
* Detects memories pending inference, asks the configured LLM to split each
|
|
5
|
+
* into atomic facts, and writes the results back as new memory files with
|
|
6
|
+
* frontmatter `inferred: true` + a `source:` backref to the parent memory.
|
|
7
|
+
*
|
|
8
|
+
* Pending predicate (see {@link isPendingMemory}):
|
|
9
|
+
* - File lives under `<stashRoot>/memories/` and ends in `.md`.
|
|
10
|
+
* - Frontmatter does NOT have `inferenceProcessed: true` (parent already split).
|
|
11
|
+
* - Frontmatter does NOT have `inferred: true` (this is itself a child fact).
|
|
12
|
+
*
|
|
13
|
+
* Idempotency: after a successful split the parent's frontmatter is rewritten
|
|
14
|
+
* with `inferenceProcessed: true`. A subsequent `akm index` therefore skips
|
|
15
|
+
* the parent without re-running the LLM.
|
|
16
|
+
*
|
|
17
|
+
* Disabling — two orthogonal gates per v1 spec §14:
|
|
18
|
+
* 1. `llm.features.memory_inference = false` blocks the pass at the
|
|
19
|
+
* locked feature-flag layer (no network call may ever issue).
|
|
20
|
+
* 2. `index.memory.llm = false` (or no `akm.llm` block at all) opts the
|
|
21
|
+
* pass out at the per-pass layer (#208).
|
|
22
|
+
* A pass runs iff both layers allow it. Existing inferred children are
|
|
23
|
+
* NEVER deleted — the user keeps what was already produced.
|
|
24
|
+
*
|
|
25
|
+
* Locked v1 contract:
|
|
26
|
+
* - LLM access is exclusively via `resolveIndexPassLLM("memory", config)`.
|
|
27
|
+
* - All child memory writes go through `writeAssetToSource` in
|
|
28
|
+
* `src/core/write-source.ts`. The parent's frontmatter rewrite is an
|
|
29
|
+
* explicit narrow exception — see {@link markParentProcessed}.
|
|
30
|
+
*/
|
|
31
|
+
import fs from "node:fs";
|
|
32
|
+
import path from "node:path";
|
|
33
|
+
import { stringify as yamlStringify } from "yaml";
|
|
34
|
+
import { parseAssetRef } from "../core/asset-ref";
|
|
35
|
+
import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
|
|
36
|
+
import { warn } from "../core/warn";
|
|
37
|
+
import { writeAssetToSource } from "../core/write-source";
|
|
38
|
+
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
39
|
+
import { splitMemoryIntoAtomicFacts } from "../llm/memory-infer";
|
|
40
|
+
/**
|
|
41
|
+
* Frontmatter keys this pass cares about. Constants so a future rename only
|
|
42
|
+
* needs to touch one site.
|
|
43
|
+
*/
|
|
44
|
+
const FM_INFERRED = "inferred";
|
|
45
|
+
const FM_INFERENCE_PROCESSED = "inferenceProcessed";
|
|
46
|
+
const FM_SOURCE = "source";
|
|
47
|
+
/**
 * Top-level entry point for the memory inference pass.
 *
 * The pass runs only when both gates allow it (v1 spec §14):
 *
 *   1. Feature gate — `config.llm.features.memory_inference`. Only an
 *      explicit `false` disables the pass; a missing value leaves it on.
 *   2. Per-pass gate — `resolveIndexPassLLM("memory", config)`. A falsy
 *      result means the pass must not run; a truthy result is the LLM
 *      config handed to the splitter.
 *
 * Only `sources[0]` (the primary, working stash) is inspected and written;
 * every other source is left untouched.
 *
 * Failure isolation: every pending memory is processed inside its own
 * try/catch. An unexpected exception while handling one memory is reported
 * through `warn` and the loop moves on, so a single bad memory can neither
 * abort the remaining ones nor discard the counters gathered so far.
 *
 * @param config  akm configuration object.
 * @param sources Ordered source entries; only `sources[0].path` is read.
 * @returns {Promise<{considered: number, splitParents: number, writtenFacts: number, skippedNoFacts: number}>}
 *   Counters for this run. All zero when the pass is gated off, there is no
 *   primary source, or nothing is pending.
 */
export async function runMemoryInferencePass(config, sources) {
    const result = {
        considered: 0,
        splitParents: 0,
        writtenFacts: 0,
        skippedNoFacts: 0,
    };
    // Gate 1 — locked feature flag (§14): only an explicit `false` disables.
    if (config.llm?.features?.memory_inference === false)
        return result;
    // Gate 2 — per-pass opt-out (#208): falsy means "do not run".
    const llmConfig = resolveIndexPassLLM("memory", config);
    if (!llmConfig)
        return result;
    // Only the primary (working) stash is ever written to.
    const primary = sources[0];
    if (!primary)
        return result;
    const pending = collectPendingMemories(primary.path);
    result.considered = pending.length;
    for (const record of pending) {
        try {
            const facts = await splitMemoryIntoAtomicFacts(llmConfig, record.body);
            if (facts.length === 0) {
                // Intentionally NOT marked processed, so the memory is
                // picked up again on the next index run.
                result.skippedNoFacts += 1;
                continue;
            }
            const written = await writeAtomicChildren(record, facts);
            if (written > 0) {
                // Count first: the children are already on disk, so they are
                // reported even if marking the parent fails unexpectedly.
                result.splitParents += 1;
                result.writtenFacts += written;
                markParentProcessed(record);
            }
        }
        catch (err) {
            // Isolate the failure to this one memory and keep going.
            warn(`memory inference: failed to process ${record.ref}: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    return result;
}
|
|
105
|
+
// ── Pending detection ───────────────────────────────────────────────────────
|
|
106
|
+
/**
 * Gather every memory under `<stashRoot>/memories/` that still needs
 * inference.
 *
 * A missing `memories/` directory yields an empty list rather than an
 * error. Files that cannot be read are silently skipped, as are files
 * rejected by `isPendingMemory` and files for which `toMemoryName` yields
 * no name.
 *
 * @param stashRoot Root directory of the stash to scan.
 * @returns Array of records `{ filePath, stashRoot, ref, data, body }` where
 *   `ref` is `memory:<name>`, `data` is the parsed frontmatter and `body`
 *   is the parsed content.
 */
export function collectPendingMemories(stashRoot) {
    const memoriesDir = path.join(stashRoot, "memories");
    const pending = [];
    if (fs.existsSync(memoriesDir)) {
        for (const filePath of walkMarkdownFiles(memoriesDir)) {
            let text;
            try {
                text = fs.readFileSync(filePath, "utf8");
            }
            catch {
                // Unreadable file: best-effort scan, move on to the next one.
                continue;
            }
            const { data, content } = parseFrontmatter(text);
            if (isPendingMemory(data)) {
                const name = toMemoryName(memoriesDir, filePath);
                if (name) {
                    pending.push({
                        filePath,
                        stashRoot,
                        ref: `memory:${name}`,
                        data,
                        body: content,
                    });
                }
            }
        }
    }
    return pending;
}
|
|
140
|
+
/**
 * Decide whether a memory's parsed frontmatter marks it as still awaiting
 * inference.
 *
 * A memory is pending unless the frontmatter carries the inferred-child flag
 * or the inference-processed flag with the literal boolean `true`. Any other
 * value (missing, `false`, the string `"true"`, ...) leaves it pending.
 *
 * Exported so the predicate can be unit-tested directly and shared by any
 * other consumer instead of being re-implemented.
 *
 * @param frontmatter Parsed frontmatter object (must not be null/undefined).
 * @returns {boolean} `true` when the memory should be sent through inference.
 */
export function isPendingMemory(frontmatter) {
    return !(frontmatter[FM_INFERRED] === true || frontmatter[FM_INFERENCE_PROCESSED] === true);
}
|
|
154
|
+
/**
 * Lazily yield the full path of every regular file under `root` whose name
 * ends in `.md` (case-insensitive), descending into subdirectories in the
 * order `readdirSync` reports them.
 *
 * A directory that cannot be listed contributes nothing. Entries that are
 * neither directories nor regular files are ignored.
 *
 * @param root Directory to walk.
 */
function* walkMarkdownFiles(root) {
    let dirents;
    try {
        dirents = fs.readdirSync(root, { withFileTypes: true });
    }
    catch {
        // Unlistable directory: treat as empty.
        return;
    }
    for (const dirent of dirents) {
        const childPath = path.join(root, dirent.name);
        if (dirent.isDirectory()) {
            yield* walkMarkdownFiles(childPath);
            continue;
        }
        const isMarkdownFile = dirent.isFile() && dirent.name.toLowerCase().endsWith(".md");
        if (isMarkdownFile) {
            yield childPath;
        }
    }
}
|
|
172
|
+
/**
 * Convert a file path under `memoriesDir` into a memory name: the path
 * relative to `memoriesDir`, with backslashes turned into forward slashes
 * and a trailing `.md` (any letter case) removed. Nested subdirectories are
 * kept as part of the name.
 *
 * @param memoriesDir Absolute or relative path of the `memories/` directory.
 * @param filePath    Path of the memory file.
 * @returns {string | undefined} The memory name, or `undefined` when the
 *   relative path is empty or begins with `..`.
 */
function toMemoryName(memoriesDir, filePath) {
    const relative = path.relative(memoriesDir, filePath);
    // NOTE(review): the prefix test also rejects entries that merely begin
    // with two dots (e.g. `..draft.md`), not only paths that leave the
    // directory. Confirm that is intended.
    if (relative === "" || relative.startsWith("..")) {
        return undefined;
    }
    const withForwardSlashes = relative.split("\\").join("/");
    return withForwardSlashes.replace(/\.md$/i, "");
}
|
|
180
|
+
// ── Writing children + marking parent ───────────────────────────────────────
|
|
181
|
+
/**
 * Persist each atomic fact of `parent` as a child memory named
 * `<parentBase>.facts/fact-N` (N starting at 1), where `<parentBase>` is the
 * parent's path relative to `memories/` without its `.md` extension.
 *
 * Every child is rendered with `renderChildMemory` and written through
 * `writeAssetToSource`. A slot whose file already exists at
 * `memories/<parentBase>.facts/fact-N.md` is skipped and not counted, so a
 * re-run after an interrupted pass does not duplicate facts. A failure while
 * rendering, parsing the ref or writing one child is reported through `warn`
 * and does not stop the remaining children.
 *
 * @param parent Pending-memory record; `stashRoot`, `filePath` and `ref` are read.
 * @param facts  Fact strings produced by the splitter, in slot order.
 * @returns {Promise<number>} Number of children written by this call.
 */
async function writeAtomicChildren(parent, facts) {
    const { stashRoot, filePath } = parent;
    const memoriesDir = path.join(stashRoot, "memories");
    // <parentDir>/<parentBase>.facts/fact-N keeps facts next to their parent.
    const parentBase = path
        .relative(memoriesDir, filePath)
        .replace(/\\/g, "/")
        .replace(/\.md$/i, "");
    // Both descriptors point writeAssetToSource at the parent's own stash.
    const destination = { kind: "filesystem", name: "stash", path: stashRoot };
    const destinationConfig = { type: "filesystem", name: "stash", path: stashRoot, writable: true };
    let writtenCount = 0;
    for (let slot = 1; slot <= facts.length; slot++) {
        const fact = facts[slot - 1];
        const childName = `${parentBase}.facts/fact-${slot}`;
        const childFile = path.join(memoriesDir, `${childName}.md`);
        if (!fs.existsSync(childFile)) {
            try {
                const rendered = renderChildMemory(fact, parent.ref);
                const childRef = parseAssetRef(`memory:${childName}`);
                await writeAssetToSource(destination, destinationConfig, childRef, rendered);
                writtenCount += 1;
            }
            catch (err) {
                warn(`memory inference: failed to write atomic child ${childName}: ${err instanceof Error ? err.message : String(err)}`);
            }
        }
    }
    return writtenCount;
}
|
|
230
|
+
/**
 * Build the markdown text of an inferred child memory: a YAML frontmatter
 * block carrying the inferred flag (`true`) and a source backref to the
 * parent, followed by a blank line, the trimmed fact, and a final newline.
 *
 * @param fact      Fact text; surrounding whitespace is trimmed.
 * @param parentRef Ref string of the parent memory, stored under the source key.
 * @returns {string} Complete file content for the child memory.
 */
function renderChildMemory(fact, parentRef) {
    const frontmatter = yamlStringify({
        [FM_INFERRED]: true,
        [FM_SOURCE]: parentRef,
    }).trimEnd();
    // Layout: ---, frontmatter, ---, blank line, fact, trailing newline.
    return ["---", frontmatter, "---", "", fact.trim(), ""].join("\n");
}
|
|
238
|
+
/**
 * Rewrite the parent memory file in place so its frontmatter carries the
 * inference-processed flag set to `true`.
 *
 * The frontmatter is re-serialised from `parent.data` plus the flag. The
 * body is taken from a fresh read of the file: the content reported by
 * `parseFrontmatterBlock`, or the whole file when that yields no content.
 * This is a direct file write rather than a `writeAssetToSource` call
 * because only the frontmatter of an existing asset changes and the body
 * must be carried over unchanged.
 *
 * Read and write failures are reported through `warn` and otherwise ignored;
 * the function returns nothing.
 *
 * @param parent Pending-memory record; `filePath` and `data` are read.
 */
function markParentProcessed(parent) {
    const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
    let original;
    try {
        original = fs.readFileSync(parent.filePath, "utf8");
    }
    catch (err) {
        warn(`memory inference: failed to re-read parent ${parent.filePath}: ${reasonOf(err)}`);
        return;
    }
    const frontmatter = yamlStringify({ ...parent.data, [FM_INFERENCE_PROCESSED]: true }).trimEnd();
    const body = parseFrontmatterBlock(original)?.content ?? original;
    // Keep exactly one newline between the closing fence and the body start.
    const separator = body.startsWith("\n") ? "" : "\n";
    const rewritten = `---\n${frontmatter}\n---\n${separator}${body}`;
    try {
        fs.writeFileSync(parent.filePath, rewritten, "utf8");
    }
    catch (err) {
        warn(`memory inference: failed to mark parent processed ${parent.filePath}: ${reasonOf(err)}`);
    }
}
|