akm-cli 0.7.5 → 0.8.0-rc.3
- package/.github/CHANGELOG.md +1 -1
- package/dist/cli/parse-args.js +86 -0
- package/dist/cli.js +1023 -521
- package/dist/commands/agent-dispatch.js +107 -0
- package/dist/commands/agent-support.js +62 -0
- package/dist/commands/config-cli.js +68 -84
- package/dist/commands/consolidate.js +812 -0
- package/dist/commands/distill-promotion-policy.js +658 -0
- package/dist/commands/distill.js +218 -43
- package/dist/commands/eval-cases.js +40 -0
- package/dist/commands/events.js +2 -23
- package/dist/commands/graph.js +222 -0
- package/dist/commands/health.js +376 -0
- package/dist/commands/help/help-accept.md +9 -0
- package/dist/commands/help/help-improve.md +53 -0
- package/dist/commands/help/help-proposals.md +15 -0
- package/dist/commands/help/help-propose.md +17 -0
- package/dist/commands/help/help-reject.md +8 -0
- package/dist/commands/history.js +3 -30
- package/dist/commands/improve.js +1161 -0
- package/dist/commands/info.js +2 -2
- package/dist/commands/init.js +2 -2
- package/dist/commands/install-audit.js +5 -1
- package/dist/commands/installed-stashes.js +118 -138
- package/dist/commands/knowledge.js +133 -0
- package/dist/commands/lint/agent-linter.js +46 -0
- package/dist/commands/lint/base-linter.js +291 -0
- package/dist/commands/lint/command-linter.js +46 -0
- package/dist/commands/lint/default-linter.js +13 -0
- package/dist/commands/lint/index.js +145 -0
- package/dist/commands/lint/knowledge-linter.js +13 -0
- package/dist/commands/lint/memory-linter.js +58 -0
- package/dist/commands/lint/registry.js +33 -0
- package/dist/commands/lint/skill-linter.js +42 -0
- package/dist/commands/lint/task-linter.js +47 -0
- package/dist/commands/lint/types.js +1 -0
- package/dist/commands/lint/vault-key-rules.js +67 -0
- package/dist/commands/lint/workflow-linter.js +53 -0
- package/dist/commands/lint.js +1 -0
- package/dist/commands/proposal.js +8 -7
- package/dist/commands/propose.js +71 -28
- package/dist/commands/reflect.js +135 -35
- package/dist/commands/registry-search.js +2 -2
- package/dist/commands/remember.js +54 -0
- package/dist/commands/schema-repair.js +130 -0
- package/dist/commands/search.js +21 -5
- package/dist/commands/show.js +125 -20
- package/dist/commands/source-add.js +10 -10
- package/dist/commands/source-manage.js +11 -19
- package/dist/commands/tasks.js +385 -0
- package/dist/commands/url-checker.js +39 -0
- package/dist/commands/vault.js +168 -77
- package/dist/core/action-contributors.js +25 -0
- package/dist/core/asset-ref.js +4 -0
- package/dist/core/asset-registry.js +4 -16
- package/dist/core/asset-spec.js +10 -0
- package/dist/core/common.js +100 -0
- package/dist/core/concurrent.js +22 -0
- package/dist/core/config.js +233 -133
- package/dist/core/events.js +73 -126
- package/dist/core/frontmatter.js +0 -6
- package/dist/core/markdown.js +17 -0
- package/dist/core/memory-improve.js +678 -0
- package/dist/core/parse.js +155 -0
- package/dist/core/paths.js +101 -3
- package/dist/core/proposal-validators.js +61 -0
- package/dist/core/proposals.js +49 -38
- package/dist/core/state-db.js +731 -0
- package/dist/core/time.js +51 -0
- package/dist/core/warn.js +59 -1
- package/dist/indexer/db-search.js +52 -238
- package/dist/indexer/db.js +403 -54
- package/dist/indexer/ensure-index.js +61 -0
- package/dist/indexer/graph-boost.js +247 -94
- package/dist/indexer/graph-db.js +201 -0
- package/dist/indexer/graph-dedup.js +99 -0
- package/dist/indexer/graph-extraction.js +409 -76
- package/dist/indexer/index-context.js +10 -0
- package/dist/indexer/indexer.js +456 -290
- package/dist/indexer/llm-cache.js +47 -0
- package/dist/indexer/matchers.js +124 -160
- package/dist/indexer/memory-inference.js +63 -29
- package/dist/indexer/metadata-contributors.js +26 -0
- package/dist/indexer/metadata.js +196 -197
- package/dist/indexer/path-resolver.js +89 -0
- package/dist/indexer/ranking-contributors.js +204 -0
- package/dist/indexer/ranking.js +74 -0
- package/dist/indexer/search-hit-enrichers.js +22 -0
- package/dist/indexer/search-source.js +24 -9
- package/dist/indexer/semantic-status.js +2 -16
- package/dist/indexer/walker.js +25 -0
- package/dist/integrations/agent/builders.js +109 -0
- package/dist/integrations/agent/config.js +203 -3
- package/dist/integrations/agent/index.js +5 -2
- package/dist/integrations/agent/model-aliases.js +63 -0
- package/dist/integrations/agent/profiles.js +67 -5
- package/dist/integrations/agent/prompts.js +77 -72
- package/dist/integrations/agent/sdk-runner.js +120 -0
- package/dist/integrations/agent/spawn.js +93 -22
- package/dist/integrations/lockfile.js +10 -18
- package/dist/integrations/session-logs/index.js +65 -0
- package/dist/integrations/session-logs/providers/claude-code.js +56 -0
- package/dist/integrations/session-logs/providers/opencode.js +52 -0
- package/dist/integrations/session-logs/types.js +1 -0
- package/dist/llm/call-ai.js +74 -0
- package/dist/llm/client.js +61 -122
- package/dist/llm/feature-gate.js +27 -16
- package/dist/llm/graph-extract.js +297 -62
- package/dist/llm/memory-infer.js +49 -71
- package/dist/llm/metadata-enhance.js +39 -22
- package/dist/llm/prompts/graph-extract-user-prompt.md +12 -0
- package/dist/output/cli-hints-full.md +277 -0
- package/dist/output/cli-hints-short.md +65 -0
- package/dist/output/cli-hints.js +2 -318
- package/dist/output/renderers.js +220 -256
- package/dist/output/shapes.js +101 -93
- package/dist/output/text.js +256 -17
- package/dist/registry/providers/skills-sh.js +61 -49
- package/dist/registry/providers/static-index.js +44 -48
- package/dist/registry/resolve.js +8 -16
- package/dist/setup/setup.js +510 -11
- package/dist/sources/provider-factory.js +2 -1
- package/dist/sources/providers/filesystem.js +16 -23
- package/dist/sources/providers/git.js +4 -5
- package/dist/sources/providers/website.js +15 -22
- package/dist/sources/website-ingest.js +4 -0
- package/dist/tasks/backends/cron.js +200 -0
- package/dist/tasks/backends/exec-utils.js +25 -0
- package/dist/tasks/backends/index.js +32 -0
- package/dist/tasks/backends/launchd-template.xml +19 -0
- package/dist/tasks/backends/launchd.js +184 -0
- package/dist/tasks/backends/schtasks-template.xml +29 -0
- package/dist/tasks/backends/schtasks.js +212 -0
- package/dist/tasks/parser.js +198 -0
- package/dist/tasks/resolveAkmBin.js +84 -0
- package/dist/tasks/runner.js +432 -0
- package/dist/tasks/schedule.js +208 -0
- package/dist/tasks/schema.js +13 -0
- package/dist/tasks/validator.js +59 -0
- package/dist/wiki/index-template.md +12 -0
- package/dist/wiki/ingest-workflow-template.md +54 -0
- package/dist/wiki/log-template.md +8 -0
- package/dist/wiki/schema-template.md +61 -0
- package/dist/wiki/wiki-templates.js +12 -0
- package/dist/wiki/wiki.js +10 -61
- package/dist/workflows/authoring.js +5 -25
- package/dist/workflows/renderer.js +8 -3
- package/dist/workflows/runs.js +59 -91
- package/dist/workflows/validator.js +1 -1
- package/dist/workflows/workflow-template.md +24 -0
- package/docs/README.md +5 -2
- package/docs/migration/release-notes/0.7.0.md +1 -1
- package/docs/migration/release-notes/0.8.0.md +43 -0
- package/package.json +3 -2
- package/dist/templates/wiki-templates.js +0 -100
package/dist/indexer/graph-dedup.js
@@ -0,0 +1,99 @@
+/**
+ * Pure graph deduplication utility — no LLM calls, no I/O.
+ *
+ * Extracted from src/llm/graph-extract.ts so it can be imported by
+ * src/indexer/graph-extraction.ts without being replaced by test mocks
+ * that stub the LLM layer.
+ */
+function normalizeRelationType(raw) {
+    const normalized = raw?.trim().toLowerCase().replace(/\s+/g, " ") ?? "";
+    if (!normalized)
+        return "";
+    if (normalized === "use" || normalized === "utilizes")
+        return "uses";
+    if (normalized === "depend on" || normalized === "depends")
+        return "depends on";
+    if (normalized === "integrates" || normalized === "integration with")
+        return "integrates with";
+    return normalized;
+}
+function normalizeConfidence(raw) {
+    if (typeof raw !== "number" || !Number.isFinite(raw))
+        return undefined;
+    return Math.max(0, Math.min(1, raw));
+}
+/**
+ * Merge and deduplicate entities and relations from multiple per-asset
+ * GraphExtraction results into one canonical graph.
+ *
+ * Entities are keyed on their lowercased, trimmed form. The first-seen
+ * casing is preserved as canonical. Relations are keyed on
+ * `(from, to, type)` (all lowercased). Dangling relations — those whose
+ * `from` or `to` is absent from the deduplicated entity set — are dropped.
+ */
+export function deduplicateGraph(extractions, assetRefs) {
+    const entityCanonical = new Map();
+    const entitySources = new Map();
+    for (let i = 0; i < extractions.length; i++) {
+        const ref = assetRefs?.[i] ?? "unknown";
+        for (const raw of extractions[i].entities) {
+            const trimmed = raw.trim();
+            if (!trimmed)
+                continue;
+            const normalized = trimmed.toLowerCase();
+            if (!entityCanonical.has(normalized)) {
+                entityCanonical.set(normalized, trimmed);
+                entitySources.set(normalized, [ref]);
+            }
+            else {
+                const srcs = entitySources.get(normalized);
+                if (srcs && !srcs.includes(ref))
+                    srcs.push(ref);
+            }
+        }
+    }
+    const entities = Array.from(entityCanonical.values());
+    const entityNormSet = new Set(entityCanonical.keys());
+    const relSeenKey = new Map();
+    const relationIndexByKey = new Map();
+    const relations = [];
+    for (let i = 0; i < extractions.length; i++) {
+        const ref = assetRefs?.[i] ?? "unknown";
+        for (const rel of extractions[i].relations) {
+            const fromNorm = rel.from.trim().toLowerCase();
+            const toNorm = rel.to.trim().toLowerCase();
+            const typeNorm = normalizeRelationType(rel.type);
+            if (!entityNormSet.has(fromNorm) || !entityNormSet.has(toNorm))
+                continue;
+            const key = `${fromNorm}\0${toNorm}\0${typeNorm}`;
+            if (!relSeenKey.has(key)) {
+                relSeenKey.set(key, [ref]);
+                const canonical = {
+                    from: entityCanonical.get(fromNorm) ?? rel.from,
+                    to: entityCanonical.get(toNorm) ?? rel.to,
+                };
+                if (typeNorm)
+                    canonical.type = typeNorm;
+                const confidence = normalizeConfidence(rel.confidence);
+                if (confidence !== undefined)
+                    canonical.confidence = confidence;
+                relationIndexByKey.set(key, relations.length);
+                relations.push(canonical);
+            }
+            else {
+                const srcs = relSeenKey.get(key);
+                if (srcs && !srcs.includes(ref))
+                    srcs.push(ref);
+                const idx = relationIndexByKey.get(key);
+                const nextConfidence = normalizeConfidence(rel.confidence);
+                if (idx !== undefined && nextConfidence !== undefined) {
+                    const current = normalizeConfidence(relations[idx]?.confidence) ?? 0;
+                    if (nextConfidence > current && relations[idx])
+                        relations[idx].confidence = nextConfidence;
+                }
+            }
+        }
+    }
+    const relationSources = new Map(relSeenKey);
+    return { entities, relations, entitySources, relationSources };
+}
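For orientation, here is a minimal usage sketch of the new helper above. The input and output shapes (string entities, `{ from, to, type?, confidence? }` relations, a `(from, to, type)` dedup key) are read directly off the code; the import path, asset refs, and sample values are illustrative only, not part of the package's documented API.

```js
import { deduplicateGraph } from "akm-cli/dist/indexer/graph-dedup.js"; // illustrative path

// Two per-asset extractions naming the same entity with different casing and
// the same relation with a synonymous type ("use" vs "uses").
const extractions = [
  { entities: ["SQLite", "FTS5"], relations: [{ from: "SQLite", to: "FTS5", type: "use", confidence: 0.6 }] },
  { entities: ["sqlite"], relations: [{ from: "sqlite", to: "FTS5", type: "uses", confidence: 0.9 }] },
];
const { entities, relations, entitySources, relationSources } =
  deduplicateGraph(extractions, ["memory/db.md", "knowledge/search.md"]);

// entities  -> ["SQLite", "FTS5"]            (first-seen casing wins)
// relations -> [{ from: "SQLite", to: "FTS5", type: "uses", confidence: 0.9 }]
//              (one row; the duplicate only raises the stored confidence)
// entitySources.get("sqlite") -> ["memory/db.md", "knowledge/search.md"]
// relationSources keys are "from\0to\0type" strings mapped to contributing refs
```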
package/dist/indexer/graph-extraction.js
@@ -3,8 +3,8 @@
  *
  * Walks the primary stash for `memory:` and `knowledge:` assets, asks the
  * configured LLM to extract entities and relations from each one, and
- * persists the result to
- *
+ * persists the result to stash-local SQLite graph tables keyed by stash root.
+ * The artifact is consumed by the search
  * pipeline (see `src/indexer/graph-boost.ts`) as a single boost component
  * inside the existing FTS5+boosts loop — there is NO second SearchHit
  * scorer and no parallel ranking track.
@@ -19,13 +19,13 @@
  * 3. `index.graph.llm !== false` — the per-pass opt-out layer (#208).
  *    Set to `false` to skip just this pass while leaving other passes
  *    that share the same `llm` block enabled.
- *    Toggling any one off does NOT delete the existing
+ *    Toggling any one off does NOT delete the existing persisted graph — the
  *    user keeps the boost component they already have, it just stops
  *    refreshing.
  *
  * Locked v1 contract:
  * - LLM access is exclusively via `resolveIndexPassLLM("graph", config)`.
- * - The
+ * - The graph rows are an indexer artifact, NOT a user-visible
  *   asset. It does not have an asset ref, does not appear in search
  *   hits, and is not addressable via `akm show`. Direct `fs.writeFile`
  *   is therefore the correct primitive — `writeAssetToSource` is
@@ -33,25 +33,169 @@
  */
 import fs from "node:fs";
 import path from "node:path";
+import { TYPE_DIRS } from "../core/asset-spec";
+import { concurrentMap } from "../core/concurrent";
 import { parseFrontmatter } from "../core/frontmatter";
 import { warn } from "../core/warn";
-import
+import * as graphExtract from "../llm/graph-extract";
 import { resolveIndexPassLLM } from "../llm/index-passes";
+import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntry, upsertLlmCacheEntry } from "./db";
+import { loadStoredGraphSnapshot, replaceStoredGraph } from "./graph-db";
+import { deduplicateGraph } from "./graph-dedup";
+import { withLlmCache } from "./llm-cache";
+import { walkMarkdownFiles } from "./walker";
 /** Schema version for the persisted artifact — bumps trigger a full rebuild. */
-export const GRAPH_FILE_SCHEMA_VERSION =
-
-
-
-
-
-
+export const GRAPH_FILE_SCHEMA_VERSION = GRAPH_SCHEMA_VERSION;
+const EMPTY_QUALITY = {
+    consideredFiles: 0,
+    extractedFiles: 0,
+    entityCount: 0,
+    relationCount: 0,
+    extractionCoverage: 0,
+    density: 0,
+};
 const EMPTY_RESULT = {
     considered: 0,
     extracted: 0,
     totalEntities: 0,
     totalRelations: 0,
     written: false,
+    quality: { ...EMPTY_QUALITY },
 };
+function roundMetric(value) {
+    return Number(value.toFixed(4));
+}
+function computeGraphQualityTelemetry(consideredFiles, extractedFiles, entityCount, relationCount) {
+    const extractionCoverage = consideredFiles > 0 ? extractedFiles / consideredFiles : 0;
+    const maxEdges = entityCount > 1 ? (entityCount * (entityCount - 1)) / 2 : 0;
+    const density = maxEdges > 0 ? relationCount / maxEdges : 0;
+    return {
+        consideredFiles,
+        extractedFiles,
+        entityCount,
+        relationCount,
+        extractionCoverage: roundMetric(extractionCoverage),
+        density: roundMetric(density),
+    };
+}
+export const DEFAULT_GRAPH_EXTRACTION_INCLUDE_TYPES = ["memory", "knowledge"];
+const SUPPORTED_GRAPH_EXTRACTION_INCLUDE_TYPES = new Set([
+    "memory",
+    "knowledge",
+    "skill",
+    "command",
+    "agent",
+    "workflow",
+    "lesson",
+    "task",
+    "wiki",
+]);
+function normalizeConfidence(raw) {
+    if (typeof raw !== "number" || !Number.isFinite(raw))
+        return undefined;
+    return Math.max(0, Math.min(1, raw));
+}
+export function getGraphExtractionIncludeTypes(config) {
+    const configured = config.index?.graph?.graphExtractionIncludeTypes;
+    if (!configured || configured.length === 0)
+        return [...DEFAULT_GRAPH_EXTRACTION_INCLUDE_TYPES];
+    const out = [];
+    const seen = new Set();
+    for (const rawType of configured) {
+        const type = rawType.trim().toLowerCase();
+        if (!type || seen.has(type))
+            continue;
+        if (!SUPPORTED_GRAPH_EXTRACTION_INCLUDE_TYPES.has(type))
+            continue;
+        seen.add(type);
+        out.push(type);
+    }
+    return out.length > 0 ? out : [...DEFAULT_GRAPH_EXTRACTION_INCLUDE_TYPES];
+}
+function validateGraphCacheShape(raw) {
+    if (!raw || typeof raw !== "object")
+        return undefined;
+    const obj = raw;
+    if (!Array.isArray(obj.entities) || !obj.entities.every((e) => typeof e === "string"))
+        return undefined;
+    if (obj.relations !== undefined &&
+        (!Array.isArray(obj.relations) ||
+            !obj.relations.every((r) => {
+                if (!r || typeof r !== "object")
+                    return false;
+                const rel = r;
+                if (typeof rel.from !== "string" || typeof rel.to !== "string")
+                    return false;
+                if (rel.type !== undefined && typeof rel.type !== "string")
+                    return false;
+                if (rel.confidence !== undefined && (typeof rel.confidence !== "number" || !Number.isFinite(rel.confidence))) {
+                    return false;
+                }
+                return true;
+            }))) {
+        return undefined;
+    }
+    return {
+        entities: obj.entities,
+        relations: Array.isArray(obj.relations) ? obj.relations : [],
+        confidence: normalizeConfidence(obj.confidence),
+    };
+}
+function loadGraphFile(stashRoot, db) {
+    if (!db)
+        return { files: [] };
+    const graph = loadStoredGraphSnapshot(stashRoot, db);
+    if (!graph)
+        return { files: [] };
+    const out = [];
+    for (const node of graph.files) {
+        const cacheShape = validateGraphCacheShape({ entities: node.entities, relations: node.relations });
+        if (!cacheShape)
+            continue;
+        out.push({
+            path: node.path,
+            type: node.type,
+            bodyHash: node.bodyHash,
+            entities: cacheShape.entities,
+            relations: cacheShape.relations,
+            confidence: normalizeConfidence(node.confidence),
+        });
+    }
+    return { files: out };
+}
+function mergeGraphNodes(previousNodes, refreshedNodes, candidatePaths) {
+    if (!candidatePaths)
+        return refreshedNodes;
+    const refreshedByPath = new Map(refreshedNodes.map((node) => [node.path, node]));
+    const merged = [];
+    for (const node of previousNodes) {
+        if (candidatePaths.has(node.path))
+            continue;
+        merged.push(node);
+    }
+    for (const node of refreshedNodes)
+        merged.push(refreshedByPath.get(node.path) ?? node);
+    return merged;
+}
+function reuseGraphNode(previousNodes, candidate, bodyHash) {
+    const node = previousNodes.get(candidate.absPath);
+    if (!node)
+        return undefined;
+    if (node.type !== candidate.type)
+        return undefined;
+    if (typeof node.bodyHash !== "string" || node.bodyHash.length === 0)
+        return undefined;
+    if (node.bodyHash !== bodyHash)
+        return undefined;
+    const validated = validateGraphCacheShape({ entities: node.entities, relations: node.relations });
+    if (!validated)
+        return undefined;
+    return {
+        entities: validated.entities,
+        relations: validated.relations,
+        confidence: normalizeConfidence(node.confidence),
+    };
+}
 /**
  * Top-level entry point. Returns a no-op result when the pass is disabled.
  *
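Reading the telemetry arithmetic in `computeGraphQualityTelemetry` above: coverage is extracted files over considered files, and density is the relation count over the maximum undirected edge count n(n-1)/2, both rounded to four decimals. A small worked example with invented numbers (the helper itself is module-internal and is called here only to illustrate the math):

```js
// 8 of 10 eligible files yielded entities; the merged graph has 20 entities and 30 relations.
const quality = computeGraphQualityTelemetry(10, 8, 20, 30);
// extractionCoverage = 8 / 10        = 0.8
// maxEdges           = 20 * 19 / 2   = 190
// density            = 30 / 190      = 0.1579 (after roundMetric's toFixed(4))
// quality -> { consideredFiles: 10, extractedFiles: 8, entityCount: 20,
//              relationCount: 30, extractionCoverage: 0.8, density: 0.1579 }
```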
@@ -67,10 +211,14 @@ const EMPTY_RESULT = {
  * `false`, the indexer simply skips this pass for the current run.
  *
  * If any of the three is missing or `false`, this function short-circuits
- * to an empty no-op result, leaving any existing
- *
+ * to an empty no-op result, leaving any existing persisted graph untouched.
+ *
+ * When `config.index.graph.graphExtractionBatchSize > 1`, eligible files are
+ * chunked into batches and each chunk is processed with a single LLM call via
+ * `extractGraphFromBodies`. Default batch size is 1 (one call per asset —
+ * preserves existing behaviour, fully opt-in).
  */
-export async function runGraphExtractionPass(config, sources, signal) {
+export async function runGraphExtractionPass(config, sources, signal, db, reEnrich, onProgress, options = {}) {
     // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
     // explicit `false` disables the pass entirely.
     if (config.llm?.features?.graph_extraction === false)
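Taken together, the gating and the new knobs are plain config reads: `llm.features.graph_extraction`, `index.graph.llm`, `index.graph.graphExtractionBatchSize`, `index.graph.graphExtractionIncludeTypes`, and the LLM concurrency setting used in the hunk below. A hedged sketch of the relevant config slice, shown as a JS object; only the key paths are taken from the property accesses in this diff, while the values and any surrounding structure are assumptions for illustration.

```js
// Illustrative config slice; values are examples, not defaults shipped by the package.
const config = {
  llm: {
    features: { graph_extraction: true }, // gate 1: only an explicit false disables the pass
    concurrency: 1,                        // lower for local model servers (LM Studio, Ollama)
  },
  index: {
    graph: {
      llm: true,                           // per-pass opt-out (#208); false stops refreshes only
      graphExtractionBatchSize: 4,         // >1 opts in to the batched extractGraphFromBodies path
      graphExtractionIncludeTypes: ["memory", "knowledge", "skill"], // defaults to memory + knowledge
    },
  },
};
```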
@@ -86,58 +234,263 @@ export async function runGraphExtractionPass(config, sources, signal) {
     const primary = sources[0];
     if (!primary)
         return { ...EMPTY_RESULT };
-    const eligible = collectEligibleFiles(primary.path);
+    const eligible = collectEligibleFiles(primary.path, getGraphExtractionIncludeTypes(config)).filter((candidate) => !options.candidatePaths || options.candidatePaths.has(candidate.absPath));
     const considered = eligible.length;
     if (considered === 0)
         return { ...EMPTY_RESULT };
+    const previousGraph = loadGraphFile(primary.path, db);
+    const previousNodes = new Map(previousGraph.files.map((node) => [node.path, node]));
     const nodes = [];
     let totalEntities = 0;
     let totalRelations = 0;
-
-
-
-
-
+    let processed = 0;
+    let extracted = 0;
+    onProgress?.({ processed, total: considered, extracted, totalEntities, totalRelations });
+    // Read the configured batch size. Default of 1 preserves the existing
+    // per-asset behaviour and is fully opt-in.
+    const batchSize = config.index?.graph?.graphExtractionBatchSize ?? 1;
+    const onFallback = (evt) => {
+        warn(`[akm] LLM fallback for ${evt.feature}: ${evt.reason}`);
+    };
+    let extractionResults;
+    if (batchSize <= 1) {
+        // ── Original per-asset path (with incremental cache) ─────────────────
+        extractionResults = await concurrentMap(eligible, async (candidate) => {
+            if (signal?.aborted)
+                return undefined;
+            const bodyHash = computeBodyHash(candidate.body);
+            let cached;
+            if (db) {
+                // withLlmCache handles hash computation, cache lookup, LLM call, and cache write.
+                // When cache misses and this run is not forced, attempt graph-node reuse before LLM.
+                cached = await withLlmCache(db, candidate.absPath, candidate.body, reEnrich ?? false, async () => {
+                    if (!(reEnrich ?? false)) {
+                        const reused = reuseGraphNode(previousNodes, candidate, bodyHash);
+                        if (reused)
+                            return reused;
+                    }
+                    const extraction = await graphExtract.extractGraphFromBody(llmConfig, candidate.body, signal, config, onFallback);
+                    // Cache empty results too so we skip on next run.
+                    return {
+                        entities: extraction.entities,
+                        relations: extraction.relations,
+                        ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
+                    };
+                }, validateGraphCacheShape);
+            }
+            else if (!(reEnrich ?? false)) {
+                cached = reuseGraphNode(previousNodes, candidate, bodyHash);
+            }
+            if (!cached) {
+                const extraction = await graphExtract.extractGraphFromBody(llmConfig, candidate.body, signal, config, onFallback);
+                cached = {
+                    entities: extraction.entities,
+                    relations: extraction.relations,
+                    ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
+                };
+            }
+            if (!cached || cached.entities.length === 0)
+                return undefined;
+            return {
+                absPath: candidate.absPath,
+                type: candidate.type,
+                bodyHash,
+                entities: cached.entities,
+                relations: cached.relations,
+                ...(cached.confidence !== undefined ? { confidence: cached.confidence } : {}),
+            };
+        },
+        // Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
+        // in config.json for local model servers (LM Studio, Ollama).
+        llmConfig.concurrency ?? 1);
+    }
+    else {
+        // ── Batched path (with incremental cache) ────────────────────────────
+        // Chunk eligible files into groups of `batchSize` and call
+        // `extractGraphFromBodies` once per chunk. Cache hits are resolved
+        // before chunking so they don't consume LLM tokens in the batch call.
+        const rawResults = new Array(eligible.length).fill(undefined);
+        const chunkStarts = [];
+        for (let start = 0; start < eligible.length; start += batchSize)
+            chunkStarts.push(start);
+        await concurrentMap(chunkStarts, async (start) => {
+            if (signal?.aborted)
+                return;
+            const chunk = eligible.slice(start, start + batchSize);
+            // Pre-resolve cache hits for this chunk; track which positions need LLM.
+            const bodyHashes = chunk.map((c) => computeBodyHash(c.body));
+            const needsLlm = chunk.map((c, j) => {
+                if (!db || reEnrich)
+                    return true;
+                const cached = getLlmCacheEntry(db, c.absPath, bodyHashes[j] ?? "");
+                if (!cached)
+                    return true;
+                try {
+                    const parsed = validateGraphCacheShape(JSON.parse(cached.resultJson));
+                    if (!parsed)
+                        return true;
+                    const entities = parsed.entities;
+                    rawResults[start + j] =
+                        entities.length > 0
+                            ? {
+                                absPath: c.absPath,
+                                type: c.type,
+                                bodyHash: bodyHashes[j] ?? "",
+                                entities,
+                                relations: parsed.relations,
+                                ...(parsed.confidence !== undefined ? { confidence: parsed.confidence } : {}),
+                            }
+                            : undefined;
+                    return false;
+                }
+                catch {
+                    return true;
+                }
+            });
+            // Secondary incremental path: reuse previous graph nodes when the body hash
+            // still matches and DB cache is missing/stale/unavailable.
+            if (!(reEnrich ?? false)) {
+                for (let j = 0; j < chunk.length; j++) {
+                    if (!needsLlm[j])
+                        continue;
+                    const candidate = chunk[j];
+                    if (!candidate)
+                        continue;
+                    const reused = reuseGraphNode(previousNodes, candidate, bodyHashes[j] ?? "");
+                    if (!reused)
+                        continue;
+                    rawResults[start + j] =
+                        reused.entities.length > 0
+                            ? {
+                                absPath: candidate.absPath,
+                                type: candidate.type,
+                                bodyHash: bodyHashes[j] ?? "",
+                                entities: reused.entities,
+                                relations: reused.relations,
+                                ...(reused.confidence !== undefined ? { confidence: reused.confidence } : {}),
+                            }
+                            : undefined;
+                    if (db) {
+                        upsertLlmCacheEntry(db, candidate.absPath, bodyHashes[j] ?? "", JSON.stringify(reused));
+                    }
+                    needsLlm[j] = false;
+                }
+            }
+            const uncachedChunk = chunk.filter((_, j) => needsLlm[j]);
+            if (uncachedChunk.length === 0)
+                return;
+            const bodies = uncachedChunk.map((c) => c.body);
+            // extractGraphFromBodies always returns an array of the same length
+            // as bodies (it falls back per-asset for any missing indices).
+            const batchExtractions = await graphExtract.extractGraphFromBodies(llmConfig, bodies, signal, config, onFallback);
+            // Map LLM results back to original positions and write cache entries.
+            let llmIdx = 0;
+            for (let j = 0; j < chunk.length; j++) {
+                if (!needsLlm[j])
+                    continue;
+                const candidate = chunk[j];
+                const extraction = batchExtractions[llmIdx++];
+                if (!candidate || !extraction)
+                    continue;
+                if (db) {
+                    upsertLlmCacheEntry(db, candidate.absPath, bodyHashes[j] ?? "", JSON.stringify({
+                        entities: extraction.entities,
+                        relations: extraction.relations,
+                        ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
+                    }));
+                }
+                if (extraction.entities.length === 0) {
+                    rawResults[start + j] = undefined;
+                }
+                else {
+                    rawResults[start + j] = {
+                        absPath: candidate.absPath,
+                        type: candidate.type,
+                        bodyHash: bodyHashes[j] ?? "",
+                        entities: extraction.entities,
+                        relations: extraction.relations,
+                        ...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
+                    };
+                }
+            }
+        }, llmConfig.concurrency ?? 1);
+        extractionResults = rawResults;
+    }
+    for (const result of extractionResults) {
+        if (!result)
             continue;
         nodes.push({
-            path:
-            type:
-
-
-
-
-
-
-
-            ...(r.
-        }))
+            path: result.absPath,
+            type: result.type,
+            bodyHash: result.bodyHash,
+            entities: [...new Set(result.entities.map((entity) => entity.trim()).filter(Boolean))],
+            relations: result.relations
+                .map((r) => ({
+                    from: r.from.trim(),
+                    to: r.to.trim(),
+                    ...(r.type ? { type: r.type.trim() } : {}),
+                    ...(normalizeConfidence(r.confidence) !== undefined ? { confidence: normalizeConfidence(r.confidence) } : {}),
+                }))
+                .filter((relation) => relation.from && relation.to),
+            ...(normalizeConfidence(result.confidence) !== undefined
+                ? { confidence: normalizeConfidence(result.confidence) }
+                : {}),
+        });
+    }
+    processed = 0;
+    extracted = 0;
+    totalEntities = 0;
+    totalRelations = 0;
+    for (let i = 0; i < extractionResults.length; i++) {
+        const result = extractionResults[i];
+        processed += 1;
+        if (result) {
+            extracted += 1;
+            totalEntities += result.entities.length;
+            totalRelations += result.relations.length;
+        }
+        onProgress?.({
+            processed,
+            total: considered,
+            extracted,
+            totalEntities,
+            totalRelations,
+            currentPath: eligible[i]?.absPath,
         });
-        totalEntities += extraction.entities.length;
-        totalRelations += extraction.relations.length;
     }
-
-
+    const mergedNodes = mergeGraphNodes(previousGraph.files, nodes, options.candidatePaths);
+    const assetRefs = mergedNodes.map((node) => node.path);
+    const deduped = deduplicateGraph(mergedNodes.map((node) => ({ entities: node.entities, relations: node.relations })), assetRefs);
+    if (mergedNodes.length === 0) {
+        warn("graph extraction: all extractions failed or returned no entities; leaving existing graph rows untouched.");
         return {
             considered,
             extracted: 0,
             totalEntities: 0,
             totalRelations: 0,
             written: false,
+            quality: computeGraphQualityTelemetry(considered, 0, 0, 0),
         };
     }
+    const qualityConsidered = options.candidatePaths ? mergedNodes.length : considered;
+    const quality = computeGraphQualityTelemetry(qualityConsidered, mergedNodes.length, deduped.entities.length, deduped.relations.length);
     const graph = {
         schemaVersion: GRAPH_FILE_SCHEMA_VERSION,
         generatedAt: new Date().toISOString(),
         stashRoot: primary.path,
-        files:
+        files: mergedNodes,
+        entities: deduped.entities,
+        relations: deduped.relations,
+        quality,
     };
-    const written = writeGraphFile(primary.path, graph);
+    const written = writeGraphFile(primary.path, graph, db);
     return {
         considered,
-        extracted
+        extracted,
         totalEntities,
         totalRelations,
         written,
+        quality,
     };
 }
 /**
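The entry point now threads through an index DB handle, a re-enrich flag, a progress callback, and an optional candidate-path filter. Below is a sketch of a call with the widened signature, assuming shapes inferred from the usage above (`sources[0].path` as the stash root, `candidatePaths` as a Set of absolute markdown paths); how the `config` and `db` handles are obtained is outside this diff, and the literal paths are invented.

```js
const result = await runGraphExtractionPass(
  config,
  [{ path: "/home/me/.akm/stash" }],   // only sources[0] is consulted
  controller.signal,                    // optional AbortSignal
  db,                                   // SQLite index DB handle; without it nothing is persisted
  false,                                // reEnrich: true bypasses the LLM cache and node reuse
  ({ processed, total, extracted }) =>  // called once up front and once per processed file
    console.error(`graph pass ${processed}/${total} (${extracted} extracted)`),
  { candidatePaths: new Set(["/home/me/.akm/stash/memory/db.md"]) } // optional incremental re-run
);
// result -> { considered, extracted, totalEntities, totalRelations, written, quality }
```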
@@ -151,10 +504,16 @@ export async function runGraphExtractionPass(config, sources, signal) {
  *
  * Exported for direct unit testing.
  */
-export function collectEligibleFiles(stashRoot) {
+export function collectEligibleFiles(stashRoot, includeTypes = [...DEFAULT_GRAPH_EXTRACTION_INCLUDE_TYPES]) {
     const out = [];
-    for (const
-        const
+    for (const rawType of includeTypes) {
+        const type = rawType.trim().toLowerCase();
+        if (!SUPPORTED_GRAPH_EXTRACTION_INCLUDE_TYPES.has(type))
+            continue;
+        const stashDir = TYPE_DIRS[type];
+        if (!stashDir)
+            continue;
+        const dir = path.join(stashRoot, stashDir);
         if (!fs.existsSync(dir))
             continue;
         for (const filePath of walkMarkdownFiles(dir)) {
@@ -178,47 +537,21 @@ export function collectEligibleFiles(stashRoot) {
     }
     return out;
 }
-function* walkMarkdownFiles(root) {
-    let entries;
-    try {
-        entries = fs.readdirSync(root, { withFileTypes: true });
-    }
-    catch {
-        return;
-    }
-    for (const entry of entries) {
-        const full = path.join(root, entry.name);
-        if (entry.isDirectory()) {
-            yield* walkMarkdownFiles(full);
-        }
-        else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) {
-            yield full;
-        }
-    }
-}
 // ── Persistence ─────────────────────────────────────────────────────────────
 /**
- *
- *
- * Direct `fs.writeFile` is intentional. The graph artifact is an indexer
- * cache — not a user-visible asset — so it does not have an asset ref and
- * `writeAssetToSource` (which routes through the asset-spec rendering
- * layer) is the wrong primitive here. See CLAUDE.md / spec §10 step 5 for
- * the carve-out: kind-branching writes for asset content live in
- * `src/core/write-source.ts`; opaque indexer artifacts may write directly.
+ * Persist graph rows into the SQLite index DB.
  */
-function writeGraphFile(stashRoot, graph) {
-
-
+function writeGraphFile(stashRoot, graph, db) {
+    if (!db) {
+        warn("graph extraction: no database handle available; skipping graph persistence.");
+        return false;
+    }
     try {
-
-        const tmp = `${target}.tmp.${process.pid}.${Math.random().toString(36).slice(2)}`;
-        fs.writeFileSync(tmp, `${JSON.stringify(graph, null, 2)}\n`, "utf8");
-        fs.renameSync(tmp, target);
+        replaceStoredGraph(db, graph);
         return true;
     }
     catch (err) {
-        warn(`graph extraction: failed to
+        warn(`graph extraction: failed to persist graph for ${stashRoot}: ${err instanceof Error ? err.message : String(err)}`);
         return false;
     }
 }
|