akm-cli 0.8.0-rc2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{.github/CHANGELOG.md → CHANGELOG.md} +191 -3
- package/README.md +22 -6
- package/SECURITY.md +93 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +93 -3
- package/dist/cli/shared.js +129 -0
- package/dist/cli.js +2141 -1268
- package/dist/commands/add-cli.js +279 -0
- package/dist/commands/agent-dispatch.js +20 -12
- package/dist/commands/agent-support.js +11 -5
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +129 -517
- package/dist/commands/consolidate.js +1533 -144
- package/dist/commands/curate.js +44 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +5 -3
- package/dist/commands/distill.js +906 -100
- package/dist/commands/env.js +213 -0
- package/dist/commands/eval-cases.js +3 -0
- package/dist/commands/events.js +3 -0
- package/dist/commands/extract-cli.js +127 -0
- package/dist/commands/extract-prompt.js +204 -0
- package/dist/commands/extract.js +477 -0
- package/dist/commands/feedback-cli.js +331 -0
- package/dist/commands/graph.js +260 -5
- package/dist/commands/health.js +977 -51
- package/dist/commands/help/help-accept.md +6 -3
- package/dist/commands/help/help-improve.md +36 -8
- package/dist/commands/help/help-proposals.md +7 -4
- package/dist/commands/help/help-reject.md +5 -2
- package/dist/commands/history.js +51 -16
- package/dist/commands/improve-auto-accept.js +97 -0
- package/dist/commands/improve-cli.js +236 -0
- package/dist/commands/improve-profiles.js +184 -0
- package/dist/commands/improve-result-file.js +167 -0
- package/dist/commands/improve.js +1725 -332
- package/dist/commands/info.js +3 -0
- package/dist/commands/init.js +49 -1
- package/dist/commands/installed-stashes.js +6 -23
- package/dist/commands/knowledge.js +3 -0
- package/dist/commands/lint/agent-linter.js +3 -0
- package/dist/commands/lint/base-linter.js +199 -5
- package/dist/commands/lint/command-linter.js +3 -0
- package/dist/commands/lint/default-linter.js +3 -0
- package/dist/commands/lint/env-key-rules.js +154 -0
- package/dist/commands/lint/index.js +92 -3
- package/dist/commands/lint/knowledge-linter.js +3 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +3 -0
- package/dist/commands/lint/registry.js +3 -0
- package/dist/commands/lint/skill-linter.js +3 -0
- package/dist/commands/lint/task-linter.js +15 -12
- package/dist/commands/lint/types.js +3 -0
- package/dist/commands/lint/workflow-linter.js +3 -0
- package/dist/commands/lint.js +3 -0
- package/dist/commands/migration-help.js +5 -2
- package/dist/commands/proposal-drain-policies.js +128 -0
- package/dist/commands/proposal-drain.js +477 -0
- package/dist/commands/proposal.js +60 -6
- package/dist/commands/propose.js +24 -19
- package/dist/commands/reflect.js +1004 -94
- package/dist/commands/registry-cli.js +150 -0
- package/dist/commands/registry-search.js +3 -0
- package/dist/commands/remember-cli.js +257 -0
- package/dist/commands/remember.js +15 -6
- package/dist/commands/schema-repair.js +88 -15
- package/dist/commands/search.js +99 -14
- package/dist/commands/secret.js +173 -0
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +32 -13
- package/dist/commands/source-add.js +7 -35
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +3 -0
- package/dist/commands/tasks.js +161 -95
- package/dist/commands/url-checker.js +3 -0
- package/dist/core/action-contributors.js +3 -0
- package/dist/core/asset-ref.js +13 -2
- package/dist/core/asset-registry.js +9 -2
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +61 -5
- package/dist/core/common.js +93 -5
- package/dist/core/concurrent.js +3 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +622 -0
- package/dist/core/config-schema.js +558 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +366 -1077
- package/dist/core/errors.js +42 -20
- package/dist/core/events.js +31 -25
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +75 -10
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +3 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +142 -14
- package/dist/core/parse.js +3 -0
- package/dist/core/paths.js +218 -50
- package/dist/core/proposal-quality-validators.js +380 -0
- package/dist/core/proposal-validators.js +11 -3
- package/dist/core/proposals.js +464 -5
- package/dist/core/state-db.js +349 -56
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +3 -0
- package/dist/core/tty.js +59 -0
- package/dist/core/warn.js +7 -2
- package/dist/core/write-source.js +12 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +136 -28
- package/dist/indexer/db.js +661 -166
- package/dist/indexer/ensure-index.js +3 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +162 -40
- package/dist/indexer/graph-db.js +241 -51
- package/dist/indexer/graph-dedup.js +3 -7
- package/dist/indexer/graph-extraction.js +242 -149
- package/dist/indexer/index-context.js +3 -9
- package/dist/indexer/indexer.js +84 -14
- package/dist/indexer/llm-cache.js +24 -19
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +184 -11
- package/dist/indexer/memory-inference.js +94 -50
- package/dist/indexer/metadata-contributors.js +3 -0
- package/dist/indexer/metadata.js +110 -50
- package/dist/indexer/path-resolver.js +3 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +134 -7
- package/dist/indexer/ranking.js +8 -1
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +91 -2
- package/dist/indexer/search-source.js +20 -1
- package/dist/indexer/semantic-status.js +4 -1
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +3 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +121 -401
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +6 -14
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +3 -0
- package/dist/integrations/agent/prompts.js +137 -8
- package/dist/integrations/agent/runner.js +208 -0
- package/dist/integrations/agent/sdk-runner.js +8 -2
- package/dist/integrations/agent/spawn.js +54 -14
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +22 -51
- package/dist/integrations/session-logs/index.js +4 -0
- package/dist/integrations/session-logs/inline-refs.js +35 -0
- package/dist/integrations/session-logs/pre-filter.js +152 -0
- package/dist/integrations/session-logs/providers/claude-code.js +226 -0
- package/dist/integrations/session-logs/providers/opencode.js +231 -25
- package/dist/integrations/session-logs/types.js +3 -0
- package/dist/llm/call-ai.js +14 -26
- package/dist/llm/client.js +16 -2
- package/dist/llm/embedder.js +20 -29
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +42 -1
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +92 -56
- package/dist/llm/graph-extract.js +401 -30
- package/dist/llm/index-passes.js +44 -29
- package/dist/llm/memory-infer.js +30 -2
- package/dist/llm/metadata-enhance.js +3 -7
- package/dist/llm/prompts/extract-session.md +80 -0
- package/dist/llm/prompts/graph-extract-user-prompt.md +24 -1
- package/dist/output/cli-hints-full.md +60 -32
- package/dist/output/cli-hints-short.md +10 -7
- package/dist/output/cli-hints.js +5 -2
- package/dist/output/context.js +60 -8
- package/dist/output/renderers.js +170 -194
- package/dist/output/shapes/curate.js +56 -0
- package/dist/output/shapes/distill.js +10 -0
- package/dist/output/shapes/env-list.js +19 -0
- package/dist/output/shapes/events.js +11 -0
- package/dist/output/shapes/helpers.js +424 -0
- package/dist/output/shapes/history.js +7 -0
- package/dist/output/shapes/passthrough.js +105 -0
- package/dist/output/shapes/proposal-accept.js +7 -0
- package/dist/output/shapes/proposal-diff.js +7 -0
- package/dist/output/shapes/proposal-list.js +7 -0
- package/dist/output/shapes/proposal-producer.js +11 -0
- package/dist/output/shapes/proposal-reject.js +7 -0
- package/dist/output/shapes/proposal-show.js +7 -0
- package/dist/output/shapes/registry-search.js +6 -0
- package/dist/output/shapes/registry.js +30 -0
- package/dist/output/shapes/search.js +6 -0
- package/dist/output/shapes/secret-list.js +19 -0
- package/dist/output/shapes/show.js +6 -0
- package/dist/output/shapes/vault-list.js +19 -0
- package/dist/output/shapes.js +51 -549
- package/dist/output/text/add.js +6 -0
- package/dist/output/text/clone.js +6 -0
- package/dist/output/text/config.js +6 -0
- package/dist/output/text/curate.js +6 -0
- package/dist/output/text/distill.js +7 -0
- package/dist/output/text/enable-disable.js +7 -0
- package/dist/output/text/events.js +10 -0
- package/dist/output/text/feedback.js +6 -0
- package/dist/output/text/helpers.js +1059 -0
- package/dist/output/text/history.js +7 -0
- package/dist/output/text/import.js +6 -0
- package/dist/output/text/index.js +6 -0
- package/dist/output/text/info.js +6 -0
- package/dist/output/text/init.js +6 -0
- package/dist/output/text/list.js +6 -0
- package/dist/output/text/proposal-producer.js +8 -0
- package/dist/output/text/proposal.js +12 -0
- package/dist/output/text/registry-commands.js +11 -0
- package/dist/output/text/registry.js +30 -0
- package/dist/output/text/remember.js +6 -0
- package/dist/output/text/remove.js +6 -0
- package/dist/output/text/save.js +6 -0
- package/dist/output/text/search.js +6 -0
- package/dist/output/text/show.js +6 -0
- package/dist/output/text/update.js +6 -0
- package/dist/output/text/upgrade.js +6 -0
- package/dist/output/text/vault.js +16 -0
- package/dist/output/text/wiki.js +15 -0
- package/dist/output/text/workflow.js +14 -0
- package/dist/output/text.js +44 -1329
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +4 -1
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +11 -2
- package/dist/registry/providers/static-index.js +10 -1
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17767 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +306 -67
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +3 -11
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +171 -21
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +3 -0
- package/dist/tasks/backends/cron.js +3 -0
- package/dist/tasks/backends/exec-utils.js +3 -0
- package/dist/tasks/backends/index.js +3 -11
- package/dist/tasks/backends/launchd.js +3 -0
- package/dist/tasks/backends/schtasks.js +3 -0
- package/dist/tasks/parser.js +51 -38
- package/dist/tasks/resolveAkmBin.js +3 -0
- package/dist/tasks/runner.js +35 -9
- package/dist/tasks/schedule.js +20 -1
- package/dist/tasks/schema.js +5 -3
- package/dist/tasks/validator.js +6 -3
- package/dist/version.js +3 -0
- package/dist/wiki/wiki-templates.js +3 -0
- package/dist/wiki/wiki.js +3 -0
- package/dist/workflows/authoring.js +3 -0
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +140 -10
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +3 -0
- package/dist/workflows/runs.js +18 -1
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +3 -0
- package/dist/workflows/validator.js +5 -9
- package/docs/README.md +7 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.5.md +2 -2
- package/docs/migration/release-notes/0.8.0.md +57 -5
- package/docs/migration/v0.7-to-v0.8.md +1378 -0
- package/package.json +28 -11
- package/.github/LICENSE +0 -374
- package/dist/commands/install-audit.js +0 -385
- package/dist/commands/vault.js +0 -310
- package/dist/indexer/match-contributors.js +0 -141
- package/dist/integrations/agent/pipeline.js +0 -39
- package/dist/integrations/agent/runners.js +0 -31
|
@@ -1,48 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
* Walks the primary stash for `memory:` and `knowledge:` assets, asks the
|
|
5
|
-
* configured LLM to extract entities and relations from each one, and
|
|
6
|
-
* persists the result to stash-local SQLite graph tables keyed by stash root.
|
|
7
|
-
* The artifact is consumed by the search
|
|
8
|
-
* pipeline (see `src/indexer/graph-boost.ts`) as a single boost component
|
|
9
|
-
* inside the existing FTS5+boosts loop — there is NO second SearchHit
|
|
10
|
-
* scorer and no parallel ranking track.
|
|
11
|
-
*
|
|
12
|
-
* Disabling — three preconditions must ALL hold for the pass to run:
|
|
13
|
-
* 1. `akm.llm` must be configured (no provider = no extraction). When
|
|
14
|
-
* absent, `resolveIndexPassLLM("graph", config)` returns `undefined`
|
|
15
|
-
* and the pass short-circuits.
|
|
16
|
-
* 2. `llm.features.graph_extraction !== false` — the locked v1 spec §14
|
|
17
|
-
* feature-flag layer. Set to `false` to block the pass at the
|
|
18
|
-
* feature-gate layer (no network call may ever issue).
|
|
19
|
-
* 3. `index.graph.llm !== false` — the per-pass opt-out layer (#208).
|
|
20
|
-
* Set to `false` to skip just this pass while leaving other passes
|
|
21
|
-
* that share the same `llm` block enabled.
|
|
22
|
-
* Toggling any one off does NOT delete the existing persisted graph — the
|
|
23
|
-
* user keeps the boost component they already have, it just stops
|
|
24
|
-
* refreshing.
|
|
25
|
-
*
|
|
26
|
-
* Locked v1 contract:
|
|
27
|
-
* - LLM access is exclusively via `resolveIndexPassLLM("graph", config)`.
|
|
28
|
-
* - The graph rows are an indexer artifact, NOT a user-visible
|
|
29
|
-
* asset. It does not have an asset ref, does not appear in search
|
|
30
|
-
* hits, and is not addressable via `akm show`. Direct `fs.writeFile`
|
|
31
|
-
* is therefore the correct primitive — `writeAssetToSource` is
|
|
32
|
-
* reserved for asset writes (CLAUDE.md / spec §10 step 5).
|
|
33
|
-
*/
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
34
4
|
import fs from "node:fs";
|
|
35
5
|
import path from "node:path";
|
|
36
6
|
import { TYPE_DIRS } from "../core/asset-spec";
|
|
37
7
|
import { concurrentMap } from "../core/concurrent";
|
|
8
|
+
import { getIndexPassConfig, resolveBatchSize } from "../core/config";
|
|
38
9
|
import { parseFrontmatter } from "../core/frontmatter";
|
|
39
|
-
import { warn } from "../core/warn";
|
|
10
|
+
import { warn, warnVerbose } from "../core/warn";
|
|
11
|
+
import { isProcessEnabled } from "../llm/feature-gate";
|
|
40
12
|
import * as graphExtract from "../llm/graph-extract";
|
|
41
13
|
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
42
|
-
import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntry, upsertLlmCacheEntry } from "./db";
|
|
14
|
+
import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "./db";
|
|
43
15
|
import { loadStoredGraphSnapshot, replaceStoredGraph } from "./graph-db";
|
|
44
16
|
import { deduplicateGraph } from "./graph-dedup";
|
|
45
|
-
import { withLlmCache } from "./llm-cache";
|
|
46
17
|
import { walkMarkdownFiles } from "./walker";
|
|
47
18
|
/** Schema version for the persisted artifact — bumps trigger a full rebuild. */
|
|
48
19
|
export const GRAPH_FILE_SCHEMA_VERSION = GRAPH_SCHEMA_VERSION;
|
|
@@ -61,6 +32,13 @@ const EMPTY_RESULT = {
|
|
|
61
32
|
totalRelations: 0,
|
|
62
33
|
written: false,
|
|
63
34
|
quality: { ...EMPTY_QUALITY },
|
|
35
|
+
telemetry: {
|
|
36
|
+
cacheHits: 0,
|
|
37
|
+
cacheMisses: 0,
|
|
38
|
+
truncationCount: 0,
|
|
39
|
+
failureCount: 0,
|
|
40
|
+
},
|
|
41
|
+
warnings: [],
|
|
64
42
|
};
|
|
65
43
|
function roundMetric(value) {
|
|
66
44
|
return Number(value.toFixed(4));
|
|
@@ -90,13 +68,38 @@ const SUPPORTED_GRAPH_EXTRACTION_INCLUDE_TYPES = new Set([
|
|
|
90
68
|
"task",
|
|
91
69
|
"wiki",
|
|
92
70
|
]);
|
|
71
|
+
const GRAPH_CACHE_VARIANT_PREFIX = "graph-extraction";
|
|
93
72
|
function normalizeConfidence(raw) {
|
|
94
73
|
if (typeof raw !== "number" || !Number.isFinite(raw))
|
|
95
74
|
return undefined;
|
|
96
75
|
return Math.max(0, Math.min(1, raw));
|
|
97
76
|
}
|
|
77
|
+
function getGraphExtractorId(config) {
|
|
78
|
+
const fingerprint = computeBodyHash(JSON.stringify({
|
|
79
|
+
promptVersion: graphExtract.GRAPH_EXTRACT_PROMPT_VERSION,
|
|
80
|
+
model: config.model,
|
|
81
|
+
batchSize: config.batchSize,
|
|
82
|
+
includeTypes: config.includeTypes,
|
|
83
|
+
maxChunkBodyChars: 1600,
|
|
84
|
+
maxBatchBodyChars: 1600,
|
|
85
|
+
})).slice(0, 16);
|
|
86
|
+
return `${GRAPH_CACHE_VARIANT_PREFIX}:${graphExtract.GRAPH_EXTRACT_PROMPT_VERSION}:${config.model}:${fingerprint}`;
|
|
87
|
+
}
|
|
88
|
+
function buildLowQualityWarnings(quality, telemetry) {
|
|
89
|
+
const warnings = [];
|
|
90
|
+
if (quality.consideredFiles >= 5 && quality.extractionCoverage < 0.3) {
|
|
91
|
+
warnings.push(`Low graph extraction coverage (${quality.extractedFiles}/${quality.consideredFiles}, ${quality.extractionCoverage}).`);
|
|
92
|
+
}
|
|
93
|
+
if (quality.entityCount >= 8 && quality.relationCount === 0) {
|
|
94
|
+
warnings.push("Graph extraction produced many entities but no relations.");
|
|
95
|
+
}
|
|
96
|
+
if (telemetry.failureCount > 0) {
|
|
97
|
+
warnings.push(`Graph extraction encountered ${telemetry.failureCount} failed file extraction(s).`);
|
|
98
|
+
}
|
|
99
|
+
return warnings;
|
|
100
|
+
}
|
|
98
101
|
export function getGraphExtractionIncludeTypes(config) {
|
|
99
|
-
const configured = config.index
|
|
102
|
+
const configured = getIndexPassConfig(config.index, "graph")?.graphExtractionIncludeTypes;
|
|
100
103
|
if (!configured || configured.length === 0)
|
|
101
104
|
return [...DEFAULT_GRAPH_EXTRACTION_INCLUDE_TYPES];
|
|
102
105
|
const out = [];
|
|
@@ -139,6 +142,8 @@ function validateGraphCacheShape(raw) {
|
|
|
139
142
|
entities: obj.entities,
|
|
140
143
|
relations: Array.isArray(obj.relations) ? obj.relations : [],
|
|
141
144
|
confidence: normalizeConfidence(obj.confidence),
|
|
145
|
+
...(typeof obj.status === "string" ? { status: obj.status } : {}),
|
|
146
|
+
...(typeof obj.reason === "string" ? { reason: obj.reason } : {}),
|
|
142
147
|
};
|
|
143
148
|
}
|
|
144
149
|
function loadGraphFile(stashRoot, db) {
|
|
@@ -159,9 +164,15 @@ function loadGraphFile(stashRoot, db) {
|
|
|
159
164
|
entities: cacheShape.entities,
|
|
160
165
|
relations: cacheShape.relations,
|
|
161
166
|
confidence: normalizeConfidence(node.confidence),
|
|
167
|
+
...(node.status ? { status: node.status } : {}),
|
|
168
|
+
...(node.reason ? { reason: node.reason } : {}),
|
|
169
|
+
...(node.extractionRunId ? { extractionRunId: node.extractionRunId } : {}),
|
|
162
170
|
});
|
|
163
171
|
}
|
|
164
|
-
return {
|
|
172
|
+
return {
|
|
173
|
+
files: out,
|
|
174
|
+
...(graph.telemetry ? { telemetry: graph.telemetry } : {}),
|
|
175
|
+
};
|
|
165
176
|
}
|
|
166
177
|
function mergeGraphNodes(previousNodes, refreshedNodes, candidatePaths) {
|
|
167
178
|
if (!candidatePaths)
|
|
@@ -194,6 +205,8 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
|
|
|
194
205
|
entities: validated.entities,
|
|
195
206
|
relations: validated.relations,
|
|
196
207
|
confidence: normalizeConfidence(node.confidence),
|
|
208
|
+
...(node.status ? { status: node.status } : {}),
|
|
209
|
+
...(node.reason ? { reason: node.reason } : {}),
|
|
197
210
|
};
|
|
198
211
|
}
|
|
199
212
|
/**
|
|
@@ -201,12 +214,12 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
|
|
|
201
214
|
*
|
|
202
215
|
* Three preconditions — ALL must hold for the pass to run:
|
|
203
216
|
*
|
|
204
|
-
* 1. **Provider configured** —
|
|
217
|
+
* 1. **Provider configured** — an LLM profile must be selectable. Without a
|
|
205
218
|
* configured provider, `resolveIndexPassLLM("graph", config)` returns
|
|
206
219
|
* `undefined` (the pass cannot run because there is no model to call).
|
|
207
|
-
* 2. **Feature gate** — `
|
|
208
|
-
* `true`). When `false`, no network call may issue regardless
|
|
209
|
-
* per-pass settings.
|
|
220
|
+
* 2. **Feature gate** — `profiles.improve.default.processes.graphExtraction.enabled`
|
|
221
|
+
* (defaults to `true`). When `false`, no network call may issue regardless
|
|
222
|
+
* of per-pass settings.
|
|
210
223
|
* 3. **Per-pass gate** — `index.graph.llm` (defaults to `true`). When
|
|
211
224
|
* `false`, the indexer simply skips this pass for the current run.
|
|
212
225
|
*
|
|
@@ -219,25 +232,38 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
|
|
|
219
232
|
* preserves existing behaviour, fully opt-in).
|
|
220
233
|
*/
|
|
221
234
|
export async function runGraphExtractionPass(config, sources, signal, db, reEnrich, onProgress, options = {}) {
|
|
222
|
-
// Gate 1 —
|
|
223
|
-
//
|
|
224
|
-
|
|
235
|
+
// Gate 1 — feature gate via isProcessEnabled, which reads the 0.8.0 path
|
|
236
|
+
// (profiles.improve.default.processes.graphExtraction.enabled). Defaults to
|
|
237
|
+
// enabled when the key is absent.
|
|
238
|
+
if (!isProcessEnabled("index", "graph_extraction", config))
|
|
225
239
|
return { ...EMPTY_RESULT };
|
|
226
240
|
// Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
|
|
227
241
|
// `undefined` when the pass should not run.
|
|
228
242
|
const llmConfig = resolveIndexPassLLM("graph", config);
|
|
229
|
-
if (!llmConfig)
|
|
243
|
+
if (!llmConfig) {
|
|
244
|
+
const reason = getIndexPassConfig(config.index, "graph")?.llm === false
|
|
245
|
+
? "index.graph.llm is false"
|
|
246
|
+
: "no default LLM profile is configured";
|
|
247
|
+
warnVerbose(`graph extraction: skipped because ${reason}.`);
|
|
230
248
|
return { ...EMPTY_RESULT };
|
|
249
|
+
}
|
|
231
250
|
// The pass only writes to the primary (working) stash. Read-only caches
|
|
232
251
|
// (git, npm, website) are deliberately untouched — the graph artifact for
|
|
233
252
|
// those sources would be clobbered by the next sync().
|
|
234
253
|
const primary = sources[0];
|
|
235
|
-
if (!primary)
|
|
254
|
+
if (!primary) {
|
|
255
|
+
warnVerbose("graph extraction: skipped because no primary stash source is available.");
|
|
236
256
|
return { ...EMPTY_RESULT };
|
|
237
|
-
|
|
257
|
+
}
|
|
258
|
+
const includeTypes = getGraphExtractionIncludeTypes(config);
|
|
259
|
+
const eligible = collectEligibleFiles(primary.path, includeTypes).filter((candidate) => !options.candidatePaths || options.candidatePaths.has(candidate.absPath));
|
|
238
260
|
const considered = eligible.length;
|
|
239
|
-
if (considered === 0)
|
|
261
|
+
if (considered === 0) {
|
|
262
|
+
const scoped = options.candidatePaths ? ` matching ${options.candidatePaths.size} candidate path(s)` : "";
|
|
263
|
+
warnVerbose(`graph extraction: skipped because no eligible files${scoped} were found under ${primary.path}. ` +
|
|
264
|
+
`includeTypes=${includeTypes.join(",")}`);
|
|
240
265
|
return { ...EMPTY_RESULT };
|
|
266
|
+
}
|
|
241
267
|
const previousGraph = loadGraphFile(primary.path, db);
|
|
242
268
|
const previousNodes = new Map(previousGraph.files.map((node) => [node.path, node]));
|
|
243
269
|
const nodes = [];
|
|
@@ -246,9 +272,58 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
246
272
|
let processed = 0;
|
|
247
273
|
let extracted = 0;
|
|
248
274
|
onProgress?.({ processed, total: considered, extracted, totalEntities, totalRelations });
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
275
|
+
const reportProgress = (currentPath, result) => {
|
|
276
|
+
processed += 1;
|
|
277
|
+
if (result) {
|
|
278
|
+
if (result.entities.length > 0)
|
|
279
|
+
extracted += 1;
|
|
280
|
+
totalEntities += result.entities.length;
|
|
281
|
+
totalRelations += result.relations.length;
|
|
282
|
+
}
|
|
283
|
+
onProgress?.({
|
|
284
|
+
processed,
|
|
285
|
+
total: considered,
|
|
286
|
+
extracted,
|
|
287
|
+
totalEntities,
|
|
288
|
+
totalRelations,
|
|
289
|
+
currentPath,
|
|
290
|
+
});
|
|
291
|
+
};
|
|
292
|
+
// Resolve the effective batch size. Falls back to
|
|
293
|
+
// DEFAULT_GRAPH_EXTRACTION_BATCH_SIZE (4) when unset, and clamps against
|
|
294
|
+
// `llm.contextLength` if the model's context window is configured.
|
|
295
|
+
const batchSize = resolveBatchSize(getIndexPassConfig(config.index, "graph")?.graphExtractionBatchSize, llmConfig.contextLength);
|
|
296
|
+
const extractionRunId = crypto.randomUUID();
|
|
297
|
+
const extractorId = getGraphExtractorId({ model: llmConfig.model, batchSize, includeTypes });
|
|
298
|
+
const cacheVariant = extractorId;
|
|
299
|
+
const telemetry = {
|
|
300
|
+
extractorId,
|
|
301
|
+
extractionRunId,
|
|
302
|
+
model: llmConfig.model,
|
|
303
|
+
promptVersion: graphExtract.GRAPH_EXTRACT_PROMPT_VERSION,
|
|
304
|
+
batchSize,
|
|
305
|
+
cacheHits: 0,
|
|
306
|
+
cacheMisses: 0,
|
|
307
|
+
truncationCount: 0,
|
|
308
|
+
failureCount: 0,
|
|
309
|
+
};
|
|
310
|
+
const canReusePreviousGraph = previousGraph.telemetry?.extractorId === extractorId;
|
|
311
|
+
const runtimeTelemetry = {
|
|
312
|
+
truncationCount: 0,
|
|
313
|
+
failureCount: 0,
|
|
314
|
+
filteredGenericEntities: 0,
|
|
315
|
+
filteredInvalidRelations: 0,
|
|
316
|
+
filteredLowConfidenceRelations: 0,
|
|
317
|
+
contextBatchRetries: 0,
|
|
318
|
+
nonArrayBatchFailures: 0,
|
|
319
|
+
};
|
|
320
|
+
const batchState = {
|
|
321
|
+
batchingDisabled: false,
|
|
322
|
+
nonArrayBatchFailures: 0,
|
|
323
|
+
};
|
|
324
|
+
warnVerbose(`graph extraction: starting for ${considered} eligible file(s) under ${primary.path}; ` +
|
|
325
|
+
`includeTypes=${includeTypes.join(",")}, batchSize=${batchSize}, concurrency=${llmConfig.concurrency ?? 1}, ` +
|
|
326
|
+
`reEnrich=${reEnrich === true}, candidateScoped=${options.candidatePaths ? "true" : "false"}.`);
|
|
252
327
|
const onFallback = (evt) => {
|
|
253
328
|
warn(`[akm] LLM fallback for ${evt.feature}: ${evt.reason}`);
|
|
254
329
|
};
|
|
@@ -256,49 +331,67 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
256
331
|
if (batchSize <= 1) {
|
|
257
332
|
// ── Original per-asset path (with incremental cache) ─────────────────
|
|
258
333
|
extractionResults = await concurrentMap(eligible, async (candidate) => {
|
|
259
|
-
if (signal?.aborted)
|
|
334
|
+
if (signal?.aborted) {
|
|
335
|
+
reportProgress(candidate.absPath, undefined);
|
|
260
336
|
return undefined;
|
|
337
|
+
}
|
|
261
338
|
const bodyHash = computeBodyHash(candidate.body);
|
|
262
339
|
let cached;
|
|
263
340
|
if (db) {
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
341
|
+
if (!(reEnrich ?? false)) {
|
|
342
|
+
const cacheEntry = getLlmCacheEntry(db, candidate.absPath, bodyHash, cacheVariant);
|
|
343
|
+
if (cacheEntry) {
|
|
344
|
+
try {
|
|
345
|
+
cached = validateGraphCacheShape(JSON.parse(cacheEntry.resultJson));
|
|
346
|
+
if (cached)
|
|
347
|
+
telemetry.cacheHits += 1;
|
|
348
|
+
}
|
|
349
|
+
catch {
|
|
350
|
+
cached = undefined;
|
|
351
|
+
}
|
|
271
352
|
}
|
|
272
|
-
|
|
273
|
-
// Cache empty results too so we skip on next run.
|
|
274
|
-
return {
|
|
275
|
-
entities: extraction.entities,
|
|
276
|
-
relations: extraction.relations,
|
|
277
|
-
...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
|
|
278
|
-
};
|
|
279
|
-
}, validateGraphCacheShape);
|
|
353
|
+
}
|
|
280
354
|
}
|
|
281
355
|
else if (!(reEnrich ?? false)) {
|
|
356
|
+
// No DB — best-effort reuse from the previous in-memory graph.
|
|
282
357
|
cached = reuseGraphNode(previousNodes, candidate, bodyHash);
|
|
283
358
|
}
|
|
359
|
+
if (!cached && !(reEnrich ?? false) && canReusePreviousGraph) {
|
|
360
|
+
const reused = reuseGraphNode(previousNodes, candidate, bodyHash);
|
|
361
|
+
if (reused) {
|
|
362
|
+
cached = reused;
|
|
363
|
+
if (db) {
|
|
364
|
+
upsertLlmCacheEntry(db, candidate.absPath, bodyHash, JSON.stringify(reused), cacheVariant);
|
|
365
|
+
}
|
|
366
|
+
telemetry.cacheHits += 1;
|
|
367
|
+
}
|
|
368
|
+
}
|
|
284
369
|
if (!cached) {
|
|
285
|
-
|
|
370
|
+
telemetry.cacheMisses += 1;
|
|
371
|
+
const extraction = await graphExtract.extractGraphFromBody(llmConfig, candidate.body, signal, config, onFallback, { batchState, telemetry: runtimeTelemetry });
|
|
286
372
|
cached = {
|
|
287
373
|
entities: extraction.entities,
|
|
288
374
|
relations: extraction.relations,
|
|
289
375
|
...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
|
|
376
|
+
...(extraction.status ? { status: extraction.status } : {}),
|
|
377
|
+
...(extraction.reason ? { reason: extraction.reason } : {}),
|
|
290
378
|
};
|
|
379
|
+
if (db) {
|
|
380
|
+
upsertLlmCacheEntry(db, candidate.absPath, bodyHash, JSON.stringify(cached), cacheVariant);
|
|
381
|
+
}
|
|
291
382
|
}
|
|
292
|
-
|
|
293
|
-
return undefined;
|
|
294
|
-
return {
|
|
383
|
+
const result = {
|
|
295
384
|
absPath: candidate.absPath,
|
|
296
385
|
type: candidate.type,
|
|
297
386
|
bodyHash,
|
|
298
387
|
entities: cached.entities,
|
|
299
388
|
relations: cached.relations,
|
|
300
389
|
...(cached.confidence !== undefined ? { confidence: cached.confidence } : {}),
|
|
390
|
+
...(cached.status ? { status: cached.status } : {}),
|
|
391
|
+
...(cached.reason ? { reason: cached.reason } : {}),
|
|
301
392
|
};
|
|
393
|
+
reportProgress(candidate.absPath, result);
|
|
394
|
+
return result;
|
|
302
395
|
},
|
|
303
396
|
// Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
|
|
304
397
|
// in config.json for local model servers (LM Studio, Ollama).
|
|
@@ -317,30 +410,44 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
317
410
|
if (signal?.aborted)
|
|
318
411
|
return;
|
|
319
412
|
const chunk = eligible.slice(start, start + batchSize);
|
|
413
|
+
const reportChunkProgress = () => {
|
|
414
|
+
for (let j = 0; j < chunk.length; j++) {
|
|
415
|
+
const candidate = chunk[j];
|
|
416
|
+
if (!candidate)
|
|
417
|
+
continue;
|
|
418
|
+
reportProgress(candidate.absPath, rawResults[start + j]);
|
|
419
|
+
}
|
|
420
|
+
};
|
|
320
421
|
// Pre-resolve cache hits for this chunk; track which positions need LLM.
|
|
321
422
|
const bodyHashes = chunk.map((c) => computeBodyHash(c.body));
|
|
423
|
+
// Batch the cache lookup: one IN(...) query for the whole chunk instead
|
|
424
|
+
// of N individual SELECTs. The map covers every ref in this chunk that
|
|
425
|
+
// has any cached row; the per-position hash check happens below.
|
|
426
|
+
const chunkCache = db && !reEnrich
|
|
427
|
+
? getLlmCacheEntriesByRefs(db, chunk.map((c) => c.absPath), cacheVariant)
|
|
428
|
+
: new Map();
|
|
322
429
|
const needsLlm = chunk.map((c, j) => {
|
|
323
430
|
if (!db || reEnrich)
|
|
324
431
|
return true;
|
|
325
|
-
const cached =
|
|
326
|
-
|
|
432
|
+
const cached = chunkCache.get(c.absPath);
|
|
433
|
+
// Hash mismatch → body changed, treat as cache miss.
|
|
434
|
+
if (!cached || cached.bodyHash !== (bodyHashes[j] ?? ""))
|
|
327
435
|
return true;
|
|
328
436
|
try {
|
|
329
437
|
const parsed = validateGraphCacheShape(JSON.parse(cached.resultJson));
|
|
330
438
|
if (!parsed)
|
|
331
439
|
return true;
|
|
332
|
-
|
|
333
|
-
rawResults[start + j] =
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
: undefined;
|
|
440
|
+
telemetry.cacheHits += 1;
|
|
441
|
+
rawResults[start + j] = {
|
|
442
|
+
absPath: c.absPath,
|
|
443
|
+
type: c.type,
|
|
444
|
+
bodyHash: bodyHashes[j] ?? "",
|
|
445
|
+
entities: parsed.entities,
|
|
446
|
+
relations: parsed.relations,
|
|
447
|
+
...(parsed.confidence !== undefined ? { confidence: parsed.confidence } : {}),
|
|
448
|
+
...(parsed.status ? { status: parsed.status } : {}),
|
|
449
|
+
...(parsed.reason ? { reason: parsed.reason } : {}),
|
|
450
|
+
};
|
|
344
451
|
return false;
|
|
345
452
|
}
|
|
346
453
|
catch {
|
|
@@ -349,7 +456,7 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
349
456
|
});
|
|
350
457
|
// Secondary incremental path: reuse previous graph nodes when the body hash
|
|
351
458
|
// still matches and DB cache is missing/stale/unavailable.
|
|
352
|
-
if (!(reEnrich ?? false)) {
|
|
459
|
+
if (!(reEnrich ?? false) && canReusePreviousGraph) {
|
|
353
460
|
for (let j = 0; j < chunk.length; j++) {
|
|
354
461
|
if (!needsLlm[j])
|
|
355
462
|
continue;
|
|
@@ -359,30 +466,33 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
359
466
|
const reused = reuseGraphNode(previousNodes, candidate, bodyHashes[j] ?? "");
|
|
360
467
|
if (!reused)
|
|
361
468
|
continue;
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
469
|
+
telemetry.cacheHits += 1;
|
|
470
|
+
rawResults[start + j] = {
|
|
471
|
+
absPath: candidate.absPath,
|
|
472
|
+
type: candidate.type,
|
|
473
|
+
bodyHash: bodyHashes[j] ?? "",
|
|
474
|
+
entities: reused.entities,
|
|
475
|
+
relations: reused.relations,
|
|
476
|
+
...(reused.confidence !== undefined ? { confidence: reused.confidence } : {}),
|
|
477
|
+
...(reused.status ? { status: reused.status } : {}),
|
|
478
|
+
...(reused.reason ? { reason: reused.reason } : {}),
|
|
479
|
+
};
|
|
373
480
|
if (db) {
|
|
374
|
-
upsertLlmCacheEntry(db, candidate.absPath, bodyHashes[j] ?? "", JSON.stringify(reused));
|
|
481
|
+
upsertLlmCacheEntry(db, candidate.absPath, bodyHashes[j] ?? "", JSON.stringify(reused), cacheVariant);
|
|
375
482
|
}
|
|
376
483
|
needsLlm[j] = false;
|
|
377
484
|
}
|
|
378
485
|
}
|
|
379
486
|
const uncachedChunk = chunk.filter((_, j) => needsLlm[j]);
|
|
380
|
-
if (uncachedChunk.length === 0)
|
|
487
|
+
if (uncachedChunk.length === 0) {
|
|
488
|
+
reportChunkProgress();
|
|
381
489
|
return;
|
|
490
|
+
}
|
|
382
491
|
const bodies = uncachedChunk.map((c) => c.body);
|
|
492
|
+
telemetry.cacheMisses += uncachedChunk.length;
|
|
383
493
|
// extractGraphFromBodies always returns an array of the same length
|
|
384
494
|
// as bodies (it falls back per-asset for any missing indices).
|
|
385
|
-
const batchExtractions = await graphExtract.extractGraphFromBodies(llmConfig, bodies, signal, config, onFallback);
|
|
495
|
+
const batchExtractions = await graphExtract.extractGraphFromBodies(llmConfig, bodies, signal, config, onFallback, { batchState, telemetry: runtimeTelemetry });
|
|
386
496
|
// Map LLM results back to original positions and write cache entries.
|
|
387
497
|
let llmIdx = 0;
|
|
388
498
|
for (let j = 0; j < chunk.length; j++) {
|
|
@@ -397,22 +507,22 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
397
507
|
entities: extraction.entities,
|
|
398
508
|
relations: extraction.relations,
|
|
399
509
|
...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
rawResults[start + j] = undefined;
|
|
404
|
-
}
|
|
405
|
-
else {
|
|
406
|
-
rawResults[start + j] = {
|
|
407
|
-
absPath: candidate.absPath,
|
|
408
|
-
type: candidate.type,
|
|
409
|
-
bodyHash: bodyHashes[j] ?? "",
|
|
410
|
-
entities: extraction.entities,
|
|
411
|
-
relations: extraction.relations,
|
|
412
|
-
...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
|
|
413
|
-
};
|
|
510
|
+
...(extraction.status ? { status: extraction.status } : {}),
|
|
511
|
+
...(extraction.reason ? { reason: extraction.reason } : {}),
|
|
512
|
+
}), cacheVariant);
|
|
414
513
|
}
|
|
514
|
+
rawResults[start + j] = {
|
|
515
|
+
absPath: candidate.absPath,
|
|
516
|
+
type: candidate.type,
|
|
517
|
+
bodyHash: bodyHashes[j] ?? "",
|
|
518
|
+
entities: extraction.entities,
|
|
519
|
+
relations: extraction.relations,
|
|
520
|
+
...(extraction.confidence !== undefined ? { confidence: extraction.confidence } : {}),
|
|
521
|
+
...(extraction.status ? { status: extraction.status } : {}),
|
|
522
|
+
...(extraction.reason ? { reason: extraction.reason } : {}),
|
|
523
|
+
};
|
|
415
524
|
}
|
|
525
|
+
reportChunkProgress();
|
|
416
526
|
}, llmConfig.concurrency ?? 1);
|
|
417
527
|
extractionResults = rawResults;
|
|
418
528
|
}
|
|
@@ -435,45 +545,22 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
435
545
|
...(normalizeConfidence(result.confidence) !== undefined
|
|
436
546
|
? { confidence: normalizeConfidence(result.confidence) }
|
|
437
547
|
: {}),
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
extracted = 0;
|
|
442
|
-
totalEntities = 0;
|
|
443
|
-
totalRelations = 0;
|
|
444
|
-
for (let i = 0; i < extractionResults.length; i++) {
|
|
445
|
-
const result = extractionResults[i];
|
|
446
|
-
processed += 1;
|
|
447
|
-
if (result) {
|
|
448
|
-
extracted += 1;
|
|
449
|
-
totalEntities += result.entities.length;
|
|
450
|
-
totalRelations += result.relations.length;
|
|
451
|
-
}
|
|
452
|
-
onProgress?.({
|
|
453
|
-
processed,
|
|
454
|
-
total: considered,
|
|
455
|
-
extracted,
|
|
456
|
-
totalEntities,
|
|
457
|
-
totalRelations,
|
|
458
|
-
currentPath: eligible[i]?.absPath,
|
|
548
|
+
status: result.status ?? (result.entities.length > 0 ? "extracted" : "empty"),
|
|
549
|
+
reason: result.reason ?? (result.entities.length > 0 ? "none" : "no_graph_content"),
|
|
550
|
+
extractionRunId,
|
|
459
551
|
});
|
|
460
552
|
}
|
|
461
553
|
const mergedNodes = mergeGraphNodes(previousGraph.files, nodes, options.candidatePaths);
|
|
462
554
|
const assetRefs = mergedNodes.map((node) => node.path);
|
|
463
555
|
const deduped = deduplicateGraph(mergedNodes.map((node) => ({ entities: node.entities, relations: node.relations })), assetRefs);
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
quality: computeGraphQualityTelemetry(considered, 0, 0, 0),
|
|
473
|
-
};
|
|
474
|
-
}
|
|
475
|
-
const qualityConsidered = options.candidatePaths ? mergedNodes.length : considered;
|
|
476
|
-
const quality = computeGraphQualityTelemetry(qualityConsidered, mergedNodes.length, deduped.entities.length, deduped.relations.length);
|
|
556
|
+
telemetry.truncationCount = runtimeTelemetry.truncationCount ?? 0;
|
|
557
|
+
telemetry.failureCount = runtimeTelemetry.failureCount ?? 0;
|
|
558
|
+
const qualityConsidered = mergedNodes.length;
|
|
559
|
+
const qualityExtracted = mergedNodes.filter((node) => node.status === "extracted" && node.entities.length > 0).length;
|
|
560
|
+
const quality = computeGraphQualityTelemetry(qualityConsidered, qualityExtracted, deduped.entities.length, deduped.relations.length);
|
|
561
|
+
const warnings = buildLowQualityWarnings(quality, telemetry);
|
|
562
|
+
for (const warning of warnings)
|
|
563
|
+
warnVerbose(`graph extraction quality: ${warning}`);
|
|
477
564
|
const graph = {
|
|
478
565
|
schemaVersion: GRAPH_FILE_SCHEMA_VERSION,
|
|
479
566
|
generatedAt: new Date().toISOString(),
|
|
@@ -482,8 +569,12 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
482
569
|
entities: deduped.entities,
|
|
483
570
|
relations: deduped.relations,
|
|
484
571
|
quality,
|
|
572
|
+
telemetry,
|
|
485
573
|
};
|
|
486
574
|
const written = writeGraphFile(primary.path, graph, db);
|
|
575
|
+
warnVerbose(`graph extraction: ${written ? "persisted" : "did not persist"} graph for ${primary.path}; ` +
|
|
576
|
+
`considered=${considered}, extractedThisRun=${extracted}, storedFiles=${mergedNodes.length}, ` +
|
|
577
|
+
`entities=${deduped.entities.length}, relations=${deduped.relations.length}, coverage=${quality.extractionCoverage}.`);
|
|
487
578
|
return {
|
|
488
579
|
considered,
|
|
489
580
|
extracted,
|
|
@@ -491,6 +582,8 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
491
582
|
totalRelations,
|
|
492
583
|
written,
|
|
493
584
|
quality,
|
|
585
|
+
telemetry,
|
|
586
|
+
warnings,
|
|
494
587
|
};
|
|
495
588
|
}
|
|
496
589
|
/**
|
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
* Extracted from `src/indexer/indexer.ts` so each named phase function
|
|
5
|
-
* (`runSourceCachePhase`, `runMemoryInferencePhase`, …) can receive a single
|
|
6
|
-
* typed argument rather than a long positional parameter list. The context is
|
|
7
|
-
* assembled once at the top of `akmIndex()` and passed to each phase in
|
|
8
|
-
* sequence.
|
|
9
|
-
*/
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
10
4
|
export {};
|