akm-cli 0.6.0 → 0.7.0-rc1
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/dist/{cli.js → src/cli.js} +672 -29
- package/dist/{commands → src/commands}/config-cli.js +5 -4
- package/dist/src/commands/distill.js +283 -0
- package/dist/src/commands/events.js +108 -0
- package/dist/src/commands/history.js +120 -0
- package/dist/{commands → src/commands}/installed-stashes.js +28 -2
- package/dist/src/commands/proposal.js +119 -0
- package/dist/src/commands/propose.js +171 -0
- package/dist/src/commands/reflect.js +193 -0
- package/dist/{commands → src/commands}/registry-search.js +2 -1
- package/dist/{commands → src/commands}/remember.js +12 -0
- package/dist/{commands → src/commands}/search.js +74 -1
- package/dist/{commands → src/commands}/self-update.js +4 -3
- package/dist/{commands → src/commands}/show.js +67 -2
- package/dist/{core → src/core}/asset-ref.js +5 -5
- package/dist/{core → src/core}/asset-spec.js +12 -0
- package/dist/{core → src/core}/common.js +1 -1
- package/dist/{core → src/core}/config.js +175 -121
- package/dist/{core → src/core}/errors.js +4 -0
- package/dist/src/core/events.js +239 -0
- package/dist/src/core/lesson-lint.js +86 -0
- package/dist/src/core/proposals.js +406 -0
- package/dist/src/core/warn.js +72 -0
- package/dist/{core → src/core}/write-source.js +80 -5
- package/dist/{indexer → src/indexer}/db-search.js +119 -27
- package/dist/{indexer → src/indexer}/db.js +76 -23
- package/dist/{indexer → src/indexer}/file-context.js +0 -3
- package/dist/src/indexer/graph-boost.js +179 -0
- package/dist/src/indexer/graph-extraction.js +212 -0
- package/dist/{indexer → src/indexer}/indexer.js +73 -6
- package/dist/src/indexer/memory-inference.js +263 -0
- package/dist/{indexer → src/indexer}/metadata.js +114 -11
- package/dist/src/integrations/agent/config.js +292 -0
- package/dist/src/integrations/agent/detect.js +94 -0
- package/dist/src/integrations/agent/index.js +17 -0
- package/dist/src/integrations/agent/profiles.js +65 -0
- package/dist/src/integrations/agent/prompts.js +167 -0
- package/dist/src/integrations/agent/spawn.js +221 -0
- package/dist/{integrations → src/integrations}/lockfile.js +0 -26
- package/dist/{llm → src/llm}/client.js +33 -2
- package/dist/src/llm/feature-gate.js +108 -0
- package/dist/src/llm/graph-extract.js +107 -0
- package/dist/src/llm/index-passes.js +35 -0
- package/dist/src/llm/memory-infer.js +86 -0
- package/dist/{output → src/output}/renderers.js +60 -1
- package/dist/src/output/shapes.js +516 -0
- package/dist/{output → src/output}/text.js +447 -4
- package/dist/{registry → src/registry}/build-index.js +14 -4
- package/dist/{registry → src/registry}/factory.js +0 -8
- package/dist/{registry → src/registry}/providers/static-index.js +3 -2
- package/dist/{registry → src/registry}/resolve.js +68 -2
- package/dist/{setup → src/setup}/setup.js +43 -5
- package/dist/{sources → src/sources}/providers/git.js +7 -15
- package/dist/{wiki → src/wiki}/wiki.js +9 -11
- package/dist/tests/add-website-source.test.js +119 -0
- package/dist/tests/agent/agent-config-loader.test.js +70 -0
- package/dist/tests/agent/agent-config.test.js +221 -0
- package/dist/tests/agent/agent-detect.test.js +100 -0
- package/dist/tests/agent/agent-spawn.test.js +234 -0
- package/dist/tests/agent-output.test.js +186 -0
- package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
- package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
- package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
- package/dist/tests/asset-ref.test.js +192 -0
- package/dist/tests/asset-registry.test.js +103 -0
- package/dist/tests/asset-spec.test.js +241 -0
- package/dist/tests/bench/attribution.test.js +995 -0
- package/dist/tests/bench/cleanup-sigint.test.js +83 -0
- package/dist/tests/bench/cleanup.js +203 -0
- package/dist/tests/bench/cleanup.test.js +166 -0
- package/dist/tests/bench/cli.js +683 -0
- package/dist/tests/bench/cli.test.js +177 -0
- package/dist/tests/bench/compare.test.js +556 -0
- package/dist/tests/bench/corpus.js +314 -0
- package/dist/tests/bench/corpus.test.js +258 -0
- package/dist/tests/bench/driver.js +346 -0
- package/dist/tests/bench/driver.test.js +443 -0
- package/dist/tests/bench/evolve-metrics.js +179 -0
- package/dist/tests/bench/evolve-metrics.test.js +187 -0
- package/dist/tests/bench/evolve.js +580 -0
- package/dist/tests/bench/evolve.test.js +616 -0
- package/dist/tests/bench/failure-modes.test.js +300 -0
- package/dist/tests/bench/feedback-integrity.test.js +456 -0
- package/dist/tests/bench/leakage.test.js +125 -0
- package/dist/tests/bench/learning-curve.test.js +133 -0
- package/dist/tests/bench/metrics.js +2319 -0
- package/dist/tests/bench/metrics.test.js +1144 -0
- package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
- package/dist/tests/bench/report.js +1821 -0
- package/dist/tests/bench/report.test.js +989 -0
- package/dist/tests/bench/runner.js +536 -0
- package/dist/tests/bench/runner.test.js +958 -0
- package/dist/tests/bench/search-bridge.test.js +331 -0
- package/dist/tests/bench/tmp.js +41 -0
- package/dist/tests/bench/trajectory.js +116 -0
- package/dist/tests/bench/trajectory.test.js +127 -0
- package/dist/tests/bench/verifier.js +109 -0
- package/dist/tests/bench/verifier.test.js +118 -0
- package/dist/tests/bench/workflow-evaluator.js +557 -0
- package/dist/tests/bench/workflow-evaluator.test.js +421 -0
- package/dist/tests/bench/workflow-spec.js +358 -0
- package/dist/tests/bench/workflow-spec.test.js +363 -0
- package/dist/tests/bench/workflow-trace.js +438 -0
- package/dist/tests/bench/workflow-trace.test.js +254 -0
- package/dist/tests/benchmark-search-quality.js +536 -0
- package/dist/tests/benchmark-suite.js +1441 -0
- package/dist/tests/capture-cli.test.js +112 -0
- package/dist/tests/cli-errors.test.js +203 -0
- package/dist/tests/commands/events.test.js +370 -0
- package/dist/tests/commands/history.test.js +223 -0
- package/dist/tests/commands/import.test.js +103 -0
- package/dist/tests/commands/proposal-cli.test.js +209 -0
- package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
- package/dist/tests/commands/remember.test.js +97 -0
- package/dist/tests/commands/scope-flags.test.js +300 -0
- package/dist/tests/commands/search.test.js +537 -0
- package/dist/tests/commands/show-indexer-parity.test.js +117 -0
- package/dist/tests/commands/show.test.js +294 -0
- package/dist/tests/common.test.js +266 -0
- package/dist/tests/completions.test.js +142 -0
- package/dist/tests/config-cli.test.js +193 -0
- package/dist/tests/config-llm-features.test.js +139 -0
- package/dist/tests/config.test.js +544 -0
- package/dist/tests/contracts/migration-baseline.test.js +43 -0
- package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
- package/dist/tests/contracts/spec-helpers.js +46 -0
- package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
- package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
- package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
- package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
- package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
- package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
- package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
- package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
- package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
- package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
- package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
- package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
- package/dist/tests/core/write-source.test.js +366 -0
- package/dist/tests/curate-command.test.js +87 -0
- package/dist/tests/db-scoring.test.js +201 -0
- package/dist/tests/db.test.js +654 -0
- package/dist/tests/distill-cli-flag.test.js +208 -0
- package/dist/tests/distill.test.js +515 -0
- package/dist/tests/docker-install.test.js +120 -0
- package/dist/tests/e2e.test.js +1398 -0
- package/dist/tests/embedder.test.js +340 -0
- package/dist/tests/embedding-model-config.test.js +379 -0
- package/dist/tests/feedback-command.test.js +172 -0
- package/dist/tests/file-context.test.js +552 -0
- package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
- package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
- package/dist/tests/fixtures/stashes/load.js +166 -0
- package/dist/tests/fixtures/stashes/load.test.js +88 -0
- package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
- package/dist/tests/frontmatter.test.js +190 -0
- package/dist/tests/fts-field-weighting.test.js +254 -0
- package/dist/tests/fuzzy-search.test.js +230 -0
- package/dist/tests/git-provider-clone.test.js +45 -0
- package/dist/tests/github.test.js +161 -0
- package/dist/tests/graph-boost-ranking.test.js +305 -0
- package/dist/tests/graph-extraction.test.js +282 -0
- package/dist/tests/helpers/usage-events.js +8 -0
- package/dist/tests/index-pass-llm.test.js +161 -0
- package/dist/tests/indexer.test.js +559 -0
- package/dist/tests/info-command.test.js +166 -0
- package/dist/tests/init.test.js +69 -0
- package/dist/tests/install-script.test.js +246 -0
- package/dist/tests/integration/agent-real-profile.test.js +94 -0
- package/dist/tests/issue-36-repro.test.js +304 -0
- package/dist/tests/issues-191-194.test.js +160 -0
- package/dist/tests/lesson-lint.test.js +111 -0
- package/dist/tests/llm-client.test.js +115 -0
- package/dist/tests/llm-feature-gate.test.js +151 -0
- package/dist/tests/llm.test.js +139 -0
- package/dist/tests/lockfile.test.js +216 -0
- package/dist/tests/manifest.test.js +205 -0
- package/dist/tests/markdown.test.js +126 -0
- package/dist/tests/matchers-unit.test.js +189 -0
- package/dist/tests/memory-inference.test.js +299 -0
- package/dist/tests/merge-scoring.test.js +136 -0
- package/dist/tests/metadata.test.js +313 -0
- package/dist/tests/migration-help.test.js +89 -0
- package/dist/tests/origin-resolve.test.js +124 -0
- package/dist/tests/output-baseline.test.js +217 -0
- package/dist/tests/output-shapes-unit.test.js +476 -0
- package/dist/tests/parallel-search.test.js +272 -0
- package/dist/tests/parameter-metadata.test.js +365 -0
- package/dist/tests/paths.test.js +177 -0
- package/dist/tests/progressive-disclosure.test.js +280 -0
- package/dist/tests/proposals.test.js +279 -0
- package/dist/tests/proposed-quality.test.js +271 -0
- package/dist/tests/provider-registry.test.js +32 -0
- package/dist/tests/ranking-regression.test.js +548 -0
- package/dist/tests/reflect-propose.test.js +455 -0
- package/dist/tests/registry-build-index.test.js +378 -0
- package/dist/tests/registry-cli.test.js +290 -0
- package/dist/tests/registry-index-v2.test.js +430 -0
- package/dist/tests/registry-install.test.js +728 -0
- package/dist/tests/registry-providers/parity.test.js +189 -0
- package/dist/tests/registry-providers/skills-sh.test.js +309 -0
- package/dist/tests/registry-providers/static-index.test.js +204 -0
- package/dist/tests/registry-resolve.test.js +126 -0
- package/dist/tests/registry-search.test.js +723 -0
- package/dist/tests/remember-frontmatter.test.js +380 -0
- package/dist/tests/remember-unit.test.js +123 -0
- package/dist/tests/ripgrep-install.test.js +251 -0
- package/dist/tests/ripgrep-resolve.test.js +108 -0
- package/dist/tests/ripgrep.test.js +163 -0
- package/dist/tests/save-command.test.js +94 -0
- package/dist/tests/save-trust-qa-fixes.test.js +270 -0
- package/dist/tests/scoring-pipeline.test.js +648 -0
- package/dist/tests/search-include-proposed-cli.test.js +118 -0
- package/dist/tests/self-update.test.js +442 -0
- package/dist/tests/semantic-search-e2e.test.js +512 -0
- package/dist/tests/semantic-status.test.js +471 -0
- package/dist/tests/setup-run.integration.js +877 -0
- package/dist/tests/setup-wizard.test.js +198 -0
- package/dist/tests/setup.test.js +131 -0
- package/dist/tests/source-add.test.js +11 -0
- package/dist/tests/source-clone.test.js +254 -0
- package/dist/tests/source-manage.test.js +366 -0
- package/dist/tests/source-providers/filesystem.test.js +82 -0
- package/dist/tests/source-providers/git.test.js +252 -0
- package/dist/tests/source-providers/website.test.js +128 -0
- package/dist/tests/source-qa-fixes.test.js +268 -0
- package/dist/tests/source-registry.test.js +350 -0
- package/dist/tests/source-resolve.test.js +100 -0
- package/dist/tests/source-source.test.js +221 -0
- package/dist/tests/source.test.js +533 -0
- package/dist/tests/tar-utils-scan.test.js +73 -0
- package/dist/tests/toggle-components.test.js +73 -0
- package/dist/tests/usage-telemetry.test.js +265 -0
- package/dist/tests/utility-scoring.test.js +558 -0
- package/dist/tests/vault-load-error.test.js +78 -0
- package/dist/tests/vault-qa-fixes.test.js +194 -0
- package/dist/tests/vault.test.js +429 -0
- package/dist/tests/vector-search.test.js +608 -0
- package/dist/tests/walker.test.js +252 -0
- package/dist/tests/wave2-cluster-bc.test.js +228 -0
- package/dist/tests/wave2-cluster-d.test.js +180 -0
- package/dist/tests/wave2-cluster-e.test.js +179 -0
- package/dist/tests/wiki-qa-fixes.test.js +270 -0
- package/dist/tests/wiki.test.js +529 -0
- package/dist/tests/workflow-cli.test.js +271 -0
- package/dist/tests/workflow-markdown.test.js +171 -0
- package/dist/tests/workflow-path-escape.test.js +132 -0
- package/dist/tests/workflow-qa-fixes.test.js +377 -0
- package/dist/tests/workflows/indexer-rejection.test.js +213 -0
- package/docs/README.md +8 -0
- package/docs/migration/release-notes/0.7.0.md +244 -0
- package/package.json +2 -2
- package/dist/core/warn.js +0 -27
- package/dist/output/shapes.js +0 -212
- /package/dist/{commands → src/commands}/completions.js +0 -0
- /package/dist/{commands → src/commands}/curate.js +0 -0
- /package/dist/{commands → src/commands}/info.js +0 -0
- /package/dist/{commands → src/commands}/init.js +0 -0
- /package/dist/{commands → src/commands}/install-audit.js +0 -0
- /package/dist/{commands → src/commands}/migration-help.js +0 -0
- /package/dist/{commands → src/commands}/source-add.js +0 -0
- /package/dist/{commands → src/commands}/source-clone.js +0 -0
- /package/dist/{commands → src/commands}/source-manage.js +0 -0
- /package/dist/{commands → src/commands}/vault.js +0 -0
- /package/dist/{core → src/core}/asset-registry.js +0 -0
- /package/dist/{core → src/core}/frontmatter.js +0 -0
- /package/dist/{core → src/core}/markdown.js +0 -0
- /package/dist/{core → src/core}/paths.js +0 -0
- /package/dist/{indexer → src/indexer}/manifest.js +0 -0
- /package/dist/{indexer → src/indexer}/matchers.js +0 -0
- /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
- /package/dist/{indexer → src/indexer}/search-source.js +0 -0
- /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
- /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
- /package/dist/{indexer → src/indexer}/walker.js +0 -0
- /package/dist/{integrations → src/integrations}/github.js +0 -0
- /package/dist/{llm → src/llm}/embedder.js +0 -0
- /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
- /package/dist/{llm → src/llm}/embedders/local.js +0 -0
- /package/dist/{llm → src/llm}/embedders/remote.js +0 -0
- /package/dist/{llm → src/llm}/embedders/types.js +0 -0
- /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
- /package/dist/{output → src/output}/cli-hints.js +0 -0
- /package/dist/{output → src/output}/context.js +0 -0
- /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
- /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
- /package/dist/{registry → src/registry}/providers/index.js +0 -0
- /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
- /package/dist/{registry → src/registry}/providers/types.js +0 -0
- /package/dist/{registry → src/registry}/types.js +0 -0
- /package/dist/{setup → src/setup}/detect.js +0 -0
- /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
- /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
- /package/dist/{setup → src/setup}/steps.js +0 -0
- /package/dist/{sources → src/sources}/include.js +0 -0
- /package/dist/{sources → src/sources}/provider-factory.js +0 -0
- /package/dist/{sources → src/sources}/provider.js +0 -0
- /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
- /package/dist/{sources → src/sources}/providers/index.js +0 -0
- /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
- /package/dist/{sources → src/sources}/providers/npm.js +0 -0
- /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
- /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
- /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
- /package/dist/{sources → src/sources}/providers/website.js +0 -0
- /package/dist/{sources → src/sources}/resolve.js +0 -0
- /package/dist/{sources → src/sources}/types.js +0 -0
- /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
- /package/dist/{version.js → src/version.js} +0 -0
- /package/dist/{workflows → src/workflows}/authoring.js +0 -0
- /package/dist/{workflows → src/workflows}/cli.js +0 -0
- /package/dist/{workflows → src/workflows}/db.js +0 -0
- /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
- /package/dist/{workflows → src/workflows}/parser.js +0 -0
- /package/dist/{workflows → src/workflows}/renderer.js +0 -0
- /package/dist/{workflows → src/workflows}/runs.js +0 -0
- /package/dist/{workflows → src/workflows}/schema.js +0 -0
- /package/dist/{workflows → src/workflows}/validator.js +0 -0
|
@@ -17,9 +17,10 @@ import { defaultRendererRegistry } from "../core/asset-registry";
|
|
|
17
17
|
import { deriveCanonicalAssetNameFromStashRoot } from "../core/asset-spec";
|
|
18
18
|
import { getDbPath } from "../core/paths";
|
|
19
19
|
import { warn } from "../core/warn";
|
|
20
|
-
import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, getUtilityScoresByIds, openDatabase, searchFts, searchVec, } from "./db";
|
|
20
|
+
import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, getUtilityScoresByIds, openDatabase, sanitizeFtsQuery, searchFts, searchVec, } from "./db";
|
|
21
21
|
import { getRenderer } from "./file-context";
|
|
22
|
-
import {
|
|
22
|
+
import { computeGraphBoost, loadGraphBoostContext } from "./graph-boost";
|
|
23
|
+
import { generateMetadataFlat, isProposedQuality, loadStashFile, shouldIndexStashFile, } from "./metadata";
|
|
23
24
|
import { buildSearchText } from "./search-fields";
|
|
24
25
|
import { buildEditHint, findSourceForPath, isEditable } from "./search-source";
|
|
25
26
|
import { deriveSemanticProviderFingerprint, getEffectiveSemanticStatus, isSemanticRuntimeReady, readSemanticStatus, } from "./semantic-status";
|
|
@@ -44,6 +45,8 @@ function resolveSearchHitOrigin(source) {
|
|
|
44
45
|
// ── Main search entrypoint ───────────────────────────────────────────────────
|
|
45
46
|
export async function searchLocal(input) {
|
|
46
47
|
const { query, searchType, limit, stashDir, sources, config } = input;
|
|
48
|
+
const filters = input.filters;
|
|
49
|
+
const includeProposed = input.includeProposed === true;
|
|
47
50
|
const rendererRegistry = input.rendererRegistry ?? defaultRendererRegistry;
|
|
48
51
|
const allSourceDirs = sources.map((s) => s.path);
|
|
49
52
|
const rawStatus = readSemanticStatus();
|
|
@@ -85,7 +88,7 @@ export async function searchLocal(input) {
|
|
|
85
88
|
}
|
|
86
89
|
}
|
|
87
90
|
if (entryCount > 0 && stashDirMatch) {
|
|
88
|
-
const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry);
|
|
91
|
+
const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry, filters, includeProposed);
|
|
89
92
|
return {
|
|
90
93
|
hits,
|
|
91
94
|
tip: hits.length === 0
|
|
@@ -105,7 +108,7 @@ export async function searchLocal(input) {
|
|
|
105
108
|
catch (error) {
|
|
106
109
|
warn("Search index unavailable, falling back to substring search:", error instanceof Error ? error.message : String(error));
|
|
107
110
|
}
|
|
108
|
-
const hitArrays = await Promise.all(allSourceDirs.map((dir) => substringSearch(query, searchType, limit, dir, sources, config, rendererRegistry)));
|
|
111
|
+
const hitArrays = await Promise.all(allSourceDirs.map((dir) => substringSearch(query, searchType, limit, dir, sources, config, rendererRegistry, filters, includeProposed)));
|
|
109
112
|
const hits = hitArrays.flat().slice(0, limit);
|
|
110
113
|
return {
|
|
111
114
|
hits,
|
|
@@ -114,9 +117,12 @@ export async function searchLocal(input) {
|
|
|
114
117
|
};
|
|
115
118
|
}
|
|
116
119
|
// ── Database search ─────────────────────────────────────────────────────────
|
|
117
|
-
async function searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry = defaultRendererRegistry) {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
+
async function searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry = defaultRendererRegistry, filters, includeProposed = false) {
|
|
121
|
+
const hasSearchableTokens = query.length > 0 && sanitizeFtsQuery(query).length > 0;
|
|
122
|
+
// Empty queries — including ones that sanitize down to no searchable FTS
|
|
123
|
+
// tokens such as "." — should enumerate matching entries instead of
|
|
124
|
+
// returning an empty result set from FTS.
|
|
125
|
+
if (!hasSearchableTokens) {
|
|
120
126
|
const typeFilter = searchType === "any" ? undefined : searchType;
|
|
121
127
|
const allEntries = getAllEntries(db, typeFilter);
|
|
122
128
|
// Deduplicate by file path — multiple entries can share the same file
|
|
@@ -127,7 +133,18 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
|
|
|
127
133
|
seenFilePaths.add(ie.filePath);
|
|
128
134
|
return true;
|
|
129
135
|
});
|
|
130
|
-
|
|
136
|
+
// Scope filter: drop entries whose stored scope does not satisfy every
|
|
137
|
+
// supplied scope key. Filtering happens BEFORE the limit slice so a
|
|
138
|
+
// restrictive filter still returns up to `limit` results.
|
|
139
|
+
const scopeFiltered = filters
|
|
140
|
+
? uniqueEntries.filter((ie) => entryMatchesScope(ie.entry.scope, filters))
|
|
141
|
+
: uniqueEntries;
|
|
142
|
+
// Proposed-quality filter (v1 spec §4.2): exclude entries with
|
|
143
|
+
// `quality: "proposed"` unless the caller explicitly opts in.
|
|
144
|
+
const qualityFiltered = includeProposed
|
|
145
|
+
? scopeFiltered
|
|
146
|
+
: scopeFiltered.filter((ie) => !isProposedQuality(ie.entry.quality));
|
|
147
|
+
const selected = qualityFiltered.slice(0, limit);
|
|
131
148
|
const hits = await Promise.all(selected.map((ie) => buildDbHit({
|
|
132
149
|
entry: ie.entry,
|
|
133
150
|
path: ie.filePath,
|
|
@@ -228,6 +245,23 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
|
|
|
228
245
|
// reference docs. Curated metadata is more reliable than auto-generated.
|
|
229
246
|
const queryTokens = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
230
247
|
const queryLower = query.toLowerCase().trim();
|
|
248
|
+
// Graph boost context (#207). Built once per query and reused across
|
|
249
|
+
// every scored entry so the disk read + JSON parse only happens once
|
|
250
|
+
// per search invocation. `null` when no graph file is present, when
|
|
251
|
+
// the schema doesn't match, or when no query token matches a graph
|
|
252
|
+
// entity — in all of those cases the per-entry call is skipped and
|
|
253
|
+
// graph contributes nothing. The graph signal feeds this single
|
|
254
|
+
// FTS5+boosts loop as ONE additive component (CLAUDE.md / spec §6:
|
|
255
|
+
// one scoring pipeline, no parallel SearchHit scorer).
|
|
256
|
+
const graphContext = (() => {
|
|
257
|
+
// Search across all source dirs; the graph file lives next to the
|
|
258
|
+
// primary source root. Cache misses are silent — the helper handles
|
|
259
|
+
// missing files internally and returns `null` instead of throwing.
|
|
260
|
+
const primaryDir = allSourceDirs[0];
|
|
261
|
+
if (!primaryDir)
|
|
262
|
+
return null;
|
|
263
|
+
return loadGraphBoostContext(primaryDir, query);
|
|
264
|
+
})();
|
|
231
265
|
for (const item of scored) {
|
|
232
266
|
const entry = item.entry;
|
|
233
267
|
let boostSum = 0;
|
|
@@ -321,10 +355,26 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
|
|
|
321
355
|
}
|
|
322
356
|
}
|
|
323
357
|
// ── 7. Metadata quality signals ──
|
|
324
|
-
|
|
358
|
+
// Curated metadata is the only boost-bearing quality marker. `generated`
|
|
359
|
+
// and `proposed` (and unknown values) get no boost. `proposed` is also
|
|
360
|
+
// filtered out by default downstream (v1 spec §4.2).
|
|
361
|
+
const qualityBoost = entry.quality === "curated" ? 0.05 : 0;
|
|
325
362
|
boostSum += qualityBoost;
|
|
326
363
|
const confidenceBoost = typeof entry.confidence === "number" ? Math.min(0.05, Math.max(0, entry.confidence) * 0.05) : 0;
|
|
327
364
|
boostSum += confidenceBoost;
|
|
365
|
+
// ── 8. Graph signal (opt-in, #207) ──
|
|
366
|
+
// When the graph-extraction pass has produced a `graph.json`,
|
|
367
|
+
// contribute an additive boost based on how many of this entry's
|
|
368
|
+
// extracted entities match the query (or are one hop away from a
|
|
369
|
+
// match). Computed inside the same loop so all boosts are in one
|
|
370
|
+
// place and the per-call cost is one map lookup when the graph is
|
|
371
|
+
// absent. There is no parallel scoring track — `boostSum` is the
|
|
372
|
+
// single accumulator and the existing `MAX_BOOST_SUM` cap below
|
|
373
|
+
// applies to graph contributions exactly as it does to every other
|
|
374
|
+
// boost.
|
|
375
|
+
if (graphContext) {
|
|
376
|
+
boostSum += computeGraphBoost(graphContext, item.filePath);
|
|
377
|
+
}
|
|
328
378
|
const cappedBoost = Math.min(boostSum, MAX_BOOST_SUM);
|
|
329
379
|
item.score = item.score * (1 + cappedBoost);
|
|
330
380
|
}
|
|
@@ -368,22 +418,39 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
|
|
|
368
418
|
// a filename field all collapse to files[0]). Showing the same path/ref
|
|
369
419
|
// multiple times clutters results.
|
|
370
420
|
const deduped = deduplicateByPath(preFilter);
|
|
421
|
+
// Scope filter: drop hits whose stored scope does not satisfy every supplied
|
|
422
|
+
// key. Applied AFTER ranking — filtering narrows the result set without
|
|
423
|
+
// touching the single FTS5+boosts scoring pipeline.
|
|
424
|
+
const scopeFiltered = filters ? deduped.filter((item) => entryMatchesScope(item.entry.scope, filters)) : deduped;
|
|
425
|
+
// Proposed-quality filter (v1 spec §4.2): exclude entries with
|
|
426
|
+
// `quality: "proposed"` unless the caller passed `--include-proposed`.
|
|
427
|
+
// Applied AFTER ranking for the same reason as scope filtering.
|
|
428
|
+
const qualityFiltered = includeProposed
|
|
429
|
+
? scopeFiltered
|
|
430
|
+
: scopeFiltered.filter((item) => !isProposedQuality(item.entry.quality));
|
|
371
431
|
const rankMs = Date.now() - tRank0;
|
|
372
|
-
const selected =
|
|
373
|
-
const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode, utilityBoosted }) =>
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
//
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
432
|
+
const selected = qualityFiltered.slice(0, limit);
|
|
433
|
+
const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode, utilityBoosted }) => {
|
|
434
|
+
// CLAUDE.md locks SearchHit.score in [0,1]. The boost loop above can
|
|
435
|
+
// exceed 1.0 (this was a pre-existing breach that #207's graph boost
|
|
436
|
+
// — up to ~1.05 additive contribution — made detectable); clamp here
|
|
437
|
+
// so the score handed to buildDbHit always satisfies the spec.
|
|
438
|
+
const finalScore = Math.min(1, Math.max(0, score));
|
|
439
|
+
return buildDbHit({
|
|
440
|
+
entry,
|
|
441
|
+
path: filePath,
|
|
442
|
+
// Round to 4 decimal places
|
|
443
|
+
score: Math.round(finalScore * 10000) / 10000,
|
|
444
|
+
query,
|
|
445
|
+
rankingMode,
|
|
446
|
+
defaultStashDir: stashDir,
|
|
447
|
+
allSourceDirs,
|
|
448
|
+
sources,
|
|
449
|
+
config,
|
|
450
|
+
utilityBoosted,
|
|
451
|
+
rendererRegistry,
|
|
452
|
+
});
|
|
453
|
+
}));
|
|
387
454
|
return { embedMs, rankMs, hits };
|
|
388
455
|
}
|
|
389
456
|
// ── Vector scorer ───────────────────────────────────────────────────────────
|
|
@@ -413,9 +480,13 @@ async function tryVecScores(db, query, k, config) {
|
|
|
413
480
|
}
|
|
414
481
|
}
|
|
415
482
|
// ── Substring fallback (no index) ───────────────────────────────────────────
|
|
416
|
-
async function substringSearch(query, searchType, limit, stashDir, sources, config, rendererRegistry = defaultRendererRegistry) {
|
|
483
|
+
async function substringSearch(query, searchType, limit, stashDir, sources, config, rendererRegistry = defaultRendererRegistry, filters, includeProposed = false) {
|
|
417
484
|
const assets = await indexAssets(stashDir, searchType, sources);
|
|
418
|
-
const
|
|
485
|
+
const scopeMatched = filters ? assets.filter((asset) => entryMatchesScope(asset.entry.scope, filters)) : assets;
|
|
486
|
+
const qualityMatched = includeProposed
|
|
487
|
+
? scopeMatched
|
|
488
|
+
: scopeMatched.filter((asset) => !isProposedQuality(asset.entry.quality));
|
|
489
|
+
const matched = qualityMatched.filter((asset) => !query || buildSearchText(asset.entry).includes(query));
|
|
419
490
|
if (!query) {
|
|
420
491
|
const sorted = matched.sort(compareAssets);
|
|
421
492
|
const unique = deduplicateAssetsByPath(sorted);
|
|
@@ -468,7 +539,9 @@ export async function buildDbHit(input) {
|
|
|
468
539
|
// phase (searchDatabase). buildDbHit receives the already-final score and
|
|
469
540
|
// passes it through without further multiplication. We still compute the
|
|
470
541
|
// boost values here for buildWhyMatched reporting.
|
|
471
|
-
|
|
542
|
+
// Mirrors the boost computation in `searchDatabase`; only `curated`
|
|
543
|
+
// contributes a positive boost. Used for `whyMatched` reporting only.
|
|
544
|
+
const qualityBoost = input.entry.quality === "curated" ? 0.05 : 0;
|
|
472
545
|
const confidenceBoost = typeof input.entry.confidence === "number" ? Math.min(0.05, Math.max(0, input.entry.confidence) * 0.05) : 0;
|
|
473
546
|
// Round to 4 decimal places, no boost multiplication
|
|
474
547
|
const score = Math.round(input.score * 10000) / 10000;
|
|
@@ -492,6 +565,9 @@ export async function buildDbHit(input) {
|
|
|
492
565
|
score,
|
|
493
566
|
whyMatched,
|
|
494
567
|
...(estimatedTokens !== undefined ? { estimatedTokens } : {}),
|
|
568
|
+
// Surface optional quality (v1 spec §4.2). Omitted when entry has
|
|
569
|
+
// no `quality` field so payloads stay compact for the common case.
|
|
570
|
+
...(input.entry.quality ? { quality: input.entry.quality } : {}),
|
|
495
571
|
};
|
|
496
572
|
const renderer = await rendererForType(input.entry.type, rendererRegistry);
|
|
497
573
|
if (renderer?.enrichSearchHit) {
|
|
@@ -570,6 +646,7 @@ async function assetToSearchHit(asset, stashDir, sources, config, score, rendere
|
|
|
570
646
|
action: buildLocalAction(asset.entry.type, ref, rendererRegistry),
|
|
571
647
|
...(score !== undefined ? { score } : {}),
|
|
572
648
|
...(estimatedTokens !== undefined ? { estimatedTokens } : {}),
|
|
649
|
+
...(asset.entry.quality ? { quality: asset.entry.quality } : {}),
|
|
573
650
|
};
|
|
574
651
|
const renderer = await rendererForType(asset.entry.type, rendererRegistry);
|
|
575
652
|
if (renderer?.enrichSearchHit) {
|
|
@@ -714,6 +791,21 @@ function deduplicateAssetsByPath(assets) {
|
|
|
714
791
|
return true;
|
|
715
792
|
});
|
|
716
793
|
}
|
|
794
|
+
/**
 * Exact-match scope filter.
 *
 * Every scope dimension (`user`, `agent`, `run`, `channel`) that is set on
 * `filters` must be strictly equal to the same dimension on the entry's
 * `scope`. Dimensions left `undefined` on `filters` are not constrained.
 * An entry without a `scope` object fails as soon as any dimension is
 * constrained, and passes when none is.
 *
 * @param {object|undefined|null} scope - The entry's scope object, if any.
 * @param {object} filters - Requested scope values keyed by dimension.
 * @returns {boolean} `true` when the entry satisfies every constrained dimension.
 */
function entryMatchesScope(scope, filters) {
    const dimensions = ["user", "agent", "run", "channel"];
    return dimensions.every((dimension) => {
        const wanted = filters[dimension];
        // Unconstrained dimension: nothing to compare.
        if (wanted === undefined) {
            return true;
        }
        return Boolean(scope) && scope[dimension] === wanted;
    });
}
|
|
717
809
|
function realpathOrResolve(targetPath) {
|
|
718
810
|
try {
|
|
719
811
|
return fs.realpathSync(targetPath);
|
|
@@ -240,7 +240,14 @@ function ensureSchema(db, embeddingDim) {
|
|
|
240
240
|
*/
|
|
241
241
|
function handleVersionUpgrade(db) {
|
|
242
242
|
const storedVersion = getMeta(db, "version");
|
|
243
|
-
|
|
243
|
+
// BUG-L4: distinguish "missing" (undefined) from "present but empty" — both
|
|
244
|
+
// were previously coerced through `!storedVersion` and treated as "no
|
|
245
|
+
// upgrade needed", which caused fresh databases (with no version row) to
|
|
246
|
+
// skip the upgrade path correctly, but also caused the upgrade path to be
|
|
247
|
+
// taken when a corrupted/empty version string was persisted. The current
|
|
248
|
+
// tables get dropped only when the stored version exists AND differs from
|
|
249
|
+
// DB_VERSION; missing or empty version means a fresh DB and no upgrade.
|
|
250
|
+
if (storedVersion === undefined || storedVersion === "" || storedVersion === String(DB_VERSION))
|
|
244
251
|
return [];
|
|
245
252
|
let usageBackup = [];
|
|
246
253
|
try {
|
|
@@ -258,7 +265,7 @@ function handleVersionUpgrade(db) {
|
|
|
258
265
|
db.exec("DROP INDEX IF EXISTS idx_entries_type");
|
|
259
266
|
db.exec("DROP TABLE IF EXISTS entries");
|
|
260
267
|
db.exec("DELETE FROM index_meta");
|
|
261
|
-
|
|
268
|
+
warn("[akm] Index rebuilt due to version upgrade. Run 'akm index' to repopulate.");
|
|
262
269
|
return usageBackup;
|
|
263
270
|
}
|
|
264
271
|
/**
|
|
@@ -272,22 +279,49 @@ function restoreUsageEventsBackup(db, backup) {
|
|
|
272
279
|
if (backup.length === 0)
|
|
273
280
|
return;
|
|
274
281
|
try {
|
|
282
|
+
// BUG-H4: introspect the *target* table's columns rather than relying on
|
|
283
|
+
// `row[0]`'s keys. The backup may carry columns the new schema dropped,
|
|
284
|
+
// and the new schema may have NOT-NULL columns without DEFAULT that the
|
|
285
|
+
// old backup never carried. Project the backup onto the intersection so
|
|
286
|
+
// we don't silently lose every row to per-row INSERT errors, and warn
|
|
287
|
+
// once if any backup column was dropped from the new schema.
|
|
288
|
+
const targetCols = db.prepare("PRAGMA table_info(usage_events)").all().map((c) => c.name);
|
|
289
|
+
if (targetCols.length === 0) {
|
|
290
|
+
warn("[db] restoreUsageEventsBackup: usage_events table missing — discarding %d backup row(s)", backup.length);
|
|
291
|
+
return;
|
|
292
|
+
}
|
|
293
|
+
const targetSet = new Set(targetCols);
|
|
294
|
+
const backupCols = Object.keys(backup[0] ?? {});
|
|
295
|
+
const projectedCols = backupCols.filter((c) => targetSet.has(c));
|
|
296
|
+
const droppedCols = backupCols.filter((c) => !targetSet.has(c));
|
|
297
|
+
if (projectedCols.length === 0) {
|
|
298
|
+
warn("[db] restoreUsageEventsBackup: no overlapping columns between backup and current schema — discarding %d row(s); dropped: %s", backup.length, droppedCols.join(", ") || "(none)");
|
|
299
|
+
return;
|
|
300
|
+
}
|
|
301
|
+
if (droppedCols.length > 0) {
|
|
302
|
+
warn("[db] restoreUsageEventsBackup: dropping columns no longer in usage_events schema: %s", droppedCols.join(", "));
|
|
303
|
+
}
|
|
304
|
+
let restored = 0;
|
|
305
|
+
let failed = 0;
|
|
275
306
|
db.transaction(() => {
|
|
276
|
-
const
|
|
277
|
-
const
|
|
278
|
-
const insert = db.prepare(`INSERT INTO usage_events (${cols.join(", ")}) VALUES (${placeholders})`);
|
|
307
|
+
const placeholders = projectedCols.map(() => "?").join(", ");
|
|
308
|
+
const insert = db.prepare(`INSERT INTO usage_events (${projectedCols.join(", ")}) VALUES (${placeholders})`);
|
|
279
309
|
for (const row of backup) {
|
|
280
310
|
try {
|
|
281
|
-
insert.run(...
|
|
311
|
+
insert.run(...projectedCols.map((c) => row[c]));
|
|
312
|
+
restored++;
|
|
282
313
|
}
|
|
283
314
|
catch {
|
|
284
|
-
|
|
315
|
+
failed++;
|
|
285
316
|
}
|
|
286
317
|
}
|
|
287
318
|
})();
|
|
319
|
+
if (failed > 0) {
|
|
320
|
+
warn("[db] restoreUsageEventsBackup: restored %d row(s); skipped %d incompatible row(s)", restored, failed);
|
|
321
|
+
}
|
|
288
322
|
}
|
|
289
|
-
catch {
|
|
290
|
-
|
|
323
|
+
catch (err) {
|
|
324
|
+
warn("[db] restoreUsageEventsBackup: discarded %d backup row(s) — %s", backup.length, err instanceof Error ? err.message : String(err));
|
|
291
325
|
}
|
|
292
326
|
}
|
|
293
327
|
// ── Meta helpers ────────────────────────────────────────────────────────────
|
|
@@ -488,17 +522,15 @@ export function rebuildFts(db, options) {
|
|
|
488
522
|
if (skipped > 0) {
|
|
489
523
|
warn(`[db] rebuildFts: skipped ${skipped} entr${skipped === 1 ? "y" : "ies"} with invalid entry_json`);
|
|
490
524
|
}
|
|
491
|
-
// Always drain the dirty queue —
|
|
492
|
-
//
|
|
493
|
-
//
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
else
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
db.exec("DELETE FROM entries_fts_dirty");
|
|
501
|
-
}
|
|
525
|
+
// Always drain the dirty queue — both paths converge here. The
|
|
526
|
+
// incremental path drains it because we just consumed every dirty row;
|
|
527
|
+
// the full path drains it because a full rebuild covers everything the
|
|
528
|
+
// dirty list tracks. The table is guaranteed to exist (created by
|
|
529
|
+
// ensureSchema()).
|
|
530
|
+
//
|
|
531
|
+
// BUG-L1: previously the if/else arms ran identical statements — the
|
|
532
|
+
// duplication has been collapsed.
|
|
533
|
+
db.exec("DELETE FROM entries_fts_dirty");
|
|
502
534
|
})();
|
|
503
535
|
}
|
|
504
536
|
// ── Vector operations ───────────────────────────────────────────────────────
|
|
@@ -539,8 +571,26 @@ function float32Buffer(vec) {
|
|
|
539
571
|
const f32 = new Float32Array(vec);
|
|
540
572
|
return Buffer.from(f32.buffer);
|
|
541
573
|
}
|
|
542
|
-
|
|
543
|
-
|
|
574
|
+
/**
 * Decode a stored embedding BLOB into a plain array of `expectedDim`
 * 32-bit floats.
 *
 * Returns `null` (and emits a warning) instead of throwing when the row
 * cannot be decoded, so one bad row does not abort the caller's scan:
 *   - the value is not binary at all (`null`, `undefined`, a string, ...);
 *   - the byte length is not exactly `expectedDim * 4`, which also covers a
 *     trailing partial float.
 *
 * @param {ArrayBufferView|ArrayBuffer|null|undefined} buf - Raw embedding bytes.
 * @param {number} expectedDim - Number of float32 components expected.
 * @returns {number[]|null} The decoded vector, or `null` when the row is unusable.
 */
function bufferToFloat32(buf, expectedDim) {
    const isView = ArrayBuffer.isView(buf);
    if (!isView && !(buf instanceof ArrayBuffer)) {
        // Reading `.byteLength` on null/undefined would throw; skip the row instead.
        warn("[db] bufferToFloat32: skipping embedding row — expected a binary BLOB, got %s", buf === null ? "null" : typeof buf);
        return null;
    }
    const expectedBytes = expectedDim * 4;
    if (buf.byteLength !== expectedBytes) {
        warn("[db] bufferToFloat32: skipping embedding row — expected %d bytes (%d dim x 4), got %d", expectedBytes, expectedDim, buf.byteLength);
        return null;
    }
    // Normalise to a byte view. A bare ArrayBuffer has no `length`, so passing
    // it straight to `TypedArray.prototype.set` would copy nothing.
    const bytes = isView
        ? new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength)
        : new Uint8Array(buf);
    // Copy into a fresh ArrayBuffer: a Float32Array view requires a byteOffset
    // that is a multiple of 4, which the source view does not guarantee.
    const aligned = new ArrayBuffer(expectedBytes);
    new Uint8Array(aligned).set(bytes);
    return Array.from(new Float32Array(aligned));
}
|
|
546
596
|
function searchBlobVec(db, queryEmbedding, k) {
|
|
@@ -548,9 +598,12 @@ function searchBlobVec(db, queryEmbedding, k) {
|
|
|
548
598
|
const rows = db.prepare("SELECT id, embedding FROM embeddings").all();
|
|
549
599
|
if (rows.length === 0)
|
|
550
600
|
return [];
|
|
601
|
+
const expectedDim = queryEmbedding.length;
|
|
551
602
|
const scored = [];
|
|
552
603
|
for (const row of rows) {
|
|
553
|
-
const embedding = bufferToFloat32(row.embedding);
|
|
604
|
+
const embedding = bufferToFloat32(row.embedding, expectedDim);
|
|
605
|
+
if (embedding === null)
|
|
606
|
+
continue;
|
|
554
607
|
const similarity = cosineSimilarity(queryEmbedding, embedding);
|
|
555
608
|
scored.push({ id: row.id, similarity });
|
|
556
609
|
}
|
|
@@ -69,9 +69,6 @@ const matchers = [];
|
|
|
69
69
|
/** Renderer lookup by name. */
|
|
70
70
|
const renderers = new Map();
|
|
71
71
|
let builtinsPromise;
|
|
72
|
-
export function resetBuiltinsCache() {
|
|
73
|
-
builtinsPromise = undefined;
|
|
74
|
-
}
|
|
75
72
|
/**
|
|
76
73
|
* Ensure that built-in matchers and renderers are registered.
|
|
77
74
|
* Called lazily on first use of runMatchers/getRenderer.
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search-time graph-boost integration for the `akm index` graph pass (#207).
|
|
3
|
+
*
|
|
4
|
+
* This module is the consumer half of the graph-extraction pass. It loads
|
|
5
|
+
* the persisted `graph.json` (when present) and exposes a single helper,
|
|
6
|
+
* {@link computeGraphBoost}, that the existing FTS5+boosts loop in
|
|
7
|
+
* `src/indexer/db-search.ts` calls per-entry to obtain an additive boost
|
|
8
|
+
* value.
|
|
9
|
+
*
|
|
10
|
+
* CLAUDE.md / v1 spec compliance:
|
|
11
|
+
* - The graph signal feeds the **single** FTS5+boosts pipeline as one
|
|
12
|
+
* additive boost component. There is no parallel scoring track.
|
|
13
|
+
* - There is no second `SearchHit` scorer. `searchDatabase` continues to
|
|
14
|
+
* own ranking; this module just answers "what additive boost does the
|
|
15
|
+
* graph contribute for this (query, entry) pair?".
|
|
16
|
+
* - Missing/stale/unparseable `graph.json` → boost is `0`. The pipeline
|
|
17
|
+
* degrades gracefully to its non-graph behaviour, exactly as today.
|
|
18
|
+
*/
|
|
19
|
+
import fs from "node:fs";
|
|
20
|
+
import { warn } from "../core/warn";
|
|
21
|
+
import { GRAPH_FILE_SCHEMA_VERSION, getGraphFilePath } from "./graph-extraction";
|
|
22
|
+
/**
 * Per-entry weights, exposed as constants so tests can read them and so the
 * single-source-of-truth for "how much does the graph contribute" is here
 * rather than inlined into `db-search.ts`. Kept conservative — the goal is
 * a useful tiebreaker, not domination of the lexical signal.
 */
/** Boost added per entry entity that directly matches the query. */
export const GRAPH_DIRECT_BOOST_PER_ENTITY = 0.25;
/** Upper bound on the summed direct-match boost for one entry. */
export const GRAPH_DIRECT_BOOST_CAP = 0.75;
/** Boost added per entry entity that is one relation away from a matched entity. */
export const GRAPH_HOP_BOOST_PER_ENTITY = 0.1;
/** Upper bound on the summed one-hop boost for one entry. */
export const GRAPH_HOP_BOOST_CAP = 0.3;
|
|
32
|
+
/**
 * Load the graph file for a stash root and pre-compute everything that is
 * shared across all entries scored for one query.
 *
 * Returns `null` when no boost is possible:
 *   - the query yields no usable tokens (checked first, so `graph.json` is
 *     not read at all in that case);
 *   - `readGraphFile` returns nothing for this stash root;
 *   - no extracted entity contains any query token.
 *
 * @param {string} stashRoot - Stash root whose graph file should be loaded.
 * @param {string} query - Raw user query; non-string values are stringified.
 * @returns {{nodesByPath: Map<string, object>, matchedEntities: Set<string>, oneHopEntities: Set<string>}|null}
 *   Lookup structures consumed by `computeGraphBoost`, or `null`.
 */
export function loadGraphBoostContext(stashRoot, query) {
    // Tokenise before touching the disk: without tokens nothing can match.
    const queryTokens = String(query ?? "")
        .toLowerCase()
        .split(/[\s\-_/]+/)
        .filter((token) => token.length >= 2);
    if (queryTokens.length === 0) {
        return null;
    }
    const graph = readGraphFile(stashRoot);
    if (!graph) {
        return null;
    }
    // Flat union of every extracted entity, plus a path → node index so the
    // per-entry path is a single map lookup.
    const nodesByPath = new Map();
    const allEntities = new Set();
    for (const node of graph.files) {
        nodesByPath.set(node.path, node);
        for (const entity of node.entities) {
            allEntities.add(entity);
        }
    }
    // An entity matches when it contains any query token as a substring.
    // Equality and sub-token equality are both special cases of that test,
    // so one `includes` covers them. Query tokens are lower-cased above;
    // entities are compared as stored.
    const matchedEntities = new Set();
    for (const entity of allEntities) {
        if (queryTokens.some((token) => entity.includes(token))) {
            matchedEntities.add(entity);
        }
    }
    if (matchedEntities.size === 0) {
        return null;
    }
    // One-hop neighbours: the unmatched endpoint of any relation whose other
    // endpoint is a matched entity.
    const oneHopEntities = new Set();
    for (const node of graph.files) {
        for (const { from, to } of node.relations) {
            const fromMatched = matchedEntities.has(from);
            const toMatched = matchedEntities.has(to);
            if (fromMatched && !toMatched) {
                oneHopEntities.add(to);
            }
            else if (toMatched && !fromMatched) {
                oneHopEntities.add(from);
            }
        }
    }
    return { nodesByPath, matchedEntities, oneHopEntities };
}
|
|
92
|
+
/**
 * Compute the graph-boost contribution for a single scored entry.
 *
 * Each of the node's entities counts once: as a direct hit when it is in
 * `context.matchedEntities`, otherwise as a hop hit when it is in
 * `context.oneHopEntities`. The two counts are weighted and capped
 * independently, then summed.
 *
 * @param {{nodesByPath: Map<string, object>, matchedEntities: Set<string>, oneHopEntities: Set<string>}|null|undefined} context
 *   Context from `loadGraphBoostContext`. Since that loader returns `null`
 *   when no boost is possible, a missing context is treated as "no boost".
 * @param {string} filePath - Path used as the key into `context.nodesByPath`.
 * @returns {number} Additive boost; `0` when there is no context, the file
 *   is not in the graph, or no entity overlaps.
 */
export function computeGraphBoost(context, filePath) {
    if (!context) {
        return 0;
    }
    const node = context.nodesByPath.get(filePath);
    if (!node) {
        return 0;
    }
    let directHits = 0;
    let hopHits = 0;
    for (const entity of node.entities) {
        if (context.matchedEntities.has(entity)) {
            directHits += 1;
        }
        else if (context.oneHopEntities.has(entity)) {
            hopHits += 1;
        }
    }
    const directBoost = Math.min(GRAPH_DIRECT_BOOST_CAP, directHits * GRAPH_DIRECT_BOOST_PER_ENTITY);
    const hopBoost = Math.min(GRAPH_HOP_BOOST_CAP, hopHits * GRAPH_HOP_BOOST_PER_ENTITY);
    return directBoost + hopBoost;
}
|
|
115
|
+
/**
 * Lightweight reader for the persisted graph file of a stash root.
 *
 * Returns `null` in every failure case so the caller falls back to
 * "no boost":
 *   - file does not exist (`ENOENT`): silent, as the graph pass may simply
 *     not have run;
 *   - any other read failure (permissions, path is a directory, I/O error):
 *     warned, so an existing but unreadable file is visible;
 *   - invalid JSON: warned;
 *   - payload fails `isGraphFile` or carries a different schema version:
 *     silent, treated like a missing file.
 *
 * @param {string} stashRoot - Stash root passed to `getGraphFilePath`.
 * @returns {object|null} The parsed graph file, or `null`.
 */
function readGraphFile(stashRoot) {
    const target = getGraphFilePath(stashRoot);
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    let raw;
    try {
        raw = fs.readFileSync(target, "utf8");
    }
    catch (err) {
        // Only a genuinely absent file is expected; surface everything else.
        if (err?.code !== "ENOENT") {
            warn(`graph boost: failed to read ${target}: ${describe(err)}`);
        }
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        warn(`graph boost: failed to parse ${target}: ${describe(err)}`);
        return null;
    }
    if (!isGraphFile(parsed) || parsed.schemaVersion !== GRAPH_FILE_SCHEMA_VERSION) {
        return null;
    }
    return parsed;
}
|
|
144
|
+
/**
 * Structural check for a parsed graph file.
 *
 * Accepts only a non-null object with a numeric `schemaVersion`, string
 * `generatedAt` and `stashRoot`, and a `files` array whose every element is
 * an object with string `path` and `type`, an `entities` array of strings,
 * and a `relations` array of objects carrying string `from`/`to` and an
 * optional string `type`.
 *
 * @param {unknown} value - Parsed JSON payload.
 * @returns {boolean} `true` when the payload has the expected shape.
 */
function isGraphFile(value) {
    const isRecord = (candidate) => typeof candidate === "object" && candidate !== null;
    const isText = (candidate) => typeof candidate === "string";
    const isRelation = (relation) => isRecord(relation) &&
        isText(relation.from) &&
        isText(relation.to) &&
        (relation.type === undefined || isText(relation.type));
    const isFileNode = (fileNode) => {
        if (!isRecord(fileNode) || !isText(fileNode.path) || !isText(fileNode.type)) {
            return false;
        }
        if (!Array.isArray(fileNode.entities) || !fileNode.entities.every(isText)) {
            return false;
        }
        if (!Array.isArray(fileNode.relations)) {
            return false;
        }
        for (const relation of fileNode.relations) {
            if (!isRelation(relation)) {
                return false;
            }
        }
        return true;
    };
    const headerOk = isRecord(value) &&
        typeof value.schemaVersion === "number" &&
        isText(value.generatedAt) &&
        isText(value.stashRoot) &&
        Array.isArray(value.files);
    if (!headerOk) {
        return false;
    }
    for (const fileNode of value.files) {
        if (!isFileNode(fileNode)) {
            return false;
        }
    }
    return true;
}
|