akm-cli 0.6.0 → 0.7.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/dist/{cli.js → src/cli.js} +672 -29
- package/dist/{commands → src/commands}/config-cli.js +5 -4
- package/dist/src/commands/distill.js +283 -0
- package/dist/src/commands/events.js +108 -0
- package/dist/src/commands/history.js +120 -0
- package/dist/{commands → src/commands}/installed-stashes.js +28 -2
- package/dist/src/commands/proposal.js +119 -0
- package/dist/src/commands/propose.js +171 -0
- package/dist/src/commands/reflect.js +193 -0
- package/dist/{commands → src/commands}/registry-search.js +2 -1
- package/dist/{commands → src/commands}/remember.js +12 -0
- package/dist/{commands → src/commands}/search.js +74 -1
- package/dist/{commands → src/commands}/self-update.js +4 -3
- package/dist/{commands → src/commands}/show.js +67 -2
- package/dist/{core → src/core}/asset-ref.js +5 -5
- package/dist/{core → src/core}/asset-spec.js +12 -0
- package/dist/{core → src/core}/common.js +1 -1
- package/dist/{core → src/core}/config.js +175 -121
- package/dist/{core → src/core}/errors.js +4 -0
- package/dist/src/core/events.js +239 -0
- package/dist/src/core/lesson-lint.js +86 -0
- package/dist/src/core/proposals.js +406 -0
- package/dist/src/core/warn.js +72 -0
- package/dist/{core → src/core}/write-source.js +80 -5
- package/dist/{indexer → src/indexer}/db-search.js +119 -27
- package/dist/{indexer → src/indexer}/db.js +76 -23
- package/dist/{indexer → src/indexer}/file-context.js +0 -3
- package/dist/src/indexer/graph-boost.js +179 -0
- package/dist/src/indexer/graph-extraction.js +212 -0
- package/dist/{indexer → src/indexer}/indexer.js +73 -6
- package/dist/src/indexer/memory-inference.js +263 -0
- package/dist/{indexer → src/indexer}/metadata.js +114 -11
- package/dist/src/integrations/agent/config.js +292 -0
- package/dist/src/integrations/agent/detect.js +94 -0
- package/dist/src/integrations/agent/index.js +17 -0
- package/dist/src/integrations/agent/profiles.js +65 -0
- package/dist/src/integrations/agent/prompts.js +167 -0
- package/dist/src/integrations/agent/spawn.js +221 -0
- package/dist/{integrations → src/integrations}/lockfile.js +0 -26
- package/dist/{llm → src/llm}/client.js +33 -2
- package/dist/src/llm/feature-gate.js +108 -0
- package/dist/src/llm/graph-extract.js +107 -0
- package/dist/src/llm/index-passes.js +35 -0
- package/dist/src/llm/memory-infer.js +86 -0
- package/dist/{output → src/output}/renderers.js +60 -1
- package/dist/src/output/shapes.js +516 -0
- package/dist/{output → src/output}/text.js +447 -4
- package/dist/{registry → src/registry}/build-index.js +14 -4
- package/dist/{registry → src/registry}/factory.js +0 -8
- package/dist/{registry → src/registry}/providers/static-index.js +3 -2
- package/dist/{registry → src/registry}/resolve.js +68 -2
- package/dist/{setup → src/setup}/setup.js +43 -5
- package/dist/{sources → src/sources}/providers/git.js +7 -15
- package/dist/{wiki → src/wiki}/wiki.js +9 -11
- package/dist/tests/add-website-source.test.js +119 -0
- package/dist/tests/agent/agent-config-loader.test.js +70 -0
- package/dist/tests/agent/agent-config.test.js +221 -0
- package/dist/tests/agent/agent-detect.test.js +100 -0
- package/dist/tests/agent/agent-spawn.test.js +234 -0
- package/dist/tests/agent-output.test.js +186 -0
- package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
- package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
- package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
- package/dist/tests/asset-ref.test.js +192 -0
- package/dist/tests/asset-registry.test.js +103 -0
- package/dist/tests/asset-spec.test.js +241 -0
- package/dist/tests/bench/attribution.test.js +995 -0
- package/dist/tests/bench/cleanup-sigint.test.js +83 -0
- package/dist/tests/bench/cleanup.js +203 -0
- package/dist/tests/bench/cleanup.test.js +166 -0
- package/dist/tests/bench/cli.js +683 -0
- package/dist/tests/bench/cli.test.js +177 -0
- package/dist/tests/bench/compare.test.js +556 -0
- package/dist/tests/bench/corpus.js +314 -0
- package/dist/tests/bench/corpus.test.js +258 -0
- package/dist/tests/bench/driver.js +346 -0
- package/dist/tests/bench/driver.test.js +443 -0
- package/dist/tests/bench/evolve-metrics.js +179 -0
- package/dist/tests/bench/evolve-metrics.test.js +187 -0
- package/dist/tests/bench/evolve.js +580 -0
- package/dist/tests/bench/evolve.test.js +616 -0
- package/dist/tests/bench/failure-modes.test.js +300 -0
- package/dist/tests/bench/feedback-integrity.test.js +456 -0
- package/dist/tests/bench/leakage.test.js +125 -0
- package/dist/tests/bench/learning-curve.test.js +133 -0
- package/dist/tests/bench/metrics.js +2319 -0
- package/dist/tests/bench/metrics.test.js +1144 -0
- package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
- package/dist/tests/bench/report.js +1821 -0
- package/dist/tests/bench/report.test.js +989 -0
- package/dist/tests/bench/runner.js +536 -0
- package/dist/tests/bench/runner.test.js +958 -0
- package/dist/tests/bench/search-bridge.test.js +331 -0
- package/dist/tests/bench/tmp.js +41 -0
- package/dist/tests/bench/trajectory.js +116 -0
- package/dist/tests/bench/trajectory.test.js +127 -0
- package/dist/tests/bench/verifier.js +109 -0
- package/dist/tests/bench/verifier.test.js +118 -0
- package/dist/tests/bench/workflow-evaluator.js +557 -0
- package/dist/tests/bench/workflow-evaluator.test.js +421 -0
- package/dist/tests/bench/workflow-spec.js +358 -0
- package/dist/tests/bench/workflow-spec.test.js +363 -0
- package/dist/tests/bench/workflow-trace.js +438 -0
- package/dist/tests/bench/workflow-trace.test.js +254 -0
- package/dist/tests/benchmark-search-quality.js +536 -0
- package/dist/tests/benchmark-suite.js +1441 -0
- package/dist/tests/capture-cli.test.js +112 -0
- package/dist/tests/cli-errors.test.js +203 -0
- package/dist/tests/commands/events.test.js +370 -0
- package/dist/tests/commands/history.test.js +223 -0
- package/dist/tests/commands/import.test.js +103 -0
- package/dist/tests/commands/proposal-cli.test.js +209 -0
- package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
- package/dist/tests/commands/remember.test.js +97 -0
- package/dist/tests/commands/scope-flags.test.js +300 -0
- package/dist/tests/commands/search.test.js +537 -0
- package/dist/tests/commands/show-indexer-parity.test.js +117 -0
- package/dist/tests/commands/show.test.js +294 -0
- package/dist/tests/common.test.js +266 -0
- package/dist/tests/completions.test.js +142 -0
- package/dist/tests/config-cli.test.js +193 -0
- package/dist/tests/config-llm-features.test.js +139 -0
- package/dist/tests/config.test.js +544 -0
- package/dist/tests/contracts/migration-baseline.test.js +43 -0
- package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
- package/dist/tests/contracts/spec-helpers.js +46 -0
- package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
- package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
- package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
- package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
- package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
- package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
- package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
- package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
- package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
- package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
- package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
- package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
- package/dist/tests/core/write-source.test.js +366 -0
- package/dist/tests/curate-command.test.js +87 -0
- package/dist/tests/db-scoring.test.js +201 -0
- package/dist/tests/db.test.js +654 -0
- package/dist/tests/distill-cli-flag.test.js +208 -0
- package/dist/tests/distill.test.js +515 -0
- package/dist/tests/docker-install.test.js +120 -0
- package/dist/tests/e2e.test.js +1398 -0
- package/dist/tests/embedder.test.js +340 -0
- package/dist/tests/embedding-model-config.test.js +379 -0
- package/dist/tests/feedback-command.test.js +172 -0
- package/dist/tests/file-context.test.js +552 -0
- package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
- package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
- package/dist/tests/fixtures/stashes/load.js +166 -0
- package/dist/tests/fixtures/stashes/load.test.js +88 -0
- package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
- package/dist/tests/frontmatter.test.js +190 -0
- package/dist/tests/fts-field-weighting.test.js +254 -0
- package/dist/tests/fuzzy-search.test.js +230 -0
- package/dist/tests/git-provider-clone.test.js +45 -0
- package/dist/tests/github.test.js +161 -0
- package/dist/tests/graph-boost-ranking.test.js +305 -0
- package/dist/tests/graph-extraction.test.js +282 -0
- package/dist/tests/helpers/usage-events.js +8 -0
- package/dist/tests/index-pass-llm.test.js +161 -0
- package/dist/tests/indexer.test.js +559 -0
- package/dist/tests/info-command.test.js +166 -0
- package/dist/tests/init.test.js +69 -0
- package/dist/tests/install-script.test.js +246 -0
- package/dist/tests/integration/agent-real-profile.test.js +94 -0
- package/dist/tests/issue-36-repro.test.js +304 -0
- package/dist/tests/issues-191-194.test.js +160 -0
- package/dist/tests/lesson-lint.test.js +111 -0
- package/dist/tests/llm-client.test.js +115 -0
- package/dist/tests/llm-feature-gate.test.js +151 -0
- package/dist/tests/llm.test.js +139 -0
- package/dist/tests/lockfile.test.js +216 -0
- package/dist/tests/manifest.test.js +205 -0
- package/dist/tests/markdown.test.js +126 -0
- package/dist/tests/matchers-unit.test.js +189 -0
- package/dist/tests/memory-inference.test.js +299 -0
- package/dist/tests/merge-scoring.test.js +136 -0
- package/dist/tests/metadata.test.js +313 -0
- package/dist/tests/migration-help.test.js +89 -0
- package/dist/tests/origin-resolve.test.js +124 -0
- package/dist/tests/output-baseline.test.js +217 -0
- package/dist/tests/output-shapes-unit.test.js +476 -0
- package/dist/tests/parallel-search.test.js +272 -0
- package/dist/tests/parameter-metadata.test.js +365 -0
- package/dist/tests/paths.test.js +177 -0
- package/dist/tests/progressive-disclosure.test.js +280 -0
- package/dist/tests/proposals.test.js +279 -0
- package/dist/tests/proposed-quality.test.js +271 -0
- package/dist/tests/provider-registry.test.js +32 -0
- package/dist/tests/ranking-regression.test.js +548 -0
- package/dist/tests/reflect-propose.test.js +455 -0
- package/dist/tests/registry-build-index.test.js +378 -0
- package/dist/tests/registry-cli.test.js +290 -0
- package/dist/tests/registry-index-v2.test.js +430 -0
- package/dist/tests/registry-install.test.js +728 -0
- package/dist/tests/registry-providers/parity.test.js +189 -0
- package/dist/tests/registry-providers/skills-sh.test.js +309 -0
- package/dist/tests/registry-providers/static-index.test.js +204 -0
- package/dist/tests/registry-resolve.test.js +126 -0
- package/dist/tests/registry-search.test.js +723 -0
- package/dist/tests/remember-frontmatter.test.js +380 -0
- package/dist/tests/remember-unit.test.js +123 -0
- package/dist/tests/ripgrep-install.test.js +251 -0
- package/dist/tests/ripgrep-resolve.test.js +108 -0
- package/dist/tests/ripgrep.test.js +163 -0
- package/dist/tests/save-command.test.js +94 -0
- package/dist/tests/save-trust-qa-fixes.test.js +270 -0
- package/dist/tests/scoring-pipeline.test.js +648 -0
- package/dist/tests/search-include-proposed-cli.test.js +118 -0
- package/dist/tests/self-update.test.js +442 -0
- package/dist/tests/semantic-search-e2e.test.js +512 -0
- package/dist/tests/semantic-status.test.js +471 -0
- package/dist/tests/setup-run.integration.js +877 -0
- package/dist/tests/setup-wizard.test.js +198 -0
- package/dist/tests/setup.test.js +131 -0
- package/dist/tests/source-add.test.js +11 -0
- package/dist/tests/source-clone.test.js +254 -0
- package/dist/tests/source-manage.test.js +366 -0
- package/dist/tests/source-providers/filesystem.test.js +82 -0
- package/dist/tests/source-providers/git.test.js +252 -0
- package/dist/tests/source-providers/website.test.js +128 -0
- package/dist/tests/source-qa-fixes.test.js +268 -0
- package/dist/tests/source-registry.test.js +350 -0
- package/dist/tests/source-resolve.test.js +100 -0
- package/dist/tests/source-source.test.js +221 -0
- package/dist/tests/source.test.js +533 -0
- package/dist/tests/tar-utils-scan.test.js +73 -0
- package/dist/tests/toggle-components.test.js +73 -0
- package/dist/tests/usage-telemetry.test.js +265 -0
- package/dist/tests/utility-scoring.test.js +558 -0
- package/dist/tests/vault-load-error.test.js +78 -0
- package/dist/tests/vault-qa-fixes.test.js +194 -0
- package/dist/tests/vault.test.js +429 -0
- package/dist/tests/vector-search.test.js +608 -0
- package/dist/tests/walker.test.js +252 -0
- package/dist/tests/wave2-cluster-bc.test.js +228 -0
- package/dist/tests/wave2-cluster-d.test.js +180 -0
- package/dist/tests/wave2-cluster-e.test.js +179 -0
- package/dist/tests/wiki-qa-fixes.test.js +270 -0
- package/dist/tests/wiki.test.js +529 -0
- package/dist/tests/workflow-cli.test.js +271 -0
- package/dist/tests/workflow-markdown.test.js +171 -0
- package/dist/tests/workflow-path-escape.test.js +132 -0
- package/dist/tests/workflow-qa-fixes.test.js +377 -0
- package/dist/tests/workflows/indexer-rejection.test.js +213 -0
- package/docs/README.md +8 -0
- package/docs/migration/release-notes/0.7.0.md +244 -0
- package/package.json +2 -2
- package/dist/core/warn.js +0 -27
- package/dist/output/shapes.js +0 -212
- /package/dist/{commands → src/commands}/completions.js +0 -0
- /package/dist/{commands → src/commands}/curate.js +0 -0
- /package/dist/{commands → src/commands}/info.js +0 -0
- /package/dist/{commands → src/commands}/init.js +0 -0
- /package/dist/{commands → src/commands}/install-audit.js +0 -0
- /package/dist/{commands → src/commands}/migration-help.js +0 -0
- /package/dist/{commands → src/commands}/source-add.js +0 -0
- /package/dist/{commands → src/commands}/source-clone.js +0 -0
- /package/dist/{commands → src/commands}/source-manage.js +0 -0
- /package/dist/{commands → src/commands}/vault.js +0 -0
- /package/dist/{core → src/core}/asset-registry.js +0 -0
- /package/dist/{core → src/core}/frontmatter.js +0 -0
- /package/dist/{core → src/core}/markdown.js +0 -0
- /package/dist/{core → src/core}/paths.js +0 -0
- /package/dist/{indexer → src/indexer}/manifest.js +0 -0
- /package/dist/{indexer → src/indexer}/matchers.js +0 -0
- /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
- /package/dist/{indexer → src/indexer}/search-source.js +0 -0
- /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
- /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
- /package/dist/{indexer → src/indexer}/walker.js +0 -0
- /package/dist/{integrations → src/integrations}/github.js +0 -0
- /package/dist/{llm → src/llm}/embedder.js +0 -0
- /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
- /package/dist/{llm → src/llm}/embedders/local.js +0 -0
- /package/dist/{llm → src/llm}/embedders/remote.js +0 -0
- /package/dist/{llm → src/llm}/embedders/types.js +0 -0
- /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
- /package/dist/{output → src/output}/cli-hints.js +0 -0
- /package/dist/{output → src/output}/context.js +0 -0
- /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
- /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
- /package/dist/{registry → src/registry}/providers/index.js +0 -0
- /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
- /package/dist/{registry → src/registry}/providers/types.js +0 -0
- /package/dist/{registry → src/registry}/types.js +0 -0
- /package/dist/{setup → src/setup}/detect.js +0 -0
- /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
- /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
- /package/dist/{setup → src/setup}/steps.js +0 -0
- /package/dist/{sources → src/sources}/include.js +0 -0
- /package/dist/{sources → src/sources}/provider-factory.js +0 -0
- /package/dist/{sources → src/sources}/provider.js +0 -0
- /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
- /package/dist/{sources → src/sources}/providers/index.js +0 -0
- /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
- /package/dist/{sources → src/sources}/providers/npm.js +0 -0
- /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
- /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
- /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
- /package/dist/{sources → src/sources}/providers/website.js +0 -0
- /package/dist/{sources → src/sources}/resolve.js +0 -0
- /package/dist/{sources → src/sources}/types.js +0 -0
- /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
- /package/dist/{version.js → src/version.js} +0 -0
- /package/dist/{workflows → src/workflows}/authoring.js +0 -0
- /package/dist/{workflows → src/workflows}/cli.js +0 -0
- /package/dist/{workflows → src/workflows}/db.js +0 -0
- /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
- /package/dist/{workflows → src/workflows}/parser.js +0 -0
- /package/dist/{workflows → src/workflows}/renderer.js +0 -0
- /package/dist/{workflows → src/workflows}/runs.js +0 -0
- /package/dist/{workflows → src/workflows}/schema.js +0 -0
- /package/dist/{workflows → src/workflows}/validator.js +0 -0
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for vector/semantic search path coverage.
|
|
3
|
+
*
|
|
4
|
+
* The entire test suite previously set `semanticSearchMode: "off"`, so
|
|
5
|
+
* `tryVecScores()` and the hybrid score merging pipeline were dead code
|
|
6
|
+
* in tests. This file covers:
|
|
7
|
+
*
|
|
8
|
+
* - tryVecScores runs when semantic status is ready
|
|
9
|
+
* - Hybrid score merging (FTS 0.7 + vec 0.3 weights)
|
|
10
|
+
* - FTS-only entries surviving in hybrid mode
|
|
11
|
+
* - NaN/Infinity guard on vector distances
|
|
12
|
+
* - BM25 normalization edge cases (all identical scores)
|
|
13
|
+
* - JS fallback path (BLOB-based cosine similarity, no sqlite-vec)
|
|
14
|
+
* - Dimension mismatch produces zero similarity
|
|
15
|
+
*/
|
|
16
|
+
import { afterAll, afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
17
|
+
import fs from "node:fs";
|
|
18
|
+
import os from "node:os";
|
|
19
|
+
import path from "node:path";
|
|
20
|
+
import { closeDatabase, openDatabase, rebuildFts, searchFts, searchVec, setMeta, upsertEmbedding, upsertEntry, } from "../src/indexer/db";
|
|
21
|
+
import { cosineSimilarity } from "../src/llm/embedder";
|
|
22
|
+
// ── Temp directory management ───────────────────────────────────────────────
|
|
23
|
+
// Every directory handed out by createTmpDir is recorded here so that it can
// be removed once the whole file has finished running.
const createdTmpDirs = [];
/**
 * Create a fresh, uniquely named directory under the OS temp directory.
 *
 * @param {string} [prefix="akm-vec-"] Name prefix passed to `mkdtempSync`.
 * @returns {string} Path of the newly created directory.
 */
function createTmpDir(prefix = "akm-vec-") {
    const template = path.join(os.tmpdir(), prefix);
    const created = fs.mkdtempSync(template);
    createdTmpDirs.push(created);
    return created;
}
|
|
29
|
+
// Final sweep: remove every temp directory created during this file's run.
// `force: true` makes this a no-op for directories that are already gone.
afterAll(() => {
    createdTmpDirs.forEach((tmpDir) => {
        fs.rmSync(tmpDir, { recursive: true, force: true });
    });
});
|
|
34
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
35
|
+
/**
 * Build a database file path inside a brand-new temp directory.
 *
 * @param {string} [label="vec"] Inserted into the directory prefix as `akm-<label>-`.
 * @returns {string} Path to `test.db` inside the new directory (the file itself is not created here).
 */
function tmpDbPath(label = "vec") {
    return path.join(createTmpDir(`akm-${label}-`), "test.db");
}
|
|
39
|
+
/**
 * Build an entry object with a default description.
 *
 * @param {object} [overrides] Properties copied over the defaults; an
 *   explicit `description` here replaces the default one.
 * @returns {object} A new object combining defaults and overrides.
 */
function makeEntry(overrides) {
    const defaults = { description: "A test entry" };
    return { ...defaults, ...overrides };
}
|
|
45
|
+
/**
 * Insert (or update) a test entry via `upsertEntry`, filling in defaults for
 * anything the caller leaves out.
 *
 * @param {object} db   Database handle passed straight to `upsertEntry`.
 * @param {string} key  Entry key; also used as the entry name and in default paths/text.
 * @param {object} [opts] Optional fields: `type`, `description`, `tags`,
 *   `dirPath`, `filePath`, `stashDir`, `searchText`.
 * @returns Whatever `upsertEntry` returns (the tests in this file use it as the entry id).
 */
function insertTestEntry(db, key, opts) {
    const given = opts ?? {};
    const entry = makeEntry({
        name: key,
        type: given.type ?? "script",
        description: given.description ?? `Description for ${key}`,
        tags: given.tags,
    });
    const dirPath = given.dirPath ?? "/test/dir";
    const filePath = given.filePath ?? `/test/dir/${key}.ts`;
    const stashDir = given.stashDir ?? "/test/stash";
    // Default search text is the key followed by the resolved description.
    const searchText = given.searchText ?? `${key} ${entry.description}`;
    return upsertEntry(db, key, dirPath, filePath, stashDir, entry, searchText);
}
|
|
55
|
+
/**
 * Create an L2-normalized vector of the given dimension.
 *
 * Every component starts out as `val`; the vector is then scaled to unit
 * length. A vector whose norm is zero (`val === 0`, or `dim === 0`) cannot be
 * normalized, so it is returned unscaled instead of dividing by zero and
 * producing an all-NaN vector that would silently poison stored embeddings.
 *
 * @param {number} dim     Number of components.
 * @param {number} [val=1] Value placed at every position before normalization.
 * @returns {number[]} The normalized vector, or the unscaled vector when its norm is 0.
 */
function makeNormalizedVec(dim, val = 1) {
    const raw = new Array(dim).fill(val);
    const norm = Math.sqrt(raw.reduce((sum, component) => sum + component * component, 0));
    if (norm === 0) {
        // Nothing to scale: avoid 0 / 0 => NaN.
        return raw;
    }
    return raw.map((component) => component / norm);
}
|
|
64
|
+
// ── Environment isolation ───────────────────────────────────────────────────
|
|
65
|
+
// Snapshot of the environment taken at module load, restored after each test.
const originalXdgCacheHome = process.env.XDG_CACHE_HOME;
const originalXdgConfigHome = process.env.XDG_CONFIG_HOME;
const originalAkmStashDir = process.env.AKM_STASH_DIR;
// Per-test scratch directories; an empty string means "none allocated".
let testCacheDir = "";
let testConfigDir = "";
// Point the XDG cache/config locations at fresh temp directories for each test.
beforeEach(() => {
    const cacheDir = createTmpDir("akm-vec-cache-");
    const configDir = createTmpDir("akm-vec-config-");
    testCacheDir = cacheDir;
    testConfigDir = configDir;
    Object.assign(process.env, {
        XDG_CACHE_HOME: cacheDir,
        XDG_CONFIG_HOME: configDir,
    });
});
|
|
76
|
+
// Undo the per-test environment changes and delete the per-test directories.
afterEach(() => {
    // Restore each variable to its module-load value; a variable that was
    // originally unset is removed rather than set to the string "undefined".
    const snapshot = [
        ["XDG_CACHE_HOME", originalXdgCacheHome],
        ["XDG_CONFIG_HOME", originalXdgConfigHome],
        ["AKM_STASH_DIR", originalAkmStashDir],
    ];
    for (const [name, saved] of snapshot) {
        if (saved === undefined) {
            delete process.env[name];
        }
        else {
            process.env[name] = saved;
        }
    }
    const removeOptions = { recursive: true, force: true };
    if (testCacheDir) {
        fs.rmSync(testCacheDir, removeOptions);
        testCacheDir = "";
    }
    if (testConfigDir) {
        fs.rmSync(testConfigDir, removeOptions);
        testConfigDir = "";
    }
});
|
|
104
|
+
// ── Test a: tryVecScores runs when status is ready ─────────────────────────
|
|
105
|
+
describe("tryVecScores activation", () => {
    const DIMENSION = 4;
    test("searchVec returns results when embeddings exist in BLOB table", () => {
        // Exercises the low-level searchVec call directly: one entry, one
        // stored embedding, queried with the identical vector.
        // NOTE(review): the original notes say this is the data path that
        // tryVecScores consumes (BLOB fallback when sqlite-vec is absent) —
        // confirm against the indexer source.
        const db = openDatabase(tmpDbPath("vec-activation"), { embeddingDim: DIMENSION });
        try {
            const entryId = insertTestEntry(db, "vec-ready-tool", {
                description: "A tool with embeddings ready for vector search",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            const stored = makeNormalizedVec(DIMENSION);
            upsertEmbedding(db, entryId, stored);
            setMeta(db, "hasEmbeddings", "1");
            const hits = searchVec(db, stored, 5);
            expect(hits.length).toBe(1);
            expect(hits[0].id).toBe(entryId);
            // Query equals the stored embedding, so the distance should be ~0.
            expect(hits[0].distance).toBeLessThan(0.01);
        }
        finally {
            closeDatabase(db);
        }
    });
    test("searchVec returns results sorted by similarity (closest first)", () => {
        const db = openDatabase(tmpDbPath("vec-sorted"), { embeddingDim: DIMENSION });
        try {
            const nearId = insertTestEntry(db, "close-match", {
                description: "Close match entry",
                stashDir: "/test/stash",
            });
            const distantId = insertTestEntry(db, "far-match", {
                description: "Far match entry",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            // Near entry: all-equal components, i.e. [0.5, 0.5, 0.5, 0.5].
            upsertEmbedding(db, nearId, makeNormalizedVec(DIMENSION, 1));
            // Distant entry: a unit vector along the first axis only.
            upsertEmbedding(db, distantId, [1, 0, 0, 0]);
            setMeta(db, "hasEmbeddings", "1");
            // The query points in the same direction as the near entry.
            const query = makeNormalizedVec(DIMENSION, 1);
            const hits = searchVec(db, query, 10);
            expect(hits.length).toBe(2);
            const nearHit = hits.find((hit) => hit.id === nearId);
            const distantHit = hits.find((hit) => hit.id === distantId);
            expect(nearHit).toBeDefined();
            expect(distantHit).toBeDefined();
            // A smaller distance means a closer match.
            expect(nearHit?.distance).toBeLessThan(distantHit?.distance ?? Number.POSITIVE_INFINITY);
        }
        finally {
            closeDatabase(db);
        }
    });
});
|
|
172
|
+
// ── Test b: Hybrid score merging ───────────────────────────────────────────
|
|
173
|
+
describe("Hybrid score merging (FTS 0.7 + vec 0.3 weights)", () => {
    // Weights used by both tests below.
    // NOTE(review): these are meant to mirror the hybrid merge in
    // searchDatabase(); the tests recompute the formula locally rather than
    // calling production code, so confirm the weights stay in sync.
    const FTS_WEIGHT = 0.7;
    const VEC_WEIGHT = 0.3;
    /** combinedScore = ftsNormalized * 0.7 + vecCosine * 0.3 */
    const combine = (ftsNormalized, vecCosine) => ftsNormalized * FTS_WEIGHT + vecCosine * VEC_WEIGHT;
    test("combined score uses FTS and vec weights correctly", () => {
        // Balanced case: 0.8 * 0.7 + 0.9 * 0.3 = 0.56 + 0.27 = 0.83
        expect(combine(0.8, 0.9)).toBeCloseTo(0.83, 2);
        // A strong vector score lifts an entry with a weak FTS score:
        // 0.3 * 0.7 + 1.0 * 0.3 = 0.21 + 0.3 = 0.51
        expect(combine(0.3, 1.0)).toBeCloseTo(0.51, 2);
        // And the reverse, strong FTS with a weak vector score:
        // 1.0 * 0.7 + 0.1 * 0.3 = 0.7 + 0.03 = 0.73
        expect(combine(1.0, 0.1)).toBeCloseTo(0.73, 2);
    });
    test("FTS and vec rankings differ but combined score reflects both", () => {
        // The two signals disagree: A leads on FTS, B leads on vector
        // similarity. Merging should narrow the gap without flipping the order.
        const ftsChampion = { fts: 1.0, vec: 0.2 };
        const vecChampion = { fts: 0.4, vec: 1.0 };
        const scoreA = combine(ftsChampion.fts, ftsChampion.vec);
        const scoreB = combine(vecChampion.fts, vecChampion.vec);
        // FTS carries the larger weight, so A still ranks first.
        expect(scoreA).toBeGreaterThan(scoreB);
        // FTS-only gap is 0.6; the combined gap is 0.76 - 0.58 = 0.18.
        const gapFtsOnly = ftsChampion.fts - vecChampion.fts;
        const gapCombined = scoreA - scoreB;
        expect(gapCombined).toBeLessThan(gapFtsOnly);
    });
});
|
|
222
|
+
// ── Test c: FTS-only entries in hybrid mode ────────────────────────────────
|
|
223
|
+
describe("FTS-only entries survive in hybrid mode", () => {
    test("entry matching FTS but with no embedding appears in results", () => {
        const dimension = 4;
        const db = openDatabase(tmpDbPath("fts-only-hybrid"), { embeddingDim: dimension });
        try {
            // Two entries that both mention "deploy"; only the first gets an embedding.
            const embeddedId = insertTestEntry(db, "with-embedding", {
                description: "A tool with vector support for deploy",
                searchText: "with-embedding deploy tool vector support",
                stashDir: "/test/stash",
            });
            const plainId = insertTestEntry(db, "no-embedding", {
                description: "A deploy tool without vector support",
                searchText: "no-embedding deploy tool without vector",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            upsertEmbedding(db, embeddedId, makeNormalizedVec(dimension));
            setMeta(db, "hasEmbeddings", "1");
            // Full-text search must return both, embedding or not.
            const hits = searchFts(db, "deploy", 10);
            const hitIds = hits.map((hit) => hit.id);
            expect(hitIds).toContain(embeddedId);
            expect(hitIds).toContain(plainId);
            // Every FTS hit carries a usable (finite, non-NaN) BM25 score.
            // NOTE(review): the original notes say the un-embedded entry is
            // ranked with mode "fts" rather than "hybrid" — not checked here.
            for (const hit of hits) {
                expect(Number.isFinite(hit.bm25Score)).toBe(true);
                expect(Number.isNaN(hit.bm25Score)).toBe(false);
            }
        }
        finally {
            closeDatabase(db);
        }
    });
    test("FTS results with no embedding counterpart get ftsScore only (no vec component)", () => {
        // Local re-creation of the merge rule: an id that is missing from the
        // embedding score map keeps its FTS score untouched.
        const FTS_WEIGHT = 0.7;
        const VEC_WEIGHT = 0.3;
        const ftsScoreMap = new Map([
            [1, 0.8], // has embedding
            [2, 0.6], // no embedding
        ]);
        const embedScoreMap = new Map([
            [1, 0.9], // only entry 1 has a vector score
        ]);
        const mergedScore = (id) => {
            const vecScore = embedScoreMap.get(id);
            const ftsScore = ftsScoreMap.get(id);
            if (ftsScore === undefined) {
                throw new Error(`Expected FTS score for entry ${id}`);
            }
            if (vecScore === undefined) {
                return ftsScore;
            }
            return ftsScore * FTS_WEIGHT + vecScore * VEC_WEIGHT;
        };
        // Entry 1: weighted hybrid score.
        expect(mergedScore(1)).toBeCloseTo(0.8 * 0.7 + 0.9 * 0.3, 4);
        // Entry 2: pure FTS score, no weighting applied.
        expect(mergedScore(2)).toBe(0.6);
    });
});
|
|
290
|
+
// ── Test d: NaN/Infinity guard ─────────────────────────────────────────────
|
|
291
|
+
describe("NaN/Infinity guard on vector distances", () => {
    // Local mirror of the guard in tryVecScores:
    //   const raw = 1 - (distance * distance) / 2;
    //   scores.set(id, Number.isFinite(raw) ? Math.max(0, raw) : 0);
    // Kept in one place so every case below exercises the same formula.
    const rawSimilarity = (distance) => 1 - (distance * distance) / 2;
    const guardSimilarity = (raw) => (Number.isFinite(raw) ? Math.max(0, raw) : 0);
    test("NaN distance is clamped to 0 by the guard formula", () => {
        const raw = rawSimilarity(Number.NaN);
        expect(Number.isNaN(raw)).toBe(true);
        expect(guardSimilarity(raw)).toBe(0);
    });
    test("Infinity distance is clamped to 0 by the guard formula", () => {
        const raw = rawSimilarity(Number.POSITIVE_INFINITY);
        expect(Number.isFinite(raw)).toBe(false);
        expect(guardSimilarity(raw)).toBe(0);
    });
    test("negative Infinity distance is clamped to 0", () => {
        const raw = rawSimilarity(Number.NEGATIVE_INFINITY);
        expect(Number.isFinite(raw)).toBe(false);
        expect(guardSimilarity(raw)).toBe(0);
    });
    test("large distance producing negative raw is clamped to 0", () => {
        // distance = 3 => raw = 1 - 9/2 = 1 - 4.5 = -3.5
        const raw = rawSimilarity(3);
        expect(raw).toBeLessThan(0);
        expect(guardSimilarity(raw)).toBe(0);
    });
    test("distance 2 (opposite unit vectors) is clamped to 0", () => {
        // Opposite unit vectors are L2 distance 2 apart => raw = 1 - 4/2 = -1
        const raw = rawSimilarity(2);
        expect(raw).toBe(-1);
        expect(guardSimilarity(raw)).toBe(0);
    });
    test("normal distance 0 produces cosine similarity of 1", () => {
        const raw = rawSimilarity(0);
        expect(raw).toBe(1);
        expect(guardSimilarity(raw)).toBe(1);
    });
    test("normal distance ~1.414 (orthogonal vectors) produces cosine ~0", () => {
        // For orthogonal unit vectors, L2 distance = sqrt(2) ~ 1.414, so
        // raw = 1 - 2/2 = 0 (up to floating-point rounding, hence toBeCloseTo).
        const raw = rawSimilarity(Math.sqrt(2));
        expect(raw).toBeCloseTo(0, 5);
        expect(guardSimilarity(raw)).toBeCloseTo(0, 5);
    });
});
|
|
341
|
+
// ── Test e: BM25 normalization edge cases ──────────────────────────────────
|
|
342
|
+
describe("BM25 normalization edge cases", () => {
    // Local mirror of the normalization logic in searchDatabase():
    //   const range = bestBm25 - worstBm25;
    //   const normalized = range !== 0 ? (r.bm25Score - worstBm25) / range : 1.0;
    //   const ftsScore = 0.3 + normalized * 0.7;
    // Every test goes through these two helpers so the zero-range guard is
    // applied consistently rather than only in some of the cases.
    const normalizeBm25 = (score, bestBm25, worstBm25) => {
        const range = bestBm25 - worstBm25;
        return range !== 0 ? (score - worstBm25) / range : 1.0;
    };
    const scaleToFtsScore = (normalized) => 0.3 + normalized * 0.7;
    test("all identical BM25 scores normalize to 1.0", () => {
        const scores = [-5.0, -5.0, -5.0]; // all identical => range is 0
        const bestBm25 = scores[0];
        const worstBm25 = scores[scores.length - 1];
        const normalized = scores.map((s) => normalizeBm25(s, bestBm25, worstBm25));
        // All should be 1.0 when range is 0
        for (const n of normalized) {
            expect(n).toBe(1.0);
        }
        // After scaling to 0.3-1.0 range
        for (const n of normalized) {
            expect(scaleToFtsScore(n)).toBe(1.0);
        }
    });
    test("two distinct BM25 scores normalize to 1.0 and 0.3", () => {
        // Best = -10 (most negative), worst = -2 (least negative), range = -8
        const bestBm25 = -10;
        const worstBm25 = -2;
        const bestNormalized = normalizeBm25(bestBm25, bestBm25, worstBm25);
        expect(bestNormalized).toBe(1.0);
        // 0 / -8 evaluates to -0, so compare with a tolerance instead of toBe(0).
        const worstNormalized = normalizeBm25(worstBm25, bestBm25, worstBm25);
        expect(worstNormalized).toBeCloseTo(0, 10);
        // After scaling
        expect(scaleToFtsScore(bestNormalized)).toBe(1.0);
        expect(scaleToFtsScore(worstNormalized)).toBe(0.3);
    });
    test("BM25 normalization with three scores preserves ordering", () => {
        // best = -15, mid = -10, worst = -5, range = -10
        const bestBm25 = -15;
        const worstBm25 = -5;
        const bestN = normalizeBm25(bestBm25, bestBm25, worstBm25); // -10 / -10 = 1.0
        const midN = normalizeBm25(-10, bestBm25, worstBm25); // -5 / -10 = 0.5
        const worstN = normalizeBm25(worstBm25, bestBm25, worstBm25); // 0 / -10 = -0
        expect(bestN).toBe(1.0);
        expect(midN).toBe(0.5);
        expect(worstN).toBeCloseTo(0, 10);
        // Ordering is preserved after scaling
        const bestS = scaleToFtsScore(bestN);
        const midS = scaleToFtsScore(midN);
        const worstS = scaleToFtsScore(worstN);
        expect(bestS).toBeGreaterThan(midS);
        expect(midS).toBeGreaterThan(worstS);
    });
    test("single FTS result normalizes to 1.0", () => {
        // When there's only one result, best = worst, range = 0
        const scores = [-7.3];
        const normalized = normalizeBm25(scores[0], scores[0], scores[0]);
        expect(normalized).toBe(1.0);
        expect(scaleToFtsScore(normalized)).toBe(1.0);
    });
});
|
|
408
|
+
// ── Test f: JS fallback path (BLOB-based cosine similarity) ────────────────
|
|
409
|
+
describe("JS fallback path (BLOB cosine similarity, no sqlite-vec)", () => {
    test("searchVec with BLOB embeddings returns correct similarity ranking", () => {
        const dbPath = tmpDbPath("blob-fallback");
        const dim = 4;
        const db = openDatabase(dbPath, { embeddingDim: dim });
        try {
            // Insert three entries with different embeddings
            const id1 = insertTestEntry(db, "exact-match", {
                description: "Exact match entry",
                stashDir: "/test/stash",
            });
            const id2 = insertTestEntry(db, "partial-match", {
                description: "Partial match entry",
                stashDir: "/test/stash",
            });
            const id3 = insertTestEntry(db, "no-match", {
                description: "No match entry",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            // Embeddings with known cosine similarities to query [1, 0, 0, 0]:
            //   exact-match:   [1, 0, 0, 0]         -> cosine = 1.0
            //   partial-match: [0.707, 0.707, 0, 0] -> cosine ~ 0.707
            //   no-match:      [0, 0, 0, 1]         -> cosine = 0.0
            upsertEmbedding(db, id1, [1, 0, 0, 0]);
            const partial = Math.SQRT1_2;
            upsertEmbedding(db, id2, [partial, partial, 0, 0]);
            upsertEmbedding(db, id3, [0, 0, 0, 1]);
            setMeta(db, "hasEmbeddings", "1");
            const queryVec = [1, 0, 0, 0];
            const results = searchVec(db, queryVec, 10);
            expect(results.length).toBe(3);
            // Results should be sorted by similarity descending (distance ascending).
            // Assert the returned order itself, not just the per-entry distances.
            for (let i = 1; i < results.length; i++) {
                expect(results[i - 1].distance).toBeLessThanOrEqual(results[i].distance);
            }
            // The JS fallback converts cosine similarity to L2 distance:
            // For normalized vectors: L2 = sqrt(2 * (1 - cos_sim))
            const exactResult = results.find((r) => r.id === id1);
            const partialResult = results.find((r) => r.id === id2);
            const noMatchResult = results.find((r) => r.id === id3);
            expect(exactResult).toBeDefined();
            expect(partialResult).toBeDefined();
            expect(noMatchResult).toBeDefined();
            // exact match should have smallest distance
            expect(exactResult?.distance).toBeLessThan(partialResult?.distance ?? Number.POSITIVE_INFINITY);
            expect(partialResult?.distance).toBeLessThan(noMatchResult?.distance ?? Number.POSITIVE_INFINITY);
            // exact match distance should be ~0
            expect(exactResult?.distance).toBeCloseTo(0, 2);
            // no-match distance should be ~sqrt(2) ~ 1.414
            expect(noMatchResult?.distance).toBeCloseTo(Math.sqrt(2), 1);
        }
        finally {
            closeDatabase(db);
        }
    });
    test("searchVec returns empty array when no embeddings exist", () => {
        const dbPath = tmpDbPath("blob-empty");
        const dim = 4;
        const db = openDatabase(dbPath, { embeddingDim: dim });
        try {
            // Entry is indexed for FTS but never receives an embedding.
            insertTestEntry(db, "no-embed-entry", {
                description: "Entry without embedding",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            const results = searchVec(db, [1, 0, 0, 0], 10);
            expect(results).toHaveLength(0);
        }
        finally {
            closeDatabase(db);
        }
    });
    test("searchVec with k smaller than total results returns top-k", () => {
        const dbPath = tmpDbPath("blob-topk");
        const dim = 4;
        const db = openDatabase(dbPath, { embeddingDim: dim });
        try {
            // Insert 5 entries with embeddings
            const ids = [];
            for (let i = 0; i < 5; i++) {
                const id = insertTestEntry(db, `entry-${i}`, {
                    description: `Entry number ${i}`,
                    stashDir: "/test/stash",
                });
                // One-hot embedding along axis i % dim, so entries 0 and 4 both
                // store [1, 0, 0, 0] and the other three are orthogonal to it.
                const emb = [0, 0, 0, 0];
                emb[i % dim] = 1;
                upsertEmbedding(db, id, emb);
                ids.push(id);
            }
            rebuildFts(db);
            setMeta(db, "hasEmbeddings", "1");
            // Query for top 2 only
            const results = searchVec(db, [1, 0, 0, 0], 2);
            expect(results.length).toBe(2);
            // The two nearest entries are exactly the ones whose embedding equals
            // the query (entries 0 and 4). Check that those were kept, not just
            // that the result list was truncated to two.
            const returnedIds = results.map((r) => r.id);
            expect(returnedIds).toContain(ids[0]);
            expect(returnedIds).toContain(ids[4]);
            for (const r of results) {
                expect(r.distance).toBeCloseTo(0, 2);
            }
        }
        finally {
            closeDatabase(db);
        }
    });
});
|
|
508
|
+
// ── Test g: Dimension mismatch produces zero ───────────────────────────────
|
|
509
|
+
describe("Dimension mismatch produces zero similarity", () => {
    test("cosineSimilarity returns 0 for mismatched dimensions (384 vs 768)", () => {
        // Two uniform vectors of different lengths.
        const shortVec = Array.from({ length: 384 }, () => 1 / Math.sqrt(384));
        const longVec = Array.from({ length: 768 }, () => 1 / Math.sqrt(768));
        expect(cosineSimilarity(shortVec, longVec)).toBe(0);
    });
    test("cosineSimilarity returns 0 for mismatched dimensions (small vectors)", () => {
        const threeDims = [1, 0, 0];
        const fourDims = [1, 0, 0, 0];
        expect(cosineSimilarity(threeDims, fourDims)).toBe(0);
    });
    test("cosineSimilarity returns correct value for matching dimensions", () => {
        const base = [1, 0, 0, 0];
        // [left, right, expected cosine]
        const cases = [
            [base, [1, 0, 0, 0], 1.0], // same direction
            [base, [0, 1, 0, 0], 0.0], // orthogonal
            [base, [-1, 0, 0, 0], -1.0], // opposite
        ];
        for (const [left, right, expectedCosine] of cases) {
            expect(cosineSimilarity(left, right)).toBeCloseTo(expectedCosine, 5);
        }
    });
    test("cosineSimilarity returns 0 for empty vectors", () => {
        const empty = [];
        expect(cosineSimilarity(empty, [])).toBe(0);
    });
    test("cosineSimilarity returns 0 for zero vectors", () => {
        const allZero = [0, 0, 0, 0];
        expect(cosineSimilarity(allZero, allZero)).toBe(0);
    });
    test("searchBlobVec handles dimension mismatch gracefully via cosineSimilarity", () => {
        // Stored embeddings have 4 dims while the query has 8. Per the original
        // test notes, the JS fallback calls cosineSimilarity, which returns 0
        // for mismatched lengths. Exercised end-to-end through searchVec.
        const storedDim = 4;
        const db = openDatabase(tmpDbPath("dim-mismatch"), { embeddingDim: storedDim });
        try {
            const entryId = insertTestEntry(db, "small-emb", {
                description: "Entry with small embedding",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            // 4-dim embedding on disk
            upsertEmbedding(db, entryId, [1, 0, 0, 0]);
            setMeta(db, "hasEmbeddings", "1");
            // 8-dim query => dimension mismatch
            const mismatchedQuery = [1, 0, 0, 0, 0, 0, 0, 0];
            const hits = searchVec(db, mismatchedQuery, 10);
            // No crash is the main requirement. If a hit comes back it carries the
            // maximum distance: cosine = 0 maps to L2 = sqrt(2 * (1 - 0)) = sqrt(2).
            if (hits.length > 0) {
                expect(hits[0].distance).toBeCloseTo(Math.sqrt(2), 1);
            }
            // Either the single max-distance hit or nothing at all is acceptable.
            expect(hits.length).toBeLessThanOrEqual(1);
        }
        finally {
            closeDatabase(db);
        }
    });
});
|
|
576
|
+
// ── End-to-end: L2-to-cosine conversion round-trip ─────────────────────────
|
|
577
|
+
describe("L2-to-cosine conversion round-trip", () => {
    test("searchVec distance converts correctly back to cosine similarity", () => {
        // Per the pipeline notes this test was written against:
        //   tryVecScores:  raw = 1 - (distance * distance) / 2
        //   searchBlobVec: distance = sqrt(2 * max(0, 1 - cosineSim))
        // For normalized vectors the two are inverses of each other.
        const EXPECTED_COSINE = 0.6;
        const db = openDatabase(tmpDbPath("roundtrip"), { embeddingDim: 4 });
        try {
            const entryId = insertTestEntry(db, "roundtrip-entry", {
                description: "Round-trip test entry",
                stashDir: "/test/stash",
            });
            rebuildFts(db);
            // query = [1,0,0,0], stored = [0.6,0.8,0,0] => cos(query, stored) = 0.6
            upsertEmbedding(db, entryId, [0.6, 0.8, 0, 0]);
            setMeta(db, "hasEmbeddings", "1");
            const hits = searchVec(db, [1, 0, 0, 0], 10);
            expect(hits.length).toBe(1);
            const l2 = hits[0].distance;
            // Invert the conversion: cosine = 1 - distance^2 / 2
            const cosineFromDistance = 1 - (l2 * l2) / 2;
            expect(cosineFromDistance).toBeCloseTo(EXPECTED_COSINE, 1);
        }
        finally {
            closeDatabase(db);
        }
    });
});
|