nlm-memory 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247)
  1. package/README.md +72 -34
  2. package/dist/cli/nlm.js +2 -1
  3. package/dist/cli/nlm.js.map +1 -1
  4. package/dist/http/app.js +2 -1
  5. package/dist/http/app.js.map +1 -1
  6. package/dist/mcp/server.js +20 -1
  7. package/dist/mcp/server.js.map +1 -1
  8. package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
  9. package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
  10. package/dist/ui/index.html +2 -2
  11. package/package.json +26 -1
  12. package/.agents/plugins/marketplace.json +0 -20
  13. package/.github/workflows/ci.yml +0 -30
  14. package/docs/methodology/re-derivation-rate.md +0 -112
  15. package/docs/methodology/useful-hit-rate.md +0 -79
  16. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  17. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  18. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  19. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  20. package/docs/plans/desktop-product.md +0 -69
  21. package/docs/plans/factstore-design.md +0 -236
  22. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
  23. package/logs/CHANGELOG/CHANGELOG.md +0 -209
  24. package/migrations/000_initial_schema.sql +0 -174
  25. package/migrations/001_entity_type_rename.sql +0 -17
  26. package/migrations/002_adapter_state_extend.sql +0 -12
  27. package/migrations/003_session_embeddings.sql +0 -11
  28. package/migrations/004_facts.sql +0 -46
  29. package/migrations/005_sources.sql +0 -31
  30. package/migrations/006_providers.sql +0 -33
  31. package/migrations/007_source_tokens.sql +0 -17
  32. package/migrations/008_fts_rebuild.sql +0 -9
  33. package/migrations/009_session_embedding_chunks.sql +0 -46
  34. package/migrations/010_sources_opencode.sql +0 -30
  35. package/migrations/011_sources_hermes_agent.sql +0 -30
  36. package/migrations/012_sources_aider.sql +0 -30
  37. package/migrations/013_adapter_state_failure_count.sql +0 -12
  38. package/migrations/014_sources_cursor.sql +0 -30
  39. package/migrations/015_sources_windsurf.sql +0 -30
  40. package/plugin-hermes-agent/README.md +0 -49
  41. package/plugin-hermes-agent/__init__.py +0 -75
  42. package/plugin-hermes-agent/plugin.yaml +0 -15
  43. package/scripts/backfill-citations.mjs +0 -0
  44. package/scripts/build-codex-plugin.mjs +0 -61
  45. package/scripts/deepseek-probe.mjs +0 -67
  46. package/scripts/extract-triples.mjs +0 -207
  47. package/scripts/longmemeval/embedding-cache.ts +0 -77
  48. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  49. package/scripts/longmemeval/run-harness.ts +0 -315
  50. package/scripts/longmemeval/scorer.ts +0 -99
  51. package/scripts/longmemeval/tsconfig.json +0 -9
  52. package/scripts/longmemeval/types.ts +0 -35
  53. package/scripts/nlm-daily-digest.py +0 -239
  54. package/scripts/nlm-daily-digest.sh +0 -28
  55. package/src/cli/classify-parity.ts +0 -257
  56. package/src/cli/launchctl-helpers.ts +0 -49
  57. package/src/cli/nlm.ts +0 -1078
  58. package/src/core/actions/actions-log.ts +0 -118
  59. package/src/core/actions/overlay.ts +0 -117
  60. package/src/core/adapters/aider.ts +0 -205
  61. package/src/core/adapters/claude-code.ts +0 -293
  62. package/src/core/adapters/common.ts +0 -54
  63. package/src/core/adapters/cursor.ts +0 -486
  64. package/src/core/adapters/from-source.ts +0 -67
  65. package/src/core/adapters/hermes-agent.ts +0 -240
  66. package/src/core/adapters/hermes.ts +0 -277
  67. package/src/core/adapters/jsonl-generic.ts +0 -208
  68. package/src/core/adapters/opencode.ts +0 -281
  69. package/src/core/adapters/pi.ts +0 -264
  70. package/src/core/adapters/windsurf.ts +0 -386
  71. package/src/core/classifier/prompt.ts +0 -200
  72. package/src/core/dataset/build-dataset.ts +0 -463
  73. package/src/core/embedding/chunk-body.ts +0 -76
  74. package/src/core/embedding/embed-backfill.ts +0 -210
  75. package/src/core/embedding/embed-normalize.ts +0 -135
  76. package/src/core/facts/backfill-facts.ts +0 -254
  77. package/src/core/facts/extract-facts.ts +0 -50
  78. package/src/core/hook/citation-detect.ts +0 -124
  79. package/src/core/hook/cite-memo.ts +0 -68
  80. package/src/core/hook/claude-settings.ts +0 -187
  81. package/src/core/hook/gate.ts +0 -25
  82. package/src/core/hook/hook-log.ts +0 -41
  83. package/src/core/hook/memo-sweep.ts +0 -164
  84. package/src/core/hook/memo.ts +0 -67
  85. package/src/core/hook/pointer-block.ts +0 -26
  86. package/src/core/hook/select.ts +0 -32
  87. package/src/core/hook/transcript.ts +0 -121
  88. package/src/core/ingest/ingest-session.ts +0 -111
  89. package/src/core/providers/provider-models.ts +0 -100
  90. package/src/core/providers/provider-registry.ts +0 -196
  91. package/src/core/recall/citation-log.ts +0 -108
  92. package/src/core/recall/filter.ts +0 -27
  93. package/src/core/recall/index.ts +0 -6
  94. package/src/core/recall/match-fields.ts +0 -40
  95. package/src/core/recall/query-log.ts +0 -149
  96. package/src/core/recall/query-shape.ts +0 -66
  97. package/src/core/recall/recall-service.ts +0 -320
  98. package/src/core/recall/recent-log.ts +0 -59
  99. package/src/core/recall/tokenize.ts +0 -18
  100. package/src/core/recall/useful-scan.ts +0 -336
  101. package/src/core/recall-facts/fact-query-log.ts +0 -150
  102. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  103. package/src/core/scheduler/scan-once.ts +0 -142
  104. package/src/core/scheduler/scheduler.ts +0 -225
  105. package/src/core/sources/source-registry.ts +0 -278
  106. package/src/core/storage/db-restore.ts +0 -133
  107. package/src/core/storage/live-status.ts +0 -45
  108. package/src/core/storage/migrate.ts +0 -72
  109. package/src/core/storage/sqlite-fact-store.ts +0 -304
  110. package/src/core/storage/sqlite-session-store.ts +0 -810
  111. package/src/hook/hook-auth.ts +0 -18
  112. package/src/hook/prompt-recall-hook.ts +0 -180
  113. package/src/hook/session-end-hook.ts +0 -81
  114. package/src/hook/session-start-hook.ts +0 -168
  115. package/src/hook/stop-hook.ts +0 -239
  116. package/src/http/app.ts +0 -1215
  117. package/src/install/claude-code.ts +0 -128
  118. package/src/install/codex.ts +0 -367
  119. package/src/install/cursor.ts +0 -68
  120. package/src/install/hermes-agent.ts +0 -76
  121. package/src/install/hermes.ts +0 -78
  122. package/src/install/nlm-dir-perms.ts +0 -55
  123. package/src/install/ollama.ts +0 -284
  124. package/src/install/setup.ts +0 -489
  125. package/src/install/windsurf.ts +0 -68
  126. package/src/llm/classifier-box.ts +0 -64
  127. package/src/llm/deepseek-client.ts +0 -150
  128. package/src/llm/env-autoload.ts +0 -55
  129. package/src/llm/ollama-client.ts +0 -189
  130. package/src/mcp/server.ts +0 -534
  131. package/src/ports/fact-store.ts +0 -102
  132. package/src/ports/llm-client.ts +0 -52
  133. package/src/ports/logger.ts +0 -16
  134. package/src/ports/session-store.ts +0 -45
  135. package/src/ports/transcript-adapter.ts +0 -55
  136. package/src/shared/types.ts +0 -149
  137. package/src/ui/App.tsx +0 -58
  138. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  139. package/src/ui/components/SessionDrawer.tsx +0 -199
  140. package/src/ui/components/SideNav.tsx +0 -162
  141. package/src/ui/components/Skeleton.tsx +0 -107
  142. package/src/ui/index.html +0 -13
  143. package/src/ui/lib/actions.ts +0 -30
  144. package/src/ui/lib/api.ts +0 -92
  145. package/src/ui/lib/dataset.ts +0 -141
  146. package/src/ui/lib/registries.ts +0 -155
  147. package/src/ui/lib/view-settings.ts +0 -41
  148. package/src/ui/main.tsx +0 -15
  149. package/src/ui/pages/Live.tsx +0 -229
  150. package/src/ui/pages/Pulse.tsx +0 -415
  151. package/src/ui/pages/Recall.tsx +0 -190
  152. package/src/ui/pages/River.tsx +0 -354
  153. package/src/ui/pages/Search.tsx +0 -386
  154. package/src/ui/pages/Stub.tsx +0 -9
  155. package/src/ui/pages/Thread.tsx +0 -473
  156. package/src/ui/pages/settings/Classifier.tsx +0 -227
  157. package/src/ui/pages/settings/Data.tsx +0 -190
  158. package/src/ui/pages/settings/Index.tsx +0 -65
  159. package/src/ui/pages/settings/Labels.tsx +0 -224
  160. package/src/ui/pages/settings/Providers.tsx +0 -305
  161. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  162. package/src/ui/pages/settings/Sources.tsx +0 -326
  163. package/src/ui/pages/settings/Views.tsx +0 -96
  164. package/src/ui/styles.css +0 -1890
  165. package/src/ui/tsconfig.json +0 -21
  166. package/src/ui/vite.config.ts +0 -19
  167. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  168. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  169. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  170. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  171. package/tests/fixtures/facts.ts +0 -17
  172. package/tests/fixtures/golden-corpus.ts +0 -85
  173. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  174. package/tests/fixtures/hermes/paired_session.json +0 -23
  175. package/tests/fixtures/hermes/request_dump.json +0 -28
  176. package/tests/fixtures/hermes/session_iso.json +0 -38
  177. package/tests/fixtures/hermes/session_unix.json +0 -38
  178. package/tests/fixtures/hermes/system_only.json +0 -18
  179. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  180. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  181. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  182. package/tests/fixtures/sessions.ts +0 -22
  183. package/tests/integration/backfill-facts.test.ts +0 -362
  184. package/tests/integration/citation-explicit.test.ts +0 -111
  185. package/tests/integration/cite-event.test.ts +0 -169
  186. package/tests/integration/cite-memo.test.ts +0 -87
  187. package/tests/integration/db-restore.test.ts +0 -153
  188. package/tests/integration/embed-backfill.test.ts +0 -176
  189. package/tests/integration/fact-supersedence.test.ts +0 -313
  190. package/tests/integration/fts-index.test.ts +0 -60
  191. package/tests/integration/getbyids-sqlite.test.ts +0 -100
  192. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  193. package/tests/integration/hook-claude-settings.test.ts +0 -218
  194. package/tests/integration/hook-log.test.ts +0 -54
  195. package/tests/integration/hook-memo.test.ts +0 -68
  196. package/tests/integration/hook-pre-compact.test.ts +0 -105
  197. package/tests/integration/hook-subagent-start.test.ts +0 -102
  198. package/tests/integration/http.test.ts +0 -401
  199. package/tests/integration/keyword-search-fts.test.ts +0 -66
  200. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  201. package/tests/integration/mcp.test.ts +0 -260
  202. package/tests/integration/memo-sweep.test.ts +0 -91
  203. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  204. package/tests/integration/provider-registry.test.ts +0 -107
  205. package/tests/integration/recall-golden.test.ts +0 -59
  206. package/tests/integration/recall-sqlite.test.ts +0 -169
  207. package/tests/integration/scheduler.test.ts +0 -391
  208. package/tests/integration/session-end-hook.test.ts +0 -48
  209. package/tests/integration/session-start-hook.test.ts +0 -126
  210. package/tests/integration/source-registry.test.ts +0 -122
  211. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  212. package/tests/integration/stop-hook.test.ts +0 -560
  213. package/tests/integration/wal-checkpoint.test.ts +0 -49
  214. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  215. package/tests/unit/core/adapters/aider.test.ts +0 -230
  216. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  217. package/tests/unit/core/adapters/cursor.test.ts +0 -485
  218. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  219. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  220. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  221. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  222. package/tests/unit/core/adapters/pi.test.ts +0 -110
  223. package/tests/unit/core/adapters/windsurf.test.ts +0 -416
  224. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  225. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  226. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  227. package/tests/unit/core/filter.test.ts +0 -40
  228. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  229. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  230. package/tests/unit/core/hook/gate.test.ts +0 -29
  231. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  232. package/tests/unit/core/hook/select.test.ts +0 -66
  233. package/tests/unit/core/match-fields.test.ts +0 -39
  234. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  235. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  236. package/tests/unit/core/query-shape.test.ts +0 -92
  237. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  238. package/tests/unit/core/recall-service.test.ts +0 -200
  239. package/tests/unit/core/storage/live-status.test.ts +0 -54
  240. package/tests/unit/core/tokenize.test.ts +0 -32
  241. package/tests/unit/core/useful-scan.test.ts +0 -537
  242. package/tests/unit/llm/embed.test.ts +0 -93
  243. package/tests/unit/llm/ollama-client.test.ts +0 -124
  244. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  245. package/tsconfig.json +0 -31
  246. package/tsconfig.test.json +0 -11
  247. package/vitest.config.ts +0 -22
@@ -1,87 +0,0 @@
1
- import { mkdtempSync, readdirSync, rmSync, writeFileSync } from "node:fs";
2
- import { tmpdir } from "node:os";
3
- import { join } from "node:path";
4
- import { afterEach, beforeEach, describe, expect, it } from "vitest";
5
- import {
6
- clearCited,
7
- loadCited,
8
- recordCited,
9
- } from "../../src/core/hook/cite-memo.js";
10
-
11
- describe("cite-memo", () => {
12
- let tmp: string;
13
-
14
- beforeEach(() => {
15
- tmp = mkdtempSync(join(tmpdir(), "nlm-cite-memo-"));
16
- process.env["NLM_HOOK_STATE_DIR"] = tmp;
17
- });
18
-
19
- afterEach(() => {
20
- delete process.env["NLM_HOOK_STATE_DIR"];
21
- rmSync(tmp, { recursive: true, force: true });
22
- });
23
-
24
- it("loadCited returns empty set when no memo file exists", () => {
25
- expect(loadCited("conv-x").size).toBe(0);
26
- });
27
-
28
- it("recordCited persists ids; loadCited returns them on next call", () => {
29
- recordCited("conv-x", ["cc_a", "cc_b"]);
30
- expect(loadCited("conv-x")).toEqual(new Set(["cc_a", "cc_b"]));
31
- });
32
-
33
- it("recordCited unions across calls (does not overwrite)", () => {
34
- recordCited("conv-x", ["cc_a"]);
35
- recordCited("conv-x", ["cc_b", "cc_a"]);
36
- expect(loadCited("conv-x")).toEqual(new Set(["cc_a", "cc_b"]));
37
- });
38
-
39
- it("recordCited with empty list is a no-op (no file created)", () => {
40
- recordCited("conv-empty", []);
41
- expect(readdirSync(tmp).filter((f) => f.startsWith("conv-empty"))).toEqual([]);
42
- });
43
-
44
- it("clearCited removes the file and returns true; second call returns false", () => {
45
- recordCited("conv-x", ["cc_a"]);
46
- expect(clearCited("conv-x")).toBe(true);
47
- expect(clearCited("conv-x")).toBe(false);
48
- expect(loadCited("conv-x").size).toBe(0);
49
- });
50
-
51
- it("uses .cited.json filename suffix — parallel to surfaced memo's .json", () => {
52
- recordCited("conv-x", ["cc_a"]);
53
- const files = readdirSync(tmp);
54
- expect(files).toContain("conv-x.cited.json");
55
- });
56
-
57
- it("treats corrupt JSON as empty without throwing", () => {
58
- writeFileSync(join(tmp, "conv-bad.cited.json"), "not json", "utf8");
59
- expect(loadCited("conv-bad").size).toBe(0);
60
- });
61
-
62
- it("treats non-array JSON as empty without throwing", () => {
63
- writeFileSync(
64
- join(tmp, "conv-obj.cited.json"),
65
- JSON.stringify({ cc_a: 1 }),
66
- "utf8",
67
- );
68
- expect(loadCited("conv-obj").size).toBe(0);
69
- });
70
-
71
- it("filters out non-string entries from the persisted array", () => {
72
- writeFileSync(
73
- join(tmp, "conv-mixed.cited.json"),
74
- JSON.stringify(["cc_a", 42, null, "cc_b"]),
75
- "utf8",
76
- );
77
- expect(loadCited("conv-mixed")).toEqual(new Set(["cc_a", "cc_b"]));
78
- });
79
-
80
- it("sanitizes unsafe conversation IDs so the path stays inside the state dir", () => {
81
- recordCited("../escape/attempt", ["cc_a"]);
82
- const files = readdirSync(tmp);
83
- // No file at ../escape/attempt should exist; conversion replaces unsafe chars.
84
- expect(files.some((f) => f.endsWith(".cited.json"))).toBe(true);
85
- expect(files).not.toContain("..");
86
- });
87
- });
@@ -1,153 +0,0 @@
1
- /**
2
- * Backup + restore integration. Real SQLite stores, VACUUM INTO snapshot,
3
- * candidate validation, staged restore, and boot-time promotion.
4
- */
5
-
6
- import { existsSync, mkdtempSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs";
7
- import { tmpdir } from "node:os";
8
- import { join, resolve } from "node:path";
9
- import { afterEach, beforeEach, describe, expect, it } from "vitest";
10
- import type Database from "better-sqlite3";
11
- import { SqliteSessionStore } from "../../src/core/storage/sqlite-session-store.js";
12
- import {
13
- PENDING_SUFFIX,
14
- applyPendingRestore,
15
- snapshotScratchPath,
16
- stageRestore,
17
- validateRestoreCandidate,
18
- vacuumSnapshot,
19
- } from "../../src/core/storage/db-restore.js";
20
-
21
- const MIGRATIONS_DIR = resolve(__dirname, "../../migrations");
22
-
23
- function insertMarkerSource(db: Database.Database, name: string, runtimeLabel: string): void {
24
- db.prepare(
25
- "INSERT INTO sources (kind, name, path_or_url, runtime_label, parse_config, enabled) " +
26
- "VALUES ('webhook', ?, NULL, ?, '{}', 1)",
27
- ).run(name, runtimeLabel);
28
- }
29
-
30
- describe("db-restore", () => {
31
- let tmp: string;
32
- let dbPath: string;
33
-
34
- beforeEach(() => {
35
- tmp = mkdtempSync(join(tmpdir(), "nlm-restore-"));
36
- dbPath = join(tmp, "canonical.sqlite");
37
- });
38
-
39
- afterEach(() => {
40
- rmSync(tmp, { recursive: true, force: true });
41
- });
42
-
43
- function freshStore(path: string): SqliteSessionStore {
44
- return new SqliteSessionStore({ dbPath: path, migrationsDir: MIGRATIONS_DIR });
45
- }
46
-
47
- it("vacuumSnapshot writes a valid standalone copy", () => {
48
- const store = freshStore(dbPath);
49
- const snap = snapshotScratchPath(dbPath);
50
- const bytes = vacuumSnapshot(store.rawDb(), snap);
51
- store.close();
52
-
53
- expect(bytes).toBeGreaterThan(0);
54
- expect(statSync(snap).size).toBe(bytes);
55
- expect(validateRestoreCandidate(snap).ok).toBe(true);
56
- rmSync(snap, { force: true });
57
- });
58
-
59
- it("validateRestoreCandidate rejects a non-SQLite file", () => {
60
- const junk = join(tmp, "junk.sqlite");
61
- writeFileSync(junk, "this is not a database");
62
- const result = validateRestoreCandidate(junk);
63
- expect(result.ok).toBe(false);
64
- expect(result.error).toBeTruthy();
65
- });
66
-
67
- it("validateRestoreCandidate rejects a SQLite file lacking nlm tables", () => {
68
- const bare = join(tmp, "bare.sqlite");
69
- const store = freshStore(bare);
70
- store.rawDb().prepare("DROP TABLE sessions").run();
71
- store.close();
72
- const result = validateRestoreCandidate(bare);
73
- expect(result.ok).toBe(false);
74
- expect(result.error).toMatch(/sessions/);
75
- });
76
-
77
- it("validateRestoreCandidate reports session count and schema version", () => {
78
- const store = freshStore(dbPath);
79
- const snap = snapshotScratchPath(dbPath);
80
- vacuumSnapshot(store.rawDb(), snap);
81
- store.close();
82
-
83
- const result = validateRestoreCandidate(snap);
84
- expect(result.ok).toBe(true);
85
- expect(result.sessions).toBe(0);
86
- expect(result.schemaVersion).toBeGreaterThanOrEqual(0);
87
- rmSync(snap, { force: true });
88
- });
89
-
90
- it("stageRestore parks a valid candidate at the pending path", () => {
91
- const store = freshStore(dbPath);
92
- const snap = snapshotScratchPath(dbPath);
93
- vacuumSnapshot(store.rawDb(), snap);
94
- store.close();
95
-
96
- const result = stageRestore(dbPath, snap);
97
- expect(result.ok).toBe(true);
98
- expect(existsSync(dbPath + PENDING_SUFFIX)).toBe(true);
99
- expect(existsSync(snap)).toBe(false); // candidate was renamed, not copied
100
- });
101
-
102
- it("stageRestore removes the candidate when validation fails", () => {
103
- const junk = join(tmp, "junk.sqlite");
104
- writeFileSync(junk, "not a database");
105
- const result = stageRestore(dbPath, junk);
106
- expect(result.ok).toBe(false);
107
- expect(existsSync(junk)).toBe(false);
108
- expect(existsSync(dbPath + PENDING_SUFFIX)).toBe(false);
109
- });
110
-
111
- it("applyPendingRestore is a no-op when nothing is staged", () => {
112
- freshStore(dbPath).close();
113
- const result = applyPendingRestore(dbPath);
114
- expect(result.applied).toBe(false);
115
- });
116
-
117
- it("applyPendingRestore promotes the staged DB and archives the current one", () => {
118
- // Current DB: one source seeded so we can tell the two stores apart.
119
- const current = freshStore(dbPath);
120
- insertMarkerSource(current.rawDb(), "marker-current", "current");
121
- current.close();
122
-
123
- // Staged DB: built elsewhere, carries a different marker source.
124
- const stagedSrc = join(tmp, "staged-src.sqlite");
125
- const staged = freshStore(stagedSrc);
126
- insertMarkerSource(staged.rawDb(), "marker-staged", "staged");
127
- const snap = snapshotScratchPath(dbPath);
128
- vacuumSnapshot(staged.rawDb(), snap);
129
- staged.close();
130
- expect(stageRestore(dbPath, snap).ok).toBe(true);
131
-
132
- const result = applyPendingRestore(dbPath);
133
- expect(result.applied).toBe(true);
134
- expect(result.archivedTo).toBeTruthy();
135
- expect(existsSync(result.archivedTo!)).toBe(true);
136
- expect(existsSync(dbPath + PENDING_SUFFIX)).toBe(false);
137
-
138
- // The promoted DB is the staged one.
139
- const reopened = freshStore(dbPath);
140
- const names = reopened
141
- .rawDb()
142
- .prepare<[], { name: string }>("SELECT name FROM sources")
143
- .all()
144
- .map((r) => r.name);
145
- reopened.close();
146
- expect(names).toContain("marker-staged");
147
- expect(names).not.toContain("marker-current");
148
-
149
- // Exactly one pre-restore archive was created.
150
- const archives = readdirSync(tmp).filter((f) => f.includes(".pre-restore-"));
151
- expect(archives.length).toBe(1);
152
- });
153
- });
@@ -1,176 +0,0 @@
1
- /**
2
- * Integration tests for embed-backfill + embed-normalize against a real
3
- * SQLite + sqlite-vec store. No network: a deterministic fake LLMClient
4
- * stands in for Ollama.
5
- */
6
-
7
- import { mkdtempSync, rmSync, existsSync } from "node:fs";
8
- import { tmpdir } from "node:os";
9
- import { join, resolve } from "node:path";
10
- import { afterEach, beforeEach, describe, expect, it } from "vitest";
11
- import Database from "better-sqlite3";
12
- import * as sqliteVec from "sqlite-vec";
13
- import { SqliteSessionStore } from "../../src/core/storage/sqlite-session-store.js";
14
- import { reembedCorpus } from "../../src/core/embedding/embed-backfill.js";
15
- import { normalizeEmbeddings } from "../../src/core/embedding/embed-normalize.js";
16
- import type { EmbedResult, LLMClient } from "../../src/ports/llm-client.js";
17
- import type { Session } from "../../src/shared/types.js";
18
- import { makeSession } from "../fixtures/sessions.js";
19
-
20
- const MIGRATIONS_DIR = resolve(__dirname, "../../migrations");
21
-
22
- function unitWithLeading(value: number): Float32Array {
23
- const v = new Float32Array(768);
24
- v[0] = value;
25
- let sum = 0;
26
- for (let i = 0; i < v.length; i++) sum += (v[i] ?? 0) ** 2;
27
- const n = Math.sqrt(sum) || 1;
28
- const out = new Float32Array(768);
29
- for (let i = 0; i < v.length; i++) out[i] = (v[i] ?? 0) / n;
30
- return out;
31
- }
32
-
33
- class DeterministicEmbedder implements LLMClient {
34
- calls = 0;
35
- async embed(): Promise<EmbedResult> {
36
- this.calls += 1;
37
- // Stable, distinct, unit-length vectors per call
38
- return { vector: unitWithLeading(this.calls), model: "fake" };
39
- }
40
- async classify(): Promise<never> {
41
- throw new Error("not used");
42
- }
43
- }
44
-
45
- const seed: ReadonlyArray<Session> = [
46
- makeSession({ id: "s_a", label: "Hono setup", body: "wired Hono routes" }),
47
- makeSession({ id: "s_b", label: "pgvector plan", body: "drafted pgvector swap" }),
48
- makeSession({ id: "s_c", label: "tx tax county", body: "ingested county directory" }),
49
- ];
50
-
51
- describe("reembedCorpus", () => {
52
- let tmp: string;
53
- let dbPath: string;
54
- let statePath: string;
55
-
56
- beforeEach(() => {
57
- tmp = mkdtempSync(join(tmpdir(), "nlm-emb-"));
58
- dbPath = join(tmp, "canonical.sqlite");
59
- statePath = join(tmp, "state.json");
60
- const store = new SqliteSessionStore({ dbPath, migrationsDir: MIGRATIONS_DIR });
61
- for (const s of seed) {
62
- store.insertSessionForTest(s);
63
- // seed each with a non-normalized vector so backfill has something to replace
64
- store.insertEmbeddingForTest(s.id, new Float32Array(768).fill(0.5));
65
- }
66
- store.close();
67
- });
68
-
69
- afterEach(() => rmSync(tmp, { recursive: true, force: true }));
70
-
71
- it("replaces every embedding and writes a state file", async () => {
72
- const embedder = new DeterministicEmbedder();
73
- const report = await reembedCorpus({ dbPath, embedder, statePath });
74
- expect(report.dbMissing).toBe(false);
75
- expect(report.total).toBe(3);
76
- expect(report.succeeded).toBe(3);
77
- expect(report.failed).toBe(0);
78
- expect(report.skippedAlreadyDone).toBe(0);
79
- expect(embedder.calls).toBe(3);
80
- expect(existsSync(statePath)).toBe(true);
81
- });
82
-
83
- it("is resumable — second run skips ids already in state", async () => {
84
- const embedder1 = new DeterministicEmbedder();
85
- await reembedCorpus({ dbPath, embedder: embedder1, statePath });
86
- const embedder2 = new DeterministicEmbedder();
87
- const report = await reembedCorpus({ dbPath, embedder: embedder2, statePath });
88
- expect(report.skippedAlreadyDone).toBe(3);
89
- expect(report.succeeded).toBe(0);
90
- expect(embedder2.calls).toBe(0);
91
- });
92
-
93
- it("respects --limit", async () => {
94
- const embedder = new DeterministicEmbedder();
95
- const report = await reembedCorpus({ dbPath, embedder, statePath, limit: 2 });
96
- expect(report.total).toBe(2);
97
- expect(report.succeeded).toBe(2);
98
- });
99
- });
100
-
101
- describe("normalizeEmbeddings", () => {
102
- let tmp: string;
103
- let dbPath: string;
104
-
105
- beforeEach(() => {
106
- tmp = mkdtempSync(join(tmpdir(), "nlm-norm-"));
107
- dbPath = join(tmp, "canonical.sqlite");
108
- const store = new SqliteSessionStore({ dbPath, migrationsDir: MIGRATIONS_DIR });
109
- store.insertSessionForTest(makeSession({ id: "raw" }));
110
- store.insertSessionForTest(makeSession({ id: "already" }));
111
- store.insertSessionForTest(makeSession({ id: "zero" }));
112
- store.close();
113
- // embed-normalize operates on the legacy session_embeddings table that
114
- // migration 003 still creates (left in place for rollback safety after
115
- // the chunk + max-pool migration). Seed it directly via raw SQL — the
116
- // session store's helpers now target session_embedding_chunks.
117
- const db = new Database(dbPath);
118
- sqliteVec.load(db);
119
- const ins = db.prepare(
120
- "INSERT INTO session_embeddings (session_id, embedding) VALUES (?, ?)",
121
- );
122
- const toBlob = (v: Float32Array): Buffer =>
123
- Buffer.from(v.buffer, v.byteOffset, v.byteLength);
124
- // raw: non-unit (||v|| = sqrt(768 * 0.25) ≈ 13.86)
125
- ins.run("raw", toBlob(new Float32Array(768).fill(0.5)));
126
- // already: unit (one component at 1.0)
127
- const unit = new Float32Array(768);
128
- unit[0] = 1;
129
- ins.run("already", toBlob(unit));
130
- // zero: zero vector
131
- ins.run("zero", toBlob(new Float32Array(768)));
132
- db.close();
133
- });
134
-
135
- afterEach(() => rmSync(tmp, { recursive: true, force: true }));
136
-
137
- it("rewrites only the non-unit non-zero row", () => {
138
- const report = normalizeEmbeddings({ dbPath });
139
- expect(report.total).toBe(3);
140
- expect(report.rewritten).toBe(1);
141
- expect(report.alreadyNormalized).toBe(1);
142
- expect(report.zeroVector).toBe(1);
143
- });
144
-
145
- it("dry-run reports the same counts without writing", () => {
146
- const beforeDb = new Database(dbPath);
147
- sqliteVec.load(beforeDb);
148
- const beforeBlob = beforeDb
149
- .prepare<[string], { embedding: Buffer }>(
150
- "SELECT embedding FROM session_embeddings WHERE session_id = ?",
151
- )
152
- .get("raw")!.embedding;
153
- beforeDb.close();
154
-
155
- const report = normalizeEmbeddings({ dbPath, dryRun: true });
156
- expect(report.rewritten).toBe(1);
157
- expect(report.dryRun).toBe(true);
158
-
159
- const afterDb = new Database(dbPath);
160
- sqliteVec.load(afterDb);
161
- const afterBlob = afterDb
162
- .prepare<[string], { embedding: Buffer }>(
163
- "SELECT embedding FROM session_embeddings WHERE session_id = ?",
164
- )
165
- .get("raw")!.embedding;
166
- afterDb.close();
167
- expect(afterBlob.equals(beforeBlob)).toBe(true);
168
- });
169
-
170
- it("is idempotent — second run rewrites nothing", () => {
171
- normalizeEmbeddings({ dbPath });
172
- const report = normalizeEmbeddings({ dbPath });
173
- expect(report.rewritten).toBe(0);
174
- expect(report.alreadyNormalized).toBe(2); // raw is now unit too
175
- });
176
- });