nlm-memory 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/README.md +72 -34
  2. package/dist/cli/nlm.js +223 -33
  3. package/dist/cli/nlm.js.map +1 -1
  4. package/dist/core/adapters/cursor.d.ts +45 -0
  5. package/dist/core/adapters/cursor.js +397 -0
  6. package/dist/core/adapters/cursor.js.map +1 -0
  7. package/dist/core/adapters/from-source.js +10 -0
  8. package/dist/core/adapters/from-source.js.map +1 -1
  9. package/dist/core/adapters/windsurf.d.ts +44 -0
  10. package/dist/core/adapters/windsurf.js +299 -0
  11. package/dist/core/adapters/windsurf.js.map +1 -0
  12. package/dist/core/hook/claude-settings.d.ts +12 -5
  13. package/dist/core/hook/claude-settings.js +21 -6
  14. package/dist/core/hook/claude-settings.js.map +1 -1
  15. package/dist/core/sources/source-registry.d.ts +1 -1
  16. package/dist/core/sources/source-registry.js +18 -0
  17. package/dist/core/sources/source-registry.js.map +1 -1
  18. package/dist/core/storage/sqlite-session-store.d.ts +2 -0
  19. package/dist/core/storage/sqlite-session-store.js +38 -2
  20. package/dist/core/storage/sqlite-session-store.js.map +1 -1
  21. package/dist/hook/hook-auth.d.ts +13 -0
  22. package/dist/hook/hook-auth.js +19 -0
  23. package/dist/hook/hook-auth.js.map +1 -0
  24. package/dist/hook/prompt-recall-hook.js +7 -1
  25. package/dist/hook/prompt-recall-hook.js.map +1 -1
  26. package/dist/hook/session-start-hook.js +4 -1
  27. package/dist/hook/session-start-hook.js.map +1 -1
  28. package/dist/hook/stop-hook.js +4 -1
  29. package/dist/hook/stop-hook.js.map +1 -1
  30. package/dist/http/app.d.ts +2 -0
  31. package/dist/http/app.js +76 -1
  32. package/dist/http/app.js.map +1 -1
  33. package/dist/install/claude-code.js +1 -1
  34. package/dist/install/claude-code.js.map +1 -1
  35. package/dist/install/cursor.d.ts +25 -0
  36. package/dist/install/cursor.js +43 -0
  37. package/dist/install/cursor.js.map +1 -0
  38. package/dist/install/nlm-dir-perms.d.ts +19 -0
  39. package/dist/install/nlm-dir-perms.js +43 -0
  40. package/dist/install/nlm-dir-perms.js.map +1 -0
  41. package/dist/install/ollama.d.ts +18 -1
  42. package/dist/install/ollama.js +62 -7
  43. package/dist/install/ollama.js.map +1 -1
  44. package/dist/install/setup.d.ts +4 -0
  45. package/dist/install/setup.js +141 -18
  46. package/dist/install/setup.js.map +1 -1
  47. package/dist/install/windsurf.d.ts +25 -0
  48. package/dist/install/windsurf.js +43 -0
  49. package/dist/install/windsurf.js.map +1 -0
  50. package/dist/mcp/server.js +20 -1
  51. package/dist/mcp/server.js.map +1 -1
  52. package/dist/shared/types.d.ts +4 -0
  53. package/dist/ui/assets/{index-BA6IpU8g.css → index-Beo8psd-.css} +1 -1
  54. package/dist/ui/assets/index-CSPTTeeM.js +69 -0
  55. package/dist/ui/index.html +2 -2
  56. package/package.json +26 -1
  57. package/plugin/scripts/prompt-recall-hook.mjs +55 -4
  58. package/plugin/scripts/stop-hook.mjs +57 -6
  59. package/.agents/plugins/marketplace.json +0 -20
  60. package/.github/workflows/ci.yml +0 -30
  61. package/dist/ui/assets/index-B_qIVV0k.js +0 -69
  62. package/docs/methodology/re-derivation-rate.md +0 -112
  63. package/docs/methodology/useful-hit-rate.md +0 -79
  64. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  65. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  66. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  67. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  68. package/docs/plans/desktop-product.md +0 -69
  69. package/docs/plans/factstore-design.md +0 -236
  70. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1389
  71. package/logs/CHANGELOG/CHANGELOG.md +0 -337
  72. package/migrations/000_initial_schema.sql +0 -174
  73. package/migrations/001_entity_type_rename.sql +0 -17
  74. package/migrations/002_adapter_state_extend.sql +0 -12
  75. package/migrations/003_session_embeddings.sql +0 -11
  76. package/migrations/004_facts.sql +0 -46
  77. package/migrations/005_sources.sql +0 -31
  78. package/migrations/006_providers.sql +0 -33
  79. package/migrations/007_source_tokens.sql +0 -17
  80. package/migrations/008_fts_rebuild.sql +0 -9
  81. package/migrations/009_session_embedding_chunks.sql +0 -46
  82. package/migrations/010_sources_opencode.sql +0 -30
  83. package/migrations/011_sources_hermes_agent.sql +0 -30
  84. package/migrations/012_sources_aider.sql +0 -30
  85. package/migrations/013_adapter_state_failure_count.sql +0 -12
  86. package/plugin-hermes-agent/README.md +0 -49
  87. package/plugin-hermes-agent/__init__.py +0 -75
  88. package/plugin-hermes-agent/plugin.yaml +0 -15
  89. package/scripts/backfill-citations.mjs +0 -0
  90. package/scripts/build-codex-plugin.mjs +0 -61
  91. package/scripts/deepseek-probe.mjs +0 -67
  92. package/scripts/extract-triples.mjs +0 -207
  93. package/scripts/longmemeval/embedding-cache.ts +0 -77
  94. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  95. package/scripts/longmemeval/run-harness.ts +0 -315
  96. package/scripts/longmemeval/scorer.ts +0 -99
  97. package/scripts/longmemeval/tsconfig.json +0 -9
  98. package/scripts/longmemeval/types.ts +0 -35
  99. package/scripts/nlm-daily-digest.py +0 -239
  100. package/scripts/nlm-daily-digest.sh +0 -28
  101. package/src/cli/classify-parity.ts +0 -257
  102. package/src/cli/launchctl-helpers.ts +0 -49
  103. package/src/cli/nlm.ts +0 -885
  104. package/src/core/actions/actions-log.ts +0 -118
  105. package/src/core/actions/overlay.ts +0 -117
  106. package/src/core/adapters/aider.ts +0 -205
  107. package/src/core/adapters/claude-code.ts +0 -293
  108. package/src/core/adapters/common.ts +0 -54
  109. package/src/core/adapters/from-source.ts +0 -57
  110. package/src/core/adapters/hermes-agent.ts +0 -240
  111. package/src/core/adapters/hermes.ts +0 -277
  112. package/src/core/adapters/jsonl-generic.ts +0 -208
  113. package/src/core/adapters/opencode.ts +0 -281
  114. package/src/core/adapters/pi.ts +0 -264
  115. package/src/core/classifier/prompt.ts +0 -200
  116. package/src/core/dataset/build-dataset.ts +0 -463
  117. package/src/core/embedding/chunk-body.ts +0 -76
  118. package/src/core/embedding/embed-backfill.ts +0 -210
  119. package/src/core/embedding/embed-normalize.ts +0 -135
  120. package/src/core/facts/backfill-facts.ts +0 -254
  121. package/src/core/facts/extract-facts.ts +0 -50
  122. package/src/core/hook/citation-detect.ts +0 -124
  123. package/src/core/hook/cite-memo.ts +0 -68
  124. package/src/core/hook/claude-settings.ts +0 -166
  125. package/src/core/hook/gate.ts +0 -25
  126. package/src/core/hook/hook-log.ts +0 -41
  127. package/src/core/hook/memo-sweep.ts +0 -164
  128. package/src/core/hook/memo.ts +0 -67
  129. package/src/core/hook/pointer-block.ts +0 -26
  130. package/src/core/hook/select.ts +0 -32
  131. package/src/core/hook/transcript.ts +0 -121
  132. package/src/core/ingest/ingest-session.ts +0 -111
  133. package/src/core/providers/provider-models.ts +0 -100
  134. package/src/core/providers/provider-registry.ts +0 -196
  135. package/src/core/recall/citation-log.ts +0 -108
  136. package/src/core/recall/filter.ts +0 -27
  137. package/src/core/recall/index.ts +0 -6
  138. package/src/core/recall/match-fields.ts +0 -40
  139. package/src/core/recall/query-log.ts +0 -149
  140. package/src/core/recall/query-shape.ts +0 -66
  141. package/src/core/recall/recall-service.ts +0 -320
  142. package/src/core/recall/recent-log.ts +0 -59
  143. package/src/core/recall/tokenize.ts +0 -18
  144. package/src/core/recall/useful-scan.ts +0 -336
  145. package/src/core/recall-facts/fact-query-log.ts +0 -150
  146. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  147. package/src/core/scheduler/scan-once.ts +0 -142
  148. package/src/core/scheduler/scheduler.ts +0 -225
  149. package/src/core/sources/source-registry.ts +0 -260
  150. package/src/core/storage/db-restore.ts +0 -133
  151. package/src/core/storage/live-status.ts +0 -45
  152. package/src/core/storage/migrate.ts +0 -72
  153. package/src/core/storage/sqlite-fact-store.ts +0 -304
  154. package/src/core/storage/sqlite-session-store.ts +0 -765
  155. package/src/hook/prompt-recall-hook.ts +0 -174
  156. package/src/hook/session-end-hook.ts +0 -81
  157. package/src/hook/session-start-hook.ts +0 -165
  158. package/src/hook/stop-hook.ts +0 -236
  159. package/src/http/app.ts +0 -1137
  160. package/src/install/claude-code.ts +0 -128
  161. package/src/install/codex.ts +0 -367
  162. package/src/install/hermes-agent.ts +0 -76
  163. package/src/install/hermes.ts +0 -78
  164. package/src/install/ollama.ts +0 -211
  165. package/src/install/setup.ts +0 -368
  166. package/src/llm/classifier-box.ts +0 -64
  167. package/src/llm/deepseek-client.ts +0 -150
  168. package/src/llm/env-autoload.ts +0 -55
  169. package/src/llm/ollama-client.ts +0 -189
  170. package/src/mcp/server.ts +0 -534
  171. package/src/ports/fact-store.ts +0 -102
  172. package/src/ports/llm-client.ts +0 -52
  173. package/src/ports/logger.ts +0 -16
  174. package/src/ports/session-store.ts +0 -45
  175. package/src/ports/transcript-adapter.ts +0 -55
  176. package/src/shared/types.ts +0 -145
  177. package/src/ui/App.tsx +0 -58
  178. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  179. package/src/ui/components/SessionDrawer.tsx +0 -136
  180. package/src/ui/components/SideNav.tsx +0 -162
  181. package/src/ui/components/Skeleton.tsx +0 -107
  182. package/src/ui/index.html +0 -13
  183. package/src/ui/lib/actions.ts +0 -30
  184. package/src/ui/lib/api.ts +0 -92
  185. package/src/ui/lib/dataset.ts +0 -141
  186. package/src/ui/lib/registries.ts +0 -155
  187. package/src/ui/lib/view-settings.ts +0 -41
  188. package/src/ui/main.tsx +0 -15
  189. package/src/ui/pages/Live.tsx +0 -229
  190. package/src/ui/pages/Pulse.tsx +0 -415
  191. package/src/ui/pages/Recall.tsx +0 -190
  192. package/src/ui/pages/River.tsx +0 -308
  193. package/src/ui/pages/Search.tsx +0 -93
  194. package/src/ui/pages/Stub.tsx +0 -9
  195. package/src/ui/pages/Thread.tsx +0 -262
  196. package/src/ui/pages/settings/Classifier.tsx +0 -227
  197. package/src/ui/pages/settings/Data.tsx +0 -190
  198. package/src/ui/pages/settings/Index.tsx +0 -65
  199. package/src/ui/pages/settings/Labels.tsx +0 -224
  200. package/src/ui/pages/settings/Providers.tsx +0 -305
  201. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  202. package/src/ui/pages/settings/Sources.tsx +0 -326
  203. package/src/ui/pages/settings/Views.tsx +0 -96
  204. package/src/ui/styles.css +0 -1766
  205. package/src/ui/tsconfig.json +0 -21
  206. package/src/ui/vite.config.ts +0 -19
  207. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  208. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  209. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  210. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  211. package/tests/fixtures/facts.ts +0 -17
  212. package/tests/fixtures/golden-corpus.ts +0 -85
  213. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  214. package/tests/fixtures/hermes/paired_session.json +0 -23
  215. package/tests/fixtures/hermes/request_dump.json +0 -28
  216. package/tests/fixtures/hermes/session_iso.json +0 -38
  217. package/tests/fixtures/hermes/session_unix.json +0 -38
  218. package/tests/fixtures/hermes/system_only.json +0 -18
  219. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  220. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  221. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  222. package/tests/fixtures/sessions.ts +0 -22
  223. package/tests/integration/backfill-facts.test.ts +0 -362
  224. package/tests/integration/citation-explicit.test.ts +0 -111
  225. package/tests/integration/cite-event.test.ts +0 -169
  226. package/tests/integration/cite-memo.test.ts +0 -87
  227. package/tests/integration/db-restore.test.ts +0 -153
  228. package/tests/integration/embed-backfill.test.ts +0 -176
  229. package/tests/integration/fact-supersedence.test.ts +0 -313
  230. package/tests/integration/fts-index.test.ts +0 -60
  231. package/tests/integration/getbyids-sqlite.test.ts +0 -60
  232. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  233. package/tests/integration/hook-claude-settings.test.ts +0 -205
  234. package/tests/integration/hook-log.test.ts +0 -54
  235. package/tests/integration/hook-memo.test.ts +0 -68
  236. package/tests/integration/hook-pre-compact.test.ts +0 -105
  237. package/tests/integration/hook-subagent-start.test.ts +0 -102
  238. package/tests/integration/http.test.ts +0 -401
  239. package/tests/integration/keyword-search-fts.test.ts +0 -66
  240. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  241. package/tests/integration/mcp.test.ts +0 -248
  242. package/tests/integration/memo-sweep.test.ts +0 -91
  243. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  244. package/tests/integration/provider-registry.test.ts +0 -107
  245. package/tests/integration/recall-golden.test.ts +0 -59
  246. package/tests/integration/recall-sqlite.test.ts +0 -169
  247. package/tests/integration/scheduler.test.ts +0 -391
  248. package/tests/integration/session-end-hook.test.ts +0 -48
  249. package/tests/integration/session-start-hook.test.ts +0 -126
  250. package/tests/integration/source-registry.test.ts +0 -120
  251. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  252. package/tests/integration/stop-hook.test.ts +0 -560
  253. package/tests/integration/wal-checkpoint.test.ts +0 -49
  254. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  255. package/tests/unit/core/adapters/aider.test.ts +0 -230
  256. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  257. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  258. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  259. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  260. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  261. package/tests/unit/core/adapters/pi.test.ts +0 -110
  262. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  263. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  264. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  265. package/tests/unit/core/filter.test.ts +0 -40
  266. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  267. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  268. package/tests/unit/core/hook/gate.test.ts +0 -29
  269. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  270. package/tests/unit/core/hook/select.test.ts +0 -66
  271. package/tests/unit/core/match-fields.test.ts +0 -39
  272. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  273. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  274. package/tests/unit/core/query-shape.test.ts +0 -92
  275. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  276. package/tests/unit/core/recall-service.test.ts +0 -200
  277. package/tests/unit/core/storage/live-status.test.ts +0 -54
  278. package/tests/unit/core/tokenize.test.ts +0 -32
  279. package/tests/unit/core/useful-scan.test.ts +0 -537
  280. package/tests/unit/llm/embed.test.ts +0 -93
  281. package/tests/unit/llm/ollama-client.test.ts +0 -124
  282. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  283. package/tsconfig.json +0 -31
  284. package/tsconfig.test.json +0 -11
  285. package/vitest.config.ts +0 -22
@@ -1,207 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Extract (query, surfaced_id, surfaced_body, label, weight, source) triples
4
- * from the NLM telemetry logs for reranker training data.
5
- *
6
- * Data sources:
7
- * ~/.nlm/hook-log.jsonl — one row per UserPromptSubmit/SessionStart fire,
8
- * fields: ts, conversationId, promptPreview, wouldInject[]
9
- * ~/.nlm/citation-log.jsonl — one row per detected citation,
10
- * fields: ts, conversation_id, cited_id, kind
11
- * ~/.nlm/canonical.sqlite — sessions table for surfaced_body lookup
12
- *
13
- * Algorithm:
14
- * 1. Index citations by (conversationId, citedId, kind).
15
- * 2. Identify "gold conversations": conversations that produced at least one
16
- * tool_use citation. These are the only conversations with confirmed
17
- * positive signal — surfaced-but-not-cited sessions in them are genuine
18
- * hard negatives.
19
- * 3. For each hook-log entry with wouldInject.length > 0:
20
- * - If the conversation is a gold conversation:
21
- * * tool_use cited sessions → weight 1.0, source "tool_use" (gold positive)
22
- * * NOT cited sessions → weight 0.0, source "hard_negative"
23
- * - Prose-only conversations are skipped (signal too noisy).
24
- * 4. Fetch surfaced_body from SQLite for each (query, surfaced_id) pair.
25
- * 5. Write JSONL to --output path (or stdout).
26
- *
27
- * Dedup: one triple per (conversationId, query, surfaced_id, source) key — duplicate hook
28
- * fires for the same (conversationId, id) pair are collapsed.
29
- *
30
- * Usage:
31
- * node scripts/extract-triples.mjs # stdout, last 30d
32
- * node scripts/extract-triples.mjs --days 7 # last 7 days
33
- * node scripts/extract-triples.mjs --output triples.jsonl
34
- * node scripts/extract-triples.mjs --stats # summary only, no output
35
- */
36
-
37
- import { createReadStream, existsSync, writeFileSync } from "node:fs";
38
- import { createInterface } from "node:readline";
39
- import { homedir } from "node:os";
40
- import { join } from "node:path";
41
- import { createWriteStream } from "node:fs";
42
- import Database from "better-sqlite3";
43
-
44
- // ── CLI args ─────────────────────────────────────────────────────────────────
45
-
46
// Everything after `node <script>` on the command line.
const args = process.argv.slice(2);

// --days <n>: size of the look-back window. A missing flag, a missing value,
// a non-numeric value, or 0 all fall back to the 30-day default.
const daysFlagAt = args.indexOf("--days");
const days = daysFlagAt === -1 ? 30 : Number.parseInt(args[daysFlagAt + 1], 10) || 30;

// --output <path>: destination file. null when the flag is absent; the later
// write step treats any falsy value as "write to stdout".
const outputFlagAt = args.indexOf("--output");
const outputPath = outputFlagAt === -1 ? null : args[outputFlagAt + 1];

// --stats: print the summary line only.
const statsOnly = args.includes("--stats");

// Default file locations live under ~/.nlm; each one can be overridden through
// its environment variable. The home directory is only resolved when needed.
const underNlmHome = (fileName) => join(homedir(), ".nlm", fileName);
const hookLogPath = process.env.NLM_HOOK_LOG ?? underNlmHome("hook-log.jsonl");
const citationLogPath = process.env.NLM_CITATION_LOG ?? underNlmHome("citation-log.jsonl");
const dbPath = process.env.NLM_DB_PATH ?? underNlmHome("canonical.sqlite");

// Epoch-ms lower bound: hook-log rows older than this are ignored.
const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
62
-
63
- // ── Helpers ───────────────────────────────────────────────────────────────────
64
-
65
/**
 * Read a JSON-lines file and return its rows.
 *
 * A missing file yields an empty array. Blank lines and lines that are not
 * valid JSON are skipped. Lines that parse to something other than a plain
 * object (`null`, numbers, strings, arrays) are skipped as well: callers read
 * properties off every row, and a bare `null` row would otherwise crash them
 * with a TypeError.
 *
 * @param path - Path of the .jsonl file.
 * @returns Parsed object rows, in file order.
 */
async function readJsonl(path) {
  if (!existsSync(path)) return [];
  const rows = [];
  const rl = createInterface({ input: createReadStream(path), crlfDelay: Infinity });
  for await (const line of rl) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // Malformed line — log files can contain a torn final write; skip it.
      continue;
    }
    // Only object rows are meaningful log entries.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) continue;
    rows.push(parsed);
  }
  return rows;
}
80
-
81
/**
 * Convert a timestamp field of a log entry to epoch milliseconds.
 *
 * Always returns a finite number. 0 means "no usable timestamp": the entry is
 * null/undefined, the field is missing or not a string, or the string cannot
 * be parsed by Date.parse (which would otherwise leak NaN to the caller).
 *
 * @param entry - Log row to read from.
 * @param field - Name of the timestamp property (defaults to "ts").
 * @returns Epoch milliseconds, or 0 when no valid timestamp is present.
 */
function tsMs(entry, field = "ts") {
  const raw = entry?.[field];
  if (typeof raw !== "string") return 0;
  const parsed = Date.parse(raw);
  return Number.isNaN(parsed) ? 0 : parsed;
}
86
-
87
- // ── Step 1: Load citations ────────────────────────────────────────────────────
88
-
89
const citationRows = await readJsonl(citationLogPath);

// conversation id → Set of session ids that conversation cited via tool_use
const toolUseCitations = new Map();
// Conversations with at least one tool_use citation ("gold" conversations)
const goldConversations = new Set();

for (const { kind, conversation_id: convId, cited_id: citedId } of citationRows) {
  // Only tool_use citations with both ids present count as confirmed signal.
  if (kind !== "tool_use" || !convId || !citedId) continue;

  let citedIds = toolUseCitations.get(convId);
  if (citedIds === undefined) {
    citedIds = new Set();
    toolUseCitations.set(convId, citedIds);
  }
  citedIds.add(citedId);
  goldConversations.add(convId);
}
105
-
106
- // ── Step 2: Load hook-log entries in window ───────────────────────────────────
107
-
108
const hookRows = await readJsonl(hookLogPath);

// Keys already emitted, so repeated hook fires for the same
// (conversation, query, surfaced id, source) collapse into one triple.
const seen = new Set();
// Triples without bodies yet: { query, surfaced_id, weight, source }.
const rawTriples = [];

for (const entry of hookRows) {
  // Rows carrying a string `kind` are stop-hook entries, not prompt fires.
  if (typeof entry.kind === "string") continue;

  // Drop rows with no usable timestamp or older than the look-back window.
  const firedAt = tsMs(entry);
  if (!firedAt || firedAt < cutoff) continue;

  const { conversationId, promptPreview: query } = entry;
  const surfacedIds = Array.isArray(entry.wouldInject) ? entry.wouldInject : [];
  const usable = conversationId && query && surfacedIds.length !== 0;
  // Only gold conversations carry a confirmed positive signal.
  if (!usable || !goldConversations.has(conversationId)) continue;

  const citedIds = toolUseCitations.get(conversationId) ?? new Set();

  for (const surfacedId of surfacedIds) {
    if (typeof surfacedId !== "string") continue;

    // Cited via tool_use → gold positive; surfaced but not cited → hard negative.
    const [source, weight] = citedIds.has(surfacedId)
      ? ["tool_use", 1.0]
      : ["hard_negative", 0.0];

    const key = [conversationId, query, surfacedId, source].join("::");
    if (!seen.has(key)) {
      seen.add(key);
      rawTriples.push({ query, surfaced_id: surfacedId, weight, source });
    }
  }
}
140
-
141
// Nothing to export: explain why on stderr and stop with a success status.
if (!rawTriples.length) {
  const notice = [
    "extract-triples: no triples found.",
    `Checked ${goldConversations.size} gold conversations in the last ${days}d.`,
    "Run nlm useful-scan and ensure tool_use citations exist in citation-log.jsonl.",
  ].join(" ");
  console.error(notice);
  process.exit(0);
}
149
-
150
- // ── Step 3: Fetch surfaced_body from SQLite ────────────────────────────────────
151
-
152
// Distinct session ids whose bodies are needed.
const uniqueIds = [...new Set(rawTriples.map((t) => t.surfaced_id))];

// SQLite limits the number of bound parameters per statement, so ids are
// looked up in batches instead of one unbounded IN (...) list.
const ID_LOOKUP_BATCH_SIZE = 500;

// session id → body text ("" when the stored body is not a string).
const bodyById = new Map();
if (existsSync(dbPath)) {
  let db;
  try {
    db = new Database(dbPath, { readonly: true });
    for (let start = 0; start < uniqueIds.length; start += ID_LOOKUP_BATCH_SIZE) {
      const batch = uniqueIds.slice(start, start + ID_LOOKUP_BATCH_SIZE);
      const placeholders = batch.map(() => "?").join(",");
      const rows = db
        .prepare(`SELECT id, body FROM sessions WHERE id IN (${placeholders})`)
        .all(...batch);
      for (const row of rows) {
        bodyById.set(row.id, typeof row.body === "string" ? row.body : "");
      }
    }
  } catch (err) {
    // Best effort: keep going with empty bodies, as the message promises.
    bodyById.clear();
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`extract-triples: sqlite lookup failed — ${reason}. Bodies will be empty.`);
  } finally {
    // Release the handle even when prepare/all threw part-way through.
    db?.close();
  }
} else {
  console.error(`extract-triples: db not found at ${dbPath}. Bodies will be empty.`);
}
172
-
173
- // ── Step 4: Assemble final triples ────────────────────────────────────────────
174
-
175
// Attach the body and a human-readable label to every raw triple. Property
// order here is the field order of the emitted JSONL rows.
const triples = rawTriples.map(({ query, surfaced_id, weight, source }) => ({
  query,
  surfaced_id,
  surfaced_body: bodyById.get(surfaced_id) ?? "",
  label: weight === 1.0 ? "positive" : "negative",
  weight,
  source,
}));

// ── Stats ─────────────────────────────────────────────────────────────────────

// Number of triples satisfying a predicate.
const countWhere = (matches) =>
  triples.reduce((total, triple) => (matches(triple) ? total + 1 : total), 0);

const positives = countWhere((t) => t.weight === 1.0);
const negatives = countWhere((t) => t.weight === 0.0);
const withBody = countWhere((t) => t.surfaced_body.length > 0);

// The summary always goes to stderr so stdout stays clean for JSONL output.
const summary = [
  `extract-triples: ${triples.length} triples`,
  `(${positives} positive, ${negatives} hard-negative, ${withBody}/${triples.length} with body)`,
  `from ${goldConversations.size} gold conversations over last ${days}d`,
].join(" ");
console.error(summary);

// --stats: the summary is the only output.
if (statsOnly) process.exit(0);
197
-
198
- // ── Step 5: Write output ──────────────────────────────────────────────────────
199
-
200
// One JSON document per line, with a trailing newline after the last row.
const serializedRows = [];
for (const triple of triples) {
  serializedRows.push(JSON.stringify(triple));
}
const lines = `${serializedRows.join("\n")}\n`;

if (!outputPath) {
  // No --output given: stream the rows to stdout.
  process.stdout.write(lines);
} else {
  writeFileSync(outputPath, lines, "utf8");
  console.error(`extract-triples: wrote ${triples.length} rows to ${outputPath}`);
}
@@ -1,77 +0,0 @@
1
- /**
2
- * SHA256-keyed on-disk embedding cache. The LongMemEval-S haystack has
3
- * ~24K session bodies (~19K unique); embedding them via local Ollama takes
4
- * ~30 min the first time. Reruns must be instant — calibrating retrieval
5
- * parameters means dozens of re-evaluations, and re-embedding each time
6
- * would burn hours of wall clock for no signal.
7
- *
8
- * Backed by a small SQLite at $LONGMEMEVAL_CACHE_DIR/embeddings.sqlite.
9
- * Key = sha256(kind + ":" + text); value = Float32Array as BLOB.
10
- */
11
-
12
- import Database from "better-sqlite3";
13
- import type { Database as DB } from "better-sqlite3";
14
- import { createHash } from "node:crypto";
15
- import { mkdirSync } from "node:fs";
16
- import { dirname } from "node:path";
17
- import type { EmbeddingKind, LLMClient } from "../../src/ports/llm-client.js";
18
-
19
/** DDL for the single key → vector table; IF NOT EXISTS makes it safe to run on every open. */
const CREATE_SQL =
  "CREATE TABLE IF NOT EXISTS embeddings (key TEXT PRIMARY KEY, vector BLOB NOT NULL)";

/** Construction options for {@link EmbeddingCache}. */
export interface EmbeddingCacheOptions {
  /** Path of the SQLite file backing the cache; missing parent directories are created. */
  readonly dbPath: string;
  /** Client asked for an embedding whenever the cache has no entry. */
  readonly llm: LLMClient;
}

/**
 * On-disk embedding cache in front of an {@link LLMClient}.
 *
 * Vectors are stored as raw Float32Array bytes in a SQLite BLOB, keyed by
 * sha256(`${kind}:${text}`).
 */
export class EmbeddingCache {
  private readonly db: DB;
  private readonly llm: LLMClient;
  private readonly getStmt: ReturnType<DB["prepare"]>;
  private readonly putStmt: ReturnType<DB["prepare"]>;
  private readonly countStmt: ReturnType<DB["prepare"]>;

  /**
   * Open (creating if necessary) the cache database and prepare its statements.
   *
   * @param opts - Database location and the client used on cache misses.
   */
  constructor(opts: EmbeddingCacheOptions) {
    mkdirSync(dirname(opts.dbPath), { recursive: true });
    this.db = new Database(opts.dbPath);
    this.db.pragma("journal_mode = WAL");
    this.db.prepare(CREATE_SQL).run();
    this.getStmt = this.db.prepare(
      "SELECT vector FROM embeddings WHERE key = @key",
    );
    this.putStmt = this.db.prepare(
      "INSERT OR REPLACE INTO embeddings (key, vector) VALUES (@key, @vector)",
    );
    // Prepared once so size() does not recompile the query on every call.
    this.countStmt = this.db.prepare("SELECT COUNT(*) AS n FROM embeddings");
    this.llm = opts.llm;
  }

  /**
   * Return the embedding for `text`, from the cache when present, otherwise
   * from the LLM client (the fresh vector is stored before it is returned).
   *
   * @param text - Text to embed.
   * @param kind - Embedding kind; part of the cache key, so the same text
   *   embedded under different kinds is cached separately.
   * @returns The embedding vector.
   */
  async embed(text: string, kind: EmbeddingKind): Promise<Float32Array> {
    const key = createHash("sha256").update(`${kind}:${text}`).digest("hex");
    const row = this.getStmt.get({ key }) as { vector: Buffer } | undefined;
    if (row) {
      return EmbeddingCache.toFloat32(row.vector);
    }
    const result = await this.llm.embed(text, kind);
    // Store exactly the bytes the vector views, not its whole backing buffer.
    const blob = Buffer.from(
      result.vector.buffer,
      result.vector.byteOffset,
      result.vector.byteLength,
    );
    this.putStmt.run({ key, vector: blob });
    return result.vector;
  }

  /** Number of cached embeddings. */
  size(): number {
    const row = this.countStmt.get() as { n: number };
    return row.n;
  }

  /** Close the underlying database; the cache must not be used afterwards. */
  close(): void {
    this.db.close();
  }

  /**
   * Reinterpret a BLOB as float32 values.
   *
   * A Float32Array view must start at a byte offset that is a multiple of 4,
   * or the constructor throws a RangeError. A Buffer can be a view into a
   * larger ArrayBuffer at any offset, so the zero-copy view is used only when
   * the offset is aligned; otherwise the bytes are copied into a fresh buffer.
   * Trailing bytes that do not form a whole float are ignored.
   */
  private static toFloat32(blob: Buffer): Float32Array {
    const elementBytes = Float32Array.BYTES_PER_ELEMENT;
    const length = Math.floor(blob.byteLength / elementBytes);
    if (blob.byteOffset % elementBytes === 0) {
      return new Float32Array(blob.buffer, blob.byteOffset, length);
    }
    const aligned = new Uint8Array(length * elementBytes);
    aligned.set(blob.subarray(0, aligned.byteLength));
    return new Float32Array(aligned.buffer);
  }
}
@@ -1,25 +0,0 @@
1
#!/usr/bin/env bash
# Fetch the LongMemEval-S (small) dataset from HuggingFace into a local cache.
# Idempotent — skips download if the target file already exists with non-zero size.
#
# Source: https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned
# Variant: longmemeval_s_cleaned.json — 500 questions × ~40 haystack sessions.
#
# Environment:
#   LONGMEMEVAL_CACHE_DIR  cache directory (default: $HOME/.cache/longmemeval)
#   LONGMEMEVAL_VARIANT    dataset file name (default: longmemeval_s_cleaned.json)

set -euo pipefail

CACHE_DIR="${LONGMEMEVAL_CACHE_DIR:-$HOME/.cache/longmemeval}"
VARIANT="${LONGMEMEVAL_VARIANT:-longmemeval_s_cleaned.json}"
URL="https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/${VARIANT}"
TARGET="${CACHE_DIR}/${VARIANT}"
# Download lands here first so TARGET only ever appears fully written.
TMP="${TARGET}.tmp"

mkdir -p "$CACHE_DIR"

if [[ -s "$TARGET" ]]; then
  echo "longmemeval-fetch: ${TARGET} already present ($(wc -c <"$TARGET") bytes) — skipping."
  exit 0
fi

# Remove a partial download on any exit path. After a successful mv the temp
# file no longer exists, so this is a no-op.
trap 'rm -f "$TMP"' EXIT

echo "longmemeval-fetch: downloading ${URL}"
curl -fsSL --retry 3 -o "$TMP" "$URL"

# An empty body is not a usable dataset; fail loudly instead of installing it.
if [[ ! -s "$TMP" ]]; then
  echo "longmemeval-fetch: download of ${URL} produced an empty file" >&2
  exit 1
fi

mv "$TMP" "$TARGET"
echo "longmemeval-fetch: wrote ${TARGET} ($(wc -c <"$TARGET") bytes)"
@@ -1,315 +0,0 @@
1
- /**
2
- * LongMemEval-S baseline harness for NLM.
3
- *
4
- * Body-only ingest (skip classifier) + local Ollama nomic-embed-text. For
5
- * each evaluation instance: spin up a throwaway on-disk NLM corpus loaded with
6
- * the haystack sessions, query in each retrieval mode (keyword / semantic
7
- * / hybrid+RRF), score R@5 plus the session-body companion metric.
8
- *
9
- * Pure body-only retrieval — this measures the retrieval *algorithm*, not
10
- * the full classifier-in-loop NLM pipeline. The number is comparable to
11
- * agentmemory's published R@5 because both bench bodies-only.
12
- *
13
- * Usage:
14
- * node dist/scripts/longmemeval/run-harness.js \
15
- * --variant longmemeval_s_cleaned.json \
16
- * --modes keyword,semantic,hybrid \
17
- * --limit 500 \
18
- * --report-dir reports/longmemeval
19
- *
20
- * Re-runs are fast: embeddings cache in ~/.cache/longmemeval/embeddings.sqlite
21
- * keyed by sha256(`${kind}:${text}`). First run = ~30 min embedding; subsequent = seconds.
22
- */
23
-
24
- import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
25
- import { homedir, tmpdir } from "node:os";
26
- import { dirname, join, resolve } from "node:path";
27
- import { fileURLToPath } from "node:url";
28
-
29
- const __filename = fileURLToPath(import.meta.url);
30
- const __dirname = dirname(__filename);
31
- import { RecallService } from "../../src/core/recall/recall-service.js";
32
- import { SqliteSessionStore } from "../../src/core/storage/sqlite-session-store.js";
33
- import { OllamaClient } from "../../src/llm/ollama-client.js";
34
- import type {
35
- EmbedResult,
36
- EmbeddingKind,
37
- LLMClient,
38
- } from "../../src/ports/llm-client.js";
39
- import type { RecallMode } from "../../src/shared/types.js";
40
- import { EmbeddingCache } from "./embedding-cache.js";
41
- import { scoreOne, aggregate, type SingleScore } from "./scorer.js";
42
- import { turnsToBody, type LongMemEvalInstance } from "./types.js";
43
- import { chunkSessionText } from "../../src/core/embedding/chunk-body.js";
44
-
45
/** Parsed command-line configuration for one harness run. */
interface Args {
  /** Path of the dataset JSON: the cache directory joined with the variant file name. */
  readonly datasetPath: string;
  /** Retrieval modes to evaluate, in command-line order. */
  readonly modes: ReadonlyArray<RecallMode>;
  /** Maximum number of dataset instances to evaluate. */
  readonly limit: number;
  /** Cut-off rank handed to the recall search and to the scorer. */
  readonly k: number;
  /** Directory under which a timestamped report folder is created. */
  readonly reportDir: string;
  /** Directory holding the dataset file and the embedding cache database. */
  readonly cacheDir: string;
  /** Migrations directory passed to each per-instance session store. */
  readonly migrationsDir: string;
}

/**
 * Parse harness flags (`--variant`, `--modes`, `--limit`, `--k`,
 * `--report-dir`) from `argv`. Every flag has a default. The cache
 * directory comes from `LONGMEMEVAL_CACHE_DIR`, falling back to
 * `~/.cache/longmemeval`.
 *
 * @param argv - Arguments after the script path.
 * @returns The resolved configuration.
 * @throws Error if a flag is present without a value, `--limit` / `--k` is
 *   not a positive integer, or `--modes` lists no mode.
 */
function parseArgs(argv: ReadonlyArray<string>): Args {
  const get = (flag: string, fallback?: string): string => {
    const i = argv.indexOf(flag);
    if (i < 0) {
      if (fallback === undefined) throw new Error(`missing required flag: ${flag}`);
      return fallback;
    }
    const value = argv[i + 1];
    // A trailing flag, or a flag directly followed by another flag, has no
    // value. Fail loudly rather than continuing with "" (which would turn
    // --limit into NaN and silently evaluate zero instances).
    if (value === undefined || value.startsWith("--")) {
      throw new Error(`missing value for flag: ${flag}`);
    }
    return value;
  };
  const getPositiveInt = (flag: string, fallback: string): number => {
    const raw = get(flag, fallback);
    const parsed = Number(raw);
    if (!Number.isInteger(parsed) || parsed < 1) {
      throw new Error(`${flag} expects a positive integer, got "${raw}"`);
    }
    return parsed;
  };

  const cacheDir =
    process.env["LONGMEMEVAL_CACHE_DIR"] ?? join(homedir(), ".cache", "longmemeval");
  const variant = get("--variant", "longmemeval_s_cleaned.json");
  const datasetPath = join(cacheDir, variant);
  // Mode names are not checked against RecallMode here (the type is declared
  // elsewhere); only empty entries such as a trailing comma are dropped.
  const modes = get("--modes", "keyword,semantic,hybrid")
    .split(",")
    .map((m) => m.trim())
    .filter((m) => m.length > 0) as RecallMode[];
  if (modes.length === 0) {
    throw new Error("--modes expects at least one retrieval mode");
  }
  const limit = getPositiveInt("--limit", "500");
  const k = getPositiveInt("--k", "5");
  const reportDir = get("--report-dir", resolve("reports/longmemeval"));
  const migrationsDir = resolve(__dirname, "../../migrations");
  return { datasetPath, modes, limit, k, reportDir, cacheDir, migrationsDir };
}
77
-
78
/**
 * LLMClient adapter whose `embed()` is answered by the on-disk embedding
 * cache. `classify()` always rejects because the body-only harness never
 * classifies.
 */
class CachingEmbedder implements LLMClient {
  private readonly cache: EmbeddingCache;

  constructor(cache: EmbeddingCache) {
    this.cache = cache;
  }

  /** Look the vector up through the cache and tag it with the cached-model label. */
  async embed(text: string, kind: EmbeddingKind): Promise<EmbedResult> {
    return {
      vector: await this.cache.embed(text, kind),
      model: "nomic-embed-text@cached",
    };
  }

  /** Not supported by this harness; always rejects. */
  async classify(): Promise<never> {
    throw new Error("classify not used in LongMemEval body-only harness");
  }
}
89
-
90
/** Outcome of evaluating one dataset instance. */
interface InstanceResult {
  readonly question_id: string;
  readonly question_type: string;
  /** Score per retrieval mode, plus the session ids the search returned. */
  readonly by_mode: Record<string, SingleScore & { returnedIds: string[] }>;
  /** Number of chunk embeddings that threw while ingesting the haystack. */
  readonly embed_failures: number;
}

/**
 * Evaluate a single instance: load its haystack sessions into a throwaway
 * SQLite store inside a fresh temp directory, optionally embed each
 * session's chunks, run the question through every requested mode, and
 * score the returned ids.
 *
 * The store is closed and the temp directory removed on every exit path,
 * including when opening the store fails.
 *
 * @param instance - Dataset instance (question, gold ids, haystack).
 * @param args - Harness configuration (modes, k, migrations directory).
 * @param cache - Embedding cache used for document chunks.
 * @param embedder - LLM client handed to the recall service.
 * @returns Per-mode scores and the count of failed chunk embeddings.
 */
async function runInstance(
  instance: LongMemEvalInstance,
  args: Args,
  cache: EmbeddingCache,
  embedder: LLMClient,
): Promise<InstanceResult> {
  const needsEmbeddings = args.modes.some(
    (m) => m === "semantic" || m === "hybrid",
  );
  const tmpDir = mkdtempSync(join(tmpdir(), "nlm-lmeval-"));
  let opened: SqliteSessionStore | undefined;
  try {
    // Opened inside the try so a constructor failure still removes tmpDir.
    const store = new SqliteSessionStore({
      dbPath: join(tmpDir, "canonical.sqlite"),
      migrationsDir: args.migrationsDir,
    });
    opened = store;

    const bodyById = new Map<string, string>();
    const seen = new Set<string>();
    let embedFailures = 0;

    for (let i = 0; i < instance.haystack_sessions.length; i++) {
      const id = instance.haystack_session_ids[i];
      const date = instance.haystack_dates[i];
      const turns = instance.haystack_sessions[i];
      if (!id || !date || !turns) continue;
      // Only the first occurrence of a session id is ingested.
      if (seen.has(id)) continue;
      seen.add(id);

      const body = turnsToBody(turns);
      bodyById.set(id, body);
      store.insertSessionForTest({
        id,
        runtime: "longmemeval",
        runtimeSessionId: id,
        startedAt: date,
        endedAt: date,
        durationMin: 0,
        label: "",
        summary: "",
        body,
        status: "closed",
        transcriptKind: "longmemeval-jsonl",
        transcriptPath: null,
        entities: [],
        decisions: [],
        open: [],
      });

      if (!needsEmbeddings) continue;
      const chunks = chunkSessionText({ body });
      for (let c = 0; c < chunks.length; c++) {
        try {
          const vector = await cache.embed(chunks[c]!, "document");
          store.insertChunkEmbeddingForTest(id, c, vector);
        } catch {
          // Deliberately best-effort: a chunk that fails to embed is skipped
          // and the remaining chunks of the session are still stored. The
          // counter records failures so the report exposes them.
          embedFailures++;
        }
      }
    }

    const recall = new RecallService({ store, llm: embedder });
    const byMode: InstanceResult["by_mode"] = {};
    for (const mode of args.modes) {
      const result = await recall.search({
        query: instance.question,
        mode,
        limit: args.k,
      });
      const returnedIds = result.results.map((r) => r.id);
      // Ids missing from the haystack map score against an empty body.
      const returnedBodies = returnedIds.map((id) => bodyById.get(id) ?? "");
      const score = scoreOne({
        returnedIds,
        goldIds: instance.answer_session_ids,
        returnedBodies,
        answer: instance.answer,
        k: args.k,
      });
      byMode[mode] = { ...score, returnedIds };
    }

    return {
      question_id: instance.question_id,
      question_type: instance.question_type,
      by_mode: byMode,
      embed_failures: embedFailures,
    };
  } finally {
    try {
      opened?.close();
    } finally {
      // Runs even if close() throws, so the temp directory never leaks.
      rmSync(tmpDir, { recursive: true, force: true });
    }
  }
}
186
-
187
/**
 * Harness entry point: load the dataset, evaluate the first `limit`
 * instances one after another, aggregate scores overall and per question
 * type, then write `results.json` and `summary.md` into a timestamped
 * folder under the report directory and print the summary.
 *
 * @throws Error if the dataset file does not contain a JSON array.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  console.log(`longmemeval-harness: loading ${args.datasetPath}`);
  const parsed: unknown = JSON.parse(readFileSync(args.datasetPath, "utf8"));
  if (!Array.isArray(parsed)) {
    throw new Error(
      `longmemeval-harness: expected a JSON array of instances in ${args.datasetPath}`,
    );
  }
  // Element shape is not validated here; only the top-level array is checked.
  const dataset = parsed as LongMemEvalInstance[];
  const slice = dataset.slice(0, args.limit);
  console.log(
    `longmemeval-harness: ${slice.length}/${dataset.length} instances, modes=${args.modes.join(",")}, k=${args.k}`,
  );

  // Embedding client plus the on-disk cache that fronts it.
  const llm = new OllamaClient({ embedModel: "nomic-embed-text" });
  const cache = new EmbeddingCache({
    dbPath: join(args.cacheDir, "embeddings.sqlite"),
    llm,
  });
  try {
    const embedder = new CachingEmbedder(cache);
    console.log(`longmemeval-harness: cache contains ${cache.size()} embeddings on entry`);

    const results: InstanceResult[] = [];
    const t0 = Date.now();
    for (let i = 0; i < slice.length; i++) {
      const inst = slice[i];
      if (!inst) continue;
      results.push(await runInstance(inst, args, cache, embedder));
      // Progress line every 10 instances and after the last one.
      if ((i + 1) % 10 === 0 || i === slice.length - 1) {
        const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
        const cached = cache.size();
        console.log(
          ` [${i + 1}/${slice.length}] ${elapsed}s elapsed, cache=${cached}`,
        );
      }
    }

    // Aggregate across all instances.
    const aggregated: Record<string, ReturnType<typeof aggregate>> = {};
    for (const mode of args.modes) {
      aggregated[mode] = aggregate(
        results.map((r) => r.by_mode[mode] as SingleScore).filter(Boolean),
      );
    }

    // Per-question-type breakdown. Results are grouped once; Map insertion
    // order keeps types in order of first appearance.
    const resultsByType = new Map<string, InstanceResult[]>();
    for (const r of results) {
      const bucket = resultsByType.get(r.question_type);
      if (bucket) bucket.push(r);
      else resultsByType.set(r.question_type, [r]);
    }
    const byType: Record<string, Record<string, ReturnType<typeof aggregate>>> = {};
    for (const [type, members] of resultsByType) {
      const perMode: Record<string, ReturnType<typeof aggregate>> = {};
      for (const mode of args.modes) {
        perMode[mode] = aggregate(
          members.map((r) => r.by_mode[mode] as SingleScore).filter(Boolean),
        );
      }
      byType[type] = perMode;
    }

    // Folder name: ISO timestamp truncated to seconds, with ":" and "T"
    // replaced by "-".
    const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
    const outDir = join(args.reportDir, stamp);
    mkdirSync(outDir, { recursive: true });
    const json = {
      dataset: args.datasetPath,
      n: results.length,
      k: args.k,
      modes: args.modes,
      aggregate: aggregated,
      by_question_type: byType,
      results,
      elapsed_seconds: (Date.now() - t0) / 1000,
    };
    const summary = renderSummary(json);
    writeFileSync(join(outDir, "results.json"), JSON.stringify(json, null, 2));
    writeFileSync(join(outDir, "summary.md"), summary);
    console.log(`longmemeval-harness: wrote ${outDir}/`);
    console.log(summary);
  } finally {
    // Close the cache database on failure paths as well.
    cache.close();
  }
}
265
-
266
/**
 * Render the run report as Markdown: a title, the dataset path and elapsed
 * time, an aggregate table with one row per mode that has aggregate stats,
 * and a per-question-type table (types sorted, one column per mode, "—"
 * where a mode has no stats for that type).
 *
 * @param json - Report payload; only the fields listed in the type are read.
 * @returns The Markdown text, ending with a newline.
 */
function renderSummary(json: {
  dataset: string;
  n: number;
  k: number;
  modes: ReadonlyArray<RecallMode>;
  aggregate: Record<string, ReturnType<typeof aggregate>>;
  by_question_type: Record<string, Record<string, ReturnType<typeof aggregate>>>;
  elapsed_seconds: number;
}): string {
  // Format a 0..1 rate as a percentage with one decimal.
  const pct = (rate: number): string => `${(rate * 100).toFixed(1)}%`;

  const aggregateRows = json.modes.flatMap((mode) => {
    const stats = json.aggregate[mode];
    return stats
      ? [`| ${mode} | ${pct(stats.recallAtK)} | ${pct(stats.sessionBodyHitRate)} |`]
      : [];
  });

  const typeRows = Object.keys(json.by_question_type)
    .sort()
    .map((type) => {
      const perMode = json.by_question_type[type]!;
      const cells = json.modes.map((mode) => {
        const stats = perMode[mode];
        return stats ? `${pct(stats.recallAtK)} (n=${stats.n})` : "—";
      });
      return `| ${type} | ${cells.join(" | ")} |`;
    });

  return [
    `# LongMemEval-S — NLM baseline (body-only, n=${json.n}, k=${json.k})`,
    "",
    `Dataset: \`${json.dataset}\``,
    `Elapsed: ${json.elapsed_seconds.toFixed(1)}s`,
    "",
    "## Aggregate",
    "",
    `| Mode | R@${json.k} | Session-body hit |`,
    `| --- | --- | --- |`,
    ...aggregateRows,
    "",
    "## By question type",
    "",
    `| Question type | ${json.modes.map((m) => `${m} R@${json.k}`).join(" | ")} |`,
    `| --- | ${json.modes.map(() => "---").join(" | ")} |`,
    ...typeRows,
    "",
  ].join("\n");
}
311
-
312
/** Log a fatal harness error and terminate the process with exit code 1. */
function reportFatal(err: unknown): never {
  console.error("longmemeval-harness: fatal", err);
  process.exit(1);
}

// Script entry point: run the harness; any rejection is fatal.
void main().catch(reportFatal);