nlm-memory 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/README.md +72 -34
  2. package/dist/cli/nlm.js +2 -1
  3. package/dist/cli/nlm.js.map +1 -1
  4. package/dist/http/app.js +2 -1
  5. package/dist/http/app.js.map +1 -1
  6. package/dist/mcp/server.js +20 -1
  7. package/dist/mcp/server.js.map +1 -1
  8. package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
  9. package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
  10. package/dist/ui/index.html +2 -2
  11. package/package.json +26 -1
  12. package/.agents/plugins/marketplace.json +0 -20
  13. package/.github/workflows/ci.yml +0 -30
  14. package/docs/methodology/re-derivation-rate.md +0 -112
  15. package/docs/methodology/useful-hit-rate.md +0 -79
  16. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  17. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  18. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  19. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  20. package/docs/plans/desktop-product.md +0 -69
  21. package/docs/plans/factstore-design.md +0 -236
  22. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
  23. package/logs/CHANGELOG/CHANGELOG.md +0 -209
  24. package/migrations/000_initial_schema.sql +0 -174
  25. package/migrations/001_entity_type_rename.sql +0 -17
  26. package/migrations/002_adapter_state_extend.sql +0 -12
  27. package/migrations/003_session_embeddings.sql +0 -11
  28. package/migrations/004_facts.sql +0 -46
  29. package/migrations/005_sources.sql +0 -31
  30. package/migrations/006_providers.sql +0 -33
  31. package/migrations/007_source_tokens.sql +0 -17
  32. package/migrations/008_fts_rebuild.sql +0 -9
  33. package/migrations/009_session_embedding_chunks.sql +0 -46
  34. package/migrations/010_sources_opencode.sql +0 -30
  35. package/migrations/011_sources_hermes_agent.sql +0 -30
  36. package/migrations/012_sources_aider.sql +0 -30
  37. package/migrations/013_adapter_state_failure_count.sql +0 -12
  38. package/migrations/014_sources_cursor.sql +0 -30
  39. package/migrations/015_sources_windsurf.sql +0 -30
  40. package/plugin-hermes-agent/README.md +0 -49
  41. package/plugin-hermes-agent/__init__.py +0 -75
  42. package/plugin-hermes-agent/plugin.yaml +0 -15
  43. package/scripts/backfill-citations.mjs +0 -0
  44. package/scripts/build-codex-plugin.mjs +0 -61
  45. package/scripts/deepseek-probe.mjs +0 -67
  46. package/scripts/extract-triples.mjs +0 -207
  47. package/scripts/longmemeval/embedding-cache.ts +0 -77
  48. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  49. package/scripts/longmemeval/run-harness.ts +0 -315
  50. package/scripts/longmemeval/scorer.ts +0 -99
  51. package/scripts/longmemeval/tsconfig.json +0 -9
  52. package/scripts/longmemeval/types.ts +0 -35
  53. package/scripts/nlm-daily-digest.py +0 -239
  54. package/scripts/nlm-daily-digest.sh +0 -28
  55. package/src/cli/classify-parity.ts +0 -257
  56. package/src/cli/launchctl-helpers.ts +0 -49
  57. package/src/cli/nlm.ts +0 -1078
  58. package/src/core/actions/actions-log.ts +0 -118
  59. package/src/core/actions/overlay.ts +0 -117
  60. package/src/core/adapters/aider.ts +0 -205
  61. package/src/core/adapters/claude-code.ts +0 -293
  62. package/src/core/adapters/common.ts +0 -54
  63. package/src/core/adapters/cursor.ts +0 -486
  64. package/src/core/adapters/from-source.ts +0 -67
  65. package/src/core/adapters/hermes-agent.ts +0 -240
  66. package/src/core/adapters/hermes.ts +0 -277
  67. package/src/core/adapters/jsonl-generic.ts +0 -208
  68. package/src/core/adapters/opencode.ts +0 -281
  69. package/src/core/adapters/pi.ts +0 -264
  70. package/src/core/adapters/windsurf.ts +0 -386
  71. package/src/core/classifier/prompt.ts +0 -200
  72. package/src/core/dataset/build-dataset.ts +0 -463
  73. package/src/core/embedding/chunk-body.ts +0 -76
  74. package/src/core/embedding/embed-backfill.ts +0 -210
  75. package/src/core/embedding/embed-normalize.ts +0 -135
  76. package/src/core/facts/backfill-facts.ts +0 -254
  77. package/src/core/facts/extract-facts.ts +0 -50
  78. package/src/core/hook/citation-detect.ts +0 -124
  79. package/src/core/hook/cite-memo.ts +0 -68
  80. package/src/core/hook/claude-settings.ts +0 -187
  81. package/src/core/hook/gate.ts +0 -25
  82. package/src/core/hook/hook-log.ts +0 -41
  83. package/src/core/hook/memo-sweep.ts +0 -164
  84. package/src/core/hook/memo.ts +0 -67
  85. package/src/core/hook/pointer-block.ts +0 -26
  86. package/src/core/hook/select.ts +0 -32
  87. package/src/core/hook/transcript.ts +0 -121
  88. package/src/core/ingest/ingest-session.ts +0 -111
  89. package/src/core/providers/provider-models.ts +0 -100
  90. package/src/core/providers/provider-registry.ts +0 -196
  91. package/src/core/recall/citation-log.ts +0 -108
  92. package/src/core/recall/filter.ts +0 -27
  93. package/src/core/recall/index.ts +0 -6
  94. package/src/core/recall/match-fields.ts +0 -40
  95. package/src/core/recall/query-log.ts +0 -149
  96. package/src/core/recall/query-shape.ts +0 -66
  97. package/src/core/recall/recall-service.ts +0 -320
  98. package/src/core/recall/recent-log.ts +0 -59
  99. package/src/core/recall/tokenize.ts +0 -18
  100. package/src/core/recall/useful-scan.ts +0 -336
  101. package/src/core/recall-facts/fact-query-log.ts +0 -150
  102. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  103. package/src/core/scheduler/scan-once.ts +0 -142
  104. package/src/core/scheduler/scheduler.ts +0 -225
  105. package/src/core/sources/source-registry.ts +0 -278
  106. package/src/core/storage/db-restore.ts +0 -133
  107. package/src/core/storage/live-status.ts +0 -45
  108. package/src/core/storage/migrate.ts +0 -72
  109. package/src/core/storage/sqlite-fact-store.ts +0 -304
  110. package/src/core/storage/sqlite-session-store.ts +0 -810
  111. package/src/hook/hook-auth.ts +0 -18
  112. package/src/hook/prompt-recall-hook.ts +0 -180
  113. package/src/hook/session-end-hook.ts +0 -81
  114. package/src/hook/session-start-hook.ts +0 -168
  115. package/src/hook/stop-hook.ts +0 -239
  116. package/src/http/app.ts +0 -1215
  117. package/src/install/claude-code.ts +0 -128
  118. package/src/install/codex.ts +0 -367
  119. package/src/install/cursor.ts +0 -68
  120. package/src/install/hermes-agent.ts +0 -76
  121. package/src/install/hermes.ts +0 -78
  122. package/src/install/nlm-dir-perms.ts +0 -55
  123. package/src/install/ollama.ts +0 -284
  124. package/src/install/setup.ts +0 -489
  125. package/src/install/windsurf.ts +0 -68
  126. package/src/llm/classifier-box.ts +0 -64
  127. package/src/llm/deepseek-client.ts +0 -150
  128. package/src/llm/env-autoload.ts +0 -55
  129. package/src/llm/ollama-client.ts +0 -189
  130. package/src/mcp/server.ts +0 -534
  131. package/src/ports/fact-store.ts +0 -102
  132. package/src/ports/llm-client.ts +0 -52
  133. package/src/ports/logger.ts +0 -16
  134. package/src/ports/session-store.ts +0 -45
  135. package/src/ports/transcript-adapter.ts +0 -55
  136. package/src/shared/types.ts +0 -149
  137. package/src/ui/App.tsx +0 -58
  138. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  139. package/src/ui/components/SessionDrawer.tsx +0 -199
  140. package/src/ui/components/SideNav.tsx +0 -162
  141. package/src/ui/components/Skeleton.tsx +0 -107
  142. package/src/ui/index.html +0 -13
  143. package/src/ui/lib/actions.ts +0 -30
  144. package/src/ui/lib/api.ts +0 -92
  145. package/src/ui/lib/dataset.ts +0 -141
  146. package/src/ui/lib/registries.ts +0 -155
  147. package/src/ui/lib/view-settings.ts +0 -41
  148. package/src/ui/main.tsx +0 -15
  149. package/src/ui/pages/Live.tsx +0 -229
  150. package/src/ui/pages/Pulse.tsx +0 -415
  151. package/src/ui/pages/Recall.tsx +0 -190
  152. package/src/ui/pages/River.tsx +0 -354
  153. package/src/ui/pages/Search.tsx +0 -386
  154. package/src/ui/pages/Stub.tsx +0 -9
  155. package/src/ui/pages/Thread.tsx +0 -473
  156. package/src/ui/pages/settings/Classifier.tsx +0 -227
  157. package/src/ui/pages/settings/Data.tsx +0 -190
  158. package/src/ui/pages/settings/Index.tsx +0 -65
  159. package/src/ui/pages/settings/Labels.tsx +0 -224
  160. package/src/ui/pages/settings/Providers.tsx +0 -305
  161. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  162. package/src/ui/pages/settings/Sources.tsx +0 -326
  163. package/src/ui/pages/settings/Views.tsx +0 -96
  164. package/src/ui/styles.css +0 -1890
  165. package/src/ui/tsconfig.json +0 -21
  166. package/src/ui/vite.config.ts +0 -19
  167. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  168. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  169. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  170. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  171. package/tests/fixtures/facts.ts +0 -17
  172. package/tests/fixtures/golden-corpus.ts +0 -85
  173. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  174. package/tests/fixtures/hermes/paired_session.json +0 -23
  175. package/tests/fixtures/hermes/request_dump.json +0 -28
  176. package/tests/fixtures/hermes/session_iso.json +0 -38
  177. package/tests/fixtures/hermes/session_unix.json +0 -38
  178. package/tests/fixtures/hermes/system_only.json +0 -18
  179. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  180. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  181. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  182. package/tests/fixtures/sessions.ts +0 -22
  183. package/tests/integration/backfill-facts.test.ts +0 -362
  184. package/tests/integration/citation-explicit.test.ts +0 -111
  185. package/tests/integration/cite-event.test.ts +0 -169
  186. package/tests/integration/cite-memo.test.ts +0 -87
  187. package/tests/integration/db-restore.test.ts +0 -153
  188. package/tests/integration/embed-backfill.test.ts +0 -176
  189. package/tests/integration/fact-supersedence.test.ts +0 -313
  190. package/tests/integration/fts-index.test.ts +0 -60
  191. package/tests/integration/getbyids-sqlite.test.ts +0 -100
  192. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  193. package/tests/integration/hook-claude-settings.test.ts +0 -218
  194. package/tests/integration/hook-log.test.ts +0 -54
  195. package/tests/integration/hook-memo.test.ts +0 -68
  196. package/tests/integration/hook-pre-compact.test.ts +0 -105
  197. package/tests/integration/hook-subagent-start.test.ts +0 -102
  198. package/tests/integration/http.test.ts +0 -401
  199. package/tests/integration/keyword-search-fts.test.ts +0 -66
  200. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  201. package/tests/integration/mcp.test.ts +0 -260
  202. package/tests/integration/memo-sweep.test.ts +0 -91
  203. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  204. package/tests/integration/provider-registry.test.ts +0 -107
  205. package/tests/integration/recall-golden.test.ts +0 -59
  206. package/tests/integration/recall-sqlite.test.ts +0 -169
  207. package/tests/integration/scheduler.test.ts +0 -391
  208. package/tests/integration/session-end-hook.test.ts +0 -48
  209. package/tests/integration/session-start-hook.test.ts +0 -126
  210. package/tests/integration/source-registry.test.ts +0 -122
  211. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  212. package/tests/integration/stop-hook.test.ts +0 -560
  213. package/tests/integration/wal-checkpoint.test.ts +0 -49
  214. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  215. package/tests/unit/core/adapters/aider.test.ts +0 -230
  216. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  217. package/tests/unit/core/adapters/cursor.test.ts +0 -485
  218. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  219. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  220. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  221. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  222. package/tests/unit/core/adapters/pi.test.ts +0 -110
  223. package/tests/unit/core/adapters/windsurf.test.ts +0 -416
  224. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  225. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  226. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  227. package/tests/unit/core/filter.test.ts +0 -40
  228. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  229. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  230. package/tests/unit/core/hook/gate.test.ts +0 -29
  231. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  232. package/tests/unit/core/hook/select.test.ts +0 -66
  233. package/tests/unit/core/match-fields.test.ts +0 -39
  234. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  235. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  236. package/tests/unit/core/query-shape.test.ts +0 -92
  237. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  238. package/tests/unit/core/recall-service.test.ts +0 -200
  239. package/tests/unit/core/storage/live-status.test.ts +0 -54
  240. package/tests/unit/core/tokenize.test.ts +0 -32
  241. package/tests/unit/core/useful-scan.test.ts +0 -537
  242. package/tests/unit/llm/embed.test.ts +0 -93
  243. package/tests/unit/llm/ollama-client.test.ts +0 -124
  244. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  245. package/tsconfig.json +0 -31
  246. package/tsconfig.test.json +0 -11
  247. package/vitest.config.ts +0 -22
@@ -1,50 +0,0 @@
1
- /**
2
- * extractFacts — pure transform from ClassifyResult to Fact[].
3
- *
4
- * Lives in core/, has no framework imports, no clock or randomness coupling
5
- * (id generator and timestamp are injected so tests are deterministic).
6
- * Phase B.2 — see docs/plans/factstore-design.md Section 3.
7
- *
8
- * Confidence policy (Section 3 of the plan): facts inherit the session-level
9
- * confidence verbatim. Below 0.4 the function returns an empty array — the
10
- * session still ingests with markers, but its facts are dropped as
11
- * extraction-quality noise. Between 0.4 and 0.6 facts are written but will
12
- * be filtered out of recall by the FactStore default `minConfidence: 0.6`.
13
- */
14
-
15
- import { randomUUID } from "node:crypto";
16
- import type { ClassifyResult } from "@ports/llm-client.js";
17
- import type { Fact } from "@shared/types.js";
18
-
19
- const CONFIDENCE_FLOOR = 0.4;
20
-
21
- export interface ExtractFactsOptions {
22
- /** Generator for fact ids. Defaults to `fact_<randomUUID()>`. */
23
- readonly idGenerator?: () => string;
24
- }
25
-
26
- export function extractFacts(
27
- result: ClassifyResult,
28
- sessionId: string,
29
- createdAt: string,
30
- opts: ExtractFactsOptions = {},
31
- ): Fact[] {
32
- if (result.confidence < CONFIDENCE_FLOOR) return [];
33
- const genId = opts.idGenerator ?? (() => `fact_${randomUUID()}`);
34
- const out: Fact[] = [];
35
- for (const raw of result.facts) {
36
- out.push({
37
- id: genId(),
38
- kind: raw.kind,
39
- subject: raw.subject,
40
- predicate: raw.predicate,
41
- value: raw.value,
42
- sourceSessionId: sessionId,
43
- sourceQuote: raw.sourceQuote ?? null,
44
- createdAt,
45
- supersededBy: null,
46
- confidence: result.confidence,
47
- });
48
- }
49
- return out;
50
- }
@@ -1,124 +0,0 @@
1
- /**
2
- * Detects which surfaced recall IDs an assistant turn cited.
3
- *
4
- * Two channels, ordered by signal strength:
5
- * - tool_use: the model invoked an MCP NLM tool (get_session, recall_facts,
6
- * get_fact_history, recall_sessions) whose input references a
7
- * surfaced ID. This is the strong "the model dug into the
8
- * surfaced session" signal. Almost no false positives.
9
- * - prose: the surfaced ID appears as a substring in the response text.
10
- * Models rarely echo session IDs verbatim, so this channel
11
- * fires in practice almost never — kept for completeness.
12
- *
13
- * Returns both the union of cited IDs and the per-ID channel so the citation
14
- * log can carry kind metadata. ID minimum length keeps generic short tokens
15
- * from false-positiving against either channel.
16
- *
17
- * This is the training-data substrate for a future learned reranker.
18
- */
19
-
20
- import type { ToolUseBlock } from "./transcript.js";
21
-
22
- const MIN_ID_LEN = 6;
23
-
24
- export type CitationKind = "tool_use" | "prose";
25
-
26
- export interface CitationDetectInput {
27
- readonly responseText: string;
28
- readonly toolUses: ReadonlyArray<ToolUseBlock>;
29
- readonly surfacedIds: Iterable<string>;
30
- }
31
-
32
- export interface DetectedCitation {
33
- readonly id: string;
34
- readonly kind: CitationKind;
35
- }
36
-
37
- export function detectCitations(input: CitationDetectInput): DetectedCitation[] {
38
- const surfaced: string[] = [];
39
- const seen = new Set<string>();
40
- for (const id of input.surfacedIds) {
41
- if (id.length < MIN_ID_LEN) continue;
42
- if (seen.has(id)) continue;
43
- seen.add(id);
44
- surfaced.push(id);
45
- }
46
-
47
- const cited: DetectedCitation[] = [];
48
- const claimedByToolUse = new Set<string>();
49
-
50
- // Channel A: tool_use. Two sub-cases:
51
- //
52
- // A1: cite_session — the model called the explicit citation primitive with
53
- // the session ID in tu.input.id. Strongest possible signal: structured,
54
- // deterministic, zero ambiguity. ID must be a surfaced session ID.
55
- //
56
- // A2: other NLM tools (get_session, recall_sessions, recall_facts,
57
- // get_fact_history) — stringify the input and substring-scan for surfaced
58
- // IDs. These tools accept ids via top-level fields, so the serialization
59
- // always includes the id when used.
60
- for (const tu of input.toolUses) {
61
- if (!isNlmTool(tu.name)) continue;
62
- if (isCiteSessionTool(tu.name)) {
63
- // A1: explicit cite_session call — the MCP server handler already wrote
64
- // this citation directly to the citation log (citeSessionHandler →
65
- // appendCitation). Detecting it here again would produce a second log
66
- // entry for the same model action (double-count). Skip so the Stop hook
67
- // only captures implicit citations the MCP handler didn't see.
68
- continue;
69
- }
70
- // A2: other NLM tools — serialize and substring-scan.
71
- const serialized = safeStringify(tu.input);
72
- if (!serialized) continue;
73
- for (const id of surfaced) {
74
- if (claimedByToolUse.has(id)) continue;
75
- if (serialized.includes(id)) {
76
- cited.push({ id, kind: "tool_use" });
77
- claimedByToolUse.add(id);
78
- }
79
- }
80
- }
81
-
82
- // Channel B: prose. Only emit if the tool_use channel didn't already
83
- // claim this id — same id shouldn't double-count.
84
- if (input.responseText) {
85
- for (const id of surfaced) {
86
- if (claimedByToolUse.has(id)) continue;
87
- if (input.responseText.includes(id)) {
88
- cited.push({ id, kind: "prose" });
89
- }
90
- }
91
- }
92
- return cited;
93
- }
94
-
95
- /** Back-compat: prose-only detector returning a flat id list. */
96
- export function detectCitedIds(
97
- responseText: string,
98
- surfacedIds: Iterable<string>,
99
- ): string[] {
100
- return detectCitations({
101
- responseText,
102
- toolUses: [],
103
- surfacedIds,
104
- }).map((c) => c.id);
105
- }
106
-
107
- function isNlmTool(name: string): boolean {
108
- // Claude Code namespaces MCP tools as `mcp__<server>__<tool>`. The NLM
109
- // server name is "nlm-memory" in the user's .mcp.json today; accept any
110
- // server name containing "nlm" so future renames stay covered.
111
- return /^mcp__[^_]*nlm[^_]*__/.test(name);
112
- }
113
-
114
- function isCiteSessionTool(name: string): boolean {
115
- return name.endsWith("__cite_session");
116
- }
117
-
118
- function safeStringify(value: unknown): string {
119
- try {
120
- return JSON.stringify(value);
121
- } catch {
122
- return "";
123
- }
124
- }
@@ -1,68 +0,0 @@
1
- /**
2
- * Per-conversation dedup memo for the Stop hook's citation detector.
3
- *
4
- * The Stop hook scans the full transcript every fire, so a long conversation
5
- * with repeated Stop firings would otherwise re-detect the same tool_use
6
- * citations every turn and double-count them in the citation log. This memo
7
- * holds the set of (conversationId, citedId) pairs already posted, so each
8
- * citation lands exactly once regardless of how many times Stop fires.
9
- *
10
- * Storage parallels the surfaced-memo (`memo.ts`): same state directory
11
- * (`~/.nlm/hook-state/`, overridable via NLM_HOOK_STATE_DIR), filename suffix
12
- * `.cited.json` to distinguish from the surfaced memo's `.json`. The existing
13
- * memo-sweep walks the directory by mtime and cleans both files together.
14
- *
15
- * Defensive: a missing or corrupt file yields an empty set; a write failure
16
- * is swallowed. Telemetry path — must never break the hook.
17
- */
18
-
19
- import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
20
- import { homedir } from "node:os";
21
- import { join } from "node:path";
22
-
23
- function stateDir(): string {
24
- return process.env["NLM_HOOK_STATE_DIR"] ?? join(homedir(), ".nlm", "hook-state");
25
- }
26
-
27
- function memoPath(conversationId: string): string {
28
- const safe = conversationId.replace(/[^A-Za-z0-9_-]/g, "_") || "unknown";
29
- return join(stateDir(), `${safe}.cited.json`);
30
- }
31
-
32
- export function loadCited(conversationId: string): Set<string> {
33
- try {
34
- const path = memoPath(conversationId);
35
- if (!existsSync(path)) return new Set();
36
- const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
37
- if (!Array.isArray(parsed)) return new Set();
38
- return new Set(parsed.filter((x): x is string => typeof x === "string"));
39
- } catch {
40
- return new Set();
41
- }
42
- }
43
-
44
- export function recordCited(
45
- conversationId: string,
46
- ids: ReadonlyArray<string>,
47
- ): void {
48
- if (ids.length === 0) return;
49
- try {
50
- const merged = loadCited(conversationId);
51
- for (const id of ids) merged.add(id);
52
- mkdirSync(stateDir(), { recursive: true });
53
- writeFileSync(memoPath(conversationId), JSON.stringify([...merged]), "utf8");
54
- } catch {
55
- // Memo write failure must never break the hook.
56
- }
57
- }
58
-
59
- export function clearCited(conversationId: string): boolean {
60
- try {
61
- const path = memoPath(conversationId);
62
- if (!existsSync(path)) return false;
63
- rmSync(path);
64
- return true;
65
- } catch {
66
- return false;
67
- }
68
- }
@@ -1,187 +0,0 @@
1
- /**
2
- * Adds/removes NLM hook entries in a Claude Code settings.json.
3
- *
4
- * NLM-owned entries are identified by HOOK_SCRIPT_MARKERS. add is idempotent
5
- * (replaces any prior NLM entry for the same event); remove strips only NLM
6
- * entries and preserves everything else.
7
- */
8
-
9
- import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
10
- import { dirname } from "node:path";
11
- import { spawnSync } from "node:child_process";
12
-
13
- // Every NLM hook script ends in `-hook.js`. We tag entries we own by
14
- // matching the filename suffix against this list. Add new entries here
15
- // when a new hook script ships.
16
- const HOOK_SCRIPT_MARKERS = [
17
- "prompt-recall-hook.js",
18
- "session-end-hook.js",
19
- "stop-hook.js",
20
- "session-start-hook.js",
21
- "pre-compact-hook.js",
22
- "subagent-start-hook.js",
23
- ] as const;
24
-
25
- /**
26
- * Single-quote a shell argument so paths with spaces or other shell
27
- * metacharacters survive `sh -c` tokenization. Without this, a path like
28
- * `/Users/echalupa/Documents/Coding Projects/...` is split on whitespace
29
- * and node receives the wrong argv — silent hook bricking.
30
- */
31
- export function shellQuote(arg: string): string {
32
- return `'${arg.replace(/'/g, "'\\''")}'`;
33
- }
34
-
35
- /**
36
- * Double-quote a cmd.exe argument. Embedded double quotes are doubled per
37
- * cmd.exe parsing rules. Used for hook commands on Windows where Claude
38
- * Code dispatches via cmd.exe /c rather than sh -c.
39
- */
40
- export function cmdQuote(arg: string): string {
41
- return `"${arg.replace(/"/g, '""')}"`;
42
- }
43
-
44
- export function buildHookCommand(
45
- execPath: string,
46
- hookJs: string,
47
- mode: "shadow" | "live",
48
- targetPlatform: NodeJS.Platform = process.platform,
49
- ): string {
50
- if (targetPlatform === "win32") {
51
- // cmd.exe: `set VAR=val && "exec" "script"`. The set is scoped to the
52
- // cmd /c invocation so the env var is visible to the chained child.
53
- return `set NLM_HOOK_MODE=${mode} && ${cmdQuote(execPath)} ${cmdQuote(hookJs)}`;
54
- }
55
- return `NLM_HOOK_MODE=${mode} ${shellQuote(execPath)} ${shellQuote(hookJs)}`;
56
- }
57
-
58
- export interface SmokeTestResult {
59
- readonly ok: boolean;
60
- readonly reason?: string;
61
- readonly stderr?: string;
62
- }
63
-
64
- /**
65
- * Invoke the wired command exactly the way Claude Code does (sh -c on
66
- * POSIX, cmd.exe /c on Windows) with JSON on stdin and confirm the hook
67
- * log gained an entry. Catches the class of failures where settings.json
68
- * looks valid but the hook fails at startup (path tokenization, missing
69
- * modules, missing shell, etc.).
70
- */
71
- export function smokeTestHookCommand(
72
- command: string,
73
- hookLogPath: string,
74
- timeoutMs = 5000,
75
- ): SmokeTestResult {
76
- const sizeBefore = existsSync(hookLogPath) ? statSync(hookLogPath).size : 0;
77
- const isWin = process.platform === "win32";
78
- const result = spawnSync(
79
- isWin ? "cmd.exe" : "sh",
80
- [isWin ? "/c" : "-c", command],
81
- {
82
- input: JSON.stringify({ prompt: "smoke test", session_id: "install-smoke" }),
83
- timeout: timeoutMs,
84
- encoding: "utf8",
85
- },
86
- );
87
- if (result.error) {
88
- return { ok: false, reason: `spawn failed: ${result.error.message}` };
89
- }
90
- if (result.status !== 0) {
91
- return {
92
- ok: false,
93
- reason: `exit code ${result.status ?? "null"}`,
94
- stderr: result.stderr,
95
- };
96
- }
97
- const sizeAfter = existsSync(hookLogPath) ? statSync(hookLogPath).size : 0;
98
- if (sizeAfter <= sizeBefore) {
99
- return {
100
- ok: false,
101
- reason: `no entry appended to ${hookLogPath}`,
102
- stderr: result.stderr,
103
- };
104
- }
105
- return { ok: true };
106
- }
107
-
108
- export type ClaudeHookEvent =
109
- | "UserPromptSubmit"
110
- | "SessionStart"
111
- | "SessionEnd"
112
- | "Stop"
113
- | "PreCompact"
114
- | "SubagentStart"
115
- | "PostToolUse"
116
- | "PreToolUse";
117
-
118
- interface HookCommand {
119
- readonly type: string;
120
- readonly command: string;
121
- }
122
- interface HookEntry {
123
- readonly hooks: ReadonlyArray<HookCommand>;
124
- }
125
- interface ClaudeSettings {
126
- hooks?: Record<string, HookEntry[]>;
127
- [key: string]: unknown;
128
- }
129
-
130
- function read(path: string): ClaudeSettings {
131
- if (!existsSync(path)) return {};
132
- const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
133
- if (typeof parsed !== "object" || parsed === null) {
134
- throw new Error(`Claude settings at ${path} is not a JSON object`);
135
- }
136
- return parsed as ClaudeSettings;
137
- }
138
-
139
- function write(path: string, settings: ClaudeSettings): void {
140
- mkdirSync(dirname(path), { recursive: true });
141
- writeFileSync(path, `${JSON.stringify(settings, null, 2)}\n`, "utf8");
142
- }
143
-
144
- function isNlmEntry(entry: HookEntry): boolean {
145
- return entry.hooks.some((h) =>
146
- HOOK_SCRIPT_MARKERS.some((marker) => h.command.includes(marker)),
147
- );
148
- }
149
-
150
- export function addHook(
151
- settingsPath: string,
152
- command: string,
153
- event: ClaudeHookEvent = "UserPromptSubmit",
154
- ): void {
155
- const settings = read(settingsPath);
156
- const hooks = settings.hooks ?? {};
157
- const existing = hooks[event] ?? [];
158
- const others = existing.filter((e) => !isNlmEntry(e));
159
- const next: HookEntry[] = [
160
- ...others,
161
- { hooks: [{ type: "command", command }] },
162
- ];
163
- write(settingsPath, { ...settings, hooks: { ...hooks, [event]: next } });
164
- }
165
-
166
- /**
167
- * Remove the NLM-tagged hook entry from one event (default UserPromptSubmit)
168
- * or every event when `event === "*"`. Leaves unrelated entries untouched.
169
- */
170
- export function removeHook(
171
- settingsPath: string,
172
- event: ClaudeHookEvent | "*" = "UserPromptSubmit",
173
- ): void {
174
- if (!existsSync(settingsPath)) return;
175
- const settings = read(settingsPath);
176
- const allHooks = settings.hooks ?? {};
177
- const events: string[] = event === "*" ? Object.keys(allHooks) : [event];
178
- const nextHooks: Record<string, HookEntry[]> = { ...allHooks };
179
- for (const ev of events) {
180
- const existing = nextHooks[ev];
181
- if (!existing) continue;
182
- const kept = existing.filter((e) => !isNlmEntry(e));
183
- if (kept.length > 0) nextHooks[ev] = kept;
184
- else delete nextHooks[ev];
185
- }
186
- write(settingsPath, { ...settings, hooks: nextHooks });
187
- }
@@ -1,25 +0,0 @@
1
- /**
2
- * Prompt gate for the recall hook. Pure — no I/O.
3
- *
4
- * A conservative generative *excluder*: the default is "evaluate" (query
5
- * recall); only high-precision generative openers short-circuit to
6
- * "generative". A false "generative" wrongly skips recall — the exact
7
- * failure this feature fixes — so the generative set is deliberately tight.
8
- * It is calibrated further against shadow-mode logs.
9
- */
10
-
11
- export type PromptClass = "generative" | "evaluate";
12
-
13
- const LEADING_FILLER =
14
- /^(please|can you|could you|would you|will you|i need you to|i'd like you to|i want you to|i would like you to|help me|let's|lets|hey|ok|okay)\b[\s,]*/i;
15
-
16
- const GENERATIVE_OPENER =
17
- /^(write|draft|create|compose|generate|brainstorm|design|outline|sketch|invent|rename|come up with)\b/i;
18
-
19
- export function classifyPrompt(prompt: string): PromptClass {
20
- let p = prompt.trim();
21
- for (let i = 0; i < 3 && LEADING_FILLER.test(p); i++) {
22
- p = p.replace(LEADING_FILLER, "");
23
- }
24
- return GENERATIVE_OPENER.test(p) ? "generative" : "evaluate";
25
- }
@@ -1,41 +0,0 @@
1
- /**
2
- * Append-only JSONL log for the recall hook. One line per prompt the hook
3
- * evaluated. This is the dataset the relevance gate (generative patterns +
4
- * score threshold) is calibrated against during the shadow window.
5
- *
6
- * Path defaults to ~/.nlm/hook-log.jsonl, overridable via NLM_HOOK_LOG.
7
- * appendHookLog swallows its own errors — telemetry must never break the hook.
8
- * Uses synchronous I/O: the hook is a short-lived per-prompt process, and an
9
- * async write could be lost if the process exits before it flushes.
10
- */
11
-
12
- import { appendFileSync, mkdirSync } from "node:fs";
13
- import { homedir } from "node:os";
14
- import { dirname, join } from "node:path";
15
- import type { PromptClass } from "./gate.js";
16
-
17
- export interface HookLogEntry {
18
- readonly ts: string;
19
- readonly conversationId: string;
20
- readonly promptPreview: string;
21
- readonly gate: PromptClass;
22
- readonly hits: ReadonlyArray<{ readonly id: string; readonly score: number }>;
23
- readonly wouldInject: ReadonlyArray<string>;
24
- readonly estTokens: number;
25
- readonly mode: "shadow" | "live";
26
- }
27
-
28
- function logPath(): string {
29
- return process.env["NLM_HOOK_LOG"] ?? join(homedir(), ".nlm", "hook-log.jsonl");
30
- }
31
-
32
- export function appendHookLog(entry: HookLogEntry): void {
33
- try {
34
- const path = logPath();
35
- mkdirSync(dirname(path), { recursive: true });
36
- // Sync I/O: hook is a short-lived process — async write could be lost on exit.
37
- appendFileSync(path, `${JSON.stringify(entry)}\n`, "utf8");
38
- } catch {
39
- // Telemetry failure must never break the hook.
40
- }
41
- }
@@ -1,164 +0,0 @@
1
- /**
2
- * Idle sweep for per-conversation hook memo files.
3
- *
4
- * The SessionEnd hook is best-effort — Claude Code doesn't fire it on
5
- * crashes, kill -9, or IDE force-close. Without a backstop, memo files
6
- * at ~/.nlm/hook-state/<conv>.json accumulate forever for any session
7
- * that didn't close cleanly.
8
- *
9
- * This sweep is the daemon-side backstop. It runs on a timer, scans the
10
- * state directory, and deletes any memo whose mtime is older than the
11
- * dormant threshold. Reuses the same `age > day` threshold the dataset
12
- * builder uses to mark runtimes as "dormant" so the UI/dataset semantics
13
- * stay consistent across the system.
14
- *
15
- * Hooks are fast-path; this is the always-correct backstop.
16
- */
17
-
18
- import { existsSync, readdirSync, rmSync, statSync } from "node:fs";
19
- import { homedir } from "node:os";
20
- import { join } from "node:path";
21
-
22
- // Mirrors the dormant threshold in build-dataset.ts:
23
- // age <= hour → "active"
24
- // age <= day → "idle"
25
- // age > day → "dormant"
26
- // We sweep memos that are dormant.
27
- const DEFAULT_DORMANT_MS = 24 * 60 * 60 * 1000;
28
- const DEFAULT_INTERVAL_MS = 5 * 60 * 1000;
29
-
30
- export interface MemoSweepOptions {
31
- /** Directory holding per-conversation memo files. Defaults to ~/.nlm/hook-state/. */
32
- readonly stateDir?: string;
33
- /** Age threshold in ms beyond which a memo is swept. Default 24h (dormant). */
34
- readonly dormantMs?: number;
35
- /** Tick interval in ms. Default 5 min. */
36
- readonly intervalMs?: number;
37
- /** Defaults to console.error. Set to a noop in tests. */
38
- readonly logger?: (msg: string) => void;
39
- /** Override for time source — for deterministic tests. */
40
- readonly now?: () => number;
41
- }
42
-
43
- export interface SweepReport {
44
- readonly scanned: number;
45
- readonly deleted: number;
46
- readonly kept: number;
47
- readonly errors: number;
48
- }
49
-
50
- function defaultStateDir(): string {
51
- return process.env["NLM_HOOK_STATE_DIR"] ?? join(homedir(), ".nlm", "hook-state");
52
- }
53
-
54
- /**
55
- * One-shot sweep. Returns the report; safe to call from tests or one-off
56
- * CLI invocations without standing up the scheduler.
57
- */
58
- export function sweepMemoDir(opts: MemoSweepOptions = {}): SweepReport {
59
- const stateDir = opts.stateDir ?? defaultStateDir();
60
- const dormantMs = opts.dormantMs ?? DEFAULT_DORMANT_MS;
61
- const now = opts.now ?? Date.now;
62
- const logger = opts.logger ?? ((msg) => console.error(msg));
63
-
64
- if (!existsSync(stateDir)) {
65
- return { scanned: 0, deleted: 0, kept: 0, errors: 0 };
66
- }
67
-
68
- let entries: ReadonlyArray<string>;
69
- try {
70
- entries = readdirSync(stateDir);
71
- } catch (e) {
72
- logger(`[memo-sweep] readdir failed for ${stateDir}: ${e instanceof Error ? e.message : String(e)}`);
73
- return { scanned: 0, deleted: 0, kept: 0, errors: 1 };
74
- }
75
-
76
- const cutoff = now() - dormantMs;
77
- let deleted = 0;
78
- let kept = 0;
79
- let errors = 0;
80
-
81
- for (const name of entries) {
82
- if (!name.endsWith(".json")) {
83
- // Don't touch files we don't own (kept silently, don't even count).
84
- continue;
85
- }
86
- const path = join(stateDir, name);
87
- try {
88
- const stat = statSync(path);
89
- if (stat.mtimeMs < cutoff) {
90
- rmSync(path);
91
- deleted += 1;
92
- } else {
93
- kept += 1;
94
- }
95
- } catch (e) {
96
- errors += 1;
97
- logger(`[memo-sweep] failed on ${path}: ${e instanceof Error ? e.message : String(e)}`);
98
- }
99
- }
100
-
101
- return { scanned: deleted + kept + errors, deleted, kept, errors };
102
- }
103
-
104
- /**
105
- * Periodic sweep loop. Mirrors ScanScheduler's start/stop shape so the
106
- * daemon can manage it the same way. First tick fires immediately on
107
- * start() — the daemon picking up after a long downtime should sweep
108
- * accumulated memos right away, not wait an interval.
109
- */
110
- export class MemoSweepScheduler {
111
- private readonly opts: Required<Omit<MemoSweepOptions, "stateDir" | "now">> & {
112
- readonly stateDir: string | undefined;
113
- readonly now: (() => number) | undefined;
114
- };
115
- private stopped = true;
116
- private timer: NodeJS.Timeout | null = null;
117
-
118
- constructor(opts: MemoSweepOptions = {}) {
119
- this.opts = {
120
- stateDir: opts.stateDir,
121
- dormantMs: opts.dormantMs ?? DEFAULT_DORMANT_MS,
122
- intervalMs: opts.intervalMs ?? DEFAULT_INTERVAL_MS,
123
- logger: opts.logger ?? ((msg) => console.error(msg)),
124
- now: opts.now,
125
- };
126
- }
127
-
128
- start(): void {
129
- if (!this.stopped) return;
130
- this.stopped = false;
131
- this.scheduleNext(0);
132
- }
133
-
134
- stop(): void {
135
- this.stopped = true;
136
- if (this.timer) {
137
- clearTimeout(this.timer);
138
- this.timer = null;
139
- }
140
- }
141
-
142
- tick(): SweepReport {
143
- return sweepMemoDir({
144
- dormantMs: this.opts.dormantMs,
145
- logger: this.opts.logger,
146
- ...(this.opts.stateDir !== undefined ? { stateDir: this.opts.stateDir } : {}),
147
- ...(this.opts.now !== undefined ? { now: this.opts.now } : {}),
148
- });
149
- }
150
-
151
- private scheduleNext(delayMs: number): void {
152
- if (this.stopped) return;
153
- this.timer = setTimeout(() => {
154
- try {
155
- this.tick();
156
- } catch (e) {
157
- this.opts.logger(`[memo-sweep] tick crashed: ${e instanceof Error ? e.message : String(e)}`);
158
- }
159
- this.scheduleNext(this.opts.intervalMs);
160
- }, delayMs);
161
- // Don't keep the event loop alive just for the sweep.
162
- if (this.timer && typeof this.timer.unref === "function") this.timer.unref();
163
- }
164
- }