claude-memory-layer 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.claude/settings.local.json +11 -0
  2. package/README.md +2 -0
  3. package/dist/cli/index.js +87 -17
  4. package/dist/cli/index.js.map +2 -2
  5. package/dist/core/index.js +30 -5
  6. package/dist/core/index.js.map +2 -2
  7. package/dist/hooks/post-tool-use.js +117 -18
  8. package/dist/hooks/post-tool-use.js.map +2 -2
  9. package/dist/hooks/semantic-daemon.js +7337 -0
  10. package/dist/hooks/semantic-daemon.js.map +7 -0
  11. package/dist/hooks/session-end.js +71 -16
  12. package/dist/hooks/session-end.js.map +2 -2
  13. package/dist/hooks/session-start.js +156 -24
  14. package/dist/hooks/session-start.js.map +4 -4
  15. package/dist/hooks/stop.js +101 -18
  16. package/dist/hooks/stop.js.map +2 -2
  17. package/dist/hooks/user-prompt-submit.js +291 -102
  18. package/dist/hooks/user-prompt-submit.js.map +4 -4
  19. package/dist/server/api/index.js +71 -16
  20. package/dist/server/api/index.js.map +2 -2
  21. package/dist/server/index.js +71 -16
  22. package/dist/server/index.js.map +2 -2
  23. package/dist/services/memory-service.js +71 -16
  24. package/dist/services/memory-service.js.map +2 -2
  25. package/dist/ui/app.js +48 -1
  26. package/dist/ui/index.html +11 -3
  27. package/memory/_index.md +1 -0
  28. package/memory/agent_response/uncategorized/2026-03-04.md +1138 -1
  29. package/memory/session_summary/uncategorized/2026-03-04.md +31 -0
  30. package/memory/tool_observation/uncategorized/2026-03-04.md +785 -1
  31. package/memory/user_prompt/uncategorized/2026-03-04.md +438 -1
  32. package/package.json +1 -1
  33. package/scripts/build.ts +2 -1
  34. package/specs/selective-tool-observation/context.md +100 -0
  35. package/specs/selective-tool-observation/plan.md +158 -0
  36. package/specs/selective-tool-observation/spec.md +127 -0
  37. package/src/cli/index.ts +1 -0
  38. package/src/core/embedder.ts +15 -4
  39. package/src/core/sqlite-event-store.ts +16 -0
  40. package/src/core/turn-state.ts +48 -0
  41. package/src/core/types.ts +1 -0
  42. package/src/hooks/post-tool-use.ts +47 -2
  43. package/src/hooks/semantic-daemon-client.ts +208 -0
  44. package/src/hooks/semantic-daemon.ts +276 -0
  45. package/src/hooks/session-start.ts +7 -0
  46. package/src/hooks/stop.ts +19 -4
  47. package/src/hooks/user-prompt-submit.ts +48 -40
  48. package/src/services/memory-service.ts +59 -16
  49. package/src/services/session-history-importer.ts +18 -0
  50. package/src/ui/app.js +48 -1
  51. package/src/ui/index.html +11 -3
@@ -0,0 +1,276 @@
1
+ #!/usr/bin/env node
2
+
3
+ import * as fs from 'fs';
4
+ import * as net from 'net';
5
+ import * as os from 'os';
6
+ import * as path from 'path';
7
+ import { MemoryService, getProjectStoragePath, getSessionProject } from '../services/memory-service.js';
8
+
9
/**
 * Wire format of a single request sent to the daemon over its socket.
 * Every field is optional here because the payload is untrusted JSON;
 * `isValidRequest` decides whether a parsed payload is usable.
 */
interface SemanticDaemonRequest {
  /** Request discriminator; `'retrieve'` is the only value accepted. */
  type?: 'retrieve';
  /** Session the lookup is performed for; also selects the project-scoped service. */
  sessionId?: string;
  /** Query text handed to `retrieveMemories`. */
  prompt?: string;
  /** Forwarded to `retrieveMemories` as its `topK` option. */
  topK?: number;
  /** Forwarded to `retrieveMemories` as its `minScore` option. */
  minScore?: number;
}

/** One retrieved memory as returned to the client. */
interface SemanticMemory {
  /** Event type of the underlying stored event. */
  type: string;
  /** Content of the underlying stored event. */
  content: string;
  /** Identifier of the underlying stored event, when available. */
  id?: string;
  /** Retrieval score reported for this memory, when available. */
  score?: number;
}

/** Wire format of the daemon's reply. */
interface SemanticDaemonResponse {
  /** `true` when retrieval succeeded, `false` when `error` describes a failure. */
  ok: boolean;
  /** Retrieved memories; present on success. */
  memories?: SemanticMemory[];
  /** Failure description; present when `ok` is `false`. */
  error?: string;
}
29
+
30
/**
 * Path of the socket the daemon listens on.
 * Overridable through CLAUDE_MEMORY_SEMANTIC_SOCKET; otherwise it lives under
 * ~/.claude-code/memory.
 */
const SOCKET_PATH = process.env.CLAUDE_MEMORY_SEMANTIC_SOCKET || path.join(
  os.homedir(),
  '.claude-code',
  'memory',
  'semantic-daemon.sock'
);

/** Idle timeout used when the environment override is missing or unusable. */
const DEFAULT_IDLE_TIMEOUT_MS = 600000;

/** Largest delay Node timers honour; larger values are silently treated as 1ms. */
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Turn the raw CLAUDE_MEMORY_SEMANTIC_DAEMON_IDLE_MS value into a safe timer delay.
 *
 * A non-numeric, zero or negative value falls back to the default, and an
 * oversized value is clamped. Without this, `setTimeout` would receive NaN or
 * an out-of-range delay and fire almost immediately, shutting the daemon down
 * right after it started.
 *
 * @param raw - Environment variable value, or `undefined` when unset.
 * @returns A delay in milliseconds within `1..MAX_TIMER_DELAY_MS`.
 */
function parseIdleTimeoutMs(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_IDLE_TIMEOUT_MS;
  }
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

/** Milliseconds of inactivity after which the daemon shuts itself down. */
const IDLE_TIMEOUT_MS = parseIdleTimeoutMs(process.env.CLAUDE_MEMORY_SEMANTIC_DAEMON_IDLE_MS);

/** MemoryService instances keyed by project hash (or '__global__'). */
const serviceCache = new Map<string, MemoryService>();

/** Listening server, set once `listenServer` has created it. */
let server: net.Server | null = null;
/** Pending idle-shutdown timer, if one is scheduled. */
let idleTimer: NodeJS.Timeout | null = null;
/** Guards `shutdown` against running twice. */
let shuttingDown = false;
43
+
44
/**
 * (Re)arm the idle timer: any pending timer is cancelled and a new one is
 * started that shuts the daemon down after IDLE_TIMEOUT_MS.
 * The timer is unref'd so it does not by itself keep the process alive.
 */
function scheduleIdleShutdown(): void {
  if (idleTimer !== null) clearTimeout(idleTimer);

  const onIdle = (): void => {
    // If a graceful shutdown fails, exit anyway.
    shutdown(0).catch(() => {
      process.exit(0);
    });
  };

  idleTimer = setTimeout(onIdle, IDLE_TIMEOUT_MS);
  idleTimer.unref();
}
56
+
57
/**
 * Parse a raw request payload into a request object.
 *
 * Never throws: malformed JSON and any JSON value that is not a plain object
 * (`null`, arrays, numbers, strings, booleans) yield an empty request, which
 * validation then rejects. Returning `null` here would make the validator
 * dereference `null` and throw outside the request's error handling.
 *
 * @param raw - The complete text received from the client.
 * @returns The parsed object, or `{}` when the payload is unusable.
 */
function parseRequest(raw: string): SemanticDaemonRequest {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return {};
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return {};
  }

  // Field-level checks are left to isValidRequest; only the container shape is guaranteed here.
  return parsed as SemanticDaemonRequest;
}
64
+
65
/**
 * Type guard for a fully populated retrieve request.
 *
 * @param input - Parsed request payload.
 * @returns `true` only when `type` is `'retrieve'`, `sessionId` and `prompt`
 *   are non-empty strings, and `topK` and `minScore` are finite numbers.
 */
function isValidRequest(input: SemanticDaemonRequest): input is Required<SemanticDaemonRequest> {
  if (input.type !== 'retrieve') return false;

  const { sessionId, prompt, topK, minScore } = input;
  if (typeof sessionId !== 'string' || sessionId.length === 0) return false;
  if (typeof prompt !== 'string' || prompt.length === 0) return false;

  // Number.isFinite is false for undefined, NaN, Infinity and non-numbers.
  return Number.isFinite(topK) && Number.isFinite(minScore);
}
74
+
75
/**
 * Build a failure response from a caught value.
 *
 * @param error - Whatever was thrown.
 * @returns A response with `ok: false`; the message of an `Error` is passed
 *   through, anything else is reported as a generic daemon error.
 */
function makeErrorResponse(error: unknown): SemanticDaemonResponse {
  let message = 'unknown daemon error';
  if (error instanceof Error) {
    message = error.message;
  }
  return { ok: false, error: message };
}
78
+
79
/**
 * Detect the vector-store failure caused by filtering on a session id field
 * it does not know.
 *
 * @param error - Whatever was thrown by retrieval.
 * @returns `true` when `error` is an `Error` whose message contains
 *   "no field named sessionid" (case-insensitive).
 */
function isVectorSessionFilterError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.toLowerCase().includes('no field named sessionid');
}
83
+
84
/**
 * Return the MemoryService for the project a session belongs to, creating and
 * caching it on first use.
 *
 * Services are cached per project hash; sessions with no registered project
 * share one service keyed '__global__' that stores under ~/.claude-code/memory.
 *
 * @param sessionId - Session whose project decides which service is used.
 * @returns The cached or newly created service.
 */
function getServiceForSession(sessionId: string): MemoryService {
  const project = getSessionProject(sessionId);
  const cacheKey = project?.projectHash || '__global__';

  const cached = serviceCache.get(cacheKey);
  if (cached !== undefined) {
    return cached;
  }

  const storagePath = project
    ? getProjectStoragePath(project.projectPath)
    : path.join(os.homedir(), '.claude-code', 'memory');

  const created = new MemoryService({
    storagePath,
    projectHash: project?.projectHash,
    projectPath: project?.projectPath,
    readOnly: false,
    embeddingOnly: true,
    analyticsEnabled: false,
    sharedStoreConfig: { enabled: false }
  });

  serviceCache.set(cacheKey, created);
  return created;
}
107
+
108
/**
 * Handle one raw request: parse, validate, retrieve memories, and shape the reply.
 *
 * Retrieval is first attempted scoped to the session. If that fails with the
 * session-filter error recognised by `isVectorSessionFilterError`, it is retried
 * once without the session filter (project scoping stays strict). Any other
 * failure is turned into an error response rather than thrown.
 *
 * @param raw - Complete request text received from the client.
 * @returns A success response with the retrieved memories, or an error response.
 */
async function handleRequest(raw: string): Promise<SemanticDaemonResponse> {
  const request = parseRequest(raw);
  if (!isValidRequest(request)) {
    return { ok: false, error: 'invalid request' };
  }

  const { sessionId, prompt, topK, minScore } = request;

  try {
    const service = getServiceForSession(sessionId);

    // Same options for both attempts; only the session filter is toggled.
    const retrieve = (scopeToSession: boolean) =>
      service.retrieveMemories(prompt, {
        topK,
        minScore,
        ...(scopeToSession ? { sessionId } : {}),
        intentRewrite: true,
        adaptiveRerank: true,
        projectScopeMode: 'strict'
      });

    let result;
    try {
      result = await retrieve(true);
    } catch (error) {
      if (!isVectorSessionFilterError(error)) {
        throw error;
      }

      // LanceDB field-case mismatch can fail sessionId filtering.
      // Retry without session filter and keep project strict scoping.
      result = await retrieve(false);
    }

    const memories = result.memories.map((hit) => {
      const { eventType, content, id } = hit.event;
      return { type: eventType, content, id, score: hit.score };
    });

    return { ok: true, memories };
  } catch (error) {
    return makeErrorResponse(error);
  }
}
154
+
155
/**
 * Create the daemon's socket server (not yet listening).
 *
 * Protocol, per connection: the client writes one JSON request and ends its
 * write side; the daemon then writes one JSON response and ends the socket.
 * `allowHalfOpen` keeps the daemon's write side open after the client's end
 * so the response can still be sent.
 *
 * Each connection re-arms the idle-shutdown timer on connect and after the
 * response is written.
 *
 * @returns The configured server.
 */
function createServer(): net.Server {
  // Limit is in UTF-16 code units of the decoded text, not bytes.
  const maxRequestChars = 1024 * 1024;

  return net.createServer({ allowHalfOpen: true }, (socket) => {
    scheduleIdleShutdown();
    socket.setEncoding('utf8');

    let requestRaw = '';
    // Set once the request has been refused; the connection is then only drained.
    let rejected = false;

    socket.on('data', (chunk) => {
      if (rejected) return;

      requestRaw += chunk;
      if (requestRaw.length > maxRequestChars) {
        rejected = true;
        // Drop what was buffered so an oversized sender cannot grow memory further.
        requestRaw = '';
        socket.end(JSON.stringify({ ok: false, error: 'request too large' }));
      }
    });

    socket.on('end', async () => {
      if (rejected) {
        // A response was already sent and the socket ended; do not process or write again.
        scheduleIdleShutdown();
        return;
      }

      let response: SemanticDaemonResponse;
      try {
        response = await handleRequest(requestRaw);
      } catch (error) {
        // Keep one bad request from surfacing as an unhandled rejection,
        // which would shut the whole daemon down.
        response = makeErrorResponse(error);
      }

      socket.end(JSON.stringify(response));
      scheduleIdleShutdown();
    });

    socket.on('error', () => {
      // Ignore per-socket errors to keep daemon process alive.
    });
  });
}
180
+
181
/**
 * Check whether something is accepting connections on a socket path.
 *
 * @param p - Socket path to probe.
 * @returns `false` when the path does not exist, the connection errors, or no
 *   connection is established within 120ms; `true` when a connection succeeds.
 */
async function socketInUse(p: string): Promise<boolean> {
  if (!fs.existsSync(p)) {
    return false;
  }

  return new Promise<boolean>((resolve) => {
    const probe = net.createConnection(p);
    let finished = false;

    // First outcome wins; later events are ignored.
    function finish(alive: boolean): void {
      if (finished) return;
      finished = true;
      probe.destroy();
      resolve(alive);
    }

    probe.on('connect', () => finish(true));
    probe.on('error', () => finish(false));
    // Unref'd so the probe timeout never keeps the process alive.
    setTimeout(() => finish(false), 120).unref();
  });
}
197
+
198
/**
 * Start listening on SOCKET_PATH.
 *
 * Creates the socket's directory if needed. If another process already answers
 * on the socket, this process exits with code 0. A leftover socket file that
 * nobody answers on is removed before binding.
 *
 * @returns Resolves once the server is listening; rejects if binding fails.
 */
async function listenServer(): Promise<void> {
  const dir = path.dirname(SOCKET_PATH);
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }

  const alreadyServed = await socketInUse(SOCKET_PATH);
  if (alreadyServed) {
    process.exit(0);
  }

  if (fs.existsSync(SOCKET_PATH)) {
    try {
      fs.unlinkSync(SOCKET_PATH);
    } catch {
      // Ignore stale socket unlink failures.
    }
  }

  server = createServer();

  await new Promise<void>((resolve, reject) => {
    const current = server;
    if (current === null) {
      reject(new Error('daemon server not initialized'));
      return;
    }

    // The error listener only covers the bind attempt; it is removed once listening.
    current.once('error', reject);
    current.listen(SOCKET_PATH, () => {
      current.off('error', reject);
      resolve();
    });
  });
}
231
+
232
/**
 * Shut the daemon down and exit the process.
 *
 * Runs at most once. Cancels the idle timer, shuts down every cached service
 * (individual failures are ignored), closes the server, removes the socket
 * file, then exits.
 *
 * @param code - Process exit code.
 */
async function shutdown(code: number): Promise<void> {
  if (shuttingDown) return;
  shuttingDown = true;

  if (idleTimer !== null) {
    clearTimeout(idleTimer);
  }

  // Shut services down in parallel; one failing service must not block the rest.
  await Promise.all(
    Array.from(serviceCache.values(), (svc) => svc.shutdown().catch(() => undefined))
  );
  serviceCache.clear();

  const activeServer = server;
  if (activeServer !== null) {
    // NOTE(review): close() presumably waits for open connections to finish - confirm this cannot stall shutdown.
    await new Promise<void>((resolve) => {
      activeServer.close(() => resolve());
    });
  }

  if (fs.existsSync(SOCKET_PATH)) {
    try {
      fs.unlinkSync(SOCKET_PATH);
    } catch {
      // Ignore socket cleanup failure.
    }
  }

  process.exit(code);
}
263
+
264
/** Daemon entry point: bind the socket, then arm the idle-shutdown timer. */
async function main(): Promise<void> {
  await listenServer();
  scheduleIdleShutdown();
}

/**
 * Build a process-event listener that shuts down with `code`, exiting
 * directly with the same code if the graceful shutdown itself fails.
 */
function terminateWith(code: number): () => void {
  return () => {
    shutdown(code).catch(() => process.exit(code));
  };
}

// Termination signals are a normal exit; unexpected errors exit with code 1.
process.on('SIGINT', terminateWith(0));
process.on('SIGTERM', terminateWith(0));
process.on('uncaughtException', terminateWith(1));
process.on('unhandledRejection', terminateWith(1));

main().catch(() => {
  process.exit(1);
});
@@ -8,6 +8,7 @@ import {
8
8
  getLightweightMemoryService,
9
9
  registerSession
10
10
  } from '../services/memory-service.js';
11
+ import { ensureDaemonRunning } from './semantic-daemon-client.js';
11
12
  import type { SessionStartInput, SessionStartOutput } from '../core/types.js';
12
13
 
13
14
  async function main(): Promise<void> {
@@ -18,6 +19,12 @@ async function main(): Promise<void> {
18
19
  // Register session with project path for other hooks to find
19
20
  registerSession(input.session_id, input.cwd);
20
21
 
22
+ // Start semantic daemon in the background (non-blocking) so VectorWorker
23
+ // can process any pending embedding_outbox items immediately.
24
+ ensureDaemonRunning().catch(() => {
25
+ // Ignore - daemon will start on first prompt if needed
26
+ });
27
+
21
28
  // Use lightweight service to avoid starting background workers in hook process
22
29
  const memoryService = getLightweightMemoryService(input.session_id);
23
30
 
package/src/hooks/stop.ts CHANGED
@@ -17,7 +17,7 @@ import * as fs from 'fs';
17
17
  import * as readline from 'readline';
18
18
  import { getLightweightMemoryService } from '../services/memory-service.js';
19
19
  import { applyPrivacyFilter } from '../core/privacy/index.js';
20
- import { readTurnState, clearTurnState } from '../core/turn-state.js';
20
+ import { readTurnState, clearTurnState, writeLastAssistantSnippet } from '../core/turn-state.js';
21
21
  import type { StopInput, Config } from '../core/types.js';
22
22
 
23
23
  // Default privacy config
@@ -94,8 +94,16 @@ async function main(): Promise<void> {
94
94
  // Read assistant messages from transcript
95
95
  const assistantMessages = await extractAssistantMessages(input.transcript_path);
96
96
 
97
+ const MIN_AGENT_RESPONSE_LEN = parseInt(
98
+ process.env.CLAUDE_MEMORY_AGENT_RESPONSE_MIN_LEN || '150'
99
+ );
100
+ const lastIdx = assistantMessages.length - 1;
101
+
97
102
  // Store each assistant response
98
- for (const text of assistantMessages) {
103
+ for (let i = 0; i < assistantMessages.length; i++) {
104
+ const text = assistantMessages[i];
105
+ const isLast = i === lastIdx;
106
+
99
107
  // Apply privacy filter
100
108
  const filterResult = applyPrivacyFilter(text, DEFAULT_PRIVACY_CONFIG);
101
109
  let content = filterResult.content;
@@ -105,8 +113,9 @@ async function main(): Promise<void> {
105
113
  content = content.slice(0, 5000) + '...[truncated]';
106
114
  }
107
115
 
108
- // Skip very short responses (likely just tool calls)
109
- if (content.trim().length < 10) continue;
116
+ // Skip very short responses (likely just tool calls or transition messages)
117
+ // Always store the last message (may be the final answer)
118
+ if (!isLast && content.trim().length < MIN_AGENT_RESPONSE_LEN) continue;
110
119
 
111
120
  await memoryService.storeAgentResponse(
112
121
  input.session_id,
@@ -118,6 +127,12 @@ async function main(): Promise<void> {
118
127
  );
119
128
  }
120
129
 
130
+ // Save last assistant response snippet for next-turn retrieval context enrichment
131
+ if (assistantMessages.length > 0) {
132
+ const lastMessage = assistantMessages[assistantMessages.length - 1];
133
+ writeLastAssistantSnippet(input.session_id, lastMessage);
134
+ }
135
+
121
136
  // Clean up turn state file after processing
122
137
  clearTurnState(input.session_id);
123
138
 
@@ -1,10 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
3
  * User Prompt Submit Hook
4
- * Called when user submits a prompt - retrieves relevant memories using fast keyword search
4
+ * Called when user submits a prompt - retrieves relevant memories.
5
5
  *
6
- * Uses SQLite FTS5 for fast keyword-based search (no ML model needed)
7
- * Much faster than vector search (~100ms vs 3-5s)
6
+ * Retrieval mode (CLAUDE_MEMORY_RETRIEVAL_MODE):
7
+ * - keyword: SQLite FTS5 only, no ML model (~10ms); fastest mode
8
+ * - semantic: vector search via long-running semantic daemon (~15-20ms warm)
9
+ * - hybrid: semantic first, keyword fallback (default)
10
+ *
11
+ * The semantic daemon keeps the embedding model in memory across hook invocations,
12
+ * avoiding per-request model initialization (~730ms cold start).
8
13
  *
9
14
  * Turn Grouping: Generates a turn_id and persists it to a state file
10
15
  * so PostToolUse and Stop hooks can associate their events with this turn.
@@ -14,8 +19,9 @@ import { randomUUID } from 'crypto';
14
19
  import * as fs from 'fs';
15
20
  import * as path from 'path';
16
21
  import * as os from 'os';
17
- import { getLightweightMemoryService, getMemoryServiceForSession } from '../services/memory-service.js';
18
- import { writeTurnState } from '../core/turn-state.js';
22
+ import { getLightweightMemoryService } from '../services/memory-service.js';
23
+ import { writeTurnState, readLastAssistantSnippet } from '../core/turn-state.js';
24
+ import { retrieveSemanticMemories } from './semantic-daemon-client.js';
19
25
  import type { UserPromptSubmitInput, UserPromptSubmitOutput } from '../core/types.js';
20
26
 
21
27
  // Configuration
@@ -25,7 +31,7 @@ const BASE_MIN_SCORE = parseFloat(process.env.CLAUDE_MEMORY_MIN_SCORE || '0.4');
25
31
  const FALLBACK_MIN_SCORE = parseFloat(process.env.CLAUDE_MEMORY_FALLBACK_MIN_SCORE || '0.3');
26
32
  const ENABLE_SEARCH = process.env.CLAUDE_MEMORY_SEARCH !== 'false';
27
33
  const RETRIEVAL_MODE = (process.env.CLAUDE_MEMORY_RETRIEVAL_MODE || 'hybrid') as 'keyword' | 'semantic' | 'hybrid';
28
- const SEMANTIC_TIMEOUT_MS = parseInt(process.env.CLAUDE_MEMORY_SEMANTIC_TIMEOUT_MS || '1200');
34
+ const SEMANTIC_TIMEOUT_MS = parseInt(process.env.CLAUDE_MEMORY_SEMANTIC_TIMEOUT_MS || '2000');
29
35
  const ADHERENCE_INTERVAL_TURNS = parseInt(process.env.CLAUDE_MEMORY_ADHERENCE_INTERVAL_TURNS || '3');
30
36
 
31
37
  const ADHERENCE_STATE_DIR = path.join(os.homedir(), '.claude-code', 'memory');
@@ -59,21 +65,6 @@ function getDynamicMinScore(prompt: string): number {
59
65
  return BASE_MIN_SCORE;
60
66
  }
61
67
 
62
- function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
63
- return new Promise((resolve, reject) => {
64
- const timer = setTimeout(() => reject(new Error(`semantic retrieval timeout (${timeoutMs}ms)`)), timeoutMs);
65
- promise
66
- .then((result) => {
67
- clearTimeout(timer);
68
- resolve(result);
69
- })
70
- .catch((error) => {
71
- clearTimeout(timer);
72
- reject(error);
73
- });
74
- });
75
- }
76
-
77
68
  function formatMemoryContext(items: Array<{ type: string; content: string }>): string {
78
69
  if (items.length === 0) return '';
79
70
  const lines = items.map((m) => {
@@ -196,6 +187,12 @@ async function main(): Promise<void> {
196
187
  const adherenceDecision = shouldRunAdherenceCheck(currentTurn, input.prompt, adherenceState);
197
188
  logAdherenceDecision(input.session_id, currentTurn, adherenceDecision.run, adherenceDecision.reason);
198
189
 
190
+ // On first turn of a new session, backfill helpfulness for sessions
191
+ // that ended without Stop hook (crash, force-close, etc.)
192
+ if (currentTurn === 1) {
193
+ memoryService.evaluatePendingSessions(input.session_id).catch(() => {});
194
+ }
195
+
199
196
  // Store only non-trivial prompts (skip /commands, short inputs)
200
197
  if (shouldStorePrompt(input.prompt)) {
201
198
  await memoryService.storeUserPrompt(
@@ -214,32 +211,30 @@ async function main(): Promise<void> {
214
211
 
215
212
  // Search strategy: turn-1 always enforce adherence check,
216
213
  // then adaptively enforce on write-intent/topic-shift/interval
217
- if (ENABLE_SEARCH && input.prompt.length > 10 && adherenceDecision.run) {
214
+ const isSlashCommand = input.prompt.trimStart().startsWith('/');
215
+ if (ENABLE_SEARCH && !isSlashCommand && input.prompt.length > 10 && adherenceDecision.run) {
218
216
  const minScore = getDynamicMinScore(input.prompt);
219
217
  let mergedMemories: Array<{ type: string; content: string; id?: string; score?: number }> = [];
220
218
 
219
+ // On turn 2+, enrich the retrieval query with the previous assistant response
220
+ // so short/ambiguous follow-ups ("그거 고쳐줘") resolve correctly.
221
+ const lastSnippet = currentTurn > 1 ? readLastAssistantSnippet(input.session_id) : null;
222
+ const retrievalQuery = lastSnippet
223
+ ? `${lastSnippet}\n\n${input.prompt}`
224
+ : input.prompt;
225
+
221
226
  const canUseSemantic = RETRIEVAL_MODE === 'semantic' || RETRIEVAL_MODE === 'hybrid';
222
227
  if (canUseSemantic) {
223
228
  try {
224
- const semanticService = getMemoryServiceForSession(input.session_id);
225
- const semantic = await withTimeout(
226
- semanticService.retrieveMemories(input.prompt, {
227
- topK: MAX_MEMORIES,
228
- minScore,
229
+ mergedMemories = await retrieveSemanticMemories(
230
+ {
229
231
  sessionId: input.session_id,
230
- intentRewrite: true,
231
- adaptiveRerank: true,
232
- projectScopeMode: 'strict'
233
- }),
232
+ prompt: retrievalQuery,
233
+ topK: MAX_MEMORIES,
234
+ minScore
235
+ },
234
236
  SEMANTIC_TIMEOUT_MS
235
237
  );
236
-
237
- mergedMemories = semantic.memories.map((m) => ({
238
- type: m.event.eventType,
239
- content: m.event.content,
240
- id: m.event.id,
241
- score: m.score
242
- }));
243
238
  } catch {
244
239
  // Semantic retrieval is best-effort; fallback below handles the rest
245
240
  }
@@ -251,14 +246,14 @@ async function main(): Promise<void> {
251
246
  mergedMemories.length === 0;
252
247
 
253
248
  if (shouldUseKeywordFallback && mergedMemories.length < MAX_MEMORIES) {
254
- let results = await memoryService.keywordSearch(input.prompt, {
249
+ let results = await memoryService.keywordSearch(retrievalQuery, {
255
250
  topK: MAX_MEMORIES,
256
251
  minScore
257
252
  });
258
253
 
259
254
  // recall rescue: if nothing found at tuned threshold, retry with fallback floor
260
255
  if (results.length === 0 && FALLBACK_MIN_SCORE < minScore) {
261
- results = await memoryService.keywordSearch(input.prompt, {
256
+ results = await memoryService.keywordSearch(retrievalQuery, {
262
257
  topK: MAX_MEMORIES,
263
258
  minScore: FALLBACK_MIN_SCORE
264
259
  });
@@ -299,6 +294,19 @@ async function main(): Promise<void> {
299
294
 
300
295
  context = formatMemoryContext(mergedMemories);
301
296
  }
297
+
298
+ // Record query-level trace for dashboard stats (retrieval_traces table)
299
+ const allCandidateIds = mergedMemories.map((m) => m.id).filter((v): v is string => Boolean(v));
300
+ try {
301
+ await memoryService.recordQueryTrace({
302
+ sessionId: input.session_id,
303
+ queryText: retrievalQuery,
304
+ strategy: RETRIEVAL_MODE,
305
+ candidateEventIds: allCandidateIds,
306
+ selectedEventIds: allCandidateIds,
307
+ confidence: mergedMemories.length > 0 ? 'medium' : 'none'
308
+ });
309
+ } catch { /* non-critical */ }
302
310
  }
303
311
 
304
312
  writeAdherenceState({
@@ -61,6 +61,8 @@ export interface MemoryServiceConfig {
61
61
  analyticsEnabled?: boolean;
62
62
  /** Lightweight mode for hooks - skip heavy initialization (default: false) */
63
63
  lightweightMode?: boolean;
64
+ /** Start only VectorWorker, skip GraduationWorker and SyncWorker (default: false) */
65
+ embeddingOnly?: boolean;
64
66
  }
65
67
 
66
68
  // ============================================================
@@ -212,6 +214,7 @@ export class MemoryService {
212
214
 
213
215
  private readonly readOnly: boolean;
214
216
  private readonly lightweightMode: boolean;
217
+ private readonly embeddingOnly: boolean;
215
218
  private readonly mdMirror: MarkdownMirror;
216
219
  private readonly storagePath: string;
217
220
 
@@ -220,6 +223,7 @@ export class MemoryService {
220
223
  this.storagePath = storagePath;
221
224
  this.readOnly = config.readOnly ?? false;
222
225
  this.lightweightMode = config.lightweightMode ?? false;
226
+ this.embeddingOnly = config.embeddingOnly ?? false;
223
227
  this.mdMirror = new MarkdownMirror(process.cwd());
224
228
 
225
229
  // Ensure storage directory exists (only if not read-only)
@@ -325,24 +329,26 @@ export class MemoryService {
325
329
  );
326
330
  this.vectorWorker.start();
327
331
 
328
- // Connect graduation pipeline to retriever for access tracking
329
- this.retriever.setGraduationPipeline(this.graduation);
332
+ if (!this.embeddingOnly) {
333
+ // Connect graduation pipeline to retriever for access tracking
334
+ this.retriever.setGraduationPipeline(this.graduation);
330
335
 
331
- // Start graduation worker for automatic level promotion
332
- this.graduationWorker = createGraduationWorker(
333
- this.sqliteStore as unknown as EventStore,
334
- this.graduation
335
- );
336
- this.graduationWorker.start();
337
-
338
- // Start sync worker (SQLite -> DuckDB) if analytics store is available
339
- if (this.analyticsStore) {
340
- this.syncWorker = new SyncWorker(
341
- this.sqliteStore,
342
- this.analyticsStore,
343
- { intervalMs: 30000, batchSize: 500 }
336
+ // Start graduation worker for automatic level promotion
337
+ this.graduationWorker = createGraduationWorker(
338
+ this.sqliteStore as unknown as EventStore,
339
+ this.graduation
344
340
  );
345
- this.syncWorker.start();
341
+ this.graduationWorker.start();
342
+
343
+ // Start sync worker (SQLite -> DuckDB) if analytics store is available
344
+ if (this.analyticsStore) {
345
+ this.syncWorker = new SyncWorker(
346
+ this.sqliteStore,
347
+ this.analyticsStore,
348
+ { intervalMs: 30000, batchSize: 500 }
349
+ );
350
+ this.syncWorker.start();
351
+ }
346
352
  }
347
353
 
348
354
  // Load endless mode setting
@@ -1254,6 +1260,27 @@ export class MemoryService {
1254
1260
  await this.sqliteStore.recordRetrieval(eventId, sessionId, score, query);
1255
1261
  }
1256
1262
 
1263
+ /**
1264
+ * Record a query-level retrieval trace (used by user-prompt-submit hook).
1265
+ * Feeds the retrieval_traces table that powers dashboard stats.
1266
+ */
1267
async recordQueryTrace(input: {
  sessionId: string;
  queryText: string;
  strategy: string;
  candidateEventIds: string[];
  selectedEventIds: string[];
  confidence: string;
}): Promise<void> {
  await this.initialize();

  // Only ids are known at this level; the detail and fallback lists are stored empty.
  const trace = {
    ...input,
    candidateDetails: [],
    selectedDetails: [],
    fallbackTrace: []
  };
  await this.sqliteStore.recordRetrievalTrace(trace);
}
1283
+
1257
1284
  /**
1258
1285
  * Evaluate helpfulness of retrievals in a session (called at session end)
1259
1286
  */
@@ -1262,6 +1289,22 @@ export class MemoryService {
1262
1289
  await this.sqliteStore.evaluateSessionHelpfulness(sessionId);
1263
1290
  }
1264
1291
 
1292
+ /**
1293
+ * Backfill helpfulness evaluation for sessions that ended without Stop hook.
1294
+ * Call on first turn of a new session to catch missed evaluations.
1295
+ */
1296
async evaluatePendingSessions(currentSessionId: string): Promise<void> {
  await this.initialize();

  // At most this many sessions are requested from the store per call.
  const maxSessions = 5;
  const pendingIds = await this.sqliteStore.getUnevaluatedSessions(currentSessionId, maxSessions);

  // Evaluated one at a time; a failure for one session does not stop the rest.
  for (const pendingId of pendingIds) {
    try {
      await this.sqliteStore.evaluateSessionHelpfulness(pendingId);
    } catch {
      // non-critical, skip failed
    }
  }
}
1307
+
1265
1308
  /**
1266
1309
  * Get most helpful memories ranked by helpfulness score
1267
1310
  */
@@ -59,6 +59,18 @@ export interface ClaudeMessage {
59
59
  * - 'thinking': Assistant thinking (thinking blocks)
60
60
  * - 'skip': Everything else (progress, system, summary, etc.)
61
61
  */
62
+ /**
63
+ * Filter trivial user inputs that aren't worth storing.
64
+ * Mirrors the shouldStorePrompt() logic from user-prompt-submit.ts.
65
+ */
66
function isWorthStoringPrompt(content: string): boolean {
  const text = content.trim();

  const isSlashCommand = text.startsWith('/');
  const isTooShort = text.length < 15;
  // Requires at least two consecutive Latin or Hangul letters somewhere in the text.
  const hasWord = /[a-zA-Z가-힣]{2,}/.test(text);

  return !isSlashCommand && !isTooShort && hasWord;
}
73
+
62
74
  function classifyEntry(entry: ClaudeMessage): 'user_prompt' | 'tool_result' | 'agent_text' | 'tool_use' | 'thinking' | 'skip' {
63
75
  if (entry.type !== 'user' && entry.type !== 'assistant') {
64
76
  return 'skip';
@@ -283,6 +295,12 @@ export class SessionHistoryImporter {
283
295
  const content = this.extractContent(entry);
284
296
  if (!content) continue;
285
297
 
298
+ // Skip trivial inputs: slash commands, very short, no real words
299
+ if (!isWorthStoringPrompt(content)) {
300
+ result.skippedDuplicates++;
301
+ continue;
302
+ }
303
+
286
304
  // New turn starts with each real user prompt
287
305
  currentTurnId = randomUUID();
288
306