@aeriondyseti/vector-memory-mcp 2.4.4 → 2.5.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,42 @@
1
1
  import { randomUUID, createHash } from "crypto";
2
+ import { basename } from "path";
2
3
  import type { Memory, SearchIntent, IntentProfile, HybridRow } from "./memory";
3
- import { isDeleted } from "./memory";
4
- import type { SearchResult, SearchOptions } from "./conversation";
4
+ import { isDeleted, computeConfidence } from "./memory";
5
+ import type { SearchResult, SearchOptions, HistoryFilters } from "./conversation";
5
6
  import type { MemoryRepository } from "./memory.repository";
6
7
  import type { EmbeddingsService } from "./embeddings.service";
7
8
  import type { ConversationHistoryService } from "./conversation.service";
9
+ import { normalizeProject } from "./project";
8
10
 
11
+ // Jitter values halved from original (0.02/0.05/0.15) because RRF_K=10 produces
12
+ // ~6x more score spread than K=60, amplifying jitter's disruption effect.
9
13
  const INTENT_PROFILES: Record<SearchIntent, IntentProfile> = {
10
- continuity: { weights: { relevance: 0.3, recency: 0.5, utility: 0.2 }, jitter: 0.02 },
11
- fact_check: { weights: { relevance: 0.6, recency: 0.1, utility: 0.3 }, jitter: 0.02 },
12
- frequent: { weights: { relevance: 0.2, recency: 0.2, utility: 0.6 }, jitter: 0.02 },
13
- associative: { weights: { relevance: 0.7, recency: 0.1, utility: 0.2 }, jitter: 0.05 },
14
- explore: { weights: { relevance: 0.4, recency: 0.3, utility: 0.3 }, jitter: 0.15 },
14
+ continuity: { weights: { relevance: 0.3, recency: 0.5, utility: 0.2 }, jitter: 0.01 },
15
+ fact_check: { weights: { relevance: 0.6, recency: 0.1, utility: 0.3 }, jitter: 0.01 },
16
+ frequent: { weights: { relevance: 0.2, recency: 0.2, utility: 0.6 }, jitter: 0.01 },
17
+ associative: { weights: { relevance: 0.7, recency: 0.1, utility: 0.2 }, jitter: 0.025 },
18
+ explore: { weights: { relevance: 0.4, recency: 0.3, utility: 0.3 }, jitter: 0.08 },
15
19
  };
16
20
 
17
21
  const sigmoid = (x: number): number => 1 / (1 + Math.exp(-x));
18
22
 
23
+ // Modest same-project ranking boost for scope:"all" searches — same-repo
24
+ // memories win ties without hiding cross-project results.
25
+ const CURRENT_PROJECT_BOOST = 1.15;
26
+
19
27
  export class MemoryService {
20
28
  private conversationService: ConversationHistoryService | null = null;
21
29
 
22
30
  constructor(
23
31
  private repository: MemoryRepository,
24
- private embeddings: EmbeddingsService
32
+ private embeddings: EmbeddingsService,
33
+ private project: string | null = null
25
34
  ) {}
26
35
 
36
+ getProject(): string | null {
37
+ return this.project;
38
+ }
39
+
27
40
  setConversationService(service: ConversationHistoryService): void {
28
41
  this.conversationService = service;
29
42
  }
@@ -43,7 +56,8 @@ export class MemoryService {
43
56
  async store(
44
57
  content: string,
45
58
  metadata: Record<string, unknown> = {},
46
- embeddingText?: string
59
+ embeddingText?: string,
60
+ project?: string
47
61
  ): Promise<Memory> {
48
62
  const id = randomUUID();
49
63
  const now = new Date();
@@ -61,6 +75,7 @@ export class MemoryService {
61
75
  usefulness: 0,
62
76
  accessCount: 0,
63
77
  lastAccessed: now, // Initialize to createdAt for fair discovery
78
+ project: project !== undefined ? normalizeProject(project) : this.project,
64
79
  };
65
80
 
66
81
  await this.repository.insert(memory);
@@ -198,11 +213,51 @@ export class MemoryService {
198
213
  // Widen the candidate pool to account for offset
199
214
  const effectiveLimit = offset + limit;
200
215
 
216
+ // Resolve project scope: "all" = no filter (with same-project ranking
217
+ // boost), "project" = current project, anything else = explicit path.
218
+ const scope = options?.scope ?? "all";
219
+ const projectFilter: string | undefined =
220
+ scope === "all"
221
+ ? undefined
222
+ : scope === "project"
223
+ ? (this.project ?? undefined)
224
+ : normalizeProject(scope);
225
+
226
+ const hasDateFilters = options?.after || options?.before;
227
+ const memoryFilters =
228
+ hasDateFilters || projectFilter !== undefined
229
+ ? {
230
+ after: options?.after,
231
+ before: options?.before,
232
+ project: projectFilter,
233
+ }
234
+ : undefined;
235
+
236
+ // Merge top-level date filters into history filters so after/before
237
+ // apply uniformly. Explicit history_after/history_before take precedence,
238
+ // as does an explicit historyFilters.project.
239
+ const historyFilters = options?.historyFilters;
240
+ const effectiveHistoryFilters: HistoryFilters | undefined =
241
+ hasDateFilters || projectFilter !== undefined || historyFilters
242
+ ? {
243
+ ...historyFilters,
244
+ after: historyFilters?.after ?? options?.after,
245
+ before: historyFilters?.before ?? options?.before,
246
+ project: historyFilters?.project ?? projectFilter,
247
+ }
248
+ : historyFilters;
249
+
250
+ // Same-project boost only applies to unscoped searches
251
+ const boost = (resultProject: string | null): number =>
252
+ scope === "all" && this.project && resultProject === this.project
253
+ ? CURRENT_PROJECT_BOOST
254
+ : 1;
255
+
201
256
  // Run memory + history queries in parallel
202
257
  const memoryPromise =
203
258
  !historyOnly
204
259
  ? this.repository
205
- .findHybrid(queryEmbedding, query, effectiveLimit * 5)
260
+ .findHybrid(queryEmbedding, query, effectiveLimit * 5, memoryFilters)
206
261
  .then((candidates) =>
207
262
  candidates
208
263
  .filter((m) => includeDeleted || !isDeleted(m))
@@ -213,7 +268,11 @@ export class MemoryService {
213
268
  createdAt: candidate.createdAt,
214
269
  updatedAt: candidate.updatedAt,
215
270
  source: "memory" as const,
216
- score: this.computeMemoryScore(candidate, profile, now),
271
+ score:
272
+ this.computeMemoryScore(candidate, profile, now) *
273
+ boost(candidate.project),
274
+ confidence: computeConfidence(candidate.signals),
275
+ project: candidate.project,
217
276
  supersededBy: candidate.supersededBy,
218
277
  usefulness: candidate.usefulness,
219
278
  accessCount: candidate.accessCount,
@@ -229,23 +288,28 @@ export class MemoryService {
229
288
  query,
230
289
  queryEmbedding,
231
290
  historyOnly ? effectiveLimit * 5 : effectiveLimit * 3,
232
- options?.historyFilters
291
+ effectiveHistoryFilters
233
292
  )
234
293
  .then((historyRows) =>
235
- historyRows.map((row) => ({
236
- id: row.id,
237
- content: row.content,
238
- metadata: row.metadata,
239
- createdAt: row.createdAt,
240
- updatedAt: row.createdAt,
241
- source: "conversation_history" as const,
242
- score: row.rrfScore * historyWeight,
243
- supersededBy: null,
244
- sessionId: (row.metadata?.session_id as string) ?? "",
245
- role: (row.metadata?.role as string) ?? "unknown",
246
- messageIndexStart: (row.metadata?.message_index_start as number) ?? 0,
247
- messageIndexEnd: (row.metadata?.message_index_end as number) ?? 0,
248
- }))
294
+ historyRows.map((row) => {
295
+ const rowProject = (row.metadata?.project as string) ?? null;
296
+ return {
297
+ id: row.id,
298
+ content: row.content,
299
+ metadata: row.metadata,
300
+ createdAt: row.createdAt,
301
+ updatedAt: row.createdAt,
302
+ source: "conversation_history" as const,
303
+ score: row.rrfScore * historyWeight * boost(rowProject),
304
+ confidence: computeConfidence(row.signals),
305
+ project: rowProject,
306
+ supersededBy: null,
307
+ sessionId: (row.metadata?.session_id as string) ?? "",
308
+ role: (row.metadata?.role as string) ?? "unknown",
309
+ messageIndexStart: (row.metadata?.message_index_start as number) ?? 0,
310
+ messageIndexEnd: (row.metadata?.message_index_end as number) ?? 0,
311
+ };
312
+ })
249
313
  )
250
314
  : Promise.resolve([] as SearchResult[]);
251
315
 
@@ -290,8 +354,17 @@ export class MemoryService {
290
354
  ].join("-");
291
355
  }
292
356
 
357
/**
 * Decide which project an operation applies to.
 *
 * A caller-supplied value that is non-empty after trimming is passed through
 * normalizeProject. Anything else (omitted, empty, whitespace-only) falls
 * back to the project this service was constructed with; a null configured
 * project is reported as undefined.
 */
private resolveProject(project?: string): string | undefined {
  if (!project || project.trim().length === 0) {
    return this.project ?? undefined;
  }
  return normalizeProject(project);
}
365
+
293
366
  async setWaypoint(args: {
294
- project: string;
367
+ project?: string;
295
368
  branch?: string;
296
369
  summary: string;
297
370
  completed?: string[];
@@ -306,6 +379,7 @@ export class MemoryService {
306
379
  await this.trackAccess(args.memory_ids);
307
380
  }
308
381
 
382
+ const project = this.resolveProject(args.project);
309
383
  const now = new Date();
310
384
  const date = now.toISOString().slice(0, 10);
311
385
  const time = now.toISOString().slice(11, 16);
@@ -317,7 +391,7 @@ export class MemoryService {
317
391
  return items.map((i) => `- ${i}`).join("\n");
318
392
  };
319
393
 
320
- const content = `# Waypoint - ${args.project}
394
+ const content = `# Waypoint - ${project ?? "unknown project"}
321
395
  **Date:** ${date} ${time} | **Branch:** ${args.branch ?? "unknown"}
322
396
 
323
397
  ## Summary
@@ -341,14 +415,14 @@ ${list(args.memory_ids)}`;
341
415
  const metadata: Record<string, unknown> = {
342
416
  ...(args.metadata ?? {}),
343
417
  type: "waypoint",
344
- project: args.project,
418
+ project: project ?? null,
345
419
  date,
346
420
  branch: args.branch ?? "unknown",
347
421
  memory_ids: args.memory_ids ?? [],
348
422
  };
349
423
 
350
424
  const memory: Memory = {
351
- id: MemoryService.waypointId(args.project),
425
+ id: MemoryService.waypointId(project),
352
426
  content,
353
427
  embedding: new Array(this.embeddings.dimension).fill(0),
354
428
  metadata,
@@ -358,35 +432,83 @@ ${list(args.memory_ids)}`;
358
432
  usefulness: 0,
359
433
  accessCount: 0,
360
434
  lastAccessed: now, // Initialize to now for consistency
435
+ project: project ?? null,
361
436
  };
362
437
 
438
+ // NOTE: deliberately no UUID_ZERO "global latest" copy — in a shared
439
+ // database that becomes last-writer-wins across projects. Readers that
440
+ // don't know their project resolve it from cwd instead.
363
441
  await this.repository.upsert(memory);
364
442
 
365
- // Always update the global (no-project) waypoint so the session-start
366
- // hook can find the most recent waypoint without knowing the project name.
367
- const globalId = MemoryService.UUID_ZERO;
368
- if (memory.id !== globalId) {
369
- await this.repository.upsert({ ...memory, id: globalId });
370
- }
371
-
372
443
  return memory;
373
444
  }
374
445
 
446
/**
 * Return the latest waypoint for a project, or null when none can be found.
 *
 * Lookup order (the first live, non-deleted record wins):
 *   1. the canonical ID: waypointId(resolved project)
 *   2. the ID built from the project's basename (legacy display name)
 *   3. the legacy-format IDs for the resolved project and for its basename
 *   4. the UUID_ZERO record, accepted only when its metadata.project
 *      corresponds to the resolved project, so another project's
 *      pre-migration waypoint is never returned
 *
 * A hit under any non-canonical ID is migrated on read: it is written under
 * the canonical ID, the old record is marked deleted, and the migrated copy
 * is returned.
 */
async getLatestWaypoint(project?: string): Promise<Memory | null> {
  const resolved = this.resolveProject(project);
  const canonicalId = MemoryService.waypointId(resolved);

  const current = await this.get(canonicalId);
  if (current && !isDeleted(current)) return current;

  // Re-home a record found under an old ID: write the canonical copy first,
  // then retire the source record.
  const adopt = async (
    found: Memory,
    oldId: string,
    owner: string | null,
  ): Promise<Memory> => {
    await this.repository.upsert({ ...found, id: canonicalId, project: owner });
    await this.repository.markDeleted(oldId);
    return { ...found, id: canonicalId, project: owner };
  };

  // Collect the older IDs worth probing, in priority order.
  const fallbackIds: string[] = [];
  if (resolved) {
    const display = basename(resolved);
    fallbackIds.push(MemoryService.waypointId(display));
    for (const name of [resolved, display]) {
      const legacyId = MemoryService.legacyWaypointId(name);
      if (legacyId) fallbackIds.push(legacyId);
    }
  } else {
    const legacyId = MemoryService.legacyWaypointId(resolved);
    if (legacyId) fallbackIds.push(legacyId);
  }

  for (const oldId of fallbackIds) {
    if (oldId === canonicalId) continue;
    const hit = await this.repository.findById(oldId);
    if (hit && !isDeleted(hit)) {
      return adopt(hit, oldId, resolved ?? hit.project);
    }
  }

  // Last resort: the pre-migration UUID_ZERO copy, guarded by a project match.
  if (!resolved || canonicalId === MemoryService.UUID_ZERO) return null;

  const shared = await this.repository.findById(MemoryService.UUID_ZERO);
  if (!shared || isDeleted(shared)) return null;

  const recorded = (shared.metadata.project as string | undefined) ?? "";
  const matchesProject =
    recorded.length > 0 &&
    (normalizeProject(recorded) === resolved ||
      recorded.trim().toLowerCase() === basename(resolved).toLowerCase());

  return matchesProject
    ? adopt(shared, MemoryService.UUID_ZERO, resolved)
    : null;
}
392
514
  }
@@ -11,6 +11,8 @@ export interface Memory {
11
11
  usefulness: number;
12
12
  accessCount: number;
13
13
  lastAccessed: Date | null;
14
+ /** Canonical project path this memory belongs to (null = untagged/legacy). */
15
+ project: string | null;
14
16
  }
15
17
 
16
18
  export function isDeleted(memory: Memory): boolean {
@@ -28,6 +30,7 @@ export function memoryToDict(memory: Memory): Record<string, unknown> {
28
30
  usefulness: memory.usefulness,
29
31
  accessCount: memory.accessCount,
30
32
  lastAccessed: memory.lastAccessed?.toISOString() ?? null,
33
+ project: memory.project,
31
34
  };
32
35
  }
33
36
 
@@ -38,7 +41,46 @@ export interface IntentProfile {
38
41
  jitter: number;
39
42
  }
40
43
 
44
/** Signals preserved from the hybrid search pipeline for confidence scoring. */
export interface SearchSignals {
  // Similarity between query and result embeddings. computeConfidence treats
  // null as "no semantic score" (an FTS-only result).
  cosineSimilarity: number | null;
  // True when the full-text search path matched this result. Together with a
  // non-null cosineSimilarity it earns the dual-path agreement bonus.
  ftsMatch: boolean;
  // NOTE(review): presumably the result's position in the KNN candidate list,
  // null when that path did not return it. Not read in the visible code; confirm.
  knnRank: number | null;
  // NOTE(review): presumably the result's position in the FTS candidate list,
  // null when that path did not return it. Not read in the visible code; confirm.
  ftsRank: number | null;
}
51
+
41
52
  /** Augments any entity type with an RRF score from hybrid search. */
42
- export type WithRrfScore<T> = T & { rrfScore: number };
53
+ export type WithRrfScore<T> = T & { rrfScore: number; signals: SearchSignals };
43
54
 
44
55
  export type HybridRow = WithRrfScore<Memory>;
56
+
57
// Sigmoid parameters for computeConfidence.
// Calibrated against all-MiniLM-L6-v2: noise ceiling ~0.25, weak-relevant floor ~0.30
const CONFIDENCE_STEEPNESS = 14;
const CONFIDENCE_MIDPOINT = 0.35;
// Added when a result was found by both the KNN and the FTS path.
const CONFIDENCE_AGREEMENT_BONUS = 0.08;

/**
 * Compute absolute confidence (0-1) from search signals.
 *
 * Based primarily on cosine similarity (the strongest absolute signal)
 * mapped through a shifted sigmoid, with an agreement bonus for dual-path
 * matches. The midpoint and steepness are calibrated for all-MiniLM-L6-v2
 * embeddings.
 *
 * @param signals Signals carried over from the hybrid search pipeline.
 * @returns A value in [0, 1]. When there is no usable similarity (null, NaN
 *   or non-finite) the result is 0.40 for a keyword match and 0 otherwise.
 */
export function computeConfidence(signals: SearchSignals): number {
  const sim = signals.cosineSimilarity;

  // No usable semantic score. A NaN similarity would otherwise propagate
  // through Math.exp and make the confidence NaN, so it is handled like the
  // FTS-only case: keyword match but no semantic confirmation.
  if (sim === null || !Number.isFinite(sim)) {
    return signals.ftsMatch ? 0.40 : 0.0;
  }

  // Shifted sigmoid: maps cosine similarity to interpretable confidence
  const semantic =
    1 / (1 + Math.exp(-CONFIDENCE_STEEPNESS * (sim - CONFIDENCE_MIDPOINT)));

  // Dual-path agreement bonus: found by both KNN and FTS (capped at 1)
  return signals.ftsMatch
    ? Math.min(1.0, semantic + CONFIDENCE_AGREEMENT_BONUS)
    : semantic;
}
@@ -1,5 +1,8 @@
1
1
  import type { Database } from "bun:sqlite";
2
+ import { readFile } from "fs/promises";
3
+ import { dirname, join } from "path";
2
4
  import type { EmbeddingsService } from "./embeddings.service";
5
+ import { normalizeProject } from "./project";
3
6
  import { serializeVector } from "./sqlite-utils";
4
7
 
5
8
  /**
@@ -62,7 +65,8 @@ export function runMigrations(db: Database): void {
62
65
  superseded_by TEXT,
63
66
  usefulness REAL NOT NULL DEFAULT 0.0,
64
67
  access_count INTEGER NOT NULL DEFAULT 0,
65
- last_accessed INTEGER
68
+ last_accessed INTEGER,
69
+ project TEXT
66
70
  )
67
71
  `);
68
72
 
@@ -109,11 +113,185 @@ export function runMigrations(db: Database): void {
109
113
  )
110
114
  `);
111
115
 
116
+ // -- Conversation index state (replaces conversation_index_state.json) --
117
+ db.exec(`
118
+ CREATE TABLE IF NOT EXISTS conversation_index_state (
119
+ session_id TEXT PRIMARY KEY,
120
+ file_path TEXT NOT NULL,
121
+ project TEXT NOT NULL,
122
+ last_modified INTEGER NOT NULL,
123
+ chunk_count INTEGER NOT NULL,
124
+ message_count INTEGER NOT NULL,
125
+ indexed_at INTEGER NOT NULL,
126
+ first_message_at INTEGER NOT NULL,
127
+ last_message_at INTEGER NOT NULL
128
+ )
129
+ `);
130
+
131
+ // -- Versioned migrations (non-idempotent schema changes) --
132
+ runVersionedMigrations(db);
133
+
112
134
  // -- Indexes --
113
135
  db.exec(`CREATE INDEX IF NOT EXISTS idx_conversation_session_id ON conversation_history(session_id)`);
114
136
  db.exec(`CREATE INDEX IF NOT EXISTS idx_conversation_project ON conversation_history(project)`);
115
137
  db.exec(`CREATE INDEX IF NOT EXISTS idx_conversation_role ON conversation_history(role)`);
116
138
  db.exec(`CREATE INDEX IF NOT EXISTS idx_conversation_created_at ON conversation_history(created_at)`);
139
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_memories_created_at ON memories(created_at)`);
140
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_memories_project ON memories(project)`);
141
+ }
142
+
143
+ /** Current schema version. Bump when adding a versioned migration below. */
144
+ const SCHEMA_VERSION = 1;
145
+
146
/** Read the value reported by `PRAGMA user_version`; yields 0 when no row or no value comes back. */
function getUserVersion(db: Database): number {
  const statement = db.prepare("PRAGMA user_version");
  const result = statement.get() as { user_version: number } | null;
  if (result == null) return 0;
  return result.user_version ?? 0;
}
152
+
153
/**
 * Non-idempotent migrations (e.g. ALTER TABLE) gated by PRAGMA user_version.
 *
 * Intended to be safe when several processes open the same database: the
 * version is checked once without a lock (fast path), then re-checked inside
 * BEGIN IMMEDIATE, so a process that loses a startup race sees the bumped
 * version once it gets the write lock and does nothing.
 * NOTE(review): waiting for the lock rather than failing depends on the
 * connection's busy_timeout, which is configured outside this function.
 *
 * @param db Open database connection; the `memories` table must already exist.
 * @throws Re-throws whatever error made a migration step fail, after
 *   attempting to roll the transaction back.
 */
function runVersionedMigrations(db: Database): void {
  // Fast path: nothing to do, so avoid taking the write lock at all.
  if (getUserVersion(db) >= SCHEMA_VERSION) return;

  db.exec("BEGIN IMMEDIATE");
  try {
    // Re-read under the lock: another process may have migrated meanwhile.
    const version = getUserVersion(db);

    if (version < 1) {
      // v1: project column on memories (fresh databases get it via CREATE
      // TABLE; pre-existing databases need the ALTER).
      const columns = db
        .prepare("PRAGMA table_info(memories)")
        .all() as Array<{ name: string }>;
      if (!columns.some((c) => c.name === "project")) {
        db.exec("ALTER TABLE memories ADD COLUMN project TEXT");
      }

      // Backfill from metadata where a project was recorded (waypoints).
      // Values are stored raw — they may be legacy display names rather than
      // canonical paths; consolidation re-stamps them with the real project.
      db.exec(`
        UPDATE memories
        SET project = json_extract(metadata, '$.project')
        WHERE project IS NULL
          AND json_extract(metadata, '$.project') IS NOT NULL
      `);

      db.exec(`PRAGMA user_version = ${SCHEMA_VERSION}`);
    }

    db.exec("COMMIT");
  } catch (e) {
    // Some failures make SQLite roll the transaction back on its own, in
    // which case ROLLBACK throws "no transaction is active". Swallow that
    // secondary error so the caller sees the failure that actually mattered.
    try {
      db.exec("ROLLBACK");
    } catch {
      // transaction already rolled back — nothing left to undo
    }
    throw e;
  }
}
196
+
197
/**
 * Repair legacy `conversation_history.project` values.
 *
 * Rows indexed before the cwd-based parser carry lossy dash-decoded project
 * values (no leading slash; any dash in a directory name decoded as "/").
 * This re-derives the true project from each session file's `cwd` field.
 * Rows whose session file cannot be read get a best-effort "/" prefix so
 * they are not re-scanned on every startup.
 *
 * Both repair paths keep the `project` column and `metadata.project` in
 * step, so a repaired row reports the same project whichever one is read.
 *
 * Legacy rows are identified by the missing leading slash, so the function
 * converges to a no-op once all rows are repaired.
 *
 * @param db Open database containing `conversation_history` and
 *   `conversation_index_state`.
 * @param dbPath Path of the database file; the legacy
 *   `conversation_index_state.json` is looked up in the same directory.
 */
export async function repairConversationProjects(
  db: Database,
  dbPath: string,
): Promise<void> {
  const legacy = db
    .prepare(
      "SELECT DISTINCT session_id FROM conversation_history WHERE project NOT LIKE '/%'",
    )
    .all() as Array<{ session_id: string }>;

  if (legacy.length === 0) return;

  // session_id -> file_path, from the index state table or the legacy JSON
  const filePaths = new Map<string, string>();
  const stateRows = db
    .prepare("SELECT session_id, file_path FROM conversation_index_state")
    .all() as Array<{ session_id: string; file_path: string }>;
  for (const row of stateRows) filePaths.set(row.session_id, row.file_path);

  if (filePaths.size === 0) {
    try {
      const raw = await readFile(
        join(dirname(dbPath), "conversation_index_state.json"),
        "utf-8",
      );
      // The file is external input: accept only an array, and from it only
      // entries that really carry both string fields.
      const parsed: unknown = JSON.parse(raw);
      if (Array.isArray(parsed)) {
        for (const entry of parsed as Array<{
          sessionId?: unknown;
          filePath?: unknown;
        } | null>) {
          if (
            typeof entry?.sessionId === "string" &&
            typeof entry.filePath === "string"
          ) {
            filePaths.set(entry.sessionId, entry.filePath);
          }
        }
      }
    } catch {
      // No (readable) legacy state file — fall through to best-effort repair
    }
  }

  console.error(
    `[vector-memory-mcp] Repairing project values for ${legacy.length} legacy sessions...`,
  );

  const updateExact = db.prepare(`
    UPDATE conversation_history
    SET project = ?, metadata = json_set(metadata, '$.project', ?)
    WHERE session_id = ?
  `);
  // Right-hand sides of SET see the row's pre-update values, so both
  // assignments prefix the same old `project`. Metadata that is NULL or not
  // valid JSON is left untouched rather than failing the whole repair.
  const updateBestEffort = db.prepare(`
    UPDATE conversation_history
    SET project = '/' || project,
        metadata = CASE
          WHEN json_valid(metadata)
            THEN json_set(metadata, '$.project', '/' || project)
          ELSE metadata
        END
    WHERE session_id = ? AND project NOT LIKE '/%'
  `);
  const updateState = db.prepare(
    "UPDATE conversation_index_state SET project = ? WHERE session_id = ?",
  );

  for (const { session_id } of legacy) {
    const filePath = filePaths.get(session_id);
    const cwd = filePath ? await readSessionCwd(filePath) : null;
    if (cwd) {
      const project = normalizeProject(cwd);
      updateExact.run(project, project, session_id);
      updateState.run(project, session_id);
    } else {
      updateBestEffort.run(session_id);
    }
  }
}
274
+
275
/**
 * Find the first usable `cwd` value in a session JSONL file.
 *
 * Lines are scanned in order. A line counts only if it parses as JSON and its
 * top-level `cwd` is a non-empty string. Returns null when the file cannot be
 * read or no line qualifies.
 */
async function readSessionCwd(filePath: string): Promise<string | null> {
  // Pull a non-empty string `cwd` out of one line, or null if it has none.
  const cwdOf = (jsonLine: string): string | null => {
    try {
      const parsed = JSON.parse(jsonLine) as { cwd?: unknown };
      const value = parsed.cwd;
      return typeof value === "string" && value.length > 0 ? value : null;
    } catch {
      // malformed line — caller keeps scanning
      return null;
    }
  };

  let text: string;
  try {
    text = await readFile(filePath, "utf-8");
  } catch {
    return null;
  }

  for (const candidate of text.split("\n")) {
    // Cheap substring test first so most lines are never JSON-parsed.
    if (!candidate.includes('"cwd"')) continue;
    const found = cwdOf(candidate);
    if (found !== null) return found;
  }
  return null;
}
118
296
 
119
297
  /**
@@ -215,27 +393,30 @@ export async function backfillVectors(
215
393
  "INSERT OR REPLACE INTO conversation_history_vec (id, vector) VALUES (?, ?)",
216
394
  );
217
395
 
218
- // Batch embed in chunks of 32
396
+ // Batch embed in chunks of 32. Embedding happens OUTSIDE the write
397
+ // transaction and each batch commits separately — holding the write lock
398
+ // across model inference would block every other process sharing the db.
219
399
  const BATCH_SIZE = 32;
220
- db.exec("BEGIN");
221
- try {
222
- for (let i = 0; i < missingConvos.length; i += BATCH_SIZE) {
223
- const batch = missingConvos.slice(i, i + BATCH_SIZE);
224
- const vecs = await embeddings.embedBatch(batch.map((r) => r.content));
400
+ for (let i = 0; i < missingConvos.length; i += BATCH_SIZE) {
401
+ const batch = missingConvos.slice(i, i + BATCH_SIZE);
402
+ const vecs = await embeddings.embedBatch(batch.map((r) => r.content));
403
+
404
+ db.exec("BEGIN");
405
+ try {
225
406
  for (let j = 0; j < batch.length; j++) {
226
407
  insertConvoVec.run(batch[j].id, serializeVector(vecs[j]));
227
408
  }
409
+ db.exec("COMMIT");
410
+ } catch (e) {
411
+ db.exec("ROLLBACK");
412
+ throw e;
413
+ }
228
414
 
229
- if ((i + BATCH_SIZE) % 100 < BATCH_SIZE) {
230
- console.error(
231
- `[vector-memory-mcp] ...${Math.min(i + BATCH_SIZE, missingConvos.length)}/${missingConvos.length} conversation chunks`,
232
- );
233
- }
415
+ if ((i + BATCH_SIZE) % 100 < BATCH_SIZE) {
416
+ console.error(
417
+ `[vector-memory-mcp] ...${Math.min(i + BATCH_SIZE, missingConvos.length)}/${missingConvos.length} conversation chunks`,
418
+ );
234
419
  }
235
- db.exec("COMMIT");
236
- } catch (e) {
237
- db.exec("ROLLBACK");
238
- throw e;
239
420
  }
240
421
 
241
422
  console.error(