@aeriondyseti/vector-memory-mcp 2.3.0 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import type { Database } from "bun:sqlite";
2
- import type { EmbeddingsService } from "./embeddings.service.js";
3
- import { serializeVector } from "./sqlite-utils.js";
2
+ import type { EmbeddingsService } from "./embeddings.service";
3
+ import { serializeVector } from "./sqlite-utils";
4
4
 
5
5
  /**
6
6
  * Pre-migration step: remove vec0 virtual table entries from sqlite_master
@@ -127,8 +127,25 @@ export async function backfillVectors(
127
127
  db: Database,
128
128
  embeddings: EmbeddingsService,
129
129
  ): Promise<void> {
130
+ // Quick gap check: if no rows are missing vectors, skip the expensive backfill
131
+ const hasMemories = db.prepare("SELECT 1 FROM memories LIMIT 1").get();
132
+ const hasConvos = db.prepare("SELECT 1 FROM conversation_history LIMIT 1").get();
133
+
134
+ if (!hasMemories && !hasConvos) return;
135
+
136
+ const memoryGap = hasMemories && db.prepare(
137
+ `SELECT 1 FROM memories m LEFT JOIN memories_vec v ON m.id = v.id
138
+ WHERE v.id IS NULL OR length(v.vector) = 0 LIMIT 1`,
139
+ ).get();
140
+
141
+ const convoGap = hasConvos && db.prepare(
142
+ `SELECT 1 FROM conversation_history c LEFT JOIN conversation_history_vec v ON c.id = v.id
143
+ WHERE v.id IS NULL OR length(v.vector) = 0 LIMIT 1`,
144
+ ).get();
145
+
146
+ if (!memoryGap && !convoGap) return;
147
+
130
148
  // ── Memories ──────────────────────────────────────────────────────
131
- // Catch both missing rows (v.id IS NULL) and corrupt 0-byte BLOBs
132
149
  const missingMemories = db
133
150
  .prepare(
134
151
  `SELECT m.id, m.content, json_extract(m.metadata, '$.type') AS type
@@ -151,14 +168,27 @@ export async function backfillVectors(
151
168
  new Array(embeddings.dimension).fill(0),
152
169
  );
153
170
 
154
- for (const row of missingMemories) {
155
- // Waypoints use a zero vector (not semantically searched)
156
- const blob =
157
- row.type === "waypoint"
158
- ? zeroVector
159
- : serializeVector(await embeddings.embed(row.content));
171
+ // Separate waypoints from content that needs embedding
172
+ const toEmbed = missingMemories.filter((r) => r.type !== "waypoint");
173
+ const waypoints = missingMemories.filter((r) => r.type === "waypoint");
160
174
 
161
- insertVec.run(row.id, blob);
175
+ // Batch embed all non-waypoint content
176
+ const vectors = toEmbed.length > 0
177
+ ? await embeddings.embedBatch(toEmbed.map((r) => r.content))
178
+ : [];
179
+
180
+ db.exec("BEGIN");
181
+ try {
182
+ for (const row of waypoints) {
183
+ insertVec.run(row.id, zeroVector);
184
+ }
185
+ for (let i = 0; i < toEmbed.length; i++) {
186
+ insertVec.run(toEmbed[i].id, serializeVector(vectors[i]));
187
+ }
188
+ db.exec("COMMIT");
189
+ } catch (e) {
190
+ db.exec("ROLLBACK");
191
+ throw e;
162
192
  }
163
193
 
164
194
  console.error(
@@ -185,17 +215,27 @@ export async function backfillVectors(
185
215
  "INSERT OR REPLACE INTO conversation_history_vec (id, vector) VALUES (?, ?)",
186
216
  );
187
217
 
188
- for (let i = 0; i < missingConvos.length; i++) {
189
- const row = missingConvos[i];
190
- const vec = serializeVector(await embeddings.embed(row.content));
191
- insertConvoVec.run(row.id, vec);
192
-
193
- // Log progress every 100 chunks
194
- if ((i + 1) % 100 === 0) {
195
- console.error(
196
- `[vector-memory-mcp] ...${i + 1}/${missingConvos.length} conversation chunks`,
197
- );
218
+ // Batch embed in chunks of 32
219
+ const BATCH_SIZE = 32;
220
+ db.exec("BEGIN");
221
+ try {
222
+ for (let i = 0; i < missingConvos.length; i += BATCH_SIZE) {
223
+ const batch = missingConvos.slice(i, i + BATCH_SIZE);
224
+ const vecs = await embeddings.embedBatch(batch.map((r) => r.content));
225
+ for (let j = 0; j < batch.length; j++) {
226
+ insertConvoVec.run(batch[j].id, serializeVector(vecs[j]));
227
+ }
228
+
229
+ if ((i + BATCH_SIZE) % 100 < BATCH_SIZE) {
230
+ console.error(
231
+ `[vector-memory-mcp] ...${Math.min(i + BATCH_SIZE, missingConvos.length)}/${missingConvos.length} conversation chunks`,
232
+ );
233
+ }
198
234
  }
235
+ db.exec("COMMIT");
236
+ } catch (e) {
237
+ db.exec("ROLLBACK");
238
+ throw e;
199
239
  }
200
240
 
201
241
  console.error(
@@ -1,7 +1,7 @@
1
1
  import { readFile, readdir, stat } from "fs/promises";
2
2
  import { basename, dirname, join } from "path";
3
- import type { ParsedMessage, SessionFileInfo } from "../conversation.js";
4
- import type { SessionLogParser } from "./types.js";
3
+ import type { ParsedMessage, SessionFileInfo } from "../conversation";
4
+ import type { SessionLogParser } from "./types";
5
5
 
6
6
  // UUID pattern for session IDs
7
7
  const UUID_PATTERN =
@@ -45,7 +45,7 @@ export class ClaudeCodeSessionParser implements SessionLogParser {
45
45
  const fileName = basename(filePath, ".jsonl");
46
46
  const parentDir = basename(dirname(filePath));
47
47
  // Check if this is inside a subagents directory
48
- const isSubagentFile = filePath.includes("/subagents/");
48
+ const isSubagentFile = /[/\\]subagents[/\\]/.test(filePath);
49
49
 
50
50
  // For subagent files, project dir is 3 levels up: <project>/<session>/subagents/<file>
51
51
  // For main files, project dir is direct parent
@@ -1,4 +1,4 @@
1
- import type { ParsedMessage, SessionFileInfo } from "../conversation.js";
1
+ import type { ParsedMessage, SessionFileInfo } from "../conversation";
2
2
 
3
3
  /** Interface for parsing session log files into structured messages */
4
4
  export interface SessionLogParser {
@@ -3,6 +3,28 @@ import type { Database } from "bun:sqlite";
3
3
  /** RRF constant matching the previous LanceDB reranker default */
4
4
  export const RRF_K = 60;
5
5
 
6
+ /**
7
+ * Maximum parameters per SQLite query to stay within SQLITE_MAX_VARIABLE_NUMBER.
8
+ */
9
+ export const SQLITE_BATCH_SIZE = 100;
10
+
11
+ /**
12
+ * Execute a query in batches when the number of parameters exceeds SQLITE_BATCH_SIZE.
13
+ * Splits the ids array and concatenates results.
14
+ */
15
+ export function batchedQuery<T>(
16
+ db: Database,
17
+ ids: string[],
18
+ queryFn: (batch: string[]) => T[]
19
+ ): T[] {
20
+ if (ids.length <= SQLITE_BATCH_SIZE) return queryFn(ids);
21
+ const results: T[] = [];
22
+ for (let i = 0; i < ids.length; i += SQLITE_BATCH_SIZE) {
23
+ results.push(...queryFn(ids.slice(i, i + SQLITE_BATCH_SIZE)));
24
+ }
25
+ return results;
26
+ }
27
+
6
28
  /**
7
29
  * Serialize a number[] embedding to raw float32 bytes for BLOB storage.
8
30
  */
package/server/index.ts CHANGED
@@ -1,15 +1,15 @@
1
1
  #!/usr/bin/env bun
2
2
 
3
- import { loadConfig, parseCliArgs } from "./config/index.js";
4
- import { connectToDatabase } from "./core/connection.js";
5
- import { backfillVectors } from "./core/migrations.js";
6
- import { MemoryRepository } from "./core/memory.repository.js";
7
- import { ConversationRepository } from "./core/conversation.repository.js";
8
- import { EmbeddingsService } from "./core/embeddings.service.js";
9
- import { MemoryService } from "./core/memory.service.js";
10
- import { ConversationHistoryService } from "./core/conversation.service.js";
11
- import { startServer } from "./transports/mcp/server.js";
12
- import { startHttpServer } from "./transports/http/server.js";
3
+ import { loadConfig, parseCliArgs } from "./config/index";
4
+ import { connectToDatabase } from "./core/connection";
5
+ import { backfillVectors } from "./core/migrations";
6
+ import { MemoryRepository } from "./core/memory.repository";
7
+ import { ConversationRepository } from "./core/conversation.repository";
8
+ import { EmbeddingsService } from "./core/embeddings.service";
9
+ import { MemoryService } from "./core/memory.service";
10
+ import { ConversationHistoryService } from "./core/conversation.service";
11
+ import { startServer } from "./transports/mcp/server";
12
+ import { startHttpServer } from "./transports/http/server";
13
13
 
14
14
  async function main(): Promise<void> {
15
15
  const args = process.argv.slice(2);
@@ -25,17 +25,15 @@ async function main(): Promise<void> {
25
25
  const overrides = parseCliArgs(args);
26
26
  const config = loadConfig(overrides);
27
27
 
28
- // Initialize database
28
+ // Initialize database and backfill any missing vectors before services start
29
29
  const db = connectToDatabase(config.dbPath);
30
+ const embeddings = new EmbeddingsService(config.embeddingModel, config.embeddingDimension);
31
+ await backfillVectors(db, embeddings);
30
32
 
31
33
  // Initialize layers
32
34
  const repository = new MemoryRepository(db);
33
- const embeddings = new EmbeddingsService(config.embeddingModel, config.embeddingDimension);
34
35
  const memoryService = new MemoryService(repository, embeddings);
35
36
 
36
- // Backfill any missing vectors (e.g. after vec0-to-BLOB migration)
37
- await backfillVectors(db, embeddings);
38
-
39
37
  if (config.pluginMode) {
40
38
  console.error("[vector-memory-mcp] Running in plugin mode");
41
39
  }
@@ -21,11 +21,11 @@ import {
21
21
  } from "@modelcontextprotocol/sdk/types.js";
22
22
  import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
23
23
 
24
- import { tools } from "../mcp/tools.js";
25
- import { handleToolCall } from "../mcp/handlers.js";
26
- import { SERVER_INSTRUCTIONS } from "../mcp/server.js";
27
- import { VERSION } from "../../config/index.js";
28
- import type { MemoryService } from "../../core/memory.service.js";
24
+ import { tools } from "../mcp/tools";
25
+ import { handleToolCall } from "../mcp/handlers";
26
+ import { SERVER_INSTRUCTIONS } from "../mcp/server";
27
+ import { VERSION } from "../../config/index";
28
+ import type { MemoryService } from "../../core/memory.service";
29
29
 
30
30
  interface Session {
31
31
  server: Server;
@@ -3,11 +3,11 @@ import { cors } from "hono/cors";
3
3
  import { createServer } from "net";
4
4
  import { writeFileSync, mkdirSync, unlinkSync } from "fs";
5
5
  import { join } from "path";
6
- import type { MemoryService } from "../../core/memory.service.js";
7
- import type { Config } from "../../config/index.js";
8
- import { isDeleted } from "../../core/memory.js";
9
- import { createMcpRoutes } from "./mcp-transport.js";
10
- import type { Memory, SearchIntent } from "../../core/memory.js";
6
+ import type { MemoryService } from "../../core/memory.service";
7
+ import type { Config } from "../../config/index";
8
+ import { isDeleted } from "../../core/memory";
9
+ import { createMcpRoutes } from "./mcp-transport";
10
+ import type { Memory, SearchIntent } from "../../core/memory";
11
11
 
12
12
 
13
13
  /**
@@ -111,10 +111,22 @@ export function createHttpApp(memoryService: MemoryService, config: Config): Hon
111
111
  embeddingDimension: config.embeddingDimension,
112
112
  historyEnabled: config.conversationHistory.enabled,
113
113
  pluginMode: config.pluginMode,
114
+ embeddingReady: memoryService.getEmbeddings().isReady,
114
115
  },
115
116
  });
116
117
  });
117
118
 
119
+ // Warmup endpoint — triggers ONNX model load if not already cached
120
+ app.post("/warmup", async (c) => {
121
+ const embeddings = memoryService.getEmbeddings();
122
+ if (embeddings.isReady) {
123
+ return c.json({ status: "already_warm" });
124
+ }
125
+ const start = Date.now();
126
+ await embeddings.warmup();
127
+ return c.json({ status: "warmed", elapsed: Date.now() - start });
128
+ });
129
+
118
130
  // Search endpoint
119
131
  app.post("/search", async (c) => {
120
132
  try {
@@ -127,7 +139,7 @@ export function createHttpApp(memoryService: MemoryService, config: Config): Hon
127
139
  return c.json({ error: "Missing or invalid 'query' field" }, 400);
128
140
  }
129
141
 
130
- const results = await memoryService.search(query, intent, limit);
142
+ const results = await memoryService.search(query, intent, { limit });
131
143
 
132
144
  return c.json({
133
145
  results: results.map((r) => ({
@@ -1,9 +1,9 @@
1
1
  import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
2
- import type { MemoryService } from "../../core/memory.service.js";
3
- import type { ConversationHistoryService } from "../../core/conversation.service.js";
4
- import type { SearchIntent } from "../../core/memory.js";
5
- import type { HistoryFilters, SearchResult } from "../../core/conversation.js";
6
- import { DEBUG } from "../../config/index.js";
2
+ import type { MemoryService } from "../../core/memory.service";
3
+ import type { ConversationHistoryService } from "../../core/conversation.service";
4
+ import type { SearchIntent } from "../../core/memory";
5
+ import type { HistoryFilters, SearchResult } from "../../core/conversation";
6
+ import { DEBUG } from "../../config/index";
7
7
 
8
8
  /**
9
9
  * Safely coerce a tool argument to an array. Handles the case where the MCP
@@ -51,6 +51,14 @@ function parseDate(value: unknown, fieldName: string): Date | undefined {
51
51
  return date;
52
52
  }
53
53
 
54
+ function requireString(args: Record<string, unknown> | undefined, field: string): string {
55
+ const value = args?.[field];
56
+ if (typeof value !== "string" || value.trim() === "") {
57
+ throw new Error(`${field} is required`);
58
+ }
59
+ return value;
60
+ }
61
+
54
62
  export async function handleStoreMemories(
55
63
  args: Record<string, unknown> | undefined,
56
64
  service: MemoryService
@@ -189,7 +197,9 @@ export async function handleSearchMemories(
189
197
  return errorResult(errorText(e));
190
198
  }
191
199
 
192
- const results = await service.search(query, intent, limit, includeDeleted, {
200
+ const results = await service.search(query, intent, {
201
+ limit,
202
+ includeDeleted,
193
203
  includeHistory,
194
204
  historyOnly,
195
205
  historyFilters,
@@ -202,19 +212,7 @@ export async function handleSearchMemories(
202
212
  };
203
213
  }
204
214
 
205
- const formatted = results.map((r: SearchResult) => {
206
- let result = `[${r.source}] ID: ${r.id}\nContent: ${r.content}`;
207
- if (r.metadata && Object.keys(r.metadata).length > 0) {
208
- result += `\nMetadata: ${JSON.stringify(r.metadata)}`;
209
- }
210
- if (r.source === "memory" && includeDeleted && r.supersededBy) {
211
- result += `\n[DELETED]`;
212
- }
213
- if (r.source === "conversation_history" && r.sessionId) {
214
- result += `\nSession: ${r.sessionId}`;
215
- }
216
- return result;
217
- });
215
+ const formatted = results.map((r) => formatSearchResult(r, includeDeleted));
218
216
 
219
217
  return {
220
218
  content: [{ type: "text", text: formatted.join("\n\n---\n\n") }],
@@ -241,6 +239,20 @@ function formatMemoryDetail(
241
239
  return result;
242
240
  }
243
241
 
242
+ function formatSearchResult(r: SearchResult, includeDeleted: boolean): string {
243
+ let result = `[${r.source}] ID: ${r.id}\nContent: ${r.content}`;
244
+ if (r.metadata && Object.keys(r.metadata).length > 0) {
245
+ result += `\nMetadata: ${JSON.stringify(r.metadata)}`;
246
+ }
247
+ if (r.source === "memory" && includeDeleted && r.supersededBy) {
248
+ result += `\n[DELETED]`;
249
+ }
250
+ if (r.source === "conversation_history" && r.sessionId) {
251
+ result += `\nSession: ${r.sessionId}`;
252
+ }
253
+ return result;
254
+ }
255
+
244
256
  export async function handleGetMemories(
245
257
  args: Record<string, unknown> | undefined,
246
258
  service: MemoryService
@@ -267,8 +279,11 @@ export async function handleReportMemoryUsefulness(
267
279
  args: Record<string, unknown> | undefined,
268
280
  service: MemoryService
269
281
  ): Promise<CallToolResult> {
270
- const memoryId = args?.memory_id as string;
271
- const useful = args?.useful as boolean;
282
+ const memoryId = requireString(args, "memory_id");
283
+ const useful = args?.useful;
284
+ if (typeof useful !== "boolean") {
285
+ return errorResult("useful is required and must be a boolean");
286
+ }
272
287
 
273
288
  const memory = await service.vote(memoryId, useful ? 1 : -1);
274
289
 
@@ -290,10 +305,19 @@ export async function handleSetWaypoint(
290
305
  args: Record<string, unknown> | undefined,
291
306
  service: MemoryService
292
307
  ): Promise<CallToolResult> {
308
+ let project: string;
309
+ let summary: string;
310
+ try {
311
+ project = requireString(args, "project");
312
+ summary = requireString(args, "summary");
313
+ } catch (e) {
314
+ return errorResult(errorText(e));
315
+ }
316
+
293
317
  const memory = await service.setWaypoint({
294
- project: args?.project as string,
318
+ project,
295
319
  branch: args?.branch as string | undefined,
296
- summary: args?.summary as string,
320
+ summary,
297
321
  completed: (args?.completed as string[] | undefined) ?? [],
298
322
  in_progress_blocked: (args?.in_progress_blocked as string[] | undefined) ?? [],
299
323
  key_decisions: (args?.key_decisions as string[] | undefined) ?? [],
@@ -6,12 +6,12 @@ import {
6
6
  ListResourcesRequestSchema,
7
7
  ReadResourceRequestSchema,
8
8
  } from "@modelcontextprotocol/sdk/types.js";
9
- import { resources, readResource } from "./resources.js";
9
+ import { resources, readResource } from "./resources";
10
10
 
11
- import { tools } from "./tools.js";
12
- import { handleToolCall } from "./handlers.js";
13
- import type { MemoryService } from "../../core/memory.service.js";
14
- import { VERSION } from "../../config/index.js";
11
+ import { tools } from "./tools";
12
+ import { handleToolCall } from "./handlers";
13
+ import type { MemoryService } from "../../core/memory.service";
14
+ import { VERSION } from "../../config/index";
15
15
 
16
16
  export const SERVER_INSTRUCTIONS = `This server is the user's canonical memory system. It provides persistent, semantic vector memory that survives across conversations and sessions.
17
17
 
@@ -1,181 +0,0 @@
1
- #!/usr/bin/env bun
2
- /**
3
- * Standalone LanceDB data extractor — runs in a child process so that
4
- * @lancedb/lancedb native bindings never coexist with bun:sqlite's
5
- * extension loading in the same process.
6
- *
7
- * Usage: bun scripts/lancedb-extract.ts <lance-db-path>
8
- * Output: JSON on stdout — { memories: Row[], conversations: Row[] }
9
- */
10
-
11
- const source = process.argv[2];
12
- if (!source) {
13
- console.error("Usage: bun scripts/lancedb-extract.ts <lance-db-path>");
14
- process.exit(1);
15
- }
16
-
17
- // Arrow TimeUnit enum → divisor to convert to milliseconds.
18
- // 0=SECOND, 1=MILLISECOND, 2=MICROSECOND, 3=NANOSECOND
19
- // Negative divisor = multiply (seconds → ms needs ×1000).
20
- const TIME_UNIT_TO_MS_DIVISOR: Record<number, bigint> = {
21
- 0: -1000n, // seconds → ms (multiply by 1000)
22
- 1: 1n, // ms → no conversion
23
- 2: 1000n, // μs → ms
24
- 3: 1000000n, // ns → ms
25
- };
26
-
27
- function buildTimestampDivisors(schema: any): Map<string, bigint> {
28
- const map = new Map<string, bigint>();
29
- for (const field of schema.fields) {
30
- if (field.type.typeId === 10) {
31
- map.set(field.name, TIME_UNIT_TO_MS_DIVISOR[field.type.unit] ?? 1n);
32
- }
33
- }
34
- return map;
35
- }
36
-
37
- function columnValue(batch: any, colName: string, rowIdx: number): unknown {
38
- const col = batch.getChild(colName);
39
- if (!col) return undefined;
40
- try {
41
- return col.get(rowIdx);
42
- } catch {
43
- // Arrow's getter can throw on BigInt timestamps exceeding MAX_SAFE_INTEGER;
44
- // fall back to the raw typed array.
45
- let offset = rowIdx;
46
- for (const data of col.data) {
47
- if (offset < data.length) {
48
- return (data.values instanceof BigInt64Array || data.values instanceof BigUint64Array)
49
- ? data.values[offset]
50
- : null;
51
- }
52
- offset -= data.length;
53
- }
54
- return null;
55
- }
56
- }
57
-
58
- function toEpochMs(value: unknown, divisor: bigint = 1n): number {
59
- if (value == null) return Date.now();
60
- if (value instanceof Date) return value.getTime();
61
- if (typeof value === "bigint") {
62
- if (divisor < 0n) return Number(value * -divisor); // seconds → ms
63
- if (divisor === 1n) return Number(value);
64
- return Number(value / divisor);
65
- }
66
- if (typeof value === "number") {
67
- if (divisor < 0n) return value * Number(-divisor);
68
- if (divisor === 1n) return value;
69
- return Math.floor(value / Number(divisor));
70
- }
71
- return Date.now();
72
- }
73
-
74
- function toFloatArray(vec: unknown): number[] {
75
- if (Array.isArray(vec)) return vec;
76
- if (vec instanceof Float32Array) return Array.from(vec);
77
- if (vec && typeof (vec as any).toArray === "function") {
78
- return Array.from((vec as any).toArray());
79
- }
80
- if (ArrayBuffer.isView(vec)) {
81
- const view = vec as DataView;
82
- return Array.from(new Float32Array(view.buffer, view.byteOffset, view.byteLength / 4));
83
- }
84
- return [];
85
- }
86
-
87
- const BATCH_SIZE = 100;
88
- const lancedb = await import("@lancedb/lancedb");
89
- const db = await lancedb.connect(source);
90
- const tableNames = await db.tableNames();
91
- console.error(`Found tables: ${tableNames.join(", ")}`);
92
-
93
- const result: { memories: any[]; conversations: any[] } = {
94
- memories: [],
95
- conversations: [],
96
- };
97
-
98
- if (tableNames.includes("memories")) {
99
- const table = await db.openTable("memories");
100
- const total = await table.countRows();
101
- console.error(`Reading ${total} memories...`);
102
-
103
- // Paginated scan — query().toArrow() without offset/limit returns
104
- // non-deterministic results that can duplicate some rows and skip others.
105
- const schemaSample = await table.query().limit(1).toArrow();
106
- const tsDivisors = buildTimestampDivisors(schemaSample.schema);
107
- const seen = new Map<string, any>();
108
-
109
- for (let offset = 0; offset < total; offset += BATCH_SIZE) {
110
- const arrowTable = await table.query().offset(offset).limit(BATCH_SIZE).toArrow();
111
- for (const batch of arrowTable.batches) {
112
- for (let i = 0; i < batch.numRows; i++) {
113
- const id = columnValue(batch, "id", i) as string;
114
- const content = columnValue(batch, "content", i) as string;
115
- const lastAccessed = columnValue(batch, "last_accessed", i);
116
- const accessedMs = lastAccessed != null ? toEpochMs(lastAccessed, tsDivisors.get("last_accessed")) : null;
117
- // Deduplicate by ID: prefer most recently accessed, then longest content.
118
- const existing = seen.get(id);
119
- if (existing) {
120
- const existingAccess = existing.last_accessed ?? 0;
121
- const newAccess = accessedMs ?? 0;
122
- if (newAccess < existingAccess) continue;
123
- if (newAccess === existingAccess && content.length <= existing.content.length) continue;
124
- }
125
- seen.set(id, {
126
- id,
127
- content,
128
- metadata: columnValue(batch, "metadata", i) ?? "{}",
129
- vector: toFloatArray(columnValue(batch, "vector", i)),
130
- created_at: toEpochMs(columnValue(batch, "created_at", i), tsDivisors.get("created_at")),
131
- updated_at: toEpochMs(columnValue(batch, "updated_at", i), tsDivisors.get("updated_at")),
132
- last_accessed: accessedMs,
133
- superseded_by: columnValue(batch, "superseded_by", i) ?? null,
134
- usefulness: columnValue(batch, "usefulness", i) ?? 0,
135
- access_count: columnValue(batch, "access_count", i) ?? 0,
136
- });
137
- }
138
- }
139
- }
140
- result.memories = [...seen.values()];
141
- console.error(` ${result.memories.length} unique memories read (${total} rows scanned)`);
142
- }
143
-
144
- if (tableNames.includes("conversation_history")) {
145
- const table = await db.openTable("conversation_history");
146
- const total = await table.countRows();
147
- console.error(`Reading ${total} conversation chunks...`);
148
-
149
- const schemaSample = await table.query().limit(1).toArrow();
150
- const tsDivisors = buildTimestampDivisors(schemaSample.schema);
151
- const seen = new Map<string, any>();
152
-
153
- for (let offset = 0; offset < total; offset += BATCH_SIZE) {
154
- const arrowTable = await table.query().offset(offset).limit(BATCH_SIZE).toArrow();
155
- for (const batch of arrowTable.batches) {
156
- for (let i = 0; i < batch.numRows; i++) {
157
- const id = columnValue(batch, "id", i) as string;
158
- const content = columnValue(batch, "content", i) as string;
159
- const existing = seen.get(id);
160
- if (existing && existing.content.length >= content.length) continue;
161
- seen.set(id, {
162
- id,
163
- content,
164
- metadata: columnValue(batch, "metadata", i) ?? "{}",
165
- vector: toFloatArray(columnValue(batch, "vector", i)),
166
- created_at: toEpochMs(columnValue(batch, "created_at", i), tsDivisors.get("created_at")),
167
- session_id: columnValue(batch, "session_id", i),
168
- role: columnValue(batch, "role", i),
169
- message_index_start: columnValue(batch, "message_index_start", i) ?? 0,
170
- message_index_end: columnValue(batch, "message_index_end", i) ?? 0,
171
- project: columnValue(batch, "project", i) ?? "",
172
- });
173
- }
174
- }
175
- }
176
- result.conversations = [...seen.values()];
177
- console.error(` ${result.conversations.length} unique conversation chunks read (${total} rows scanned)`);
178
- }
179
-
180
- await db.close?.();
181
- process.stdout.write(JSON.stringify(result));