@aeriondyseti/vector-memory-mcp 2.2.2 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aeriondyseti/vector-memory-mcp",
3
- "version": "2.2.2",
3
+ "version": "2.2.3",
4
4
  "description": "A zero-configuration RAG memory server for MCP clients",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
@@ -14,24 +14,59 @@ if (!source) {
14
14
  process.exit(1);
15
15
  }
16
16
 
17
- /**
18
- * Read a value from an Arrow column at a given row index.
19
- * Arrow timestamp columns return BigInt — we convert to epoch-ms here
20
- * without going through Arrow's bigIntToNumber safety check.
21
- */
17
+ // Arrow TimeUnit enum → divisor to convert to milliseconds.
18
+ // 0=SECOND, 1=MILLISECOND, 2=MICROSECOND, 3=NANOSECOND
19
+ // Negative divisor = multiply (seconds → ms needs ×1000).
20
+ const TIME_UNIT_TO_MS_DIVISOR: Record<number, bigint> = {
21
+ 0: -1000n, // seconds → ms (multiply by 1000)
22
+ 1: 1n, // ms → no conversion
23
+ 2: 1000n, // μs → ms
24
+ 3: 1000000n, // ns → ms
25
+ };
26
+
27
+ function buildTimestampDivisors(schema: any): Map<string, bigint> {
28
+ const map = new Map<string, bigint>();
29
+ for (const field of schema.fields) {
30
+ if (field.type.typeId === 10) {
31
+ map.set(field.name, TIME_UNIT_TO_MS_DIVISOR[field.type.unit] ?? 1n);
32
+ }
33
+ }
34
+ return map;
35
+ }
36
+
22
37
  function columnValue(batch: any, colName: string, rowIdx: number): unknown {
23
38
  const col = batch.getChild(colName);
24
39
  if (!col) return undefined;
25
- return col.get(rowIdx);
40
+ try {
41
+ return col.get(rowIdx);
42
+ } catch {
43
+ // Arrow's getter can throw on BigInt timestamps exceeding MAX_SAFE_INTEGER;
44
+ // fall back to the raw typed array.
45
+ let offset = rowIdx;
46
+ for (const data of col.data) {
47
+ if (offset < data.length) {
48
+ return (data.values instanceof BigInt64Array || data.values instanceof BigUint64Array)
49
+ ? data.values[offset]
50
+ : null;
51
+ }
52
+ offset -= data.length;
53
+ }
54
+ return null;
55
+ }
26
56
  }
27
57
 
28
- function toEpochMs(value: unknown): number {
29
- if (typeof value === "number") return value;
58
+ function toEpochMs(value: unknown, divisor: bigint = 1n): number {
59
+ if (value == null) return Date.now();
30
60
  if (value instanceof Date) return value.getTime();
31
61
  if (typeof value === "bigint") {
32
- // Arrow timestamps are microseconds; convert to milliseconds.
33
- const ms = value / 1000n;
34
- return Number(ms);
62
+ if (divisor < 0n) return Number(value * -divisor); // seconds → ms
63
+ if (divisor === 1n) return Number(value);
64
+ return Number(value / divisor);
65
+ }
66
+ if (typeof value === "number") {
67
+ if (divisor < 0n) return value * Number(-divisor);
68
+ if (divisor === 1n) return value;
69
+ return Math.floor(value / Number(divisor));
35
70
  }
36
71
  return Date.now();
37
72
  }
@@ -49,6 +84,7 @@ function toFloatArray(vec: unknown): number[] {
49
84
  return [];
50
85
  }
51
86
 
87
+ const BATCH_SIZE = 100;
52
88
  const lancedb = await import("@lancedb/lancedb");
53
89
  const db = await lancedb.connect(source);
54
90
  const tableNames = await db.tableNames();
@@ -64,27 +100,45 @@ if (tableNames.includes("memories")) {
64
100
  const total = await table.countRows();
65
101
  console.error(`Reading ${total} memories...`);
66
102
 
67
- // Use toArrow() to get raw Arrow RecordBatches, bypassing StructRow
68
- // property accessors that throw on BigInt timestamps.
69
- const arrowTable = await table.query().toArrow();
70
- for (const batch of arrowTable.batches) {
71
- for (let i = 0; i < batch.numRows; i++) {
72
- const lastAccessed = columnValue(batch, "last_accessed", i);
73
- result.memories.push({
74
- id: columnValue(batch, "id", i),
75
- content: columnValue(batch, "content", i),
76
- metadata: columnValue(batch, "metadata", i) ?? "{}",
77
- vector: toFloatArray(columnValue(batch, "vector", i)),
78
- created_at: toEpochMs(columnValue(batch, "created_at", i)),
79
- updated_at: toEpochMs(columnValue(batch, "updated_at", i)),
80
- last_accessed: lastAccessed != null ? toEpochMs(lastAccessed) : null,
81
- superseded_by: columnValue(batch, "superseded_by", i) ?? null,
82
- usefulness: columnValue(batch, "usefulness", i) ?? 0,
83
- access_count: columnValue(batch, "access_count", i) ?? 0,
84
- });
103
+ // Paginated scan — query().toArrow() without offset/limit returns
104
+ // non-deterministic results that can duplicate some rows and skip others.
105
+ const schemaSample = await table.query().limit(1).toArrow();
106
+ const tsDivisors = buildTimestampDivisors(schemaSample.schema);
107
+ const seen = new Map<string, any>();
108
+
109
+ for (let offset = 0; offset < total; offset += BATCH_SIZE) {
110
+ const arrowTable = await table.query().offset(offset).limit(BATCH_SIZE).toArrow();
111
+ for (const batch of arrowTable.batches) {
112
+ for (let i = 0; i < batch.numRows; i++) {
113
+ const id = columnValue(batch, "id", i) as string;
114
+ const content = columnValue(batch, "content", i) as string;
115
+ const lastAccessed = columnValue(batch, "last_accessed", i);
116
+ const accessedMs = lastAccessed != null ? toEpochMs(lastAccessed, tsDivisors.get("last_accessed")) : null;
117
+ // Deduplicate by ID: prefer most recently accessed, then longest content.
118
+ const existing = seen.get(id);
119
+ if (existing) {
120
+ const existingAccess = existing.last_accessed ?? 0;
121
+ const newAccess = accessedMs ?? 0;
122
+ if (newAccess < existingAccess) continue;
123
+ if (newAccess === existingAccess && content.length <= existing.content.length) continue;
124
+ }
125
+ seen.set(id, {
126
+ id,
127
+ content,
128
+ metadata: columnValue(batch, "metadata", i) ?? "{}",
129
+ vector: toFloatArray(columnValue(batch, "vector", i)),
130
+ created_at: toEpochMs(columnValue(batch, "created_at", i), tsDivisors.get("created_at")),
131
+ updated_at: toEpochMs(columnValue(batch, "updated_at", i), tsDivisors.get("updated_at")),
132
+ last_accessed: accessedMs,
133
+ superseded_by: columnValue(batch, "superseded_by", i) ?? null,
134
+ usefulness: columnValue(batch, "usefulness", i) ?? 0,
135
+ access_count: columnValue(batch, "access_count", i) ?? 0,
136
+ });
137
+ }
85
138
  }
86
139
  }
87
- console.error(` ${result.memories.length} memories read`);
140
+ result.memories = [...seen.values()];
141
+ console.error(` ${result.memories.length} unique memories read (${total} rows scanned)`);
88
142
  }
89
143
 
90
144
  if (tableNames.includes("conversation_history")) {
@@ -92,24 +146,35 @@ if (tableNames.includes("conversation_history")) {
92
146
  const total = await table.countRows();
93
147
  console.error(`Reading ${total} conversation chunks...`);
94
148
 
95
- const arrowTable = await table.query().toArrow();
96
- for (const batch of arrowTable.batches) {
97
- for (let i = 0; i < batch.numRows; i++) {
98
- result.conversations.push({
99
- id: columnValue(batch, "id", i),
100
- content: columnValue(batch, "content", i),
101
- metadata: columnValue(batch, "metadata", i) ?? "{}",
102
- vector: toFloatArray(columnValue(batch, "vector", i)),
103
- created_at: toEpochMs(columnValue(batch, "created_at", i)),
104
- session_id: columnValue(batch, "session_id", i),
105
- role: columnValue(batch, "role", i),
106
- message_index_start: columnValue(batch, "message_index_start", i) ?? 0,
107
- message_index_end: columnValue(batch, "message_index_end", i) ?? 0,
108
- project: columnValue(batch, "project", i) ?? "",
109
- });
149
+ const schemaSample = await table.query().limit(1).toArrow();
150
+ const tsDivisors = buildTimestampDivisors(schemaSample.schema);
151
+ const seen = new Map<string, any>();
152
+
153
+ for (let offset = 0; offset < total; offset += BATCH_SIZE) {
154
+ const arrowTable = await table.query().offset(offset).limit(BATCH_SIZE).toArrow();
155
+ for (const batch of arrowTable.batches) {
156
+ for (let i = 0; i < batch.numRows; i++) {
157
+ const id = columnValue(batch, "id", i) as string;
158
+ const content = columnValue(batch, "content", i) as string;
159
+ const existing = seen.get(id);
160
+ if (existing && existing.content.length >= content.length) continue;
161
+ seen.set(id, {
162
+ id,
163
+ content,
164
+ metadata: columnValue(batch, "metadata", i) ?? "{}",
165
+ vector: toFloatArray(columnValue(batch, "vector", i)),
166
+ created_at: toEpochMs(columnValue(batch, "created_at", i), tsDivisors.get("created_at")),
167
+ session_id: columnValue(batch, "session_id", i),
168
+ role: columnValue(batch, "role", i),
169
+ message_index_start: columnValue(batch, "message_index_start", i) ?? 0,
170
+ message_index_end: columnValue(batch, "message_index_end", i) ?? 0,
171
+ project: columnValue(batch, "project", i) ?? "",
172
+ });
173
+ }
110
174
  }
111
175
  }
112
- console.error(` ${result.conversations.length} conversation chunks read`);
176
+ result.conversations = [...seen.values()];
177
+ console.error(` ${result.conversations.length} unique conversation chunks read (${total} rows scanned)`);
113
178
  }
114
179
 
115
180
  await db.close?.();