@gmickel/gno 0.41.0 → 0.41.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -122,6 +122,15 @@ gno collection clear-embeddings my-collection --all
122
122
  gno embed my-collection
123
123
  ```
124
124
 
125
+ If a re-embed run still reports failures, rerun with:
126
+
127
+ ```bash
128
+ gno --verbose embed --force
129
+ ```
130
+
131
+ Recent releases now print sample embedding errors and a concrete retry hint when
132
+ batch recovery cannot fully recover on its own.
133
+
125
134
  Model guides:
126
135
 
127
136
  - [Code Embeddings](./docs/guides/code-embeddings.md)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmickel/gno",
3
- "version": "0.41.0",
3
+ "version": "0.41.1",
4
4
  "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
5
5
  "keywords": [
6
6
  "embeddings",
@@ -71,6 +71,9 @@ export type EmbedResult =
71
71
  duration: number;
72
72
  model: string;
73
73
  searchAvailable: boolean;
74
+ errorSamples?: string[];
75
+ suggestion?: string;
76
+ syncError?: string;
74
77
  }
75
78
  | { success: false; error: string };
76
79
 
@@ -87,6 +90,30 @@ function formatDuration(seconds: number): string {
87
90
  return `${mins}m ${secs.toFixed(0)}s`;
88
91
  }
89
92
 
93
/**
 * Builds a single human-readable line describing an embedding failure.
 *
 * The cause is appended as `"<message> - <cause>"` when it is either a string
 * or an object carrying a string `message`, and differs from the top-level
 * message. Any other cause shape is ignored.
 *
 * @param error - Failure reported by the embedding layer, if any.
 * @returns The formatted description; never an empty string.
 */
function formatLlmFailure(
  error: { message: string; cause?: unknown } | undefined
): string {
  const fallback = "Unknown embedding failure";
  if (!error) {
    return fallback;
  }

  const { message, cause } = error;

  // Only two cause shapes are understood: a plain string, or an Error-like
  // object exposing a string `message`.
  let causeText = "";
  if (typeof cause === "string") {
    causeText = cause;
  } else if (
    cause &&
    typeof cause === "object" &&
    "message" in cause &&
    typeof cause.message === "string"
  ) {
    causeText = cause.message;
  }

  if (!causeText || causeText === message) {
    // Nothing useful to append. An empty message falls back to the generic
    // text so callers never end up printing a blank sample.
    return message || fallback;
  }

  // Avoid a dangling " - cause" when the top-level message is empty.
  return message ? `${message} - ${causeText}` : causeText;
}
112
+
113
/**
 * Reports whether a formatted failure message mentions a disposed object.
 *
 * The check is a case-insensitive substring match on "object is disposed".
 */
function isDisposedBatchError(message: string): boolean {
  const normalized = message.toLowerCase();
  return normalized.includes("object is disposed");
}
116
+
90
117
  async function checkVecAvailable(
91
118
  db: import("bun:sqlite").Database
92
119
  ): Promise<boolean> {
@@ -111,10 +138,20 @@ interface BatchContext {
111
138
  showProgress: boolean;
112
139
  totalToEmbed: number;
113
140
  verbose: boolean;
141
+ recreateEmbedPort?: () => Promise<
142
+ { ok: true; value: EmbeddingPort } | { ok: false; error: string }
143
+ >;
114
144
  }
115
145
 
116
146
  type BatchResult =
117
- | { ok: true; embedded: number; errors: number; duration: number }
147
+ | {
148
+ ok: true;
149
+ embedded: number;
150
+ errors: number;
151
+ duration: number;
152
+ errorSamples: string[];
153
+ suggestion?: string;
154
+ }
118
155
  | { ok: false; error: string };
119
156
 
120
157
  interface Cursor {
@@ -126,8 +163,21 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
126
163
  const startTime = Date.now();
127
164
  let embedded = 0;
128
165
  let errors = 0;
166
+ const errorSamples: string[] = [];
167
+ let suggestion: string | undefined;
129
168
  let cursor: Cursor | undefined;
130
169
 
170
// Appends previously unseen messages to the enclosing `errorSamples` list,
// stopping as soon as the list holds five entries.
const pushErrorSamples = (samples: string[]): void => {
  for (const candidate of samples) {
    if (errorSamples.length >= 5) {
      return;
    }
    if (errorSamples.includes(candidate)) {
      continue;
    }
    errorSamples.push(candidate);
  }
};
180
+
131
181
  while (embedded + errors < ctx.totalToEmbed) {
132
182
  // Get next batch using seek pagination (cursor-based)
133
183
  const batchResult = ctx.force
@@ -161,6 +211,89 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
161
211
  )
162
212
  );
163
213
  if (!batchEmbedResult.ok) {
214
+ const formattedError = formatLlmFailure(batchEmbedResult.error);
215
+ if (ctx.recreateEmbedPort && isDisposedBatchError(formattedError)) {
216
+ if (ctx.verbose) {
217
+ process.stderr.write(
218
+ "\n[embed] Embedding port disposed; recreating model/contexts and retrying batch once\n"
219
+ );
220
+ }
221
+ const recreated = await ctx.recreateEmbedPort();
222
+ if (recreated.ok) {
223
+ ctx.embedPort = recreated.value;
224
+ const retryResult = await embedTextsWithRecovery(
225
+ ctx.embedPort,
226
+ batch.map((b) =>
227
+ formatDocForEmbedding(b.text, b.title ?? undefined, ctx.modelUri)
228
+ )
229
+ );
230
+ if (retryResult.ok) {
231
+ if (ctx.verbose) {
232
+ process.stderr.write(
233
+ "\n[embed] Retry after port reset succeeded\n"
234
+ );
235
+ }
236
+ pushErrorSamples(retryResult.value.failureSamples);
237
+ suggestion ||= retryResult.value.retrySuggestion;
238
+
239
+ const retryVectors: VectorRow[] = [];
240
+ for (const [idx, item] of batch.entries()) {
241
+ const embedding = retryResult.value.vectors[idx];
242
+ if (!embedding) {
243
+ errors += 1;
244
+ continue;
245
+ }
246
+ retryVectors.push({
247
+ mirrorHash: item.mirrorHash,
248
+ seq: item.seq,
249
+ model: ctx.modelUri,
250
+ embedding: new Float32Array(embedding),
251
+ });
252
+ }
253
+
254
+ if (retryVectors.length === 0) {
255
+ if (ctx.verbose) {
256
+ process.stderr.write(
257
+ "\n[embed] No recoverable embeddings in retry batch\n"
258
+ );
259
+ }
260
+ continue;
261
+ }
262
+
263
+ const retryStoreResult =
264
+ await ctx.vectorIndex.upsertVectors(retryVectors);
265
+ if (!retryStoreResult.ok) {
266
+ if (ctx.verbose) {
267
+ process.stderr.write(
268
+ `\n[embed] Store failed: ${retryStoreResult.error.message}\n`
269
+ );
270
+ }
271
+ pushErrorSamples([retryStoreResult.error.message]);
272
+ suggestion ??=
273
+ "Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.";
274
+ errors += retryVectors.length;
275
+ continue;
276
+ }
277
+
278
+ embedded += retryVectors.length;
279
+ if (ctx.showProgress) {
280
+ const embeddedDisplay = Math.min(embedded, ctx.totalToEmbed);
281
+ const completed = Math.min(embedded + errors, ctx.totalToEmbed);
282
+ const pct = (completed / ctx.totalToEmbed) * 100;
283
+ const elapsed = (Date.now() - startTime) / 1000;
284
+ const rate = embedded / Math.max(elapsed, 0.001);
285
+ const eta =
286
+ Math.max(0, ctx.totalToEmbed - completed) /
287
+ Math.max(rate, 0.001);
288
+ process.stdout.write(
289
+ `\rEmbedding: ${embeddedDisplay.toLocaleString()}/${ctx.totalToEmbed.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
290
+ );
291
+ }
292
+ continue;
293
+ }
294
+ }
295
+ }
296
+
164
297
  if (ctx.verbose) {
165
298
  const err = batchEmbedResult.error;
166
299
  const cause = err.cause;
@@ -178,6 +311,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
178
311
  `\n[embed] Batch failed (${batch.length} chunks: ${titles}${batch.length > 3 ? "..." : ""}): ${err.message}${causeMsg ? ` - ${causeMsg}` : ""}\n`
179
312
  );
180
313
  }
314
+ pushErrorSamples([formattedError]);
315
+ suggestion =
316
+ "Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks.";
181
317
  errors += batch.length;
182
318
  continue;
183
319
  }
@@ -191,6 +327,13 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
191
327
  `\n[embed] Batch fallback (${batch.length} chunks: ${titles}${batch.length > 3 ? "..." : ""}): ${batchEmbedResult.value.batchError ?? "unknown batch error"}\n`
192
328
  );
193
329
  }
330
+ pushErrorSamples(batchEmbedResult.value.failureSamples);
331
+ suggestion ||= batchEmbedResult.value.retrySuggestion;
332
+ if (ctx.verbose && batchEmbedResult.value.failureSamples.length > 0) {
333
+ for (const sample of batchEmbedResult.value.failureSamples) {
334
+ process.stderr.write(`\n[embed] Sample failure: ${sample}\n`);
335
+ }
336
+ }
194
337
 
195
338
  const vectors: VectorRow[] = [];
196
339
  for (const [idx, item] of batch.entries()) {
@@ -221,6 +364,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
221
364
  `\n[embed] Store failed: ${storeResult.error.message}\n`
222
365
  );
223
366
  }
367
+ pushErrorSamples([storeResult.error.message]);
368
+ suggestion ??=
369
+ "Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.";
224
370
  errors += vectors.length;
225
371
  continue;
226
372
  }
@@ -229,13 +375,15 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
229
375
 
230
376
  // Progress output
231
377
  if (ctx.showProgress) {
232
- const pct = ((embedded + errors) / ctx.totalToEmbed) * 100;
378
+ const embeddedDisplay = Math.min(embedded, ctx.totalToEmbed);
379
+ const completed = Math.min(embedded + errors, ctx.totalToEmbed);
380
+ const pct = (completed / ctx.totalToEmbed) * 100;
233
381
  const elapsed = (Date.now() - startTime) / 1000;
234
382
  const rate = embedded / Math.max(elapsed, 0.001);
235
383
  const eta =
236
- (ctx.totalToEmbed - embedded - errors) / Math.max(rate, 0.001);
384
+ Math.max(0, ctx.totalToEmbed - completed) / Math.max(rate, 0.001);
237
385
  process.stdout.write(
238
- `\rEmbedding: ${embedded.toLocaleString()}/${ctx.totalToEmbed.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
386
+ `\rEmbedding: ${embeddedDisplay.toLocaleString()}/${ctx.totalToEmbed.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
239
387
  );
240
388
  }
241
389
  }
@@ -249,6 +397,8 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
249
397
  embedded,
250
398
  errors,
251
399
  duration: (Date.now() - startTime) / 1000,
400
+ errorSamples,
401
+ suggestion,
252
402
  };
253
403
  }
254
404
 
@@ -354,6 +504,7 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
354
504
  duration: 0,
355
505
  model: modelUri,
356
506
  searchAvailable: vecAvailable,
507
+ errorSamples: [],
357
508
  };
358
509
  }
359
510
 
@@ -366,6 +517,7 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
366
517
  duration: 0,
367
518
  model: modelUri,
368
519
  searchAvailable: vecAvailable,
520
+ errorSamples: [],
369
521
  };
370
522
  }
371
523
 
@@ -382,6 +534,27 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
382
534
  : undefined;
383
535
 
384
536
  const llm = new LlmAdapter(config);
537
// Disposes the current embedding port and the manager's model for `modelUri`,
// then creates and initializes a replacement port.
//
// Returns `{ ok: true, value }` with the initialized port, or
// `{ ok: false, error }` with the message of the creation/initialization
// failure. A replacement that fails to initialize is disposed before returning.
const recreateEmbedPort = async () => {
  if (embedPort) {
    await embedPort.dispose();
  }
  await llm.getManager().dispose(modelUri);

  const recreated = await llm.createEmbeddingPort(modelUri, {
    policy,
    onProgress: downloadProgress
      ? (progress) => downloadProgress("embed", progress)
      : undefined,
  });
  if (!recreated.ok) {
    return { ok: false as const, error: recreated.error.message };
  }

  const initResult = await recreated.value.init();
  if (!initResult.ok) {
    await recreated.value.dispose();
    return { ok: false as const, error: initResult.error.message };
  }

  // Rebind the enclosing `embedPort` to the replacement. Without this, a later
  // recreation and the enclosing cleanup would dispose the already-disposed
  // original port again and never release the replacement.
  // NOTE(review): assumes `embedPort` is a mutable (`let`) binding of the
  // enclosing function — confirm against its declaration.
  embedPort = recreated.value;
  return { ok: true as const, value: recreated.value };
};
385
558
  const embedResult = await llm.createEmbeddingPort(modelUri, {
386
559
  policy,
387
560
  onProgress: downloadProgress
@@ -428,6 +601,7 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
428
601
  showProgress: !options.json,
429
602
  totalToEmbed,
430
603
  verbose: options.verbose ?? false,
604
+ recreateEmbedPort,
431
605
  });
432
606
 
433
607
  if (!result.ok) {
@@ -447,10 +621,27 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
447
621
  }
448
622
  }
449
623
  vectorIndex.vecDirty = false;
450
- } else if (!options.json) {
451
- process.stdout.write(
452
- `\n[vec] Sync failed: ${syncResult.error.message}\n`
453
- );
624
+ } else {
625
+ if (!options.json) {
626
+ process.stdout.write(
627
+ `\n[vec] Sync failed: ${syncResult.error.message}\n`
628
+ );
629
+ }
630
+ return {
631
+ success: true,
632
+ embedded: result.embedded,
633
+ errors: result.errors,
634
+ duration: result.duration,
635
+ model: modelUri,
636
+ searchAvailable: vectorIndex.searchAvailable,
637
+ errorSamples: [
638
+ ...result.errorSamples,
639
+ syncResult.error.message,
640
+ ].slice(0, 5),
641
+ suggestion:
642
+ "Vector index sync failed after embedding. Rerun `gno embed` once more. If it repeats, run `gno vec sync`.",
643
+ syncError: syncResult.error.message,
644
+ };
454
645
  }
455
646
  }
456
647
 
@@ -461,6 +652,8 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
461
652
  duration: result.duration,
462
653
  model: modelUri,
463
654
  searchAvailable: vectorIndex.searchAvailable,
655
+ errorSamples: result.errorSamples,
656
+ suggestion: result.suggestion,
464
657
  };
465
658
  } finally {
466
659
  if (embedPort) {
@@ -585,6 +778,9 @@ export function formatEmbed(
585
778
  duration: result.duration,
586
779
  model: result.model,
587
780
  searchAvailable: result.searchAvailable,
781
+ errorSamples: result.errorSamples ?? [],
782
+ suggestion: result.suggestion,
783
+ syncError: result.syncError,
588
784
  },
589
785
  null,
590
786
  2
@@ -606,6 +802,14 @@ export function formatEmbed(
606
802
 
607
803
  if (result.errors > 0) {
608
804
  lines.push(`${result.errors} chunks failed to embed.`);
805
+ if ((result.errorSamples?.length ?? 0) > 0) {
806
+ for (const sample of result.errorSamples ?? []) {
807
+ lines.push(`Sample error: ${sample}`);
808
+ }
809
+ }
810
+ if (result.suggestion) {
811
+ lines.push(`Hint: ${result.suggestion}`);
812
+ }
609
813
  }
610
814
 
611
815
  if (!result.searchAvailable) {
@@ -614,5 +818,9 @@ export function formatEmbed(
614
818
  );
615
819
  }
616
820
 
821
+ if (result.syncError) {
822
+ lines.push(`Vec sync error: ${result.syncError}`);
823
+ }
824
+
617
825
  return lines.join("\n");
618
826
  }
@@ -14,8 +14,12 @@ export interface EmbedBatchRecoveryResult {
14
14
  batchFailed: boolean;
15
15
  batchError?: string;
16
16
  fallbackErrors: number;
17
+ failureSamples: string[];
18
+ retrySuggestion?: string;
17
19
  }
18
20
 
21
+ const MAX_FAILURE_SAMPLES = 5;
22
+
19
23
  function errorMessage(error: unknown): string {
20
24
  if (
21
25
  error &&
@@ -28,6 +32,27 @@ function errorMessage(error: unknown): string {
28
32
  return String(error);
29
33
  }
30
34
 
35
/**
 * Renders an embedding error as one line, appending its cause as
 * `"<message> - <cause>"` when the cause text is non-empty and differs from
 * the message itself.
 */
function formatFailureMessage(error: {
  message: string;
  cause?: unknown;
}): string {
  const { message } = error;
  if (!error.cause) {
    return message;
  }

  const causeText = errorMessage(error.cause);
  if (causeText === "" || causeText === message) {
    return message;
  }
  return `${message} - ${causeText}`;
}
44
+
45
/**
 * Reports whether a failure message mentions a disposed object, using a
 * case-insensitive substring match on "object is disposed".
 */
function isDisposedFailure(message: string): boolean {
  const normalized = message.toLowerCase();
  return normalized.includes("object is disposed");
}
48
+
49
/**
 * Disposes the given embedding port and then initializes it again.
 *
 * @returns The result of the port's `init()` call.
 */
async function resetEmbeddingPort(
  embedPort: EmbeddingPort
): Promise<LlmResult<void>> {
  await embedPort.dispose();
  const reinitialized = await embedPort.init();
  return reinitialized;
}
55
+
31
56
  export async function embedTextsWithRecovery(
32
57
  embedPort: EmbeddingPort,
33
58
  texts: string[]
@@ -39,13 +64,24 @@ export async function embedTextsWithRecovery(
39
64
  vectors: [],
40
65
  batchFailed: false,
41
66
  fallbackErrors: 0,
67
+ failureSamples: [],
42
68
  },
43
69
  };
44
70
  }
45
71
 
46
72
  const profile = getEmbeddingCompatibilityProfile(embedPort.modelUri);
47
73
  if (profile.batchEmbeddingTrusted) {
48
- const batchResult = await embedPort.embedBatch(texts);
74
+ let batchResult = await embedPort.embedBatch(texts);
75
+ if (!batchResult.ok) {
76
+ const formattedBatchError = formatFailureMessage(batchResult.error);
77
+ if (isDisposedFailure(formattedBatchError)) {
78
+ const reset = await resetEmbeddingPort(embedPort);
79
+ if (!reset.ok) {
80
+ return reset;
81
+ }
82
+ batchResult = await embedPort.embedBatch(texts);
83
+ }
84
+ }
49
85
  if (batchResult.ok && batchResult.value.length === texts.length) {
50
86
  return {
51
87
  ok: true,
@@ -53,11 +89,14 @@ export async function embedTextsWithRecovery(
53
89
  vectors: batchResult.value,
54
90
  batchFailed: false,
55
91
  fallbackErrors: 0,
92
+ failureSamples: [],
56
93
  },
57
94
  };
58
95
  }
59
96
 
60
- const recovered = await recoverIndividually(embedPort, texts);
97
+ const recovered = await recoverWithAdaptiveBatches(embedPort, texts, {
98
+ rootBatchAlreadyFailed: true,
99
+ });
61
100
  if (!recovered.ok) {
62
101
  return recovered;
63
102
  }
@@ -68,7 +107,11 @@ export async function embedTextsWithRecovery(
68
107
  batchFailed: true,
69
108
  batchError: batchResult.ok
70
109
  ? `Embedding count mismatch: got ${batchResult.value.length}, expected ${texts.length}`
71
- : batchResult.error.message,
110
+ : formatFailureMessage(batchResult.error),
111
+ retrySuggestion:
112
+ recovered.value.fallbackErrors > 0
113
+ ? "Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks."
114
+ : undefined,
72
115
  },
73
116
  };
74
117
  }
@@ -83,10 +126,113 @@ export async function embedTextsWithRecovery(
83
126
  ...recovered.value,
84
127
  batchFailed: true,
85
128
  batchError: "Batch embedding disabled for this compatibility profile",
129
+ retrySuggestion:
130
+ recovered.value.fallbackErrors > 0
131
+ ? "Some chunks still failed individually. Rerun with `gno --verbose embed --batch-size 1` for exact chunk errors."
132
+ : undefined,
86
133
  },
87
134
  };
88
135
  }
89
136
 
137
/**
 * Recovers embeddings for `texts` after a batch failure by repeatedly halving
 * the input: each range is tried as a batch, and on failure (or a result-count
 * mismatch) it is split in two until single texts are embedded one at a time.
 *
 * If every text fails, the port is reset once and each text is retried
 * individually; that retry's result is returned as-is.
 *
 * @param embedPort - Port used for both batch and single-text embedding.
 * @param texts - Texts to embed; output vectors keep the same positions.
 * @param options - `rootBatchAlreadyFailed` skips the batch attempt for the
 *   full input because the caller has already tried it.
 * @returns Vectors (`null` where embedding failed), the number of failed
 *   texts, and up to `MAX_FAILURE_SAMPLES` failure messages. Thrown errors are
 *   converted into a failed result.
 */
async function recoverWithAdaptiveBatches(
  embedPort: EmbeddingPort,
  texts: string[],
  options: { rootBatchAlreadyFailed?: boolean } = {}
): Promise<
  LlmResult<Omit<EmbedBatchRecoveryResult, "batchFailed" | "batchError">>
> {
  try {
    // Pre-sized so each range can write its results by absolute position.
    const vectors: Array<number[] | null> = Array.from(
      { length: texts.length },
      () => null
    );
    const failureSamples: string[] = [];
    let fallbackErrors = 0;

    const recordFailure = (message: string): void => {
      if (failureSamples.length < MAX_FAILURE_SAMPLES) {
        failureSamples.push(message);
      }
    };

    const processRange = async (
      rangeTexts: string[],
      offset: number,
      batchAlreadyFailed = false
    ): Promise<void> => {
      if (rangeTexts.length === 0) {
        return;
      }

      // Base case: a single text is embedded on its own and its failure, if
      // any, is counted and sampled.
      if (rangeTexts.length === 1) {
        const result = await embedPort.embed(rangeTexts[0] ?? "");
        if (result.ok) {
          vectors[offset] = result.value;
          return;
        }
        fallbackErrors += 1;
        recordFailure(formatFailureMessage(result.error));
        return;
      }

      // Skip the batch attempt when this exact range is already known to fail.
      let batchResult: Awaited<ReturnType<typeof embedPort.embedBatch>> | null =
        null;
      if (!batchAlreadyFailed) {
        batchResult = await embedPort.embedBatch(rangeTexts);
      }
      if (
        batchResult &&
        batchResult.ok &&
        batchResult.value.length === rangeTexts.length
      ) {
        for (const [index, vector] of batchResult.value.entries()) {
          vectors[offset + index] = vector;
        }
        return;
      }

      // Batch failed, was skipped, or returned the wrong count: split in half.
      // Sub-ranges always get their own batch attempt.
      const mid = Math.ceil(rangeTexts.length / 2);
      await processRange(rangeTexts.slice(0, mid), offset);
      await processRange(rangeTexts.slice(mid), offset + mid);
    };

    await processRange(texts, 0, options.rootBatchAlreadyFailed ?? false);

    // Every text failed: reset the port once and retry each text individually.
    // The length guard keeps empty input (0 === 0) from triggering a needless
    // dispose/re-init of the port.
    if (texts.length > 0 && fallbackErrors === texts.length) {
      const reinit = await resetEmbeddingPort(embedPort);
      if (!reinit.ok) {
        return reinit;
      }

      const retry = await recoverIndividually(embedPort, texts);
      if (!retry.ok) {
        return retry;
      }
      return {
        ok: true,
        value: retry.value,
      };
    }

    return {
      ok: true,
      value: {
        vectors,
        fallbackErrors,
        failureSamples,
      },
    };
  } catch (error) {
    return {
      ok: false,
      error: inferenceFailedError(
        embedPort.modelUri,
        new Error(errorMessage(error))
      ),
    };
  }
}
235
+
90
236
  async function recoverIndividually(
91
237
  embedPort: EmbeddingPort,
92
238
  texts: string[]
@@ -95,6 +241,7 @@ async function recoverIndividually(
95
241
  > {
96
242
  try {
97
243
  const vectors: Array<number[] | null> = [];
244
+ const failureSamples: string[] = [];
98
245
  let fallbackErrors = 0;
99
246
 
100
247
  for (const text of texts) {
@@ -104,6 +251,9 @@ async function recoverIndividually(
104
251
  } else {
105
252
  vectors.push(null);
106
253
  fallbackErrors += 1;
254
+ if (failureSamples.length < MAX_FAILURE_SAMPLES) {
255
+ failureSamples.push(formatFailureMessage(result.error));
256
+ }
107
257
  }
108
258
  }
109
259
 
@@ -112,6 +262,7 @@ async function recoverIndividually(
112
262
  value: {
113
263
  vectors,
114
264
  fallbackErrors,
265
+ failureSamples,
115
266
  },
116
267
  };
117
268
  } catch (error) {
@@ -117,10 +117,12 @@ export async function createVectorIndexPort(
117
117
  `);
118
118
 
119
119
  // Prepared statements for vec0 table (if available)
120
- const upsertVecStmt = searchAvailable
121
- ? db.prepare(
122
- `INSERT OR REPLACE INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)`
123
- )
120
+ const deleteVecChunkStmt = searchAvailable
121
+ ? db.prepare(`DELETE FROM ${tableName} WHERE chunk_id = ?`)
122
+ : null;
123
+
124
+ const insertVecStmt = searchAvailable
125
+ ? db.prepare(`INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)`)
124
126
  : null;
125
127
 
126
128
  const searchStmt = searchAvailable
@@ -175,12 +177,15 @@ export async function createVectorIndexPort(
175
177
  }
176
178
 
177
179
  // 2. Best-effort update vec0 (graceful degradation)
178
- if (upsertVecStmt) {
180
+ if (deleteVecChunkStmt && insertVecStmt) {
179
181
  try {
180
182
  db.transaction(() => {
181
183
  for (const row of rows) {
182
184
  const chunkId = `${row.mirrorHash}:${row.seq}`;
183
- upsertVecStmt.run(chunkId, encodeEmbedding(row.embedding));
185
+ // sqlite-vec vec0 tables do not reliably support OR REPLACE semantics.
186
+ // Delete first, then insert the fresh vector row.
187
+ deleteVecChunkStmt.run(chunkId);
188
+ insertVecStmt.run(chunkId, encodeEmbedding(row.embedding));
184
189
  }
185
190
  })();
186
191
  } catch (e) {