@gmickel/gno 1.5.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Embedding freshness fingerprint.
3
+ *
4
+ * @module src/embed/fingerprint
5
+ */
6
+
7
+ import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
8
+
9
/**
 * Version tag for the contextual formatting applied to embedded text.
 *
 * It is part of the fingerprint payload, so changing this string changes
 * every fingerprint produced by `getEmbeddingFingerprint`.
 */
export const EMBEDDING_CONTEXTUAL_FORMAT_VERSION = "contextual-embedding-v1";

/**
 * Version tag for the chunking strategy.
 *
 * It is part of the fingerprint payload, so changing this string changes
 * every fingerprint produced by `getEmbeddingFingerprint`.
 */
export const EMBEDDING_CHUNKING_STRATEGY_VERSION = "markdown-char-semantic-v1";
11
+
12
/** Inputs that, together with the version tags, determine a fingerprint. */
export interface EmbeddingFingerprintInput {
  /** URI of the embedding model; also used to look up its compatibility profile. */
  modelUri: string;
  /** Vector dimensionality, when known. Serialized as `null` if omitted. */
  dimensions?: number;
}
16
+
17
/**
 * Computes the freshness fingerprint for a model/dimension combination.
 *
 * The fingerprint is the SHA-256 hex digest of a JSON document containing the
 * chunking and contextual-format version tags, the dimensions (or `null`),
 * the model URI and selected fields of the model's compatibility profile.
 *
 * @param input - Model URI and optional vector dimensions.
 * @returns Hex-encoded SHA-256 digest of the serialized payload.
 */
export function getEmbeddingFingerprint(
  input: EmbeddingFingerprintInput
): string {
  const { modelUri, dimensions } = input;
  const { batchEmbeddingTrusted, documentFormat, id, queryFormat } =
    getEmbeddingCompatibilityProfile(modelUri);

  // Property order is part of the serialized form and therefore of the digest;
  // keep it exactly as written.
  const serialized = JSON.stringify({
    chunking: EMBEDDING_CHUNKING_STRATEGY_VERSION,
    contextualFormatting: EMBEDDING_CONTEXTUAL_FORMAT_VERSION,
    dimensions: dimensions ?? null,
    modelUri,
    profile: { batchEmbeddingTrusted, documentFormat, id, queryFormat },
  });

  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(serialized);
  return hasher.digest("hex");
}
@@ -0,0 +1,137 @@
1
+ import type { EmbeddingPort } from "../llm/types";
2
+ import type { BacklogItem, VectorIndexPort, VectorRow } from "../store/vector";
3
+
4
+ import { formatDocForEmbedding } from "../pipeline/contextual";
5
+ import { embedTextsWithRecovery } from "./batch";
6
+
7
/** Upper bound on embedding attempts for a single chunk, enforced by retry callers. */
export const MAX_EMBED_CHUNK_ATTEMPTS = 2;

/** Maximum number of failure messages `addUniqueSamples` lets a sample list hold. */
export const MAX_EMBED_FAILURE_SAMPLES = 5;
9
+
10
/** Outcome of one `embedAndStoreBatch` call. */
export interface EmbedStoreBatchResult {
  /** Number of vectors written to the vector index (0 when nothing was stored). */
  embedded: number;
  /** Number of items counted as failed; items returned in `retryItems` are not included. */
  errors: number;
  /** Items that produced no vector and are handed back to the caller for another attempt. */
  retryItems: BacklogItem[];
  /** Failure messages describing what went wrong in this batch. */
  errorSamples: string[];
  /** Optional remediation hint for the user. */
  suggestion?: string;
  /** True when the embedding call failed outright; otherwise forwarded from the embed result. */
  batchFailed: boolean;
  /** Message for the batch-level failure, when there is one. */
  batchError?: string;
}
19
+
20
/**
 * Builds the string key that identifies one chunk in retry bookkeeping.
 *
 * @param item - Anything carrying the chunk's `mirrorHash` and `seq`.
 * @returns `mirrorHash`, a NUL (`\0`) separator, then `seq`. The separator
 *   keeps `("a", 12)` and `("a1", 2)` from producing the same key.
 */
export function chunkRetryKey(
  item: Pick<BacklogItem, "mirrorHash" | "seq">
): string {
  return `${item.mirrorHash}\0${item.seq}`;
}
23
+
24
/**
 * Appends failure samples to `target` in place, skipping duplicates.
 *
 * Stops as soon as `target` holds `MAX_EMBED_FAILURE_SAMPLES` entries; a
 * `target` that is already at or above that size is left untouched.
 *
 * @param target - List that receives new samples (mutated).
 * @param samples - Candidate samples, considered in order.
 */
export function addUniqueSamples(target: string[], samples: string[]): void {
  for (const candidate of samples) {
    const isFull = target.length >= MAX_EMBED_FAILURE_SAMPLES;
    if (isFull) {
      return;
    }

    const alreadyRecorded = target.includes(candidate);
    if (alreadyRecorded) {
      continue;
    }
    target.push(candidate);
  }
}
34
+
35
/**
 * Renders an LLM error as a single line, appending the underlying cause when
 * it adds information.
 *
 * The cause text is taken from `error.cause` when that is a string, or from
 * its `message` property when it is an object with a string `message`.
 *
 * @param error - Error-like value, or `undefined` when no error was captured.
 * @returns `"Unknown embedding failure"` when `error` is missing,
 *   `"<message> - <cause>"` when a non-empty cause differs from the message,
 *   otherwise just the message.
 */
export function formatLlmFailure(
  error: { message: string; cause?: unknown } | undefined
): string {
  if (!error) {
    return "Unknown embedding failure";
  }

  const { message, cause } = error;

  let detail = "";
  if (typeof cause === "string") {
    detail = cause;
  } else if (
    typeof cause === "object" &&
    cause !== null &&
    "message" in cause &&
    typeof cause.message === "string"
  ) {
    detail = cause.message;
  }

  // An empty or repeated cause adds nothing, so the message stands alone.
  if (detail === "" || detail === message) {
    return message;
  }
  return `${message} - ${detail}`;
}
54
+
55
/**
 * Embeds one batch of backlog items and writes the resulting vectors to the
 * vector index.
 *
 * Outcomes:
 * - The embedding call fails: nothing is stored. A retryable error hands every
 *   item back in `retryItems`; any other error counts every item in `errors`.
 * - Items without a vector in the embed result go to `retryItems`.
 * - No item produced a vector: returns without touching the store.
 * - The store write fails: the attempted vectors are counted in `errors`.
 * - Otherwise `embedded` is the number of vectors written.
 *
 * @param params.embedPort - Port used to compute embeddings.
 * @param params.vectorIndex - Destination for the vectors.
 * @param params.items - Chunks to embed.
 * @param params.modelUri - Model URI used for formatting and stored on each row.
 * @param params.embedFingerprint - Fingerprint stored on each row.
 * @returns Counts, retry candidates and diagnostics for the batch.
 */
export async function embedAndStoreBatch(params: {
  embedPort: EmbeddingPort;
  vectorIndex: VectorIndexPort;
  items: BacklogItem[];
  modelUri: string;
  embedFingerprint: string;
}): Promise<EmbedStoreBatchResult> {
  const { embedPort, vectorIndex, items, modelUri, embedFingerprint } = params;

  const texts = items.map((item) =>
    formatDocForEmbedding(item.text, item.title ?? undefined, modelUri)
  );
  const embedResult = await embedTextsWithRecovery(embedPort, texts);

  if (!embedResult.ok) {
    const { error } = embedResult;
    const failureText = formatLlmFailure(error);

    if (error.retryable) {
      // Transient failure: hand everything back instead of counting errors.
      return {
        embedded: 0,
        errors: 0,
        retryItems: items,
        errorSamples: [failureText],
        suggestion:
          "Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks.",
        batchFailed: true,
        batchError: failureText,
      };
    }

    return {
      embedded: 0,
      errors: items.length,
      retryItems: [],
      errorSamples: [failureText],
      suggestion: error.suggestion,
      batchFailed: true,
      batchError: failureText,
    };
  }

  const {
    vectors: embeddings,
    failureSamples,
    retrySuggestion,
    batchFailed,
    batchError,
  } = embedResult.value;

  // Split items by whether the embed result has a vector at their position.
  const vectors: VectorRow[] = [];
  const retryItems: BacklogItem[] = [];
  items.forEach((item, position) => {
    const embedding = embeddings[position];
    if (embedding) {
      vectors.push({
        mirrorHash: item.mirrorHash,
        seq: item.seq,
        model: modelUri,
        embedFingerprint,
        embedding: new Float32Array(embedding),
      });
    } else {
      retryItems.push(item);
    }
  });

  // Result shape shared by the "nothing to store" and "stored" outcomes.
  const forwarded: EmbedStoreBatchResult = {
    embedded: 0,
    errors: 0,
    retryItems,
    errorSamples: failureSamples,
    suggestion: retrySuggestion,
    batchFailed,
    batchError,
  };

  if (vectors.length === 0) {
    return forwarded;
  }

  const storeResult = await vectorIndex.upsertVectors(vectors);
  if (storeResult.ok) {
    return { ...forwarded, embedded: vectors.length };
  }

  return {
    embedded: 0,
    errors: vectors.length,
    retryItems,
    errorSamples: [storeResult.error.message],
    suggestion:
      "Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.",
    batchFailed,
    batchError,
  };
}
@@ -39,6 +39,8 @@ interface TokenizingModel {
39
39
  detokenize(tokens: readonly number[]): string;
40
40
  }
41
41
 
42
+ type EmbeddingInput = Parameters<LlamaEmbeddingContext["getEmbeddingFor"]>[0];
43
+
42
44
  // ─────────────────────────────────────────────────────────────────────────────
43
45
  // Constants
44
46
  // ─────────────────────────────────────────────────────────────────────────────
@@ -46,12 +48,19 @@ interface TokenizingModel {
46
48
  // Aim for a small pool so CPU-only runs can exploit parallel contexts without
47
49
  // multiplying RAM usage too aggressively. Additional contexts fall back
48
50
  // gracefully if memory is tight.
49
- const MAX_EMBEDDING_CONTEXTS = 4;
51
+ const MAX_DEFAULT_EMBEDDING_CONTEXTS = 2;
52
+ const MAX_EMBEDDING_CONTEXTS_OVERRIDE = 4;
50
53
  const TARGET_CORES_PER_EMBEDDING_CONTEXT = 4;
51
- const LOW_MEMORY_WINDOWS_THRESHOLD_BYTES = 24 * 1024 * 1024 * 1024;
54
+ const CONSTRAINED_WINDOWS_THRESHOLD_BYTES = 16 * 1024 * 1024 * 1024;
55
+ const MID_MEMORY_WINDOWS_THRESHOLD_BYTES = 24 * 1024 * 1024 * 1024;
52
56
  const LOW_MEMORY_WINDOWS_CONTEXTS = 1;
57
+ const MID_MEMORY_WINDOWS_CONTEXTS = 2;
53
58
  const DEFAULT_EMBEDDING_CONTEXT_SIZE = 2_048;
54
59
 
60
/**
 * Returns the embedding vector as a plain `number[]`.
 *
 * A value that already is an array is returned as-is (same reference, no
 * copy); anything else is copied with `Array.from`.
 */
function embeddingVectorToArray(vector: readonly number[]): number[] {
  if (!Array.isArray(vector)) {
    return Array.from(vector);
  }
  return vector as number[];
}
63
+
55
64
  function resolveEmbeddingContextPoolOverride(
56
65
  env: NodeJS.ProcessEnv = process.env
57
66
  ): number | undefined {
@@ -63,7 +72,35 @@ function resolveEmbeddingContextPoolOverride(
63
72
  if (!(Number.isFinite(parsed) && parsed > 0)) {
64
73
  return undefined;
65
74
  }
66
- return Math.max(1, Math.min(MAX_EMBEDDING_CONTEXTS, parsed));
75
+ return Math.max(1, Math.min(MAX_EMBEDDING_CONTEXTS_OVERRIDE, parsed));
76
+ }
77
+
78
/**
 * Reads the `GNO_EMBED_THREADS` override.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The parsed base-10 integer when it is finite and positive,
 *   otherwise `undefined` (variable unset, empty, or not a positive number).
 */
function resolveThreadsPerContextOverride(
  env: NodeJS.ProcessEnv = process.env
): number | undefined {
  const { GNO_EMBED_THREADS: configured } = env;
  if (configured === undefined || configured === "") {
    return undefined;
  }

  const threads = Number.parseInt(configured, 10);
  const isUsable = Number.isFinite(threads) && threads > 0;
  return isUsable ? Math.max(1, threads) : undefined;
}
91
+
92
/**
 * Reads the `GNO_EMBED_CONTEXT_SIZE` override.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The parsed base-10 integer raised to a floor of 128, or
 *   `undefined` when the variable is unset, empty, or not a positive number.
 */
function resolveEmbeddingContextSizeOverride(
  env: NodeJS.ProcessEnv = process.env
): number | undefined {
  const configured = env.GNO_EMBED_CONTEXT_SIZE;
  if (configured === undefined || configured === "") {
    return undefined;
  }

  const requested = Number.parseInt(configured, 10);
  if (Number.isFinite(requested) && requested > 0) {
    // Values below 128 are raised to 128 rather than rejected.
    return requested < 128 ? 128 : requested;
  }
  return undefined;
}
68
105
 
69
106
  export function resolveEmbeddingContextPoolSize(options: {
@@ -86,19 +123,28 @@ export function resolveEmbeddingContextPoolSize(options: {
86
123
  const totalMemoryBytes = options.totalMemoryBytes ?? totalmem();
87
124
  if (
88
125
  platformName === "win32" &&
89
- totalMemoryBytes <= LOW_MEMORY_WINDOWS_THRESHOLD_BYTES
126
+ totalMemoryBytes < CONSTRAINED_WINDOWS_THRESHOLD_BYTES
90
127
  ) {
91
128
  return LOW_MEMORY_WINDOWS_CONTEXTS;
92
129
  }
93
130
 
94
131
  const cpuMathCores = Math.max(1, options.cpuMathCores);
95
- return Math.max(
132
+ const adaptivePoolSize = Math.max(
96
133
  1,
97
134
  Math.min(
98
- MAX_EMBEDDING_CONTEXTS,
135
+ MAX_DEFAULT_EMBEDDING_CONTEXTS,
99
136
  Math.ceil(cpuMathCores / TARGET_CORES_PER_EMBEDDING_CONTEXT)
100
137
  )
101
138
  );
139
+
140
+ if (
141
+ platformName === "win32" &&
142
+ totalMemoryBytes < MID_MEMORY_WINDOWS_THRESHOLD_BYTES
143
+ ) {
144
+ return Math.min(MID_MEMORY_WINDOWS_CONTEXTS, adaptivePoolSize);
145
+ }
146
+
147
+ return adaptivePoolSize;
102
148
  }
103
149
 
104
150
  // ─────────────────────────────────────────────────────────────────────────────
@@ -145,9 +191,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
145
191
  return { ok: false, error: prepared.error };
146
192
  }
147
193
  const embedding = await this.runOnWorker((worker) =>
148
- worker.context.getEmbeddingFor(prepared.value.text)
194
+ worker.context.getEmbeddingFor(prepared.value.input)
149
195
  );
150
- const vector = Array.from(embedding.vector) as number[];
196
+ const vector = embeddingVectorToArray(embedding.vector);
151
197
 
152
198
  // Cache dimensions on first call
153
199
  if (this.dims === null) {
@@ -171,13 +217,13 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
171
217
  }
172
218
 
173
219
  try {
174
- const preparedTexts: string[] = [];
220
+ const preparedInputs: EmbeddingInput[] = [];
175
221
  for (const text of texts) {
176
222
  const prepared = this.truncateForEmbedding(text, "batch");
177
223
  if (!prepared.ok) {
178
224
  return { ok: false, error: prepared.error };
179
225
  }
180
- preparedTexts.push(prepared.value.text);
226
+ preparedInputs.push(prepared.value.input);
181
227
  }
182
228
 
183
229
  const allResults = Array.from(
@@ -191,16 +237,19 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
191
237
  while (true) {
192
238
  const index = nextIndex;
193
239
  nextIndex += 1;
194
- if (index >= preparedTexts.length) {
240
+ if (index >= preparedInputs.length) {
195
241
  return;
196
242
  }
197
243
 
244
+ const input = preparedInputs[index];
245
+ if (input === undefined) {
246
+ return;
247
+ }
198
248
  const embedding = await this.runOnSpecificWorker(
199
249
  worker,
200
- (current) =>
201
- current.context.getEmbeddingFor(preparedTexts[index] as string)
250
+ (current) => current.context.getEmbeddingFor(input)
202
251
  );
203
- allResults[index] = Array.from(embedding.vector) as number[];
252
+ allResults[index] = embeddingVectorToArray(embedding.vector);
204
253
  }
205
254
  })
206
255
  );
@@ -316,6 +365,11 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
316
365
  return 0;
317
366
  }
318
367
 
368
+ const override = resolveThreadsPerContextOverride();
369
+ if (override !== undefined) {
370
+ return override;
371
+ }
372
+
319
373
  return Math.max(1, Math.floor(Math.max(1, llama.cpuMathCores) / poolSize));
320
374
  }
321
375
 
@@ -335,6 +389,8 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
335
389
  this.llamaModel = llamaModel as TokenizingModel;
336
390
  const llama = await this.manager.getLlama();
337
391
  const lifecycleVersion = this.lifecycleVersion;
392
+ this.embeddingContextSize =
393
+ resolveEmbeddingContextSizeOverride() ?? DEFAULT_EMBEDDING_CONTEXT_SIZE;
338
394
  const targetPoolSize = this.resolveTargetPoolSize(llama);
339
395
  const threadsPerContext = this.resolveThreadsPerContext(
340
396
  llama,
@@ -400,7 +456,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
400
456
  private truncateForEmbedding(
401
457
  text: string,
402
458
  mode: "single" | "batch"
403
- ): LlmResult<{ text: string }> {
459
+ ): LlmResult<{ input: EmbeddingInput }> {
404
460
  const model = this.llamaModel;
405
461
  const modelLimit =
406
462
  typeof model?.trainContextSize === "number" &&
@@ -409,7 +465,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
409
465
  ? Math.floor(model.trainContextSize)
410
466
  : undefined;
411
467
  if (!model) {
412
- return { ok: true, value: { text } };
468
+ return { ok: true, value: { input: text } };
413
469
  }
414
470
 
415
471
  const rawLimit =
@@ -420,10 +476,13 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
420
476
  try {
421
477
  const tokens = model.tokenize(text);
422
478
  if (tokens.length <= limit) {
423
- return { ok: true, value: { text } };
479
+ return {
480
+ ok: true,
481
+ value: { input: tokens as EmbeddingInput },
482
+ };
424
483
  }
425
484
 
426
- const truncatedText = model.detokenize(tokens.slice(0, limit));
485
+ const truncatedTokens = tokens.slice(0, limit);
427
486
  const shouldWarn =
428
487
  mode === "single"
429
488
  ? !this.warnedSingleTruncation
@@ -438,7 +497,10 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
438
497
  `[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
439
498
  );
440
499
  }
441
- return { ok: true, value: { text: truncatedText } };
500
+ return {
501
+ ok: true,
502
+ value: { input: truncatedTokens as EmbeddingInput },
503
+ };
442
504
  } catch (error) {
443
505
  return { ok: false, error: inferenceFailedError(this.modelUri, error) };
444
506
  }
package/src/sdk/embed.ts CHANGED
@@ -19,15 +19,15 @@ import type {
19
19
  import type { GnoEmbedOptions, GnoEmbedResult } from "./types";
20
20
 
21
21
  import { embedBacklog } from "../embed";
22
- import { embedTextsWithRecovery } from "../embed/batch";
22
+ import { getEmbeddingFingerprint } from "../embed/fingerprint";
23
+ import {
24
+ chunkRetryKey,
25
+ embedAndStoreBatch,
26
+ MAX_EMBED_CHUNK_ATTEMPTS,
27
+ } from "../embed/retry";
23
28
  import { resolveModelUri } from "../llm/registry";
24
- import { formatDocForEmbedding } from "../pipeline/contextual";
25
29
  import { err, ok } from "../store/types";
26
- import {
27
- createVectorIndexPort,
28
- createVectorStatsPort,
29
- type VectorRow,
30
- } from "../store/vector";
30
+ import { createVectorIndexPort, createVectorStatsPort } from "../store/vector";
31
31
  import { sdkError } from "./errors";
32
32
 
33
33
  interface EmbedRuntimeOptions {
@@ -121,6 +121,68 @@ async function forceEmbedAll(
121
121
  let embedded = 0;
122
122
  let errors = 0;
123
123
  let cursor: { mirrorHash: string; seq: number } | undefined;
124
+ const retryQueue = new Map<string, { item: BacklogItem; attempts: number }>();
125
+ const embedFingerprint = getEmbeddingFingerprint({
126
+ modelUri,
127
+ dimensions: vectorIndex.dimensions,
128
+ });
129
+
130
+ const enqueueRetryItems = (items: BacklogItem[], attempts: number): void => {
131
+ for (const item of items) {
132
+ const key = chunkRetryKey(item);
133
+ const existing = retryQueue.get(key);
134
+ retryQueue.set(key, {
135
+ item,
136
+ attempts: Math.max(existing?.attempts ?? 0, attempts),
137
+ });
138
+ }
139
+ };
140
+
141
+ const drainRetryQueue = async (): Promise<number> => {
142
+ if (retryQueue.size === 0) {
143
+ return 0;
144
+ }
145
+
146
+ let retryEmbedded = 0;
147
+ const entries = [...retryQueue.values()].filter(
148
+ (entry) => entry.attempts < MAX_EMBED_CHUNK_ATTEMPTS
149
+ );
150
+
151
+ for (let idx = 0; idx < entries.length; idx += batchSize) {
152
+ const slice = entries.slice(idx, idx + batchSize);
153
+ for (const entry of slice) {
154
+ retryQueue.delete(chunkRetryKey(entry.item));
155
+ entry.attempts += 1;
156
+ }
157
+
158
+ const retryResult = await embedAndStoreBatch({
159
+ embedPort,
160
+ vectorIndex,
161
+ items: slice.map((entry) => entry.item),
162
+ modelUri,
163
+ embedFingerprint,
164
+ });
165
+ embedded += retryResult.embedded;
166
+ errors += retryResult.errors;
167
+ retryEmbedded += retryResult.embedded;
168
+
169
+ const retryByKey = new Set(
170
+ retryResult.retryItems.map((item) => chunkRetryKey(item))
171
+ );
172
+ for (const entry of slice) {
173
+ if (!retryByKey.has(chunkRetryKey(entry.item))) {
174
+ continue;
175
+ }
176
+ if (entry.attempts >= MAX_EMBED_CHUNK_ATTEMPTS) {
177
+ errors += 1;
178
+ } else {
179
+ retryQueue.set(chunkRetryKey(entry.item), entry);
180
+ }
181
+ }
182
+ }
183
+
184
+ return retryEmbedded;
185
+ };
124
186
 
125
187
  while (true) {
126
188
  const batchResult = await getActiveChunks(db, batchSize, cursor);
@@ -140,45 +202,27 @@ async function forceEmbedAll(
140
202
  cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
141
203
  }
142
204
 
143
- const embedResult = await embedTextsWithRecovery(
205
+ const beforeEmbedded = embedded;
206
+ const embedResult = await embedAndStoreBatch({
144
207
  embedPort,
145
- batch.map((item) =>
146
- formatDocForEmbedding(
147
- item.text,
148
- item.title ?? undefined,
149
- embedPort.modelUri
150
- )
151
- )
152
- );
153
-
154
- if (!embedResult.ok) {
155
- errors += batch.length;
156
- continue;
157
- }
208
+ vectorIndex,
209
+ items: batch,
210
+ modelUri,
211
+ embedFingerprint,
212
+ });
213
+ embedded += embedResult.embedded;
214
+ errors += embedResult.errors;
215
+ enqueueRetryItems(embedResult.retryItems, 1);
158
216
 
159
- const vectors: VectorRow[] = [];
160
- for (const [idx, item] of batch.entries()) {
161
- const embedding = embedResult.value.vectors[idx];
162
- if (!embedding) {
163
- errors += 1;
164
- continue;
165
- }
166
- vectors.push({
167
- mirrorHash: item.mirrorHash,
168
- seq: item.seq,
169
- model: modelUri,
170
- embedding: new Float32Array(embedding),
171
- });
217
+ if (embedded > beforeEmbedded) {
218
+ await drainRetryQueue();
172
219
  }
220
+ }
173
221
 
174
- if (vectors.length > 0) {
175
- const storeResult = await vectorIndex.upsertVectors(vectors);
176
- if (!storeResult.ok) {
177
- errors += vectors.length;
178
- continue;
179
- }
180
- embedded += vectors.length;
181
- }
222
+ await drainRetryQueue();
223
+ if (retryQueue.size > 0) {
224
+ errors += retryQueue.size;
225
+ retryQueue.clear();
182
226
  }
183
227
 
184
228
  if (vectorIndex.vecDirty) {
@@ -217,24 +261,25 @@ export async function runEmbed(
217
261
  const db = runtime.store.getRawDb();
218
262
  const stats: VectorStatsPort = createVectorStatsPort(db);
219
263
 
220
- const backlogResult = force
221
- ? await getActiveChunkCount(db)
222
- : await stats.countBacklog(modelUri, { collection: options.collection });
223
- if (!backlogResult.ok) {
224
- throw sdkError("STORE", backlogResult.error.message, {
225
- cause: backlogResult.error.cause,
226
- });
227
- }
264
+ let totalToEmbed = 0;
265
+ if (force) {
266
+ const forceCount = await getActiveChunkCount(db);
267
+ if (!forceCount.ok) {
268
+ throw sdkError("STORE", forceCount.error.message, {
269
+ cause: forceCount.error.cause,
270
+ });
271
+ }
228
272
 
229
- const totalToEmbed = backlogResult.value;
230
- if (totalToEmbed === 0 || dryRun) {
231
- return {
232
- embedded: totalToEmbed,
233
- errors: 0,
234
- duration: 0,
235
- model: modelUri,
236
- searchAvailable: await checkVecAvailable(db),
237
- };
273
+ totalToEmbed = forceCount.value;
274
+ if (totalToEmbed === 0 || dryRun) {
275
+ return {
276
+ embedded: totalToEmbed,
277
+ errors: 0,
278
+ duration: 0,
279
+ model: modelUri,
280
+ searchAvailable: await checkVecAvailable(db),
281
+ };
282
+ }
238
283
  }
239
284
 
240
285
  const embedResult = await runtime.llm.createEmbeddingPort(modelUri, {
@@ -266,6 +311,36 @@ export async function runEmbed(
266
311
  }
267
312
 
268
313
  const vectorIndex = vectorResult.value;
314
+ if (!force) {
315
+ const embedFingerprint = getEmbeddingFingerprint({
316
+ modelUri,
317
+ dimensions: vectorIndex.dimensions,
318
+ });
319
+ const backlogResult = await stats.countBacklog(
320
+ modelUri,
321
+ embedFingerprint,
322
+ {
323
+ collection: options.collection,
324
+ }
325
+ );
326
+ if (!backlogResult.ok) {
327
+ throw sdkError("STORE", backlogResult.error.message, {
328
+ cause: backlogResult.error.cause,
329
+ });
330
+ }
331
+
332
+ totalToEmbed = backlogResult.value;
333
+ if (totalToEmbed === 0 || dryRun) {
334
+ return {
335
+ embedded: totalToEmbed,
336
+ errors: 0,
337
+ duration: 0,
338
+ model: modelUri,
339
+ searchAvailable: vectorIndex.searchAvailable,
340
+ };
341
+ }
342
+ }
343
+
269
344
  const startedAt = Date.now();
270
345
  let result: { embedded: number; errors: number };
271
346
  if (force) {
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Migration: vector embedding freshness fingerprints.
3
+ *
4
+ * @module src/store/migrations/008-vector-fingerprints
5
+ */
6
+
7
+ import type { Database } from "bun:sqlite";
8
+
9
+ import type { Migration } from "./runner";
10
+
11
export const migration: Migration = {
  version: 8,
  name: "vector_fingerprints",

  /**
   * Adds the `embed_fingerprint` column to `content_vectors` and creates the
   * `idx_vectors_freshness` index.
   *
   * Both steps are safe to repeat: the index uses `IF NOT EXISTS`, and the
   * column is only added when `PRAGMA table_info` does not already list it.
   *
   * @param db - Open database handle to migrate.
   */
  up(db: Database): void {
    // SQLite has no `ADD COLUMN IF NOT EXISTS`; without this check a second
    // run would fail with "duplicate column name".
    const hasFingerprintColumn = db
      .query<{ name: string }, []>("PRAGMA table_info(content_vectors)")
      .all()
      .some((column) => column.name === "embed_fingerprint");

    if (!hasFingerprintColumn) {
      db.exec(`
        ALTER TABLE content_vectors ADD COLUMN embed_fingerprint TEXT NOT NULL DEFAULT ''
      `);
    }

    db.exec(`
      CREATE INDEX IF NOT EXISTS idx_vectors_freshness
      ON content_vectors(model, embed_fingerprint, mirror_hash, seq, embedded_at)
    `);
  },
};
@@ -21,6 +21,7 @@ import { migration as m004 } from "./004-doc-links";
21
21
  import { migration as m005 } from "./005-graph-indexes";
22
22
  import { migration as m006 } from "./006-document-metadata";
23
23
  import { migration as m007 } from "./007-document-date-fields";
24
+ import { migration as m008 } from "./008-vector-fingerprints";
24
25
 
25
26
  /** All migrations in order */
26
- export const migrations = [m001, m002, m003, m004, m005, m006, m007];
27
+ export const migrations = [m001, m002, m003, m004, m005, m006, m007, m008];