@gmickel/gno 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Embedding freshness fingerprint.
3
+ *
4
+ * @module src/embed/fingerprint
5
+ */
6
+
7
+ import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
8
+
9
/**
 * Version tag for the contextual formatting applied to text before embedding.
 * It is hashed into the embedding fingerprint under `contextualFormatting`.
 */
export const EMBEDDING_CONTEXTUAL_FORMAT_VERSION = "contextual-embedding-v1";

/**
 * Version tag for the chunking strategy.
 * It is hashed into the embedding fingerprint under `chunking`.
 */
export const EMBEDDING_CHUNKING_STRATEGY_VERSION = "markdown-char-semantic-v1";
11
+
12
/** Inputs from which an embedding freshness fingerprint is derived. */
export interface EmbeddingFingerprintInput {
  /** URI of the embedding model; also used to look up its compatibility profile. */
  modelUri: string;
  /** Vector dimensionality when known; hashed as `null` when omitted. */
  dimensions?: number;
}
16
+
17
/**
 * Computes the embedding freshness fingerprint for a model.
 *
 * The fingerprint is the SHA-256 hex digest of a JSON document that combines
 * the chunking and contextual-format version tags, the vector dimensions
 * (`null` when not supplied), the model URI and selected fields of the
 * model's embedding compatibility profile.
 *
 * @param input - Model URI and optional vector dimensions.
 * @returns Hex-encoded SHA-256 digest of the serialized payload.
 */
export function getEmbeddingFingerprint(
  input: EmbeddingFingerprintInput
): string {
  const { modelUri, dimensions } = input;
  const { batchEmbeddingTrusted, documentFormat, id, queryFormat } =
    getEmbeddingCompatibilityProfile(modelUri);

  // The serialized key order feeds the hash, so it must stay exactly as is.
  const serialized = JSON.stringify({
    chunking: EMBEDDING_CHUNKING_STRATEGY_VERSION,
    contextualFormatting: EMBEDDING_CONTEXTUAL_FORMAT_VERSION,
    dimensions: dimensions ?? null,
    modelUri,
    profile: { batchEmbeddingTrusted, documentFormat, id, queryFormat },
  });

  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(serialized);
  return hasher.digest("hex");
}
@@ -0,0 +1,137 @@
1
+ import type { EmbeddingPort } from "../llm/types";
2
+ import type { BacklogItem, VectorIndexPort, VectorRow } from "../store/vector";
3
+
4
+ import { formatDocForEmbedding } from "../pipeline/contextual";
5
+ import { embedTextsWithRecovery } from "./batch";
6
+
7
/** Number of embedding attempts allowed for a single chunk before it is counted as an error. */
export const MAX_EMBED_CHUNK_ATTEMPTS = 2;

/** Upper bound on how many failure samples `addUniqueSamples` lets a target list hold. */
export const MAX_EMBED_FAILURE_SAMPLES = 5;
9
+
10
/** Outcome of embedding one batch of backlog items and writing the vectors to the store. */
export interface EmbedStoreBatchResult {
  /** Number of vectors written to the vector index. */
  embedded: number;
  /** Number of items counted as failed and not handed back for retry. */
  errors: number;
  /** Items that produced no vector and may be attempted again by the caller. */
  retryItems: BacklogItem[];
  /** Sample failure messages collected for this batch. */
  errorSamples: string[];
  /** Optional hint describing what to try next. */
  suggestion?: string;
  /** Batch-level failure flag reported by the embedding step. */
  batchFailed: boolean;
  /** Message describing the batch-level failure, when one is available. */
  batchError?: string;
}
19
+
20
/**
 * Builds the string key that identifies a chunk in retry bookkeeping.
 *
 * The key is the mirror hash and the sequence number joined by a NUL
 * character.
 *
 * @param item - Object carrying the chunk's `mirrorHash` and `seq`.
 * @returns The composite key for that chunk.
 */
export function chunkRetryKey(item: Pick<BacklogItem, "mirrorHash" | "seq">) {
  const separator = "\0";
  const { mirrorHash, seq } = item;
  return `${mirrorHash}${separator}${seq}`;
}
23
+
24
/**
 * Appends samples to `target` in place, skipping entries it already contains.
 *
 * Copying stops as soon as `target` holds `MAX_EMBED_FAILURE_SAMPLES` entries.
 *
 * @param target - List that receives the new samples (mutated).
 * @param samples - Candidate samples, considered in order.
 */
export function addUniqueSamples(target: string[], samples: string[]): void {
  for (const candidate of samples) {
    const capReached = target.length >= MAX_EMBED_FAILURE_SAMPLES;
    if (capReached) {
      return;
    }
    if (target.includes(candidate)) {
      continue;
    }
    target.push(candidate);
  }
}
34
+
35
/**
 * Renders an LLM failure as a single human-readable line.
 *
 * The cause text is taken from `error.cause` when it is a string, or from
 * its `message` property when it is an object carrying a string `message`.
 * It is appended as `"<message> - <cause>"` only when it is non-empty and
 * differs from the error's own message.
 *
 * @param error - Failure with a message and an optional cause, or `undefined`.
 * @returns The formatted message, or a generic text when no error is given.
 */
export function formatLlmFailure(
  error: { message: string; cause?: unknown } | undefined
): string {
  if (!error) {
    return "Unknown embedding failure";
  }

  const { message, cause } = error;

  let detail = "";
  if (typeof cause === "string") {
    detail = cause;
  } else if (
    typeof cause === "object" &&
    cause !== null &&
    "message" in cause &&
    typeof cause.message === "string"
  ) {
    detail = cause.message;
  }

  // Nothing useful to add when the cause is empty or merely repeats the message.
  if (detail === "" || detail === message) {
    return message;
  }
  return `${message} - ${detail}`;
}
54
+
55
/**
 * Embeds one batch of backlog items and writes the resulting vectors to the
 * vector index.
 *
 * Each item's text is formatted with `formatDocForEmbedding` and embedded via
 * `embedTextsWithRecovery`. Items that come back without a vector are returned
 * in `retryItems`; the rest are upserted together with the model URI and the
 * embedding fingerprint.
 *
 * @param params.embedPort - Port used to compute the embeddings.
 * @param params.vectorIndex - Index that receives the vectors.
 * @param params.items - Backlog items to embed.
 * @param params.modelUri - Model URI used for formatting and stored on each row.
 * @param params.embedFingerprint - Fingerprint stored on each row.
 * @returns Counts, retry candidates and diagnostics for the batch.
 */
export async function embedAndStoreBatch(params: {
  embedPort: EmbeddingPort;
  vectorIndex: VectorIndexPort;
  items: BacklogItem[];
  modelUri: string;
  embedFingerprint: string;
}): Promise<EmbedStoreBatchResult> {
  const { embedPort, vectorIndex, items, modelUri, embedFingerprint } = params;

  const texts = items.map((chunk) =>
    formatDocForEmbedding(chunk.text, chunk.title ?? undefined, modelUri)
  );
  const embedResult = await embedTextsWithRecovery(embedPort, texts);

  if (!embedResult.ok) {
    const failure = embedResult.error;
    const formattedError = formatLlmFailure(failure);

    if (failure.retryable) {
      // Retryable failure: hand every item back instead of counting errors.
      return {
        embedded: 0,
        errors: 0,
        retryItems: items,
        errorSamples: [formattedError],
        suggestion:
          "Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks.",
        batchFailed: true,
        batchError: formattedError,
      };
    }

    // Non-retryable failure: the whole batch counts as errors.
    return {
      embedded: 0,
      errors: items.length,
      retryItems: [],
      errorSamples: [formattedError],
      suggestion: failure.suggestion,
      batchFailed: true,
      batchError: formattedError,
    };
  }

  const recovered = embedResult.value;
  const vectors: VectorRow[] = [];
  const retryItems: BacklogItem[] = [];

  for (const [position, chunk] of items.entries()) {
    const embedding = recovered.vectors[position];
    if (embedding) {
      vectors.push({
        mirrorHash: chunk.mirrorHash,
        seq: chunk.seq,
        model: modelUri,
        embedFingerprint,
        embedding: new Float32Array(embedding),
      });
    } else {
      // No vector for this item: leave it for a later attempt.
      retryItems.push(chunk);
    }
  }

  // The store is only touched when there is at least one vector to write.
  if (vectors.length > 0) {
    const storeResult = await vectorIndex.upsertVectors(vectors);
    if (!storeResult.ok) {
      return {
        embedded: 0,
        errors: vectors.length,
        retryItems,
        errorSamples: [storeResult.error.message],
        suggestion:
          "Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.",
        batchFailed: recovered.batchFailed,
        batchError: recovered.batchError,
      };
    }
  }

  // Covers both a successful write and the "nothing to store" case (embedded = 0).
  return {
    embedded: vectors.length,
    errors: 0,
    retryItems,
    errorSamples: recovered.failureSamples,
    suggestion: recovered.retrySuggestion,
    batchFailed: recovered.batchFailed,
    batchError: recovered.batchError,
  };
}
package/src/sdk/embed.ts CHANGED
@@ -19,15 +19,15 @@ import type {
19
19
  import type { GnoEmbedOptions, GnoEmbedResult } from "./types";
20
20
 
21
21
  import { embedBacklog } from "../embed";
22
- import { embedTextsWithRecovery } from "../embed/batch";
22
+ import { getEmbeddingFingerprint } from "../embed/fingerprint";
23
+ import {
24
+ chunkRetryKey,
25
+ embedAndStoreBatch,
26
+ MAX_EMBED_CHUNK_ATTEMPTS,
27
+ } from "../embed/retry";
23
28
  import { resolveModelUri } from "../llm/registry";
24
- import { formatDocForEmbedding } from "../pipeline/contextual";
25
29
  import { err, ok } from "../store/types";
26
- import {
27
- createVectorIndexPort,
28
- createVectorStatsPort,
29
- type VectorRow,
30
- } from "../store/vector";
30
+ import { createVectorIndexPort, createVectorStatsPort } from "../store/vector";
31
31
  import { sdkError } from "./errors";
32
32
 
33
33
  interface EmbedRuntimeOptions {
@@ -121,6 +121,68 @@ async function forceEmbedAll(
121
121
  let embedded = 0;
122
122
  let errors = 0;
123
123
  let cursor: { mirrorHash: string; seq: number } | undefined;
124
+ const retryQueue = new Map<string, { item: BacklogItem; attempts: number }>();
125
+ const embedFingerprint = getEmbeddingFingerprint({
126
+ modelUri,
127
+ dimensions: vectorIndex.dimensions,
128
+ });
129
+
130
+ const enqueueRetryItems = (items: BacklogItem[], attempts: number): void => {
131
+ for (const item of items) {
132
+ const key = chunkRetryKey(item);
133
+ const existing = retryQueue.get(key);
134
+ retryQueue.set(key, {
135
+ item,
136
+ attempts: Math.max(existing?.attempts ?? 0, attempts),
137
+ });
138
+ }
139
+ };
140
+
141
+ const drainRetryQueue = async (): Promise<number> => {
142
+ if (retryQueue.size === 0) {
143
+ return 0;
144
+ }
145
+
146
+ let retryEmbedded = 0;
147
+ const entries = [...retryQueue.values()].filter(
148
+ (entry) => entry.attempts < MAX_EMBED_CHUNK_ATTEMPTS
149
+ );
150
+
151
+ for (let idx = 0; idx < entries.length; idx += batchSize) {
152
+ const slice = entries.slice(idx, idx + batchSize);
153
+ for (const entry of slice) {
154
+ retryQueue.delete(chunkRetryKey(entry.item));
155
+ entry.attempts += 1;
156
+ }
157
+
158
+ const retryResult = await embedAndStoreBatch({
159
+ embedPort,
160
+ vectorIndex,
161
+ items: slice.map((entry) => entry.item),
162
+ modelUri,
163
+ embedFingerprint,
164
+ });
165
+ embedded += retryResult.embedded;
166
+ errors += retryResult.errors;
167
+ retryEmbedded += retryResult.embedded;
168
+
169
+ const retryByKey = new Set(
170
+ retryResult.retryItems.map((item) => chunkRetryKey(item))
171
+ );
172
+ for (const entry of slice) {
173
+ if (!retryByKey.has(chunkRetryKey(entry.item))) {
174
+ continue;
175
+ }
176
+ if (entry.attempts >= MAX_EMBED_CHUNK_ATTEMPTS) {
177
+ errors += 1;
178
+ } else {
179
+ retryQueue.set(chunkRetryKey(entry.item), entry);
180
+ }
181
+ }
182
+ }
183
+
184
+ return retryEmbedded;
185
+ };
124
186
 
125
187
  while (true) {
126
188
  const batchResult = await getActiveChunks(db, batchSize, cursor);
@@ -140,45 +202,27 @@ async function forceEmbedAll(
140
202
  cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
141
203
  }
142
204
 
143
- const embedResult = await embedTextsWithRecovery(
205
+ const beforeEmbedded = embedded;
206
+ const embedResult = await embedAndStoreBatch({
144
207
  embedPort,
145
- batch.map((item) =>
146
- formatDocForEmbedding(
147
- item.text,
148
- item.title ?? undefined,
149
- embedPort.modelUri
150
- )
151
- )
152
- );
153
-
154
- if (!embedResult.ok) {
155
- errors += batch.length;
156
- continue;
157
- }
208
+ vectorIndex,
209
+ items: batch,
210
+ modelUri,
211
+ embedFingerprint,
212
+ });
213
+ embedded += embedResult.embedded;
214
+ errors += embedResult.errors;
215
+ enqueueRetryItems(embedResult.retryItems, 1);
158
216
 
159
- const vectors: VectorRow[] = [];
160
- for (const [idx, item] of batch.entries()) {
161
- const embedding = embedResult.value.vectors[idx];
162
- if (!embedding) {
163
- errors += 1;
164
- continue;
165
- }
166
- vectors.push({
167
- mirrorHash: item.mirrorHash,
168
- seq: item.seq,
169
- model: modelUri,
170
- embedding: new Float32Array(embedding),
171
- });
217
+ if (embedded > beforeEmbedded) {
218
+ await drainRetryQueue();
172
219
  }
220
+ }
173
221
 
174
- if (vectors.length > 0) {
175
- const storeResult = await vectorIndex.upsertVectors(vectors);
176
- if (!storeResult.ok) {
177
- errors += vectors.length;
178
- continue;
179
- }
180
- embedded += vectors.length;
181
- }
222
+ await drainRetryQueue();
223
+ if (retryQueue.size > 0) {
224
+ errors += retryQueue.size;
225
+ retryQueue.clear();
182
226
  }
183
227
 
184
228
  if (vectorIndex.vecDirty) {
@@ -217,24 +261,25 @@ export async function runEmbed(
217
261
  const db = runtime.store.getRawDb();
218
262
  const stats: VectorStatsPort = createVectorStatsPort(db);
219
263
 
220
- const backlogResult = force
221
- ? await getActiveChunkCount(db)
222
- : await stats.countBacklog(modelUri, { collection: options.collection });
223
- if (!backlogResult.ok) {
224
- throw sdkError("STORE", backlogResult.error.message, {
225
- cause: backlogResult.error.cause,
226
- });
227
- }
264
+ let totalToEmbed = 0;
265
+ if (force) {
266
+ const forceCount = await getActiveChunkCount(db);
267
+ if (!forceCount.ok) {
268
+ throw sdkError("STORE", forceCount.error.message, {
269
+ cause: forceCount.error.cause,
270
+ });
271
+ }
228
272
 
229
- const totalToEmbed = backlogResult.value;
230
- if (totalToEmbed === 0 || dryRun) {
231
- return {
232
- embedded: totalToEmbed,
233
- errors: 0,
234
- duration: 0,
235
- model: modelUri,
236
- searchAvailable: await checkVecAvailable(db),
237
- };
273
+ totalToEmbed = forceCount.value;
274
+ if (totalToEmbed === 0 || dryRun) {
275
+ return {
276
+ embedded: totalToEmbed,
277
+ errors: 0,
278
+ duration: 0,
279
+ model: modelUri,
280
+ searchAvailable: await checkVecAvailable(db),
281
+ };
282
+ }
238
283
  }
239
284
 
240
285
  const embedResult = await runtime.llm.createEmbeddingPort(modelUri, {
@@ -266,6 +311,36 @@ export async function runEmbed(
266
311
  }
267
312
 
268
313
  const vectorIndex = vectorResult.value;
314
+ if (!force) {
315
+ const embedFingerprint = getEmbeddingFingerprint({
316
+ modelUri,
317
+ dimensions: vectorIndex.dimensions,
318
+ });
319
+ const backlogResult = await stats.countBacklog(
320
+ modelUri,
321
+ embedFingerprint,
322
+ {
323
+ collection: options.collection,
324
+ }
325
+ );
326
+ if (!backlogResult.ok) {
327
+ throw sdkError("STORE", backlogResult.error.message, {
328
+ cause: backlogResult.error.cause,
329
+ });
330
+ }
331
+
332
+ totalToEmbed = backlogResult.value;
333
+ if (totalToEmbed === 0 || dryRun) {
334
+ return {
335
+ embedded: totalToEmbed,
336
+ errors: 0,
337
+ duration: 0,
338
+ model: modelUri,
339
+ searchAvailable: vectorIndex.searchAvailable,
340
+ };
341
+ }
342
+ }
343
+
269
344
  const startedAt = Date.now();
270
345
  let result: { embedded: number; errors: number };
271
346
  if (force) {
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Migration: vector embedding freshness fingerprints.
3
+ *
4
+ * @module src/store/migrations/008-vector-fingerprints
5
+ */
6
+
7
+ import type { Database } from "bun:sqlite";
8
+
9
+ import type { Migration } from "./runner";
10
+
11
/**
 * Migration 8: adds the `embed_fingerprint` column to `content_vectors`
 * (non-null, defaulting to the empty string) and creates the
 * `idx_vectors_freshness` index if it does not already exist.
 */
export const migration: Migration = {
  version: 8,
  name: "vector_fingerprints",

  up(db: Database): void {
    // Executed in order: the index references the column added first.
    const statements = [
      `
        ALTER TABLE content_vectors ADD COLUMN embed_fingerprint TEXT NOT NULL DEFAULT ''
      `,
      `
        CREATE INDEX IF NOT EXISTS idx_vectors_freshness
        ON content_vectors(model, embed_fingerprint, mirror_hash, seq, embedded_at)
      `,
    ];

    for (const sql of statements) {
      db.exec(sql);
    }
  },
};
@@ -21,6 +21,7 @@ import { migration as m004 } from "./004-doc-links";
21
21
  import { migration as m005 } from "./005-graph-indexes";
22
22
  import { migration as m006 } from "./006-document-metadata";
23
23
  import { migration as m007 } from "./007-document-date-fields";
24
+ import { migration as m008 } from "./008-vector-fingerprints";
24
25
 
25
26
  /** All migrations in order */
26
- export const migrations = [m001, m002, m003, m004, m005, m006, m007];
27
+ export const migrations = [m001, m002, m003, m004, m005, m006, m007, m008];
@@ -53,6 +53,7 @@ import { analyzeGraphCommunities } from "../../core/graph-analysis";
53
53
  import { normalizeWikiName, stripWikiMdExt } from "../../core/links";
54
54
  import { migrations, runMigrations } from "../migrations";
55
55
  import { err, ok } from "../types";
56
+ import { getStoredEmbeddingFingerprint } from "../vector/freshness";
56
57
  import { modelTableName } from "../vector/sqlite-vec";
57
58
  import { loadFts5Snowball } from "./fts5-snowball";
58
59
 
@@ -3065,10 +3066,14 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
3065
3066
 
3066
3067
  async getStatus(options?: {
3067
3068
  embedModel?: string;
3069
+ embedFingerprint?: string;
3068
3070
  }): Promise<StoreResult<IndexStatus>> {
3069
3071
  try {
3070
3072
  const db = this.ensureOpen();
3071
3073
  const embedModel = options?.embedModel ?? null;
3074
+ const embedFingerprint =
3075
+ options?.embedFingerprint ??
3076
+ (embedModel ? getStoredEmbeddingFingerprint(db, embedModel) : null);
3072
3077
 
3073
3078
  // Get version
3074
3079
  const versionRow = db
@@ -3097,7 +3102,7 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
3097
3102
  }
3098
3103
 
3099
3104
  const collectionStats = db
3100
- .query<CollectionStat, [string | null, string | null]>(
3105
+ .query<CollectionStat, [string | null, string | null, string | null]>(
3101
3106
  `
3102
3107
  SELECT
3103
3108
  c.name,
@@ -3120,7 +3125,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
3120
3125
  SELECT 1 FROM content_vectors cv
3121
3126
  WHERE cv.mirror_hash = cc.mirror_hash
3122
3127
  AND cv.seq = cc.seq
3123
- AND (? IS NULL OR cv.model = ?)
3128
+ AND (? IS NULL OR (
3129
+ cv.model = ?
3130
+ AND cv.embed_fingerprint = ?
3131
+ ))
3124
3132
  AND cv.embedded_at >= cc.created_at
3125
3133
  )) as embedded_count
3126
3134
  FROM collections c
@@ -3128,7 +3136,7 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
3128
3136
  GROUP BY c.name, c.path
3129
3137
  `
3130
3138
  )
3131
- .all(embedModel, embedModel);
3139
+ .all(embedModel, embedModel, embedFingerprint);
3132
3140
 
3133
3141
  // Get totals
3134
3142
  const totalsRow = db
@@ -3152,7 +3160,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
3152
3160
  // Embedding backlog: chunks from active docs without vectors
3153
3161
  // Uses EXISTS to avoid duplicates when multiple docs share mirror_hash
3154
3162
  const backlogRow = db
3155
- .query<{ count: number }, [string | null, string | null]>(
3163
+ .query<
3164
+ { count: number },
3165
+ [string | null, string | null, string | null]
3166
+ >(
3156
3167
  `
3157
3168
  SELECT COUNT(*) as count FROM content_chunks c
3158
3169
  WHERE EXISTS (
@@ -3163,12 +3174,15 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
3163
3174
  SELECT 1 FROM content_vectors v
3164
3175
  WHERE v.mirror_hash = c.mirror_hash
3165
3176
  AND v.seq = c.seq
3166
- AND (? IS NULL OR v.model = ?)
3177
+ AND (? IS NULL OR (
3178
+ v.model = ?
3179
+ AND v.embed_fingerprint = ?
3180
+ ))
3167
3181
  AND v.embedded_at >= c.created_at
3168
3182
  )
3169
3183
  `
3170
3184
  )
3171
- .get(embedModel, embedModel);
3185
+ .get(embedModel, embedModel, embedFingerprint);
3172
3186
 
3173
3187
  // Recent errors (last 24h)
3174
3188
  const recentErrorsRow = db
@@ -984,6 +984,7 @@ export interface StorePort {
984
984
  */
985
985
  getStatus(options?: {
986
986
  embedModel?: string;
987
+ embedFingerprint?: string;
987
988
  }): Promise<StoreResult<IndexStatus>>;
988
989
 
989
990
  // ─────────────────────────────────────────────────────────────────────────
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Vector freshness helpers.
3
+ *
4
+ * @module src/store/vector/freshness
5
+ */
6
+
7
+ import type { Database } from "bun:sqlite";
8
+
9
+ import { getEmbeddingFingerprint } from "../../embed/fingerprint";
10
+
11
/**
 * Derives the vector dimensionality for a model from one stored embedding.
 *
 * Reads a single `embedding` value from `content_vectors` for the model and
 * divides its byte length by the size of a 32-bit float.
 *
 * @param db - Open SQLite database.
 * @param model - Model identifier matched against the `model` column.
 * @returns The dimension count, or `undefined` when no embedding is stored.
 */
export function getStoredEmbeddingDimensions(
  db: Database,
  model: string
): number | undefined {
  const statement = db.prepare(
    "SELECT embedding FROM content_vectors WHERE model = ? LIMIT 1"
  );
  const sample = statement.get(model) as { embedding: Uint8Array } | undefined;

  const blob = sample?.embedding;
  if (!blob) {
    return undefined;
  }

  return blob.byteLength / Float32Array.BYTES_PER_ELEMENT;
}
25
+
26
/**
 * Computes the embedding fingerprint for a model using the dimensionality
 * observed in the vectors already stored for it.
 *
 * @param db - Open SQLite database.
 * @param modelUri - Model URI to fingerprint.
 * @returns The fingerprint; dimensions are left undefined when no vectors are stored.
 */
export function getStoredEmbeddingFingerprint(
  db: Database,
  modelUri: string
): string {
  const dimensions = getStoredEmbeddingDimensions(db, modelUri);
  return getEmbeddingFingerprint({ modelUri, dimensions });
}
@@ -116,8 +116,10 @@ export async function createVectorIndexPort(
116
116
 
117
117
  // Prepared statements for content_vectors table
118
118
  const upsertVectorStmt = db.prepare(`
119
- INSERT OR REPLACE INTO content_vectors (mirror_hash, seq, model, embedding, embedded_at)
120
- VALUES (?, ?, ?, ?, datetime('now'))
119
+ INSERT OR REPLACE INTO content_vectors (
120
+ mirror_hash, seq, model, embed_fingerprint, embedding, embedded_at
121
+ )
122
+ VALUES (?, ?, ?, ?, ?, datetime('now'))
121
123
  `);
122
124
 
123
125
  const deleteVectorStmt = db.prepare(`
@@ -172,6 +174,7 @@ export async function createVectorIndexPort(
172
174
  row.mirrorHash,
173
175
  row.seq,
174
176
  row.model,
177
+ row.embedFingerprint,
175
178
  encodeEmbedding(row.embedding)
176
179
  );
177
180
  }
@@ -65,6 +65,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
65
65
 
66
66
  countBacklog(
67
67
  model: string,
68
+ embedFingerprint: string,
68
69
  options?: { collection?: string }
69
70
  ): Promise<StoreResult<number>> {
70
71
  try {
@@ -80,10 +81,13 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
80
81
  WHERE v.mirror_hash = c.mirror_hash
81
82
  AND v.seq = c.seq
82
83
  AND v.model = ?
84
+ AND v.embed_fingerprint = ?
83
85
  AND v.embedded_at >= c.created_at
84
86
  )
85
87
  `;
86
- const result = db.prepare(sql).get(...activeDoc.params, model) as {
88
+ const result = db
89
+ .prepare(sql)
90
+ .get(...activeDoc.params, model, embedFingerprint) as {
87
91
  count: number;
88
92
  };
89
93
  return Promise.resolve(ok(result.count));
@@ -99,6 +103,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
99
103
 
100
104
  getBacklog(
101
105
  model: string,
106
+ embedFingerprint: string,
102
107
  options?: {
103
108
  limit?: number;
104
109
  after?: { mirrorHash: string; seq: number };
@@ -123,6 +128,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
123
128
  WHERE v.mirror_hash = c.mirror_hash
124
129
  AND v.seq = c.seq
125
130
  AND v.model = ?
131
+ AND v.embed_fingerprint = ?
126
132
  ) THEN 'new'
127
133
  ELSE 'changed'
128
134
  END as reason
@@ -133,6 +139,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
133
139
  WHERE v.mirror_hash = c.mirror_hash
134
140
  AND v.seq = c.seq
135
141
  AND v.model = ?
142
+ AND v.embed_fingerprint = ?
136
143
  AND v.embedded_at >= c.created_at
137
144
  )
138
145
  AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))
@@ -148,6 +155,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
148
155
  WHERE v.mirror_hash = c.mirror_hash
149
156
  AND v.seq = c.seq
150
157
  AND v.model = ?
158
+ AND v.embed_fingerprint = ?
151
159
  ) THEN 'new'
152
160
  ELSE 'changed'
153
161
  END as reason
@@ -158,6 +166,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
158
166
  WHERE v.mirror_hash = c.mirror_hash
159
167
  AND v.seq = c.seq
160
168
  AND v.model = ?
169
+ AND v.embed_fingerprint = ?
161
170
  AND v.embedded_at >= c.created_at
162
171
  )
163
172
  ORDER BY c.mirror_hash, c.seq
@@ -167,14 +176,23 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
167
176
  const params = after
168
177
  ? [
169
178
  model,
179
+ embedFingerprint,
170
180
  ...activeDoc.params,
171
181
  model,
182
+ embedFingerprint,
172
183
  after.mirrorHash,
173
184
  after.mirrorHash,
174
185
  after.seq,
175
186
  limit,
176
187
  ]
177
- : [model, ...activeDoc.params, model, limit];
188
+ : [
189
+ model,
190
+ embedFingerprint,
191
+ ...activeDoc.params,
192
+ model,
193
+ embedFingerprint,
194
+ limit,
195
+ ];
178
196
 
179
197
  const results = db.prepare(sql).all(...params) as BacklogItem[];
180
198
  return Promise.resolve(ok(results));