@gmickel/gno 1.5.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +5 -2
- package/src/cli/commands/doctor.ts +179 -1
- package/src/cli/commands/embed.ts +217 -242
- package/src/embed/backlog.ts +92 -45
- package/src/embed/fingerprint.ts +37 -0
- package/src/embed/retry.ts +137 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +81 -19
- package/src/sdk/embed.ts +134 -59
- package/src/store/migrations/008-vector-fingerprints.ts +25 -0
- package/src/store/migrations/index.ts +2 -1
- package/src/store/sqlite/adapter.ts +20 -6
- package/src/store/types.ts +1 -0
- package/src/store/vector/freshness.ts +34 -0
- package/src/store/vector/sqlite-vec.ts +5 -2
- package/src/store/vector/stats.ts +20 -2
- package/src/store/vector/types.ts +3 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding freshness fingerprint.
|
|
3
|
+
*
|
|
4
|
+
* @module src/embed/fingerprint
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
|
|
8
|
+
|
|
9
|
+
/**
 * Version tag stored under `contextualFormatting` in the fingerprint payload.
 * The payload is hashed, so changing this string changes every fingerprint.
 */
export const EMBEDDING_CONTEXTUAL_FORMAT_VERSION = "contextual-embedding-v1";

/**
 * Version tag stored under `chunking` in the fingerprint payload.
 * The payload is hashed, so changing this string changes every fingerprint.
 */
export const EMBEDDING_CHUNKING_STRATEGY_VERSION = "markdown-char-semantic-v1";
|
|
11
|
+
|
|
12
|
+
/** Input from which an embedding fingerprint is derived. */
export interface EmbeddingFingerprintInput {
  /**
   * Model URI. Hashed verbatim and also used to look up the model's
   * embedding compatibility profile.
   */
  modelUri: string;
  /** Vector dimensions; hashed as `null` when omitted. */
  dimensions?: number;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Compute the embedding freshness fingerprint for a model.
 *
 * The fingerprint is the SHA-256 hex digest of a JSON document holding the
 * chunking and contextual-format version tags, the vector dimensions (`null`
 * when omitted), the model URI, and four fields of the model's compatibility
 * profile.
 *
 * @param input - Model URI and optional vector dimensions.
 * @returns Hex-encoded SHA-256 digest of the serialized payload.
 */
export function getEmbeddingFingerprint(
  input: EmbeddingFingerprintInput
): string {
  const { modelUri, dimensions } = input;
  const compat = getEmbeddingCompatibilityProfile(modelUri);

  // Key order is part of the hash input: JSON.stringify emits properties in
  // insertion order, so the order below must not change.
  const profile = {
    batchEmbeddingTrusted: compat.batchEmbeddingTrusted,
    documentFormat: compat.documentFormat,
    id: compat.id,
    queryFormat: compat.queryFormat,
  };
  const serialized = JSON.stringify({
    chunking: EMBEDDING_CHUNKING_STRATEGY_VERSION,
    contextualFormatting: EMBEDDING_CONTEXTUAL_FORMAT_VERSION,
    dimensions: dimensions ?? null,
    modelUri,
    profile,
  });

  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(serialized);
  return hasher.digest("hex");
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import type { EmbeddingPort } from "../llm/types";
|
|
2
|
+
import type { BacklogItem, VectorIndexPort, VectorRow } from "../store/vector";
|
|
3
|
+
|
|
4
|
+
import { formatDocForEmbedding } from "../pipeline/contextual";
|
|
5
|
+
import { embedTextsWithRecovery } from "./batch";
|
|
6
|
+
|
|
7
|
+
/**
 * Upper bound on embedding attempts per chunk. Callers that keep a retry
 * queue stop retrying a chunk once its attempt count reaches this value.
 */
export const MAX_EMBED_CHUNK_ATTEMPTS = 2;

/**
 * Maximum number of failure sample strings `addUniqueSamples` lets a target
 * list hold.
 */
export const MAX_EMBED_FAILURE_SAMPLES = 5;
|
|
9
|
+
|
|
10
|
+
/** Outcome of one `embedAndStoreBatch` call. */
export interface EmbedStoreBatchResult {
  /** Number of vectors written by a successful upsert; 0 otherwise. */
  embedded: number;
  /**
   * Items counted as failed: the whole batch on a non-retryable embed
   * failure, or the vectors that could not be written on a store failure.
   */
  errors: number;
  /**
   * Items the caller should try again: the whole batch on a retryable embed
   * failure, otherwise the items for which no vector came back.
   */
  retryItems: BacklogItem[];
  /** Failure messages describing what went wrong in this batch. */
  errorSamples: string[];
  /** Optional hint for the user on how to recover. */
  suggestion?: string;
  /**
   * `true` when the embed call itself failed; otherwise the flag reported by
   * the embed step is passed through.
   */
  batchFailed: boolean;
  /** Error text that accompanies `batchFailed`, when available. */
  batchError?: string;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Build the retry-queue key for a chunk.
 *
 * The key is the mirror hash and the sequence number joined by a NUL
 * character.
 *
 * @param item - Chunk identity (`mirrorHash` and `seq`).
 * @returns The string key for that chunk.
 */
export function chunkRetryKey(item: Pick<BacklogItem, "mirrorHash" | "seq">) {
  const { mirrorHash, seq } = item;
  const separator = "\u0000";
  return mirrorHash + separator + String(seq);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Append failure samples to `target`, skipping duplicates, until `target`
 * holds `MAX_EMBED_FAILURE_SAMPLES` entries.
 *
 * @param target - List to extend; mutated in place.
 * @param samples - Candidate samples, considered in order.
 */
export function addUniqueSamples(target: string[], samples: string[]): void {
  for (const candidate of samples) {
    const hasRoom = target.length < MAX_EMBED_FAILURE_SAMPLES;
    if (!hasRoom) {
      // Cap reached: nothing later in `samples` can be added either.
      return;
    }
    if (target.includes(candidate)) {
      continue;
    }
    target.push(candidate);
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Render an LLM failure as a single line of text.
 *
 * @param error - Failure to format; may be `undefined`.
 * @returns `"Unknown embedding failure"` when `error` is missing. Otherwise
 *   the error message, followed by `" - <cause>"` when the cause yields a
 *   non-empty text that differs from the message. A cause yields text when it
 *   is a string, or an object whose `message` property is a string.
 */
export function formatLlmFailure(
  error: { message: string; cause?: unknown } | undefined
): string {
  if (error === undefined || error === null) {
    return "Unknown embedding failure";
  }

  const { cause, message } = error;
  let detail = "";
  if (typeof cause === "string") {
    detail = cause;
  } else if (cause && typeof cause === "object" && "message" in cause) {
    const inner = cause.message;
    if (typeof inner === "string") {
      detail = inner;
    }
  }

  // An empty or repeated cause adds no information, so only the message is shown.
  if (detail === "" || detail === message) {
    return message;
  }
  return `${message} - ${detail}`;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Embed one batch of backlog items and write the resulting vectors.
 *
 * Steps:
 * 1. Format every item with `formatDocForEmbedding` and embed the texts via
 *    `embedTextsWithRecovery`.
 * 2. If the embed call fails, report the whole batch either as retryable
 *    (all items returned in `retryItems`) or as errors.
 * 3. Otherwise, turn every returned embedding into a vector row; items with
 *    no embedding are returned in `retryItems`.
 * 4. Upsert the rows, unless there are none. A store failure counts every
 *    row as an error.
 *
 * @param params.embedPort - Port used to compute embeddings.
 * @param params.vectorIndex - Index the vectors are upserted into.
 * @param params.items - Backlog items to embed.
 * @param params.modelUri - Model URI recorded on each row and used for formatting.
 * @param params.embedFingerprint - Fingerprint recorded on each row.
 * @returns Counts, retry candidates and failure details for the batch.
 */
export async function embedAndStoreBatch(params: {
  embedPort: EmbeddingPort;
  vectorIndex: VectorIndexPort;
  items: BacklogItem[];
  modelUri: string;
  embedFingerprint: string;
}): Promise<EmbedStoreBatchResult> {
  const { embedPort, vectorIndex, items, modelUri, embedFingerprint } = params;

  const documents = items.map((item) =>
    formatDocForEmbedding(item.text, item.title ?? undefined, modelUri)
  );
  const embedResult = await embedTextsWithRecovery(embedPort, documents);

  if (!embedResult.ok) {
    const failure = embedResult.error;
    const message = formatLlmFailure(failure);
    const canRetry = failure.retryable;
    return {
      embedded: 0,
      errors: canRetry ? 0 : items.length,
      retryItems: canRetry ? items : [],
      errorSamples: [message],
      suggestion: canRetry
        ? "Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks."
        : failure.suggestion,
      batchFailed: true,
      batchError: message,
    };
  }

  const outcome = embedResult.value;
  const rows: VectorRow[] = [];
  const missing: BacklogItem[] = [];
  for (const [position, item] of items.entries()) {
    const embedding = outcome.vectors[position];
    if (embedding) {
      rows.push({
        mirrorHash: item.mirrorHash,
        seq: item.seq,
        model: modelUri,
        embedFingerprint,
        embedding: new Float32Array(embedding),
      });
    } else {
      // No vector came back for this item; hand it back for a retry.
      missing.push(item);
    }
  }

  // Result shape shared by the "nothing to store" and "stored" outcomes.
  const summarize = (embedded: number): EmbedStoreBatchResult => ({
    embedded,
    errors: 0,
    retryItems: missing,
    errorSamples: outcome.failureSamples,
    suggestion: outcome.retrySuggestion,
    batchFailed: outcome.batchFailed,
    batchError: outcome.batchError,
  });

  if (rows.length === 0) {
    // Skip the store call entirely when there is nothing to write.
    return summarize(0);
  }

  const stored = await vectorIndex.upsertVectors(rows);
  if (stored.ok) {
    return summarize(rows.length);
  }

  return {
    embedded: 0,
    errors: rows.length,
    retryItems: missing,
    errorSamples: [stored.error.message],
    suggestion:
      "Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.",
    batchFailed: outcome.batchFailed,
    batchError: outcome.batchError,
  };
}
|
|
@@ -39,6 +39,8 @@ interface TokenizingModel {
|
|
|
39
39
|
detokenize(tokens: readonly number[]): string;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
type EmbeddingInput = Parameters<LlamaEmbeddingContext["getEmbeddingFor"]>[0];
|
|
43
|
+
|
|
42
44
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
43
45
|
// Constants
|
|
44
46
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -46,12 +48,19 @@ interface TokenizingModel {
|
|
|
46
48
|
// Aim for a small pool so CPU-only runs can exploit parallel contexts without
|
|
47
49
|
// multiplying RAM usage too aggressively. Additional contexts fall back
|
|
48
50
|
// gracefully if memory is tight.
|
|
49
|
-
const
|
|
51
|
+
const MAX_DEFAULT_EMBEDDING_CONTEXTS = 2;
|
|
52
|
+
const MAX_EMBEDDING_CONTEXTS_OVERRIDE = 4;
|
|
50
53
|
const TARGET_CORES_PER_EMBEDDING_CONTEXT = 4;
|
|
51
|
-
const
|
|
54
|
+
const CONSTRAINED_WINDOWS_THRESHOLD_BYTES = 16 * 1024 * 1024 * 1024;
|
|
55
|
+
const MID_MEMORY_WINDOWS_THRESHOLD_BYTES = 24 * 1024 * 1024 * 1024;
|
|
52
56
|
const LOW_MEMORY_WINDOWS_CONTEXTS = 1;
|
|
57
|
+
const MID_MEMORY_WINDOWS_CONTEXTS = 2;
|
|
53
58
|
const DEFAULT_EMBEDDING_CONTEXT_SIZE = 2_048;
|
|
54
59
|
|
|
60
|
+
/**
 * Return an embedding vector as a plain `number[]`.
 *
 * A real array is returned as-is (same reference); anything else is copied
 * into a new array with `Array.from`.
 * NOTE(review): presumably guards against typed-array vectors coming from the
 * embedding context — confirm against the library's runtime behaviour.
 */
function embeddingVectorToArray(vector: readonly number[]): number[] {
  if (!Array.isArray(vector)) {
    return Array.from(vector);
  }
  return vector as number[];
}
|
|
63
|
+
|
|
55
64
|
function resolveEmbeddingContextPoolOverride(
|
|
56
65
|
env: NodeJS.ProcessEnv = process.env
|
|
57
66
|
): number | undefined {
|
|
@@ -63,7 +72,35 @@ function resolveEmbeddingContextPoolOverride(
|
|
|
63
72
|
if (!(Number.isFinite(parsed) && parsed > 0)) {
|
|
64
73
|
return undefined;
|
|
65
74
|
}
|
|
66
|
-
return Math.max(1, Math.min(
|
|
75
|
+
return Math.max(1, Math.min(MAX_EMBEDDING_CONTEXTS_OVERRIDE, parsed));
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
 * Read the per-context thread count override from `GNO_EMBED_THREADS`.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The parsed positive integer, or `undefined` when the variable is
 *   unset, empty, or does not parse to a number greater than zero.
 */
function resolveThreadsPerContextOverride(
  env: NodeJS.ProcessEnv = process.env
): number | undefined {
  // An unset or empty variable parses to NaN and falls through to `undefined`.
  const threads = Number.parseInt(env.GNO_EMBED_THREADS ?? "", 10);
  const usable = Number.isFinite(threads) && threads > 0;
  return usable ? Math.max(1, threads) : undefined;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Read the embedding context size override from `GNO_EMBED_CONTEXT_SIZE`.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The parsed value raised to a floor of 128, or `undefined` when the
 *   variable is unset, empty, or does not parse to a number greater than zero.
 */
function resolveEmbeddingContextSizeOverride(
  env: NodeJS.ProcessEnv = process.env
): number | undefined {
  // An unset or empty variable parses to NaN and falls through to `undefined`.
  const size = Number.parseInt(env.GNO_EMBED_CONTEXT_SIZE ?? "", 10);
  const usable = Number.isFinite(size) && size > 0;
  return usable ? Math.max(128, size) : undefined;
}
|
|
68
105
|
|
|
69
106
|
export function resolveEmbeddingContextPoolSize(options: {
|
|
@@ -86,19 +123,28 @@ export function resolveEmbeddingContextPoolSize(options: {
|
|
|
86
123
|
const totalMemoryBytes = options.totalMemoryBytes ?? totalmem();
|
|
87
124
|
if (
|
|
88
125
|
platformName === "win32" &&
|
|
89
|
-
totalMemoryBytes
|
|
126
|
+
totalMemoryBytes < CONSTRAINED_WINDOWS_THRESHOLD_BYTES
|
|
90
127
|
) {
|
|
91
128
|
return LOW_MEMORY_WINDOWS_CONTEXTS;
|
|
92
129
|
}
|
|
93
130
|
|
|
94
131
|
const cpuMathCores = Math.max(1, options.cpuMathCores);
|
|
95
|
-
|
|
132
|
+
const adaptivePoolSize = Math.max(
|
|
96
133
|
1,
|
|
97
134
|
Math.min(
|
|
98
|
-
|
|
135
|
+
MAX_DEFAULT_EMBEDDING_CONTEXTS,
|
|
99
136
|
Math.ceil(cpuMathCores / TARGET_CORES_PER_EMBEDDING_CONTEXT)
|
|
100
137
|
)
|
|
101
138
|
);
|
|
139
|
+
|
|
140
|
+
if (
|
|
141
|
+
platformName === "win32" &&
|
|
142
|
+
totalMemoryBytes < MID_MEMORY_WINDOWS_THRESHOLD_BYTES
|
|
143
|
+
) {
|
|
144
|
+
return Math.min(MID_MEMORY_WINDOWS_CONTEXTS, adaptivePoolSize);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
return adaptivePoolSize;
|
|
102
148
|
}
|
|
103
149
|
|
|
104
150
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -145,9 +191,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
145
191
|
return { ok: false, error: prepared.error };
|
|
146
192
|
}
|
|
147
193
|
const embedding = await this.runOnWorker((worker) =>
|
|
148
|
-
worker.context.getEmbeddingFor(prepared.value.
|
|
194
|
+
worker.context.getEmbeddingFor(prepared.value.input)
|
|
149
195
|
);
|
|
150
|
-
const vector =
|
|
196
|
+
const vector = embeddingVectorToArray(embedding.vector);
|
|
151
197
|
|
|
152
198
|
// Cache dimensions on first call
|
|
153
199
|
if (this.dims === null) {
|
|
@@ -171,13 +217,13 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
171
217
|
}
|
|
172
218
|
|
|
173
219
|
try {
|
|
174
|
-
const
|
|
220
|
+
const preparedInputs: EmbeddingInput[] = [];
|
|
175
221
|
for (const text of texts) {
|
|
176
222
|
const prepared = this.truncateForEmbedding(text, "batch");
|
|
177
223
|
if (!prepared.ok) {
|
|
178
224
|
return { ok: false, error: prepared.error };
|
|
179
225
|
}
|
|
180
|
-
|
|
226
|
+
preparedInputs.push(prepared.value.input);
|
|
181
227
|
}
|
|
182
228
|
|
|
183
229
|
const allResults = Array.from(
|
|
@@ -191,16 +237,19 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
191
237
|
while (true) {
|
|
192
238
|
const index = nextIndex;
|
|
193
239
|
nextIndex += 1;
|
|
194
|
-
if (index >=
|
|
240
|
+
if (index >= preparedInputs.length) {
|
|
195
241
|
return;
|
|
196
242
|
}
|
|
197
243
|
|
|
244
|
+
const input = preparedInputs[index];
|
|
245
|
+
if (input === undefined) {
|
|
246
|
+
return;
|
|
247
|
+
}
|
|
198
248
|
const embedding = await this.runOnSpecificWorker(
|
|
199
249
|
worker,
|
|
200
|
-
(current) =>
|
|
201
|
-
current.context.getEmbeddingFor(preparedTexts[index] as string)
|
|
250
|
+
(current) => current.context.getEmbeddingFor(input)
|
|
202
251
|
);
|
|
203
|
-
allResults[index] =
|
|
252
|
+
allResults[index] = embeddingVectorToArray(embedding.vector);
|
|
204
253
|
}
|
|
205
254
|
})
|
|
206
255
|
);
|
|
@@ -316,6 +365,11 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
316
365
|
return 0;
|
|
317
366
|
}
|
|
318
367
|
|
|
368
|
+
const override = resolveThreadsPerContextOverride();
|
|
369
|
+
if (override !== undefined) {
|
|
370
|
+
return override;
|
|
371
|
+
}
|
|
372
|
+
|
|
319
373
|
return Math.max(1, Math.floor(Math.max(1, llama.cpuMathCores) / poolSize));
|
|
320
374
|
}
|
|
321
375
|
|
|
@@ -335,6 +389,8 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
335
389
|
this.llamaModel = llamaModel as TokenizingModel;
|
|
336
390
|
const llama = await this.manager.getLlama();
|
|
337
391
|
const lifecycleVersion = this.lifecycleVersion;
|
|
392
|
+
this.embeddingContextSize =
|
|
393
|
+
resolveEmbeddingContextSizeOverride() ?? DEFAULT_EMBEDDING_CONTEXT_SIZE;
|
|
338
394
|
const targetPoolSize = this.resolveTargetPoolSize(llama);
|
|
339
395
|
const threadsPerContext = this.resolveThreadsPerContext(
|
|
340
396
|
llama,
|
|
@@ -400,7 +456,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
400
456
|
private truncateForEmbedding(
|
|
401
457
|
text: string,
|
|
402
458
|
mode: "single" | "batch"
|
|
403
|
-
): LlmResult<{
|
|
459
|
+
): LlmResult<{ input: EmbeddingInput }> {
|
|
404
460
|
const model = this.llamaModel;
|
|
405
461
|
const modelLimit =
|
|
406
462
|
typeof model?.trainContextSize === "number" &&
|
|
@@ -409,7 +465,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
409
465
|
? Math.floor(model.trainContextSize)
|
|
410
466
|
: undefined;
|
|
411
467
|
if (!model) {
|
|
412
|
-
return { ok: true, value: { text } };
|
|
468
|
+
return { ok: true, value: { input: text } };
|
|
413
469
|
}
|
|
414
470
|
|
|
415
471
|
const rawLimit =
|
|
@@ -420,10 +476,13 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
420
476
|
try {
|
|
421
477
|
const tokens = model.tokenize(text);
|
|
422
478
|
if (tokens.length <= limit) {
|
|
423
|
-
return {
|
|
479
|
+
return {
|
|
480
|
+
ok: true,
|
|
481
|
+
value: { input: tokens as EmbeddingInput },
|
|
482
|
+
};
|
|
424
483
|
}
|
|
425
484
|
|
|
426
|
-
const
|
|
485
|
+
const truncatedTokens = tokens.slice(0, limit);
|
|
427
486
|
const shouldWarn =
|
|
428
487
|
mode === "single"
|
|
429
488
|
? !this.warnedSingleTruncation
|
|
@@ -438,7 +497,10 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
438
497
|
`[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
|
|
439
498
|
);
|
|
440
499
|
}
|
|
441
|
-
return {
|
|
500
|
+
return {
|
|
501
|
+
ok: true,
|
|
502
|
+
value: { input: truncatedTokens as EmbeddingInput },
|
|
503
|
+
};
|
|
442
504
|
} catch (error) {
|
|
443
505
|
return { ok: false, error: inferenceFailedError(this.modelUri, error) };
|
|
444
506
|
}
|
package/src/sdk/embed.ts
CHANGED
|
@@ -19,15 +19,15 @@ import type {
|
|
|
19
19
|
import type { GnoEmbedOptions, GnoEmbedResult } from "./types";
|
|
20
20
|
|
|
21
21
|
import { embedBacklog } from "../embed";
|
|
22
|
-
import {
|
|
22
|
+
import { getEmbeddingFingerprint } from "../embed/fingerprint";
|
|
23
|
+
import {
|
|
24
|
+
chunkRetryKey,
|
|
25
|
+
embedAndStoreBatch,
|
|
26
|
+
MAX_EMBED_CHUNK_ATTEMPTS,
|
|
27
|
+
} from "../embed/retry";
|
|
23
28
|
import { resolveModelUri } from "../llm/registry";
|
|
24
|
-
import { formatDocForEmbedding } from "../pipeline/contextual";
|
|
25
29
|
import { err, ok } from "../store/types";
|
|
26
|
-
import {
|
|
27
|
-
createVectorIndexPort,
|
|
28
|
-
createVectorStatsPort,
|
|
29
|
-
type VectorRow,
|
|
30
|
-
} from "../store/vector";
|
|
30
|
+
import { createVectorIndexPort, createVectorStatsPort } from "../store/vector";
|
|
31
31
|
import { sdkError } from "./errors";
|
|
32
32
|
|
|
33
33
|
interface EmbedRuntimeOptions {
|
|
@@ -121,6 +121,68 @@ async function forceEmbedAll(
|
|
|
121
121
|
let embedded = 0;
|
|
122
122
|
let errors = 0;
|
|
123
123
|
let cursor: { mirrorHash: string; seq: number } | undefined;
|
|
124
|
+
const retryQueue = new Map<string, { item: BacklogItem; attempts: number }>();
|
|
125
|
+
const embedFingerprint = getEmbeddingFingerprint({
|
|
126
|
+
modelUri,
|
|
127
|
+
dimensions: vectorIndex.dimensions,
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
const enqueueRetryItems = (items: BacklogItem[], attempts: number): void => {
|
|
131
|
+
for (const item of items) {
|
|
132
|
+
const key = chunkRetryKey(item);
|
|
133
|
+
const existing = retryQueue.get(key);
|
|
134
|
+
retryQueue.set(key, {
|
|
135
|
+
item,
|
|
136
|
+
attempts: Math.max(existing?.attempts ?? 0, attempts),
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
const drainRetryQueue = async (): Promise<number> => {
|
|
142
|
+
if (retryQueue.size === 0) {
|
|
143
|
+
return 0;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
let retryEmbedded = 0;
|
|
147
|
+
const entries = [...retryQueue.values()].filter(
|
|
148
|
+
(entry) => entry.attempts < MAX_EMBED_CHUNK_ATTEMPTS
|
|
149
|
+
);
|
|
150
|
+
|
|
151
|
+
for (let idx = 0; idx < entries.length; idx += batchSize) {
|
|
152
|
+
const slice = entries.slice(idx, idx + batchSize);
|
|
153
|
+
for (const entry of slice) {
|
|
154
|
+
retryQueue.delete(chunkRetryKey(entry.item));
|
|
155
|
+
entry.attempts += 1;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
const retryResult = await embedAndStoreBatch({
|
|
159
|
+
embedPort,
|
|
160
|
+
vectorIndex,
|
|
161
|
+
items: slice.map((entry) => entry.item),
|
|
162
|
+
modelUri,
|
|
163
|
+
embedFingerprint,
|
|
164
|
+
});
|
|
165
|
+
embedded += retryResult.embedded;
|
|
166
|
+
errors += retryResult.errors;
|
|
167
|
+
retryEmbedded += retryResult.embedded;
|
|
168
|
+
|
|
169
|
+
const retryByKey = new Set(
|
|
170
|
+
retryResult.retryItems.map((item) => chunkRetryKey(item))
|
|
171
|
+
);
|
|
172
|
+
for (const entry of slice) {
|
|
173
|
+
if (!retryByKey.has(chunkRetryKey(entry.item))) {
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
176
|
+
if (entry.attempts >= MAX_EMBED_CHUNK_ATTEMPTS) {
|
|
177
|
+
errors += 1;
|
|
178
|
+
} else {
|
|
179
|
+
retryQueue.set(chunkRetryKey(entry.item), entry);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return retryEmbedded;
|
|
185
|
+
};
|
|
124
186
|
|
|
125
187
|
while (true) {
|
|
126
188
|
const batchResult = await getActiveChunks(db, batchSize, cursor);
|
|
@@ -140,45 +202,27 @@ async function forceEmbedAll(
|
|
|
140
202
|
cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
|
|
141
203
|
}
|
|
142
204
|
|
|
143
|
-
const
|
|
205
|
+
const beforeEmbedded = embedded;
|
|
206
|
+
const embedResult = await embedAndStoreBatch({
|
|
144
207
|
embedPort,
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
);
|
|
153
|
-
|
|
154
|
-
if (!embedResult.ok) {
|
|
155
|
-
errors += batch.length;
|
|
156
|
-
continue;
|
|
157
|
-
}
|
|
208
|
+
vectorIndex,
|
|
209
|
+
items: batch,
|
|
210
|
+
modelUri,
|
|
211
|
+
embedFingerprint,
|
|
212
|
+
});
|
|
213
|
+
embedded += embedResult.embedded;
|
|
214
|
+
errors += embedResult.errors;
|
|
215
|
+
enqueueRetryItems(embedResult.retryItems, 1);
|
|
158
216
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
const embedding = embedResult.value.vectors[idx];
|
|
162
|
-
if (!embedding) {
|
|
163
|
-
errors += 1;
|
|
164
|
-
continue;
|
|
165
|
-
}
|
|
166
|
-
vectors.push({
|
|
167
|
-
mirrorHash: item.mirrorHash,
|
|
168
|
-
seq: item.seq,
|
|
169
|
-
model: modelUri,
|
|
170
|
-
embedding: new Float32Array(embedding),
|
|
171
|
-
});
|
|
217
|
+
if (embedded > beforeEmbedded) {
|
|
218
|
+
await drainRetryQueue();
|
|
172
219
|
}
|
|
220
|
+
}
|
|
173
221
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
continue;
|
|
179
|
-
}
|
|
180
|
-
embedded += vectors.length;
|
|
181
|
-
}
|
|
222
|
+
await drainRetryQueue();
|
|
223
|
+
if (retryQueue.size > 0) {
|
|
224
|
+
errors += retryQueue.size;
|
|
225
|
+
retryQueue.clear();
|
|
182
226
|
}
|
|
183
227
|
|
|
184
228
|
if (vectorIndex.vecDirty) {
|
|
@@ -217,24 +261,25 @@ export async function runEmbed(
|
|
|
217
261
|
const db = runtime.store.getRawDb();
|
|
218
262
|
const stats: VectorStatsPort = createVectorStatsPort(db);
|
|
219
263
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
264
|
+
let totalToEmbed = 0;
|
|
265
|
+
if (force) {
|
|
266
|
+
const forceCount = await getActiveChunkCount(db);
|
|
267
|
+
if (!forceCount.ok) {
|
|
268
|
+
throw sdkError("STORE", forceCount.error.message, {
|
|
269
|
+
cause: forceCount.error.cause,
|
|
270
|
+
});
|
|
271
|
+
}
|
|
228
272
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
273
|
+
totalToEmbed = forceCount.value;
|
|
274
|
+
if (totalToEmbed === 0 || dryRun) {
|
|
275
|
+
return {
|
|
276
|
+
embedded: totalToEmbed,
|
|
277
|
+
errors: 0,
|
|
278
|
+
duration: 0,
|
|
279
|
+
model: modelUri,
|
|
280
|
+
searchAvailable: await checkVecAvailable(db),
|
|
281
|
+
};
|
|
282
|
+
}
|
|
238
283
|
}
|
|
239
284
|
|
|
240
285
|
const embedResult = await runtime.llm.createEmbeddingPort(modelUri, {
|
|
@@ -266,6 +311,36 @@ export async function runEmbed(
|
|
|
266
311
|
}
|
|
267
312
|
|
|
268
313
|
const vectorIndex = vectorResult.value;
|
|
314
|
+
if (!force) {
|
|
315
|
+
const embedFingerprint = getEmbeddingFingerprint({
|
|
316
|
+
modelUri,
|
|
317
|
+
dimensions: vectorIndex.dimensions,
|
|
318
|
+
});
|
|
319
|
+
const backlogResult = await stats.countBacklog(
|
|
320
|
+
modelUri,
|
|
321
|
+
embedFingerprint,
|
|
322
|
+
{
|
|
323
|
+
collection: options.collection,
|
|
324
|
+
}
|
|
325
|
+
);
|
|
326
|
+
if (!backlogResult.ok) {
|
|
327
|
+
throw sdkError("STORE", backlogResult.error.message, {
|
|
328
|
+
cause: backlogResult.error.cause,
|
|
329
|
+
});
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
totalToEmbed = backlogResult.value;
|
|
333
|
+
if (totalToEmbed === 0 || dryRun) {
|
|
334
|
+
return {
|
|
335
|
+
embedded: totalToEmbed,
|
|
336
|
+
errors: 0,
|
|
337
|
+
duration: 0,
|
|
338
|
+
model: modelUri,
|
|
339
|
+
searchAvailable: vectorIndex.searchAvailable,
|
|
340
|
+
};
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
269
344
|
const startedAt = Date.now();
|
|
270
345
|
let result: { embedded: number; errors: number };
|
|
271
346
|
if (force) {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration: vector embedding freshness fingerprints.
|
|
3
|
+
*
|
|
4
|
+
* @module src/store/migrations/008-vector-fingerprints
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Database } from "bun:sqlite";
|
|
8
|
+
|
|
9
|
+
import type { Migration } from "./runner";
|
|
10
|
+
|
|
11
|
+
/**
 * Schema migration 8 (`vector_fingerprints`).
 *
 * Adds an `embed_fingerprint` text column to `content_vectors` and creates
 * the `idx_vectors_freshness` index that includes it.
 */
export const migration: Migration = {
  version: 8,
  name: "vector_fingerprints",

  up(db: Database): void {
    // The column is NOT NULL with an empty-string default, so rows that
    // existed before this migration carry '' as their fingerprint.
    db.exec(`
      ALTER TABLE content_vectors ADD COLUMN embed_fingerprint TEXT NOT NULL DEFAULT ''
    `);

    // NOTE(review): the index is guarded by IF NOT EXISTS but the ALTER TABLE
    // above is not; re-running `up` after the column exists would fail there.
    // Confirm the migration runner never re-applies a migration.
    db.exec(`
      CREATE INDEX IF NOT EXISTS idx_vectors_freshness
        ON content_vectors(model, embed_fingerprint, mirror_hash, seq, embedded_at)
    `);
  },
};
|
|
@@ -21,6 +21,7 @@ import { migration as m004 } from "./004-doc-links";
|
|
|
21
21
|
import { migration as m005 } from "./005-graph-indexes";
|
|
22
22
|
import { migration as m006 } from "./006-document-metadata";
|
|
23
23
|
import { migration as m007 } from "./007-document-date-fields";
|
|
24
|
+
import { migration as m008 } from "./008-vector-fingerprints";
|
|
24
25
|
|
|
25
26
|
/** All migrations in order */
|
|
26
|
-
export const migrations = [m001, m002, m003, m004, m005, m006, m007];
|
|
27
|
+
export const migrations = [m001, m002, m003, m004, m005, m006, m007, m008];
|