@gmickel/gno 0.40.2 → 0.41.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -32
- package/package.json +1 -1
- package/src/cli/commands/embed.ts +251 -27
- package/src/cli/commands/vsearch.ts +1 -1
- package/src/embed/backlog.ts +28 -21
- package/src/embed/batch.ts +277 -0
- package/src/llm/embedding-compatibility.ts +82 -0
- package/src/mcp/tools/vsearch.ts +1 -1
- package/src/pipeline/contextual.ts +19 -2
- package/src/pipeline/hybrid.ts +66 -24
- package/src/pipeline/vsearch.ts +3 -1
- package/src/sdk/client.ts +1 -1
- package/src/sdk/embed.ts +31 -14
- package/src/store/vector/sqlite-vec.ts +11 -6
package/README.md
CHANGED
|
@@ -87,7 +87,7 @@ gno daemon
|
|
|
87
87
|
|
|
88
88
|
## What's New
|
|
89
89
|
|
|
90
|
-
> Latest release: [v0.
|
|
90
|
+
> Latest release: [v0.40.2](./CHANGELOG.md#0402---2026-04-06)
|
|
91
91
|
> Full release history: [CHANGELOG.md](./CHANGELOG.md)
|
|
92
92
|
|
|
93
93
|
- **Retrieval Quality Upgrade**: stronger BM25 lexical handling, code-aware chunking, terminal result hyperlinks, and per-collection model overrides
|
|
@@ -108,6 +108,35 @@ gno embed
|
|
|
108
108
|
That regenerates embeddings for the new default model. Old vectors are kept
|
|
109
109
|
until you explicitly clear stale embeddings.
|
|
110
110
|
|
|
111
|
+
If the release also changes the embedding formatting/profile behavior for your
|
|
112
|
+
active model, prefer one of these stronger migration paths:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
gno embed --force
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
or per collection:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
gno collection clear-embeddings my-collection --all
|
|
122
|
+
gno embed my-collection
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
If a re-embed run still reports failures, rerun with:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
gno --verbose embed --force
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Recent releases now print sample embedding errors and a concrete retry hint when
|
|
132
|
+
batch recovery cannot fully recover on its own.
|
|
133
|
+
|
|
134
|
+
Model guides:
|
|
135
|
+
|
|
136
|
+
- [Code Embeddings](./docs/guides/code-embeddings.md)
|
|
137
|
+
- [Per-Collection Models](./docs/guides/per-collection-models.md)
|
|
138
|
+
- [Bring Your Own Models](./docs/guides/bring-your-own-models.md)
|
|
139
|
+
|
|
111
140
|
### Fine-Tuned Model Quick Use
|
|
112
141
|
|
|
113
142
|
```yaml
|
|
@@ -672,22 +701,23 @@ graph TD
|
|
|
672
701
|
|
|
673
702
|
Models auto-download on first use to `~/.cache/gno/models/`. For deterministic startup, set `GNO_NO_AUTO_DOWNLOAD=1` and use `gno models pull` explicitly. Alternatively, offload to a GPU server on your network using HTTP backends.
|
|
674
703
|
|
|
675
|
-
| Model
|
|
676
|
-
|
|
|
677
|
-
| Qwen3-Embedding-0.6B
|
|
678
|
-
| Qwen3-Reranker-0.6B
|
|
679
|
-
|
|
|
704
|
+
| Model | Purpose | Size |
|
|
705
|
+
| :--------------------- | :------------------------------------ | :----------- |
|
|
706
|
+
| Qwen3-Embedding-0.6B | Embeddings (multilingual) | ~640MB |
|
|
707
|
+
| Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
|
|
708
|
+
| Qwen3 / Qwen2.5 family | Query expansion + AI answers | ~600MB-2.5GB |
|
|
680
709
|
|
|
681
710
|
### Model Presets
|
|
682
711
|
|
|
683
|
-
| Preset
|
|
684
|
-
|
|
|
685
|
-
| `slim`
|
|
686
|
-
| `
|
|
687
|
-
| `
|
|
712
|
+
| Preset | Disk | Best For |
|
|
713
|
+
| :----------- | :----- | :------------------------------------------------------ |
|
|
714
|
+
| `slim-tuned` | ~1GB | Current default, tuned retrieval in a compact footprint |
|
|
715
|
+
| `slim` | ~1GB | Fast, good quality |
|
|
716
|
+
| `balanced` | ~2GB | Slightly larger model |
|
|
717
|
+
| `quality` | ~2.5GB | Best answers |
|
|
688
718
|
|
|
689
719
|
```bash
|
|
690
|
-
gno models use slim
|
|
720
|
+
gno models use slim-tuned
|
|
691
721
|
gno models pull --all # Optional: pre-download models (auto-downloads on first use)
|
|
692
722
|
```
|
|
693
723
|
|
|
@@ -720,7 +750,7 @@ models:
|
|
|
720
750
|
presets:
|
|
721
751
|
- id: remote-gpu
|
|
722
752
|
name: Remote GPU Server
|
|
723
|
-
embed: "http://192.168.1.100:8081/v1/embeddings#
|
|
753
|
+
embed: "http://192.168.1.100:8081/v1/embeddings#qwen3-embedding-0.6b"
|
|
724
754
|
rerank: "http://192.168.1.100:8082/v1/completions#reranker"
|
|
725
755
|
expand: "http://192.168.1.100:8083/v1/chat/completions#gno-expand"
|
|
726
756
|
gen: "http://192.168.1.100:8083/v1/chat/completions#qwen3-4b"
|
|
@@ -730,6 +760,11 @@ Works with llama-server, Ollama, LocalAI, vLLM, or any OpenAI-compatible server.
|
|
|
730
760
|
|
|
731
761
|
> **Configuration**: [Model Setup](https://gno.sh/docs/CONFIGURATION/)
|
|
732
762
|
|
|
763
|
+
Remote/BYOM guides:
|
|
764
|
+
|
|
765
|
+
- [Bring Your Own Models](./docs/guides/bring-your-own-models.md)
|
|
766
|
+
- [Per-Collection Models](./docs/guides/per-collection-models.md)
|
|
767
|
+
|
|
733
768
|
---
|
|
734
769
|
|
|
735
770
|
## Architecture
|
|
@@ -801,33 +836,29 @@ If a model turns out to be better specifically for code, the intended user story
|
|
|
801
836
|
|
|
802
837
|
That lets GNO stay sane by default while still giving power users a clean path to code-specialist retrieval.
|
|
803
838
|
|
|
804
|
-
|
|
839
|
+
More model docs:
|
|
805
840
|
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
path: /Users/you/work/gno/src
|
|
810
|
-
pattern: "**/*.{ts,tsx,js,jsx,go,rs,py,swift,c}"
|
|
811
|
-
models:
|
|
812
|
-
embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"
|
|
813
|
-
```
|
|
841
|
+
- [Code Embeddings](./docs/guides/code-embeddings.md)
|
|
842
|
+
- [Per-Collection Models](./docs/guides/per-collection-models.md)
|
|
843
|
+
- [Bring Your Own Models](./docs/guides/bring-your-own-models.md)
|
|
814
844
|
|
|
815
|
-
|
|
845
|
+
Current product stance:
|
|
816
846
|
|
|
817
|
-
-
|
|
818
|
-
-
|
|
819
|
-
-
|
|
847
|
+
- `Qwen3-Embedding-0.6B-GGUF` is already the global default embed model
|
|
848
|
+
- you do **not** need a collection override just to get Qwen on code collections
|
|
849
|
+
- use a collection override only when one collection should intentionally diverge from that default
|
|
820
850
|
|
|
821
|
-
Why
|
|
851
|
+
Why Qwen is the current default:
|
|
822
852
|
|
|
823
|
-
- matches `bge-m3` on the tiny canonical benchmark
|
|
853
|
+
- matches or exceeds `bge-m3` on the tiny canonical benchmark
|
|
824
854
|
- significantly beats `bge-m3` on the real GNO `src/serve` code slice
|
|
825
855
|
- also beats `bge-m3` on a pinned public-OSS code slice
|
|
856
|
+
- also beats `bge-m3` on the multilingual prose/docs benchmark lane
|
|
826
857
|
|
|
827
|
-
|
|
858
|
+
Current trade-off:
|
|
828
859
|
|
|
829
860
|
- Qwen is slower to embed than `bge-m3`
|
|
830
|
-
- existing users upgrading
|
|
861
|
+
- existing users upgrading or adopting a new embedding formatting profile may need to run `gno embed` again so stored vectors match the current formatter/runtime path
|
|
831
862
|
|
|
832
863
|
### General Multilingual Embedding Benchmark
|
|
833
864
|
|
|
@@ -841,8 +872,8 @@ bun run bench:general-embeddings --candidate qwen3-embedding-0.6b --write
|
|
|
841
872
|
|
|
842
873
|
Current signal on the public multilingual FastAPI-docs fixture:
|
|
843
874
|
|
|
844
|
-
- `bge-m3`: vector nDCG@10 `0.
|
|
845
|
-
- `Qwen3-Embedding-0.6B-GGUF`: vector nDCG@10 `0.
|
|
875
|
+
- `bge-m3`: vector nDCG@10 `0.3508`, hybrid nDCG@10 `0.6756`
|
|
876
|
+
- `Qwen3-Embedding-0.6B-GGUF`: vector nDCG@10 `0.9891`, hybrid nDCG@10 `0.9891`
|
|
846
877
|
|
|
847
878
|
Interpretation:
|
|
848
879
|
|
package/package.json
CHANGED
|
package/src/cli/commands/embed.ts
CHANGED
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
isInitialized,
|
|
18
18
|
loadConfig,
|
|
19
19
|
} from "../../config";
|
|
20
|
+
import { embedTextsWithRecovery } from "../../embed/batch";
|
|
20
21
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
21
22
|
import { resolveDownloadPolicy } from "../../llm/policy";
|
|
22
23
|
import { resolveModelUri } from "../../llm/registry";
|
|
@@ -70,6 +71,9 @@ export type EmbedResult =
|
|
|
70
71
|
duration: number;
|
|
71
72
|
model: string;
|
|
72
73
|
searchAvailable: boolean;
|
|
74
|
+
errorSamples?: string[];
|
|
75
|
+
suggestion?: string;
|
|
76
|
+
syncError?: string;
|
|
73
77
|
}
|
|
74
78
|
| { success: false; error: string };
|
|
75
79
|
|
|
@@ -86,6 +90,30 @@ function formatDuration(seconds: number): string {
|
|
|
86
90
|
return `${mins}m ${secs.toFixed(0)}s`;
|
|
87
91
|
}
|
|
88
92
|
|
|
93
|
+
function formatLlmFailure(
|
|
94
|
+
error: { message: string; cause?: unknown } | undefined
|
|
95
|
+
): string {
|
|
96
|
+
if (!error) {
|
|
97
|
+
return "Unknown embedding failure";
|
|
98
|
+
}
|
|
99
|
+
const cause =
|
|
100
|
+
error.cause &&
|
|
101
|
+
typeof error.cause === "object" &&
|
|
102
|
+
"message" in error.cause &&
|
|
103
|
+
typeof error.cause.message === "string"
|
|
104
|
+
? error.cause.message
|
|
105
|
+
: typeof error.cause === "string"
|
|
106
|
+
? error.cause
|
|
107
|
+
: "";
|
|
108
|
+
return cause && cause !== error.message
|
|
109
|
+
? `${error.message} - ${cause}`
|
|
110
|
+
: error.message;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
function isDisposedBatchError(message: string): boolean {
|
|
114
|
+
return message.toLowerCase().includes("object is disposed");
|
|
115
|
+
}
|
|
116
|
+
|
|
89
117
|
async function checkVecAvailable(
|
|
90
118
|
db: import("bun:sqlite").Database
|
|
91
119
|
): Promise<boolean> {
|
|
@@ -110,10 +138,20 @@ interface BatchContext {
|
|
|
110
138
|
showProgress: boolean;
|
|
111
139
|
totalToEmbed: number;
|
|
112
140
|
verbose: boolean;
|
|
141
|
+
recreateEmbedPort?: () => Promise<
|
|
142
|
+
{ ok: true; value: EmbeddingPort } | { ok: false; error: string }
|
|
143
|
+
>;
|
|
113
144
|
}
|
|
114
145
|
|
|
115
146
|
type BatchResult =
|
|
116
|
-
| {
|
|
147
|
+
| {
|
|
148
|
+
ok: true;
|
|
149
|
+
embedded: number;
|
|
150
|
+
errors: number;
|
|
151
|
+
duration: number;
|
|
152
|
+
errorSamples: string[];
|
|
153
|
+
suggestion?: string;
|
|
154
|
+
}
|
|
117
155
|
| { ok: false; error: string };
|
|
118
156
|
|
|
119
157
|
interface Cursor {
|
|
@@ -125,8 +163,21 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
|
|
|
125
163
|
const startTime = Date.now();
|
|
126
164
|
let embedded = 0;
|
|
127
165
|
let errors = 0;
|
|
166
|
+
const errorSamples: string[] = [];
|
|
167
|
+
let suggestion: string | undefined;
|
|
128
168
|
let cursor: Cursor | undefined;
|
|
129
169
|
|
|
170
|
+
const pushErrorSamples = (samples: string[]): void => {
|
|
171
|
+
for (const sample of samples) {
|
|
172
|
+
if (errorSamples.length >= 5) {
|
|
173
|
+
break;
|
|
174
|
+
}
|
|
175
|
+
if (!errorSamples.includes(sample)) {
|
|
176
|
+
errorSamples.push(sample);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
};
|
|
180
|
+
|
|
130
181
|
while (embedded + errors < ctx.totalToEmbed) {
|
|
131
182
|
// Get next batch using seek pagination (cursor-based)
|
|
132
183
|
const batchResult = ctx.force
|
|
@@ -153,10 +204,96 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
|
|
|
153
204
|
}
|
|
154
205
|
|
|
155
206
|
// Embed batch with contextual formatting (title prefix)
|
|
156
|
-
const batchEmbedResult = await
|
|
157
|
-
|
|
207
|
+
const batchEmbedResult = await embedTextsWithRecovery(
|
|
208
|
+
ctx.embedPort,
|
|
209
|
+
batch.map((b) =>
|
|
210
|
+
formatDocForEmbedding(b.text, b.title ?? undefined, ctx.modelUri)
|
|
211
|
+
)
|
|
158
212
|
);
|
|
159
213
|
if (!batchEmbedResult.ok) {
|
|
214
|
+
const formattedError = formatLlmFailure(batchEmbedResult.error);
|
|
215
|
+
if (ctx.recreateEmbedPort && isDisposedBatchError(formattedError)) {
|
|
216
|
+
if (ctx.verbose) {
|
|
217
|
+
process.stderr.write(
|
|
218
|
+
"\n[embed] Embedding port disposed; recreating model/contexts and retrying batch once\n"
|
|
219
|
+
);
|
|
220
|
+
}
|
|
221
|
+
const recreated = await ctx.recreateEmbedPort();
|
|
222
|
+
if (recreated.ok) {
|
|
223
|
+
ctx.embedPort = recreated.value;
|
|
224
|
+
const retryResult = await embedTextsWithRecovery(
|
|
225
|
+
ctx.embedPort,
|
|
226
|
+
batch.map((b) =>
|
|
227
|
+
formatDocForEmbedding(b.text, b.title ?? undefined, ctx.modelUri)
|
|
228
|
+
)
|
|
229
|
+
);
|
|
230
|
+
if (retryResult.ok) {
|
|
231
|
+
if (ctx.verbose) {
|
|
232
|
+
process.stderr.write(
|
|
233
|
+
"\n[embed] Retry after port reset succeeded\n"
|
|
234
|
+
);
|
|
235
|
+
}
|
|
236
|
+
pushErrorSamples(retryResult.value.failureSamples);
|
|
237
|
+
suggestion ||= retryResult.value.retrySuggestion;
|
|
238
|
+
|
|
239
|
+
const retryVectors: VectorRow[] = [];
|
|
240
|
+
for (const [idx, item] of batch.entries()) {
|
|
241
|
+
const embedding = retryResult.value.vectors[idx];
|
|
242
|
+
if (!embedding) {
|
|
243
|
+
errors += 1;
|
|
244
|
+
continue;
|
|
245
|
+
}
|
|
246
|
+
retryVectors.push({
|
|
247
|
+
mirrorHash: item.mirrorHash,
|
|
248
|
+
seq: item.seq,
|
|
249
|
+
model: ctx.modelUri,
|
|
250
|
+
embedding: new Float32Array(embedding),
|
|
251
|
+
});
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if (retryVectors.length === 0) {
|
|
255
|
+
if (ctx.verbose) {
|
|
256
|
+
process.stderr.write(
|
|
257
|
+
"\n[embed] No recoverable embeddings in retry batch\n"
|
|
258
|
+
);
|
|
259
|
+
}
|
|
260
|
+
continue;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
const retryStoreResult =
|
|
264
|
+
await ctx.vectorIndex.upsertVectors(retryVectors);
|
|
265
|
+
if (!retryStoreResult.ok) {
|
|
266
|
+
if (ctx.verbose) {
|
|
267
|
+
process.stderr.write(
|
|
268
|
+
`\n[embed] Store failed: ${retryStoreResult.error.message}\n`
|
|
269
|
+
);
|
|
270
|
+
}
|
|
271
|
+
pushErrorSamples([retryStoreResult.error.message]);
|
|
272
|
+
suggestion ??=
|
|
273
|
+
"Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.";
|
|
274
|
+
errors += retryVectors.length;
|
|
275
|
+
continue;
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
embedded += retryVectors.length;
|
|
279
|
+
if (ctx.showProgress) {
|
|
280
|
+
const embeddedDisplay = Math.min(embedded, ctx.totalToEmbed);
|
|
281
|
+
const completed = Math.min(embedded + errors, ctx.totalToEmbed);
|
|
282
|
+
const pct = (completed / ctx.totalToEmbed) * 100;
|
|
283
|
+
const elapsed = (Date.now() - startTime) / 1000;
|
|
284
|
+
const rate = embedded / Math.max(elapsed, 0.001);
|
|
285
|
+
const eta =
|
|
286
|
+
Math.max(0, ctx.totalToEmbed - completed) /
|
|
287
|
+
Math.max(rate, 0.001);
|
|
288
|
+
process.stdout.write(
|
|
289
|
+
`\rEmbedding: ${embeddedDisplay.toLocaleString()}/${ctx.totalToEmbed.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
|
|
290
|
+
);
|
|
291
|
+
}
|
|
292
|
+
continue;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
160
297
|
if (ctx.verbose) {
|
|
161
298
|
const err = batchEmbedResult.error;
|
|
162
299
|
const cause = err.cause;
|
|
@@ -174,30 +311,52 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
|
|
|
174
311
|
`\n[embed] Batch failed (${batch.length} chunks: ${titles}${batch.length > 3 ? "..." : ""}): ${err.message}${causeMsg ? ` - ${causeMsg}` : ""}\n`
|
|
175
312
|
);
|
|
176
313
|
}
|
|
314
|
+
pushErrorSamples([formattedError]);
|
|
315
|
+
suggestion =
|
|
316
|
+
"Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks.";
|
|
177
317
|
errors += batch.length;
|
|
178
318
|
continue;
|
|
179
319
|
}
|
|
180
320
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
321
|
+
if (ctx.verbose && batchEmbedResult.value.batchFailed) {
|
|
322
|
+
const titles = batch
|
|
323
|
+
.slice(0, 3)
|
|
324
|
+
.map((b) => b.title ?? b.mirrorHash.slice(0, 8))
|
|
325
|
+
.join(", ");
|
|
326
|
+
process.stderr.write(
|
|
327
|
+
`\n[embed] Batch fallback (${batch.length} chunks: ${titles}${batch.length > 3 ? "..." : ""}): ${batchEmbedResult.value.batchError ?? "unknown batch error"}\n`
|
|
328
|
+
);
|
|
329
|
+
}
|
|
330
|
+
pushErrorSamples(batchEmbedResult.value.failureSamples);
|
|
331
|
+
suggestion ||= batchEmbedResult.value.retrySuggestion;
|
|
332
|
+
if (ctx.verbose && batchEmbedResult.value.failureSamples.length > 0) {
|
|
333
|
+
for (const sample of batchEmbedResult.value.failureSamples) {
|
|
334
|
+
process.stderr.write(`\n[embed] Sample failure: ${sample}\n`);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
const vectors: VectorRow[] = [];
|
|
339
|
+
for (const [idx, item] of batch.entries()) {
|
|
340
|
+
const embedding = batchEmbedResult.value.vectors[idx];
|
|
341
|
+
if (!embedding) {
|
|
342
|
+
errors += 1;
|
|
343
|
+
continue;
|
|
344
|
+
}
|
|
345
|
+
vectors.push({
|
|
346
|
+
mirrorHash: item.mirrorHash,
|
|
347
|
+
seq: item.seq,
|
|
348
|
+
model: ctx.modelUri,
|
|
349
|
+
embedding: new Float32Array(embedding),
|
|
350
|
+
});
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
if (vectors.length === 0) {
|
|
184
354
|
if (ctx.verbose) {
|
|
185
|
-
process.stderr.write(
|
|
186
|
-
`\n[embed] Count mismatch: got ${embeddings.length}, expected ${batch.length}\n`
|
|
187
|
-
);
|
|
355
|
+
process.stderr.write("\n[embed] No recoverable embeddings in batch\n");
|
|
188
356
|
}
|
|
189
|
-
errors += batch.length;
|
|
190
357
|
continue;
|
|
191
358
|
}
|
|
192
359
|
|
|
193
|
-
// Store vectors (embeddedAt set by DB)
|
|
194
|
-
const vectors: VectorRow[] = batch.map((b, idx) => ({
|
|
195
|
-
mirrorHash: b.mirrorHash,
|
|
196
|
-
seq: b.seq,
|
|
197
|
-
model: ctx.modelUri,
|
|
198
|
-
embedding: new Float32Array(embeddings[idx] as number[]),
|
|
199
|
-
}));
|
|
200
|
-
|
|
201
360
|
const storeResult = await ctx.vectorIndex.upsertVectors(vectors);
|
|
202
361
|
if (!storeResult.ok) {
|
|
203
362
|
if (ctx.verbose) {
|
|
@@ -205,21 +364,26 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
|
|
|
205
364
|
`\n[embed] Store failed: ${storeResult.error.message}\n`
|
|
206
365
|
);
|
|
207
366
|
}
|
|
208
|
-
|
|
367
|
+
pushErrorSamples([storeResult.error.message]);
|
|
368
|
+
suggestion ??=
|
|
369
|
+
"Store write failed. Rerun `gno embed` once more; if it repeats, run `gno doctor` and `gno vec sync`.";
|
|
370
|
+
errors += vectors.length;
|
|
209
371
|
continue;
|
|
210
372
|
}
|
|
211
373
|
|
|
212
|
-
embedded +=
|
|
374
|
+
embedded += vectors.length;
|
|
213
375
|
|
|
214
376
|
// Progress output
|
|
215
377
|
if (ctx.showProgress) {
|
|
216
|
-
const
|
|
378
|
+
const embeddedDisplay = Math.min(embedded, ctx.totalToEmbed);
|
|
379
|
+
const completed = Math.min(embedded + errors, ctx.totalToEmbed);
|
|
380
|
+
const pct = (completed / ctx.totalToEmbed) * 100;
|
|
217
381
|
const elapsed = (Date.now() - startTime) / 1000;
|
|
218
382
|
const rate = embedded / Math.max(elapsed, 0.001);
|
|
219
383
|
const eta =
|
|
220
|
-
(ctx.totalToEmbed -
|
|
384
|
+
Math.max(0, ctx.totalToEmbed - completed) / Math.max(rate, 0.001);
|
|
221
385
|
process.stdout.write(
|
|
222
|
-
`\rEmbedding: ${
|
|
386
|
+
`\rEmbedding: ${embeddedDisplay.toLocaleString()}/${ctx.totalToEmbed.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
|
|
223
387
|
);
|
|
224
388
|
}
|
|
225
389
|
}
|
|
@@ -233,6 +397,8 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
|
|
|
233
397
|
embedded,
|
|
234
398
|
errors,
|
|
235
399
|
duration: (Date.now() - startTime) / 1000,
|
|
400
|
+
errorSamples,
|
|
401
|
+
suggestion,
|
|
236
402
|
};
|
|
237
403
|
}
|
|
238
404
|
|
|
@@ -338,6 +504,7 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
|
|
|
338
504
|
duration: 0,
|
|
339
505
|
model: modelUri,
|
|
340
506
|
searchAvailable: vecAvailable,
|
|
507
|
+
errorSamples: [],
|
|
341
508
|
};
|
|
342
509
|
}
|
|
343
510
|
|
|
@@ -350,6 +517,7 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
|
|
|
350
517
|
duration: 0,
|
|
351
518
|
model: modelUri,
|
|
352
519
|
searchAvailable: vecAvailable,
|
|
520
|
+
errorSamples: [],
|
|
353
521
|
};
|
|
354
522
|
}
|
|
355
523
|
|
|
@@ -366,6 +534,27 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
|
|
|
366
534
|
: undefined;
|
|
367
535
|
|
|
368
536
|
const llm = new LlmAdapter(config);
|
|
537
|
+
const recreateEmbedPort = async () => {
|
|
538
|
+
if (embedPort) {
|
|
539
|
+
await embedPort.dispose();
|
|
540
|
+
}
|
|
541
|
+
await llm.getManager().dispose(modelUri);
|
|
542
|
+
const recreated = await llm.createEmbeddingPort(modelUri, {
|
|
543
|
+
policy,
|
|
544
|
+
onProgress: downloadProgress
|
|
545
|
+
? (progress) => downloadProgress("embed", progress)
|
|
546
|
+
: undefined,
|
|
547
|
+
});
|
|
548
|
+
if (!recreated.ok) {
|
|
549
|
+
return { ok: false as const, error: recreated.error.message };
|
|
550
|
+
}
|
|
551
|
+
const initResult = await recreated.value.init();
|
|
552
|
+
if (!initResult.ok) {
|
|
553
|
+
await recreated.value.dispose();
|
|
554
|
+
return { ok: false as const, error: initResult.error.message };
|
|
555
|
+
}
|
|
556
|
+
return { ok: true as const, value: recreated.value };
|
|
557
|
+
};
|
|
369
558
|
const embedResult = await llm.createEmbeddingPort(modelUri, {
|
|
370
559
|
policy,
|
|
371
560
|
onProgress: downloadProgress
|
|
@@ -412,6 +601,7 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
|
|
|
412
601
|
showProgress: !options.json,
|
|
413
602
|
totalToEmbed,
|
|
414
603
|
verbose: options.verbose ?? false,
|
|
604
|
+
recreateEmbedPort,
|
|
415
605
|
});
|
|
416
606
|
|
|
417
607
|
if (!result.ok) {
|
|
@@ -431,10 +621,27 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
|
|
|
431
621
|
}
|
|
432
622
|
}
|
|
433
623
|
vectorIndex.vecDirty = false;
|
|
434
|
-
} else
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
624
|
+
} else {
|
|
625
|
+
if (!options.json) {
|
|
626
|
+
process.stdout.write(
|
|
627
|
+
`\n[vec] Sync failed: ${syncResult.error.message}\n`
|
|
628
|
+
);
|
|
629
|
+
}
|
|
630
|
+
return {
|
|
631
|
+
success: true,
|
|
632
|
+
embedded: result.embedded,
|
|
633
|
+
errors: result.errors,
|
|
634
|
+
duration: result.duration,
|
|
635
|
+
model: modelUri,
|
|
636
|
+
searchAvailable: vectorIndex.searchAvailable,
|
|
637
|
+
errorSamples: [
|
|
638
|
+
...result.errorSamples,
|
|
639
|
+
syncResult.error.message,
|
|
640
|
+
].slice(0, 5),
|
|
641
|
+
suggestion:
|
|
642
|
+
"Vector index sync failed after embedding. Rerun `gno embed` once more. If it repeats, run `gno vec sync`.",
|
|
643
|
+
syncError: syncResult.error.message,
|
|
644
|
+
};
|
|
438
645
|
}
|
|
439
646
|
}
|
|
440
647
|
|
|
@@ -445,6 +652,8 @@ export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
|
|
|
445
652
|
duration: result.duration,
|
|
446
653
|
model: modelUri,
|
|
447
654
|
searchAvailable: vectorIndex.searchAvailable,
|
|
655
|
+
errorSamples: result.errorSamples,
|
|
656
|
+
suggestion: result.suggestion,
|
|
448
657
|
};
|
|
449
658
|
} finally {
|
|
450
659
|
if (embedPort) {
|
|
@@ -569,6 +778,9 @@ export function formatEmbed(
|
|
|
569
778
|
duration: result.duration,
|
|
570
779
|
model: result.model,
|
|
571
780
|
searchAvailable: result.searchAvailable,
|
|
781
|
+
errorSamples: result.errorSamples ?? [],
|
|
782
|
+
suggestion: result.suggestion,
|
|
783
|
+
syncError: result.syncError,
|
|
572
784
|
},
|
|
573
785
|
null,
|
|
574
786
|
2
|
|
@@ -590,6 +802,14 @@ export function formatEmbed(
|
|
|
590
802
|
|
|
591
803
|
if (result.errors > 0) {
|
|
592
804
|
lines.push(`${result.errors} chunks failed to embed.`);
|
|
805
|
+
if ((result.errorSamples?.length ?? 0) > 0) {
|
|
806
|
+
for (const sample of result.errorSamples ?? []) {
|
|
807
|
+
lines.push(`Sample error: ${sample}`);
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
if (result.suggestion) {
|
|
811
|
+
lines.push(`Hint: ${result.suggestion}`);
|
|
812
|
+
}
|
|
593
813
|
}
|
|
594
814
|
|
|
595
815
|
if (!result.searchAvailable) {
|
|
@@ -598,5 +818,9 @@ export function formatEmbed(
|
|
|
598
818
|
);
|
|
599
819
|
}
|
|
600
820
|
|
|
821
|
+
if (result.syncError) {
|
|
822
|
+
lines.push(`Vec sync error: ${result.syncError}`);
|
|
823
|
+
}
|
|
824
|
+
|
|
601
825
|
return lines.join("\n");
|
|
602
826
|
}
|
package/src/cli/commands/vsearch.ts
CHANGED
|
@@ -97,7 +97,7 @@ export async function vsearch(
|
|
|
97
97
|
try {
|
|
98
98
|
// Embed query with contextual formatting (also determines dimensions)
|
|
99
99
|
const queryEmbedResult = await embedPort.embed(
|
|
100
|
-
formatQueryForEmbedding(query)
|
|
100
|
+
formatQueryForEmbedding(query, embedPort.modelUri)
|
|
101
101
|
);
|
|
102
102
|
if (!queryEmbedResult.ok) {
|
|
103
103
|
return { success: false, error: queryEmbedResult.error.message };
|
package/src/embed/backlog.ts
CHANGED
|
@@ -16,6 +16,7 @@ import type {
|
|
|
16
16
|
|
|
17
17
|
import { formatDocForEmbedding } from "../pipeline/contextual";
|
|
18
18
|
import { err, ok } from "../store/types";
|
|
19
|
+
import { embedTextsWithRecovery } from "./batch";
|
|
19
20
|
|
|
20
21
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
21
22
|
// Types
|
|
@@ -85,9 +86,14 @@ export async function embedBacklog(
|
|
|
85
86
|
}
|
|
86
87
|
|
|
87
88
|
// Embed batch with contextual formatting (title prefix)
|
|
88
|
-
const embedResult = await
|
|
89
|
+
const embedResult = await embedTextsWithRecovery(
|
|
90
|
+
embedPort,
|
|
89
91
|
batch.map((b: BacklogItem) =>
|
|
90
|
-
formatDocForEmbedding(
|
|
92
|
+
formatDocForEmbedding(
|
|
93
|
+
b.text,
|
|
94
|
+
b.title ?? undefined,
|
|
95
|
+
embedPort.modelUri
|
|
96
|
+
)
|
|
91
97
|
)
|
|
92
98
|
);
|
|
93
99
|
|
|
@@ -96,28 +102,29 @@ export async function embedBacklog(
|
|
|
96
102
|
continue;
|
|
97
103
|
}
|
|
98
104
|
|
|
99
|
-
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
105
|
+
const vectors: VectorRow[] = [];
|
|
106
|
+
for (const [idx, item] of batch.entries()) {
|
|
107
|
+
const embedding = embedResult.value.vectors[idx];
|
|
108
|
+
if (!embedding) {
|
|
109
|
+
errors += 1;
|
|
110
|
+
continue;
|
|
111
|
+
}
|
|
112
|
+
vectors.push({
|
|
113
|
+
mirrorHash: item.mirrorHash,
|
|
114
|
+
seq: item.seq,
|
|
115
|
+
model: modelUri,
|
|
116
|
+
embedding: new Float32Array(embedding),
|
|
117
|
+
});
|
|
104
118
|
}
|
|
105
119
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
const storeResult = await vectorIndex.upsertVectors(vectors);
|
|
115
|
-
if (!storeResult.ok) {
|
|
116
|
-
errors += batch.length;
|
|
117
|
-
continue;
|
|
120
|
+
if (vectors.length > 0) {
|
|
121
|
+
const storeResult = await vectorIndex.upsertVectors(vectors);
|
|
122
|
+
if (!storeResult.ok) {
|
|
123
|
+
errors += vectors.length;
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
126
|
+
embedded += vectors.length;
|
|
118
127
|
}
|
|
119
|
-
|
|
120
|
-
embedded += batch.length;
|
|
121
128
|
}
|
|
122
129
|
|
|
123
130
|
// Sync vec index once at end if any vec0 writes failed
|
package/src/embed/batch.ts
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared embedding batch helpers.
|
|
3
|
+
*
|
|
4
|
+
* @module src/embed/batch
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { EmbeddingPort, LlmResult } from "../llm/types";
|
|
8
|
+
|
|
9
|
+
import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
|
|
10
|
+
import { inferenceFailedError } from "../llm/errors";
|
|
11
|
+
|
|
12
|
+
export interface EmbedBatchRecoveryResult {
|
|
13
|
+
vectors: Array<number[] | null>;
|
|
14
|
+
batchFailed: boolean;
|
|
15
|
+
batchError?: string;
|
|
16
|
+
fallbackErrors: number;
|
|
17
|
+
failureSamples: string[];
|
|
18
|
+
retrySuggestion?: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
const MAX_FAILURE_SAMPLES = 5;
|
|
22
|
+
|
|
23
|
+
function errorMessage(error: unknown): string {
|
|
24
|
+
if (
|
|
25
|
+
error &&
|
|
26
|
+
typeof error === "object" &&
|
|
27
|
+
"message" in error &&
|
|
28
|
+
typeof error.message === "string"
|
|
29
|
+
) {
|
|
30
|
+
return error.message;
|
|
31
|
+
}
|
|
32
|
+
return String(error);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function formatFailureMessage(error: {
|
|
36
|
+
message: string;
|
|
37
|
+
cause?: unknown;
|
|
38
|
+
}): string {
|
|
39
|
+
const cause = error.cause ? errorMessage(error.cause) : "";
|
|
40
|
+
return cause && cause !== error.message
|
|
41
|
+
? `${error.message} - ${cause}`
|
|
42
|
+
: error.message;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function isDisposedFailure(message: string): boolean {
|
|
46
|
+
return message.toLowerCase().includes("object is disposed");
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
async function resetEmbeddingPort(
|
|
50
|
+
embedPort: EmbeddingPort
|
|
51
|
+
): Promise<LlmResult<void>> {
|
|
52
|
+
await embedPort.dispose();
|
|
53
|
+
return embedPort.init();
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
export async function embedTextsWithRecovery(
|
|
57
|
+
embedPort: EmbeddingPort,
|
|
58
|
+
texts: string[]
|
|
59
|
+
): Promise<LlmResult<EmbedBatchRecoveryResult>> {
|
|
60
|
+
if (texts.length === 0) {
|
|
61
|
+
return {
|
|
62
|
+
ok: true,
|
|
63
|
+
value: {
|
|
64
|
+
vectors: [],
|
|
65
|
+
batchFailed: false,
|
|
66
|
+
fallbackErrors: 0,
|
|
67
|
+
failureSamples: [],
|
|
68
|
+
},
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const profile = getEmbeddingCompatibilityProfile(embedPort.modelUri);
|
|
73
|
+
if (profile.batchEmbeddingTrusted) {
|
|
74
|
+
let batchResult = await embedPort.embedBatch(texts);
|
|
75
|
+
if (!batchResult.ok) {
|
|
76
|
+
const formattedBatchError = formatFailureMessage(batchResult.error);
|
|
77
|
+
if (isDisposedFailure(formattedBatchError)) {
|
|
78
|
+
const reset = await resetEmbeddingPort(embedPort);
|
|
79
|
+
if (!reset.ok) {
|
|
80
|
+
return reset;
|
|
81
|
+
}
|
|
82
|
+
batchResult = await embedPort.embedBatch(texts);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
if (batchResult.ok && batchResult.value.length === texts.length) {
|
|
86
|
+
return {
|
|
87
|
+
ok: true,
|
|
88
|
+
value: {
|
|
89
|
+
vectors: batchResult.value,
|
|
90
|
+
batchFailed: false,
|
|
91
|
+
fallbackErrors: 0,
|
|
92
|
+
failureSamples: [],
|
|
93
|
+
},
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
const recovered = await recoverWithAdaptiveBatches(embedPort, texts, {
|
|
98
|
+
rootBatchAlreadyFailed: true,
|
|
99
|
+
});
|
|
100
|
+
if (!recovered.ok) {
|
|
101
|
+
return recovered;
|
|
102
|
+
}
|
|
103
|
+
return {
|
|
104
|
+
ok: true,
|
|
105
|
+
value: {
|
|
106
|
+
...recovered.value,
|
|
107
|
+
batchFailed: true,
|
|
108
|
+
batchError: batchResult.ok
|
|
109
|
+
? `Embedding count mismatch: got ${batchResult.value.length}, expected ${texts.length}`
|
|
110
|
+
: formatFailureMessage(batchResult.error),
|
|
111
|
+
retrySuggestion:
|
|
112
|
+
recovered.value.fallbackErrors > 0
|
|
113
|
+
? "Try rerunning the same command. If failures persist, rerun with `gno --verbose embed --batch-size 1` to isolate failing chunks."
|
|
114
|
+
: undefined,
|
|
115
|
+
},
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
const recovered = await recoverIndividually(embedPort, texts);
|
|
120
|
+
if (!recovered.ok) {
|
|
121
|
+
return recovered;
|
|
122
|
+
}
|
|
123
|
+
return {
|
|
124
|
+
ok: true,
|
|
125
|
+
value: {
|
|
126
|
+
...recovered.value,
|
|
127
|
+
batchFailed: true,
|
|
128
|
+
batchError: "Batch embedding disabled for this compatibility profile",
|
|
129
|
+
retrySuggestion:
|
|
130
|
+
recovered.value.fallbackErrors > 0
|
|
131
|
+
? "Some chunks still failed individually. Rerun with `gno --verbose embed --batch-size 1` for exact chunk errors."
|
|
132
|
+
: undefined,
|
|
133
|
+
},
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
 * Recover from a failed batch embedding by bisecting the batch.
 *
 * Each range is attempted with `embedPort.embedBatch`. A range that fails, or
 * that returns a different number of vectors than texts, is split in half and
 * both halves are processed in order. Ranges of a single text go through
 * `embedPort.embed`. Texts that still fail keep a `null` slot in `vectors`
 * (which has the same length and order as `texts`), are counted in
 * `fallbackErrors`, and contribute up to `MAX_FAILURE_SAMPLES` messages to
 * `failureSamples`.
 *
 * If every text of a non-empty input fails, `resetEmbeddingPort` is called
 * and all texts are retried once through `recoverIndividually`; that retry's
 * result is returned instead.
 *
 * @param embedPort - Port providing `embed`, `embedBatch` and `modelUri`.
 * @param texts - Texts to embed.
 * @param options - `rootBatchAlreadyFailed`: when true the full-size batch is
 *   not attempted again and recovery starts by splitting it.
 * @returns An ok result with vectors, failure count and failure samples, or
 *   an error result when the reset/retry fails or an exception is thrown.
 */
async function recoverWithAdaptiveBatches(
  embedPort: EmbeddingPort,
  texts: string[],
  options: { rootBatchAlreadyFailed?: boolean } = {}
): Promise<
  LlmResult<Omit<EmbedBatchRecoveryResult, "batchFailed" | "batchError">>
> {
  try {
    // One slot per input text; slots that never receive a vector stay null.
    const vectors: Array<number[] | null> = Array.from(
      { length: texts.length },
      () => null
    );
    const failureSamples: string[] = [];
    let fallbackErrors = 0;

    // Only the first MAX_FAILURE_SAMPLES messages are kept; the counter
    // above still tracks every failure.
    const recordFailure = (message: string): void => {
      if (failureSamples.length < MAX_FAILURE_SAMPLES) {
        failureSamples.push(message);
      }
    };

    // Embeds rangeTexts into vectors[offset .. offset + rangeTexts.length).
    const processRange = async (
      rangeTexts: string[],
      offset: number,
      batchAlreadyFailed = false
    ): Promise<void> => {
      if (rangeTexts.length === 0) {
        return;
      }

      if (rangeTexts.length === 1) {
        const result = await embedPort.embed(rangeTexts[0] ?? "");
        if (result.ok) {
          vectors[offset] = result.value;
          return;
        }
        fallbackErrors += 1;
        recordFailure(formatFailureMessage(result.error));
        return;
      }

      // Skip the batch call when the caller already saw this exact batch fail.
      let batchResult: Awaited<ReturnType<typeof embedPort.embedBatch>> | null =
        null;
      if (!batchAlreadyFailed) {
        batchResult = await embedPort.embedBatch(rangeTexts);
      }
      if (
        batchResult &&
        batchResult.ok &&
        batchResult.value.length === rangeTexts.length
      ) {
        for (const [index, vector] of batchResult.value.entries()) {
          vectors[offset + index] = vector;
        }
        return;
      }

      // Batch failed or returned the wrong count: bisect and retry each half.
      const mid = Math.ceil(rangeTexts.length / 2);
      await processRange(rangeTexts.slice(0, mid), offset);
      await processRange(rangeTexts.slice(mid), offset + mid);
    };

    await processRange(texts, 0, options.rootBatchAlreadyFailed ?? false);

    // Total failure: reset the port and retry each text once. The length
    // guard matters because with zero texts the counts are trivially equal
    // (0 === 0) and a reset would be pointless, and could even turn an empty
    // request into an error if the reset fails.
    if (texts.length > 0 && fallbackErrors === texts.length) {
      const reinit = await resetEmbeddingPort(embedPort);
      if (!reinit.ok) {
        return reinit;
      }

      const retry = await recoverIndividually(embedPort, texts);
      if (!retry.ok) {
        return retry;
      }
      return {
        ok: true,
        value: retry.value,
      };
    }

    return {
      ok: true,
      value: {
        vectors,
        fallbackErrors,
        failureSamples,
      },
    };
  } catch (error) {
    return {
      ok: false,
      error: inferenceFailedError(
        embedPort.modelUri,
        new Error(errorMessage(error))
      ),
    };
  }
}
|
|
235
|
+
|
|
236
|
+
/**
 * Embed every text with its own `embedPort.embed` call, one after another.
 *
 * A failed text does not abort the run: its slot in `vectors` is `null`, it
 * is counted in `fallbackErrors`, and its formatted error is kept in
 * `failureSamples` while fewer than `MAX_FAILURE_SAMPLES` have been stored.
 *
 * @param embedPort - Port providing `embed` and `modelUri`.
 * @param texts - Texts to embed; `vectors` follows the same order.
 * @returns An ok result with the per-text outcome, or an error result built
 *   with `inferenceFailedError` if anything throws.
 */
async function recoverIndividually(
  embedPort: EmbeddingPort,
  texts: string[]
): Promise<
  LlmResult<Omit<EmbedBatchRecoveryResult, "batchFailed" | "batchError">>
> {
  const vectors: Array<number[] | null> = [];
  const failureSamples: string[] = [];
  let fallbackErrors = 0;

  try {
    for (const text of texts) {
      const outcome = await embedPort.embed(text);

      if (outcome.ok) {
        vectors.push(outcome.value);
        continue;
      }

      // Failure: keep the slot so indexes stay aligned with `texts`.
      vectors.push(null);
      fallbackErrors += 1;
      const hasRoomForSample = failureSamples.length < MAX_FAILURE_SAMPLES;
      if (hasRoomForSample) {
        failureSamples.push(formatFailureMessage(outcome.error));
      }
    }
  } catch (error) {
    const wrapped = new Error(errorMessage(error));
    return {
      ok: false,
      error: inferenceFailedError(embedPort.modelUri, wrapped),
    };
  }

  return {
    ok: true,
    value: { vectors, fallbackErrors, failureSamples },
  };
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
 * Embedding compatibility profiles.
 *
 * Central place for per-model formatting and runtime hints, so callers can
 * ask for a profile by model URI instead of pattern-matching URIs themselves.
 *
 * @module src/llm/embedding-compatibility
 */

/** How a search query is wrapped before it is embedded. */
export type EmbeddingQueryFormat = "contextual-task" | "qwen-instruct";

/** How a document chunk is wrapped before it is embedded. */
export type EmbeddingDocumentFormat = "title-prefixed" | "raw-text";

export interface EmbeddingCompatibilityProfile {
  /** Stable identifier of the profile. */
  id: string;
  /** Query formatting style for this model. */
  queryFormat: EmbeddingQueryFormat;
  /** Document formatting style for this model. */
  documentFormat: EmbeddingDocumentFormat;
  /**
   * Whether embedBatch is trusted for this model in GNO's current native path.
   * When false, callers should embed item by item until compatibility is
   * better understood.
   */
  batchEmbeddingTrusted: boolean;
  /** Optional human-readable remarks about the profile. */
  notes?: string[];
}

/** Profile returned when no model-specific rule matches. */
const DEFAULT_PROFILE: EmbeddingCompatibilityProfile = {
  id: "default",
  queryFormat: "contextual-task",
  documentFormat: "title-prefixed",
  batchEmbeddingTrusted: true,
};

/** Profile for Qwen embedding models. */
const QWEN_PROFILE: EmbeddingCompatibilityProfile = {
  id: "qwen-embedding",
  queryFormat: "qwen-instruct",
  documentFormat: "raw-text",
  batchEmbeddingTrusted: true,
  notes: [
    "Uses Qwen-style instruct query formatting.",
    "Documents are embedded as raw text (optionally prefixed with title).",
  ],
};

/** Profile for the Jina code embedding models; batch embedding is not trusted. */
const JINA_PROFILE: EmbeddingCompatibilityProfile = {
  id: "jina-embedding",
  queryFormat: "contextual-task",
  documentFormat: "title-prefixed",
  batchEmbeddingTrusted: false,
  notes: [
    "Current native runtime path has batch-embedding issues on real fixtures.",
    "Prefer per-item embedding fallback until compatibility improves.",
  ],
};

/**
 * Ordered matching rules: the first rule with a term set whose terms all
 * occur in the lower-cased URI wins. A contiguous marker such as
 * "jina-code-embeddings" already contains both "jina" and "code-embeddings",
 * so the term sets below also cover the full hyphenated model names.
 */
const PROFILE_RULES: Array<{
  profile: EmbeddingCompatibilityProfile;
  termSets: string[][];
}> = [
  { profile: QWEN_PROFILE, termSets: [["qwen", "embed"]] },
  {
    profile: JINA_PROFILE,
    termSets: [
      ["jina", "embeddings-v4-text-code"],
      ["jina", "code-embeddings"],
    ],
  },
];

/** Lower-case the URI for case-insensitive matching; a missing URI becomes "". */
function normalizeModelUri(modelUri?: string): string {
  return modelUri == null ? "" : modelUri.toLowerCase();
}

/** True when every term occurs somewhere in `haystack` (vacuously true for no terms). */
function hasAllTerms(haystack: string, terms: string[]): boolean {
  for (const term of terms) {
    if (!haystack.includes(term)) {
      return false;
    }
  }
  return true;
}

/**
 * Resolve the compatibility profile for a model URI.
 *
 * @param modelUri - Model URI to classify; matching is case-insensitive.
 * @returns The first matching model-specific profile, or the default profile
 *   when the URI is missing or matches no rule.
 */
export function getEmbeddingCompatibilityProfile(
  modelUri?: string
): EmbeddingCompatibilityProfile {
  const uri = normalizeModelUri(modelUri);
  const matched = PROFILE_RULES.find((rule) =>
    rule.termSets.some((terms) => hasAllTerms(uri, terms))
  );
  return matched?.profile ?? DEFAULT_PROFILE;
}
|
package/src/mcp/tools/vsearch.ts
CHANGED
|
@@ -149,7 +149,7 @@ export function handleVsearch(
|
|
|
149
149
|
try {
|
|
150
150
|
// Embed query with contextual formatting
|
|
151
151
|
const queryEmbedResult = await embedPort.embed(
|
|
152
|
-
formatQueryForEmbedding(args.query)
|
|
152
|
+
formatQueryForEmbedding(args.query, embedPort.modelUri)
|
|
153
153
|
);
|
|
154
154
|
if (!queryEmbedResult.ok) {
|
|
155
155
|
throw new Error(queryEmbedResult.error.message);
|
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
* @module src/pipeline/contextual
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
+
import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
|
|
14
|
+
|
|
13
15
|
// Top-level regex for performance
|
|
14
16
|
const HEADING_REGEX = /^##?\s+(.+)$/m;
|
|
15
17
|
const SUBHEADING_REGEX = /^##\s+(.+)$/m;
|
|
@@ -19,8 +21,16 @@ const EXT_REGEX = /\.\w+$/;
|
|
|
19
21
|
* Format document text for embedding.
|
|
20
22
|
* Prepends title for contextual retrieval.
|
|
21
23
|
*/
|
|
22
|
-
export function formatDocForEmbedding(
|
|
24
|
+
export function formatDocForEmbedding(
  text: string,
  title?: string,
  modelUri?: string
): string {
  const { documentFormat } = getEmbeddingCompatibilityProfile(modelUri);
  // Trim once; an empty result means there is no usable title.
  const trimmedTitle = title?.trim() ?? "";
  const hasTitle = trimmedTitle !== "";

  // "raw-text" profiles get the bare text, with the title on its own first
  // line when one is present.
  if (documentFormat === "raw-text") {
    return hasTitle ? `${trimmedTitle}\n${text}` : text;
  }

  // Otherwise use the title-prefixed form, with "none" standing in for a
  // missing title.
  const safeTitle = hasTitle ? trimmedTitle : "none";
  return `title: ${safeTitle} | text: ${text}`;
}
|
|
26
36
|
|
|
@@ -28,7 +38,14 @@ export function formatDocForEmbedding(text: string, title?: string): string {
|
|
|
28
38
|
* Format query for embedding.
|
|
29
39
|
* Uses task-prefixed format for asymmetric retrieval.
|
|
30
40
|
*/
|
|
31
|
-
export function formatQueryForEmbedding(
|
|
41
|
+
export function formatQueryForEmbedding(
  query: string,
  modelUri?: string
): string {
  const { queryFormat } = getEmbeddingCompatibilityProfile(modelUri);

  switch (queryFormat) {
    // Qwen-style models expect an instruction line followed by the query.
    case "qwen-instruct":
      return `Instruct: Retrieve relevant documents for the given query\nQuery: ${query}`;
    // Every other profile uses the task-prefixed format.
    default:
      return `task: search result | query: ${query}`;
  }
}
|
|
34
51
|
|
package/src/pipeline/hybrid.ts
CHANGED
|
@@ -18,6 +18,7 @@ import type {
|
|
|
18
18
|
SearchResults,
|
|
19
19
|
} from "./types";
|
|
20
20
|
|
|
21
|
+
import { embedTextsWithRecovery } from "../embed/batch";
|
|
21
22
|
import { err, ok } from "../store/types";
|
|
22
23
|
import { createChunkLookup } from "./chunk-lookup";
|
|
23
24
|
import { formatQueryForEmbedding } from "./contextual";
|
|
@@ -213,7 +214,9 @@ async function searchVectorChunks(
|
|
|
213
214
|
}
|
|
214
215
|
|
|
215
216
|
// Embed query with contextual formatting
|
|
216
|
-
const embedResult = await embedPort.embed(
|
|
217
|
+
const embedResult = await embedPort.embed(
|
|
218
|
+
formatQueryForEmbedding(query, embedPort.modelUri)
|
|
219
|
+
);
|
|
217
220
|
if (!embedResult.ok) {
|
|
218
221
|
return [];
|
|
219
222
|
}
|
|
@@ -443,17 +446,6 @@ export async function searchHybrid(
|
|
|
443
446
|
const vectorStartedAt = performance.now();
|
|
444
447
|
|
|
445
448
|
if (vectorAvailable && vectorIndex && embedPort) {
|
|
446
|
-
// Original query (increase limit when post-filters are active).
|
|
447
|
-
const vecChunks = await searchVectorChunks(vectorIndex, embedPort, query, {
|
|
448
|
-
limit: limit * 2 * retrievalMultiplier,
|
|
449
|
-
});
|
|
450
|
-
|
|
451
|
-
vecCount = vecChunks.length;
|
|
452
|
-
if (vecCount > 0) {
|
|
453
|
-
rankedInputs.push(toRankedInput("vector", vecChunks));
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
// Semantic variants + HyDE (optional; run in parallel and ignore failures)
|
|
457
449
|
const vectorVariantQueries = [
|
|
458
450
|
...(expansion?.vectorQueries?.map((query) => ({
|
|
459
451
|
source: "vector_variant" as const,
|
|
@@ -464,22 +456,72 @@ export async function searchHybrid(
|
|
|
464
456
|
: []),
|
|
465
457
|
];
|
|
466
458
|
|
|
467
|
-
if (vectorVariantQueries.length
|
|
468
|
-
const
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
459
|
+
if (vectorVariantQueries.length === 0) {
|
|
460
|
+
const vecChunks = await searchVectorChunks(
|
|
461
|
+
vectorIndex,
|
|
462
|
+
embedPort,
|
|
463
|
+
query,
|
|
464
|
+
{
|
|
465
|
+
limit: limit * 2 * retrievalMultiplier,
|
|
466
|
+
}
|
|
467
|
+
);
|
|
468
|
+
|
|
469
|
+
vecCount = vecChunks.length;
|
|
470
|
+
if (vecCount > 0) {
|
|
471
|
+
rankedInputs.push(toRankedInput("vector", vecChunks));
|
|
472
|
+
}
|
|
473
|
+
} else {
|
|
474
|
+
const batchedQueries = [
|
|
475
|
+
{
|
|
476
|
+
source: "vector" as const,
|
|
477
|
+
query,
|
|
478
|
+
limit: limit * 2 * retrievalMultiplier,
|
|
479
|
+
},
|
|
480
|
+
...vectorVariantQueries.map((variant) => ({
|
|
481
|
+
...variant,
|
|
482
|
+
limit: limit * retrievalMultiplier,
|
|
483
|
+
})),
|
|
484
|
+
];
|
|
485
|
+
|
|
486
|
+
const embedResult = await embedTextsWithRecovery(
|
|
487
|
+
embedPort,
|
|
488
|
+
batchedQueries.map((variant) =>
|
|
489
|
+
formatQueryForEmbedding(variant.query, embedPort.modelUri)
|
|
473
490
|
)
|
|
474
491
|
);
|
|
475
492
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
493
|
+
if (!embedResult.ok) {
|
|
494
|
+
counters.fallbackEvents.push("vector_embed_error");
|
|
495
|
+
} else {
|
|
496
|
+
if (embedResult.value.batchFailed) {
|
|
497
|
+
counters.fallbackEvents.push("vector_embed_batch_fallback");
|
|
479
498
|
}
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
499
|
+
|
|
500
|
+
for (const [index, variant] of batchedQueries.entries()) {
|
|
501
|
+
const embedding = embedResult.value.vectors[index];
|
|
502
|
+
if (!embedding || !variant) {
|
|
503
|
+
continue;
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
const searchResult = await vectorIndex.searchNearest(
|
|
507
|
+
new Float32Array(embedding),
|
|
508
|
+
variant.limit
|
|
509
|
+
);
|
|
510
|
+
if (!searchResult.ok || searchResult.value.length === 0) {
|
|
511
|
+
continue;
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
const chunks = searchResult.value.map((item) => ({
|
|
515
|
+
mirrorHash: item.mirrorHash,
|
|
516
|
+
seq: item.seq,
|
|
517
|
+
}));
|
|
518
|
+
if (variant.source === "vector") {
|
|
519
|
+
vecCount = chunks.length;
|
|
520
|
+
}
|
|
521
|
+
if (chunks.length === 0) {
|
|
522
|
+
continue;
|
|
523
|
+
}
|
|
524
|
+
rankedInputs.push(toRankedInput(variant.source, chunks));
|
|
483
525
|
}
|
|
484
526
|
}
|
|
485
527
|
}
|
package/src/pipeline/vsearch.ts
CHANGED
|
@@ -353,7 +353,9 @@ export async function searchVector(
|
|
|
353
353
|
}
|
|
354
354
|
|
|
355
355
|
// Embed query with contextual formatting
|
|
356
|
-
const embedResult = await embedPort.embed(
|
|
356
|
+
const embedResult = await embedPort.embed(
|
|
357
|
+
formatQueryForEmbedding(query, embedPort.modelUri)
|
|
358
|
+
);
|
|
357
359
|
if (!embedResult.ok) {
|
|
358
360
|
return err(
|
|
359
361
|
"QUERY_FAILED",
|
package/src/sdk/client.ts
CHANGED
|
@@ -401,7 +401,7 @@ class GnoClientImpl implements GnoClient {
|
|
|
401
401
|
}
|
|
402
402
|
|
|
403
403
|
const queryEmbedResult = await ports.embedPort.embed(
|
|
404
|
-
formatQueryForEmbedding(query)
|
|
404
|
+
formatQueryForEmbedding(query, ports.embedPort.modelUri)
|
|
405
405
|
);
|
|
406
406
|
if (!queryEmbedResult.ok) {
|
|
407
407
|
throw sdkError("MODEL", queryEmbedResult.error.message, {
|
package/src/sdk/embed.ts
CHANGED
|
@@ -19,6 +19,7 @@ import type {
|
|
|
19
19
|
import type { GnoEmbedOptions, GnoEmbedResult } from "./types";
|
|
20
20
|
|
|
21
21
|
import { embedBacklog } from "../embed";
|
|
22
|
+
import { embedTextsWithRecovery } from "../embed/batch";
|
|
22
23
|
import { resolveModelUri } from "../llm/registry";
|
|
23
24
|
import { formatDocForEmbedding } from "../pipeline/contextual";
|
|
24
25
|
import { err, ok } from "../store/types";
|
|
@@ -139,29 +140,45 @@ async function forceEmbedAll(
|
|
|
139
140
|
cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
|
|
140
141
|
}
|
|
141
142
|
|
|
142
|
-
const embedResult = await
|
|
143
|
+
const embedResult = await embedTextsWithRecovery(
|
|
144
|
+
embedPort,
|
|
143
145
|
batch.map((item) =>
|
|
144
|
-
formatDocForEmbedding(
|
|
146
|
+
formatDocForEmbedding(
|
|
147
|
+
item.text,
|
|
148
|
+
item.title ?? undefined,
|
|
149
|
+
embedPort.modelUri
|
|
150
|
+
)
|
|
145
151
|
)
|
|
146
152
|
);
|
|
147
|
-
|
|
153
|
+
|
|
154
|
+
if (!embedResult.ok) {
|
|
148
155
|
errors += batch.length;
|
|
149
156
|
continue;
|
|
150
157
|
}
|
|
151
158
|
|
|
152
|
-
const vectors: VectorRow[] =
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
159
|
+
const vectors: VectorRow[] = [];
|
|
160
|
+
for (const [idx, item] of batch.entries()) {
|
|
161
|
+
const embedding = embedResult.value.vectors[idx];
|
|
162
|
+
if (!embedding) {
|
|
163
|
+
errors += 1;
|
|
164
|
+
continue;
|
|
165
|
+
}
|
|
166
|
+
vectors.push({
|
|
167
|
+
mirrorHash: item.mirrorHash,
|
|
168
|
+
seq: item.seq,
|
|
169
|
+
model: modelUri,
|
|
170
|
+
embedding: new Float32Array(embedding),
|
|
171
|
+
});
|
|
162
172
|
}
|
|
163
173
|
|
|
164
|
-
|
|
174
|
+
if (vectors.length > 0) {
|
|
175
|
+
const storeResult = await vectorIndex.upsertVectors(vectors);
|
|
176
|
+
if (!storeResult.ok) {
|
|
177
|
+
errors += vectors.length;
|
|
178
|
+
continue;
|
|
179
|
+
}
|
|
180
|
+
embedded += vectors.length;
|
|
181
|
+
}
|
|
165
182
|
}
|
|
166
183
|
|
|
167
184
|
if (vectorIndex.vecDirty) {
|
|
@@ -117,10 +117,12 @@ export async function createVectorIndexPort(
|
|
|
117
117
|
`);
|
|
118
118
|
|
|
119
119
|
// Prepared statements for vec0 table (if available)
|
|
120
|
-
const
|
|
121
|
-
? db.prepare(
|
|
122
|
-
|
|
123
|
-
|
|
120
|
+
const deleteVecChunkStmt = searchAvailable
|
|
121
|
+
? db.prepare(`DELETE FROM ${tableName} WHERE chunk_id = ?`)
|
|
122
|
+
: null;
|
|
123
|
+
|
|
124
|
+
const insertVecStmt = searchAvailable
|
|
125
|
+
? db.prepare(`INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)`)
|
|
124
126
|
: null;
|
|
125
127
|
|
|
126
128
|
const searchStmt = searchAvailable
|
|
@@ -175,12 +177,15 @@ export async function createVectorIndexPort(
|
|
|
175
177
|
}
|
|
176
178
|
|
|
177
179
|
// 2. Best-effort update vec0 (graceful degradation)
|
|
178
|
-
if (
|
|
180
|
+
if (deleteVecChunkStmt && insertVecStmt) {
|
|
179
181
|
try {
|
|
180
182
|
db.transaction(() => {
|
|
181
183
|
for (const row of rows) {
|
|
182
184
|
const chunkId = `${row.mirrorHash}:${row.seq}`;
|
|
183
|
-
|
|
185
|
+
// sqlite-vec vec0 tables do not reliably support OR REPLACE semantics.
|
|
186
|
+
// Delete first, then insert the fresh vector row.
|
|
187
|
+
deleteVecChunkStmt.run(chunkId);
|
|
188
|
+
insertVecStmt.run(chunkId, encodeEmbedding(row.embedding));
|
|
184
189
|
}
|
|
185
190
|
})();
|
|
186
191
|
} catch (e) {
|