@ez-corp/ez-search 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/index-cmd.js +14 -24
- package/dist/cli/commands/query-cmd.js +31 -23
- package/dist/services/download-progress.js +68 -0
- package/dist/services/image-embedder.js +33 -37
- package/dist/services/manifest-cache.js +1 -1
- package/dist/services/model-router.js +28 -22
- package/dist/services/vector-db.js +5 -21
- package/package.json +1 -1
|
@@ -2,23 +2,23 @@
|
|
|
2
2
|
* Index command — end-to-end pipeline: scan -> manifest check -> chunk -> embed -> store.
|
|
3
3
|
*
|
|
4
4
|
* Pipeline flow (per type):
|
|
5
|
-
* 1. Resolve path and open vector
|
|
5
|
+
* 1. Resolve path and open vector collection
|
|
6
6
|
* 2. Handle --clear (wipe storage + manifest)
|
|
7
7
|
* 3. Load manifest (incremental cache)
|
|
8
8
|
* 4. For each type in [code, text, image]:
|
|
9
9
|
* a. Scan files of that type
|
|
10
10
|
* b. Detect changed/new/deleted files against manifest
|
|
11
|
-
* c. Remove deleted files' chunks from
|
|
11
|
+
* c. Remove deleted files' chunks from col-768
|
|
12
12
|
* d. Chunk changed/new files
|
|
13
13
|
* e. Batch embed with the correct model
|
|
14
|
-
* f. Insert embeddings into
|
|
15
|
-
* 5. Optimize
|
|
14
|
+
* f. Insert embeddings into col-768
|
|
15
|
+
* 5. Optimize collection THEN save manifest (order matters)
|
|
16
16
|
* 6. Dispose pipelines and output results
|
|
17
17
|
*
|
|
18
18
|
* Model routing:
|
|
19
|
-
* code ->
|
|
20
|
-
* text ->
|
|
21
|
-
* image -> Xenova/
|
|
19
|
+
* code -> onnx-community/Qwen3-Embedding-0.6B-ONNX, col-768
|
|
20
|
+
* text -> onnx-community/Qwen3-Embedding-0.6B-ONNX, col-768
|
|
21
|
+
* image -> Xenova/siglip-base-patch16-224, col-768
|
|
22
22
|
*/
|
|
23
23
|
import * as path from 'path';
|
|
24
24
|
import * as fsp from 'fs/promises';
|
|
@@ -195,14 +195,12 @@ async function runTextEmbeddingPipeline(opts) {
|
|
|
195
195
|
progress.update(`${type}: loading model...`);
|
|
196
196
|
const { createEmbeddingPipeline } = await import('../../services/model-router.js');
|
|
197
197
|
pipe = await createEmbeddingPipeline(type);
|
|
198
|
-
// Nomic requires "search_document: " prefix on indexed documents
|
|
199
|
-
const prefix = type === 'text' ? 'search_document: ' : '';
|
|
200
198
|
const totalBatches = Math.ceil(allPendingChunks.length / BATCH_SIZE);
|
|
201
199
|
for (let batchStart = 0; batchStart < allPendingChunks.length; batchStart += BATCH_SIZE) {
|
|
202
200
|
const batchNum = Math.floor(batchStart / BATCH_SIZE) + 1;
|
|
203
201
|
progress.update(`${type}: embedding`, batchNum, totalBatches);
|
|
204
202
|
const batch = allPendingChunks.slice(batchStart, batchStart + BATCH_SIZE);
|
|
205
|
-
const texts = batch.map((c) =>
|
|
203
|
+
const texts = batch.map((c) => c.text);
|
|
206
204
|
const embeddings = await pipe.embed(texts);
|
|
207
205
|
for (let i = 0; i < batch.length; i++) {
|
|
208
206
|
const chunk = batch[i];
|
|
@@ -212,7 +210,7 @@ async function runTextEmbeddingPipeline(opts) {
|
|
|
212
210
|
modelId: pipe.modelId,
|
|
213
211
|
lineStart: chunk.lineStart,
|
|
214
212
|
lineEnd: chunk.lineEnd,
|
|
215
|
-
chunkText: chunk.text,
|
|
213
|
+
chunkText: chunk.text,
|
|
216
214
|
});
|
|
217
215
|
chunksCreated++;
|
|
218
216
|
}
|
|
@@ -235,23 +233,21 @@ export async function runIndex(targetPath, options) {
|
|
|
235
233
|
const { ProgressReporter } = await import('../progress.js');
|
|
236
234
|
const progress = new ProgressReporter({
|
|
237
235
|
quiet: options.quiet,
|
|
238
|
-
json: options.format
|
|
236
|
+
json: options.format === 'json',
|
|
239
237
|
});
|
|
240
238
|
try {
|
|
241
239
|
// 1. Resolve path
|
|
242
240
|
const absPath = path.resolve(targetPath);
|
|
243
241
|
// 2. Open vector collections
|
|
244
242
|
const { openProjectCollections } = await import('../../services/vector-db.js');
|
|
245
|
-
let { col768,
|
|
243
|
+
let { col768, storagePath } = openProjectCollections(absPath);
|
|
246
244
|
// 3. Handle --clear
|
|
247
245
|
// rmSync removes .ez-search/ entirely (including manifest.json inside it)
|
|
248
246
|
if (options.clear) {
|
|
249
247
|
col768.close();
|
|
250
|
-
col512.close();
|
|
251
248
|
rmSync(storagePath, { recursive: true, force: true });
|
|
252
249
|
const reopened = openProjectCollections(absPath);
|
|
253
250
|
col768 = reopened.col768;
|
|
254
|
-
col512 = reopened.col512;
|
|
255
251
|
storagePath = reopened.storagePath;
|
|
256
252
|
}
|
|
257
253
|
// 4. Load manifest and helpers
|
|
@@ -272,7 +268,6 @@ export async function runIndex(targetPath, options) {
|
|
|
272
268
|
const allDeletedPaths = [];
|
|
273
269
|
// Per-type file counts for text output
|
|
274
270
|
const typeFileCounts = {};
|
|
275
|
-
let imageFilesProcessed = false;
|
|
276
271
|
for (const fileType of typesToIndex) {
|
|
277
272
|
// Scan files of this type
|
|
278
273
|
const scannedFiles = [];
|
|
@@ -329,7 +324,7 @@ export async function runIndex(targetPath, options) {
|
|
|
329
324
|
}
|
|
330
325
|
}
|
|
331
326
|
else if (fileType === 'image') {
|
|
332
|
-
// Image pipeline: one vector per file, goes into col-
|
|
327
|
+
// Image pipeline: one vector per file, goes into col-768
|
|
333
328
|
const { EXTENSION_MAP } = await import('../../types.js');
|
|
334
329
|
const deletedPaths = Object.keys(manifest.files).filter((relPath) => {
|
|
335
330
|
if (scannedSet.has(relPath))
|
|
@@ -340,7 +335,7 @@ export async function runIndex(targetPath, options) {
|
|
|
340
335
|
for (const deletedPath of deletedPaths) {
|
|
341
336
|
const entry = manifest.files[deletedPath];
|
|
342
337
|
for (const chunk of entry.chunks) {
|
|
343
|
-
|
|
338
|
+
col768.remove(chunk.id);
|
|
344
339
|
totalChunksRemoved++;
|
|
345
340
|
}
|
|
346
341
|
delete manifest.files[deletedPath];
|
|
@@ -379,7 +374,7 @@ export async function runIndex(targetPath, options) {
|
|
|
379
374
|
const fileHash = hashContent(buf);
|
|
380
375
|
const embedding = await imagePipeline.embedImage(buf);
|
|
381
376
|
const chunkId = makeChunkId(file.relativePath, 0);
|
|
382
|
-
|
|
377
|
+
col768.insert(chunkId, embedding, {
|
|
383
378
|
filePath: file.relativePath,
|
|
384
379
|
chunkIndex: 0,
|
|
385
380
|
modelId: imagePipeline.modelId,
|
|
@@ -397,7 +392,6 @@ export async function runIndex(targetPath, options) {
|
|
|
397
392
|
totalFilesIndexed++;
|
|
398
393
|
}
|
|
399
394
|
await imagePipeline.dispose();
|
|
400
|
-
imageFilesProcessed = true;
|
|
401
395
|
typeFileCounts['image'] = (typeFileCounts['image'] ?? 0) + filesToProcess.length;
|
|
402
396
|
}
|
|
403
397
|
}
|
|
@@ -412,10 +406,6 @@ export async function runIndex(targetPath, options) {
|
|
|
412
406
|
progress.update('optimizing index...');
|
|
413
407
|
col768.optimize();
|
|
414
408
|
col768.close();
|
|
415
|
-
if (imageFilesProcessed) {
|
|
416
|
-
col512.optimize();
|
|
417
|
-
}
|
|
418
|
-
col512.close();
|
|
419
409
|
saveManifest(absPath, manifest);
|
|
420
410
|
progress.done();
|
|
421
411
|
// 7. Output results
|
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Query command —
|
|
2
|
+
* Query command — single-collection grouped semantic search.
|
|
3
3
|
*
|
|
4
4
|
* Pipeline:
|
|
5
5
|
* 1. Resolve project directory (cwd)
|
|
6
|
-
* 2. Open vector
|
|
6
|
+
* 2. Open vector collection (col-768 for all types)
|
|
7
7
|
* 3. Load manifest for totalIndexed count
|
|
8
8
|
* 4. For each requested type:
|
|
9
|
-
* a. code: embed with
|
|
10
|
-
* b. text: embed with
|
|
11
|
-
* c. image: embed with
|
|
9
|
+
* a. code: embed with Qwen3 (instruct prefix), query col-768, filter by Qwen3 modelId
|
|
10
|
+
* b. text: embed with Qwen3 (instruct prefix), query col-768, filter by Qwen3 modelId
|
|
11
|
+
* c. image: embed with SigLIP text encoder, query col-768, filter by siglip modelId
|
|
12
12
|
* 5. Apply --threshold and --dir filters per type
|
|
13
13
|
* 6. Collapse adjacent chunks per type
|
|
14
14
|
* 7. Sort by score desc, slice to topK per type
|
|
15
15
|
* 8. Output grouped JSON { code: [...], text: [...], image: [...] } or text with ## headers
|
|
16
16
|
*
|
|
17
|
-
* col-768 holds
|
|
18
|
-
* Over-fetch topK*5 ensures enough candidates after modelId filtering.
|
|
17
|
+
* col-768 holds ALL vectors (code, text, image); they are distinguished by modelId metadata.
|
|
19
18
|
*/
|
|
20
19
|
export async function runQuery(text, options) {
|
|
21
20
|
const topK = parseInt(options.topK, 10);
|
|
@@ -59,7 +58,6 @@ export async function runQuery(text, options) {
|
|
|
59
58
|
}
|
|
60
59
|
else {
|
|
61
60
|
// Pre-detect indexed types from manifest: only load models for types that have data.
|
|
62
|
-
// This avoids loading Jina when only text is indexed (or Nomic when only code is indexed).
|
|
63
61
|
const { EXTENSION_MAP } = await import('../../types.js');
|
|
64
62
|
const indexedTypes = new Set();
|
|
65
63
|
for (const filePath of Object.keys(manifest.files)) {
|
|
@@ -84,7 +82,6 @@ export async function runQuery(text, options) {
|
|
|
84
82
|
// 4. Open vector collections as needed
|
|
85
83
|
const { openCollection } = await import('../../services/vector-db.js');
|
|
86
84
|
const col768 = openCollection(projectDir, 'col-768');
|
|
87
|
-
const col512 = typesToQuery.includes('image') ? openCollection(projectDir, 'col-512') : null;
|
|
88
85
|
try {
|
|
89
86
|
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
90
87
|
const { normalizeResults, filterAndCollapse, filterImageResults } = await import('../../services/query-utils.js');
|
|
@@ -97,11 +94,16 @@ export async function runQuery(text, options) {
|
|
|
97
94
|
let textResults = [];
|
|
98
95
|
let imageResults = [];
|
|
99
96
|
if (typesToQuery.includes('code')) {
|
|
100
|
-
// Code:
|
|
97
|
+
// Code: Qwen3 embedding, filter for Qwen3 modelId
|
|
101
98
|
let pipe = null;
|
|
102
99
|
try {
|
|
100
|
+
if (process.stderr.isTTY)
|
|
101
|
+
process.stderr.write('\r\x1b[Kcode: loading model...');
|
|
103
102
|
pipe = await createEmbeddingPipeline('code');
|
|
104
|
-
|
|
103
|
+
if (process.stderr.isTTY)
|
|
104
|
+
process.stderr.write('\r\x1b[K');
|
|
105
|
+
const prefixedQuery = `Instruct: Given a search query, retrieve relevant code snippets\nQuery: ${text}`;
|
|
106
|
+
const [queryEmbedding] = await pipe.embed([prefixedQuery]);
|
|
105
107
|
let rawResults;
|
|
106
108
|
try {
|
|
107
109
|
rawResults = col768.query(queryEmbedding, fetchCount);
|
|
@@ -110,7 +112,7 @@ export async function runQuery(text, options) {
|
|
|
110
112
|
rawResults = [];
|
|
111
113
|
}
|
|
112
114
|
const normalized = normalizeResults(rawResults);
|
|
113
|
-
codeResults = filterAndCollapse(normalized, (id) => id.includes('
|
|
115
|
+
codeResults = filterAndCollapse(normalized, (id) => id.includes('Qwen3-Embedding'), { threshold, dir: options.dir, topK });
|
|
114
116
|
}
|
|
115
117
|
catch (err) {
|
|
116
118
|
process.stderr.write(`[query] code pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
@@ -121,11 +123,15 @@ export async function runQuery(text, options) {
|
|
|
121
123
|
}
|
|
122
124
|
}
|
|
123
125
|
if (typesToQuery.includes('text')) {
|
|
124
|
-
// Text:
|
|
126
|
+
// Text: Qwen3 embedding with instruct prefix, filter for Qwen3 modelId
|
|
125
127
|
let pipe = null;
|
|
126
128
|
try {
|
|
129
|
+
if (process.stderr.isTTY)
|
|
130
|
+
process.stderr.write('\r\x1b[Ktext: loading model...');
|
|
127
131
|
pipe = await createEmbeddingPipeline('text');
|
|
128
|
-
|
|
132
|
+
if (process.stderr.isTTY)
|
|
133
|
+
process.stderr.write('\r\x1b[K');
|
|
134
|
+
const prefixedQuery = `Instruct: Given a search query, retrieve relevant text passages\nQuery: ${text}`;
|
|
129
135
|
const [queryEmbedding] = await pipe.embed([prefixedQuery]);
|
|
130
136
|
let rawResults;
|
|
131
137
|
try {
|
|
@@ -135,7 +141,7 @@ export async function runQuery(text, options) {
|
|
|
135
141
|
rawResults = [];
|
|
136
142
|
}
|
|
137
143
|
const normalized = normalizeResults(rawResults);
|
|
138
|
-
textResults = filterAndCollapse(normalized, (id) => id.includes('
|
|
144
|
+
textResults = filterAndCollapse(normalized, (id) => id.includes('Qwen3-Embedding'), { threshold, dir: options.dir, topK });
|
|
139
145
|
}
|
|
140
146
|
catch (err) {
|
|
141
147
|
process.stderr.write(`[query] text pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
@@ -145,22 +151,26 @@ export async function runQuery(text, options) {
|
|
|
145
151
|
await pipe.dispose();
|
|
146
152
|
}
|
|
147
153
|
}
|
|
148
|
-
if (typesToQuery.includes('image')
|
|
149
|
-
// Image:
|
|
154
|
+
if (typesToQuery.includes('image')) {
|
|
155
|
+
// Image: SigLIP text embedding, query col-768, filter for siglip modelId
|
|
150
156
|
let pipe = null;
|
|
151
157
|
try {
|
|
152
|
-
|
|
153
|
-
|
|
158
|
+
if (process.stderr.isTTY)
|
|
159
|
+
process.stderr.write('\r\x1b[Kimage: loading model...');
|
|
160
|
+
const { createSiglipTextPipeline } = await import('../../services/image-embedder.js');
|
|
161
|
+
pipe = await createSiglipTextPipeline();
|
|
162
|
+
if (process.stderr.isTTY)
|
|
163
|
+
process.stderr.write('\r\x1b[K');
|
|
154
164
|
const [queryEmbedding] = await pipe.embedText([text]);
|
|
155
165
|
let rawResults;
|
|
156
166
|
try {
|
|
157
|
-
rawResults =
|
|
167
|
+
rawResults = col768.query(queryEmbedding, fetchCount);
|
|
158
168
|
}
|
|
159
169
|
catch {
|
|
160
170
|
rawResults = [];
|
|
161
171
|
}
|
|
162
172
|
const normalized = normalizeResults(rawResults);
|
|
163
|
-
imageResults = filterImageResults(normalized, (id) => id.includes('
|
|
173
|
+
imageResults = filterImageResults(normalized, (id) => id.includes('siglip'), { threshold, dir: options.dir, topK });
|
|
164
174
|
}
|
|
165
175
|
catch (err) {
|
|
166
176
|
process.stderr.write(`[query] image pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
@@ -261,8 +271,6 @@ export async function runQuery(text, options) {
|
|
|
261
271
|
}
|
|
262
272
|
finally {
|
|
263
273
|
col768.close();
|
|
264
|
-
if (col512)
|
|
265
|
-
col512.close();
|
|
266
274
|
}
|
|
267
275
|
}
|
|
268
276
|
catch (err) {
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default progress callback for Transformers.js model downloads.
|
|
3
|
+
*
|
|
4
|
+
* Transformers.js fires download/progress events even for cached models, so we
|
|
5
|
+
* check the cache directory to decide the label:
|
|
6
|
+
* - Cache miss → "Downloading <model> — <file> XX%"
|
|
7
|
+
* - Cache hit → "Loading <model>..."
|
|
8
|
+
*
|
|
9
|
+
* Output goes to stderr and only when running in a TTY.
|
|
10
|
+
*/
|
|
11
|
+
import * as fs from 'fs';
|
|
12
|
+
import * as path from 'path';
|
|
13
|
+
import { resolveModelCachePath } from '../config/paths.js';
|
|
14
|
+
/**
 * Report whether a model already has a directory in the local model cache.
 *
 * The model id is split on '/' and appended to the cache root, so
 * "org/repo" is looked up at <cacheRoot>/org/repo.
 *
 * @param {string} modelId - Model id in "<org>/<repo>" form.
 * @returns {boolean} true when the resolved path exists and is a directory;
 *   false when stat fails for any reason or the path is not a directory.
 */
function isModelCached(modelId) {
    const cacheRoot = resolveModelCachePath();
    const candidate = path.join(cacheRoot, ...modelId.split('/'));
    let stats;
    try {
        stats = fs.statSync(candidate);
    }
    catch {
        // Any stat failure (typically a missing path) counts as "not cached".
        return false;
    }
    return stats.isDirectory();
}
|
|
25
|
+
/**
 * Build a `progress_callback` for Transformers.js model loading that prints a
 * single, self-overwriting status line on stderr.
 *
 * - stderr is not a TTY: returns a no-op, so nothing is ever written.
 * - Model directory already cached: prints "Loading <model>..." once, on the
 *   first `initiate` event.
 * - Model not cached: prints "Downloading <model> — <file>..." when a file
 *   starts, then "Downloading <model> — <file> XX%" as progress arrives.
 *
 * In both TTY modes the line is erased when a `ready` event is received.
 *
 * @param {string} modelId - Model id shown in the status line and used for the cache lookup.
 * @returns {(event: unknown) => void} Callback that ignores anything that is not an object.
 */
export function createDownloadProgressCallback(modelId) {
    if (!process.stderr.isTTY)
        return () => { };
    // "\r\x1b[K" = carriage return + erase-to-end-of-line: redraw in place.
    const rewriteLine = (text) => process.stderr.write(`\r\x1b[K${text}`);
    const isEvent = (event) => Boolean(event) && typeof event === 'object';
    if (isModelCached(modelId)) {
        // Cached: show one "Loading..." line and clear it when ready.
        let shown = false;
        return (event) => {
            if (!isEvent(event))
                return;
            if (!shown && event.status === 'initiate') {
                shown = true;
                rewriteLine(`Loading ${modelId}...`);
            }
            if (event.status === 'ready') {
                rewriteLine('');
            }
        };
    }
    // Uncached: per-file download progress.
    const downloading = new Set();
    return (event) => {
        if (!isEvent(event))
            return;
        const { status, file, progress } = event;
        const hasPercent = typeof progress === 'number';
        if (status === 'download' && file) {
            if (!downloading.has(file)) {
                downloading.add(file);
                rewriteLine(`Downloading ${modelId} — ${file}...`);
            }
            else if (hasPercent) {
                rewriteLine(`Downloading ${modelId} — ${file} ${Math.round(progress)}%`);
            }
        }
        else if (status === 'progress' && file && hasPercent) {
            // Transformers.js sends `download` once per file and reports the
            // percentage under `progress`; without this branch the XX% label
            // would never be printed.
            rewriteLine(`Downloading ${modelId} — ${file} ${Math.round(progress)}%`);
        }
        else if (status === 'done' && file) {
            downloading.delete(file);
        }
        if (status === 'ready') {
            rewriteLine('');
        }
    };
}
|
|
@@ -1,27 +1,25 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* SigLIP image embedding service — converts image files to 768-dim Float32Array embeddings.
|
|
3
3
|
*
|
|
4
|
-
* Uses
|
|
5
|
-
* Quantized variants (int8, uint8) fail in onnxruntime-node with:
|
|
6
|
-
* "ConvInteger(10) is not implemented"
|
|
7
|
-
* Therefore, dtype: 'fp32' is REQUIRED and must not be changed.
|
|
4
|
+
* Uses SiglipVisionModel (not the full SigLIP model) with fp32 dtype.
|
|
8
5
|
*
|
|
9
6
|
* Supported formats: .jpg, .jpeg, .png, .webp (anything RawImage can decode).
|
|
10
7
|
*
|
|
11
|
-
* One image produces one
|
|
8
|
+
* One image produces one 768-dim vector — no chunking is performed.
|
|
12
9
|
* Model weights are cached in ~/.ez-search/models/ alongside text/code models.
|
|
13
10
|
*/
|
|
14
|
-
import {
|
|
11
|
+
import { SiglipVisionModel, SiglipTextModel, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
|
|
15
12
|
import { resolveModelCachePath } from '../config/paths.js';
|
|
13
|
+
import { createDownloadProgressCallback } from './download-progress.js';
|
|
16
14
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
17
|
-
const
|
|
18
|
-
const
|
|
15
|
+
const SIGLIP_MODEL_ID = 'Xenova/siglip-base-patch16-224';
|
|
16
|
+
const SIGLIP_DIM = 768;
|
|
19
17
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
20
18
|
/**
|
|
21
19
|
* L2-normalize a vector in-place.
|
|
22
20
|
*
|
|
23
|
-
*
|
|
24
|
-
* normalize their output — only the full
|
|
21
|
+
* SiglipVisionModel and SiglipTextModel do NOT
|
|
22
|
+
* normalize their output — only the full SigLIP model does. Without this,
|
|
25
23
|
* cosine distances in Zvec are meaningless (all scores collapse to ~0.21).
|
|
26
24
|
*/
|
|
27
25
|
function l2Normalize(vec) {
|
|
@@ -36,41 +34,38 @@ function l2Normalize(vec) {
|
|
|
36
34
|
}
|
|
37
35
|
// ── Public API ────────────────────────────────────────────────────────────────
|
|
38
36
|
/**
|
|
39
|
-
* Create an ImageEmbeddingPipeline backed by
|
|
37
|
+
* Create an ImageEmbeddingPipeline backed by SigLIP ViT-B/16 (fp32).
|
|
40
38
|
*
|
|
41
|
-
* Loads the AutoProcessor and
|
|
39
|
+
* Loads the AutoProcessor and SiglipVisionModel in parallel.
|
|
42
40
|
* Model weights are cached in ~/.ez-search/models/.
|
|
43
|
-
*
|
|
44
|
-
* IMPORTANT: dtype must remain 'fp32'. Quantized variants fail in Node.js with
|
|
45
|
-
* "ConvInteger(10) is not implemented" from onnxruntime-node.
|
|
46
41
|
*/
|
|
47
42
|
export async function createImageEmbeddingPipeline() {
|
|
48
43
|
// Set cache dir BEFORE first model load — this is critical
|
|
49
44
|
env.cacheDir = resolveModelCachePath();
|
|
50
45
|
env.allowRemoteModels = true;
|
|
46
|
+
const cb = createDownloadProgressCallback(SIGLIP_MODEL_ID);
|
|
51
47
|
// Load processor and vision model in parallel for faster startup
|
|
52
48
|
const [processor, visionModel] = await Promise.all([
|
|
53
|
-
AutoProcessor.from_pretrained(
|
|
54
|
-
|
|
55
|
-
// fp32 is REQUIRED — do not use 'int8', 'uint8', or other quantized dtypes.
|
|
56
|
-
// onnxruntime-node does not implement ConvInteger(10), which quantized CLIP uses.
|
|
49
|
+
AutoProcessor.from_pretrained(SIGLIP_MODEL_ID, { progress_callback: cb }),
|
|
50
|
+
SiglipVisionModel.from_pretrained(SIGLIP_MODEL_ID, {
|
|
57
51
|
dtype: 'fp32',
|
|
52
|
+
progress_callback: cb,
|
|
58
53
|
}),
|
|
59
54
|
]);
|
|
60
|
-
console.error(`[image-embedder] Loaded
|
|
55
|
+
console.error(`[image-embedder] Loaded SigLIP vision model (fp32)`);
|
|
61
56
|
return {
|
|
62
|
-
modelId:
|
|
63
|
-
dim:
|
|
57
|
+
modelId: SIGLIP_MODEL_ID,
|
|
58
|
+
dim: SIGLIP_DIM,
|
|
64
59
|
async embedImage(buf) {
|
|
65
60
|
// Use fromBlob instead of file:// URLs to avoid encoding issues with
|
|
66
61
|
// special Unicode characters in filenames (e.g. macOS narrow no-break spaces).
|
|
67
62
|
const image = await RawImage.fromBlob(new Blob([new Uint8Array(buf)]));
|
|
68
|
-
// Preprocess: resize, normalize, convert to tensor expected by
|
|
63
|
+
// Preprocess: resize, normalize, convert to tensor expected by SigLIP
|
|
69
64
|
const inputs = await processor(image);
|
|
70
|
-
// Run the vision encoder — output.
|
|
65
|
+
// Run the vision encoder — output.pooler_output is a [1, 768] Tensor
|
|
71
66
|
const output = await visionModel(inputs);
|
|
72
67
|
// Extract and L2-normalize (projection models don't normalize)
|
|
73
|
-
return l2Normalize(new Float32Array(output.
|
|
68
|
+
return l2Normalize(new Float32Array(output.pooler_output.data.slice(0, SIGLIP_DIM)));
|
|
74
69
|
},
|
|
75
70
|
async dispose() {
|
|
76
71
|
if (typeof visionModel.dispose === 'function') {
|
|
@@ -80,30 +75,31 @@ export async function createImageEmbeddingPipeline() {
|
|
|
80
75
|
};
|
|
81
76
|
}
|
|
82
77
|
/**
|
|
83
|
-
* Create a
|
|
78
|
+
* Create a SiglipTextPipeline backed by SigLIP ViT-B/16 (fp32).
|
|
84
79
|
*
|
|
85
|
-
* Loads AutoTokenizer and
|
|
86
|
-
* Used for text-to-image search: encode query text into
|
|
80
|
+
* Loads AutoTokenizer and SiglipTextModel in parallel.
|
|
81
|
+
* Used for text-to-image search: encode query text into SigLIP's 768-dim space,
|
|
87
82
|
* then find nearest image embeddings.
|
|
88
83
|
*/
|
|
89
|
-
export async function
|
|
84
|
+
export async function createSiglipTextPipeline() {
|
|
90
85
|
env.cacheDir = resolveModelCachePath();
|
|
91
86
|
env.allowRemoteModels = true;
|
|
87
|
+
const cb = createDownloadProgressCallback(SIGLIP_MODEL_ID);
|
|
92
88
|
const [tokenizer, textModel] = await Promise.all([
|
|
93
|
-
AutoTokenizer.from_pretrained(
|
|
94
|
-
|
|
89
|
+
AutoTokenizer.from_pretrained(SIGLIP_MODEL_ID, { progress_callback: cb }),
|
|
90
|
+
SiglipTextModel.from_pretrained(SIGLIP_MODEL_ID, { dtype: 'fp32', progress_callback: cb }),
|
|
95
91
|
]);
|
|
96
|
-
console.error(`[image-embedder] Loaded
|
|
92
|
+
console.error(`[image-embedder] Loaded SigLIP text model (fp32)`);
|
|
97
93
|
return {
|
|
98
|
-
modelId:
|
|
99
|
-
dim:
|
|
94
|
+
modelId: SIGLIP_MODEL_ID,
|
|
95
|
+
dim: SIGLIP_DIM,
|
|
100
96
|
async embedText(texts) {
|
|
101
97
|
const inputs = tokenizer(texts, { padding: true, truncation: true });
|
|
102
98
|
const output = await textModel(inputs);
|
|
103
|
-
const data = output.
|
|
99
|
+
const data = output.pooler_output.data;
|
|
104
100
|
const embeddings = [];
|
|
105
101
|
for (let i = 0; i < texts.length; i++) {
|
|
106
|
-
embeddings.push(l2Normalize(new Float32Array(data.slice(i *
|
|
102
|
+
embeddings.push(l2Normalize(new Float32Array(data.slice(i * SIGLIP_DIM, (i + 1) * SIGLIP_DIM))));
|
|
107
103
|
}
|
|
108
104
|
return embeddings;
|
|
109
105
|
},
|
|
@@ -13,7 +13,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from '
|
|
|
13
13
|
import * as path from 'path';
|
|
14
14
|
import { resolveProjectStoragePath } from '../config/paths.js';
|
|
15
15
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
16
|
-
export const MANIFEST_VERSION =
|
|
16
|
+
export const MANIFEST_VERSION = 5;
|
|
17
17
|
export const MANIFEST_FILENAME = 'manifest.json';
|
|
18
18
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
19
19
|
function manifestPath(projectDir) {
|
|
@@ -6,32 +6,25 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Model cache is stored in ~/.ez-search/models/ (not the default HuggingFace cache).
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
9
|
+
* Both code and text use Qwen3-Embedding-0.6B. Output is truncated from 1024 to 768 dims
|
|
10
|
+
* via Matryoshka Representation Learning, then L2-normalized.
|
|
11
|
+
*
|
|
12
|
+
* Query prefixing (Instruct/Query format) is the caller's responsibility.
|
|
13
13
|
*/
|
|
14
14
|
import { pipeline, env } from '@huggingface/transformers';
|
|
15
15
|
import { resolveModelCachePath } from '../config/paths.js';
|
|
16
|
+
import { createDownloadProgressCallback } from './download-progress.js';
|
|
16
17
|
// ── Model registry ────────────────────────────────────────────────────────────
|
|
17
18
|
const MODEL_REGISTRY = {
|
|
18
19
|
code: {
|
|
19
|
-
id: '
|
|
20
|
+
id: 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
|
|
21
|
+
nativeDim: 1024,
|
|
20
22
|
dim: 768,
|
|
21
23
|
},
|
|
22
24
|
text: {
|
|
23
|
-
id: '
|
|
25
|
+
id: 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
|
|
26
|
+
nativeDim: 1024,
|
|
24
27
|
dim: 768,
|
|
25
|
-
/**
|
|
26
|
-
* Nomic requires task prefixes on all inputs:
|
|
27
|
-
* document: "search_document: <text>"
|
|
28
|
-
* query: "search_query: <text>"
|
|
29
|
-
* The embed() method does NOT add these — callers must prefix their strings.
|
|
30
|
-
*/
|
|
31
|
-
taskPrefix: {
|
|
32
|
-
document: 'search_document: ',
|
|
33
|
-
query: 'search_query: ',
|
|
34
|
-
},
|
|
35
28
|
},
|
|
36
29
|
};
|
|
37
30
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
@@ -49,6 +42,16 @@ function extractEmbedding(output) {
|
|
|
49
42
|
}
|
|
50
43
|
throw new Error(`Unexpected embedding output shape: ${JSON.stringify(output)}`);
|
|
51
44
|
}
|
|
45
|
+
/**
 * Scale a numeric vector to unit Euclidean length, in place.
 *
 * @param {Float32Array|number[]} vec - Vector to normalize; it is mutated.
 * @returns {Float32Array|number[]} The same `vec` instance. A vector whose
 *   norm is not greater than zero is returned untouched.
 */
function l2Normalize(vec) {
    let sumOfSquares = 0;
    for (let k = 0; k < vec.length; k++) {
        const component = vec[k];
        sumOfSquares += component * component;
    }
    const magnitude = Math.sqrt(sumOfSquares);
    // Leave the vector alone rather than divide by zero.
    if (!(magnitude > 0)) {
        return vec;
    }
    for (let k = 0; k < vec.length; k++) {
        vec[k] = vec[k] / magnitude;
    }
    return vec;
}
|
|
52
55
|
// ── Public API ────────────────────────────────────────────────────────────────
|
|
53
56
|
/**
|
|
54
57
|
* Create an EmbeddingPipeline for the given model type.
|
|
@@ -58,12 +61,11 @@ function extractEmbedding(output) {
|
|
|
58
61
|
*
|
|
59
62
|
* Model weights are cached in ~/.ez-search/models/ (set before first pipeline() call).
|
|
60
63
|
*
|
|
61
|
-
* @param modelType - 'code'
|
|
62
|
-
* 'text' for nomic-ai/nomic-embed-text-v1.5 (768-dim, prefixes required)
|
|
64
|
+
* @param modelType - 'code' or 'text', both backed by Qwen3-Embedding-0.6B (768-dim after truncation)
|
|
63
65
|
*/
|
|
64
66
|
export async function createEmbeddingPipeline(modelType, options = {}) {
|
|
65
67
|
const model = MODEL_REGISTRY[modelType];
|
|
66
|
-
const
|
|
68
|
+
const cb = options.progressCallback ?? createDownloadProgressCallback(model.id);
|
|
67
69
|
// Set cache dir BEFORE first pipeline() call — this is critical
|
|
68
70
|
env.cacheDir = resolveModelCachePath();
|
|
69
71
|
env.allowRemoteModels = true;
|
|
@@ -74,7 +76,7 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
|
|
|
74
76
|
pipe = await pipeline('feature-extraction', model.id, {
|
|
75
77
|
device: 'webgpu',
|
|
76
78
|
dtype: 'fp32',
|
|
77
|
-
|
|
79
|
+
progress_callback: cb,
|
|
78
80
|
});
|
|
79
81
|
backend = 'webgpu';
|
|
80
82
|
console.error(`[model-router] Using WebGPU for ${model.id}`);
|
|
@@ -86,7 +88,7 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
|
|
|
86
88
|
pipe = await pipeline('feature-extraction', model.id, {
|
|
87
89
|
device: 'cpu',
|
|
88
90
|
dtype: 'q8',
|
|
89
|
-
|
|
91
|
+
progress_callback: cb,
|
|
90
92
|
});
|
|
91
93
|
backend = 'cpu';
|
|
92
94
|
console.error(`[model-router] Using CPU for ${model.id}`);
|
|
@@ -97,7 +99,11 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
|
|
|
97
99
|
dim: model.dim,
|
|
98
100
|
async embed(texts) {
|
|
99
101
|
const outputs = await Promise.all(texts.map((text) => pipe(text, { pooling: 'mean', normalize: true })));
|
|
100
|
-
return outputs.map(
|
|
102
|
+
return outputs.map((output) => {
|
|
103
|
+
const raw = extractEmbedding(output);
|
|
104
|
+
const truncated = new Float32Array(raw.buffer, raw.byteOffset, model.dim);
|
|
105
|
+
return l2Normalize(new Float32Array(truncated));
|
|
106
|
+
});
|
|
101
107
|
},
|
|
102
108
|
async dispose() {
|
|
103
109
|
if (pipe && typeof pipe.dispose === 'function') {
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
* Vector DB service — wraps @zvec/zvec behind a clean interface.
|
|
3
3
|
*
|
|
4
4
|
* Uses createRequire because @zvec/zvec is a CommonJS package in an ESM project.
|
|
5
|
-
*
|
|
6
|
-
* col-768 — for
|
|
7
|
-
* col-512 — for image embeddings (CLIP, 512-dim)
|
|
5
|
+
* Single collection per project:
|
|
6
|
+
* col-768 — for all embeddings (code, text, image — all 768-dim)
|
|
8
7
|
*
|
|
9
8
|
* Storage lives at <project>/.ez-search/ (project-scoped).
|
|
10
9
|
*/
|
|
@@ -17,7 +16,7 @@ const { ZVecCreateAndOpen, ZVecOpen, ZVecCollectionSchema, ZVecDataType, ZVecInd
|
|
|
17
16
|
// Initialize Zvec at module level — suppress noisy logs
|
|
18
17
|
ZVecInitialize({ logLevel: ZVecLogLevel.WARN });
|
|
19
18
|
// ── Schema versioning ─────────────────────────────────────────────────────────
|
|
20
|
-
const SCHEMA_VERSION =
|
|
19
|
+
const SCHEMA_VERSION = 3;
|
|
21
20
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
22
21
|
/**
|
|
23
22
|
* Validate that an ID doesn't contain colons (Zvec rejects them).
|
|
@@ -144,31 +143,16 @@ function createCollection(storageDir, name, dim) {
|
|
|
144
143
|
},
|
|
145
144
|
};
|
|
146
145
|
}
|
|
147
|
-
/**
|
|
148
|
-
* Open both vector collections for a project.
|
|
149
|
-
*
|
|
150
|
-
* Storage layout:
|
|
151
|
-
* <projectDir>/.ez-search/col-768/ (768-dim, code/text)
|
|
152
|
-
* <projectDir>/.ez-search/col-512/ (512-dim, images)
|
|
153
|
-
*
|
|
154
|
-
* Creates the storage directory if it does not exist.
|
|
155
|
-
*/
|
|
156
146
|
/**
 * Open the project's single vector collection for indexing.
 *
 * Resolves the project storage directory, creates it if missing, runs the
 * schema-version check, then opens `col-768` with 768 dimensions.
 *
 * @param {string} projectDir - Project root used to resolve the storage directory.
 * @returns {{ col768: object, storagePath: string }} The collection handle and
 *   the storage directory it lives in.
 */
export function openProjectCollections(projectDir) {
    const storagePath = resolveProjectStoragePath(projectDir);
    mkdirSync(storagePath, { recursive: true });
    // The schema check runs before the collection is opened.
    ensureSchemaVersion(storagePath);
    return {
        col768: createCollection(storagePath, 'col-768', 768),
        storagePath,
    };
}
|
|
164
|
-
/**
|
|
165
|
-
* Open a single vector collection by name.
|
|
166
|
-
* Use this when you only need one collection (e.g. query only needs col-768).
|
|
167
|
-
*/
|
|
168
153
|
/**
 * Open one named vector collection for a project.
 *
 * Every collection is opened with 768 dimensions, whatever its name.
 * The storage directory is created if missing and the schema-version check
 * runs before the collection is opened.
 *
 * @param {string} projectDir - Project root used to resolve the storage directory.
 * @param {string} name - Collection name, e.g. 'col-768'.
 * @returns {object} The collection handle produced by `createCollection`.
 */
export function openCollection(projectDir, name) {
    const storagePath = resolveProjectStoragePath(projectDir);
    mkdirSync(storagePath, { recursive: true });
    ensureSchemaVersion(storagePath);
    const collection = createCollection(storagePath, name, 768);
    return collection;
}
|