@ez-corp/ez-search 1.1.0 → 1.1.2

@@ -2,23 +2,23 @@
  * Index command — end-to-end pipeline: scan -> manifest check -> chunk -> embed -> store.
  *
  * Pipeline flow (per type):
- * 1. Resolve path and open vector collections
+ * 1. Resolve path and open vector collection
  * 2. Handle --clear (wipe storage + manifest)
  * 3. Load manifest (incremental cache)
  * 4. For each type in [code, text, image]:
  *    a. Scan files of that type
  *    b. Detect changed/new/deleted files against manifest
- *    c. Remove deleted files' chunks from the appropriate collection
+ *    c. Remove deleted files' chunks from col-768
  *    d. Chunk changed/new files
  *    e. Batch embed with the correct model
- *    f. Insert embeddings into the appropriate collection
- * 5. Optimize collections THEN save manifest (order matters)
+ *    f. Insert embeddings into col-768
+ * 5. Optimize collection THEN save manifest (order matters)
  * 6. Dispose pipelines and output results
  *
  * Model routing:
- *   code  -> jinaai/jina-embeddings-v2-base-code, col-768
- *   text  -> nomic-ai/nomic-embed-text-v1.5, col-768 (prefix: "search_document: ")
- *   image -> Xenova/clip-vit-base-patch16, col-512 (one vector per file)
+ *   code  -> onnx-community/Qwen3-Embedding-0.6B-ONNX, col-768
+ *   text  -> onnx-community/Qwen3-Embedding-0.6B-ONNX, col-768
+ *   image -> Xenova/siglip-base-patch16-224, col-768
  */
 import * as path from 'path';
 import * as fsp from 'fs/promises';
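The pipeline-flow comment above is the contract the rest of these hunks implement. A minimal sketch of the ordering it describes, where `openProjectCollections` and `saveManifest` appear in this diff but `loadManifest` and the loop body are stand-ins:

```js
// Sketch of the index pipeline ordering, assuming the helper names above.
async function indexProject(absPath, types /* ['code', 'text', 'image'] */) {
  const { col768, storagePath } = openProjectCollections(absPath);
  const manifest = loadManifest(absPath); // hypothetical loader for the incremental cache
  for (const type of types) {
    // steps a-f: scan, diff against manifest, remove deleted chunks from col-768,
    // chunk, batch embed, insert into col-768
  }
  col768.optimize(); // step 5: optimize first...
  col768.close();
  saveManifest(absPath, manifest); // ...then persist the manifest, so a crash during
                                   // optimize leaves a stale manifest, not a stale index
}
```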
@@ -195,14 +195,12 @@ async function runTextEmbeddingPipeline(opts) {
     progress.update(`${type}: loading model...`);
     const { createEmbeddingPipeline } = await import('../../services/model-router.js');
     pipe = await createEmbeddingPipeline(type);
-    // Nomic requires "search_document: " prefix on indexed documents
-    const prefix = type === 'text' ? 'search_document: ' : '';
     const totalBatches = Math.ceil(allPendingChunks.length / BATCH_SIZE);
     for (let batchStart = 0; batchStart < allPendingChunks.length; batchStart += BATCH_SIZE) {
         const batchNum = Math.floor(batchStart / BATCH_SIZE) + 1;
         progress.update(`${type}: embedding`, batchNum, totalBatches);
         const batch = allPendingChunks.slice(batchStart, batchStart + BATCH_SIZE);
-        const texts = batch.map((c) => prefix + c.text);
+        const texts = batch.map((c) => c.text);
         const embeddings = await pipe.embed(texts);
         for (let i = 0; i < batch.length; i++) {
             const chunk = batch[i];
@@ -212,7 +210,7 @@ async function runTextEmbeddingPipeline(opts) {
                 modelId: pipe.modelId,
                 lineStart: chunk.lineStart,
                 lineEnd: chunk.lineEnd,
-                chunkText: chunk.text, // store without prefix
+                chunkText: chunk.text,
             });
             chunksCreated++;
         }
@@ -235,23 +233,21 @@ export async function runIndex(targetPath, options) {
     const { ProgressReporter } = await import('../progress.js');
     const progress = new ProgressReporter({
         quiet: options.quiet,
-        json: options.format !== 'text',
+        json: options.format === 'json',
     });
     try {
         // 1. Resolve path
         const absPath = path.resolve(targetPath);
         // 2. Open vector collections
         const { openProjectCollections } = await import('../../services/vector-db.js');
-        let { col768, col512, storagePath } = openProjectCollections(absPath);
+        let { col768, storagePath } = openProjectCollections(absPath);
         // 3. Handle --clear
         // rmSync removes .ez-search/ entirely (including manifest.json inside it)
         if (options.clear) {
             col768.close();
-            col512.close();
             rmSync(storagePath, { recursive: true, force: true });
             const reopened = openProjectCollections(absPath);
             col768 = reopened.col768;
-            col512 = reopened.col512;
             storagePath = reopened.storagePath;
         }
         // 4. Load manifest and helpers
@@ -272,7 +268,6 @@ export async function runIndex(targetPath, options) {
         const allDeletedPaths = [];
         // Per-type file counts for text output
         const typeFileCounts = {};
-        let imageFilesProcessed = false;
         for (const fileType of typesToIndex) {
             // Scan files of this type
             const scannedFiles = [];
@@ -329,7 +324,7 @@ export async function runIndex(targetPath, options) {
                 }
             }
             else if (fileType === 'image') {
-                // Image pipeline: one vector per file, goes into col-512
+                // Image pipeline: one vector per file, goes into col-768
                 const { EXTENSION_MAP } = await import('../../types.js');
                 const deletedPaths = Object.keys(manifest.files).filter((relPath) => {
                     if (scannedSet.has(relPath))
@@ -340,7 +335,7 @@ export async function runIndex(targetPath, options) {
                 for (const deletedPath of deletedPaths) {
                     const entry = manifest.files[deletedPath];
                     for (const chunk of entry.chunks) {
-                        col512.remove(chunk.id);
+                        col768.remove(chunk.id);
                         totalChunksRemoved++;
                     }
                     delete manifest.files[deletedPath];
@@ -379,7 +374,7 @@ export async function runIndex(targetPath, options) {
                     const fileHash = hashContent(buf);
                     const embedding = await imagePipeline.embedImage(buf);
                     const chunkId = makeChunkId(file.relativePath, 0);
-                    col512.insert(chunkId, embedding, {
+                    col768.insert(chunkId, embedding, {
                         filePath: file.relativePath,
                         chunkIndex: 0,
                         modelId: imagePipeline.modelId,
@@ -397,7 +392,6 @@ export async function runIndex(targetPath, options) {
                     totalFilesIndexed++;
                 }
                 await imagePipeline.dispose();
-                imageFilesProcessed = true;
                 typeFileCounts['image'] = (typeFileCounts['image'] ?? 0) + filesToProcess.length;
             }
         }
@@ -412,10 +406,6 @@ export async function runIndex(targetPath, options) {
         progress.update('optimizing index...');
         col768.optimize();
         col768.close();
-        if (imageFilesProcessed) {
-            col512.optimize();
-        }
-        col512.close();
         saveManifest(absPath, manifest);
         progress.done();
         // 7. Output results

@@ -1,21 +1,20 @@
 /**
- * Query command — multi-collection grouped semantic search.
+ * Query command — single-collection grouped semantic search.
  *
  * Pipeline:
  * 1. Resolve project directory (cwd)
- * 2. Open vector collections (col-768 for code/text, col-512 for images)
+ * 2. Open vector collection (col-768 for all types)
  * 3. Load manifest for totalIndexed count
  * 4. For each requested type:
- *    a. code:  embed with Jina, over-fetch topK*5 from col-768, filter by jina modelId
- *    b. text:  embed with Nomic ("search_query: " prefix), over-fetch topK*5 from col-768, filter by nomic modelId
- *    c. image: embed with CLIP text encoder, over-fetch topK*5 from col-512, filter by clip modelId
+ *    a. code:  embed with Qwen3 (instruct prefix), query col-768, filter by Qwen3 modelId
+ *    b. text:  embed with Qwen3 (instruct prefix), query col-768, filter by Qwen3 modelId
+ *    c. image: embed with SigLIP text encoder, query col-768, filter by siglip modelId
  * 5. Apply --threshold and --dir filters per type
 * 6. Collapse adjacent chunks per type
 * 7. Sort by score desc, slice to topK per type
 * 8. Output grouped JSON { code: [...], text: [...], image: [...] } or text with ## headers
 *
- * col-768 holds BOTH code and text vectors; they are distinguished by modelId metadata.
- * Over-fetch topK*5 ensures enough candidates after modelId filtering.
+ * col-768 holds ALL vectors (code, text, image); they are distinguished by modelId metadata.
 */
 export async function runQuery(text, options) {
     const topK = parseInt(options.topK, 10);
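With everything in one collection, the per-type separation now rests entirely on the modelId predicates passed to `filterAndCollapse` and `filterImageResults` in the hunks below. A minimal sketch of the idea, assuming each normalized result carries a `modelId` field:

```js
// Sketch: one collection, separated by modelId metadata (predicates copied from this diff).
const isQwen3  = (id) => id.includes('Qwen3-Embedding'); // code and text share this model
const isSiglip = (id) => id.includes('siglip');          // image vectors

function pickImageHits(normalized) {
  return normalized.filter((r) => isSiglip(r.modelId)); // modelId field is an assumption
}
```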
@@ -59,7 +58,6 @@ export async function runQuery(text, options) {
     }
     else {
         // Pre-detect indexed types from manifest: only load models for types that have data.
-        // This avoids loading Jina when only text is indexed (or Nomic when only code is indexed).
         const { EXTENSION_MAP } = await import('../../types.js');
         const indexedTypes = new Set();
         for (const filePath of Object.keys(manifest.files)) {
@@ -84,7 +82,6 @@ export async function runQuery(text, options) {
     // 4. Open vector collections as needed
     const { openCollection } = await import('../../services/vector-db.js');
     const col768 = openCollection(projectDir, 'col-768');
-    const col512 = typesToQuery.includes('image') ? openCollection(projectDir, 'col-512') : null;
     try {
         // ── Helpers ──────────────────────────────────────────────────────────────
         const { normalizeResults, filterAndCollapse, filterImageResults } = await import('../../services/query-utils.js');
@@ -97,11 +94,16 @@ export async function runQuery(text, options) {
         let textResults = [];
         let imageResults = [];
         if (typesToQuery.includes('code')) {
-            // Code: Jina embedding, filter for jina modelId
+            // Code: Qwen3 embedding, filter for Qwen3 modelId
             let pipe = null;
             try {
+                if (process.stderr.isTTY)
+                    process.stderr.write('\r\x1b[Kcode: loading model...');
                 pipe = await createEmbeddingPipeline('code');
-                const [queryEmbedding] = await pipe.embed([text]);
+                if (process.stderr.isTTY)
+                    process.stderr.write('\r\x1b[K');
+                const prefixedQuery = `Instruct: Given a search query, retrieve relevant code snippets\nQuery: ${text}`;
+                const [queryEmbedding] = await pipe.embed([prefixedQuery]);
                 let rawResults;
                 try {
                     rawResults = col768.query(queryEmbedding, fetchCount);
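The instruct prefix shown above is applied only at query time; documents are indexed without a prefix (see the index-command hunk earlier). The template, with both task strings copied from this diff:

```js
// Qwen3 Instruct/Query template. Task strings are the ones used in this diff.
const instructQuery = (task, text) => `Instruct: ${task}\nQuery: ${text}`;

instructQuery('Given a search query, retrieve relevant code snippets', 'parse CLI flags');
instructQuery('Given a search query, retrieve relevant text passages', 'install instructions');
```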
@@ -110,7 +112,7 @@ export async function runQuery(text, options) {
                     rawResults = [];
                 }
                 const normalized = normalizeResults(rawResults);
-                codeResults = filterAndCollapse(normalized, (id) => id.includes('jina') || id.startsWith('jinaai/'), { threshold, dir: options.dir, topK });
+                codeResults = filterAndCollapse(normalized, (id) => id.includes('Qwen3-Embedding'), { threshold, dir: options.dir, topK });
             }
             catch (err) {
                 process.stderr.write(`[query] code pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
@@ -121,11 +123,15 @@ export async function runQuery(text, options) {
             }
         }
         if (typesToQuery.includes('text')) {
-            // Text: Nomic embedding with "search_query: " prefix, filter for nomic modelId
+            // Text: Qwen3 embedding with instruct prefix, filter for Qwen3 modelId
             let pipe = null;
             try {
+                if (process.stderr.isTTY)
+                    process.stderr.write('\r\x1b[Ktext: loading model...');
                 pipe = await createEmbeddingPipeline('text');
-                const prefixedQuery = `search_query: ${text}`;
+                if (process.stderr.isTTY)
+                    process.stderr.write('\r\x1b[K');
+                const prefixedQuery = `Instruct: Given a search query, retrieve relevant text passages\nQuery: ${text}`;
                 const [queryEmbedding] = await pipe.embed([prefixedQuery]);
                 let rawResults;
                 try {
@@ -135,7 +141,7 @@ export async function runQuery(text, options) {
                     rawResults = [];
                 }
                 const normalized = normalizeResults(rawResults);
-                textResults = filterAndCollapse(normalized, (id) => id.includes('nomic'), { threshold, dir: options.dir, topK });
+                textResults = filterAndCollapse(normalized, (id) => id.includes('Qwen3-Embedding'), { threshold, dir: options.dir, topK });
             }
             catch (err) {
                 process.stderr.write(`[query] text pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
@@ -145,22 +151,26 @@ export async function runQuery(text, options) {
                 await pipe.dispose();
             }
         }
-        if (typesToQuery.includes('image') && col512) {
-            // Image: CLIP text embedding, query col-512, filter for clip modelId
+        if (typesToQuery.includes('image')) {
+            // Image: SigLIP text embedding, query col-768, filter for siglip modelId
             let pipe = null;
             try {
-                const { createClipTextPipeline } = await import('../../services/image-embedder.js');
-                pipe = await createClipTextPipeline();
+                if (process.stderr.isTTY)
+                    process.stderr.write('\r\x1b[Kimage: loading model...');
+                const { createSiglipTextPipeline } = await import('../../services/image-embedder.js');
+                pipe = await createSiglipTextPipeline();
+                if (process.stderr.isTTY)
+                    process.stderr.write('\r\x1b[K');
                 const [queryEmbedding] = await pipe.embedText([text]);
                 let rawResults;
                 try {
-                    rawResults = col512.query(queryEmbedding, fetchCount);
+                    rawResults = col768.query(queryEmbedding, fetchCount);
                 }
                 catch {
                     rawResults = [];
                 }
                 const normalized = normalizeResults(rawResults);
-                imageResults = filterImageResults(normalized, (id) => id.includes('clip'), { threshold, dir: options.dir, topK });
+                imageResults = filterImageResults(normalized, (id) => id.includes('siglip'), { threshold, dir: options.dir, topK });
             }
             catch (err) {
                 process.stderr.write(`[query] image pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
@@ -261,8 +271,6 @@ export async function runQuery(text, options) {
         }
         finally {
             col768.close();
-            if (col512)
-                col512.close();
         }
     }
     catch (err) {
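The `\r\x1b[K` pair that recurs throughout these hunks is a carriage return followed by the ANSI erase-to-end-of-line sequence, so each status message overwrites the previous one on a single stderr line, and the TTY guard keeps redirected output clean. A hypothetical helper distilling the pattern:

```js
// \r moves to column 0; \x1b[K (ANSI "erase in line") clears to end of line.
function statusLine(msg) {
  if (process.stderr.isTTY)
    process.stderr.write(`\r\x1b[K${msg}`);
}
statusLine('text: loading model...');
statusLine(''); // clears the status line
```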
@@ -0,0 +1,68 @@
+/**
+ * Default progress callback for Transformers.js model downloads.
+ *
+ * Transformers.js fires download/progress events even for cached models, so we
+ * check the cache directory to decide the label:
+ *   - Cache miss → "Downloading <model> — <file> XX%"
+ *   - Cache hit  → "Loading <model>..."
+ *
+ * Output goes to stderr and only when running in a TTY.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { resolveModelCachePath } from '../config/paths.js';
+function isModelCached(modelId) {
+    // Transformers.js stores models under <cacheDir>/<org>/<repo>/
+    // e.g. ~/.ez-search/models/onnx-community/Qwen3-Embedding-0.6B-ONNX/
+    const modelDir = path.join(resolveModelCachePath(), ...modelId.split('/'));
+    try {
+        return fs.statSync(modelDir).isDirectory();
+    }
+    catch {
+        return false;
+    }
+}
+export function createDownloadProgressCallback(modelId) {
+    const isTTY = !!process.stderr.isTTY;
+    if (!isTTY)
+        return () => { };
+    const cached = isModelCached(modelId);
+    // For cached models, show a single "Loading..." and clear on ready
+    if (cached) {
+        let shown = false;
+        return (event) => {
+            if (!event || typeof event !== 'object')
+                return;
+            const e = event;
+            if (!shown && e.status === 'initiate') {
+                shown = true;
+                process.stderr.write(`\r\x1b[KLoading ${modelId}...`);
+            }
+            if (e.status === 'ready') {
+                process.stderr.write('\r\x1b[K');
+            }
+        };
+    }
+    // For uncached models, show per-file download progress
+    const downloading = new Set();
+    return (event) => {
+        if (!event || typeof event !== 'object')
+            return;
+        const e = event;
+        if (e.status === 'download' && e.file) {
+            if (!downloading.has(e.file)) {
+                downloading.add(e.file);
+                process.stderr.write(`\r\x1b[KDownloading ${modelId} — ${e.file}...`);
+            }
+            else if (typeof e.progress === 'number') {
+                process.stderr.write(`\r\x1b[KDownloading ${modelId} — ${e.file} ${Math.round(e.progress)}%`);
+            }
+        }
+        else if (e.status === 'done' && e.file) {
+            downloading.delete(e.file);
+        }
+        if (e.status === 'ready') {
+            process.stderr.write('\r\x1b[K');
+        }
+    };
+}
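This callback is consumed through Transformers.js's `progress_callback` option, exactly as the image-embedder and model-router hunks below wire it. A usage sketch with the model id from this diff (the import path assumes a sibling module):

```js
// Usage sketch. Mirrors the from_pretrained call sites later in this diff.
import { AutoTokenizer } from '@huggingface/transformers';
import { createDownloadProgressCallback } from './download-progress.js';

const modelId = 'onnx-community/Qwen3-Embedding-0.6B-ONNX';
const cb = createDownloadProgressCallback(modelId); // "Loading..." when cached, per-file % otherwise
const tokenizer = await AutoTokenizer.from_pretrained(modelId, { progress_callback: cb });
```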
@@ -1,27 +1,25 @@
 /**
- * CLIP image embedding service — converts image files to 512-dim Float32Array embeddings.
+ * SigLIP image embedding service — converts image files to 768-dim Float32Array embeddings.
  *
- * Uses CLIPVisionModelWithProjection (not the full CLIP model) with fp32 dtype.
- * Quantized variants (int8, uint8) fail in onnxruntime-node with:
- *   "ConvInteger(10) is not implemented"
- * Therefore, dtype: 'fp32' is REQUIRED and must not be changed.
+ * Uses SiglipVisionModel (not the full SigLIP model) with fp32 dtype.
  *
  * Supported formats: .jpg, .jpeg, .png, .webp (anything RawImage can decode).
  *
- * One image produces one 512-dim vector — no chunking is performed.
+ * One image produces one 768-dim vector — no chunking is performed.
  * Model weights are cached in ~/.ez-search/models/ alongside text/code models.
 */
-import { CLIPVisionModelWithProjection, CLIPTextModelWithProjection, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
+import { SiglipVisionModel, SiglipTextModel, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
 import { resolveModelCachePath } from '../config/paths.js';
+import { createDownloadProgressCallback } from './download-progress.js';
 // ── Constants ─────────────────────────────────────────────────────────────────
-const CLIP_MODEL_ID = 'Xenova/clip-vit-base-patch16';
-const CLIP_DIM = 512;
+const SIGLIP_MODEL_ID = 'Xenova/siglip-base-patch16-224';
+const SIGLIP_DIM = 768;
 // ── Helpers ──────────────────────────────────────────────────────────────────
 /**
  * L2-normalize a vector in-place.
  *
- * CLIPVisionModelWithProjection and CLIPTextModelWithProjection do NOT
- * normalize their output — only the full CLIPModel does. Without this,
+ * SiglipVisionModel and SiglipTextModel do NOT
+ * normalize their output — only the full SigLIP model does. Without this,
  * cosine distances in Zvec are meaningless (all scores collapse to ~0.21).
  */
 function l2Normalize(vec) {
@@ -36,41 +34,38 @@ function l2Normalize(vec) {
 }
 // ── Public API ────────────────────────────────────────────────────────────────
 /**
- * Create an ImageEmbeddingPipeline backed by CLIP ViT-B/16 (fp32).
+ * Create an ImageEmbeddingPipeline backed by SigLIP ViT-B/16 (fp32).
  *
- * Loads the AutoProcessor and CLIPVisionModelWithProjection in parallel.
+ * Loads the AutoProcessor and SiglipVisionModel in parallel.
  * Model weights are cached in ~/.ez-search/models/.
- *
- * IMPORTANT: dtype must remain 'fp32'. Quantized variants fail in Node.js with
- * "ConvInteger(10) is not implemented" from onnxruntime-node.
 */
 export async function createImageEmbeddingPipeline() {
     // Set cache dir BEFORE first model load — this is critical
     env.cacheDir = resolveModelCachePath();
     env.allowRemoteModels = true;
+    const cb = createDownloadProgressCallback(SIGLIP_MODEL_ID);
     // Load processor and vision model in parallel for faster startup
     const [processor, visionModel] = await Promise.all([
-        AutoProcessor.from_pretrained(CLIP_MODEL_ID),
-        CLIPVisionModelWithProjection.from_pretrained(CLIP_MODEL_ID, {
-            // fp32 is REQUIRED — do not use 'int8', 'uint8', or other quantized dtypes.
-            // onnxruntime-node does not implement ConvInteger(10), which quantized CLIP uses.
+        AutoProcessor.from_pretrained(SIGLIP_MODEL_ID, { progress_callback: cb }),
+        SiglipVisionModel.from_pretrained(SIGLIP_MODEL_ID, {
             dtype: 'fp32',
+            progress_callback: cb,
         }),
     ]);
-    console.error(`[image-embedder] Loaded CLIP vision model (fp32)`);
+    console.error(`[image-embedder] Loaded SigLIP vision model (fp32)`);
     return {
-        modelId: CLIP_MODEL_ID,
-        dim: CLIP_DIM,
+        modelId: SIGLIP_MODEL_ID,
+        dim: SIGLIP_DIM,
         async embedImage(buf) {
             // Use fromBlob instead of file:// URLs to avoid encoding issues with
             // special Unicode characters in filenames (e.g. macOS narrow no-break spaces).
             const image = await RawImage.fromBlob(new Blob([new Uint8Array(buf)]));
-            // Preprocess: resize, normalize, convert to tensor expected by CLIP
+            // Preprocess: resize, normalize, convert to tensor expected by SigLIP
             const inputs = await processor(image);
-            // Run the vision encoder — output.image_embeds is a [1, 512] Tensor
+            // Run the vision encoder — output.pooler_output is a [1, 768] Tensor
             const output = await visionModel(inputs);
             // Extract and L2-normalize (projection models don't normalize)
-            return l2Normalize(new Float32Array(output.image_embeds.data.slice(0, CLIP_DIM)));
+            return l2Normalize(new Float32Array(output.pooler_output.data.slice(0, SIGLIP_DIM)));
         },
         async dispose() {
             if (typeof visionModel.dispose === 'function') {
@@ -80,30 +75,31 @@ export async function createImageEmbeddingPipeline() {
     };
 }
 /**
- * Create a ClipTextPipeline backed by CLIP ViT-B/16 (fp32).
+ * Create a SiglipTextPipeline backed by SigLIP ViT-B/16 (fp32).
  *
- * Loads AutoTokenizer and CLIPTextModelWithProjection in parallel.
- * Used for text-to-image search: encode query text into CLIP's 512-dim space,
+ * Loads AutoTokenizer and SiglipTextModel in parallel.
+ * Used for text-to-image search: encode query text into SigLIP's 768-dim space,
 * then find nearest image embeddings.
 */
-export async function createClipTextPipeline() {
+export async function createSiglipTextPipeline() {
     env.cacheDir = resolveModelCachePath();
     env.allowRemoteModels = true;
+    const cb = createDownloadProgressCallback(SIGLIP_MODEL_ID);
     const [tokenizer, textModel] = await Promise.all([
-        AutoTokenizer.from_pretrained(CLIP_MODEL_ID),
-        CLIPTextModelWithProjection.from_pretrained(CLIP_MODEL_ID, { dtype: 'fp32' }),
+        AutoTokenizer.from_pretrained(SIGLIP_MODEL_ID, { progress_callback: cb }),
+        SiglipTextModel.from_pretrained(SIGLIP_MODEL_ID, { dtype: 'fp32', progress_callback: cb }),
     ]);
-    console.error(`[image-embedder] Loaded CLIP text model (fp32)`);
+    console.error(`[image-embedder] Loaded SigLIP text model (fp32)`);
     return {
-        modelId: CLIP_MODEL_ID,
-        dim: CLIP_DIM,
+        modelId: SIGLIP_MODEL_ID,
+        dim: SIGLIP_DIM,
         async embedText(texts) {
             const inputs = tokenizer(texts, { padding: true, truncation: true });
             const output = await textModel(inputs);
-            const data = output.text_embeds.data;
+            const data = output.pooler_output.data;
             const embeddings = [];
             for (let i = 0; i < texts.length; i++) {
-                embeddings.push(l2Normalize(new Float32Array(data.slice(i * CLIP_DIM, (i + 1) * CLIP_DIM))));
+                embeddings.push(l2Normalize(new Float32Array(data.slice(i * SIGLIP_DIM, (i + 1) * SIGLIP_DIM))));
             }
             return embeddings;
         },
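Together the two pipelines give text-to-image search: images are embedded by the vision tower at index time, queries by the text tower at query time, both landing in the shared 768-dim space of col-768. A sketch assembled from calls that appear elsewhere in this diff; `col768`, `fetchCount`, and `normalizeResults` come from the query-command scope, and the `modelId` field on normalized results is an assumption:

```js
// Text-to-image search path, assembled from calls shown in this diff.
const pipe = await createSiglipTextPipeline();
const [queryEmbedding] = await pipe.embedText(['bar chart of monthly revenue']);
const raw = col768.query(queryEmbedding, fetchCount);
const imageHits = normalizeResults(raw).filter((r) => r.modelId.includes('siglip'));
await pipe.dispose();
```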
@@ -13,7 +13,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from '
 import * as path from 'path';
 import { resolveProjectStoragePath } from '../config/paths.js';
 // ── Constants ─────────────────────────────────────────────────────────────────
-export const MANIFEST_VERSION = 4;
+export const MANIFEST_VERSION = 5;
 export const MANIFEST_FILENAME = 'manifest.json';
 // ── Helpers ───────────────────────────────────────────────────────────────────
 function manifestPath(projectDir) {
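Bumping MANIFEST_VERSION from 4 to 5 invalidates manifests written by 1.1.0, so existing projects fully re-embed with the new models rather than mixing Jina/Nomic/CLIP vectors with Qwen3/SigLIP ones. The loader itself is outside this diff; a hedged sketch of the kind of check implied, assuming the manifest stores its version in a `version` field:

```js
// Hypothetical sketch. The real loadManifest is not part of this diff.
// A version mismatch falls back to an empty manifest, forcing a full re-index.
function loadManifestSafe(projectDir) {
  try {
    const m = JSON.parse(readFileSync(manifestPath(projectDir), 'utf8'));
    if (m.version === MANIFEST_VERSION) return m;
  } catch { /* missing or corrupt manifest */ }
  return { version: MANIFEST_VERSION, files: {} };
}
```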
@@ -6,32 +6,25 @@
 *
 * Model cache is stored in ~/.ez-search/models/ (not the default HuggingFace cache).
 *
- * NOTE: The nomic text model requires task prefixes on inputs callers are responsible:
- *   - Documents: prefix with "search_document: "
- *   - Queries:   prefix with "search_query: "
- * The pipeline itself does NOT add prefixes automatically.
+ * Both code and text use Qwen3-Embedding-0.6B. Output is truncated from 1024 to 768 dims
+ * via Matryoshka Representation Learning, then L2-normalized.
+ *
+ * Query prefixing (Instruct/Query format) is the caller's responsibility.
 */
 import { pipeline, env } from '@huggingface/transformers';
 import { resolveModelCachePath } from '../config/paths.js';
+import { createDownloadProgressCallback } from './download-progress.js';
 // ── Model registry ────────────────────────────────────────────────────────────
 const MODEL_REGISTRY = {
     code: {
-        id: 'jinaai/jina-embeddings-v2-base-code',
+        id: 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
+        nativeDim: 1024,
         dim: 768,
     },
     text: {
-        id: 'nomic-ai/nomic-embed-text-v1.5',
+        id: 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
+        nativeDim: 1024,
         dim: 768,
-        /**
-         * Nomic requires task prefixes on all inputs:
-         *   document: "search_document: <text>"
-         *   query:    "search_query: <text>"
-         * The embed() method does NOT add these — callers must prefix their strings.
-         */
-        taskPrefix: {
-            document: 'search_document: ',
-            query: 'search_query: ',
-        },
     },
 };
 // ── Helpers ───────────────────────────────────────────────────────────────────
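The Matryoshka truncation described above is implemented in the embed() hunk below: keep the first 768 of the model's 1024 output dims, then re-normalize, because truncating a unit vector leaves its norm below 1. The step in isolation:

```js
// Matryoshka truncation: keep the first `dim` components, then restore unit norm.
function truncateEmbedding(raw, dim) {
  const v = new Float32Array(raw.subarray(0, dim)); // copy, not an alias of the 1024-dim buffer
  let norm = 0;
  for (const x of v) norm += x * x;
  norm = Math.sqrt(norm);
  if (norm > 0) for (let i = 0; i < v.length; i++) v[i] /= norm;
  return v;
}
```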
@@ -49,6 +42,16 @@ function extractEmbedding(output) {
     }
     throw new Error(`Unexpected embedding output shape: ${JSON.stringify(output)}`);
 }
+function l2Normalize(vec) {
+    let norm = 0;
+    for (let i = 0; i < vec.length; i++)
+        norm += vec[i] * vec[i];
+    norm = Math.sqrt(norm);
+    if (norm > 0)
+        for (let i = 0; i < vec.length; i++)
+            vec[i] /= norm;
+    return vec;
+}
 // ── Public API ────────────────────────────────────────────────────────────────
 /**
  * Create an EmbeddingPipeline for the given model type.
@@ -58,12 +61,11 @@ function extractEmbedding(output) {
  *
  * Model weights are cached in ~/.ez-search/models/ (set before first pipeline() call).
  *
- * @param modelType - 'code' for jinaai/jina-embeddings-v2-base-code (768-dim)
- *                    'text' for nomic-ai/nomic-embed-text-v1.5 (768-dim, prefixes required)
+ * @param modelType - 'code' or 'text', both backed by Qwen3-Embedding-0.6B (768-dim after truncation)
 */
 export async function createEmbeddingPipeline(modelType, options = {}) {
     const model = MODEL_REGISTRY[modelType];
-    const progressCallback = options.progressCallback;
+    const cb = options.progressCallback ?? createDownloadProgressCallback(model.id);
     // Set cache dir BEFORE first pipeline() call — this is critical
     env.cacheDir = resolveModelCachePath();
     env.allowRemoteModels = true;
@@ -74,7 +76,7 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
         pipe = await pipeline('feature-extraction', model.id, {
             device: 'webgpu',
             dtype: 'fp32',
-            ...(progressCallback ? { progress_callback: progressCallback } : {}),
+            progress_callback: cb,
         });
         backend = 'webgpu';
         console.error(`[model-router] Using WebGPU for ${model.id}`);
@@ -86,7 +88,7 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
         pipe = await pipeline('feature-extraction', model.id, {
             device: 'cpu',
             dtype: 'q8',
-            ...(progressCallback ? { progress_callback: progressCallback } : {}),
+            progress_callback: cb,
         });
         backend = 'cpu';
         console.error(`[model-router] Using CPU for ${model.id}`);
@@ -97,7 +99,11 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
         dim: model.dim,
         async embed(texts) {
             const outputs = await Promise.all(texts.map((text) => pipe(text, { pooling: 'mean', normalize: true })));
-            return outputs.map(extractEmbedding);
+            return outputs.map((output) => {
+                const raw = extractEmbedding(output);
+                const truncated = new Float32Array(raw.buffer, raw.byteOffset, model.dim);
+                return l2Normalize(new Float32Array(truncated));
+            });
         },
         async dispose() {
             if (pipe && typeof pipe.dispose === 'function') {

@@ -2,9 +2,8 @@
 * Vector DB service — wraps @zvec/zvec behind a clean interface.
 *
 * Uses createRequire because @zvec/zvec is a CommonJS package in an ESM project.
- * Two collections per project:
- *   col-768 — for code/text embeddings (jina, nomic, 768-dim)
- *   col-512 — for image embeddings (CLIP, 512-dim)
+ * Single collection per project:
+ *   col-768 — for all embeddings (code, text, image — all 768-dim)
 *
 * Storage lives at <project>/.ez-search/ (project-scoped).
 */
@@ -17,7 +16,7 @@ const { ZVecCreateAndOpen, ZVecOpen, ZVecCollectionSchema, ZVecDataType, ZVecInd
 // Initialize Zvec at module level — suppress noisy logs
 ZVecInitialize({ logLevel: ZVecLogLevel.WARN });
 // ── Schema versioning ─────────────────────────────────────────────────────────
-const SCHEMA_VERSION = 2;
+const SCHEMA_VERSION = 3;
 // ── Helpers ───────────────────────────────────────────────────────────────────
 /**
  * Validate that an ID doesn't contain colons (Zvec rejects them).
@@ -144,31 +143,16 @@ function createCollection(storageDir, name, dim) {
         },
     };
 }
-/**
- * Open both vector collections for a project.
- *
- * Storage layout:
- *   <projectDir>/.ez-search/col-768/  (768-dim, code/text)
- *   <projectDir>/.ez-search/col-512/  (512-dim, images)
- *
- * Creates the storage directory if it does not exist.
- */
 export function openProjectCollections(projectDir) {
     const storageDir = resolveProjectStoragePath(projectDir);
     mkdirSync(storageDir, { recursive: true });
     ensureSchemaVersion(storageDir);
     const col768 = createCollection(storageDir, 'col-768', 768);
-    const col512 = createCollection(storageDir, 'col-512', 512);
-    return { col768, col512, storagePath: storageDir };
+    return { col768, storagePath: storageDir };
 }
-/**
- * Open a single vector collection by name.
- * Use this when you only need one collection (e.g. query only needs col-768).
- */
 export function openCollection(projectDir, name) {
     const storageDir = resolveProjectStoragePath(projectDir);
     mkdirSync(storageDir, { recursive: true });
     ensureSchemaVersion(storageDir);
-    const dim = name === 'col-768' ? 768 : 512;
-    return createCollection(storageDir, name, dim);
+    return createCollection(storageDir, name, 768);
 }
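SCHEMA_VERSION 3 plays the same role on the storage side as MANIFEST_VERSION 5 above: `ensureSchemaVersion` (its body is not shown in this diff) keeps 1.1.0's two-collection layout from being reused. Callers now always open the one 768-dim collection, as the query command does earlier in this diff; `projectDir`, `queryEmbedding`, and `fetchCount` below come from that scope:

```js
// Matches the query-command call site earlier in this diff.
const col768 = openCollection(projectDir, 'col-768'); // always 768-dim now
try {
  const rawResults = col768.query(queryEmbedding, fetchCount);
} finally {
  col768.close();
}
```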
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@ez-corp/ez-search",
-  "version": "1.1.0",
+  "version": "1.1.2",
   "description": "Semantic codebase search with zero cloud dependencies",
   "type": "module",
   "main": "dist/cli/index.js",