npm - @ez-corp/ez-search - Versions diffs - 1.1.0 → 1.1.2 - Mend

@ez-corp/ez-search 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/cli/commands/index-cmd.js +14 -24
package/dist/cli/commands/query-cmd.js +31 -23
package/dist/services/download-progress.js +68 -0
package/dist/services/image-embedder.js +33 -37
package/dist/services/manifest-cache.js +1 -1
package/dist/services/model-router.js +28 -22
package/dist/services/vector-db.js +5 -21
package/package.json +1 -1

package/dist/cli/commands/index-cmd.js CHANGED Viewed

@@ -2,23 +2,23 @@
  * Index command — end-to-end pipeline: scan -> manifest check -> chunk -> embed -> store.
  *
  * Pipeline flow (per type):
- *   1. Resolve path and open vector collections
+ *   1. Resolve path and open vector collection
  *   2. Handle --clear (wipe storage + manifest)
  *   3. Load manifest (incremental cache)
  *   4. For each type in [code, text, image]:
  *      a. Scan files of that type
  *      b. Detect changed/new/deleted files against manifest
- *      c. Remove deleted files' chunks from the appropriate collection
+ *      c. Remove deleted files' chunks from col-768
  *      d. Chunk changed/new files
  *      e. Batch embed with the correct model
- *      f. Insert embeddings into the appropriate collection
- *   5. Optimize collections THEN save manifest (order matters)
+ *      f. Insert embeddings into col-768
+ *   5. Optimize collection THEN save manifest (order matters)
  *   6. Dispose pipelines and output results
  *
  * Model routing:
- *   code  -> jinaai/jina-embeddings-v2-base-code, col-768
- *   text  -> nomic-ai/nomic-embed-text-v1.5, col-768  (prefix: "search_document: ")
- *   image -> Xenova/clip-vit-base-patch16, col-512     (one vector per file)
+ *   code  -> onnx-community/Qwen3-Embedding-0.6B-ONNX, col-768
+ *   text  -> onnx-community/Qwen3-Embedding-0.6B-ONNX, col-768
+ *   image -> Xenova/siglip-base-patch16-224, col-768
  */
 import * as path from 'path';
 import * as fsp from 'fs/promises';
@@ -195,14 +195,12 @@ async function runTextEmbeddingPipeline(opts) {
         progress.update(`${type}: loading model...`);
         const { createEmbeddingPipeline } = await import('../../services/model-router.js');
         pipe = await createEmbeddingPipeline(type);
-        // Nomic requires "search_document: " prefix on indexed documents
-        const prefix = type === 'text' ? 'search_document: ' : '';
         const totalBatches = Math.ceil(allPendingChunks.length / BATCH_SIZE);
         for (let batchStart = 0; batchStart < allPendingChunks.length; batchStart += BATCH_SIZE) {
             const batchNum = Math.floor(batchStart / BATCH_SIZE) + 1;
             progress.update(`${type}: embedding`, batchNum, totalBatches);
             const batch = allPendingChunks.slice(batchStart, batchStart + BATCH_SIZE);
-            const texts = batch.map((c) => prefix + c.text);
+            const texts = batch.map((c) => c.text);
             const embeddings = await pipe.embed(texts);
             for (let i = 0; i < batch.length; i++) {
                 const chunk = batch[i];
@@ -212,7 +210,7 @@ async function runTextEmbeddingPipeline(opts) {
                     modelId: pipe.modelId,
                     lineStart: chunk.lineStart,
                     lineEnd: chunk.lineEnd,
-                    chunkText: chunk.text, // store without prefix
+                    chunkText: chunk.text,
                 });
                 chunksCreated++;
             }
@@ -235,23 +233,21 @@ export async function runIndex(targetPath, options) {
     const { ProgressReporter } = await import('../progress.js');
     const progress = new ProgressReporter({
         quiet: options.quiet,
-        json: options.format !== 'text',
+        json: options.format === 'json',
     });
     try {
         // 1. Resolve path
         const absPath = path.resolve(targetPath);
         // 2. Open vector collection (col-768 holds code, text and image vectors)
         const { openProjectCollections } = await import('../../services/vector-db.js');
-        let { col768, col512, storagePath } = openProjectCollections(absPath);
+        let { col768, storagePath } = openProjectCollections(absPath);
         // 3. Handle --clear
         // rmSync removes .ez-search/ entirely (including manifest.json inside it)
         if (options.clear) {
             col768.close();
-            col512.close();
             rmSync(storagePath, { recursive: true, force: true });
             const reopened = openProjectCollections(absPath);
             col768 = reopened.col768;
-            col512 = reopened.col512;
             storagePath = reopened.storagePath;
         }
         // 4. Load manifest and helpers
@@ -272,7 +268,6 @@ export async function runIndex(targetPath, options) {
         const allDeletedPaths = [];
         // Per-type file counts for text output
         const typeFileCounts = {};
-        let imageFilesProcessed = false;
         for (const fileType of typesToIndex) {
             // Scan files of this type
             const scannedFiles = [];
@@ -329,7 +324,7 @@ export async function runIndex(targetPath, options) {
                 }
             }
             else if (fileType === 'image') {
-                // Image pipeline: one vector per file, goes into col-512
+                // Image pipeline: one vector per file, goes into col-768
                 const { EXTENSION_MAP } = await import('../../types.js');
                 const deletedPaths = Object.keys(manifest.files).filter((relPath) => {
                     if (scannedSet.has(relPath))
@@ -340,7 +335,7 @@ export async function runIndex(targetPath, options) {
                 for (const deletedPath of deletedPaths) {
                     const entry = manifest.files[deletedPath];
                     for (const chunk of entry.chunks) {
-                        col512.remove(chunk.id);
+                        col768.remove(chunk.id);
                         totalChunksRemoved++;
                     }
                     delete manifest.files[deletedPath];
@@ -379,7 +374,7 @@ export async function runIndex(targetPath, options) {
                         const fileHash = hashContent(buf);
                         const embedding = await imagePipeline.embedImage(buf);
                         const chunkId = makeChunkId(file.relativePath, 0);
-                        col512.insert(chunkId, embedding, {
+                        col768.insert(chunkId, embedding, {
                             filePath: file.relativePath,
                             chunkIndex: 0,
                             modelId: imagePipeline.modelId,
@@ -397,7 +392,6 @@ export async function runIndex(targetPath, options) {
                         totalFilesIndexed++;
                     }
                     await imagePipeline.dispose();
-                    imageFilesProcessed = true;
                     typeFileCounts['image'] = (typeFileCounts['image'] ?? 0) + filesToProcess.length;
                 }
             }
@@ -412,10 +406,6 @@ export async function runIndex(targetPath, options) {
         progress.update('optimizing index...');
         col768.optimize();
         col768.close();
-        if (imageFilesProcessed) {
-            col512.optimize();
-        }
-        col512.close();
         saveManifest(absPath, manifest);
         progress.done();
         // 7. Output results

package/dist/cli/commands/query-cmd.js CHANGED Viewed

@@ -1,21 +1,20 @@
 /**
- * Query command — multi-collection grouped semantic search.
+ * Query command — single-collection grouped semantic search.
  *
  * Pipeline:
  *   1. Resolve project directory (cwd)
- *   2. Open vector collections (col-768 for code/text, col-512 for images)
+ *   2. Open vector collection (col-768 for all types)
  *   3. Load manifest for totalIndexed count
  *   4. For each requested type:
- *      a. code:  embed with Jina, over-fetch topK*5 from col-768, filter by jina modelId
- *      b. text:  embed with Nomic ("search_query: " prefix), over-fetch topK*5 from col-768, filter by nomic modelId
- *      c. image: embed with CLIP text encoder, over-fetch topK*5 from col-512, filter by clip modelId
+ *      a. code:  embed with Qwen3 (instruct prefix), query col-768, filter by Qwen3 modelId
+ *      b. text:  embed with Qwen3 (instruct prefix), query col-768, filter by Qwen3 modelId
+ *      c. image: embed with SigLIP text encoder, query col-768, filter by siglip modelId
  *   5. Apply --threshold and --dir filters per type
  *   6. Collapse adjacent chunks per type
  *   7. Sort by score desc, slice to topK per type
  *   8. Output grouped JSON { code: [...], text: [...], image: [...] } or text with ## headers
  *
- * col-768 holds BOTH code and text vectors; they are distinguished by modelId metadata.
- * Over-fetch topK*5 ensures enough candidates after modelId filtering.
+ * col-768 holds ALL vectors (code, text, image); they are distinguished by modelId metadata.
  */
 export async function runQuery(text, options) {
     const topK = parseInt(options.topK, 10);
@@ -59,7 +58,6 @@ export async function runQuery(text, options) {
         }
         else {
             // Pre-detect indexed types from manifest: only load models for types that have data.
-            // This avoids loading Jina when only text is indexed (or Nomic when only code is indexed).
             const { EXTENSION_MAP } = await import('../../types.js');
             const indexedTypes = new Set();
             for (const filePath of Object.keys(manifest.files)) {
@@ -84,7 +82,6 @@ export async function runQuery(text, options) {
         // 4. Open the vector collection (col-768 serves every queried type)
         const { openCollection } = await import('../../services/vector-db.js');
         const col768 = openCollection(projectDir, 'col-768');
-        const col512 = typesToQuery.includes('image') ? openCollection(projectDir, 'col-512') : null;
         try {
             // ── Helpers ──────────────────────────────────────────────────────────────
             const { normalizeResults, filterAndCollapse, filterImageResults } = await import('../../services/query-utils.js');
@@ -97,11 +94,16 @@ export async function runQuery(text, options) {
             let textResults = [];
             let imageResults = [];
             if (typesToQuery.includes('code')) {
-                // Code: Jina embedding, filter for jina modelId
+                // Code: Qwen3 embedding, filter for Qwen3 modelId
                 let pipe = null;
                 try {
+                    if (process.stderr.isTTY)
+                        process.stderr.write('\r\x1b[Kcode: loading model...');
                     pipe = await createEmbeddingPipeline('code');
-                    const [queryEmbedding] = await pipe.embed([text]);
+                    if (process.stderr.isTTY)
+                        process.stderr.write('\r\x1b[K');
+                    const prefixedQuery = `Instruct: Given a search query, retrieve relevant code snippets\nQuery: ${text}`;
+                    const [queryEmbedding] = await pipe.embed([prefixedQuery]);
                     let rawResults;
                     try {
                         rawResults = col768.query(queryEmbedding, fetchCount);
@@ -110,7 +112,7 @@ export async function runQuery(text, options) {
                         rawResults = [];
                     }
                     const normalized = normalizeResults(rawResults);
-                    codeResults = filterAndCollapse(normalized, (id) => id.includes('jina') || id.startsWith('jinaai/'), { threshold, dir: options.dir, topK });
+                    codeResults = filterAndCollapse(normalized, (id) => id.includes('Qwen3-Embedding'), { threshold, dir: options.dir, topK });
                 }
                 catch (err) {
                     process.stderr.write(`[query] code pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
@@ -121,11 +123,15 @@ export async function runQuery(text, options) {
                 }
             }
             if (typesToQuery.includes('text')) {
-                // Text: Nomic embedding with "search_query: " prefix, filter for nomic modelId
+                // Text: Qwen3 embedding with instruct prefix, filter for Qwen3 modelId
                 let pipe = null;
                 try {
+                    if (process.stderr.isTTY)
+                        process.stderr.write('\r\x1b[Ktext: loading model...');
                     pipe = await createEmbeddingPipeline('text');
-                    const prefixedQuery = `search_query: ${text}`;
+                    if (process.stderr.isTTY)
+                        process.stderr.write('\r\x1b[K');
+                    const prefixedQuery = `Instruct: Given a search query, retrieve relevant text passages\nQuery: ${text}`;
                     const [queryEmbedding] = await pipe.embed([prefixedQuery]);
                     let rawResults;
                     try {
@@ -135,7 +141,7 @@ export async function runQuery(text, options) {
                         rawResults = [];
                     }
                     const normalized = normalizeResults(rawResults);
-                    textResults = filterAndCollapse(normalized, (id) => id.includes('nomic'), { threshold, dir: options.dir, topK });
+                    textResults = filterAndCollapse(normalized, (id) => id.includes('Qwen3-Embedding'), { threshold, dir: options.dir, topK });
                 }
                 catch (err) {
                     process.stderr.write(`[query] text pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
@@ -145,22 +151,26 @@ export async function runQuery(text, options) {
                         await pipe.dispose();
                 }
             }
-            if (typesToQuery.includes('image') && col512) {
-                // Image: CLIP text embedding, query col-512, filter for clip modelId
+            if (typesToQuery.includes('image')) {
+                // Image: SigLIP text embedding, query col-768, filter for siglip modelId
                 let pipe = null;
                 try {
-                    const { createClipTextPipeline } = await import('../../services/image-embedder.js');
-                    pipe = await createClipTextPipeline();
+                    if (process.stderr.isTTY)
+                        process.stderr.write('\r\x1b[Kimage: loading model...');
+                    const { createSiglipTextPipeline } = await import('../../services/image-embedder.js');
+                    pipe = await createSiglipTextPipeline();
+                    if (process.stderr.isTTY)
+                        process.stderr.write('\r\x1b[K');
                     const [queryEmbedding] = await pipe.embedText([text]);
                     let rawResults;
                     try {
-                        rawResults = col512.query(queryEmbedding, fetchCount);
+                        rawResults = col768.query(queryEmbedding, fetchCount);
                     }
                     catch {
                         rawResults = [];
                     }
                     const normalized = normalizeResults(rawResults);
-                    imageResults = filterImageResults(normalized, (id) => id.includes('clip'), { threshold, dir: options.dir, topK });
+                    imageResults = filterImageResults(normalized, (id) => id.includes('siglip'), { threshold, dir: options.dir, topK });
                 }
                 catch (err) {
                     process.stderr.write(`[query] image pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
@@ -261,8 +271,6 @@ export async function runQuery(text, options) {
         }
         finally {
             col768.close();
-            if (col512)
-                col512.close();
         }
     }
     catch (err) {

package/dist/services/download-progress.js ADDED Viewed

@@ -0,0 +1,68 @@
+/**
+ * Default progress callback for Transformers.js model downloads.
+ *
+ * Transformers.js fires download/progress events even for cached models, so we
+ * check the cache directory to decide the label:
+ *   - Cache miss → "Downloading <model> — <file> XX%"
+ *   - Cache hit  → "Loading <model>..."
+ *
+ * Output goes to stderr and only when running in a TTY.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { resolveModelCachePath } from '../config/paths.js';
+function isModelCached(modelId) { // true when a directory for modelId exists under the model cache path
+    // Transformers.js stores models under <cacheDir>/<org>/<repo>/, so each '/'-separated part of modelId becomes one path segment,
+    // e.g. ~/.ez-search/models/onnx-community/Qwen3-Embedding-0.6B-ONNX/
+    const modelDir = path.join(resolveModelCachePath(), ...modelId.split('/'));
+    try {
+        return fs.statSync(modelDir).isDirectory(); // only the directory itself is checked, not its contents — NOTE(review): an interrupted download may also count as cached; confirm
+    }
+    catch {
+        return false; // statSync threw (e.g. the path does not exist): treat as not cached
+    }
+}
+export function createDownloadProgressCallback(modelId) { // returns an event handler that writes model load/download status lines for modelId to stderr
+    const isTTY = !!process.stderr.isTTY;
+    if (!isTTY)
+        return () => { }; // stderr is not a TTY: hand back a no-op so nothing is printed
+    const cached = isModelCached(modelId); // evaluated once, when the callback is created
+    // Cached model: print a single "Loading <model>..." on the first 'initiate' event and erase the line on 'ready'
+    if (cached) {
+        let shown = false; // ensures the "Loading" line is written only once even if several files emit 'initiate'
+        return (event) => {
+            if (!event || typeof event !== 'object')
+                return; // ignore anything that is not an event object
+            const e = event;
+            if (!shown && e.status === 'initiate') {
+                shown = true;
+                process.stderr.write(`\r\x1b[KLoading ${modelId}...`); // "\r\x1b[K": return to column 0 and erase the line before writing
+            }
+            if (e.status === 'ready') {
+                process.stderr.write('\r\x1b[K'); // clear the status line once loading has finished
+            }
+        };
+    }
+    // Uncached model: per-file status. NOTE(review): Transformers.js appears to send percentages on status 'progress' events, not 'download' — confirm the percent branch below is reachable
+    const downloading = new Set(); // files for which a "Downloading" line has already been printed and no 'done' event has arrived
+    return (event) => {
+        if (!event || typeof event !== 'object')
+            return; // ignore anything that is not an event object
+        const e = event;
+        if (e.status === 'download' && e.file) {
+            if (!downloading.has(e.file)) {
+                downloading.add(e.file); // first 'download' event for this file: print its name without a percentage
+                process.stderr.write(`\r\x1b[KDownloading ${modelId} — ${e.file}...`);
+            }
+            else if (typeof e.progress === 'number') {
+                process.stderr.write(`\r\x1b[KDownloading ${modelId} — ${e.file} ${Math.round(e.progress)}%`); // later 'download' events carrying a numeric progress: print it rounded
+            }
+        }
+        else if (e.status === 'done' && e.file) {
+            downloading.delete(e.file); // file finished: forget it so a later download of the same file prints its name again
+        }
+        if (e.status === 'ready') {
+            process.stderr.write('\r\x1b[K'); // clear the status line once loading has finished
+        }
+    };
+}

package/dist/services/image-embedder.js CHANGED Viewed

@@ -1,27 +1,25 @@
 /**
- * CLIP image embedding service — converts image files to 512-dim Float32Array embeddings.
+ * SigLIP image embedding service — converts image files to 768-dim Float32Array embeddings.
  *
- * Uses CLIPVisionModelWithProjection (not the full CLIP model) with fp32 dtype.
- * Quantized variants (int8, uint8) fail in onnxruntime-node with:
- *   "ConvInteger(10) is not implemented"
- * Therefore, dtype: 'fp32' is REQUIRED and must not be changed.
+ * Uses SiglipVisionModel (not the full SigLIP model) with fp32 dtype.
  *
  * Supported formats: .jpg, .jpeg, .png, .webp (anything RawImage can decode).
  *
- * One image produces one 512-dim vector — no chunking is performed.
+ * One image produces one 768-dim vector — no chunking is performed.
  * Model weights are cached in ~/.ez-search/models/ alongside text/code models.
  */
-import { CLIPVisionModelWithProjection, CLIPTextModelWithProjection, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
+import { SiglipVisionModel, SiglipTextModel, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
 import { resolveModelCachePath } from '../config/paths.js';
+import { createDownloadProgressCallback } from './download-progress.js';
 // ── Constants ─────────────────────────────────────────────────────────────────
-const CLIP_MODEL_ID = 'Xenova/clip-vit-base-patch16';
-const CLIP_DIM = 512;
+const SIGLIP_MODEL_ID = 'Xenova/siglip-base-patch16-224';
+const SIGLIP_DIM = 768;
 // ── Helpers ──────────────────────────────────────────────────────────────────
 /**
  * L2-normalize a vector in-place.
  *
- * CLIPVisionModelWithProjection and CLIPTextModelWithProjection do NOT
- * normalize their output — only the full CLIPModel does. Without this,
+ * SiglipVisionModel and SiglipTextModel do NOT
+ * normalize their output — only the full SigLIP model does. Without this,
  * cosine distances in Zvec are meaningless (all scores collapse to ~0.21).
  */
 function l2Normalize(vec) {
@@ -36,41 +34,38 @@ function l2Normalize(vec) {
 }
 // ── Public API ────────────────────────────────────────────────────────────────
 /**
- * Create an ImageEmbeddingPipeline backed by CLIP ViT-B/16 (fp32).
+ * Create an ImageEmbeddingPipeline backed by SigLIP ViT-B/16 (fp32).
  *
- * Loads the AutoProcessor and CLIPVisionModelWithProjection in parallel.
+ * Loads the AutoProcessor and SiglipVisionModel in parallel.
  * Model weights are cached in ~/.ez-search/models/.
- *
- * IMPORTANT: dtype must remain 'fp32'. Quantized variants fail in Node.js with
- * "ConvInteger(10) is not implemented" from onnxruntime-node.
  */
 export async function createImageEmbeddingPipeline() {
     // Set cache dir BEFORE first model load — this is critical
     env.cacheDir = resolveModelCachePath();
     env.allowRemoteModels = true;
+    const cb = createDownloadProgressCallback(SIGLIP_MODEL_ID);
     // Load processor and vision model in parallel for faster startup
     const [processor, visionModel] = await Promise.all([
-        AutoProcessor.from_pretrained(CLIP_MODEL_ID),
-        CLIPVisionModelWithProjection.from_pretrained(CLIP_MODEL_ID, {
-            // fp32 is REQUIRED — do not use 'int8', 'uint8', or other quantized dtypes.
-            // onnxruntime-node does not implement ConvInteger(10), which quantized CLIP uses.
+        AutoProcessor.from_pretrained(SIGLIP_MODEL_ID, { progress_callback: cb }),
+        SiglipVisionModel.from_pretrained(SIGLIP_MODEL_ID, {
             dtype: 'fp32',
+            progress_callback: cb,
         }),
     ]);
-    console.error(`[image-embedder] Loaded CLIP vision model (fp32)`);
+    console.error(`[image-embedder] Loaded SigLIP vision model (fp32)`);
     return {
-        modelId: CLIP_MODEL_ID,
-        dim: CLIP_DIM,
+        modelId: SIGLIP_MODEL_ID,
+        dim: SIGLIP_DIM,
         async embedImage(buf) {
             // Use fromBlob instead of file:// URLs to avoid encoding issues with
             // special Unicode characters in filenames (e.g. macOS narrow no-break spaces).
             const image = await RawImage.fromBlob(new Blob([new Uint8Array(buf)]));
-            // Preprocess: resize, normalize, convert to tensor expected by CLIP
+            // Preprocess: resize, normalize, convert to tensor expected by SigLIP
             const inputs = await processor(image);
-            // Run the vision encoder — output.image_embeds is a [1, 512] Tensor
+            // Run the vision encoder — output.pooler_output is a [1, 768] Tensor
             const output = await visionModel(inputs);
             // Extract and L2-normalize (SiglipVisionModel does not normalize its output)
-            return l2Normalize(new Float32Array(output.image_embeds.data.slice(0, CLIP_DIM)));
+            return l2Normalize(new Float32Array(output.pooler_output.data.slice(0, SIGLIP_DIM)));
         },
         async dispose() {
             if (typeof visionModel.dispose === 'function') {
@@ -80,30 +75,31 @@ export async function createImageEmbeddingPipeline() {
     };
 }
 /**
- * Create a ClipTextPipeline backed by CLIP ViT-B/16 (fp32).
+ * Create a SiglipTextPipeline backed by SigLIP ViT-B/16 (fp32).
  *
- * Loads AutoTokenizer and CLIPTextModelWithProjection in parallel.
- * Used for text-to-image search: encode query text into CLIP's 512-dim space,
+ * Loads AutoTokenizer and SiglipTextModel in parallel.
+ * Used for text-to-image search: encode query text into SigLIP's 768-dim space,
  * then find nearest image embeddings.
  */
-export async function createClipTextPipeline() {
+export async function createSiglipTextPipeline() {
     env.cacheDir = resolveModelCachePath();
     env.allowRemoteModels = true;
+    const cb = createDownloadProgressCallback(SIGLIP_MODEL_ID);
     const [tokenizer, textModel] = await Promise.all([
-        AutoTokenizer.from_pretrained(CLIP_MODEL_ID),
-        CLIPTextModelWithProjection.from_pretrained(CLIP_MODEL_ID, { dtype: 'fp32' }),
+        AutoTokenizer.from_pretrained(SIGLIP_MODEL_ID, { progress_callback: cb }),
+        SiglipTextModel.from_pretrained(SIGLIP_MODEL_ID, { dtype: 'fp32', progress_callback: cb }),
     ]);
-    console.error(`[image-embedder] Loaded CLIP text model (fp32)`);
+    console.error(`[image-embedder] Loaded SigLIP text model (fp32)`);
     return {
-        modelId: CLIP_MODEL_ID,
-        dim: CLIP_DIM,
+        modelId: SIGLIP_MODEL_ID,
+        dim: SIGLIP_DIM,
         async embedText(texts) {
             const inputs = tokenizer(texts, { padding: true, truncation: true });
             const output = await textModel(inputs);
-            const data = output.text_embeds.data;
+            const data = output.pooler_output.data;
             const embeddings = [];
             for (let i = 0; i < texts.length; i++) {
-                embeddings.push(l2Normalize(new Float32Array(data.slice(i * CLIP_DIM, (i + 1) * CLIP_DIM))));
+                embeddings.push(l2Normalize(new Float32Array(data.slice(i * SIGLIP_DIM, (i + 1) * SIGLIP_DIM))));
             }
             return embeddings;
         },

package/dist/services/manifest-cache.js CHANGED Viewed

@@ -13,7 +13,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from '
 import * as path from 'path';
 import { resolveProjectStoragePath } from '../config/paths.js';
 // ── Constants ─────────────────────────────────────────────────────────────────
-export const MANIFEST_VERSION = 4;
+export const MANIFEST_VERSION = 5;
 export const MANIFEST_FILENAME = 'manifest.json';
 // ── Helpers ───────────────────────────────────────────────────────────────────
 function manifestPath(projectDir) {

package/dist/services/model-router.js CHANGED Viewed

@@ -6,32 +6,25 @@
  *
  * Model cache is stored in ~/.ez-search/models/ (not the default HuggingFace cache).
  *
- * NOTE: The nomic text model requires task prefixes on inputs — callers are responsible:
- *   - Documents: prefix with "search_document: "
- *   - Queries:   prefix with "search_query: "
- * The pipeline itself does NOT add prefixes automatically.
+ * Both code and text use Qwen3-Embedding-0.6B. Output is truncated from 1024 to 768 dims
+ * via Matryoshka Representation Learning, then L2-normalized.
+ *
+ * Query prefixing (Instruct/Query format) is the caller's responsibility.
  */
 import { pipeline, env } from '@huggingface/transformers';
 import { resolveModelCachePath } from '../config/paths.js';
+import { createDownloadProgressCallback } from './download-progress.js';
 // ── Model registry ────────────────────────────────────────────────────────────
 const MODEL_REGISTRY = {
     code: {
-        id: 'jinaai/jina-embeddings-v2-base-code',
+        id: 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
+        nativeDim: 1024,
         dim: 768,
     },
     text: {
-        id: 'nomic-ai/nomic-embed-text-v1.5',
+        id: 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
+        nativeDim: 1024,
         dim: 768,
-        /**
-         * Nomic requires task prefixes on all inputs:
-         *   document: "search_document: <text>"
-         *   query:    "search_query: <text>"
-         * The embed() method does NOT add these — callers must prefix their strings.
-         */
-        taskPrefix: {
-            document: 'search_document: ',
-            query: 'search_query: ',
-        },
     },
 };
 // ── Helpers ───────────────────────────────────────────────────────────────────
@@ -49,6 +42,16 @@ function extractEmbedding(output) {
     }
     throw new Error(`Unexpected embedding output shape: ${JSON.stringify(output)}`);
 }
+function l2Normalize(vec) { // scales vec in place to unit Euclidean length and returns the same array
+    let norm = 0;
+    for (let i = 0; i < vec.length; i++)
+        norm += vec[i] * vec[i]; // accumulate the sum of squares
+    norm = Math.sqrt(norm);
+    if (norm > 0) // an all-zero vector is left unchanged to avoid dividing by zero
+        for (let i = 0; i < vec.length; i++)
+            vec[i] /= norm;
+    return vec;
+}
 // ── Public API ────────────────────────────────────────────────────────────────
 /**
  * Create an EmbeddingPipeline for the given model type.
@@ -58,12 +61,11 @@ function extractEmbedding(output) {
  *
  * Model weights are cached in ~/.ez-search/models/ (set before first pipeline() call).
  *
- * @param modelType - 'code' for jinaai/jina-embeddings-v2-base-code (768-dim)
- *                    'text' for nomic-ai/nomic-embed-text-v1.5 (768-dim, prefixes required)
+ * @param modelType - 'code' or 'text', both backed by Qwen3-Embedding-0.6B (768-dim after truncation)
  */
 export async function createEmbeddingPipeline(modelType, options = {}) {
     const model = MODEL_REGISTRY[modelType];
-    const progressCallback = options.progressCallback;
+    const cb = options.progressCallback ?? createDownloadProgressCallback(model.id);
     // Set cache dir BEFORE first pipeline() call — this is critical
     env.cacheDir = resolveModelCachePath();
     env.allowRemoteModels = true;
@@ -74,7 +76,7 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
         pipe = await pipeline('feature-extraction', model.id, {
             device: 'webgpu',
             dtype: 'fp32',
-            ...(progressCallback ? { progress_callback: progressCallback } : {}),
+            progress_callback: cb,
         });
         backend = 'webgpu';
         console.error(`[model-router] Using WebGPU for ${model.id}`);
@@ -86,7 +88,7 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
         pipe = await pipeline('feature-extraction', model.id, {
             device: 'cpu',
             dtype: 'q8',
-            ...(progressCallback ? { progress_callback: progressCallback } : {}),
+            progress_callback: cb,
         });
         backend = 'cpu';
         console.error(`[model-router] Using CPU for ${model.id}`);
@@ -97,7 +99,11 @@ export async function createEmbeddingPipeline(modelType, options = {}) {
         dim: model.dim,
         async embed(texts) {
             const outputs = await Promise.all(texts.map((text) => pipe(text, { pooling: 'mean', normalize: true })));
-            return outputs.map(extractEmbedding);
+            return outputs.map((output) => {
+                const raw = extractEmbedding(output);
+                const truncated = new Float32Array(raw.buffer, raw.byteOffset, model.dim);
+                return l2Normalize(new Float32Array(truncated));
+            });
         },
         async dispose() {
             if (pipe && typeof pipe.dispose === 'function') {

package/dist/services/vector-db.js CHANGED Viewed

@@ -2,9 +2,8 @@
  * Vector DB service — wraps @zvec/zvec behind a clean interface.
  *
  * Uses createRequire because @zvec/zvec is a CommonJS package in an ESM project.
- * Two collections per project:
- *   col-768 — for code/text embeddings (jina, nomic, 768-dim)
- *   col-512 — for image embeddings (CLIP, 512-dim)
+ * Single collection per project:
+ *   col-768 — for all embeddings (code, text, image — all 768-dim)
  *
  * Storage lives at <project>/.ez-search/ (project-scoped).
  */
@@ -17,7 +16,7 @@ const { ZVecCreateAndOpen, ZVecOpen, ZVecCollectionSchema, ZVecDataType, ZVecInd
 // Initialize Zvec at module level — suppress noisy logs
 ZVecInitialize({ logLevel: ZVecLogLevel.WARN });
 // ── Schema versioning ─────────────────────────────────────────────────────────
-const SCHEMA_VERSION = 2;
+const SCHEMA_VERSION = 3;
 // ── Helpers ───────────────────────────────────────────────────────────────────
 /**
  * Validate that an ID doesn't contain colons (Zvec rejects them).
@@ -144,31 +143,16 @@ function createCollection(storageDir, name, dim) {
         },
     };
 }
-/**
- * Open both vector collections for a project.
- *
- * Storage layout:
- *   <projectDir>/.ez-search/col-768/  (768-dim, code/text)
- *   <projectDir>/.ez-search/col-512/  (512-dim, images)
- *
- * Creates the storage directory if it does not exist.
- */
 export function openProjectCollections(projectDir) {
     const storageDir = resolveProjectStoragePath(projectDir);
     mkdirSync(storageDir, { recursive: true });
     ensureSchemaVersion(storageDir);
     const col768 = createCollection(storageDir, 'col-768', 768);
-    const col512 = createCollection(storageDir, 'col-512', 512);
-    return { col768, col512, storagePath: storageDir };
+    return { col768, storagePath: storageDir };
 }
-/**
- * Open a single vector collection by name.
- * Use this when you only need one collection (e.g. query only needs col-768).
- */
 export function openCollection(projectDir, name) {
     const storageDir = resolveProjectStoragePath(projectDir);
     mkdirSync(storageDir, { recursive: true });
     ensureSchemaVersion(storageDir);
-    const dim = name === 'col-768' ? 768 : 512;
-    return createCollection(storageDir, name, dim);
+    return createCollection(storageDir, name, 768);
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ez-corp/ez-search",
-  "version": "1.1.0",
+  "version": "1.1.2",
   "description": "Semantic codebase search with zero cloud dependencies",
   "type": "module",
   "main": "dist/cli/index.js",