npm - @ez-corp/ez-search - Versions diffs - 1.0.8 → 1.0.10 - Mend

@ez-corp/ez-search 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/cli/commands/index-cmd.js +23 -4
package/dist/cli/progress.js +33 -0
package/dist/services/image-embedder.js +3 -3
package/dist/services/manifest-cache.js +1 -1
package/package.json +1 -1

package/dist/cli/commands/index-cmd.js CHANGED Viewed

@@ -18,7 +18,7 @@
  * Model routing:
  *   code  -> jinaai/jina-embeddings-v2-base-code, col-768
  *   text  -> nomic-ai/nomic-embed-text-v1.5, col-768  (prefix: "search_document: ")
- *   image -> Xenova/clip-vit-base-patch32, col-512     (one vector per file)
+ *   image -> Xenova/clip-vit-base-patch16, col-512     (one vector per file)
  */
 import * as path from 'path';
 import * as fsp from 'fs/promises';
@@ -30,7 +30,7 @@ const BATCH_SIZE = 32;
  * Used by both code and text pipelines (they differ only in chunker, model, prefix, tokenizer).
  */
 async function runTextEmbeddingPipeline(opts) {
-    const { type, files, col768, manifest, hashContent, hashText, makeChunkId } = opts;
+    const { type, files, col768, manifest, hashContent, hashText, makeChunkId, progress } = opts;
     let filesIndexed = 0;
     let filesSkipped = 0;
     let chunksCreated = 0;
@@ -38,7 +38,9 @@ async function runTextEmbeddingPipeline(opts) {
     let chunksRemoved = 0;
     // Determine which files need processing (mtime+size fast path, hash confirmation)
     const filesToProcess = [];
-    for (const file of files) {
+    for (let fi = 0; fi < files.length; fi++) {
+        const file = files[fi];
+        progress.update(`${type}: checking files`, fi + 1, files.length);
         const existing = manifest.files[file.relativePath];
         if (existing && existing.mtime === file.mtimeMs && existing.size === file.sizeBytes) {
             filesSkipped++;
@@ -190,11 +192,15 @@ async function runTextEmbeddingPipeline(opts) {
     }
     // Embed all pending chunks
     if (allPendingChunks.length > 0) {
+        progress.update(`${type}: loading model...`);
         const { createEmbeddingPipeline } = await import('../../services/model-router.js');
         pipe = await createEmbeddingPipeline(type);
         // Nomic requires "search_document: " prefix on indexed documents
         const prefix = type === 'text' ? 'search_document: ' : '';
+        const totalBatches = Math.ceil(allPendingChunks.length / BATCH_SIZE);
         for (let batchStart = 0; batchStart < allPendingChunks.length; batchStart += BATCH_SIZE) {
+            const batchNum = Math.floor(batchStart / BATCH_SIZE) + 1;
+            progress.update(`${type}: embedding`, batchNum, totalBatches);
             const batch = allPendingChunks.slice(batchStart, batchStart + BATCH_SIZE);
             const texts = batch.map((c) => prefix + c.text);
             const embeddings = await pipe.embed(texts);
@@ -226,6 +232,11 @@ async function runTextEmbeddingPipeline(opts) {
 }
 export async function runIndex(targetPath, options) {
     const startTime = Date.now();
+    const { ProgressReporter } = await import('../progress.js');
+    const progress = new ProgressReporter({
+        quiet: options.quiet,
+        json: options.format !== 'text',
+    });
     try {
         // 1. Resolve path
         const absPath = path.resolve(targetPath);
@@ -267,6 +278,7 @@ export async function runIndex(targetPath, options) {
             const scannedFiles = [];
             for await (const file of scanFiles(absPath, { useIgnoreFiles: options.ignore, typeFilter: fileType })) {
                 scannedFiles.push(file);
+                progress.update(`scanning ${fileType} files... ${scannedFiles.length} found`);
             }
             totalFilesScanned += scannedFiles.length;
             if (scannedFiles.length === 0) {
@@ -305,6 +317,7 @@ export async function runIndex(targetPath, options) {
                     hashContent,
                     hashText,
                     makeChunkId,
+                    progress,
                 });
                 totalFilesIndexed += result.filesIndexed;
                 totalFilesSkipped += result.filesSkipped;
@@ -356,9 +369,12 @@ export async function runIndex(targetPath, options) {
                 }
                 if (filesToProcess.length > 0) {
                     // Load CLIP pipeline once for the batch
+                    progress.update('image: loading model...');
                     const { createImageEmbeddingPipeline } = await import('../../services/image-embedder.js');
                     const imagePipeline = await createImageEmbeddingPipeline();
-                    for (const file of filesToProcess) {
+                    for (let imgIdx = 0; imgIdx < filesToProcess.length; imgIdx++) {
+                        const file = filesToProcess[imgIdx];
+                        progress.update('image: embedding', imgIdx + 1, filesToProcess.length);
                         const buf = await fsp.readFile(file.absolutePath);
                         const fileHash = hashContent(buf);
                         const embedding = await imagePipeline.embedImage(buf);
@@ -393,6 +409,7 @@ export async function runIndex(targetPath, options) {
             emitError({ code: 'EMPTY_DIR', message: 'No supported files found in directory', suggestion: 'Ensure the directory contains supported file types (.ts, .js, .py, .go, .rs, .c, .cpp, .md, .txt, .jpg, .png, .webp)' }, format);
         }
         // 6. Optimize, close collections, THEN save manifest
+        progress.update('optimizing index...');
         col768.optimize();
         col768.close();
         if (imageFilesProcessed) {
@@ -400,6 +417,7 @@ export async function runIndex(targetPath, options) {
         }
         col512.close();
         saveManifest(absPath, manifest);
+        progress.done();
         // 7. Output results
         const durationMs = Date.now() - startTime;
         const hasChanges = totalFilesIndexed > 0 || allDeletedPaths.length > 0;
@@ -447,6 +465,7 @@ export async function runIndex(targetPath, options) {
         return output;
     }
     catch (err) {
+        progress.done();
         const { emitError } = await import('../errors.js');
         const message = err instanceof Error ? err.message : String(err);
         return emitError({ code: 'GENERAL_ERROR', message, suggestion: 'Check the error above and retry' }, options.format === 'text' ? 'text' : 'json');

package/dist/cli/progress.js ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * Live progress reporter for CLI indexing.
+ *
+ * Writes a single updating line to stderr using \r + ANSI clear.
+ * Only active when stderr is a TTY and output isn't suppressed.
+ * Does not interfere with JSON/text output on stdout.
+ */
+const BAR_WIDTH = 20; // number of character cells in the rendered progress bar
+export class ProgressReporter {
+    enabled; // when false, update() and done() return without writing anything
+    constructor(opts) {
+        this.enabled = !opts.quiet && !opts.json && !!process.stderr.isTTY; // on only if opts.quiet and opts.json are both falsy and stderr.isTTY is truthy
+    }
+    /** Overwrite the current stderr line with `message`; if `current` and `total` are both non-null and `total > 0`, append a BAR_WIDTH-cell bar and a `current/total` counter. */
+    update(message, current, total) {
+        if (!this.enabled)
+            return;
+        let line = message;
+        if (total != null && current != null && total > 0) { // loose `!= null` also rejects undefined, so message-only calls skip the bar
+            const pct = Math.min(current / total, 1); // upper clamp only: current > total cannot overfill the bar
+            const filled = Math.round(pct * BAR_WIDTH); // NOTE(review): no lower clamp; a sufficiently negative `current` would make repeat() throw
+            const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(BAR_WIDTH - filled); // U+2588 marks completed cells, U+2591 the remainder
+            line = `${message} [${bar}] ${current}/${total}`;
+        }
+        process.stderr.write(`\r\x1b[K${line}`); // carriage return, then ESC[K, then the new text (the "\r + ANSI clear" described in the file header)
+    }
+    /** Clear the progress line by writing `\r` + ESC[K to stderr (no newline is emitted). Call when indexing is complete. */
+    done() {
+        if (!this.enabled)
+            return;
+        process.stderr.write('\r\x1b[K');
+    }
+}

package/dist/services/image-embedder.js CHANGED Viewed

@@ -14,7 +14,7 @@
 import { CLIPVisionModelWithProjection, CLIPTextModelWithProjection, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
 import { resolveModelCachePath } from '../config/paths.js';
 // ── Constants ─────────────────────────────────────────────────────────────────
-const CLIP_MODEL_ID = 'Xenova/clip-vit-base-patch32';
+const CLIP_MODEL_ID = 'Xenova/clip-vit-base-patch16';
 const CLIP_DIM = 512;
 // ── Helpers ──────────────────────────────────────────────────────────────────
 /**
@@ -36,7 +36,7 @@ function l2Normalize(vec) {
 }
 // ── Public API ────────────────────────────────────────────────────────────────
 /**
- * Create an ImageEmbeddingPipeline backed by CLIP ViT-B/32 (fp32).
+ * Create an ImageEmbeddingPipeline backed by CLIP ViT-B/16 (fp32).
  *
  * Loads the AutoProcessor and CLIPVisionModelWithProjection in parallel.
  * Model weights are cached in ~/.ez-search/models/.
@@ -80,7 +80,7 @@ export async function createImageEmbeddingPipeline() {
     };
 }
 /**
- * Create a ClipTextPipeline backed by CLIP ViT-B/32 (fp32).
+ * Create a ClipTextPipeline backed by CLIP ViT-B/16 (fp32).
  *
  * Loads AutoTokenizer and CLIPTextModelWithProjection in parallel.
  * Used for text-to-image search: encode query text into CLIP's 512-dim space,

package/dist/services/manifest-cache.js CHANGED Viewed

@@ -13,7 +13,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from '
 import * as path from 'path';
 import { resolveProjectStoragePath } from '../config/paths.js';
 // ── Constants ─────────────────────────────────────────────────────────────────
-export const MANIFEST_VERSION = 3;
+export const MANIFEST_VERSION = 4;
 export const MANIFEST_FILENAME = 'manifest.json';
 // ── Helpers ───────────────────────────────────────────────────────────────────
 function manifestPath(projectDir) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ez-corp/ez-search",
-  "version": "1.0.8",
+  "version": "1.0.10",
   "description": "Semantic codebase search with zero cloud dependencies",
   "type": "module",
   "main": "dist/cli/index.js",