@ez-corp/ez-search 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@
18
18
  * Model routing:
19
19
  * code -> jinaai/jina-embeddings-v2-base-code, col-768
20
20
  * text -> nomic-ai/nomic-embed-text-v1.5, col-768 (prefix: "search_document: ")
21
- * image -> Xenova/clip-vit-base-patch32, col-512 (one vector per file)
21
+ * image -> Xenova/clip-vit-base-patch16, col-512 (one vector per file)
22
22
  */
23
23
  import * as path from 'path';
24
24
  import * as fsp from 'fs/promises';
@@ -30,7 +30,7 @@ const BATCH_SIZE = 32;
30
30
  * Used by both code and text pipelines (they differ only in chunker, model, prefix, tokenizer).
31
31
  */
32
32
  async function runTextEmbeddingPipeline(opts) {
33
- const { type, files, col768, manifest, hashContent, hashText, makeChunkId } = opts;
33
+ const { type, files, col768, manifest, hashContent, hashText, makeChunkId, progress } = opts;
34
34
  let filesIndexed = 0;
35
35
  let filesSkipped = 0;
36
36
  let chunksCreated = 0;
@@ -38,7 +38,9 @@ async function runTextEmbeddingPipeline(opts) {
38
38
  let chunksRemoved = 0;
39
39
  // Determine which files need processing (mtime+size fast path, hash confirmation)
40
40
  const filesToProcess = [];
41
- for (const file of files) {
41
+ for (let fi = 0; fi < files.length; fi++) {
42
+ const file = files[fi];
43
+ progress.update(`${type}: checking files`, fi + 1, files.length);
42
44
  const existing = manifest.files[file.relativePath];
43
45
  if (existing && existing.mtime === file.mtimeMs && existing.size === file.sizeBytes) {
44
46
  filesSkipped++;
@@ -190,11 +192,15 @@ async function runTextEmbeddingPipeline(opts) {
190
192
  }
191
193
  // Embed all pending chunks
192
194
  if (allPendingChunks.length > 0) {
195
+ progress.update(`${type}: loading model...`);
193
196
  const { createEmbeddingPipeline } = await import('../../services/model-router.js');
194
197
  pipe = await createEmbeddingPipeline(type);
195
198
  // Nomic requires "search_document: " prefix on indexed documents
196
199
  const prefix = type === 'text' ? 'search_document: ' : '';
200
+ const totalBatches = Math.ceil(allPendingChunks.length / BATCH_SIZE);
197
201
  for (let batchStart = 0; batchStart < allPendingChunks.length; batchStart += BATCH_SIZE) {
202
+ const batchNum = Math.floor(batchStart / BATCH_SIZE) + 1;
203
+ progress.update(`${type}: embedding`, batchNum, totalBatches);
198
204
  const batch = allPendingChunks.slice(batchStart, batchStart + BATCH_SIZE);
199
205
  const texts = batch.map((c) => prefix + c.text);
200
206
  const embeddings = await pipe.embed(texts);
@@ -226,6 +232,11 @@ async function runTextEmbeddingPipeline(opts) {
226
232
  }
227
233
  export async function runIndex(targetPath, options) {
228
234
  const startTime = Date.now();
235
+ const { ProgressReporter } = await import('../progress.js');
236
+ const progress = new ProgressReporter({
237
+ quiet: options.quiet,
238
+ json: options.format !== 'text',
239
+ });
229
240
  try {
230
241
  // 1. Resolve path
231
242
  const absPath = path.resolve(targetPath);
@@ -267,6 +278,7 @@ export async function runIndex(targetPath, options) {
267
278
  const scannedFiles = [];
268
279
  for await (const file of scanFiles(absPath, { useIgnoreFiles: options.ignore, typeFilter: fileType })) {
269
280
  scannedFiles.push(file);
281
+ progress.update(`scanning ${fileType} files... ${scannedFiles.length} found`);
270
282
  }
271
283
  totalFilesScanned += scannedFiles.length;
272
284
  if (scannedFiles.length === 0) {
@@ -305,6 +317,7 @@ export async function runIndex(targetPath, options) {
305
317
  hashContent,
306
318
  hashText,
307
319
  makeChunkId,
320
+ progress,
308
321
  });
309
322
  totalFilesIndexed += result.filesIndexed;
310
323
  totalFilesSkipped += result.filesSkipped;
@@ -356,9 +369,12 @@ export async function runIndex(targetPath, options) {
356
369
  }
357
370
  if (filesToProcess.length > 0) {
358
371
  // Load CLIP pipeline once for the batch
372
+ progress.update('image: loading model...');
359
373
  const { createImageEmbeddingPipeline } = await import('../../services/image-embedder.js');
360
374
  const imagePipeline = await createImageEmbeddingPipeline();
361
- for (const file of filesToProcess) {
375
+ for (let imgIdx = 0; imgIdx < filesToProcess.length; imgIdx++) {
376
+ const file = filesToProcess[imgIdx];
377
+ progress.update('image: embedding', imgIdx + 1, filesToProcess.length);
362
378
  const buf = await fsp.readFile(file.absolutePath);
363
379
  const fileHash = hashContent(buf);
364
380
  const embedding = await imagePipeline.embedImage(buf);
@@ -393,6 +409,7 @@ export async function runIndex(targetPath, options) {
393
409
  emitError({ code: 'EMPTY_DIR', message: 'No supported files found in directory', suggestion: 'Ensure the directory contains supported file types (.ts, .js, .py, .go, .rs, .c, .cpp, .md, .txt, .jpg, .png, .webp)' }, format);
394
410
  }
395
411
  // 6. Optimize, close collections, THEN save manifest
412
+ progress.update('optimizing index...');
396
413
  col768.optimize();
397
414
  col768.close();
398
415
  if (imageFilesProcessed) {
@@ -400,6 +417,7 @@ export async function runIndex(targetPath, options) {
400
417
  }
401
418
  col512.close();
402
419
  saveManifest(absPath, manifest);
420
+ progress.done();
403
421
  // 7. Output results
404
422
  const durationMs = Date.now() - startTime;
405
423
  const hasChanges = totalFilesIndexed > 0 || allDeletedPaths.length > 0;
@@ -447,6 +465,7 @@ export async function runIndex(targetPath, options) {
447
465
  return output;
448
466
  }
449
467
  catch (err) {
468
+ progress.done();
450
469
  const { emitError } = await import('../errors.js');
451
470
  const message = err instanceof Error ? err.message : String(err);
452
471
  return emitError({ code: 'GENERAL_ERROR', message, suggestion: 'Check the error above and retry' }, options.format === 'text' ? 'text' : 'json');
@@ -0,0 +1,33 @@
/**
 * Live progress reporter for CLI indexing.
 *
 * Writes a single updating line to stderr using \r + ANSI erase-line (ESC[K).
 * Only active when stderr is an interactive TTY and output isn't suppressed
 * (not quiet, not JSON mode), so it never interferes with the JSON/text
 * results emitted on stdout.
 */
const BAR_WIDTH = 20;

export class ProgressReporter {
  /** @type {boolean} whether progress lines are written at all */
  enabled;

  /**
   * @param {{ quiet?: boolean, json?: boolean }} opts - output-suppression flags
   */
  constructor(opts) {
    // Progress is purely cosmetic: disable for quiet mode, machine-readable
    // (JSON) output, and non-interactive stderr (pipes, CI logs).
    this.enabled = !opts.quiet && !opts.json && !!process.stderr.isTTY;
  }

  /**
   * Overwrite the current line with a status message + optional progress bar.
   *
   * @param {string} message - status text shown before the bar
   * @param {number} [current] - completed units; the bar is drawn only when
   *   both current and total are provided and total > 0
   * @param {number} [total] - total units
   */
  update(message, current, total) {
    if (!this.enabled)
      return;
    let line = message;
    if (total != null && current != null && total > 0) {
      // Clamp to [0, 1]: the original only capped overshoot, so a negative
      // `current` produced a negative repeat count and String.prototype.repeat
      // threw a RangeError mid-indexing.
      const pct = Math.min(Math.max(current / total, 0), 1);
      const filled = Math.round(pct * BAR_WIDTH);
      const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(BAR_WIDTH - filled);
      line = `${message} [${bar}] ${current}/${total}`;
    }
    // \r returns to column 0; ESC[K erases to end of line before rewriting.
    process.stderr.write(`\r\x1b[K${line}`);
  }

  /** Clear the progress line. Call when indexing completes (or on error). */
  done() {
    if (!this.enabled)
      return;
    process.stderr.write('\r\x1b[K');
  }
}
@@ -14,7 +14,7 @@
14
14
  import { CLIPVisionModelWithProjection, CLIPTextModelWithProjection, AutoProcessor, AutoTokenizer, RawImage, env } from '@huggingface/transformers';
15
15
  import { resolveModelCachePath } from '../config/paths.js';
16
16
  // ── Constants ─────────────────────────────────────────────────────────────────
17
- const CLIP_MODEL_ID = 'Xenova/clip-vit-base-patch32';
17
+ const CLIP_MODEL_ID = 'Xenova/clip-vit-base-patch16';
18
18
  const CLIP_DIM = 512;
19
19
  // ── Helpers ──────────────────────────────────────────────────────────────────
20
20
  /**
@@ -36,7 +36,7 @@ function l2Normalize(vec) {
36
36
  }
37
37
  // ── Public API ────────────────────────────────────────────────────────────────
38
38
  /**
39
- * Create an ImageEmbeddingPipeline backed by CLIP ViT-B/32 (fp32).
39
+ * Create an ImageEmbeddingPipeline backed by CLIP ViT-B/16 (fp32).
40
40
  *
41
41
  * Loads the AutoProcessor and CLIPVisionModelWithProjection in parallel.
42
42
  * Model weights are cached in ~/.ez-search/models/.
@@ -80,7 +80,7 @@ export async function createImageEmbeddingPipeline() {
80
80
  };
81
81
  }
82
82
  /**
83
- * Create a ClipTextPipeline backed by CLIP ViT-B/32 (fp32).
83
+ * Create a ClipTextPipeline backed by CLIP ViT-B/16 (fp32).
84
84
  *
85
85
  * Loads AutoTokenizer and CLIPTextModelWithProjection in parallel.
86
86
  * Used for text-to-image search: encode query text into CLIP's 512-dim space,
@@ -13,7 +13,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from '
13
13
  import * as path from 'path';
14
14
  import { resolveProjectStoragePath } from '../config/paths.js';
15
15
  // ── Constants ─────────────────────────────────────────────────────────────────
16
- export const MANIFEST_VERSION = 3;
16
+ export const MANIFEST_VERSION = 4;
17
17
  export const MANIFEST_FILENAME = 'manifest.json';
18
18
  // ── Helpers ───────────────────────────────────────────────────────────────────
19
19
  function manifestPath(projectDir) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ez-corp/ez-search",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "Semantic codebase search with zero cloud dependencies",
5
5
  "type": "module",
6
6
  "main": "dist/cli/index.js",