opencode-codebase-index 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -713,8 +713,21 @@ function getDefaultSearchConfig() {
713
713
  contextLines: 0
714
714
  };
715
715
  }
716
+ function getDefaultDebugConfig() {
717
+ return {
718
+ enabled: false,
719
+ logLevel: "info",
720
+ logSearch: true,
721
+ logEmbedding: true,
722
+ logCache: true,
723
+ logGc: true,
724
+ logBranch: true,
725
+ metrics: true
726
+ };
727
+ }
716
728
  var VALID_PROVIDERS = ["auto", "github-copilot", "openai", "google", "ollama"];
717
729
  var VALID_SCOPES = ["project", "global"];
730
+ var VALID_LOG_LEVELS = ["error", "warn", "info", "debug"];
718
731
  function isValidProvider(value) {
719
732
  return typeof value === "string" && VALID_PROVIDERS.includes(value);
720
733
  }
@@ -724,10 +737,14 @@ function isValidScope(value) {
724
737
  function isStringArray(value) {
725
738
  return Array.isArray(value) && value.every((item) => typeof item === "string");
726
739
  }
740
+ function isValidLogLevel(value) {
741
+ return typeof value === "string" && VALID_LOG_LEVELS.includes(value);
742
+ }
727
743
  function parseConfig(raw) {
728
744
  const input = raw && typeof raw === "object" ? raw : {};
729
745
  const defaultIndexing = getDefaultIndexingConfig();
730
746
  const defaultSearch = getDefaultSearchConfig();
747
+ const defaultDebug = getDefaultDebugConfig();
731
748
  const rawIndexing = input.indexing && typeof input.indexing === "object" ? input.indexing : {};
732
749
  const indexing = {
733
750
  autoIndex: typeof rawIndexing.autoIndex === "boolean" ? rawIndexing.autoIndex : defaultIndexing.autoIndex,
@@ -749,6 +766,17 @@ function parseConfig(raw) {
749
766
  hybridWeight: typeof rawSearch.hybridWeight === "number" ? Math.min(1, Math.max(0, rawSearch.hybridWeight)) : defaultSearch.hybridWeight,
750
767
  contextLines: typeof rawSearch.contextLines === "number" ? Math.min(50, Math.max(0, rawSearch.contextLines)) : defaultSearch.contextLines
751
768
  };
769
+ const rawDebug = input.debug && typeof input.debug === "object" ? input.debug : {};
770
+ const debug = {
771
+ enabled: typeof rawDebug.enabled === "boolean" ? rawDebug.enabled : defaultDebug.enabled,
772
+ logLevel: isValidLogLevel(rawDebug.logLevel) ? rawDebug.logLevel : defaultDebug.logLevel,
773
+ logSearch: typeof rawDebug.logSearch === "boolean" ? rawDebug.logSearch : defaultDebug.logSearch,
774
+ logEmbedding: typeof rawDebug.logEmbedding === "boolean" ? rawDebug.logEmbedding : defaultDebug.logEmbedding,
775
+ logCache: typeof rawDebug.logCache === "boolean" ? rawDebug.logCache : defaultDebug.logCache,
776
+ logGc: typeof rawDebug.logGc === "boolean" ? rawDebug.logGc : defaultDebug.logGc,
777
+ logBranch: typeof rawDebug.logBranch === "boolean" ? rawDebug.logBranch : defaultDebug.logBranch,
778
+ metrics: typeof rawDebug.metrics === "boolean" ? rawDebug.metrics : defaultDebug.metrics
779
+ };
752
780
  return {
753
781
  embeddingProvider: isValidProvider(input.embeddingProvider) ? input.embeddingProvider : "auto",
754
782
  embeddingModel: typeof input.embeddingModel === "string" ? input.embeddingModel : "auto",
@@ -756,7 +784,8 @@ function parseConfig(raw) {
756
784
  include: isStringArray(input.include) ? input.include : DEFAULT_INCLUDE,
757
785
  exclude: isStringArray(input.exclude) ? input.exclude : DEFAULT_EXCLUDE,
758
786
  indexing,
759
- search
787
+ search,
788
+ debug
760
789
  };
761
790
  }
762
791
  var EMBEDDING_MODELS = {
@@ -821,6 +850,7 @@ function getDefaultModelForProvider(provider) {
821
850
  // src/indexer/index.ts
822
851
  var import_fs4 = require("fs");
823
852
  var path5 = __toESM(require("path"), 1);
853
+ var import_perf_hooks = require("perf_hooks");
824
854
 
825
855
  // node_modules/eventemitter3/index.mjs
826
856
  var import_index = __toESM(require_eventemitter3(), 1);
@@ -2330,6 +2360,298 @@ function padRight(str, length) {
2330
2360
  return str.padEnd(length);
2331
2361
  }
2332
2362
 
2363
+ // src/utils/logger.ts
2364
+ var LOG_LEVEL_PRIORITY = {
2365
+ error: 0,
2366
+ warn: 1,
2367
+ info: 2,
2368
+ debug: 3
2369
+ };
2370
+ function createEmptyMetrics() {
2371
+ return {
2372
+ filesScanned: 0,
2373
+ filesParsed: 0,
2374
+ parseMs: 0,
2375
+ chunksProcessed: 0,
2376
+ chunksEmbedded: 0,
2377
+ chunksFromCache: 0,
2378
+ chunksRemoved: 0,
2379
+ embeddingApiCalls: 0,
2380
+ embeddingTokensUsed: 0,
2381
+ embeddingErrors: 0,
2382
+ searchCount: 0,
2383
+ searchTotalMs: 0,
2384
+ searchAvgMs: 0,
2385
+ searchLastMs: 0,
2386
+ embeddingCallMs: 0,
2387
+ vectorSearchMs: 0,
2388
+ keywordSearchMs: 0,
2389
+ fusionMs: 0,
2390
+ cacheHits: 0,
2391
+ cacheMisses: 0,
2392
+ queryCacheHits: 0,
2393
+ queryCacheSimilarHits: 0,
2394
+ queryCacheMisses: 0,
2395
+ gcRuns: 0,
2396
+ gcOrphansRemoved: 0,
2397
+ gcChunksRemoved: 0,
2398
+ gcEmbeddingsRemoved: 0
2399
+ };
2400
+ }
2401
+ var Logger = class {
2402
+ config;
2403
+ metrics;
2404
+ logs = [];
2405
+ maxLogs = 1e3;
2406
+ constructor(config) {
2407
+ this.config = config;
2408
+ this.metrics = createEmptyMetrics();
2409
+ }
2410
+ shouldLog(level) {
2411
+ if (!this.config.enabled) return false;
2412
+ return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[this.config.logLevel];
2413
+ }
2414
+ log(level, category, message, data) {
2415
+ if (!this.shouldLog(level)) return;
2416
+ const entry = {
2417
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2418
+ level,
2419
+ category,
2420
+ message,
2421
+ data
2422
+ };
2423
+ this.logs.push(entry);
2424
+ if (this.logs.length > this.maxLogs) {
2425
+ this.logs.shift();
2426
+ }
2427
+ }
2428
+ search(level, message, data) {
2429
+ if (this.config.logSearch) {
2430
+ this.log(level, "search", message, data);
2431
+ }
2432
+ }
2433
+ embedding(level, message, data) {
2434
+ if (this.config.logEmbedding) {
2435
+ this.log(level, "embedding", message, data);
2436
+ }
2437
+ }
2438
+ cache(level, message, data) {
2439
+ if (this.config.logCache) {
2440
+ this.log(level, "cache", message, data);
2441
+ }
2442
+ }
2443
+ gc(level, message, data) {
2444
+ if (this.config.logGc) {
2445
+ this.log(level, "gc", message, data);
2446
+ }
2447
+ }
2448
+ branch(level, message, data) {
2449
+ if (this.config.logBranch) {
2450
+ this.log(level, "branch", message, data);
2451
+ }
2452
+ }
2453
+ info(message, data) {
2454
+ this.log("info", "general", message, data);
2455
+ }
2456
+ warn(message, data) {
2457
+ this.log("warn", "general", message, data);
2458
+ }
2459
+ error(message, data) {
2460
+ this.log("error", "general", message, data);
2461
+ }
2462
+ debug(message, data) {
2463
+ this.log("debug", "general", message, data);
2464
+ }
2465
+ recordIndexingStart() {
2466
+ if (!this.config.metrics) return;
2467
+ this.metrics.indexingStartTime = Date.now();
2468
+ }
2469
+ recordIndexingEnd() {
2470
+ if (!this.config.metrics) return;
2471
+ this.metrics.indexingEndTime = Date.now();
2472
+ }
2473
+ recordFilesScanned(count) {
2474
+ if (!this.config.metrics) return;
2475
+ this.metrics.filesScanned = count;
2476
+ }
2477
+ recordFilesParsed(count) {
2478
+ if (!this.config.metrics) return;
2479
+ this.metrics.filesParsed = count;
2480
+ }
2481
+ recordParseDuration(durationMs) {
2482
+ if (!this.config.metrics) return;
2483
+ this.metrics.parseMs = durationMs;
2484
+ }
2485
+ recordChunksProcessed(count) {
2486
+ if (!this.config.metrics) return;
2487
+ this.metrics.chunksProcessed += count;
2488
+ }
2489
+ recordChunksEmbedded(count) {
2490
+ if (!this.config.metrics) return;
2491
+ this.metrics.chunksEmbedded += count;
2492
+ }
2493
+ recordChunksFromCache(count) {
2494
+ if (!this.config.metrics) return;
2495
+ this.metrics.chunksFromCache += count;
2496
+ }
2497
+ recordChunksRemoved(count) {
2498
+ if (!this.config.metrics) return;
2499
+ this.metrics.chunksRemoved += count;
2500
+ }
2501
+ recordEmbeddingApiCall(tokens) {
2502
+ if (!this.config.metrics) return;
2503
+ this.metrics.embeddingApiCalls++;
2504
+ this.metrics.embeddingTokensUsed += tokens;
2505
+ }
2506
+ recordEmbeddingError() {
2507
+ if (!this.config.metrics) return;
2508
+ this.metrics.embeddingErrors++;
2509
+ }
2510
+ recordSearch(durationMs, breakdown) {
2511
+ if (!this.config.metrics) return;
2512
+ this.metrics.searchCount++;
2513
+ this.metrics.searchTotalMs += durationMs;
2514
+ this.metrics.searchLastMs = durationMs;
2515
+ this.metrics.searchAvgMs = this.metrics.searchTotalMs / this.metrics.searchCount;
2516
+ if (breakdown) {
2517
+ this.metrics.embeddingCallMs = breakdown.embeddingMs;
2518
+ this.metrics.vectorSearchMs = breakdown.vectorMs;
2519
+ this.metrics.keywordSearchMs = breakdown.keywordMs;
2520
+ this.metrics.fusionMs = breakdown.fusionMs;
2521
+ }
2522
+ }
2523
+ recordCacheHit() {
2524
+ if (!this.config.metrics) return;
2525
+ this.metrics.cacheHits++;
2526
+ }
2527
+ recordCacheMiss() {
2528
+ if (!this.config.metrics) return;
2529
+ this.metrics.cacheMisses++;
2530
+ }
2531
+ recordQueryCacheHit() {
2532
+ if (!this.config.metrics) return;
2533
+ this.metrics.queryCacheHits++;
2534
+ }
2535
+ recordQueryCacheSimilarHit() {
2536
+ if (!this.config.metrics) return;
2537
+ this.metrics.queryCacheSimilarHits++;
2538
+ }
2539
+ recordQueryCacheMiss() {
2540
+ if (!this.config.metrics) return;
2541
+ this.metrics.queryCacheMisses++;
2542
+ }
2543
+ recordGc(orphans, chunks, embeddings) {
2544
+ if (!this.config.metrics) return;
2545
+ this.metrics.gcRuns++;
2546
+ this.metrics.gcOrphansRemoved += orphans;
2547
+ this.metrics.gcChunksRemoved += chunks;
2548
+ this.metrics.gcEmbeddingsRemoved += embeddings;
2549
+ }
2550
+ getMetrics() {
2551
+ return { ...this.metrics };
2552
+ }
2553
+ getLogs(limit) {
2554
+ const logs = [...this.logs];
2555
+ if (limit) {
2556
+ return logs.slice(-limit);
2557
+ }
2558
+ return logs;
2559
+ }
2560
+ getLogsByCategory(category, limit) {
2561
+ const filtered = this.logs.filter((l) => l.category === category);
2562
+ if (limit) {
2563
+ return filtered.slice(-limit);
2564
+ }
2565
+ return filtered;
2566
+ }
2567
+ getLogsByLevel(level, limit) {
2568
+ const filtered = this.logs.filter((l) => l.level === level);
2569
+ if (limit) {
2570
+ return filtered.slice(-limit);
2571
+ }
2572
+ return filtered;
2573
+ }
2574
+ resetMetrics() {
2575
+ this.metrics = createEmptyMetrics();
2576
+ }
2577
+ clearLogs() {
2578
+ this.logs = [];
2579
+ }
2580
+ formatMetrics() {
2581
+ const m = this.metrics;
2582
+ const lines = [];
2583
+ lines.push("=== Metrics ===");
2584
+ if (m.indexingStartTime && m.indexingEndTime) {
2585
+ const duration = m.indexingEndTime - m.indexingStartTime;
2586
+ lines.push(`Indexing duration: ${(duration / 1e3).toFixed(2)}s`);
2587
+ }
2588
+ lines.push("");
2589
+ lines.push("Indexing:");
2590
+ lines.push(` Files scanned: ${m.filesScanned}`);
2591
+ lines.push(` Files parsed: ${m.filesParsed}`);
2592
+ lines.push(` Chunks processed: ${m.chunksProcessed}`);
2593
+ lines.push(` Chunks embedded: ${m.chunksEmbedded}`);
2594
+ lines.push(` Chunks from cache: ${m.chunksFromCache}`);
2595
+ lines.push(` Chunks removed: ${m.chunksRemoved}`);
2596
+ lines.push("");
2597
+ lines.push("Embedding API:");
2598
+ lines.push(` API calls: ${m.embeddingApiCalls}`);
2599
+ lines.push(` Tokens used: ${m.embeddingTokensUsed.toLocaleString()}`);
2600
+ lines.push(` Errors: ${m.embeddingErrors}`);
2601
+ if (m.searchCount > 0) {
2602
+ lines.push("");
2603
+ lines.push("Search:");
2604
+ lines.push(` Total searches: ${m.searchCount}`);
2605
+ lines.push(` Average time: ${m.searchAvgMs.toFixed(2)}ms`);
2606
+ lines.push(` Last search: ${m.searchLastMs.toFixed(2)}ms`);
2607
+ if (m.embeddingCallMs > 0) {
2608
+ lines.push(` - Embedding: ${m.embeddingCallMs.toFixed(2)}ms`);
2609
+ lines.push(` - Vector search: ${m.vectorSearchMs.toFixed(2)}ms`);
2610
+ lines.push(` - Keyword search: ${m.keywordSearchMs.toFixed(2)}ms`);
2611
+ lines.push(` - Fusion: ${m.fusionMs.toFixed(2)}ms`);
2612
+ }
2613
+ }
2614
+ const totalCacheOps = m.cacheHits + m.cacheMisses;
2615
+ if (totalCacheOps > 0) {
2616
+ lines.push("");
2617
+ lines.push("Cache:");
2618
+ lines.push(` Hits: ${m.cacheHits}`);
2619
+ lines.push(` Misses: ${m.cacheMisses}`);
2620
+ lines.push(` Hit rate: ${(m.cacheHits / totalCacheOps * 100).toFixed(1)}%`);
2621
+ }
2622
+ if (m.gcRuns > 0) {
2623
+ lines.push("");
2624
+ lines.push("Garbage Collection:");
2625
+ lines.push(` GC runs: ${m.gcRuns}`);
2626
+ lines.push(` Orphans removed: ${m.gcOrphansRemoved}`);
2627
+ lines.push(` Chunks removed: ${m.gcChunksRemoved}`);
2628
+ lines.push(` Embeddings removed: ${m.gcEmbeddingsRemoved}`);
2629
+ }
2630
+ return lines.join("\n");
2631
+ }
2632
+ formatRecentLogs(limit = 20) {
2633
+ const logs = this.getLogs(limit);
2634
+ if (logs.length === 0) {
2635
+ return "No logs recorded.";
2636
+ }
2637
+ return logs.map((l) => {
2638
+ const dataStr = l.data ? ` ${JSON.stringify(l.data)}` : "";
2639
+ return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
2640
+ }).join("\n");
2641
+ }
2642
+ isEnabled() {
2643
+ return this.config.enabled;
2644
+ }
2645
+ isMetricsEnabled() {
2646
+ return this.config.enabled && this.config.metrics;
2647
+ }
2648
+ };
2649
+ var globalLogger = null;
2650
+ function initializeLogger(config) {
2651
+ globalLogger = new Logger(config);
2652
+ return globalLogger;
2653
+ }
2654
+
2333
2655
  // src/native/index.ts
2334
2656
  var path3 = __toESM(require("path"), 1);
2335
2657
  var os2 = __toESM(require("os"), 1);
@@ -2459,6 +2781,21 @@ var VectorStore = class {
2459
2781
  metadata: JSON.parse(r.metadata)
2460
2782
  }));
2461
2783
  }
2784
+ getMetadata(id) {
2785
+ const result = this.inner.getMetadata(id);
2786
+ if (result === null || result === void 0) {
2787
+ return void 0;
2788
+ }
2789
+ return JSON.parse(result);
2790
+ }
2791
+ getMetadataBatch(ids) {
2792
+ const results = this.inner.getMetadataBatch(ids);
2793
+ const map = /* @__PURE__ */ new Map();
2794
+ for (const { key, metadata } of results) {
2795
+ map.set(key, JSON.parse(metadata));
2796
+ }
2797
+ return map;
2798
+ }
2462
2799
  };
2463
2800
  var CHARS_PER_TOKEN = 4;
2464
2801
  var MAX_BATCH_TOKENS = 7500;
@@ -2857,12 +3194,18 @@ var Indexer = class {
2857
3194
  failedBatchesPath = "";
2858
3195
  currentBranch = "default";
2859
3196
  baseBranch = "main";
3197
+ logger;
3198
+ queryEmbeddingCache = /* @__PURE__ */ new Map();
3199
+ maxQueryCacheSize = 100;
3200
+ queryCacheTtlMs = 5 * 60 * 1e3;
3201
+ querySimilarityThreshold = 0.85;
2860
3202
  constructor(projectRoot, config) {
2861
3203
  this.projectRoot = projectRoot;
2862
3204
  this.config = config;
2863
3205
  this.indexPath = this.getIndexPath();
2864
3206
  this.fileHashCachePath = path5.join(this.indexPath, "file-hashes.json");
2865
3207
  this.failedBatchesPath = path5.join(this.indexPath, "failed-batches.json");
3208
+ this.logger = initializeLogger(config.debug);
2866
3209
  }
2867
3210
  getIndexPath() {
2868
3211
  if (this.config.scope === "global") {
@@ -2941,6 +3284,11 @@ var Indexer = class {
2941
3284
  "No embedding provider available. Configure GitHub, OpenAI, Google, or Ollama."
2942
3285
  );
2943
3286
  }
3287
+ this.logger.info("Initializing indexer", {
3288
+ provider: this.detectedProvider.provider,
3289
+ model: this.detectedProvider.modelInfo.model,
3290
+ scope: this.config.scope
3291
+ });
2944
3292
  this.provider = createEmbeddingProvider(
2945
3293
  this.detectedProvider.credentials,
2946
3294
  this.detectedProvider.modelInfo
@@ -2972,9 +3320,14 @@ var Indexer = class {
2972
3320
  if (isGitRepo(this.projectRoot)) {
2973
3321
  this.currentBranch = getBranchOrDefault(this.projectRoot);
2974
3322
  this.baseBranch = getBaseBranch(this.projectRoot);
3323
+ this.logger.branch("info", "Detected git repository", {
3324
+ currentBranch: this.currentBranch,
3325
+ baseBranch: this.baseBranch
3326
+ });
2975
3327
  } else {
2976
3328
  this.currentBranch = "default";
2977
3329
  this.baseBranch = "default";
3330
+ this.logger.branch("debug", "Not a git repository, using default branch");
2978
3331
  }
2979
3332
  if (this.config.indexing.autoGc) {
2980
3333
  await this.maybeRunAutoGc();
@@ -3058,6 +3411,8 @@ var Indexer = class {
3058
3411
  }
3059
3412
  async index(onProgress) {
3060
3413
  const { store, provider, invertedIndex, database, detectedProvider } = await this.ensureInitialized();
3414
+ this.logger.recordIndexingStart();
3415
+ this.logger.info("Starting indexing", { projectRoot: this.projectRoot });
3061
3416
  const startTime = Date.now();
3062
3417
  const stats = {
3063
3418
  totalFiles: 0,
@@ -3087,6 +3442,11 @@ var Indexer = class {
3087
3442
  );
3088
3443
  stats.totalFiles = files.length;
3089
3444
  stats.skippedFiles = skipped;
3445
+ this.logger.recordFilesScanned(files.length);
3446
+ this.logger.cache("debug", "Scanning files for changes", {
3447
+ totalFiles: files.length,
3448
+ skippedFiles: skipped.length
3449
+ });
3090
3450
  const changedFiles = [];
3091
3451
  const unchangedFilePaths = /* @__PURE__ */ new Set();
3092
3452
  const currentFileHashes = /* @__PURE__ */ new Map();
@@ -3095,11 +3455,17 @@ var Indexer = class {
3095
3455
  currentFileHashes.set(f.path, currentHash);
3096
3456
  if (this.fileHashCache.get(f.path) === currentHash) {
3097
3457
  unchangedFilePaths.add(f.path);
3458
+ this.logger.recordCacheHit();
3098
3459
  } else {
3099
3460
  const content = await import_fs4.promises.readFile(f.path, "utf-8");
3100
3461
  changedFiles.push({ path: f.path, content, hash: currentHash });
3462
+ this.logger.recordCacheMiss();
3101
3463
  }
3102
3464
  }
3465
+ this.logger.cache("info", "File hash cache results", {
3466
+ unchanged: unchangedFilePaths.size,
3467
+ changed: changedFiles.length
3468
+ });
3103
3469
  onProgress?.({
3104
3470
  phase: "parsing",
3105
3471
  filesProcessed: 0,
@@ -3107,7 +3473,12 @@ var Indexer = class {
3107
3473
  chunksProcessed: 0,
3108
3474
  totalChunks: 0
3109
3475
  });
3476
+ const parseStartTime = import_perf_hooks.performance.now();
3110
3477
  const parsedFiles = parseFiles(changedFiles);
3478
+ const parseMs = import_perf_hooks.performance.now() - parseStartTime;
3479
+ this.logger.recordFilesParsed(parsedFiles.length);
3480
+ this.logger.recordParseDuration(parseMs);
3481
+ this.logger.debug("Parsed changed files", { parsedCount: parsedFiles.length, parseMs: parseMs.toFixed(2) });
3111
3482
  const existingChunks = /* @__PURE__ */ new Map();
3112
3483
  const existingChunksByFile = /* @__PURE__ */ new Map();
3113
3484
  for (const { key, metadata } of store.getAllMetadata()) {
@@ -3189,6 +3560,13 @@ var Indexer = class {
3189
3560
  stats.totalChunks = pendingChunks.length;
3190
3561
  stats.existingChunks = currentChunkIds.size - pendingChunks.length;
3191
3562
  stats.removedChunks = removedCount;
3563
+ this.logger.recordChunksProcessed(currentChunkIds.size);
3564
+ this.logger.recordChunksRemoved(removedCount);
3565
+ this.logger.info("Chunk analysis complete", {
3566
+ pending: pendingChunks.length,
3567
+ existing: stats.existingChunks,
3568
+ removed: removedCount
3569
+ });
3192
3570
  if (pendingChunks.length === 0 && removedCount === 0) {
3193
3571
  database.clearBranch(this.currentBranch);
3194
3572
  database.addChunksToBranchBatch(this.currentBranch, Array.from(currentChunkIds));
@@ -3232,6 +3610,11 @@ var Indexer = class {
3232
3610
  const missingHashes = new Set(database.getMissingEmbeddings(allContentHashes));
3233
3611
  const chunksNeedingEmbedding = pendingChunks.filter((c) => missingHashes.has(c.contentHash));
3234
3612
  const chunksWithExistingEmbedding = pendingChunks.filter((c) => !missingHashes.has(c.contentHash));
3613
+ this.logger.cache("info", "Embedding cache lookup", {
3614
+ needsEmbedding: chunksNeedingEmbedding.length,
3615
+ fromCache: chunksWithExistingEmbedding.length
3616
+ });
3617
+ this.logger.recordChunksFromCache(chunksWithExistingEmbedding.length);
3235
3618
  for (const chunk of chunksWithExistingEmbedding) {
3236
3619
  const embeddingBuffer = database.getEmbedding(chunk.contentHash);
3237
3620
  if (embeddingBuffer) {
@@ -3270,13 +3653,16 @@ var Indexer = class {
3270
3653
  const message = getErrorMessage(error);
3271
3654
  if (isRateLimitError(error)) {
3272
3655
  rateLimitBackoffMs = Math.min(providerRateLimits.maxRetryMs, (rateLimitBackoffMs || providerRateLimits.minRetryMs) * 2);
3273
- console.error(
3274
- `Rate limited (attempt ${error.attemptNumber}/${error.retriesLeft + error.attemptNumber}): waiting ${rateLimitBackoffMs / 1e3}s before retry...`
3275
- );
3656
+ this.logger.embedding("warn", `Rate limited, backing off`, {
3657
+ attempt: error.attemptNumber,
3658
+ retriesLeft: error.retriesLeft,
3659
+ backoffMs: rateLimitBackoffMs
3660
+ });
3276
3661
  } else {
3277
- console.error(
3278
- `Embedding batch failed (attempt ${error.attemptNumber}): ${message}`
3279
- );
3662
+ this.logger.embedding("error", `Embedding batch failed`, {
3663
+ attempt: error.attemptNumber,
3664
+ error: message
3665
+ });
3280
3666
  }
3281
3667
  }
3282
3668
  }
@@ -3303,6 +3689,12 @@ var Indexer = class {
3303
3689
  }
3304
3690
  stats.indexedChunks += batch.length;
3305
3691
  stats.tokensUsed += result.totalTokensUsed;
3692
+ this.logger.recordChunksEmbedded(batch.length);
3693
+ this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
3694
+ this.logger.embedding("debug", `Embedded batch`, {
3695
+ batchSize: batch.length,
3696
+ tokens: result.totalTokensUsed
3697
+ });
3306
3698
  onProgress?.({
3307
3699
  phase: "embedding",
3308
3700
  filesProcessed: files.length,
@@ -3313,7 +3705,11 @@ var Indexer = class {
3313
3705
  } catch (error) {
3314
3706
  stats.failedChunks += batch.length;
3315
3707
  this.addFailedBatch(batch, getErrorMessage(error));
3316
- console.error(`Failed to embed batch after retries: ${getErrorMessage(error)}`);
3708
+ this.logger.recordEmbeddingError();
3709
+ this.logger.embedding("error", `Failed to embed batch after retries`, {
3710
+ batchSize: batch.length,
3711
+ error: getErrorMessage(error)
3712
+ });
3317
3713
  }
3318
3714
  });
3319
3715
  }
@@ -3335,6 +3731,16 @@ var Indexer = class {
3335
3731
  await this.maybeRunOrphanGc();
3336
3732
  }
3337
3733
  stats.durationMs = Date.now() - startTime;
3734
+ this.logger.recordIndexingEnd();
3735
+ this.logger.info("Indexing complete", {
3736
+ files: stats.totalFiles,
3737
+ indexed: stats.indexedChunks,
3738
+ existing: stats.existingChunks,
3739
+ removed: stats.removedChunks,
3740
+ failed: stats.failedChunks,
3741
+ tokens: stats.tokensUsed,
3742
+ durationMs: stats.durationMs
3743
+ });
3338
3744
  if (stats.failedChunks > 0) {
3339
3745
  stats.failedBatchesPath = this.failedBatchesPath;
3340
3746
  }
@@ -3347,18 +3753,96 @@ var Indexer = class {
3347
3753
  });
3348
3754
  return stats;
3349
3755
  }
3756
+ async getQueryEmbedding(query, provider) {
3757
+ const now = Date.now();
3758
+ const cached = this.queryEmbeddingCache.get(query);
3759
+ if (cached && now - cached.timestamp < this.queryCacheTtlMs) {
3760
+ this.logger.cache("debug", "Query embedding cache hit (exact)", { query: query.slice(0, 50) });
3761
+ this.logger.recordQueryCacheHit();
3762
+ return cached.embedding;
3763
+ }
3764
+ const similarMatch = this.findSimilarCachedQuery(query, now);
3765
+ if (similarMatch) {
3766
+ this.logger.cache("debug", "Query embedding cache hit (similar)", {
3767
+ query: query.slice(0, 50),
3768
+ similarTo: similarMatch.key.slice(0, 50),
3769
+ similarity: similarMatch.similarity.toFixed(3)
3770
+ });
3771
+ this.logger.recordQueryCacheSimilarHit();
3772
+ return similarMatch.embedding;
3773
+ }
3774
+ this.logger.cache("debug", "Query embedding cache miss", { query: query.slice(0, 50) });
3775
+ this.logger.recordQueryCacheMiss();
3776
+ const { embedding, tokensUsed } = await provider.embed(query);
3777
+ this.logger.recordEmbeddingApiCall(tokensUsed);
3778
+ if (this.queryEmbeddingCache.size >= this.maxQueryCacheSize) {
3779
+ const oldestKey = this.queryEmbeddingCache.keys().next().value;
3780
+ if (oldestKey) {
3781
+ this.queryEmbeddingCache.delete(oldestKey);
3782
+ }
3783
+ }
3784
+ this.queryEmbeddingCache.set(query, { embedding, timestamp: now });
3785
+ return embedding;
3786
+ }
3787
+ findSimilarCachedQuery(query, now) {
3788
+ const queryTokens = this.tokenize(query);
3789
+ if (queryTokens.size === 0) return null;
3790
+ let bestMatch = null;
3791
+ for (const [cachedQuery, { embedding, timestamp }] of this.queryEmbeddingCache) {
3792
+ if (now - timestamp >= this.queryCacheTtlMs) continue;
3793
+ const cachedTokens = this.tokenize(cachedQuery);
3794
+ const similarity = this.jaccardSimilarity(queryTokens, cachedTokens);
3795
+ if (similarity >= this.querySimilarityThreshold) {
3796
+ if (!bestMatch || similarity > bestMatch.similarity) {
3797
+ bestMatch = { key: cachedQuery, embedding, similarity };
3798
+ }
3799
+ }
3800
+ }
3801
+ return bestMatch;
3802
+ }
3803
+ tokenize(text) {
3804
+ return new Set(
3805
+ text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((t) => t.length > 1)
3806
+ );
3807
+ }
3808
+ jaccardSimilarity(a, b) {
3809
+ if (a.size === 0 && b.size === 0) return 1;
3810
+ if (a.size === 0 || b.size === 0) return 0;
3811
+ let intersection = 0;
3812
+ for (const token of a) {
3813
+ if (b.has(token)) intersection++;
3814
+ }
3815
+ const union = a.size + b.size - intersection;
3816
+ return intersection / union;
3817
+ }
3350
3818
  async search(query, limit, options) {
3819
+ const searchStartTime = import_perf_hooks.performance.now();
3351
3820
  const { store, provider, database } = await this.ensureInitialized();
3352
3821
  if (store.count() === 0) {
3822
+ this.logger.search("debug", "Search on empty index", { query });
3353
3823
  return [];
3354
3824
  }
3355
3825
  const maxResults = limit ?? this.config.search.maxResults;
3356
3826
  const hybridWeight = options?.hybridWeight ?? this.config.search.hybridWeight;
3357
3827
  const filterByBranch = options?.filterByBranch ?? true;
3358
- const { embedding } = await provider.embed(query);
3828
+ this.logger.search("debug", "Starting search", {
3829
+ query,
3830
+ maxResults,
3831
+ hybridWeight,
3832
+ filterByBranch
3833
+ });
3834
+ const embeddingStartTime = import_perf_hooks.performance.now();
3835
+ const embedding = await this.getQueryEmbedding(query, provider);
3836
+ const embeddingMs = import_perf_hooks.performance.now() - embeddingStartTime;
3837
+ const vectorStartTime = import_perf_hooks.performance.now();
3359
3838
  const semanticResults = store.search(embedding, maxResults * 4);
3839
+ const vectorMs = import_perf_hooks.performance.now() - vectorStartTime;
3840
+ const keywordStartTime = import_perf_hooks.performance.now();
3360
3841
  const keywordResults = await this.keywordSearch(query, maxResults * 4);
3842
+ const keywordMs = import_perf_hooks.performance.now() - keywordStartTime;
3843
+ const fusionStartTime = import_perf_hooks.performance.now();
3361
3844
  const combined = this.fuseResults(semanticResults, keywordResults, hybridWeight, maxResults * 4);
3845
+ const fusionMs = import_perf_hooks.performance.now() - fusionStartTime;
3362
3846
  let branchChunkIds = null;
3363
3847
  if (filterByBranch && this.currentBranch !== "default") {
3364
3848
  branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
@@ -3379,6 +3863,22 @@ var Indexer = class {
3379
3863
  }
3380
3864
  return true;
3381
3865
  }).slice(0, maxResults);
3866
+ const totalSearchMs = import_perf_hooks.performance.now() - searchStartTime;
3867
+ this.logger.recordSearch(totalSearchMs, {
3868
+ embeddingMs,
3869
+ vectorMs,
3870
+ keywordMs,
3871
+ fusionMs
3872
+ });
3873
+ this.logger.search("info", "Search complete", {
3874
+ query,
3875
+ results: filtered.length,
3876
+ totalMs: Math.round(totalSearchMs * 100) / 100,
3877
+ embeddingMs: Math.round(embeddingMs * 100) / 100,
3878
+ vectorMs: Math.round(vectorMs * 100) / 100,
3879
+ keywordMs: Math.round(keywordMs * 100) / 100,
3880
+ fusionMs: Math.round(fusionMs * 100) / 100
3881
+ });
3382
3882
  return Promise.all(
3383
3883
  filtered.map(async (r) => {
3384
3884
  let content = "";
@@ -3417,11 +3917,8 @@ var Indexer = class {
3417
3917
  if (scores.size === 0) {
3418
3918
  return [];
3419
3919
  }
3420
- const allMetadata = store.getAllMetadata();
3421
- const metadataMap = /* @__PURE__ */ new Map();
3422
- for (const { key, metadata } of allMetadata) {
3423
- metadataMap.set(key, metadata);
3424
- }
3920
+ const chunkIds = Array.from(scores.keys());
3921
+ const metadataMap = store.getMetadataBatch(chunkIds);
3425
3922
  const results = [];
3426
3923
  for (const [chunkId, score] of scores) {
3427
3924
  const metadata = metadataMap.get(chunkId);
@@ -3484,6 +3981,7 @@ var Indexer = class {
3484
3981
  }
3485
3982
  async healthCheck() {
3486
3983
  const { store, invertedIndex, database } = await this.ensureInitialized();
3984
+ this.logger.gc("info", "Starting health check");
3487
3985
  const allMetadata = store.getAllMetadata();
3488
3986
  const filePathsToChunkKeys = /* @__PURE__ */ new Map();
3489
3987
  for (const { key, metadata } of allMetadata) {
@@ -3510,6 +4008,13 @@ var Indexer = class {
3510
4008
  }
3511
4009
  const gcOrphanEmbeddings = database.gcOrphanEmbeddings();
3512
4010
  const gcOrphanChunks = database.gcOrphanChunks();
4011
+ this.logger.recordGc(removedCount, gcOrphanChunks, gcOrphanEmbeddings);
4012
+ this.logger.gc("info", "Health check complete", {
4013
+ removedStale: removedCount,
4014
+ orphanEmbeddings: gcOrphanEmbeddings,
4015
+ orphanChunks: gcOrphanChunks,
4016
+ removedFiles: removedFilePaths.length
4017
+ });
3513
4018
  return { removed: removedCount, filePaths: removedFilePaths, gcOrphanEmbeddings, gcOrphanChunks };
3514
4019
  }
3515
4020
  async retryFailedBatches() {
@@ -3543,9 +4048,12 @@ var Indexer = class {
3543
4048
  invertedIndex.removeChunk(chunk.id);
3544
4049
  invertedIndex.addChunk(chunk.id, chunk.content);
3545
4050
  }
4051
+ this.logger.recordChunksEmbedded(batch.chunks.length);
4052
+ this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
3546
4053
  succeeded += batch.chunks.length;
3547
4054
  } catch (error) {
3548
4055
  failed += batch.chunks.length;
4056
+ this.logger.recordEmbeddingError();
3549
4057
  stillFailing.push({
3550
4058
  ...batch,
3551
4059
  attemptCount: batch.attemptCount + 1,
@@ -3580,6 +4088,9 @@ var Indexer = class {
3580
4088
  const { database } = await this.ensureInitialized();
3581
4089
  return database.getStats();
3582
4090
  }
4091
+ getLogger() {
4092
+ return this.logger;
4093
+ }
3583
4094
  };
3584
4095
 
3585
4096
  // node_modules/chokidar/index.js
@@ -5619,6 +6130,51 @@ var index_health_check = (0, import_plugin.tool)({
5619
6130
  return lines.join("\n");
5620
6131
  }
5621
6132
  });
6133
+ var index_metrics = (0, import_plugin.tool)({
6134
+ description: "Get metrics and performance statistics for the codebase index. Shows indexing stats, search timings, cache hit rates, and API usage. Requires debug.enabled=true and debug.metrics=true in config.",
6135
+ args: {},
6136
+ async execute() {
6137
+ const indexer = getIndexer();
6138
+ const logger = indexer.getLogger();
6139
+ if (!logger.isEnabled()) {
6140
+ return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
6141
+ }
6142
+ if (!logger.isMetricsEnabled()) {
6143
+ return 'Metrics collection is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
6144
+ }
6145
+ return logger.formatMetrics();
6146
+ }
6147
+ });
6148
+ var index_logs = (0, import_plugin.tool)({
6149
+ description: "Get recent debug logs from the codebase indexer. Shows timestamped log entries with level and category. Requires debug.enabled=true in config.",
6150
+ args: {
6151
+ limit: z.number().optional().default(20).describe("Maximum number of log entries to return"),
6152
+ category: z.enum(["search", "embedding", "cache", "gc", "branch", "general"]).optional().describe("Filter by log category"),
6153
+ level: z.enum(["error", "warn", "info", "debug"]).optional().describe("Filter by minimum log level")
6154
+ },
6155
+ async execute(args) {
6156
+ const indexer = getIndexer();
6157
+ const logger = indexer.getLogger();
6158
+ if (!logger.isEnabled()) {
6159
+ return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true\n }\n}\n```';
6160
+ }
6161
+ let logs;
6162
+ if (args.category) {
6163
+ logs = logger.getLogsByCategory(args.category, args.limit);
6164
+ } else if (args.level) {
6165
+ logs = logger.getLogsByLevel(args.level, args.limit);
6166
+ } else {
6167
+ logs = logger.getLogs(args.limit);
6168
+ }
6169
+ if (logs.length === 0) {
6170
+ return "No logs recorded yet. Logs are captured during indexing and search operations.";
6171
+ }
6172
+ return logs.map((l) => {
6173
+ const dataStr = l.data ? ` ${JSON.stringify(l.data)}` : "";
6174
+ return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
6175
+ }).join("\n");
6176
+ }
6177
+ });
5622
6178
  function formatIndexStats(stats, verbose = false) {
5623
6179
  const lines = [];
5624
6180
  if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
@@ -5804,7 +6360,9 @@ var plugin = async ({ directory }) => {
5804
6360
  codebase_search,
5805
6361
  index_codebase,
5806
6362
  index_status,
5807
- index_health_check
6363
+ index_health_check,
6364
+ index_metrics,
6365
+ index_logs
5808
6366
  },
5809
6367
  async config(cfg) {
5810
6368
  cfg.command = cfg.command ?? {};