opencode-codebase-index 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -708,8 +708,21 @@ function getDefaultSearchConfig() {
  contextLines: 0
  };
  }
+ function getDefaultDebugConfig() {
+ return {
+ enabled: false,
+ logLevel: "info",
+ logSearch: true,
+ logEmbedding: true,
+ logCache: true,
+ logGc: true,
+ logBranch: true,
+ metrics: true
+ };
+ }
  var VALID_PROVIDERS = ["auto", "github-copilot", "openai", "google", "ollama"];
  var VALID_SCOPES = ["project", "global"];
+ var VALID_LOG_LEVELS = ["error", "warn", "info", "debug"];
  function isValidProvider(value) {
  return typeof value === "string" && VALID_PROVIDERS.includes(value);
  }
@@ -719,10 +732,14 @@ function isValidScope(value) {
  function isStringArray(value) {
  return Array.isArray(value) && value.every((item) => typeof item === "string");
  }
+ function isValidLogLevel(value) {
+ return typeof value === "string" && VALID_LOG_LEVELS.includes(value);
+ }
  function parseConfig(raw) {
  const input = raw && typeof raw === "object" ? raw : {};
  const defaultIndexing = getDefaultIndexingConfig();
  const defaultSearch = getDefaultSearchConfig();
+ const defaultDebug = getDefaultDebugConfig();
  const rawIndexing = input.indexing && typeof input.indexing === "object" ? input.indexing : {};
  const indexing = {
  autoIndex: typeof rawIndexing.autoIndex === "boolean" ? rawIndexing.autoIndex : defaultIndexing.autoIndex,
@@ -744,6 +761,17 @@ function parseConfig(raw) {
  hybridWeight: typeof rawSearch.hybridWeight === "number" ? Math.min(1, Math.max(0, rawSearch.hybridWeight)) : defaultSearch.hybridWeight,
  contextLines: typeof rawSearch.contextLines === "number" ? Math.min(50, Math.max(0, rawSearch.contextLines)) : defaultSearch.contextLines
  };
+ const rawDebug = input.debug && typeof input.debug === "object" ? input.debug : {};
+ const debug = {
+ enabled: typeof rawDebug.enabled === "boolean" ? rawDebug.enabled : defaultDebug.enabled,
+ logLevel: isValidLogLevel(rawDebug.logLevel) ? rawDebug.logLevel : defaultDebug.logLevel,
+ logSearch: typeof rawDebug.logSearch === "boolean" ? rawDebug.logSearch : defaultDebug.logSearch,
+ logEmbedding: typeof rawDebug.logEmbedding === "boolean" ? rawDebug.logEmbedding : defaultDebug.logEmbedding,
+ logCache: typeof rawDebug.logCache === "boolean" ? rawDebug.logCache : defaultDebug.logCache,
+ logGc: typeof rawDebug.logGc === "boolean" ? rawDebug.logGc : defaultDebug.logGc,
+ logBranch: typeof rawDebug.logBranch === "boolean" ? rawDebug.logBranch : defaultDebug.logBranch,
+ metrics: typeof rawDebug.metrics === "boolean" ? rawDebug.metrics : defaultDebug.metrics
+ };
  return {
  embeddingProvider: isValidProvider(input.embeddingProvider) ? input.embeddingProvider : "auto",
  embeddingModel: typeof input.embeddingModel === "string" ? input.embeddingModel : "auto",
@@ -751,7 +779,8 @@ function parseConfig(raw) {
  include: isStringArray(input.include) ? input.include : DEFAULT_INCLUDE,
  exclude: isStringArray(input.exclude) ? input.exclude : DEFAULT_EXCLUDE,
  indexing,
- search
+ search,
+ debug
  };
  }
  var EMBEDDING_MODELS = {
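
0.3.2 adds a `debug` block to the plugin configuration, filled in by the `getDefaultDebugConfig`/`parseConfig` changes above. A minimal sketch of the resulting behavior (the call below is illustrative; only the field names, defaults, and fallback rules come from this diff):

```js
// Sketch: how parseConfig appears to fill in the new debug section.
const cfg = parseConfig({
  debug: {
    enabled: true,      // default is false, so debug features are opt-in
    logLevel: "debug",  // must be in VALID_LOG_LEVELS, otherwise falls back to "info"
    metrics: false      // any boolean field can be overridden individually
  }
});
// cfg.debug -> { enabled: true, logLevel: "debug", logSearch: true, logEmbedding: true,
//               logCache: true, logGc: true, logBranch: true, metrics: false }
```
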
@@ -816,6 +845,7 @@ function getDefaultModelForProvider(provider) {
  // src/indexer/index.ts
  import { existsSync as existsSync4, readFileSync as readFileSync4, writeFileSync, promises as fsPromises2 } from "fs";
  import * as path5 from "path";
+ import { performance as performance2 } from "perf_hooks";

  // node_modules/eventemitter3/index.mjs
  var import_index = __toESM(require_eventemitter3(), 1);
@@ -2325,6 +2355,298 @@ function padRight(str, length) {
  return str.padEnd(length);
  }

+ // src/utils/logger.ts
+ var LOG_LEVEL_PRIORITY = {
+ error: 0,
+ warn: 1,
+ info: 2,
+ debug: 3
+ };
+ function createEmptyMetrics() {
+ return {
+ filesScanned: 0,
+ filesParsed: 0,
+ parseMs: 0,
+ chunksProcessed: 0,
+ chunksEmbedded: 0,
+ chunksFromCache: 0,
+ chunksRemoved: 0,
+ embeddingApiCalls: 0,
+ embeddingTokensUsed: 0,
+ embeddingErrors: 0,
+ searchCount: 0,
+ searchTotalMs: 0,
+ searchAvgMs: 0,
+ searchLastMs: 0,
+ embeddingCallMs: 0,
+ vectorSearchMs: 0,
+ keywordSearchMs: 0,
+ fusionMs: 0,
+ cacheHits: 0,
+ cacheMisses: 0,
+ queryCacheHits: 0,
+ queryCacheSimilarHits: 0,
+ queryCacheMisses: 0,
+ gcRuns: 0,
+ gcOrphansRemoved: 0,
+ gcChunksRemoved: 0,
+ gcEmbeddingsRemoved: 0
+ };
+ }
+ var Logger = class {
+ config;
+ metrics;
+ logs = [];
+ maxLogs = 1e3;
+ constructor(config) {
+ this.config = config;
+ this.metrics = createEmptyMetrics();
+ }
+ shouldLog(level) {
+ if (!this.config.enabled) return false;
+ return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[this.config.logLevel];
+ }
+ log(level, category, message, data) {
+ if (!this.shouldLog(level)) return;
+ const entry = {
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ level,
+ category,
+ message,
+ data
+ };
+ this.logs.push(entry);
+ if (this.logs.length > this.maxLogs) {
+ this.logs.shift();
+ }
+ }
+ search(level, message, data) {
+ if (this.config.logSearch) {
+ this.log(level, "search", message, data);
+ }
+ }
+ embedding(level, message, data) {
+ if (this.config.logEmbedding) {
+ this.log(level, "embedding", message, data);
+ }
+ }
+ cache(level, message, data) {
+ if (this.config.logCache) {
+ this.log(level, "cache", message, data);
+ }
+ }
+ gc(level, message, data) {
+ if (this.config.logGc) {
+ this.log(level, "gc", message, data);
+ }
+ }
+ branch(level, message, data) {
+ if (this.config.logBranch) {
+ this.log(level, "branch", message, data);
+ }
+ }
+ info(message, data) {
+ this.log("info", "general", message, data);
+ }
+ warn(message, data) {
+ this.log("warn", "general", message, data);
+ }
+ error(message, data) {
+ this.log("error", "general", message, data);
+ }
+ debug(message, data) {
+ this.log("debug", "general", message, data);
+ }
+ recordIndexingStart() {
+ if (!this.config.metrics) return;
+ this.metrics.indexingStartTime = Date.now();
+ }
+ recordIndexingEnd() {
+ if (!this.config.metrics) return;
+ this.metrics.indexingEndTime = Date.now();
+ }
+ recordFilesScanned(count) {
+ if (!this.config.metrics) return;
+ this.metrics.filesScanned = count;
+ }
+ recordFilesParsed(count) {
+ if (!this.config.metrics) return;
+ this.metrics.filesParsed = count;
+ }
+ recordParseDuration(durationMs) {
+ if (!this.config.metrics) return;
+ this.metrics.parseMs = durationMs;
+ }
+ recordChunksProcessed(count) {
+ if (!this.config.metrics) return;
+ this.metrics.chunksProcessed += count;
+ }
+ recordChunksEmbedded(count) {
+ if (!this.config.metrics) return;
+ this.metrics.chunksEmbedded += count;
+ }
+ recordChunksFromCache(count) {
+ if (!this.config.metrics) return;
+ this.metrics.chunksFromCache += count;
+ }
+ recordChunksRemoved(count) {
+ if (!this.config.metrics) return;
+ this.metrics.chunksRemoved += count;
+ }
+ recordEmbeddingApiCall(tokens) {
+ if (!this.config.metrics) return;
+ this.metrics.embeddingApiCalls++;
+ this.metrics.embeddingTokensUsed += tokens;
+ }
+ recordEmbeddingError() {
+ if (!this.config.metrics) return;
+ this.metrics.embeddingErrors++;
+ }
+ recordSearch(durationMs, breakdown) {
+ if (!this.config.metrics) return;
+ this.metrics.searchCount++;
+ this.metrics.searchTotalMs += durationMs;
+ this.metrics.searchLastMs = durationMs;
+ this.metrics.searchAvgMs = this.metrics.searchTotalMs / this.metrics.searchCount;
+ if (breakdown) {
+ this.metrics.embeddingCallMs = breakdown.embeddingMs;
+ this.metrics.vectorSearchMs = breakdown.vectorMs;
+ this.metrics.keywordSearchMs = breakdown.keywordMs;
+ this.metrics.fusionMs = breakdown.fusionMs;
+ }
+ }
+ recordCacheHit() {
+ if (!this.config.metrics) return;
+ this.metrics.cacheHits++;
+ }
+ recordCacheMiss() {
+ if (!this.config.metrics) return;
+ this.metrics.cacheMisses++;
+ }
+ recordQueryCacheHit() {
+ if (!this.config.metrics) return;
+ this.metrics.queryCacheHits++;
+ }
+ recordQueryCacheSimilarHit() {
+ if (!this.config.metrics) return;
+ this.metrics.queryCacheSimilarHits++;
+ }
+ recordQueryCacheMiss() {
+ if (!this.config.metrics) return;
+ this.metrics.queryCacheMisses++;
+ }
+ recordGc(orphans, chunks, embeddings) {
+ if (!this.config.metrics) return;
+ this.metrics.gcRuns++;
+ this.metrics.gcOrphansRemoved += orphans;
+ this.metrics.gcChunksRemoved += chunks;
+ this.metrics.gcEmbeddingsRemoved += embeddings;
+ }
+ getMetrics() {
+ return { ...this.metrics };
+ }
+ getLogs(limit) {
+ const logs = [...this.logs];
+ if (limit) {
+ return logs.slice(-limit);
+ }
+ return logs;
+ }
+ getLogsByCategory(category, limit) {
+ const filtered = this.logs.filter((l) => l.category === category);
+ if (limit) {
+ return filtered.slice(-limit);
+ }
+ return filtered;
+ }
+ getLogsByLevel(level, limit) {
+ const filtered = this.logs.filter((l) => l.level === level);
+ if (limit) {
+ return filtered.slice(-limit);
+ }
+ return filtered;
+ }
+ resetMetrics() {
+ this.metrics = createEmptyMetrics();
+ }
+ clearLogs() {
+ this.logs = [];
+ }
+ formatMetrics() {
+ const m = this.metrics;
+ const lines = [];
+ lines.push("=== Metrics ===");
+ if (m.indexingStartTime && m.indexingEndTime) {
+ const duration = m.indexingEndTime - m.indexingStartTime;
+ lines.push(`Indexing duration: ${(duration / 1e3).toFixed(2)}s`);
+ }
+ lines.push("");
+ lines.push("Indexing:");
+ lines.push(` Files scanned: ${m.filesScanned}`);
+ lines.push(` Files parsed: ${m.filesParsed}`);
+ lines.push(` Chunks processed: ${m.chunksProcessed}`);
+ lines.push(` Chunks embedded: ${m.chunksEmbedded}`);
+ lines.push(` Chunks from cache: ${m.chunksFromCache}`);
+ lines.push(` Chunks removed: ${m.chunksRemoved}`);
+ lines.push("");
+ lines.push("Embedding API:");
+ lines.push(` API calls: ${m.embeddingApiCalls}`);
+ lines.push(` Tokens used: ${m.embeddingTokensUsed.toLocaleString()}`);
+ lines.push(` Errors: ${m.embeddingErrors}`);
+ if (m.searchCount > 0) {
+ lines.push("");
+ lines.push("Search:");
+ lines.push(` Total searches: ${m.searchCount}`);
+ lines.push(` Average time: ${m.searchAvgMs.toFixed(2)}ms`);
+ lines.push(` Last search: ${m.searchLastMs.toFixed(2)}ms`);
+ if (m.embeddingCallMs > 0) {
+ lines.push(` - Embedding: ${m.embeddingCallMs.toFixed(2)}ms`);
+ lines.push(` - Vector search: ${m.vectorSearchMs.toFixed(2)}ms`);
+ lines.push(` - Keyword search: ${m.keywordSearchMs.toFixed(2)}ms`);
+ lines.push(` - Fusion: ${m.fusionMs.toFixed(2)}ms`);
+ }
+ }
+ const totalCacheOps = m.cacheHits + m.cacheMisses;
+ if (totalCacheOps > 0) {
+ lines.push("");
+ lines.push("Cache:");
+ lines.push(` Hits: ${m.cacheHits}`);
+ lines.push(` Misses: ${m.cacheMisses}`);
+ lines.push(` Hit rate: ${(m.cacheHits / totalCacheOps * 100).toFixed(1)}%`);
+ }
+ if (m.gcRuns > 0) {
+ lines.push("");
+ lines.push("Garbage Collection:");
+ lines.push(` GC runs: ${m.gcRuns}`);
+ lines.push(` Orphans removed: ${m.gcOrphansRemoved}`);
+ lines.push(` Chunks removed: ${m.gcChunksRemoved}`);
+ lines.push(` Embeddings removed: ${m.gcEmbeddingsRemoved}`);
+ }
+ return lines.join("\n");
+ }
+ formatRecentLogs(limit = 20) {
+ const logs = this.getLogs(limit);
+ if (logs.length === 0) {
+ return "No logs recorded.";
+ }
+ return logs.map((l) => {
+ const dataStr = l.data ? ` ${JSON.stringify(l.data)}` : "";
+ return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
+ }).join("\n");
+ }
+ isEnabled() {
+ return this.config.enabled;
+ }
+ isMetricsEnabled() {
+ return this.config.enabled && this.config.metrics;
+ }
+ };
+ var globalLogger = null;
+ function initializeLogger(config) {
+ globalLogger = new Logger(config);
+ return globalLogger;
+ }
+
  // src/native/index.ts
  import * as path3 from "path";
  import * as os2 from "os";
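
A minimal usage sketch of the new in-memory `Logger` (bundled from src/utils/logger.ts above), assuming it receives a fully populated debug config such as the one `parseConfig` produces; the sample values are invented:

```js
const logger = initializeLogger({
  enabled: true, logLevel: "info", logSearch: true, logEmbedding: true,
  logCache: true, logGc: true, logBranch: true, metrics: true
});
// Category helpers respect the per-category flags; entries go to a ring buffer of 1000.
logger.search("info", "Search complete", { query: "auth middleware", results: 8 });
// Counters and timings are only recorded when debug.metrics is true.
logger.recordSearch(42.5, { embeddingMs: 30, vectorMs: 5, keywordMs: 4, fusionMs: 1 });
console.log(logger.formatMetrics());     // human-readable metrics report
console.log(logger.formatRecentLogs(5)); // last 5 buffered log entries
```
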
@@ -2453,6 +2775,21 @@ var VectorStore = class {
  metadata: JSON.parse(r.metadata)
  }));
  }
+ getMetadata(id) {
+ const result = this.inner.getMetadata(id);
+ if (result === null || result === void 0) {
+ return void 0;
+ }
+ return JSON.parse(result);
+ }
+ getMetadataBatch(ids) {
+ const results = this.inner.getMetadataBatch(ids);
+ const map = /* @__PURE__ */ new Map();
+ for (const { key, metadata } of results) {
+ map.set(key, JSON.parse(metadata));
+ }
+ return map;
+ }
  };
  var CHARS_PER_TOKEN = 4;
  var MAX_BATCH_TOKENS = 7500;
@@ -2851,12 +3188,18 @@ var Indexer = class {
  failedBatchesPath = "";
  currentBranch = "default";
  baseBranch = "main";
+ logger;
+ queryEmbeddingCache = /* @__PURE__ */ new Map();
+ maxQueryCacheSize = 100;
+ queryCacheTtlMs = 5 * 60 * 1e3;
+ querySimilarityThreshold = 0.85;
  constructor(projectRoot, config) {
  this.projectRoot = projectRoot;
  this.config = config;
  this.indexPath = this.getIndexPath();
  this.fileHashCachePath = path5.join(this.indexPath, "file-hashes.json");
  this.failedBatchesPath = path5.join(this.indexPath, "failed-batches.json");
+ this.logger = initializeLogger(config.debug);
  }
  getIndexPath() {
  if (this.config.scope === "global") {
@@ -2935,6 +3278,11 @@ var Indexer = class {
  "No embedding provider available. Configure GitHub, OpenAI, Google, or Ollama."
  );
  }
+ this.logger.info("Initializing indexer", {
+ provider: this.detectedProvider.provider,
+ model: this.detectedProvider.modelInfo.model,
+ scope: this.config.scope
+ });
  this.provider = createEmbeddingProvider(
  this.detectedProvider.credentials,
  this.detectedProvider.modelInfo
@@ -2966,9 +3314,14 @@ var Indexer = class {
  if (isGitRepo(this.projectRoot)) {
  this.currentBranch = getBranchOrDefault(this.projectRoot);
  this.baseBranch = getBaseBranch(this.projectRoot);
+ this.logger.branch("info", "Detected git repository", {
+ currentBranch: this.currentBranch,
+ baseBranch: this.baseBranch
+ });
  } else {
  this.currentBranch = "default";
  this.baseBranch = "default";
+ this.logger.branch("debug", "Not a git repository, using default branch");
  }
  if (this.config.indexing.autoGc) {
  await this.maybeRunAutoGc();
@@ -3052,6 +3405,8 @@ var Indexer = class {
  }
  async index(onProgress) {
  const { store, provider, invertedIndex, database, detectedProvider } = await this.ensureInitialized();
+ this.logger.recordIndexingStart();
+ this.logger.info("Starting indexing", { projectRoot: this.projectRoot });
  const startTime = Date.now();
  const stats = {
  totalFiles: 0,
@@ -3081,6 +3436,11 @@ var Indexer = class {
  );
  stats.totalFiles = files.length;
  stats.skippedFiles = skipped;
+ this.logger.recordFilesScanned(files.length);
+ this.logger.cache("debug", "Scanning files for changes", {
+ totalFiles: files.length,
+ skippedFiles: skipped.length
+ });
  const changedFiles = [];
  const unchangedFilePaths = /* @__PURE__ */ new Set();
  const currentFileHashes = /* @__PURE__ */ new Map();
@@ -3089,11 +3449,17 @@ var Indexer = class {
  currentFileHashes.set(f.path, currentHash);
  if (this.fileHashCache.get(f.path) === currentHash) {
  unchangedFilePaths.add(f.path);
+ this.logger.recordCacheHit();
  } else {
  const content = await fsPromises2.readFile(f.path, "utf-8");
  changedFiles.push({ path: f.path, content, hash: currentHash });
+ this.logger.recordCacheMiss();
  }
  }
+ this.logger.cache("info", "File hash cache results", {
+ unchanged: unchangedFilePaths.size,
+ changed: changedFiles.length
+ });
  onProgress?.({
  phase: "parsing",
  filesProcessed: 0,
@@ -3101,7 +3467,12 @@ var Indexer = class {
  chunksProcessed: 0,
  totalChunks: 0
  });
+ const parseStartTime = performance2.now();
  const parsedFiles = parseFiles(changedFiles);
+ const parseMs = performance2.now() - parseStartTime;
+ this.logger.recordFilesParsed(parsedFiles.length);
+ this.logger.recordParseDuration(parseMs);
+ this.logger.debug("Parsed changed files", { parsedCount: parsedFiles.length, parseMs: parseMs.toFixed(2) });
  const existingChunks = /* @__PURE__ */ new Map();
  const existingChunksByFile = /* @__PURE__ */ new Map();
  for (const { key, metadata } of store.getAllMetadata()) {
@@ -3183,6 +3554,13 @@ var Indexer = class {
  stats.totalChunks = pendingChunks.length;
  stats.existingChunks = currentChunkIds.size - pendingChunks.length;
  stats.removedChunks = removedCount;
+ this.logger.recordChunksProcessed(currentChunkIds.size);
+ this.logger.recordChunksRemoved(removedCount);
+ this.logger.info("Chunk analysis complete", {
+ pending: pendingChunks.length,
+ existing: stats.existingChunks,
+ removed: removedCount
+ });
  if (pendingChunks.length === 0 && removedCount === 0) {
  database.clearBranch(this.currentBranch);
  database.addChunksToBranchBatch(this.currentBranch, Array.from(currentChunkIds));
@@ -3226,6 +3604,11 @@ var Indexer = class {
  const missingHashes = new Set(database.getMissingEmbeddings(allContentHashes));
  const chunksNeedingEmbedding = pendingChunks.filter((c) => missingHashes.has(c.contentHash));
  const chunksWithExistingEmbedding = pendingChunks.filter((c) => !missingHashes.has(c.contentHash));
+ this.logger.cache("info", "Embedding cache lookup", {
+ needsEmbedding: chunksNeedingEmbedding.length,
+ fromCache: chunksWithExistingEmbedding.length
+ });
+ this.logger.recordChunksFromCache(chunksWithExistingEmbedding.length);
  for (const chunk of chunksWithExistingEmbedding) {
  const embeddingBuffer = database.getEmbedding(chunk.contentHash);
  if (embeddingBuffer) {
@@ -3264,13 +3647,16 @@ var Indexer = class {
  const message = getErrorMessage(error);
  if (isRateLimitError(error)) {
  rateLimitBackoffMs = Math.min(providerRateLimits.maxRetryMs, (rateLimitBackoffMs || providerRateLimits.minRetryMs) * 2);
- console.error(
- `Rate limited (attempt ${error.attemptNumber}/${error.retriesLeft + error.attemptNumber}): waiting ${rateLimitBackoffMs / 1e3}s before retry...`
- );
+ this.logger.embedding("warn", `Rate limited, backing off`, {
+ attempt: error.attemptNumber,
+ retriesLeft: error.retriesLeft,
+ backoffMs: rateLimitBackoffMs
+ });
  } else {
- console.error(
- `Embedding batch failed (attempt ${error.attemptNumber}): ${message}`
- );
+ this.logger.embedding("error", `Embedding batch failed`, {
+ attempt: error.attemptNumber,
+ error: message
+ });
  }
  }
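
For clarity, a worked example of the backoff line retained above; the `providerRateLimits` numbers here are hypothetical, and only the double-then-cap behavior comes from the code:

```js
const providerRateLimits = { minRetryMs: 1000, maxRetryMs: 30000 }; // illustrative values
let rateLimitBackoffMs = 0;
for (let attempt = 1; attempt <= 6; attempt++) {
  rateLimitBackoffMs = Math.min(
    providerRateLimits.maxRetryMs,
    (rateLimitBackoffMs || providerRateLimits.minRetryMs) * 2
  );
  console.log(attempt, rateLimitBackoffMs); // 2000, 4000, 8000, 16000, 30000, 30000
}
```
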
@@ -3297,6 +3683,12 @@ var Indexer = class {
  }
  stats.indexedChunks += batch.length;
  stats.tokensUsed += result.totalTokensUsed;
+ this.logger.recordChunksEmbedded(batch.length);
+ this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
+ this.logger.embedding("debug", `Embedded batch`, {
+ batchSize: batch.length,
+ tokens: result.totalTokensUsed
+ });
  onProgress?.({
  phase: "embedding",
  filesProcessed: files.length,
@@ -3307,7 +3699,11 @@ var Indexer = class {
  } catch (error) {
  stats.failedChunks += batch.length;
  this.addFailedBatch(batch, getErrorMessage(error));
- console.error(`Failed to embed batch after retries: ${getErrorMessage(error)}`);
+ this.logger.recordEmbeddingError();
+ this.logger.embedding("error", `Failed to embed batch after retries`, {
+ batchSize: batch.length,
+ error: getErrorMessage(error)
+ });
  }
  });
  }
@@ -3329,6 +3725,16 @@ var Indexer = class {
  await this.maybeRunOrphanGc();
  }
  stats.durationMs = Date.now() - startTime;
+ this.logger.recordIndexingEnd();
+ this.logger.info("Indexing complete", {
+ files: stats.totalFiles,
+ indexed: stats.indexedChunks,
+ existing: stats.existingChunks,
+ removed: stats.removedChunks,
+ failed: stats.failedChunks,
+ tokens: stats.tokensUsed,
+ durationMs: stats.durationMs
+ });
  if (stats.failedChunks > 0) {
  stats.failedBatchesPath = this.failedBatchesPath;
  }
@@ -3341,18 +3747,96 @@ var Indexer = class {
  });
  return stats;
  }
+ async getQueryEmbedding(query, provider) {
+ const now = Date.now();
+ const cached = this.queryEmbeddingCache.get(query);
+ if (cached && now - cached.timestamp < this.queryCacheTtlMs) {
+ this.logger.cache("debug", "Query embedding cache hit (exact)", { query: query.slice(0, 50) });
+ this.logger.recordQueryCacheHit();
+ return cached.embedding;
+ }
+ const similarMatch = this.findSimilarCachedQuery(query, now);
+ if (similarMatch) {
+ this.logger.cache("debug", "Query embedding cache hit (similar)", {
+ query: query.slice(0, 50),
+ similarTo: similarMatch.key.slice(0, 50),
+ similarity: similarMatch.similarity.toFixed(3)
+ });
+ this.logger.recordQueryCacheSimilarHit();
+ return similarMatch.embedding;
+ }
+ this.logger.cache("debug", "Query embedding cache miss", { query: query.slice(0, 50) });
+ this.logger.recordQueryCacheMiss();
+ const { embedding, tokensUsed } = await provider.embed(query);
+ this.logger.recordEmbeddingApiCall(tokensUsed);
+ if (this.queryEmbeddingCache.size >= this.maxQueryCacheSize) {
+ const oldestKey = this.queryEmbeddingCache.keys().next().value;
+ if (oldestKey) {
+ this.queryEmbeddingCache.delete(oldestKey);
+ }
+ }
+ this.queryEmbeddingCache.set(query, { embedding, timestamp: now });
+ return embedding;
+ }
+ findSimilarCachedQuery(query, now) {
+ const queryTokens = this.tokenize(query);
+ if (queryTokens.size === 0) return null;
+ let bestMatch = null;
+ for (const [cachedQuery, { embedding, timestamp }] of this.queryEmbeddingCache) {
+ if (now - timestamp >= this.queryCacheTtlMs) continue;
+ const cachedTokens = this.tokenize(cachedQuery);
+ const similarity = this.jaccardSimilarity(queryTokens, cachedTokens);
+ if (similarity >= this.querySimilarityThreshold) {
+ if (!bestMatch || similarity > bestMatch.similarity) {
+ bestMatch = { key: cachedQuery, embedding, similarity };
+ }
+ }
+ }
+ return bestMatch;
+ }
+ tokenize(text) {
+ return new Set(
+ text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((t) => t.length > 1)
+ );
+ }
+ jaccardSimilarity(a, b) {
+ if (a.size === 0 && b.size === 0) return 1;
+ if (a.size === 0 || b.size === 0) return 0;
+ let intersection = 0;
+ for (const token of a) {
+ if (b.has(token)) intersection++;
+ }
+ const union = a.size + b.size - intersection;
+ return intersection / union;
+ }
  async search(query, limit, options) {
+ const searchStartTime = performance2.now();
  const { store, provider, database } = await this.ensureInitialized();
  if (store.count() === 0) {
+ this.logger.search("debug", "Search on empty index", { query });
  return [];
  }
  const maxResults = limit ?? this.config.search.maxResults;
  const hybridWeight = options?.hybridWeight ?? this.config.search.hybridWeight;
  const filterByBranch = options?.filterByBranch ?? true;
- const { embedding } = await provider.embed(query);
+ this.logger.search("debug", "Starting search", {
+ query,
+ maxResults,
+ hybridWeight,
+ filterByBranch
+ });
+ const embeddingStartTime = performance2.now();
+ const embedding = await this.getQueryEmbedding(query, provider);
+ const embeddingMs = performance2.now() - embeddingStartTime;
+ const vectorStartTime = performance2.now();
  const semanticResults = store.search(embedding, maxResults * 4);
+ const vectorMs = performance2.now() - vectorStartTime;
+ const keywordStartTime = performance2.now();
  const keywordResults = await this.keywordSearch(query, maxResults * 4);
+ const keywordMs = performance2.now() - keywordStartTime;
+ const fusionStartTime = performance2.now();
  const combined = this.fuseResults(semanticResults, keywordResults, hybridWeight, maxResults * 4);
+ const fusionMs = performance2.now() - fusionStartTime;
  let branchChunkIds = null;
  if (filterByBranch && this.currentBranch !== "default") {
  branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
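
The new query-embedding cache reuses an embedding for a near-duplicate query when the token-set Jaccard similarity reaches 0.85 within a 5-minute TTL. An illustrative check (the queries and numbers are made up; the similarity function mirrors `jaccardSimilarity` above):

```js
function jaccard(a, b) {
  let intersection = 0;
  for (const t of a) if (b.has(t)) intersection++;
  return intersection / (a.size + b.size - intersection);
}
// tokenize() lowercases, strips punctuation, and drops 1-character tokens.
const a = new Set(["how", "does", "the", "indexer", "cache", "embeddings"]);
const b = new Set(["how", "does", "indexer", "cache", "embeddings"]);
console.log(jaccard(a, b).toFixed(3)); // "0.833" — below the 0.85 threshold, so no "similar" hit;
// only an exact match or a pair scoring >= 0.85 would reuse the cached embedding.
```
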
@@ -3373,6 +3857,22 @@ var Indexer = class {
  }
  return true;
  }).slice(0, maxResults);
+ const totalSearchMs = performance2.now() - searchStartTime;
+ this.logger.recordSearch(totalSearchMs, {
+ embeddingMs,
+ vectorMs,
+ keywordMs,
+ fusionMs
+ });
+ this.logger.search("info", "Search complete", {
+ query,
+ results: filtered.length,
+ totalMs: Math.round(totalSearchMs * 100) / 100,
+ embeddingMs: Math.round(embeddingMs * 100) / 100,
+ vectorMs: Math.round(vectorMs * 100) / 100,
+ keywordMs: Math.round(keywordMs * 100) / 100,
+ fusionMs: Math.round(fusionMs * 100) / 100
+ });
  return Promise.all(
  filtered.map(async (r) => {
  let content = "";
@@ -3411,11 +3911,8 @@ var Indexer = class {
  if (scores.size === 0) {
  return [];
  }
- const allMetadata = store.getAllMetadata();
- const metadataMap = /* @__PURE__ */ new Map();
- for (const { key, metadata } of allMetadata) {
- metadataMap.set(key, metadata);
- }
+ const chunkIds = Array.from(scores.keys());
+ const metadataMap = store.getMetadataBatch(chunkIds);
  const results = [];
  for (const [chunkId, score] of scores) {
  const metadata = metadataMap.get(chunkId);
@@ -3478,6 +3975,7 @@ var Indexer = class {
  }
  async healthCheck() {
  const { store, invertedIndex, database } = await this.ensureInitialized();
+ this.logger.gc("info", "Starting health check");
  const allMetadata = store.getAllMetadata();
  const filePathsToChunkKeys = /* @__PURE__ */ new Map();
  for (const { key, metadata } of allMetadata) {
@@ -3504,6 +4002,13 @@ var Indexer = class {
  }
  const gcOrphanEmbeddings = database.gcOrphanEmbeddings();
  const gcOrphanChunks = database.gcOrphanChunks();
+ this.logger.recordGc(removedCount, gcOrphanChunks, gcOrphanEmbeddings);
+ this.logger.gc("info", "Health check complete", {
+ removedStale: removedCount,
+ orphanEmbeddings: gcOrphanEmbeddings,
+ orphanChunks: gcOrphanChunks,
+ removedFiles: removedFilePaths.length
+ });
  return { removed: removedCount, filePaths: removedFilePaths, gcOrphanEmbeddings, gcOrphanChunks };
  }
  async retryFailedBatches() {
@@ -3537,9 +4042,12 @@ var Indexer = class {
  invertedIndex.removeChunk(chunk.id);
  invertedIndex.addChunk(chunk.id, chunk.content);
  }
+ this.logger.recordChunksEmbedded(batch.chunks.length);
+ this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
  succeeded += batch.chunks.length;
  } catch (error) {
  failed += batch.chunks.length;
+ this.logger.recordEmbeddingError();
  stillFailing.push({
  ...batch,
  attemptCount: batch.attemptCount + 1,
@@ -3574,6 +4082,9 @@ var Indexer = class {
  const { database } = await this.ensureInitialized();
  return database.getStats();
  }
+ getLogger() {
+ return this.logger;
+ }
  };

  // node_modules/chokidar/index.js
@@ -5613,6 +6124,51 @@ var index_health_check = tool({
  return lines.join("\n");
  }
  });
+ var index_metrics = tool({
+ description: "Get metrics and performance statistics for the codebase index. Shows indexing stats, search timings, cache hit rates, and API usage. Requires debug.enabled=true and debug.metrics=true in config.",
+ args: {},
+ async execute() {
+ const indexer = getIndexer();
+ const logger = indexer.getLogger();
+ if (!logger.isEnabled()) {
+ return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
+ }
+ if (!logger.isMetricsEnabled()) {
+ return 'Metrics collection is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
+ }
+ return logger.formatMetrics();
+ }
+ });
+ var index_logs = tool({
+ description: "Get recent debug logs from the codebase indexer. Shows timestamped log entries with level and category. Requires debug.enabled=true in config.",
+ args: {
+ limit: z.number().optional().default(20).describe("Maximum number of log entries to return"),
+ category: z.enum(["search", "embedding", "cache", "gc", "branch", "general"]).optional().describe("Filter by log category"),
+ level: z.enum(["error", "warn", "info", "debug"]).optional().describe("Filter by minimum log level")
+ },
+ async execute(args) {
+ const indexer = getIndexer();
+ const logger = indexer.getLogger();
+ if (!logger.isEnabled()) {
+ return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true\n }\n}\n```';
+ }
+ let logs;
+ if (args.category) {
+ logs = logger.getLogsByCategory(args.category, args.limit);
+ } else if (args.level) {
+ logs = logger.getLogsByLevel(args.level, args.limit);
+ } else {
+ logs = logger.getLogs(args.limit);
+ }
+ if (logs.length === 0) {
+ return "No logs recorded yet. Logs are captured during indexing and search operations.";
+ }
+ return logs.map((l) => {
+ const dataStr = l.data ? ` ${JSON.stringify(l.data)}` : "";
+ return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
+ }).join("\n");
+ }
+ });
  function formatIndexStats(stats, verbose = false) {
  const lines = [];
  if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
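
An illustration of the argument shapes the two new tools accept, per the zod schemas above (how the host invokes the tools is opencode's concern; the objects below only show valid inputs, with invented values). Note the filter precedence in index_logs: `category` is checked before `level`, so when both are supplied only the category filter applies.

```js
// Shape of a returned log line (values invented):
// [2025-01-01T12:00:00.000Z] [INFO] [search] Search complete {"query":"auth","results":8}
const exampleArgs = {
  indexMetrics: {},                                          // index_metrics takes no arguments
  recentEmbeddingLogs: { limit: 50, category: "embedding" }, // last 50 embedding-category entries
  errorsOnly: { level: "error" }                             // error-level entries, default limit 20
};
```
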
@@ -5797,7 +6353,9 @@ var plugin = async ({ directory }) => {
  codebase_search,
  index_codebase,
  index_status,
- index_health_check
+ index_health_check,
+ index_metrics,
+ index_logs
  },
  async config(cfg) {
  cfg.command = cfg.command ?? {};