opencode-codebase-index 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -1
- package/dist/index.cjs +728 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +728 -17
- package/dist/index.js.map +1 -1
- package/native/codebase-index-native.darwin-arm64.node +0 -0
- package/native/codebase-index-native.darwin-x64.node +0 -0
- package/native/codebase-index-native.linux-arm64-gnu.node +0 -0
- package/native/codebase-index-native.linux-x64-gnu.node +0 -0
- package/native/codebase-index-native.win32-x64-msvc.node +0 -0
- package/package.json +3 -1
- package/skill/SKILL.md +39 -164
package/dist/index.cjs
CHANGED
|
@@ -713,8 +713,21 @@ function getDefaultSearchConfig() {
|
|
|
713
713
|
contextLines: 0
|
|
714
714
|
};
|
|
715
715
|
}
|
|
716
|
+
/**
 * Builds the default `debug` section of the configuration.
 *
 * Debug logging itself is switched off (`enabled: false`) and the level is
 * "info"; every per-category flag and `metrics` default to `true`.
 *
 * @returns {object} A fresh defaults object on every call.
 */
function getDefaultDebugConfig() {
  const defaults = { enabled: false, logLevel: "info" };
  // All remaining switches share the same default, so set them in one pass
  // (in the same key order the config is documented in).
  for (const flag of ["logSearch", "logEmbedding", "logCache", "logGc", "logBranch", "metrics"]) {
    defaults[flag] = true;
  }
  return defaults;
}
|
|
716
728
|
// Embedding provider names accepted by isValidProvider().
var VALID_PROVIDERS = [
  "auto",
  "github-copilot",
  "openai",
  "google",
  "ollama"
];
// Accepted index scopes (presumably checked by isValidScope, which is outside this view).
var VALID_SCOPES = [
  "project",
  "global"
];
// Log level names accepted by isValidLogLevel().
var VALID_LOG_LEVELS = [
  "error",
  "warn",
  "info",
  "debug"
];
|
|
718
731
|
/**
 * Tells whether `value` is one of the provider names in VALID_PROVIDERS.
 *
 * @param {unknown} value - Candidate taken from raw configuration.
 * @returns {boolean} `true` only for a string listed in VALID_PROVIDERS.
 */
function isValidProvider(value) {
  if (typeof value !== "string") {
    return false;
  }
  return VALID_PROVIDERS.includes(value);
}
|
|
@@ -724,10 +737,14 @@ function isValidScope(value) {
|
|
|
724
737
|
/**
 * Tells whether `value` is an array whose elements are all strings.
 * An empty array qualifies.
 *
 * @param {unknown} value - Candidate taken from raw configuration.
 * @returns {boolean} `true` when `value` is an array with no non-string element.
 */
function isStringArray(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  const hasNonString = value.some((item) => typeof item !== "string");
  return !hasNonString;
}
|
|
740
|
+
/**
 * Tells whether `value` is one of the level names in VALID_LOG_LEVELS.
 *
 * @param {unknown} value - Candidate taken from raw configuration.
 * @returns {boolean} `true` only for a string listed in VALID_LOG_LEVELS.
 */
function isValidLogLevel(value) {
  if (typeof value !== "string") {
    return false;
  }
  return VALID_LOG_LEVELS.includes(value);
}
|
|
727
743
|
function parseConfig(raw) {
|
|
728
744
|
const input = raw && typeof raw === "object" ? raw : {};
|
|
729
745
|
const defaultIndexing = getDefaultIndexingConfig();
|
|
730
746
|
const defaultSearch = getDefaultSearchConfig();
|
|
747
|
+
const defaultDebug = getDefaultDebugConfig();
|
|
731
748
|
const rawIndexing = input.indexing && typeof input.indexing === "object" ? input.indexing : {};
|
|
732
749
|
const indexing = {
|
|
733
750
|
autoIndex: typeof rawIndexing.autoIndex === "boolean" ? rawIndexing.autoIndex : defaultIndexing.autoIndex,
|
|
@@ -749,6 +766,17 @@ function parseConfig(raw) {
|
|
|
749
766
|
hybridWeight: typeof rawSearch.hybridWeight === "number" ? Math.min(1, Math.max(0, rawSearch.hybridWeight)) : defaultSearch.hybridWeight,
|
|
750
767
|
contextLines: typeof rawSearch.contextLines === "number" ? Math.min(50, Math.max(0, rawSearch.contextLines)) : defaultSearch.contextLines
|
|
751
768
|
};
|
|
769
|
+
const rawDebug = input.debug && typeof input.debug === "object" ? input.debug : {};
|
|
770
|
+
const debug = {
|
|
771
|
+
enabled: typeof rawDebug.enabled === "boolean" ? rawDebug.enabled : defaultDebug.enabled,
|
|
772
|
+
logLevel: isValidLogLevel(rawDebug.logLevel) ? rawDebug.logLevel : defaultDebug.logLevel,
|
|
773
|
+
logSearch: typeof rawDebug.logSearch === "boolean" ? rawDebug.logSearch : defaultDebug.logSearch,
|
|
774
|
+
logEmbedding: typeof rawDebug.logEmbedding === "boolean" ? rawDebug.logEmbedding : defaultDebug.logEmbedding,
|
|
775
|
+
logCache: typeof rawDebug.logCache === "boolean" ? rawDebug.logCache : defaultDebug.logCache,
|
|
776
|
+
logGc: typeof rawDebug.logGc === "boolean" ? rawDebug.logGc : defaultDebug.logGc,
|
|
777
|
+
logBranch: typeof rawDebug.logBranch === "boolean" ? rawDebug.logBranch : defaultDebug.logBranch,
|
|
778
|
+
metrics: typeof rawDebug.metrics === "boolean" ? rawDebug.metrics : defaultDebug.metrics
|
|
779
|
+
};
|
|
752
780
|
return {
|
|
753
781
|
embeddingProvider: isValidProvider(input.embeddingProvider) ? input.embeddingProvider : "auto",
|
|
754
782
|
embeddingModel: typeof input.embeddingModel === "string" ? input.embeddingModel : "auto",
|
|
@@ -756,7 +784,8 @@ function parseConfig(raw) {
|
|
|
756
784
|
include: isStringArray(input.include) ? input.include : DEFAULT_INCLUDE,
|
|
757
785
|
exclude: isStringArray(input.exclude) ? input.exclude : DEFAULT_EXCLUDE,
|
|
758
786
|
indexing,
|
|
759
|
-
search
|
|
787
|
+
search,
|
|
788
|
+
debug
|
|
760
789
|
};
|
|
761
790
|
}
|
|
762
791
|
var EMBEDDING_MODELS = {
|
|
@@ -821,6 +850,7 @@ function getDefaultModelForProvider(provider) {
|
|
|
821
850
|
// src/indexer/index.ts
|
|
822
851
|
var import_fs4 = require("fs");
|
|
823
852
|
var path5 = __toESM(require("path"), 1);
|
|
853
|
+
var import_perf_hooks = require("perf_hooks");
|
|
824
854
|
|
|
825
855
|
// node_modules/eventemitter3/index.mjs
|
|
826
856
|
var import_index = __toESM(require_eventemitter3(), 1);
|
|
@@ -2330,6 +2360,298 @@ function padRight(str, length) {
|
|
|
2330
2360
|
return str.padEnd(length);
|
|
2331
2361
|
}
|
|
2332
2362
|
|
|
2363
|
+
// src/utils/logger.ts
|
|
2364
|
+
// Severity rank of each log level: a lower number is more severe. An entry is
// kept when its rank is <= the rank of the configured `logLevel`.
var LOG_LEVEL_PRIORITY = {
  error: 0,
  warn: 1,
  info: 2,
  debug: 3
};

/**
 * Builds a metrics record with every counter and duration set to zero.
 * `indexingStartTime` / `indexingEndTime` are absent until an indexing run
 * records them.
 *
 * @returns {object} A new, independent metrics object.
 */
function createEmptyMetrics() {
  return {
    filesScanned: 0,
    filesParsed: 0,
    parseMs: 0,
    chunksProcessed: 0,
    chunksEmbedded: 0,
    chunksFromCache: 0,
    chunksRemoved: 0,
    embeddingApiCalls: 0,
    embeddingTokensUsed: 0,
    embeddingErrors: 0,
    searchCount: 0,
    searchTotalMs: 0,
    searchAvgMs: 0,
    searchLastMs: 0,
    embeddingCallMs: 0,
    vectorSearchMs: 0,
    keywordSearchMs: 0,
    fusionMs: 0,
    cacheHits: 0,
    cacheMisses: 0,
    queryCacheHits: 0,
    queryCacheSimilarHits: 0,
    queryCacheMisses: 0,
    gcRuns: 0,
    gcOrphansRemoved: 0,
    gcChunksRemoved: 0,
    gcEmbeddingsRemoved: 0
  };
}

/**
 * In-memory logger and metrics collector.
 *
 * Log entries are only stored in `this.logs` (nothing is printed); the buffer
 * keeps at most `maxLogs` entries and drops the oldest one when it overflows.
 * Logging is gated by `config.enabled`, `config.logLevel` and, for the
 * category helpers, the matching `config.log*` flag. Metric recorders are
 * gated by `config.metrics` only.
 */
var Logger = class {
  config;
  metrics;
  logs = [];
  maxLogs = 1e3;

  /**
   * @param {object} config - Debug configuration (`enabled`, `logLevel`,
   *   `logSearch`, `logEmbedding`, `logCache`, `logGc`, `logBranch`, `metrics`).
   */
  constructor(config) {
    this.config = config;
    this.metrics = createEmptyMetrics();
  }

  /**
   * @param {string} level - One of the LOG_LEVEL_PRIORITY keys.
   * @returns {boolean} Whether an entry at `level` passes the configured level.
   */
  shouldLog(level) {
    if (!this.config.enabled) return false;
    return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[this.config.logLevel];
  }

  /**
   * Stores one entry in the in-memory buffer if `level` is currently enabled.
   *
   * @param {string} level - Log level of the entry.
   * @param {string} category - Free-form category tag (e.g. "search").
   * @param {string} message - Human-readable message.
   * @param {*} [data] - Optional structured payload.
   */
  log(level, category, message, data) {
    if (!this.shouldLog(level)) return;
    const entry = {
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      level,
      category,
      message,
      data
    };
    this.logs.push(entry);
    // Bounded buffer: discard the oldest entry once the cap is exceeded.
    if (this.logs.length > this.maxLogs) {
      this.logs.shift();
    }
  }

  /** Logs under the "search" category when `config.logSearch` is set. */
  search(level, message, data) {
    if (this.config.logSearch) {
      this.log(level, "search", message, data);
    }
  }

  /** Logs under the "embedding" category when `config.logEmbedding` is set. */
  embedding(level, message, data) {
    if (this.config.logEmbedding) {
      this.log(level, "embedding", message, data);
    }
  }

  /** Logs under the "cache" category when `config.logCache` is set. */
  cache(level, message, data) {
    if (this.config.logCache) {
      this.log(level, "cache", message, data);
    }
  }

  /** Logs under the "gc" category when `config.logGc` is set. */
  gc(level, message, data) {
    if (this.config.logGc) {
      this.log(level, "gc", message, data);
    }
  }

  /** Logs under the "branch" category when `config.logBranch` is set. */
  branch(level, message, data) {
    if (this.config.logBranch) {
      this.log(level, "branch", message, data);
    }
  }

  /** Logs an "info" entry in the "general" category. */
  info(message, data) {
    this.log("info", "general", message, data);
  }

  /** Logs a "warn" entry in the "general" category. */
  warn(message, data) {
    this.log("warn", "general", message, data);
  }

  /** Logs an "error" entry in the "general" category. */
  error(message, data) {
    this.log("error", "general", message, data);
  }

  /** Logs a "debug" entry in the "general" category. */
  debug(message, data) {
    this.log("debug", "general", message, data);
  }

  /** Marks the start of an indexing run. */
  recordIndexingStart() {
    if (!this.config.metrics) return;
    this.metrics.indexingStartTime = Date.now();
    // Forget the previous run's end time; otherwise formatMetrics() would
    // report a negative duration while the new run is still in progress.
    this.metrics.indexingEndTime = void 0;
  }

  /** Marks the end of an indexing run. */
  recordIndexingEnd() {
    if (!this.config.metrics) return;
    this.metrics.indexingEndTime = Date.now();
  }

  /** Sets (does not accumulate) the number of files scanned. */
  recordFilesScanned(count) {
    if (!this.config.metrics) return;
    this.metrics.filesScanned = count;
  }

  /** Sets (does not accumulate) the number of files parsed. */
  recordFilesParsed(count) {
    if (!this.config.metrics) return;
    this.metrics.filesParsed = count;
  }

  /** Sets (does not accumulate) the parse duration in milliseconds. */
  recordParseDuration(durationMs) {
    if (!this.config.metrics) return;
    this.metrics.parseMs = durationMs;
  }

  /** Adds `count` to the processed-chunk counter. */
  recordChunksProcessed(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksProcessed += count;
  }

  /** Adds `count` to the embedded-chunk counter. */
  recordChunksEmbedded(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksEmbedded += count;
  }

  /** Adds `count` to the chunks-served-from-cache counter. */
  recordChunksFromCache(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksFromCache += count;
  }

  /** Adds `count` to the removed-chunk counter. */
  recordChunksRemoved(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksRemoved += count;
  }

  /** Counts one embedding API call and adds `tokens` to the token total. */
  recordEmbeddingApiCall(tokens) {
    if (!this.config.metrics) return;
    this.metrics.embeddingApiCalls++;
    this.metrics.embeddingTokensUsed += tokens;
  }

  /** Counts one embedding failure. */
  recordEmbeddingError() {
    if (!this.config.metrics) return;
    this.metrics.embeddingErrors++;
  }

  /**
   * Records one search: updates the count, total, last and average duration.
   * When `breakdown` is given, the per-phase timings of this search overwrite
   * the previous ones (they are not averaged).
   *
   * @param {number} durationMs - Total duration of the search.
   * @param {{embeddingMs: number, vectorMs: number, keywordMs: number, fusionMs: number}} [breakdown]
   */
  recordSearch(durationMs, breakdown) {
    if (!this.config.metrics) return;
    this.metrics.searchCount++;
    this.metrics.searchTotalMs += durationMs;
    this.metrics.searchLastMs = durationMs;
    this.metrics.searchAvgMs = this.metrics.searchTotalMs / this.metrics.searchCount;
    if (breakdown) {
      this.metrics.embeddingCallMs = breakdown.embeddingMs;
      this.metrics.vectorSearchMs = breakdown.vectorMs;
      this.metrics.keywordSearchMs = breakdown.keywordMs;
      this.metrics.fusionMs = breakdown.fusionMs;
    }
  }

  /** Counts one cache hit. */
  recordCacheHit() {
    if (!this.config.metrics) return;
    this.metrics.cacheHits++;
  }

  /** Counts one cache miss. */
  recordCacheMiss() {
    if (!this.config.metrics) return;
    this.metrics.cacheMisses++;
  }

  /** Counts one exact query-cache hit. */
  recordQueryCacheHit() {
    if (!this.config.metrics) return;
    this.metrics.queryCacheHits++;
  }

  /** Counts one similar-query cache hit. */
  recordQueryCacheSimilarHit() {
    if (!this.config.metrics) return;
    this.metrics.queryCacheSimilarHits++;
  }

  /** Counts one query-cache miss. */
  recordQueryCacheMiss() {
    if (!this.config.metrics) return;
    this.metrics.queryCacheMisses++;
  }

  /** Counts one GC run and accumulates what it removed. */
  recordGc(orphans, chunks, embeddings) {
    if (!this.config.metrics) return;
    this.metrics.gcRuns++;
    this.metrics.gcOrphansRemoved += orphans;
    this.metrics.gcChunksRemoved += chunks;
    this.metrics.gcEmbeddingsRemoved += embeddings;
  }

  /** @returns {object} A shallow copy of the current metrics. */
  getMetrics() {
    return { ...this.metrics };
  }

  /**
   * Returns the trailing `limit` items of `entries`.
   * A missing (or non-numeric) limit means "no limit"; a limit of zero or
   * less yields an empty array instead of silently returning everything.
   *
   * @param {Array} entries - Array owned by the caller.
   * @param {number} [limit] - Maximum number of trailing entries.
   * @returns {Array}
   */
  takeLast(entries, limit) {
    if (limit == null) return entries;
    const count = Math.trunc(Number(limit));
    if (Number.isNaN(count)) return entries;
    return count > 0 ? entries.slice(-count) : [];
  }

  /**
   * Renders an entry's payload for formatRecentLogs().
   * Only `null`/`undefined` are treated as "no data", so `0`, `false` and `""`
   * are still shown; payloads JSON cannot serialise do not throw.
   *
   * @param {*} data - Payload stored on a log entry.
   * @returns {string} Empty string, or the payload prefixed with one space.
   */
  serializeData(data) {
    if (data == null) return "";
    try {
      const json = JSON.stringify(data);
      // JSON.stringify returns undefined for e.g. functions and symbols.
      return ` ${json === void 0 ? String(data) : json}`;
    } catch {
      // Circular structures and BigInt values make JSON.stringify throw.
      return " [unserializable data]";
    }
  }

  /**
   * @param {number} [limit] - Keep only the most recent `limit` entries.
   * @returns {Array} A copy of the stored entries, oldest first.
   */
  getLogs(limit) {
    return this.takeLast([...this.logs], limit);
  }

  /**
   * @param {string} category - Category to keep.
   * @param {number} [limit] - Keep only the most recent `limit` matches.
   * @returns {Array} Matching entries, oldest first.
   */
  getLogsByCategory(category, limit) {
    const filtered = this.logs.filter((l) => l.category === category);
    return this.takeLast(filtered, limit);
  }

  /**
   * @param {string} level - Level to keep.
   * @param {number} [limit] - Keep only the most recent `limit` matches.
   * @returns {Array} Matching entries, oldest first.
   */
  getLogsByLevel(level, limit) {
    const filtered = this.logs.filter((l) => l.level === level);
    return this.takeLast(filtered, limit);
  }

  /** Resets every metric to its initial value. */
  resetMetrics() {
    this.metrics = createEmptyMetrics();
  }

  /** Discards every stored log entry. */
  clearLogs() {
    this.logs = [];
  }

  /**
   * Renders the metrics as a multi-line text report. The Search, Cache,
   * Query cache and Garbage Collection sections appear only when they have
   * recorded activity.
   *
   * @returns {string}
   */
  formatMetrics() {
    const m = this.metrics;
    const lines = [];
    lines.push("=== Metrics ===");
    if (m.indexingStartTime && m.indexingEndTime) {
      const duration = m.indexingEndTime - m.indexingStartTime;
      lines.push(`Indexing duration: ${(duration / 1e3).toFixed(2)}s`);
    }
    lines.push("");
    lines.push("Indexing:");
    lines.push(` Files scanned: ${m.filesScanned}`);
    lines.push(` Files parsed: ${m.filesParsed}`);
    lines.push(` Chunks processed: ${m.chunksProcessed}`);
    lines.push(` Chunks embedded: ${m.chunksEmbedded}`);
    lines.push(` Chunks from cache: ${m.chunksFromCache}`);
    lines.push(` Chunks removed: ${m.chunksRemoved}`);
    lines.push("");
    lines.push("Embedding API:");
    lines.push(` API calls: ${m.embeddingApiCalls}`);
    lines.push(` Tokens used: ${m.embeddingTokensUsed.toLocaleString()}`);
    lines.push(` Errors: ${m.embeddingErrors}`);
    if (m.searchCount > 0) {
      lines.push("");
      lines.push("Search:");
      lines.push(` Total searches: ${m.searchCount}`);
      lines.push(` Average time: ${m.searchAvgMs.toFixed(2)}ms`);
      lines.push(` Last search: ${m.searchLastMs.toFixed(2)}ms`);
      if (m.embeddingCallMs > 0) {
        lines.push(` - Embedding: ${m.embeddingCallMs.toFixed(2)}ms`);
        lines.push(` - Vector search: ${m.vectorSearchMs.toFixed(2)}ms`);
        lines.push(` - Keyword search: ${m.keywordSearchMs.toFixed(2)}ms`);
        lines.push(` - Fusion: ${m.fusionMs.toFixed(2)}ms`);
      }
    }
    const totalCacheOps = m.cacheHits + m.cacheMisses;
    if (totalCacheOps > 0) {
      lines.push("");
      lines.push("Cache:");
      lines.push(` Hits: ${m.cacheHits}`);
      lines.push(` Misses: ${m.cacheMisses}`);
      lines.push(` Hit rate: ${(m.cacheHits / totalCacheOps * 100).toFixed(1)}%`);
    }
    // The query-cache counters are collected by the record* methods above;
    // surface them here so they are visible in the report.
    const totalQueryCacheOps = m.queryCacheHits + m.queryCacheSimilarHits + m.queryCacheMisses;
    if (totalQueryCacheOps > 0) {
      lines.push("");
      lines.push("Query cache:");
      lines.push(` Exact hits: ${m.queryCacheHits}`);
      lines.push(` Similar hits: ${m.queryCacheSimilarHits}`);
      lines.push(` Misses: ${m.queryCacheMisses}`);
    }
    if (m.gcRuns > 0) {
      lines.push("");
      lines.push("Garbage Collection:");
      lines.push(` GC runs: ${m.gcRuns}`);
      lines.push(` Orphans removed: ${m.gcOrphansRemoved}`);
      lines.push(` Chunks removed: ${m.gcChunksRemoved}`);
      lines.push(` Embeddings removed: ${m.gcEmbeddingsRemoved}`);
    }
    return lines.join("\n");
  }

  /**
   * Renders the most recent entries, one per line, oldest first.
   *
   * @param {number} [limit=20] - Maximum number of entries to render.
   * @returns {string} The rendered lines, or "No logs recorded." when empty.
   */
  formatRecentLogs(limit = 20) {
    const logs = this.getLogs(limit);
    if (logs.length === 0) {
      return "No logs recorded.";
    }
    return logs.map((l) => {
      const dataStr = this.serializeData(l.data);
      return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
    }).join("\n");
  }

  /** @returns {boolean} The `enabled` flag of the configuration. */
  isEnabled() {
    return this.config.enabled;
  }

  /**
   * @returns {boolean} Whether both `enabled` and `metrics` are set.
   * NOTE(review): the record* methods check `config.metrics` only, so metrics
   * can be collected while this returns false — confirm that is intended.
   */
  isMetricsEnabled() {
    return this.config.enabled && this.config.metrics;
  }
};

// Most recently created logger; replaced on every initializeLogger() call.
var globalLogger = null;

/**
 * Creates a Logger for `config`, stores it as the module-level logger and
 * returns it.
 *
 * @param {object} config - Debug configuration passed to the Logger.
 * @returns {Logger} The newly created logger.
 */
function initializeLogger(config) {
  globalLogger = new Logger(config);
  return globalLogger;
}
|
|
2654
|
+
|
|
2333
2655
|
// src/native/index.ts
|
|
2334
2656
|
var path3 = __toESM(require("path"), 1);
|
|
2335
2657
|
var os2 = __toESM(require("os"), 1);
|
|
@@ -2459,6 +2781,21 @@ var VectorStore = class {
|
|
|
2459
2781
|
metadata: JSON.parse(r.metadata)
|
|
2460
2782
|
}));
|
|
2461
2783
|
}
|
|
2784
|
+
getMetadata(id) {
|
|
2785
|
+
const result = this.inner.getMetadata(id);
|
|
2786
|
+
if (result === null || result === void 0) {
|
|
2787
|
+
return void 0;
|
|
2788
|
+
}
|
|
2789
|
+
return JSON.parse(result);
|
|
2790
|
+
}
|
|
2791
|
+
getMetadataBatch(ids) {
|
|
2792
|
+
const results = this.inner.getMetadataBatch(ids);
|
|
2793
|
+
const map = /* @__PURE__ */ new Map();
|
|
2794
|
+
for (const { key, metadata } of results) {
|
|
2795
|
+
map.set(key, JSON.parse(metadata));
|
|
2796
|
+
}
|
|
2797
|
+
return map;
|
|
2798
|
+
}
|
|
2462
2799
|
};
|
|
2463
2800
|
var CHARS_PER_TOKEN = 4;
|
|
2464
2801
|
var MAX_BATCH_TOKENS = 7500;
|
|
@@ -2857,12 +3194,18 @@ var Indexer = class {
|
|
|
2857
3194
|
failedBatchesPath = "";
|
|
2858
3195
|
currentBranch = "default";
|
|
2859
3196
|
baseBranch = "main";
|
|
3197
|
+
logger;
|
|
3198
|
+
queryEmbeddingCache = /* @__PURE__ */ new Map();
|
|
3199
|
+
maxQueryCacheSize = 100;
|
|
3200
|
+
queryCacheTtlMs = 5 * 60 * 1e3;
|
|
3201
|
+
querySimilarityThreshold = 0.85;
|
|
2860
3202
|
constructor(projectRoot, config) {
|
|
2861
3203
|
this.projectRoot = projectRoot;
|
|
2862
3204
|
this.config = config;
|
|
2863
3205
|
this.indexPath = this.getIndexPath();
|
|
2864
3206
|
this.fileHashCachePath = path5.join(this.indexPath, "file-hashes.json");
|
|
2865
3207
|
this.failedBatchesPath = path5.join(this.indexPath, "failed-batches.json");
|
|
3208
|
+
this.logger = initializeLogger(config.debug);
|
|
2866
3209
|
}
|
|
2867
3210
|
getIndexPath() {
|
|
2868
3211
|
if (this.config.scope === "global") {
|
|
@@ -2941,6 +3284,11 @@ var Indexer = class {
|
|
|
2941
3284
|
"No embedding provider available. Configure GitHub, OpenAI, Google, or Ollama."
|
|
2942
3285
|
);
|
|
2943
3286
|
}
|
|
3287
|
+
this.logger.info("Initializing indexer", {
|
|
3288
|
+
provider: this.detectedProvider.provider,
|
|
3289
|
+
model: this.detectedProvider.modelInfo.model,
|
|
3290
|
+
scope: this.config.scope
|
|
3291
|
+
});
|
|
2944
3292
|
this.provider = createEmbeddingProvider(
|
|
2945
3293
|
this.detectedProvider.credentials,
|
|
2946
3294
|
this.detectedProvider.modelInfo
|
|
@@ -2972,9 +3320,14 @@ var Indexer = class {
|
|
|
2972
3320
|
if (isGitRepo(this.projectRoot)) {
|
|
2973
3321
|
this.currentBranch = getBranchOrDefault(this.projectRoot);
|
|
2974
3322
|
this.baseBranch = getBaseBranch(this.projectRoot);
|
|
3323
|
+
this.logger.branch("info", "Detected git repository", {
|
|
3324
|
+
currentBranch: this.currentBranch,
|
|
3325
|
+
baseBranch: this.baseBranch
|
|
3326
|
+
});
|
|
2975
3327
|
} else {
|
|
2976
3328
|
this.currentBranch = "default";
|
|
2977
3329
|
this.baseBranch = "default";
|
|
3330
|
+
this.logger.branch("debug", "Not a git repository, using default branch");
|
|
2978
3331
|
}
|
|
2979
3332
|
if (this.config.indexing.autoGc) {
|
|
2980
3333
|
await this.maybeRunAutoGc();
|
|
@@ -3058,6 +3411,8 @@ var Indexer = class {
|
|
|
3058
3411
|
}
|
|
3059
3412
|
async index(onProgress) {
|
|
3060
3413
|
const { store, provider, invertedIndex, database, detectedProvider } = await this.ensureInitialized();
|
|
3414
|
+
this.logger.recordIndexingStart();
|
|
3415
|
+
this.logger.info("Starting indexing", { projectRoot: this.projectRoot });
|
|
3061
3416
|
const startTime = Date.now();
|
|
3062
3417
|
const stats = {
|
|
3063
3418
|
totalFiles: 0,
|
|
@@ -3087,6 +3442,11 @@ var Indexer = class {
|
|
|
3087
3442
|
);
|
|
3088
3443
|
stats.totalFiles = files.length;
|
|
3089
3444
|
stats.skippedFiles = skipped;
|
|
3445
|
+
this.logger.recordFilesScanned(files.length);
|
|
3446
|
+
this.logger.cache("debug", "Scanning files for changes", {
|
|
3447
|
+
totalFiles: files.length,
|
|
3448
|
+
skippedFiles: skipped.length
|
|
3449
|
+
});
|
|
3090
3450
|
const changedFiles = [];
|
|
3091
3451
|
const unchangedFilePaths = /* @__PURE__ */ new Set();
|
|
3092
3452
|
const currentFileHashes = /* @__PURE__ */ new Map();
|
|
@@ -3095,11 +3455,17 @@ var Indexer = class {
|
|
|
3095
3455
|
currentFileHashes.set(f.path, currentHash);
|
|
3096
3456
|
if (this.fileHashCache.get(f.path) === currentHash) {
|
|
3097
3457
|
unchangedFilePaths.add(f.path);
|
|
3458
|
+
this.logger.recordCacheHit();
|
|
3098
3459
|
} else {
|
|
3099
3460
|
const content = await import_fs4.promises.readFile(f.path, "utf-8");
|
|
3100
3461
|
changedFiles.push({ path: f.path, content, hash: currentHash });
|
|
3462
|
+
this.logger.recordCacheMiss();
|
|
3101
3463
|
}
|
|
3102
3464
|
}
|
|
3465
|
+
this.logger.cache("info", "File hash cache results", {
|
|
3466
|
+
unchanged: unchangedFilePaths.size,
|
|
3467
|
+
changed: changedFiles.length
|
|
3468
|
+
});
|
|
3103
3469
|
onProgress?.({
|
|
3104
3470
|
phase: "parsing",
|
|
3105
3471
|
filesProcessed: 0,
|
|
@@ -3107,7 +3473,12 @@ var Indexer = class {
|
|
|
3107
3473
|
chunksProcessed: 0,
|
|
3108
3474
|
totalChunks: 0
|
|
3109
3475
|
});
|
|
3476
|
+
const parseStartTime = import_perf_hooks.performance.now();
|
|
3110
3477
|
const parsedFiles = parseFiles(changedFiles);
|
|
3478
|
+
const parseMs = import_perf_hooks.performance.now() - parseStartTime;
|
|
3479
|
+
this.logger.recordFilesParsed(parsedFiles.length);
|
|
3480
|
+
this.logger.recordParseDuration(parseMs);
|
|
3481
|
+
this.logger.debug("Parsed changed files", { parsedCount: parsedFiles.length, parseMs: parseMs.toFixed(2) });
|
|
3111
3482
|
const existingChunks = /* @__PURE__ */ new Map();
|
|
3112
3483
|
const existingChunksByFile = /* @__PURE__ */ new Map();
|
|
3113
3484
|
for (const { key, metadata } of store.getAllMetadata()) {
|
|
@@ -3189,6 +3560,13 @@ var Indexer = class {
|
|
|
3189
3560
|
stats.totalChunks = pendingChunks.length;
|
|
3190
3561
|
stats.existingChunks = currentChunkIds.size - pendingChunks.length;
|
|
3191
3562
|
stats.removedChunks = removedCount;
|
|
3563
|
+
this.logger.recordChunksProcessed(currentChunkIds.size);
|
|
3564
|
+
this.logger.recordChunksRemoved(removedCount);
|
|
3565
|
+
this.logger.info("Chunk analysis complete", {
|
|
3566
|
+
pending: pendingChunks.length,
|
|
3567
|
+
existing: stats.existingChunks,
|
|
3568
|
+
removed: removedCount
|
|
3569
|
+
});
|
|
3192
3570
|
if (pendingChunks.length === 0 && removedCount === 0) {
|
|
3193
3571
|
database.clearBranch(this.currentBranch);
|
|
3194
3572
|
database.addChunksToBranchBatch(this.currentBranch, Array.from(currentChunkIds));
|
|
@@ -3232,6 +3610,11 @@ var Indexer = class {
|
|
|
3232
3610
|
const missingHashes = new Set(database.getMissingEmbeddings(allContentHashes));
|
|
3233
3611
|
const chunksNeedingEmbedding = pendingChunks.filter((c) => missingHashes.has(c.contentHash));
|
|
3234
3612
|
const chunksWithExistingEmbedding = pendingChunks.filter((c) => !missingHashes.has(c.contentHash));
|
|
3613
|
+
this.logger.cache("info", "Embedding cache lookup", {
|
|
3614
|
+
needsEmbedding: chunksNeedingEmbedding.length,
|
|
3615
|
+
fromCache: chunksWithExistingEmbedding.length
|
|
3616
|
+
});
|
|
3617
|
+
this.logger.recordChunksFromCache(chunksWithExistingEmbedding.length);
|
|
3235
3618
|
for (const chunk of chunksWithExistingEmbedding) {
|
|
3236
3619
|
const embeddingBuffer = database.getEmbedding(chunk.contentHash);
|
|
3237
3620
|
if (embeddingBuffer) {
|
|
@@ -3270,13 +3653,16 @@ var Indexer = class {
|
|
|
3270
3653
|
const message = getErrorMessage(error);
|
|
3271
3654
|
if (isRateLimitError(error)) {
|
|
3272
3655
|
rateLimitBackoffMs = Math.min(providerRateLimits.maxRetryMs, (rateLimitBackoffMs || providerRateLimits.minRetryMs) * 2);
|
|
3273
|
-
|
|
3274
|
-
|
|
3275
|
-
|
|
3656
|
+
this.logger.embedding("warn", `Rate limited, backing off`, {
|
|
3657
|
+
attempt: error.attemptNumber,
|
|
3658
|
+
retriesLeft: error.retriesLeft,
|
|
3659
|
+
backoffMs: rateLimitBackoffMs
|
|
3660
|
+
});
|
|
3276
3661
|
} else {
|
|
3277
|
-
|
|
3278
|
-
|
|
3279
|
-
|
|
3662
|
+
this.logger.embedding("error", `Embedding batch failed`, {
|
|
3663
|
+
attempt: error.attemptNumber,
|
|
3664
|
+
error: message
|
|
3665
|
+
});
|
|
3280
3666
|
}
|
|
3281
3667
|
}
|
|
3282
3668
|
}
|
|
@@ -3303,6 +3689,12 @@ var Indexer = class {
|
|
|
3303
3689
|
}
|
|
3304
3690
|
stats.indexedChunks += batch.length;
|
|
3305
3691
|
stats.tokensUsed += result.totalTokensUsed;
|
|
3692
|
+
this.logger.recordChunksEmbedded(batch.length);
|
|
3693
|
+
this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
|
|
3694
|
+
this.logger.embedding("debug", `Embedded batch`, {
|
|
3695
|
+
batchSize: batch.length,
|
|
3696
|
+
tokens: result.totalTokensUsed
|
|
3697
|
+
});
|
|
3306
3698
|
onProgress?.({
|
|
3307
3699
|
phase: "embedding",
|
|
3308
3700
|
filesProcessed: files.length,
|
|
@@ -3313,7 +3705,11 @@ var Indexer = class {
|
|
|
3313
3705
|
} catch (error) {
|
|
3314
3706
|
stats.failedChunks += batch.length;
|
|
3315
3707
|
this.addFailedBatch(batch, getErrorMessage(error));
|
|
3316
|
-
|
|
3708
|
+
this.logger.recordEmbeddingError();
|
|
3709
|
+
this.logger.embedding("error", `Failed to embed batch after retries`, {
|
|
3710
|
+
batchSize: batch.length,
|
|
3711
|
+
error: getErrorMessage(error)
|
|
3712
|
+
});
|
|
3317
3713
|
}
|
|
3318
3714
|
});
|
|
3319
3715
|
}
|
|
@@ -3335,6 +3731,16 @@ var Indexer = class {
|
|
|
3335
3731
|
await this.maybeRunOrphanGc();
|
|
3336
3732
|
}
|
|
3337
3733
|
stats.durationMs = Date.now() - startTime;
|
|
3734
|
+
this.logger.recordIndexingEnd();
|
|
3735
|
+
this.logger.info("Indexing complete", {
|
|
3736
|
+
files: stats.totalFiles,
|
|
3737
|
+
indexed: stats.indexedChunks,
|
|
3738
|
+
existing: stats.existingChunks,
|
|
3739
|
+
removed: stats.removedChunks,
|
|
3740
|
+
failed: stats.failedChunks,
|
|
3741
|
+
tokens: stats.tokensUsed,
|
|
3742
|
+
durationMs: stats.durationMs
|
|
3743
|
+
});
|
|
3338
3744
|
if (stats.failedChunks > 0) {
|
|
3339
3745
|
stats.failedBatchesPath = this.failedBatchesPath;
|
|
3340
3746
|
}
|
|
@@ -3347,18 +3753,96 @@ var Indexer = class {
|
|
|
3347
3753
|
});
|
|
3348
3754
|
return stats;
|
|
3349
3755
|
}
|
|
3756
|
+
async getQueryEmbedding(query, provider) {
|
|
3757
|
+
const now = Date.now();
|
|
3758
|
+
const cached = this.queryEmbeddingCache.get(query);
|
|
3759
|
+
if (cached && now - cached.timestamp < this.queryCacheTtlMs) {
|
|
3760
|
+
this.logger.cache("debug", "Query embedding cache hit (exact)", { query: query.slice(0, 50) });
|
|
3761
|
+
this.logger.recordQueryCacheHit();
|
|
3762
|
+
return cached.embedding;
|
|
3763
|
+
}
|
|
3764
|
+
const similarMatch = this.findSimilarCachedQuery(query, now);
|
|
3765
|
+
if (similarMatch) {
|
|
3766
|
+
this.logger.cache("debug", "Query embedding cache hit (similar)", {
|
|
3767
|
+
query: query.slice(0, 50),
|
|
3768
|
+
similarTo: similarMatch.key.slice(0, 50),
|
|
3769
|
+
similarity: similarMatch.similarity.toFixed(3)
|
|
3770
|
+
});
|
|
3771
|
+
this.logger.recordQueryCacheSimilarHit();
|
|
3772
|
+
return similarMatch.embedding;
|
|
3773
|
+
}
|
|
3774
|
+
this.logger.cache("debug", "Query embedding cache miss", { query: query.slice(0, 50) });
|
|
3775
|
+
this.logger.recordQueryCacheMiss();
|
|
3776
|
+
const { embedding, tokensUsed } = await provider.embed(query);
|
|
3777
|
+
this.logger.recordEmbeddingApiCall(tokensUsed);
|
|
3778
|
+
if (this.queryEmbeddingCache.size >= this.maxQueryCacheSize) {
|
|
3779
|
+
const oldestKey = this.queryEmbeddingCache.keys().next().value;
|
|
3780
|
+
if (oldestKey) {
|
|
3781
|
+
this.queryEmbeddingCache.delete(oldestKey);
|
|
3782
|
+
}
|
|
3783
|
+
}
|
|
3784
|
+
this.queryEmbeddingCache.set(query, { embedding, timestamp: now });
|
|
3785
|
+
return embedding;
|
|
3786
|
+
}
|
|
3787
|
+
findSimilarCachedQuery(query, now) {
|
|
3788
|
+
const queryTokens = this.tokenize(query);
|
|
3789
|
+
if (queryTokens.size === 0) return null;
|
|
3790
|
+
let bestMatch = null;
|
|
3791
|
+
for (const [cachedQuery, { embedding, timestamp }] of this.queryEmbeddingCache) {
|
|
3792
|
+
if (now - timestamp >= this.queryCacheTtlMs) continue;
|
|
3793
|
+
const cachedTokens = this.tokenize(cachedQuery);
|
|
3794
|
+
const similarity = this.jaccardSimilarity(queryTokens, cachedTokens);
|
|
3795
|
+
if (similarity >= this.querySimilarityThreshold) {
|
|
3796
|
+
if (!bestMatch || similarity > bestMatch.similarity) {
|
|
3797
|
+
bestMatch = { key: cachedQuery, embedding, similarity };
|
|
3798
|
+
}
|
|
3799
|
+
}
|
|
3800
|
+
}
|
|
3801
|
+
return bestMatch;
|
|
3802
|
+
}
|
|
3803
|
+
tokenize(text) {
|
|
3804
|
+
return new Set(
|
|
3805
|
+
text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((t) => t.length > 1)
|
|
3806
|
+
);
|
|
3807
|
+
}
|
|
3808
|
+
jaccardSimilarity(a, b) {
|
|
3809
|
+
if (a.size === 0 && b.size === 0) return 1;
|
|
3810
|
+
if (a.size === 0 || b.size === 0) return 0;
|
|
3811
|
+
let intersection = 0;
|
|
3812
|
+
for (const token of a) {
|
|
3813
|
+
if (b.has(token)) intersection++;
|
|
3814
|
+
}
|
|
3815
|
+
const union = a.size + b.size - intersection;
|
|
3816
|
+
return intersection / union;
|
|
3817
|
+
}
|
|
3350
3818
|
async search(query, limit, options) {
|
|
3819
|
+
const searchStartTime = import_perf_hooks.performance.now();
|
|
3351
3820
|
const { store, provider, database } = await this.ensureInitialized();
|
|
3352
3821
|
if (store.count() === 0) {
|
|
3822
|
+
this.logger.search("debug", "Search on empty index", { query });
|
|
3353
3823
|
return [];
|
|
3354
3824
|
}
|
|
3355
3825
|
const maxResults = limit ?? this.config.search.maxResults;
|
|
3356
3826
|
const hybridWeight = options?.hybridWeight ?? this.config.search.hybridWeight;
|
|
3357
3827
|
const filterByBranch = options?.filterByBranch ?? true;
|
|
3358
|
-
|
|
3828
|
+
this.logger.search("debug", "Starting search", {
|
|
3829
|
+
query,
|
|
3830
|
+
maxResults,
|
|
3831
|
+
hybridWeight,
|
|
3832
|
+
filterByBranch
|
|
3833
|
+
});
|
|
3834
|
+
const embeddingStartTime = import_perf_hooks.performance.now();
|
|
3835
|
+
const embedding = await this.getQueryEmbedding(query, provider);
|
|
3836
|
+
const embeddingMs = import_perf_hooks.performance.now() - embeddingStartTime;
|
|
3837
|
+
const vectorStartTime = import_perf_hooks.performance.now();
|
|
3359
3838
|
const semanticResults = store.search(embedding, maxResults * 4);
|
|
3839
|
+
const vectorMs = import_perf_hooks.performance.now() - vectorStartTime;
|
|
3840
|
+
const keywordStartTime = import_perf_hooks.performance.now();
|
|
3360
3841
|
const keywordResults = await this.keywordSearch(query, maxResults * 4);
|
|
3842
|
+
const keywordMs = import_perf_hooks.performance.now() - keywordStartTime;
|
|
3843
|
+
const fusionStartTime = import_perf_hooks.performance.now();
|
|
3361
3844
|
const combined = this.fuseResults(semanticResults, keywordResults, hybridWeight, maxResults * 4);
|
|
3845
|
+
const fusionMs = import_perf_hooks.performance.now() - fusionStartTime;
|
|
3362
3846
|
let branchChunkIds = null;
|
|
3363
3847
|
if (filterByBranch && this.currentBranch !== "default") {
|
|
3364
3848
|
branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
|
|
@@ -3379,12 +3863,29 @@ var Indexer = class {
|
|
|
3379
3863
|
}
|
|
3380
3864
|
return true;
|
|
3381
3865
|
}).slice(0, maxResults);
|
|
3866
|
+
const totalSearchMs = import_perf_hooks.performance.now() - searchStartTime;
|
|
3867
|
+
this.logger.recordSearch(totalSearchMs, {
|
|
3868
|
+
embeddingMs,
|
|
3869
|
+
vectorMs,
|
|
3870
|
+
keywordMs,
|
|
3871
|
+
fusionMs
|
|
3872
|
+
});
|
|
3873
|
+
this.logger.search("info", "Search complete", {
|
|
3874
|
+
query,
|
|
3875
|
+
results: filtered.length,
|
|
3876
|
+
totalMs: Math.round(totalSearchMs * 100) / 100,
|
|
3877
|
+
embeddingMs: Math.round(embeddingMs * 100) / 100,
|
|
3878
|
+
vectorMs: Math.round(vectorMs * 100) / 100,
|
|
3879
|
+
keywordMs: Math.round(keywordMs * 100) / 100,
|
|
3880
|
+
fusionMs: Math.round(fusionMs * 100) / 100
|
|
3881
|
+
});
|
|
3882
|
+
const metadataOnly = options?.metadataOnly ?? false;
|
|
3382
3883
|
return Promise.all(
|
|
3383
3884
|
filtered.map(async (r) => {
|
|
3384
3885
|
let content = "";
|
|
3385
3886
|
let contextStartLine = r.metadata.startLine;
|
|
3386
3887
|
let contextEndLine = r.metadata.endLine;
|
|
3387
|
-
if (this.config.search.includeContext) {
|
|
3888
|
+
if (!metadataOnly && this.config.search.includeContext) {
|
|
3388
3889
|
try {
|
|
3389
3890
|
const fileContent = await import_fs4.promises.readFile(
|
|
3390
3891
|
r.metadata.filePath,
|
|
@@ -3417,11 +3918,8 @@ var Indexer = class {
|
|
|
3417
3918
|
if (scores.size === 0) {
|
|
3418
3919
|
return [];
|
|
3419
3920
|
}
|
|
3420
|
-
const
|
|
3421
|
-
const metadataMap =
|
|
3422
|
-
for (const { key, metadata } of allMetadata) {
|
|
3423
|
-
metadataMap.set(key, metadata);
|
|
3424
|
-
}
|
|
3921
|
+
const chunkIds = Array.from(scores.keys());
|
|
3922
|
+
const metadataMap = store.getMetadataBatch(chunkIds);
|
|
3425
3923
|
const results = [];
|
|
3426
3924
|
for (const [chunkId, score] of scores) {
|
|
3427
3925
|
const metadata = metadataMap.get(chunkId);
|
|
@@ -3484,6 +3982,7 @@ var Indexer = class {
|
|
|
3484
3982
|
}
|
|
3485
3983
|
async healthCheck() {
|
|
3486
3984
|
const { store, invertedIndex, database } = await this.ensureInitialized();
|
|
3985
|
+
this.logger.gc("info", "Starting health check");
|
|
3487
3986
|
const allMetadata = store.getAllMetadata();
|
|
3488
3987
|
const filePathsToChunkKeys = /* @__PURE__ */ new Map();
|
|
3489
3988
|
for (const { key, metadata } of allMetadata) {
|
|
@@ -3510,6 +4009,13 @@ var Indexer = class {
|
|
|
3510
4009
|
}
|
|
3511
4010
|
const gcOrphanEmbeddings = database.gcOrphanEmbeddings();
|
|
3512
4011
|
const gcOrphanChunks = database.gcOrphanChunks();
|
|
4012
|
+
this.logger.recordGc(removedCount, gcOrphanChunks, gcOrphanEmbeddings);
|
|
4013
|
+
this.logger.gc("info", "Health check complete", {
|
|
4014
|
+
removedStale: removedCount,
|
|
4015
|
+
orphanEmbeddings: gcOrphanEmbeddings,
|
|
4016
|
+
orphanChunks: gcOrphanChunks,
|
|
4017
|
+
removedFiles: removedFilePaths.length
|
|
4018
|
+
});
|
|
3513
4019
|
return { removed: removedCount, filePaths: removedFilePaths, gcOrphanEmbeddings, gcOrphanChunks };
|
|
3514
4020
|
}
|
|
3515
4021
|
async retryFailedBatches() {
|
|
@@ -3543,9 +4049,12 @@ var Indexer = class {
|
|
|
3543
4049
|
invertedIndex.removeChunk(chunk.id);
|
|
3544
4050
|
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3545
4051
|
}
|
|
4052
|
+
this.logger.recordChunksEmbedded(batch.chunks.length);
|
|
4053
|
+
this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
|
|
3546
4054
|
succeeded += batch.chunks.length;
|
|
3547
4055
|
} catch (error) {
|
|
3548
4056
|
failed += batch.chunks.length;
|
|
4057
|
+
this.logger.recordEmbeddingError();
|
|
3549
4058
|
stillFailing.push({
|
|
3550
4059
|
...batch,
|
|
3551
4060
|
attemptCount: batch.attemptCount + 1,
|
|
@@ -3580,6 +4089,94 @@ var Indexer = class {
|
|
|
3580
4089
|
const { database } = await this.ensureInitialized();
|
|
3581
4090
|
return database.getStats();
|
|
3582
4091
|
}
|
|
4092
|
+
getLogger() {
|
|
4093
|
+
return this.logger;
|
|
4094
|
+
}
|
|
4095
|
+
async findSimilar(code, limit, options) {
|
|
4096
|
+
const searchStartTime = import_perf_hooks.performance.now();
|
|
4097
|
+
const { store, provider, database } = await this.ensureInitialized();
|
|
4098
|
+
if (store.count() === 0) {
|
|
4099
|
+
this.logger.search("debug", "Find similar on empty index");
|
|
4100
|
+
return [];
|
|
4101
|
+
}
|
|
4102
|
+
const maxResults = limit ?? this.config.search.maxResults;
|
|
4103
|
+
const filterByBranch = options?.filterByBranch ?? true;
|
|
4104
|
+
this.logger.search("debug", "Starting find similar", {
|
|
4105
|
+
codeLength: code.length,
|
|
4106
|
+
maxResults,
|
|
4107
|
+
filterByBranch
|
|
4108
|
+
});
|
|
4109
|
+
const embeddingStartTime = import_perf_hooks.performance.now();
|
|
4110
|
+
const { embedding, tokensUsed } = await provider.embed(code);
|
|
4111
|
+
const embeddingMs = import_perf_hooks.performance.now() - embeddingStartTime;
|
|
4112
|
+
this.logger.recordEmbeddingApiCall(tokensUsed);
|
|
4113
|
+
const vectorStartTime = import_perf_hooks.performance.now();
|
|
4114
|
+
const semanticResults = store.search(embedding, maxResults * 2);
|
|
4115
|
+
const vectorMs = import_perf_hooks.performance.now() - vectorStartTime;
|
|
4116
|
+
let branchChunkIds = null;
|
|
4117
|
+
if (filterByBranch && this.currentBranch !== "default") {
|
|
4118
|
+
branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
|
|
4119
|
+
}
|
|
4120
|
+
const filtered = semanticResults.filter((r) => {
|
|
4121
|
+
if (r.score < this.config.search.minScore) return false;
|
|
4122
|
+
if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
|
|
4123
|
+
if (options?.excludeFile) {
|
|
4124
|
+
if (r.metadata.filePath === options.excludeFile) return false;
|
|
4125
|
+
}
|
|
4126
|
+
if (options?.fileType) {
|
|
4127
|
+
const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
|
|
4128
|
+
if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
|
|
4129
|
+
}
|
|
4130
|
+
if (options?.directory) {
|
|
4131
|
+
const normalizedDir = options.directory.replace(/^\/|\/$/g, "");
|
|
4132
|
+
if (!r.metadata.filePath.includes(`/${normalizedDir}/`) && !r.metadata.filePath.includes(`${normalizedDir}/`)) return false;
|
|
4133
|
+
}
|
|
4134
|
+
if (options?.chunkType) {
|
|
4135
|
+
if (r.metadata.chunkType !== options.chunkType) return false;
|
|
4136
|
+
}
|
|
4137
|
+
return true;
|
|
4138
|
+
}).slice(0, maxResults);
|
|
4139
|
+
const totalSearchMs = import_perf_hooks.performance.now() - searchStartTime;
|
|
4140
|
+
this.logger.recordSearch(totalSearchMs, {
|
|
4141
|
+
embeddingMs,
|
|
4142
|
+
vectorMs,
|
|
4143
|
+
keywordMs: 0,
|
|
4144
|
+
fusionMs: 0
|
|
4145
|
+
});
|
|
4146
|
+
this.logger.search("info", "Find similar complete", {
|
|
4147
|
+
codeLength: code.length,
|
|
4148
|
+
results: filtered.length,
|
|
4149
|
+
totalMs: Math.round(totalSearchMs * 100) / 100,
|
|
4150
|
+
embeddingMs: Math.round(embeddingMs * 100) / 100,
|
|
4151
|
+
vectorMs: Math.round(vectorMs * 100) / 100
|
|
4152
|
+
});
|
|
4153
|
+
return Promise.all(
|
|
4154
|
+
filtered.map(async (r) => {
|
|
4155
|
+
let content = "";
|
|
4156
|
+
if (this.config.search.includeContext) {
|
|
4157
|
+
try {
|
|
4158
|
+
const fileContent = await import_fs4.promises.readFile(
|
|
4159
|
+
r.metadata.filePath,
|
|
4160
|
+
"utf-8"
|
|
4161
|
+
);
|
|
4162
|
+
const lines = fileContent.split("\n");
|
|
4163
|
+
content = lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
|
|
4164
|
+
} catch {
|
|
4165
|
+
content = "[File not accessible]";
|
|
4166
|
+
}
|
|
4167
|
+
}
|
|
4168
|
+
return {
|
|
4169
|
+
filePath: r.metadata.filePath,
|
|
4170
|
+
startLine: r.metadata.startLine,
|
|
4171
|
+
endLine: r.metadata.endLine,
|
|
4172
|
+
content,
|
|
4173
|
+
score: r.score,
|
|
4174
|
+
chunkType: r.metadata.chunkType,
|
|
4175
|
+
name: r.metadata.name
|
|
4176
|
+
};
|
|
4177
|
+
})
|
|
4178
|
+
);
|
|
4179
|
+
}
|
|
3583
4180
|
};
|
|
3584
4181
|
|
|
3585
4182
|
// node_modules/chokidar/index.js
|
|
@@ -5521,7 +6118,7 @@ function getIndexer() {
|
|
|
5521
6118
|
return sharedIndexer;
|
|
5522
6119
|
}
|
|
5523
6120
|
var codebase_search = (0, import_plugin.tool)({
|
|
5524
|
-
description: "Search codebase by MEANING, not keywords. Use when you
|
|
6121
|
+
description: "Search codebase by MEANING, not keywords. Returns full code content. Use when you need to see actual implementation. For just finding WHERE code is (saves ~90% tokens), use codebase_peek instead. For known identifiers like 'validateToken', use grep - it's faster.",
|
|
5525
6122
|
args: {
|
|
5526
6123
|
query: z.string().describe("Natural language description of what code you're looking for. Describe behavior, not syntax."),
|
|
5527
6124
|
limit: z.number().optional().default(10).describe("Maximum number of results to return"),
|
|
@@ -5553,6 +6150,38 @@ ${r.content}
|
|
|
5553
6150
|
${formatted.join("\n\n")}`;
|
|
5554
6151
|
}
|
|
5555
6152
|
});
|
|
6153
|
+
// Tool definition: runs the indexer's search with `metadataOnly: true` and
// reports one line per hit (chunk type, name, file:line range, score) without
// including any code content in the reply.
var codebase_peek = (0, import_plugin.tool)({
  description: "Quick lookup of code locations by meaning. Returns only metadata (file, line, name, type) WITHOUT code content. Use this first to find WHERE code is, then use Read tool to examine specific files. Saves tokens by not returning full code blocks. Best for: discovery, navigation, finding multiple related locations.",
  args: {
    query: z.string().describe("Natural language description of what code you're looking for."),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type")
  },
  async execute(args) {
    const { query, fileType, directory, chunkType } = args;
    const hits = await getIndexer().search(query, args.limit ?? 10, {
      fileType,
      directory,
      chunkType,
      metadataOnly: true
    });
    if (hits.length === 0) {
      return "No matching code found. Try a different query or run index_codebase first.";
    }
    // One numbered line per hit; unnamed chunks are labelled "(anonymous)".
    const rows = [];
    hits.forEach((hit, position) => {
      const label = hit.name ? `"${hit.name}"` : "(anonymous)";
      const where = `${hit.filePath}:${hit.startLine}-${hit.endLine}`;
      rows.push(`[${position + 1}] ${hit.chunkType} ${label} at ${where} (score: ${hit.score.toFixed(2)})`);
    });
    return `Found ${hits.length} locations for "${query}":\n\n${rows.join("\n")}\n\nUse Read tool to examine specific files.`;
  }
});
|
|
5556
6185
|
var index_codebase = (0, import_plugin.tool)({
|
|
5557
6186
|
description: "Index the codebase for semantic search. Creates vector embeddings of code chunks. Incremental - only re-indexes changed files (~50ms when nothing changed). Run before first codebase_search.",
|
|
5558
6187
|
args: {
|
|
@@ -5619,6 +6248,84 @@ var index_health_check = (0, import_plugin.tool)({
|
|
|
5619
6248
|
return lines.join("\n");
|
|
5620
6249
|
}
|
|
5621
6250
|
});
|
|
6251
|
+
// Tool definition: returns the logger's formatted metrics report, or a hint on
// how to enable debug mode / metrics collection when either is switched off.
var index_metrics = (0, import_plugin.tool)({
  description: "Get metrics and performance statistics for the codebase index. Shows indexing stats, search timings, cache hit rates, and API usage. Requires debug.enabled=true and debug.metrics=true in config.",
  args: {},
  async execute() {
    const logger = getIndexer().getLogger();
    // Debug mode is checked first; metrics are only consulted when it is on.
    if (!logger.isEnabled()) {
      return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
    }
    return logger.isMetricsEnabled()
      ? logger.formatMetrics()
      : 'Metrics collection is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
  }
});
|
|
6266
|
+
// Tool definition: returns log entries from the indexer's logger, one per line,
// formatted as "[timestamp] [LEVEL] [category] message <json data>".
var index_logs = (0, import_plugin.tool)({
  description: "Get recent debug logs from the codebase indexer. Shows timestamped log entries with level and category. Requires debug.enabled=true in config.",
  args: {
    limit: z.number().optional().default(20).describe("Maximum number of log entries to return"),
    category: z.enum(["search", "embedding", "cache", "gc", "branch", "general"]).optional().describe("Filter by log category"),
    level: z.enum(["error", "warn", "info", "debug"]).optional().describe("Filter by minimum log level")
  },
  async execute(args) {
    const logger = getIndexer().getLogger();
    if (!logger.isEnabled()) {
      return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true\n }\n}\n```';
    }
    const { category, level, limit } = args;
    // NOTE(review): the filters are mutually exclusive here - when `category` is
    // supplied, `level` is not applied at all. Confirm whether combining both
    // was intended.
    const logs = category
      ? logger.getLogsByCategory(category, limit)
      : level
        ? logger.getLogsByLevel(level, limit)
        : logger.getLogs(limit);
    if (logs.length === 0) {
      return "No logs recorded yet. Logs are captured during indexing and search operations.";
    }
    const rendered = [];
    for (const entry of logs) {
      // Structured data, when present, is appended as compact JSON.
      const payload = entry.data ? ` ${JSON.stringify(entry.data)}` : "";
      rendered.push(`[${entry.timestamp}] [${entry.level.toUpperCase()}] [${entry.category}] ${entry.message}${payload}`);
    }
    return rendered.join("\n");
  }
});
|
|
6296
|
+
// Tool definition: forwards a code snippet to the indexer's findSimilar and
// renders each hit as a header line (location + similarity percentage)
// followed by the hit's content in a fenced block.
var find_similar = (0, import_plugin.tool)({
  description: "Find code similar to a given snippet. Use for duplicate detection, pattern discovery, or refactoring prep. Paste code and find semantically similar implementations elsewhere in the codebase.",
  args: {
    code: z.string().describe("The code snippet to find similar code for"),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
    excludeFile: z.string().optional().describe("Exclude results from this file path (useful when searching for duplicates of code from a specific file)")
  },
  async execute(args) {
    const { code, fileType, directory, chunkType, excludeFile } = args;
    const matches = await getIndexer().findSimilar(code, args.limit ?? 10, {
      fileType,
      directory,
      chunkType,
      excludeFile
    });
    if (matches.length === 0) {
      return "No similar code found. Try a different snippet or run index_codebase first.";
    }
    const sections = matches.map((match, position) => {
      // The quoted name is only shown when the chunk has one.
      const label = match.name ? `${match.chunkType} "${match.name}"` : `${match.chunkType}`;
      const heading = `[${position + 1}] ${label} in ${match.filePath}:${match.startLine}-${match.endLine}`;
      return `${heading} (similarity: ${(match.score * 100).toFixed(1)}%)\n\`\`\`\n${match.content}\n\`\`\``;
    });
    return `Found ${matches.length} similar code blocks:\n\n${sections.join("\n\n")}`;
  }
});
|
|
5622
6329
|
function formatIndexStats(stats, verbose = false) {
|
|
5623
6330
|
const lines = [];
|
|
5624
6331
|
if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
|
|
@@ -5802,9 +6509,13 @@ var plugin = async ({ directory }) => {
|
|
|
5802
6509
|
return {
|
|
5803
6510
|
tool: {
|
|
5804
6511
|
codebase_search,
|
|
6512
|
+
codebase_peek,
|
|
5805
6513
|
index_codebase,
|
|
5806
6514
|
index_status,
|
|
5807
|
-
index_health_check
|
|
6515
|
+
index_health_check,
|
|
6516
|
+
index_metrics,
|
|
6517
|
+
index_logs,
|
|
6518
|
+
find_similar
|
|
5808
6519
|
},
|
|
5809
6520
|
async config(cfg) {
|
|
5810
6521
|
cfg.command = cfg.command ?? {};
|