opencode-codebase-index 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -1
- package/dist/index.cjs +728 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +728 -17
- package/dist/index.js.map +1 -1
- package/native/codebase-index-native.darwin-arm64.node +0 -0
- package/native/codebase-index-native.darwin-x64.node +0 -0
- package/native/codebase-index-native.linux-arm64-gnu.node +0 -0
- package/native/codebase-index-native.linux-x64-gnu.node +0 -0
- package/native/codebase-index-native.win32-x64-msvc.node +0 -0
- package/package.json +3 -1
- package/skill/SKILL.md +39 -164
package/dist/index.js
CHANGED
|
@@ -708,8 +708,21 @@ function getDefaultSearchConfig() {
|
|
|
708
708
|
contextLines: 0
|
|
709
709
|
};
|
|
710
710
|
}
|
|
711
|
+
/**
 * Build the default `debug` configuration section.
 *
 * Debugging is off by default and the level is "info"; every per-category
 * log switch and metrics collection default to on.
 *
 * @returns {{enabled: boolean, logLevel: string, logSearch: boolean,
 *   logEmbedding: boolean, logCache: boolean, logGc: boolean,
 *   logBranch: boolean, metrics: boolean}} a fresh object on every call
 */
function getDefaultDebugConfig() {
  const defaults = { enabled: false, logLevel: "info" };
  const switchesOnByDefault = ["logSearch", "logEmbedding", "logCache", "logGc", "logBranch", "metrics"];
  for (const flag of switchesOnByDefault) {
    defaults[flag] = true;
  }
  return defaults;
}
|
|
711
723
|
// Embedding provider names accepted by isValidProvider(); parseConfig falls back to "auto" for anything else.
var VALID_PROVIDERS = ["auto", "github-copilot", "openai", "google", "ollama"];
|
|
712
724
|
// Accepted index scope names. NOTE(review): presumably consumed by isValidScope(), whose body is not visible here — confirm.
var VALID_SCOPES = ["project", "global"];
|
|
725
|
+
// Log level names accepted by isValidLogLevel() for the `debug.logLevel` setting.
var VALID_LOG_LEVELS = ["error", "warn", "info", "debug"];
|
|
713
726
|
/**
 * Check whether a raw config value names a supported embedding provider.
 *
 * @param {unknown} value - candidate value read from user configuration
 * @returns {boolean} true only for strings listed in VALID_PROVIDERS
 */
function isValidProvider(value) {
  if (typeof value !== "string") {
    return false;
  }
  return VALID_PROVIDERS.indexOf(value) !== -1;
}
|
|
@@ -719,10 +732,14 @@ function isValidScope(value) {
|
|
|
719
732
|
/**
 * Check whether a value is an array containing nothing but strings.
 *
 * An empty array qualifies. Holes in sparse arrays are not inspected,
 * matching the iteration rules of the array predicate methods.
 *
 * @param {unknown} value - candidate value read from user configuration
 * @returns {boolean} true when `value` is an array with no non-string element
 */
function isStringArray(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  const hasNonString = value.some((entry) => typeof entry !== "string");
  return !hasNonString;
}
|
|
735
|
+
/**
 * Check whether a raw config value names a supported log level.
 *
 * @param {unknown} value - candidate value read from user configuration
 * @returns {boolean} true only for strings listed in VALID_LOG_LEVELS
 */
function isValidLogLevel(value) {
  if (typeof value !== "string") {
    return false;
  }
  return VALID_LOG_LEVELS.indexOf(value) !== -1;
}
|
|
722
738
|
function parseConfig(raw) {
|
|
723
739
|
const input = raw && typeof raw === "object" ? raw : {};
|
|
724
740
|
const defaultIndexing = getDefaultIndexingConfig();
|
|
725
741
|
const defaultSearch = getDefaultSearchConfig();
|
|
742
|
+
const defaultDebug = getDefaultDebugConfig();
|
|
726
743
|
const rawIndexing = input.indexing && typeof input.indexing === "object" ? input.indexing : {};
|
|
727
744
|
const indexing = {
|
|
728
745
|
autoIndex: typeof rawIndexing.autoIndex === "boolean" ? rawIndexing.autoIndex : defaultIndexing.autoIndex,
|
|
@@ -744,6 +761,17 @@ function parseConfig(raw) {
|
|
|
744
761
|
hybridWeight: typeof rawSearch.hybridWeight === "number" ? Math.min(1, Math.max(0, rawSearch.hybridWeight)) : defaultSearch.hybridWeight,
|
|
745
762
|
contextLines: typeof rawSearch.contextLines === "number" ? Math.min(50, Math.max(0, rawSearch.contextLines)) : defaultSearch.contextLines
|
|
746
763
|
};
|
|
764
|
+
const rawDebug = input.debug && typeof input.debug === "object" ? input.debug : {};
|
|
765
|
+
const debug = {
|
|
766
|
+
enabled: typeof rawDebug.enabled === "boolean" ? rawDebug.enabled : defaultDebug.enabled,
|
|
767
|
+
logLevel: isValidLogLevel(rawDebug.logLevel) ? rawDebug.logLevel : defaultDebug.logLevel,
|
|
768
|
+
logSearch: typeof rawDebug.logSearch === "boolean" ? rawDebug.logSearch : defaultDebug.logSearch,
|
|
769
|
+
logEmbedding: typeof rawDebug.logEmbedding === "boolean" ? rawDebug.logEmbedding : defaultDebug.logEmbedding,
|
|
770
|
+
logCache: typeof rawDebug.logCache === "boolean" ? rawDebug.logCache : defaultDebug.logCache,
|
|
771
|
+
logGc: typeof rawDebug.logGc === "boolean" ? rawDebug.logGc : defaultDebug.logGc,
|
|
772
|
+
logBranch: typeof rawDebug.logBranch === "boolean" ? rawDebug.logBranch : defaultDebug.logBranch,
|
|
773
|
+
metrics: typeof rawDebug.metrics === "boolean" ? rawDebug.metrics : defaultDebug.metrics
|
|
774
|
+
};
|
|
747
775
|
return {
|
|
748
776
|
embeddingProvider: isValidProvider(input.embeddingProvider) ? input.embeddingProvider : "auto",
|
|
749
777
|
embeddingModel: typeof input.embeddingModel === "string" ? input.embeddingModel : "auto",
|
|
@@ -751,7 +779,8 @@ function parseConfig(raw) {
|
|
|
751
779
|
include: isStringArray(input.include) ? input.include : DEFAULT_INCLUDE,
|
|
752
780
|
exclude: isStringArray(input.exclude) ? input.exclude : DEFAULT_EXCLUDE,
|
|
753
781
|
indexing,
|
|
754
|
-
search
|
|
782
|
+
search,
|
|
783
|
+
debug
|
|
755
784
|
};
|
|
756
785
|
}
|
|
757
786
|
var EMBEDDING_MODELS = {
|
|
@@ -816,6 +845,7 @@ function getDefaultModelForProvider(provider) {
|
|
|
816
845
|
// src/indexer/index.ts
|
|
817
846
|
import { existsSync as existsSync4, readFileSync as readFileSync4, writeFileSync, promises as fsPromises2 } from "fs";
|
|
818
847
|
import * as path5 from "path";
|
|
848
|
+
import { performance as performance2 } from "perf_hooks";
|
|
819
849
|
|
|
820
850
|
// node_modules/eventemitter3/index.mjs
|
|
821
851
|
var import_index = __toESM(require_eventemitter3(), 1);
|
|
@@ -2325,6 +2355,298 @@ function padRight(str, length) {
|
|
|
2325
2355
|
return str.padEnd(length);
|
|
2326
2356
|
}
|
|
2327
2357
|
|
|
2358
|
+
// src/utils/logger.ts
|
|
2359
|
+
// Numeric rank of each log level, most severe first (error = 0 ... debug = 3).
// Logger.shouldLog emits an entry when its rank is <= the configured level's rank.
var LOG_LEVEL_PRIORITY = Object.fromEntries(
  ["error", "warn", "info", "debug"].map((levelName, rank) => [levelName, rank])
);
|
|
2365
|
+
/**
 * Create a metrics record with every counter and timer set to zero.
 *
 * The optional `indexingStartTime` / `indexingEndTime` fields are deliberately
 * absent; the Logger adds them when indexing is recorded.
 *
 * @returns {Record<string, number>} a fresh object on every call
 */
function createEmptyMetrics() {
  const counterNames = [
    // indexing
    "filesScanned",
    "filesParsed",
    "parseMs",
    "chunksProcessed",
    "chunksEmbedded",
    "chunksFromCache",
    "chunksRemoved",
    // embedding API
    "embeddingApiCalls",
    "embeddingTokensUsed",
    "embeddingErrors",
    // search totals and last-search breakdown
    "searchCount",
    "searchTotalMs",
    "searchAvgMs",
    "searchLastMs",
    "embeddingCallMs",
    "vectorSearchMs",
    "keywordSearchMs",
    "fusionMs",
    // file-hash cache and query-embedding cache
    "cacheHits",
    "cacheMisses",
    "queryCacheHits",
    "queryCacheSimilarHits",
    "queryCacheMisses",
    // garbage collection
    "gcRuns",
    "gcOrphansRemoved",
    "gcChunksRemoved",
    "gcEmbeddingsRemoved"
  ];
  return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
|
|
2396
|
+
/**
 * In-memory debug logger and metrics collector.
 *
 * Log entries are kept in a bounded array (the newest `maxLogs` entries) and
 * are never written anywhere by this class; callers read them back through
 * `getLogs*` / `formatRecentLogs`. Metrics are plain counters and timings
 * updated through the `record*` methods.
 *
 * Gating rules, as implemented:
 * - log entries require `config.enabled` and a level at or above the
 *   configured severity (see `shouldLog`);
 * - category helpers (`search`, `embedding`, ...) additionally require their
 *   own `config.log*` switch;
 * - `record*` methods check only `config.metrics` (not `config.enabled`).
 */
var Logger = class {
  // Debug configuration object supplied to the constructor.
  config;
  // Current metrics record (see createEmptyMetrics for the fields).
  metrics;
  // Retained log entries, oldest first.
  logs = [];
  // Maximum number of entries kept in `logs`.
  maxLogs = 1e3;
  /**
   * @param {object} config - debug settings: `enabled`, `logLevel`, the
   *   `logSearch`/`logEmbedding`/`logCache`/`logGc`/`logBranch` switches and `metrics`
   */
  constructor(config) {
    this.config = config;
    this.metrics = createEmptyMetrics();
  }
  /**
   * Decide whether an entry at `level` should be stored.
   * An unknown level compares as `undefined` and is therefore never logged.
   * @param {string} level
   * @returns {boolean}
   */
  shouldLog(level) {
    if (!this.config.enabled) return false;
    return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[this.config.logLevel];
  }
  /**
   * Store one log entry if logging is enabled for `level`.
   * @param {string} level - "error" | "warn" | "info" | "debug"
   * @param {string} category - free-form category tag
   * @param {string} message
   * @param {*} [data] - optional structured payload, stored by reference
   */
  log(level, category, message, data) {
    if (!this.shouldLog(level)) return;
    const entry = {
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      level,
      category,
      message,
      data
    };
    this.logs.push(entry);
    // Drop the oldest entry once the buffer is over capacity.
    if (this.logs.length > this.maxLogs) {
      this.logs.shift();
    }
  }
  /** Log under the "search" category when `config.logSearch` is set. */
  search(level, message, data) {
    if (this.config.logSearch) {
      this.log(level, "search", message, data);
    }
  }
  /** Log under the "embedding" category when `config.logEmbedding` is set. */
  embedding(level, message, data) {
    if (this.config.logEmbedding) {
      this.log(level, "embedding", message, data);
    }
  }
  /** Log under the "cache" category when `config.logCache` is set. */
  cache(level, message, data) {
    if (this.config.logCache) {
      this.log(level, "cache", message, data);
    }
  }
  /** Log under the "gc" category when `config.logGc` is set. */
  gc(level, message, data) {
    if (this.config.logGc) {
      this.log(level, "gc", message, data);
    }
  }
  /** Log under the "branch" category when `config.logBranch` is set. */
  branch(level, message, data) {
    if (this.config.logBranch) {
      this.log(level, "branch", message, data);
    }
  }
  /** Log an "info" entry in the "general" category. */
  info(message, data) {
    this.log("info", "general", message, data);
  }
  /** Log a "warn" entry in the "general" category. */
  warn(message, data) {
    this.log("warn", "general", message, data);
  }
  /** Log an "error" entry in the "general" category. */
  error(message, data) {
    this.log("error", "general", message, data);
  }
  /** Log a "debug" entry in the "general" category. */
  debug(message, data) {
    this.log("debug", "general", message, data);
  }
  /** Stamp the indexing start time (epoch milliseconds). */
  recordIndexingStart() {
    if (!this.config.metrics) return;
    this.metrics.indexingStartTime = Date.now();
  }
  /** Stamp the indexing end time (epoch milliseconds). */
  recordIndexingEnd() {
    if (!this.config.metrics) return;
    this.metrics.indexingEndTime = Date.now();
  }
  /** Set (not accumulate) the number of files scanned. */
  recordFilesScanned(count) {
    if (!this.config.metrics) return;
    this.metrics.filesScanned = count;
  }
  /** Set (not accumulate) the number of files parsed. */
  recordFilesParsed(count) {
    if (!this.config.metrics) return;
    this.metrics.filesParsed = count;
  }
  /** Set (not accumulate) the parse duration in milliseconds. */
  recordParseDuration(durationMs) {
    if (!this.config.metrics) return;
    this.metrics.parseMs = durationMs;
  }
  /** Add to the processed-chunk counter. */
  recordChunksProcessed(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksProcessed += count;
  }
  /** Add to the embedded-chunk counter. */
  recordChunksEmbedded(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksEmbedded += count;
  }
  /** Add to the counter of chunks whose embedding came from cache. */
  recordChunksFromCache(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksFromCache += count;
  }
  /** Add to the removed-chunk counter. */
  recordChunksRemoved(count) {
    if (!this.config.metrics) return;
    this.metrics.chunksRemoved += count;
  }
  /** Count one embedding API call and add the tokens it used. */
  recordEmbeddingApiCall(tokens) {
    if (!this.config.metrics) return;
    this.metrics.embeddingApiCalls++;
    this.metrics.embeddingTokensUsed += tokens;
  }
  /** Count one embedding failure. */
  recordEmbeddingError() {
    if (!this.config.metrics) return;
    this.metrics.embeddingErrors++;
  }
  /**
   * Record one search: updates count, total, last and running average, and
   * (when given) overwrites the per-phase timings with this search's values.
   * @param {number} durationMs - total search time
   * @param {{embeddingMs: number, vectorMs: number, keywordMs: number, fusionMs: number}} [breakdown]
   */
  recordSearch(durationMs, breakdown) {
    if (!this.config.metrics) return;
    this.metrics.searchCount++;
    this.metrics.searchTotalMs += durationMs;
    this.metrics.searchLastMs = durationMs;
    this.metrics.searchAvgMs = this.metrics.searchTotalMs / this.metrics.searchCount;
    if (breakdown) {
      this.metrics.embeddingCallMs = breakdown.embeddingMs;
      this.metrics.vectorSearchMs = breakdown.vectorMs;
      this.metrics.keywordSearchMs = breakdown.keywordMs;
      this.metrics.fusionMs = breakdown.fusionMs;
    }
  }
  /** Count one cache hit. */
  recordCacheHit() {
    if (!this.config.metrics) return;
    this.metrics.cacheHits++;
  }
  /** Count one cache miss. */
  recordCacheMiss() {
    if (!this.config.metrics) return;
    this.metrics.cacheMisses++;
  }
  /** Count one exact query-cache hit. */
  recordQueryCacheHit() {
    if (!this.config.metrics) return;
    this.metrics.queryCacheHits++;
  }
  /** Count one similar-query cache hit. */
  recordQueryCacheSimilarHit() {
    if (!this.config.metrics) return;
    this.metrics.queryCacheSimilarHits++;
  }
  /** Count one query-cache miss. */
  recordQueryCacheMiss() {
    if (!this.config.metrics) return;
    this.metrics.queryCacheMisses++;
  }
  /** Count one GC run and add what it removed. */
  recordGc(orphans, chunks, embeddings) {
    if (!this.config.metrics) return;
    this.metrics.gcRuns++;
    this.metrics.gcOrphansRemoved += orphans;
    this.metrics.gcChunksRemoved += chunks;
    this.metrics.gcEmbeddingsRemoved += embeddings;
  }
  /** @returns {object} a shallow copy of the current metrics */
  getMetrics() {
    return { ...this.metrics };
  }
  /**
   * @param {number} [limit] - keep only the newest `limit` entries; a falsy
   *   value (undefined or 0) returns everything
   * @returns {object[]} a copy of the retained entries, oldest first
   */
  getLogs(limit) {
    const logs = [...this.logs];
    if (limit) {
      return logs.slice(-limit);
    }
    return logs;
  }
  /** Like getLogs, restricted to entries whose category equals `category`. */
  getLogsByCategory(category, limit) {
    const filtered = this.logs.filter((l) => l.category === category);
    if (limit) {
      return filtered.slice(-limit);
    }
    return filtered;
  }
  /** Like getLogs, restricted to entries whose level equals `level`. */
  getLogsByLevel(level, limit) {
    const filtered = this.logs.filter((l) => l.level === level);
    if (limit) {
      return filtered.slice(-limit);
    }
    return filtered;
  }
  /** Replace the metrics record with a zeroed one. */
  resetMetrics() {
    this.metrics = createEmptyMetrics();
  }
  /** Discard all retained log entries. */
  clearLogs() {
    this.logs = [];
  }
  /**
   * Render the metrics as a multi-line, human-readable report.
   * Search, cache and GC sections appear only once they have data.
   * @returns {string}
   */
  formatMetrics() {
    const m = this.metrics;
    const lines = [];
    lines.push("=== Metrics ===");
    if (m.indexingStartTime && m.indexingEndTime) {
      const duration = m.indexingEndTime - m.indexingStartTime;
      lines.push(`Indexing duration: ${(duration / 1e3).toFixed(2)}s`);
    }
    lines.push("");
    lines.push("Indexing:");
    lines.push(`  Files scanned: ${m.filesScanned}`);
    lines.push(`  Files parsed: ${m.filesParsed}`);
    lines.push(`  Chunks processed: ${m.chunksProcessed}`);
    lines.push(`  Chunks embedded: ${m.chunksEmbedded}`);
    lines.push(`  Chunks from cache: ${m.chunksFromCache}`);
    lines.push(`  Chunks removed: ${m.chunksRemoved}`);
    lines.push("");
    lines.push("Embedding API:");
    lines.push(`  API calls: ${m.embeddingApiCalls}`);
    lines.push(`  Tokens used: ${m.embeddingTokensUsed.toLocaleString()}`);
    lines.push(`  Errors: ${m.embeddingErrors}`);
    if (m.searchCount > 0) {
      lines.push("");
      lines.push("Search:");
      lines.push(`  Total searches: ${m.searchCount}`);
      lines.push(`  Average time: ${m.searchAvgMs.toFixed(2)}ms`);
      lines.push(`  Last search: ${m.searchLastMs.toFixed(2)}ms`);
      // The per-phase breakdown is shown only when an embedding time was recorded.
      if (m.embeddingCallMs > 0) {
        lines.push(`    - Embedding: ${m.embeddingCallMs.toFixed(2)}ms`);
        lines.push(`    - Vector search: ${m.vectorSearchMs.toFixed(2)}ms`);
        lines.push(`    - Keyword search: ${m.keywordSearchMs.toFixed(2)}ms`);
        lines.push(`    - Fusion: ${m.fusionMs.toFixed(2)}ms`);
      }
    }
    const totalCacheOps = m.cacheHits + m.cacheMisses;
    if (totalCacheOps > 0) {
      lines.push("");
      lines.push("Cache:");
      lines.push(`  Hits: ${m.cacheHits}`);
      lines.push(`  Misses: ${m.cacheMisses}`);
      lines.push(`  Hit rate: ${(m.cacheHits / totalCacheOps * 100).toFixed(1)}%`);
    }
    if (m.gcRuns > 0) {
      lines.push("");
      lines.push("Garbage Collection:");
      lines.push(`  GC runs: ${m.gcRuns}`);
      lines.push(`  Orphans removed: ${m.gcOrphansRemoved}`);
      lines.push(`  Chunks removed: ${m.gcChunksRemoved}`);
      lines.push(`  Embeddings removed: ${m.gcEmbeddingsRemoved}`);
    }
    return lines.join("\n");
  }
  /**
   * Render the newest `limit` entries, one per line, as
   * `[timestamp] [LEVEL] [category] message {json-data}`.
   *
   * Payloads are serialized defensively: data that JSON.stringify cannot
   * handle (circular references, BigInt) is replaced by a placeholder so a
   * single bad entry cannot make the whole report throw.
   * @param {number} [limit=20]
   * @returns {string}
   */
  formatRecentLogs(limit = 20) {
    const logs = this.getLogs(limit);
    if (logs.length === 0) {
      return "No logs recorded.";
    }
    const describeData = (data) => {
      if (!data) return "";
      try {
        const json = JSON.stringify(data);
        // stringify yields undefined for values with no JSON form (e.g. functions).
        return json === void 0 ? "" : ` ${json}`;
      } catch {
        return " [unserializable data]";
      }
    };
    return logs.map((l) => {
      const dataStr = describeData(l.data);
      return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
    }).join("\n");
  }
  /** @returns {boolean} the `config.enabled` flag */
  isEnabled() {
    return this.config.enabled;
  }
  /**
   * @returns {boolean} `config.enabled && config.metrics`. Note the `record*`
   *   methods themselves check only `config.metrics`.
   */
  isMetricsEnabled() {
    return this.config.enabled && this.config.metrics;
  }
};
|
|
2644
|
+
// Most recently created Logger instance; stays null until initializeLogger() is called.
var globalLogger = null;
|
|
2645
|
+
/**
 * Create a Logger for the given debug configuration and publish it as the
 * module-wide `globalLogger`, replacing any previous instance.
 *
 * @param {object} config - debug configuration passed to the Logger constructor
 * @returns {Logger} the newly created logger
 */
function initializeLogger(config) {
  const created = new Logger(config);
  globalLogger = created;
  return created;
}
|
|
2649
|
+
|
|
2328
2650
|
// src/native/index.ts
|
|
2329
2651
|
import * as path3 from "path";
|
|
2330
2652
|
import * as os2 from "os";
|
|
@@ -2453,6 +2775,21 @@ var VectorStore = class {
|
|
|
2453
2775
|
metadata: JSON.parse(r.metadata)
|
|
2454
2776
|
}));
|
|
2455
2777
|
}
|
|
2778
|
+
getMetadata(id) {
|
|
2779
|
+
const result = this.inner.getMetadata(id);
|
|
2780
|
+
if (result === null || result === void 0) {
|
|
2781
|
+
return void 0;
|
|
2782
|
+
}
|
|
2783
|
+
return JSON.parse(result);
|
|
2784
|
+
}
|
|
2785
|
+
getMetadataBatch(ids) {
|
|
2786
|
+
const results = this.inner.getMetadataBatch(ids);
|
|
2787
|
+
const map = /* @__PURE__ */ new Map();
|
|
2788
|
+
for (const { key, metadata } of results) {
|
|
2789
|
+
map.set(key, JSON.parse(metadata));
|
|
2790
|
+
}
|
|
2791
|
+
return map;
|
|
2792
|
+
}
|
|
2456
2793
|
};
|
|
2457
2794
|
var CHARS_PER_TOKEN = 4;
|
|
2458
2795
|
var MAX_BATCH_TOKENS = 7500;
|
|
@@ -2851,12 +3188,18 @@ var Indexer = class {
|
|
|
2851
3188
|
failedBatchesPath = "";
|
|
2852
3189
|
currentBranch = "default";
|
|
2853
3190
|
baseBranch = "main";
|
|
3191
|
+
logger;
|
|
3192
|
+
queryEmbeddingCache = /* @__PURE__ */ new Map();
|
|
3193
|
+
maxQueryCacheSize = 100;
|
|
3194
|
+
queryCacheTtlMs = 5 * 60 * 1e3;
|
|
3195
|
+
querySimilarityThreshold = 0.85;
|
|
2854
3196
|
constructor(projectRoot, config) {
|
|
2855
3197
|
this.projectRoot = projectRoot;
|
|
2856
3198
|
this.config = config;
|
|
2857
3199
|
this.indexPath = this.getIndexPath();
|
|
2858
3200
|
this.fileHashCachePath = path5.join(this.indexPath, "file-hashes.json");
|
|
2859
3201
|
this.failedBatchesPath = path5.join(this.indexPath, "failed-batches.json");
|
|
3202
|
+
this.logger = initializeLogger(config.debug);
|
|
2860
3203
|
}
|
|
2861
3204
|
getIndexPath() {
|
|
2862
3205
|
if (this.config.scope === "global") {
|
|
@@ -2935,6 +3278,11 @@ var Indexer = class {
|
|
|
2935
3278
|
"No embedding provider available. Configure GitHub, OpenAI, Google, or Ollama."
|
|
2936
3279
|
);
|
|
2937
3280
|
}
|
|
3281
|
+
this.logger.info("Initializing indexer", {
|
|
3282
|
+
provider: this.detectedProvider.provider,
|
|
3283
|
+
model: this.detectedProvider.modelInfo.model,
|
|
3284
|
+
scope: this.config.scope
|
|
3285
|
+
});
|
|
2938
3286
|
this.provider = createEmbeddingProvider(
|
|
2939
3287
|
this.detectedProvider.credentials,
|
|
2940
3288
|
this.detectedProvider.modelInfo
|
|
@@ -2966,9 +3314,14 @@ var Indexer = class {
|
|
|
2966
3314
|
if (isGitRepo(this.projectRoot)) {
|
|
2967
3315
|
this.currentBranch = getBranchOrDefault(this.projectRoot);
|
|
2968
3316
|
this.baseBranch = getBaseBranch(this.projectRoot);
|
|
3317
|
+
this.logger.branch("info", "Detected git repository", {
|
|
3318
|
+
currentBranch: this.currentBranch,
|
|
3319
|
+
baseBranch: this.baseBranch
|
|
3320
|
+
});
|
|
2969
3321
|
} else {
|
|
2970
3322
|
this.currentBranch = "default";
|
|
2971
3323
|
this.baseBranch = "default";
|
|
3324
|
+
this.logger.branch("debug", "Not a git repository, using default branch");
|
|
2972
3325
|
}
|
|
2973
3326
|
if (this.config.indexing.autoGc) {
|
|
2974
3327
|
await this.maybeRunAutoGc();
|
|
@@ -3052,6 +3405,8 @@ var Indexer = class {
|
|
|
3052
3405
|
}
|
|
3053
3406
|
async index(onProgress) {
|
|
3054
3407
|
const { store, provider, invertedIndex, database, detectedProvider } = await this.ensureInitialized();
|
|
3408
|
+
this.logger.recordIndexingStart();
|
|
3409
|
+
this.logger.info("Starting indexing", { projectRoot: this.projectRoot });
|
|
3055
3410
|
const startTime = Date.now();
|
|
3056
3411
|
const stats = {
|
|
3057
3412
|
totalFiles: 0,
|
|
@@ -3081,6 +3436,11 @@ var Indexer = class {
|
|
|
3081
3436
|
);
|
|
3082
3437
|
stats.totalFiles = files.length;
|
|
3083
3438
|
stats.skippedFiles = skipped;
|
|
3439
|
+
this.logger.recordFilesScanned(files.length);
|
|
3440
|
+
this.logger.cache("debug", "Scanning files for changes", {
|
|
3441
|
+
totalFiles: files.length,
|
|
3442
|
+
skippedFiles: skipped.length
|
|
3443
|
+
});
|
|
3084
3444
|
const changedFiles = [];
|
|
3085
3445
|
const unchangedFilePaths = /* @__PURE__ */ new Set();
|
|
3086
3446
|
const currentFileHashes = /* @__PURE__ */ new Map();
|
|
@@ -3089,11 +3449,17 @@ var Indexer = class {
|
|
|
3089
3449
|
currentFileHashes.set(f.path, currentHash);
|
|
3090
3450
|
if (this.fileHashCache.get(f.path) === currentHash) {
|
|
3091
3451
|
unchangedFilePaths.add(f.path);
|
|
3452
|
+
this.logger.recordCacheHit();
|
|
3092
3453
|
} else {
|
|
3093
3454
|
const content = await fsPromises2.readFile(f.path, "utf-8");
|
|
3094
3455
|
changedFiles.push({ path: f.path, content, hash: currentHash });
|
|
3456
|
+
this.logger.recordCacheMiss();
|
|
3095
3457
|
}
|
|
3096
3458
|
}
|
|
3459
|
+
this.logger.cache("info", "File hash cache results", {
|
|
3460
|
+
unchanged: unchangedFilePaths.size,
|
|
3461
|
+
changed: changedFiles.length
|
|
3462
|
+
});
|
|
3097
3463
|
onProgress?.({
|
|
3098
3464
|
phase: "parsing",
|
|
3099
3465
|
filesProcessed: 0,
|
|
@@ -3101,7 +3467,12 @@ var Indexer = class {
|
|
|
3101
3467
|
chunksProcessed: 0,
|
|
3102
3468
|
totalChunks: 0
|
|
3103
3469
|
});
|
|
3470
|
+
const parseStartTime = performance2.now();
|
|
3104
3471
|
const parsedFiles = parseFiles(changedFiles);
|
|
3472
|
+
const parseMs = performance2.now() - parseStartTime;
|
|
3473
|
+
this.logger.recordFilesParsed(parsedFiles.length);
|
|
3474
|
+
this.logger.recordParseDuration(parseMs);
|
|
3475
|
+
this.logger.debug("Parsed changed files", { parsedCount: parsedFiles.length, parseMs: parseMs.toFixed(2) });
|
|
3105
3476
|
const existingChunks = /* @__PURE__ */ new Map();
|
|
3106
3477
|
const existingChunksByFile = /* @__PURE__ */ new Map();
|
|
3107
3478
|
for (const { key, metadata } of store.getAllMetadata()) {
|
|
@@ -3183,6 +3554,13 @@ var Indexer = class {
|
|
|
3183
3554
|
stats.totalChunks = pendingChunks.length;
|
|
3184
3555
|
stats.existingChunks = currentChunkIds.size - pendingChunks.length;
|
|
3185
3556
|
stats.removedChunks = removedCount;
|
|
3557
|
+
this.logger.recordChunksProcessed(currentChunkIds.size);
|
|
3558
|
+
this.logger.recordChunksRemoved(removedCount);
|
|
3559
|
+
this.logger.info("Chunk analysis complete", {
|
|
3560
|
+
pending: pendingChunks.length,
|
|
3561
|
+
existing: stats.existingChunks,
|
|
3562
|
+
removed: removedCount
|
|
3563
|
+
});
|
|
3186
3564
|
if (pendingChunks.length === 0 && removedCount === 0) {
|
|
3187
3565
|
database.clearBranch(this.currentBranch);
|
|
3188
3566
|
database.addChunksToBranchBatch(this.currentBranch, Array.from(currentChunkIds));
|
|
@@ -3226,6 +3604,11 @@ var Indexer = class {
|
|
|
3226
3604
|
const missingHashes = new Set(database.getMissingEmbeddings(allContentHashes));
|
|
3227
3605
|
const chunksNeedingEmbedding = pendingChunks.filter((c) => missingHashes.has(c.contentHash));
|
|
3228
3606
|
const chunksWithExistingEmbedding = pendingChunks.filter((c) => !missingHashes.has(c.contentHash));
|
|
3607
|
+
this.logger.cache("info", "Embedding cache lookup", {
|
|
3608
|
+
needsEmbedding: chunksNeedingEmbedding.length,
|
|
3609
|
+
fromCache: chunksWithExistingEmbedding.length
|
|
3610
|
+
});
|
|
3611
|
+
this.logger.recordChunksFromCache(chunksWithExistingEmbedding.length);
|
|
3229
3612
|
for (const chunk of chunksWithExistingEmbedding) {
|
|
3230
3613
|
const embeddingBuffer = database.getEmbedding(chunk.contentHash);
|
|
3231
3614
|
if (embeddingBuffer) {
|
|
@@ -3264,13 +3647,16 @@ var Indexer = class {
|
|
|
3264
3647
|
const message = getErrorMessage(error);
|
|
3265
3648
|
if (isRateLimitError(error)) {
|
|
3266
3649
|
rateLimitBackoffMs = Math.min(providerRateLimits.maxRetryMs, (rateLimitBackoffMs || providerRateLimits.minRetryMs) * 2);
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
|
|
3650
|
+
this.logger.embedding("warn", `Rate limited, backing off`, {
|
|
3651
|
+
attempt: error.attemptNumber,
|
|
3652
|
+
retriesLeft: error.retriesLeft,
|
|
3653
|
+
backoffMs: rateLimitBackoffMs
|
|
3654
|
+
});
|
|
3270
3655
|
} else {
|
|
3271
|
-
|
|
3272
|
-
|
|
3273
|
-
|
|
3656
|
+
this.logger.embedding("error", `Embedding batch failed`, {
|
|
3657
|
+
attempt: error.attemptNumber,
|
|
3658
|
+
error: message
|
|
3659
|
+
});
|
|
3274
3660
|
}
|
|
3275
3661
|
}
|
|
3276
3662
|
}
|
|
@@ -3297,6 +3683,12 @@ var Indexer = class {
|
|
|
3297
3683
|
}
|
|
3298
3684
|
stats.indexedChunks += batch.length;
|
|
3299
3685
|
stats.tokensUsed += result.totalTokensUsed;
|
|
3686
|
+
this.logger.recordChunksEmbedded(batch.length);
|
|
3687
|
+
this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
|
|
3688
|
+
this.logger.embedding("debug", `Embedded batch`, {
|
|
3689
|
+
batchSize: batch.length,
|
|
3690
|
+
tokens: result.totalTokensUsed
|
|
3691
|
+
});
|
|
3300
3692
|
onProgress?.({
|
|
3301
3693
|
phase: "embedding",
|
|
3302
3694
|
filesProcessed: files.length,
|
|
@@ -3307,7 +3699,11 @@ var Indexer = class {
|
|
|
3307
3699
|
} catch (error) {
|
|
3308
3700
|
stats.failedChunks += batch.length;
|
|
3309
3701
|
this.addFailedBatch(batch, getErrorMessage(error));
|
|
3310
|
-
|
|
3702
|
+
this.logger.recordEmbeddingError();
|
|
3703
|
+
this.logger.embedding("error", `Failed to embed batch after retries`, {
|
|
3704
|
+
batchSize: batch.length,
|
|
3705
|
+
error: getErrorMessage(error)
|
|
3706
|
+
});
|
|
3311
3707
|
}
|
|
3312
3708
|
});
|
|
3313
3709
|
}
|
|
@@ -3329,6 +3725,16 @@ var Indexer = class {
|
|
|
3329
3725
|
await this.maybeRunOrphanGc();
|
|
3330
3726
|
}
|
|
3331
3727
|
stats.durationMs = Date.now() - startTime;
|
|
3728
|
+
this.logger.recordIndexingEnd();
|
|
3729
|
+
this.logger.info("Indexing complete", {
|
|
3730
|
+
files: stats.totalFiles,
|
|
3731
|
+
indexed: stats.indexedChunks,
|
|
3732
|
+
existing: stats.existingChunks,
|
|
3733
|
+
removed: stats.removedChunks,
|
|
3734
|
+
failed: stats.failedChunks,
|
|
3735
|
+
tokens: stats.tokensUsed,
|
|
3736
|
+
durationMs: stats.durationMs
|
|
3737
|
+
});
|
|
3332
3738
|
if (stats.failedChunks > 0) {
|
|
3333
3739
|
stats.failedBatchesPath = this.failedBatchesPath;
|
|
3334
3740
|
}
|
|
@@ -3341,18 +3747,96 @@ var Indexer = class {
|
|
|
3341
3747
|
});
|
|
3342
3748
|
return stats;
|
|
3343
3749
|
}
|
|
3750
|
+
async getQueryEmbedding(query, provider) {
|
|
3751
|
+
const now = Date.now();
|
|
3752
|
+
const cached = this.queryEmbeddingCache.get(query);
|
|
3753
|
+
if (cached && now - cached.timestamp < this.queryCacheTtlMs) {
|
|
3754
|
+
this.logger.cache("debug", "Query embedding cache hit (exact)", { query: query.slice(0, 50) });
|
|
3755
|
+
this.logger.recordQueryCacheHit();
|
|
3756
|
+
return cached.embedding;
|
|
3757
|
+
}
|
|
3758
|
+
const similarMatch = this.findSimilarCachedQuery(query, now);
|
|
3759
|
+
if (similarMatch) {
|
|
3760
|
+
this.logger.cache("debug", "Query embedding cache hit (similar)", {
|
|
3761
|
+
query: query.slice(0, 50),
|
|
3762
|
+
similarTo: similarMatch.key.slice(0, 50),
|
|
3763
|
+
similarity: similarMatch.similarity.toFixed(3)
|
|
3764
|
+
});
|
|
3765
|
+
this.logger.recordQueryCacheSimilarHit();
|
|
3766
|
+
return similarMatch.embedding;
|
|
3767
|
+
}
|
|
3768
|
+
this.logger.cache("debug", "Query embedding cache miss", { query: query.slice(0, 50) });
|
|
3769
|
+
this.logger.recordQueryCacheMiss();
|
|
3770
|
+
const { embedding, tokensUsed } = await provider.embed(query);
|
|
3771
|
+
this.logger.recordEmbeddingApiCall(tokensUsed);
|
|
3772
|
+
if (this.queryEmbeddingCache.size >= this.maxQueryCacheSize) {
|
|
3773
|
+
const oldestKey = this.queryEmbeddingCache.keys().next().value;
|
|
3774
|
+
if (oldestKey) {
|
|
3775
|
+
this.queryEmbeddingCache.delete(oldestKey);
|
|
3776
|
+
}
|
|
3777
|
+
}
|
|
3778
|
+
this.queryEmbeddingCache.set(query, { embedding, timestamp: now });
|
|
3779
|
+
return embedding;
|
|
3780
|
+
}
|
|
3781
|
+
findSimilarCachedQuery(query, now) {
|
|
3782
|
+
const queryTokens = this.tokenize(query);
|
|
3783
|
+
if (queryTokens.size === 0) return null;
|
|
3784
|
+
let bestMatch = null;
|
|
3785
|
+
for (const [cachedQuery, { embedding, timestamp }] of this.queryEmbeddingCache) {
|
|
3786
|
+
if (now - timestamp >= this.queryCacheTtlMs) continue;
|
|
3787
|
+
const cachedTokens = this.tokenize(cachedQuery);
|
|
3788
|
+
const similarity = this.jaccardSimilarity(queryTokens, cachedTokens);
|
|
3789
|
+
if (similarity >= this.querySimilarityThreshold) {
|
|
3790
|
+
if (!bestMatch || similarity > bestMatch.similarity) {
|
|
3791
|
+
bestMatch = { key: cachedQuery, embedding, similarity };
|
|
3792
|
+
}
|
|
3793
|
+
}
|
|
3794
|
+
}
|
|
3795
|
+
return bestMatch;
|
|
3796
|
+
}
|
|
3797
|
+
tokenize(text) {
|
|
3798
|
+
return new Set(
|
|
3799
|
+
text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((t) => t.length > 1)
|
|
3800
|
+
);
|
|
3801
|
+
}
|
|
3802
|
+
jaccardSimilarity(a, b) {
|
|
3803
|
+
if (a.size === 0 && b.size === 0) return 1;
|
|
3804
|
+
if (a.size === 0 || b.size === 0) return 0;
|
|
3805
|
+
let intersection = 0;
|
|
3806
|
+
for (const token of a) {
|
|
3807
|
+
if (b.has(token)) intersection++;
|
|
3808
|
+
}
|
|
3809
|
+
const union = a.size + b.size - intersection;
|
|
3810
|
+
return intersection / union;
|
|
3811
|
+
}
|
|
3344
3812
|
async search(query, limit, options) {
|
|
3813
|
+
const searchStartTime = performance2.now();
|
|
3345
3814
|
const { store, provider, database } = await this.ensureInitialized();
|
|
3346
3815
|
if (store.count() === 0) {
|
|
3816
|
+
this.logger.search("debug", "Search on empty index", { query });
|
|
3347
3817
|
return [];
|
|
3348
3818
|
}
|
|
3349
3819
|
const maxResults = limit ?? this.config.search.maxResults;
|
|
3350
3820
|
const hybridWeight = options?.hybridWeight ?? this.config.search.hybridWeight;
|
|
3351
3821
|
const filterByBranch = options?.filterByBranch ?? true;
|
|
3352
|
-
|
|
3822
|
+
this.logger.search("debug", "Starting search", {
|
|
3823
|
+
query,
|
|
3824
|
+
maxResults,
|
|
3825
|
+
hybridWeight,
|
|
3826
|
+
filterByBranch
|
|
3827
|
+
});
|
|
3828
|
+
const embeddingStartTime = performance2.now();
|
|
3829
|
+
const embedding = await this.getQueryEmbedding(query, provider);
|
|
3830
|
+
const embeddingMs = performance2.now() - embeddingStartTime;
|
|
3831
|
+
const vectorStartTime = performance2.now();
|
|
3353
3832
|
const semanticResults = store.search(embedding, maxResults * 4);
|
|
3833
|
+
const vectorMs = performance2.now() - vectorStartTime;
|
|
3834
|
+
const keywordStartTime = performance2.now();
|
|
3354
3835
|
const keywordResults = await this.keywordSearch(query, maxResults * 4);
|
|
3836
|
+
const keywordMs = performance2.now() - keywordStartTime;
|
|
3837
|
+
const fusionStartTime = performance2.now();
|
|
3355
3838
|
const combined = this.fuseResults(semanticResults, keywordResults, hybridWeight, maxResults * 4);
|
|
3839
|
+
const fusionMs = performance2.now() - fusionStartTime;
|
|
3356
3840
|
let branchChunkIds = null;
|
|
3357
3841
|
if (filterByBranch && this.currentBranch !== "default") {
|
|
3358
3842
|
branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
|
|
@@ -3373,12 +3857,29 @@ var Indexer = class {
|
|
|
3373
3857
|
}
|
|
3374
3858
|
return true;
|
|
3375
3859
|
}).slice(0, maxResults);
|
|
3860
|
+
const totalSearchMs = performance2.now() - searchStartTime;
|
|
3861
|
+
this.logger.recordSearch(totalSearchMs, {
|
|
3862
|
+
embeddingMs,
|
|
3863
|
+
vectorMs,
|
|
3864
|
+
keywordMs,
|
|
3865
|
+
fusionMs
|
|
3866
|
+
});
|
|
3867
|
+
this.logger.search("info", "Search complete", {
|
|
3868
|
+
query,
|
|
3869
|
+
results: filtered.length,
|
|
3870
|
+
totalMs: Math.round(totalSearchMs * 100) / 100,
|
|
3871
|
+
embeddingMs: Math.round(embeddingMs * 100) / 100,
|
|
3872
|
+
vectorMs: Math.round(vectorMs * 100) / 100,
|
|
3873
|
+
keywordMs: Math.round(keywordMs * 100) / 100,
|
|
3874
|
+
fusionMs: Math.round(fusionMs * 100) / 100
|
|
3875
|
+
});
|
|
3876
|
+
const metadataOnly = options?.metadataOnly ?? false;
|
|
3376
3877
|
return Promise.all(
|
|
3377
3878
|
filtered.map(async (r) => {
|
|
3378
3879
|
let content = "";
|
|
3379
3880
|
let contextStartLine = r.metadata.startLine;
|
|
3380
3881
|
let contextEndLine = r.metadata.endLine;
|
|
3381
|
-
if (this.config.search.includeContext) {
|
|
3882
|
+
if (!metadataOnly && this.config.search.includeContext) {
|
|
3382
3883
|
try {
|
|
3383
3884
|
const fileContent = await fsPromises2.readFile(
|
|
3384
3885
|
r.metadata.filePath,
|
|
@@ -3411,11 +3912,8 @@ var Indexer = class {
|
|
|
3411
3912
|
if (scores.size === 0) {
|
|
3412
3913
|
return [];
|
|
3413
3914
|
}
|
|
3414
|
-
const
|
|
3415
|
-
const metadataMap =
|
|
3416
|
-
for (const { key, metadata } of allMetadata) {
|
|
3417
|
-
metadataMap.set(key, metadata);
|
|
3418
|
-
}
|
|
3915
|
+
const chunkIds = Array.from(scores.keys());
|
|
3916
|
+
const metadataMap = store.getMetadataBatch(chunkIds);
|
|
3419
3917
|
const results = [];
|
|
3420
3918
|
for (const [chunkId, score] of scores) {
|
|
3421
3919
|
const metadata = metadataMap.get(chunkId);
|
|
@@ -3478,6 +3976,7 @@ var Indexer = class {
|
|
|
3478
3976
|
}
|
|
3479
3977
|
async healthCheck() {
|
|
3480
3978
|
const { store, invertedIndex, database } = await this.ensureInitialized();
|
|
3979
|
+
this.logger.gc("info", "Starting health check");
|
|
3481
3980
|
const allMetadata = store.getAllMetadata();
|
|
3482
3981
|
const filePathsToChunkKeys = /* @__PURE__ */ new Map();
|
|
3483
3982
|
for (const { key, metadata } of allMetadata) {
|
|
@@ -3504,6 +4003,13 @@ var Indexer = class {
|
|
|
3504
4003
|
}
|
|
3505
4004
|
const gcOrphanEmbeddings = database.gcOrphanEmbeddings();
|
|
3506
4005
|
const gcOrphanChunks = database.gcOrphanChunks();
|
|
4006
|
+
this.logger.recordGc(removedCount, gcOrphanChunks, gcOrphanEmbeddings);
|
|
4007
|
+
this.logger.gc("info", "Health check complete", {
|
|
4008
|
+
removedStale: removedCount,
|
|
4009
|
+
orphanEmbeddings: gcOrphanEmbeddings,
|
|
4010
|
+
orphanChunks: gcOrphanChunks,
|
|
4011
|
+
removedFiles: removedFilePaths.length
|
|
4012
|
+
});
|
|
3507
4013
|
return { removed: removedCount, filePaths: removedFilePaths, gcOrphanEmbeddings, gcOrphanChunks };
|
|
3508
4014
|
}
|
|
3509
4015
|
async retryFailedBatches() {
|
|
@@ -3537,9 +4043,12 @@ var Indexer = class {
|
|
|
3537
4043
|
invertedIndex.removeChunk(chunk.id);
|
|
3538
4044
|
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3539
4045
|
}
|
|
4046
|
+
this.logger.recordChunksEmbedded(batch.chunks.length);
|
|
4047
|
+
this.logger.recordEmbeddingApiCall(result.totalTokensUsed);
|
|
3540
4048
|
succeeded += batch.chunks.length;
|
|
3541
4049
|
} catch (error) {
|
|
3542
4050
|
failed += batch.chunks.length;
|
|
4051
|
+
this.logger.recordEmbeddingError();
|
|
3543
4052
|
stillFailing.push({
|
|
3544
4053
|
...batch,
|
|
3545
4054
|
attemptCount: batch.attemptCount + 1,
|
|
@@ -3574,6 +4083,94 @@ var Indexer = class {
|
|
|
3574
4083
|
const { database } = await this.ensureInitialized();
|
|
3575
4084
|
return database.getStats();
|
|
3576
4085
|
}
|
|
4086
|
+
getLogger() {
|
|
4087
|
+
return this.logger;
|
|
4088
|
+
}
|
|
4089
|
+
async findSimilar(code, limit, options) {
|
|
4090
|
+
const searchStartTime = performance2.now();
|
|
4091
|
+
const { store, provider, database } = await this.ensureInitialized();
|
|
4092
|
+
if (store.count() === 0) {
|
|
4093
|
+
this.logger.search("debug", "Find similar on empty index");
|
|
4094
|
+
return [];
|
|
4095
|
+
}
|
|
4096
|
+
const maxResults = limit ?? this.config.search.maxResults;
|
|
4097
|
+
const filterByBranch = options?.filterByBranch ?? true;
|
|
4098
|
+
this.logger.search("debug", "Starting find similar", {
|
|
4099
|
+
codeLength: code.length,
|
|
4100
|
+
maxResults,
|
|
4101
|
+
filterByBranch
|
|
4102
|
+
});
|
|
4103
|
+
const embeddingStartTime = performance2.now();
|
|
4104
|
+
const { embedding, tokensUsed } = await provider.embed(code);
|
|
4105
|
+
const embeddingMs = performance2.now() - embeddingStartTime;
|
|
4106
|
+
this.logger.recordEmbeddingApiCall(tokensUsed);
|
|
4107
|
+
const vectorStartTime = performance2.now();
|
|
4108
|
+
const semanticResults = store.search(embedding, maxResults * 2);
|
|
4109
|
+
const vectorMs = performance2.now() - vectorStartTime;
|
|
4110
|
+
let branchChunkIds = null;
|
|
4111
|
+
if (filterByBranch && this.currentBranch !== "default") {
|
|
4112
|
+
branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
|
|
4113
|
+
}
|
|
4114
|
+
const filtered = semanticResults.filter((r) => {
|
|
4115
|
+
if (r.score < this.config.search.minScore) return false;
|
|
4116
|
+
if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
|
|
4117
|
+
if (options?.excludeFile) {
|
|
4118
|
+
if (r.metadata.filePath === options.excludeFile) return false;
|
|
4119
|
+
}
|
|
4120
|
+
if (options?.fileType) {
|
|
4121
|
+
const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
|
|
4122
|
+
if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
|
|
4123
|
+
}
|
|
4124
|
+
if (options?.directory) {
|
|
4125
|
+
const normalizedDir = options.directory.replace(/^\/|\/$/g, "");
|
|
4126
|
+
if (!r.metadata.filePath.includes(`/${normalizedDir}/`) && !r.metadata.filePath.includes(`${normalizedDir}/`)) return false;
|
|
4127
|
+
}
|
|
4128
|
+
if (options?.chunkType) {
|
|
4129
|
+
if (r.metadata.chunkType !== options.chunkType) return false;
|
|
4130
|
+
}
|
|
4131
|
+
return true;
|
|
4132
|
+
}).slice(0, maxResults);
|
|
4133
|
+
const totalSearchMs = performance2.now() - searchStartTime;
|
|
4134
|
+
this.logger.recordSearch(totalSearchMs, {
|
|
4135
|
+
embeddingMs,
|
|
4136
|
+
vectorMs,
|
|
4137
|
+
keywordMs: 0,
|
|
4138
|
+
fusionMs: 0
|
|
4139
|
+
});
|
|
4140
|
+
this.logger.search("info", "Find similar complete", {
|
|
4141
|
+
codeLength: code.length,
|
|
4142
|
+
results: filtered.length,
|
|
4143
|
+
totalMs: Math.round(totalSearchMs * 100) / 100,
|
|
4144
|
+
embeddingMs: Math.round(embeddingMs * 100) / 100,
|
|
4145
|
+
vectorMs: Math.round(vectorMs * 100) / 100
|
|
4146
|
+
});
|
|
4147
|
+
return Promise.all(
|
|
4148
|
+
filtered.map(async (r) => {
|
|
4149
|
+
let content = "";
|
|
4150
|
+
if (this.config.search.includeContext) {
|
|
4151
|
+
try {
|
|
4152
|
+
const fileContent = await fsPromises2.readFile(
|
|
4153
|
+
r.metadata.filePath,
|
|
4154
|
+
"utf-8"
|
|
4155
|
+
);
|
|
4156
|
+
const lines = fileContent.split("\n");
|
|
4157
|
+
content = lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
|
|
4158
|
+
} catch {
|
|
4159
|
+
content = "[File not accessible]";
|
|
4160
|
+
}
|
|
4161
|
+
}
|
|
4162
|
+
return {
|
|
4163
|
+
filePath: r.metadata.filePath,
|
|
4164
|
+
startLine: r.metadata.startLine,
|
|
4165
|
+
endLine: r.metadata.endLine,
|
|
4166
|
+
content,
|
|
4167
|
+
score: r.score,
|
|
4168
|
+
chunkType: r.metadata.chunkType,
|
|
4169
|
+
name: r.metadata.name
|
|
4170
|
+
};
|
|
4171
|
+
})
|
|
4172
|
+
);
|
|
4173
|
+
}
|
|
3577
4174
|
};
|
|
3578
4175
|
|
|
3579
4176
|
// node_modules/chokidar/index.js
|
|
@@ -5515,7 +6112,7 @@ function getIndexer() {
|
|
|
5515
6112
|
return sharedIndexer;
|
|
5516
6113
|
}
|
|
5517
6114
|
var codebase_search = tool({
|
|
5518
|
-
description: "Search codebase by MEANING, not keywords. Use when you
|
|
6115
|
+
description: "Search codebase by MEANING, not keywords. Returns full code content. Use when you need to see actual implementation. For just finding WHERE code is (saves ~90% tokens), use codebase_peek instead. For known identifiers like 'validateToken', use grep - it's faster.",
|
|
5519
6116
|
args: {
|
|
5520
6117
|
query: z.string().describe("Natural language description of what code you're looking for. Describe behavior, not syntax."),
|
|
5521
6118
|
limit: z.number().optional().default(10).describe("Maximum number of results to return"),
|
|
@@ -5547,6 +6144,38 @@ ${r.content}
|
|
|
5547
6144
|
${formatted.join("\n\n")}`;
|
|
5548
6145
|
}
|
|
5549
6146
|
});
|
|
6147
|
+
/**
 * Tool definition: semantic lookup that reports only where matching code
 * lives (chunk type, name, file, line range, score) and never the code itself.
 * It calls the indexer's `search` with `metadataOnly: true`.
 */
var codebase_peek = tool({
  description: "Quick lookup of code locations by meaning. Returns only metadata (file, line, name, type) WITHOUT code content. Use this first to find WHERE code is, then use Read tool to examine specific files. Saves tokens by not returning full code blocks. Best for: discovery, navigation, finding multiple related locations.",
  args: {
    query: z.string().describe("Natural language description of what code you're looking for."),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type")
  },
  async execute(args) {
    const searchOptions = {
      fileType: args.fileType,
      directory: args.directory,
      chunkType: args.chunkType,
      metadataOnly: true
    };
    const hits = await getIndexer().search(args.query, args.limit ?? 10, searchOptions);
    if (hits.length === 0) {
      return "No matching code found. Try a different query or run index_codebase first.";
    }
    // One line per hit: "[n] <type> <name> at <file>:<start>-<end> (score: x.xx)".
    const rows = [];
    hits.forEach((hit, position) => {
      const label = hit.name ? `"${hit.name}"` : "(anonymous)";
      const where = `${hit.filePath}:${hit.startLine}-${hit.endLine}`;
      rows.push(`[${position + 1}] ${hit.chunkType} ${label} at ${where} (score: ${hit.score.toFixed(2)})`);
    });
    // Header, blank line, the hit list, blank line, then the follow-up hint.
    return [
      `Found ${hits.length} locations for "${args.query}":`,
      "",
      rows.join("\n"),
      "",
      "Use Read tool to examine specific files."
    ].join("\n");
  }
});
|
|
5550
6179
|
var index_codebase = tool({
|
|
5551
6180
|
description: "Index the codebase for semantic search. Creates vector embeddings of code chunks. Incremental - only re-indexes changed files (~50ms when nothing changed). Run before first codebase_search.",
|
|
5552
6181
|
args: {
|
|
@@ -5613,6 +6242,84 @@ var index_health_check = tool({
|
|
|
5613
6242
|
return lines.join("\n");
|
|
5614
6243
|
}
|
|
5615
6244
|
});
|
|
6245
|
+
/**
 * Tool definition: returns the logger's formatted metrics report.
 * If the logger reports that debug mode or metrics collection is off,
 * a message showing the config to enable is returned instead.
 */
var index_metrics = tool({
  description: "Get metrics and performance statistics for the codebase index. Shows indexing stats, search timings, cache hit rates, and API usage. Requires debug.enabled=true and debug.metrics=true in config.",
  args: {},
  async execute() {
    const logger = getIndexer().getLogger();
    // Debug mode is checked first; the metrics switch is only consulted when it is on.
    if (!logger.isEnabled()) {
      return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
    }
    return logger.isMetricsEnabled()
      ? logger.formatMetrics()
      : 'Metrics collection is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true,\n "metrics": true\n }\n}\n```';
  }
});
|
|
6260
|
+
/**
 * Tool definition: returns recent indexer log entries as text, one entry per
 * line in the form "[timestamp] [LEVEL] [category] message {json data}".
 *
 * Filters:
 *   - `category` only: entries from `logger.getLogsByCategory`.
 *   - `level` only: entries from `logger.getLogsByLevel`.
 *   - both: the category result is additionally narrowed by level. Previously
 *     `level` was silently ignored whenever `category` was supplied.
 *   - neither: entries from `logger.getLogs`.
 *
 * Returns an explanatory message instead when debug mode is disabled or no
 * entries match.
 */
var index_logs = tool({
  description: "Get recent debug logs from the codebase indexer. Shows timestamped log entries with level and category. Requires debug.enabled=true in config.",
  args: {
    limit: z.number().optional().default(20).describe("Maximum number of log entries to return"),
    category: z.enum(["search", "embedding", "cache", "gc", "branch", "general"]).optional().describe("Filter by log category"),
    level: z.enum(["error", "warn", "info", "debug"]).optional().describe("Filter by minimum log level")
  },
  async execute(args) {
    const indexer = getIndexer();
    const logger = indexer.getLogger();
    if (!logger.isEnabled()) {
      return 'Debug mode is disabled. Enable it in your config:\n\n```json\n{\n "debug": {\n "enabled": true\n }\n}\n```';
    }
    let logs;
    if (args.category) {
      logs = logger.getLogsByCategory(args.category, args.limit);
      if (args.level) {
        // Honor the level filter together with the category filter.
        // "Minimum level" is read as: keep entries at least as severe as the
        // requested level, using the order error < warn < info < debug.
        // NOTE(review): assumes logger.getLogsByLevel uses the same
        // threshold semantics - confirm against the logger implementation.
        // The limit was already applied by the category lookup, so fewer
        // than `limit` entries may remain after this step.
        const severityOrder = ["error", "warn", "info", "debug"];
        const threshold = severityOrder.indexOf(args.level);
        logs = logs.filter((entry) => severityOrder.indexOf(entry.level) <= threshold);
      }
    } else if (args.level) {
      logs = logger.getLogsByLevel(args.level, args.limit);
    } else {
      logs = logger.getLogs(args.limit);
    }
    if (logs.length === 0) {
      return "No logs recorded yet. Logs are captured during indexing and search operations.";
    }
    return logs.map((l) => {
      // Structured data, when present, is appended as compact JSON.
      const dataStr = l.data ? ` ${JSON.stringify(l.data)}` : "";
      return `[${l.timestamp}] [${l.level.toUpperCase()}] [${l.category}] ${l.message}${dataStr}`;
    }).join("\n");
  }
});
|
|
6290
|
+
/**
 * Tool definition: looks up code that is semantically similar to a pasted
 * snippet via the indexer's `findSimilar`, and renders each hit as a header
 * line (position, chunk type, optional name, location, similarity percent)
 * followed by the hit's content inside a fenced block.
 */
var find_similar = tool({
  description: "Find code similar to a given snippet. Use for duplicate detection, pattern discovery, or refactoring prep. Paste code and find semantically similar implementations elsewhere in the codebase.",
  args: {
    code: z.string().describe("The code snippet to find similar code for"),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
    excludeFile: z.string().optional().describe("Exclude results from this file path (useful when searching for duplicates of code from a specific file)")
  },
  async execute(args) {
    const filters = {
      fileType: args.fileType,
      directory: args.directory,
      chunkType: args.chunkType,
      excludeFile: args.excludeFile
    };
    const matches = await getIndexer().findSimilar(args.code, args.limit ?? 10, filters);
    if (matches.length === 0) {
      return "No similar code found. Try a different snippet or run index_codebase first.";
    }
    const sections = matches.map((match, position) => {
      // The quoted name is included only when the chunk has one.
      const label = match.name ? ` "${match.name}"` : "";
      const where = `${match.filePath}:${match.startLine}-${match.endLine}`;
      const similarity = (match.score * 100).toFixed(1);
      return [
        `[${position + 1}] ${match.chunkType}${label} in ${where} (similarity: ${similarity}%)`,
        "```",
        `${match.content}`,
        "```"
      ].join("\n");
    });
    // Header, blank line, then the sections separated by blank lines.
    return `Found ${matches.length} similar code blocks:\n\n${sections.join("\n\n")}`;
  }
});
|
|
5616
6323
|
function formatIndexStats(stats, verbose = false) {
|
|
5617
6324
|
const lines = [];
|
|
5618
6325
|
if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
|
|
@@ -5795,9 +6502,13 @@ var plugin = async ({ directory }) => {
|
|
|
5795
6502
|
return {
|
|
5796
6503
|
tool: {
|
|
5797
6504
|
codebase_search,
|
|
6505
|
+
codebase_peek,
|
|
5798
6506
|
index_codebase,
|
|
5799
6507
|
index_status,
|
|
5800
|
-
index_health_check
|
|
6508
|
+
index_health_check,
|
|
6509
|
+
index_metrics,
|
|
6510
|
+
index_logs,
|
|
6511
|
+
find_similar
|
|
5801
6512
|
},
|
|
5802
6513
|
async config(cfg) {
|
|
5803
6514
|
cfg.command = cfg.command ?? {};
|