@comfanion/usethis_search 4.3.1 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api.ts +34 -17
- package/cache/manager.ts +30 -19
- package/cli.ts +8 -5
- package/file-indexer.ts +28 -11
- package/hooks/message-before.ts +5 -5
- package/hooks/tool-substitution.ts +4 -120
- package/index.ts +17 -6
- package/package.json +4 -2
- package/tools/codeindex.ts +192 -184
- package/tools/graph.ts +265 -0
- package/tools/read-interceptor.ts +7 -3
- package/tools/search.ts +275 -186
- package/tools/workspace-state.ts +1 -2
- package/tools/workspace.ts +88 -117
- package/vectorizer/analyzers/lsp-client.ts +52 -6
- package/vectorizer/chunkers/chunker-factory.ts +6 -0
- package/vectorizer/chunkers/code-chunker.ts +73 -16
- package/vectorizer/chunkers/lsp-chunker.ts +313 -191
- package/vectorizer/graph-db.ts +6 -4
- package/vectorizer/index.ts +406 -142
- package/vectorizer/query-decomposer.ts +397 -0
- package/vectorizer/usage-tracker.ts +36 -0
- package/vectorizer.yaml +9 -2
package/vectorizer/index.ts
CHANGED
|
@@ -19,6 +19,8 @@ import { GraphDB } from "./graph-db.ts";
|
|
|
19
19
|
import { GraphBuilder, isStructuralPredicate } from "./graph-builder.ts";
|
|
20
20
|
import { UsageTracker } from "./usage-tracker.ts";
|
|
21
21
|
import { ChunkStore } from "./chunk-store.ts";
|
|
22
|
+
import { decomposeQuery, rrfMerge, DEFAULT_DECOMPOSER_CONFIG } from "./query-decomposer.ts";
|
|
23
|
+
import type { DecomposerConfig } from "./query-decomposer.ts";
|
|
22
24
|
|
|
23
25
|
// Suppress transformers.js logs unless DEBUG is set
|
|
24
26
|
const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
|
|
@@ -86,6 +88,9 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
|
|
|
86
88
|
let METRICS_ENABLED = false;
|
|
87
89
|
let CACHE_ENABLED = true;
|
|
88
90
|
|
|
91
|
+
// ── Query decomposition config ───────────────────────────────────────────────
|
|
92
|
+
let DECOMPOSER_CONFIG: DecomposerConfig = { ...DEFAULT_DECOMPOSER_CONFIG };
|
|
93
|
+
|
|
89
94
|
// ── Search defaults (exposed to tool layer) ──────────────────────────────────
|
|
90
95
|
const DEFAULT_SEARCH_CONFIG = {
|
|
91
96
|
freshen: false, // Don't freshen on every search — auto_index handles it
|
|
@@ -101,8 +106,8 @@ const DEFAULT_WORKSPACE_CONFIG = {
|
|
|
101
106
|
maxFiles: 30, // Max number of files in workspace
|
|
102
107
|
attachTopN: 5, // Top N search results to attach with full content
|
|
103
108
|
attachRelatedPerFile: 3, // Max graph relations per main file
|
|
104
|
-
minScoreMain: 0.65, // Min score for main files
|
|
105
|
-
minScoreRelated: 0.5, // Min score for graph relations
|
|
109
|
+
minScoreMain: 0.65, // Min score for main files (override in vectorizer.yaml)
|
|
110
|
+
minScoreRelated: 0.5, // Min score for graph relations (override in vectorizer.yaml)
|
|
106
111
|
persistContent: false, // Save full content in snapshots (debug mode)
|
|
107
112
|
autoPruneSearch: true, // Replace old search outputs with compact summaries
|
|
108
113
|
substituteToolOutputs: true, // Replace tool outputs when files in workspace
|
|
@@ -188,6 +193,13 @@ function defaultVectorizerYaml() {
|
|
|
188
193
|
` auto_prune_search: true # Replace old search outputs with compact summaries\n` +
|
|
189
194
|
` substitute_tool_outputs: true # Replace tool outputs when files in workspace\n` +
|
|
190
195
|
`\n` +
|
|
196
|
+
` # Query decomposition (v4 — improves long query relevance)\n` +
|
|
197
|
+
` decomposition:\n` +
|
|
198
|
+
` enabled: true # Split complex queries into focused sub-queries\n` +
|
|
199
|
+
` min_words: 5 # Min significant words to trigger decomposition\n` +
|
|
200
|
+
` max_sub_queries: 4 # Max sub-queries (including keyword core)\n` +
|
|
201
|
+
` min_sub_query_words: 2 # Min words per sub-query\n` +
|
|
202
|
+
`\n` +
|
|
191
203
|
` # Quality monitoring\n` +
|
|
192
204
|
` quality:\n` +
|
|
193
205
|
` enable_metrics: false\n` +
|
|
@@ -370,6 +382,17 @@ async function loadConfig(projectRoot) {
|
|
|
370
382
|
CACHE_ENABLED = parseBool(qs, "enable_cache", true);
|
|
371
383
|
}
|
|
372
384
|
|
|
385
|
+
// ── Parse query decomposition config ────────────────────────────────────
|
|
386
|
+
const decomposerMatch = section.match(/^\s{2}decomposition:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
|
|
387
|
+
if (decomposerMatch) {
|
|
388
|
+
const ds = decomposerMatch[1];
|
|
389
|
+
DECOMPOSER_CONFIG.enabled = parseBool(ds, "enabled", DEFAULT_DECOMPOSER_CONFIG.enabled);
|
|
390
|
+
DECOMPOSER_CONFIG.minWords = parseNumber(ds, "min_words", DEFAULT_DECOMPOSER_CONFIG.minWords);
|
|
391
|
+
DECOMPOSER_CONFIG.maxSubQueries = parseNumber(ds, "max_sub_queries", DEFAULT_DECOMPOSER_CONFIG.maxSubQueries);
|
|
392
|
+
DECOMPOSER_CONFIG.minSubQueryWords = parseNumber(ds, "min_sub_query_words", DEFAULT_DECOMPOSER_CONFIG.minSubQueryWords);
|
|
393
|
+
if (DEBUG) console.log("[vectorizer] Decomposer config:", DECOMPOSER_CONFIG);
|
|
394
|
+
}
|
|
395
|
+
|
|
373
396
|
// ── Parse graph config (v3) ──────────────────────────────────────────────
|
|
374
397
|
const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
|
|
375
398
|
if (graphMatch) {
|
|
@@ -475,6 +498,54 @@ function clearQueryCache() {
|
|
|
475
498
|
}
|
|
476
499
|
}
|
|
477
500
|
|
|
501
|
+
// ── Shared ONNX model singleton ─────────────────────────────────────────────
|
|
502
|
+
// Model lives for the entire session — not tied to indexer pool TTL.
|
|
503
|
+
// Indexer eviction clears caches/DB but model stays loaded for fast search.
|
|
504
|
+
let _sharedModel: any = null
|
|
505
|
+
let _sharedModelPromise: Promise<any> | null = null
|
|
506
|
+
|
|
507
|
+
async function getSharedModel(retries = 3): Promise<any> {
|
|
508
|
+
if (_sharedModel) return _sharedModel
|
|
509
|
+
|
|
510
|
+
// Prevent concurrent loads — reuse in-flight promise
|
|
511
|
+
if (_sharedModelPromise) return _sharedModelPromise
|
|
512
|
+
|
|
513
|
+
_sharedModelPromise = (async () => {
|
|
514
|
+
let lastError: Error | null = null
|
|
515
|
+
for (let attempt = 1; attempt <= retries; attempt++) {
|
|
516
|
+
try {
|
|
517
|
+
if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}... (attempt ${attempt}/${retries})`)
|
|
518
|
+
_sharedModel = await pipeline("feature-extraction", EMBEDDING_MODEL, {
|
|
519
|
+
progress_callback: DEBUG ? undefined : null,
|
|
520
|
+
})
|
|
521
|
+
if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`)
|
|
522
|
+
_sharedModelPromise = null
|
|
523
|
+
return _sharedModel
|
|
524
|
+
} catch (error) {
|
|
525
|
+
lastError = error as Error
|
|
526
|
+
if (attempt < retries) {
|
|
527
|
+
const delay = attempt * 2000
|
|
528
|
+
if (DEBUG) console.log(`[vectorizer] Model load attempt ${attempt} failed: ${lastError.message}. Retrying in ${delay}ms...`)
|
|
529
|
+
await new Promise(r => setTimeout(r, delay))
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
_sharedModelPromise = null
|
|
534
|
+
throw new Error(`Model loading failed after ${retries} attempts: ${lastError?.message || "unknown"}`)
|
|
535
|
+
})()
|
|
536
|
+
|
|
537
|
+
return _sharedModelPromise
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
/** Dispose shared model — call only on process exit or explicit cleanup. */
|
|
541
|
+
async function disposeSharedModel(): Promise<void> {
|
|
542
|
+
if (_sharedModel) {
|
|
543
|
+
try { await _sharedModel.dispose() } catch { /* best effort */ }
|
|
544
|
+
_sharedModel = null
|
|
545
|
+
}
|
|
546
|
+
_sharedModelPromise = null
|
|
547
|
+
}
|
|
548
|
+
|
|
478
549
|
class CodebaseIndexer {
|
|
479
550
|
constructor(projectRoot, indexName = "code") {
|
|
480
551
|
this.root = projectRoot;
|
|
@@ -543,21 +614,14 @@ class CodebaseIndexer {
|
|
|
543
614
|
|
|
544
615
|
async loadModel() {
|
|
545
616
|
if (!this.model) {
|
|
546
|
-
|
|
547
|
-
if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
|
|
548
|
-
this.model = await pipeline("feature-extraction", EMBEDDING_MODEL, {
|
|
549
|
-
progress_callback: DEBUG ? undefined : null,
|
|
550
|
-
});
|
|
551
|
-
if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
|
|
552
|
-
} catch (error) {
|
|
553
|
-
this.model = null;
|
|
554
|
-
throw new Error(`Model loading failed: ${error.message || error}`);
|
|
555
|
-
}
|
|
617
|
+
this.model = await getSharedModel();
|
|
556
618
|
}
|
|
557
619
|
return this.model;
|
|
558
620
|
}
|
|
559
621
|
|
|
560
622
|
async unloadModel() {
|
|
623
|
+
// Drop reference to shared model — do NOT dispose it.
|
|
624
|
+
// Model singleton lives for the entire session.
|
|
561
625
|
this.model = null;
|
|
562
626
|
// Release BM25 data held in memory
|
|
563
627
|
if (this.bm25) {
|
|
@@ -586,6 +650,14 @@ class CodebaseIndexer {
|
|
|
586
650
|
try { await this.usageTracker.save(); } catch { /* best effort */ }
|
|
587
651
|
this.usageTracker = null;
|
|
588
652
|
}
|
|
653
|
+
// Close LanceDB connection (releases file handles)
|
|
654
|
+
if (this.db) {
|
|
655
|
+
try {
|
|
656
|
+
// LanceDB v0.x doesn't expose close() — drop reference to release
|
|
657
|
+
// Arrow/IPC file handles are released when Connection is GC'd
|
|
658
|
+
this.db = null;
|
|
659
|
+
} catch { /* best effort */ }
|
|
660
|
+
}
|
|
589
661
|
this._chunkCache = null;
|
|
590
662
|
clearQueryCache();
|
|
591
663
|
if (global.gc) global.gc();
|
|
@@ -693,6 +765,8 @@ class CodebaseIndexer {
|
|
|
693
765
|
return null; // unchanged
|
|
694
766
|
}
|
|
695
767
|
|
|
768
|
+
if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [read]`);
|
|
769
|
+
|
|
696
770
|
// Extract metadata
|
|
697
771
|
const fileMeta = await extractFileMetadata(filePath, content);
|
|
698
772
|
const archived = this.isArchived(relPath, content);
|
|
@@ -700,9 +774,13 @@ class CodebaseIndexer {
|
|
|
700
774
|
// Clean content before chunking
|
|
701
775
|
const cleaned = cleanContent(content, fileMeta.file_type, CLEANING_CONFIG);
|
|
702
776
|
|
|
777
|
+
if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [chunking]`);
|
|
778
|
+
|
|
703
779
|
// Semantic chunking (async for LSP-based chunking)
|
|
704
780
|
const chunks = await chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG, filePath, this.root);
|
|
705
781
|
|
|
782
|
+
if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [${chunks.length} chunks, building graph]`);
|
|
783
|
+
|
|
706
784
|
// Assign chunk IDs
|
|
707
785
|
const chunksWithIds = this.graphBuilder
|
|
708
786
|
? this.graphBuilder.assignChunkIds(relPath, chunks)
|
|
@@ -730,23 +808,39 @@ class CodebaseIndexer {
|
|
|
730
808
|
}
|
|
731
809
|
|
|
732
810
|
// Return prepared rows (without vector — Phase 2 fills it)
|
|
733
|
-
const rows = chunksWithIds.map((chunk, i) =>
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
content
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
811
|
+
const rows = chunksWithIds.map((chunk, i) => {
|
|
812
|
+
// Build metadata prefix for embedding enrichment.
|
|
813
|
+
// This helps the embedding model associate function/class names with the code,
|
|
814
|
+
// improving search relevance for queries like "ensureBudget" or "WorkspaceCache".
|
|
815
|
+
// The prefix is stored in a separate field — original content stays clean.
|
|
816
|
+
const metaParts: string[] = []
|
|
817
|
+
if (relPath) metaParts.push(`File: ${relPath}`)
|
|
818
|
+
if (chunk.class_name) metaParts.push(`Class: ${chunk.class_name}`)
|
|
819
|
+
if (chunk.function_name) metaParts.push(`Method: ${chunk.function_name}`)
|
|
820
|
+
if (chunk.heading_context) metaParts.push(`Section: ${chunk.heading_context}`)
|
|
821
|
+
const metaPrefix = metaParts.length > 0 ? `// ${metaParts.join(" | ")}\n` : ""
|
|
822
|
+
|
|
823
|
+
return {
|
|
824
|
+
chunk_id: chunk.chunk_id,
|
|
825
|
+
file: relPath,
|
|
826
|
+
chunk_index: i,
|
|
827
|
+
content: chunk.content,
|
|
828
|
+
// Enriched content for embedding — metadata prefix + original content.
|
|
829
|
+
// Phase 2 embeds this instead of raw content.
|
|
830
|
+
content_for_embedding: metaPrefix + chunk.content,
|
|
831
|
+
archived,
|
|
832
|
+
file_type: fileMeta.file_type,
|
|
833
|
+
language: fileMeta.language,
|
|
834
|
+
last_modified: fileMeta.last_modified,
|
|
835
|
+
file_size: fileMeta.file_size,
|
|
836
|
+
heading_context: chunk.heading_context || "",
|
|
837
|
+
function_name: chunk.function_name || "",
|
|
838
|
+
class_name: chunk.class_name || "",
|
|
839
|
+
tags: (fileMeta.tags || []).join(","),
|
|
840
|
+
start_line: chunk.start_line ?? -1,
|
|
841
|
+
end_line: chunk.end_line ?? -1,
|
|
842
|
+
}
|
|
843
|
+
});
|
|
750
844
|
|
|
751
845
|
// Store chunks in ChunkStore (Phase 1 — BM25 available immediately)
|
|
752
846
|
if (this.chunkStore) {
|
|
@@ -758,6 +852,7 @@ class CodebaseIndexer {
|
|
|
758
852
|
}
|
|
759
853
|
}
|
|
760
854
|
|
|
855
|
+
if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [done, ${rows.length} rows]`);
|
|
761
856
|
return { relPath, hash, rows };
|
|
762
857
|
}
|
|
763
858
|
|
|
@@ -782,19 +877,24 @@ class CodebaseIndexer {
|
|
|
782
877
|
|
|
783
878
|
// Batch embed
|
|
784
879
|
const allData = [];
|
|
880
|
+
let embedErrors = 0;
|
|
785
881
|
for (let i = 0; i < allRows.length; i += batchSize) {
|
|
786
882
|
const batch = allRows.slice(i, i + batchSize);
|
|
787
|
-
const texts = batch.map(r => r.content);
|
|
788
883
|
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
884
|
+
for (const row of batch) {
|
|
885
|
+
try {
|
|
886
|
+
// Use enriched content (with metadata prefix) for embedding,
|
|
887
|
+
// but store original content in LanceDB for display.
|
|
888
|
+
const textToEmbed = row.content_for_embedding || row.content;
|
|
889
|
+
const result = await model(textToEmbed, { pooling: "mean", normalize: true });
|
|
890
|
+
// Don't store content_for_embedding in LanceDB — it's only for embedding
|
|
891
|
+
const { content_for_embedding, ...rowWithoutEmbeddingText } = row;
|
|
892
|
+
allData.push({ ...rowWithoutEmbeddingText, vector: Array.from(result.data) });
|
|
893
|
+
} catch (e) {
|
|
894
|
+
embedErrors++;
|
|
895
|
+
if (DEBUG) console.log(`[vectorizer] Embed failed for ${row.chunk_id || row.file}: ${(e as Error).message}`);
|
|
896
|
+
// Skip this chunk — don't let one bad chunk kill the entire index
|
|
897
|
+
}
|
|
798
898
|
}
|
|
799
899
|
|
|
800
900
|
if (onProgress) {
|
|
@@ -802,14 +902,25 @@ class CodebaseIndexer {
|
|
|
802
902
|
}
|
|
803
903
|
}
|
|
804
904
|
|
|
905
|
+
if (embedErrors > 0 && DEBUG) {
|
|
906
|
+
console.log(`[vectorizer] ${embedErrors} chunks failed to embed (skipped)`);
|
|
907
|
+
}
|
|
908
|
+
|
|
805
909
|
// Bulk store in LanceDB
|
|
910
|
+
if (allData.length === 0) return 0;
|
|
911
|
+
|
|
806
912
|
const tableName = "chunks";
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
913
|
+
try {
|
|
914
|
+
const tables = await this.db.tableNames();
|
|
915
|
+
if (tables.includes(tableName)) {
|
|
916
|
+
const table = await this.db.openTable(tableName);
|
|
917
|
+
await table.add(allData);
|
|
918
|
+
} else {
|
|
919
|
+
await this.db.createTable(tableName, allData);
|
|
920
|
+
}
|
|
921
|
+
} catch (e) {
|
|
922
|
+
if (DEBUG) console.log(`[vectorizer] LanceDB store failed: ${(e as Error).message}`);
|
|
923
|
+
throw e; // Re-throw — caller (indexAll) will catch and log
|
|
813
924
|
}
|
|
814
925
|
|
|
815
926
|
// Update hashes + mark vectorized in ChunkStore
|
|
@@ -853,8 +964,8 @@ class CodebaseIndexer {
|
|
|
853
964
|
// Clean content before chunking
|
|
854
965
|
const cleaned = cleanContent(content, fileMeta.file_type, CLEANING_CONFIG);
|
|
855
966
|
|
|
856
|
-
// Semantic chunking
|
|
857
|
-
const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
|
|
967
|
+
// Semantic chunking (await for LSP-based chunking when filePath is provided)
|
|
968
|
+
const chunks = await chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG, filePath, this.root);
|
|
858
969
|
|
|
859
970
|
// v3: Assign chunk IDs for graph tracking (works without graph — just adds IDs)
|
|
860
971
|
const chunksWithIds = this.graphBuilder
|
|
@@ -893,12 +1004,22 @@ class CodebaseIndexer {
|
|
|
893
1004
|
|
|
894
1005
|
const data = [];
|
|
895
1006
|
for (let i = 0; i < chunksWithIds.length; i++) {
|
|
896
|
-
const
|
|
1007
|
+
const chunk = chunksWithIds[i];
|
|
1008
|
+
// Build metadata prefix for embedding enrichment (same as prepareFile)
|
|
1009
|
+
const metaParts: string[] = [];
|
|
1010
|
+
if (relPath) metaParts.push(`File: ${relPath}`);
|
|
1011
|
+
if (chunk.class_name) metaParts.push(`Class: ${chunk.class_name}`);
|
|
1012
|
+
if (chunk.function_name) metaParts.push(`Method: ${chunk.function_name}`);
|
|
1013
|
+
if (chunk.heading_context) metaParts.push(`Section: ${chunk.heading_context}`);
|
|
1014
|
+
const metaPrefix = metaParts.length > 0 ? `// ${metaParts.join(" | ")}\n` : "";
|
|
1015
|
+
const textToEmbed = metaPrefix + chunk.content;
|
|
1016
|
+
|
|
1017
|
+
const embedding = await this.embed(textToEmbed);
|
|
897
1018
|
data.push({
|
|
898
|
-
chunk_id:
|
|
1019
|
+
chunk_id: chunk.chunk_id,
|
|
899
1020
|
file: relPath,
|
|
900
1021
|
chunk_index: i,
|
|
901
|
-
content:
|
|
1022
|
+
content: chunk.content,
|
|
902
1023
|
vector: embedding,
|
|
903
1024
|
archived: archived,
|
|
904
1025
|
// v2 metadata
|
|
@@ -906,13 +1027,13 @@ class CodebaseIndexer {
|
|
|
906
1027
|
language: fileMeta.language,
|
|
907
1028
|
last_modified: fileMeta.last_modified,
|
|
908
1029
|
file_size: fileMeta.file_size,
|
|
909
|
-
heading_context:
|
|
910
|
-
function_name:
|
|
911
|
-
class_name:
|
|
1030
|
+
heading_context: chunk.heading_context || "",
|
|
1031
|
+
function_name: chunk.function_name || "",
|
|
1032
|
+
class_name: chunk.class_name || "",
|
|
912
1033
|
tags: (fileMeta.tags || []).join(","),
|
|
913
1034
|
// Line numbers for "from-to" extraction (default to -1 when unknown)
|
|
914
|
-
start_line:
|
|
915
|
-
end_line:
|
|
1035
|
+
start_line: chunk.start_line ?? -1,
|
|
1036
|
+
end_line: chunk.end_line ?? -1,
|
|
916
1037
|
});
|
|
917
1038
|
}
|
|
918
1039
|
|
|
@@ -1087,7 +1208,13 @@ class CodebaseIndexer {
|
|
|
1087
1208
|
content: neighborChunk.content,
|
|
1088
1209
|
relation: edge.predicate,
|
|
1089
1210
|
score,
|
|
1090
|
-
via: edge.source
|
|
1211
|
+
via: edge.source,
|
|
1212
|
+
start_line: neighborChunk.start_line,
|
|
1213
|
+
end_line: neighborChunk.end_line,
|
|
1214
|
+
chunk_index: neighborChunk.chunk_index,
|
|
1215
|
+
language: neighborChunk.language,
|
|
1216
|
+
function_name: neighborChunk.function_name,
|
|
1217
|
+
class_name: neighborChunk.class_name,
|
|
1091
1218
|
});
|
|
1092
1219
|
}
|
|
1093
1220
|
|
|
@@ -1121,9 +1248,9 @@ class CodebaseIndexer {
|
|
|
1121
1248
|
}
|
|
1122
1249
|
}
|
|
1123
1250
|
|
|
1124
|
-
// ──
|
|
1251
|
+
// ── Single-query search (internal — used by search() for each sub-query) ──
|
|
1125
1252
|
|
|
1126
|
-
async
|
|
1253
|
+
async _searchSingle(query, limit = 5, includeArchived = false, options = {}) {
|
|
1127
1254
|
const tableName = "chunks";
|
|
1128
1255
|
const tables = await this.db.tableNames();
|
|
1129
1256
|
|
|
@@ -1178,14 +1305,9 @@ class CodebaseIndexer {
|
|
|
1178
1305
|
}
|
|
1179
1306
|
}
|
|
1180
1307
|
|
|
1181
|
-
// Apply metadata filters then return
|
|
1308
|
+
// Apply metadata filters then return
|
|
1182
1309
|
results = this._applyMetadataFilters(results, includeArchived, options);
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
// Graph context expansion (same as vector path)
|
|
1186
|
-
await this._expandGraphContext(finalResults, null, query);
|
|
1187
|
-
|
|
1188
|
-
return finalResults;
|
|
1310
|
+
return results.slice(0, limit);
|
|
1189
1311
|
}
|
|
1190
1312
|
|
|
1191
1313
|
// ── Vector search (Phase 2 complete) ─────────────────────────────────────
|
|
@@ -1280,7 +1402,51 @@ class CodebaseIndexer {
|
|
|
1280
1402
|
|
|
1281
1403
|
// ── Metadata filters ──────────────────────────────────────────────────
|
|
1282
1404
|
results = this._applyMetadataFilters(results, includeArchived, options);
|
|
1283
|
-
|
|
1405
|
+
return results.slice(0, limit);
|
|
1406
|
+
}
|
|
1407
|
+
|
|
1408
|
+
// ── Search (v4: query decomposition + RRF merge + hybrid + metrics) ────────
|
|
1409
|
+
|
|
1410
|
+
async search(query, limit = 5, includeArchived = false, options = {}) {
|
|
1411
|
+
// ── Query decomposition ──────────────────────────────────────────────────
|
|
1412
|
+
const decomposition = decomposeQuery(query, DECOMPOSER_CONFIG);
|
|
1413
|
+
|
|
1414
|
+
let finalResults;
|
|
1415
|
+
|
|
1416
|
+
if (decomposition.decomposed && decomposition.subQueries.length > 1) {
|
|
1417
|
+
if (DEBUG) {
|
|
1418
|
+
console.log(`[vectorizer] Query decomposed (${decomposition.strategy}): ${decomposition.subQueries.length} sub-queries`);
|
|
1419
|
+
for (const sq of decomposition.subQueries) {
|
|
1420
|
+
console.log(` → "${sq}"`);
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
|
|
1424
|
+
// Run each sub-query independently, over-fetch to give RRF more signal
|
|
1425
|
+
const perQueryLimit = Math.max(limit * 2, 20);
|
|
1426
|
+
const resultSets = [];
|
|
1427
|
+
|
|
1428
|
+
for (const subQuery of decomposition.subQueries) {
|
|
1429
|
+
const results = await this._searchSingle(subQuery, perQueryLimit, includeArchived, options);
|
|
1430
|
+
if (results.length > 0) {
|
|
1431
|
+
resultSets.push(results);
|
|
1432
|
+
}
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
if (resultSets.length === 0) {
|
|
1436
|
+
finalResults = [];
|
|
1437
|
+
} else if (resultSets.length === 1) {
|
|
1438
|
+
finalResults = resultSets[0].slice(0, limit);
|
|
1439
|
+
} else {
|
|
1440
|
+
// RRF merge across sub-query result sets
|
|
1441
|
+
finalResults = rrfMerge(resultSets, 60, limit);
|
|
1442
|
+
if (DEBUG) {
|
|
1443
|
+
console.log(`[vectorizer] RRF merged ${resultSets.length} result sets → ${finalResults.length} results`);
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
} else {
|
|
1447
|
+
// Short/simple query — single search (no decomposition overhead)
|
|
1448
|
+
finalResults = await this._searchSingle(query, limit, includeArchived, options);
|
|
1449
|
+
}
|
|
1284
1450
|
|
|
1285
1451
|
// ── Metrics tracking ────────────────────────────────────────────────────
|
|
1286
1452
|
if (METRICS_ENABLED) {
|
|
@@ -1304,75 +1470,107 @@ class CodebaseIndexer {
|
|
|
1304
1470
|
}
|
|
1305
1471
|
|
|
1306
1472
|
// ── Graph context expansion (v3) ───────────────────────────────────────
|
|
1473
|
+
// Use original query for graph expansion (most complete context)
|
|
1474
|
+
const queryEmbedding = finalResults.length > 0 ? await this.embedQuery(query).catch(() => null) : null;
|
|
1307
1475
|
await this._expandGraphContext(finalResults, queryEmbedding, query);
|
|
1308
1476
|
|
|
1309
1477
|
return finalResults;
|
|
1310
1478
|
}
|
|
1311
1479
|
|
|
1312
1480
|
async findChunkById(chunkId) {
|
|
1313
|
-
//
|
|
1314
|
-
//
|
|
1481
|
+
// LRU cache with bounded size — avoids loading all 100K rows into memory.
|
|
1482
|
+
// Point lookups: ChunkStore (SQLite) first, LanceDB fallback for vectors.
|
|
1315
1483
|
if (!this._chunkCache) {
|
|
1316
1484
|
this._chunkCache = new Map();
|
|
1485
|
+
}
|
|
1317
1486
|
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1487
|
+
// Check LRU cache first
|
|
1488
|
+
const cached = this._chunkCache.get(chunkId);
|
|
1489
|
+
if (cached) {
|
|
1490
|
+
// Move to end (most recently used) — delete & re-insert
|
|
1491
|
+
this._chunkCache.delete(chunkId);
|
|
1492
|
+
this._chunkCache.set(chunkId, cached);
|
|
1493
|
+
return cached;
|
|
1494
|
+
}
|
|
1495
|
+
|
|
1496
|
+
// Point lookup: ChunkStore (SQLite — fast, no vectors)
|
|
1497
|
+
let chunk = null;
|
|
1498
|
+
if (this.chunkStore) {
|
|
1499
|
+
try {
|
|
1500
|
+
chunk = this.chunkStore.getChunkById(chunkId);
|
|
1501
|
+
} catch {
|
|
1502
|
+
// best effort
|
|
1503
|
+
}
|
|
1504
|
+
}
|
|
1505
|
+
|
|
1506
|
+
// If we need vectors (for cosine similarity in graph expansion),
|
|
1507
|
+
// try LanceDB point lookup. Only if ChunkStore had no result or we need vectors.
|
|
1508
|
+
if (!chunk) {
|
|
1509
|
+
try {
|
|
1510
|
+
const tableName = "chunks";
|
|
1511
|
+
const tables = await this.db.tableNames();
|
|
1512
|
+
if (tables.includes(tableName)) {
|
|
1323
1513
|
const table = await this.db.openTable(tableName);
|
|
1324
|
-
const rows = await table.filter("
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
this._chunkCache.set(row.chunk_id, row);
|
|
1328
|
-
}
|
|
1514
|
+
const rows = await table.filter(`chunk_id = '${chunkId.replace(/'/g, "''")}'`).limit(1).execute();
|
|
1515
|
+
if (rows.length > 0) {
|
|
1516
|
+
chunk = rows[0];
|
|
1329
1517
|
}
|
|
1330
|
-
} catch (e) {
|
|
1331
|
-
if (DEBUG) console.log("[vectorizer] Chunk cache from LanceDB failed:", e.message);
|
|
1332
1518
|
}
|
|
1519
|
+
} catch (e) {
|
|
1520
|
+
if (DEBUG) console.log("[vectorizer] LanceDB point lookup failed:", e.message);
|
|
1333
1521
|
}
|
|
1522
|
+
}
|
|
1334
1523
|
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
}
|
|
1343
|
-
}
|
|
1344
|
-
if (DEBUG && allChunks.length > 0) {
|
|
1345
|
-
console.log(`[vectorizer] Chunk cache from ChunkStore (${allChunks.length} chunks, no vectors)`);
|
|
1346
|
-
}
|
|
1347
|
-
} catch (e) {
|
|
1348
|
-
if (DEBUG) console.log("[vectorizer] Chunk cache from ChunkStore failed:", e.message);
|
|
1524
|
+
if (chunk) {
|
|
1525
|
+
// LRU eviction: cap at 500 entries (each ~2KB without vector, ~4KB with)
|
|
1526
|
+
const MAX_CHUNK_CACHE = 500;
|
|
1527
|
+
if (this._chunkCache.size >= MAX_CHUNK_CACHE) {
|
|
1528
|
+
const oldest = this._chunkCache.keys().next().value;
|
|
1529
|
+
if (oldest !== undefined) {
|
|
1530
|
+
this._chunkCache.delete(oldest);
|
|
1349
1531
|
}
|
|
1350
1532
|
}
|
|
1533
|
+
this._chunkCache.set(chunkId, chunk);
|
|
1351
1534
|
}
|
|
1352
|
-
|
|
1535
|
+
|
|
1536
|
+
return chunk || null;
|
|
1353
1537
|
}
|
|
1354
1538
|
|
|
1355
1539
|
/**
|
|
1356
1540
|
* Find all chunks belonging to a specific file path.
|
|
1541
|
+
* Uses ChunkStore (SQLite) for efficient file-level queries — no full cache load.
|
|
1357
1542
|
* @param {string} filePath - Relative file path (e.g. "src/auth.ts")
|
|
1358
1543
|
* @returns {Promise<Array>} Array of chunks from this file
|
|
1359
1544
|
*/
|
|
1360
1545
|
async findChunksByPath(filePath) {
|
|
1361
|
-
//
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1546
|
+
// Primary: ChunkStore has an index on file column — fast
|
|
1547
|
+
if (this.chunkStore) {
|
|
1548
|
+
try {
|
|
1549
|
+
const chunks = this.chunkStore.getChunksByFile(filePath);
|
|
1550
|
+
if (chunks.length > 0) {
|
|
1551
|
+
chunks.sort((a, b) => (a.chunk_index || 0) - (b.chunk_index || 0));
|
|
1552
|
+
return chunks;
|
|
1553
|
+
}
|
|
1554
|
+
} catch {
|
|
1555
|
+
// fallback below
|
|
1370
1556
|
}
|
|
1371
1557
|
}
|
|
1372
|
-
|
|
1373
|
-
//
|
|
1374
|
-
|
|
1375
|
-
|
|
1558
|
+
|
|
1559
|
+
// Fallback: LanceDB filter by file
|
|
1560
|
+
try {
|
|
1561
|
+
const tableName = "chunks";
|
|
1562
|
+
const tables = await this.db.tableNames();
|
|
1563
|
+
if (tables.includes(tableName)) {
|
|
1564
|
+
const table = await this.db.openTable(tableName);
|
|
1565
|
+
const rows = await table.filter(`file = '${filePath.replace(/'/g, "''")}'`).limit(1000).execute();
|
|
1566
|
+
rows.sort((a, b) => (a.chunk_index || 0) - (b.chunk_index || 0));
|
|
1567
|
+
return rows;
|
|
1568
|
+
}
|
|
1569
|
+
} catch (e) {
|
|
1570
|
+
if (DEBUG) console.log("[vectorizer] findChunksByPath LanceDB failed:", e.message);
|
|
1571
|
+
}
|
|
1572
|
+
|
|
1573
|
+
return [];
|
|
1376
1574
|
}
|
|
1377
1575
|
|
|
1378
1576
|
cosineSimilarity(vecA, vecB) {
|
|
@@ -1384,7 +1582,8 @@ class CodebaseIndexer {
|
|
|
1384
1582
|
normA += vecA[i] * vecA[i];
|
|
1385
1583
|
normB += vecB[i] * vecB[i];
|
|
1386
1584
|
}
|
|
1387
|
-
|
|
1585
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
1586
|
+
return denom === 0 ? 0 : dotProduct / denom;
|
|
1388
1587
|
}
|
|
1389
1588
|
|
|
1390
1589
|
async checkHealth(extraIgnore = []) {
|
|
@@ -1547,22 +1746,33 @@ class CodebaseIndexer {
|
|
|
1547
1746
|
const total = files.length;
|
|
1548
1747
|
const CONCURRENCY = 5;
|
|
1549
1748
|
|
|
1749
|
+
// Helper: write to indexer.log (always, not just DEBUG)
|
|
1750
|
+
const logPath = path.join(this.root, ".opencode", "indexer.log");
|
|
1751
|
+
const fsSync = await import("fs");
|
|
1752
|
+
const logToFile = (msg: string) => {
|
|
1753
|
+
const ts = new Date().toISOString().slice(11, 19);
|
|
1754
|
+
try { fsSync.appendFileSync(logPath, `${ts} ${msg}\n`); } catch { /* non-fatal */ }
|
|
1755
|
+
};
|
|
1756
|
+
|
|
1550
1757
|
// ══════════════════════════════════════════════════════════════════════════
|
|
1551
|
-
// Phase 1: Prepare files
|
|
1758
|
+
// Phase 1: Prepare files sequentially (chunk + graph, no embedding)
|
|
1552
1759
|
// ══════════════════════════════════════════════════════════════════════════
|
|
1760
|
+
logToFile(`Phase 1: preparing ${total} files (concurrency=${CONCURRENCY})`);
|
|
1761
|
+
|
|
1553
1762
|
const preparedFiles = [];
|
|
1554
1763
|
let prepared = 0;
|
|
1555
1764
|
let skipped = 0;
|
|
1765
|
+
let errors = 0;
|
|
1556
1766
|
|
|
1557
|
-
// Process in batches of CONCURRENCY
|
|
1558
1767
|
for (let i = 0; i < files.length; i += CONCURRENCY) {
|
|
1559
1768
|
const batch = files.slice(i, i + CONCURRENCY);
|
|
1560
1769
|
const promises = batch.map(async (relPath) => {
|
|
1561
1770
|
const filePath = path.join(this.root, relPath);
|
|
1562
1771
|
try {
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1772
|
+
return await this.prepareFile(filePath);
|
|
1773
|
+
} catch (e) {
|
|
1774
|
+
logToFile(`ERROR prepare ${relPath}: ${(e as Error).message}`);
|
|
1775
|
+
errors++;
|
|
1566
1776
|
return null;
|
|
1567
1777
|
}
|
|
1568
1778
|
});
|
|
@@ -1579,7 +1789,7 @@ class CodebaseIndexer {
|
|
|
1579
1789
|
}
|
|
1580
1790
|
}
|
|
1581
1791
|
|
|
1582
|
-
|
|
1792
|
+
logToFile(`Phase 1 done: ${prepared} prepared, ${skipped} skipped, ${errors} errors`);
|
|
1583
1793
|
|
|
1584
1794
|
// ══════════════════════════════════════════════════════════════════════════
|
|
1585
1795
|
// Phase 2: Batch embed + store (sequential, batch forward pass)
|
|
@@ -1587,19 +1797,24 @@ class CodebaseIndexer {
|
|
|
1587
1797
|
let chunksEmbedded = 0;
|
|
1588
1798
|
if (preparedFiles.length > 0) {
|
|
1589
1799
|
const totalChunks = preparedFiles.reduce((sum, pf) => sum + pf.rows.length, 0);
|
|
1590
|
-
|
|
1800
|
+
logToFile(`Phase 2: embedding ${totalChunks} chunks from ${preparedFiles.length} files`);
|
|
1591
1801
|
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1802
|
+
try {
|
|
1803
|
+
chunksEmbedded = await this.embedAndStore(preparedFiles, 32, (done, embedTotal, phase) => {
|
|
1804
|
+
if (onProgress) onProgress(done, embedTotal, `embedding`, done, "embed");
|
|
1805
|
+
});
|
|
1806
|
+
logToFile(`Phase 2 done: ${chunksEmbedded} chunks embedded and stored`);
|
|
1807
|
+
} catch (e) {
|
|
1808
|
+
logToFile(`Phase 2 FAILED: ${(e as Error).message}\n${(e as Error).stack || ""}`);
|
|
1809
|
+
throw e;
|
|
1810
|
+
}
|
|
1811
|
+
} else {
|
|
1812
|
+
logToFile(`Phase 2 skipped: no prepared files`);
|
|
1597
1813
|
}
|
|
1598
1814
|
|
|
1599
|
-
const indexed = prepared;
|
|
1815
|
+
const indexed = prepared;
|
|
1600
1816
|
|
|
1601
1817
|
// FR-005: Build semantic similarity edges as post-pass
|
|
1602
|
-
// Disabled by default (O(n²) — slow on large repos). Enable via graph.semantic_edges: true
|
|
1603
1818
|
let semanticEdges = 0;
|
|
1604
1819
|
if (chunksEmbedded > 0 && this.graphBuilder && this.graphDB && GRAPH_CONFIG.semantic_edges) {
|
|
1605
1820
|
try {
|
|
@@ -1612,30 +1827,25 @@ class CodebaseIndexer {
|
|
|
1612
1827
|
.filter(r => r.chunk_id && r.vector)
|
|
1613
1828
|
.map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
|
|
1614
1829
|
|
|
1615
|
-
// Skip if too many chunks — O(n²) becomes prohibitive
|
|
1616
1830
|
const maxChunks = GRAPH_CONFIG.semantic_edges_max_chunks ?? 500;
|
|
1617
1831
|
if (chunkData.length > maxChunks) {
|
|
1618
|
-
|
|
1832
|
+
logToFile(`Semantic edges skipped: ${chunkData.length} chunks > max ${maxChunks}`);
|
|
1619
1833
|
} else {
|
|
1620
1834
|
semanticEdges = await this.graphBuilder.buildSemanticEdges(chunkData, 0.8, 3);
|
|
1621
|
-
|
|
1835
|
+
logToFile(`Semantic edges: ${semanticEdges} built`);
|
|
1622
1836
|
}
|
|
1623
1837
|
}
|
|
1624
1838
|
} catch (e) {
|
|
1625
|
-
|
|
1626
|
-
// non-fatal — explicit edges still work
|
|
1839
|
+
logToFile(`Semantic edges FAILED: ${(e as Error).message}`);
|
|
1627
1840
|
}
|
|
1628
1841
|
}
|
|
1629
1842
|
|
|
1630
|
-
// Cleanup
|
|
1843
|
+
// Cleanup LSP
|
|
1631
1844
|
if (this.graphBuilder) {
|
|
1632
|
-
try {
|
|
1633
|
-
await this.graphBuilder.cleanup();
|
|
1634
|
-
} catch {
|
|
1635
|
-
// Best effort — continue even if cleanup fails
|
|
1636
|
-
}
|
|
1845
|
+
try { await this.graphBuilder.cleanup(); } catch { /* best effort */ }
|
|
1637
1846
|
}
|
|
1638
1847
|
|
|
1848
|
+
logToFile(`indexAll complete: ${indexed} indexed, ${chunksEmbedded} embedded, ${semanticEdges} semantic edges`);
|
|
1639
1849
|
return { indexed, skipped, total, semanticEdges };
|
|
1640
1850
|
}
|
|
1641
1851
|
|
|
@@ -1699,9 +1909,13 @@ class CodebaseIndexer {
|
|
|
1699
1909
|
if (entry.isDirectory() && entry.name !== "lancedb") {
|
|
1700
1910
|
try {
|
|
1701
1911
|
const indexer = await new CodebaseIndexer(this.root, entry.name).init();
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1912
|
+
try {
|
|
1913
|
+
const stat = await indexer.getStats();
|
|
1914
|
+
if (stat.fileCount > 0 || stat.chunkCount > 0) {
|
|
1915
|
+
stats.push(stat);
|
|
1916
|
+
}
|
|
1917
|
+
} finally {
|
|
1918
|
+
await indexer.unloadModel();
|
|
1705
1919
|
}
|
|
1706
1920
|
} catch {}
|
|
1707
1921
|
}
|
|
@@ -1711,20 +1925,38 @@ class CodebaseIndexer {
|
|
|
1711
1925
|
}
|
|
1712
1926
|
|
|
1713
1927
|
/**
 * Wipe this index's cache directory and re-initialise the indexer.
 *
 * Steps, in order:
 *  1. Release every open handle (chunk store, graph builder, graph DB,
 *     usage tracker) so their files can be removed. Each release is best
 *     effort: a failure is swallowed and the field is nulled regardless.
 *  2. Recursively delete `this.cacheDir`.
 *  3. Reset in-memory state (hashes, BM25, chunk cache, metrics, model).
 *  4. Call `init()` again.
 */
async clear() {
  // -- 1. release handles before their backing files disappear --------------
  if (this.chunkStore) {
    try {
      // close() is invoked without await here
      this.chunkStore.close();
    } catch {
      /* best effort */
    } finally {
      this.chunkStore = null;
    }
  }

  if (this.graphBuilder) {
    try {
      await this.graphBuilder.cleanup();
    } catch {
      /* best effort */
    } finally {
      this.graphBuilder = null;
    }
  }

  if (this.graphDB) {
    try {
      await this.graphDB.close();
    } catch {
      /* best effort */
    } finally {
      this.graphDB = null;
    }
  }

  if (this.usageTracker) {
    try {
      await this.usageTracker.save();
    } catch {
      /* best effort */
    } finally {
      this.usageTracker = null;
    }
  }

  this.db = null;

  // -- 2. remove on-disk data for this index --------------------------------
  await fs.rm(this.cacheDir, { recursive: true, force: true });

  // -- 3. drop in-memory state ----------------------------------------------
  this.hashes = {};
  if (this.bm25) {
    this.bm25.clear();
    this.bm25 = null;
  }
  this._bm25Rows = null;
  this._chunkCache = null;
  this.metrics = null;
  this.model = null;

  // -- 4. start again from a clean slate ------------------------------------
  await this.init();
}
|
|
1721
1944
|
|
|
1722
1945
|
/**
 * Wipe the whole base directory (`this.baseDir`) and re-initialise.
 *
 * Follows the same sequence as `clear()`: release open handles (best
 * effort), delete the directory, reset in-memory state, then `init()`.
 * Two differences are visible here: the directory removed is `baseDir`
 * rather than `cacheDir`, and `clearQueryCache()` is called before
 * re-initialising.
 */
async clearAll() {
  // Release handles first so the files underneath them can be deleted.
  if (this.chunkStore) {
    try {
      // close() is invoked without await here
      this.chunkStore.close();
    } catch {
      /* best effort */
    } finally {
      this.chunkStore = null;
    }
  }

  if (this.graphBuilder) {
    try {
      await this.graphBuilder.cleanup();
    } catch {
      /* best effort */
    } finally {
      this.graphBuilder = null;
    }
  }

  if (this.graphDB) {
    try {
      await this.graphDB.close();
    } catch {
      /* best effort */
    } finally {
      this.graphDB = null;
    }
  }

  if (this.usageTracker) {
    try {
      await this.usageTracker.save();
    } catch {
      /* best effort */
    } finally {
      this.usageTracker = null;
    }
  }

  this.db = null;

  // Remove everything under the base directory.
  await fs.rm(this.baseDir, { recursive: true, force: true });

  // Reset in-memory state.
  this.hashes = {};
  if (this.bm25) {
    this.bm25.clear();
    this.bm25 = null;
  }
  this._bm25Rows = null;
  this._chunkCache = null;
  this.metrics = null;
  this.model = null;
  clearQueryCache();

  await this.init();
}
|
|
@@ -1768,7 +2000,9 @@ function getWorkspaceConfig() {
|
|
|
1768
2000
|
// ── Singleton indexer pool ──────────────────────────────────────────────────
// One shared CodebaseIndexer per unique (projectRoot, indexName) pair, so that
// parallel searches hitting the same index do not cause LevelDB lock conflicts.
// Each entry tracks a reference count, the pending init promise, and an
// optional idle timer used for TTL eviction.

/** How long an entry may sit unused before its eviction timer fires (5 minutes). */
const POOL_IDLE_TTL_MS = 5 * 60 * 1000;

const _indexerPool = new Map<
  string,
  {
    indexer: CodebaseIndexer;
    refCount: number;
    initPromise: Promise<CodebaseIndexer>;
    idleTimer?: ReturnType<typeof setTimeout>;
  }
>();
|
|
1772
2006
|
|
|
1773
2007
|
/**
|
|
1774
2008
|
* Get or create a shared CodebaseIndexer for the given project + index.
|
|
@@ -1787,6 +2021,11 @@ async function getIndexer(projectRoot: string, indexName: string = "code"): Prom
|
|
|
1787
2021
|
const existing = _indexerPool.get(key);
|
|
1788
2022
|
if (existing) {
|
|
1789
2023
|
existing.refCount++;
|
|
2024
|
+
// Cancel pending idle eviction — someone is using it again
|
|
2025
|
+
if (existing.idleTimer) {
|
|
2026
|
+
clearTimeout(existing.idleTimer);
|
|
2027
|
+
existing.idleTimer = undefined;
|
|
2028
|
+
}
|
|
1790
2029
|
return existing.initPromise;
|
|
1791
2030
|
}
|
|
1792
2031
|
|
|
@@ -1798,15 +2037,35 @@ async function getIndexer(projectRoot: string, indexName: string = "code"): Prom
|
|
|
1798
2037
|
|
|
1799
2038
|
/**
 * Drop one reference to the pooled indexer for (projectRoot, indexName).
 *
 * The reference count never goes below zero. When it reaches zero an idle
 * timer of POOL_IDLE_TTL_MS is (re)started. If the entry is still unused
 * when the timer fires, it is removed from the pool and `unloadModel()` is
 * called on the indexer; failures during that step are ignored.
 *
 * Does nothing when no pool entry exists for the key.
 */
function releaseIndexer(projectRoot: string, indexName: string = "code") {
  const key = `${projectRoot}::${indexName}`;
  const entry = _indexerPool.get(key);
  if (!entry) return;

  entry.refCount = Math.max(0, entry.refCount - 1);
  if (entry.refCount !== 0) return;

  // Restart the countdown: at most one pending eviction per entry.
  if (entry.idleTimer) clearTimeout(entry.idleTimer);

  const evictIfStillIdle = async () => {
    // Re-read the pool: the entry may have been reused or removed meanwhile.
    const current = _indexerPool.get(key);
    if (!current || current.refCount > 0) return;

    _indexerPool.delete(key);
    try {
      const indexer = await current.initPromise;
      await indexer.unloadModel();
    } catch {
      // best effort
    }
  };

  const timer = setTimeout(evictIfStillIdle, POOL_IDLE_TTL_MS);
  entry.idleTimer = timer;

  // Don't keep the process alive just for idle eviction.
  if (timer && typeof timer === "object" && "unref" in timer) {
    (timer as NodeJS.Timeout).unref();
  }
}
|
|
1811
2070
|
|
|
1812
2071
|
/**
|
|
@@ -1817,6 +2076,7 @@ async function destroyIndexer(projectRoot: string, indexName: string = "code") {
|
|
|
1817
2076
|
const key = `${projectRoot}::${indexName}`;
|
|
1818
2077
|
const entry = _indexerPool.get(key);
|
|
1819
2078
|
if (!entry) return;
|
|
2079
|
+
if (entry.idleTimer) clearTimeout(entry.idleTimer);
|
|
1820
2080
|
_indexerPool.delete(key);
|
|
1821
2081
|
try {
|
|
1822
2082
|
const indexer = await entry.initPromise;
|
|
@@ -1826,4 +2086,8 @@ async function destroyIndexer(projectRoot: string, indexName: string = "code") {
|
|
|
1826
2086
|
}
|
|
1827
2087
|
}
|
|
1828
2088
|
|
|
1829
|
-
|
|
2089
|
+
/**
 * Accessor for the module-level query-decomposer configuration.
 *
 * @returns the current `DECOMPOSER_CONFIG` binding itself, not a copy.
 */
function getDecomposerConfig() {
  const activeConfig = DECOMPOSER_CONFIG;
  return activeConfig;
}
|
|
2092
|
+
|
|
2093
|
+
export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getWorkspaceConfig, getDecomposerConfig, getIndexer, releaseIndexer, destroyIndexer, disposeSharedModel };
|