@dreb/coding-agent 1.18.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/tools/search.d.ts.map +1 -1
- package/dist/core/tools/search.js +14 -36
- package/dist/core/tools/search.js.map +1 -1
- package/package.json +2 -1
- package/dist/core/search/chunker.d.ts +0 -21
- package/dist/core/search/chunker.d.ts.map +0 -1
- package/dist/core/search/chunker.js +0 -51
- package/dist/core/search/chunker.js.map +0 -1
- package/dist/core/search/db.d.ts +0 -89
- package/dist/core/search/db.d.ts.map +0 -1
- package/dist/core/search/db.js +0 -406
- package/dist/core/search/db.js.map +0 -1
- package/dist/core/search/embedder.d.ts +0 -52
- package/dist/core/search/embedder.d.ts.map +0 -1
- package/dist/core/search/embedder.js +0 -158
- package/dist/core/search/embedder.js.map +0 -1
- package/dist/core/search/index-manager.d.ts +0 -55
- package/dist/core/search/index-manager.d.ts.map +0 -1
- package/dist/core/search/index-manager.js +0 -311
- package/dist/core/search/index-manager.js.map +0 -1
- package/dist/core/search/metrics/bm25.d.ts +0 -10
- package/dist/core/search/metrics/bm25.d.ts.map +0 -1
- package/dist/core/search/metrics/bm25.js +0 -32
- package/dist/core/search/metrics/bm25.js.map +0 -1
- package/dist/core/search/metrics/git-recency.d.ts +0 -14
- package/dist/core/search/metrics/git-recency.d.ts.map +0 -1
- package/dist/core/search/metrics/git-recency.js +0 -123
- package/dist/core/search/metrics/git-recency.js.map +0 -1
- package/dist/core/search/metrics/import-graph.d.ts +0 -15
- package/dist/core/search/metrics/import-graph.d.ts.map +0 -1
- package/dist/core/search/metrics/import-graph.js +0 -115
- package/dist/core/search/metrics/import-graph.js.map +0 -1
- package/dist/core/search/metrics/path-match.d.ts +0 -13
- package/dist/core/search/metrics/path-match.d.ts.map +0 -1
- package/dist/core/search/metrics/path-match.js +0 -54
- package/dist/core/search/metrics/path-match.js.map +0 -1
- package/dist/core/search/metrics/symbol-match.d.ts +0 -12
- package/dist/core/search/metrics/symbol-match.d.ts.map +0 -1
- package/dist/core/search/metrics/symbol-match.js +0 -62
- package/dist/core/search/metrics/symbol-match.js.map +0 -1
- package/dist/core/search/metrics/tokenize.d.ts +0 -12
- package/dist/core/search/metrics/tokenize.d.ts.map +0 -1
- package/dist/core/search/metrics/tokenize.js +0 -29
- package/dist/core/search/metrics/tokenize.js.map +0 -1
- package/dist/core/search/poem.d.ts +0 -38
- package/dist/core/search/poem.d.ts.map +0 -1
- package/dist/core/search/poem.js +0 -214
- package/dist/core/search/poem.js.map +0 -1
- package/dist/core/search/query-classifier.d.ts +0 -17
- package/dist/core/search/query-classifier.d.ts.map +0 -1
- package/dist/core/search/query-classifier.js +0 -54
- package/dist/core/search/query-classifier.js.map +0 -1
- package/dist/core/search/scanner.d.ts +0 -30
- package/dist/core/search/scanner.d.ts.map +0 -1
- package/dist/core/search/scanner.js +0 -344
- package/dist/core/search/scanner.js.map +0 -1
- package/dist/core/search/search.d.ts +0 -51
- package/dist/core/search/search.d.ts.map +0 -1
- package/dist/core/search/search.js +0 -381
- package/dist/core/search/search.js.map +0 -1
- package/dist/core/search/text-chunker.d.ts +0 -15
- package/dist/core/search/text-chunker.d.ts.map +0 -1
- package/dist/core/search/text-chunker.js +0 -580
- package/dist/core/search/text-chunker.js.map +0 -1
- package/dist/core/search/tree-sitter-chunker.d.ts +0 -25
- package/dist/core/search/tree-sitter-chunker.d.ts.map +0 -1
- package/dist/core/search/tree-sitter-chunker.js +0 -357
- package/dist/core/search/tree-sitter-chunker.js.map +0 -1
- package/dist/core/search/types.d.ts +0 -96
- package/dist/core/search/types.d.ts.map +0 -1
- package/dist/core/search/types.js +0 -6
- package/dist/core/search/types.js.map +0 -1
- package/dist/core/search/vector-store.d.ts +0 -43
- package/dist/core/search/vector-store.d.ts.map +0 -1
- package/dist/core/search/vector-store.js +0 -73
- package/dist/core/search/vector-store.js.map +0 -1
|
@@ -1,381 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Main search API.
|
|
3
|
-
*
|
|
4
|
-
* Orchestrates: check/build index → compute all 6 metrics → classify query
|
|
5
|
-
* → duplicate columns → POEM rank → assemble results.
|
|
6
|
-
*/
|
|
7
|
-
import { existsSync, unlinkSync } from "node:fs";
|
|
8
|
-
import { homedir } from "node:os";
|
|
9
|
-
import path from "node:path";
|
|
10
|
-
import { Embedder } from "./embedder.js";
|
|
11
|
-
import { IndexManager } from "./index-manager.js";
|
|
12
|
-
import { computeBm25Scores } from "./metrics/bm25.js";
|
|
13
|
-
import { computeGitRecencyScores } from "./metrics/git-recency.js";
|
|
14
|
-
import { computeImportGraphScores } from "./metrics/import-graph.js";
|
|
15
|
-
import { computePathMatchScores } from "./metrics/path-match.js";
|
|
16
|
-
import { computeSymbolMatchScores } from "./metrics/symbol-match.js";
|
|
17
|
-
import { poemRank } from "./poem.js";
|
|
18
|
-
import { classifyQuery } from "./query-classifier.js";
|
|
19
|
-
import { topKSimilar } from "./vector-store.js";
|
|
20
|
-
// ============================================================================
|
|
21
|
-
// Constants
|
|
22
|
-
// ============================================================================
|
|
23
|
-
/** Embedding model name placed in the index config and handed to the Embedder. */
const DEFAULT_MODEL_NAME = "Xenova/all-MiniLM-L6-v2";

/** Number of results returned by `search()` when the caller gives no `limit`. */
const DEFAULT_RESULT_LIMIT = 20;

/** Candidate cap passed to the BM25 and vector-similarity metrics. */
const METRIC_CANDIDATE_LIMIT = 1000;
|
|
26
|
-
// ============================================================================
|
|
27
|
-
// Search Engine
|
|
28
|
-
// ============================================================================
|
|
29
|
-
/**
 * Main entry point for codebase search.
 *
 * A search call makes sure the index is built and embedded, computes six
 * per-chunk metrics (BM25, cosine similarity, path match, symbol match,
 * import graph, git recency), ranks the union of candidates with POEM and
 * returns the top results.
 */
export class SearchEngine {
    /** Project root; the index database lives under `<projectRoot>/.dreb/index`. */
    projectRoot;
    /** Lazily created IndexManager; null until first use and after resetIndex()/close(). */
    indexManager = null;
    /** Promise for the shared Embedder; null until first requested and after close(). */
    embedderPromise = null;
    /** Tail of the promise chain used to run search() calls one at a time. */
    searchQueue = Promise.resolve();

    /**
     * @param projectRoot Root directory of the project to index and search.
     */
    constructor(projectRoot) {
        this.projectRoot = projectRoot;
    }

    /** Check if semantic search is available (requires node:sqlite). */
    static isAvailable() {
        return IndexManager.isAvailable();
    }

    /**
     * Search the codebase with a natural language or identifier query.
     *
     * On first call, builds the index (scans, chunks, embeds). Subsequent calls
     * incrementally update changed files before searching.
     *
     * @param query Free-text or identifier query.
     * @param options Optional settings: `limit` (default DEFAULT_RESULT_LIMIT),
     *   `pathFilter` (only chunks of files at or below this path are considered)
     *   and `onProgress` (forwarded to indexing and called with
     *   `("searching", step, 6)` after each metric).
     * @returns Array of `{ chunk, scores, rank }`, best first; empty when no
     *   chunk survives the path filter or no metric produced a candidate.
     */
    async search(query, options) {
        // Chain through searchQueue so concurrent calls serialize: each call
        // waits for the previous gate and installs its own for the next caller.
        let resolve;
        const gate = new Promise((r) => {
            resolve = r;
        });
        const waitFor = this.searchQueue;
        this.searchQueue = gate;
        try {
            await waitFor;
            const limit = options?.limit ?? DEFAULT_RESULT_LIMIT;
            const onProgress = options?.onProgress;

            // Ensure index is built and up to date
            const indexManager = this.getIndexManager();
            const db = indexManager.getDb();
            // Share our embedder with IndexManager so it doesn't create a second one
            const embedder = await this.getOrCreateEmbedder();
            indexManager.setEmbedder(embedder);
            await indexManager.buildIndex(onProgress);
            await indexManager.ensureEmbeddings(onProgress);

            // Get all chunks (potentially filtered by path)
            let allChunks = db.getAllChunks();
            if (options?.pathFilter) {
                const filter = options.pathFilter;
                allChunks = allChunks.filter((c) => this.matchesPathFilter(c.filePath, filter));
            }
            if (allChunks.length === 0) {
                return [];
            }

            // Classify query type for POEM column weighting
            const queryType = classifyQuery(query);

            // Compute all 6 metrics
            onProgress?.("searching", 0, 6);
            // 1. BM25 (FTS5)
            const bm25Scores = computeBm25Scores(db, sanitizeFtsQuery(query), METRIC_CANDIDATE_LIMIT);
            onProgress?.("searching", 1, 6);
            // 2. Cosine similarity (vector search)
            const cosineScores = await this.computeVectorScores(db, query, METRIC_CANDIDATE_LIMIT, onProgress);
            onProgress?.("searching", 2, 6);
            // 3. Path match
            const pathScores = computePathMatchScores(query, allChunks);
            onProgress?.("searching", 3, 6);
            // 4. Symbol match
            const symbols = db.getAllSymbols();
            const symbolScores = computeSymbolMatchScores(query, symbols);
            onProgress?.("searching", 4, 6);
            // 5. Import graph (use BM25 + cosine as seed scores, aggregated per file)
            // Only use files with strong scores as seeds — low-scoring files (e.g. from
            // common OR terms matching everywhere) pollute the seed set and prevent
            // meaningful propagation.
            const fileSeedScores = aggregateFileScores(allChunks, bm25Scores, cosineScores);
            const seedThreshold = computeSeedThreshold(fileSeedScores);
            const filteredSeeds = new Map();
            for (const [fileId, score] of fileSeedScores) {
                if (score >= seedThreshold)
                    filteredSeeds.set(fileId, score);
            }
            const fileIdToChunkIds = buildFileChunkMap(allChunks);
            const importScores = computeImportGraphScores(db, filteredSeeds, fileIdToChunkIds);
            onProgress?.("searching", 5, 6);
            // 6. Git recency
            const recencyScores = await computeGitRecencyScores(this.projectRoot, allChunks);
            onProgress?.("searching", 6, 6);

            // Build MetricScores for each candidate chunk; a metric that did
            // not score a chunk contributes 0.
            const candidateIds = collectCandidateIds(bm25Scores, cosineScores, pathScores, symbolScores, importScores, recencyScores);
            const candidates = new Map();
            for (const id of candidateIds) {
                candidates.set(id, {
                    bm25: bm25Scores.get(id) ?? 0,
                    cosine: cosineScores.get(id) ?? 0,
                    pathMatch: pathScores.get(id) ?? 0,
                    symbolMatch: symbolScores.get(id) ?? 0,
                    importGraph: importScores.get(id) ?? 0,
                    gitRecency: recencyScores.get(id) ?? 0,
                });
            }
            if (candidates.size === 0) {
                return [];
            }

            // POEM rank
            const ranked = poemRank(candidates, queryType);

            // Assemble results. Ranked ids without a chunk in the (possibly
            // path-filtered) chunk list are skipped.
            const chunkMap = new Map();
            for (const chunk of allChunks) {
                chunkMap.set(chunk.id, chunk);
            }
            const results = [];
            for (const candidate of ranked.slice(0, limit)) {
                const chunk = chunkMap.get(candidate.id);
                if (chunk) {
                    results.push({
                        chunk,
                        scores: candidate.scores,
                        rank: candidate.rank,
                    });
                }
            }
            return results;
        }
        finally {
            // Always open the gate, even on failure, so queued searches can proceed.
            resolve();
        }
    }

    /** Get index stats without opening a new connection; null when no IndexManager is open. */
    getStats() {
        if (!this.indexManager)
            return null;
        return this.indexManager.getStats();
    }

    /**
     * Reset the search index — delete the DB and close the IndexManager.
     *
     * Preserves the embedder (expensive ONNX model, unrelated to index state).
     * The next `search()` call will lazily re-create the IndexManager and build
     * a fresh index from scratch.
     */
    resetIndex() {
        // Close DB connection first (WAL mode may hold locks)
        this.indexManager?.close();
        this.indexManager = null;
        // Delete the DB file together with its WAL-mode sidecar files. A
        // leftover "-wal"/"-shm" file must not be paired with the fresh DB.
        const dbPath = path.join(this.projectRoot, ".dreb", "index", "search.db");
        for (const suffix of ["", "-wal", "-shm"]) {
            const target = dbPath + suffix;
            if (existsSync(target)) {
                unlinkSync(target);
            }
        }
    }

    /** Dispose resources. */
    close() {
        this.indexManager?.close();
        this.indexManager = null;
        // Dispose embedder if it was created. Disposal is best-effort: a failed
        // initialization or dispose is deliberately ignored here.
        if (this.embedderPromise) {
            this.embedderPromise.then((e) => e.dispose()).catch(() => { });
            this.embedderPromise = null;
        }
    }

    // ========================================================================
    // Private
    // ========================================================================

    /**
     * Decide whether `filePath` is the filtered path itself or lies below it.
     *
     * A plain prefix test would let the filter "src/core" match
     * "src/core-utils/x.ts"; the match must end on a path-segment boundary.
     * Both "/" and "\\" are accepted as separators.
     */
    matchesPathFilter(filePath, filter) {
        if (!filePath.startsWith(filter))
            return false;
        if (filePath.length === filter.length)
            return true;
        const lastOfFilter = filter[filter.length - 1];
        if (lastOfFilter === "/" || lastOfFilter === "\\")
            return true;
        const nextInPath = filePath[filter.length];
        return nextInPath === "/" || nextInPath === "\\";
    }

    /** Return the IndexManager, creating and opening it on first use. */
    getIndexManager() {
        if (!this.indexManager) {
            const config = this.getIndexConfig();
            this.indexManager = new IndexManager(config);
            this.indexManager.open();
        }
        return this.indexManager;
    }

    /** Build the index configuration: project-local index dir, per-user memory dir, model name. */
    getIndexConfig() {
        return {
            projectRoot: this.projectRoot,
            indexDir: path.join(this.projectRoot, ".dreb", "index"),
            globalMemoryDir: path.join(homedir(), ".dreb", "memory"),
            modelName: DEFAULT_MODEL_NAME,
        };
    }

    /**
     * Return the shared Embedder promise, starting initialization on first use.
     * If initialization fails the cached promise is cleared so a later call retries.
     */
    getOrCreateEmbedder() {
        if (!this.embedderPromise) {
            this.embedderPromise = (async () => {
                try {
                    const config = this.getIndexConfig();
                    const embedder = new Embedder({
                        modelCacheDir: path.join(homedir(), ".dreb", "agent", "models"),
                        modelName: config.modelName,
                    });
                    await embedder.initialize();
                    return embedder;
                }
                catch (err) {
                    this.embedderPromise = null; // reset on failure for retry
                    throw err;
                }
            })();
        }
        return this.embedderPromise;
    }

    /**
     * Score chunks by similarity between the embedded query and stored embeddings.
     *
     * @param db Search database providing `getAllEmbeddings(modelName)`.
     * @param query Query text to embed.
     * @param limit Maximum number of nearest chunks to keep.
     * @param _onProgress Unused; kept for signature compatibility.
     * @returns Map of chunk id → similarity, with negative values clamped to 0.
     */
    async computeVectorScores(db, query, limit, _onProgress) {
        const config = this.getIndexConfig();
        const embedder = await this.getOrCreateEmbedder();
        // Embed the query
        const queryVector = await embedder.embedQuery(query);
        // Get all stored embeddings
        const storedVectors = db.getAllEmbeddings(config.modelName);
        if (storedVectors.size === 0) {
            return new Map();
        }
        const topK = topKSimilar(queryVector, storedVectors, limit);
        // Convert to Map, clamping negative similarities to 0
        const scores = new Map();
        for (const { id, score } of topK) {
            scores.set(id, Math.max(0, score));
        }
        return scores;
    }
}
|
|
241
|
-
// ============================================================================
|
|
242
|
-
// Helpers
|
|
243
|
-
// ============================================================================
|
|
244
|
-
/**
 * Union the keys of every score map into a single Set of chunk ids.
 * Ids keep the order in which they are first encountered.
 */
function collectCandidateIds(...scoreMaps) {
    return scoreMaps.reduce((seen, scores) => {
        for (const chunkId of scores.keys()) {
            seen.add(chunkId);
        }
        return seen;
    }, new Set());
}
|
|
254
|
-
/**
 * Roll chunk-level scores up to file level.
 *
 * For each chunk the best score across all given maps is taken; each file
 * then keeps the best of its chunks. Files whose best score is not positive
 * are left out of the result.
 */
function aggregateFileScores(chunks, ...scoreMaps) {
    const bestByFile = new Map();
    for (const { id, fileId } of chunks) {
        const chunkBest = scoreMaps.reduce((best, scores) => {
            const value = scores.get(id);
            return value !== undefined && value > best ? value : best;
        }, 0);
        if (!(chunkBest > 0)) {
            continue;
        }
        const fileBest = bestByFile.get(fileId);
        if (fileBest === undefined || chunkBest > fileBest) {
            bestByFile.set(fileId, chunkBest);
        }
    }
    return bestByFile;
}
|
|
273
|
-
/**
 * Pick the cut-off score for import-graph seed files.
 *
 * The threshold is the score at the middle position of the scores sorted
 * from highest to lowest, but never lower than 0.1. An empty input yields 0.
 */
function computeSeedThreshold(fileScores) {
    const count = fileScores.size;
    if (count === 0) {
        return 0;
    }
    const descending = Array.from(fileScores.values());
    descending.sort((left, right) => right - left);
    const middle = descending[Math.floor(descending.length / 2)];
    return Math.max(middle, 0.1);
}
|
|
285
|
-
/**
 * Group chunk ids by the file they belong to.
 * Returns a Map of fileId → array of chunk ids, in input order.
 */
function buildFileChunkMap(chunks) {
    const idsByFile = new Map();
    for (const { fileId, id } of chunks) {
        const bucket = idsByFile.get(fileId);
        if (bucket) {
            bucket.push(id);
            continue;
        }
        idsByFile.set(fileId, [id]);
    }
    return idsByFile;
}
|
|
297
|
-
/** Common English stopwords to exclude from FTS queries. */
const STOPWORDS = new Set([
    "a",
    "an",
    "and",
    "are",
    "as",
    "at",
    "be",
    "but",
    "by",
    "for",
    "from",
    "had",
    "has",
    "have",
    "he",
    "her",
    "his",
    "how",
    "i",
    "if",
    "in",
    "into",
    "is",
    "it",
    "its",
    "me",
    "my",
    "no",
    "not",
    "of",
    "on",
    "or",
    "our",
    "she",
    "so",
    "than",
    "that",
    "the",
    "their",
    "them",
    "then",
    "there",
    "these",
    "they",
    "this",
    "to",
    "up",
    "us",
    "was",
    "we",
    "what",
    "when",
    "where",
    "which",
    "who",
    "will",
    "with",
    "would",
    "you",
    "your",
]);

/**
 * Matches a token made up solely of characters FTS5 accepts in an unquoted
 * bareword: ASCII letters, digits, underscore and non-ASCII characters.
 */
const FTS5_BAREWORD = /^[A-Za-z0-9_\u0080-\uffff]+$/;

/** Matches any single character that may appear in an FTS5 bareword. */
const FTS5_WORD_CHAR = /[A-Za-z0-9_\u0080-\uffff]/;

/**
 * Sanitize a query string for FTS5 MATCH syntax.
 * FTS5 chokes on certain characters — strip operators and wrap terms.
 *
 * Removes stopwords and uses OR between terms so multi-word queries return
 * partial matches (FTS5's default implicit AND is too restrictive).
 *
 * Tokens that still contain characters outside the FTS5 bareword set
 * (for example "index-manager.js" or "work?") are emitted as quoted FTS5
 * strings, because an unquoted token like that is a MATCH syntax error.
 * Tokens without any word character are dropped. Plain-word tokens are
 * emitted unquoted.
 *
 * @param query Raw user query.
 * @returns An FTS5 MATCH expression; `""` when no usable term remains.
 */
function sanitizeFtsQuery(query) {
    // Remove FTS5 operators and special chars
    const cleaned = query
        .replace(/[*"():^{}[\]~!@#$%&=+|<>]/g, " ")
        .replace(/\bAND\b|\bOR\b|\bNOT\b|\bNEAR\b/gi, " ")
        .trim();
    // Split into tokens, remove stopwords and punctuation-only tokens
    const tokens = cleaned
        .split(/\s+/)
        .filter((t) => FTS5_WORD_CHAR.test(t) && !STOPWORDS.has(t.toLowerCase()))
        // Double quotes were stripped above, so quoting needs no escaping.
        .map((t) => (FTS5_BAREWORD.test(t) ? t : `"${t}"`));
    if (tokens.length === 0)
        return '""';
    if (tokens.length === 1)
        return tokens[0];
    return tokens.join(" OR ");
}
|
|
381
|
-
//# sourceMappingURL=search.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../../src/core/search/search.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACnE,OAAO,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AACjE,OAAO,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,kBAAkB,GAAG,yBAAyB,CAAC;AACrD,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,sBAAsB,GAAG,IAAI,CAAC;AAepC,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,MAAM,OAAO,YAAY;IACP,WAAW,CAAS;IAC7B,YAAY,GAAwB,IAAI,CAAC;IACzC,eAAe,GAA6B,IAAI,CAAC;IACjD,WAAW,GAAkB,OAAO,CAAC,OAAO,EAAE,CAAC;IAEvD,YAAY,WAAmB,EAAE;QAChC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IAAA,CAC/B;IAED,oEAAoE;IACpE,MAAM,CAAC,WAAW,GAAY;QAC7B,OAAO,YAAY,CAAC,WAAW,EAAE,CAAC;IAAA,CAClC;IAED;;;;;OAKG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,OAAuB,EAA2B;QAC7E,0DAA0D;QAC1D,IAAI,OAAoB,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE,CAAC;YACrC,OAAO,GAAG,CAAC,CAAC;QAAA,CACZ,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC;QACjC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAExB,IAAI,CAAC;YACJ,MAAM,OAAO,CAAC;YAEd,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,oBAAoB,CAAC;YACrD,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,CAAC;YAEvC,uCAAuC;YACvC,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;YAC5C,MAAM,EAAE,GAAG,YAAY,CAAC,KAAK,EAAE,CAAC;YAEhC,yEAAyE;YACzE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAClD,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAEnC,MAAM,YAAY,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;YAC1C,MAAM,YAAY,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAEhD,gDAAgD;YAChD,IAAI,SAAS,GAAG,EAAE,CAAC,YAAY,EAAE,CAAC;YAClC,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;gBACzB,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,
CAAC;gBAClC,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;YACpE,CAAC;YAED,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,OAAO,EAAE,CAAC;YACX,CAAC;YAED,gDAAgD;YAChD,MAAM,SAAS,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;YAEvC,wBAAwB;YACxB,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,iBAAiB;YACjB,MAAM,UAAU,GAAG,iBAAiB,CAAC,EAAE,EAAE,gBAAgB,CAAC,KAAK,CAAC,EAAE,sBAAsB,CAAC,CAAC;YAC1F,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,uCAAuC;YACvC,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,EAAE,EAAE,KAAK,EAAE,sBAAsB,EAAE,UAAU,CAAC,CAAC;YACnG,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,gBAAgB;YAChB,MAAM,UAAU,GAAG,sBAAsB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YAC5D,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,kBAAkB;YAClB,MAAM,OAAO,GAAG,EAAE,CAAC,aAAa,EAAE,CAAC;YACnC,MAAM,YAAY,GAAG,wBAAwB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAC9D,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,0EAA0E;YAC1E,8EAA4E;YAC5E,wEAAwE;YACxE,0BAA0B;YAC1B,MAAM,cAAc,GAAG,mBAAmB,CAAC,SAAS,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;YAChF,MAAM,aAAa,GAAG,oBAAoB,CAAC,cAAc,CAAC,CAAC;YAC3D,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;YAChD,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,cAAc,EAAE,CAAC;gBAC9C,IAAI,KAAK,IAAI,aAAa;oBAAE,aAAa,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;YAC9D,CAAC;YACD,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;YACtD,MAAM,YAAY,GAAG,wBAAwB,CAAC,EAAE,EAAE,aAAa,EAAE,gBAAgB,CAAC,CAAC;YACnF,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,iBAAiB;YACjB,MAAM,aAAa,GAAG,MAAM,uBAAuB,CAAC,IAAI,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YACjF,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAEhC,8CAA8C;YAC9C,MAAM,YAAY,GAAG,mBAAmB,CACvC,UAAU,EACV,YAAY,EACZ,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,aAAa,CACb,CAAC;YACF,MAAM,UAAU,GAAG,IAAI,GAAG,EAAwB,CAAC;YAEnD,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;gBAC/B,UAAU,CAAC,GAAG,CAAC,EAAE,EAAE;oBAClB,IAAI,EAAE,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;oBAC7B,MAAM,EAAE,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;oBACjC,SAAS,EAAE,UAAU,CAAC
,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;oBAClC,WAAW,EAAE,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;oBACtC,WAAW,EAAE,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;oBACtC,UAAU,EAAE,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;iBACtC,CAAC,CAAC;YACJ,CAAC;YAED,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;gBAC3B,OAAO,EAAE,CAAC;YACX,CAAC;YAED,YAAY;YACZ,MAAM,MAAM,GAAG,QAAQ,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;YAE/C,mBAAmB;YACnB,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAuB,CAAC;YAChD,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;gBAC/B,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;YAC/B,CAAC;YAED,MAAM,OAAO,GAAmB,EAAE,CAAC;YACnC,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,CAAC;gBAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;gBACzC,IAAI,KAAK,EAAE,CAAC;oBACX,OAAO,CAAC,IAAI,CAAC;wBACZ,KAAK;wBACL,MAAM,EAAE,SAAS,CAAC,MAAM;wBACxB,IAAI,EAAE,SAAS,CAAC,IAAI;qBACpB,CAAC,CAAC;gBACJ,CAAC;YACF,CAAC;YAED,OAAO,OAAO,CAAC;QAChB,CAAC;gBAAS,CAAC;YACV,OAAO,EAAE,CAAC;QACX,CAAC;IAAA,CACD;IAED,wDAAwD;IACxD,QAAQ,GAA6C;QACpD,IAAI,CAAC,IAAI,CAAC,YAAY;YAAE,OAAO,IAAI,CAAC;QACpC,OAAO,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC;IAAA,CACpC;IAED;;;;;;OAMG;IACH,UAAU,GAAS;QAClB,sDAAsD;QACtD,IAAI,CAAC,YAAY,EAAE,KAAK,EAAE,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAEzB,qBAAqB;QACrB,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,CAAC,CAAC;QAC1E,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YACxB,UAAU,CAAC,MAAM,CAAC,CAAC;QACpB,CAAC;IAAA,CACD;IAED,yBAAyB;IACzB,KAAK,GAAS;QACb,IAAI,CAAC,YAAY,EAAE,KAAK,EAAE,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,qCAAqC;QACrC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YAC1B,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAC,CAAC,CAAC,CAAC;YAC9D,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC7B,CAAC;IAAA,CACD;IAED,2EAA2E;IAC3E,UAAU;IACV,2EAA2E;IAEnE,eAAe,GAAiB;QACvC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;YACrC,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;YAC7C,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAA
C;QAC1B,CAAC;QACD,OAAO,IAAI,CAAC,YAAY,CAAC;IAAA,CACzB;IAEO,cAAc,GAAgB;QACrC,OAAO;YACN,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,EAAE,OAAO,CAAC;YACvD,eAAe,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,QAAQ,CAAC;YACxD,SAAS,EAAE,kBAAkB;SAC7B,CAAC;IAAA,CACF;IAEO,mBAAmB,GAAsB;QAChD,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC3B,IAAI,CAAC,eAAe,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;gBACnC,IAAI,CAAC;oBACJ,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;oBACrC,MAAM,QAAQ,GAAG,IAAI,QAAQ,CAAC;wBAC7B,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,CAAC;wBAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;qBAC3B,CAAC,CAAC;oBACH,MAAM,QAAQ,CAAC,UAAU,EAAE,CAAC;oBAC5B,OAAO,QAAQ,CAAC;gBACjB,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACd,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,CAAC,6BAA6B;oBAC1D,MAAM,GAAG,CAAC;gBACX,CAAC;YAAA,CACD,CAAC,EAAE,CAAC;QACN,CAAC;QACD,OAAO,IAAI,CAAC,eAAe,CAAC;IAAA,CAC5B;IAEO,KAAK,CAAC,mBAAmB,CAChC,EAAkB,EAClB,KAAa,EACb,KAAa,EACb,WAAmC,EACJ;QAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAElD,kBAAkB;QAClB,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAErD,4BAA4B;QAC5B,MAAM,aAAa,GAAG,EAAE,CAAC,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAE5D,IAAI,aAAa,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,GAAG,EAAE,CAAC;QAClB,CAAC;QAED,MAAM,IAAI,GAAG,WAAW,CAAC,WAAW,EAAE,aAAa,EAAE,KAAK,CAAC,CAAC;QAE5D,sDAAsD;QACtD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;QACzC,KAAK,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,IAAI,EAAE,CAAC;YAClC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,MAAM,CAAC;IAAA,CACd;CACD;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,wEAAwE;AACxE,SAAS,mBAAmB,CAAC,GAAG,SAAgC,EAAe;IAC9E,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,KAAK,MAAM,EAAE,IAAI,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC;YAC7B,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACb,CAAC;IACF,CAAC;IACD,OAAO,GAAG,CAAC;AAAA,CACX;AAED,wEAAwE;AACxE,SAAS,mBAAmB,CAAC,MAAqB,EAAE,GAAG,SAAgC,EAAuB
;IAC7G,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC5B,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,GAAG,QAAQ;gBAAE,QAAQ,GAAG,CAAC,CAAC;QACnD,CAAC;QACD,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAC9C,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;gBACnD,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;YACxC,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,UAAU,CAAC;AAAA,CAClB;AAED;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,UAA+B,EAAU;IACtE,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACpC,MAAM,MAAM,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IACrD,OAAO,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAAA,CAC7B;AAED,yDAAuD;AACvD,SAAS,iBAAiB,CAAC,MAAqB,EAAyB;IACxE,MAAM,GAAG,GAAG,IAAI,GAAG,EAAoB,CAAC;IACxC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,QAAQ;YAAE,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;;YACjC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,GAAG,CAAC;AAAA,CACX;AAED,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,GAAG;IACH,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,MAAM;IACN,IAAI;IACJ,KAAK;IACL,KAAK;IACL,KAAK;IACL,GAAG;IACH,IAAI;IACJ,IAAI;IACJ,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,MAAM;IACN,MAAM;IACN,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,KAAK;IACL,MAAM;IACN,MAAM;IACN,OAAO;IACP,KAAK
;IACL,MAAM;CACN,CAAC,CAAC;AAEH;;;;;;GAMG;AACH,SAAS,gBAAgB,CAAC,KAAa,EAAU;IAChD,0CAA0C;IAC1C,MAAM,OAAO,GAAG,KAAK;SACnB,OAAO,CAAC,4BAA4B,EAAE,GAAG,CAAC;SAC1C,OAAO,CAAC,mCAAmC,EAAE,GAAG,CAAC;SACjD,IAAI,EAAE,CAAC;IAET,oDAAoD;IACpD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IACnG,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;IAC1C,OAAO,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAAA,CAC3B","sourcesContent":["/**\n * Main search API.\n *\n * Orchestrates: check/build index → compute all 6 metrics → classify query\n * → duplicate columns → POEM rank → assemble results.\n */\n\nimport { existsSync, unlinkSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport path from \"node:path\";\nimport type { SearchDatabase } from \"./db.js\";\nimport { Embedder } from \"./embedder.js\";\nimport { IndexManager } from \"./index-manager.js\";\nimport { computeBm25Scores } from \"./metrics/bm25.js\";\nimport { computeGitRecencyScores } from \"./metrics/git-recency.js\";\nimport { computeImportGraphScores } from \"./metrics/import-graph.js\";\nimport { computePathMatchScores } from \"./metrics/path-match.js\";\nimport { computeSymbolMatchScores } from \"./metrics/symbol-match.js\";\nimport { poemRank } from \"./poem.js\";\nimport { classifyQuery } from \"./query-classifier.js\";\nimport type { IndexConfig, IndexProgressCallback, MetricScores, SearchResult, StoredChunk } from \"./types.js\";\nimport { topKSimilar } from \"./vector-store.js\";\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst DEFAULT_MODEL_NAME = \"Xenova/all-MiniLM-L6-v2\";\nconst DEFAULT_RESULT_LIMIT = 20;\nconst METRIC_CANDIDATE_LIMIT = 1000;\n\n// 
============================================================================\n// Search Options\n// ============================================================================\n\nexport interface SearchOptions {\n\t/** Maximum number of results to return. Default: 20. */\n\tlimit?: number;\n\t/** Restrict search to files under this path (relative to project root). */\n\tpathFilter?: string;\n\t/** Progress callback for indexing operations. */\n\tonProgress?: IndexProgressCallback;\n}\n\n// ============================================================================\n// Search Engine\n// ============================================================================\n\nexport class SearchEngine {\n\tprivate readonly projectRoot: string;\n\tprivate indexManager: IndexManager | null = null;\n\tprivate embedderPromise: Promise<Embedder> | null = null;\n\tprivate searchQueue: Promise<void> = Promise.resolve();\n\n\tconstructor(projectRoot: string) {\n\t\tthis.projectRoot = projectRoot;\n\t}\n\n\t/** Check if semantic search is available (requires node:sqlite). */\n\tstatic isAvailable(): boolean {\n\t\treturn IndexManager.isAvailable();\n\t}\n\n\t/**\n\t * Search the codebase with a natural language or identifier query.\n\t *\n\t * On first call, builds the index (scans, chunks, embeds). Subsequent calls\n\t * incrementally update changed files before searching.\n\t */\n\tasync search(query: string, options?: SearchOptions): Promise<SearchResult[]> {\n\t\t// Chain through searchQueue so concurrent calls serialize\n\t\tlet resolve!: () => void;\n\t\tconst gate = new Promise<void>((r) => {\n\t\t\tresolve = r;\n\t\t});\n\t\tconst waitFor = this.searchQueue;\n\t\tthis.searchQueue = gate;\n\n\t\ttry {\n\t\t\tawait waitFor;\n\n\t\t\tconst limit = options?.limit ?? 
DEFAULT_RESULT_LIMIT;\n\t\t\tconst onProgress = options?.onProgress;\n\n\t\t\t// Ensure index is built and up to date\n\t\t\tconst indexManager = this.getIndexManager();\n\t\t\tconst db = indexManager.getDb();\n\n\t\t\t// Share our embedder with IndexManager so it doesn't create a second one\n\t\t\tconst embedder = await this.getOrCreateEmbedder();\n\t\t\tindexManager.setEmbedder(embedder);\n\n\t\t\tawait indexManager.buildIndex(onProgress);\n\t\t\tawait indexManager.ensureEmbeddings(onProgress);\n\n\t\t\t// Get all chunks (potentially filtered by path)\n\t\t\tlet allChunks = db.getAllChunks();\n\t\t\tif (options?.pathFilter) {\n\t\t\t\tconst filter = options.pathFilter;\n\t\t\t\tallChunks = allChunks.filter((c) => c.filePath.startsWith(filter));\n\t\t\t}\n\n\t\t\tif (allChunks.length === 0) {\n\t\t\t\treturn [];\n\t\t\t}\n\n\t\t\t// Classify query type for POEM column weighting\n\t\t\tconst queryType = classifyQuery(query);\n\n\t\t\t// Compute all 6 metrics\n\t\t\tonProgress?.(\"searching\", 0, 6);\n\n\t\t\t// 1. BM25 (FTS5)\n\t\t\tconst bm25Scores = computeBm25Scores(db, sanitizeFtsQuery(query), METRIC_CANDIDATE_LIMIT);\n\t\t\tonProgress?.(\"searching\", 1, 6);\n\n\t\t\t// 2. Cosine similarity (vector search)\n\t\t\tconst cosineScores = await this.computeVectorScores(db, query, METRIC_CANDIDATE_LIMIT, onProgress);\n\t\t\tonProgress?.(\"searching\", 2, 6);\n\n\t\t\t// 3. Path match\n\t\t\tconst pathScores = computePathMatchScores(query, allChunks);\n\t\t\tonProgress?.(\"searching\", 3, 6);\n\n\t\t\t// 4. Symbol match\n\t\t\tconst symbols = db.getAllSymbols();\n\t\t\tconst symbolScores = computeSymbolMatchScores(query, symbols);\n\t\t\tonProgress?.(\"searching\", 4, 6);\n\n\t\t\t// 5. Import graph (use BM25 + cosine as seed scores, aggregated per file)\n\t\t\t// Only use files with strong scores as seeds — low-scoring files (e.g. 
from\n\t\t\t// common OR terms matching everywhere) pollute the seed set and prevent\n\t\t\t// meaningful propagation.\n\t\t\tconst fileSeedScores = aggregateFileScores(allChunks, bm25Scores, cosineScores);\n\t\t\tconst seedThreshold = computeSeedThreshold(fileSeedScores);\n\t\t\tconst filteredSeeds = new Map<number, number>();\n\t\t\tfor (const [fileId, score] of fileSeedScores) {\n\t\t\t\tif (score >= seedThreshold) filteredSeeds.set(fileId, score);\n\t\t\t}\n\t\t\tconst fileIdToChunkIds = buildFileChunkMap(allChunks);\n\t\t\tconst importScores = computeImportGraphScores(db, filteredSeeds, fileIdToChunkIds);\n\t\t\tonProgress?.(\"searching\", 5, 6);\n\n\t\t\t// 6. Git recency\n\t\t\tconst recencyScores = await computeGitRecencyScores(this.projectRoot, allChunks);\n\t\t\tonProgress?.(\"searching\", 6, 6);\n\n\t\t\t// Build MetricScores for each candidate chunk\n\t\t\tconst candidateIds = collectCandidateIds(\n\t\t\t\tbm25Scores,\n\t\t\t\tcosineScores,\n\t\t\t\tpathScores,\n\t\t\t\tsymbolScores,\n\t\t\t\timportScores,\n\t\t\t\trecencyScores,\n\t\t\t);\n\t\t\tconst candidates = new Map<number, MetricScores>();\n\n\t\t\tfor (const id of candidateIds) {\n\t\t\t\tcandidates.set(id, {\n\t\t\t\t\tbm25: bm25Scores.get(id) ?? 0,\n\t\t\t\t\tcosine: cosineScores.get(id) ?? 0,\n\t\t\t\t\tpathMatch: pathScores.get(id) ?? 0,\n\t\t\t\t\tsymbolMatch: symbolScores.get(id) ?? 0,\n\t\t\t\t\timportGraph: importScores.get(id) ?? 0,\n\t\t\t\t\tgitRecency: recencyScores.get(id) ?? 
0,\n\t\t\t\t});\n\t\t\t}\n\n\t\t\tif (candidates.size === 0) {\n\t\t\t\treturn [];\n\t\t\t}\n\n\t\t\t// POEM rank\n\t\t\tconst ranked = poemRank(candidates, queryType);\n\n\t\t\t// Assemble results\n\t\t\tconst chunkMap = new Map<number, StoredChunk>();\n\t\t\tfor (const chunk of allChunks) {\n\t\t\t\tchunkMap.set(chunk.id, chunk);\n\t\t\t}\n\n\t\t\tconst results: SearchResult[] = [];\n\t\t\tfor (const candidate of ranked.slice(0, limit)) {\n\t\t\t\tconst chunk = chunkMap.get(candidate.id);\n\t\t\t\tif (chunk) {\n\t\t\t\t\tresults.push({\n\t\t\t\t\t\tchunk,\n\t\t\t\t\t\tscores: candidate.scores,\n\t\t\t\t\t\trank: candidate.rank,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\n\t\t\treturn results;\n\t\t} finally {\n\t\t\tresolve();\n\t\t}\n\t}\n\n\t/** Get index stats without opening a new connection. */\n\tgetStats(): { files: number; chunks: number } | null {\n\t\tif (!this.indexManager) return null;\n\t\treturn this.indexManager.getStats();\n\t}\n\n\t/**\n\t * Reset the search index — delete the DB and close the IndexManager.\n\t *\n\t * Preserves the embedder (expensive ONNX model, unrelated to index state).\n\t * The next `search()` call will lazily re-create the IndexManager and build\n\t * a fresh index from scratch.\n\t */\n\tresetIndex(): void {\n\t\t// Close DB connection first (WAL mode may hold locks)\n\t\tthis.indexManager?.close();\n\t\tthis.indexManager = null;\n\n\t\t// Delete the DB file\n\t\tconst dbPath = path.join(this.projectRoot, \".dreb\", \"index\", \"search.db\");\n\t\tif (existsSync(dbPath)) {\n\t\t\tunlinkSync(dbPath);\n\t\t}\n\t}\n\n\t/** Dispose resources. 
*/\n\tclose(): void {\n\t\tthis.indexManager?.close();\n\t\tthis.indexManager = null;\n\t\t// Dispose embedder if it was created\n\t\tif (this.embedderPromise) {\n\t\t\tthis.embedderPromise.then((e) => e.dispose()).catch(() => {});\n\t\t\tthis.embedderPromise = null;\n\t\t}\n\t}\n\n\t// ========================================================================\n\t// Private\n\t// ========================================================================\n\n\tprivate getIndexManager(): IndexManager {\n\t\tif (!this.indexManager) {\n\t\t\tconst config = this.getIndexConfig();\n\t\t\tthis.indexManager = new IndexManager(config);\n\t\t\tthis.indexManager.open();\n\t\t}\n\t\treturn this.indexManager;\n\t}\n\n\tprivate getIndexConfig(): IndexConfig {\n\t\treturn {\n\t\t\tprojectRoot: this.projectRoot,\n\t\t\tindexDir: path.join(this.projectRoot, \".dreb\", \"index\"),\n\t\t\tglobalMemoryDir: path.join(homedir(), \".dreb\", \"memory\"),\n\t\t\tmodelName: DEFAULT_MODEL_NAME,\n\t\t};\n\t}\n\n\tprivate getOrCreateEmbedder(): Promise<Embedder> {\n\t\tif (!this.embedderPromise) {\n\t\t\tthis.embedderPromise = (async () => {\n\t\t\t\ttry {\n\t\t\t\t\tconst config = this.getIndexConfig();\n\t\t\t\t\tconst embedder = new Embedder({\n\t\t\t\t\t\tmodelCacheDir: path.join(homedir(), \".dreb\", \"agent\", \"models\"),\n\t\t\t\t\t\tmodelName: config.modelName,\n\t\t\t\t\t});\n\t\t\t\t\tawait embedder.initialize();\n\t\t\t\t\treturn embedder;\n\t\t\t\t} catch (err) {\n\t\t\t\t\tthis.embedderPromise = null; // reset on failure for retry\n\t\t\t\t\tthrow err;\n\t\t\t\t}\n\t\t\t})();\n\t\t}\n\t\treturn this.embedderPromise;\n\t}\n\n\tprivate async computeVectorScores(\n\t\tdb: SearchDatabase,\n\t\tquery: string,\n\t\tlimit: number,\n\t\t_onProgress?: IndexProgressCallback,\n\t): Promise<Map<number, number>> {\n\t\tconst config = this.getIndexConfig();\n\t\tconst embedder = await this.getOrCreateEmbedder();\n\n\t\t// Embed the query\n\t\tconst queryVector = await 
embedder.embedQuery(query);\n\n\t\t// Get all stored embeddings\n\t\tconst storedVectors = db.getAllEmbeddings(config.modelName);\n\n\t\tif (storedVectors.size === 0) {\n\t\t\treturn new Map();\n\t\t}\n\n\t\tconst topK = topKSimilar(queryVector, storedVectors, limit);\n\n\t\t// Convert to Map, clamping negative similarities to 0\n\t\tconst scores = new Map<number, number>();\n\t\tfor (const { id, score } of topK) {\n\t\t\tscores.set(id, Math.max(0, score));\n\t\t}\n\t\treturn scores;\n\t}\n}\n\n// ============================================================================\n// Helpers\n// ============================================================================\n\n/** Collect all unique chunk IDs that appear in any metric's results. */\nfunction collectCandidateIds(...scoreMaps: Map<number, number>[]): Set<number> {\n\tconst ids = new Set<number>();\n\tfor (const map of scoreMaps) {\n\t\tfor (const id of map.keys()) {\n\t\t\tids.add(id);\n\t\t}\n\t}\n\treturn ids;\n}\n\n/** Aggregate chunk-level scores to file-level scores (max per file). 
*/\nfunction aggregateFileScores(chunks: StoredChunk[], ...scoreMaps: Map<number, number>[]): Map<number, number> {\n\tconst fileScores = new Map<number, number>();\n\n\tfor (const chunk of chunks) {\n\t\tlet maxScore = 0;\n\t\tfor (const map of scoreMaps) {\n\t\t\tconst s = map.get(chunk.id);\n\t\t\tif (s !== undefined && s > maxScore) maxScore = s;\n\t\t}\n\t\tif (maxScore > 0) {\n\t\t\tconst existing = fileScores.get(chunk.fileId);\n\t\t\tif (existing === undefined || maxScore > existing) {\n\t\t\t\tfileScores.set(chunk.fileId, maxScore);\n\t\t\t}\n\t\t}\n\t}\n\n\treturn fileScores;\n}\n\n/**\n * Compute a dynamic threshold for import graph seeds.\n * Uses the median score — only the top half of files are strong enough seeds.\n * Falls back to 0.1 minimum to avoid accepting near-zero scores.\n */\nfunction computeSeedThreshold(fileScores: Map<number, number>): number {\n\tif (fileScores.size === 0) return 0;\n\tconst sorted = [...fileScores.values()].sort((a, b) => b - a);\n\tconst median = sorted[Math.floor(sorted.length / 2)];\n\treturn Math.max(median, 0.1);\n}\n\n/** Build a map of fileId → chunk IDs for that file. */\nfunction buildFileChunkMap(chunks: StoredChunk[]): Map<number, number[]> {\n\tconst map = new Map<number, number[]>();\n\tfor (const chunk of chunks) {\n\t\tconst existing = map.get(chunk.fileId);\n\t\tif (existing) existing.push(chunk.id);\n\t\telse map.set(chunk.fileId, [chunk.id]);\n\t}\n\treturn map;\n}\n\n/** Common English stopwords to exclude from FTS queries. 
*/\nconst STOPWORDS = new Set([\n\t\"a\",\n\t\"an\",\n\t\"and\",\n\t\"are\",\n\t\"as\",\n\t\"at\",\n\t\"be\",\n\t\"but\",\n\t\"by\",\n\t\"for\",\n\t\"from\",\n\t\"had\",\n\t\"has\",\n\t\"have\",\n\t\"he\",\n\t\"her\",\n\t\"his\",\n\t\"how\",\n\t\"i\",\n\t\"if\",\n\t\"in\",\n\t\"into\",\n\t\"is\",\n\t\"it\",\n\t\"its\",\n\t\"me\",\n\t\"my\",\n\t\"no\",\n\t\"not\",\n\t\"of\",\n\t\"on\",\n\t\"or\",\n\t\"our\",\n\t\"she\",\n\t\"so\",\n\t\"than\",\n\t\"that\",\n\t\"the\",\n\t\"their\",\n\t\"them\",\n\t\"then\",\n\t\"there\",\n\t\"these\",\n\t\"they\",\n\t\"this\",\n\t\"to\",\n\t\"up\",\n\t\"us\",\n\t\"was\",\n\t\"we\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"who\",\n\t\"will\",\n\t\"with\",\n\t\"would\",\n\t\"you\",\n\t\"your\",\n]);\n\n/**\n * Sanitize a query string for FTS5 MATCH syntax.\n * FTS5 chokes on certain characters — strip operators and wrap terms.\n *\n * Removes stopwords and uses OR between terms so multi-word queries return\n * partial matches (FTS5's default implicit AND is too restrictive).\n */\nfunction sanitizeFtsQuery(query: string): string {\n\t// Remove FTS5 operators and special chars\n\tconst cleaned = query\n\t\t.replace(/[*\"():^{}[\\]~!@#$%&=+|<>]/g, \" \")\n\t\t.replace(/\\bAND\\b|\\bOR\\b|\\bNOT\\b|\\bNEAR\\b/gi, \" \")\n\t\t.trim();\n\n\t// Split into tokens, remove stopwords, join with OR\n\tconst tokens = cleaned.split(/\\s+/).filter((t) => t.length > 0 && !STOPWORDS.has(t.toLowerCase()));\n\tif (tokens.length === 0) return '\"\"';\n\tif (tokens.length === 1) return tokens[0];\n\treturn tokens.join(\" OR \");\n}\n"]}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Text file chunker for the semantic search subsystem.
|
|
3
|
-
*
|
|
4
|
-
* Splits non-code files (markdown, YAML, JSON, TOML, plaintext) into
|
|
5
|
-
* semantically meaningful chunks using format-specific boundary detection.
|
|
6
|
-
*/
|
|
7
|
-
import type { Chunk, TextFileType } from "./types.js";
|
|
8
|
-
/**
|
|
9
|
-
* Chunk a non-code text file by format-specific boundaries.
|
|
10
|
-
*
|
|
11
|
-
* Returns at least one chunk for any non-empty input. Empty files produce
|
|
12
|
-
* a single chunk of kind 'file'.
|
|
13
|
-
*/
|
|
14
|
-
export declare function chunkTextFile(content: string, filePath: string, fileType: TextFileType): Chunk[];
|
|
15
|
-
//# sourceMappingURL=text-chunker.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"text-chunker.d.ts","sourceRoot":"","sources":["../../../src/core/search/text-chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAmBtD;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,GAAG,KAAK,EAAE,CAkChG","sourcesContent":["/**\n * Text file chunker for the semantic search subsystem.\n *\n * Splits non-code files (markdown, YAML, JSON, TOML, plaintext) into\n * semantically meaningful chunks using format-specific boundary detection.\n */\n\nimport type { Chunk, TextFileType } from \"./types.js\";\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Files smaller than this are returned as a single chunk. */\nconst MIN_SPLIT_SIZE = 500;\n\n/** Maximum characters per chunk — oversized sections are split at paragraph boundaries. */\nconst MAX_CHUNK_SIZE = 8000;\n\n/** Minimum characters for a plaintext paragraph chunk (small ones get merged). */\nconst MIN_PARAGRAPH_SIZE = 200;\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Chunk a non-code text file by format-specific boundaries.\n *\n * Returns at least one chunk for any non-empty input. 
Empty files produce\n * a single chunk of kind 'file'.\n */\nexport function chunkTextFile(content: string, filePath: string, fileType: TextFileType): Chunk[] {\n\t// Empty or trivially small files → single chunk\n\tif (content.length < MIN_SPLIT_SIZE) {\n\t\treturn [wholeFileChunk(content, filePath, fileType)];\n\t}\n\n\tlet chunks: Chunk[];\n\tswitch (fileType) {\n\t\tcase \"markdown\":\n\t\t\tchunks = chunkMarkdown(content, filePath);\n\t\t\tbreak;\n\t\tcase \"yaml\":\n\t\t\tchunks = chunkYaml(content, filePath);\n\t\t\tbreak;\n\t\tcase \"json\":\n\t\t\tchunks = chunkJson(content, filePath);\n\t\t\tbreak;\n\t\tcase \"toml\":\n\t\t\tchunks = chunkToml(content, filePath);\n\t\t\tbreak;\n\t\tcase \"plaintext\":\n\t\t\tchunks = chunkPlaintext(content, filePath);\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tchunks = [];\n\t}\n\n\t// Fallback: if format-specific parsing produced nothing, return whole file\n\tif (chunks.length === 0) {\n\t\treturn [wholeFileChunk(content, filePath, fileType)];\n\t}\n\n\t// Enforce max chunk size — split oversized chunks at paragraph boundaries\n\treturn chunks.flatMap((chunk) => enforceMaxSize(chunk));\n}\n\n// ============================================================================\n// Markdown\n// ============================================================================\n\n/** Heading regex: lines starting with 1–6 # characters followed by a space. 
*/\nconst HEADING_RE = /^(#{1,6})\\s+(.+)$/;\n\nfunction chunkMarkdown(content: string, filePath: string): Chunk[] {\n\tconst lines = content.split(\"\\n\");\n\n\t// Identify heading positions and levels\n\tconst headings: Array<{ line: number; level: number; text: string }> = [];\n\tfor (let i = 0; i < lines.length; i++) {\n\t\tconst match = HEADING_RE.exec(lines[i]);\n\t\tif (match) {\n\t\t\theadings.push({ line: i, level: match[1].length, text: match[2].trim() });\n\t\t}\n\t}\n\n\t// No headings → treat as plaintext\n\tif (headings.length === 0) {\n\t\treturn chunkPlaintext(content, filePath);\n\t}\n\n\tconst chunks: Chunk[] = [];\n\n\t// Content before the first heading (preamble)\n\tif (headings[0].line > 0) {\n\t\tconst preambleLines = lines.slice(0, headings[0].line);\n\t\tconst preambleContent = preambleLines.join(\"\\n\");\n\t\tif (preambleContent.trim().length > 0) {\n\t\t\tchunks.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: 1,\n\t\t\t\tendLine: headings[0].line, // 1-indexed, line before the first heading\n\t\t\t\tkind: \"heading_section\",\n\t\t\t\tname: null,\n\t\t\t\tcontent: preambleContent,\n\t\t\t\tfileType: \"markdown\",\n\t\t\t});\n\t\t}\n\t}\n\n\t// Each heading owns all lines until the next heading of same or higher level\n\tfor (let i = 0; i < headings.length; i++) {\n\t\tconst start = headings[i].line;\n\t\tlet end: number;\n\n\t\t// Find the next heading at the same or higher (lower number) level\n\t\tlet nextSameOrHigher = -1;\n\t\tfor (let j = i + 1; j < headings.length; j++) {\n\t\t\tif (headings[j].level <= headings[i].level) {\n\t\t\t\tnextSameOrHigher = j;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\n\t\tif (nextSameOrHigher !== -1) {\n\t\t\tend = headings[nextSameOrHigher].line - 1;\n\t\t} else {\n\t\t\tend = lines.length - 1;\n\t\t}\n\n\t\tconst sectionLines = lines.slice(start, end + 1);\n\t\tconst sectionContent = sectionLines.join(\"\\n\");\n\n\t\tchunks.push({\n\t\t\tfilePath,\n\t\t\tstartLine: start + 1, // 1-indexed\n\t\t\tendLine: end 
+ 1, // 1-indexed, inclusive\n\t\t\tkind: \"heading_section\",\n\t\t\tname: headings[i].text,\n\t\t\tcontent: sectionContent,\n\t\t\tfileType: \"markdown\",\n\t\t});\n\t}\n\n\treturn chunks;\n}\n\n// ============================================================================\n// YAML\n// ============================================================================\n\n/**\n * Top-level YAML key: a line that starts with a non-space, non-comment\n * character and contains a colon. Excludes YAML directives (---/...).\n */\nconst YAML_TOP_KEY_RE = /^([a-zA-Z_][a-zA-Z0-9_.-]*)\\s*:/;\n\nfunction chunkYaml(content: string, filePath: string): Chunk[] {\n\tconst lines = content.split(\"\\n\");\n\n\t// Find top-level key positions\n\tconst keys: Array<{ line: number; name: string }> = [];\n\tfor (let i = 0; i < lines.length; i++) {\n\t\tconst line = lines[i];\n\t\t// Skip comments, blank lines, YAML directives\n\t\tif (line.startsWith(\"#\") || line.startsWith(\"---\") || line.startsWith(\"...\") || line.trim() === \"\") {\n\t\t\tcontinue;\n\t\t}\n\t\tconst match = YAML_TOP_KEY_RE.exec(line);\n\t\tif (match) {\n\t\t\tkeys.push({ line: i, name: match[1] });\n\t\t}\n\t}\n\n\tif (keys.length === 0) {\n\t\treturn [];\n\t}\n\n\tconst chunks: Chunk[] = [];\n\n\t// Preamble (comments, directives before first key) is included in the\n\t// first key's chunk via the `start = 0` logic below for `i === 0`.\n\n\tfor (let i = 0; i < keys.length; i++) {\n\t\t// Include any preceding comments/blank lines that belong to this key\n\t\t// by looking backwards from the key to find attached comments\n\t\tlet start = keys[i].line;\n\t\tif (i === 0) {\n\t\t\t// First key includes any preamble (comments, directives)\n\t\t\tstart = 0;\n\t\t} else {\n\t\t\t// Look back for comment lines directly above\n\t\t\tlet scan = keys[i].line - 1;\n\t\t\twhile (scan > keys[i - 1].line) {\n\t\t\t\tconst trimmed = lines[scan].trim();\n\t\t\t\tif (trimmed.startsWith(\"#\") || trimmed === \"\") 
{\n\t\t\t\t\tscan--;\n\t\t\t\t} else {\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t}\n\t\t\tstart = scan + 1;\n\t\t}\n\n\t\tconst end = i < keys.length - 1 ? keys[i + 1].line - 1 : lines.length - 1;\n\n\t\t// Trim trailing blank lines to find the real end\n\t\tlet realEnd = end;\n\t\twhile (realEnd > start && lines[realEnd].trim() === \"\") {\n\t\t\trealEnd--;\n\t\t}\n\t\t// But keep at least the key line\n\t\tif (realEnd < keys[i].line) realEnd = keys[i].line;\n\n\t\t// Include trailing blank lines within the range for line counting,\n\t\t// but use them as separators\n\t\tconst sectionLines = lines.slice(start, end + 1);\n\t\tconst sectionContent = sectionLines.join(\"\\n\");\n\n\t\tchunks.push({\n\t\t\tfilePath,\n\t\t\tstartLine: start + 1,\n\t\t\tendLine: end + 1,\n\t\t\tkind: \"top_level_key\",\n\t\t\tname: keys[i].name,\n\t\t\tcontent: sectionContent,\n\t\t\tfileType: \"yaml\",\n\t\t});\n\t}\n\n\treturn chunks;\n}\n\n// ============================================================================\n// JSON\n// ============================================================================\n\nfunction chunkJson(content: string, filePath: string): Chunk[] {\n\tlet parsed: unknown;\n\ttry {\n\t\tparsed = JSON.parse(content);\n\t} catch {\n\t\t// Invalid JSON → return as whole file\n\t\treturn [];\n\t}\n\n\t// Only split top-level objects. Arrays and primitives → single chunk.\n\tif (typeof parsed !== \"object\" || parsed === null || Array.isArray(parsed)) {\n\t\treturn [];\n\t}\n\n\tconst topKeys = Object.keys(parsed);\n\tif (topKeys.length === 0) {\n\t\treturn [];\n\t}\n\n\t// For JSON we can't rely on simple line scanning because values can span\n\t// multiple lines with arbitrary nesting. 
Instead, re-serialize each\n\t// top-level key and locate its position in the original content.\n\tconst lines = content.split(\"\\n\");\n\tconst chunks: Chunk[] = [];\n\n\t// Strategy: scan the original text to find each top-level key's line range.\n\t// A top-level key in formatted JSON appears as ` \"key\":` at indent level 1.\n\t// In minified JSON with a single line, we fall back to serialized slicing.\n\tif (lines.length === 1) {\n\t\t// Minified JSON — produce one chunk per top-level key using re-serialized content\n\t\tconst obj = parsed as Record<string, unknown>;\n\t\tfor (const key of topKeys) {\n\t\t\tconst serialized = JSON.stringify({ [key]: obj[key] }, null, 2);\n\t\t\tchunks.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: 1,\n\t\t\t\tendLine: 1,\n\t\t\t\tkind: \"top_level_key\",\n\t\t\t\tname: key,\n\t\t\t\tcontent: serialized,\n\t\t\t\tfileType: \"json\",\n\t\t\t});\n\t\t}\n\t\treturn chunks;\n\t}\n\n\t// Multi-line JSON: find each top-level key by scanning for `\"key\":` at\n\t// brace depth 1.\n\tconst keyPositions: Array<{ key: string; startLine: number }> = [];\n\tlet depth = 0;\n\tfor (let i = 0; i < lines.length; i++) {\n\t\tconst line = lines[i];\n\t\tfor (let c = 0; c < line.length; c++) {\n\t\t\tconst ch = line[c];\n\t\t\tif (ch === '\"') {\n\t\t\t\t// Skip string content\n\t\t\t\tc++;\n\t\t\t\twhile (c < line.length && line[c] !== '\"') {\n\t\t\t\t\tif (line[c] === \"\\\\\") c++; // skip escaped char\n\t\t\t\t\tc++;\n\t\t\t\t}\n\t\t\t\t// Check if this string at depth 1 is a key (followed by :)\n\t\t\t\tif (depth === 1) {\n\t\t\t\t\t// Extract the key name\n\t\t\t\t\tconst keyMatch = /^\\s*\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\\s*:/.exec(line);\n\t\t\t\t\tif (keyMatch) {\n\t\t\t\t\t\tconst foundKey = keyMatch[1].replace(/\\\\\"/g, '\"').replace(/\\\\\\\\/g, \"\\\\\");\n\t\t\t\t\t\tif (topKeys.includes(foundKey) && !keyPositions.some((kp) => kp.key === foundKey)) {\n\t\t\t\t\t\t\tkeyPositions.push({ key: foundKey, startLine: i 
});\n\t\t\t\t\t\t}\n\t\t\t\t\t\tbreak; // Move to next line — we found the key on this line\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t} else if (ch === \"{\" || ch === \"[\") {\n\t\t\t\tdepth++;\n\t\t\t} else if (ch === \"}\" || ch === \"]\") {\n\t\t\t\tdepth--;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Build chunks from key positions\n\tfor (let i = 0; i < keyPositions.length; i++) {\n\t\tconst start = keyPositions[i].startLine;\n\t\tconst end = i < keyPositions.length - 1 ? keyPositions[i + 1].startLine - 1 : lines.length - 1;\n\n\t\t// Trim the trailing closing brace of the root object from the last chunk\n\t\tlet realEnd = end;\n\t\tif (i === keyPositions.length - 1) {\n\t\t\t// Walk back from end to skip the root closing brace and trailing whitespace\n\t\t\twhile (realEnd > start && lines[realEnd].trim() === \"\") realEnd--;\n\t\t\tif (realEnd > start && lines[realEnd].trim() === \"}\") realEnd--;\n\t\t}\n\n\t\t// Trim trailing commas and blank lines from non-last chunks too\n\t\twhile (realEnd > start && lines[realEnd].trim() === \"\") realEnd--;\n\n\t\tconst sectionContent = lines.slice(start, realEnd + 1).join(\"\\n\");\n\n\t\tchunks.push({\n\t\t\tfilePath,\n\t\t\tstartLine: start + 1,\n\t\t\tendLine: realEnd + 1,\n\t\t\tkind: \"top_level_key\",\n\t\t\tname: keyPositions[i].key,\n\t\t\tcontent: sectionContent,\n\t\t\tfileType: \"json\",\n\t\t});\n\t}\n\n\treturn chunks;\n}\n\n// ============================================================================\n// TOML\n// ============================================================================\n\n/** TOML section header: [section] or [[array-of-tables]]. */\nconst TOML_SECTION_RE = /^\\[{1,2}([^\\]]+)\\]{1,2}\\s*$/;\n\n/** TOML top-level key-value pair (not indented, before any section). 
*/\nconst TOML_KV_RE = /^([a-zA-Z_][a-zA-Z0-9_.-]*)\\s*=/;\n\nfunction chunkToml(content: string, filePath: string): Chunk[] {\n\tconst lines = content.split(\"\\n\");\n\n\t// Phase 1: Identify all top-level boundaries (sections and top-level KV pairs before sections)\n\tconst boundaries: Array<{\n\t\tline: number;\n\t\tkind: \"section\" | \"kv\";\n\t\tname: string;\n\t}> = [];\n\n\tlet firstSectionLine = lines.length;\n\n\t// Find section headers first\n\tfor (let i = 0; i < lines.length; i++) {\n\t\tconst match = TOML_SECTION_RE.exec(lines[i]);\n\t\tif (match) {\n\t\t\tif (i < firstSectionLine) firstSectionLine = i;\n\t\t\tboundaries.push({ line: i, kind: \"section\", name: match[1].trim() });\n\t\t}\n\t}\n\n\t// Find top-level key-value pairs (lines before the first section)\n\tconst kvGroups: Array<{ startLine: number; name: string }> = [];\n\tfor (let i = 0; i < firstSectionLine; i++) {\n\t\tconst line = lines[i];\n\t\tif (line.trim() === \"\" || line.trim().startsWith(\"#\")) continue;\n\t\tconst match = TOML_KV_RE.exec(line);\n\t\tif (match) {\n\t\t\tkvGroups.push({ startLine: i, name: match[1] });\n\t\t}\n\t}\n\n\t// Merge KV pairs into boundaries\n\tfor (const kv of kvGroups) {\n\t\tboundaries.push({ line: kv.startLine, kind: \"kv\", name: kv.name });\n\t}\n\n\t// Sort boundaries by line number\n\tboundaries.sort((a, b) => a.line - b.line);\n\n\tif (boundaries.length === 0) {\n\t\treturn [];\n\t}\n\n\tconst chunks: Chunk[] = [];\n\n\tfor (let i = 0; i < boundaries.length; i++) {\n\t\tconst boundary = boundaries[i];\n\t\tlet start = boundary.line;\n\t\tconst end = i < boundaries.length - 1 ? 
boundaries[i + 1].line - 1 : lines.length - 1;\n\n\t\tif (i === 0) {\n\t\t\t// First boundary — include any leading comments/preamble\n\t\t\tstart = 0;\n\t\t} else {\n\t\t\t// Look back for comment/blank lines attached to this boundary\n\t\t\tlet scan = boundary.line - 1;\n\t\t\tconst prevEnd = boundaries[i - 1].line;\n\t\t\twhile (scan > prevEnd) {\n\t\t\t\tconst trimmed = lines[scan].trim();\n\t\t\t\tif (trimmed.startsWith(\"#\") || trimmed === \"\") {\n\t\t\t\t\tscan--;\n\t\t\t\t} else {\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t}\n\t\t\tstart = scan + 1;\n\t\t}\n\n\t\tconst sectionContent = lines.slice(start, end + 1).join(\"\\n\");\n\n\t\tchunks.push({\n\t\t\tfilePath,\n\t\t\tstartLine: start + 1,\n\t\t\tendLine: end + 1,\n\t\t\tkind: \"top_level_key\",\n\t\t\tname: boundary.name,\n\t\t\tcontent: sectionContent,\n\t\t\tfileType: \"toml\",\n\t\t});\n\t}\n\n\treturn chunks;\n}\n\n// ============================================================================\n// Plaintext\n// ============================================================================\n\nfunction chunkPlaintext(content: string, filePath: string): Chunk[] {\n\tconst lines = content.split(\"\\n\");\n\n\t// Split into paragraphs at double-newline boundaries\n\tconst paragraphs: Array<{ startLine: number; endLine: number; content: string }> = [];\n\tlet paraStart = -1;\n\tlet consecutiveBlanks = 0;\n\n\tfor (let i = 0; i < lines.length; i++) {\n\t\tconst isBlank = lines[i].trim() === \"\";\n\n\t\tif (isBlank) {\n\t\t\tconsecutiveBlanks++;\n\t\t\tif (consecutiveBlanks >= 2 && paraStart !== -1) {\n\t\t\t\t// End current paragraph at the last non-blank line\n\t\t\t\tlet paraEnd = i - consecutiveBlanks;\n\t\t\t\tif (paraEnd < paraStart) paraEnd = paraStart;\n\t\t\t\tparagraphs.push({\n\t\t\t\t\tstartLine: paraStart,\n\t\t\t\t\tendLine: paraEnd,\n\t\t\t\t\tcontent: lines.slice(paraStart, paraEnd + 1).join(\"\\n\"),\n\t\t\t\t});\n\t\t\t\tparaStart = -1;\n\t\t\t}\n\t\t} else {\n\t\t\tif (paraStart === -1) 
{\n\t\t\t\tparaStart = i;\n\t\t\t}\n\t\t\tconsecutiveBlanks = 0;\n\t\t}\n\t}\n\n\t// Don't forget the last paragraph\n\tif (paraStart !== -1) {\n\t\tlet paraEnd = lines.length - 1;\n\t\twhile (paraEnd > paraStart && lines[paraEnd].trim() === \"\") paraEnd--;\n\t\tparagraphs.push({\n\t\t\tstartLine: paraStart,\n\t\t\tendLine: paraEnd,\n\t\t\tcontent: lines.slice(paraStart, paraEnd + 1).join(\"\\n\"),\n\t\t});\n\t}\n\n\tif (paragraphs.length === 0) {\n\t\treturn [];\n\t}\n\n\t// Group small paragraphs together to meet the minimum size\n\tconst chunks: Chunk[] = [];\n\tlet groupStart = paragraphs[0].startLine;\n\tlet groupEnd = paragraphs[0].endLine;\n\tlet groupContent = paragraphs[0].content;\n\n\tfor (let i = 1; i < paragraphs.length; i++) {\n\t\tconst para = paragraphs[i];\n\n\t\tif (groupContent.length < MIN_PARAGRAPH_SIZE) {\n\t\t\t// Merge with current group\n\t\t\tgroupEnd = para.endLine;\n\t\t\tgroupContent += `\\n\\n${para.content}`;\n\t\t} else {\n\t\t\t// Emit current group, start new one\n\t\t\tchunks.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: groupStart + 1,\n\t\t\t\tendLine: groupEnd + 1,\n\t\t\t\tkind: \"paragraph\",\n\t\t\t\tname: extractParagraphName(groupContent),\n\t\t\t\tcontent: groupContent,\n\t\t\t\tfileType: \"plaintext\",\n\t\t\t});\n\t\t\tgroupStart = para.startLine;\n\t\t\tgroupEnd = para.endLine;\n\t\t\tgroupContent = para.content;\n\t\t}\n\t}\n\n\t// Emit final group\n\t// If the final group is too small and there are existing chunks, merge with the last one\n\tif (groupContent.length < MIN_PARAGRAPH_SIZE && chunks.length > 0) {\n\t\tconst last = chunks[chunks.length - 1];\n\t\tlast.endLine = groupEnd + 1;\n\t\tlast.content += `\\n\\n${groupContent}`;\n\t} else {\n\t\tchunks.push({\n\t\t\tfilePath,\n\t\t\tstartLine: groupStart + 1,\n\t\t\tendLine: groupEnd + 1,\n\t\t\tkind: \"paragraph\",\n\t\t\tname: extractParagraphName(groupContent),\n\t\t\tcontent: groupContent,\n\t\t\tfileType: \"plaintext\",\n\t\t});\n\t}\n\n\treturn 
chunks;\n}\n\n/** Extract a short name from the first line of a paragraph, truncated. */\nfunction extractParagraphName(content: string): string | null {\n\tconst firstLine = content.split(\"\\n\")[0].trim();\n\tif (firstLine.length === 0) return null;\n\tif (firstLine.length <= 60) return firstLine;\n\treturn `${firstLine.slice(0, 57)}...`;\n}\n\n// ============================================================================\n// Chunk Size Enforcement\n// ============================================================================\n\n/**\n * If a chunk exceeds MAX_CHUNK_SIZE, split it at paragraph boundaries\n * (double newlines). If no paragraph boundaries exist, split at line\n * boundaries near the limit.\n */\nfunction enforceMaxSize(chunk: Chunk): Chunk[] {\n\tif (chunk.content.length <= MAX_CHUNK_SIZE) {\n\t\treturn [chunk];\n\t}\n\n\tconst lines = chunk.content.split(\"\\n\");\n\tconst subChunks: Chunk[] = [];\n\tlet currentLines: string[] = [];\n\tlet currentSize = 0;\n\tlet chunkStartLine = chunk.startLine;\n\tlet partIndex = 0;\n\n\tfor (let i = 0; i < lines.length; i++) {\n\t\tconst line = lines[i];\n\t\tconst lineSize = line.length + 1; // +1 for newline\n\n\t\t// Check if adding this line would exceed the limit\n\t\tif (currentSize + lineSize > MAX_CHUNK_SIZE && currentLines.length > 0) {\n\t\t\t// Try to find a paragraph boundary (blank line) to split at\n\t\t\tlet splitAt = currentLines.length;\n\t\t\tfor (let j = currentLines.length - 1; j > 0; j--) {\n\t\t\t\tif (currentLines[j].trim() === \"\") {\n\t\t\t\t\tsplitAt = j;\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Emit the sub-chunk up to the split point\n\t\t\tconst emitLines = currentLines.slice(0, splitAt);\n\t\t\tconst emitContent = emitLines.join(\"\\n\");\n\t\t\tconst emitEndLine = chunkStartLine + splitAt - 1;\n\n\t\t\tsubChunks.push({\n\t\t\t\tfilePath: chunk.filePath,\n\t\t\t\tstartLine: chunkStartLine,\n\t\t\t\tendLine: emitEndLine,\n\t\t\t\tkind: chunk.kind,\n\t\t\t\tname: 
partIndex === 0 ? chunk.name : chunk.name ? `${chunk.name} (cont.)` : null,\n\t\t\t\tcontent: emitContent,\n\t\t\t\tfileType: chunk.fileType,\n\t\t\t});\n\t\t\tpartIndex++;\n\n\t\t\t// Keep remaining lines from the split\n\t\t\tconst remaining = currentLines.slice(splitAt);\n\t\t\tcurrentLines = [...remaining, line];\n\t\t\tchunkStartLine = emitEndLine + 1;\n\t\t\tcurrentSize = currentLines.join(\"\\n\").length;\n\t\t} else {\n\t\t\tcurrentLines.push(line);\n\t\t\tcurrentSize += lineSize;\n\t\t}\n\t}\n\n\t// Emit remaining lines\n\tif (currentLines.length > 0) {\n\t\tconst emitContent = currentLines.join(\"\\n\");\n\t\tsubChunks.push({\n\t\t\tfilePath: chunk.filePath,\n\t\t\tstartLine: chunkStartLine,\n\t\t\tendLine: chunk.endLine,\n\t\t\tkind: chunk.kind,\n\t\t\tname: partIndex === 0 ? chunk.name : chunk.name ? `${chunk.name} (cont.)` : null,\n\t\t\tcontent: emitContent,\n\t\t\tfileType: chunk.fileType,\n\t\t});\n\t}\n\n\treturn subChunks.length > 0 ? subChunks : [chunk];\n}\n\n// ============================================================================\n// Helpers\n// ============================================================================\n\nfunction wholeFileChunk(content: string, filePath: string, fileType: TextFileType): Chunk {\n\tconst lineCount = content.split(\"\\n\").length;\n\treturn {\n\t\tfilePath,\n\t\tstartLine: 1,\n\t\tendLine: lineCount,\n\t\tkind: \"file\",\n\t\tname: null,\n\t\tcontent,\n\t\tfileType,\n\t};\n}\n"]}
|