@oscharko-dev/keiko-workspace 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/binaryDetect.d.ts +6 -0
- package/dist/binaryDetect.d.ts.map +1 -0
- package/dist/binaryDetect.js +20 -0
- package/dist/contextPack.d.ts +24 -0
- package/dist/contextPack.d.ts.map +1 -0
- package/dist/contextPack.js +118 -0
- package/dist/detect.d.ts +5 -0
- package/dist/detect.d.ts.map +1 -0
- package/dist/detect.js +144 -0
- package/dist/discovery.d.ts +10 -0
- package/dist/discovery.d.ts.map +1 -0
- package/dist/discovery.js +199 -0
- package/dist/document-extraction.d.ts +44 -0
- package/dist/document-extraction.d.ts.map +1 -0
- package/dist/document-extraction.js +372 -0
- package/dist/errors.d.ts +3 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +4 -0
- package/dist/fs.d.ts +25 -0
- package/dist/fs.d.ts.map +1 -0
- package/dist/fs.js +69 -0
- package/dist/gitHistory.d.ts +3 -0
- package/dist/gitHistory.d.ts.map +1 -0
- package/dist/gitHistory.js +317 -0
- package/dist/ignore.d.ts +15 -0
- package/dist/ignore.d.ts.map +1 -0
- package/dist/ignore.js +248 -0
- package/dist/importGraph.d.ts +3 -0
- package/dist/importGraph.d.ts.map +1 -0
- package/dist/importGraph.js +131 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +25 -0
- package/dist/paths.d.ts +3 -0
- package/dist/paths.d.ts.map +1 -0
- package/dist/paths.js +38 -0
- package/dist/realpath.d.ts +9 -0
- package/dist/realpath.d.ts.map +1 -0
- package/dist/realpath.js +72 -0
- package/dist/repoSearch.d.ts +46 -0
- package/dist/repoSearch.d.ts.map +1 -0
- package/dist/repoSearch.js +350 -0
- package/dist/repoSearchEntries.d.ts +15 -0
- package/dist/repoSearchEntries.d.ts.map +1 -0
- package/dist/repoSearchEntries.js +106 -0
- package/dist/repoSearchLineSelection.d.ts +18 -0
- package/dist/repoSearchLineSelection.d.ts.map +1 -0
- package/dist/repoSearchLineSelection.js +43 -0
- package/dist/repoSearchMatchers.d.ts +8 -0
- package/dist/repoSearchMatchers.d.ts.map +1 -0
- package/dist/repoSearchMatchers.js +414 -0
- package/dist/repoSearchPolicy.d.ts +34 -0
- package/dist/repoSearchPolicy.d.ts.map +1 -0
- package/dist/repoSearchPolicy.js +342 -0
- package/dist/repoSearchRegexSafety.d.ts +2 -0
- package/dist/repoSearchRegexSafety.d.ts.map +1 -0
- package/dist/repoSearchRegexSafety.js +15 -0
- package/dist/repoSearchScan.d.ts +62 -0
- package/dist/repoSearchScan.d.ts.map +1 -0
- package/dist/repoSearchScan.js +292 -0
- package/dist/retrieval.d.ts +10 -0
- package/dist/retrieval.d.ts.map +1 -0
- package/dist/retrieval.js +74 -0
- package/dist/stableId.d.ts +4 -0
- package/dist/stableId.d.ts.map +1 -0
- package/dist/stableId.js +49 -0
- package/dist/structuralAdapters.d.ts +27 -0
- package/dist/structuralAdapters.d.ts.map +1 -0
- package/dist/structuralAdapters.js +87 -0
- package/dist/summary.d.ts +4 -0
- package/dist/summary.d.ts.map +1 -0
- package/dist/summary.js +54 -0
- package/dist/testSourcePairing.d.ts +3 -0
- package/dist/testSourcePairing.d.ts.map +1 -0
- package/dist/testSourcePairing.js +179 -0
- package/dist/types.d.ts +3 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +35 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
// Pure match strategies and the query fingerprint used by the repo-search facade (Issue #179).
|
|
2
|
+
// Kept separate from repoSearch.ts to hold the file-length cap and to make every matcher
|
|
3
|
+
// independently testable.
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
5
|
+
import { RepoSearchInvalidQueryError } from "./errors.js";
|
|
6
|
+
import { regexSafetyIssue } from "./repoSearchRegexSafety.js";
|
|
7
|
+
// Short, deterministic fingerprint of a repo-search query.
// Only `kind`, `text`, `caseSensitive` and `maxResults` take part (serialized in exactly that
// key order); any other property on `query` is ignored. Returns the first 16 hex characters
// of the SHA-256 digest of that JSON string.
export function fingerprintFor(query) {
    const { kind, text, caseSensitive, maxResults } = query;
    const serialized = JSON.stringify({ kind, text, caseSensitive, maxResults });
    const digest = createHash("sha256").update(serialized).digest("hex");
    return digest.slice(0, 16);
}
|
|
16
|
+
// Issue #177 retrieval correctness: a natural-language question carries function words ("the",
|
|
17
|
+
// "to", "are", "based", "on", ...) that appear on nearly every prose line. Scoring the raw
|
|
18
|
+
// whitespace tokens let those stop words match almost everything, so the global
|
|
19
|
+
// `maxMatchesReturned` budget was exhausted on the first alphabetically-scanned files and the
|
|
20
|
+
// rest of a multi-file scope was never read (a `docs/` connect would only ever surface its
|
|
21
|
+
// first file, never the file the question was actually about). We mirror the exploration
|
|
22
|
+
// planner's fixed English/German stop-word policy (planner/anchors.ts in keiko-workflows - duplicated
|
|
23
|
+
// here rather than imported because the architecture forbids keiko-workspace depending on the
|
|
24
|
+
// higher-level keiko-workflows package): strip surrounding punctuation, drop single-character and
|
|
25
|
+
// stop-word tokens, and keep `adr-0022`/`file.ts`-style hyphenated and dotted identifiers intact.
|
|
26
|
+
// Lower-case function words that are dropped from natural-language queries before scoring.
// Lookups lower-case the token first (see naturalLanguageContentTokens). "an" and "was" appear
// in both the English and the German rows; the Set collapses them.
const NL_STOP_WORDS = new Set([
    // English
    "the", "and", "for", "with", "from", "this", "that", "what", "where", "when", "which",
    "have", "has", "had", "are", "was", "were", "is", "be", "been", "being",
    "do", "does", "did", "doing",
    "of", "in", "on", "at", "to", "an", "as", "or", "but", "not", "no", "yes", "if", "by",
    "it", "its", "you", "your", "we", "our", "they", "their", "them",
    "he", "she", "his", "her", "my", "me", "i", "us",
    "how", "why", "who", "whom", "whose", "than", "then", "there",
    "can", "could", "would", "should", "may", "might", "must", "will",
    "so", "such", "any", "all", "some", "every", "each", "about", "into", "only",
    "based", "answer",
    // German
    "aber", "alle", "als", "am", "an", "auch", "auf", "aus", "bei", "bin", "bis", "bitte",
    "da", "das", "dass", "dein", "deine", "dem", "den", "der", "des", "die", "dir", "du", "durch",
    "ein", "eine", "einem", "einen", "einer", "es", "für",
    "habe", "haben", "hat", "ich", "im", "ist",
    "kann", "kannst", "kein", "keine", "mit", "mir", "nach", "nicht", "noch", "oder",
    "sagen", "sind", "und", "uns", "von", "war", "was",
    "welche", "welchen", "welcher", "welches", "wenn", "wer", "wie", "wir", "wird", "wo",
    "zu", "zum", "zur",
]);
|
|
180
|
+
// Lower-case query words that signal the user is asking where something is defined,
// declared or implemented.
const DEFINITION_INTENT_TOKENS = new Set(
    "define defined definition declare declared declaration implement implements implemented implementation".split(" "),
);
|
|
192
|
+
// Lower-case HTTP verbs recognised in a natural-language query.
const HTTP_METHOD_TOKENS = new Set("get post put patch delete head options".split(" "));
|
|
201
|
+
// Multi-word or punctuated technical phrases and the single canonical search term each one
// contributes when its pattern matches the raw query text. Patterns are case-insensitive and
// carry no `g` flag, so `.test()` keeps no state between calls.
const TECHNICAL_PHRASES = [
    {
        pattern: /\btype[\s_-]?script\b/iu,
        term: "typescript",
    },
    {
        pattern: /\bjava[\s_-]?script\b/iu,
        term: "javascript",
    },
    {
        pattern: /\bnode(?:\.js)?\b/iu,
        term: "node",
    },
    {
        pattern: /\bnext(?:\.js)?\b/iu,
        term: "nextjs",
    },
    {
        pattern: /\bpackage\.json\b/iu,
        term: "package.json",
    },
    {
        // English and German spellings map to the same term.
        pattern: /\bpackage[\s_-]?manager\b|\bpaket[\s_-]?manager\b/iu,
        term: "package-manager",
    },
    {
        pattern: /\btest[\s_-]?runner\b|\btestumgebung\b/iu,
        term: "test-runner",
    },
];
|
|
210
|
+
// Trims every leading and trailing character that is neither a Unicode letter nor a Unicode
// number. Punctuation inside the token is left alone, so "ADR-0022" keeps its hyphen and
// "file.ts" keeps its dot. A token made only of punctuation becomes "".
function normalizeNaturalLanguageToken(raw) {
    const leadingNoise = /^[^\p{L}\p{N}]+/u;
    const trailingNoise = /[^\p{L}\p{N}]+$/u;
    const withoutPrefix = raw.replace(leadingNoise, "");
    return withoutPrefix.replace(trailingNoise, "");
}
|
|
216
|
+
// Normalizes each raw token and drops the ones that end up empty. Order is preserved.
function naturalLanguageNormalizedTokens(rawTokens) {
    const kept = [];
    for (const raw of rawTokens) {
        const cleaned = normalizeNaturalLanguageToken(raw);
        if (cleaned.length > 0) {
            kept.push(cleaned);
        }
    }
    return kept;
}
|
|
219
|
+
// Picks the tokens a relevance score is computed over: normalized tokens (lower-cased unless
// `caseSensitive`) that are at least two characters long and are not stop words.
// If that filter leaves nothing (the query was only stop words or single characters), the
// unfiltered normalized tokens are returned instead so the matcher still has something to score.
function naturalLanguageContentTokens(rawTokens, caseSensitive) {
    const normalized = [];
    for (const token of naturalLanguageNormalizedTokens(rawTokens)) {
        normalized.push(caseSensitive ? token : token.toLowerCase());
    }
    const content = [];
    for (const token of normalized) {
        if (token.length < 2) {
            continue;
        }
        // The stop-word set is lower-case, so compare case-insensitively even in
        // case-sensitive mode.
        if (NL_STOP_WORDS.has(token.toLowerCase())) {
            continue;
        }
        content.push(token);
    }
    return content.length === 0 ? normalized : content;
}
|
|
227
|
+
// Returns the distinct canonical terms of every TECHNICAL_PHRASES entry whose pattern matches
// `queryText`, in table order. Terms are lower-cased unless `caseSensitive` is set.
function technicalPhraseTerms(queryText, caseSensitive) {
    const matchedTerms = TECHNICAL_PHRASES
        .filter(({ pattern }) => pattern.test(queryText))
        .map(({ term }) => (caseSensitive ? term : term.toLowerCase()));
    return Array.from(new Set(matchedTerms));
}
|
|
236
|
+
// Removes duplicates while keeping the first occurrence of each value in its original position.
function uniqueStrings(values) {
    const seen = new Set(values);
    return Array.from(seen);
}
|
|
239
|
+
// True when the token, compared case-insensitively, is one of the definition-intent words.
function isDefinitionIntentToken(token) {
    const folded = token.toLowerCase();
    return DEFINITION_INTENT_TOKENS.has(folded);
}
|
|
242
|
+
// Heuristic for "looks like an identifier rather than a prose word": the token contains an
// ASCII upper-case letter, an underscore, or a hyphen.
function isSymbolLikeToken(token) {
    if (/[A-Z_]/u.test(token)) {
        return true;
    }
    return token.includes("-");
}
|
|
245
|
+
// Derives scoring hints from the normalized query tokens:
//   definitionIntent - some token is a definition-intent word (case-insensitive);
//   symbolTokens     - distinct symbol-like tokens that are not definition-intent words;
//   routeTokens      - distinct tokens containing "/";
//   httpMethods      - distinct HTTP verbs found in the query (always lower-case, because
//                      they are taken from the lower-cased token list).
// symbolTokens and routeTokens are lower-cased unless `caseSensitive` is set.
function analyzeNaturalLanguageIntent(normalizedTokens, caseSensitive) {
    const foldCase = (token) => (caseSensitive ? token : token.toLowerCase());
    const lowered = normalizedTokens.map((token) => token.toLowerCase());

    const symbolCandidates = normalizedTokens.filter(
        (token) => isSymbolLikeToken(token) && !DEFINITION_INTENT_TOKENS.has(token.toLowerCase()),
    );
    const routeCandidates = normalizedTokens.filter((token) => token.includes("/"));
    const methodCandidates = lowered.filter((token) => HTTP_METHOD_TOKENS.has(token));

    return {
        definitionIntent: lowered.some((token) => isDefinitionIntentToken(token)),
        symbolTokens: uniqueStrings(symbolCandidates.map((token) => foldCase(token))),
        routeTokens: uniqueStrings(routeCandidates.map((token) => foldCase(token))),
        httpMethods: uniqueStrings(methodCandidates.map((token) => foldCase(token))),
    };
}
|
|
259
|
+
// Backslash-escapes every regex metacharacter in `text` so it can be embedded in a RegExp
// source and match literally.
function escapeRegExp(text) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/gu;
    return text.replace(metaCharacters, (ch) => "\\" + ch);
}
|
|
262
|
+
// True for lines that start (after optional indentation) with the `import` keyword or with an
// `export {` re-export list.
function lineLooksLikeImport(line) {
    if (/^\s*import\b/u.test(line)) {
        return true;
    }
    return /^\s*export\s*\{/u.test(line);
}
|
|
265
|
+
// True when `line` appears to declare `symbolToken`: as a (possibly exported/async) function,
// a const/let/var binding, a class/interface/type/enum, or a `name: (` / `name = (` function
// assignment. The symbol is matched literally; matching ignores case unless `caseSensitive`.
function lineLooksLikeSymbolDefinition(line, symbolToken, caseSensitive) {
    const name = escapeRegExp(symbolToken);
    const flags = caseSensitive ? "u" : "iu";
    const declarationSources = [
        `\\b(?:export\\s+)?(?:async\\s+)?function\\s+${name}\\b`,
        `\\b(?:export\\s+)?(?:const|let|var)\\s+${name}\\b`,
        `\\b(?:export\\s+)?(?:class|interface|type|enum)\\s+${name}\\b`,
        `\\b${name}\\s*[:=]\\s*(?:async\\s*)?\\(`,
    ];
    for (const source of declarationSources) {
        if (new RegExp(source, flags).test(line)) {
            return true;
        }
    }
    return false;
}
|
|
276
|
+
// True when `line` looks like a route-table entry for the route the query asked about:
//   - `haystack` contains one of `intent.routeTokens`,
//   - the line contains one of `intent.httpMethods` as a double-quoted string, and
//   - the raw `line` contains a `method:` or `pattern:` key.
//
// `haystack` is the line as prepared by the caller (lower-cased in case-insensitive mode, raw
// otherwise). `intent.httpMethods` is produced from lower-cased tokens, so comparing it against
// a raw, case-sensitive haystack could never match the usual upper-case spelling
// (`method: "GET"`). HTTP verbs are therefore compared case-insensitively here, regardless of
// the query's case mode; route tokens keep following the haystack's case mode.
function lineLooksLikeRouteDeclaration(line, haystack, intent) {
    const routeHit = intent.routeTokens.some((token) => haystack.includes(token));
    if (!routeHit) {
        return false;
    }
    const foldedHaystack = haystack.toLowerCase();
    const methodHit = intent.httpMethods.some((method) => foldedHaystack.includes(`"${method.toLowerCase()}"`));
    if (!methodHit) {
        return false;
    }
    return line.includes("method:") || line.includes("pattern:");
}
|
|
284
|
+
// Re-weights a line's base score when the query expresses definition intent; otherwise the
// base score is returned untouched.
//   +0.75 if the line defines one of the query's symbol tokens that occurs in `haystack`;
//   +0.65 if the line looks like a matching route declaration (the larger bonus wins, they
//         do not add up);
//   -0.20 if some occurring symbol token is NOT defined on this line and the line is an
//         import / re-export.
// The result is clamped to [0, 1].
function adjustedDefinitionIntentScore(line, haystack, baseScore, intent, caseSensitive) {
    if (!intent.definitionIntent) {
        return baseScore;
    }
    // One flag per symbol token present on the line: does the line define it?
    const definedFlags = intent.symbolTokens
        .filter((symbolToken) => haystack.includes(symbolToken))
        .map((symbolToken) => lineLooksLikeSymbolDefinition(line, symbolToken, caseSensitive));

    const symbolBonus = definedFlags.includes(true) ? 0.75 : 0;
    const importPenalty = definedFlags.includes(false) && lineLooksLikeImport(line) ? 0.2 : 0;
    const routeBonus = lineLooksLikeRouteDeclaration(line, haystack, intent) ? 0.65 : 0;

    const adjusted = baseScore + Math.max(symbolBonus, routeBonus) - importPenalty;
    return Math.max(0, Math.min(1, adjusted));
}
|
|
306
|
+
// Builds the matcher for natural-language queries.
// The query text is split on whitespace; the scoring tokens are the distinct content tokens
// plus any canonical technical-phrase terms (GRD-033: de-duplicated so a repeated query word
// is not counted twice in hits/total). `match(line)` returns 0 when no scoring token occurs in
// the line, otherwise the fraction of scoring tokens found, adjusted for definition intent.
function buildNaturalLanguageMatcher(query) {
    const rawTokens = query.text.split(/\s+/).filter((piece) => piece.length > 0);
    const normalizedTokens = naturalLanguageNormalizedTokens(rawTokens);
    const contentTokens = naturalLanguageContentTokens(rawTokens, query.caseSensitive);
    const phraseTerms = technicalPhraseTerms(query.text, query.caseSensitive);
    const tokens = uniqueStrings(contentTokens.concat(phraseTerms));
    const intent = analyzeNaturalLanguageIntent(normalizedTokens, query.caseSensitive);
    const total = tokens.length;

    const match = (line) => {
        if (total === 0) {
            return 0;
        }
        const haystack = query.caseSensitive ? line : line.toLowerCase();
        const hits = tokens.filter((token) => haystack.includes(token)).length;
        return hits === 0
            ? 0
            : adjustedDefinitionIntentScore(line, haystack, hits / total, intent, query.caseSensitive);
    };
    return { match };
}
|
|
336
|
+
// Builds the matcher for exact-symbol queries: `match(line)` is 1 when the line contains the
// query text as a substring (case-folded unless `caseSensitive`), otherwise 0.
// Throws RepoSearchInvalidQueryError when the query text contains any whitespace.
function buildExactSymbolMatcher(query) {
    const containsWhitespace = /\s/.test(query.text);
    if (containsWhitespace) {
        throw new RepoSearchInvalidQueryError("exact-symbol query must not contain whitespace");
    }
    const needle = query.caseSensitive ? query.text : query.text.toLowerCase();
    const match = (line) => {
        const haystack = query.caseSensitive ? line : line.toLowerCase();
        if (haystack.includes(needle)) {
            return 1;
        }
        return 0;
    };
    return { match };
}
|
|
348
|
+
// Builds the matcher for regex queries.
// The pattern is first vetted by regexSafetyIssue and then compiled with the `g` flag (plus
// `i` unless `caseSensitive`). Throws RepoSearchInvalidQueryError when the safety check
// reports an issue or the pattern does not compile.
// `match(line)` returns 0 for no match, otherwise (number of matches, capped at 100) / 100.
function buildRegexMatcher(query) {
    const issue = regexSafetyIssue(query.text);
    if (issue !== undefined) {
        throw new RepoSearchInvalidQueryError(issue);
    }
    let regex;
    try {
        regex = new RegExp(query.text, query.caseSensitive ? "g" : "gi");
    }
    catch {
        throw new RepoSearchInvalidQueryError(`invalid regex: ${query.text}`);
    }
    const cap = 100;
    return {
        match: (line) => {
            // The regex is shared across calls and stateful because of `g`; always rescan
            // from the start of the line.
            regex.lastIndex = 0;
            let count = 0;
            while (count < cap) {
                const found = regex.exec(line);
                if (found === null) {
                    break;
                }
                count += 1;
                // A zero-length match does not advance lastIndex, so exec() would return the
                // same empty match forever. Count it once and stop. Checking only
                // `lastIndex === 0` missed empty matches at later positions (e.g. /a*/ on
                // "ab"), which ran the counter up to the cap and reported a score of 1.
                if (found[0].length === 0) {
                    break;
                }
            }
            return count === 0 ? 0 : count / cap;
        },
    };
}
|
|
375
|
+
// Dispatches on `query.kind` to the matching strategy builder.
// Throws RepoSearchInvalidQueryError for any kind other than "natural-language",
// "exact-symbol" or "regex" (the builders may also throw it for invalid query text).
export function buildMatcher(query) {
    switch (query.kind) {
        case "natural-language":
            return buildNaturalLanguageMatcher(query);
        case "exact-symbol":
            return buildExactSymbolMatcher(query);
        case "regex":
            return buildRegexMatcher(query);
        default:
            throw new RepoSearchInvalidQueryError(`unsupported query kind: ${query.kind}`);
    }
}
|
|
387
|
+
// Anchored-glob compilation for findFiles. Supports `*`, `**`, `?`, and literal characters.
// Brace expansion and extglob patterns are intentionally not supported.
//
//   `*`   matches any run of characters except `/`
//   `?`   matches exactly one character except `/`
//   `**/` at the start of a path segment matches zero or more whole directories
//   `**`  anywhere else matches any run of characters, including `/`
//
// The result is anchored at both ends and case-insensitive when `caseSensitive` is false.
export function compileGlob(pattern, caseSensitive = true) {
    let body = "";
    let i = 0;
    while (i < pattern.length) {
        const ch = pattern.charAt(i);
        if (ch === "*" && pattern.charAt(i + 1) === "*") {
            const atSegmentStart = i === 0 || pattern.charAt(i - 1) === "/";
            if (pattern.charAt(i + 2) === "/") {
                // Directory globstar. It must end on a `/` boundary: compiling it to a bare
                // `.*` let `**/foo.ts` match `barfoo.ts` and `src/**/x.ts` match `src/ax.ts`.
                body += atSegmentStart ? "(?:.*/)?" : ".*";
                i += 3;
            }
            else {
                body += ".*";
                i += 2;
            }
            continue;
        }
        if (ch === "*") {
            body += "[^/]*";
        }
        else if (ch === "?") {
            body += "[^/]";
        }
        else if (/[.+^${}()|[\]\\]/.test(ch)) {
            // Regex metacharacter in the glob: match it literally.
            body += `\\${ch}`;
        }
        else {
            body += ch;
        }
        i += 1;
    }
    return new RegExp(`^${body}$`, caseSensitive ? "u" : "iu");
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { CandidateOmissionReason, RetrievalQuery } from "@oscharko-dev/keiko-contracts/connected-context";
|
|
2
|
+
import type { DiscoveredFile } from "./types.js";
|
|
3
|
+
/** Intent label attached to a search; see `SearchPolicy.intent` and `SearchHints.retrievalIntent`. */
export type SearchIntent =
    | "project-metadata"
    | "repository-overview"
    | "targeted-code-search"
    | "diagnostic-search"
    | "clarification-needed"
    | "generic";
/** The two policy modes a search can run under. */
export type SearchPolicyMode = "workspace-root-default" | "explicit-scope";
/** Bucket names used as the keys of `SearchDiagnostics.candidateBuckets`. */
export type CandidateBucket =
    | "canonical-metadata"
    | "overview-doc"
    | "exact-path"
    | "symbol-source"
    | "source"
    | "test"
    | "docs"
    | "lockfile"
    | "low-value"
    | "other";
|
|
6
|
+
/** Optional caller-supplied hints; accepted by `resolveSearchPolicy`. */
export interface SearchHints {
    readonly retrievalIntent?: SearchIntent | undefined;
}

/** Resolved policy for one search. Field semantics are defined by the implementation module. */
export interface SearchPolicy {
    readonly mode: SearchPolicyMode;
    readonly intent: SearchIntent;
    // NOTE(review): presumably toggles honouring .gitignore during discovery - confirm in repoSearchPolicy.js.
    readonly applyGitignore: boolean;
    readonly omitLowValueWorkspaceFiles: boolean;
}

/** Counters and labels reported alongside an ordered candidate list. */
export interface SearchDiagnostics {
    readonly policyMode: SearchPolicyMode;
    readonly intent: SearchIntent;
    readonly filesDiscovered: number;
    readonly filesAfterPolicy: number;
    readonly ignoredByDiscovery: number;
    readonly deniedByDiscovery: number;
    /** One count per `CandidateBucket`; every bucket key is present. */
    readonly candidateBuckets: Readonly<Record<CandidateBucket, number>>;
}

/** Return shape of `orderCandidatesForSearch`. */
export interface CandidateOrderingResult {
    readonly files: readonly DiscoveredFile[];
    readonly diagnostics: SearchDiagnostics;
}
|
|
28
|
+
// Declarations only; the implementations live in repoSearchPolicy.js.

/** Produces a `SearchPolicy` from whether explicit relative paths were given and optional hints. */
export declare function resolveSearchPolicy(
    hasExplicitRelativePaths: boolean,
    hints: SearchHints | undefined,
): SearchPolicy;

/** Produces a `SearchPolicy` without hints. */
export declare function legacyDiscoveryPolicy(hasExplicitRelativePaths: boolean): SearchPolicy;

/** Returns an omission reason for `scopePath` under `policy`, or `undefined` when there is none. */
export declare function policyOmissionReason(
    scopePath: string,
    policy: SearchPolicy,
): CandidateOmissionReason | undefined;

/** Returns additional ignore-pattern lines for `policy`. */
export declare function extraIgnoreLinesForSearch(policy: SearchPolicy): readonly string[];

/** Orders `files` for `query` under `policy` and returns them with diagnostics. */
export declare function orderCandidatesForSearch(
    files: readonly DiscoveredFile[],
    query: RetrievalQuery,
    policy: SearchPolicy,
    ignoredByDiscovery: number,
    deniedByDiscovery: number,
): CandidateOrderingResult;

/** Decides whether `text` should be content-scored for `query` under `policy`. */
export declare function shouldScoreContent(
    query: RetrievalQuery,
    text: string,
    policy: SearchPolicy,
): boolean;
|
|
34
|
+
//# sourceMappingURL=repoSearchPolicy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"repoSearchPolicy.d.ts","sourceRoot":"","sources":["../src/repoSearchPolicy.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,uBAAuB,EACvB,cAAc,EACf,MAAM,iDAAiD,CAAC;AACzD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,MAAM,YAAY,GACpB,kBAAkB,GAClB,qBAAqB,GACrB,sBAAsB,GACtB,mBAAmB,GACnB,sBAAsB,GACtB,SAAS,CAAC;AAEd,MAAM,MAAM,gBAAgB,GAAG,wBAAwB,GAAG,gBAAgB,CAAC;AAE3E,MAAM,MAAM,eAAe,GACvB,oBAAoB,GACpB,cAAc,GACd,YAAY,GACZ,eAAe,GACf,QAAQ,GACR,MAAM,GACN,MAAM,GACN,UAAU,GACV,WAAW,GACX,OAAO,CAAC;AAEZ,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,eAAe,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;CACrD;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,IAAI,EAAE,gBAAgB,CAAC;IAChC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,QAAQ,CAAC,0BAA0B,EAAE,OAAO,CAAC;CAC9C;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,UAAU,EAAE,gBAAgB,CAAC;IACtC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC;IAC9B,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC,MAAM,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC,CAAC;CACtE;AAED,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,KAAK,EAAE,SAAS,cAAc,EAAE,CAAC;IAC1C,QAAQ,CAAC,WAAW,EAAE,iBAAiB,CAAC;CACzC;AA4TD,wBAAgB,mBAAmB,CACjC,wBAAwB,EAAE,OAAO,EACjC,KAAK,EAAE,WAAW,GAAG,SAAS,GAC7B,YAAY,CASd;AAOD,wBAAgB,qBAAqB,CAAC,wBAAwB,EAAE,OAAO,GAAG,YAAY,CAOrF;AAED,wBAAgB,oBAAoB,CAClC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,YAAY,GACnB,uBAAuB,GAAG,SAAS,CAQrC;AAED,wBAAgB,yBAAyB,CAAC,MAAM,EAAE,YAAY,GAAG,SAAS,MAAM,EAAE,CAEjF;AAED,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,SAAS,cAAc,EAAE,EAChC,KAAK,EAAE,cAAc,EACrB,MAAM,EAAE,YAAY,EACpB,kBAAkB,EAAE,MAAM,EAC1B,iBAAiB,EAAE,MAAM,GACxB,uBAAuB,CAczB;AAED,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,cAAc,EACrB,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,YAAY,GACnB,OAAO,CAUT"}
|