@oscharko-dev/keiko-workspace 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/binaryDetect.d.ts +6 -0
- package/dist/binaryDetect.d.ts.map +1 -0
- package/dist/binaryDetect.js +20 -0
- package/dist/contextPack.d.ts +24 -0
- package/dist/contextPack.d.ts.map +1 -0
- package/dist/contextPack.js +118 -0
- package/dist/detect.d.ts +5 -0
- package/dist/detect.d.ts.map +1 -0
- package/dist/detect.js +144 -0
- package/dist/discovery.d.ts +10 -0
- package/dist/discovery.d.ts.map +1 -0
- package/dist/discovery.js +199 -0
- package/dist/document-extraction.d.ts +44 -0
- package/dist/document-extraction.d.ts.map +1 -0
- package/dist/document-extraction.js +372 -0
- package/dist/errors.d.ts +3 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +4 -0
- package/dist/fs.d.ts +25 -0
- package/dist/fs.d.ts.map +1 -0
- package/dist/fs.js +69 -0
- package/dist/gitHistory.d.ts +3 -0
- package/dist/gitHistory.d.ts.map +1 -0
- package/dist/gitHistory.js +317 -0
- package/dist/ignore.d.ts +15 -0
- package/dist/ignore.d.ts.map +1 -0
- package/dist/ignore.js +248 -0
- package/dist/importGraph.d.ts +3 -0
- package/dist/importGraph.d.ts.map +1 -0
- package/dist/importGraph.js +131 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +25 -0
- package/dist/paths.d.ts +3 -0
- package/dist/paths.d.ts.map +1 -0
- package/dist/paths.js +38 -0
- package/dist/realpath.d.ts +9 -0
- package/dist/realpath.d.ts.map +1 -0
- package/dist/realpath.js +72 -0
- package/dist/repoSearch.d.ts +46 -0
- package/dist/repoSearch.d.ts.map +1 -0
- package/dist/repoSearch.js +350 -0
- package/dist/repoSearchEntries.d.ts +15 -0
- package/dist/repoSearchEntries.d.ts.map +1 -0
- package/dist/repoSearchEntries.js +106 -0
- package/dist/repoSearchLineSelection.d.ts +18 -0
- package/dist/repoSearchLineSelection.d.ts.map +1 -0
- package/dist/repoSearchLineSelection.js +43 -0
- package/dist/repoSearchMatchers.d.ts +8 -0
- package/dist/repoSearchMatchers.d.ts.map +1 -0
- package/dist/repoSearchMatchers.js +414 -0
- package/dist/repoSearchPolicy.d.ts +34 -0
- package/dist/repoSearchPolicy.d.ts.map +1 -0
- package/dist/repoSearchPolicy.js +342 -0
- package/dist/repoSearchRegexSafety.d.ts +2 -0
- package/dist/repoSearchRegexSafety.d.ts.map +1 -0
- package/dist/repoSearchRegexSafety.js +15 -0
- package/dist/repoSearchScan.d.ts +62 -0
- package/dist/repoSearchScan.d.ts.map +1 -0
- package/dist/repoSearchScan.js +292 -0
- package/dist/retrieval.d.ts +10 -0
- package/dist/retrieval.d.ts.map +1 -0
- package/dist/retrieval.js +74 -0
- package/dist/stableId.d.ts +4 -0
- package/dist/stableId.d.ts.map +1 -0
- package/dist/stableId.js +49 -0
- package/dist/structuralAdapters.d.ts +27 -0
- package/dist/structuralAdapters.d.ts.map +1 -0
- package/dist/structuralAdapters.js +87 -0
- package/dist/summary.d.ts +4 -0
- package/dist/summary.d.ts.map +1 -0
- package/dist/summary.js +54 -0
- package/dist/testSourcePairing.d.ts +3 -0
- package/dist/testSourcePairing.d.ts.map +1 -0
- package/dist/testSourcePairing.js +179 -0
- package/dist/types.d.ts +3 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +35 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
// Governed, deterministic, audit-friendly repository search facade (Epic #177, Issue #179).
|
|
2
|
+
// Composes the existing workspace primitives — discovery, deny policy, realpath gate,
|
|
3
|
+
// readWorkspaceFile, plus the new binaryDetect and stableId modules — into three public
|
|
4
|
+
// APIs that emit normalized EvidenceAtom output: searchText, findFiles, readExcerpt.
|
|
5
|
+
// Pure JS (no subprocess, no ripgrep — deferred). Every fs touch goes through the
|
|
6
|
+
// WorkspaceFs port. Stable IDs are reproducible across runs given the same inputs.
|
|
7
|
+
import { isValidScopePath, validateRetrievalQuery, } from "@oscharko-dev/keiko-contracts/connected-context";
|
|
8
|
+
import { readWorkspaceFile } from "./discovery.js";
|
|
9
|
+
import { RepoSearchInvalidQueryError, RepoSearchInvalidRangeError, RepoSearchUnsupportedFileError, } from "./errors.js";
|
|
10
|
+
import { nodeWorkspaceFs } from "./fs.js";
|
|
11
|
+
import { isDenied } from "./ignore.js";
|
|
12
|
+
import { resolveWithinWorkspace } from "./paths.js";
|
|
13
|
+
import { containedRealPathInfo } from "./realpath.js";
|
|
14
|
+
import { buildMatcher, compileGlob, fingerprintFor } from "./repoSearchMatchers.js";
|
|
15
|
+
import { buildAtom, buildCandidate, elapsed, gatherCandidates, hitLimit, isImageScopePath, isIoError, probeBinary, scanFile, } from "./repoSearchScan.js";
|
|
16
|
+
import { policyOmissionReason, resolveSearchPolicy, } from "./repoSearchPolicy.js";
|
|
17
|
+
// Ceilings used by searchText/findFiles when the caller does not pass its own limits.
export const DEFAULT_SEARCH_LIMITS = {
    // Cap on candidate files collected/scanned per call.
    maxFilesScanned: 2000,
    // Cap on emitted matches; further reduced by query.maxResults at call time.
    maxMatchesReturned: 200,
    // 512 KiB per-file byte budget (consumed by the scan module — not visible in this file).
    maxBytesPerFileScanned: 512 * 1024,
    // Wall-clock budget in milliseconds, compared against nowMs() - startMs.
    elapsedMsMax: 5 * 1000,
};
|
|
23
|
+
// Upper bound (2 MiB) on how many bytes of a file readExcerpt will load to reach a requested line
|
|
24
|
+
// window. The returned excerpt content is still clamped to the caller's request.maxBytes; this cap
|
|
25
|
+
// only governs how deep into a file we can slice. Decoupling it from request.maxBytes lets excerpts
|
|
26
|
+
// be read from files far larger than a single excerpt budget (a 16 KiB doc was previously unreadable
|
|
27
|
+
// and crashed the grounded request — Epic #177). Kept in step with the planner's 2 MiB scan cap so
|
|
28
|
+
// any file the search can match can also be excerpted. Files larger than this raise
|
|
29
|
+
// FileTooLargeError, which callers handle as a graceful omission.
|
|
30
|
+
const MAX_EXCERPT_FILE_BYTES = 2_097_152;
|
|
31
|
+
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
|
32
|
+
// Clamps `text` to at most `maxBytes` bytes of UTF-8.
// Returns { excerpt, truncated }: `truncated` is false only when the whole text fits.
// A non-positive budget yields an empty, truncated excerpt.
function clampToBytes(text, maxBytes) {
    if (maxBytes <= 0) {
        return { excerpt: "", truncated: true };
    }
    const encoded = new TextEncoder().encode(text);
    if (encoded.length <= maxBytes) {
        return { excerpt: text, truncated: false };
    }
    // Cut on a code-point boundary instead of decoding a torn sequence and stripping the
    // resulting U+FFFD afterwards: that strip also removed replacement characters that were
    // genuinely present at the end of the kept text. While the first excluded byte is a UTF-8
    // continuation byte (10xxxxxx) the cut sits inside a character, so step back to its lead byte.
    let cut = Math.floor(maxBytes);
    while (cut > 0 && (encoded[cut] & 0xc0) === 0x80) {
        cut -= 1;
    }
    const excerpt = new TextDecoder("utf-8", { fatal: false }).decode(encoded.subarray(0, cut));
    return { excerpt, truncated: true };
}
|
|
44
|
+
// Runs the shared contract validator over `query` and throws RepoSearchInvalidQueryError,
// listing every reported reason, when the validator does not return ok.
function assertQuery(query) {
    const verdict = validateRetrievalQuery(query);
    if (verdict.ok) {
        return;
    }
    const reasonList = verdict.reasons.join(", ");
    throw new RepoSearchInvalidQueryError(`query invalid: ${reasonList}`);
}
|
|
50
|
+
// Rejects a workspace whose root path is the empty string; any other root passes through.
function assertWorkspaceRoot(workspace) {
    const rootIsEmpty = workspace.root.length === 0;
    if (!rootIsEmpty) {
        return;
    }
    throw new RepoSearchInvalidQueryError("scope.workspace.root is empty");
}
|
|
55
|
+
// True only when a signal object was supplied and its `aborted` flag is exactly `true`.
function isAborted(signal) {
    if (signal === undefined || signal === null) {
        return false;
    }
    return signal.aborted === true;
}
|
|
58
|
+
// Result returned when a search is cancelled: no atoms or candidates, zero files scanned,
// flagged as truncated, carrying only the elapsed time supplied by the caller.
function abortedSearchResult(elapsedMs) {
    const result = { atoms: [], candidates: [], filesScanned: 0 };
    result.elapsedMs = elapsedMs;
    result.truncated = true;
    // Kept as an explicit own property so callers can overwrite it via spread.
    result.diagnostics = undefined;
    return result;
}
|
|
68
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
69
|
+
// Yields to the event loop every SCAN_YIELD_INTERVAL files so a large cold NFS/SMB workspace
|
|
70
|
+
// cannot block the event loop for multiple seconds. discoverFiles() itself remains synchronous
|
|
71
|
+
// (sync walk is load-bearing for importGraph/testSourcePairing callers); the yield here covers
|
|
72
|
+
// the already-async per-file scan pass where the loop overhead is measurable.
|
|
73
|
+
const SCAN_YIELD_INTERVAL = 64;
|
|
74
|
+
// Assembles the per-call runner object consumed by the scan helpers: the scope, the effective
// limits, the fs/clock/signal dependencies, the start timestamp, and the matcher, fingerprint
// and policy derived from the query.
function buildSearchTextRunner(scope, query, limits, deps) {
    // The per-query maxResults can only tighten the configured match cap, never widen it.
    const matchCap = Math.min(limits.maxMatchesReturned, query.maxResults);
    const effectiveLimits = { ...limits, maxMatchesReturned: matchCap };
    // Start the clock before the matcher/policy are built so their cost counts against the budget.
    const startMs = deps.nowMs();
    const matcher = buildMatcher(query);
    const fingerprint = fingerprintFor(query);
    const hasExplicitPaths = scope.relativePaths.length > 0;
    const policy = resolveSearchPolicy(hasExplicitPaths, deps.searchHints);
    return {
        scope,
        limits: effectiveLimits,
        fs: deps.fs,
        nowMs: deps.nowMs,
        startMs,
        signal: deps.signal,
        matcher,
        fingerprint,
        policy,
        query,
    };
}
|
|
91
|
+
// Scans the candidate files sequentially, stopping as soon as hitLimit reports that a limit
// has been reached. Every SCAN_YIELD_INTERVAL files it hands control back to the event loop.
async function runScanLoop(runner, candidateSet, state, atoms, candidates) {
    const yieldToEventLoop = () => new Promise((resolve) => {
        setImmediate(resolve);
    });
    let visited = 0;
    for (const file of candidateSet.files) {
        if (hitLimit(runner, state)) {
            return;
        }
        visited += 1;
        const dueForYield = visited % SCAN_YIELD_INTERVAL === 0;
        if (dueForYield) {
            await yieldToEventLoop();
            // Limits are re-checked after the pause before any further file is scanned.
            if (hitLimit(runner, state)) {
                return;
            }
        }
        await scanFile(runner, file, state, atoms, candidates);
    }
}
|
|
109
|
+
// Content search over a scope. Validates the workspace root and the query, rejects
// file-pattern queries (those belong to findFiles), gathers candidate files and scans them.
// Resolves to { atoms, candidates, filesScanned, elapsedMs, truncated, diagnostics }.
// An already-aborted signal short-circuits to an empty, truncated result.
// Throws RepoSearchInvalidQueryError for an empty root, an invalid query or a file-pattern query.
export async function searchText(scope, query, limits = DEFAULT_SEARCH_LIMITS, deps = {}) {
    assertWorkspaceRoot(scope.workspace);
    assertQuery(query);
    if (query.kind === "file-pattern") {
        throw new RepoSearchInvalidQueryError("searchText does not accept file-pattern queries");
    }
    const fs = deps.fs ?? nodeWorkspaceFs;
    const nowMs = deps.nowMs ?? Date.now;
    // Optional dependencies are only forwarded when the caller actually supplied them.
    const runnerDeps = { fs, nowMs };
    if (deps.searchHints !== undefined) {
        runnerDeps.searchHints = deps.searchHints;
    }
    if (deps.signal !== undefined) {
        runnerDeps.signal = deps.signal;
    }
    const runner = buildSearchTextRunner(scope, query, limits, runnerDeps);
    if (isAborted(deps.signal)) {
        return abortedSearchResult(elapsed(runner));
    }
    const candidateSet = gatherCandidates(scope, query, limits, fs, runner.policy);
    if (isAborted(deps.signal)) {
        // Candidate gathering already ran, so its diagnostics are still reported.
        const aborted = abortedSearchResult(elapsed(runner));
        aborted.diagnostics = candidateSet.diagnostics;
        return aborted;
    }
    const atoms = [];
    const candidates = [];
    // Truncation reported by candidate gathering is carried into the scan state.
    const state = { filesScanned: 0, matchesReturned: 0, truncated: candidateSet.truncated };
    await runScanLoop(runner, candidateSet, state, atoms, candidates);
    const filesScanned = state.filesScanned;
    const elapsedMs = elapsed(runner);
    return {
        atoms,
        candidates,
        filesScanned,
        elapsedMs,
        truncated: state.truncated,
        diagnostics: candidateSet.diagnostics,
    };
}
|
|
148
|
+
// Appends one "file-listing" atom for `relativePath` to `atoms`. The atom has no line range,
// a fixed score of 1, and is stamped with the context's clock.
function emitFileListing(ctx, relativePath, atoms) {
    const listingAtom = buildAtom({
        scopeId: ctx.scope.scopeId,
        scopePath: relativePath,
        lineRange: undefined,
        provenanceKind: "file-listing",
        tool: "repo.findFiles",
        queryFingerprint: ctx.fingerprint,
        score: 1,
        emittedAtMs: ctx.nowMs(),
    });
    atoms.push(listingAtom);
}
|
|
160
|
+
// Decides whether the file-listing loop must stop: the signal was aborted, enough atoms were
// emitted, or the elapsed-time budget was exceeded. The clock is read only if the first two
// checks pass.
function hitFileListingLimit(state, maxMatches, startMs, nowMs, limits, signal) {
    if (signal?.aborted === true) {
        return true;
    }
    if (state.atoms.length >= maxMatches) {
        return true;
    }
    const spentMs = nowMs() - startMs;
    return spentMs > limits.elapsedMsMax;
}
|
|
165
|
+
// Walks the candidate files and emits a file-listing atom for every path that matches
// ctx.regex. Denied paths and policy-omitted paths are recorded as candidates with the reason
// they were skipped and do not count towards filesScanned.
// Returns { atoms, candidates, filesScanned, truncated }.
function collectFileListings(ctx, candidateSet, policy, inputs) {
    const state = {
        atoms: [],
        candidates: [],
        filesScanned: 0,
        truncated: candidateSet.truncated,
    };
    const { maxMatches, startMs, limits, signal } = inputs;
    for (const file of candidateSet.files) {
        const mustStop = hitFileListingLimit(state, maxMatches, startMs, ctx.nowMs, limits, signal);
        if (mustStop) {
            // Files remained when a limit hit, so the listing is incomplete.
            state.truncated = true;
            break;
        }
        const relPath = file.relativePath;
        if (isDenied(relPath)) {
            state.candidates.push(buildCandidate(relPath, "ignored"));
        }
        else {
            const omissionReason = policyOmissionReason(relPath, policy);
            if (omissionReason !== undefined) {
                state.candidates.push(buildCandidate(relPath, omissionReason));
            }
            else {
                state.filesScanned += 1;
                if (ctx.regex.test(relPath)) {
                    emitFileListing(ctx, relPath, state.atoms);
                }
            }
        }
    }
    return state;
}
|
|
193
|
+
// Synchronous core of findFiles: compiles the query text into a path regex, gathers candidates
// under the resolved policy and lists the matching paths. Returns the same result shape as
// searchText. An aborted signal yields an empty, truncated result.
function findFilesSync(scope, query, limits, fs, nowMs, hints, signal) {
    const startMs = nowMs();
    const sinceStart = () => nowMs() - startMs;
    if (isAborted(signal)) {
        return abortedSearchResult(0);
    }
    // The per-query cap is honoured alongside the global match limit.
    const maxMatches = Math.min(limits.maxMatchesReturned, query.maxResults);
    const regex = compileGlob(query.text, query.caseSensitive);
    const fingerprint = fingerprintFor(query);
    const ctx = { scope, regex, fingerprint, nowMs };
    const hasExplicitPaths = scope.relativePaths.length > 0;
    const policy = resolveSearchPolicy(hasExplicitPaths, hints);
    const candidateSet = gatherCandidates(scope, query, limits, fs, policy);
    if (isAborted(signal)) {
        // Gathering already ran, so its diagnostics are still reported.
        const aborted = abortedSearchResult(sinceStart());
        aborted.diagnostics = candidateSet.diagnostics;
        return aborted;
    }
    // The signal is only forwarded when the caller supplied one.
    const inputs = { limits, maxMatches, startMs };
    if (signal !== undefined) {
        inputs.signal = signal;
    }
    const listing = collectFileListings(ctx, candidateSet, policy, inputs);
    return {
        atoms: listing.atoms,
        candidates: listing.candidates,
        filesScanned: listing.filesScanned,
        elapsedMs: sinceStart(),
        truncated: listing.truncated,
        diagnostics: candidateSet.diagnostics,
    };
}
|
|
226
|
+
// Lists files in the scope whose workspace-relative path matches a file-pattern query.
// Validates the workspace root and the query, then delegates to the synchronous core.
// Resolves to the same result shape as searchText.
// Throws (as a rejection) RepoSearchInvalidQueryError for an empty root, an invalid query,
// or a query whose kind is not "file-pattern".
export async function findFiles(scope, query, limits = DEFAULT_SEARCH_LIMITS, deps = {}) {
    assertWorkspaceRoot(scope.workspace);
    assertQuery(query);
    if (query.kind !== "file-pattern") {
        throw new RepoSearchInvalidQueryError("findFiles requires a file-pattern query");
    }
    const fs = deps.fs ?? nodeWorkspaceFs;
    const nowMs = deps.nowMs ?? Date.now;
    // An async function already wraps its return value in a promise (and turns a synchronous
    // throw into a rejection), so no explicit Promise.resolve/await is needed here.
    return findFilesSync(scope, query, limits, fs, nowMs, deps.searchHints, deps.signal);
}
|
|
236
|
+
// Derives the query fingerprint for an excerpt read by fingerprinting a synthetic query whose
// text is "<scopePath>:<startLine>-<endLine>". Every other field is a constant, so the
// fingerprint input depends only on the requested path and line window.
function buildExcerptFingerprint(request) {
    const { scopePath, startLine, endLine } = request;
    const windowKey = `${scopePath}:${startLine.toString()}-${endLine.toString()}`;
    const syntheticQuery = {
        kind: "natural-language",
        text: windowKey,
        caseSensitive: false,
        maxResults: 1,
        emittedAtMs: 0,
    };
    return fingerprintFor(syntheticQuery);
}
|
|
245
|
+
// Reports whether `scopePath` falls inside the scope's selection. An empty selection means the
// whole workspace is selected. Otherwise the path must equal a selected path or sit beneath
// one (prefix match on "<selected>/").
function isWithinSelectedScope(scope, scopePath) {
    const selection = scope.relativePaths;
    if (selection.length === 0) {
        return true;
    }
    for (const selectedPath of selection) {
        if (scopePath === selectedPath) {
            return true;
        }
        if (scopePath.startsWith(`${selectedPath}/`)) {
            return true;
        }
    }
    return false;
}
|
|
251
|
+
// Converts every backslash in a scope path to a forward slash.
function normalizeScopePath(scopePath) {
    return scopePath.replace(/\\/g, "/");
}
|
|
254
|
+
// Throws RepoSearchUnsupportedFileError ("outside-scope") when `scopePath` is not covered by
// the scope's selected paths; returns normally otherwise.
function assertExcerptWithinSelectedScope(scope, scopePath) {
    const covered = isWithinSelectedScope(scope, scopePath);
    if (!covered) {
        throw new RepoSearchUnsupportedFileError(`cannot read excerpt outside selected scope: ${scopePath}`, "outside-scope");
    }
}
|
|
260
|
+
// Resolves a requested scope path to the path reported by containedRealPathInfo and to its
// real workspace-relative path (slash-normalized). Returns { path, realScopePath }.
function resolveExcerptTarget(scope, scopePath, fs) {
    const workspaceRoot = scope.workspace.root;
    const requestedAbs = resolveWithinWorkspace(workspaceRoot, scopePath);
    const contained = containedRealPathInfo(fs, workspaceRoot, requestedAbs);
    return {
        path: contained.path,
        realScopePath: normalizeScopePath(contained.realRelative),
    };
}
|
|
266
|
+
// Throws RepoSearchUnsupportedFileError ("denied") when either the requested path or its
// resolved real path is on the deny list.
function assertExcerptReadableByPolicy(requestPath, realScopePath) {
    // This gate has to run before any byte of the file is read (the binary probe included), so
    // a denied path such as .env is never touched — not even via a symlink inside the
    // workspace. .gitignore is not a context policy boundary: safe ignored/dot files stay
    // readable when the user scopes them in.
    const denied = isDenied(requestPath) || isDenied(realScopePath);
    if (!denied) {
        return;
    }
    throw new RepoSearchUnsupportedFileError(`cannot read excerpt of denied path: ${requestPath}`, "denied");
}
|
|
274
|
+
// Validates an excerpt request and throws RepoSearchInvalidRangeError when:
//   - startLine/endLine are not integers, startLine < 1, or endLine < startLine;
//   - maxBytes is not a non-negative integer;
//   - scopePath is not a valid relative scope path.
function assertExcerptRange(request) {
    const { startLine, endLine, maxBytes, scopePath } = request;
    const linesAreIntegers = Number.isInteger(startLine) && Number.isInteger(endLine);
    if (!linesAreIntegers || startLine < 1 || endLine < startLine) {
        // String() instead of .toString(): a missing (undefined/null) bound must be reported as
        // an invalid range, not crash this validator with a TypeError while building the message.
        throw new RepoSearchInvalidRangeError(`invalid line range: ${String(startLine)}-${String(endLine)}`);
    }
    // Number.isInteger already rejects NaN, ±Infinity and non-numbers, so no separate
    // isFinite check is needed.
    if (!Number.isInteger(maxBytes) || maxBytes < 0) {
        throw new RepoSearchInvalidRangeError(`invalid maxBytes: ${String(maxBytes)} (must be a finite non-negative integer)`);
    }
    if (!isValidScopePath(scopePath, { mustBeRelative: true })) {
        throw new RepoSearchInvalidRangeError(`invalid scopePath: ${scopePath}`);
    }
}
|
|
290
|
+
// Probes for binary content and throws RepoSearchUnsupportedFileError on both binary detection
|
|
291
|
+
// and IO errors (EACCES, ENOENT, …) so the caller can treat both as a graceful skip.
|
|
292
|
+
// Runs the binary probe on the target file. Throws RepoSearchUnsupportedFileError with reason
// "binary" when the probe reports binary content, and with reason "io-error" when the probe
// itself fails with an IO error. Any other probe failure is rethrown unchanged.
async function assertExcerptNotBinary(fs, absolutePath, size, scopePath) {
    let looksBinary;
    try {
        looksBinary = await probeBinary(fs, absolutePath, size);
    }
    catch (probeError) {
        if (!isIoError(probeError)) {
            throw probeError;
        }
        // TOCTOU: permissions or availability can change between stat and probe (EACCES,
        // ENOENT, …). Reporting this as an unsupported file lets callers skip the excerpt
        // rather than fail the whole request.
        throw new RepoSearchUnsupportedFileError(`cannot read excerpt of unreadable file: ${scopePath}`, "io-error");
    }
    if (!looksBinary) {
        return;
    }
    throw new RepoSearchUnsupportedFileError(`cannot read excerpt of binary file: ${scopePath}`, "binary");
}
|
|
311
|
+
// Reads lines [request.startLine, request.endLine] (1-based, inclusive) of one file in the
// scope and returns { atom, content, truncated }, where `content` is clamped to
// request.maxBytes bytes of UTF-8.
//
// Throws RepoSearchInvalidRangeError for a malformed request, RepoSearchInvalidQueryError for
// an empty workspace root, and RepoSearchUnsupportedFileError with a reason of "aborted",
// "outside-scope", "denied", "binary" (also used for image paths) or "io-error".
export async function readExcerpt(scope, request, deps = {}) {
    if (isAborted(deps.signal)) {
        throw new RepoSearchUnsupportedFileError("repo-search operation aborted", "aborted");
    }
    assertExcerptRange(request);
    assertWorkspaceRoot(scope.workspace);
    assertExcerptWithinSelectedScope(scope, request.scopePath);
    if (isImageScopePath(request.scopePath)) {
        throw new RepoSearchUnsupportedFileError(`cannot read excerpt of image file: ${request.scopePath}`, "binary");
    }
    const fs = deps.fs ?? nodeWorkspaceFs;
    const nowMs = deps.nowMs ?? Date.now;
    const target = resolveExcerptTarget(scope, request.scopePath, fs);
    // Policy and scope are re-checked against the resolved real path so a symlink cannot lead
    // out of the selection or onto a denied file.
    assertExcerptReadableByPolicy(request.scopePath, target.realScopePath);
    assertExcerptWithinSelectedScope(scope, target.realScopePath);
    // The file can disappear or become unreadable between path resolution and stat. Classify
    // that the same way the binary probe classifies IO failures, so callers that skip
    // unsupported files degrade gracefully instead of receiving a raw IO error.
    let stat;
    try {
        stat = fs.stat(target.path);
    }
    catch (err) {
        if (isIoError(err)) {
            throw new RepoSearchUnsupportedFileError(`cannot read excerpt of unreadable file: ${request.scopePath}`, "io-error");
        }
        throw err;
    }
    await assertExcerptNotBinary(fs, target.path, stat.size, request.scopePath);
    if (isAborted(deps.signal)) {
        throw new RepoSearchUnsupportedFileError("repo-search operation aborted", "aborted");
    }
    // Read enough of the file to reach the requested line window (bounded by
    // MAX_EXCERPT_FILE_BYTES), then clamp the returned content to the caller's
    // request.maxBytes budget. The read cap is intentionally larger than request.maxBytes so a
    // window deep in a multi-kibibyte file is still reachable instead of the whole file being
    // rejected.
    const content = readWorkspaceFile(scope.workspace, request.scopePath, { maxBytes: MAX_EXCERPT_FILE_BYTES }, fs);
    const allLines = content.text.split("\n");
    // startLine is 1-based; slice() takes a 0-based start and an exclusive end.
    const slice = allLines.slice(request.startLine - 1, request.endLine).join("\n");
    const clamped = clampToBytes(slice, request.maxBytes);
    const atom = buildAtom({
        scopeId: scope.scopeId,
        scopePath: request.scopePath,
        lineRange: { startLine: request.startLine, endLine: request.endLine },
        provenanceKind: "excerpt-read",
        tool: "repo.readExcerpt",
        queryFingerprint: buildExcerptFingerprint(request),
        score: 1,
        emittedAtMs: nowMs(),
    });
    return { atom, content: clamped.excerpt, truncated: clamped.truncated };
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { WorkspaceFs } from "./fs.js";
|
|
2
|
+
import type { DiscoveredFile, WorkspaceInfo } from "./types.js";
|
|
3
|
+
interface ScopeShape {
|
|
4
|
+
readonly workspace: WorkspaceInfo;
|
|
5
|
+
readonly relativePaths: readonly string[];
|
|
6
|
+
}
|
|
7
|
+
interface LimitsShape {
|
|
8
|
+
readonly maxFilesScanned: number;
|
|
9
|
+
}
|
|
10
|
+
export declare function collectFromEntries(scope: ScopeShape, limits: LimitsShape, fs: WorkspaceFs): {
|
|
11
|
+
files: readonly DiscoveredFile[];
|
|
12
|
+
truncated: boolean;
|
|
13
|
+
};
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=repoSearchEntries.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"repoSearchEntries.d.ts","sourceRoot":"","sources":["../src/repoSearchEntries.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAqB,WAAW,EAAE,MAAM,SAAS,CAAC;AAI9D,OAAO,KAAK,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAGhE,UAAU,UAAU;IAClB,QAAQ,CAAC,SAAS,EAAE,aAAa,CAAC;IAClC,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;CAC3C;AAED,UAAU,WAAW;IACnB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;CAClC;AAiHD,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE,WAAW,EACnB,EAAE,EAAE,WAAW,GACd;IAAE,KAAK,EAAE,SAAS,cAAc,EAAE,CAAC;IAAC,SAAS,EAAE,OAAO,CAAA;CAAE,CAgB1D"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { isDenied } from "./ignore.js";
|
|
2
|
+
import { resolveWithinWorkspace } from "./paths.js";
|
|
3
|
+
import { containedRealPathInfo } from "./realpath.js";
|
|
4
|
+
import { RepoSearchInvalidQueryError } from "./errors.js";
|
|
5
|
+
// Converts every backslash in a scope path to a forward slash.
function normalizeScopePath(scopePath) {
    return scopePath.replace(/\\/g, "/");
}
|
|
8
|
+
// Lists a directory through the fs port and returns a new array of its entries ordered by
// name (code-unit comparison). A directory that cannot be read is treated as empty — a
// deliberate best-effort so one unreadable directory does not abort the whole walk.
function readDirSorted(fs, absoluteDir) {
    let entries;
    try {
        entries = fs.readDir(absoluteDir);
    }
    catch {
        return [];
    }
    // Three-way comparison: a comparator must return 0 for equal keys, otherwise the sort
    // contract is violated and the relative order of equal names is implementation-defined.
    const byName = (a, b) => {
        if (a.name < b.name) {
            return -1;
        }
        return a.name > b.name ? 1 : 0;
    };
    // Copy before sorting so the array returned by the port is never mutated.
    return [...entries].sort(byName);
}
|
|
16
|
+
// Records a regular file in walk.files unless the collection has already overshot
// limits.maxFilesScanned. The walk keeps one file beyond the cap: that overshoot is how
// truncation is detected, and it sets walk.truncated.
function pushAllowedFile(walk, relPath, absPath) {
    const cap = walk.limits.maxFilesScanned;
    const overCap = () => walk.files.length > cap;
    if (overCap()) {
        return;
    }
    const info = walk.fs.stat(absPath);
    if (info.isFile) {
        walk.files.push({ relativePath: relPath, sizeBytes: info.size });
        if (overCap()) {
            walk.truncated = true;
        }
    }
}
|
|
29
|
+
// A path passes the filters exactly when the deny list does not match it.
function allowedByFilters(relPath) {
    return isDenied(relPath) ? false : true;
}
|
|
32
|
+
// Processes one directory entry during the walk. Symbolic links are skipped outright. The
// entry is filtered twice — by its logical path and by its resolved real path — and then
// either recursed into (directory) or offered to pushAllowedFile (anything else).
// `absoluteDir` is accepted but not read by this function.
function handleDirectoryEntry(walk, absoluteDir, dirRel, entry, depth) {
    if (entry.isSymbolicLink) {
        return;
    }
    const workspaceRoot = walk.scope.workspace.root;
    let logicalRel = entry.name;
    if (dirRel.length !== 0) {
        logicalRel = `${dirRel}/${entry.name}`;
    }
    if (!allowedByFilters(logicalRel)) {
        return;
    }
    const logicalAbs = resolveWithinWorkspace(workspaceRoot, logicalRel);
    const contained = containedRealPathInfo(walk.fs, workspaceRoot, logicalAbs);
    const realRel = normalizeScopePath(contained.realRelative);
    if (!allowedByFilters(realRel)) {
        return;
    }
    if (!entry.isDirectory) {
        pushAllowedFile(walk, realRel, contained.path);
        return;
    }
    walkEntryDirectory(walk, contained.path, realRel, depth + 1);
}
|
|
53
|
+
// Visits the entries of one directory in name order. Stops descending beyond depth 12 or once
// the walk is marked truncated, and marks the walk truncated as soon as the collected file
// count exceeds limits.maxFilesScanned.
function walkEntryDirectory(walk, absoluteDir, dirRel, depth) {
    const tooDeep = depth > 12;
    if (tooDeep || walk.truncated) {
        return;
    }
    const entries = readDirSorted(walk.fs, absoluteDir);
    for (let idx = 0; idx < entries.length; idx += 1) {
        const overBudget = walk.files.length > walk.limits.maxFilesScanned;
        if (overBudget) {
            walk.truncated = true;
            return;
        }
        handleDirectoryEntry(walk, absoluteDir, dirRel, entries[idx], depth);
    }
}
|
|
65
|
+
// Expands one selected scope path into files. Denied paths (by requested or resolved real
// path) are skipped silently. A path that cannot be stat'ed raises
// RepoSearchInvalidQueryError. Directories are walked starting at depth 1; anything else is
// offered to pushAllowedFile.
function handleScopeEntry(walk, entry) {
    const workspaceRoot = walk.scope.workspace.root;
    const requestedAbs = resolveWithinWorkspace(workspaceRoot, entry);
    const contained = containedRealPathInfo(walk.fs, workspaceRoot, requestedAbs);
    const entryRel = normalizeScopePath(entry);
    const realRel = normalizeScopePath(contained.realRelative);
    const denied = isDenied(entryRel) || isDenied(realRel);
    if (denied) {
        return;
    }
    let info;
    try {
        info = walk.fs.stat(contained.path);
    }
    catch {
        throw new RepoSearchInvalidQueryError("Connected scope path is not accessible from the selected project.");
    }
    // allowedByFilters currently only negates isDenied, so this repeats the check above.
    const passesFilters = allowedByFilters(entryRel) && allowedByFilters(realRel);
    if (!passesFilters) {
        return;
    }
    if (!info.isDirectory) {
        pushAllowedFile(walk, realRel, contained.path);
        return;
    }
    walkEntryDirectory(walk, contained.path, realRel, 1);
}
|
|
90
|
+
// Expands the scope's selected paths into a flat file list, stopping at the first selection
// after which the walk is marked truncated. Returns { files, truncated }, with `files` cut to
// at most limits.maxFilesScanned entries.
export function collectFromEntries(scope, limits, fs) {
    const collected = [];
    const walk = { scope, limits, fs, files: collected, truncated: false };
    const selection = scope.relativePaths;
    for (let idx = 0; idx < selection.length && !walk.truncated; idx += 1) {
        handleScopeEntry(walk, selection[idx]);
    }
    // The walk may hold one file beyond the cap (used to detect truncation); drop it here.
    const files = collected.slice(0, limits.maxFilesScanned);
    return { files, truncated: walk.truncated };
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { LineMatcher } from "./repoSearchMatchers.js";
|
|
2
|
+
export interface LineSelectionRunner {
|
|
3
|
+
readonly limits: {
|
|
4
|
+
readonly elapsedMsMax: number;
|
|
5
|
+
};
|
|
6
|
+
readonly matcher: LineMatcher;
|
|
7
|
+
readonly nowMs: () => number;
|
|
8
|
+
readonly startMs: number;
|
|
9
|
+
}
|
|
10
|
+
export interface LineSelectionState {
|
|
11
|
+
truncated: boolean;
|
|
12
|
+
}
|
|
13
|
+
export interface ScoredLine {
|
|
14
|
+
readonly line: number;
|
|
15
|
+
readonly score: number;
|
|
16
|
+
}
|
|
17
|
+
/**
 * Scores every "\n"-separated line of `text` with `runner.matcher` and returns the file's
 * best-scoring lines (score > 0, at most three), ordered by ascending 1-based line number.
 * Ties on score favour the earlier line.
 *
 * The elapsed-time budget (`runner.limits.elapsedMsMax`) is checked periodically; when it is
 * exceeded the scan stops early and `state.truncated` is set to `true`.
 */
export declare function collectBestLines(runner: LineSelectionRunner, text: string, state: LineSelectionState): readonly ScoredLine[];
|
|
18
|
+
//# sourceMappingURL=repoSearchLineSelection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"repoSearchLineSelection.d.ts","sourceRoot":"","sources":["../src/repoSearchLineSelection.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAQ3D,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,MAAM,EAAE;QAAE,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IACnD,QAAQ,CAAC,OAAO,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,MAAM,MAAM,CAAC;IAC7B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AA6BD,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,mBAAmB,EAC3B,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,kBAAkB,GACxB,SAAS,UAAU,EAAE,CAmBvB"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// Per-file cap on emitted lexical matches (Epic #177 retrieval fix). A connected-scope question
|
|
2
|
+
// carries several content tokens, so a prose-heavy file can match many low-signal lines. Keeping
|
|
3
|
+
// only each file's best lines makes the evidence diverse across the scope.
|
|
4
|
+
const MAX_MATCHES_PER_FILE = 3;
|
|
5
|
+
// How often (in lines) timedOut() consults the clock: only line indexes that are exact
// multiples of this value trigger an elapsed-time check.
const LINE_TIMEOUT_CHECK_INTERVAL = 256;
|
|
6
|
+
// Milliseconds between the runner's start timestamp and its clock's current reading.
function elapsed(runner) {
    const now = runner.nowMs();
    return now - runner.startMs;
}
|
|
9
|
+
// Reports whether the line scan must stop because the time budget is spent. The clock is only
// read on checkpoint lines (every LINE_TIMEOUT_CHECK_INTERVAL-th index); on a timeout the
// state is flagged as truncated.
function timedOut(runner, state, lineIndex) {
    const isCheckpoint = lineIndex % LINE_TIMEOUT_CHECK_INTERVAL === 0;
    if (!isCheckpoint) {
        return false;
    }
    const spentMs = runner.nowMs() - runner.startMs;
    if (spentMs <= runner.limits.elapsedMsMax) {
        return false;
    }
    state.truncated = true;
    return true;
}
|
|
17
|
+
// Adds `candidate` to `best`, re-orders the list by score (highest first, earlier line on a
// tie) and drops the last entry when the list exceeds MAX_MATCHES_PER_FILE.
function insertBestLine(best, candidate) {
    const byScoreThenLine = (a, b) => {
        if (b.score !== a.score) {
            return b.score - a.score;
        }
        return a.line - b.line;
    };
    best.push(candidate);
    best.sort(byScoreThenLine);
    const overflow = best.length > MAX_MATCHES_PER_FILE;
    if (overflow) {
        best.pop();
    }
}
|
|
24
|
+
// Splits `text` on "\n", scores each line with runner.matcher and keeps the best-scoring
// lines via insertBestLine. The text after the final newline (possibly empty) counts as a
// line. Returns the kept lines ordered by ascending 1-based line number; stops early when
// timedOut reports the budget is spent.
export function collectBestLines(runner, text, state) {
    const best = [];
    let lineStart = 0;
    let lineNumber = 1;
    for (;;) {
        const newlineAt = text.indexOf("\n", lineStart);
        const isLastLine = newlineAt === -1;
        const lineEnd = isLastLine ? text.length : newlineAt;
        if (timedOut(runner, state, lineNumber - 1)) {
            break;
        }
        const score = runner.matcher.match(text.slice(lineStart, lineEnd));
        if (score > 0) {
            insertBestLine(best, { line: lineNumber, score });
        }
        if (isLastLine) {
            break;
        }
        lineStart = lineEnd + 1;
        lineNumber += 1;
    }
    const byLine = (a, b) => a.line - b.line;
    return best.sort(byLine);
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { RetrievalQuery } from "@oscharko-dev/keiko-contracts/connected-context";
|
|
2
|
+
export interface LineMatcher {
|
|
3
|
+
readonly match: (line: string) => number;
|
|
4
|
+
}
|
|
5
|
+
/**
 * Derives a string fingerprint for a retrieval query. The repo-search facade stores the
 * result as `queryFingerprint` on the atoms it emits.
 */
export declare function fingerprintFor(query: RetrievalQuery): string;
|
|
6
|
+
/**
 * Builds the {@link LineMatcher} for a query. Line selection calls `match` once per line and
 * treats a returned score greater than 0 as a hit.
 */
export declare function buildMatcher(query: RetrievalQuery): LineMatcher;
|
|
7
|
+
/**
 * Compiles `pattern` into a RegExp. findFiles tests the result against each candidate's
 * workspace-relative path and forwards the query's `caseSensitive` flag; the default used
 * when the flag is omitted is not visible from this declaration.
 */
export declare function compileGlob(pattern: string, caseSensitive?: boolean): RegExp;
|
|
8
|
+
//# sourceMappingURL=repoSearchMatchers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"repoSearchMatchers.d.ts","sourceRoot":"","sources":["../src/repoSearchMatchers.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iDAAiD,CAAC;AAItF,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;CAC1C;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,cAAc,GAAG,MAAM,CAQ5D;AAoaD,wBAAgB,YAAY,CAAC,KAAK,EAAE,cAAc,GAAG,WAAW,CAW/D;AAID,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,aAAa,UAAO,GAAG,MAAM,CAsBzE"}
|