@cue-dev/retrieval-core 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +27 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +64 -0
- package/dist/chunking.js +983 -0
- package/dist/index.d.ts +673 -0
- package/dist/index.js +6605 -0
- package/dist/indexing-ignore.d.ts +9 -0
- package/dist/indexing-ignore.js +151 -0
- package/dist/remote-sync.d.ts +193 -0
- package/dist/remote-sync.js +816 -0
- package/package.json +37 -0
- package/scripts/poc-node-parser-host.cjs +105 -0
- package/scripts/poc-parser-availability-benchmark.ts +338 -0
- package/src/chunking.ts +1187 -0
- package/src/index.ts +8338 -0
- package/src/indexing-ignore.ts +179 -0
- package/src/remote-sync.ts +1119 -0
- package/test/benchmark.thresholds.test.ts +815 -0
- package/test/chunking.config.test.ts +84 -0
- package/test/chunking.language-aware.test.ts +1248 -0
- package/test/chunking.parser-availability.poc.test.ts +86 -0
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/embedding-provider.test.ts +570 -0
- package/test/enhance-confidence.test.ts +752 -0
- package/test/index-prep.concurrency.regression.test.ts +142 -0
- package/test/integration.test.ts +508 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/mcp-search-quality.regression.test.ts +1358 -0
- package/test/remote-sync.integration.test.ts +350 -0
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
5
|
+
import {
|
|
6
|
+
RemoteSyncHttpResponseError,
|
|
7
|
+
buildRemoteSyncDeltaFromState,
|
|
8
|
+
collectProjectFileStats,
|
|
9
|
+
runRemoteAdaptiveSync,
|
|
10
|
+
runRemoteDeltaSync,
|
|
11
|
+
type RemoteSyncStateFile
|
|
12
|
+
} from "../src/index.js";
|
|
13
|
+
|
|
14
|
+
const tempDirs: string[] = [];
|
|
15
|
+
|
|
16
|
+
/**
 * Creates a throwaway project directory for a single test.
 *
 * The directory is created under the OS temp dir with the
 * `cue-remote-sync-test-` prefix, recorded in `tempDirs`, and seeded with an
 * empty `src/` folder.
 *
 * @returns The path of the newly created project root.
 */
async function createProject(): Promise<string> {
  const prefix = join(tmpdir(), "cue-remote-sync-test-");
  const projectRoot = await mkdtemp(prefix);
  // Register before any further I/O so the directory is tracked even if mkdir fails.
  tempDirs.push(projectRoot);
  const sourceDir = join(projectRoot, "src");
  await mkdir(sourceDir, { recursive: true });
  return projectRoot;
}
|
|
22
|
+
|
|
23
|
+
// Removes every directory recorded in `tempDirs` after each test, one at a
// time, taking entries from the end of the list until it is empty.
afterEach(async () => {
  for (let remaining = tempDirs.length; remaining > 0; remaining = tempDirs.length) {
    const target = tempDirs.pop();
    if (target) {
      // `force: true` means a directory that is already gone is not an error.
      await rm(target, { recursive: true, force: true });
    }
  }
});
|
|
32
|
+
|
|
33
|
+
/**
 * Integration tests for the remote sync entry points, run against real
 * temporary project directories created by `createProject`.
 */
describe("runRemoteDeltaSync", () => {
  type RecordedPush = { upserts: string[]; deleted: string[]; base?: string };

  const ONE_MEBIBYTE = 1024 * 1024;
  const KEEP_PATH = "src/keep.ts";
  const KEEP_SOURCE = "export const KEEP = true;\n";

  /** Builds a delta for `projectRoot` with `force_full_upsert` switched on. */
  const scanEverything = (projectRoot: string) =>
    buildRemoteSyncDeltaFromState({
      project_root_path: projectRoot,
      force_full_upsert: true
    });

  /** Asserts that `src/keep.ts` is the only path in both the delta and the next-state map. */
  const expectOnlyKeepFile = (scan: Awaited<ReturnType<typeof scanEverything>>): void => {
    expect(scan.delta.upsert_files.map((entry) => entry.path)).toEqual([KEEP_PATH]);
    expect(Object.keys(scan.next_files)).toEqual([KEEP_PATH]);
  };

  /** Waits 5 ms before a test rewrites files it has already scanned. */
  const pause = (): Promise<unknown> =>
    new Promise((resolveSleep) => setTimeout(resolveSleep, 5));

  it("excludes .tmp directories from remote sync deltas", async () => {
    const projectRoot = await createProject();
    await mkdir(join(projectRoot, ".tmp", "cache"), { recursive: true });
    await writeFile(join(projectRoot, "src", "keep.ts"), KEEP_SOURCE);
    await writeFile(join(projectRoot, ".tmp", "cache", "noise.ts"), "export const NOISE = true;\n");

    expectOnlyKeepFile(await scanEverything(projectRoot));
  });

  it("excludes common dependency/cache/vcs directories and binary/media file suffixes", async () => {
    const projectRoot = await createProject();

    // Directories that are expected to be skipped by the scan.
    const ignoredDirectories: string[][] = [
      ["vendor", "pkg"],
      [".venv", "bin"],
      ["venv", "bin"],
      ["__pycache__"],
      [".pytest_cache"],
      [".svn"],
      [".hg"]
    ];
    for (const segments of ignoredDirectories) {
      await mkdir(join(projectRoot, ...segments), { recursive: true });
    }

    await writeFile(join(projectRoot, "src", "keep.ts"), KEEP_SOURCE);

    // Files placed inside the ignored directories.
    const nestedNoise: Array<[string[], string]> = [
      [["vendor", "pkg", "noise.ts"], "export const NOISE = true;\n"],
      [[".venv", "bin", "activate"], "source venv/bin/activate\n"],
      [["venv", "bin", "activate"], "source venv/bin/activate\n"],
      [["__pycache__", "module.pyc"], "noise\n"],
      [[".pytest_cache", "state"], "noise\n"],
      [[".svn", "entries"], "noise\n"],
      [[".hg", "store"], "noise\n"]
    ];
    for (const [segments, contents] of nestedNoise) {
      await writeFile(join(projectRoot, ...segments), contents);
    }

    // Root-level files whose suffixes are expected to be skipped.
    const rootNoise = [
      "native.dll",
      "native.so",
      "binary.exe",
      "cache.pyc",
      "image.png",
      "image.jpg",
      "image.jpeg",
      "image.gif",
      "image.webp",
      "image.svg",
      "video.mp4",
      "video.mov",
      "video.avi",
      "video.mkv",
      "blob.bin",
      "native.dylib",
      "spec.pdf"
    ];
    for (const fileName of rootNoise) {
      await writeFile(join(projectRoot, fileName), "noise\n");
    }

    expectOnlyKeepFile(await scanEverything(projectRoot));
  });

  it("applies .contextignore and .cueignore patterns to remote sync deltas", async () => {
    const projectRoot = await createProject();
    for (const segments of [["src", "generated"], ["generated"], ["build"]]) {
      await mkdir(join(projectRoot, ...segments), { recursive: true });
    }

    const contextIgnoreBody = "# comments and blank lines are ignored\n\n**/generated/**\n";
    await writeFile(join(projectRoot, ".contextignore"), contextIgnoreBody);
    await writeFile(join(projectRoot, ".cueignore"), "build/\n");
    await writeFile(join(projectRoot, "src", "keep.ts"), KEEP_SOURCE);
    await writeFile(
      join(projectRoot, "src", "generated", "types.ts"),
      "export const GENERATED = true;\n"
    );
    await writeFile(
      join(projectRoot, "generated", "root.ts"),
      "export const ROOT_GENERATED = true;\n"
    );
    await writeFile(join(projectRoot, "build", "bundle.ts"), "export const BUNDLE = true;\n");

    expectOnlyKeepFile(await scanEverything(projectRoot));
  });

  it("maps .mjs/.cjs to javascript and .mts/.cts to typescript language metadata during scans", async () => {
    const projectRoot = await createProject();

    // [file name under src/, file contents, expected language]
    const fixtures = [
      ["runtime.mjs", "export const runtime = true;\n", "javascript"],
      ["loader.cjs", "module.exports = { loader: true };\n", "javascript"],
      ["types.mts", "export const typed = true as const;\n", "typescript"],
      ["legacy.cts", "export = { typed: true };\n", "typescript"]
    ] as const;
    for (const [fileName, contents] of fixtures) {
      await writeFile(join(projectRoot, "src", fileName), contents);
    }

    const stats = await collectProjectFileStats(projectRoot);
    for (const [fileName, , language] of fixtures) {
      expect(stats.get(`src/${fileName}`)?.language).toBe(language);
    }
  });

  it("applies add/modify/delete changes incrementally", async () => {
    const projectRoot = await createProject();
    const fileA = join(projectRoot, "src", "a.ts");
    const fileB = join(projectRoot, "src", "b.ts");
    const fileC = join(projectRoot, "src", "c.ts");
    await writeFile(fileA, "export const A = 1;\n");
    await writeFile(fileB, "export const B = 1;\n");

    const firstPushes: RecordedPush[] = [];
    const firstSync = await runRemoteDeltaSync({
      project_root_path: projectRoot,
      workspace_id: "ws-1",
      max_body_bytes: ONE_MEBIBYTE,
      push_delta: async (request) => {
        const upserts = request.upsert_files.map((entry) => entry.path).sort();
        const deleted = [...request.deleted_paths].sort();
        firstPushes.push({ upserts, deleted, base: request.base_index_version });
        return { workspace_id: "ws-1", index_version: "idx-v1" };
      }
    });

    expect(firstSync.changed).toBe(true);
    expect(firstSync.applied_delta).toEqual({ upsert_files: 2, deleted_paths: 0 });
    expect(firstPushes).toHaveLength(1);
    expect(firstPushes[0]?.upserts).toEqual(["src/a.ts", "src/b.ts"]);
    expect(firstPushes[0]?.deleted).toEqual([]);

    // Second round: delete b.ts, rewrite a.ts, add c.ts.
    await pause();
    await rm(fileB);
    await writeFile(fileA, "export const A = 2;\n");
    await writeFile(fileC, "export const C = 1;\n");

    const secondPushes: RecordedPush[] = [];
    const secondSync = await runRemoteDeltaSync({
      project_root_path: projectRoot,
      workspace_id: "ws-1",
      previous_state: firstSync.state,
      max_body_bytes: ONE_MEBIBYTE,
      push_delta: async (request) => {
        const upserts = request.upsert_files.map((entry) => entry.path).sort();
        const deleted = [...request.deleted_paths].sort();
        secondPushes.push({ upserts, deleted, base: request.base_index_version });
        return { workspace_id: "ws-1", index_version: "idx-v2" };
      }
    });

    expect(secondSync.changed).toBe(true);
    expect(secondSync.applied_delta).toEqual({ upsert_files: 2, deleted_paths: 1 });
    expect(secondPushes).toHaveLength(1);
    expect(secondPushes[0]?.upserts).toEqual(["src/a.ts", "src/c.ts"]);
    expect(secondPushes[0]?.deleted).toEqual(["src/b.ts"]);
    expect(secondPushes[0]?.base).toBe("idx-v1");
    expect(Object.keys(secondSync.state.files).sort()).toEqual(["src/a.ts", "src/c.ts"]);
  });

  it("returns unchanged state without remote call when no files changed", async () => {
    const projectRoot = await createProject();
    await writeFile(join(projectRoot, "src", "stable.ts"), "export const STABLE = true;\n");

    const initialScan = await scanEverything(projectRoot);
    const priorState: RemoteSyncStateFile = {
      mode: "remote_delta_v1",
      workspace_id: "ws-1",
      last_index_version: "idx-v1",
      files: initialScan.next_files,
      updated_at: new Date().toISOString()
    };

    let pushInvoked = false;
    const outcome = await runRemoteDeltaSync({
      project_root_path: projectRoot,
      previous_state: priorState,
      workspace_id: "ws-1",
      max_body_bytes: ONE_MEBIBYTE,
      push_delta: async () => {
        pushInvoked = true;
        return { workspace_id: "ws-1", index_version: "idx-v2" };
      }
    });

    expect(pushInvoked).toBe(false);
    expect(outcome.changed).toBe(false);
    expect(outcome.index_version).toBe("idx-v1");
  });

  it("retries once with full-upsert when base index is stale", async () => {
    const projectRoot = await createProject();
    const retryFile = join(projectRoot, "src", "retry.ts");
    await writeFile(retryFile, "export const RETRY = 1;\n");

    const priorScan = await scanEverything(projectRoot);
    const priorState: RemoteSyncStateFile = {
      mode: "remote_delta_v1",
      workspace_id: "ws-1",
      last_index_version: "idx-stale",
      files: priorScan.next_files,
      updated_at: new Date().toISOString()
    };

    await pause();
    await writeFile(retryFile, "export const RETRY = 2;\n");

    const seenBases: Array<string | undefined> = [];
    let attempts = 0;
    const outcome = await runRemoteDeltaSync({
      project_root_path: projectRoot,
      previous_state: priorState,
      workspace_id: "ws-1",
      max_body_bytes: ONE_MEBIBYTE,
      retries: 1,
      push_delta: async (request) => {
        attempts += 1;
        seenBases.push(request.base_index_version);
        // Only the first push is rejected as stale; the next one succeeds.
        if (attempts === 1) {
          throw new RemoteSyncHttpResponseError("INVALID_ARGUMENT: base index version is stale", 400);
        }
        return { workspace_id: "ws-1", index_version: "idx-v2" };
      }
    });

    expect(attempts).toBe(2);
    expect(seenBases[0]).toBe("idx-stale");
    expect(seenBases[1]).toBeUndefined();
    expect(outcome.index_version).toBe("idx-v2");
  });

  it("uses blob_commit_v2 protocol when advertised and falls back on unsupported endpoint", async () => {
    const projectRoot = await createProject();
    const adaptiveFile = join(projectRoot, "src", "adaptive.ts");
    await writeFile(adaptiveFile, "export const ADAPTIVE = 1;\n");

    // Shared across both syncs: any delta_v1 push in either phase lands here.
    const deltaPushes: Array<{ upserts: number; deletes: number }> = [];

    const blobCommitSync = await runRemoteAdaptiveSync({
      project_root_path: projectRoot,
      workspace_id: "ws-1",
      capabilities: {
        max_body_bytes: ONE_MEBIBYTE,
        sync_protocols: ["delta_v1", "blob_commit_v2"],
        max_blob_bytes: 128 * 1024,
        max_blob_batch_bytes: 512 * 1024,
        max_commit_body_bytes: 512 * 1024,
        upload_concurrency_hint: 2
      },
      push_delta: async (request) => {
        deltaPushes.push({
          upserts: request.upsert_files.length,
          deletes: request.deleted_paths.length
        });
        return { workspace_id: "ws-1", index_version: "idx-delta" };
      },
      upload_blobs: async (request) => ({
        accepted_hashes: request.blobs.map((blob) => blob.hash),
        already_present_hashes: [],
        rejected: []
      }),
      commit_v2: async () => ({ workspace_id: "ws-1", index_version: "idx-v2" })
    });
    expect(blobCommitSync.protocol).toBe("blob_commit_v2");
    expect(blobCommitSync.index_version).toBe("idx-v2");
    expect(deltaPushes).toHaveLength(0);

    await pause();
    await writeFile(adaptiveFile, "export const ADAPTIVE = 2;\n");

    const fallbackSync = await runRemoteAdaptiveSync({
      project_root_path: projectRoot,
      workspace_id: "ws-1",
      previous_state: blobCommitSync.state,
      capabilities: {
        max_body_bytes: ONE_MEBIBYTE,
        sync_protocols: ["delta_v1", "blob_commit_v2"]
      },
      push_delta: async (request) => {
        deltaPushes.push({
          upserts: request.upsert_files.length,
          deletes: request.deleted_paths.length
        });
        return { workspace_id: "ws-1", index_version: "idx-delta-fallback" };
      },
      // The blob upload endpoint answers 404 in this phase.
      upload_blobs: async () => {
        throw new RemoteSyncHttpResponseError("NOT_FOUND: /v1/sync/blobs/upload", 404);
      },
      commit_v2: async () => ({ workspace_id: "ws-1", index_version: "idx-ignored" })
    });
    expect(fallbackSync.protocol).toBe("delta_v1");
    expect(deltaPushes.length).toBeGreaterThan(0);
  });
});
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
__applySmartCutoffCandidatesForTests,
|
|
4
|
+
mergeRetrievalScoringConfig,
|
|
5
|
+
resolveRetrievalScoringProfile
|
|
6
|
+
} from "../src/index.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Builds synthetic search candidates, one per score, for the smart-cutoff tests.
 *
 * Candidate `i` (0-based) gets the path `src/candidate-{i + 1}.ts`, a line span
 * from `i * 10 + 1` to `i * 10 + 5`, a one-line snippet that embeds the score,
 * and the fixed reason `"semantic match"`.
 *
 * @param scores - Scores in output order. Accepted as a readonly array because
 *   the input is only read, so frozen or `as const` inputs work too.
 * @returns One candidate object per score, in the same order as `scores`.
 */
function sampleCandidates(scores: readonly number[]) {
  return scores.map((score, index) => {
    const ordinal = index + 1;
    const firstLine = index * 10 + 1;
    return {
      path: `src/candidate-${ordinal}.ts`,
      start_line: firstLine,
      end_line: index * 10 + 5,
      snippet: `export const candidate${ordinal} = ${score};`,
      score,
      reason: "semantic match" as const
    };
  });
}
|
|
18
|
+
|
|
19
|
+
/**
 * Tests for the smart-cutoff rerank settings: how candidates are trimmed, and
 * which invalid settings `mergeRetrievalScoringConfig` rejects.
 */
describe("retrieval smart cutoff", () => {
  it("applies delta and ratio guards after the safe-harbor minimum", () => {
    const profile = resolveRetrievalScoringProfile("baseline");
    const merged = mergeRetrievalScoringConfig(profile.config, {
      rerank: {
        smart_cutoff_enabled: true,
        smart_cutoff_min_k: 2,
        smart_cutoff_max_k: 8,
        smart_cutoff_min_score: 0.25,
        smart_cutoff_top_ratio: 0.5,
        smart_cutoff_delta_abs: 0.2
      }
    });

    const kept = __applySmartCutoffCandidatesForTests({
      config: merged.rerank,
      candidates: sampleCandidates([0.92, 0.88, 0.56, 0.55, 0.41])
    });

    // Only the two leading candidates are expected to remain.
    const expectedScores = [0.92, 0.88];
    expect(kept).toHaveLength(2);
    expectedScores.forEach((expected, position) => {
      expect(kept[position]?.score).toBe(expected);
    });
  });

  it("keeps at least min_k results even when score floors are not met", () => {
    const profile = resolveRetrievalScoringProfile("baseline");
    const merged = mergeRetrievalScoringConfig(profile.config, {
      rerank: {
        smart_cutoff_enabled: true,
        smart_cutoff_min_k: 2,
        smart_cutoff_max_k: 8,
        smart_cutoff_min_score: 0.8,
        smart_cutoff_top_ratio: 0.95,
        smart_cutoff_delta_abs: 0.01
      }
    });

    const kept = __applySmartCutoffCandidatesForTests({
      config: merged.rerank,
      candidates: sampleCandidates([0.91, 0.31, 0.3, 0.29])
    });

    // The second candidate scores below the 0.8 floor but is still expected back.
    const expectedScores = [0.91, 0.31];
    expect(kept).toHaveLength(2);
    expectedScores.forEach((expected, position) => {
      expect(kept[position]?.score).toBe(expected);
    });
  });

  it("rejects invalid smart cutoff ranges in scoring config", () => {
    const profile = resolveRetrievalScoringProfile("baseline");

    // max_k below min_k.
    const mergeInvertedRange = () =>
      mergeRetrievalScoringConfig(profile.config, {
        rerank: {
          smart_cutoff_min_k: 3,
          smart_cutoff_max_k: 2
        }
      });
    expect(mergeInvertedRange).toThrow(/smart_cutoff_max_k must be >= smart_cutoff_min_k/);

    // top_ratio of exactly 0.
    const mergeZeroRatio = () =>
      mergeRetrievalScoringConfig(profile.config, {
        rerank: {
          smart_cutoff_top_ratio: 0
        }
      });
    expect(mergeZeroRatio).toThrow(/smart_cutoff_top_ratio must be in \(0, 1\]/);
  });
});
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG,
|
|
4
|
+
mergeRetrievalSnippetIntegrityConfig
|
|
5
|
+
} from "../src/index.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Tests for the snippet-integrity configuration: its default values, how
 * overrides are merged, and which overrides are rejected.
 */
describe("retrieval-core snippet integrity config validation", () => {
  const defaults = DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG;

  it("uses dark-launch defaults", () => {
    const expectedDefaults = {
      enabled: false,
      target_languages: ["typescript", "tsx", "javascript", "jsx"],
      max_contiguous_gap_lines: 6,
      marker_template_version: "v1",
      repair_enabled: false,
      repair_max_envelope_lines: 260,
      repair_max_snippet_chars: 3600
    };
    expect(DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG).toMatchObject(expectedDefaults);
  });

  it("accepts valid overrides and normalizes aliases", () => {
    const result = mergeRetrievalSnippetIntegrityConfig(defaults, {
      enabled: true,
      target_languages: ["ts", "js", "tsx"],
      max_contiguous_gap_lines: 10,
      repair_enabled: true,
      repair_max_envelope_lines: 400,
      repair_max_snippet_chars: 4200
    });

    expect(result.enabled).toBe(true);
    // The "ts" and "js" aliases come back as their full language names.
    expect(result.target_languages).toEqual(["typescript", "javascript", "tsx"]);
    expect(result.max_contiguous_gap_lines).toBe(10);
    // Not overridden above, so the value is still "v1".
    expect(result.marker_template_version).toBe("v1");
    expect(result.repair_enabled).toBe(true);
    expect(result.repair_max_envelope_lines).toBe(400);
    expect(result.repair_max_snippet_chars).toBe(4200);
  });

  it("rejects unsupported target languages", () => {
    const mergePython = () =>
      mergeRetrievalSnippetIntegrityConfig(defaults, {
        target_languages: ["python"]
      });
    expect(mergePython).toThrow(/unsupported target language/);
  });

  it("rejects invalid repair caps", () => {
    const mergeZeroEnvelope = () =>
      mergeRetrievalSnippetIntegrityConfig(defaults, {
        repair_max_envelope_lines: 0
      });
    expect(mergeZeroEnvelope).toThrow(/repair_max_envelope_lines must be a positive integer/);

    const mergeNegativeChars = () =>
      mergeRetrievalSnippetIntegrityConfig(defaults, {
        repair_max_snippet_chars: -1
      });
    expect(mergeNegativeChars).toThrow(/repair_max_snippet_chars must be a positive integer/);
  });
});
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../tsconfig.base.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"composite": true,
|
|
5
|
+
"declaration": true,
|
|
6
|
+
"declarationMap": false,
|
|
7
|
+
"rootDir": "src",
|
|
8
|
+
"outDir": "dist",
|
|
9
|
+
"tsBuildInfoFile": "dist/.tsbuildinfo"
|
|
10
|
+
},
|
|
11
|
+
"include": ["src/**/*.ts"],
|
|
12
|
+
"references": [
|
|
13
|
+
{ "path": "../contracts/tsconfig.build.json" },
|
|
14
|
+
{ "path": "../data-plane/tsconfig.build.json" },
|
|
15
|
+
{ "path": "../observability/tsconfig.build.json" }
|
|
16
|
+
]
|
|
17
|
+
}
|
package/tsconfig.json
ADDED