@cue-dev/retrieval-core 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +27 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +64 -0
- package/dist/chunking.js +983 -0
- package/dist/index.d.ts +673 -0
- package/dist/index.js +6605 -0
- package/dist/indexing-ignore.d.ts +9 -0
- package/dist/indexing-ignore.js +151 -0
- package/dist/remote-sync.d.ts +193 -0
- package/dist/remote-sync.js +816 -0
- package/package.json +37 -0
- package/scripts/poc-node-parser-host.cjs +105 -0
- package/scripts/poc-parser-availability-benchmark.ts +338 -0
- package/src/chunking.ts +1187 -0
- package/src/index.ts +8338 -0
- package/src/indexing-ignore.ts +179 -0
- package/src/remote-sync.ts +1119 -0
- package/test/benchmark.thresholds.test.ts +815 -0
- package/test/chunking.config.test.ts +84 -0
- package/test/chunking.language-aware.test.ts +1248 -0
- package/test/chunking.parser-availability.poc.test.ts +86 -0
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/embedding-provider.test.ts +570 -0
- package/test/enhance-confidence.test.ts +752 -0
- package/test/index-prep.concurrency.regression.test.ts +142 -0
- package/test/integration.test.ts +508 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/mcp-search-quality.regression.test.ts +1358 -0
- package/test/remote-sync.integration.test.ts +350 -0
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,1358 @@
|
|
|
1
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
5
|
+
import { SearchContextOutputSchema } from "@cue-dev/contracts";
|
|
6
|
+
import {
|
|
7
|
+
InMemoryQueryCache,
|
|
8
|
+
SqliteIndexRepository,
|
|
9
|
+
SqliteQueryCache,
|
|
10
|
+
type IndexRepository,
|
|
11
|
+
type RankedChunkCandidate
|
|
12
|
+
} from "@cue-dev/data-plane";
|
|
13
|
+
import { RetrievalCore } from "../src/index.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Finds the 1-based rank of the first result whose `path` equals `path`.
 *
 * @param results - Ranked rows, best match first.
 * @param path - Exact path to look for.
 * @returns The 1-based position of the first match, or
 *   `Number.POSITIVE_INFINITY` when no row has that path.
 */
function firstRank(results: Array<{ path: string }>, path: string): number {
  for (const [position, row] of results.entries()) {
    if (row.path === path) {
      // Ranks are reported 1-based, so shift the array offset by one.
      return position + 1;
    }
  }
  return Number.POSITIVE_INFINITY;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Counts the rows whose path ends with `.md`, compared case-insensitively.
 *
 * @param results - Rows carrying a `path` field.
 * @returns Number of rows with a `.md` suffix.
 */
function markdownCount(results: Array<{ path: string }>): number {
  let total = 0;
  for (const { path } of results) {
    // Lower-case first so "README.MD" counts as markdown as well.
    if (path.toLowerCase().endsWith(".md")) {
      total += 1;
    }
  }
  return total;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Builds the text of a synthetic TypeScript "circuit breaker" module used as
 * indexable fixture content.
 *
 * The output is three sections joined with newlines: a fixed preamble
 * (interface, constants, helpers, and the opening of
 * `evaluateCircuitBreaker`), 35 generated `riskAccumulator += ...` lines, and
 * a fixed epilogue that closes the function.
 *
 * @returns The fixture source as a single newline-joined string.
 */
function buildLongCircuitBreakerFixture(): string {
  const preamble: string[] = [
    "export interface RiskState {",
    " accountId: string;",
    " pnlSeries: number[];",
    " window: number[];",
    "}",
    "",
    "const HARD_DRAWDOWN_LIMIT_BPS = 650;",
    "const RISK_ACCUMULATOR_LIMIT = 25_000;",
    "",
    "function calculateDrawdownBps(values: number[], checksum: number): number {",
    " return values.length > 0 ? Math.max(...values) + (checksum % 7) : checksum % 7;",
    "}",
    "",
    "function captureWindowChecksum(values: number[]): number {",
    " return values.reduce((sum, value) => sum + value, 0);",
    "}",
    "",
    "function publishTripEvent(input: { reason: string; drawdownBps: number; riskAccumulator: number }) {",
    " return { eventId: `${input.reason}-${input.drawdownBps}-${input.riskAccumulator}` };",
    "}",
    "",
    "function freezeOrderEntry(accountId: string, eventId: string): void {",
    " void accountId;",
    " void eventId;",
    "}",
    "",
    "function recordTripAuditLog(accountId: string, eventId: string, checksum: number): string {",
    " void accountId;",
    " void eventId;",
    " return `${eventId}-${checksum}`;",
    "}",
    "",
    "export function evaluateCircuitBreaker(state: RiskState): boolean {",
    " const startWindowChecksum = captureWindowChecksum(state.window);",
    " const drawdownBps = calculateDrawdownBps(state.pnlSeries, startWindowChecksum);",
    " const circuitActivationSeed = state.window.length + state.accountId.length;",
    " let riskAccumulator = 0;"
  ];

  // Padding statements that stretch the body of evaluateCircuitBreaker.
  const accumulatorSteps = Array.from(
    { length: 35 },
    (_unused, slot) => ` riskAccumulator += state.window[${slot}] ?? 0;`
  );

  const epilogue: string[] = [
    " const shouldTrip = drawdownBps >= HARD_DRAWDOWN_LIMIT_BPS || riskAccumulator >= RISK_ACCUMULATOR_LIMIT;",
    " if (!shouldTrip) {",
    " return false;",
    " }",
    " const tripEvent = publishTripEvent({",
    " reason: 'hard-drawdown',",
    " drawdownBps,",
    " riskAccumulator",
    " });",
    " const tripAuditDigest = recordTripAuditLog(state.accountId, tripEvent.eventId, startWindowChecksum);",
    " freezeOrderEntry(state.accountId, tripAuditDigest);",
    " return true;",
    "}"
  ];

  return [...preamble, ...accumulatorSteps, ...epilogue].join("\n");
}
|
|
84
|
+
|
|
85
|
+
/**
 * Builds the text of a synthetic TypeScript module whose main function body
 * is 40 near-identical lines, each carrying the same trailing comment.
 *
 * @returns The fixture source as a single newline-joined string.
 */
function buildOverlapHotspotFixture(): string {
  const header = ["export function overlapMergeHotspot(seed: number): number {", " let total = seed;"];

  // One statement per step; only the step index varies between lines.
  const repeatedSteps = Array.from(
    { length: 40 },
    (_unused, step) => ` total += overlapMergeSignal(seed, ${step}); // overlap chunk merge hotspot coverage`
  );

  const footer = [
    " return total;",
    "}",
    "",
    "function overlapMergeSignal(seed: number, step: number): number {",
    " return seed + step;",
    "}"
  ];

  return header.concat(repeatedSteps, footer).join("\n");
}
|
|
103
|
+
|
|
104
|
+
/**
 * Builds the text of a large synthetic TypeScript module: one function whose
 * body is 220 near-identical lines, followed by the small helper it calls.
 *
 * @returns The fixture source as a single newline-joined string.
 */
function buildLargeOverlapPressureFixture(): string {
  const header = ["export function overlapPressureSignal(seed: number): number {", " let total = seed;"];

  // Every generated line repeats the same comment tokens; only the step index changes.
  const repeatedSteps = Array.from(
    { length: 220 },
    (_unused, step) =>
      ` total += overlapPressureStep(seed, ${step}); // overlap pressure signal token cluster for chunk overlap diagnostics`
  );

  const footer = [
    " return total;",
    "}",
    "",
    "function overlapPressureStep(seed: number, step: number): number {",
    " return seed + step;",
    "}"
  ];

  return header.concat(repeatedSteps, footer).join("\n");
}
|
|
124
|
+
|
|
125
|
+
describe("mcp search quality regressions", () => {
|
|
126
|
+
const dirs: string[] = [];
|
|
127
|
+
|
|
128
|
+
// Removes every temporary directory registered in `dirs`, most recent first,
// one at a time, so each test starts with an empty list.
afterEach(async () => {
  while (dirs.length !== 0) {
    const tempDir = dirs.pop();
    if (!tempDir) {
      continue;
    }
    // `force` keeps cleanup from failing when the directory is already gone.
    await rm(tempDir, { force: true, recursive: true });
  }
});
|
|
136
|
+
|
|
137
|
+
// Indexes a small MCP API source tree and checks that three targeted queries
// each rank their intended file within the top 3, and that the guidance-only
// file ranks below the target for the enforcement and config queries.
it("keeps MCP enforcement/config targets in top results and suppresses guidance noise", async () => {
  // Identifiers shared by workspace registration, indexing, and every query.
  const tenantId = "tenant-mcp";
  const workspaceId = "ws-mcp";
  const projectRootPath = "/workspace/mcp";

  const tempRoot = await mkdtemp(join(tmpdir(), "cue-mcp-quality-"));
  dirs.push(tempRoot);
  const databasePath = join(tempRoot, "mcp-quality.sqlite");

  const repository = new SqliteIndexRepository(databasePath);
  await repository.migrate();
  await repository.upsertWorkspace({
    workspace_id: workspaceId,
    tenant_id: tenantId,
    name: "mcp",
    project_root_path: projectRootPath
  });

  const queryCache = new SqliteQueryCache(databasePath);
  const retrieval = new RetrievalCore(repository, queryCache);

  // Runs one search restricted to the "apps/mcp-api/src" path prefix.
  const searchMcpSources = (traceId: string, query: string, topK: number) =>
    retrieval.searchContext({
      trace_id: traceId,
      tenant_id: tenantId,
      workspace_id: workspaceId,
      request: {
        project_root_path: projectRootPath,
        query,
        top_k: topK,
        filters: { path_prefix: "apps/mcp-api/src" }
      }
    });

  try {
    await retrieval.indexArtifact({
      tenant_id: tenantId,
      workspace_id: workspaceId,
      index_version: "idx-mcp-v1",
      files: [
        {
          path: "apps/mcp-api/src/server.ts",
          language: "typescript",
          content: `
export function resolveMcpAuthContext() {}
export function assertSessionMatchesAuth() {}
export function enforceMcpTenantBinding() {}
export function streamableHttpMcpRoute() {}
`
        },
        {
          path: "apps/mcp-api/src/state.ts",
          language: "typescript",
          content: `
export const CUE_MCP_TOKEN_BINDINGS_JSON = "CUE_MCP_TOKEN_BINDINGS_JSON";
export function parseMcpTokenBindingsJson(raw: string) { return raw; }
export function resolveMcpAuthConfigFromEnv() { return { tenant_id: "t", workspace_id: "w", project_root_path: "/p" }; }
`
        },
        {
          path: "apps/mcp-api/src/mcp-stdio.ts",
          language: "typescript",
          content: `
export function bootstrapRemoteProxy() {}
export function runRemoteMcpStdioServer() {}
export const REMOTE_BINDING_FIELDS = ["workspace_id", "project_root_path"];
`
        },
        {
          path: "apps/mcp-api/src/mcp-tool-guidance.ts",
          language: "typescript",
          content: `
// guidance-only text containing overlapping auth/config terms
export const GUIDE = "tenant_id workspace_id project_root_path token binding mcp security config";
`
        },
        {
          path: "apps/mcp-api/src/tools/search-context.ts",
          language: "typescript",
          content: "export function searchContextTool() { return 'tool'; }"
        },
        {
          path: "apps/mcp-api/src/tools/enhance-prompt.ts",
          language: "typescript",
          content: "export function enhancePromptTool() { return 'tool'; }"
        },
        {
          path: "apps/mcp-api/src/docs/reference.md",
          language: "markdown",
          content: "MCP docs reference for tokens, tenant_id, workspace_id, and project_root_path."
        }
      ]
    });

    const enforcement = await searchMcpSources(
      "trc-mcp-enforcement",
      "resolveMcpAuthContext enforceMcpTenantBinding assertSessionMatchesAuth streamable HTTP /mcp",
      8
    );
    const config = await searchMcpSources(
      "trc-mcp-config",
      "CUE_MCP_TOKEN_BINDINGS_JSON parse token bindings tenant_id workspace_id project_root_path",
      6
    );
    const stdio = await searchMcpSources(
      "trc-mcp-stdio",
      "mcp-stdio remote bootstrap token binding workspace_id project_root_path",
      6
    );

    const guidancePath = "apps/mcp-api/src/mcp-tool-guidance.ts";
    const enforcementTargetRank = firstRank(enforcement.results, "apps/mcp-api/src/server.ts");
    const configTargetRank = firstRank(config.results, "apps/mcp-api/src/state.ts");
    const stdioTargetRank = firstRank(stdio.results, "apps/mcp-api/src/mcp-stdio.ts");
    const enforcementGuidanceRank = firstRank(enforcement.results, guidancePath);
    const configGuidanceRank = firstRank(config.results, guidancePath);

    // Each query must surface its intended file within the top three.
    expect(enforcementTargetRank).toBeLessThanOrEqual(3);
    expect(configTargetRank).toBeLessThanOrEqual(3);
    expect(stdioTargetRank).toBeLessThanOrEqual(3);
    // The guidance file must rank strictly below the real target (or be absent).
    expect(enforcementGuidanceRank).toBeGreaterThan(enforcementTargetRank);
    expect(configGuidanceRank).toBeGreaterThan(configTargetRank);
  } finally {
    queryCache.close();
    repository.close();
  }
});
|
|
266
|
+
|
|
267
|
+
// Indexes a monorepo-shaped fixture (code entrypoints, two JSON manifests, four
// markdown docs) and checks that a natural-language architecture query returns
// mostly non-markdown results with the API entrypoint near the top.
it("prioritizes architecture entrypoints and limits markdown saturation for architecture queries", async () => {
  const tenantId = "tenant-architecture";
  const workspaceId = "ws-architecture";
  const projectRootPath = "/workspace/architecture";

  const tempRoot = await mkdtemp(join(tmpdir(), "cue-architecture-ranking-"));
  dirs.push(tempRoot);
  const databasePath = join(tempRoot, "architecture-ranking.sqlite");

  const repository = new SqliteIndexRepository(databasePath);
  await repository.migrate();
  await repository.upsertWorkspace({
    workspace_id: workspaceId,
    tenant_id: tenantId,
    name: "architecture",
    project_root_path: projectRootPath
  });

  const queryCache = new SqliteQueryCache(databasePath);
  const retrieval = new RetrievalCore(repository, queryCache);

  try {
    await retrieval.indexArtifact({
      tenant_id: tenantId,
      workspace_id: workspaceId,
      index_version: "idx-architecture-v1",
      files: [
        {
          path: "apps/api/src/index.ts",
          language: "typescript",
          content: "export function bootstrapApiEntrypoint() {}\nexport function mountRoutesAndMiddleware() {}\n"
        },
        {
          path: "packages/oracle/src/index.ts",
          language: "typescript",
          content: "export function startOracleCycle() {}\nexport function buildOracleModules() {}\n"
        },
        {
          path: "packages/quant/src/index.ts",
          language: "typescript",
          content: "export function startQuantEngine() {}\nexport function createQuantModules() {}\n"
        },
        {
          path: "packages/shared/src/index.ts",
          language: "typescript",
          content: "export * from './risk';\nexport * from './db';\n"
        },
        {
          path: "apps/dashboard/src/app/layout.tsx",
          language: "tsx",
          content: "export default function RootLayout({ children }) { return children; }\n"
        },
        {
          path: "packages/db/src/index.ts",
          language: "typescript",
          content: "export function createDbClient() {}\n"
        },
        {
          path: "turbo.json",
          language: "json",
          content: '{"pipeline":{"build":{"dependsOn":["^build"]}}}\n'
        },
        {
          path: "package.json",
          language: "json",
          content: '{"name":"architecture-monorepo","workspaces":["apps/*","packages/*"]}\n'
        },
        {
          path: "docs/README.md",
          language: "markdown",
          content: "Architecture overview and trading system modules with entry points and structure documentation."
        },
        {
          path: "docs/02-architecture-overview.md",
          language: "markdown",
          content: "System architecture overview describing modules, entry points, and organization."
        },
        {
          path: "docs/oracle-arch-review.md",
          language: "markdown",
          content: "Architecture review for oracle modules and architecture evolution."
        },
        {
          path: "docs/05-shared-infra.md",
          language: "markdown",
          content: "Shared infrastructure architecture module layout and architecture notes."
        }
      ]
    });

    const response = await retrieval.searchContext({
      trace_id: "trc-architecture-query",
      tenant_id: tenantId,
      workspace_id: workspaceId,
      request: {
        project_root_path: projectRootPath,
        query: "How is the trading system architecture organized? What are the main modules and entry points?",
        top_k: 8
      }
    });

    const markdownHits = markdownCount(response.results);
    const nonMarkdownHits = response.results.length - markdownHits;

    // Every non-markdown file in the fixture counts as a structural target.
    const structuralTargets = [
      "apps/api/src/index.ts",
      "packages/oracle/src/index.ts",
      "packages/quant/src/index.ts",
      "packages/shared/src/index.ts",
      "apps/dashboard/src/app/layout.tsx",
      "packages/db/src/index.ts",
      "turbo.json",
      "package.json"
    ];
    let structuralHits = 0;
    for (const target of structuralTargets) {
      if (firstRank(response.results, target) <= 8) {
        structuralHits += 1;
      }
    }

    expect(markdownHits).toBeLessThanOrEqual(2);
    expect(nonMarkdownHits).toBeGreaterThanOrEqual(5);
    expect(firstRank(response.results, "apps/api/src/index.ts")).toBeLessThanOrEqual(4);
    expect(structuralHits).toBeGreaterThanOrEqual(5);
  } finally {
    queryCache.close();
    repository.close();
  }
});
|
|
389
|
+
|
|
390
|
+
// Runs `enhancePrompt` over a small code-plus-docs fixture and checks that the
// returned context refs contain at most two markdown paths, at least two
// non-markdown paths, and the API entrypoint within the first three.
it("keeps enhancer architecture context refs code-forward with markdown diversity cap", async () => {
  const tenantId = "tenant-enhancer-architecture";
  const workspaceId = "ws-enhancer-architecture";
  const projectRootPath = "/workspace/enhancer-architecture";

  const tempRoot = await mkdtemp(join(tmpdir(), "cue-enhancer-architecture-"));
  dirs.push(tempRoot);
  const databasePath = join(tempRoot, "enhancer-architecture.sqlite");

  const repository = new SqliteIndexRepository(databasePath);
  await repository.migrate();
  await repository.upsertWorkspace({
    workspace_id: workspaceId,
    tenant_id: tenantId,
    name: "enhancer-architecture",
    project_root_path: projectRootPath
  });

  const queryCache = new SqliteQueryCache(databasePath);
  const retrieval = new RetrievalCore(repository, queryCache);

  try {
    await retrieval.indexArtifact({
      tenant_id: tenantId,
      workspace_id: workspaceId,
      index_version: "idx-enhancer-architecture-v1",
      files: [
        {
          path: "apps/api/src/index.ts",
          language: "typescript",
          content: "export function bootstrapApiEntrypoint() {}\nexport function mountRoutesAndMiddleware() {}\n"
        },
        {
          path: "packages/oracle/src/index.ts",
          language: "typescript",
          content: "export function startOracleCycle() {}\n"
        },
        {
          path: "packages/quant/src/index.ts",
          language: "typescript",
          content: "export function startQuantEngine() {}\n"
        },
        {
          path: "packages/shared/src/index.ts",
          language: "typescript",
          content: "export * from './risk';\nexport * from './db';\n"
        },
        {
          path: "docs/README.md",
          language: "markdown",
          content: "Architecture overview and trading system modules with entry points and structure documentation."
        },
        {
          path: "docs/02-architecture-overview.md",
          language: "markdown",
          content: "System architecture overview describing modules, entry points, and organization."
        },
        {
          path: "docs/oracle-arch-review.md",
          language: "markdown",
          content: "Architecture review for oracle modules and architecture evolution."
        }
      ]
    });

    const enhanced = await retrieval.enhancePrompt({
      trace_id: "trc-enhancer-architecture-query",
      tenant_id: tenantId,
      workspace_id: workspaceId,
      request: {
        project_root_path: projectRootPath,
        prompt: "How is the trading system architecture organized? What are the main modules and entry points?",
        conversation_history: [
          {
            role: "user",
            content: "Focus on code entry points and wiring, keep docs secondary."
          }
        ]
      }
    });

    const contextRefs = enhanced.context_refs;
    const markdownRefs = markdownCount(contextRefs);
    const nonMarkdownRefs = contextRefs.length - markdownRefs;

    expect(markdownRefs).toBeLessThanOrEqual(2);
    expect(nonMarkdownRefs).toBeGreaterThanOrEqual(2);
    expect(firstRank(contextRefs, "apps/api/src/index.ts")).toBeLessThanOrEqual(3);
  } finally {
    queryCache.close();
    repository.close();
  }
});
|
|
480
|
+
|
|
481
|
+
// Indexes the same files into two workspaces using two sliding-window chunk
// configurations (220/40 vs 420/90 tokens) and checks that the larger window
// yields a result for the circuit-breaker file that is at least as long and
// covers at least as many of the tracked identifiers.
it("maintains or improves snippet completeness with upgraded chunk windows", async () => {
  const tenantId = "tenant-snippet";
  const legacyWorkspace = { id: "ws-snippet-legacy", root: "/workspace/snippet-legacy" };
  const upgradedWorkspace = { id: "ws-snippet-upgraded", root: "/workspace/snippet-upgraded" };

  const tempRoot = await mkdtemp(join(tmpdir(), "cue-mcp-snippet-quality-"));
  dirs.push(tempRoot);
  const databasePath = join(tempRoot, "mcp-snippet-quality.sqlite");

  const repository = new SqliteIndexRepository(databasePath);
  await repository.migrate();
  await repository.upsertWorkspace({
    workspace_id: legacyWorkspace.id,
    tenant_id: tenantId,
    name: "snippet-quality-legacy",
    project_root_path: legacyWorkspace.root
  });
  await repository.upsertWorkspace({
    workspace_id: upgradedWorkspace.id,
    tenant_id: tenantId,
    name: "snippet-quality-upgraded",
    project_root_path: upgradedWorkspace.root
  });

  const queryCache = new SqliteQueryCache(databasePath);

  // Both cores share the repository and cache; only the chunk window differs.
  const slidingCore = (targetTokens: number, overlapTokens: number) =>
    new RetrievalCore(repository, queryCache, {
      chunkingConfig: {
        strategy: "sliding",
        target_chunk_tokens: targetTokens,
        chunk_overlap_tokens: overlapTokens
      }
    });
  const legacyCore = slidingCore(220, 40);
  const upgradedCore = slidingCore(420, 90);

  try {
    const targetPath = "packages/shared/src/risk/circuit-breaker.ts";
    const files = [
      {
        path: "packages/shared/src/risk/circuit-breaker.ts",
        language: "typescript",
        content: buildLongCircuitBreakerFixture()
      },
      {
        path: "docs/risk-architecture.md",
        language: "markdown",
        content: "High-level risk architecture notes: circuit breaker policy, drawdown thresholds, and account controls."
      }
    ] as const;

    await legacyCore.indexArtifact({
      tenant_id: tenantId,
      workspace_id: legacyWorkspace.id,
      index_version: "idx-snippet-legacy-v1",
      files: [...files]
    });
    await upgradedCore.indexArtifact({
      tenant_id: tenantId,
      workspace_id: upgradedWorkspace.id,
      index_version: "idx-snippet-upgraded-v1",
      files: [...files]
    });

    const query = "evaluateCircuitBreaker circuitActivationSeed tripAuditDigest freezeOrderEntry hard drawdown";
    const legacyRetrieval = await legacyCore.searchContext({
      trace_id: "trc-snippet-completeness-legacy",
      tenant_id: tenantId,
      workspace_id: legacyWorkspace.id,
      request: {
        project_root_path: legacyWorkspace.root,
        query,
        top_k: 5
      }
    });
    const upgradedRetrieval = await upgradedCore.searchContext({
      trace_id: "trc-snippet-completeness-upgraded",
      tenant_id: tenantId,
      workspace_id: upgradedWorkspace.id,
      request: {
        project_root_path: upgradedWorkspace.root,
        query,
        top_k: 5
      }
    });

    // Best-ranked row for the circuit-breaker file in each result list.
    const legacyTop = legacyRetrieval.results.find((row) => row.path === targetPath);
    const upgradedTop = upgradedRetrieval.results.find((row) => row.path === targetPath);
    expect(legacyTop).toBeDefined();
    expect(upgradedTop).toBeDefined();

    // A missing row counts as a zero-length span.
    const lineSpan = (row: typeof legacyTop): number => (row?.end_line ?? 0) - (row?.start_line ?? 0);
    expect(lineSpan(upgradedTop)).toBeGreaterThanOrEqual(lineSpan(legacyTop));
    expect(upgradedTop?.snippet.length ?? 0).toBeGreaterThanOrEqual(legacyTop?.snippet.length ?? 0);

    // Counts how many of the tracked identifiers appear in a snippet.
    const trackedTokens = ["circuitActivationSeed", "tripAuditDigest", "freezeOrderEntry"];
    const tokenCoverage = (snippet: string | undefined): number => {
      let present = 0;
      for (const token of trackedTokens) {
        if (snippet?.includes(token)) {
          present += 1;
        }
      }
      return present;
    };
    const upgradedCoverage = tokenCoverage(upgradedTop?.snippet);
    expect(upgradedCoverage).toBeGreaterThanOrEqual(tokenCoverage(legacyTop?.snippet));
    expect(upgradedCoverage).toBeGreaterThanOrEqual(2);
  } finally {
    queryCache.close();
    repository.close();
  }
});
|
|
587
|
+
|
|
588
|
+
// Indexes a file chunked with heavy overlap (70-token windows, 60-token
// overlap), then compares a reader with overlap merging disabled against one
// with it enabled: the baseline's top two rows are overlapping chunks of the
// same file, while the merged results must also include the secondary file.
it("merges overlapping top candidates to recover distinct file coverage", async () => {
  const tenantId = "tenant-overlap";
  const workspaceId = "ws-overlap";
  const projectRootPath = "/workspace/overlap";

  const tempRoot = await mkdtemp(join(tmpdir(), "cue-overlap-merge-quality-"));
  dirs.push(tempRoot);
  const databasePath = join(tempRoot, "overlap-merge-quality.sqlite");

  const repository = new SqliteIndexRepository(databasePath);
  await repository.migrate();
  await repository.upsertWorkspace({
    workspace_id: workspaceId,
    tenant_id: tenantId,
    name: "overlap-merge",
    project_root_path: projectRootPath
  });

  const writerCache = new InMemoryQueryCache();
  const withoutMergeCache = new InMemoryQueryCache();
  const withMergeCache = new InMemoryQueryCache();

  const writer = new RetrievalCore(repository, writerCache, {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 70,
      chunk_overlap_tokens: 60
    }
  });

  // Lexical-only reader over the shared repository; callers choose the path
  // boost and whether overlapping chunks are merged.
  const lexicalReader = (cache: InMemoryQueryCache, pathMatchBoost: number, mergeEnabled: boolean) =>
    new RetrievalCore(repository, cache, {
      scoringConfig: {
        candidate_weights: {
          lexical_weight: 1,
          vector_weight: 0,
          path_match_boost: pathMatchBoost,
          recency_boost: 0,
          generated_penalty: 0
        },
        rerank: {
          merge_overlapping_chunks_enabled: mergeEnabled
        }
      }
    });
  // NOTE(review): the baseline reader uses path_match_boost 0.2 while the
  // merging reader uses 0 — confirm this asymmetry is intentional.
  const withoutMerge = lexicalReader(withoutMergeCache, 0.2, false);
  const withMerge = lexicalReader(withMergeCache, 0, true);

  // Same query and top_k for both readers; only the trace id differs.
  const query = "overlapMergeSignal overlap chunk merge hotspot coverage fallback target";
  const topTwo = (reader: RetrievalCore, traceId: string) =>
    reader.searchContext({
      trace_id: traceId,
      tenant_id: tenantId,
      workspace_id: workspaceId,
      request: {
        project_root_path: projectRootPath,
        query,
        top_k: 2
      }
    });

  try {
    await writer.indexArtifact({
      tenant_id: tenantId,
      workspace_id: workspaceId,
      index_version: "idx-overlap-v1",
      files: [
        {
          path: "src/hotspot.ts",
          language: "typescript",
          content: buildOverlapHotspotFixture()
        },
        {
          path: "src/secondary.ts",
          language: "typescript",
          content:
            "export function overlapCoverageFallback(): string { return 'overlap merge hotspot coverage fallback target overlap chunk'; }"
        },
        {
          path: "src/noise.ts",
          language: "typescript",
          content: "export const NOISE = 'unrelated';"
        }
      ]
    });

    const baseline = await topTwo(withoutMerge, "trc-overlap-no-merge");
    const merged = await topTwo(withMerge, "trc-overlap-merge");

    // Without merging, both slots are taken by chunks of the hotspot file.
    const [baselineFirst, baselineSecond] = [baseline.results[0], baseline.results[1]];
    expect(baselineFirst?.path).toBe("src/hotspot.ts");
    expect(baselineSecond?.path).toBe("src/hotspot.ts");
    expect(baselineFirst).toBeDefined();
    expect(baselineSecond).toBeDefined();
    // The two baseline chunks must share at least one line.
    const overlapStart = Math.max(baselineFirst?.start_line ?? 0, baselineSecond?.start_line ?? 0);
    const overlapEnd = Math.min(baselineFirst?.end_line ?? 0, baselineSecond?.end_line ?? 0);
    expect(overlapStart).toBeLessThanOrEqual(overlapEnd);

    // With merging, the freed slot must go to a different file.
    expect(merged.results[0]?.path).toBe("src/hotspot.ts");
    expect(merged.results.some((row) => row.path === "src/secondary.ts")).toBe(true);
    const distinctMergedPaths = new Set(merged.results.map((row) => row.path)).size;
    const distinctBaselinePaths = new Set(baseline.results.map((row) => row.path)).size;
    expect(distinctMergedPaths).toBeGreaterThan(distinctBaselinePaths);
    const mergedHotspot = merged.results.find((row) => row.path === "src/hotspot.ts");
    expect((mergedHotspot?.end_line ?? 0) - (mergedHotspot?.start_line ?? 0)).toBeGreaterThan(40);
  } finally {
    repository.close();
  }
});
|
|
709
|
+
|
|
710
|
+
// Indexes three files with a sliding chunker (70-token target, 60-token overlap) and
// searches through a second core configured with merge_max_span_lines: 8.
// Asserts that src/fallback.ts is among the top-3 results and that any two results
// from the same file overlap by less than 20% of the shorter of the two ranges.
it("avoids heavy same-file overlap even when merge span cap prevents direct consolidation", async () => {
  const root = await mkdtemp(join(tmpdir(), "cue-overlap-selection-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "overlap-selection-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  // Everything after the repository is opened runs inside try/finally so the
  // repository is closed even when migration or workspace setup fails.
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-overlap-selection",
      tenant_id: "tenant-overlap-selection",
      name: "overlap-selection",
      project_root_path: "/workspace/overlap-selection"
    });

    // Writer core: only used to index, with heavily overlapping sliding chunks.
    const writer = new RetrievalCore(repo, new InMemoryQueryCache(), {
      chunkingConfig: {
        strategy: "sliding",
        target_chunk_tokens: 70,
        chunk_overlap_tokens: 60
      }
    });
    // Reader core: lexical-only scoring, merging enabled with a small span cap.
    const merged = new RetrievalCore(repo, new InMemoryQueryCache(), {
      scoringConfig: {
        candidate_weights: {
          lexical_weight: 1,
          vector_weight: 0,
          path_match_boost: 0,
          recency_boost: 0,
          generated_penalty: 0
        },
        rerank: {
          merge_overlapping_chunks_enabled: true,
          merge_max_span_lines: 8
        }
      }
    });

    await writer.indexArtifact({
      tenant_id: "tenant-overlap-selection",
      workspace_id: "ws-overlap-selection",
      index_version: "idx-overlap-selection-v1",
      files: [
        {
          path: "src/pressure.ts",
          language: "typescript",
          content: buildLargeOverlapPressureFixture()
        },
        {
          path: "src/fallback.ts",
          language: "typescript",
          content:
            "export function overlapPressureFallback(): string { return 'overlap pressure signal token cluster fallback target'; }"
        },
        {
          path: "src/extra.ts",
          language: "typescript",
          content:
            "export function overlapPressureExtra(): string { return 'overlap pressure signal token cluster extra coverage'; }"
        }
      ]
    });

    const retrieval = await merged.searchContext({
      trace_id: "trc-overlap-selection",
      tenant_id: "tenant-overlap-selection",
      workspace_id: "ws-overlap-selection",
      request: {
        project_root_path: "/workspace/overlap-selection",
        query: "overlap pressure signal token cluster fallback target",
        top_k: 3
      }
    });

    expect(retrieval.results.some((result) => result.path === "src/fallback.ts")).toBe(true);

    // Group the returned line ranges by file path.
    const byPath = new Map<string, Array<{ start: number; end: number }>>();
    for (const result of retrieval.results) {
      const rows = byPath.get(result.path);
      const range = { start: result.start_line, end: result.end_line };
      if (rows) {
        rows.push(range);
      } else {
        byPath.set(result.path, [range]);
      }
    }

    // Compare every unordered pair of ranges within the same file.
    for (const ranges of byPath.values()) {
      for (const [i, left] of ranges.entries()) {
        for (const right of ranges.slice(i + 1)) {
          const overlapStart = Math.max(left.start, right.start);
          const overlapEnd = Math.min(left.end, right.end);
          // Line ranges are inclusive, hence the +1; disjoint ranges count as zero.
          const overlapLength = overlapEnd >= overlapStart ? overlapEnd - overlapStart + 1 : 0;
          const leftLength = Math.max(1, left.end - left.start + 1);
          const rightLength = Math.max(1, right.end - right.start + 1);
          const smaller = Math.min(leftLength, rightLength);
          const overlapRatio = overlapLength / smaller;
          expect(overlapRatio).toBeLessThan(0.2);
        }
      }
    }
  } finally {
    repo.close();
  }
});
|
|
823
|
+
|
|
824
|
+
// Feeds four pre-ranked candidates through a stub repository: two contiguous ranges
// of src/circuitBreaker.ts (lines 1-207 and 208-400) followed by one chunk each from
// src/fallback.ts and src/estimator.ts. With top_k: 3 the test expects three results
// from three distinct files, and the circuitBreaker result to span lines 1-400.
it("absorbs contiguous same-file chunks during selection so top-k recovers file diversity", async () => {
  const timestamp = new Date().toISOString();

  // Builds one ranked candidate; every fixture here is lexical-only, so
  // lexical_score mirrors score and the remaining flags are constant.
  const candidate = (
    chunk_id: string,
    file_id: string,
    path: string,
    start_line: number,
    end_line: number,
    snippet: string,
    score: number
  ): RankedChunkCandidate => ({
    chunk_id,
    file_id,
    path,
    start_line,
    end_line,
    snippet,
    updated_at: timestamp,
    score,
    lexical_score: score,
    vector_score: 0,
    path_match: false,
    recency_boosted: true
  });

  const ranked: RankedChunkCandidate[] = [
    candidate(
      "chunk-circuit-1",
      "file-circuit",
      "src/circuitBreaker.ts",
      1,
      207,
      "export function circuitBreakerPrimary() { return 'circuit breaker primary'; }",
      0.98
    ),
    candidate(
      "chunk-circuit-2",
      "file-circuit",
      "src/circuitBreaker.ts",
      208,
      400,
      "export function circuitBreakerSecondary() { return 'circuit breaker secondary'; }",
      0.97
    ),
    candidate(
      "chunk-fallback",
      "file-fallback",
      "src/fallback.ts",
      1,
      80,
      "export function fallbackSignal() { return 'fallback target'; }",
      0.75
    ),
    candidate(
      "chunk-estimator",
      "file-estimator",
      "src/estimator.ts",
      1,
      70,
      "export function estimatorSignal() { return 'estimator target'; }",
      0.74
    )
  ];

  // Stub exposing only the four repository methods defined below.
  const stubRepository = {
    getLatestReadyIndex: async () => ({
      index_id: "idx-overlap-adjacent",
      workspace_id: "ws-overlap-adjacent",
      tenant_id: "tenant-overlap-adjacent",
      index_version: "idx-overlap-adjacent-v1",
      status: "ready" as const,
      created_at: timestamp,
      updated_at: timestamp
    }),
    getIndexMetadata: async () => ({
      index_id: "idx-overlap-adjacent",
      tenant_id: "tenant-overlap-adjacent",
      embedding_provider: "deterministic",
      embedding_model: "pseudo-sha256",
      embedding_dimensions: 24,
      embedding_version: "v1",
      chunking_strategy: "sliding" as const,
      chunking_fallback_strategy: "sliding" as const
    }),
    rankChunksByIndex: async () => ranked,
    listChunksByIndex: async () => []
  } as unknown as IndexRepository;

  const core = new RetrievalCore(stubRepository, new InMemoryQueryCache(), {
    scoringConfig: {
      candidate_weights: {
        lexical_weight: 1,
        vector_weight: 0,
        path_match_boost: 0,
        recency_boost: 0,
        generated_penalty: 0
      },
      rerank: {
        merge_overlapping_chunks_enabled: true,
        merge_max_span_lines: 40
      }
    }
  });

  const response = await core.searchContext({
    trace_id: "trc-overlap-adjacent-selection",
    tenant_id: "tenant-overlap-adjacent",
    workspace_id: "ws-overlap-adjacent",
    request: {
      project_root_path: "/workspace/overlap-adjacent",
      query: "circuit breaker fallback estimator",
      top_k: 3
    }
  });

  const isCircuitBreaker = (hit: { path: string }): boolean => hit.path === "src/circuitBreaker.ts";
  const paths = new Set(response.results.map((hit) => hit.path));

  expect(response.results).toHaveLength(3);
  expect(paths.size).toBe(3);
  expect(response.results.filter(isCircuitBreaker)).toHaveLength(1);
  expect(paths.has("src/fallback.ts")).toBe(true);
  expect(paths.has("src/estimator.ts")).toBe(true);

  // The single circuitBreaker result must cover both input ranges.
  const circuitHit = response.results.find(isCircuitBreaker);
  expect(circuitHit).toBeDefined();
  expect(circuitHit?.start_line).toBe(1);
  expect(circuitHit?.end_line).toBe(400);
});
|
|
948
|
+
|
|
949
|
+
// Indexes one file holding two functions separated by 60 filler lines, with
// chunk merging disabled and context packing enabled (max 2 spans per result,
// max_gap_lines: 90). Asserts that the single top result names both functions,
// contains a "..." marker, mentions "contextual spans" in its reason, and spans
// more than 20 lines.
it("packs same-file contextual spans with elision markers when context packing is enabled", async () => {
  const root = await mkdtemp(join(tmpdir(), "cue-context-packing-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "context-packing-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  // Setup runs inside try/finally so the repository is closed even when
  // migration or workspace setup fails.
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-context-pack",
      tenant_id: "tenant-context-pack",
      name: "context-pack",
      project_root_path: "/workspace/context-pack"
    });

    const cache = new InMemoryQueryCache();
    const core = new RetrievalCore(repo, cache, {
      chunkingConfig: {
        strategy: "sliding",
        target_chunk_tokens: 35,
        chunk_overlap_tokens: 8
      },
      scoringConfig: {
        rerank: {
          merge_overlapping_chunks_enabled: false
        }
      },
      contextPackingConfig: {
        enabled: true,
        max_spans_per_result: 2,
        max_gap_lines: 90,
        max_snippet_chars: 3_200,
        enhancer_snippet_char_limit: 2_200
      }
    });

    // Filler declarations that separate the two functions the query targets.
    const bridgeNoise = Array.from({ length: 60 }, (_, idx) => `const bridgeNoise${idx} = ${idx};`);
    await core.indexArtifact({
      tenant_id: "tenant-context-pack",
      workspace_id: "ws-context-pack",
      index_version: "idx-context-pack-v1",
      files: [
        {
          path: "packages/oracle/src/estimator.ts",
          language: "typescript",
          content: [
            "export function computeProbability(marketSignal: number): number {",
            " const base = marketSignal * 0.7;",
            " const normalized = Math.max(0, Math.min(1, base));",
            " return normalized;",
            "}",
            "",
            ...bridgeNoise,
            "",
            "export function fetchLlmInputs(llmInputFlow: string[]): string[] {",
            " return llmInputFlow.filter(Boolean);",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-context-pack",
      tenant_id: "tenant-context-pack",
      workspace_id: "ws-context-pack",
      request: {
        project_root_path: "/workspace/context-pack",
        query: "computeProbability marketSignal fetchLlmInputs llmInputFlow",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("packages/oracle/src/estimator.ts");
    // toContain reports the actual text on failure instead of "expected false to be true".
    expect(top?.snippet).toContain("computeProbability");
    expect(top?.snippet).toContain("fetchLlmInputs");
    expect(top?.snippet).toContain("...");
    expect(top?.reason).toContain("contextual spans");
    expect((top?.end_line ?? 0) - (top?.start_line ?? 0)).toBeGreaterThan(20);
  } finally {
    repo.close();
  }
});
|
|
1034
|
+
|
|
1035
|
+
// Indexes a TypeScript file whose first function has a 500-line body, with
// snippet integrity enabled for "typescript" (marker template "v1") and chunk
// merging disabled. Asserts that the output validates against
// SearchContextOutputSchema and that the top snippet has a "[truncated:v1" line
// carrying a non-"unknown" symbol, estimated_total_lines=, through_line= and a
// positive omitted_after count.
it("adds deterministic truncation metadata marker for broken TS function boundaries when enabled", async () => {
  const root = await mkdtemp(join(tmpdir(), "cue-snippet-integrity-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-integrity-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  // Setup runs inside try/finally so the repository is closed even when
  // migration or workspace setup fails.
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-snippet-integrity",
      tenant_id: "tenant-snippet-integrity",
      name: "snippet-integrity",
      project_root_path: "/workspace/snippet-integrity"
    });

    const cache = new InMemoryQueryCache();
    const core = new RetrievalCore(repo, cache, {
      chunkingConfig: {
        strategy: "sliding",
        target_chunk_tokens: 35,
        chunk_overlap_tokens: 8
      },
      scoringConfig: {
        rerank: {
          merge_overlapping_chunks_enabled: false
        }
      },
      snippetIntegrityConfig: {
        enabled: true,
        target_languages: ["typescript"],
        max_contiguous_gap_lines: 6,
        marker_template_version: "v1"
      }
    });

    // 500 statements forming the body of estimateProbability.
    const body = Array.from(
      { length: 500 },
      (_, idx) => ` const checkpoint_${idx} = input + ${idx}; // estimator continuity signal`
    );
    await core.indexArtifact({
      tenant_id: "tenant-snippet-integrity",
      workspace_id: "ws-snippet-integrity",
      index_version: "idx-snippet-integrity-v1",
      files: [
        {
          path: "src/estimator.ts",
          language: "typescript",
          content: [
            "export function estimateProbability(input: number): number {",
            ...body,
            " return input;",
            "}",
            "",
            "export function calibrateProbability(input: number): number {",
            " return estimateProbability(input + 1);",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-integrity",
      tenant_id: "tenant-snippet-integrity",
      workspace_id: "ws-snippet-integrity",
      request: {
        project_root_path: "/workspace/snippet-integrity",
        query: "estimateProbability checkpoint_20 checkpoint_430",
        top_k: 1
      }
    });

    expect(() => SearchContextOutputSchema.parse(retrieval)).not.toThrow();
    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("src/estimator.ts");

    // Locate the marker line, then check each metadata field on it.
    const markerLine = top?.snippet.split("\n").find((line) => line.includes("[truncated:v1"));
    expect(markerLine).toBeDefined();
    expect(markerLine).toMatch(/symbol=(?!unknown)[A-Za-z_$][\w$]*/);
    expect(markerLine).toContain("estimated_total_lines=");
    expect(markerLine).toContain("through_line=");
    // A missing omitted_after field falls back to "0", which fails the > 0 check.
    const omittedAfterMatch = markerLine?.match(/omitted_after=(\d+)/);
    expect(Number.parseInt(omittedAfterMatch?.[1] ?? "0", 10)).toBeGreaterThan(0);
  } finally {
    repo.close();
  }
});
|
|
1122
|
+
|
|
1123
|
+
// Indexes a single 120-statement TypeScript function with snippet integrity and
// repair enabled (repair_max_envelope_lines: 260, repair_max_snippet_chars: 8_000).
// Asserts that the top snippet contains both checkpoint_10 and checkpoint_110,
// carries no "[truncated:v1" marker, and spans more than 80 lines.
it("repairs contiguous TS snippets before annotation when repair is enabled", async () => {
  const root = await mkdtemp(join(tmpdir(), "cue-snippet-repair-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-repair-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  // Setup runs inside try/finally so the repository is closed even when
  // migration or workspace setup fails.
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-snippet-repair",
      tenant_id: "tenant-snippet-repair",
      name: "snippet-repair",
      project_root_path: "/workspace/snippet-repair"
    });

    const cache = new InMemoryQueryCache();
    const core = new RetrievalCore(repo, cache, {
      chunkingConfig: {
        strategy: "sliding",
        target_chunk_tokens: 35,
        chunk_overlap_tokens: 8
      },
      scoringConfig: {
        rerank: {
          merge_overlapping_chunks_enabled: false
        }
      },
      snippetIntegrityConfig: {
        enabled: true,
        target_languages: ["typescript"],
        max_contiguous_gap_lines: 6,
        marker_template_version: "v1",
        repair_enabled: true,
        repair_max_envelope_lines: 260,
        repair_max_snippet_chars: 8_000
      }
    });

    // 120 statements forming the body of estimateRepair.
    const body = Array.from(
      { length: 120 },
      (_, idx) => ` const checkpoint_${idx} = input + ${idx}; // repair continuity signal`
    );
    await core.indexArtifact({
      tenant_id: "tenant-snippet-repair",
      workspace_id: "ws-snippet-repair",
      index_version: "idx-snippet-repair-v1",
      files: [
        {
          path: "src/repair.ts",
          language: "typescript",
          content: [
            "export function estimateRepair(input: number): number {",
            ...body,
            " return input;",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-repair",
      tenant_id: "tenant-snippet-repair",
      workspace_id: "ws-snippet-repair",
      request: {
        project_root_path: "/workspace/snippet-repair",
        query: "estimateRepair checkpoint_10 checkpoint_110",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("src/repair.ts");
    // toContain reports the actual snippet on failure instead of a bare boolean mismatch.
    expect(top?.snippet).toContain("checkpoint_10");
    expect(top?.snippet).toContain("checkpoint_110");
    expect(top?.snippet).not.toContain("[truncated:v1");
    expect((top?.end_line ?? 0) - (top?.start_line ?? 0)).toBeGreaterThan(80);
  } finally {
    repo.close();
  }
});
|
|
1205
|
+
|
|
1206
|
+
// Same shape as the repair test, but with repair_max_snippet_chars lowered to 220.
// Asserts that the top result for src/repair-fallback.ts carries a
// "[truncated:v1" marker.
it("falls back to truncation marker when repair output still exceeds caps", async () => {
  const root = await mkdtemp(join(tmpdir(), "cue-snippet-repair-fallback-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-repair-fallback.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  // Setup runs inside try/finally so the repository is closed even when
  // migration or workspace setup fails.
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-snippet-repair-fallback",
      tenant_id: "tenant-snippet-repair-fallback",
      name: "snippet-repair-fallback",
      project_root_path: "/workspace/snippet-repair-fallback"
    });

    const cache = new InMemoryQueryCache();
    const core = new RetrievalCore(repo, cache, {
      chunkingConfig: {
        strategy: "sliding",
        target_chunk_tokens: 35,
        chunk_overlap_tokens: 8
      },
      scoringConfig: {
        rerank: {
          merge_overlapping_chunks_enabled: false
        }
      },
      snippetIntegrityConfig: {
        enabled: true,
        target_languages: ["typescript"],
        max_contiguous_gap_lines: 6,
        marker_template_version: "v1",
        repair_enabled: true,
        repair_max_envelope_lines: 260,
        repair_max_snippet_chars: 220
      }
    });

    // 120 statements forming the body of estimateRepairFallback.
    const body = Array.from(
      { length: 120 },
      (_, idx) => ` const checkpoint_${idx} = input + ${idx}; // repair fallback signal`
    );
    await core.indexArtifact({
      tenant_id: "tenant-snippet-repair-fallback",
      workspace_id: "ws-snippet-repair-fallback",
      index_version: "idx-snippet-repair-fallback-v1",
      files: [
        {
          path: "src/repair-fallback.ts",
          language: "typescript",
          content: [
            "export function estimateRepairFallback(input: number): number {",
            ...body,
            " return input;",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-repair-fallback",
      tenant_id: "tenant-snippet-repair-fallback",
      workspace_id: "ws-snippet-repair-fallback",
      request: {
        project_root_path: "/workspace/snippet-repair-fallback",
        query: "estimateRepairFallback checkpoint_10 checkpoint_110",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("src/repair-fallback.ts");
    // toContain reports the actual snippet on failure instead of a bare boolean mismatch.
    expect(top?.snippet).toContain("[truncated:v1");
  } finally {
    repo.close();
  }
});
|
|
1285
|
+
|
|
1286
|
+
// Indexes a Go file with a 220-statement function while snippet integrity
// targets only "typescript". Asserts that the top result for
// pkg/runtime/estimate.go carries no "[truncated:v1" marker.
it("does not add truncation marker for non-target languages", async () => {
  const root = await mkdtemp(join(tmpdir(), "cue-snippet-integrity-language-gate-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-integrity-language-gate.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  // Setup runs inside try/finally so the repository is closed even when
  // migration or workspace setup fails.
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-snippet-language-gate",
      tenant_id: "tenant-snippet-language-gate",
      name: "snippet-language-gate",
      project_root_path: "/workspace/snippet-language-gate"
    });

    const cache = new InMemoryQueryCache();
    const core = new RetrievalCore(repo, cache, {
      chunkingConfig: {
        strategy: "language_aware",
        target_chunk_tokens: 40,
        chunk_overlap_tokens: 8,
        enabled_languages: ["go"]
      },
      snippetIntegrityConfig: {
        enabled: true,
        target_languages: ["typescript"],
        max_contiguous_gap_lines: 6,
        marker_template_version: "v1"
      }
    });

    // 220 tab-indented statements forming the body of EstimateProbability.
    const steps = Array.from({ length: 220 }, (_, idx) => `\ttotal += input + ${idx}`);
    await core.indexArtifact({
      tenant_id: "tenant-snippet-language-gate",
      workspace_id: "ws-snippet-language-gate",
      index_version: "idx-snippet-language-gate-v1",
      files: [
        {
          path: "pkg/runtime/estimate.go",
          language: "go",
          content: [
            "package runtime",
            "",
            "func EstimateProbability(input int) int {",
            "\ttotal := 0",
            ...steps,
            "\treturn total",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-language-gate",
      tenant_id: "tenant-snippet-language-gate",
      workspace_id: "ws-snippet-language-gate",
      request: {
        project_root_path: "/workspace/snippet-language-gate",
        query: "EstimateProbability input 200",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("pkg/runtime/estimate.go");
    // not.toContain reports the actual snippet on failure instead of a bare boolean mismatch.
    expect(top?.snippet).not.toContain("[truncated:v1");
  } finally {
    repo.close();
  }
});
|
|
1358
|
+
});
|