@rce-mcp/retrieval-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +19 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +50 -0
- package/dist/chunking.js +520 -0
- package/dist/index.d.ts +390 -0
- package/dist/index.js +3417 -0
- package/dist/remote-sync.d.ts +116 -0
- package/dist/remote-sync.js +476 -0
- package/package.json +33 -0
- package/scripts/poc-node-parser-host.cjs +101 -0
- package/scripts/poc-parser-availability-benchmark.ts +290 -0
- package/src/chunking.ts +641 -0
- package/src/index.ts +4338 -0
- package/src/remote-sync.ts +651 -0
- package/test/benchmark.thresholds.test.ts +752 -0
- package/test/chunking.language-aware.test.ts +279 -0
- package/test/chunking.parser-availability.poc.test.ts +60 -0
- package/test/embedding-provider.test.ts +121 -0
- package/test/enhance-confidence.test.ts +357 -0
- package/test/integration.test.ts +324 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/remote-sync.integration.test.ts +177 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { InMemoryQueryCache } from "@rce-mcp/data-plane";
|
|
3
|
+
import { InMemoryIndexStore, RetrievalCore } from "../src/index.js";
|
|
4
|
+
|
|
5
|
+
describe("enhance prompt confidence controls", () => {
|
|
6
|
+
// Restore every spy created with vi.spyOn once a test finishes, so stubs installed by one
// case are not still active in the next.
afterEach(function restoreSpies() {
  vi.restoreAllMocks();
});
|
|
9
|
+
|
|
10
|
+
// Scenario: the stubbed search returns five hits whose scores sit within 0.702–0.71 while the
// conversation history asks to avoid docs and tests. The enhanced prompt must warn about low
// confidence, ask at least one question, and keep only non-docs/test/examples references.
it("adds clarification questions and filters risky refs when retrieval confidence is low", async () => {
  const traceId = "trc-low-confidence";
  const retrieval = new RetrievalCore(new InMemoryIndexStore(), new InMemoryQueryCache());

  // Replace the search stage with a fixed result set; nothing is actually indexed.
  vi.spyOn(retrieval, "searchContext").mockResolvedValue({
    trace_id: traceId,
    results: [
      {
        path: "docs/reference/hooks.md", start_line: 1, end_line: 20,
        snippet: "addHook lifecycle ordering and examples", score: 0.71, reason: "semantic match"
      },
      {
        path: "test/hooks.test.js", start_line: 1, end_line: 25,
        snippet: "addHook lifecycle should preserve plugin encapsulation", score: 0.707, reason: "path and token overlap"
      },
      {
        path: "lib/hooks.js", start_line: 1, end_line: 40,
        snippet: "function addHook(name, fn) { return hooks.add(name, fn) }", score: 0.704, reason: "exact symbol match"
      },
      {
        path: "examples/hooks.js", start_line: 1, end_line: 22,
        snippet: "example for addHook lifecycle behavior", score: 0.703, reason: "semantic match"
      },
      {
        path: "lib/error-handler.js", start_line: 1, end_line: 30,
        snippet: "function setErrorHandler(fn) { return fn }", score: 0.702, reason: "path and token overlap"
      }
    ],
    search_metadata: { latency_ms: 15, retrieval_mode: "hybrid", index_version: "idx-1" }
  });

  const { warnings, questions, context_refs: refs } = await retrieval.enhancePrompt({
    trace_id: traceId,
    tenant_id: "tenant-dev",
    workspace_id: "ws-dev",
    request: {
      prompt: "Refine addHook lifecycle runtime behavior and preserve plugin compatibility.",
      project_root_path: "/workspace/dev",
      conversation_history: [{ role: "user", content: "Avoid docs and tests as primary context." }]
    }
  });

  // A low-confidence warning and at least one clarification question must be present.
  const lowConfidence = /Low retrieval confidence/i;
  expect(warnings.some((text) => lowConfidence.test(text))).toBe(true);
  expect(questions.length).toBeGreaterThan(0);

  // The implementation file survives; docs/, test/ and examples/ paths are filtered out.
  const riskyPrefix = /^docs\/|^test\/|^examples\//;
  expect(refs.length).toBeGreaterThan(0);
  expect(refs.some(({ path }) => path === "lib/hooks.js")).toBe(true);
  expect(refs.every(({ path }) => !riskyPrefix.test(path))).toBe(true);
});
|
|
80
|
+
|
|
81
|
+
// Scenario: the prompt itself is generic, but the conversation history names a symbol
// (`setErrorHandler`) and candidate files. The query handed to searchContext must carry those
// hints and be classified as symbol-heavy.
it("builds enhancer retrieval query from history symbols and implementation hints", async () => {
  const traceId = "trc-query-build";
  const retrieval = new RetrievalCore(new InMemoryIndexStore(), new InMemoryQueryCache());

  // Keep a handle on the spy: the assertions inspect the arguments it was called with.
  const searchStub = vi.spyOn(retrieval, "searchContext").mockResolvedValue({
    trace_id: traceId,
    results: [
      {
        path: "lib/error-handler.js", start_line: 1, end_line: 20,
        snippet: "function setErrorHandler(fn) { return fn }", score: 0.95, reason: "exact symbol match"
      },
      {
        path: "lib/hooks.js", start_line: 1, end_line: 20,
        snippet: "function addHook(name, fn) { return hooks.add(name, fn) }", score: 0.3, reason: "path and token overlap"
      },
      {
        path: "fastify.js", start_line: 1, end_line: 20,
        snippet: "function setErrorHandler(fn) { return buildErrorHandler(fn) }", score: 0.28, reason: "path and token overlap"
      }
    ],
    search_metadata: { latency_ms: 10, retrieval_mode: "hybrid", index_version: "idx-2" }
  });

  await retrieval.enhancePrompt({
    trace_id: traceId,
    tenant_id: "tenant-dev",
    workspace_id: "ws-dev",
    request: {
      prompt: "Improve request pipeline error handling and keep behavior stable.",
      project_root_path: "/workspace/dev",
      conversation_history: [
        { role: "user", content: "Focus on `setErrorHandler` and onRequest ordering." },
        { role: "assistant", content: "Likely files include lib/error-handler.js and fastify.js." }
      ]
    }
  });

  expect(searchStub).toHaveBeenCalledTimes(1);

  // Falls back to "" so the string matchers below fail cleanly if no call was recorded.
  const [firstCall] = searchStub.mock.calls;
  const builtQuery = firstCall?.[0].request.query ?? "";
  expect(builtQuery).toContain("setErrorHandler");
  expect(builtQuery).toContain("lib/error-handler.js");
  expect(builtQuery).toMatch(/query_intent:\s+symbol-heavy/);
  expect(builtQuery).toMatch(/retrieval_hints:\s+.*setErrorHandler.*lib\/error-handler\.js/);
});
|
|
139
|
+
|
|
140
|
+
// Scenario: a prompt made of back-ticked symbols, with max_candidates_pre_rerank set to 4 and
// four stubbed hits. The query must be tagged symbol-heavy and at most three refs may be returned.
it("routes symbol-heavy prompts to tighter pre-rerank caps", async () => {
  const traceId = "trc-symbol-intent-cap";
  const retrieval = new RetrievalCore(new InMemoryIndexStore(), new InMemoryQueryCache(), {
    enhancerConfig: { max_candidates_pre_rerank: 4 }
  });

  const searchStub = vi.spyOn(retrieval, "searchContext").mockResolvedValue({
    trace_id: traceId,
    results: [
      {
        path: "lib/error-handler.js", start_line: 1, end_line: 20,
        snippet: "function setErrorHandler(fn) { return buildErrorHandler(fn) }", score: 0.92, reason: "exact symbol match"
      },
      {
        path: "src/server.ts", start_line: 1, end_line: 20,
        snippet: "addHook onRequest preHandler onError lifecycle", score: 0.9, reason: "path and token overlap"
      },
      {
        path: "internal/router.ts", start_line: 1, end_line: 20,
        snippet: "registerRoute and dispatchRequest internals", score: 0.89, reason: "path and token overlap"
      },
      {
        path: "app/runtime.ts", start_line: 1, end_line: 20,
        snippet: "runtime hook orchestration", score: 0.88, reason: "semantic match"
      }
    ],
    search_metadata: { latency_ms: 10, retrieval_mode: "hybrid", index_version: "idx-symbol-intent" }
  });

  const enhanced = await retrieval.enhancePrompt({
    trace_id: traceId,
    tenant_id: "tenant-dev",
    workspace_id: "ws-dev",
    request: {
      prompt: "Refine `setErrorHandler` + `addHook` + `onRequest` + `preHandler` flow.",
      project_root_path: "/workspace/dev",
      conversation_history: [{ role: "user", content: "Target exact lifecycle symbols only." }]
    }
  });

  // Inspect the query that was forwarded to the (stubbed) search stage.
  const [firstCall] = searchStub.mock.calls;
  const builtQuery = firstCall?.[0].request.query ?? "";
  expect(builtQuery).toMatch(/query_intent:\s+symbol-heavy/);
  expect(enhanced.context_refs.length).toBeLessThanOrEqual(3);
});
|
|
204
|
+
|
|
205
|
+
// Scenario: a high-level "explain the architecture" prompt. The query must be tagged conceptual
// and documentation paths must still be allowed into the returned context refs.
it("routes conceptual prompts without forcing implementation-only filtering", async () => {
  const traceId = "trc-conceptual-intent";
  const retrieval = new RetrievalCore(new InMemoryIndexStore(), new InMemoryQueryCache());

  const searchStub = vi.spyOn(retrieval, "searchContext").mockResolvedValue({
    trace_id: traceId,
    results: [
      {
        path: "docs/architecture/retrieval.md", start_line: 1, end_line: 30,
        snippet: "retrieval architecture and ranking tradeoffs", score: 0.91, reason: "path and token overlap"
      },
      {
        path: "src/retrieval/rerank.ts", start_line: 1, end_line: 20,
        snippet: "rerank implementation details", score: 0.89, reason: "semantic match"
      }
    ],
    search_metadata: { latency_ms: 9, retrieval_mode: "hybrid", index_version: "idx-conceptual-intent" }
  });

  const enhanced = await retrieval.enhancePrompt({
    trace_id: traceId,
    tenant_id: "tenant-dev",
    workspace_id: "ws-dev",
    request: {
      prompt: "Explain architecture tradeoffs for retrieval ranking and docs guidance.",
      project_root_path: "/workspace/dev",
      conversation_history: [{ role: "user", content: "Need high-level conceptual guidance." }]
    }
  });

  const [firstCall] = searchStub.mock.calls;
  const builtQuery = firstCall?.[0].request.query ?? "";
  expect(builtQuery).toMatch(/query_intent:\s+conceptual/);

  // At least one docs/ reference must survive for a conceptual request.
  const keepsDocs = enhanced.context_refs.some(({ path }) => path.startsWith("docs/"));
  expect(keepsDocs).toBe(true);
});
|
|
249
|
+
|
|
250
|
+
// Scenario: the prompt explicitly says to avoid docs/tests/examples/archive context. The four
// highest-scored stubbed hits live in exactly those locations; only the lowest-scored
// implementation file (src/flask/blueprints.py) may remain, and it must come first.
it("applies negation-aware filtering for docs/tests/examples/archive paths", async () => {
  const traceId = "trc-negation-filter";
  const retrieval = new RetrievalCore(new InMemoryIndexStore(), new InMemoryQueryCache());

  vi.spyOn(retrieval, "searchContext").mockResolvedValue({
    trace_id: traceId,
    results: [
      {
        path: "docs/reference/blueprints.md", start_line: 1, end_line: 20,
        snippet: "blueprints runtime design notes", score: 0.93, reason: "path and token overlap"
      },
      {
        path: "tests/blueprints.test.py", start_line: 1, end_line: 20,
        snippet: "test blueprint registration order", score: 0.92, reason: "path and token overlap"
      },
      {
        path: "examples/blueprint_demo.py", start_line: 1, end_line: 20,
        snippet: "example blueprint usage", score: 0.91, reason: "semantic match"
      },
      {
        path: "src/flask/_archive/blueprints_legacy.py", start_line: 1, end_line: 30,
        snippet: "legacy blueprint implementation", score: 0.9, reason: "semantic match"
      },
      {
        path: "src/flask/blueprints.py", start_line: 1, end_line: 40,
        snippet: "def register_blueprint(app, blueprint): return app", score: 0.89, reason: "exact symbol match"
      }
    ],
    search_metadata: { latency_ms: 12, retrieval_mode: "hybrid", index_version: "idx-negation" }
  });

  const { context_refs: refs } = await retrieval.enhancePrompt({
    trace_id: traceId,
    tenant_id: "tenant-dev",
    workspace_id: "ws-dev",
    request: {
      prompt: "Improve blueprints runtime behavior and avoid docs/tests/examples/archive context.",
      project_root_path: "/workspace/dev",
      conversation_history: [{ role: "user", content: "Keep implementation focused." }]
    }
  });

  // Unanchored on purpose: the excluded directory may appear anywhere in the path
  // (e.g. src/flask/_archive/...).
  const excludedSegment = /docs\/|tests\/|examples\/|_archive\//;
  expect(refs.length).toBeGreaterThan(0);
  expect(refs[0]?.path).toBe("src/flask/blueprints.py");
  expect(refs.every(({ path }) => !excludedSegment.test(path))).toBe(true);
});
|
|
318
|
+
|
|
319
|
+
// Scenario: max_expansion_hints is configured to 6 and the stubbed search returns no results.
// The retrieval query must contain the path-form expansion "json/provider", and the
// whitespace-separated tokens on its `retrieval_hints:` line must not exceed the configured cap.
it("caps expansion hints and emits path-form hint expansions", async () => {
  // Single source of truth for the cap: used in the config and in the final assertion.
  const maxHints = 6;
  const core = new RetrievalCore(new InMemoryIndexStore(), new InMemoryQueryCache(), {
    enhancerConfig: {
      max_expansion_hints: maxHints
    }
  });
  const searchSpy = vi.spyOn(core, "searchContext").mockResolvedValue({
    trace_id: "trc-hint-cap",
    results: [],
    search_metadata: {
      latency_ms: 8,
      retrieval_mode: "hybrid",
      index_version: "idx-cap"
    }
  });

  await core.enhancePrompt({
    trace_id: "trc-hint-cap",
    tenant_id: "tenant-dev",
    workspace_id: "ws-dev",
    request: {
      prompt: "Harden JSON provider and blueprints runtime behavior without docs.",
      project_root_path: "/workspace/dev",
      conversation_history: [{ role: "user", content: "Focus on `render_template_string` and json provider internals." }]
    }
  });

  const retrievalQuery = searchSpy.mock.calls[0]?.[0].request.query ?? "";
  expect(retrievalQuery).toContain("json/provider");

  const hintLine = retrievalQuery
    .split("\n")
    .find((line) => line.startsWith("retrieval_hints:"));
  // Guard against a vacuous pass: without this, a missing hints line would be counted as
  // zero hints and trivially satisfy the cap below.
  expect(hintLine).toBeDefined();

  const hintCount = (hintLine ?? "")
    .replace(/^retrieval_hints:\s*/u, "")
    .split(/\s+/)
    .filter(Boolean).length;
  expect(hintCount).toBeLessThanOrEqual(maxHints);
});
|
|
357
|
+
});
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
2
|
+
import { newDb } from "pg-mem";
|
|
3
|
+
import { InMemoryQueryCache, PostgresIndexRepository } from "@rce-mcp/data-plane";
|
|
4
|
+
import { DeterministicEmbeddingProvider, RetrievalCore, RetrievalError } from "../src/index.js";
|
|
5
|
+
|
|
6
|
+
describe("retrieval-core postgres integration", () => {
|
|
7
|
+
// Repository under test; rebuilt by beforeEach on top of a new pg-mem database.
let repository: PostgresIndexRepository;
// Ends the current test's pool. Undefined until beforeEach has created a pool, and reset
// after every teardown so a stale pool is never ended twice.
let close: (() => Promise<void>) | undefined;

beforeEach(async () => {
  const db = newDb({ autoCreateForeignKeyIndices: true });
  const { Pool } = db.adapters.createPg();
  const pool = new Pool();

  // Register teardown before migrating so the pool is still ended if migrate() rejects.
  close = async () => {
    await pool.end();
  };

  repository = new PostgresIndexRepository(pool, {
    preferPgVector: false,
    chunkEmbeddingDimensions: 24
  });
  await repository.migrate();
});

afterEach(async () => {
  // Detach the handle first; if setup failed before a pool existed there is nothing to end.
  const teardown = close;
  close = undefined;
  await teardown?.();
});
|
|
29
|
+
|
|
30
|
+
// Scenario: index the same workspace twice. The first pass adds two files and skips one that
// contains a secret; the second pass modifies one file, adds one, drops one, and again skips the
// secret file. Search must then be served from the second index version.
it("handles indexing lifecycle with added/modified/deleted and secret exclusion", async () => {
  const scope = { tenant_id: "tenant-a", workspace_id: "ws-a" };

  await repository.upsertWorkspace({
    ...scope,
    name: "workspace-a",
    project_root_path: "/workspace/a"
  });

  const retrieval = new RetrievalCore(repository, new InMemoryQueryCache());

  // --- First index version -------------------------------------------------------------
  const firstReport = await retrieval.indexArtifact({
    ...scope,
    index_version: "idx-v1",
    files: [
      {
        path: "src/app.ts",
        language: "typescript",
        content: "export function add(a: number, b: number) { return a + b; }"
      },
      {
        path: "src/utils.ts",
        language: "typescript",
        content: "export function clamp(v: number) { return Math.max(0, v); }"
      },
      {
        path: "src/secrets.env",
        content: "AWS_ACCESS_KEY_ID=AKIAABCDEFGHIJKLMNOP"
      }
    ]
  });

  expect(firstReport.counts.added).toBe(2);
  expect(firstReport.counts.skipped).toBe(1);
  expect(firstReport.warnings[0]?.category).toBe("secret_exclusion");

  // The stored index metadata must record the chunking and embedding configuration.
  const readyIndex = await repository.getLatestReadyIndex({ ...scope });
  expect(readyIndex).toBeDefined();
  const storedMetadata = await repository.getIndexMetadata?.({
    tenant_id: scope.tenant_id,
    index_id: readyIndex!.index_id
  });
  expect(storedMetadata?.chunking_strategy).toBe("sliding");
  expect(storedMetadata?.embedding_provider).toBe("deterministic");
  expect(storedMetadata?.embedding_model).toBe("pseudo-sha256");
  expect(storedMetadata?.embedding_dimensions).toBe(24);

  // --- Second index version: app.ts changed, new.ts added, utils.ts absent ---------------
  const secondReport = await retrieval.indexArtifact({
    ...scope,
    index_version: "idx-v2",
    files: [
      {
        path: "src/app.ts",
        language: "typescript",
        content: "export function add(a: number, b: number) { return a + b + 1; }"
      },
      {
        path: "src/new.ts",
        language: "typescript",
        content: "export function multiply(a: number, b: number) { return a * b; }"
      },
      {
        path: "src/secrets.env",
        content: "Bearer abcdefghijklmnopqrstuvwxyz12345"
      }
    ]
  });

  expect(secondReport.counts.modified).toBe(1);
  expect(secondReport.counts.added).toBe(1);
  expect(secondReport.counts.deleted).toBe(1);
  expect(secondReport.counts.skipped).toBe(1);

  // --- Retrieval must come from the newest version ---------------------------------------
  const found = await retrieval.searchContext({
    trace_id: "trc-test",
    ...scope,
    request: {
      project_root_path: "/workspace/a",
      query: "multiply function",
      top_k: 8
    }
  });

  expect(found.search_metadata.index_version).toBe("idx-v2");
  expect(found.results.some(({ path }) => path === "src/new.ts")).toBe(true);
});
|
|
121
|
+
|
|
122
|
+
// Scenario: one RetrievalCore instance writes the index, and a second instance (sharing only the
// repository, with its own query cache) must be able to search it.
it("serves retrieval from persisted index across retrieval-core instances", async () => {
  const scope = { tenant_id: "tenant-a", workspace_id: "ws-a" };

  await repository.upsertWorkspace({
    ...scope,
    name: "workspace-a",
    project_root_path: "/workspace/a"
  });

  // Writer side: index a single file.
  const indexer = new RetrievalCore(repository, new InMemoryQueryCache());
  await indexer.indexArtifact({
    ...scope,
    index_version: "idx-v1",
    files: [
      {
        path: "src/db.ts",
        language: "typescript",
        content: "export const DB_NAME = 'primary';"
      }
    ]
  });

  // Reader side: a separate instance with a separate cache.
  const searcher = new RetrievalCore(repository, new InMemoryQueryCache());
  const { results } = await searcher.searchContext({
    trace_id: "trc-read",
    ...scope,
    request: {
      project_root_path: "/workspace/a",
      query: "DB_NAME",
      top_k: 8
    }
  });

  expect(results.length).toBeGreaterThan(0);
  expect(results[0]?.path).toBe("src/db.ts");
});
|
|
159
|
+
|
|
160
|
+
// Scenario: two tenants each own one workspace with one indexed file. A tenant can search its own
// workspace, but pairing tenant-b with tenant-a's workspace must be rejected with NOT_FOUND.
it("enforces tenant isolation on retrieval", async () => {
  // Register workspace a, then workspace b (same order as the indexing below).
  for (const suffix of ["a", "b"]) {
    await repository.upsertWorkspace({
      workspace_id: `ws-${suffix}`,
      tenant_id: `tenant-${suffix}`,
      name: `workspace-${suffix}`,
      project_root_path: `/workspace/${suffix}`
    });
  }

  const retrieval = new RetrievalCore(repository, new InMemoryQueryCache());

  await retrieval.indexArtifact({
    tenant_id: "tenant-a",
    workspace_id: "ws-a",
    index_version: "idx-a1",
    files: [{ path: "src/a.ts", content: "export const TENANT_A = true;" }]
  });

  await retrieval.indexArtifact({
    tenant_id: "tenant-b",
    workspace_id: "ws-b",
    index_version: "idx-b1",
    files: [{ path: "src/b.ts", content: "export const TENANT_B = true;" }]
  });

  // Positive control: tenant-b can read its own workspace.
  const ownWorkspace = await retrieval.searchContext({
    trace_id: "trc-b",
    tenant_id: "tenant-b",
    workspace_id: "ws-b",
    request: {
      project_root_path: "/workspace/b",
      query: "TENANT_B",
      top_k: 8
    }
  });
  expect(ownWorkspace.results[0]?.path).toBe("src/b.ts");

  // Cross-tenant access: tenant-b asking for tenant-a's workspace.
  const crossTenantSearch = retrieval.searchContext({
    trace_id: "trc-cross",
    tenant_id: "tenant-b",
    workspace_id: "ws-a",
    request: {
      project_root_path: "/workspace/a",
      query: "TENANT_A",
      top_k: 8
    }
  });
  await expect(crossTenantSearch).rejects.toMatchObject({
    code: "NOT_FOUND"
  } satisfies Partial<RetrievalError>);
});
|
|
217
|
+
|
|
218
|
+
// Scenario: RetrievalCore is given a custom DeterministicEmbeddingProvider (model, dimensions,
// version). After indexing, the stored index metadata must echo exactly those settings.
it("persists configured embedding metadata on index finalize", async () => {
  const scope = { tenant_id: "tenant-a", workspace_id: "ws-a" };

  await repository.upsertWorkspace({
    ...scope,
    name: "workspace-a",
    project_root_path: "/workspace/a"
  });

  const customProvider = new DeterministicEmbeddingProvider({
    model: "pseudo-sha256-custom",
    dimensions: 32,
    version: "v2"
  });
  const retrieval = new RetrievalCore(repository, new InMemoryQueryCache(), {
    embeddingProvider: customProvider
  });

  await retrieval.indexArtifact({
    ...scope,
    index_version: "idx-meta",
    files: [{ path: "src/a.ts", content: "export const A = 1;" }]
  });

  const readyIndex = await repository.getLatestReadyIndex({ ...scope });
  expect(readyIndex).toBeDefined();

  // getIndexMetadata is invoked through optional chaining, as the method may be absent.
  const stored = await repository.getIndexMetadata?.({
    tenant_id: scope.tenant_id,
    index_id: readyIndex!.index_id
  });
  expect(stored?.embedding_provider).toBe("deterministic");
  expect(stored?.embedding_model).toBe("pseudo-sha256-custom");
  expect(stored?.embedding_dimensions).toBe(32);
  expect(stored?.embedding_version).toBe("v2");
});
|
|
255
|
+
|
|
256
|
+
// Scenario: an index is written with the default embedding configuration, then queried by readers
// whose embeddingDescriptor differs in provider, model, or dimensions. Every mismatch must be
// rejected with INVALID_ARGUMENT and a message that mentions "reindex required".
it("returns INVALID_ARGUMENT when query embedding config is incompatible with latest ready index", async () => {
  await repository.upsertWorkspace({
    workspace_id: "ws-a",
    tenant_id: "tenant-a",
    name: "workspace-a",
    project_root_path: "/workspace/a"
  });

  const writer = new RetrievalCore(repository, new InMemoryQueryCache());
  await writer.indexArtifact({
    tenant_id: "tenant-a",
    workspace_id: "ws-a",
    index_version: "idx-v1",
    files: [{ path: "src/a.ts", content: "export const A = 1;" }]
  });

  // Shared search request; only the reader's embedding descriptor varies between cases.
  const request = {
    trace_id: "trc-mismatch",
    tenant_id: "tenant-a",
    workspace_id: "ws-a",
    request: {
      project_root_path: "/workspace/a",
      query: "A constant",
      top_k: 5
    }
  } as const;

  // Each case changes exactly one descriptor field relative to the index written above
  // (the sibling lifecycle test expects deterministic / pseudo-sha256 / 24 for the defaults).
  const cases = [
    {
      name: "provider mismatch",
      options: {
        embeddingDescriptor: {
          provider: "openai_compatible",
          model: "Qwen/Qwen3-Embedding-4B",
          dimensions: 24
        }
      }
    },
    {
      name: "model mismatch",
      options: {
        embeddingDescriptor: {
          provider: "deterministic",
          model: "pseudo-sha256-v2",
          dimensions: 24
        }
      }
    },
    {
      name: "dimension mismatch",
      options: {
        embeddingDescriptor: {
          provider: "deterministic",
          model: "pseudo-sha256",
          dimensions: 64
        }
      }
    }
  ] as const;

  for (const testCase of cases) {
    const reader = new RetrievalCore(repository, new InMemoryQueryCache(), testCase.options);
    // The case name is passed as the custom expect message so a failure inside this loop
    // identifies which descriptor variant was not rejected as expected.
    await expect(reader.searchContext(request), testCase.name).rejects.toMatchObject({
      code: "INVALID_ARGUMENT"
    } satisfies Partial<RetrievalError>);
    await expect(reader.searchContext(request), testCase.name).rejects.toThrow(/reindex required/i);
  }
});
|
|
324
|
+
});
|