@cue-dev/retrieval-core 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +27 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +64 -0
- package/dist/chunking.js +983 -0
- package/dist/index.d.ts +673 -0
- package/dist/index.js +6605 -0
- package/dist/indexing-ignore.d.ts +9 -0
- package/dist/indexing-ignore.js +151 -0
- package/dist/remote-sync.d.ts +193 -0
- package/dist/remote-sync.js +816 -0
- package/package.json +37 -0
- package/scripts/poc-node-parser-host.cjs +105 -0
- package/scripts/poc-parser-availability-benchmark.ts +338 -0
- package/src/chunking.ts +1187 -0
- package/src/index.ts +8338 -0
- package/src/indexing-ignore.ts +179 -0
- package/src/remote-sync.ts +1119 -0
- package/test/benchmark.thresholds.test.ts +815 -0
- package/test/chunking.config.test.ts +84 -0
- package/test/chunking.language-aware.test.ts +1248 -0
- package/test/chunking.parser-availability.poc.test.ts +86 -0
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/embedding-provider.test.ts +570 -0
- package/test/enhance-confidence.test.ts +752 -0
- package/test/index-prep.concurrency.regression.test.ts +142 -0
- package/test/integration.test.ts +508 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/mcp-search-quality.regression.test.ts +1358 -0
- package/test/remote-sync.integration.test.ts +350 -0
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
5
|
+
import { SqliteIndexRepository, SqliteQueryCache } from "@cue-dev/data-plane";
|
|
6
|
+
import { RetrievalCore } from "../src/index.js";
|
|
7
|
+
|
|
8
|
+
describe("retrieval-core local sqlite integration", () => {
|
|
9
|
+
// Registry of temporary directories created by the tests in this suite.
// Each test pushes the directory it creates; the hook below drains it.
const dirs: string[] = [];

// After every test, remove all registered temporary directories, most
// recently registered first, one at a time.
afterEach(async () => {
  for (;;) {
    if (dirs.length === 0) {
      return;
    }
    const tempDir = dirs.pop();
    if (!tempDir) {
      // Nothing usable was popped; move on to the next entry.
      continue;
    }
    await rm(tempDir, { recursive: true, force: true });
  }
});
|
|
19
|
+
|
|
20
|
+
// Indexes two versions through one repository/cache pair, closes those
// handles, then reopens the same SQLite file with fresh instances and checks
// that search and index metadata are served from what was written earlier.
it("persists indexing lifecycle and serves search from durable local store", async () => {
  const dir = await mkdtemp(join(tmpdir(), "cue-local-core-"));
  dirs.push(dir);
  const sqlitePath = join(dir, "data-plane.sqlite");

  // Phase 1: write idx-v1 and idx-v2 through the first pair of handles.
  const repo1 = new SqliteIndexRepository(sqlitePath);
  let cache1: SqliteQueryCache | undefined;
  try {
    await repo1.migrate();
    await repo1.upsertWorkspace({
      workspace_id: "ws-local",
      tenant_id: "tenant-local",
      name: "local-workspace",
      project_root_path: "/workspace/local"
    });
    cache1 = new SqliteQueryCache(sqlitePath);
    const core1 = new RetrievalCore(repo1, cache1);

    await core1.indexArtifact({
      tenant_id: "tenant-local",
      workspace_id: "ws-local",
      index_version: "idx-v1",
      files: [
        {
          path: "src/math.ts",
          language: "typescript",
          content: "export function add(a: number, b: number) { return a + b; }"
        },
        {
          path: "src/old.ts",
          language: "typescript",
          content: "export const OLD = true;"
        }
      ]
    });

    // idx-v2 changes src/math.ts, introduces src/new.ts and omits src/old.ts,
    // which is what the modified/added/deleted counts below refer to.
    const reportV2 = await core1.indexArtifact({
      tenant_id: "tenant-local",
      workspace_id: "ws-local",
      index_version: "idx-v2",
      files: [
        {
          path: "src/math.ts",
          language: "typescript",
          content: "export function add(a: number, b: number) { return a + b + 1; }"
        },
        {
          path: "src/new.ts",
          language: "typescript",
          content: "export function multiply(a: number, b: number) { return a * b; }"
        }
      ]
    });

    expect(reportV2.counts.modified).toBe(1);
    expect(reportV2.counts.added).toBe(1);
    expect(reportV2.counts.deleted).toBe(1);
  } finally {
    // Close the handles even when a step above throws, so a failing
    // expectation does not leave them open while the temp dir is removed.
    repo1.close();
    cache1?.close();
  }

  // Phase 2: reopen the same database file with brand-new instances.
  const repo2 = new SqliteIndexRepository(sqlitePath);
  let cache2: SqliteQueryCache | undefined;
  try {
    await repo2.migrate();
    cache2 = new SqliteQueryCache(sqlitePath);
    const core2 = new RetrievalCore(repo2, cache2);

    const result = await core2.searchContext({
      trace_id: "trc-local",
      tenant_id: "tenant-local",
      workspace_id: "ws-local",
      request: {
        project_root_path: "/workspace/local",
        query: "multiply function",
        top_k: 8
      }
    });

    expect(result.search_metadata.index_version).toBe("idx-v2");
    expect(result.results.some((row) => row.path === "src/new.ts")).toBe(true);

    const latest = await repo2.getLatestReadyIndex({
      tenant_id: "tenant-local",
      workspace_id: "ws-local"
    });
    expect(latest).toBeDefined();
    if (!latest) {
      // toBeDefined() lets null through; fail explicitly instead of
      // dereferencing a missing index below.
      throw new Error("expected a ready index for workspace ws-local");
    }
    const metadata = await repo2.getIndexMetadata?.({
      tenant_id: "tenant-local",
      index_id: latest.index_id
    });
    expect(metadata?.chunking_strategy).toBe("language_aware");
    expect(metadata?.embedding_provider).toBe("deterministic");
    expect(metadata?.embedding_model).toBe("pseudo-sha256");
    expect(metadata?.embedding_dimensions).toBe(24);
  } finally {
    repo2.close();
    cache2?.close();
  }
});
|
|
115
|
+
|
|
116
|
+
// Builds a three-file base index (idx-v1), applies a delta on top of it
// (idx-v2), and checks both the reported change counts and what search
// returns afterwards.
it("applies delta indexing against a base index and preserves unchanged files", async () => {
  const dir = await mkdtemp(join(tmpdir(), "cue-local-core-delta-"));
  dirs.push(dir);
  const sqlitePath = join(dir, "data-plane.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  let cache: SqliteQueryCache | undefined;
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-delta",
      tenant_id: "tenant-local",
      name: "delta-workspace",
      project_root_path: "/workspace/local"
    });
    cache = new SqliteQueryCache(sqlitePath);
    const core = new RetrievalCore(repo, cache);

    await core.indexArtifact({
      tenant_id: "tenant-local",
      workspace_id: "ws-delta",
      index_version: "idx-v1",
      files: [
        {
          path: "src/base.ts",
          language: "typescript",
          content: "export const BASE = true;"
        },
        {
          path: "src/math.ts",
          language: "typescript",
          content: "export function add(a: number, b: number) { return a + b; }"
        },
        {
          path: "src/legacy.ts",
          language: "typescript",
          content: "export const LEGACY_FLAG = true;"
        }
      ]
    });

    // The delta touches src/math.ts, introduces src/new.ts and deletes
    // src/legacy.ts; src/base.ts is not mentioned at all.
    const report = await core.indexArtifactDelta({
      tenant_id: "tenant-local",
      workspace_id: "ws-delta",
      index_version: "idx-v2",
      base_index_version: "idx-v1",
      upsert_files: [
        {
          path: "src/math.ts",
          language: "typescript",
          content: "export function add(a: number, b: number) { return a + b + 1; }"
        },
        {
          path: "src/new.ts",
          language: "typescript",
          content: "export function multiply(a: number, b: number) { return a * b; }"
        }
      ],
      deleted_paths: ["src/legacy.ts"]
    });

    expect(report.counts.added).toBe(1);
    expect(report.counts.modified).toBe(1);
    expect(report.counts.deleted).toBe(1);
    expect(report.counts.unchanged).toBe(1);

    // The newly added file must be searchable under the new index version.
    const search = await core.searchContext({
      trace_id: "trc-delta",
      tenant_id: "tenant-local",
      workspace_id: "ws-delta",
      request: {
        project_root_path: "/workspace/local",
        query: "multiply function",
        top_k: 8
      }
    });
    expect(search.search_metadata.index_version).toBe("idx-v2");
    expect(search.results.some((row) => row.path === "src/new.ts")).toBe(true);

    // The deleted file must no longer appear in results.
    const oldSearch = await core.searchContext({
      trace_id: "trc-delta-legacy",
      tenant_id: "tenant-local",
      workspace_id: "ws-delta",
      request: {
        project_root_path: "/workspace/local",
        query: "LEGACY_FLAG",
        top_k: 8
      }
    });
    expect(oldSearch.results.some((row) => row.path === "src/legacy.ts")).toBe(false);
  } finally {
    // Close the handles even when a step above throws, so a failing
    // expectation does not leave them open while the temp dir is removed.
    repo.close();
    cache?.close();
  }
});
|
|
208
|
+
|
|
209
|
+
// Creates a single index version (idx-v1) and then requests a delta whose
// base_index_version names a version that was never created; the call must
// reject with an error mentioning the base index version.
// NOTE(review): the title also mentions a "not ready" base, but only the
// missing-base case is exercised here.
it("rejects delta indexing when base index version is missing or not ready", async () => {
  const dir = await mkdtemp(join(tmpdir(), "cue-local-core-delta-invalid-base-"));
  dirs.push(dir);
  const sqlitePath = join(dir, "data-plane.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  let cache: SqliteQueryCache | undefined;
  try {
    await repo.migrate();
    await repo.upsertWorkspace({
      workspace_id: "ws-delta-invalid",
      tenant_id: "tenant-local",
      name: "delta-invalid-workspace",
      project_root_path: "/workspace/local"
    });
    cache = new SqliteQueryCache(sqlitePath);
    const core = new RetrievalCore(repo, cache);

    await core.indexArtifact({
      tenant_id: "tenant-local",
      workspace_id: "ws-delta-invalid",
      index_version: "idx-v1",
      files: [
        {
          path: "src/a.ts",
          language: "typescript",
          content: "export const A = 1;"
        }
      ]
    });

    await expect(
      core.indexArtifactDelta({
        tenant_id: "tenant-local",
        workspace_id: "ws-delta-invalid",
        index_version: "idx-v2",
        base_index_version: "idx-does-not-exist",
        upsert_files: [
          {
            path: "src/a.ts",
            language: "typescript",
            content: "export const A = 2;"
          }
        ],
        deleted_paths: []
      })
    ).rejects.toThrow(/base index version/i);
  } finally {
    // Close the handles even when a step above throws, so a failing
    // expectation does not leave them open while the temp dir is removed.
    repo.close();
    cache?.close();
  }
});
|
|
258
|
+
});
|