@rce-mcp/retrieval-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +19 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +50 -0
- package/dist/chunking.js +520 -0
- package/dist/index.d.ts +390 -0
- package/dist/index.js +3417 -0
- package/dist/remote-sync.d.ts +116 -0
- package/dist/remote-sync.js +476 -0
- package/package.json +33 -0
- package/scripts/poc-node-parser-host.cjs +101 -0
- package/scripts/poc-parser-availability-benchmark.ts +290 -0
- package/src/chunking.ts +641 -0
- package/src/index.ts +4338 -0
- package/src/remote-sync.ts +651 -0
- package/test/benchmark.thresholds.test.ts +752 -0
- package/test/chunking.language-aware.test.ts +279 -0
- package/test/chunking.parser-availability.poc.test.ts +60 -0
- package/test/embedding-provider.test.ts +121 -0
- package/test/enhance-confidence.test.ts +357 -0
- package/test/integration.test.ts +324 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/remote-sync.integration.test.ts +177 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,651 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { readdir, readFile, stat, writeFile } from "node:fs/promises";
|
|
4
|
+
import { join, relative, resolve } from "node:path";
|
|
5
|
+
|
|
6
|
+
/**
 * Tag stored in the `mode` field of every persisted sync-state file.
 * `readRemoteSyncState` rejects files whose `mode` differs from this value.
 */
export const REMOTE_SYNC_STATE_MODE = "remote_delta_v1" as const;

/** Default upper bound (in bytes) for a file to be considered by the project scan. */
export const DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES = 1_000_000;
|
|
8
|
+
|
|
9
|
+
/** Directory names that the project scan never descends into unless overridden. */
const DEFAULT_EXCLUDED_DIRS = new Set<string>([
  // Tooling caches and VCS metadata.
  ".cache",
  ".git",
  ".next",
  ".nuxt",
  ".rce",
  ".rce-tool",
  ".svelte-kit",
  ".turbo",
  // Build outputs and installed dependencies.
  "build",
  "coverage",
  "dist",
  "node_modules",
  "out",
  "target"
]);

/** File names (dependency lock files) that the project scan skips unless overridden. */
const DEFAULT_EXCLUDED_FILES = new Set<string>([
  "bun.lock",
  "bun.lockb",
  "Cargo.lock",
  "composer.lock",
  "Gemfile.lock",
  "package-lock.json",
  "Pipfile.lock",
  "pnpm-lock.yaml",
  "yarn.lock"
]);

/** File-name suffixes (minified assets and source maps) skipped unless overridden. */
const DEFAULT_EXCLUDED_FILE_SUFFIXES = new Set<string>([".min.js", ".min.css", ".map"]);
|
|
39
|
+
|
|
40
|
+
/** A single file, with its text content, that is ready to be uploaded. */
export interface RemoteSyncUploadCandidate {
  /** Path relative to the scan root, using forward slashes. */
  path: string;
  /** File content decoded as UTF-8 text. */
  content: string;
  /** Language label derived from the file extension, when one is recognised. */
  language?: string;
}
|
|
45
|
+
|
|
46
|
+
/** Metadata recorded for one file discovered by `collectProjectFileStats`. */
export interface RemoteSyncProjectFileStat {
  /** Path relative to the scan root, using forward slashes. */
  path: string;
  /** Path of the file on disk, built by joining the scanned directory and the entry name. */
  full_path: string;
  /** File size in bytes as reported by `stat`. */
  size: number;
  /** Last-modified time in milliseconds, truncated to an integer. */
  mtime_ms: number;
  /** Language label derived from the file extension, when one is recognised. */
  language?: string;
}
|
|
53
|
+
|
|
54
|
+
/** Per-file record kept in the sync state to detect changes between runs. */
export interface RemoteSyncStateEntry {
  /** Hex-encoded SHA-256 digest of the file's text content. */
  content_hash: string;
  /** File size in bytes at the time the entry was recorded. */
  size: number;
  /** Truncated last-modified time (ms) at the time the entry was recorded. */
  mtime_ms: number;
  /** Language label derived from the file extension, when one is recognised. */
  language?: string;
}
|
|
60
|
+
|
|
61
|
+
/** Shape of the persisted sync state (see `readRemoteSyncState` / `writeRemoteSyncState`). */
export interface RemoteSyncStateFile {
  /** Format tag; always `REMOTE_SYNC_STATE_MODE`. */
  mode: typeof REMOTE_SYNC_STATE_MODE;
  /** Workspace identifier last used or returned by a push, if any. */
  workspace_id?: string;
  /** Index version returned by the most recent successful push, if any. */
  last_index_version?: string;
  /** Tracked files keyed by their scan-root-relative path. */
  files: Record<string, RemoteSyncStateEntry>;
  /** ISO-8601 timestamp of when this state object was produced. */
  updated_at: string;
}
|
|
68
|
+
|
|
69
|
+
/** The full set of changes computed between the previous state and the current scan. */
export interface RemoteSyncDeltaPayload {
  /** Files whose content must be uploaded (new or changed). */
  upsert_files: RemoteSyncUploadCandidate[];
  /** Previously tracked paths that no longer appear in the scan. */
  deleted_paths: string[];
}
|
|
73
|
+
|
|
74
|
+
/** One slice of a delta, sized to fit within a request-body budget. */
export interface RemoteSyncDeltaBatch {
  /** Files uploaded by this batch. */
  upsert_files: RemoteSyncUploadCandidate[];
  /** Paths deleted by this batch. */
  deleted_paths: string[];
  /** Estimated UTF-8 byte size of the JSON request body for this batch. */
  approx_bytes: number;
}
|
|
79
|
+
|
|
80
|
+
/** Result of `buildRemoteSyncDeltaFromState`. */
export interface BuildRemoteSyncDeltaResult {
  /** Changes that need to be pushed. */
  delta: RemoteSyncDeltaPayload;
  /** State entries for exactly the files listed in `delta.upsert_files`, keyed by path. */
  upsert_state_entries: Record<string, RemoteSyncStateEntry>;
  /** State map describing the scanned project, including files that did not need uploading. */
  next_files: Record<string, RemoteSyncStateEntry>;
}
|
|
85
|
+
|
|
86
|
+
/** Optional overrides for the project scan; each omitted field falls back to a module default. */
export interface RemoteSyncScanOptions {
  /** Files larger than this many bytes are skipped. */
  max_file_size_bytes?: number;
  /** Directory names that are not descended into. */
  excluded_dirs?: Set<string>;
  /** File names that are skipped. */
  excluded_files?: Set<string>;
  /** File-name suffixes that are skipped. */
  excluded_file_suffixes?: Set<string>;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Error carrying the HTTP status (and optionally the response payload) of a
 * failed remote-sync request. `isStaleBaseIndexError` and
 * `isDeltaUnsupportedError` inspect instances of this class.
 */
export class RemoteSyncHttpResponseError extends Error {
  /**
   * @param message Human-readable description; the classifier helpers match on its text.
   * @param status HTTP status code of the response.
   * @param payload Optional response body, kept as-is for callers to inspect.
   */
  constructor(
    message: string,
    readonly status: number,
    readonly payload?: unknown
  ) {
    super(message);
    // Without this the inherited name stays "Error", which hides the error
    // type in logs, stack traces and `String(error)`.
    this.name = "RemoteSyncHttpResponseError";
  }
}
|
|
102
|
+
|
|
103
|
+
/** Arguments accepted by `runRemoteDeltaSync`. */
export interface RunRemoteDeltaSyncInput {
  /** Project root reported to `push_delta`; also the scan root when `scan_root_path` is omitted. */
  project_root_path: string;
  /** Directory that is actually scanned; defaults to `project_root_path`. */
  scan_root_path?: string;
  /** Workspace identifier; falls back to the one stored in `previous_state`. */
  workspace_id?: string;
  /** State from the previous run, used to compute what changed. */
  previous_state?: RemoteSyncStateFile;
  /** When true, every scanned file is uploaded regardless of the previous state. Defaults to false. */
  force_full_upsert?: boolean;
  /** Request-body limit handed to `splitRemoteSyncDeltaIntoBatches` to size the batches. */
  max_body_bytes: number;
  /** Attempt count passed to `retryWithBackoff` for each batch. Defaults to 3. */
  retries?: number;
  /** Initial backoff delay in milliseconds passed to `retryWithBackoff`. Defaults to 500. */
  initial_delay_ms?: number;
  /**
   * Predicate deciding whether a failure means the base index version is
   * stale. Defaults to `isStaleBaseIndexError`. When it returns true for a
   * non-forced run that had a `previous_state`, the sync is rerun as a full
   * upsert without any previous state.
   */
  stale_base_error?: (error: unknown) => boolean;
  /** Invoked with the current state after each pushed batch, and also when nothing changed. */
  persist_state?: (state: RemoteSyncStateFile) => Promise<void>;
  /**
   * Sends one batch. Any `workspace_id` / `index_version` in the response
   * replaces the values used for subsequent batches and the final state.
   */
  push_delta: (request: {
    workspace_id?: string;
    project_root_path: string;
    base_index_version?: string;
    upsert_files: RemoteSyncUploadCandidate[];
    deleted_paths: string[];
  }) => Promise<{
    workspace_id?: string;
    index_version?: string;
  }>;
}
|
|
125
|
+
|
|
126
|
+
/** Outcome returned by `runRemoteDeltaSync`. */
export interface RunRemoteDeltaSyncResult {
  /** State after the run (also what was last handed to `persist_state`, when provided). */
  state: RemoteSyncStateFile;
  /** False when the scan produced neither upserts nor deletions; true otherwise. */
  changed: boolean;
  /** Workspace identifier in effect at the end of the run. */
  workspace_id?: string;
  /** Index version in effect at the end of the run. */
  index_version?: string;
  /** Totals across all pushed batches. */
  applied_delta: {
    /** Number of files uploaded. */
    upsert_files: number;
    /** Number of paths deleted. */
    deleted_paths: number;
  };
}
|
|
136
|
+
|
|
137
|
+
/** Returns the current instant formatted as an ISO-8601 UTC timestamp. */
function nowIso(): string {
  const now = new Date();
  return now.toISOString();
}
|
|
140
|
+
|
|
141
|
+
/**
 * Maps a path to a language label based on its final extension.
 *
 * The match is case-sensitive and only looks at the text from the last `.`
 * to the end of the path.
 *
 * @param path File path or name.
 * @returns The language label, or `undefined` for unrecognised extensions.
 */
function extensionToLanguage(path: string): string | undefined {
  const lastDot = path.lastIndexOf(".");
  if (lastDot === -1) {
    return undefined;
  }

  switch (path.slice(lastDot)) {
    case ".ts":
    case ".tsx":
      return "typescript";
    case ".js":
    case ".jsx":
      return "javascript";
    case ".py":
      return "python";
    case ".go":
      return "go";
    case ".rs":
      return "rust";
    case ".java":
      return "java";
    case ".json":
      return "json";
    case ".md":
      return "markdown";
    case ".yml":
    case ".yaml":
      return "yaml";
    default:
      return undefined;
  }
}
|
|
153
|
+
|
|
154
|
+
/**
 * Heuristic binary check: content is treated as binary when it contains a
 * NUL character anywhere.
 */
function looksBinary(content: string): boolean {
  return content.indexOf("\0") !== -1;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param value Text to hash (encoded as UTF-8 by the hash object).
 * @returns Lower-case hexadecimal digest.
 */
function sha256Text(value: string): string {
  const hasher = createHash("sha256");
  hasher.update(value, "utf8");
  return hasher.digest("hex");
}
|
|
161
|
+
|
|
162
|
+
/**
 * Decides whether a file name is excluded from the scan.
 *
 * Both the name list and the suffix list are matched case-insensitively.
 * Previously only the candidate was lower-cased, so a mixed-case entry such
 * as `Cargo.lock` never matched a file named `cargo.lock`, and a suffix entry
 * containing upper-case characters could never match at all.
 *
 * @param path File name to test (the caller passes the directory entry name).
 * @param excludedFiles Exact file names to exclude.
 * @param excludedSuffixes File-name suffixes to exclude.
 * @returns True when the file should be skipped.
 */
function shouldExcludeFile(path: string, excludedFiles: Set<string>, excludedSuffixes: Set<string>): boolean {
  const lower = path.toLowerCase();

  // Fast path: exact hit, or the set already stores the lower-case form.
  if (excludedFiles.has(path) || excludedFiles.has(lower)) {
    return true;
  }

  // Slow path: compare against entries that are not stored in lower case.
  for (const name of excludedFiles) {
    if (name.toLowerCase() === lower) {
      return true;
    }
  }

  for (const suffix of excludedSuffixes) {
    if (lower.endsWith(suffix.toLowerCase())) {
      return true;
    }
  }

  return false;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Fills in every scan option that the caller left unset (null or undefined)
 * with the corresponding module-level default.
 *
 * @param options Caller-supplied overrides, if any.
 * @returns A fully populated options object.
 */
function resolveScanOptions(options?: RemoteSyncScanOptions): Required<RemoteSyncScanOptions> {
  const maxFileSizeBytes = options?.max_file_size_bytes ?? DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES;
  const excludedDirs = options?.excluded_dirs ?? DEFAULT_EXCLUDED_DIRS;
  const excludedFiles = options?.excluded_files ?? DEFAULT_EXCLUDED_FILES;
  const excludedFileSuffixes = options?.excluded_file_suffixes ?? DEFAULT_EXCLUDED_FILE_SUFFIXES;

  return {
    max_file_size_bytes: maxFileSizeBytes,
    excluded_dirs: excludedDirs,
    excluded_files: excludedFiles,
    excluded_file_suffixes: excludedFileSuffixes
  };
}
|
|
183
|
+
|
|
184
|
+
/**
 * Recursively scans a directory and records size / mtime metadata for every
 * regular file that passes the exclusion rules.
 *
 * Skipped: directories named in `excluded_dirs`, entries that are neither a
 * directory nor a regular file, files matched by `shouldExcludeFile`, and
 * files larger than `max_file_size_bytes`.
 *
 * A file or sub-directory that disappears between being listed and being
 * inspected (ENOENT) is treated as absent instead of aborting the whole scan.
 * Every other filesystem error, including a missing or unreadable root,
 * still rejects.
 *
 * @param project_root_path Directory to scan; resolved to an absolute path.
 * @param options Optional overrides for the size limit and exclusion sets.
 * @returns Map from root-relative, forward-slash path to the file's metadata.
 */
export async function collectProjectFileStats(
  project_root_path: string,
  options?: RemoteSyncScanOptions
): Promise<Map<string, RemoteSyncProjectFileStat>> {
  const root = resolve(project_root_path);
  const resolvedOptions = resolveScanOptions(options);
  const output = new Map<string, RemoteSyncProjectFileStat>();

  // Turns "entry vanished mid-scan" into `undefined`; rethrows anything else
  // so permission problems and similar errors are never silently swallowed.
  const skipIfVanished = (error: unknown): undefined => {
    if (typeof error === "object" && error !== null && (error as { code?: unknown }).code === "ENOENT") {
      return undefined;
    }
    throw error;
  };

  async function walk(dir: string): Promise<void> {
    const entries = await readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (entry.isDirectory()) {
        if (resolvedOptions.excluded_dirs.has(entry.name)) {
          continue;
        }
        // Only the sub-directory's own listing can surface ENOENT here;
        // deeper levels handle their own vanished entries.
        await walk(join(dir, entry.name)).catch(skipIfVanished);
        continue;
      }

      if (!entry.isFile()) {
        continue;
      }

      if (shouldExcludeFile(entry.name, resolvedOptions.excluded_files, resolvedOptions.excluded_file_suffixes)) {
        continue;
      }

      const fullPath = join(dir, entry.name);
      const fileStat = await stat(fullPath).catch(skipIfVanished);
      if (fileStat === undefined || fileStat.size > resolvedOptions.max_file_size_bytes) {
        continue;
      }

      // Normalise Windows separators so keys are stable across platforms.
      const repoPath = relative(root, fullPath).replace(/\\/g, "/");
      output.set(repoPath, {
        path: repoPath,
        full_path: fullPath,
        size: fileStat.size,
        mtime_ms: Math.trunc(fileStat.mtimeMs),
        language: extensionToLanguage(repoPath)
      });
    }
  }

  await walk(root);
  return output;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Scans a project and returns every text file as an upload candidate, ordered
 * by path (`localeCompare`).
 *
 * Files that cannot be read, or whose content contains a NUL character, are
 * silently left out.
 *
 * @param project_root_path Directory to scan.
 * @param options Optional scan overrides forwarded to `collectProjectFileStats`.
 * @returns Upload candidates with their UTF-8 content.
 */
export async function collectUploadCandidates(
  project_root_path: string,
  options?: RemoteSyncScanOptions
): Promise<RemoteSyncUploadCandidate[]> {
  const scanned = await collectProjectFileStats(project_root_path, options);
  const ordered = [...scanned.entries()].sort(([left], [right]) => left.localeCompare(right));
  const candidates: RemoteSyncUploadCandidate[] = [];

  for (const [repoPath, fileInfo] of ordered) {
    // A failed read is represented as `undefined`; empty files are still valid.
    const text = await readFile(fileInfo.full_path, "utf8").catch(() => undefined);
    if (text === undefined || looksBinary(text)) {
      continue;
    }

    const candidate: RemoteSyncUploadCandidate = { path: repoPath, content: text };
    if (fileInfo.language) {
      candidate.language = fileInfo.language;
    }
    candidates.push(candidate);
  }

  return candidates;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Compares the current project scan with a previous sync state and works out
 * which files need uploading and which paths were removed.
 *
 * For each scanned file (in `localeCompare` path order):
 * - identical size and mtime (and no forced upsert) → the previous entry is reused untouched;
 * - otherwise the file is read and hashed; unreadable or binary-looking files
 *   keep their previous entry (if any) and are not uploaded;
 * - a file whose hash equals the previous hash gets a refreshed entry in
 *   `next_files` but is not uploaded, unless the upsert is forced.
 *
 * Every previously tracked path that is absent from the scan is reported as deleted.
 *
 * @param input.project_root_path Directory to scan.
 * @param input.previous_state State from the last run, if any.
 * @param input.force_full_upsert Upload every readable text file regardless of the previous state.
 * @param input.options Optional scan overrides.
 * @returns The delta, the state entries of the upserted files, and the next state map.
 */
export async function buildRemoteSyncDeltaFromState(input: {
  project_root_path: string;
  previous_state?: RemoteSyncStateFile;
  force_full_upsert: boolean;
  options?: RemoteSyncScanOptions;
}): Promise<BuildRemoteSyncDeltaResult> {
  const scanned = await collectProjectFileStats(input.project_root_path, input.options);
  const previousFiles = input.previous_state?.files ?? {};
  const forceAll = input.force_full_upsert;

  const upsertFiles: RemoteSyncUploadCandidate[] = [];
  const upsertStateEntries: Record<string, RemoteSyncStateEntry> = {};
  const nextFiles: Record<string, RemoteSyncStateEntry> = {};

  const ordered = [...scanned.entries()].sort(([left], [right]) => left.localeCompare(right));

  for (const [repoPath, scannedFile] of ordered) {
    const previous = previousFiles[repoPath];

    // Cheap check first: same size and mtime means the content is assumed unchanged.
    if (!forceAll && previous && previous.size === scannedFile.size && previous.mtime_ms === scannedFile.mtime_ms) {
      nextFiles[repoPath] = previous;
      continue;
    }

    // A failed read is represented as `undefined`; empty files are still valid.
    const text = await readFile(scannedFile.full_path, "utf8").catch(() => undefined);
    if (text === undefined || looksBinary(text)) {
      if (previous) {
        nextFiles[repoPath] = previous;
      }
      continue;
    }

    const digest = sha256Text(text);
    const refreshed: RemoteSyncStateEntry = {
      content_hash: digest,
      size: scannedFile.size,
      mtime_ms: scannedFile.mtime_ms
    };
    if (scannedFile.language) {
      refreshed.language = scannedFile.language;
    }
    nextFiles[repoPath] = refreshed;

    // Metadata changed but content did not: remember the new stats, skip the upload.
    const contentUnchanged = previous?.content_hash === digest;
    if (contentUnchanged && !forceAll) {
      continue;
    }

    const candidate: RemoteSyncUploadCandidate = { path: repoPath, content: text };
    if (scannedFile.language) {
      candidate.language = scannedFile.language;
    }
    upsertFiles.push(candidate);
    upsertStateEntries[repoPath] = refreshed;
  }

  const deletedPaths = Object.keys(previousFiles).filter((trackedPath) => !scanned.has(trackedPath));

  return {
    delta: {
      upsert_files: upsertFiles,
      deleted_paths: deletedPaths
    },
    upsert_state_entries: upsertStateEntries,
    next_files: nextFiles
  };
}
|
|
342
|
+
|
|
343
|
+
/**
 * Estimates the size of a push-delta request by serialising an equivalent
 * JSON body and measuring its UTF-8 byte length.
 *
 * Empty or missing `workspace_id` / `base_index_version` are left out of the
 * body, and only `path`, `content` and a non-empty `language` are kept for
 * each file.
 *
 * @param input Request fields to measure.
 * @returns Byte length of the serialised JSON body.
 */
export function estimateRemoteSyncDeltaRequestSize(input: {
  project_root_path: string;
  workspace_id?: string;
  base_index_version?: string;
  upsert_files: RemoteSyncUploadCandidate[];
  deleted_paths: string[];
}): number {
  // Keys are inserted in the same order the serialised body expects.
  const body: Record<string, unknown> = { project_root_path: input.project_root_path };
  if (input.workspace_id) {
    body.workspace_id = input.workspace_id;
  }
  if (input.base_index_version) {
    body.base_index_version = input.base_index_version;
  }
  body.upsert_files = input.upsert_files.map(({ path, content, language }) =>
    language ? { path, content, language } : { path, content }
  );
  body.deleted_paths = input.deleted_paths;

  const serialized = JSON.stringify(body);
  return Buffer.byteLength(serialized, "utf8");
}
|
|
363
|
+
|
|
364
|
+
/**
 * Splits a delta into request-sized batches.
 *
 * Upserts (sorted by path) are packed first, then de-duplicated deletions
 * (sorted), greedily filling each batch up to the byte budget. The budget is
 * 80% of `max_body_bytes`, but never less than 256 KiB.
 * NOTE(review): that 256 KiB floor can exceed a smaller `max_body_bytes` —
 * confirm this is intended.
 *
 * Sizes are tracked incrementally instead of re-serialising the whole pending
 * batch for every candidate item. A JSON array serialises as its items joined
 * by single-byte commas, so the body size is exactly
 * `envelope + Σ item bytes + separators`; this yields the same batches and
 * `approx_bytes` as measuring the full body each time, in linear rather than
 * quadratic work.
 *
 * @param input.project_root_path Project root included in every request.
 * @param input.workspace_id Optional workspace id included in every request.
 * @param input.base_index_version Optional base version included in every request.
 * @param input.delta Changes to split.
 * @param input.max_body_bytes Request-body limit used to derive the budget.
 * @returns Batches in send order; empty when the delta is empty.
 * @throws Error when a single upsert or deletion cannot fit in a batch on its own.
 */
export function splitRemoteSyncDeltaIntoBatches(input: {
  project_root_path: string;
  workspace_id?: string;
  base_index_version?: string;
  delta: RemoteSyncDeltaPayload;
  max_body_bytes: number;
}): RemoteSyncDeltaBatch[] {
  const budget = Math.max(256 * 1024, Math.floor(input.max_body_bytes * 0.8));
  const upserts = [...input.delta.upsert_files].sort((a, b) => a.path.localeCompare(b.path));
  const deleted = [...new Set(input.delta.deleted_paths)].sort((a, b) => a.localeCompare(b));

  const measure = (upsert_files: RemoteSyncUploadCandidate[], deleted_paths: string[]): number =>
    estimateRemoteSyncDeltaRequestSize({
      project_root_path: input.project_root_path,
      workspace_id: input.workspace_id,
      base_index_version: input.base_index_version,
      upsert_files,
      deleted_paths
    });

  // Size of the request with both arrays empty; every item adds on top of this.
  const envelopeBytes = measure([], []);
  // N array items are joined by N-1 one-byte commas.
  const separatorBytes = (count: number): number => Math.max(0, count - 1);
  const bodyBytes = (upsertCount: number, upsertItemBytes: number, deletedCount: number, deletedItemBytes: number): number =>
    envelopeBytes + upsertItemBytes + separatorBytes(upsertCount) + deletedItemBytes + separatorBytes(deletedCount);

  const batches: RemoteSyncDeltaBatch[] = [];
  let currentUpserts: RemoteSyncUploadCandidate[] = [];
  let currentDeleted: string[] = [];
  let currentUpsertBytes = 0;
  let currentDeletedBytes = 0;

  const flush = (): void => {
    if (currentUpserts.length === 0 && currentDeleted.length === 0) {
      return;
    }
    batches.push({
      upsert_files: currentUpserts,
      deleted_paths: currentDeleted,
      approx_bytes: bodyBytes(currentUpserts.length, currentUpsertBytes, currentDeleted.length, currentDeletedBytes)
    });
    currentUpserts = [];
    currentDeleted = [];
    currentUpsertBytes = 0;
    currentDeletedBytes = 0;
  };

  for (const file of upserts) {
    const fileBytes = measure([file], []) - envelopeBytes;
    const sizeIfAdded = bodyBytes(
      currentUpserts.length + 1,
      currentUpsertBytes + fileBytes,
      currentDeleted.length,
      currentDeletedBytes
    );
    if (sizeIfAdded <= budget) {
      currentUpserts.push(file);
      currentUpsertBytes += fileBytes;
      continue;
    }
    flush();
    if (envelopeBytes + fileBytes > budget) {
      throw new Error(`delta upsert payload too large for path ${file.path}`);
    }
    currentUpserts = [file];
    currentUpsertBytes = fileBytes;
  }

  // Deletions share the last upsert batch when there is room left in it.
  for (const path of deleted) {
    const pathBytes = measure([], [path]) - envelopeBytes;
    const sizeIfAdded = bodyBytes(
      currentUpserts.length,
      currentUpsertBytes,
      currentDeleted.length + 1,
      currentDeletedBytes + pathBytes
    );
    if (sizeIfAdded <= budget) {
      currentDeleted.push(path);
      currentDeletedBytes += pathBytes;
      continue;
    }
    flush();
    if (envelopeBytes + pathBytes > budget) {
      throw new Error(`delta delete payload too large for path ${path}`);
    }
    currentDeleted = [path];
    currentDeletedBytes = pathBytes;
  }

  flush();
  return batches;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Loads a persisted sync state from disk.
 *
 * @param path Location of the state file.
 * @returns The parsed state, or `undefined` when the file is missing,
 *   unreadable, not valid JSON, has a different `mode`, or its `files` field
 *   is not a plain (non-null, non-array) object. A missing or non-string
 *   `updated_at` is replaced with the current time.
 */
export async function readRemoteSyncState(path: string): Promise<RemoteSyncStateFile | undefined> {
  if (!existsSync(path)) {
    return undefined;
  }

  let parsed: Partial<RemoteSyncStateFile>;
  try {
    const raw = await readFile(path, "utf8");
    parsed = JSON.parse(raw) as Partial<RemoteSyncStateFile>;
  } catch {
    // Unreadable file or malformed JSON: behave as if there were no state.
    return undefined;
  }

  if (!parsed || parsed.mode !== REMOTE_SYNC_STATE_MODE) {
    return undefined;
  }

  const files = parsed.files;
  const filesIsPlainObject = typeof files === "object" && files !== null && !Array.isArray(files);
  if (!filesIsPlainObject) {
    return undefined;
  }

  const updatedAt = typeof parsed.updated_at === "string" ? parsed.updated_at : nowIso();
  return {
    mode: REMOTE_SYNC_STATE_MODE,
    workspace_id: parsed.workspace_id,
    last_index_version: parsed.last_index_version,
    files: files as Record<string, RemoteSyncStateEntry>,
    updated_at: updatedAt
  };
}
|
|
464
|
+
|
|
465
|
+
/**
 * Writes a sync state to disk as pretty-printed (2-space) JSON.
 *
 * The stored `mode` is always `REMOTE_SYNC_STATE_MODE` and `updated_at` is
 * always the time of writing, regardless of what `state` carries.
 *
 * @param path Destination file.
 * @param state State to persist.
 */
export async function writeRemoteSyncState(path: string, state: RemoteSyncStateFile): Promise<void> {
  const snapshot = {
    ...state,
    // Listed after the spread so they override whatever `state` contains.
    mode: REMOTE_SYNC_STATE_MODE,
    updated_at: nowIso()
  };
  const serialized = JSON.stringify(snapshot, null, 2);
  await writeFile(path, serialized);
}
|
|
479
|
+
|
|
480
|
+
/**
 * Recognises the failure that indicates a stale base index version: a
 * `RemoteSyncHttpResponseError` with status 400 whose message contains both
 * "invalid_argument" and "base index version" (case-insensitive).
 *
 * @param error Any thrown value.
 * @returns True only for that specific error shape.
 */
export function isStaleBaseIndexError(error: unknown): boolean {
  if (error instanceof RemoteSyncHttpResponseError && error.status === 400) {
    const text = error.message.toLowerCase();
    return text.includes("invalid_argument") && text.includes("base index version");
  }
  return false;
}
|
|
490
|
+
|
|
491
|
+
/**
 * Recognises failures that indicate the push-delta operation is unavailable.
 *
 * - `RemoteSyncHttpResponseError`: status 404, 405 or 501, or a message
 *   containing both "not_found" and "push-delta".
 * - Any other `Error`: a message containing "push-delta" together with
 *   "not found" or "404".
 *
 * Message checks are case-insensitive.
 *
 * @param error Any thrown value.
 * @returns True when the error matches one of the shapes above.
 */
export function isDeltaUnsupportedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }

  if (error instanceof RemoteSyncHttpResponseError) {
    switch (error.status) {
      case 404:
      case 405:
      case 501:
        return true;
      default: {
        const text = error.message.toLowerCase();
        return text.includes("not_found") && text.includes("push-delta");
      }
    }
  }

  const text = error.message.toLowerCase();
  if (!text.includes("push-delta")) {
    return false;
  }
  return text.includes("not found") || text.includes("404");
}
|
|
507
|
+
|
|
508
|
+
/**
 * Runs `fn`, retrying on failure with exponential backoff.
 *
 * `retries` is the total number of attempts. `fn` is always attempted at
 * least once: previously a `retries` of 0, a negative value or NaN skipped
 * the call entirely and threw `undefined`. The wait before attempt k+1 is
 * `initial_delay_ms * 2^(k-1)`.
 *
 * @param input.fn Operation to run.
 * @param input.retries Maximum number of attempts (values below 1 are treated as 1).
 * @param input.initial_delay_ms Delay before the second attempt, in milliseconds.
 * @returns The value resolved by the first successful attempt.
 * @throws The error from the final failed attempt.
 */
export async function retryWithBackoff<T>(input: {
  fn: () => Promise<T>;
  retries: number;
  initial_delay_ms: number;
}): Promise<T> {
  // `>=` is false for NaN, so invalid values also fall back to one attempt.
  const maxAttempts = input.retries >= 1 ? input.retries : 1;

  for (let attempt = 1; ; attempt += 1) {
    try {
      return await input.fn();
    } catch (error) {
      if (attempt >= maxAttempts) {
        throw error;
      }
      const delayMs = input.initial_delay_ms * 2 ** (attempt - 1);
      await new Promise((resolveSleep) => setTimeout(resolveSleep, delayMs));
    }
  }
}
|
|
529
|
+
|
|
530
|
+
/**
 * Runs one delta synchronisation: scan, diff against the previous state, push
 * the changes in batches, and persist progress after every batch.
 *
 * Flow:
 * 1. Build the delta from `scan_root_path` (or `project_root_path`).
 * 2. If nothing changed, persist the refreshed file map and return `changed: false`.
 * 3. Otherwise split the delta into batches and push them in order, each
 *    wrapped in `retryWithBackoff`. After each batch the workspace id and
 *    index version returned by `push_delta` (when present) replace the current
 *    ones, the tracked-file map is updated, and the state is persisted.
 * 4. If the first run fails with an error the stale-base predicate accepts,
 *    and the run was not forced and had a previous state, the whole sync is
 *    repeated once as a forced full upsert with no previous state.
 *
 * @param input See `RunRemoteDeltaSyncInput`.
 * @returns Final state, whether anything was pushed, and upload/delete totals.
 * @throws Whatever the scan, batch splitting, `push_delta` (after retries) or
 *   `persist_state` throws, unless the stale-base fallback applies.
 */
export async function runRemoteDeltaSync(input: RunRemoteDeltaSyncInput): Promise<RunRemoteDeltaSyncResult> {
  const maxAttempts = input.retries ?? 3;
  const backoffStartMs = input.initial_delay_ms ?? 500;
  const looksStale = input.stale_base_error ?? isStaleBaseIndexError;
  const startForced = input.force_full_upsert ?? false;

  async function syncOnce(
    baseState: RemoteSyncStateFile | undefined,
    forceAll: boolean
  ): Promise<RunRemoteDeltaSyncResult> {
    const { delta, next_files, upsert_state_entries } = await buildRemoteSyncDeltaFromState({
      project_root_path: input.scan_root_path ?? input.project_root_path,
      previous_state: baseState,
      force_full_upsert: forceAll
    });

    // Both values may be replaced by what the server returns after each batch.
    let workspaceId = input.workspace_id ?? baseState?.workspace_id;
    let indexVersion = baseState?.last_index_version;

    // Captures the current workspace id / index version at the time of the call.
    const snapshot = (files: Record<string, RemoteSyncStateEntry>): RemoteSyncStateFile => ({
      mode: REMOTE_SYNC_STATE_MODE,
      workspace_id: workspaceId,
      last_index_version: indexVersion,
      files,
      updated_at: nowIso()
    });

    const nothingToPush = delta.upsert_files.length === 0 && delta.deleted_paths.length === 0;
    if (nothingToPush) {
      const unchanged = snapshot(next_files);
      await input.persist_state?.(unchanged);
      return {
        state: unchanged,
        changed: false,
        workspace_id: unchanged.workspace_id,
        index_version: unchanged.last_index_version,
        applied_delta: { upsert_files: 0, deleted_paths: 0 }
      };
    }

    // Starts from the previous file map and is updated only as batches succeed,
    // so a failure part-way leaves the persisted state matching what was pushed.
    const trackedFiles: Record<string, RemoteSyncStateEntry> = { ...(baseState?.files ?? {}) };
    let uploadedTotal = 0;
    let deletedTotal = 0;
    let lastPersisted: RemoteSyncStateFile | undefined;

    const batches = splitRemoteSyncDeltaIntoBatches({
      project_root_path: input.project_root_path,
      workspace_id: workspaceId,
      base_index_version: indexVersion,
      delta,
      max_body_bytes: input.max_body_bytes
    });

    for (const batch of batches) {
      const response = await retryWithBackoff({
        retries: maxAttempts,
        initial_delay_ms: backoffStartMs,
        fn: async () =>
          input.push_delta({
            workspace_id: workspaceId,
            project_root_path: input.project_root_path,
            ...(indexVersion ? { base_index_version: indexVersion } : {}),
            upsert_files: batch.upsert_files,
            deleted_paths: batch.deleted_paths
          })
      });

      workspaceId = response.workspace_id ?? workspaceId;
      indexVersion = response.index_version ?? indexVersion;

      for (const uploaded of batch.upsert_files) {
        const entry = upsert_state_entries[uploaded.path];
        if (entry) {
          trackedFiles[uploaded.path] = entry;
        }
      }
      for (const removedPath of batch.deleted_paths) {
        delete trackedFiles[removedPath];
      }

      uploadedTotal += batch.upsert_files.length;
      deletedTotal += batch.deleted_paths.length;

      lastPersisted = snapshot(trackedFiles);
      await input.persist_state?.(lastPersisted);
    }

    return {
      state: lastPersisted ?? snapshot(trackedFiles),
      changed: true,
      workspace_id: workspaceId,
      index_version: indexVersion,
      applied_delta: {
        upsert_files: uploadedTotal,
        deleted_paths: deletedTotal
      }
    };
  }

  try {
    return await syncOnce(input.previous_state, startForced);
  } catch (error) {
    // Fall back to a full upload only for a non-forced run that had a base state.
    const canFallBack = !startForced && Boolean(input.previous_state) && looksStale(error);
    if (!canFallBack) {
      throw error;
    }
    return syncOnce(undefined, true);
  }
}
|