@cue-dev/retrieval-core 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +27 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +64 -0
- package/dist/chunking.js +983 -0
- package/dist/index.d.ts +673 -0
- package/dist/index.js +6605 -0
- package/dist/indexing-ignore.d.ts +9 -0
- package/dist/indexing-ignore.js +151 -0
- package/dist/remote-sync.d.ts +193 -0
- package/dist/remote-sync.js +816 -0
- package/package.json +37 -0
- package/scripts/poc-node-parser-host.cjs +105 -0
- package/scripts/poc-parser-availability-benchmark.ts +338 -0
- package/src/chunking.ts +1187 -0
- package/src/index.ts +8338 -0
- package/src/indexing-ignore.ts +179 -0
- package/src/remote-sync.ts +1119 -0
- package/test/benchmark.thresholds.test.ts +815 -0
- package/test/chunking.config.test.ts +84 -0
- package/test/chunking.language-aware.test.ts +1248 -0
- package/test/chunking.parser-availability.poc.test.ts +86 -0
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/embedding-provider.test.ts +570 -0
- package/test/enhance-confidence.test.ts +752 -0
- package/test/index-prep.concurrency.regression.test.ts +142 -0
- package/test/integration.test.ts +508 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/mcp-search-quality.regression.test.ts +1358 -0
- package/test/remote-sync.integration.test.ts +350 -0
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,1119 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { readdir, readFile, stat, writeFile } from "node:fs/promises";
|
|
4
|
+
import { join, relative, resolve } from "node:path";
|
|
5
|
+
import { loadIndexingIgnoreMatcher, normalizeRepoRelativePath } from "./indexing-ignore.js";
|
|
6
|
+
|
|
7
|
+
/** Discriminator written to, and required in, the `mode` field of a persisted sync-state file. */
export const REMOTE_SYNC_STATE_MODE = "remote_delta_v1" as const;

/** Per-file size ceiling in bytes that the scanner applies when the caller gives no override. */
export const DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES = 1_000_000;
|
|
9
|
+
|
|
10
|
+
/**
 * Directory names the scanner never descends into unless the caller supplies
 * its own `excluded_dirs`. The scanner compares each directory's own name
 * (not its path) against this set, so a match applies at any depth.
 */
const DEFAULT_EXCLUDED_DIRS = new Set<string>([
  ".tmp", ".cache", ".git", ".hg", ".next", ".nuxt", ".pytest_cache",
  ".cue", ".cue-tool", ".svn", ".svelte-kit", ".turbo", ".venv",
  "__pycache__", "build", "coverage", "dist", "node_modules",
  "out", "target", "vendor", "venv"
]);
|
|
34
|
+
|
|
35
|
+
/**
 * File names (package-manager lockfiles) excluded from the scan unless the
 * caller supplies its own `excluded_files`.
 */
const DEFAULT_EXCLUDED_FILES = new Set<string>([
  "bun.lock", "bun.lockb",
  "Cargo.lock",
  "composer.lock",
  "Gemfile.lock",
  "package-lock.json",
  "Pipfile.lock",
  "pnpm-lock.yaml",
  "yarn.lock"
]);
|
|
46
|
+
|
|
47
|
+
/**
 * File-name suffixes excluded from the scan unless the caller supplies its own
 * `excluded_file_suffixes`: minified bundles and source maps, native binaries,
 * compiled Python, images, video, and PDF.
 */
const DEFAULT_EXCLUDED_FILE_SUFFIXES = new Set<string>([
  ".min.js", ".min.css", ".map",
  ".exe", ".dll", ".so", ".bin", ".dylib", ".pyc",
  ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg",
  ".mp4", ".mov", ".avi", ".mkv",
  ".pdf"
]);
|
|
69
|
+
|
|
70
|
+
/** One text file queued for upload to the remote index. */
export interface RemoteSyncUploadCandidate {
  /** Normalized repo-relative path of the file. */
  path: string;
  /** File contents, decoded as UTF-8 text. */
  content: string;
  /** Language label derived from the file extension; omitted when unrecognised. */
  language?: string;
}
|
|
75
|
+
|
|
76
|
+
/** Filesystem metadata recorded for one file found by the project scan. */
export interface RemoteSyncProjectFileStat {
  /** Normalized repo-relative path; also the key under which the scan stores this record. */
  path: string;
  /** Path used to read the file from disk (scan directory joined with the entry name). */
  full_path: string;
  /** File size in bytes as reported by `stat`. */
  size: number;
  /** Modification time in milliseconds, truncated to an integer. */
  mtime_ms: number;
  /** Language label derived from the file extension; absent when unrecognised. */
  language?: string;
}
|
|
83
|
+
|
|
84
|
+
/** What the local sync state remembers about one file that has been processed. */
export interface RemoteSyncStateEntry {
  /** Hex SHA-256 digest of the file's text content. */
  content_hash: string;
  /** File size in bytes at the time the entry was recorded. */
  size: number;
  /** Modification time (integer milliseconds) at the time the entry was recorded. */
  mtime_ms: number;
  /** Language label, when one was derived for the file. */
  language?: string;
}
|
|
90
|
+
|
|
91
|
+
/** Shape of the JSON document that persists sync state between runs. */
export interface RemoteSyncStateFile {
  /** Format discriminator; a state file with any other value is rejected on read. */
  mode: typeof REMOTE_SYNC_STATE_MODE;
  /** Workspace identifier carried over between runs, when known. */
  workspace_id?: string;
  /** Index version recorded by the previous run, when there was one. */
  last_index_version?: string;
  /** Per-file state, keyed by repo-relative path. */
  files: Record<string, RemoteSyncStateEntry>;
  /** ISO-8601 timestamp of the last update. */
  updated_at: string;
}
|
|
98
|
+
|
|
99
|
+
/** The full set of changes between the previous sync state and the current scan. */
export interface RemoteSyncDeltaPayload {
  /** Files whose content is new or changed (or all files when a full upsert is forced). */
  upsert_files: Array<RemoteSyncUploadCandidate>;
  /** Paths present in the previous state but absent from the current scan. */
  deleted_paths: Array<string>;
}
|
|
103
|
+
|
|
104
|
+
/** One request-sized slice of a delta, produced by the batch splitter. */
export interface RemoteSyncDeltaBatch {
  /** Upserts carried by this batch. */
  upsert_files: Array<RemoteSyncUploadCandidate>;
  /** Deletions carried by this batch. */
  deleted_paths: Array<string>;
  /** Estimated UTF-8 byte size of the JSON request body for this batch. */
  approx_bytes: number;
}
|
|
109
|
+
|
|
110
|
+
/** Result of diffing the current project scan against the previous sync state. */
export interface BuildRemoteSyncDeltaResult {
  /** Changes to send to the remote. */
  delta: RemoteSyncDeltaPayload;
  /** State entries for exactly the files listed in `delta.upsert_files`, keyed by path. */
  upsert_state_entries: Record<string, RemoteSyncStateEntry>;
  /** State entries for every file tracked after this scan, keyed by path. */
  next_files: Record<string, RemoteSyncStateEntry>;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Optional scanner overrides. Any field left unset falls back to the
 * corresponding module default.
 */
export interface RemoteSyncScanOptions {
  /** Files strictly larger than this many bytes are skipped. */
  max_file_size_bytes?: number;
  /** Directory names that are never descended into. */
  excluded_dirs?: Set<string>;
  /** File names that are skipped. */
  excluded_files?: Set<string>;
  /** File-name suffixes that are skipped. */
  excluded_file_suffixes?: Set<string>;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Error describing a non-successful HTTP response from the remote sync API.
 * The classification helpers in this module inspect `status` and `message`
 * to decide between retrying, falling back, and failing.
 */
export class RemoteSyncHttpResponseError extends Error {
  /**
   * @param message Human-readable description; the classifiers search it for keywords.
   * @param status  HTTP status code of the response.
   * @param payload Optional response body kept for diagnostics.
   */
  constructor(
    message: string,
    readonly status: number,
    readonly payload?: unknown
  ) {
    super(message);
    // Without this the inherited name stays "Error", so logs and stack traces
    // cannot tell this error apart from a generic one.
    this.name = "RemoteSyncHttpResponseError";
  }
}
|
|
132
|
+
|
|
133
|
+
/**
 * Options for a sync run that uses the delta protocol.
 * NOTE(review): the consumer of this interface is outside this excerpt, so
 * defaults for the optional fields are not documented here.
 */
export interface RunRemoteDeltaSyncInput {
  /** Project root reported to the remote in each request. */
  project_root_path: string;
  /** Directory to scan when it differs from `project_root_path`. */
  scan_root_path?: string;
  workspace_id?: string;
  /** State from the previous run; the delta is computed against it. */
  previous_state?: RemoteSyncStateFile;
  /** When true, every scanned file is re-sent regardless of the previous state. */
  force_full_upsert?: boolean;
  /** Request-body size limit (bytes) that batches are sized against. */
  max_body_bytes: number;
  retries?: number;
  initial_delay_ms?: number;
  /** Predicate that recognises a "stale base index version" failure. */
  stale_base_error?: (error: unknown) => boolean;
  /** Callback that persists a state snapshot. */
  persist_state?: (state: RemoteSyncStateFile) => Promise<void>;
  /** Progress callback that receives per-batch statistics. */
  on_batch_processed?: (event: {
    batch_index: number;
    batch_count: number;
    approx_bytes: number;
    upsert_files: number;
    deleted_paths: number;
    latency_ms: number;
  }) => void | Promise<void>;
  /** Transport callback that sends one delta request to the remote. */
  push_delta: (request: {
    workspace_id?: string;
    project_root_path: string;
    base_index_version?: string;
    upsert_files: Array<RemoteSyncUploadCandidate>;
    deleted_paths: Array<string>;
  }) => Promise<{
    workspace_id?: string;
    index_version?: string;
  }>;
}
|
|
163
|
+
|
|
164
|
+
/** Outcome of a sync run. */
export interface RunRemoteDeltaSyncResult {
  /** State snapshot produced by the run. */
  state: RemoteSyncStateFile;
  /** False when the scan found nothing to upsert or delete. */
  changed: boolean;
  workspace_id?: string;
  index_version?: string;
  /** Number of upserts and deletions applied. */
  applied_delta: {
    upsert_files: number;
    deleted_paths: number;
  };
  /** Aggregate transfer statistics for the run. */
  stats: {
    batches_total: number;
    bytes_total: number;
    /** Wall-clock duration of the run in milliseconds. */
    latency_ms: number;
  };
}
|
|
179
|
+
|
|
180
|
+
/** Limits and protocol support advertised by the remote. */
export interface RemoteSyncCapabilities {
  /** Request-body limit in bytes; also the fallback for `max_commit_body_bytes`. */
  max_body_bytes: number;
  /** Supported sync protocols; listing "blob_commit_v2" selects the blob/commit path. */
  sync_protocols?: string[];
  /** Largest single blob in bytes; the adaptive sync assumes 128 KiB when absent. */
  max_blob_bytes?: number;
  /** Largest blob upload batch in bytes; defaults to the larger of `max_blob_bytes` and 1 MiB. */
  max_blob_batch_bytes?: number;
  /** Commit request-body limit in bytes. */
  max_commit_body_bytes?: number;
  /** Suggested number of parallel blob uploads; defaults to 4 and is clamped to 1..16. */
  upload_concurrency_hint?: number;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Options for the adaptive sync. It uses the blob/commit v2 protocol when the
 * remote advertises it and otherwise delegates to the delta protocol.
 */
export interface RunRemoteAdaptiveSyncInput {
  /** Project root reported to the remote in each request. */
  project_root_path: string;
  /** Directory to scan; falls back to `project_root_path`. */
  scan_root_path?: string;
  /** Workspace identifier; falls back to the one stored in `previous_state`. */
  workspace_id?: string;
  /** State from the previous run; the delta is computed against it. */
  previous_state?: RemoteSyncStateFile;
  force_full_upsert?: boolean;
  /** Limits and protocol support advertised by the remote. */
  capabilities: RemoteSyncCapabilities;
  /** Retry count for each remote call; defaults to 3. */
  retries?: number;
  /** Initial backoff delay in milliseconds; defaults to 500. */
  initial_delay_ms?: number;
  /** Stale-base predicate; defaults to `isStaleBaseIndexError`. */
  stale_base_error?: (error: unknown) => boolean;
  /** Callback that persists a state snapshot. */
  persist_state?: (state: RemoteSyncStateFile) => Promise<void>;
  /** Transport for the delta protocol. */
  push_delta: RunRemoteDeltaSyncInput["push_delta"];
  /**
   * Transport that uploads one batch of content-addressed blobs. Hashes
   * returned as accepted or already present count as acknowledged; any
   * `rejected` entry aborts the sync.
   */
  upload_blobs: (request: {
    workspace_id?: string;
    project_root_path: string;
    blobs: {
      hash: string;
      content: string;
      size_bytes: number;
    }[];
  }) => Promise<{
    accepted_hashes: string[];
    already_present_hashes: string[];
    rejected: { hash: string; reason: string }[];
  }>;
  /** Transport that commits path-to-blob mappings and deletions. */
  commit_v2: (request: {
    workspace_id?: string;
    project_root_path: string;
    base_index_version?: string;
    upsert_files: {
      path: string;
      blob_hash: string;
      language?: string;
      generated?: boolean;
      binary?: boolean;
      updated_at?: string;
    }[];
    deleted_paths: string[];
  }) => Promise<{
    workspace_id?: string;
    index_version?: string;
  }>;
  /** Called whenever the blob upload concurrency is lowered ("error") or raised ("success"). */
  on_upload_strategy_change?: (event: {
    previous_concurrency: number;
    next_concurrency: number;
    reason: "success" | "error";
  }) => void | Promise<void>;
}
|
|
237
|
+
|
|
238
|
+
/** Sync outcome plus the protocol that the adaptive sync ended up using. */
export interface RunRemoteAdaptiveSyncResult extends RunRemoteDeltaSyncResult {
  /** "delta_v1" when the run was delegated to the delta protocol, otherwise "blob_commit_v2". */
  protocol: "delta_v1" | "blob_commit_v2";
}
|
|
241
|
+
|
|
242
|
+
/** Returns the current time as an ISO-8601 UTC string. */
function nowIso(): string {
  const current = new Date();
  return current.toISOString();
}
|
|
245
|
+
|
|
246
|
+
/**
 * Maps a path to a language label using its final extension (case-sensitive).
 *
 * @param path File path or name.
 * @returns The language label, or `undefined` for an unrecognised or missing extension.
 */
function extensionToLanguage(path: string): string | undefined {
  const lastDot = path.lastIndexOf(".");
  if (lastDot === -1) {
    return undefined;
  }

  // Every recognised suffix contains exactly one dot, so comparing the text
  // from the last dot onwards is equivalent to an `endsWith` check.
  switch (path.slice(lastDot)) {
    case ".ts":
    case ".tsx":
    case ".mts":
    case ".cts":
      return "typescript";
    case ".js":
    case ".jsx":
    case ".mjs":
    case ".cjs":
      return "javascript";
    case ".py":
      return "python";
    case ".go":
      return "go";
    case ".rs":
      return "rust";
    case ".java":
      return "java";
    case ".json":
      return "json";
    case ".md":
      return "markdown";
    case ".yml":
    case ".yaml":
      return "yaml";
    default:
      return undefined;
  }
}
|
|
262
|
+
|
|
263
|
+
/** Heuristic binary check: true when the decoded text contains a NUL character. */
function looksBinary(content: string): boolean {
  return content.indexOf("\0") !== -1;
}
|
|
266
|
+
|
|
267
|
+
/** Returns the lowercase hex SHA-256 digest of `value` (hashed as UTF-8). */
function sha256Text(value: string): string {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
|
|
270
|
+
|
|
271
|
+
/**
 * Decides whether a file is excluded by name or by suffix, ignoring case.
 *
 * Previously only all-lowercase set entries matched case-insensitively: a
 * mixed-case entry such as "Cargo.lock" missed "cargo.lock" and "CARGO.LOCK",
 * and a suffix containing uppercase letters could never match. Both sides are
 * now lowercased before comparison. Everything the old check excluded is
 * still excluded.
 *
 * @param path             File name to test (the scanner passes the entry's base name).
 * @param excludedFiles    Exact file names to exclude.
 * @param excludedSuffixes File-name suffixes to exclude.
 * @returns True when the file should be skipped.
 */
function shouldExcludeFile(path: string, excludedFiles: Set<string>, excludedSuffixes: Set<string>): boolean {
  const lower = path.toLowerCase();

  // Fast path: the common case is an exact or already-lowercase entry.
  if (excludedFiles.has(path) || excludedFiles.has(lower)) {
    return true;
  }
  for (const name of excludedFiles) {
    if (name.toLowerCase() === lower) {
      return true;
    }
  }
  for (const suffix of excludedSuffixes) {
    if (lower.endsWith(suffix.toLowerCase())) {
      return true;
    }
  }
  return false;
}
|
|
283
|
+
|
|
284
|
+
/**
 * Fills every unset scan option with its module default.
 *
 * @param options Caller overrides; may be omitted entirely.
 * @returns A fully populated options object. Caller-provided sets are reused, not copied.
 */
function resolveScanOptions(options?: RemoteSyncScanOptions): Required<RemoteSyncScanOptions> {
  const overrides: RemoteSyncScanOptions = options ?? {};
  const maxFileSizeBytes = overrides.max_file_size_bytes ?? DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES;
  const excludedDirs = overrides.excluded_dirs ?? DEFAULT_EXCLUDED_DIRS;
  const excludedFiles = overrides.excluded_files ?? DEFAULT_EXCLUDED_FILES;
  const excludedFileSuffixes = overrides.excluded_file_suffixes ?? DEFAULT_EXCLUDED_FILE_SUFFIXES;

  return {
    max_file_size_bytes: maxFileSizeBytes,
    excluded_dirs: excludedDirs,
    excluded_files: excludedFiles,
    excluded_file_suffixes: excludedFileSuffixes
  };
}
|
|
292
|
+
|
|
293
|
+
/**
 * Recursively scans `project_root_path` and returns size/mtime metadata for
 * every regular file eligible for remote sync, keyed by normalized
 * repo-relative path.
 *
 * Skipped: directories named in `excluded_dirs`, files matching
 * `excluded_files` / `excluded_file_suffixes`, anything the indexing-ignore
 * matcher rejects, entries that are neither a directory nor a regular file,
 * and files larger than `max_file_size_bytes`.
 *
 * A file or subdirectory that disappears while the scan is running (ENOENT)
 * is left out instead of failing the whole scan; this is the same result a
 * scan started a moment later would give. A missing root is still an error.
 *
 * @param project_root_path Directory to scan; resolved to an absolute path.
 * @param options           Optional overrides for the size limit and exclusion sets.
 * @returns Map from repo-relative path to file metadata.
 * @throws Any filesystem error other than ENOENT below the root.
 */
export async function collectProjectFileStats(
  project_root_path: string,
  options?: RemoteSyncScanOptions
): Promise<Map<string, RemoteSyncProjectFileStat>> {
  const root = resolve(project_root_path);
  const resolvedOptions = resolveScanOptions(options);
  const ignoreMatcher = await loadIndexingIgnoreMatcher(root);
  const output = new Map<string, RemoteSyncProjectFileStat>();

  const isMissingEntryError = (error: unknown): boolean =>
    typeof error === "object" && error !== null && (error as { code?: unknown }).code === "ENOENT";

  // Awaits `work`, mapping "entry no longer exists" to undefined and rethrowing everything else.
  async function orUndefinedIfMissing<T>(work: Promise<T>): Promise<T | undefined> {
    try {
      return await work;
    } catch (error) {
      if (isMissingEntryError(error)) {
        return undefined;
      }
      throw error;
    }
  }

  async function walk(dir: string): Promise<void> {
    const listing = readdir(dir, { withFileTypes: true });
    // Only tolerate disappearance below the root: a missing root must surface,
    // otherwise an empty scan would make every known file look deleted.
    const entries = dir === root ? await listing : await orUndefinedIfMissing(listing);
    if (!entries) {
      return;
    }

    for (const entry of entries) {
      const fullPath = join(dir, entry.name);
      const repoPath = normalizeRepoRelativePath(relative(root, fullPath));

      if (entry.isDirectory()) {
        if (resolvedOptions.excluded_dirs.has(entry.name) || ignoreMatcher.shouldIgnorePath(repoPath, "dir")) {
          continue;
        }
        await walk(fullPath);
        continue;
      }

      if (!entry.isFile()) {
        continue;
      }

      if (shouldExcludeFile(entry.name, resolvedOptions.excluded_files, resolvedOptions.excluded_file_suffixes)) {
        continue;
      }
      if (ignoreMatcher.shouldIgnorePath(repoPath, "file")) {
        continue;
      }

      const fileStat = await orUndefinedIfMissing(stat(fullPath));
      if (!fileStat) {
        // Deleted between readdir and stat.
        continue;
      }
      if (fileStat.size > resolvedOptions.max_file_size_bytes) {
        continue;
      }

      output.set(repoPath, {
        path: repoPath,
        full_path: fullPath,
        size: fileStat.size,
        // Truncated so the value compares exactly after a JSON round trip.
        mtime_ms: Math.trunc(fileStat.mtimeMs),
        language: extensionToLanguage(repoPath)
      });
    }
  }

  await walk(root);
  return output;
}
|
|
344
|
+
|
|
345
|
+
/**
 * Scans the project and loads the text content of every eligible file.
 *
 * Files that cannot be read, or whose decoded content contains a NUL
 * character, are silently omitted.
 *
 * @param project_root_path Directory to scan.
 * @param options           Optional scanner overrides.
 * @returns Upload candidates ordered by path (`localeCompare`).
 */
export async function collectUploadCandidates(
  project_root_path: string,
  options?: RemoteSyncScanOptions
): Promise<RemoteSyncUploadCandidate[]> {
  const scanned = await collectProjectFileStats(project_root_path, options);
  const ordered = [...scanned.entries()].sort(([left], [right]) => left.localeCompare(right));
  const candidates: RemoteSyncUploadCandidate[] = [];

  for (const [repoPath, fileInfo] of ordered) {
    let text: string;
    try {
      text = await readFile(fileInfo.full_path, "utf8");
    } catch {
      // Best effort: an unreadable file is left out of the result.
      continue;
    }
    if (looksBinary(text)) {
      continue;
    }

    const candidate: RemoteSyncUploadCandidate = { path: repoPath, content: text };
    if (fileInfo.language) {
      candidate.language = fileInfo.language;
    }
    candidates.push(candidate);
  }

  return candidates;
}
|
|
373
|
+
|
|
374
|
+
/**
 * Diffs the current project scan against the previous sync state.
 *
 * A file is re-read only when it is new, its size or mtime changed, or a full
 * upsert is forced. A re-read file is queued for upsert unless its content
 * hash matches the previous entry (ignored when forced). Paths in the
 * previous state that the scan no longer reports become deletions.
 *
 * Files that cannot be read, or that look binary, keep their previous state
 * entry if they had one and are otherwise left untracked.
 * NOTE(review): a tracked file that turns binary therefore keeps its old
 * entry and is never reported as deleted; confirm this is intended.
 *
 * @param input.project_root_path Directory to scan.
 * @param input.previous_state    State from the last run, if any.
 * @param input.force_full_upsert Re-send every readable text file.
 * @param input.options           Optional scanner overrides.
 * @returns The delta, the state entries for upserted files, and the full next file map.
 */
export async function buildRemoteSyncDeltaFromState(input: {
  project_root_path: string;
  previous_state?: RemoteSyncStateFile;
  force_full_upsert: boolean;
  options?: RemoteSyncScanOptions;
}): Promise<BuildRemoteSyncDeltaResult> {
  const scanned = await collectProjectFileStats(input.project_root_path, input.options);
  const knownFiles = input.previous_state?.files ?? {};
  const forceAll = input.force_full_upsert;

  const upserts: RemoteSyncUploadCandidate[] = [];
  const upsertEntries: Record<string, RemoteSyncStateEntry> = {};
  const nextFiles: Record<string, RemoteSyncStateEntry> = {};

  // Keeps the previous entry (when there is one) for a file we could not process.
  const carryOver = (repoPath: string, known: RemoteSyncStateEntry | undefined): void => {
    if (known) {
      nextFiles[repoPath] = known;
    }
  };

  const ordered = [...scanned.entries()].sort(([left], [right]) => left.localeCompare(right));
  for (const [repoPath, current] of ordered) {
    const known = knownFiles[repoPath];

    // Cheap path: same size and mtime, nothing forced, so reuse the entry without reading.
    if (known && !forceAll && known.size === current.size && known.mtime_ms === current.mtime_ms) {
      nextFiles[repoPath] = known;
      continue;
    }

    let text: string;
    try {
      text = await readFile(current.full_path, "utf8");
    } catch {
      carryOver(repoPath, known);
      continue;
    }
    if (looksBinary(text)) {
      carryOver(repoPath, known);
      continue;
    }

    const digest = sha256Text(text);
    const entry: RemoteSyncStateEntry = {
      content_hash: digest,
      size: current.size,
      mtime_ms: current.mtime_ms
    };
    if (current.language) {
      entry.language = current.language;
    }
    nextFiles[repoPath] = entry;

    // Metadata changed but the content did not: refresh the state entry, skip the upload.
    const contentUnchanged = known?.content_hash === digest;
    if (contentUnchanged && !forceAll) {
      continue;
    }

    const candidate: RemoteSyncUploadCandidate = { path: repoPath, content: text };
    if (current.language) {
      candidate.language = current.language;
    }
    upserts.push(candidate);
    upsertEntries[repoPath] = entry;
  }

  const deletedPaths = Object.keys(knownFiles).filter((knownPath) => !scanned.has(knownPath));

  return {
    delta: {
      upsert_files: upserts,
      deleted_paths: deletedPaths
    },
    upsert_state_entries: upsertEntries,
    next_files: nextFiles
  };
}
|
|
455
|
+
|
|
456
|
+
/**
 * Computes the UTF-8 byte size of the JSON body a delta request would have.
 *
 * `workspace_id`, `base_index_version` and each file's `language` are
 * included only when truthy.
 *
 * @param input Request fields to measure.
 * @returns Byte length of the serialized payload.
 */
export function estimateRemoteSyncDeltaRequestSize(input: {
  project_root_path: string;
  workspace_id?: string;
  base_index_version?: string;
  upsert_files: RemoteSyncUploadCandidate[];
  deleted_paths: string[];
}): number {
  // Keys are inserted in the same order as the request body, so the length is exact.
  const body: Record<string, unknown> = { project_root_path: input.project_root_path };
  if (input.workspace_id) {
    body.workspace_id = input.workspace_id;
  }
  if (input.base_index_version) {
    body.base_index_version = input.base_index_version;
  }
  body.upsert_files = input.upsert_files.map(({ path, content, language }) =>
    language ? { path, content, language } : { path, content }
  );
  body.deleted_paths = input.deleted_paths;

  const serialized = JSON.stringify(body);
  return Buffer.byteLength(serialized, "utf8");
}
|
|
476
|
+
|
|
477
|
+
/**
 * Splits a delta into batches whose estimated request size fits a byte budget.
 *
 * Upserts are packed first in path order, then de-duplicated deletions in
 * path order. Deletions may share the last upsert batch when they fit.
 *
 * NOTE(review): the budget is max(256 KiB, 80% of `max_body_bytes`), so for
 * small limits the 256 KiB floor exceeds `max_body_bytes` itself. Confirm the
 * floor is intended.
 *
 * @param input.delta          Changes to split.
 * @param input.max_body_bytes Request-body limit the budget is derived from.
 * @returns Batches in send order, each with its estimated size.
 * @throws Error when a single upsert or a single deletion exceeds the budget on its own.
 */
export function splitRemoteSyncDeltaIntoBatches(input: {
  project_root_path: string;
  workspace_id?: string;
  base_index_version?: string;
  delta: RemoteSyncDeltaPayload;
  max_body_bytes: number;
}): RemoteSyncDeltaBatch[] {
  const byteBudget = Math.max(256 * 1024, Math.floor(input.max_body_bytes * 0.8));
  const sortedUpserts = input.delta.upsert_files.slice().sort((left, right) => left.path.localeCompare(right.path));
  const sortedDeletes = Array.from(new Set(input.delta.deleted_paths)).sort((left, right) =>
    left.localeCompare(right)
  );

  // Size of a request that carries exactly these upserts and deletions.
  const measure = (upsert_files: RemoteSyncUploadCandidate[], deleted_paths: string[]): number =>
    estimateRemoteSyncDeltaRequestSize({
      project_root_path: input.project_root_path,
      workspace_id: input.workspace_id,
      base_index_version: input.base_index_version,
      upsert_files,
      deleted_paths
    });
  const fits = (upsert_files: RemoteSyncUploadCandidate[], deleted_paths: string[]): boolean =>
    measure(upsert_files, deleted_paths) <= byteBudget;

  const result: RemoteSyncDeltaBatch[] = [];
  let openUpserts: RemoteSyncUploadCandidate[] = [];
  let openDeletes: string[] = [];

  // Emits the batch under construction, if it is non-empty, and starts a new one.
  const seal = (): void => {
    if (openUpserts.length > 0 || openDeletes.length > 0) {
      result.push({
        upsert_files: openUpserts,
        deleted_paths: openDeletes,
        approx_bytes: measure(openUpserts, openDeletes)
      });
      openUpserts = [];
      openDeletes = [];
    }
  };

  for (const file of sortedUpserts) {
    const grown = [...openUpserts, file];
    if (!fits(grown, openDeletes)) {
      seal();
      if (!fits([file], [])) {
        throw new Error(`delta upsert payload too large for path ${file.path}`);
      }
      openUpserts = [file];
    } else {
      openUpserts = grown;
    }
  }

  for (const path of sortedDeletes) {
    const grown = [...openDeletes, path];
    if (!fits(openUpserts, grown)) {
      seal();
      if (!fits([], [path])) {
        throw new Error(`delta delete payload too large for path ${path}`);
      }
      openDeletes = [path];
    } else {
      openDeletes = grown;
    }
  }

  seal();
  return result;
}
|
|
548
|
+
|
|
549
|
+
/**
 * Loads a persisted sync-state file.
 *
 * @param path Location of the JSON state file.
 * @returns The state, or `undefined` when the file is missing, unreadable,
 *   not valid JSON, has a different `mode`, or lacks a plain-object `files`
 *   map. A missing or non-string `updated_at` is replaced with the current time.
 */
export async function readRemoteSyncState(path: string): Promise<RemoteSyncStateFile | undefined> {
  if (!existsSync(path)) {
    return undefined;
  }

  try {
    const raw = await readFile(path, "utf8");
    const candidate = JSON.parse(raw) as Partial<RemoteSyncStateFile>;
    if (!candidate || candidate.mode !== REMOTE_SYNC_STATE_MODE) {
      return undefined;
    }

    const { files } = candidate;
    if (typeof files !== "object" || files === null || Array.isArray(files)) {
      return undefined;
    }

    const updatedAt = typeof candidate.updated_at === "string" ? candidate.updated_at : nowIso();
    return {
      mode: REMOTE_SYNC_STATE_MODE,
      workspace_id: candidate.workspace_id,
      last_index_version: candidate.last_index_version,
      files: files as Record<string, RemoteSyncStateEntry>,
      updated_at: updatedAt
    };
  } catch {
    // An unreadable or corrupt state file is treated the same as no state.
    return undefined;
  }
}
|
|
577
|
+
|
|
578
|
+
/**
 * Writes the sync state to `path` as two-space-indented JSON, always stamping
 * the current `mode` constant and a fresh `updated_at`.
 *
 * @param path  Destination file.
 * @param state State to persist; its own `mode` and `updated_at` are overridden.
 */
export async function writeRemoteSyncState(path: string, state: RemoteSyncStateFile): Promise<void> {
  const snapshot = {
    ...state,
    mode: REMOTE_SYNC_STATE_MODE,
    updated_at: nowIso()
  };
  const serialized = JSON.stringify(snapshot, null, 2);
  await writeFile(path, serialized);
}
|
|
592
|
+
|
|
593
|
+
/**
 * Recognises a "stale base index version" rejection: an HTTP 400 whose
 * message contains both "invalid_argument" and "base index version"
 * (case-insensitive).
 */
export function isStaleBaseIndexError(error: unknown): boolean {
  if (error instanceof RemoteSyncHttpResponseError && error.status === 400) {
    const text = error.message.toLowerCase();
    return ["invalid_argument", "base index version"].every((needle) => text.includes(needle));
  }
  return false;
}
|
|
603
|
+
|
|
604
|
+
/**
 * Recognises failures indicating that the remote has no push-delta endpoint.
 *
 * - HTTP error: status 404/405/501, or a message containing both "not_found"
 *   and "push-delta".
 * - Other `Error`: a message containing "push-delta" plus "not found" or "404".
 *
 * Message checks are case-insensitive.
 */
export function isDeltaUnsupportedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }

  if (error instanceof RemoteSyncHttpResponseError) {
    switch (error.status) {
      case 404:
      case 405:
      case 501:
        return true;
      default: {
        const text = error.message.toLowerCase();
        return text.includes("not_found") && text.includes("push-delta");
      }
    }
  }

  const text = error.message.toLowerCase();
  if (!text.includes("push-delta")) {
    return false;
  }
  return text.includes("not found") || text.includes("404");
}
|
|
620
|
+
|
|
621
|
+
/**
 * Recognises failures indicating that the remote does not offer the
 * blob/commit v2 protocol.
 *
 * - HTTP error: status 404/405/501; or a message saying "blob_commit_v2" is
 *   "disabled"; or a message naming "commit-v2" / "blobs/upload" together
 *   with "not found".
 * - Other `Error`: a message naming "commit-v2" / "blobs/upload" together
 *   with "not found" or "404".
 *
 * Message checks are case-insensitive.
 *
 * Fix: the "disabled" clause used to be AND-ed with "not found" because of
 * how the expression was parenthesised, so a plain "blob_commit_v2 is
 * disabled" response was never recognised. It is now an independent condition.
 */
export function isBlobCommitV2UnsupportedError(error: unknown): boolean {
  if (error instanceof RemoteSyncHttpResponseError) {
    if ([404, 405, 501].includes(error.status)) {
      return true;
    }
    const message = error.message.toLowerCase();
    if (message.includes("blob_commit_v2") && message.includes("disabled")) {
      return true;
    }
    const namesV2Endpoint = message.includes("commit-v2") || message.includes("blobs/upload");
    return namesV2Endpoint && message.includes("not found");
  }

  if (error instanceof Error) {
    const message = error.message.toLowerCase();
    return (
      (message.includes("commit-v2") || message.includes("blobs/upload")) &&
      (message.includes("not found") || message.includes("404"))
    );
  }

  return false;
}
|
|
644
|
+
|
|
645
|
+
/** True for HTTP errors worth re-queuing: status 429 or any status of 500 and above. */
function isRetryableAdaptiveError(error: unknown): boolean {
  if (!(error instanceof RemoteSyncHttpResponseError)) {
    return false;
  }
  const { status } = error;
  return status >= 500 || status === 429;
}
|
|
651
|
+
|
|
652
|
+
/**
 * Turns upsert candidates into upload batches of content-addressed blobs.
 *
 * Content is de-duplicated by SHA-256 hash (first occurrence wins), blobs are
 * ordered by hash, and batches are filled greedily up to `max_blob_batch_bytes`.
 *
 * @param input.upsert_files         Files whose content must be uploaded.
 * @param input.max_blob_bytes       Largest allowed single blob (UTF-8 bytes).
 * @param input.max_blob_batch_bytes Largest allowed total blob size per batch.
 * @returns Batches in upload order; empty when there are no files.
 * @throws Error when a file exceeds `max_blob_bytes` or a blob exceeds `max_blob_batch_bytes`.
 */
function computeBlobBatches(input: {
  upsert_files: RemoteSyncUploadCandidate[];
  max_blob_bytes: number;
  max_blob_batch_bytes: number;
}): Array<
  Array<{
    hash: string;
    content: string;
    size_bytes: number;
  }>
> {
  type PackedBlob = { hash: string; content: string; size_bytes: number };

  const uniqueByHash = new Map<string, PackedBlob>();
  for (const { path, content } of input.upsert_files) {
    const size_bytes = Buffer.byteLength(content, "utf8");
    if (size_bytes > input.max_blob_bytes) {
      throw new Error(`blob payload too large for path ${path}`);
    }
    const hash = sha256Text(content);
    if (uniqueByHash.has(hash)) {
      continue;
    }
    uniqueByHash.set(hash, { hash, content, size_bytes });
  }

  const ordered = Array.from(uniqueByHash.values()).sort((left, right) => left.hash.localeCompare(right.hash));
  const packed: PackedBlob[][] = [];
  let open: PackedBlob[] = [];
  let openBytes = 0;

  for (const blob of ordered) {
    if (blob.size_bytes > input.max_blob_batch_bytes) {
      throw new Error(`blob ${blob.hash} exceeds max blob batch bytes`);
    }
    const wouldOverflow = open.length > 0 && openBytes + blob.size_bytes > input.max_blob_batch_bytes;
    if (wouldOverflow) {
      // Close the full batch and start the next one with this blob.
      packed.push(open);
      open = [blob];
      openBytes = blob.size_bytes;
    } else {
      open.push(blob);
      openBytes += blob.size_bytes;
    }
  }

  if (open.length > 0) {
    packed.push(open);
  }
  return packed;
}
|
|
700
|
+
|
|
701
|
+
/** True when the remote lists "blob_commit_v2" among its sync protocols. */
function supportsBlobCommitV2(capabilities: RemoteSyncCapabilities): boolean {
  return capabilities.sync_protocols?.includes("blob_commit_v2") ?? false;
}
|
|
705
|
+
|
|
706
|
+
export async function runRemoteAdaptiveSync(input: RunRemoteAdaptiveSyncInput): Promise<RunRemoteAdaptiveSyncResult> {
|
|
707
|
+
const runStartedAt = Date.now();
|
|
708
|
+
const retries = input.retries ?? 3;
|
|
709
|
+
const initialDelayMs = input.initial_delay_ms ?? 500;
|
|
710
|
+
const staleBaseError = input.stale_base_error ?? isStaleBaseIndexError;
|
|
711
|
+
|
|
712
|
+
if (!supportsBlobCommitV2(input.capabilities)) {
|
|
713
|
+
const delta = await runRemoteDeltaSync({
|
|
714
|
+
project_root_path: input.project_root_path,
|
|
715
|
+
scan_root_path: input.scan_root_path,
|
|
716
|
+
workspace_id: input.workspace_id,
|
|
717
|
+
previous_state: input.previous_state,
|
|
718
|
+
force_full_upsert: input.force_full_upsert,
|
|
719
|
+
max_body_bytes: input.capabilities.max_body_bytes,
|
|
720
|
+
retries,
|
|
721
|
+
initial_delay_ms: initialDelayMs,
|
|
722
|
+
stale_base_error: staleBaseError,
|
|
723
|
+
persist_state: input.persist_state,
|
|
724
|
+
push_delta: input.push_delta
|
|
725
|
+
});
|
|
726
|
+
return {
|
|
727
|
+
...delta,
|
|
728
|
+
protocol: "delta_v1"
|
|
729
|
+
};
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
const execute = async (previousState: RemoteSyncStateFile | undefined, forceFullUpsert: boolean) => {
|
|
733
|
+
const scanRootPath = input.scan_root_path ?? input.project_root_path;
|
|
734
|
+
const deltaBuild = await buildRemoteSyncDeltaFromState({
|
|
735
|
+
project_root_path: scanRootPath,
|
|
736
|
+
previous_state: previousState,
|
|
737
|
+
force_full_upsert: forceFullUpsert
|
|
738
|
+
});
|
|
739
|
+
|
|
740
|
+
const nextWorkspaceId = input.workspace_id ?? previousState?.workspace_id;
|
|
741
|
+
if (deltaBuild.delta.upsert_files.length === 0 && deltaBuild.delta.deleted_paths.length === 0) {
|
|
742
|
+
const unchangedState: RemoteSyncStateFile = {
|
|
743
|
+
mode: REMOTE_SYNC_STATE_MODE,
|
|
744
|
+
workspace_id: nextWorkspaceId,
|
|
745
|
+
last_index_version: previousState?.last_index_version,
|
|
746
|
+
files: deltaBuild.next_files,
|
|
747
|
+
updated_at: nowIso()
|
|
748
|
+
};
|
|
749
|
+
await input.persist_state?.(unchangedState);
|
|
750
|
+
return {
|
|
751
|
+
state: unchangedState,
|
|
752
|
+
changed: false,
|
|
753
|
+
workspace_id: unchangedState.workspace_id,
|
|
754
|
+
index_version: unchangedState.last_index_version,
|
|
755
|
+
applied_delta: {
|
|
756
|
+
upsert_files: 0,
|
|
757
|
+
deleted_paths: 0
|
|
758
|
+
},
|
|
759
|
+
stats: {
|
|
760
|
+
batches_total: 0,
|
|
761
|
+
bytes_total: 0,
|
|
762
|
+
latency_ms: Date.now() - runStartedAt
|
|
763
|
+
},
|
|
764
|
+
protocol: "blob_commit_v2" as const
|
|
765
|
+
};
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
const maxBlobBytes = input.capabilities.max_blob_bytes ?? 128 * 1024;
|
|
769
|
+
const maxBlobBatchBytes = input.capabilities.max_blob_batch_bytes ?? Math.max(maxBlobBytes, 1024 * 1024);
|
|
770
|
+
const maxCommitBodyBytes = input.capabilities.max_commit_body_bytes ?? input.capabilities.max_body_bytes;
|
|
771
|
+
let currentConcurrency = Math.max(
|
|
772
|
+
1,
|
|
773
|
+
Math.min(input.capabilities.upload_concurrency_hint ?? 4, 16)
|
|
774
|
+
);
|
|
775
|
+
|
|
776
|
+
const blobBatches = computeBlobBatches({
|
|
777
|
+
upsert_files: deltaBuild.delta.upsert_files,
|
|
778
|
+
max_blob_bytes: maxBlobBytes,
|
|
779
|
+
max_blob_batch_bytes: maxBlobBatchBytes
|
|
780
|
+
});
|
|
781
|
+
const pending = [...blobBatches];
|
|
782
|
+
const acknowledged = new Set<string>();
|
|
783
|
+
let bytesTotal = 0;
|
|
784
|
+
|
|
785
|
+
while (pending.length > 0) {
|
|
786
|
+
const wave = pending.splice(0, currentConcurrency);
|
|
787
|
+
const settled = await Promise.allSettled(
|
|
788
|
+
wave.map(async (batch) =>
|
|
789
|
+
retryWithBackoff({
|
|
790
|
+
retries,
|
|
791
|
+
initial_delay_ms: initialDelayMs,
|
|
792
|
+
fn: async () =>
|
|
793
|
+
input.upload_blobs({
|
|
794
|
+
workspace_id: nextWorkspaceId,
|
|
795
|
+
project_root_path: input.project_root_path,
|
|
796
|
+
blobs: batch
|
|
797
|
+
})
|
|
798
|
+
})
|
|
799
|
+
)
|
|
800
|
+
);
|
|
801
|
+
|
|
802
|
+
let waveHadRetryableError = false;
|
|
803
|
+
for (let idx = 0; idx < settled.length; idx += 1) {
|
|
804
|
+
const result = settled[idx]!;
|
|
805
|
+
const batch = wave[idx]!;
|
|
806
|
+
if (result.status === "rejected") {
|
|
807
|
+
if (isRetryableAdaptiveError(result.reason)) {
|
|
808
|
+
pending.push(batch);
|
|
809
|
+
waveHadRetryableError = true;
|
|
810
|
+
continue;
|
|
811
|
+
}
|
|
812
|
+
throw result.reason;
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
const payload = result.value;
|
|
816
|
+
for (const hash of payload.accepted_hashes) {
|
|
817
|
+
acknowledged.add(hash.toLowerCase());
|
|
818
|
+
}
|
|
819
|
+
for (const hash of payload.already_present_hashes) {
|
|
820
|
+
acknowledged.add(hash.toLowerCase());
|
|
821
|
+
}
|
|
822
|
+
if (payload.rejected.length > 0) {
|
|
823
|
+
throw new Error(`blob upload rejected: ${payload.rejected[0]!.reason}`);
|
|
824
|
+
}
|
|
825
|
+
bytesTotal += batch.reduce((sum, blob) => sum + blob.size_bytes, 0);
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
if (waveHadRetryableError) {
|
|
829
|
+
const previous = currentConcurrency;
|
|
830
|
+
currentConcurrency = Math.max(1, Math.floor(currentConcurrency / 2));
|
|
831
|
+
await input.on_upload_strategy_change?.({
|
|
832
|
+
previous_concurrency: previous,
|
|
833
|
+
next_concurrency: currentConcurrency,
|
|
834
|
+
reason: "error"
|
|
835
|
+
});
|
|
836
|
+
} else if (currentConcurrency < Math.max(1, input.capabilities.upload_concurrency_hint ?? 4)) {
|
|
837
|
+
const previous = currentConcurrency;
|
|
838
|
+
currentConcurrency += 1;
|
|
839
|
+
await input.on_upload_strategy_change?.({
|
|
840
|
+
previous_concurrency: previous,
|
|
841
|
+
next_concurrency: currentConcurrency,
|
|
842
|
+
reason: "success"
|
|
843
|
+
});
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
const dedupedUpserts = new Map<string, RemoteSyncUploadCandidate>();
|
|
848
|
+
for (const file of deltaBuild.delta.upsert_files) {
|
|
849
|
+
dedupedUpserts.set(file.path, file);
|
|
850
|
+
}
|
|
851
|
+
const commitUpserts = [...dedupedUpserts.values()]
|
|
852
|
+
.sort((a, b) => a.path.localeCompare(b.path))
|
|
853
|
+
.map((file) => ({
|
|
854
|
+
path: file.path,
|
|
855
|
+
blob_hash: sha256Text(file.content),
|
|
856
|
+
...(file.language ? { language: file.language } : {})
|
|
857
|
+
}));
|
|
858
|
+
const commitDeletes = [...new Set(deltaBuild.delta.deleted_paths)].sort((a, b) => a.localeCompare(b));
|
|
859
|
+
|
|
860
|
+
const missingHash = commitUpserts.find((file) => !acknowledged.has(file.blob_hash.toLowerCase()));
|
|
861
|
+
if (missingHash) {
|
|
862
|
+
throw new Error(`missing uploaded blob hash for commit: ${missingHash.blob_hash}`);
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
if (
|
|
866
|
+
Buffer.byteLength(
|
|
867
|
+
JSON.stringify({
|
|
868
|
+
project_root_path: input.project_root_path,
|
|
869
|
+
workspace_id: nextWorkspaceId,
|
|
870
|
+
base_index_version: previousState?.last_index_version,
|
|
871
|
+
upsert_files: commitUpserts,
|
|
872
|
+
deleted_paths: commitDeletes
|
|
873
|
+
}),
|
|
874
|
+
"utf8"
|
|
875
|
+
) > maxCommitBodyBytes
|
|
876
|
+
) {
|
|
877
|
+
throw new Error("commit-v2 payload exceeds max_commit_body_bytes");
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
const commit = await retryWithBackoff({
|
|
881
|
+
retries,
|
|
882
|
+
initial_delay_ms: initialDelayMs,
|
|
883
|
+
fn: async () =>
|
|
884
|
+
input.commit_v2({
|
|
885
|
+
workspace_id: nextWorkspaceId,
|
|
886
|
+
project_root_path: input.project_root_path,
|
|
887
|
+
...(previousState?.last_index_version ? { base_index_version: previousState.last_index_version } : {}),
|
|
888
|
+
upsert_files: commitUpserts,
|
|
889
|
+
deleted_paths: commitDeletes
|
|
890
|
+
})
|
|
891
|
+
});
|
|
892
|
+
|
|
893
|
+
const finalWorkspace = commit.workspace_id ?? nextWorkspaceId;
|
|
894
|
+
const finalIndexVersion = commit.index_version ?? previousState?.last_index_version;
|
|
895
|
+
const finalState: RemoteSyncStateFile = {
|
|
896
|
+
mode: REMOTE_SYNC_STATE_MODE,
|
|
897
|
+
workspace_id: finalWorkspace,
|
|
898
|
+
last_index_version: finalIndexVersion,
|
|
899
|
+
files: deltaBuild.next_files,
|
|
900
|
+
updated_at: nowIso()
|
|
901
|
+
};
|
|
902
|
+
await input.persist_state?.(finalState);
|
|
903
|
+
|
|
904
|
+
return {
|
|
905
|
+
state: finalState,
|
|
906
|
+
changed: true,
|
|
907
|
+
workspace_id: finalWorkspace,
|
|
908
|
+
index_version: finalIndexVersion,
|
|
909
|
+
applied_delta: {
|
|
910
|
+
upsert_files: commitUpserts.length,
|
|
911
|
+
deleted_paths: commitDeletes.length
|
|
912
|
+
},
|
|
913
|
+
stats: {
|
|
914
|
+
batches_total: blobBatches.length,
|
|
915
|
+
bytes_total: bytesTotal,
|
|
916
|
+
latency_ms: Date.now() - runStartedAt
|
|
917
|
+
},
|
|
918
|
+
protocol: "blob_commit_v2" as const
|
|
919
|
+
};
|
|
920
|
+
};
|
|
921
|
+
|
|
922
|
+
const forceFullUpsert = input.force_full_upsert ?? false;
|
|
923
|
+
try {
|
|
924
|
+
return await execute(input.previous_state, forceFullUpsert);
|
|
925
|
+
} catch (error) {
|
|
926
|
+
if (isBlobCommitV2UnsupportedError(error)) {
|
|
927
|
+
const delta = await runRemoteDeltaSync({
|
|
928
|
+
project_root_path: input.project_root_path,
|
|
929
|
+
scan_root_path: input.scan_root_path,
|
|
930
|
+
workspace_id: input.workspace_id,
|
|
931
|
+
previous_state: input.previous_state,
|
|
932
|
+
force_full_upsert: input.force_full_upsert,
|
|
933
|
+
max_body_bytes: input.capabilities.max_body_bytes,
|
|
934
|
+
retries,
|
|
935
|
+
initial_delay_ms: initialDelayMs,
|
|
936
|
+
stale_base_error: staleBaseError,
|
|
937
|
+
persist_state: input.persist_state,
|
|
938
|
+
push_delta: input.push_delta
|
|
939
|
+
});
|
|
940
|
+
return {
|
|
941
|
+
...delta,
|
|
942
|
+
protocol: "delta_v1"
|
|
943
|
+
};
|
|
944
|
+
}
|
|
945
|
+
if (!forceFullUpsert && input.previous_state && staleBaseError(error)) {
|
|
946
|
+
return execute(undefined, true);
|
|
947
|
+
}
|
|
948
|
+
throw error;
|
|
949
|
+
}
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
/**
 * Calls `input.fn` until it resolves or the attempt budget is used up.
 *
 * `input.retries` is the total number of attempts, not the number of re-tries
 * after the first call. After the k-th failed attempt (1-based) the function
 * sleeps for `initial_delay_ms * 2 ** (k - 1)` milliseconds before trying
 * again, so the delay doubles on every failure.
 *
 * At least one attempt is always made: a `retries` value of 0, a negative
 * number or NaN is treated as 1.
 *
 * @param input.fn - Operation to run; a rejection (or synchronous throw) counts as a failed attempt.
 * @param input.retries - Maximum number of attempts.
 * @param input.initial_delay_ms - Sleep before the second attempt, in milliseconds.
 * @returns The value `fn` resolved with.
 * @throws The error raised by the last failed attempt.
 */
export async function retryWithBackoff<T>(input: {
  fn: () => Promise<T>;
  retries: number;
  initial_delay_ms: number;
}): Promise<T> {
  // Without this floor a non-positive or NaN budget skipped the loop entirely,
  // so `fn` never ran and the caller received a thrown `undefined`.
  const maxAttempts = Number.isNaN(input.retries) ? 1 : Math.max(1, input.retries);

  let failedAttempts = 0;
  let lastError: unknown;
  while (failedAttempts < maxAttempts) {
    try {
      return await input.fn();
    } catch (error) {
      lastError = error;
      failedAttempts += 1;
      if (failedAttempts >= maxAttempts) {
        // Budget exhausted: do not sleep before surfacing the failure.
        break;
      }
      const delayMs = input.initial_delay_ms * 2 ** (failedAttempts - 1);
      await new Promise((resolveSleep) => setTimeout(resolveSleep, delayMs));
    }
  }
  throw lastError;
}
|
|
973
|
+
|
|
974
|
+
/**
 * Synchronises local changes to the remote index through `input.push_delta`.
 *
 * One pass works as follows:
 * 1. Build a delta with `buildRemoteSyncDeltaFromState`, scanning
 *    `input.scan_root_path` (falling back to `input.project_root_path`).
 * 2. If the delta has no upserts and no deletions, persist a refreshed state
 *    and return with `changed: false`.
 * 3. Otherwise split the delta with `splitRemoteSyncDeltaIntoBatches`
 *    (given `input.max_body_bytes`) and push the batches one after another,
 *    each wrapped in `retryWithBackoff`. After every batch the workspace id
 *    and index version returned by the server are adopted, the progress
 *    callback is invoked, and the accumulated state is persisted.
 *
 * If the first pass throws an error recognised by `input.stale_base_error`
 * (default `isStaleBaseIndexError`), a previous state was supplied and a full
 * upsert was not already requested, the pass is repeated once with no previous
 * state and `force_full_upsert` set to true.
 *
 * @param input - Sync configuration and callbacks; `retries` defaults to 3 and
 *   `initial_delay_ms` to 500.
 * @returns The resulting state plus counts of applied upserts/deletions and
 *   batch, byte and latency statistics.
 * @throws Any error from the pass that is not handled by the stale-base fallback.
 */
export async function runRemoteDeltaSync(input: RunRemoteDeltaSyncInput): Promise<RunRemoteDeltaSyncResult> {
  const startedAtMs = Date.now();
  const attemptBudget = input.retries ?? 3;
  const backoffBaseMs = input.initial_delay_ms ?? 500;
  const detectStaleBase = input.stale_base_error ?? isStaleBaseIndexError;

  // Builds a state-file snapshot stamped with the current time.
  const snapshotState = (
    workspaceId: RemoteSyncStateFile["workspace_id"],
    indexVersion: RemoteSyncStateFile["last_index_version"],
    files: RemoteSyncStateFile["files"]
  ): RemoteSyncStateFile => ({
    mode: REMOTE_SYNC_STATE_MODE,
    workspace_id: workspaceId,
    last_index_version: indexVersion,
    files,
    updated_at: nowIso()
  });

  // Runs a single sync pass against the given base state.
  const syncOnce = async (
    baseState: RemoteSyncStateFile | undefined,
    fullUpsert: boolean
  ): Promise<RunRemoteDeltaSyncResult> => {
    const built = await buildRemoteSyncDeltaFromState({
      project_root_path: input.scan_root_path ?? input.project_root_path,
      previous_state: baseState,
      force_full_upsert: fullUpsert
    });

    const initialWorkspaceId = input.workspace_id ?? baseState?.workspace_id;

    const nothingToPush = built.delta.upsert_files.length === 0 && built.delta.deleted_paths.length === 0;
    if (nothingToPush) {
      const idleState = snapshotState(initialWorkspaceId, baseState?.last_index_version, built.next_files);
      await input.persist_state?.(idleState);
      return {
        state: idleState,
        changed: false,
        workspace_id: idleState.workspace_id,
        index_version: idleState.last_index_version,
        applied_delta: {
          upsert_files: 0,
          deleted_paths: 0
        },
        stats: {
          batches_total: 0,
          bytes_total: 0,
          latency_ms: Date.now() - startedAtMs
        }
      };
    }

    let baseVersion = baseState?.last_index_version;
    let workspaceId = initialWorkspaceId;
    // Starts from the previously synced files and is mutated batch by batch,
    // so every persisted snapshot reflects only what has been pushed so far.
    const syncedFiles: Record<string, RemoteSyncStateEntry> = { ...(baseState?.files ?? {}) };
    const totals = { upserts: 0, deletes: 0, bytes: 0 };

    const batches = splitRemoteSyncDeltaIntoBatches({
      project_root_path: input.project_root_path,
      workspace_id: workspaceId,
      base_index_version: baseVersion,
      delta: built.delta,
      max_body_bytes: input.max_body_bytes
    });

    let lastPersisted: RemoteSyncStateFile | undefined;

    for (const [position, batch] of batches.entries()) {
      const batchStartMs = Date.now();
      const response = await retryWithBackoff({
        retries: attemptBudget,
        initial_delay_ms: backoffBaseMs,
        fn: async () =>
          input.push_delta({
            workspace_id: workspaceId,
            project_root_path: input.project_root_path,
            // Omit the key entirely when there is no base version yet.
            ...(baseVersion ? { base_index_version: baseVersion } : {}),
            upsert_files: batch.upsert_files,
            deleted_paths: batch.deleted_paths
          })
      });

      // Values returned by the server take precedence for subsequent batches.
      workspaceId = response.workspace_id ?? workspaceId;
      baseVersion = response.index_version ?? baseVersion;

      for (const upserted of batch.upsert_files) {
        const stateEntry = built.upsert_state_entries[upserted.path];
        if (stateEntry) {
          syncedFiles[upserted.path] = stateEntry;
        }
      }
      for (const removedPath of batch.deleted_paths) {
        delete syncedFiles[removedPath];
      }

      totals.upserts += batch.upsert_files.length;
      totals.deletes += batch.deleted_paths.length;
      totals.bytes += batch.approx_bytes;

      await input.on_batch_processed?.({
        batch_index: position,
        batch_count: batches.length,
        approx_bytes: batch.approx_bytes,
        upsert_files: batch.upsert_files.length,
        deleted_paths: batch.deleted_paths.length,
        latency_ms: Date.now() - batchStartMs
      });

      lastPersisted = snapshotState(workspaceId, baseVersion, syncedFiles);
      await input.persist_state?.(lastPersisted);
    }

    // Only reached without a persisted snapshot when no batch was produced.
    const finalState = lastPersisted ?? snapshotState(workspaceId, baseVersion, syncedFiles);

    return {
      state: finalState,
      changed: true,
      workspace_id: workspaceId,
      index_version: baseVersion,
      applied_delta: {
        upsert_files: totals.upserts,
        deleted_paths: totals.deletes
      },
      stats: {
        batches_total: batches.length,
        bytes_total: totals.bytes,
        latency_ms: Date.now() - startedAtMs
      }
    };
  };

  const fullUpsertRequested = input.force_full_upsert ?? false;
  try {
    return await syncOnce(input.previous_state, fullUpsertRequested);
  } catch (error) {
    // Fall back to a from-scratch full upsert only for an incremental run
    // that had a previous state and failed with a stale-base error.
    const mayFallBack = !fullUpsertRequested && Boolean(input.previous_state);
    if (mayFallBack && detectStaleBase(error)) {
      return syncOnce(undefined, true);
    }
    throw error;
  }
}
|