@cue-dev/retrieval-core 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,816 @@
1
+ import { createHash } from "node:crypto";
2
+ import { existsSync } from "node:fs";
3
+ import { readdir, readFile, stat, writeFile } from "node:fs/promises";
4
+ import { join, relative, resolve } from "node:path";
5
+ import { loadIndexingIgnoreMatcher, normalizeRepoRelativePath } from "./indexing-ignore.js";
6
// Discriminator written into the on-disk sync-state file; readRemoteSyncState
// rejects state whose `mode` does not match this value.
export const REMOTE_SYNC_STATE_MODE = "remote_delta_v1";
// Default per-file size cap in bytes; the scanner skips larger files.
export const DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES = 1_000_000;
// Directory names (compared against the bare entry name at any depth) that the
// scanner never descends into: VCS metadata, tool caches, build output, and
// dependency trees.
const DEFAULT_EXCLUDED_DIRS = new Set([
    ".tmp",
    ".cache",
    ".git",
    ".hg",
    ".next",
    ".nuxt",
    ".pytest_cache",
    ".cue",
    ".cue-tool",
    ".svn",
    ".svelte-kit",
    ".turbo",
    ".venv",
    "__pycache__",
    "build",
    "coverage",
    "dist",
    "node_modules",
    "out",
    "target",
    "vendor",
    "venv"
]);
// Exact file names that are never synced — lockfiles are machine-generated,
// large, and churn on every dependency change.
const DEFAULT_EXCLUDED_FILES = new Set([
    "bun.lock",
    "bun.lockb",
    "Cargo.lock",
    "composer.lock",
    "Gemfile.lock",
    "package-lock.json",
    "Pipfile.lock",
    "pnpm-lock.yaml",
    "yarn.lock"
]);
// File suffixes (matched case-insensitively in shouldExcludeFile) that are
// never synced: minified/generated assets, compiled artifacts, and binary
// media formats.
const DEFAULT_EXCLUDED_FILE_SUFFIXES = new Set([
    ".min.js",
    ".min.css",
    ".map",
    ".exe",
    ".dll",
    ".so",
    ".bin",
    ".dylib",
    ".pyc",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".svg",
    ".mp4",
    ".mov",
    ".avi",
    ".mkv",
    ".pdf"
]);
65
/**
 * Error carrying the HTTP status and decoded response payload from a failed
 * remote-sync request. Callers branch on `status` (see isStaleBaseIndexError,
 * isRetryableAdaptiveError) and on the message text.
 *
 * Fix: set `name` so logs and stack traces identify this class instead of the
 * generic "Error".
 */
export class RemoteSyncHttpResponseError extends Error {
    status;
    payload;
    constructor(message, status, payload) {
        super(message);
        this.name = "RemoteSyncHttpResponseError";
        this.status = status;
        this.payload = payload;
    }
}
74
// Current wall-clock time as an ISO-8601 UTC string (used for state stamps).
function nowIso() {
    const timestamp = new Date();
    return timestamp.toISOString();
}
77
// Extension → language lookup used by extensionToLanguage; keys are the
// lowercase final extension including the dot.
const EXTENSION_LANGUAGE = new Map([
    [".ts", "typescript"],
    [".tsx", "typescript"],
    [".mts", "typescript"],
    [".cts", "typescript"],
    [".js", "javascript"],
    [".jsx", "javascript"],
    [".mjs", "javascript"],
    [".cjs", "javascript"],
    [".py", "python"],
    [".go", "go"],
    [".rs", "rust"],
    [".java", "java"],
    [".json", "json"],
    [".md", "markdown"],
    [".yml", "yaml"],
    [".yaml", "yaml"]
]);
/**
 * Map a file path to a coarse language identifier based on its final
 * extension. Generalized over the original: matching is now
 * case-insensitive ("README.MD" → "markdown"); all previously recognized
 * lowercase extensions map exactly as before.
 *
 * @param {string} path - file path or name.
 * @returns {string|undefined} language id, or undefined when unrecognized.
 */
function extensionToLanguage(path) {
    const lower = path.toLowerCase();
    const dotIndex = lower.lastIndexOf(".");
    if (dotIndex === -1) {
        return undefined;
    }
    return EXTENSION_LANGUAGE.get(lower.slice(dotIndex));
}
100
// Heuristic: a NUL byte surviving utf8 decoding strongly suggests the file
// was binary rather than text.
function looksBinary(content) {
    return content.indexOf("\0") !== -1;
}
103
// Hex-encoded SHA-256 digest of a text value (used for content hashes).
function sha256Text(value) {
    const hasher = createHash("sha256");
    hasher.update(value);
    return hasher.digest("hex");
}
106
/**
 * Decide whether a file name is excluded from syncing, either by an exact
 * name match (checked both as-given and lowercased) or by a lowercase
 * suffix match against the excluded-suffix set.
 */
function shouldExcludeFile(path, excludedFiles, excludedSuffixes) {
    const lowered = path.toLowerCase();
    const exactMatch = excludedFiles.has(path) || excludedFiles.has(lowered);
    if (exactMatch) {
        return true;
    }
    return [...excludedSuffixes].some((suffix) => lowered.endsWith(suffix));
}
118
// Fill any unset scan option with its module-level default; nullish values
// fall back, explicit falsy non-null values (e.g. 0) are respected.
function resolveScanOptions(options) {
    const maxFileSizeBytes = options?.max_file_size_bytes ?? DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES;
    const excludedDirs = options?.excluded_dirs ?? DEFAULT_EXCLUDED_DIRS;
    const excludedFiles = options?.excluded_files ?? DEFAULT_EXCLUDED_FILES;
    const excludedSuffixes = options?.excluded_file_suffixes ?? DEFAULT_EXCLUDED_FILE_SUFFIXES;
    return {
        max_file_size_bytes: maxFileSizeBytes,
        excluded_dirs: excludedDirs,
        excluded_files: excludedFiles,
        excluded_file_suffixes: excludedSuffixes
    };
}
126
/**
 * Recursively scan `project_root_path` and return a Map from repo-relative
 * path to { path, full_path, size, mtime_ms, language? } for every file
 * eligible for syncing. Skips excluded directories/files, paths matched by
 * the project's indexing-ignore rules, non-regular entries (symlinks etc.),
 * and files larger than max_file_size_bytes.
 *
 * Fix: `stat` is now guarded — a file deleted (or made unreadable) between
 * `readdir` and `stat` is skipped instead of rejecting the whole scan.
 *
 * @param {string} project_root_path - root directory to scan.
 * @param {object} [options] - partial scan options; see resolveScanOptions.
 * @returns {Promise<Map<string, object>>} stats keyed by repo-relative path.
 */
export async function collectProjectFileStats(project_root_path, options) {
    const root = resolve(project_root_path);
    const resolvedOptions = resolveScanOptions(options);
    const ignoreMatcher = await loadIndexingIgnoreMatcher(root);
    const output = new Map();
    async function walk(dir) {
        const entries = await readdir(dir, { withFileTypes: true });
        for (const entry of entries) {
            const fullPath = join(dir, entry.name);
            const repoPath = normalizeRepoRelativePath(relative(root, fullPath));
            if (entry.isDirectory()) {
                if (resolvedOptions.excluded_dirs.has(entry.name) || ignoreMatcher.shouldIgnorePath(repoPath, "dir")) {
                    continue;
                }
                await walk(fullPath);
                continue;
            }
            if (!entry.isFile()) {
                // Symlinks, sockets, FIFOs, devices: never synced.
                continue;
            }
            if (shouldExcludeFile(entry.name, resolvedOptions.excluded_files, resolvedOptions.excluded_file_suffixes)) {
                continue;
            }
            if (ignoreMatcher.shouldIgnorePath(repoPath, "file")) {
                continue;
            }
            let fileStat;
            try {
                fileStat = await stat(fullPath);
            }
            catch {
                // File vanished or became unreadable between readdir and stat;
                // skip it rather than aborting the entire scan.
                continue;
            }
            if (fileStat.size > resolvedOptions.max_file_size_bytes) {
                continue;
            }
            output.set(repoPath, {
                path: repoPath,
                full_path: fullPath,
                size: fileStat.size,
                // Truncate to whole milliseconds for stable equality checks
                // against persisted state.
                mtime_ms: Math.trunc(fileStat.mtimeMs),
                language: extensionToLanguage(repoPath)
            });
        }
    }
    await walk(root);
    return output;
}
168
/**
 * Read every scannable project file and return { path, content, language? }
 * records sorted by path. Files that cannot be read, or whose content looks
 * binary, are silently dropped from the upload set.
 */
export async function collectUploadCandidates(project_root_path, options) {
    const stats = await collectProjectFileStats(project_root_path, options);
    const sortedPaths = [...stats.keys()].sort((a, b) => a.localeCompare(b));
    const candidates = [];
    for (const repoPath of sortedPaths) {
        const fileStat = stats.get(repoPath);
        let content;
        try {
            content = await readFile(fileStat.full_path, "utf8");
        }
        catch {
            // Unreadable (deleted, permissions): best-effort skip.
            continue;
        }
        if (looksBinary(content)) {
            continue;
        }
        const candidate = { path: repoPath, content };
        if (fileStat.language) {
            candidate.language = fileStat.language;
        }
        candidates.push(candidate);
    }
    return candidates;
}
191
/**
 * Diff the current project files against `input.previous_state` and build the
 * delta to push: files to upsert (with content) and paths to delete.
 *
 * Returns { delta: { upsert_files, deleted_paths }, upsert_state_entries,
 * next_files } where `next_files` is the complete state-file map to persist
 * after the delta applies, and `upsert_state_entries` holds only the entries
 * for the files being uploaded (used for incremental state persistence).
 *
 * With `input.force_full_upsert` every readable text file is re-hashed and
 * re-uploaded regardless of the previous state.
 */
export async function buildRemoteSyncDeltaFromState(input) {
    const projectFiles = await collectProjectFileStats(input.project_root_path, input.options);
    const previousFiles = input.previous_state?.files ?? {};
    const upsertFiles = [];
    const deletedPaths = [];
    const upsertStateEntries = {};
    const nextFiles = {};
    // Deterministic path order keeps delta batches stable between runs.
    const sortedPaths = [...projectFiles.keys()].sort((a, b) => a.localeCompare(b));
    for (const repoPath of sortedPaths) {
        const projectFile = projectFiles.get(repoPath);
        const previous = previousFiles[repoPath];
        // Cheap size/mtime check first; content is only read and hashed when
        // the stat fingerprint changed (or a full upsert is forced).
        const statChanged = !previous ||
            previous.size !== projectFile.size ||
            previous.mtime_ms !== projectFile.mtime_ms ||
            input.force_full_upsert;
        if (!statChanged) {
            // Unchanged: carry the previous state entry forward.
            nextFiles[repoPath] = previous;
            continue;
        }
        let content;
        try {
            content = await readFile(projectFile.full_path, "utf8");
        }
        catch {
            // Unreadable now: keep the old entry (if any) and skip the upsert.
            if (previous) {
                nextFiles[repoPath] = previous;
            }
            continue;
        }
        if (looksBinary(content)) {
            // Binary content is never uploaded; preserve any previous entry.
            if (previous) {
                nextFiles[repoPath] = previous;
            }
            continue;
        }
        const contentHash = sha256Text(content);
        const nextEntry = {
            content_hash: contentHash,
            size: projectFile.size,
            mtime_ms: projectFile.mtime_ms,
            ...(projectFile.language ? { language: projectFile.language } : {})
        };
        nextFiles[repoPath] = nextEntry;
        // mtime changed but content did not: update state, skip the upload.
        if (!input.force_full_upsert && previous?.content_hash === contentHash) {
            continue;
        }
        upsertFiles.push({
            path: repoPath,
            content,
            ...(projectFile.language ? { language: projectFile.language } : {})
        });
        upsertStateEntries[repoPath] = nextEntry;
    }
    // Anything tracked before but missing from the current scan is a delete.
    for (const previousPath of Object.keys(previousFiles)) {
        if (!projectFiles.has(previousPath)) {
            deletedPaths.push(previousPath);
        }
    }
    return {
        delta: {
            upsert_files: upsertFiles,
            deleted_paths: deletedPaths
        },
        upsert_state_entries: upsertStateEntries,
        next_files: nextFiles
    };
}
258
/**
 * Approximate the HTTP body size in bytes of a push-delta request by
 * serializing the exact JSON shape the client would send (optional keys
 * included only when truthy, same key order).
 */
export function estimateRemoteSyncDeltaRequestSize(input) {
    const upsertFiles = input.upsert_files.map((file) => {
        const entry = { path: file.path, content: file.content };
        if (file.language) {
            entry.language = file.language;
        }
        return entry;
    });
    const payload = { project_root_path: input.project_root_path };
    if (input.workspace_id) {
        payload.workspace_id = input.workspace_id;
    }
    if (input.base_index_version) {
        payload.base_index_version = input.base_index_version;
    }
    payload.upsert_files = upsertFiles;
    payload.deleted_paths = input.deleted_paths;
    return Buffer.byteLength(JSON.stringify(payload), "utf8");
}
272
/**
 * Pack a delta into request-sized batches. Each batch's serialized request
 * stays within ~80% of `max_body_bytes` (never below a 256 KiB floor),
 * leaving headroom for headers/metadata. Upserts are packed first in path
 * order, then deletes in path order; a single item that cannot fit alone in
 * an empty batch throws.
 *
 * Returns [{ upsert_files, deleted_paths, approx_bytes }].
 *
 * NOTE(review): each `canFit` call re-serializes the whole candidate batch,
 * so packing is quadratic in batch size — acceptable for typical deltas but
 * worth revisiting if batches grow large.
 */
export function splitRemoteSyncDeltaIntoBatches(input) {
    // 80% of the body limit, but never less than 256 KiB.
    const budget = Math.max(256 * 1024, Math.floor(input.max_body_bytes * 0.8));
    const upserts = [...input.delta.upsert_files].sort((a, b) => a.path.localeCompare(b.path));
    // Deletes are deduplicated before packing.
    const deleted = [...new Set(input.delta.deleted_paths)].sort((a, b) => a.localeCompare(b));
    const batches = [];
    let currentUpserts = [];
    let currentDeleted = [];
    // Emit the current batch (if non-empty) with its measured request size.
    const flush = () => {
        if (currentUpserts.length === 0 && currentDeleted.length === 0) {
            return;
        }
        batches.push({
            upsert_files: currentUpserts,
            deleted_paths: currentDeleted,
            approx_bytes: estimateRemoteSyncDeltaRequestSize({
                project_root_path: input.project_root_path,
                workspace_id: input.workspace_id,
                base_index_version: input.base_index_version,
                upsert_files: currentUpserts,
                deleted_paths: currentDeleted
            })
        });
        currentUpserts = [];
        currentDeleted = [];
    };
    // Would a request carrying these items still fit in the budget?
    const canFit = (nextUpserts, nextDeleted) => estimateRemoteSyncDeltaRequestSize({
        project_root_path: input.project_root_path,
        workspace_id: input.workspace_id,
        base_index_version: input.base_index_version,
        upsert_files: nextUpserts,
        deleted_paths: nextDeleted
    }) <= budget;
    for (const file of upserts) {
        const candidateUpserts = [...currentUpserts, file];
        if (canFit(candidateUpserts, currentDeleted)) {
            currentUpserts = candidateUpserts;
            continue;
        }
        flush();
        // After flushing, the file must fit in an otherwise-empty request.
        if (!canFit([file], [])) {
            throw new Error(`delta upsert payload too large for path ${file.path}`);
        }
        currentUpserts = [file];
    }
    for (const path of deleted) {
        const candidateDeleted = [...currentDeleted, path];
        if (canFit(currentUpserts, candidateDeleted)) {
            currentDeleted = candidateDeleted;
            continue;
        }
        flush();
        if (!canFit([], [path])) {
            throw new Error(`delta delete payload too large for path ${path}`);
        }
        currentDeleted = [path];
    }
    flush();
    return batches;
}
331
/**
 * Load and validate persisted sync state from `path`.
 *
 * Returns undefined when the file is missing, unreadable, not valid JSON, or
 * not in the expected remote_delta_v1 shape (`mode` mismatch or `files` not a
 * plain object).
 *
 * Fix: the previous `existsSync` pre-check was redundant (the try/catch
 * already maps a missing file to undefined) and introduced a TOCTOU race;
 * the file is now read directly.
 */
export async function readRemoteSyncState(path) {
    try {
        const parsed = JSON.parse(await readFile(path, "utf8"));
        if (!parsed ||
            parsed.mode !== REMOTE_SYNC_STATE_MODE ||
            typeof parsed.files !== "object" ||
            parsed.files === null ||
            Array.isArray(parsed.files)) {
            return undefined;
        }
        return {
            mode: REMOTE_SYNC_STATE_MODE,
            workspace_id: parsed.workspace_id,
            last_index_version: parsed.last_index_version,
            files: parsed.files,
            // Repair a missing/invalid timestamp with the current time.
            updated_at: typeof parsed.updated_at === "string" ? parsed.updated_at : nowIso()
        };
    }
    catch {
        // Missing, unreadable, or corrupt state is treated as "no state".
        return undefined;
    }
}
356
/**
 * Persist sync state to `path` as pretty-printed JSON, stamping the
 * canonical mode value and a fresh updated_at timestamp over whatever the
 * caller provided.
 */
export async function writeRemoteSyncState(path, state) {
    const snapshot = {
        ...state,
        mode: REMOTE_SYNC_STATE_MODE,
        updated_at: nowIso()
    };
    const serialized = JSON.stringify(snapshot, null, 2);
    await writeFile(path, serialized);
}
363
/**
 * Detect the server's "stale base index version" rejection: an HTTP 400
 * whose message mentions both invalid_argument and the base index version.
 * Callers use this to retry the sync from a clean (full-upsert) slate.
 */
export function isStaleBaseIndexError(error) {
    if (!(error instanceof RemoteSyncHttpResponseError) || error.status !== 400) {
        return false;
    }
    const message = error.message.toLowerCase();
    return message.includes("invalid_argument") && message.includes("base index version");
}
373
/**
 * True when the backend signals it does not implement the push-delta
 * endpoint, so the caller should fall back to another sync path.
 * Recognized via missing-endpoint status codes (404/405/501) or message
 * text naming push-delta as not found.
 */
export function isDeltaUnsupportedError(error) {
    if (error instanceof RemoteSyncHttpResponseError) {
        const missingEndpointStatuses = [404, 405, 501];
        if (missingEndpointStatuses.includes(error.status)) {
            return true;
        }
        const message = error.message.toLowerCase();
        return message.includes("not_found") && message.includes("push-delta");
    }
    if (error instanceof Error) {
        const message = error.message.toLowerCase();
        const mentionsMissing = message.includes("not found") || message.includes("404");
        return message.includes("push-delta") && mentionsMissing;
    }
    return false;
}
387
/**
 * True when the backend cannot service the blob_commit_v2 protocol: the
 * endpoints are missing (404/405/501), the feature is reported disabled, or
 * the message says the commit-v2 / blobs-upload routes were not found.
 *
 * Fix: the original grouped the conditions as
 *   ((blob_commit_v2 && disabled) || commit-v2 || blobs/upload) && not-found
 * so a "blob_commit_v2 ... disabled" message without "not found" was never
 * recognized. The disabled clause now stands alone, mirroring the grouping
 * already used in the plain-Error branch below.
 */
export function isBlobCommitV2UnsupportedError(error) {
    if (error instanceof RemoteSyncHttpResponseError) {
        if ([404, 405, 501].includes(error.status)) {
            return true;
        }
        const message = error.message.toLowerCase();
        if (message.includes("blob_commit_v2") && message.includes("disabled")) {
            return true;
        }
        return (message.includes("commit-v2") || message.includes("blobs/upload")) &&
            message.includes("not found");
    }
    if (error instanceof Error) {
        const message = error.message.toLowerCase();
        return ((message.includes("commit-v2") || message.includes("blobs/upload")) &&
            (message.includes("not found") || message.includes("404")));
    }
    return false;
}
404
// Only HTTP throttling (429) and server-side errors (5xx) are worth
// re-queueing during adaptive uploads; everything else is fatal.
function isRetryableAdaptiveError(error) {
    if (!(error instanceof RemoteSyncHttpResponseError)) {
        return false;
    }
    return error.status === 429 || error.status >= 500;
}
410
/**
 * Deduplicate upsert contents by SHA-256 and pack the unique blobs into
 * upload batches whose cumulative size stays within max_blob_batch_bytes.
 * Throws when any single blob exceeds max_blob_bytes or
 * max_blob_batch_bytes. Blobs are ordered by hash for deterministic output.
 */
function computeBlobBatches(input) {
    const byHash = new Map();
    for (const file of input.upsert_files) {
        const sizeBytes = Buffer.byteLength(file.content, "utf8");
        if (sizeBytes > input.max_blob_bytes) {
            throw new Error(`blob payload too large for path ${file.path}`);
        }
        const hash = sha256Text(file.content);
        if (byHash.has(hash)) {
            continue;
        }
        byHash.set(hash, { hash, content: file.content, size_bytes: sizeBytes });
    }
    const orderedBlobs = [...byHash.values()].sort((a, b) => a.hash.localeCompare(b.hash));
    const batches = [];
    let currentBatch = [];
    let currentBatchBytes = 0;
    for (const blob of orderedBlobs) {
        if (blob.size_bytes > input.max_blob_batch_bytes) {
            throw new Error(`blob ${blob.hash} exceeds max blob batch bytes`);
        }
        const wouldOverflow = currentBatch.length > 0 &&
            currentBatchBytes + blob.size_bytes > input.max_blob_batch_bytes;
        if (wouldOverflow) {
            batches.push(currentBatch);
            currentBatch = [];
            currentBatchBytes = 0;
        }
        currentBatch.push(blob);
        currentBatchBytes += blob.size_bytes;
    }
    if (currentBatch.length > 0) {
        batches.push(currentBatch);
    }
    return batches;
}
447
// Capability probe: does the server advertise the blob_commit_v2 protocol?
function supportsBlobCommitV2(capabilities) {
    return (capabilities.sync_protocols ?? []).includes("blob_commit_v2");
}
451
/**
 * Top-level sync driver that picks the richest protocol the server supports:
 *  - without the blob_commit_v2 capability, delegate to runRemoteDeltaSync;
 *  - otherwise upload deduplicated content blobs in adaptive-concurrency
 *    waves via `input.upload_blobs`, then commit the file manifest via
 *    `input.commit_v2`.
 *
 * On a commit-v2-unsupported error it falls back to the delta protocol; on a
 * stale base-index error it retries once from a clean slate (no previous
 * state, forced full upsert). Persists state through `input.persist_state`
 * and returns { state, changed, workspace_id, index_version, applied_delta,
 * stats, protocol }.
 */
export async function runRemoteAdaptiveSync(input) {
    const runStartedAt = Date.now();
    const retries = input.retries ?? 3;
    const initialDelayMs = input.initial_delay_ms ?? 500;
    const staleBaseError = input.stale_base_error ?? isStaleBaseIndexError;
    // Server lacks blob_commit_v2: run the plain delta protocol instead.
    if (!supportsBlobCommitV2(input.capabilities)) {
        const delta = await runRemoteDeltaSync({
            project_root_path: input.project_root_path,
            scan_root_path: input.scan_root_path,
            workspace_id: input.workspace_id,
            previous_state: input.previous_state,
            force_full_upsert: input.force_full_upsert,
            max_body_bytes: input.capabilities.max_body_bytes,
            retries,
            initial_delay_ms: initialDelayMs,
            stale_base_error: staleBaseError,
            persist_state: input.persist_state,
            push_delta: input.push_delta
        });
        return {
            ...delta,
            protocol: "delta_v1"
        };
    }
    // One full blob-upload + commit pass; invoked again (stateless) on a
    // stale base-index error.
    const execute = async (previousState, forceFullUpsert) => {
        const scanRootPath = input.scan_root_path ?? input.project_root_path;
        const deltaBuild = await buildRemoteSyncDeltaFromState({
            project_root_path: scanRootPath,
            previous_state: previousState,
            force_full_upsert: forceFullUpsert
        });
        const nextWorkspaceId = input.workspace_id ?? previousState?.workspace_id;
        // Nothing changed: persist a refreshed state and return early.
        if (deltaBuild.delta.upsert_files.length === 0 && deltaBuild.delta.deleted_paths.length === 0) {
            const unchangedState = {
                mode: REMOTE_SYNC_STATE_MODE,
                workspace_id: nextWorkspaceId,
                last_index_version: previousState?.last_index_version,
                files: deltaBuild.next_files,
                updated_at: nowIso()
            };
            await input.persist_state?.(unchangedState);
            return {
                state: unchangedState,
                changed: false,
                workspace_id: unchangedState.workspace_id,
                index_version: unchangedState.last_index_version,
                applied_delta: {
                    upsert_files: 0,
                    deleted_paths: 0
                },
                stats: {
                    batches_total: 0,
                    bytes_total: 0,
                    latency_ms: Date.now() - runStartedAt
                },
                protocol: "blob_commit_v2"
            };
        }
        // Capability-driven limits, with conservative defaults.
        const maxBlobBytes = input.capabilities.max_blob_bytes ?? 128 * 1024;
        const maxBlobBatchBytes = input.capabilities.max_blob_batch_bytes ?? Math.max(maxBlobBytes, 1024 * 1024);
        const maxCommitBodyBytes = input.capabilities.max_commit_body_bytes ?? input.capabilities.max_body_bytes;
        // Concurrency starts at the server's hint, clamped to [1, 16], and
        // adapts below: halved on retryable errors, +1 after clean waves.
        let currentConcurrency = Math.max(1, Math.min(input.capabilities.upload_concurrency_hint ?? 4, 16));
        const blobBatches = computeBlobBatches({
            upsert_files: deltaBuild.delta.upsert_files,
            max_blob_bytes: maxBlobBytes,
            max_blob_batch_bytes: maxBlobBatchBytes
        });
        const pending = [...blobBatches];
        // Lowercased hashes the server has confirmed (accepted or already
        // present); the commit below requires every upsert hash to be here.
        const acknowledged = new Set();
        let bytesTotal = 0;
        while (pending.length > 0) {
            // Take one "wave" of batches and upload them concurrently.
            const wave = pending.splice(0, currentConcurrency);
            const settled = await Promise.allSettled(wave.map(async (batch) => retryWithBackoff({
                retries,
                initial_delay_ms: initialDelayMs,
                fn: async () => input.upload_blobs({
                    workspace_id: nextWorkspaceId,
                    project_root_path: input.project_root_path,
                    blobs: batch
                })
            })));
            let waveHadRetryableError = false;
            for (let idx = 0; idx < settled.length; idx += 1) {
                const result = settled[idx];
                const batch = wave[idx];
                if (result.status === "rejected") {
                    // Throttling/5xx: re-queue the batch; anything else aborts.
                    if (isRetryableAdaptiveError(result.reason)) {
                        pending.push(batch);
                        waveHadRetryableError = true;
                        continue;
                    }
                    throw result.reason;
                }
                const payload = result.value;
                for (const hash of payload.accepted_hashes) {
                    acknowledged.add(hash.toLowerCase());
                }
                for (const hash of payload.already_present_hashes) {
                    acknowledged.add(hash.toLowerCase());
                }
                // Any per-blob rejection is fatal for the run.
                if (payload.rejected.length > 0) {
                    throw new Error(`blob upload rejected: ${payload.rejected[0].reason}`);
                }
                bytesTotal += batch.reduce((sum, blob) => sum + blob.size_bytes, 0);
            }
            if (waveHadRetryableError) {
                // Back off: halve concurrency (floor 1) and notify the caller.
                const previous = currentConcurrency;
                currentConcurrency = Math.max(1, Math.floor(currentConcurrency / 2));
                await input.on_upload_strategy_change?.({
                    previous_concurrency: previous,
                    next_concurrency: currentConcurrency,
                    reason: "error"
                });
            }
            else if (currentConcurrency < Math.max(1, input.capabilities.upload_concurrency_hint ?? 4)) {
                // Clean wave: creep back up toward the server's hint.
                const previous = currentConcurrency;
                currentConcurrency += 1;
                await input.on_upload_strategy_change?.({
                    previous_concurrency: previous,
                    next_concurrency: currentConcurrency,
                    reason: "success"
                });
            }
        }
        // Last write wins per path, then sort for a deterministic manifest.
        const dedupedUpserts = new Map();
        for (const file of deltaBuild.delta.upsert_files) {
            dedupedUpserts.set(file.path, file);
        }
        const commitUpserts = [...dedupedUpserts.values()]
            .sort((a, b) => a.path.localeCompare(b.path))
            .map((file) => ({
            path: file.path,
            blob_hash: sha256Text(file.content),
            ...(file.language ? { language: file.language } : {})
        }));
        const commitDeletes = [...new Set(deltaBuild.delta.deleted_paths)].sort((a, b) => a.localeCompare(b));
        // Safety check: never commit a manifest referencing an unacknowledged blob.
        const missingHash = commitUpserts.find((file) => !acknowledged.has(file.blob_hash.toLowerCase()));
        if (missingHash) {
            throw new Error(`missing uploaded blob hash for commit: ${missingHash.blob_hash}`);
        }
        // Pre-flight the commit body size before sending.
        if (Buffer.byteLength(JSON.stringify({
            project_root_path: input.project_root_path,
            workspace_id: nextWorkspaceId,
            base_index_version: previousState?.last_index_version,
            upsert_files: commitUpserts,
            deleted_paths: commitDeletes
        }), "utf8") > maxCommitBodyBytes) {
            throw new Error("commit-v2 payload exceeds max_commit_body_bytes");
        }
        const commit = await retryWithBackoff({
            retries,
            initial_delay_ms: initialDelayMs,
            fn: async () => input.commit_v2({
                workspace_id: nextWorkspaceId,
                project_root_path: input.project_root_path,
                ...(previousState?.last_index_version ? { base_index_version: previousState.last_index_version } : {}),
                upsert_files: commitUpserts,
                deleted_paths: commitDeletes
            })
        });
        // Prefer identifiers returned by the server; fall back to local values.
        const finalWorkspace = commit.workspace_id ?? nextWorkspaceId;
        const finalIndexVersion = commit.index_version ?? previousState?.last_index_version;
        const finalState = {
            mode: REMOTE_SYNC_STATE_MODE,
            workspace_id: finalWorkspace,
            last_index_version: finalIndexVersion,
            files: deltaBuild.next_files,
            updated_at: nowIso()
        };
        await input.persist_state?.(finalState);
        return {
            state: finalState,
            changed: true,
            workspace_id: finalWorkspace,
            index_version: finalIndexVersion,
            applied_delta: {
                upsert_files: commitUpserts.length,
                deleted_paths: commitDeletes.length
            },
            stats: {
                batches_total: blobBatches.length,
                bytes_total: bytesTotal,
                latency_ms: Date.now() - runStartedAt
            },
            protocol: "blob_commit_v2"
        };
    };
    const forceFullUpsert = input.force_full_upsert ?? false;
    try {
        return await execute(input.previous_state, forceFullUpsert);
    }
    catch (error) {
        // Server turned out not to support commit-v2: fall back to delta_v1.
        if (isBlobCommitV2UnsupportedError(error)) {
            const delta = await runRemoteDeltaSync({
                project_root_path: input.project_root_path,
                scan_root_path: input.scan_root_path,
                workspace_id: input.workspace_id,
                previous_state: input.previous_state,
                force_full_upsert: input.force_full_upsert,
                max_body_bytes: input.capabilities.max_body_bytes,
                retries,
                initial_delay_ms: initialDelayMs,
                stale_base_error: staleBaseError,
                persist_state: input.persist_state,
                push_delta: input.push_delta
            });
            return {
                ...delta,
                protocol: "delta_v1"
            };
        }
        // Stale base index: retry once from scratch with a forced full upsert.
        if (!forceFullUpsert && input.previous_state && staleBaseError(error)) {
            return execute(undefined, true);
        }
        throw error;
    }
}
668
/**
 * Run `input.fn`, making up to `input.retries` total attempts with
 * exponential backoff between failures (initial_delay_ms, then doubled
 * after each failed attempt).
 *
 * @param {{ retries: number, initial_delay_ms: number, fn: () => Promise<any> }} input
 * @returns the first successful result of `fn`.
 * @throws the last error once attempts are exhausted. Fix: with
 *   `retries <= 0` the original loop never ran and `throw lastError` threw
 *   `undefined`; a descriptive Error is thrown instead.
 */
export async function retryWithBackoff(input) {
    let attempt = 0;
    let lastError;
    while (attempt < input.retries) {
        try {
            return await input.fn();
        }
        catch (error) {
            lastError = error;
            attempt += 1;
            if (attempt >= input.retries) {
                break;
            }
            // Backoff: initial_delay_ms * 2^(attempt - 1).
            await new Promise((resolveSleep) => setTimeout(resolveSleep, input.initial_delay_ms * 2 ** (attempt - 1)));
        }
    }
    throw lastError ?? new Error("retryWithBackoff: no attempts were made (retries must be >= 1)");
}
686
/**
 * Synchronize the project with the remote index using the delta_v1 protocol:
 * build a delta against `input.previous_state`, split it into body-size-
 * bounded batches, and push each batch through `input.push_delta` with
 * retry/backoff, persisting intermediate state after every batch so an
 * interrupted run can resume.
 *
 * On a stale base-index error (per `input.stale_base_error`, defaulting to
 * isStaleBaseIndexError) it retries once from a clean slate with a forced
 * full upsert. Returns { state, changed, workspace_id, index_version,
 * applied_delta, stats }.
 */
export async function runRemoteDeltaSync(input) {
    const runStartedAt = Date.now();
    const retries = input.retries ?? 3;
    const initialDelayMs = input.initial_delay_ms ?? 500;
    const isStaleError = input.stale_base_error ?? isStaleBaseIndexError;
    // One complete delta push; re-invoked statelessly on a stale-base error.
    const execute = async (previousState, forceFullUpsert) => {
        const scanRootPath = input.scan_root_path ?? input.project_root_path;
        const deltaBuild = await buildRemoteSyncDeltaFromState({
            project_root_path: scanRootPath,
            previous_state: previousState,
            force_full_upsert: forceFullUpsert
        });
        const nextWorkspaceId = input.workspace_id ?? previousState?.workspace_id;
        // Nothing changed: persist a refreshed state and return early.
        if (deltaBuild.delta.upsert_files.length === 0 && deltaBuild.delta.deleted_paths.length === 0) {
            const unchangedState = {
                mode: REMOTE_SYNC_STATE_MODE,
                workspace_id: nextWorkspaceId,
                last_index_version: previousState?.last_index_version,
                files: deltaBuild.next_files,
                updated_at: nowIso()
            };
            await input.persist_state?.(unchangedState);
            return {
                state: unchangedState,
                changed: false,
                workspace_id: unchangedState.workspace_id,
                index_version: unchangedState.last_index_version,
                applied_delta: {
                    upsert_files: 0,
                    deleted_paths: 0
                },
                stats: {
                    batches_total: 0,
                    bytes_total: 0,
                    latency_ms: Date.now() - runStartedAt
                }
            };
        }
        // Workspace id / index version advance as the server echoes them back.
        let currentBaseIndexVersion = previousState?.last_index_version;
        let currentWorkspaceId = nextWorkspaceId;
        // Mutable copy of the tracked-file map, updated batch by batch so the
        // persisted state always reflects exactly what the server has applied.
        const progressFiles = { ...(previousState?.files ?? {}) };
        let uploadedCount = 0;
        let deletedCount = 0;
        let bytesTotal = 0;
        const batches = splitRemoteSyncDeltaIntoBatches({
            project_root_path: input.project_root_path,
            workspace_id: currentWorkspaceId,
            base_index_version: currentBaseIndexVersion,
            delta: deltaBuild.delta,
            max_body_bytes: input.max_body_bytes
        });
        let latestState;
        for (let batchIndex = 0; batchIndex < batches.length; batchIndex += 1) {
            const batch = batches[batchIndex];
            const batchStartedAt = Date.now();
            const result = await retryWithBackoff({
                retries,
                initial_delay_ms: initialDelayMs,
                fn: async () => input.push_delta({
                    workspace_id: currentWorkspaceId,
                    project_root_path: input.project_root_path,
                    ...(currentBaseIndexVersion ? { base_index_version: currentBaseIndexVersion } : {}),
                    upsert_files: batch.upsert_files,
                    deleted_paths: batch.deleted_paths
                })
            });
            // Server may assign/advance identifiers; keep them for later batches.
            currentWorkspaceId = result.workspace_id ?? currentWorkspaceId;
            currentBaseIndexVersion = result.index_version ?? currentBaseIndexVersion;
            // Fold this batch's applied changes into the progress map.
            for (const file of batch.upsert_files) {
                const entry = deltaBuild.upsert_state_entries[file.path];
                if (entry) {
                    progressFiles[file.path] = entry;
                }
            }
            for (const path of batch.deleted_paths) {
                delete progressFiles[path];
            }
            uploadedCount += batch.upsert_files.length;
            deletedCount += batch.deleted_paths.length;
            bytesTotal += batch.approx_bytes;
            // Optional per-batch progress callback.
            await input.on_batch_processed?.({
                batch_index: batchIndex,
                batch_count: batches.length,
                approx_bytes: batch.approx_bytes,
                upsert_files: batch.upsert_files.length,
                deleted_paths: batch.deleted_paths.length,
                latency_ms: Date.now() - batchStartedAt
            });
            // Persist after every batch so an interrupted run resumes here.
            latestState = {
                mode: REMOTE_SYNC_STATE_MODE,
                workspace_id: currentWorkspaceId,
                last_index_version: currentBaseIndexVersion,
                files: progressFiles,
                updated_at: nowIso()
            };
            await input.persist_state?.(latestState);
        }
        const finalState = latestState ?? {
            mode: REMOTE_SYNC_STATE_MODE,
            workspace_id: currentWorkspaceId,
            last_index_version: currentBaseIndexVersion,
            files: progressFiles,
            updated_at: nowIso()
        };
        return {
            state: finalState,
            changed: true,
            workspace_id: currentWorkspaceId,
            index_version: currentBaseIndexVersion,
            applied_delta: {
                upsert_files: uploadedCount,
                deleted_paths: deletedCount
            },
            stats: {
                batches_total: batches.length,
                bytes_total: bytesTotal,
                latency_ms: Date.now() - runStartedAt
            }
        };
    };
    const forceFullUpsert = input.force_full_upsert ?? false;
    try {
        return await execute(input.previous_state, forceFullUpsert);
    }
    catch (error) {
        // Stale base index: retry once from scratch with a forced full upsert.
        if (!forceFullUpsert && input.previous_state && isStaleError(error)) {
            return execute(undefined, true);
        }
        throw error;
    }
}