@indigoai-us/hq-cloud 5.46.0 → 5.47.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/sync-runner.d.ts +12 -0
- package/dist/bin/sync-runner.d.ts.map +1 -1
- package/dist/bin/sync-runner.js +39 -0
- package/dist/bin/sync-runner.js.map +1 -1
- package/dist/bin/sync-runner.test.js +27 -1
- package/dist/bin/sync-runner.test.js.map +1 -1
- package/dist/cli/share.d.ts.map +1 -1
- package/dist/cli/share.js +17 -2
- package/dist/cli/share.js.map +1 -1
- package/dist/cli/share.test.js +2 -0
- package/dist/cli/share.test.js.map +1 -1
- package/dist/cli/sync-scope.test.js +1 -0
- package/dist/cli/sync-scope.test.js.map +1 -1
- package/dist/cli/sync.d.ts.map +1 -1
- package/dist/cli/sync.js +11 -1
- package/dist/cli/sync.js.map +1 -1
- package/dist/cli/sync.test.js +1 -0
- package/dist/cli/sync.test.js.map +1 -1
- package/dist/object-io.d.ts +218 -0
- package/dist/object-io.d.ts.map +1 -0
- package/dist/object-io.js +588 -0
- package/dist/object-io.js.map +1 -0
- package/dist/object-io.test.d.ts +11 -0
- package/dist/object-io.test.d.ts.map +1 -0
- package/dist/object-io.test.js +568 -0
- package/dist/object-io.test.js.map +1 -0
- package/dist/s3.d.ts +37 -0
- package/dist/s3.d.ts.map +1 -1
- package/dist/s3.js +225 -201
- package/dist/s3.js.map +1 -1
- package/dist/s3.test.js +21 -0
- package/dist/s3.test.js.map +1 -1
- package/dist/vault-client.d.ts +68 -0
- package/dist/vault-client.d.ts.map +1 -1
- package/dist/vault-client.js +35 -0
- package/dist/vault-client.js.map +1 -1
- package/package.json +1 -1
- package/scripts/presign-transport-e2e.mjs +203 -0
- package/scripts/vault-rebaseline.sh +275 -0
- package/scripts/vault-rescue.sh +8 -0
- package/src/bin/sync-runner.test.ts +41 -0
- package/src/bin/sync-runner.ts +52 -0
- package/src/cli/share.test.ts +2 -0
- package/src/cli/share.ts +29 -2
- package/src/cli/sync-scope.test.ts +1 -0
- package/src/cli/sync.test.ts +1 -0
- package/src/cli/sync.ts +22 -1
- package/src/object-io.test.ts +663 -0
- package/src/object-io.ts +782 -0
- package/src/s3.test.ts +24 -0
- package/src/s3.ts +277 -237
- package/src/vault-client.ts +101 -0
package/src/s3.ts
CHANGED
|
@@ -8,31 +8,14 @@
|
|
|
8
8
|
|
|
9
9
|
import * as fs from "fs";
|
|
10
10
|
import * as path from "path";
|
|
11
|
-
import {
|
|
12
|
-
S3Client,
|
|
13
|
-
PutObjectCommand,
|
|
14
|
-
GetObjectCommand,
|
|
15
|
-
ListObjectsV2Command,
|
|
16
|
-
DeleteObjectCommand,
|
|
17
|
-
HeadObjectCommand,
|
|
18
|
-
} from "@aws-sdk/client-s3";
|
|
19
11
|
import type { EntityContext } from "./types.js";
|
|
12
|
+
import { resolveObjectIO, type ObjectIO } from "./object-io.js";
|
|
20
13
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
function buildClient(ctx: EntityContext): S3Client {
|
|
27
|
-
return new S3Client({
|
|
28
|
-
region: ctx.region,
|
|
29
|
-
credentials: {
|
|
30
|
-
accessKeyId: ctx.credentials.accessKeyId,
|
|
31
|
-
secretAccessKey: ctx.credentials.secretAccessKey,
|
|
32
|
-
sessionToken: ctx.credentials.sessionToken,
|
|
33
|
-
},
|
|
34
|
-
});
|
|
35
|
-
}
|
|
14
|
+
// Byte/metadata transport is resolved per-call via resolveObjectIO(ctx) — the
|
|
15
|
+
// default is the AWS S3 SDK over STS-vended credentials (S3SdkObjectIO), but a
|
|
16
|
+
// session may select the presigned-URL transport (PresignObjectIO) via
|
|
17
|
+
// setObjectIOFactory. The symlink/mode/mtime/created-at semantics below are
|
|
18
|
+
// transport-agnostic: they compose on top of the ObjectIO primitives.
|
|
36
19
|
|
|
37
20
|
/**
|
|
38
21
|
* Author identity stamped onto S3 user-defined metadata at upload time. The
|
|
@@ -247,114 +230,215 @@ export function encodeSymlinkBody(target: string): Buffer {
|
|
|
247
230
|
return Buffer.from(SYMLINK_BODY_PREFIX + target, "utf-8");
|
|
248
231
|
}
|
|
249
232
|
|
|
250
|
-
|
|
233
|
+
/**
|
|
234
|
+
* Batch pre-mint transport URLs for `keys` under `op` so the subsequent
|
|
235
|
+
* per-file transfer calls (downloadFile/headRemoteFile/…) reuse them instead
|
|
236
|
+
* of presigning one key at a time. On the presigned-URL transport this turns
|
|
237
|
+
* an N-file leg from N presign requests into ceil(N/100) — the difference
|
|
238
|
+
* between completing a bulk pull and 429ing past the 100-req/hr limit. No-op
|
|
239
|
+
* on the S3 SDK transport (which has no presign step) and harmless if called
|
|
240
|
+
* with an empty list. Best-effort: a prime failure never propagates — the
|
|
241
|
+
* per-file path falls back to a single presign.
|
|
242
|
+
*
|
|
243
|
+
* Call it once, right before a transfer loop, with the full key set the loop
|
|
244
|
+
* will touch. The presigned transport memoizes one IO instance per company for
|
|
245
|
+
* the run, so the warmed cache is the same one the loop drains.
|
|
246
|
+
*/
|
|
247
|
+
export async function primeObjectTransport(
|
|
251
248
|
ctx: EntityContext,
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const
|
|
257
|
-
|
|
249
|
+
op: "get" | "put" | "delete",
|
|
250
|
+
keys: string[],
|
|
251
|
+
): Promise<void> {
|
|
252
|
+
if (keys.length === 0) return;
|
|
253
|
+
const io = resolveObjectIO(ctx);
|
|
254
|
+
if (!io.prime) return;
|
|
255
|
+
await io.prime(
|
|
256
|
+
op,
|
|
257
|
+
keys.map((key) => ({ key })),
|
|
258
|
+
);
|
|
259
|
+
}
|
|
258
260
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
// some filesystems report (APFS returns full ms+fraction; ext4
|
|
281
|
-
// is integer-ms). String(int) on the read side matches the
|
|
282
|
-
// strict-numeric regex `^-?[0-9]{1,16}$` — optional leading `-`,
|
|
283
|
-
// no leading zeros, no decimals, no exponents.
|
|
284
|
-
//
|
|
285
|
-
// Codex PR #27 P2: accept the full finite range, including 0
|
|
286
|
-
// (Unix epoch) and negatives (pre-epoch / reproducible-build
|
|
287
|
-
// clamping). Earlier `> 0` filter silently dropped legitimate
|
|
288
|
-
// timestamps and broke the round-trip guarantee for that subset.
|
|
289
|
-
const mtimeFloor = Math.floor(lstat.mtimeMs);
|
|
290
|
-
if (Number.isFinite(lstat.mtimeMs)) {
|
|
291
|
-
mtimeMsStamp = String(mtimeFloor);
|
|
292
|
-
}
|
|
293
|
-
// birthtimeMs filter: only stamp when the filesystem actually
|
|
294
|
-
// tracks a separate creation time. ext4 historically returns 0
|
|
295
|
-
// (unsupported) or equals mtimeMs (no distinct tracking); tmpfs
|
|
296
|
-
// and some FUSE mounts behave similarly. Filtering at the source
|
|
297
|
-
// keeps the metadata header free of noise — the receiver can
|
|
298
|
-
// assume hq-btime, if present, carries real signal.
|
|
299
|
-
//
|
|
300
|
-
// Compare the floored values (not raw lstat.birthtimeMs vs
|
|
301
|
-
// lstat.mtimeMs) because APFS exposes sub-millisecond fractions —
|
|
302
|
-
// two timestamps representing the "same moment" for sync purposes
|
|
303
|
-
// can differ by < 1 ms and pass a strict `!==` check while serializing
|
|
304
|
-
// to the same integer-ms string. Comparing floor-to-floor matches
|
|
305
|
-
// what we actually emit on the wire.
|
|
306
|
-
const btimeFloor = Math.floor(lstat.birthtimeMs);
|
|
307
|
-
if (
|
|
308
|
-
Number.isFinite(lstat.birthtimeMs) &&
|
|
309
|
-
btimeFloor > 0 &&
|
|
310
|
-
btimeFloor !== mtimeFloor
|
|
311
|
-
) {
|
|
312
|
-
btimeMsStamp = String(btimeFloor);
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
} catch {
|
|
316
|
-
// Leave stamps undefined; receiver applies its umask default and
|
|
317
|
-
// leaves mtime at write-time (the legacy back-compat path).
|
|
261
|
+
/**
|
|
262
|
+
* Source-side mode + mtime (+ btime when distinct) metadata for a regular
|
|
263
|
+
* file, from a single lstat. Symlinks carry none (OS-controlled mode; a link's
|
|
264
|
+
* mtime isn't user-meaningful — the wire body is the target string, not file
|
|
265
|
+
* content). Shared by uploadFile and the primeUploads pre-pass so the PUT
|
|
266
|
+
* metadata they produce is byte-identical. See the FILE_*_META_KEY docs for the
|
|
267
|
+
* per-field rationale.
|
|
268
|
+
*/
|
|
269
|
+
function buildModeTimeMetadata(lstat: fs.Stats): Record<string, string> {
|
|
270
|
+
const meta: Record<string, string> = {};
|
|
271
|
+
if (lstat.isSymbolicLink()) return meta;
|
|
272
|
+
meta[FILE_MODE_META_KEY] = (lstat.mode & 0o777).toString(8);
|
|
273
|
+
const mtimeFloor = Math.floor(lstat.mtimeMs);
|
|
274
|
+
if (Number.isFinite(lstat.mtimeMs)) meta[FILE_MTIME_META_KEY] = String(mtimeFloor);
|
|
275
|
+
const btimeFloor = Math.floor(lstat.birthtimeMs);
|
|
276
|
+
if (
|
|
277
|
+
Number.isFinite(lstat.birthtimeMs) &&
|
|
278
|
+
btimeFloor > 0 &&
|
|
279
|
+
btimeFloor !== mtimeFloor
|
|
280
|
+
) {
|
|
281
|
+
meta[FILE_BTIME_META_KEY] = String(btimeFloor);
|
|
318
282
|
}
|
|
283
|
+
return meta;
|
|
284
|
+
}
|
|
319
285
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
286
|
+
/**
|
|
287
|
+
* Resolve the created-at to stamp: the existing object's value (preserved
|
|
288
|
+
* across re-uploads so the hq-console NEW-pill window doesn't reset) or now for
|
|
289
|
+
* a first upload. HEAD failure / no author → now. Shared by upload* and
|
|
290
|
+
* primeUploads so both agree on the value signed into the PUT.
|
|
291
|
+
*/
|
|
292
|
+
async function resolveCreatedAt(
|
|
293
|
+
io: ObjectIO,
|
|
294
|
+
key: string,
|
|
295
|
+
author?: UploadAuthor,
|
|
296
|
+
): Promise<string> {
|
|
325
297
|
let createdAt = new Date().toISOString();
|
|
326
298
|
if (author) {
|
|
327
299
|
try {
|
|
328
|
-
const head = await
|
|
329
|
-
|
|
330
|
-
);
|
|
331
|
-
const existing = head.Metadata?.["created-at"];
|
|
300
|
+
const head = await io.headObject(key);
|
|
301
|
+
const existing = head?.metadata?.["created-at"];
|
|
332
302
|
if (typeof existing === "string" && existing.length > 0) {
|
|
333
303
|
createdAt = existing;
|
|
334
304
|
}
|
|
335
305
|
} catch {
|
|
336
|
-
// Object doesn't exist yet, or HEAD
|
|
306
|
+
// Object doesn't exist yet, or HEAD failed — keep now (first upload).
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
return createdAt;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/**
|
|
313
|
+
* One upload's identity for {@link primeUploads}: the vault key, the local
|
|
314
|
+
* path (to lstat for mode/mtime), whether it's a symlink, and the author.
|
|
315
|
+
*/
|
|
316
|
+
export interface UploadPrimeItem {
|
|
317
|
+
key: string;
|
|
318
|
+
localPath: string;
|
|
319
|
+
isSymlink: boolean;
|
|
320
|
+
author?: UploadAuthor;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
/**
|
|
324
|
+
* Batch pre-mint PUT URLs (+ the created-at HEADs they depend on) for a set of
|
|
325
|
+
* uploads, signing the SAME metadata uploadFile/uploadSymlink would compute so
|
|
326
|
+
* the transfer loop can replay the cached headers. Turns an N-file push from
|
|
327
|
+
* ~N presign calls (1 per PUT, sometimes 2-3 with HEADs) into ceil(N/1000) GET
|
|
328
|
+
* + ceil(N/1000) PUT — the difference between completing a bulk push and 429ing
|
|
329
|
+
* past the 100-req/hr limit. No-op on the S3 SDK transport; best-effort.
|
|
330
|
+
*
|
|
331
|
+
* The per-item created-at HEADs run over the GET cache primed first, so they
|
|
332
|
+
* cost S3 round-trips but NO extra presign calls (not counted against 100/hr).
|
|
333
|
+
*/
|
|
334
|
+
export async function primeUploads(
|
|
335
|
+
ctx: EntityContext,
|
|
336
|
+
items: UploadPrimeItem[],
|
|
337
|
+
): Promise<void> {
|
|
338
|
+
const io = resolveObjectIO(ctx);
|
|
339
|
+
if (!io.prime || items.length === 0) return;
|
|
340
|
+
|
|
341
|
+
// Prime GET first so each item's created-at HEAD reuses a cached URL.
|
|
342
|
+
await io.prime(
|
|
343
|
+
"get",
|
|
344
|
+
items.map((i) => ({ key: i.key })),
|
|
345
|
+
);
|
|
346
|
+
|
|
347
|
+
// Build per-key PUT metadata with the SAME builders the upload path uses,
|
|
348
|
+
// bounded-concurrently (the HEADs are cheap cached-GET fetches).
|
|
349
|
+
const putKeys: Array<{
|
|
350
|
+
key: string;
|
|
351
|
+
contentType: string;
|
|
352
|
+
metadata: Record<string, string>;
|
|
353
|
+
}> = [];
|
|
354
|
+
const CONCURRENCY = 16;
|
|
355
|
+
let next = 0;
|
|
356
|
+
const worker = async (): Promise<void> => {
|
|
357
|
+
while (next < items.length) {
|
|
358
|
+
const it = items[next++];
|
|
359
|
+
const createdAt = await resolveCreatedAt(io, it.key, it.author);
|
|
360
|
+
if (it.isSymlink) {
|
|
361
|
+
putKeys.push({
|
|
362
|
+
key: it.key,
|
|
363
|
+
contentType: "application/octet-stream",
|
|
364
|
+
metadata: {
|
|
365
|
+
[SYMLINK_TARGET_META_KEY]: SYMLINK_MARKER_META_VALUE,
|
|
366
|
+
...(it.author ? buildAuthorMetadata(it.author, createdAt) : {}),
|
|
367
|
+
},
|
|
368
|
+
});
|
|
369
|
+
} else {
|
|
370
|
+
let modeTime: Record<string, string> = {};
|
|
371
|
+
try {
|
|
372
|
+
modeTime = buildModeTimeMetadata(fs.lstatSync(it.localPath));
|
|
373
|
+
} catch {
|
|
374
|
+
// raced rm / EPERM — leave stamps off (receiver umask default).
|
|
375
|
+
}
|
|
376
|
+
putKeys.push({
|
|
377
|
+
key: it.key,
|
|
378
|
+
contentType: getMimeType(it.key),
|
|
379
|
+
metadata: {
|
|
380
|
+
...(it.author ? buildAuthorMetadata(it.author, createdAt) : {}),
|
|
381
|
+
...modeTime,
|
|
382
|
+
},
|
|
383
|
+
});
|
|
384
|
+
}
|
|
337
385
|
}
|
|
386
|
+
};
|
|
387
|
+
await Promise.all(
|
|
388
|
+
Array.from({ length: Math.min(CONCURRENCY, items.length) }, worker),
|
|
389
|
+
);
|
|
390
|
+
|
|
391
|
+
await io.prime("put", putKeys);
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
export async function uploadFile(
|
|
395
|
+
ctx: EntityContext,
|
|
396
|
+
localPath: string,
|
|
397
|
+
key: string,
|
|
398
|
+
author?: UploadAuthor,
|
|
399
|
+
): Promise<{ etag: string }> {
|
|
400
|
+
const io = resolveObjectIO(ctx);
|
|
401
|
+
const body = fs.readFileSync(localPath);
|
|
402
|
+
|
|
403
|
+
// Fast path: a primeUploads() pre-pass already signed this file's metadata
|
|
404
|
+
// into a cached PUT URL. Skip the lstat-metadata + created-at HEAD and just
|
|
405
|
+
// send the body — putObject replays the cached headers (computed by the SAME
|
|
406
|
+
// builders below, so identical). hasPrimedPut only reports true with >60s of
|
|
407
|
+
// URL lifetime left, so the cache can't expire before the putObject below.
|
|
408
|
+
if (io.hasPrimedPut?.(key)) {
|
|
409
|
+
const primed = await io.putObject({
|
|
410
|
+
key,
|
|
411
|
+
body,
|
|
412
|
+
contentType: getMimeType(key),
|
|
413
|
+
metadata: {},
|
|
414
|
+
});
|
|
415
|
+
return { etag: primed.etag };
|
|
338
416
|
}
|
|
339
417
|
|
|
418
|
+
// Source-side mode/mtime/btime (Bug #5 + 5.37.0) and the preserved
|
|
419
|
+
// created-at (so the NEW-pill window doesn't reset on re-upload). Both via
|
|
420
|
+
// the shared builders that primeUploads uses, so a primed PUT carries the
|
|
421
|
+
// identical metadata — see buildModeTimeMetadata / resolveCreatedAt.
|
|
422
|
+
let modeTime: Record<string, string> = {};
|
|
423
|
+
try {
|
|
424
|
+
modeTime = buildModeTimeMetadata(fs.lstatSync(localPath));
|
|
425
|
+
} catch {
|
|
426
|
+
// raced rm / EPERM — leave stamps off; receiver keeps its umask default.
|
|
427
|
+
}
|
|
428
|
+
const createdAt = await resolveCreatedAt(io, key, author);
|
|
340
429
|
const Metadata: Record<string, string> = {
|
|
341
430
|
...(author ? buildAuthorMetadata(author, createdAt) : {}),
|
|
342
|
-
...
|
|
343
|
-
...(mtimeMsStamp ? { [FILE_MTIME_META_KEY]: mtimeMsStamp } : {}),
|
|
344
|
-
...(btimeMsStamp ? { [FILE_BTIME_META_KEY]: btimeMsStamp } : {}),
|
|
431
|
+
...modeTime,
|
|
345
432
|
};
|
|
346
433
|
|
|
347
|
-
const response = await
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
...(Object.keys(Metadata).length > 0 ? { Metadata } : {}),
|
|
354
|
-
}),
|
|
355
|
-
);
|
|
434
|
+
const response = await io.putObject({
|
|
435
|
+
key,
|
|
436
|
+
body,
|
|
437
|
+
contentType: getMimeType(key),
|
|
438
|
+
metadata: Metadata,
|
|
439
|
+
});
|
|
356
440
|
|
|
357
|
-
return { etag: response.
|
|
441
|
+
return { etag: response.etag };
|
|
358
442
|
}
|
|
359
443
|
|
|
360
444
|
/**
|
|
@@ -375,26 +459,25 @@ export async function uploadSymlink(
|
|
|
375
459
|
key: string,
|
|
376
460
|
author?: UploadAuthor,
|
|
377
461
|
): Promise<{ etag: string }> {
|
|
378
|
-
const
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
//
|
|
382
|
-
//
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
createdAt = existing;
|
|
392
|
-
}
|
|
393
|
-
} catch {
|
|
394
|
-
// First upload of this key, or HEAD denied — keep `now`.
|
|
395
|
-
}
|
|
462
|
+
const io = resolveObjectIO(ctx);
|
|
463
|
+
const symlinkBody = encodeSymlinkBody(target);
|
|
464
|
+
|
|
465
|
+
// Fast path: primeUploads() already signed this symlink's metadata into a
|
|
466
|
+
// cached PUT URL — send the body, replay the cached headers.
|
|
467
|
+
if (io.hasPrimedPut?.(key)) {
|
|
468
|
+
const primed = await io.putObject({
|
|
469
|
+
key,
|
|
470
|
+
body: symlinkBody,
|
|
471
|
+
contentType: "application/octet-stream",
|
|
472
|
+
metadata: {},
|
|
473
|
+
});
|
|
474
|
+
return { etag: primed.etag };
|
|
396
475
|
}
|
|
397
476
|
|
|
477
|
+
// Same created-at preservation as uploadFile (shared resolveCreatedAt) so the
|
|
478
|
+
// NEW-pill window doesn't reset on re-upload, and so a primed PUT matches.
|
|
479
|
+
const createdAt = await resolveCreatedAt(io, key, author);
|
|
480
|
+
|
|
398
481
|
const Metadata: Record<string, string> = {
|
|
399
482
|
// Marker-only: a constant flag value, not the target. The body
|
|
400
483
|
// is the source of truth for the target (no 2 KiB cap, no
|
|
@@ -404,23 +487,20 @@ export async function uploadSymlink(
|
|
|
404
487
|
...(author ? buildAuthorMetadata(author, createdAt) : {}),
|
|
405
488
|
};
|
|
406
489
|
|
|
407
|
-
const response = await
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
Metadata,
|
|
420
|
-
}),
|
|
421
|
-
);
|
|
490
|
+
const response = await io.putObject({
|
|
491
|
+
key,
|
|
492
|
+
// Body = SYMLINK_BODY_PREFIX + target (UTF-8). The prefix is what
|
|
493
|
+
// makes a symlink record's ETag distinguishable from a regular
|
|
494
|
+
// file whose contents happen to equal the target string — the
|
|
495
|
+
// LIST-based pull planner can't see per-object metadata, so ETag
|
|
496
|
+
// is its only drift signal across symlink ↔ regular-file
|
|
497
|
+
// transitions. See SYMLINK_BODY_PREFIX doc above.
|
|
498
|
+
body: symlinkBody,
|
|
499
|
+
contentType: "application/octet-stream",
|
|
500
|
+
metadata: Metadata,
|
|
501
|
+
});
|
|
422
502
|
|
|
423
|
-
return { etag: response.
|
|
503
|
+
return { etag: response.etag };
|
|
424
504
|
}
|
|
425
505
|
|
|
426
506
|
/**
|
|
@@ -437,18 +517,14 @@ export async function downloadFile(
|
|
|
437
517
|
key: string,
|
|
438
518
|
localPath: string,
|
|
439
519
|
): Promise<{ metadata?: Record<string, string> }> {
|
|
440
|
-
const
|
|
441
|
-
|
|
442
|
-
const response = await client.send(
|
|
443
|
-
new GetObjectCommand({
|
|
444
|
-
Bucket: ctx.bucketName,
|
|
445
|
-
Key: key,
|
|
446
|
-
}),
|
|
447
|
-
);
|
|
520
|
+
const io = resolveObjectIO(ctx);
|
|
448
521
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
522
|
+
// The transport returns the full object body buffered + its user metadata.
|
|
523
|
+
// downloadFile already buffered the whole object (writeFileSync of the
|
|
524
|
+
// concatenated chunks), so buffering at the transport layer is behavior-
|
|
525
|
+
// preserving — symlink record bodies are tiny and regular files were read
|
|
526
|
+
// fully into memory regardless.
|
|
527
|
+
const { body: objectBody, metadata } = await io.getObject(key);
|
|
452
528
|
|
|
453
529
|
const dir = path.dirname(localPath);
|
|
454
530
|
if (!fs.existsSync(dir)) {
|
|
@@ -463,23 +539,26 @@ export async function downloadFile(
|
|
|
463
539
|
// S3 lowercases user-metadata keys on read (and sometimes on
|
|
464
540
|
// write), so the lookup uses the lowercased form. We don't
|
|
465
541
|
// normalize Metadata keys ourselves — the AWS SDK already does it.
|
|
466
|
-
const symlinkMarker =
|
|
542
|
+
const symlinkMarker = metadata?.[SYMLINK_TARGET_META_KEY];
|
|
543
|
+
// Discriminator: the metadata marker is the primary signal, but the body
|
|
544
|
+
// prefix is a header-loss fallback (S3 cross-region replication of data
|
|
545
|
+
// only, a console copy that drops Metadata, a metadata-stripping transport,
|
|
546
|
+
// or a poisoned regular-file re-upload of a sentinel). Honor BOTH — a
|
|
547
|
+
// marker-less object whose body starts with SYMLINK_BODY_PREFIX still
|
|
548
|
+
// rematerializes as a link instead of being written out as plain
|
|
549
|
+
// `hq-symlink:<target>` text, which would poison the key on the next push.
|
|
550
|
+
// The body is already buffered (tiny for symlink records); reading it here
|
|
551
|
+
// is behavior-preserving for regular files (whose body never starts with
|
|
552
|
+
// the prefix per the SYMLINK_BODY_PREFIX doc). See SYMLINK_BODY_PREFIX.
|
|
553
|
+
const bodyString = objectBody.toString("utf-8");
|
|
467
554
|
const isSymlinkRecord =
|
|
468
|
-
typeof symlinkMarker === "string" && symlinkMarker.length > 0
|
|
555
|
+
(typeof symlinkMarker === "string" && symlinkMarker.length > 0) ||
|
|
556
|
+
bodyString.startsWith(SYMLINK_BODY_PREFIX);
|
|
469
557
|
if (isSymlinkRecord) {
|
|
470
|
-
//
|
|
471
|
-
// are bounded by target length (typically
|
|
472
|
-
// relative paths, hard-capped by S3's 5 GB object
|
|
473
|
-
//
|
|
474
|
-
// released back to the connection pool — without this, a sync
|
|
475
|
-
// over a tree with many symlinks can stall or pool-exhaust.
|
|
476
|
-
const chunks: Buffer[] = [];
|
|
477
|
-
const stream = response.Body as AsyncIterable<Uint8Array>;
|
|
478
|
-
for await (const chunk of stream) {
|
|
479
|
-
chunks.push(Buffer.from(chunk));
|
|
480
|
-
}
|
|
481
|
-
const bodyString = Buffer.concat(chunks).toString("utf-8");
|
|
482
|
-
|
|
558
|
+
// The target lives in the body (marker-only metadata convention).
|
|
559
|
+
// Symlink record bodies are bounded by target length (typically
|
|
560
|
+
// <300 bytes for relative paths, hard-capped by S3's 5 GB single-PUT
|
|
561
|
+
// size); the transport already buffered it.
|
|
483
562
|
let symlinkTarget: string;
|
|
484
563
|
if (bodyString.startsWith(SYMLINK_BODY_PREFIX)) {
|
|
485
564
|
symlinkTarget = bodyString.slice(SYMLINK_BODY_PREFIX.length);
|
|
@@ -488,8 +567,12 @@ export async function downloadFile(
|
|
|
488
567
|
// this PR's lifetime stored the target in metadata (raw or
|
|
489
568
|
// base64) rather than the body. decodeSymlinkMetadataValue
|
|
490
569
|
// round-trip-validates so a raw value passes through and a
|
|
491
|
-
// base64 value decodes; either way we get the target.
|
|
492
|
-
|
|
570
|
+
// base64 value decodes; either way we get the target. This branch
|
|
571
|
+
// is only reachable when the body lacks the prefix, which (given
|
|
572
|
+
// isSymlinkRecord) means the marker is present — the `?? ""` is a
|
|
573
|
+
// type guard for that invariant, and the length-0 check below
|
|
574
|
+
// catches the impossible empty case rather than passing it on.
|
|
575
|
+
symlinkTarget = decodeSymlinkMetadataValue(symlinkMarker ?? "");
|
|
493
576
|
}
|
|
494
577
|
|
|
495
578
|
if (symlinkTarget.length === 0) {
|
|
@@ -515,7 +598,7 @@ export async function downloadFile(
|
|
|
515
598
|
}
|
|
516
599
|
}
|
|
517
600
|
fs.symlinkSync(symlinkTarget, localPath);
|
|
518
|
-
return { metadata
|
|
601
|
+
return { metadata };
|
|
519
602
|
}
|
|
520
603
|
|
|
521
604
|
// Symmetric to the symlink branch above: when a key was previously a
|
|
@@ -542,12 +625,7 @@ export async function downloadFile(
|
|
|
542
625
|
}
|
|
543
626
|
}
|
|
544
627
|
|
|
545
|
-
|
|
546
|
-
const stream = response.Body as AsyncIterable<Uint8Array>;
|
|
547
|
-
for await (const chunk of stream) {
|
|
548
|
-
chunks.push(Buffer.from(chunk));
|
|
549
|
-
}
|
|
550
|
-
fs.writeFileSync(localPath, Buffer.concat(chunks));
|
|
628
|
+
fs.writeFileSync(localPath, objectBody);
|
|
551
629
|
|
|
552
630
|
// Bug #5 — apply source-side mode after the byte write. See
|
|
553
631
|
// FILE_MODE_META_KEY for the metadata contract. Parses defensively:
|
|
@@ -563,7 +641,7 @@ export async function downloadFile(
|
|
|
563
641
|
// requires 1–4 pure octal digits (`[0-7]{1,4}$`), which matches what
|
|
564
642
|
// the upload side stamps (`(mode & 0o777).toString(8)` → at most
|
|
565
643
|
// three digits, all 0–7) and rejects everything else.
|
|
566
|
-
const modeOctal =
|
|
644
|
+
const modeOctal = metadata?.[FILE_MODE_META_KEY];
|
|
567
645
|
if (typeof modeOctal === "string" && /^[0-7]{1,4}$/.test(modeOctal)) {
|
|
568
646
|
const parsed = parseInt(modeOctal, 8);
|
|
569
647
|
if (Number.isFinite(parsed) && parsed >= 0 && parsed <= 0o777) {
|
|
@@ -602,7 +680,7 @@ export async function downloadFile(
|
|
|
602
680
|
// similarly lstats after downloadFile). If a future caller stamps the
|
|
603
681
|
// journal BEFORE downloadFile completes, the fast-path will stale and
|
|
604
682
|
// re-hash every sync forever — keep the call-site invariant intact.
|
|
605
|
-
const mtimeRaw =
|
|
683
|
+
const mtimeRaw = metadata?.[FILE_MTIME_META_KEY];
|
|
606
684
|
if (typeof mtimeRaw === "string" && /^-?[0-9]{1,16}$/.test(mtimeRaw)) {
|
|
607
685
|
const mtimeMs = parseInt(mtimeRaw, 10);
|
|
608
686
|
if (Number.isFinite(mtimeMs)) {
|
|
@@ -627,7 +705,7 @@ export async function downloadFile(
|
|
|
627
705
|
// distinct creation time, so a future receiver upgrade picks it up
|
|
628
706
|
// automatically without a server-side data migration.
|
|
629
707
|
|
|
630
|
-
return { metadata
|
|
708
|
+
return { metadata };
|
|
631
709
|
}
|
|
632
710
|
|
|
633
711
|
export interface RemoteFile {
|
|
@@ -641,26 +719,14 @@ export async function listRemoteFiles(
|
|
|
641
719
|
ctx: EntityContext,
|
|
642
720
|
prefix?: string,
|
|
643
721
|
): Promise<RemoteFile[]> {
|
|
644
|
-
const
|
|
722
|
+
const io = resolveObjectIO(ctx);
|
|
645
723
|
const files: RemoteFile[] = [];
|
|
646
724
|
let continuationToken: string | undefined;
|
|
647
725
|
|
|
648
726
|
do {
|
|
649
|
-
const
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
Prefix: prefix,
|
|
653
|
-
ContinuationToken: continuationToken,
|
|
654
|
-
}),
|
|
655
|
-
);
|
|
656
|
-
|
|
657
|
-
for (const obj of response.Contents || []) {
|
|
658
|
-
// Pre-fix this guard was `!obj.Key || !obj.Size`. The `!obj.Size` test
|
|
659
|
-
// is truthy when Size === 0 (a real 0-byte object like `.gitkeep`),
|
|
660
|
-
// silently filtering legitimate placeholder files out of every pull
|
|
661
|
-
// plan. Narrow the guard to "no key" only; surface real 0-byte
|
|
662
|
-
// objects to the planner.
|
|
663
|
-
if (!obj.Key) continue;
|
|
727
|
+
const page = await io.listObjects({ prefix, continuationToken });
|
|
728
|
+
|
|
729
|
+
for (const obj of page.objects) {
|
|
664
730
|
// Drop S3 directory-marker objects: the canonical shape is `0-byte
|
|
665
731
|
// key ending in '/'` (S3 console "Create folder", `aws s3 sync` of
|
|
666
732
|
// empty dirs, sync tools that mirror empty trees). Two downstream
|
|
@@ -670,29 +736,29 @@ export async function listRemoteFiles(
|
|
|
670
736
|
// → EISDIR "open" after the parent mkdir creates the leaf as a
|
|
671
737
|
// directory). Filtering here eliminates both.
|
|
672
738
|
//
|
|
673
|
-
// Narrow on
|
|
739
|
+
// Narrow on size===0 (not just trailing-slash) so a hypothetical
|
|
674
740
|
// non-empty object whose key happens to end in '/' is NOT silently
|
|
675
741
|
// hidden — it stays visible and downloadFile surfaces the same
|
|
676
742
|
// EISDIR "open" error pointing at the specific key, which is the
|
|
677
743
|
// signal an operator needs to reconcile the bucket. The vault
|
|
678
744
|
// service doesn't have a code path that produces such an object,
|
|
679
|
-
// but
|
|
745
|
+
// but the listing returns whatever lives in the bucket; silent
|
|
680
746
|
// drop would be worse than loud failure for that case.
|
|
681
747
|
//
|
|
682
748
|
// Real 0-byte placeholders like `.gitkeep` never end in `/` and
|
|
683
749
|
// continue to flow through — the 5.13.0 `.gitkeep` regression
|
|
684
|
-
// remains fixed.
|
|
685
|
-
if (obj.
|
|
750
|
+
// remains fixed. (The `!key` guard now lives in the ObjectIO layer.)
|
|
751
|
+
if (obj.key.endsWith("/") && obj.size === 0) continue;
|
|
686
752
|
|
|
687
753
|
files.push({
|
|
688
|
-
key: obj.
|
|
689
|
-
size: obj.
|
|
690
|
-
lastModified: obj.
|
|
691
|
-
etag: obj.
|
|
754
|
+
key: obj.key,
|
|
755
|
+
size: obj.size,
|
|
756
|
+
lastModified: obj.lastModified,
|
|
757
|
+
etag: obj.etag,
|
|
692
758
|
});
|
|
693
759
|
}
|
|
694
760
|
|
|
695
|
-
continuationToken =
|
|
761
|
+
continuationToken = page.nextContinuationToken;
|
|
696
762
|
} while (continuationToken);
|
|
697
763
|
|
|
698
764
|
return files;
|
|
@@ -702,14 +768,7 @@ export async function deleteRemoteFile(
|
|
|
702
768
|
ctx: EntityContext,
|
|
703
769
|
key: string,
|
|
704
770
|
): Promise<void> {
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
await client.send(
|
|
708
|
-
new DeleteObjectCommand({
|
|
709
|
-
Bucket: ctx.bucketName,
|
|
710
|
-
Key: key,
|
|
711
|
-
}),
|
|
712
|
-
);
|
|
771
|
+
await resolveObjectIO(ctx).deleteObject(key);
|
|
713
772
|
}
|
|
714
773
|
|
|
715
774
|
/**
|
|
@@ -719,26 +778,7 @@ export async function headRemoteFile(
|
|
|
719
778
|
ctx: EntityContext,
|
|
720
779
|
key: string,
|
|
721
780
|
): Promise<{ lastModified: Date; etag: string; size: number; metadata?: Record<string, string> } | null> {
|
|
722
|
-
|
|
723
|
-
try {
|
|
724
|
-
const response = await client.send(
|
|
725
|
-
new HeadObjectCommand({
|
|
726
|
-
Bucket: ctx.bucketName,
|
|
727
|
-
Key: key,
|
|
728
|
-
}),
|
|
729
|
-
);
|
|
730
|
-
return {
|
|
731
|
-
lastModified: response.LastModified || new Date(),
|
|
732
|
-
etag: response.ETag || "",
|
|
733
|
-
size: response.ContentLength || 0,
|
|
734
|
-
metadata: response.Metadata,
|
|
735
|
-
};
|
|
736
|
-
} catch (err: unknown) {
|
|
737
|
-
if (err && typeof err === "object" && "name" in err && err.name === "NotFound") {
|
|
738
|
-
return null;
|
|
739
|
-
}
|
|
740
|
-
throw err;
|
|
741
|
-
}
|
|
781
|
+
return resolveObjectIO(ctx).headObject(key);
|
|
742
782
|
}
|
|
743
783
|
|
|
744
784
|
function getMimeType(filePath: string): string {
|