@indigoai-us/hq-cloud 5.45.0 → 5.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/sync-runner.d.ts +12 -0
- package/dist/bin/sync-runner.d.ts.map +1 -1
- package/dist/bin/sync-runner.js +78 -12
- package/dist/bin/sync-runner.js.map +1 -1
- package/dist/bin/sync-runner.test.js +27 -1
- package/dist/bin/sync-runner.test.js.map +1 -1
- package/dist/cli/share.d.ts.map +1 -1
- package/dist/cli/share.js +17 -2
- package/dist/cli/share.js.map +1 -1
- package/dist/cli/share.test.js +2 -0
- package/dist/cli/share.test.js.map +1 -1
- package/dist/cli/sync-scope.test.js +1 -0
- package/dist/cli/sync-scope.test.js.map +1 -1
- package/dist/cli/sync.d.ts.map +1 -1
- package/dist/cli/sync.js +11 -1
- package/dist/cli/sync.js.map +1 -1
- package/dist/cli/sync.test.js +1 -0
- package/dist/cli/sync.test.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/object-io.d.ts +218 -0
- package/dist/object-io.d.ts.map +1 -0
- package/dist/object-io.js +588 -0
- package/dist/object-io.js.map +1 -0
- package/dist/object-io.test.d.ts +11 -0
- package/dist/object-io.test.d.ts.map +1 -0
- package/dist/object-io.test.js +568 -0
- package/dist/object-io.test.js.map +1 -0
- package/dist/s3.d.ts +37 -0
- package/dist/s3.d.ts.map +1 -1
- package/dist/s3.js +207 -198
- package/dist/s3.js.map +1 -1
- package/dist/skill-telemetry.d.ts +107 -0
- package/dist/skill-telemetry.d.ts.map +1 -0
- package/dist/skill-telemetry.js +395 -0
- package/dist/skill-telemetry.js.map +1 -0
- package/dist/skill-telemetry.test.d.ts +2 -0
- package/dist/skill-telemetry.test.d.ts.map +1 -0
- package/dist/skill-telemetry.test.js +219 -0
- package/dist/skill-telemetry.test.js.map +1 -0
- package/dist/vault-client.d.ts +91 -0
- package/dist/vault-client.d.ts.map +1 -1
- package/dist/vault-client.js +45 -0
- package/dist/vault-client.js.map +1 -1
- package/package.json +1 -1
- package/scripts/presign-transport-e2e.mjs +203 -0
- package/scripts/vault-rebaseline.sh +275 -0
- package/scripts/vault-rescue.sh +291 -0
- package/src/bin/sync-runner.test.ts +41 -0
- package/src/bin/sync-runner.ts +91 -13
- package/src/cli/share.test.ts +2 -0
- package/src/cli/share.ts +29 -2
- package/src/cli/sync-scope.test.ts +1 -0
- package/src/cli/sync.test.ts +1 -0
- package/src/cli/sync.ts +22 -1
- package/src/index.ts +16 -0
- package/src/object-io.test.ts +663 -0
- package/src/object-io.ts +782 -0
- package/src/s3.ts +259 -233
- package/src/skill-telemetry.test.ts +279 -0
- package/src/skill-telemetry.ts +499 -0
- package/src/vault-client.ts +135 -0
package/src/s3.ts
CHANGED
|
@@ -8,31 +8,14 @@
|
|
|
8
8
|
|
|
9
9
|
import * as fs from "fs";
|
|
10
10
|
import * as path from "path";
|
|
11
|
-
import {
|
|
12
|
-
S3Client,
|
|
13
|
-
PutObjectCommand,
|
|
14
|
-
GetObjectCommand,
|
|
15
|
-
ListObjectsV2Command,
|
|
16
|
-
DeleteObjectCommand,
|
|
17
|
-
HeadObjectCommand,
|
|
18
|
-
} from "@aws-sdk/client-s3";
|
|
19
11
|
import type { EntityContext } from "./types.js";
|
|
12
|
+
import { resolveObjectIO, type ObjectIO } from "./object-io.js";
|
|
20
13
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
function buildClient(ctx: EntityContext): S3Client {
|
|
27
|
-
return new S3Client({
|
|
28
|
-
region: ctx.region,
|
|
29
|
-
credentials: {
|
|
30
|
-
accessKeyId: ctx.credentials.accessKeyId,
|
|
31
|
-
secretAccessKey: ctx.credentials.secretAccessKey,
|
|
32
|
-
sessionToken: ctx.credentials.sessionToken,
|
|
33
|
-
},
|
|
34
|
-
});
|
|
35
|
-
}
|
|
14
|
+
// Byte/metadata transport is resolved per-call via resolveObjectIO(ctx) — the
|
|
15
|
+
// default is the AWS S3 SDK over STS-vended credentials (S3SdkObjectIO), but a
|
|
16
|
+
// session may select the presigned-URL transport (PresignObjectIO) via
|
|
17
|
+
// setObjectIOFactory. The symlink/mode/mtime/created-at semantics below are
|
|
18
|
+
// transport-agnostic: they compose on top of the ObjectIO primitives.
|
|
36
19
|
|
|
37
20
|
/**
|
|
38
21
|
* Author identity stamped onto S3 user-defined metadata at upload time. The
|
|
@@ -247,114 +230,215 @@ export function encodeSymlinkBody(target: string): Buffer {
|
|
|
247
230
|
return Buffer.from(SYMLINK_BODY_PREFIX + target, "utf-8");
|
|
248
231
|
}
|
|
249
232
|
|
|
250
|
-
|
|
233
|
+
/**
|
|
234
|
+
* Batch pre-mint transport URLs for `keys` under `op` so the subsequent
|
|
235
|
+
* per-file transfer calls (downloadFile/headRemoteFile/…) reuse them instead
|
|
236
|
+
* of presigning one key at a time. On the presigned-URL transport this turns
|
|
237
|
+
* an N-file leg from N presign requests into ceil(N/100) — the difference
|
|
238
|
+
* between completing a bulk pull and 429ing past the 100-req/hr limit. No-op
|
|
239
|
+
* on the S3 SDK transport (which has no presign step) and harmless if called
|
|
240
|
+
* with an empty list. Best-effort: a prime failure never propagates — the
|
|
241
|
+
* per-file path falls back to a single presign.
|
|
242
|
+
*
|
|
243
|
+
* Call it once, right before a transfer loop, with the full key set the loop
|
|
244
|
+
* will touch. The presigned transport memoizes one IO instance per company for
|
|
245
|
+
* the run, so the warmed cache is the same one the loop drains.
|
|
246
|
+
*/
|
|
247
|
+
export async function primeObjectTransport(
|
|
251
248
|
ctx: EntityContext,
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const
|
|
257
|
-
|
|
249
|
+
op: "get" | "put" | "delete",
|
|
250
|
+
keys: string[],
|
|
251
|
+
): Promise<void> {
|
|
252
|
+
if (keys.length === 0) return;
|
|
253
|
+
const io = resolveObjectIO(ctx);
|
|
254
|
+
if (!io.prime) return;
|
|
255
|
+
await io.prime(
|
|
256
|
+
op,
|
|
257
|
+
keys.map((key) => ({ key })),
|
|
258
|
+
);
|
|
259
|
+
}
|
|
258
260
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
// some filesystems report (APFS returns full ms+fraction; ext4
|
|
281
|
-
// is integer-ms). String(int) on the read side matches the
|
|
282
|
-
// strict-numeric regex `^-?[0-9]{1,16}$` — optional leading `-`,
|
|
283
|
-
// no leading zeros, no decimals, no exponents.
|
|
284
|
-
//
|
|
285
|
-
// Codex PR #27 P2: accept the full finite range, including 0
|
|
286
|
-
// (Unix epoch) and negatives (pre-epoch / reproducible-build
|
|
287
|
-
// clamping). Earlier `> 0` filter silently dropped legitimate
|
|
288
|
-
// timestamps and broke the round-trip guarantee for that subset.
|
|
289
|
-
const mtimeFloor = Math.floor(lstat.mtimeMs);
|
|
290
|
-
if (Number.isFinite(lstat.mtimeMs)) {
|
|
291
|
-
mtimeMsStamp = String(mtimeFloor);
|
|
292
|
-
}
|
|
293
|
-
// birthtimeMs filter: only stamp when the filesystem actually
|
|
294
|
-
// tracks a separate creation time. ext4 historically returns 0
|
|
295
|
-
// (unsupported) or equals mtimeMs (no distinct tracking); tmpfs
|
|
296
|
-
// and some FUSE mounts behave similarly. Filtering at the source
|
|
297
|
-
// keeps the metadata header free of noise — the receiver can
|
|
298
|
-
// assume hq-btime, if present, carries real signal.
|
|
299
|
-
//
|
|
300
|
-
// Compare the floored values (not raw lstat.birthtimeMs vs
|
|
301
|
-
// lstat.mtimeMs) because APFS exposes sub-millisecond fractions —
|
|
302
|
-
// two timestamps representing the "same moment" for sync purposes
|
|
303
|
-
// can differ by < 1 ms and pass a strict `!==` check while serializing
|
|
304
|
-
// to the same integer-ms string. Comparing floor-to-floor matches
|
|
305
|
-
// what we actually emit on the wire.
|
|
306
|
-
const btimeFloor = Math.floor(lstat.birthtimeMs);
|
|
307
|
-
if (
|
|
308
|
-
Number.isFinite(lstat.birthtimeMs) &&
|
|
309
|
-
btimeFloor > 0 &&
|
|
310
|
-
btimeFloor !== mtimeFloor
|
|
311
|
-
) {
|
|
312
|
-
btimeMsStamp = String(btimeFloor);
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
} catch {
|
|
316
|
-
// Leave stamps undefined; receiver applies its umask default and
|
|
317
|
-
// leaves mtime at write-time (the legacy back-compat path).
|
|
261
|
+
/**
|
|
262
|
+
* Source-side mode + mtime (+ btime when distinct) metadata for a regular
|
|
263
|
+
* file, from a single lstat. Symlinks carry none (OS-controlled mode; a link's
|
|
264
|
+
* mtime isn't user-meaningful — the wire body is the target string, not file
|
|
265
|
+
* content). Shared by uploadFile and the primeUploads pre-pass so the PUT
|
|
266
|
+
* metadata they produce is byte-identical. See the FILE_*_META_KEY docs for the
|
|
267
|
+
* per-field rationale.
|
|
268
|
+
*/
|
|
269
|
+
function buildModeTimeMetadata(lstat: fs.Stats): Record<string, string> {
|
|
270
|
+
const meta: Record<string, string> = {};
|
|
271
|
+
if (lstat.isSymbolicLink()) return meta;
|
|
272
|
+
meta[FILE_MODE_META_KEY] = (lstat.mode & 0o777).toString(8);
|
|
273
|
+
const mtimeFloor = Math.floor(lstat.mtimeMs);
|
|
274
|
+
if (Number.isFinite(lstat.mtimeMs)) meta[FILE_MTIME_META_KEY] = String(mtimeFloor);
|
|
275
|
+
const btimeFloor = Math.floor(lstat.birthtimeMs);
|
|
276
|
+
if (
|
|
277
|
+
Number.isFinite(lstat.birthtimeMs) &&
|
|
278
|
+
btimeFloor > 0 &&
|
|
279
|
+
btimeFloor !== mtimeFloor
|
|
280
|
+
) {
|
|
281
|
+
meta[FILE_BTIME_META_KEY] = String(btimeFloor);
|
|
318
282
|
}
|
|
283
|
+
return meta;
|
|
284
|
+
}
|
|
319
285
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
286
|
+
/**
|
|
287
|
+
* Resolve the created-at to stamp: the existing object's value (preserved
|
|
288
|
+
* across re-uploads so the hq-console NEW-pill window doesn't reset) or now for
|
|
289
|
+
* a first upload. HEAD failure / no author → now. Shared by upload* and
|
|
290
|
+
* primeUploads so both agree on the value signed into the PUT.
|
|
291
|
+
*/
|
|
292
|
+
async function resolveCreatedAt(
|
|
293
|
+
io: ObjectIO,
|
|
294
|
+
key: string,
|
|
295
|
+
author?: UploadAuthor,
|
|
296
|
+
): Promise<string> {
|
|
325
297
|
let createdAt = new Date().toISOString();
|
|
326
298
|
if (author) {
|
|
327
299
|
try {
|
|
328
|
-
const head = await
|
|
329
|
-
|
|
330
|
-
);
|
|
331
|
-
const existing = head.Metadata?.["created-at"];
|
|
300
|
+
const head = await io.headObject(key);
|
|
301
|
+
const existing = head?.metadata?.["created-at"];
|
|
332
302
|
if (typeof existing === "string" && existing.length > 0) {
|
|
333
303
|
createdAt = existing;
|
|
334
304
|
}
|
|
335
305
|
} catch {
|
|
336
|
-
// Object doesn't exist yet, or HEAD
|
|
306
|
+
// Object doesn't exist yet, or HEAD failed — keep now (first upload).
|
|
337
307
|
}
|
|
338
308
|
}
|
|
309
|
+
return createdAt;
|
|
310
|
+
}
|
|
339
311
|
|
|
312
|
+
/**
|
|
313
|
+
* One upload's identity for {@link primeUploads}: the vault key, the local
|
|
314
|
+
* path (to lstat for mode/mtime), whether it's a symlink, and the author.
|
|
315
|
+
*/
|
|
316
|
+
export interface UploadPrimeItem {
|
|
317
|
+
key: string;
|
|
318
|
+
localPath: string;
|
|
319
|
+
isSymlink: boolean;
|
|
320
|
+
author?: UploadAuthor;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
/**
|
|
324
|
+
* Batch pre-mint PUT URLs (+ the created-at HEADs they depend on) for a set of
|
|
325
|
+
* uploads, signing the SAME metadata uploadFile/uploadSymlink would compute so
|
|
326
|
+
* the transfer loop can replay the cached headers. Turns an N-file push from
|
|
327
|
+
* ~N presign calls (1 per PUT, sometimes 2-3 with HEADs) into ceil(N/1000) GET
|
|
328
|
+
* + ceil(N/1000) PUT — the difference between completing a bulk push and 429ing
|
|
329
|
+
* past the 100/hr limit. No-op on the S3 SDK transport; best-effort.
|
|
330
|
+
*
|
|
331
|
+
* The per-item created-at HEADs run over the GET cache primed first, so they
|
|
332
|
+
* cost S3 round-trips but NO extra presign calls (not counted against 100/hr).
|
|
333
|
+
*/
|
|
334
|
+
export async function primeUploads(
|
|
335
|
+
ctx: EntityContext,
|
|
336
|
+
items: UploadPrimeItem[],
|
|
337
|
+
): Promise<void> {
|
|
338
|
+
const io = resolveObjectIO(ctx);
|
|
339
|
+
if (!io.prime || items.length === 0) return;
|
|
340
|
+
|
|
341
|
+
// Prime GET first so each item's created-at HEAD reuses a cached URL.
|
|
342
|
+
await io.prime(
|
|
343
|
+
"get",
|
|
344
|
+
items.map((i) => ({ key: i.key })),
|
|
345
|
+
);
|
|
346
|
+
|
|
347
|
+
// Build per-key PUT metadata with the SAME builders the upload path uses,
|
|
348
|
+
// with bounded concurrency (the HEADs are cheap cached-GET fetches).
|
|
349
|
+
const putKeys: Array<{
|
|
350
|
+
key: string;
|
|
351
|
+
contentType: string;
|
|
352
|
+
metadata: Record<string, string>;
|
|
353
|
+
}> = [];
|
|
354
|
+
const CONCURRENCY = 16;
|
|
355
|
+
let next = 0;
|
|
356
|
+
const worker = async (): Promise<void> => {
|
|
357
|
+
while (next < items.length) {
|
|
358
|
+
const it = items[next++];
|
|
359
|
+
const createdAt = await resolveCreatedAt(io, it.key, it.author);
|
|
360
|
+
if (it.isSymlink) {
|
|
361
|
+
putKeys.push({
|
|
362
|
+
key: it.key,
|
|
363
|
+
contentType: "application/octet-stream",
|
|
364
|
+
metadata: {
|
|
365
|
+
[SYMLINK_TARGET_META_KEY]: SYMLINK_MARKER_META_VALUE,
|
|
366
|
+
...(it.author ? buildAuthorMetadata(it.author, createdAt) : {}),
|
|
367
|
+
},
|
|
368
|
+
});
|
|
369
|
+
} else {
|
|
370
|
+
let modeTime: Record<string, string> = {};
|
|
371
|
+
try {
|
|
372
|
+
modeTime = buildModeTimeMetadata(fs.lstatSync(it.localPath));
|
|
373
|
+
} catch {
|
|
374
|
+
// raced rm / EPERM — leave stamps off (receiver umask default).
|
|
375
|
+
}
|
|
376
|
+
putKeys.push({
|
|
377
|
+
key: it.key,
|
|
378
|
+
contentType: getMimeType(it.key),
|
|
379
|
+
metadata: {
|
|
380
|
+
...(it.author ? buildAuthorMetadata(it.author, createdAt) : {}),
|
|
381
|
+
...modeTime,
|
|
382
|
+
},
|
|
383
|
+
});
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
};
|
|
387
|
+
await Promise.all(
|
|
388
|
+
Array.from({ length: Math.min(CONCURRENCY, items.length) }, worker),
|
|
389
|
+
);
|
|
390
|
+
|
|
391
|
+
await io.prime("put", putKeys);
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
export async function uploadFile(
|
|
395
|
+
ctx: EntityContext,
|
|
396
|
+
localPath: string,
|
|
397
|
+
key: string,
|
|
398
|
+
author?: UploadAuthor,
|
|
399
|
+
): Promise<{ etag: string }> {
|
|
400
|
+
const io = resolveObjectIO(ctx);
|
|
401
|
+
const body = fs.readFileSync(localPath);
|
|
402
|
+
|
|
403
|
+
// Fast path: a primeUploads() pre-pass already signed this file's metadata
|
|
404
|
+
// into a cached PUT URL. Skip the lstat-metadata + created-at HEAD and just
|
|
405
|
+
// send the body — putObject replays the cached headers (computed by the SAME
|
|
406
|
+
// builders below, so identical). hasPrimedPut only reports true with >60s of
|
|
407
|
+
// URL lifetime left, so the cache can't expire before the putObject below.
|
|
408
|
+
if (io.hasPrimedPut?.(key)) {
|
|
409
|
+
const primed = await io.putObject({
|
|
410
|
+
key,
|
|
411
|
+
body,
|
|
412
|
+
contentType: getMimeType(key),
|
|
413
|
+
metadata: {},
|
|
414
|
+
});
|
|
415
|
+
return { etag: primed.etag };
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
// Source-side mode/mtime/btime (Bug #5 + 5.37.0) and the preserved
|
|
419
|
+
// created-at (so the NEW-pill window doesn't reset on re-upload). Both via
|
|
420
|
+
// the shared builders that primeUploads uses, so a primed PUT carries the
|
|
421
|
+
// identical metadata — see buildModeTimeMetadata / resolveCreatedAt.
|
|
422
|
+
let modeTime: Record<string, string> = {};
|
|
423
|
+
try {
|
|
424
|
+
modeTime = buildModeTimeMetadata(fs.lstatSync(localPath));
|
|
425
|
+
} catch {
|
|
426
|
+
// raced rm / EPERM — leave stamps off; receiver keeps its umask default.
|
|
427
|
+
}
|
|
428
|
+
const createdAt = await resolveCreatedAt(io, key, author);
|
|
340
429
|
const Metadata: Record<string, string> = {
|
|
341
430
|
...(author ? buildAuthorMetadata(author, createdAt) : {}),
|
|
342
|
-
...
|
|
343
|
-
...(mtimeMsStamp ? { [FILE_MTIME_META_KEY]: mtimeMsStamp } : {}),
|
|
344
|
-
...(btimeMsStamp ? { [FILE_BTIME_META_KEY]: btimeMsStamp } : {}),
|
|
431
|
+
...modeTime,
|
|
345
432
|
};
|
|
346
433
|
|
|
347
|
-
const response = await
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
...(Object.keys(Metadata).length > 0 ? { Metadata } : {}),
|
|
354
|
-
}),
|
|
355
|
-
);
|
|
434
|
+
const response = await io.putObject({
|
|
435
|
+
key,
|
|
436
|
+
body,
|
|
437
|
+
contentType: getMimeType(key),
|
|
438
|
+
metadata: Metadata,
|
|
439
|
+
});
|
|
356
440
|
|
|
357
|
-
return { etag: response.
|
|
441
|
+
return { etag: response.etag };
|
|
358
442
|
}
|
|
359
443
|
|
|
360
444
|
/**
|
|
@@ -375,26 +459,25 @@ export async function uploadSymlink(
|
|
|
375
459
|
key: string,
|
|
376
460
|
author?: UploadAuthor,
|
|
377
461
|
): Promise<{ etag: string }> {
|
|
378
|
-
const
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
//
|
|
382
|
-
//
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
createdAt = existing;
|
|
392
|
-
}
|
|
393
|
-
} catch {
|
|
394
|
-
// First upload of this key, or HEAD denied — keep `now`.
|
|
395
|
-
}
|
|
462
|
+
const io = resolveObjectIO(ctx);
|
|
463
|
+
const symlinkBody = encodeSymlinkBody(target);
|
|
464
|
+
|
|
465
|
+
// Fast path: primeUploads() already signed this symlink's metadata into a
|
|
466
|
+
// cached PUT URL — send the body, replay the cached headers.
|
|
467
|
+
if (io.hasPrimedPut?.(key)) {
|
|
468
|
+
const primed = await io.putObject({
|
|
469
|
+
key,
|
|
470
|
+
body: symlinkBody,
|
|
471
|
+
contentType: "application/octet-stream",
|
|
472
|
+
metadata: {},
|
|
473
|
+
});
|
|
474
|
+
return { etag: primed.etag };
|
|
396
475
|
}
|
|
397
476
|
|
|
477
|
+
// Same created-at preservation as uploadFile (shared resolveCreatedAt) so the
|
|
478
|
+
// NEW-pill window doesn't reset on re-upload, and so a primed PUT matches.
|
|
479
|
+
const createdAt = await resolveCreatedAt(io, key, author);
|
|
480
|
+
|
|
398
481
|
const Metadata: Record<string, string> = {
|
|
399
482
|
// Marker-only: a constant flag value, not the target. The body
|
|
400
483
|
// is the source of truth for the target (no 2 KiB cap, no
|
|
@@ -404,23 +487,20 @@ export async function uploadSymlink(
|
|
|
404
487
|
...(author ? buildAuthorMetadata(author, createdAt) : {}),
|
|
405
488
|
};
|
|
406
489
|
|
|
407
|
-
const response = await
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
Metadata,
|
|
420
|
-
}),
|
|
421
|
-
);
|
|
490
|
+
const response = await io.putObject({
|
|
491
|
+
key,
|
|
492
|
+
// Body = SYMLINK_BODY_PREFIX + target (UTF-8). The prefix is what
|
|
493
|
+
// makes a symlink record's ETag distinguishable from a regular
|
|
494
|
+
// file whose contents happen to equal the target string — the
|
|
495
|
+
// LIST-based pull planner can't see per-object metadata, so ETag
|
|
496
|
+
// is its only drift signal across symlink ↔ regular-file
|
|
497
|
+
// transitions. See SYMLINK_BODY_PREFIX doc above.
|
|
498
|
+
body: symlinkBody,
|
|
499
|
+
contentType: "application/octet-stream",
|
|
500
|
+
metadata: Metadata,
|
|
501
|
+
});
|
|
422
502
|
|
|
423
|
-
return { etag: response.
|
|
503
|
+
return { etag: response.etag };
|
|
424
504
|
}
|
|
425
505
|
|
|
426
506
|
/**
|
|
@@ -437,18 +517,14 @@ export async function downloadFile(
|
|
|
437
517
|
key: string,
|
|
438
518
|
localPath: string,
|
|
439
519
|
): Promise<{ metadata?: Record<string, string> }> {
|
|
440
|
-
const
|
|
520
|
+
const io = resolveObjectIO(ctx);
|
|
441
521
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
);
|
|
448
|
-
|
|
449
|
-
if (!response.Body) {
|
|
450
|
-
throw new Error(`Empty response for ${key}`);
|
|
451
|
-
}
|
|
522
|
+
// The transport returns the full object body buffered + its user metadata.
|
|
523
|
+
// downloadFile already buffered the whole object (writeFileSync of the
|
|
524
|
+
// concatenated chunks), so buffering at the transport layer is behavior-
|
|
525
|
+
// preserving — symlink record bodies are tiny and regular files were read
|
|
526
|
+
// fully into memory regardless.
|
|
527
|
+
const { body: objectBody, metadata } = await io.getObject(key);
|
|
452
528
|
|
|
453
529
|
const dir = path.dirname(localPath);
|
|
454
530
|
if (!fs.existsSync(dir)) {
|
|
@@ -463,22 +539,15 @@ export async function downloadFile(
|
|
|
463
539
|
// S3 lowercases user-metadata keys on read (and sometimes on
|
|
464
540
|
// write), so the lookup uses the lowercased form. We don't
|
|
465
541
|
// normalize Metadata keys ourselves — the AWS SDK already does it.
|
|
466
|
-
const symlinkMarker =
|
|
542
|
+
const symlinkMarker = metadata?.[SYMLINK_TARGET_META_KEY];
|
|
467
543
|
const isSymlinkRecord =
|
|
468
544
|
typeof symlinkMarker === "string" && symlinkMarker.length > 0;
|
|
469
545
|
if (isSymlinkRecord) {
|
|
470
|
-
//
|
|
471
|
-
// are bounded by target length (typically
|
|
472
|
-
// relative paths, hard-capped by S3's 5 GB object
|
|
473
|
-
//
|
|
474
|
-
|
|
475
|
-
// over a tree with many symlinks can stall or pool-exhaust.
|
|
476
|
-
const chunks: Buffer[] = [];
|
|
477
|
-
const stream = response.Body as AsyncIterable<Uint8Array>;
|
|
478
|
-
for await (const chunk of stream) {
|
|
479
|
-
chunks.push(Buffer.from(chunk));
|
|
480
|
-
}
|
|
481
|
-
const bodyString = Buffer.concat(chunks).toString("utf-8");
|
|
546
|
+
// The target lives in the body (marker-only metadata convention).
|
|
547
|
+
// Symlink record bodies are bounded by target length (typically
|
|
548
|
+
// <300 bytes for relative paths, hard-capped by S3's 5 GB object
|
|
549
|
+
// size); the transport already buffered it.
|
|
550
|
+
const bodyString = objectBody.toString("utf-8");
|
|
482
551
|
|
|
483
552
|
let symlinkTarget: string;
|
|
484
553
|
if (bodyString.startsWith(SYMLINK_BODY_PREFIX)) {
|
|
@@ -515,7 +584,7 @@ export async function downloadFile(
|
|
|
515
584
|
}
|
|
516
585
|
}
|
|
517
586
|
fs.symlinkSync(symlinkTarget, localPath);
|
|
518
|
-
return { metadata
|
|
587
|
+
return { metadata };
|
|
519
588
|
}
|
|
520
589
|
|
|
521
590
|
// Symmetric to the symlink branch above: when a key was previously a
|
|
@@ -542,12 +611,7 @@ export async function downloadFile(
|
|
|
542
611
|
}
|
|
543
612
|
}
|
|
544
613
|
|
|
545
|
-
|
|
546
|
-
const stream = response.Body as AsyncIterable<Uint8Array>;
|
|
547
|
-
for await (const chunk of stream) {
|
|
548
|
-
chunks.push(Buffer.from(chunk));
|
|
549
|
-
}
|
|
550
|
-
fs.writeFileSync(localPath, Buffer.concat(chunks));
|
|
614
|
+
fs.writeFileSync(localPath, objectBody);
|
|
551
615
|
|
|
552
616
|
// Bug #5 — apply source-side mode after the byte write. See
|
|
553
617
|
// FILE_MODE_META_KEY for the metadata contract. Parses defensively:
|
|
@@ -563,7 +627,7 @@ export async function downloadFile(
|
|
|
563
627
|
// requires 1–4 pure octal digits (`[0-7]{1,4}$`), which matches what
|
|
564
628
|
// the upload side stamps (`(mode & 0o777).toString(8)` → at most
|
|
565
629
|
// three digits, all 0–7) and rejects everything else.
|
|
566
|
-
const modeOctal =
|
|
630
|
+
const modeOctal = metadata?.[FILE_MODE_META_KEY];
|
|
567
631
|
if (typeof modeOctal === "string" && /^[0-7]{1,4}$/.test(modeOctal)) {
|
|
568
632
|
const parsed = parseInt(modeOctal, 8);
|
|
569
633
|
if (Number.isFinite(parsed) && parsed >= 0 && parsed <= 0o777) {
|
|
@@ -602,7 +666,7 @@ export async function downloadFile(
|
|
|
602
666
|
// similarly lstats after downloadFile). If a future caller stamps the
|
|
603
667
|
// journal BEFORE downloadFile completes, the fast-path will stale and
|
|
604
668
|
// re-hash every sync forever — keep the call-site invariant intact.
|
|
605
|
-
const mtimeRaw =
|
|
669
|
+
const mtimeRaw = metadata?.[FILE_MTIME_META_KEY];
|
|
606
670
|
if (typeof mtimeRaw === "string" && /^-?[0-9]{1,16}$/.test(mtimeRaw)) {
|
|
607
671
|
const mtimeMs = parseInt(mtimeRaw, 10);
|
|
608
672
|
if (Number.isFinite(mtimeMs)) {
|
|
@@ -627,7 +691,7 @@ export async function downloadFile(
|
|
|
627
691
|
// distinct creation time, so a future receiver upgrade picks it up
|
|
628
692
|
// automatically without a server-side data migration.
|
|
629
693
|
|
|
630
|
-
return { metadata
|
|
694
|
+
return { metadata };
|
|
631
695
|
}
|
|
632
696
|
|
|
633
697
|
export interface RemoteFile {
|
|
@@ -641,26 +705,14 @@ export async function listRemoteFiles(
|
|
|
641
705
|
ctx: EntityContext,
|
|
642
706
|
prefix?: string,
|
|
643
707
|
): Promise<RemoteFile[]> {
|
|
644
|
-
const
|
|
708
|
+
const io = resolveObjectIO(ctx);
|
|
645
709
|
const files: RemoteFile[] = [];
|
|
646
710
|
let continuationToken: string | undefined;
|
|
647
711
|
|
|
648
712
|
do {
|
|
649
|
-
const
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
Prefix: prefix,
|
|
653
|
-
ContinuationToken: continuationToken,
|
|
654
|
-
}),
|
|
655
|
-
);
|
|
656
|
-
|
|
657
|
-
for (const obj of response.Contents || []) {
|
|
658
|
-
// Pre-fix this guard was `!obj.Key || !obj.Size`. The `!obj.Size` test
|
|
659
|
-
// is truthy when Size === 0 (a real 0-byte object like `.gitkeep`),
|
|
660
|
-
// silently filtering legitimate placeholder files out of every pull
|
|
661
|
-
// plan. Narrow the guard to "no key" only; surface real 0-byte
|
|
662
|
-
// objects to the planner.
|
|
663
|
-
if (!obj.Key) continue;
|
|
713
|
+
const page = await io.listObjects({ prefix, continuationToken });
|
|
714
|
+
|
|
715
|
+
for (const obj of page.objects) {
|
|
664
716
|
// Drop S3 directory-marker objects: the canonical shape is `0-byte
|
|
665
717
|
// key ending in '/'` (S3 console "Create folder", `aws s3 sync` of
|
|
666
718
|
// empty dirs, sync tools that mirror empty trees). Two downstream
|
|
@@ -670,29 +722,29 @@ export async function listRemoteFiles(
|
|
|
670
722
|
// → EISDIR "open" after the parent mkdir creates the leaf as a
|
|
671
723
|
// directory). Filtering here eliminates both.
|
|
672
724
|
//
|
|
673
|
-
// Narrow on
|
|
725
|
+
// Narrow on size===0 (not just trailing-slash) so a hypothetical
|
|
674
726
|
// non-empty object whose key happens to end in '/' is NOT silently
|
|
675
727
|
// hidden — it stays visible and downloadFile surfaces the same
|
|
676
728
|
// EISDIR "open" error pointing at the specific key, which is the
|
|
677
729
|
// signal an operator needs to reconcile the bucket. The vault
|
|
678
730
|
// service doesn't have a code path that produces such an object,
|
|
679
|
-
// but
|
|
731
|
+
// but the listing returns whatever lives in the bucket; silent
|
|
680
732
|
// drop would be worse than loud failure for that case.
|
|
681
733
|
//
|
|
682
734
|
// Real 0-byte placeholders like `.gitkeep` never end in `/` and
|
|
683
735
|
// continue to flow through — the 5.13.0 `.gitkeep` regression
|
|
684
|
-
// remains fixed.
|
|
685
|
-
if (obj.
|
|
736
|
+
// remains fixed. (The `!key` guard now lives in the ObjectIO layer.)
|
|
737
|
+
if (obj.key.endsWith("/") && obj.size === 0) continue;
|
|
686
738
|
|
|
687
739
|
files.push({
|
|
688
|
-
key: obj.
|
|
689
|
-
size: obj.
|
|
690
|
-
lastModified: obj.
|
|
691
|
-
etag: obj.
|
|
740
|
+
key: obj.key,
|
|
741
|
+
size: obj.size,
|
|
742
|
+
lastModified: obj.lastModified,
|
|
743
|
+
etag: obj.etag,
|
|
692
744
|
});
|
|
693
745
|
}
|
|
694
746
|
|
|
695
|
-
continuationToken =
|
|
747
|
+
continuationToken = page.nextContinuationToken;
|
|
696
748
|
} while (continuationToken);
|
|
697
749
|
|
|
698
750
|
return files;
|
|
@@ -702,14 +754,7 @@ export async function deleteRemoteFile(
|
|
|
702
754
|
ctx: EntityContext,
|
|
703
755
|
key: string,
|
|
704
756
|
): Promise<void> {
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
await client.send(
|
|
708
|
-
new DeleteObjectCommand({
|
|
709
|
-
Bucket: ctx.bucketName,
|
|
710
|
-
Key: key,
|
|
711
|
-
}),
|
|
712
|
-
);
|
|
757
|
+
await resolveObjectIO(ctx).deleteObject(key);
|
|
713
758
|
}
|
|
714
759
|
|
|
715
760
|
/**
|
|
@@ -719,26 +764,7 @@ export async function headRemoteFile(
|
|
|
719
764
|
ctx: EntityContext,
|
|
720
765
|
key: string,
|
|
721
766
|
): Promise<{ lastModified: Date; etag: string; size: number; metadata?: Record<string, string> } | null> {
|
|
722
|
-
|
|
723
|
-
try {
|
|
724
|
-
const response = await client.send(
|
|
725
|
-
new HeadObjectCommand({
|
|
726
|
-
Bucket: ctx.bucketName,
|
|
727
|
-
Key: key,
|
|
728
|
-
}),
|
|
729
|
-
);
|
|
730
|
-
return {
|
|
731
|
-
lastModified: response.LastModified || new Date(),
|
|
732
|
-
etag: response.ETag || "",
|
|
733
|
-
size: response.ContentLength || 0,
|
|
734
|
-
metadata: response.Metadata,
|
|
735
|
-
};
|
|
736
|
-
} catch (err: unknown) {
|
|
737
|
-
if (err && typeof err === "object" && "name" in err && err.name === "NotFound") {
|
|
738
|
-
return null;
|
|
739
|
-
}
|
|
740
|
-
throw err;
|
|
741
|
-
}
|
|
767
|
+
return resolveObjectIO(ctx).headObject(key);
|
|
742
768
|
}
|
|
743
769
|
|
|
744
770
|
function getMimeType(filePath: string): string {
|