@indigoai-us/hq-cloud 5.16.0 → 5.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +19 -0
- package/.github/workflows/publish.yml +53 -0
- package/dist/cli/share.d.ts +28 -0
- package/dist/cli/share.d.ts.map +1 -1
- package/dist/cli/share.js +227 -24
- package/dist/cli/share.js.map +1 -1
- package/dist/cli/share.test.js +414 -2
- package/dist/cli/share.test.js.map +1 -1
- package/dist/cli/sync.d.ts.map +1 -1
- package/dist/cli/sync.js +98 -17
- package/dist/cli/sync.js.map +1 -1
- package/dist/cli/sync.test.js +302 -0
- package/dist/cli/sync.test.js.map +1 -1
- package/dist/journal.d.ts +26 -0
- package/dist/journal.d.ts.map +1 -1
- package/dist/journal.js +31 -0
- package/dist/journal.js.map +1 -1
- package/dist/s3.d.ts +91 -0
- package/dist/s3.d.ts.map +1 -1
- package/dist/s3.js +245 -0
- package/dist/s3.js.map +1 -1
- package/dist/s3.test.js +347 -1
- package/dist/s3.test.js.map +1 -1
- package/package.json +1 -1
- package/src/cli/share.test.ts +510 -2
- package/src/cli/share.ts +305 -28
- package/src/cli/sync.test.ts +345 -0
- package/src/cli/sync.ts +133 -24
- package/src/journal.ts +33 -0
- package/src/s3.test.ts +415 -1
- package/src/s3.ts +271 -0
- package/tsconfig.json +12 -1
package/src/s3.ts
CHANGED
|
@@ -73,6 +73,102 @@ function buildAuthorMetadata(
|
|
|
73
73
|
return meta;
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
/**
|
|
77
|
+
* S3 user-metadata header that marks an object as a symlink record.
|
|
78
|
+
* The value is now an OPAQUE MARKER ('1') — the target lives in the
|
|
79
|
+
* object body. Earlier drafts of this feature stored the target in
|
|
80
|
+
* metadata (raw, then base64), but S3 user-metadata is HTTP-header-
|
|
81
|
+
* bound: total ≤ 2 KiB across all user-defined keys + values. A
|
|
82
|
+
* sufficiently long POSIX target (or one with author-metadata
|
|
83
|
+
* adding to the total) would exceed the limit and PutObject would
|
|
84
|
+
* reject the upload outright. Moving the target to the body — which
|
|
85
|
+
* has no such limit — makes target length bounded only by S3's 5 GB
|
|
86
|
+
* single-PutObject size cap. The metadata header still serves as the read-
|
|
87
|
+
* time discriminator (available from a HEAD, so cheaper than fetching body bytes).
|
|
88
|
+
*
|
|
89
|
+
* Backward compat: downloadFile prefers the body (sliced after
|
|
90
|
+
* SYMLINK_BODY_PREFIX) for the target string. If the body doesn't
|
|
91
|
+
* carry the prefix (a legacy upload from earlier in this PR's
|
|
92
|
+
* lifetime), it falls back to base64-decoding the metadata value —
|
|
93
|
+
* the round-trip-validating decoder returns raw or decoded as
|
|
94
|
+
* appropriate. Any prior in-flight upload still resolves correctly.
|
|
95
|
+
*/
|
|
96
|
+
export const SYMLINK_TARGET_META_KEY = "hq-symlink-target";
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Constant value written to SYMLINK_TARGET_META_KEY. Any non-empty
|
|
100
|
+
* string would work as a discriminator — '1' is just compact and
|
|
101
|
+
* conventional for boolean flags in HTTP headers.
|
|
102
|
+
*/
|
|
103
|
+
export const SYMLINK_MARKER_META_VALUE = "1";
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Encode a target for the S3 metadata header value. Retained as the
|
|
107
|
+
* legacy encoder so a downloader can still receive it and round-trip
|
|
108
|
+
* via decodeSymlinkMetadataValue, but new uploads use
|
|
109
|
+
* SYMLINK_MARKER_META_VALUE — the target lives in the body now.
|
|
110
|
+
*/
|
|
111
|
+
export function encodeSymlinkMetadataValue(target: string): string {
|
|
112
|
+
return Buffer.from(target, "utf-8").toString("base64");
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Decode a target from the S3 metadata header value. Used as a
|
|
117
|
+
* legacy fallback when the body doesn't carry SYMLINK_BODY_PREFIX
|
|
118
|
+
* (i.e. an in-flight upload from earlier in this PR before the
|
|
119
|
+
* marker-only metadata convention). Round-trip-validates: if the
|
|
120
|
+
* value isn't valid base64 of UTF-8, returns the raw string.
|
|
121
|
+
*/
|
|
122
|
+
export function decodeSymlinkMetadataValue(value: string): string {
|
|
123
|
+
try {
|
|
124
|
+
const decoded = Buffer.from(value, "base64").toString("utf-8");
|
|
125
|
+
if (Buffer.from(decoded, "utf-8").toString("base64") === value) {
|
|
126
|
+
return decoded;
|
|
127
|
+
}
|
|
128
|
+
} catch {
|
|
129
|
+
// fall through
|
|
130
|
+
}
|
|
131
|
+
return value;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* Magic prefix prepended to symlink-record bodies on the wire. Two
|
|
136
|
+
* properties this gives us:
|
|
137
|
+
*
|
|
138
|
+
* 1. ETag distinguishability. S3 ETag = MD5(body) for single-part, non-SSE-KMS/SSE-C uploads. Without a prefix,
|
|
139
|
+
* a symlink whose target string equals some regular file's exact
|
|
140
|
+
* contents would produce the same ETag, and the LIST-based pull
|
|
141
|
+
* planner (which can't see per-object metadata) would classify a
|
|
142
|
+
* symlink ↔ regular-file transition as "no change" and never
|
|
143
|
+
* replace the local representation. The prefix makes those two
|
|
144
|
+
* shapes ETag-distinguishable for the realistic case (collision
|
|
145
|
+
* now requires a regular file whose contents *start* with this
|
|
146
|
+
* prefix, which is implausible for any non-malicious source).
|
|
147
|
+
*
|
|
148
|
+
* 2. Fallback discriminator. If user-metadata is ever lost (S3
|
|
149
|
+
* cross-region replication of object data only, manual S3 console
|
|
150
|
+
* copy that drops Metadata), the body prefix lets a downloader
|
|
151
|
+
* recover the symlink record without needing the metadata header.
|
|
152
|
+
* We don't currently rely on this fallback — the metadata header
|
|
153
|
+
* is still the primary discriminator on the read path — but the
|
|
154
|
+
* prefix keeps the option open and avoids painting us into a
|
|
155
|
+
* "metadata is the only signal" corner.
|
|
156
|
+
*
|
|
157
|
+
* Format: `hq-symlink:` + target string (UTF-8 bytes). No trailing
|
|
158
|
+
* newline. The colon separates the marker from the target so a future
|
|
159
|
+
* extension can encode additional fields if needed.
|
|
160
|
+
*/
|
|
161
|
+
export const SYMLINK_BODY_PREFIX = "hq-symlink:";
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Encode/decode the symlink wire body. Kept as exported helpers so the
|
|
165
|
+
* format is centrally defined and tests can probe both sides without
|
|
166
|
+
* duplicating the prefix string.
|
|
167
|
+
*/
|
|
168
|
+
export function encodeSymlinkBody(target: string): Buffer {
|
|
169
|
+
return Buffer.from(SYMLINK_BODY_PREFIX + target, "utf-8");
|
|
170
|
+
}
|
|
171
|
+
|
|
76
172
|
export async function uploadFile(
|
|
77
173
|
ctx: EntityContext,
|
|
78
174
|
localPath: string,
|
|
@@ -117,6 +213,72 @@ export async function uploadFile(
|
|
|
117
213
|
return { etag: response.ETag || "" };
|
|
118
214
|
}
|
|
119
215
|
|
|
216
|
+
/**
|
|
217
|
+
* Upload a symlink as a small object whose body is SYMLINK_BODY_PREFIX +
|
|
218
|
+
* the target; user metadata carries only a marker flag. Mirrors uploadFile's signature so callers can pick
|
|
219
|
+
* the right primitive once they've classified the entry as link vs file.
|
|
220
|
+
*
|
|
221
|
+
* The target string is stored verbatim — whatever fs.readlinkSync returned.
|
|
222
|
+
* Relative targets transfer cleanly across machines; absolute targets are
|
|
223
|
+
* preserved as-is and may be broken on a destination that doesn't share
|
|
224
|
+
* the source's $HOME layout. Cross-machine portability of absolute targets
|
|
225
|
+
* is out of scope for this primitive — the policy decision lives in the
|
|
226
|
+
* caller (currently: upload anyway, never silently rewrite).
|
|
227
|
+
*/
|
|
228
|
+
export async function uploadSymlink(
|
|
229
|
+
ctx: EntityContext,
|
|
230
|
+
target: string,
|
|
231
|
+
key: string,
|
|
232
|
+
author?: UploadAuthor,
|
|
233
|
+
): Promise<{ etag: string }> {
|
|
234
|
+
const client = buildClient(ctx);
|
|
235
|
+
|
|
236
|
+
// Same created-at preservation logic as uploadFile so the hq-console
|
|
237
|
+
// NEW-pill ageing window doesn't reset when a symlink is re-uploaded
|
|
238
|
+
// unchanged across syncs.
|
|
239
|
+
let createdAt = new Date().toISOString();
|
|
240
|
+
if (author) {
|
|
241
|
+
try {
|
|
242
|
+
const head = await client.send(
|
|
243
|
+
new HeadObjectCommand({ Bucket: ctx.bucketName, Key: key }),
|
|
244
|
+
);
|
|
245
|
+
const existing = head.Metadata?.["created-at"];
|
|
246
|
+
if (typeof existing === "string" && existing.length > 0) {
|
|
247
|
+
createdAt = existing;
|
|
248
|
+
}
|
|
249
|
+
} catch {
|
|
250
|
+
// First upload of this key, or HEAD denied — keep `now`.
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
const Metadata: Record<string, string> = {
|
|
255
|
+
// Marker-only: a constant flag value, not the target. The body
|
|
256
|
+
// is the source of truth for the target (no 2 KiB cap, no
|
|
257
|
+
// header-encoding considerations). See SYMLINK_TARGET_META_KEY
|
|
258
|
+
// doc for the full reasoning.
|
|
259
|
+
[SYMLINK_TARGET_META_KEY]: SYMLINK_MARKER_META_VALUE,
|
|
260
|
+
...(author ? buildAuthorMetadata(author, createdAt) : {}),
|
|
261
|
+
};
|
|
262
|
+
|
|
263
|
+
const response = await client.send(
|
|
264
|
+
new PutObjectCommand({
|
|
265
|
+
Bucket: ctx.bucketName,
|
|
266
|
+
Key: key,
|
|
267
|
+
// Body = SYMLINK_BODY_PREFIX + target (UTF-8). The prefix is what
|
|
268
|
+
// makes a symlink record's ETag distinguishable from a regular
|
|
269
|
+
// file whose contents happen to equal the target string — the
|
|
270
|
+
// LIST-based pull planner can't see per-object metadata, so ETag
|
|
271
|
+
// is its only drift signal across symlink ↔ regular-file
|
|
272
|
+
// transitions. See SYMLINK_BODY_PREFIX doc above.
|
|
273
|
+
Body: encodeSymlinkBody(target),
|
|
274
|
+
ContentType: "application/octet-stream",
|
|
275
|
+
Metadata,
|
|
276
|
+
}),
|
|
277
|
+
);
|
|
278
|
+
|
|
279
|
+
return { etag: response.ETag || "" };
|
|
280
|
+
}
|
|
281
|
+
|
|
120
282
|
export async function downloadFile(
|
|
121
283
|
ctx: EntityContext,
|
|
122
284
|
key: string,
|
|
@@ -140,6 +302,93 @@ export async function downloadFile(
|
|
|
140
302
|
fs.mkdirSync(dir, { recursive: true });
|
|
141
303
|
}
|
|
142
304
|
|
|
305
|
+
// Symlink path: presence of SYMLINK_TARGET_META_KEY (any non-empty
|
|
306
|
+
// value) is the discriminator. The TARGET is now sourced from the
|
|
307
|
+
// body — the marker-only metadata convention removes the 2 KiB
|
|
308
|
+
// header limit so long POSIX targets don't fail PutObject.
|
|
309
|
+
//
|
|
310
|
+
// S3 lowercases user-metadata keys on read (and sometimes on
|
|
311
|
+
// write), so the lookup uses the lowercased form. We don't
|
|
312
|
+
// normalize Metadata keys ourselves — the AWS SDK already does it.
|
|
313
|
+
const symlinkMarker = response.Metadata?.[SYMLINK_TARGET_META_KEY];
|
|
314
|
+
const isSymlinkRecord =
|
|
315
|
+
typeof symlinkMarker === "string" && symlinkMarker.length > 0;
|
|
316
|
+
if (isSymlinkRecord) {
|
|
317
|
+
// Consume the body to extract the target. Symlink record bodies
|
|
318
|
+
// are bounded by target length (typically <300 bytes for
|
|
319
|
+
// relative paths, hard-capped by S3's 5 GB single-PutObject limit); the
|
|
320
|
+
// read is cheap. Drain explicitly so the SDK's HTTP socket is
|
|
321
|
+
// released back to the connection pool — without this, a sync
|
|
322
|
+
// over a tree with many symlinks can stall or pool-exhaust.
|
|
323
|
+
const chunks: Buffer[] = [];
|
|
324
|
+
const stream = response.Body as AsyncIterable<Uint8Array>;
|
|
325
|
+
for await (const chunk of stream) {
|
|
326
|
+
chunks.push(Buffer.from(chunk));
|
|
327
|
+
}
|
|
328
|
+
const bodyString = Buffer.concat(chunks).toString("utf-8");
|
|
329
|
+
|
|
330
|
+
let symlinkTarget: string;
|
|
331
|
+
if (bodyString.startsWith(SYMLINK_BODY_PREFIX)) {
|
|
332
|
+
symlinkTarget = bodyString.slice(SYMLINK_BODY_PREFIX.length);
|
|
333
|
+
} else {
|
|
334
|
+
// Backward-compat fallback: a legacy upload from earlier in
|
|
335
|
+
// this PR's lifetime stored the target in metadata (raw or
|
|
336
|
+
// base64) rather than the body. decodeSymlinkMetadataValue
|
|
337
|
+
// round-trip-validates so a raw value passes through and a
|
|
338
|
+
// base64 value decodes; either way we get the target.
|
|
339
|
+
symlinkTarget = decodeSymlinkMetadataValue(symlinkMarker);
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
if (symlinkTarget.length === 0) {
|
|
343
|
+
throw new Error(
|
|
344
|
+
`Symlink record for ${key} had no target (body: ${bodyString.length} bytes, marker: ${symlinkMarker})`,
|
|
345
|
+
);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// Replace whatever's at localPath. unlink covers regular files; for
|
|
349
|
+
// a stale symlink this also frees the slot. ENOENT is fine — first
|
|
350
|
+
// download of this key has nothing to clear. Other errors propagate
|
|
351
|
+
// because they signal a real problem (permissions, parent missing).
|
|
352
|
+
try {
|
|
353
|
+
fs.unlinkSync(localPath);
|
|
354
|
+
} catch (err: unknown) {
|
|
355
|
+
if (
|
|
356
|
+
!err ||
|
|
357
|
+
typeof err !== "object" ||
|
|
358
|
+
!("code" in err) ||
|
|
359
|
+
(err as { code?: string }).code !== "ENOENT"
|
|
360
|
+
) {
|
|
361
|
+
throw err;
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
fs.symlinkSync(symlinkTarget, localPath);
|
|
365
|
+
return;
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
// Symmetric to the symlink branch above: when a key was previously a
|
|
369
|
+
// symlink and is later replaced in S3 by a regular object, the local
|
|
370
|
+
// path still holds the stale symlink from the last sync. Without this
|
|
371
|
+
// unlink, fs.writeFileSync follows the link and overwrites its
|
|
372
|
+
// target file's contents — leaving the link in place and the new
|
|
373
|
+
// regular object never materializing at the intended path. lstat
|
|
374
|
+
// (not statSync) avoids following the link to test what's there.
|
|
375
|
+
try {
|
|
376
|
+
const existing = fs.lstatSync(localPath);
|
|
377
|
+
if (existing.isSymbolicLink()) {
|
|
378
|
+
fs.unlinkSync(localPath);
|
|
379
|
+
}
|
|
380
|
+
} catch (err: unknown) {
|
|
381
|
+
// ENOENT means nothing's there; let writeFileSync handle creation.
|
|
382
|
+
if (
|
|
383
|
+
err &&
|
|
384
|
+
typeof err === "object" &&
|
|
385
|
+
"code" in err &&
|
|
386
|
+
(err as { code?: string }).code !== "ENOENT"
|
|
387
|
+
) {
|
|
388
|
+
throw err;
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
143
392
|
const chunks: Buffer[] = [];
|
|
144
393
|
const stream = response.Body as AsyncIterable<Uint8Array>;
|
|
145
394
|
for await (const chunk of stream) {
|
|
@@ -179,6 +428,28 @@ export async function listRemoteFiles(
|
|
|
179
428
|
// plan. Narrow the guard to "no key" only; surface real 0-byte
|
|
180
429
|
// objects to the planner.
|
|
181
430
|
if (!obj.Key) continue;
|
|
431
|
+
// Drop S3 directory-marker objects: the canonical shape is `0-byte
|
|
432
|
+
// key ending in '/'` (S3 console "Create folder", `aws s3 sync` of
|
|
433
|
+
// empty dirs, sync tools that mirror empty trees). Two downstream
|
|
434
|
+
// sites blow up on them — pull planner (sync.ts: `hashFile` calls
|
|
435
|
+
// `readFileSync` on an existing local dir → EISDIR "read") and the
|
|
436
|
+
// download path (s3.ts: `writeFileSync` on a trailing-slash path
|
|
437
|
+
// → EISDIR "open" after the parent mkdir creates the leaf as a
|
|
438
|
+
// directory). Filtering here eliminates both.
|
|
439
|
+
//
|
|
440
|
+
// Narrow on Size===0 (not just trailing-slash) so a hypothetical
|
|
441
|
+
// non-empty object whose key happens to end in '/' is NOT silently
|
|
442
|
+
// hidden — it stays visible and downloadFile surfaces the same
|
|
443
|
+
// EISDIR "open" error pointing at the specific key, which is the
|
|
444
|
+
// signal an operator needs to reconcile the bucket. The vault
|
|
445
|
+
// service doesn't have a code path that produces such an object,
|
|
446
|
+
// but ListObjectsV2 returns whatever lives in the bucket; silent
|
|
447
|
+
// drop would be worse than loud failure for that case.
|
|
448
|
+
//
|
|
449
|
+
// Real 0-byte placeholders like `.gitkeep` never end in `/` and
|
|
450
|
+
// continue to flow through — the 5.13.0 `.gitkeep` regression
|
|
451
|
+
// remains fixed.
|
|
452
|
+
if (obj.Key.endsWith("/") && (obj.Size ?? 0) === 0) continue;
|
|
182
453
|
|
|
183
454
|
files.push({
|
|
184
455
|
key: obj.Key,
|
package/tsconfig.json
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
{
|
|
2
|
-
"extends": "../../tsconfig.base.json",
|
|
3
2
|
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"moduleResolution": "bundler",
|
|
6
|
+
"lib": ["ES2022"],
|
|
7
|
+
"strict": true,
|
|
8
|
+
"esModuleInterop": true,
|
|
9
|
+
"skipLibCheck": true,
|
|
10
|
+
"forceConsistentCasingInFileNames": true,
|
|
11
|
+
"declaration": true,
|
|
12
|
+
"declarationMap": true,
|
|
13
|
+
"sourceMap": true,
|
|
14
|
+
"resolveJsonModule": true,
|
|
4
15
|
"outDir": "dist",
|
|
5
16
|
"rootDir": "src"
|
|
6
17
|
},
|