@indigoai-us/hq-cloud 5.17.0 → 5.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/s3.ts CHANGED
@@ -73,6 +73,102 @@ function buildAuthorMetadata(
73
73
  return meta;
74
74
  }
75
75
 
76
+ /**
77
+ * S3 user-metadata header that marks an object as a symlink record.
78
+ * The value is now an OPAQUE MARKER ('1') — the target lives in the
79
+ * object body. Earlier drafts of this feature stored the target in
80
+ * metadata (raw, then base64), but S3 user-metadata is HTTP-header-
81
+ * bound: total ≤ 2 KiB across all user-defined keys + values. A
82
+ * sufficiently long POSIX target (or one with author-metadata
83
+ * adding to the total) would exceed the limit and PutObject would
84
+ * reject the upload outright. Moving the target to the body — which
85
+ * has no such limit — makes target length bounded only by S3's 5 GB
86
+ * single-PUT upload cap. The metadata header still serves as the read-
87
+ * time discriminator (metadata is available from a HEAD, which is cheaper than fetching body bytes).
88
+ *
89
+ * Backward compat: downloadFile prefers the body (sliced after
90
+ * SYMLINK_BODY_PREFIX) for the target string. If the body doesn't
91
+ * carry the prefix (a legacy upload from earlier in this PR's
92
+ * lifetime), it falls back to base64-decoding the metadata value —
93
+ * the round-trip-validating decoder returns raw or decoded as
94
+ * appropriate. Any prior in-flight upload still resolves correctly.
95
+ */
96
+ export const SYMLINK_TARGET_META_KEY = "hq-symlink-target";
97
+
98
+ /**
99
+ * Constant value written to SYMLINK_TARGET_META_KEY. Any non-empty
100
+ * string would work as a discriminator — '1' is just compact and
101
+ * conventional for boolean flags in HTTP headers.
102
+ */
103
+ export const SYMLINK_MARKER_META_VALUE = "1";
104
+
105
+ /**
106
+ * Base64-encode a target for the S3 metadata header value. Retained as the
106
+ * legacy encoder: values it produces still round-trip through
107
+ * decodeSymlinkMetadataValue on download, but new uploads write
109
+ * SYMLINK_MARKER_META_VALUE — the target lives in the body now.
110
+ */
111
+ export function encodeSymlinkMetadataValue(target: string): string {
112
+ return Buffer.from(target, "utf-8").toString("base64");
113
+ }
114
+
115
+ /**
116
+ * Decode a target from the S3 metadata header value. Used as a
117
+ * legacy fallback when the body doesn't carry SYMLINK_BODY_PREFIX
118
+ * (i.e. an in-flight upload from earlier in this PR before the
119
+ * marker-only metadata convention). Round-trip-validates: if the
120
+ * value isn't valid base64 of UTF-8, returns the raw string.
121
+ */
122
+ export function decodeSymlinkMetadataValue(value: string): string {
123
+ try {
124
+ const decoded = Buffer.from(value, "base64").toString("utf-8");
125
+ if (Buffer.from(decoded, "utf-8").toString("base64") === value) {
126
+ return decoded;
127
+ }
128
+ } catch {
129
+ // fall through
130
+ }
131
+ return value;
132
+ }
133
+
134
+ /**
135
+ * Magic prefix prepended to symlink-record bodies on the wire. Two
136
+ * properties this gives us:
137
+ *
138
+ * 1. ETag distinguishability. S3 ETag = MD5(body). Without a prefix,
139
+ * a symlink whose target string equals some regular file's exact
140
+ * contents would produce the same ETag, and the LIST-based pull
141
+ * planner (which can't see per-object metadata) would classify a
142
+ * symlink ↔ regular-file transition as "no change" and never
143
+ * replace the local representation. The prefix makes those two
144
+ * shapes ETag-distinguishable for the realistic case (collision
145
+ * now requires a regular file whose contents *start* with this
146
+ * prefix, which is implausible for any non-malicious source).
147
+ *
148
+ * 2. Fallback discriminator. If user-metadata is ever lost (S3
149
+ * cross-region replication of object data only, manual S3 console
150
+ * copy that drops Metadata), the body prefix lets a downloader
151
+ * recover the symlink record without needing the metadata header.
152
+ * We don't currently rely on this fallback — the metadata header
153
+ * is still the primary discriminator on the read path — but the
154
+ * prefix keeps the option open and avoids painting us into a
155
+ * "metadata is the only signal" corner.
156
+ *
157
+ * Format: `hq-symlink:` + target string (UTF-8 bytes). No trailing
158
+ * newline. The colon separates the marker from the target so a future
159
+ * extension can encode additional fields if needed.
160
+ */
161
+ export const SYMLINK_BODY_PREFIX = "hq-symlink:";
162
+
163
+ /**
164
+ * Encode the symlink wire body (decoding is a prefix slice in downloadFile). Kept as an exported helper so the
165
+ * format is centrally defined and tests can probe both sides without
166
+ * duplicating the prefix string.
167
+ */
168
+ export function encodeSymlinkBody(target: string): Buffer {
169
+ return Buffer.from(SYMLINK_BODY_PREFIX + target, "utf-8");
170
+ }
171
+
76
172
  export async function uploadFile(
77
173
  ctx: EntityContext,
78
174
  localPath: string,
@@ -117,6 +213,72 @@ export async function uploadFile(
117
213
  return { etag: response.ETag || "" };
118
214
  }
119
215
 
216
+ /**
217
+ * Upload a symlink as a small object whose body carries the link's target
217
+ * string (after SYMLINK_BODY_PREFIX); user metadata holds only a marker flag. Mirrors uploadFile's signature so callers can pick
219
+ * the right primitive once they've classified the entry as link vs file.
220
+ *
221
+ * The target string is stored verbatim — whatever fs.readlinkSync returned.
222
+ * Relative targets transfer cleanly across machines; absolute targets are
223
+ * preserved as-is and may be broken on a destination that doesn't share
224
+ * the source's $HOME layout. Cross-machine portability of absolute targets
225
+ * is out of scope for this primitive — the policy decision lives in the
226
+ * caller (currently: upload anyway, never silently rewrite).
227
+ */
228
+ export async function uploadSymlink(
229
+ ctx: EntityContext,
230
+ target: string,
231
+ key: string,
232
+ author?: UploadAuthor,
233
+ ): Promise<{ etag: string }> {
234
+ const client = buildClient(ctx);
235
+
236
+ // Same created-at preservation logic as uploadFile so the hq-console
237
+ // NEW-pill ageing window doesn't reset when a symlink is re-uploaded
238
+ // unchanged across syncs.
239
+ let createdAt = new Date().toISOString();
240
+ if (author) {
241
+ try {
242
+ const head = await client.send(
243
+ new HeadObjectCommand({ Bucket: ctx.bucketName, Key: key }),
244
+ );
245
+ const existing = head.Metadata?.["created-at"];
246
+ if (typeof existing === "string" && existing.length > 0) {
247
+ createdAt = existing;
248
+ }
249
+ } catch {
250
+ // First upload of this key, or HEAD denied — keep `now`.
251
+ }
252
+ }
253
+
254
+ const Metadata: Record<string, string> = {
255
+ // Marker-only: a constant flag value, not the target. The body
256
+ // is the source of truth for the target (no 2 KiB cap, no
257
+ // header-encoding considerations). See SYMLINK_TARGET_META_KEY
258
+ // doc for the full reasoning.
259
+ [SYMLINK_TARGET_META_KEY]: SYMLINK_MARKER_META_VALUE,
260
+ ...(author ? buildAuthorMetadata(author, createdAt) : {}),
261
+ };
262
+
263
+ const response = await client.send(
264
+ new PutObjectCommand({
265
+ Bucket: ctx.bucketName,
266
+ Key: key,
267
+ // Body = SYMLINK_BODY_PREFIX + target (UTF-8). The prefix is what
268
+ // makes a symlink record's ETag distinguishable from a regular
269
+ // file whose contents happen to equal the target string — the
270
+ // LIST-based pull planner can't see per-object metadata, so ETag
271
+ // is its only drift signal across symlink ↔ regular-file
272
+ // transitions. See SYMLINK_BODY_PREFIX doc above.
273
+ Body: encodeSymlinkBody(target),
274
+ ContentType: "application/octet-stream",
275
+ Metadata,
276
+ }),
277
+ );
278
+
279
+ return { etag: response.ETag || "" };
280
+ }
281
+
120
282
  export async function downloadFile(
121
283
  ctx: EntityContext,
122
284
  key: string,
@@ -140,6 +302,93 @@ export async function downloadFile(
140
302
  fs.mkdirSync(dir, { recursive: true });
141
303
  }
142
304
 
305
+ // Symlink path: presence of SYMLINK_TARGET_META_KEY (any non-empty
306
+ // value) is the discriminator. The TARGET is now sourced from the
307
+ // body — the marker-only metadata convention removes the 2 KiB
308
+ // header limit so long POSIX targets don't fail PutObject.
309
+ //
310
+ // S3 lowercases user-metadata keys on read (and sometimes on
311
+ // write), so the lookup uses the lowercased form. We don't
312
+ // normalize Metadata keys ourselves — the AWS SDK already does it.
313
+ const symlinkMarker = response.Metadata?.[SYMLINK_TARGET_META_KEY];
314
+ const isSymlinkRecord =
315
+ typeof symlinkMarker === "string" && symlinkMarker.length > 0;
316
+ if (isSymlinkRecord) {
317
+ // Consume the body to extract the target. Symlink record bodies
318
+ // are bounded by target length (typically <300 bytes for
319
+ // relative paths, hard-capped by S3's 5 GB single-PUT upload limit); the
320
+ // read is cheap. Drain explicitly so the SDK's HTTP socket is
321
+ // released back to the connection pool — without this, a sync
322
+ // over a tree with many symlinks can stall or pool-exhaust.
323
+ const chunks: Buffer[] = [];
324
+ const stream = response.Body as AsyncIterable<Uint8Array>;
325
+ for await (const chunk of stream) {
326
+ chunks.push(Buffer.from(chunk));
327
+ }
328
+ const bodyString = Buffer.concat(chunks).toString("utf-8");
329
+
330
+ let symlinkTarget: string;
331
+ if (bodyString.startsWith(SYMLINK_BODY_PREFIX)) {
332
+ symlinkTarget = bodyString.slice(SYMLINK_BODY_PREFIX.length);
333
+ } else {
334
+ // Backward-compat fallback: a legacy upload from earlier in
335
+ // this PR's lifetime stored the target in metadata (raw or
336
+ // base64) rather than the body. decodeSymlinkMetadataValue
337
+ // round-trip-validates so a raw value passes through and a
338
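+ // base64 value decodes; either way we get the target. NOTE(review): a marker-only record ('1') whose body lacks the prefix would resolve to the target '1' here.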
339
+ symlinkTarget = decodeSymlinkMetadataValue(symlinkMarker);
340
+ }
341
+
342
+ if (symlinkTarget.length === 0) {
343
+ throw new Error(
344
+ `Symlink record for ${key} had no target (body: ${bodyString.length} bytes, marker: ${symlinkMarker})`,
345
+ );
346
+ }
347
+
348
+ // Replace whatever's at localPath. unlink covers regular files; for
349
+ // a stale symlink this also frees the slot. ENOENT is fine — first
350
+ // download of this key has nothing to clear. Other errors propagate
351
+ // because they signal a real problem (permissions, parent missing).
352
+ try {
353
+ fs.unlinkSync(localPath);
354
+ } catch (err: unknown) {
355
+ if (
356
+ !err ||
357
+ typeof err !== "object" ||
358
+ !("code" in err) ||
359
+ (err as { code?: string }).code !== "ENOENT"
360
+ ) {
361
+ throw err;
362
+ }
363
+ }
364
+ fs.symlinkSync(symlinkTarget, localPath);
365
+ return;
366
+ }
367
+
368
+ // Symmetric to the symlink branch above: when a key was previously a
369
+ // symlink and is later replaced in S3 by a regular object, the local
370
+ // path still holds the stale symlink from the last sync. Without this
371
+ // unlink, fs.writeFileSync follows the link and overwrites its
372
+ // target file's contents — leaving the link in place and the new
373
+ // regular object never materializing at the intended path. lstat
374
+ // (not statSync) avoids following the link to test what's there.
375
+ try {
376
+ const existing = fs.lstatSync(localPath);
377
+ if (existing.isSymbolicLink()) {
378
+ fs.unlinkSync(localPath);
379
+ }
380
+ } catch (err: unknown) {
381
+ // ENOENT means nothing's there; let writeFileSync handle creation.
382
+ if (
383
+ err &&
384
+ typeof err === "object" &&
385
+ "code" in err &&
386
+ (err as { code?: string }).code !== "ENOENT"
387
+ ) {
388
+ throw err;
389
+ }
390
+ }
391
+
143
392
  const chunks: Buffer[] = [];
144
393
  const stream = response.Body as AsyncIterable<Uint8Array>;
145
394
  for await (const chunk of stream) {
@@ -179,6 +428,28 @@ export async function listRemoteFiles(
179
428
  // plan. Narrow the guard to "no key" only; surface real 0-byte
180
429
  // objects to the planner.
181
430
  if (!obj.Key) continue;
431
+ // Drop S3 directory-marker objects: the canonical shape is `0-byte
432
+ // key ending in '/'` (S3 console "Create folder", `aws s3 sync` of
433
+ // empty dirs, sync tools that mirror empty trees). Two downstream
434
+ // sites blow up on them — pull planner (sync.ts: `hashFile` calls
435
+ // `readFileSync` on an existing local dir → EISDIR "read") and the
436
+ // download path (s3.ts: `writeFileSync` on a trailing-slash path
437
+ // → EISDIR "open" after the parent mkdir creates the leaf as a
438
+ // directory). Filtering here eliminates both.
439
+ //
440
+ // Narrow on Size===0 (not just trailing-slash) so a hypothetical
441
+ // non-empty object whose key happens to end in '/' is NOT silently
442
+ // hidden — it stays visible and downloadFile surfaces the same
443
+ // EISDIR "open" error pointing at the specific key, which is the
444
+ // signal an operator needs to reconcile the bucket. The vault
445
+ // service doesn't have a code path that produces such an object,
446
+ // but ListObjectsV2 returns whatever lives in the bucket; silent
447
+ // drop would be worse than loud failure for that case.
448
+ //
449
+ // Real 0-byte placeholders like `.gitkeep` never end in `/` and
450
+ // continue to flow through — the 5.13.0 `.gitkeep` regression
451
+ // remains fixed.
452
+ if (obj.Key.endsWith("/") && (obj.Size ?? 0) === 0) continue;
182
453
 
183
454
  files.push({
184
455
  key: obj.Key,
package/tsconfig.json CHANGED
@@ -1,6 +1,17 @@
1
1
  {
2
- "extends": "../../tsconfig.base.json",
3
2
  "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "lib": ["ES2022"],
7
+ "strict": true,
8
+ "esModuleInterop": true,
9
+ "skipLibCheck": true,
10
+ "forceConsistentCasingInFileNames": true,
11
+ "declaration": true,
12
+ "declarationMap": true,
13
+ "sourceMap": true,
14
+ "resolveJsonModule": true,
4
15
  "outDir": "dist",
5
16
  "rootDir": "src"
6
17
  },