@indigoai-us/hq-cloud 5.22.0 → 5.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/index.d.ts +9 -3
  2. package/dist/index.d.ts.map +1 -1
  3. package/dist/index.js +9 -1
  4. package/dist/index.js.map +1 -1
  5. package/dist/journal.d.ts +76 -1
  6. package/dist/journal.d.ts.map +1 -1
  7. package/dist/journal.js +148 -1
  8. package/dist/journal.js.map +1 -1
  9. package/dist/journal.test.js +251 -5
  10. package/dist/journal.test.js.map +1 -1
  11. package/dist/prefix-coalesce.d.ts +38 -0
  12. package/dist/prefix-coalesce.d.ts.map +1 -0
  13. package/dist/prefix-coalesce.js +69 -0
  14. package/dist/prefix-coalesce.js.map +1 -0
  15. package/dist/prefix-coalesce.test.d.ts +2 -0
  16. package/dist/prefix-coalesce.test.d.ts.map +1 -0
  17. package/dist/prefix-coalesce.test.js +77 -0
  18. package/dist/prefix-coalesce.test.js.map +1 -0
  19. package/dist/public-surface.test.d.ts +15 -0
  20. package/dist/public-surface.test.d.ts.map +1 -0
  21. package/dist/public-surface.test.js +105 -0
  22. package/dist/public-surface.test.js.map +1 -0
  23. package/dist/remote-pull.d.ts +145 -1
  24. package/dist/remote-pull.d.ts.map +1 -1
  25. package/dist/remote-pull.js +258 -1
  26. package/dist/remote-pull.js.map +1 -1
  27. package/dist/remote-pull.test.js +470 -2
  28. package/dist/remote-pull.test.js.map +1 -1
  29. package/dist/scope-shrink.d.ts +109 -0
  30. package/dist/scope-shrink.d.ts.map +1 -0
  31. package/dist/scope-shrink.js +196 -0
  32. package/dist/scope-shrink.js.map +1 -0
  33. package/dist/scope-shrink.test.d.ts +13 -0
  34. package/dist/scope-shrink.test.d.ts.map +1 -0
  35. package/dist/scope-shrink.test.js +342 -0
  36. package/dist/scope-shrink.test.js.map +1 -0
  37. package/dist/types.d.ts +48 -1
  38. package/dist/types.d.ts.map +1 -1
  39. package/dist/vault-client.d.ts +178 -0
  40. package/dist/vault-client.d.ts.map +1 -1
  41. package/dist/vault-client.js +73 -0
  42. package/dist/vault-client.js.map +1 -1
  43. package/dist/vault-client.test.js +226 -0
  44. package/dist/vault-client.test.js.map +1 -1
  45. package/package.json +1 -1
  46. package/src/index.ts +67 -0
  47. package/src/journal.test.ts +284 -5
  48. package/src/journal.ts +167 -2
  49. package/src/prefix-coalesce.test.ts +95 -0
  50. package/src/prefix-coalesce.ts +72 -0
  51. package/src/public-surface.test.ts +112 -0
  52. package/src/remote-pull.test.ts +540 -3
  53. package/src/remote-pull.ts +419 -2
  54. package/src/scope-shrink.test.ts +402 -0
  55. package/src/scope-shrink.ts +264 -0
  56. package/src/types.ts +49 -1
  57. package/src/vault-client.test.ts +335 -0
  58. package/src/vault-client.ts +223 -0
@@ -12,8 +12,31 @@
12
12
  * bidirectional auto-sync the Settings toggle exposes.
13
13
  */
14
14
  import type { RemoteFile } from "./s3.js";
15
- import { normalizeEtag } from "./journal.js";
16
- import type { SyncJournal } from "./types.js";
15
+ import { listRemoteFiles } from "./s3.js";
16
+ import {
17
+ appendPullRecord,
18
+ gcTombstones,
19
+ generatePullId,
20
+ lastPullRecord,
21
+ normalizeEtag,
22
+ } from "./journal.js";
23
+ import type {
24
+ EntityContext,
25
+ PullRecord,
26
+ SyncJournal,
27
+ } from "./types.js";
28
+ import type {
29
+ ExplicitGrant,
30
+ MembershipSyncConfig,
31
+ } from "./vault-client.js";
32
+ import { coalescePrefixes } from "./prefix-coalesce.js";
33
+ import {
34
+ applyScopeShrink,
35
+ buildScopeShrinkPlan,
36
+ ScopeShrinkBlockedError,
37
+ type ApplyScopeShrinkResult,
38
+ type ScopeShrinkPlan,
39
+ } from "./scope-shrink.js";
17
40
 
18
41
  /** Minimal shape every entry in `skip` has — `key` is the only field
19
42
  * guaranteed to be populated. Remote-listing skips carry the full RemoteFile;
@@ -99,3 +122,397 @@ export function decideRemotePulls({
99
122
 
100
123
  return { download, deleteLocal, skip };
101
124
  }
125
+
126
+ // ───────────────────────────────────────────────────────────────────────────
127
+ // US-005: ACL-aware narrowing — engine layer
128
+ // ───────────────────────────────────────────────────────────────────────────
129
+
/**
 * Maximum number of coalesced prefixes a single STS vend may carry
 * (US-001-D). The vault-service `validateVendRequest` rejects requests with
 * `paths.length > 10`, so a coalesced grant set larger than this MUST either
 * be sharded across several vends + ListObjectsV2 calls or be served by a
 * broad list + post-filter.
 */
export const VEND_PATH_CAP = 10;

/**
 * Coalesced-prefix count above which the engine prefers one broad
 * ListObjectsV2 plus a client-side post-filter over fanning out N vends.
 * Tuned for the US-001-B p99 finding (TBD live): up to 50 coalesced prefixes
 * is cheaper as vend-fanout (~5 STS calls); more than 50 is cheaper as a
 * single broad list.
 */
export const POST_FILTER_THRESHOLD = 50;

/** Upper bound on concurrent STS+ListObjectsV2 calls during vend fan-out. */
export const VEND_FANOUT_CONCURRENCY = 5;
148
+
/**
 * The effective sync scope for one company, derived from the membership's
 * sync-config and — in `shared` mode — the caller's explicit grants.
 * Produced by `resolveCompanyScope`; consumed by `pullCompany`.
 *
 * Strategies:
 *   - `"all"`              — legacy syncMode='all' path.
 *   - `"vend-fanout"`      — 1..N narrowed STS+ListObjectsV2 calls whose
 *                            results are union'd.
 *   - `"broad-postfilter"` — one wide list, filtered client-side.
 */
export interface CompanyScope {
  companyUid: string;
  syncMode: MembershipSyncConfig["syncMode"];
  /** Coalesced prefixes in scope. In `all` mode: just the company prefix. */
  prefixSet: string[];
  /**
   * Listing strategy picked by `resolveCompanyScope`: `"all"` for
   * syncMode='all'; otherwise decided by comparing the coalesced prefix
   * count against `POST_FILTER_THRESHOLD`.
   */
  strategy:
    | "all"
    | "vend-fanout"
    | "broad-postfilter";
}
169
+
/** Arguments accepted by `resolveCompanyScope`. */
export interface ResolveCompanyScopeInput {
  companyUid: string;
  /** Root prefix of the company, e.g. "companies/indigo/". */
  companyPrefix: string;
  syncConfig: MembershipSyncConfig;
  /**
   * Explicit grants for the caller. Required when
   * `syncConfig.syncMode === 'shared'`; a missing value is treated as an
   * empty grant list.
   */
  explicitGrants?: ExplicitGrant[];
}
177
+
/**
 * Work out the effective sync scope for a single per-company leg.
 *
 * How the result is chosen:
 *   - `syncMode === 'all'`    → strategy `all`, prefixSet `[companyPrefix]`.
 *   - `syncMode === 'custom'` → `customPaths` are coalesced.
 *   - any other mode (`'shared'`) → the `path` of every explicit grant is
 *     coalesced.
 *   For the two coalescing modes the strategy is `broad-postfilter` when the
 *   coalesced count exceeds `POST_FILTER_THRESHOLD`, and `vend-fanout`
 *   otherwise (sharding into vend-sized batches happens at list time).
 *
 * Pure function: performs no network access and does not touch the journal.
 *
 * @param input Company identity, company prefix, sync-config and (for
 *   `shared` mode) the explicit grants.
 * @returns The resolved scope, including the coalesced prefix set.
 */
export function resolveCompanyScope(
  input: ResolveCompanyScopeInput,
): CompanyScope {
  const { companyUid, companyPrefix, syncConfig, explicitGrants } = input;

  // Legacy mode: the whole company prefix is in scope, nothing to coalesce.
  if (syncConfig.syncMode === "all") {
    return {
      companyUid,
      syncMode: "all",
      prefixSet: [companyPrefix],
      strategy: "all",
    };
  }

  const candidatePaths: string[] =
    syncConfig.syncMode === "custom"
      ? syncConfig.customPaths ?? []
      : (explicitGrants ?? []).map((grant) => grant.path);
  const prefixSet = coalescePrefixes(candidatePaths);

  // An empty prefix set is deliberately passed through as `vend-fanout`:
  // in `shared` mode it means the caller holds no explicit grants for this
  // company, and the listing step then issues zero ListObjectsV2 calls and
  // downloads nothing — the correct "I have no shared access" outcome.
  const exceedsFanoutBudget = prefixSet.length > POST_FILTER_THRESHOLD;

  return {
    companyUid,
    syncMode: syncConfig.syncMode,
    prefixSet,
    strategy: exceedsFanoutBudget ? "broad-postfilter" : "vend-fanout",
  };
}
232
+
/**
 * Split a coalesced prefix set into consecutive batches of at most `cap`
 * prefixes each. Each batch maps to a single STS vend.
 *
 * @param prefixes Coalesced prefixes, batched in the order given.
 * @param cap Maximum batch size; defaults to `VEND_PATH_CAP`. Must be a
 *   positive integer (`Infinity` is accepted and yields a single batch).
 * @returns The batches; an empty array when `prefixes` is empty.
 * @throws Error when `cap` is zero, negative, `NaN`, or a finite non-integer.
 */
export function batchPrefixesForVend(
  prefixes: string[],
  cap: number = VEND_PATH_CAP,
): string[][] {
  // `!(cap > 0)` rather than `cap <= 0`: NaN fails every comparison, so the
  // latter would let NaN through, the loop would never advance, and the
  // result would be a single EMPTY batch — silently dropping every prefix.
  if (!(cap > 0)) throw new Error(`batchPrefixesForVend: cap must be > 0`);
  // A fractional cap makes `slice` truncate unevenly, producing empty or
  // over-sized batches (which the vend endpoint would then reject).
  if (Number.isFinite(cap) && !Number.isInteger(cap)) {
    throw new Error(`batchPrefixesForVend: cap must be an integer`);
  }

  const batches: string[][] = [];
  for (let start = 0; start < prefixes.length; start += cap) {
    batches.push(prefixes.slice(start, start + cap));
  }
  return batches;
}
248
+
/**
 * Bounded-parallel mapper: runs `fn` over `items` with at most
 * `concurrency` calls in flight at once and returns the results in input
 * order. Used to fan out per-batch ListObjectsV2 calls without exhausting
 * the AWS SDK or hitting STS throttles.
 *
 * @param items Inputs to map.
 * @param concurrency Desired number of parallel workers. Values below 1
 *   (including 0, negatives and NaN) are treated as 1 so that every item is
 *   still processed; fractional values are rounded up.
 * @param fn Async mapper; receives the item and its index.
 * @returns Results positioned by input index. Rejects with the first error
 *   raised by `fn`; workers already running are not cancelled.
 */
async function mapWithConcurrency<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);

  // Without this clamp a non-positive or NaN `concurrency` starts zero
  // workers and the function resolves to an array of holes, having silently
  // skipped every item.
  const requested = concurrency >= 1 ? Math.ceil(concurrency) : 1;
  const workerCount = Math.min(requested, items.length);

  // Shared cursor: each worker claims the next unclaimed index. The check
  // and the increment run synchronously, so no index is handed out twice.
  let cursor = 0;
  const worker = async (): Promise<void> => {
    while (cursor < items.length) {
      const index = cursor++;
      results[index] = await fn(items[index]!, index);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
275
+
/** Arguments accepted by `listRemoteForScope`. */
export interface ListRemoteForScopeInput {
  ctx: EntityContext;
  scope: CompanyScope;
  /**
   * Listing function to use instead of the package's own `listRemoteFiles`
   * (for tests or alternative S3 surfaces). Same shape:
   * `(ctx, prefix?) => RemoteFile[]`.
   */
  listFn?: (
    ctx: EntityContext,
    prefix?: string,
  ) => Promise<RemoteFile[]>;
  /**
   * Hook that vends a narrowed EntityContext for one batch of paths
   * (intended for tests). When omitted, `ctx` itself is reused for every
   * batch — the orchestrator is expected to have vended it appropriately
   * for the scope already. Full per-batch STS vend wiring lands in US-006
   * together with the CLI.
   */
  vendForBatchFn?: (
    ctx: EntityContext,
    paths: string[],
  ) => Promise<EntityContext>;
}
298
+
/**
 * List the remote objects that fall inside `input.scope`, using the scope's
 * strategy:
 *   - `all`              — a single list call for `scope.prefixSet[0]`.
 *   - `broad-postfilter` — a single list call with no prefix, then only the
 *                          files whose key starts with one of
 *                          `scope.prefixSet` are kept.
 *   - `vend-fanout`      — the prefix set is split into batches of at most
 *                          `VEND_PATH_CAP`; batches run with bounded
 *                          parallelism and their results are union'd and
 *                          de-duplicated by key, so overlapping batches
 *                          don't double-download. Narrowed credentials per
 *                          batch come from `vendForBatchFn` when supplied.
 *
 * @param input Context, scope and optional listing / vending overrides.
 * @returns The remote files in scope.
 */
export async function listRemoteForScope(
  input: ListRemoteForScopeInput,
): Promise<RemoteFile[]> {
  const { ctx, scope } = input;
  const list = input.listFn ?? listRemoteFiles;

  switch (scope.strategy) {
    case "all":
      return list(ctx, scope.prefixSet[0]);

    case "broad-postfilter": {
      const everything = await list(ctx);
      return everything.filter((file) =>
        scope.prefixSet.some((prefix) => file.key.startsWith(prefix)),
      );
    }

    default:
      // vend-fanout — handled below.
      break;
  }

  // No prefixes in scope: nothing to list.
  if (scope.prefixSet.length === 0) return [];

  const listBatch = async (paths: string[]): Promise<RemoteFile[]> => {
    const batchCtx = input.vendForBatchFn
      ? await input.vendForBatchFn(ctx, paths)
      : ctx;
    // ListObjectsV2 accepts only a single Prefix, so each prefix in the
    // batch gets its own list call. The batch grouping exists for the STS
    // session-policy ceiling, not for the list call itself.
    const perPrefix = await Promise.all(
      paths.map((prefix) => list(batchCtx, prefix)),
    );
    return perPrefix.flat();
  };

  const perBatch = await mapWithConcurrency(
    batchPrefixesForVend(scope.prefixSet),
    VEND_FANOUT_CONCURRENCY,
    listBatch,
  );
  return dedupByKey(perBatch.flat());
}
348
+
/**
 * Drop files whose `key` has already been seen, keeping the first
 * occurrence of each key and preserving input order.
 */
function dedupByKey(files: RemoteFile[]): RemoteFile[] {
  const seenKeys = new Set<string>();
  return files.filter((file) => {
    if (seenKeys.has(file.key)) return false;
    seenKeys.add(file.key);
    return true;
  });
}
359
+
360
+ // ── Per-company orchestration: scope-shrink + listing + PullRecord ──────────
361
+
/** Arguments accepted by `pullCompany`. */
export interface PullCompanyInput {
  ctx: EntityContext;
  journal: SyncJournal;
  hqRoot: string;
  scope: CompanyScope;
  /** Conflict-store keys, forwarded to `decideRemotePulls`. Defaults to an empty set. */
  conflictKeys?: Set<string>;
  /**
   * Operator override for dirty orphans (US-005 contract): when set, a
   * scope shrink proceeds instead of aborting with `ScopeShrinkBlockedError`.
   */
  forceScopeShrink?: boolean;
  /** Listing override — see `ListRemoteForScopeInput.listFn`. */
  listFn?: ListRemoteForScopeInput["listFn"];
  /** Per-batch vend override — see `ListRemoteForScopeInput.vendForBatchFn`. */
  vendForBatchFn?: ListRemoteForScopeInput["vendForBatchFn"];
  /** Clock injector for tests; the real wall clock is used when omitted. */
  now?: () => Date;
}
377
+
/** Everything `pullCompany` reports back about one per-company leg. */
export interface PullCompanyResult {
  /** The scope the leg ran with. */
  scope: CompanyScope;
  /** Remote files listed under that scope (post-dedup, post-filter). */
  remoteFiles: RemoteFile[];
  /** Download/delete/skip decision returned by `decideRemotePulls`. */
  decision: RemotePullDecision;
  /** Scope-shrink plan, computed before listing. */
  scopeShrinkPlan: ScopeShrinkPlan;
  /** Outcome of applying the plan (counts); `null` when no shrink was needed. */
  scopeShrinkApplied: ApplyScopeShrinkResult | null;
  /** The record that was appended to `journal.pulls`. */
  pullRecord: PullRecord;
  /** Number of tombstones GC'd at the start of the leg. */
  tombstonesGcd: number;
}
394
+
/**
 * Run the engine half of `pullAll` for ONE company.
 *
 * Steps, in order:
 *   1. GC expired tombstones (cheap; bounds journal growth).
 *   2. Determine the previous pull's prefix set, falling back to the company
 *      prefix when there is no record or the record has an empty set.
 *   3. Build the scope-shrink plan; abort on dirty orphans unless forced.
 *   4. Apply the scope-shrink (delete clean orphans, tombstone entries).
 *   5. List remote files under the current scope.
 *   6. Compute download/delete/skip via `decideRemotePulls`.
 *   7. Append a `PullRecord` capturing the actual `syncMode` + `prefixSet`.
 *
 * Only the decision plan is produced here — the S3 GETs and FS writes stay
 * in the CLI layer (`hq-cli/src/commands/cloud.ts`'s `pullAll`), which
 * threads conflict detection + remoteEtag stamping on completion. US-006
 * wires this orchestrator into the CLI.
 *
 * @param input Context, journal, local root, scope and optional overrides.
 * @returns The listing, decision, scope-shrink outcome and appended record.
 * @throws ScopeShrinkBlockedError when a scope change leaves dirty orphans
 *   and `forceScopeShrink` is not set.
 */
export async function pullCompany(
  input: PullCompanyInput,
): Promise<PullCompanyResult> {
  const { journal, hqRoot, scope } = input;
  const clock = input.now ?? (() => new Date());
  const startedAt = clock().toISOString();
  const conflictKeys = input.conflictKeys ?? new Set<string>();

  // 1. Tombstone GC.
  const tombstonesGcd = gcTombstones(journal, clock().getTime());

  // 2. Previous scope. No record, or a v1-migrated record with an empty
  //    prefixSet, is treated as "everything under the company prefix"; for
  //    an `all` -> `shared` flip this correctly flags shared-mode orphans.
  const previous = lastPullRecord(journal, scope.companyUid);
  const lastPrefixSet =
    previous?.prefixSet && previous.prefixSet.length > 0
      ? previous.prefixSet
      : [companyPrefixOf(scope, previous)];

  // 3. Plan the shrink.
  const scopeShrinkPlan = buildScopeShrinkPlan({
    journal,
    hqRoot,
    lastPrefixSet,
    currentPrefixSet: scope.prefixSet,
  });

  // 3b/4. Refuse to proceed over dirty orphans unless forced, then apply.
  let scopeShrinkApplied: ApplyScopeShrinkResult | null = null;
  if (scopeShrinkPlan.scopeChangeDetected) {
    const hasDirtyOrphans = scopeShrinkPlan.dirty.length > 0;
    if (hasDirtyOrphans && !input.forceScopeShrink) {
      throw new ScopeShrinkBlockedError(
        scope.companyUid,
        previous?.syncMode ?? "unknown",
        scope.syncMode,
        scopeShrinkPlan.dirty,
        scopeShrinkPlan.clean,
      );
    }
    scopeShrinkApplied = applyScopeShrink({
      journal,
      plan: scopeShrinkPlan,
      hqRoot,
      forceScopeShrink: input.forceScopeShrink ?? false,
    });
  }

  // 5. Remote listing under the current scope.
  const remoteFiles = await listRemoteForScope({
    ctx: input.ctx,
    scope,
    listFn: input.listFn,
    vendForBatchFn: input.vendForBatchFn,
  });

  // 6. Pure decision.
  const decision = decideRemotePulls({ remoteFiles, journal, conflictKeys });

  // 7. Record the pull. The clock is read for `completedAt` first and for
  //    the pull id second.
  const completedAt = clock().toISOString();
  const pullId = generatePullId(clock().getTime());
  const orphansRemoved = scopeShrinkApplied?.cleanRemoved ?? 0;
  const orphansBlocked =
    scopeShrinkApplied && input.forceScopeShrink
      ? scopeShrinkApplied.dirtyTombstoned
      : scopeShrinkPlan.dirty.length;
  const pullRecord: PullRecord = {
    pullId,
    companyUid: scope.companyUid,
    startedAt,
    completedAt,
    syncMode: scope.syncMode,
    prefixSet: [...scope.prefixSet],
    scopeChangeDetected: scopeShrinkPlan.scopeChangeDetected,
    orphansRemoved,
    orphansBlocked,
  };
  appendPullRecord(journal, pullRecord);

  return {
    scope,
    remoteFiles,
    decision,
    scopeShrinkPlan,
    scopeShrinkApplied,
    pullRecord,
    tombstonesGcd,
  };
}
499
+
/**
 * Best-effort recovery of the "company prefix" when the last pull left no
 * usable `prefixSet` (no record, or a v1-migrated one). Only the v1 → v2
 * migration window relies on this fallback: once a pull has written a v2
 * record for the company, this branch is not taken again for it.
 *
 * @param scope Current scope; its first prefix is the only input used.
 * @param _last Previous pull record — accepted but not consulted.
 * @returns In `all` mode, the first prefix as-is. Otherwise the leading
 *   `companies/{slug}/` segment of the first prefix, or the first prefix
 *   unchanged when it does not have that shape (`""` when there is none).
 */
function companyPrefixOf(
  scope: CompanyScope,
  _last: PullRecord | undefined,
): string {
  const [head] = scope.prefixSet;

  // In `all` mode the first prefix IS the company prefix.
  if (scope.strategy === "all" && head) return head;

  // ACL grant paths always start with `companies/{slug}/...`, so the company
  // prefix can be read off the front of the first one.
  const candidate = head ?? "";
  const companySegment = /^(companies\/[^/]+\/)/.exec(candidate);
  return companySegment ? companySegment[1]! : candidate;
}