@indigoai-us/hq-cloud 5.8.3 → 5.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -794,7 +794,10 @@ describe("per-company fanout", () => {
794
794
  });
795
795
 
796
796
  const code = await runRunner(["--companies"], deps);
797
- expect(code).toBe(0); // whole fanout still returns 0
797
+ // Exit 2 on partial fanout (one company errored). The fanout completed
798
+ // — beta still synced — but the rollup carries `partial: true` and a
799
+ // non-zero exit signals automated monitors that the run wasn't clean.
800
+ expect(code).toBe(2);
798
801
 
799
802
  // Error event for acme (company-level) with path sentinel "(company)"
800
803
  // — error-class events route to stderr.
@@ -820,6 +823,95 @@ describe("per-company fanout", () => {
820
823
  expect(betaComplete).toBeDefined();
821
824
  expect(betaComplete?.filesDownloaded).toBe(1);
822
825
  });
826
+
827
+ /**
828
+ * Regression test for the rollup-bug from the personal-sync 401 incident.
829
+ *
830
+ * Setup: company "personal" has 3 files queued for download. The first two
831
+ * arrive (emitting `progress` events) and then the sync function throws on
832
+ * the third (mid-stream 401). Before the fix, the runner's `all-complete`
833
+ * reported `filesDownloaded: 0` for the whole run because the throwing
834
+ * company never emitted a clean `complete` and the rollup only summed
835
+ * companies that did. The fix: walk every company, sum partial counts
836
+ * captured from progress events, flip `partial: true`, exit non-zero.
837
+ */
838
+ it("aborted-mid-stream company contributes its partial counts to all-complete", async () => {
839
+ const deps = makeDeps({
840
+ createVaultClient: () =>
841
+ makeVaultStub({
842
+ memberships: [{ companyUid: "cmp_personal" }],
843
+ entityGet: (uid: string) =>
844
+ Promise.resolve({
845
+ uid,
846
+ slug: "personal",
847
+ } as unknown as EntityInfo),
848
+ }),
849
+ sync: vi
850
+ .fn<(opts: SyncOptions) => Promise<SyncResult>>()
851
+ .mockImplementationOnce(async (opts: SyncOptions) => {
852
+ // Two files land before the throw — these counts must show up in
853
+ // the rollup even though the sync function never returns cleanly.
854
+ opts.onEvent?.({ type: "progress", path: "notes/a.md", bytes: 100 });
855
+ opts.onEvent?.({ type: "progress", path: "notes/b.md", bytes: 250 });
856
+ throw new Error("401 Unauthorized — token expired");
857
+ }),
858
+ });
859
+
860
+ const code = await runRunner(["--companies"], deps);
861
+ expect(code).toBe(2);
862
+
863
+ // 1) The aborted company emits a `complete` event with aborted=true
864
+ // and the partial counts that the progress stream captured. Without
865
+ // this, consumers walking the `complete` event stream would never
866
+ // see the 350 bytes that hit disk.
867
+ const completeEvents = deps.stdout
868
+ .events()
869
+ .filter(
870
+ (e): e is Extract<RunnerEvent, { type: "complete" }> =>
871
+ e.type === "complete",
872
+ );
873
+ expect(completeEvents).toHaveLength(1);
874
+ expect(completeEvents[0]).toMatchObject({
875
+ company: "personal",
876
+ filesDownloaded: 2,
877
+ bytesDownloaded: 350,
878
+ filesUploaded: 0,
879
+ bytesUploaded: 0,
880
+ aborted: true,
881
+ });
882
+
883
+ // 2) The all-complete rollup includes the partial counts and is flagged
884
+ // `partial: true` with a per-company breakdown. Before the fix this
885
+ // was `filesDownloaded: 0` and there was no `partial` field at all.
886
+ const all = deps.stdout
887
+ .events()
888
+ .find(
889
+ (e): e is Extract<RunnerEvent, { type: "all-complete" }> =>
890
+ e.type === "all-complete",
891
+ );
892
+ expect(all).toBeDefined();
893
+ expect(all).toMatchObject({
894
+ companiesAttempted: 1,
895
+ filesDownloaded: 2,
896
+ bytesDownloaded: 350,
897
+ filesUploaded: 0,
898
+ bytesUploaded: 0,
899
+ partial: true,
900
+ errors: [
901
+ { company: "personal", message: "401 Unauthorized — token expired" },
902
+ ],
903
+ });
904
+ expect(all?.companies).toEqual([
905
+ {
906
+ company: "personal",
907
+ status: "errored",
908
+ filesDownloaded: 2,
909
+ bytesDownloaded: 350,
910
+ filesUploaded: 0,
911
+ bytesUploaded: 0,
912
+ },
913
+ ]);
914
+ });
823
915
  });
824
916
 
825
917
  // ---------------------------------------------------------------------------
@@ -860,6 +952,25 @@ describe("all-complete aggregate", () => {
860
952
  bytesUploaded: 0,
861
953
  conflictPaths: [],
862
954
  errors: [],
955
+ partial: false,
956
+ companies: [
957
+ {
958
+ company: "acme",
959
+ status: "complete",
960
+ filesDownloaded: 3,
961
+ bytesDownloaded: 100,
962
+ filesUploaded: 0,
963
+ bytesUploaded: 0,
964
+ },
965
+ {
966
+ company: "beta",
967
+ status: "complete",
968
+ filesDownloaded: 4,
969
+ bytesDownloaded: 250,
970
+ filesUploaded: 0,
971
+ bytesUploaded: 0,
972
+ },
973
+ ],
863
974
  });
864
975
  });
865
976
 
@@ -879,7 +990,8 @@ describe("all-complete aggregate", () => {
879
990
  });
880
991
 
881
992
  const code = await runRunner(["--companies"], deps);
882
- expect(code).toBe(0);
993
+ // Exit 2 — partial fanout (acme errored, beta clean).
994
+ expect(code).toBe(2);
883
995
  const all = deps.stdout
884
996
  .events()
885
997
  .find((e) => e.type === "all-complete") as Extract<RunnerEvent, { type: "all-complete" }>;
@@ -887,6 +999,7 @@ describe("all-complete aggregate", () => {
887
999
  expect(all.errors).toEqual([
888
1000
  { company: "acme", message: "acme failed" },
889
1001
  ]);
1002
+ expect(all.partial).toBe(true);
890
1003
  });
891
1004
  });
892
1005
 
@@ -1047,6 +1160,12 @@ describe("--direction", () => {
1047
1160
  ["--companies", "--direction", "both"],
1048
1161
  deps,
1049
1162
  );
1163
+ // A clean conflict-abort (push returned aborted: true, no exception)
1164
+ // exits 0 — nothing threw, the user's --on-conflict abort policy just
1165
+ // decided to skip pull. The all-complete event carries `partial: true`
1166
+ // for monitors that want to see "didn't complete cleanly", but the exit
1167
+ // code stays 0 so the Tauri menubar's Sentry alert doesn't fire on what
1168
+ // is normal user-policy behavior.
1050
1169
  expect(code).toBe(0);
1051
1170
  expect(shareSpy).toHaveBeenCalledTimes(1);
1052
1171
  expect(syncSpy).not.toHaveBeenCalled();
@@ -1055,6 +1174,15 @@ describe("--direction", () => {
1055
1174
  .events()
1056
1175
  .find((e) => e.type === "complete") as Extract<RunnerEvent, { type: "complete" }>;
1057
1176
  expect(complete.aborted).toBe(true);
1177
+
1178
+ const all = deps.stdout
1179
+ .events()
1180
+ .find((e) => e.type === "all-complete") as Extract<RunnerEvent, { type: "all-complete" }>;
1181
+ expect(all.partial).toBe(true);
1182
+ expect(all.companies[0]).toMatchObject({
1183
+ company: "acme",
1184
+ status: "aborted",
1185
+ });
1058
1186
  });
1059
1187
 
1060
1188
  it("direction=push: passes skipUnchanged and company root path to share()", async () => {
@@ -1131,6 +1259,25 @@ describe("--direction", () => {
1131
1259
  bytesUploaded: 125,
1132
1260
  conflictPaths: [],
1133
1261
  errors: [],
1262
+ partial: false,
1263
+ companies: [
1264
+ {
1265
+ company: "acme",
1266
+ status: "complete",
1267
+ filesDownloaded: 3,
1268
+ bytesDownloaded: 100,
1269
+ filesUploaded: 1,
1270
+ bytesUploaded: 50,
1271
+ },
1272
+ {
1273
+ company: "beta",
1274
+ status: "complete",
1275
+ filesDownloaded: 4,
1276
+ bytesDownloaded: 250,
1277
+ filesUploaded: 2,
1278
+ bytesUploaded: 75,
1279
+ },
1280
+ ],
1134
1281
  });
1135
1282
  });
1136
1283
 
@@ -37,8 +37,19 @@
37
37
  * of requiring per-event capture calls in the menubar.
38
38
  *
39
39
  * Exit code:
40
- * 0 — event stream describes the outcome (including setup-needed)
41
- * 1 argv parse error or unrecoverable pre-sync failure
40
+ * 0 — event stream describes the outcome. The runner finished its protocol
41
+ * without any company throwing. Includes setup-needed, auth-error, and
42
+ * runs where every company completed OR cleanly returned `aborted: true`
43
+ * (a `--on-conflict abort` policy decision is not an error).
44
+ * 1 — argv parse error or unrecoverable pre-sync failure.
45
+ * 2 — at least one company threw mid-stream (e.g. mid-fanout 401, network
46
+ * reset, S3 5xx after retries). The all-complete event carries
47
+ * `partial: true` and per-company partial counts captured from
48
+ * `progress` events before the throw, so consumers parsing ndjson see
49
+ * what actually transferred. This is distinct from exit 0 with
50
+ * `partial: true` (clean conflict-aborts) — exit 2 is "something
51
+ * unexpected happened", which the Tauri menubar converts to a Sentry
52
+ * alert. Conflict-aborts intentionally do NOT alert.
42
53
  */
43
54
 
44
55
  import * as os from "os";
@@ -168,6 +179,36 @@ export type RunnerEvent =
168
179
  */
169
180
  conflictPaths: Array<{ company: string; path: string; direction: "pull" | "push" }>;
170
181
  errors: Array<{ company: string; message: string }>;
182
+ /**
183
+ * True when at least one company in the fanout did not complete cleanly
184
+ * — either it returned `aborted: true` (e.g. conflict-abort) or its sync
185
+ * function threw mid-stream (e.g. mid-fanout 401). When `partial: true`,
186
+ * the totals above include partial counts captured from `progress` events
187
+ * before the abort, NOT just companies that emitted a clean `complete`.
188
+ *
189
+ * Automated monitors should check this field — `errors.length > 0` alone
190
+ * isn't sufficient because an `aborted: true` return doesn't push to
191
+ * `errors` (it's a clean conflict-abort, not an exception).
192
+ */
193
+ partial: boolean;
194
+ /**
195
+ * Per-company breakdown of the fanout. Always present, one entry per
196
+ * planned company, in fanout order. Lets consumers reconcile per-company
197
+ * partial counts with the aggregate without re-walking `complete` /
198
+ * `error` event streams. The `status` field is the canonical signal:
199
+ * - "complete" — sync returned cleanly, `aborted: false`
200
+ * - "aborted" — sync returned cleanly with `aborted: true` (conflict-abort)
201
+ * - "errored" — sync threw mid-stream; counts are sourced from progress
202
+ * events seen before the throw
203
+ */
204
+ companies: Array<{
205
+ company: string;
206
+ status: "complete" | "aborted" | "errored";
207
+ filesDownloaded: number;
208
+ bytesDownloaded: number;
209
+ filesUploaded: number;
210
+ bytesUploaded: number;
211
+ }>;
171
212
  };
172
213
 
173
214
  /**
@@ -533,17 +574,56 @@ export async function runRunner(
533
574
  const shareFn = deps.share ?? defaultShare;
534
575
  const doPush = parsed.direction === "push" || parsed.direction === "both";
535
576
  const doPull = parsed.direction === "pull" || parsed.direction === "both";
536
- let totalDownloaded = 0;
537
- let totalDownloadedBytes = 0;
538
- let totalUploaded = 0;
539
- let totalUploadedBytes = 0;
540
577
  const errors: Array<{ company: string; message: string }> = [];
541
578
  const allConflicts: Array<{ company: string; path: string; direction: "pull" | "push" }> = [];
542
579
 
580
+ // Per-company state, keyed by the company label (slug or UID-fallback) so
581
+ // both `progress` (which streams) and `complete`/throw (which lands once)
582
+ // can update the same row. The rollup at the bottom of the function walks
583
+ // every entry — this is the source of truth that closes the bug where an
584
+ // aborted company's partial counts were dropped from `all-complete`.
585
+ //
586
+ // We seed `activePhase` from the parsed direction flag so we know whether a `progress`
587
+ // event without a clear phase should bump downloaded or uploaded counters.
588
+ // For `direction: "both"` runs we lean on whichever phase is in flight
589
+ // — push runs first and sets `activePhase = "push"` while shareFn runs,
590
+ // pull sets it to "pull". The closure shared by tagAndEmit reads `activePhase`
591
+ // at event time, so progress events route to the right column.
592
+ type CompanyStatus = "complete" | "aborted" | "errored";
593
+ interface CompanyState {
594
+ company: string;
595
+ status: CompanyStatus;
596
+ filesDownloaded: number;
597
+ bytesDownloaded: number;
598
+ filesUploaded: number;
599
+ bytesUploaded: number;
600
+ }
601
+ const stateByCompany = new Map<string, CompanyState>();
602
+
543
603
  for (const target of plan) {
544
604
  const companyLabel = target.slug;
605
+ const state: CompanyState = {
606
+ company: companyLabel,
607
+ // Default to "errored" so a throw before any complete-or-clean-abort
608
+ // path (the original bug) leaves the entry flagged as not-clean. The
609
+ // success/clean-abort paths overwrite this before the loop body exits.
610
+ status: "errored",
611
+ filesDownloaded: 0,
612
+ bytesDownloaded: 0,
613
+ filesUploaded: 0,
614
+ bytesUploaded: 0,
615
+ };
616
+ stateByCompany.set(companyLabel, state);
617
+
618
+ // Which phase is currently emitting `progress` events. Mutable closure so
619
+ // tagAndEmit (defined once below) reads the latest value when each event
620
+ // fires. "pull" is the default for back-compat with pull-only runs.
621
+ let activePhase: "pull" | "push" = doPush && !doPull ? "push" : "pull";
622
+
545
623
  // Per-company event tagger — shared by push and pull phases so progress
546
624
  // rows land on the right company regardless of which phase emitted them.
625
+ // Also updates `state` for `progress` events so the rollup has accurate
626
+ // partial counts even if the sync function throws before returning.
547
627
  const tagAndEmit = (event: SyncProgressEvent): void => {
548
628
  if (event.type === "plan") {
549
629
  emit({
@@ -557,6 +637,13 @@ export async function runRunner(
557
637
  filesToConflict: event.filesToConflict,
558
638
  });
559
639
  } else if (event.type === "progress") {
640
+ if (activePhase === "push") {
641
+ state.filesUploaded += 1;
642
+ state.bytesUploaded += event.bytes;
643
+ } else {
644
+ state.filesDownloaded += 1;
645
+ state.bytesDownloaded += event.bytes;
646
+ }
560
647
  emit({
561
648
  type: "progress",
562
649
  company: companyLabel,
@@ -604,6 +691,7 @@ export async function runRunner(
604
691
  // point with `skipUnchanged` so we don't re-upload files that haven't
605
692
  // changed since the last sync.
606
693
  if (doPush) {
694
+ activePhase = "push";
607
695
  pushResult = await shareFn({
608
696
  paths: [path.join(parsed.hqRoot, "companies", target.slug)],
609
697
  company: target.uid,
@@ -619,6 +707,7 @@ export async function runRunner(
619
707
  // the user has local edits + remote drift; blindly pulling would erase
620
708
  // whichever side `--on-conflict abort` just protected.
621
709
  if (doPull && !pushResult.aborted) {
710
+ activePhase = "pull";
622
711
  pullResult = await syncFn({
623
712
  company: target.uid,
624
713
  vaultConfig,
@@ -637,6 +726,20 @@ export async function runRunner(
637
726
  ...pullResult.conflictPaths,
638
727
  ...pushResult.conflictPaths,
639
728
  ];
729
+ const aborted = pullResult.aborted || pushResult.aborted;
730
+
731
+ // Overwrite the progress-derived counts with the authoritative numbers
732
+ // from the sync/share return values. The `progress` stream over-counts
733
+ // when the inner walker emits a progress row for a file it then skips
734
+ // due to a journal hit — a clean return value is the source of truth.
735
+ // For the throw case below this overwrite never runs, so `state` keeps
736
+ // its progress-derived counts (which is exactly what we want there).
737
+ state.filesDownloaded = pullResult.filesDownloaded;
738
+ state.bytesDownloaded = pullResult.bytesDownloaded;
739
+ state.filesUploaded = pushResult.filesUploaded;
740
+ state.bytesUploaded = pushResult.bytesUploaded;
741
+ state.status = aborted ? "aborted" : "complete";
742
+
640
743
  emit({
641
744
  type: "complete",
642
745
  company: companyLabel,
@@ -654,7 +757,7 @@ export async function runRunner(
654
757
  conflictPaths: mergedConflictPaths,
655
758
  // Either phase aborting marks the company aborted — the UI treats
656
759
  // `aborted: true` as "sync didn't complete cleanly for this company".
657
- aborted: pullResult.aborted || pushResult.aborted,
760
+ aborted,
658
761
  });
659
762
  for (const p of pullResult.conflictPaths) {
660
763
  allConflicts.push({ company: companyLabel, path: p, direction: "pull" });
@@ -662,13 +765,28 @@ export async function runRunner(
662
765
  for (const p of pushResult.conflictPaths) {
663
766
  allConflicts.push({ company: companyLabel, path: p, direction: "push" });
664
767
  }
665
- totalDownloaded += pullResult.filesDownloaded;
666
- totalDownloadedBytes += pullResult.bytesDownloaded;
667
- totalUploaded += pushResult.filesUploaded;
668
- totalUploadedBytes += pushResult.bytesUploaded;
669
768
  } catch (err) {
670
769
  const message = err instanceof Error ? err.message : String(err);
671
770
  errors.push({ company: companyLabel, message });
771
+ // `state.status` was seeded as "errored" at loop entry — the throw
772
+ // path leaves it there, and `state.files{Down,Up}loaded` reflects the
773
+ // partial counts captured from `progress` events before the throw.
774
+ // Emit a `complete` event with `aborted: true` and those partial
775
+ // counts so consumers walking the `complete` event stream see every
776
+ // company in the fanout uniformly. This is the fix for the misleading
777
+ // rollup — see file header `Exit code: 2` doc.
778
+ emit({
779
+ type: "complete",
780
+ company: companyLabel,
781
+ filesDownloaded: state.filesDownloaded,
782
+ bytesDownloaded: state.bytesDownloaded,
783
+ filesUploaded: state.filesUploaded,
784
+ bytesUploaded: state.bytesUploaded,
785
+ filesSkipped: 0,
786
+ conflicts: 0,
787
+ conflictPaths: [],
788
+ aborted: true,
789
+ });
672
790
  emit({
673
791
  type: "error",
674
792
  company: companyLabel,
@@ -679,6 +797,43 @@ export async function runRunner(
679
797
  }
680
798
  }
681
799
 
800
+ // Walk every per-company entry — the map holds one row per planned company,
801
+ // including ones that aborted via thrown exception. This is the fix for the
802
+ // bug where `all-complete` reported `filesDownloaded: 0` for an aborted
803
+ // personal-sync that had already emitted thousands of `progress` events:
804
+ // the rollup used to only sum companies that emitted a clean `complete`,
805
+ // which silently dropped partials when the sync function threw.
806
+ let totalDownloaded = 0;
807
+ let totalDownloadedBytes = 0;
808
+ let totalUploaded = 0;
809
+ let totalUploadedBytes = 0;
810
+ let partial = false;
811
+ const companies: Array<{
812
+ company: string;
813
+ status: CompanyStatus;
814
+ filesDownloaded: number;
815
+ bytesDownloaded: number;
816
+ filesUploaded: number;
817
+ bytesUploaded: number;
818
+ }> = [];
819
+ for (const target of plan) {
820
+ const s = stateByCompany.get(target.slug);
821
+ if (!s) continue; // unreachable — every plan entry seeds the map
822
+ totalDownloaded += s.filesDownloaded;
823
+ totalDownloadedBytes += s.bytesDownloaded;
824
+ totalUploaded += s.filesUploaded;
825
+ totalUploadedBytes += s.bytesUploaded;
826
+ if (s.status !== "complete") partial = true;
827
+ companies.push({
828
+ company: s.company,
829
+ status: s.status,
830
+ filesDownloaded: s.filesDownloaded,
831
+ bytesDownloaded: s.bytesDownloaded,
832
+ filesUploaded: s.filesUploaded,
833
+ bytesUploaded: s.bytesUploaded,
834
+ });
835
+ }
836
+
682
837
  emit({
683
838
  type: "all-complete",
684
839
  companiesAttempted: plan.length,
@@ -688,8 +843,15 @@ export async function runRunner(
688
843
  bytesUploaded: totalUploadedBytes,
689
844
  conflictPaths: allConflicts,
690
845
  errors,
846
+ partial,
847
+ companies,
691
848
  });
692
- return 0;
849
+ // Exit 2 only when something actually threw (`errors.length > 0`). A clean
850
+ // conflict-abort sets `partial: true` in the JSON but exits 0 — the Tauri
851
+ // menubar's non-zero-exit Sentry capture would otherwise fire for normal
852
+ // user-policy outcomes. Consumers that want to flag any non-clean outcome
853
+ // (clean-abort + thrown-error) read `partial` from the JSON.
854
+ return errors.length > 0 ? 2 : 0;
693
855
  }
694
856
 
695
857
  // ---------------------------------------------------------------------------
@@ -99,6 +99,105 @@ describe("isExpiring — expiresAt shape tolerance", () => {
99
99
  });
100
100
  });
101
101
 
102
+ // ---------------------------------------------------------------------------
103
+ // Stale-pool detection — decodeAccessTokenClientId + getValidAccessToken
104
+ // self-evicts cached tokens minted by a different App Client (e.g. dev pool
105
+ // tokens left over from before the 2026-04-25 cutover).
106
+ // ---------------------------------------------------------------------------
107
+
108
+ /** Build a minimal unsigned JWT carrying the given claims. Cognito's real */
109
+ /** tokens are RS256-signed; we don't verify here so the signature can be */
110
+ /** anything — only the base64url-encoded payload matters. */
111
+ function makeAccessToken(claims: Record<string, unknown>): string {
112
+ const header = Buffer.from(JSON.stringify({ alg: "RS256", typ: "JWT" }))
113
+ .toString("base64")
114
+ .replace(/=+$/, "");
115
+ const payload = Buffer.from(JSON.stringify(claims))
116
+ .toString("base64")
117
+ .replace(/=+$/, "");
118
+ return `${header}.${payload}.signature`;
119
+ }
120
+
121
+ const DEV_CLIENT = "4mmujmjq3srakdueg656b9m0mp";
122
+ const PROD_CLIENT = "7acei2c8v870enheptb1j5foln";
123
+
124
+ const baseConfig = {
125
+ region: "us-east-1",
126
+ userPoolDomain: "vault-indigo-hq-prod",
127
+ clientId: PROD_CLIENT,
128
+ };
129
+
130
+ describe("decodeAccessTokenClientId", () => {
131
+ it("returns the client_id claim from a well-formed JWT", async () => {
132
+ const { decodeAccessTokenClientId } = await importModule();
133
+ const token = makeAccessToken({ client_id: DEV_CLIENT, sub: "abc" });
134
+ expect(decodeAccessTokenClientId(token)).toBe(DEV_CLIENT);
135
+ });
136
+
137
+ it("returns null when client_id is absent", async () => {
138
+ const { decodeAccessTokenClientId } = await importModule();
139
+ const token = makeAccessToken({ sub: "abc" });
140
+ expect(decodeAccessTokenClientId(token)).toBeNull();
141
+ });
142
+
143
+ it("returns null when the token has fewer than two segments", async () => {
144
+ const { decodeAccessTokenClientId } = await importModule();
145
+ expect(decodeAccessTokenClientId("not-a-jwt")).toBeNull();
146
+ });
147
+
148
+ it("returns null when the payload isn't valid JSON", async () => {
149
+ const { decodeAccessTokenClientId } = await importModule();
150
+ expect(decodeAccessTokenClientId("aaa.bbb.ccc")).toBeNull();
151
+ });
152
+ });
153
+
154
+ describe("getValidAccessToken stale-pool detection", () => {
155
+ it("evicts a cached token whose client_id mismatches the current config", async () => {
156
+ const { saveCachedTokens, loadCachedTokens, getValidAccessToken } =
157
+ await importModule();
158
+ const devToken = makeAccessToken({ client_id: DEV_CLIENT, sub: "abc" });
159
+ saveCachedTokens({
160
+ ...baseTokens,
161
+ accessToken: devToken,
162
+ expiresAt: Date.now() + 60 * 60 * 1000,
163
+ });
164
+ expect(loadCachedTokens()).not.toBeNull();
165
+
166
+ await expect(
167
+ getValidAccessToken(baseConfig, { interactive: false }),
168
+ ).rejects.toThrow(/No valid HQ session/);
169
+
170
+ expect(loadCachedTokens()).toBeNull();
171
+ });
172
+
173
+ it("keeps a cached token whose client_id matches", async () => {
174
+ const { saveCachedTokens, getValidAccessToken } = await importModule();
175
+ const prodToken = makeAccessToken({ client_id: PROD_CLIENT, sub: "abc" });
176
+ saveCachedTokens({
177
+ ...baseTokens,
178
+ accessToken: prodToken,
179
+ expiresAt: Date.now() + 60 * 60 * 1000,
180
+ });
181
+ const access = await getValidAccessToken(baseConfig, {
182
+ interactive: false,
183
+ });
184
+ expect(access).toBe(prodToken);
185
+ });
186
+
187
+ it("keeps a cached token when client_id can't be decoded (back-compat)", async () => {
188
+ const { saveCachedTokens, getValidAccessToken } = await importModule();
189
+ saveCachedTokens({
190
+ ...baseTokens,
191
+ accessToken: "opaque-non-jwt",
192
+ expiresAt: Date.now() + 60 * 60 * 1000,
193
+ });
194
+ const access = await getValidAccessToken(baseConfig, {
195
+ interactive: false,
196
+ });
197
+ expect(access).toBe("opaque-non-jwt");
198
+ });
199
+ });
200
+
102
201
  // ---------------------------------------------------------------------------
103
202
  // Round-trip: writers emit epoch-ms, readers read epoch-ms
104
203
  // ---------------------------------------------------------------------------
@@ -120,6 +120,33 @@ export function isExpiring(tokens: CognitoTokens, bufferSeconds = 60): boolean {
120
120
  return expiresAt - Date.now() < bufferSeconds * 1000;
121
121
  }
122
122
 
123
+ /**
124
+ * Decode the `client_id` claim from a Cognito access token (no signature
125
+ * verification — we only need to identify which App Client minted it).
126
+ * Returns null when the token can't be parsed.
127
+ *
128
+ * Used by `getValidAccessToken` to detect stale cached sessions that target
129
+ * a different Cognito App Client. The canonical case is a pre-2026-04-25
130
+ * cache file holding a `hq-vault-dev` token after the user upgraded to a
131
+ * post-cutover CLI: the access token stays "non-expiring" for an hour but
132
+ * the prod vault API rejects it with 401, and the dev refresh token can't
133
+ * be exchanged at the prod token endpoint. Detecting the mismatch and
134
+ * forcing a re-login is the only safe self-heal.
135
+ */
136
+ export function decodeAccessTokenClientId(accessToken: string): string | null {
137
+ try {
138
+ const parts = accessToken.split(".");
139
+ if (parts.length < 2) return null;
140
+ const payloadB64 = parts[1];
141
+ const padded = payloadB64 + "=".repeat((4 - (payloadB64.length % 4)) % 4);
142
+ const json = Buffer.from(padded, "base64").toString("utf-8");
143
+ const claims = JSON.parse(json) as { client_id?: unknown };
144
+ return typeof claims.client_id === "string" ? claims.client_id : null;
145
+ } catch {
146
+ return null;
147
+ }
148
+ }
149
+
123
150
  // ---------------------------------------------------------------------------
124
151
  // PKCE
125
152
  // ---------------------------------------------------------------------------
@@ -375,7 +402,22 @@ export async function getValidAccessToken(
375
402
  options: { interactive?: boolean } = {},
376
403
  ): Promise<string> {
377
404
  const interactive = options.interactive ?? true;
378
- const cached = loadCachedTokens();
405
+ let cached = loadCachedTokens();
406
+
407
+ // Stale-pool detection: if the cached access token was issued by a
408
+ // different Cognito App Client than the one we're talking to now, drop the
409
+ // cache and re-authenticate. Without this, a user holding a pre-cutover
410
+ // dev-pool token would either keep using a token the prod API rejects
411
+ // with 401, or attempt a refresh against the prod token endpoint with a
412
+ // dev refresh token (InvalidGrant). See `decodeAccessTokenClientId` for
413
+ // the full rationale.
414
+ if (cached) {
415
+ const cachedClientId = decodeAccessTokenClientId(cached.accessToken);
416
+ if (cachedClientId !== null && cachedClientId !== config.clientId) {
417
+ clearCachedTokens();
418
+ cached = null;
419
+ }
420
+ }
379
421
 
380
422
  if (cached && !isExpiring(cached)) return cached.accessToken;
381
423