npm - @indigoai-us/hq-cloud - Versions diffs - 5.8.3 → 5.9.1 - Mend

@indigoai-us/hq-cloud 5.8.3 → 5.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/bin/sync-runner.d.ts +43 -2
package/dist/bin/sync-runner.d.ts.map +1 -1
package/dist/bin/sync-runner.js +113 -12
package/dist/bin/sync-runner.js.map +1 -1
package/dist/bin/sync-runner.test.js +136 -2
package/dist/bin/sync-runner.test.js.map +1 -1
package/dist/cognito-auth.d.ts +14 -0
package/dist/cognito-auth.d.ts.map +1 -1
package/dist/cognito-auth.js +43 -1
package/dist/cognito-auth.js.map +1 -1
package/dist/cognito-auth.test.js +83 -0
package/dist/cognito-auth.test.js.map +1 -1
package/package.json +1 -1
package/src/bin/sync-runner.test.ts +149 -2
package/src/bin/sync-runner.ts +174 -12
package/src/cognito-auth.test.ts +99 -0
package/src/cognito-auth.ts +43 -1

package/src/bin/sync-runner.test.ts CHANGED Viewed

@@ -794,7 +794,10 @@ describe("per-company fanout", () => {
     });
     const code = await runRunner(["--companies"], deps);
-    expect(code).toBe(0); // whole fanout still returns 0
+    // Exit 2 on partial fanout (one company errored). The fanout completed
+    // — beta still synced — but the rollup carries `partial: true` and a
+    // non-zero exit signals automated monitors that the run wasn't clean.
+    expect(code).toBe(2);
     // Error event for acme (company-level) with path sentinel "(company)"
     // — error-class events route to stderr.
@@ -820,6 +823,95 @@ describe("per-company fanout", () => {
     expect(betaComplete).toBeDefined();
     expect(betaComplete?.filesDownloaded).toBe(1);
   });
+  /**
+   * Regression test for the rollup bug from the personal-sync 401 incident.
+   *
+   * Setup: company "personal" has 3 files queued for download. The first two
+   * arrive (emitting `progress` events) and then the sync function throws on
+   * the third (mid-stream 401). Before the fix, the runner's `all-complete`
+   * reported `filesDownloaded: 0` for the whole run because the throwing
+   * company never emitted a clean `complete` and the rollup only summed
+   * companies that did. The fix: walk every company, sum partial counts
+   * captured from progress events, flip `partial: true`, exit non-zero.
+   */
+  it("aborted-mid-stream company contributes its partial counts to all-complete", async () => {
+    const deps = makeDeps({
+      createVaultClient: () =>
+        makeVaultStub({
+          memberships: [{ companyUid: "cmp_personal" }],
+          entityGet: (uid: string) =>
+            Promise.resolve({
+              uid,
+              slug: "personal",
+            } as unknown as EntityInfo),
+        }),
+      sync: vi
+        .fn<(opts: SyncOptions) => Promise<SyncResult>>()
+        .mockImplementationOnce(async (opts: SyncOptions) => {
+          // Two files land before the throw — these counts must show up in
+          // the rollup even though the sync function never returns cleanly.
+          opts.onEvent?.({ type: "progress", path: "notes/a.md", bytes: 100 });
+          opts.onEvent?.({ type: "progress", path: "notes/b.md", bytes: 250 });
+          throw new Error("401 Unauthorized — token expired");
+        }),
+    });
+    const code = await runRunner(["--companies"], deps);
+    expect(code).toBe(2);
+    // 1) The aborted company emits a `complete` event with aborted=true
+    //    and the partial counts that the progress stream captured. Without
+    //    this, consumers walking the `complete` event stream would never
+    //    see the 350 bytes that hit disk.
+    const completeEvents = deps.stdout
+      .events()
+      .filter(
+        (e): e is Extract<RunnerEvent, { type: "complete" }> =>
+          e.type === "complete",
+      );
+    expect(completeEvents).toHaveLength(1);
+    expect(completeEvents[0]).toMatchObject({
+      company: "personal",
+      filesDownloaded: 2,
+      bytesDownloaded: 350,
+      filesUploaded: 0,
+      bytesUploaded: 0,
+      aborted: true,
+    });
+    // 2) The all-complete rollup includes the partial counts and is flagged
+    //    `partial: true` with a per-company breakdown. Before the fix this
+    //    was `filesDownloaded: 0` and there was no `partial` field at all.
+    const all = deps.stdout
+      .events()
+      .find(
+        (e): e is Extract<RunnerEvent, { type: "all-complete" }> =>
+          e.type === "all-complete",
+      );
+    expect(all).toBeDefined();
+    expect(all).toMatchObject({
+      companiesAttempted: 1,
+      filesDownloaded: 2,
+      bytesDownloaded: 350,
+      filesUploaded: 0,
+      bytesUploaded: 0,
+      partial: true,
+      errors: [
+        { company: "personal", message: "401 Unauthorized — token expired" },
+      ],
+    });
+    expect(all?.companies).toEqual([
+      {
+        company: "personal",
+        status: "errored",
+        filesDownloaded: 2,
+        bytesDownloaded: 350,
+        filesUploaded: 0,
+        bytesUploaded: 0,
+      },
+    ]);
+  });
 });
 // ---------------------------------------------------------------------------
@@ -860,6 +952,25 @@ describe("all-complete aggregate", () => {
       bytesUploaded: 0,
       conflictPaths: [],
       errors: [],
+      partial: false,
+      companies: [
+        {
+          company: "acme",
+          status: "complete",
+          filesDownloaded: 3,
+          bytesDownloaded: 100,
+          filesUploaded: 0,
+          bytesUploaded: 0,
+        },
+        {
+          company: "beta",
+          status: "complete",
+          filesDownloaded: 4,
+          bytesDownloaded: 250,
+          filesUploaded: 0,
+          bytesUploaded: 0,
+        },
+      ],
     });
   });
@@ -879,7 +990,8 @@ describe("all-complete aggregate", () => {
     });
     const code = await runRunner(["--companies"], deps);
-    expect(code).toBe(0);
+    // Exit 2 — partial fanout (acme errored, beta clean).
+    expect(code).toBe(2);
     const all = deps.stdout
       .events()
       .find((e) => e.type === "all-complete") as Extract<RunnerEvent, { type: "all-complete" }>;
@@ -887,6 +999,7 @@ describe("all-complete aggregate", () => {
     expect(all.errors).toEqual([
       { company: "acme", message: "acme failed" },
     ]);
+    expect(all.partial).toBe(true);
   });
 });
@@ -1047,6 +1160,12 @@ describe("--direction", () => {
       ["--companies", "--direction", "both"],
       deps,
     );
+    // A clean conflict-abort (push returned aborted: true, no exception)
+    // exits 0 — nothing threw, the user's --on-conflict abort policy just
+    // decided to skip pull. The all-complete event carries `partial: true`
+    // for monitors that want to see "didn't complete cleanly", but the exit
+    // code stays 0 so the Tauri menubar's Sentry alert doesn't fire on what
+    // is normal user-policy behavior.
     expect(code).toBe(0);
     expect(shareSpy).toHaveBeenCalledTimes(1);
     expect(syncSpy).not.toHaveBeenCalled();
@@ -1055,6 +1174,15 @@ describe("--direction", () => {
       .events()
       .find((e) => e.type === "complete") as Extract<RunnerEvent, { type: "complete" }>;
     expect(complete.aborted).toBe(true);
+    const all = deps.stdout
+      .events()
+      .find((e) => e.type === "all-complete") as Extract<RunnerEvent, { type: "all-complete" }>;
+    expect(all.partial).toBe(true);
+    expect(all.companies[0]).toMatchObject({
+      company: "acme",
+      status: "aborted",
+    });
   });
   it("direction=push: passes skipUnchanged and company root path to share()", async () => {
@@ -1131,6 +1259,25 @@ describe("--direction", () => {
       bytesUploaded: 125,
       conflictPaths: [],
       errors: [],
+      partial: false,
+      companies: [
+        {
+          company: "acme",
+          status: "complete",
+          filesDownloaded: 3,
+          bytesDownloaded: 100,
+          filesUploaded: 1,
+          bytesUploaded: 50,
+        },
+        {
+          company: "beta",
+          status: "complete",
+          filesDownloaded: 4,
+          bytesDownloaded: 250,
+          filesUploaded: 2,
+          bytesUploaded: 75,
+        },
+      ],
     });
   });

package/src/bin/sync-runner.ts CHANGED Viewed

@@ -37,8 +37,19 @@
  * of requiring per-event capture calls in the menubar.
  *
  * Exit code:
- *   0 — event stream describes the outcome (including setup-needed)
- *   1 — argv parse error or unrecoverable pre-sync failure
+ *   0 — event stream describes the outcome. The runner finished its protocol
+ *       without any company throwing. Includes setup-needed, auth-error, and
+ *       runs where every company completed OR cleanly returned `aborted: true`
+ *       (a `--on-conflict abort` policy decision is not an error).
+ *   1 — argv parse error or unrecoverable pre-sync failure.
+ *   2 — at least one company threw mid-stream (e.g. mid-fanout 401, network
+ *       reset, S3 5xx after retries). The all-complete event carries
+ *       `partial: true` and per-company partial counts captured from
+ *       `progress` events before the throw, so consumers parsing ndjson see
+ *       what actually transferred. This is distinct from exit 0 with
+ *       `partial: true` (clean conflict-aborts) — exit 2 is "something
+ *       unexpected happened", which the Tauri menubar converts to a Sentry
+ *       alert. Conflict-aborts intentionally do NOT alert.
  */
 import * as os from "os";
@@ -168,6 +179,36 @@ export type RunnerEvent =
        */
       conflictPaths: Array<{ company: string; path: string; direction: "pull" | "push" }>;
       errors: Array<{ company: string; message: string }>;
+      /**
+       * True when at least one company in the fanout did not complete cleanly
+       * — either it returned `aborted: true` (e.g. conflict-abort) or its sync
+       * function threw mid-stream (e.g. mid-fanout 401). When `partial: true`,
+       * the totals above include partial counts captured from `progress` events
+       * before the abort, NOT just companies that emitted a clean `complete`.
+       *
+       * Automated monitors should check this field — `errors.length > 0` alone
+       * isn't sufficient because an `aborted: true` return doesn't push to
+       * `errors` (it's a clean conflict-abort, not an exception).
+       */
+      partial: boolean;
+      /**
+       * Per-company breakdown of the fanout. Always present, one entry per
+       * planned company, in fanout order. Lets consumers reconcile per-company
+       * partial counts with the aggregate without re-walking `complete` /
+       * `error` event streams. The `status` field is the canonical signal:
+       * - "complete" — sync returned cleanly, `aborted: false`
+       * - "aborted"  — sync returned cleanly with `aborted: true` (conflict-abort)
+       * - "errored"  — sync threw mid-stream; counts are sourced from progress
+       *                events seen before the throw
+       */
+      companies: Array<{
+        company: string;
+        status: "complete" | "aborted" | "errored";
+        filesDownloaded: number;
+        bytesDownloaded: number;
+        filesUploaded: number;
+        bytesUploaded: number;
+      }>;
     };
 /**
@@ -533,17 +574,56 @@ export async function runRunner(
   const shareFn = deps.share ?? defaultShare;
   const doPush = parsed.direction === "push" || parsed.direction === "both";
   const doPull = parsed.direction === "pull" || parsed.direction === "both";
-  let totalDownloaded = 0;
-  let totalDownloadedBytes = 0;
-  let totalUploaded = 0;
-  let totalUploadedBytes = 0;
   const errors: Array<{ company: string; message: string }> = [];
   const allConflicts: Array<{ company: string; path: string; direction: "pull" | "push" }> = [];
+  // Per-company state, keyed by the company label (slug or UID-fallback) so
+  // both `progress` (which streams) and `complete`/throw (which lands once)
+  // can update the same row. The rollup at the bottom of the function walks
+  // every entry — this is the source of truth that closes the bug where an
+  // aborted company's partial counts were dropped from `all-complete`.
+  //
+  // We seed the active phase from the parsed direction flag so we know whether
+  // a `progress` event without a clear phase should bump downloaded or uploaded
+  // counters. For `direction: "both"` runs we lean on the in-flight phase
+  // — push runs first and sets `activePhase = "push"` while shareFn runs,
+  // pull sets it to "pull". The tagAndEmit closure reads `activePhase`
+  // at event time, so progress events route to the right column.
+  type CompanyStatus = "complete" | "aborted" | "errored";
+  interface CompanyState {
+    company: string;
+    status: CompanyStatus;
+    filesDownloaded: number;
+    bytesDownloaded: number;
+    filesUploaded: number;
+    bytesUploaded: number;
+  }
+  const stateByCompany = new Map<string, CompanyState>();
   for (const target of plan) {
     const companyLabel = target.slug;
+    const state: CompanyState = {
+      company: companyLabel,
+      // Default to "errored" so a throw before any complete-or-clean-abort
+      // path (the original bug) leaves the entry flagged as not-clean. The
+      // success/clean-abort paths overwrite this before the loop body exits.
+      status: "errored",
+      filesDownloaded: 0,
+      bytesDownloaded: 0,
+      filesUploaded: 0,
+      bytesUploaded: 0,
+    };
+    stateByCompany.set(companyLabel, state);
+    // Which phase is currently emitting `progress` events. A mutable variable
+    // captured by tagAndEmit (defined below), so the closure reads the latest
+    // value when each event fires. "pull" is the default for back-compat with
+    // pull-only runs.
+    let activePhase: "pull" | "push" = doPush && !doPull ? "push" : "pull";
     // Per-company event tagger — shared by push and pull phases so progress
     // rows land on the right company regardless of which phase emitted them.
+    // Also updates `state` for `progress` events so the rollup has accurate
+    // partial counts even if the sync function throws before returning.
     const tagAndEmit = (event: SyncProgressEvent): void => {
       if (event.type === "plan") {
         emit({
@@ -557,6 +637,13 @@ export async function runRunner(
           filesToConflict: event.filesToConflict,
         });
       } else if (event.type === "progress") {
+        if (activePhase === "push") {
+          state.filesUploaded += 1;
+          state.bytesUploaded += event.bytes;
+        } else {
+          state.filesDownloaded += 1;
+          state.bytesDownloaded += event.bytes;
+        }
         emit({
           type: "progress",
           company: companyLabel,
@@ -604,6 +691,7 @@ export async function runRunner(
       // point with `skipUnchanged` so we don't re-upload files that haven't
       // changed since the last sync.
       if (doPush) {
+        activePhase = "push";
         pushResult = await shareFn({
           paths: [path.join(parsed.hqRoot, "companies", target.slug)],
           company: target.uid,
@@ -619,6 +707,7 @@ export async function runRunner(
       // the user has local edits + remote drift; blindly pulling would erase
       // whichever side `--on-conflict abort` just protected.
       if (doPull && !pushResult.aborted) {
+        activePhase = "pull";
         pullResult = await syncFn({
           company: target.uid,
           vaultConfig,
@@ -637,6 +726,20 @@ export async function runRunner(
         ...pullResult.conflictPaths,
         ...pushResult.conflictPaths,
       ];
+      const aborted = pullResult.aborted || pushResult.aborted;
+      // Overwrite the progress-derived counts with the authoritative numbers
+      // from the sync/share return values. The `progress` stream over-counts
+      // when the inner walker emits a progress row for a file it then skips
+      // due to a journal hit — a clean return value is the source of truth.
+      // For the throw case below this overwrite never runs, so `state` keeps
+      // its progress-derived counts (which is exactly what we want there).
+      state.filesDownloaded = pullResult.filesDownloaded;
+      state.bytesDownloaded = pullResult.bytesDownloaded;
+      state.filesUploaded = pushResult.filesUploaded;
+      state.bytesUploaded = pushResult.bytesUploaded;
+      state.status = aborted ? "aborted" : "complete";
       emit({
         type: "complete",
         company: companyLabel,
@@ -654,7 +757,7 @@ export async function runRunner(
         conflictPaths: mergedConflictPaths,
         // Either phase aborting marks the company aborted — the UI treats
         // `aborted: true` as "sync didn't complete cleanly for this company".
-        aborted: pullResult.aborted || pushResult.aborted,
+        aborted,
       });
       for (const p of pullResult.conflictPaths) {
         allConflicts.push({ company: companyLabel, path: p, direction: "pull" });
@@ -662,13 +765,28 @@ export async function runRunner(
       for (const p of pushResult.conflictPaths) {
         allConflicts.push({ company: companyLabel, path: p, direction: "push" });
       }
-      totalDownloaded += pullResult.filesDownloaded;
-      totalDownloadedBytes += pullResult.bytesDownloaded;
-      totalUploaded += pushResult.filesUploaded;
-      totalUploadedBytes += pushResult.bytesUploaded;
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);
       errors.push({ company: companyLabel, message });
+      // `state.status` was seeded as "errored" at loop entry — the throw
+      // path leaves it there, and `state.files{Down,Up}loaded` reflects the
+      // partial counts captured from `progress` events before the throw.
+      // Emit a `complete` event with `aborted: true` and those partial
+      // counts so consumers walking the `complete` event stream see every
+      // company in the fanout uniformly. This is the fix for the misleading
+      // rollup — see file header `Exit code: 2` doc.
+      emit({
+        type: "complete",
+        company: companyLabel,
+        filesDownloaded: state.filesDownloaded,
+        bytesDownloaded: state.bytesDownloaded,
+        filesUploaded: state.filesUploaded,
+        bytesUploaded: state.bytesUploaded,
+        filesSkipped: 0,
+        conflicts: 0,
+        conflictPaths: [],
+        aborted: true,
+      });
       emit({
         type: "error",
         company: companyLabel,
@@ -679,6 +797,43 @@ export async function runRunner(
     }
   }
+  // Walk every per-company entry — the map holds one row per planned company,
+  // including ones that aborted via thrown exception. This is the fix for the
+  // bug where `all-complete` reported `filesDownloaded: 0` for an aborted
+  // personal-sync that had already emitted thousands of `progress` events:
+  // the rollup used to only sum companies that emitted a clean `complete`,
+  // which silently dropped partials when the sync function threw.
+  let totalDownloaded = 0;
+  let totalDownloadedBytes = 0;
+  let totalUploaded = 0;
+  let totalUploadedBytes = 0;
+  let partial = false;
+  const companies: Array<{
+    company: string;
+    status: CompanyStatus;
+    filesDownloaded: number;
+    bytesDownloaded: number;
+    filesUploaded: number;
+    bytesUploaded: number;
+  }> = [];
+  for (const target of plan) {
+    const s = stateByCompany.get(target.slug);
+    if (!s) continue; // unreachable — every plan entry seeds the map
+    totalDownloaded += s.filesDownloaded;
+    totalDownloadedBytes += s.bytesDownloaded;
+    totalUploaded += s.filesUploaded;
+    totalUploadedBytes += s.bytesUploaded;
+    if (s.status !== "complete") partial = true;
+    companies.push({
+      company: s.company,
+      status: s.status,
+      filesDownloaded: s.filesDownloaded,
+      bytesDownloaded: s.bytesDownloaded,
+      filesUploaded: s.filesUploaded,
+      bytesUploaded: s.bytesUploaded,
+    });
+  }
   emit({
     type: "all-complete",
     companiesAttempted: plan.length,
@@ -688,8 +843,15 @@ export async function runRunner(
     bytesUploaded: totalUploadedBytes,
     conflictPaths: allConflicts,
     errors,
+    partial,
+    companies,
   });
-  return 0;
+  // Exit 2 only when something actually threw (`errors.length > 0`). A clean
+  // conflict-abort sets `partial: true` in the JSON but exits 0 — the Tauri
+  // menubar's non-zero-exit Sentry capture would otherwise fire for normal
+  // user-policy outcomes. Consumers that want to flag any non-clean outcome
+  // (clean-abort + thrown-error) read `partial` from the JSON.
+  return errors.length > 0 ? 2 : 0;
 }
 // ---------------------------------------------------------------------------

package/src/cognito-auth.test.ts CHANGED Viewed

@@ -99,6 +99,105 @@ describe("isExpiring — expiresAt shape tolerance", () => {
   });
 });
+// ---------------------------------------------------------------------------
+// Stale-pool detection — decodeAccessTokenClientId + getValidAccessToken
+// self-evicts cached tokens minted by a different App Client (e.g. dev pool
+// tokens left over from before the 2026-04-25 cutover).
+// ---------------------------------------------------------------------------
+/**
+ * Build a minimal unsigned JWT carrying the given claims. Cognito's real
+ * tokens are RS256-signed; we don't verify here so the signature can be
+ * anything — only the base64url-encoded payload matters.
+ */
+function makeAccessToken(claims: Record<string, unknown>): string {
+  const header = Buffer.from(JSON.stringify({ alg: "RS256", typ: "JWT" }))
+    .toString("base64")
+    .replace(/=+$/, "");
+  const payload = Buffer.from(JSON.stringify(claims))
+    .toString("base64")
+    .replace(/=+$/, "");
+  return `${header}.${payload}.signature`;
+}
+const DEV_CLIENT = "4mmujmjq3srakdueg656b9m0mp";
+const PROD_CLIENT = "7acei2c8v870enheptb1j5foln";
+const baseConfig = {
+  region: "us-east-1",
+  userPoolDomain: "vault-indigo-hq-prod",
+  clientId: PROD_CLIENT,
+};
+describe("decodeAccessTokenClientId", () => {
+  it("returns the client_id claim from a well-formed JWT", async () => {
+    const { decodeAccessTokenClientId } = await importModule();
+    const token = makeAccessToken({ client_id: DEV_CLIENT, sub: "abc" });
+    expect(decodeAccessTokenClientId(token)).toBe(DEV_CLIENT);
+  });
+  it("returns null when client_id is absent", async () => {
+    const { decodeAccessTokenClientId } = await importModule();
+    const token = makeAccessToken({ sub: "abc" });
+    expect(decodeAccessTokenClientId(token)).toBeNull();
+  });
+  it("returns null when the token has fewer than two segments", async () => {
+    const { decodeAccessTokenClientId } = await importModule();
+    expect(decodeAccessTokenClientId("not-a-jwt")).toBeNull();
+  });
+  it("returns null when the payload isn't valid JSON", async () => {
+    const { decodeAccessTokenClientId } = await importModule();
+    expect(decodeAccessTokenClientId("aaa.bbb.ccc")).toBeNull();
+  });
+});
+describe("getValidAccessToken stale-pool detection", () => {
+  it("evicts a cached token whose client_id mismatches the current config", async () => {
+    const { saveCachedTokens, loadCachedTokens, getValidAccessToken } =
+      await importModule();
+    const devToken = makeAccessToken({ client_id: DEV_CLIENT, sub: "abc" });
+    saveCachedTokens({
+      ...baseTokens,
+      accessToken: devToken,
+      expiresAt: Date.now() + 60 * 60 * 1000,
+    });
+    expect(loadCachedTokens()).not.toBeNull();
+    await expect(
+      getValidAccessToken(baseConfig, { interactive: false }),
+    ).rejects.toThrow(/No valid HQ session/);
+    expect(loadCachedTokens()).toBeNull();
+  });
+  it("keeps a cached token whose client_id matches", async () => {
+    const { saveCachedTokens, getValidAccessToken } = await importModule();
+    const prodToken = makeAccessToken({ client_id: PROD_CLIENT, sub: "abc" });
+    saveCachedTokens({
+      ...baseTokens,
+      accessToken: prodToken,
+      expiresAt: Date.now() + 60 * 60 * 1000,
+    });
+    const access = await getValidAccessToken(baseConfig, {
+      interactive: false,
+    });
+    expect(access).toBe(prodToken);
+  });
+  it("keeps a cached token when client_id can't be decoded (back-compat)", async () => {
+    const { saveCachedTokens, getValidAccessToken } = await importModule();
+    saveCachedTokens({
+      ...baseTokens,
+      accessToken: "opaque-non-jwt",
+      expiresAt: Date.now() + 60 * 60 * 1000,
+    });
+    const access = await getValidAccessToken(baseConfig, {
+      interactive: false,
+    });
+    expect(access).toBe("opaque-non-jwt");
+  });
+});
 // ---------------------------------------------------------------------------
 // Round-trip: writers emit epoch-ms, readers read epoch-ms
 // ---------------------------------------------------------------------------

package/src/cognito-auth.ts CHANGED Viewed

@@ -120,6 +120,33 @@ export function isExpiring(tokens: CognitoTokens, bufferSeconds = 60): boolean {
   return expiresAt - Date.now() < bufferSeconds * 1000;
 }
+/**
+ * Decode the `client_id` claim from a Cognito access token (no signature
+ * verification — we only need to identify which App Client minted it).
+ * Returns null when the token can't be parsed.
+ *
+ * Used by `getValidAccessToken` to detect stale cached sessions that target
+ * a different Cognito App Client. The canonical case is a pre-2026-04-25
+ * cache file holding a `hq-vault-dev` token after the user upgraded to a
+ * post-cutover CLI: the access token stays "non-expiring" for an hour but
+ * the prod vault API rejects it with 401, and the dev refresh token can't
+ * be exchanged at the prod token endpoint. Detecting the mismatch and
+ * forcing a re-login is the only safe self-heal.
+ */
+export function decodeAccessTokenClientId(accessToken: string): string | null {
+  try {
+    const parts = accessToken.split(".");
+    if (parts.length < 2) return null;
+    const payloadB64 = parts[1];
+    const padded = payloadB64 + "=".repeat((4 - (payloadB64.length % 4)) % 4);
+    const json = Buffer.from(padded, "base64").toString("utf-8");
+    const claims = JSON.parse(json) as { client_id?: unknown };
+    return typeof claims.client_id === "string" ? claims.client_id : null;
+  } catch {
+    return null;
+  }
+}
 // ---------------------------------------------------------------------------
 // PKCE
 // ---------------------------------------------------------------------------
@@ -375,7 +402,22 @@ export async function getValidAccessToken(
   options: { interactive?: boolean } = {},
 ): Promise<string> {
   const interactive = options.interactive ?? true;
-  const cached = loadCachedTokens();
+  let cached = loadCachedTokens();
+  // Stale-pool detection: if the cached access token was issued by a
+  // different Cognito App Client than the one we're talking to now, drop the
+  // cache and re-authenticate. Without this, a user holding a pre-cutover
+  // dev-pool token would either keep using a token the prod API rejects
+  // with 401, or attempt a refresh against the prod token endpoint with a
+  // dev refresh token (InvalidGrant). See `decodeAccessTokenClientId` for
+  // the full rationale.
+  if (cached) {
+    const cachedClientId = decodeAccessTokenClientId(cached.accessToken);
+    if (cachedClientId !== null && cachedClientId !== config.clientId) {
+      clearCachedTokens();
+      cached = null;
+    }
+  }
   if (cached && !isExpiring(cached)) return cached.accessToken;