npm - vidspotai-shared - Versions diffs - 1.0.90 → 1.0.92 - Mend

vidspotai-shared 1.0.90 → 1.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/lib/services/aiGen/providers/google/google.service.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;~~IA+B1B~~,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;~~IA0M3B~~,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAiH3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BpJ"}
1	+ {"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;IAsC1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IA0O3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAiH3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BpJ"}

package/lib/services/aiGen/providers/google/google.service.js CHANGED Viewed

@@ -122,8 +122,14 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
                     // typed UserFacingError so the scene processor stores a translatable
                     // code instead of the raw provider JSON.
                     const userFacing = (0, googleErrors_1.classifyGoogleApiError)(err);
-                    if (userFacing)
+                    if (userFacing) {
+                        // Preserve the ORIGINAL provider error so the key-pool failover
+                        // (classifyGoogleKeyHealth) can still read the underlying
+                        // status/httpCode after this conversion. Without it the failover saw
+                        // only the friendly string and never failed over to the second key.
+                        userFacing.cause = err;
                         throw userFacing;
+                    }
                     throw err;
                 }
                 const backoffMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s
@@ -269,32 +275,62 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
             catch (err) {
                 lastErr = err;
                 const keyFailure = (0, googleErrors_1.classifyGoogleKeyHealth)(err);
-                // Not an account-level failure (e.g. content rejection, bad params,
-                // transient RPM 429) → don't burn other accounts on it; surface as-is.
+                // Not a fail-over-able failure (e.g. content rejection, bad params) →
+                // don't burn other accounts on it; surface as-is.
                 if (!keyFailure)
                     throw err;
-                // Account-level failure: circuit-break this key and try the next one.
-                // (No operation was created above, so this account wasn't charged.)
-                await this.keyPool.markDepleted(keyEntry.id, keyFailure.ttlSeconds, keyFailure.reason);
-                logger_1.logger.warn("Google Veo: key account failure — failing over", {
+                // Observability: log the RAW provider error once so we can see whether
+                // this was an RPM vs RPD limit and on which key (AI Studio's dashboard
+                // lags real-time, so this is our authoritative signal — Ammar 2026-06-20).
+                const rawCause = err.cause;
+                logger_1.logger.warn("Google Veo: key submit failed — failing over to next key", {
                     modelKey: params.modelKey,
                     keyId: keyEntry.id,
                     kind: keyFailure.kind,
                     attempt: attempt + 1,
                     poolSize: this.keyPool.size,
+                    rawError: rawCause instanceof Error
+                        ? rawCause.message.slice(0, 300)
+                        : err instanceof Error
+                            ? err.message.slice(0, 300)
+                            : String(err),
                 });
+                // Circuit-break ONLY persistent account faults (billing depleted / auth
+                // revoked) — those keep failing on every call until a human fixes them.
+                // A `quota` (RPM/RPD) limit is transient and key-specific: the per-key
+                // routing counters already steer new jobs away until it resets, so a
+                // persistent circuit-break would needlessly idle the key. We still fail
+                // over within THIS job via the `tried` set below.
+                if (keyFailure.kind !== "quota") {
+                    await this.keyPool.markDepleted(keyEntry.id, keyFailure.ttlSeconds, keyFailure.reason);
+                }
                 if (isLastUsableKey || tried.size >= this.keyPool.size) {
-                    // No untried key left — EVERY Google billing account is down. This is
-                    // a platform-wide outage (the 2026-06-18 Veo-depletion class), not a
-                    // per-job blip: page Slack at the source with full context so on-call
-                    // sees it immediately rather than relying on downstream classification.
-                    logger_1.logger.error("Google Veo: ALL keys exhausted — every account failing over", {
-                        modelKey: params.modelKey,
-                        kind: keyFailure.kind,
-                        reason: keyFailure.reason,
-                        triedKeys: [...tried],
-                        poolSize: this.keyPool.size,
-                    });
+                    // No untried key left.
+                    if (keyFailure.kind === "quota") {
+                        // Every key is momentarily rate-limited — expected capacity pressure,
+                        // NOT a platform outage. Stay at warn (non-paging) and surface a
+                        // VIDEO_PROVIDER_RATE_LIMITED so the scene fails cleanly + refunds
+                        // (and the job-start capacity selector already had its chance to
+                        // spill to another provider). Do NOT page on-call for this.
+                        logger_1.logger.warn("Google Veo: all keys rate-limited — no key with budget right now", {
+                            modelKey: params.modelKey,
+                            reason: keyFailure.reason,
+                            triedKeys: [...tried],
+                            poolSize: this.keyPool.size,
+                        });
+                    }
+                    else {
+                        // EVERY Google billing account is down (depleted / revoked). This is
+                        // a platform-wide outage (the 2026-06-18 Veo-depletion class): page
+                        // Slack at the source with full context so on-call sees it immediately.
+                        logger_1.logger.error("Google Veo: ALL keys exhausted — every account failing over", {
+                            modelKey: params.modelKey,
+                            kind: keyFailure.kind,
+                            reason: keyFailure.reason,
+                            triedKeys: [...tried],
+                            poolSize: this.keyPool.size,
+                        });
+                    }
                     // Propagate so the job surfaces a friendly error + refunds, and the
                     // job-level provider fallback can spill to another provider.
                     throw err;

package/lib/services/aiGen/providers/google/googleApiKeys.d.ts CHANGED Viewed

@@ -12,13 +12,17 @@
  *   - key[0] = the NEW key (vidspotai project), currently **Tier 1** (2/min,
  *     10/day per Veo model). Used FIRST, deliberately, to drive usage and
  *     promote its billing account up the tier ladder.
- *   - key[1] = the CURRENT key, **Tier 2** (4/min, 50/day). Used once key[0]
- *     is out of per-minute / per-day budget.
- * Aggregate Veo budget = T1 + T2 = 6/min, 60/day. When BOTH are exhausted, the
- * job-start capacity selector (videoJobProcessor) spills to another provider.
+ *   - key[1] = the overflow key, **Tier 3** (10/min, 500/day) as of 2026-06-20
+ *     (was Tier 2 4/50; its billing account was promoted). Used once key[0]
+ *     is out of per-minute / per-day budget, OR immediately as failover when a
+ *     key[0] submit returns a per-key 429 (see google.service generateVideo).
+ * Aggregate Veo budget = T1 + T3 = 12/min, 510/day (GOOGLE_API_KEY_TIERS=1,3 on
+ * Doppler prd + Railway). When BOTH are exhausted, the job-start capacity
+ * selector (videoJobProcessor) spills to another provider, else the scene fails
+ * cleanly with VIDEO_PROVIDER_RATE_LIMITED (refunded, non-paging).
  *
- * When key[1]'s account is billed it moves to Tier 3; bump its tier in
- * GOOGLE_API_KEY_TIERS then (no code change — the ladder below handles it).
+ * Tier changes need NO code change — bump GOOGLE_API_KEY_TIERS and the ladder
+ * below rescales both the per-key router caps and the aggregate model-level gate.
  *
  * Vertex AI (DEFERRED, on record for the future): Veo is also available via
  * Vertex, where quota is **per-project** and the current billing account is

package/lib/services/aiGen/providers/google/googleApiKeys.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"googleApiKeys.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleApiKeys.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG~~;AAoBH,8EAA8E;AAC9E,wBAAgB,kBAAkB,IAAI,MAAM,EAAE,CAa7C;AAED,yEAAyE;AACzE,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAUjD;AAED,8EAA8E;AAC9E,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,KAAK,GAAG,KAAK,GAClB,MAAM,CAGR;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,CAIjE;AAED,uDAAuD;AACvD,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEjD;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAE5F;AAED,iFAAiF;AACjF,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAIrF"}
1	+ {"version":3,"file":"googleApiKeys.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleApiKeys.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAoBH,8EAA8E;AAC9E,wBAAgB,kBAAkB,IAAI,MAAM,EAAE,CAa7C;AAED,yEAAyE;AACzE,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAUjD;AAED,8EAA8E;AAC9E,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,KAAK,GAAG,KAAK,GAClB,MAAM,CAGR;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,CAIjE;AAED,uDAAuD;AACvD,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEjD;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAE5F;AAED,iFAAiF;AACjF,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAIrF"}

package/lib/services/aiGen/providers/google/googleApiKeys.js CHANGED Viewed

@@ -13,13 +13,17 @@
  *   - key[0] = the NEW key (vidspotai project), currently **Tier 1** (2/min,
  *     10/day per Veo model). Used FIRST, deliberately, to drive usage and
  *     promote its billing account up the tier ladder.
- *   - key[1] = the CURRENT key, **Tier 2** (4/min, 50/day). Used once key[0]
- *     is out of per-minute / per-day budget.
- * Aggregate Veo budget = T1 + T2 = 6/min, 60/day. When BOTH are exhausted, the
- * job-start capacity selector (videoJobProcessor) spills to another provider.
+ *   - key[1] = the overflow key, **Tier 3** (10/min, 500/day) as of 2026-06-20
+ *     (was Tier 2 4/50; its billing account was promoted). Used once key[0]
+ *     is out of per-minute / per-day budget, OR immediately as failover when a
+ *     key[0] submit returns a per-key 429 (see google.service generateVideo).
+ * Aggregate Veo budget = T1 + T3 = 12/min, 510/day (GOOGLE_API_KEY_TIERS=1,3 on
+ * Doppler prd + Railway). When BOTH are exhausted, the job-start capacity
+ * selector (videoJobProcessor) spills to another provider, else the scene fails
+ * cleanly with VIDEO_PROVIDER_RATE_LIMITED (refunded, non-paging).
  *
- * When key[1]'s account is billed it moves to Tier 3; bump its tier in
- * GOOGLE_API_KEY_TIERS then (no code change — the ladder below handles it).
+ * Tier changes need NO code change — bump GOOGLE_API_KEY_TIERS and the ladder
+ * below rescales both the per-key router caps and the aggregate model-level gate.
  *
  * Vertex AI (DEFERRED, on record for the future): Veo is also available via
  * Vertex, where quota is **per-project** and the current billing account is

package/lib/services/aiGen/providers/google/googleErrors.d.ts CHANGED Viewed

@@ -10,14 +10,16 @@ import { UserFacingError } from "../../../../utils/errors";
  * error instead of raw provider JSON that pages Slack as a platform bug.
  */
 export declare function classifyGoogleApiError(err: any): UserFacingError | null;
-/** A key-level (billing / auth) failure that warrants failing over to the next
- *  key in the pool and circuit-breaking the failed one for `ttlSeconds`. */
+/** A key-level failure that warrants failing over to the next key in the pool.
+ *  `billing`/`auth` also circuit-break the failed key for `ttlSeconds`; `quota`
+ *  (a per-key RPM/RPD limit) fails over WITHOUT a persistent circuit-break —
+ *  the per-key routing counters already steer traffic away until it resets. */
 export interface GoogleKeyHealthFailure {
-    /** "billing" | "auth" — the class of account problem, for logs + TTL choice. */
-    kind: "billing" | "auth";
+    /** Class of key problem, for logs + TTL + paging policy. */
+    kind: "billing" | "auth" | "quota";
     /** Short human reason, persisted as the circuit-breaker value (Loki/debug). */
     reason: string;
-    /** How long to skip this key before re-probing it. */
+    /** How long to skip this key before re-probing it (only used for billing/auth). */
     ttlSeconds: number;
 }
 /**
@@ -26,16 +28,21 @@ export interface GoogleKeyHealthFailure {
  * per-minute rate limit or a content/validation error.
  *
  * Returns a failure descriptor when the pool should fail over to the next key
- * and circuit-break this one; null otherwise (caller handles normally).
+ * (and, for billing/auth, circuit-break this one); null otherwise (a genuine
+ * content/param error the caller surfaces as-is).
  *
  * IMPORTANT (cost safety): this is only consulted on a THROWN submit — i.e. no
  * Veo operation was created, so the account was NOT billed. Failing over to a
  * second billing account therefore cannot double-charge for the same job.
  *
- * Deliberately does NOT match a bare per-minute 429 / RESOURCE_EXHAUSTED with
- * no billing signal: that's our own RPM cap, not an account outage, and the
- * existing quota router + provider-fallback chain already handle it. Failing
- * over on every RPM blip would needlessly drain the reserve account.
+ * ⚠️ The submit path wraps the raw provider error in a UserFacingError BEFORE
+ * this runs (withTransientRetry → classifyGoogleApiError), so we MUST unwrap:
+ * read the original error off `.cause` for the JSON status/code, and use the
+ * UserFacingError's own `.code` as a secondary signal. The previous version
+ * only read `err.message` — which by this point was the friendly canned string
+ * — so it matched nothing and the failover to the second key NEVER fired. That
+ * (plus the billing misclassification in classifyGoogleApiError) is why two funded
+ * keys behaved like one and jobs failed instead of retrying on the overflow key.
  */
 export declare function classifyGoogleKeyHealth(err: any): GoogleKeyHealthFailure | null;
 //# sourceMappingURL=googleErrors.d.ts.map

package/lib/services/aiGen/providers/google/googleErrors.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"googleErrors.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleErrors.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EAGhB,MAAM,0BAA0B,CAAC;AAElC;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,GAAG,GAAG,eAAe,GAAG,IAAI,~~CA4IvE~~;AAED;~~4EAC4E;AAC5E~~,MAAM,WAAW,sBAAsB;IACrC,~~gFAAgF~~;~~IAChF~~,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;~~IACzB~~,+EAA+E;IAC/E,MAAM,EAAE,MAAM,CAAC;IACf,~~sDAAsD~~;~~IACtD~~,UAAU,EAAE,MAAM,CAAC;CACpB;~~AAUD;;;;;;;;;;;;;;;;GAgBG~~;AACH,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,GAAG,GAAG,sBAAsB,GAAG,IAAI,~~CA4C~~/E"}
1	+ {"version":3,"file":"googleErrors.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleErrors.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EAGhB,MAAM,0BAA0B,CAAC;AAElC;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,GAAG,GAAG,eAAe,GAAG,IAAI,CAoJvE;AAED;;;+EAG+E;AAC/E,MAAM,WAAW,sBAAsB;IACrC,4DAA4D;IAC5D,IAAI,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IACnC,+EAA+E;IAC/E,MAAM,EAAE,MAAM,CAAC;IACf,mFAAmF;IACnF,UAAU,EAAE,MAAM,CAAC;CACpB;AAcD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,GAAG,GAAG,sBAAsB,GAAG,IAAI,CAuE/E"}

package/lib/services/aiGen/providers/google/googleErrors.js CHANGED Viewed

@@ -22,13 +22,21 @@ function classifyGoogleApiError(err) {
         const status = inner.status;
         const msg = inner.message ?? raw;
         // Billing depletion / billing-not-enabled. Veo surfaces this as
-        // RESOURCE_EXHAUSTED / 429 but with a billing-specific message ("Your
-        // prepayment credits are depleted… manage your project and billing") or as
-        // FAILED_PRECONDITION. This is an ACCOUNT problem, NOT a per-minute rate
-        // limit — give it the distinct ACCOUNT_QUOTA_EXCEEDED code (which DOES page
-        // Slack) and a SAFE message. Never echo the raw provider text: it tells the
-        // end user to go manage OUR Google AI Studio billing (the bug we're fixing).
-        const billingSignal = /prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|\bbilling\b|free tier is not available|check your plan and billing/i.test(msg) || status === "FAILED_PRECONDITION";
+        // RESOURCE_EXHAUSTED / 429 but with a billing-DEPLETION-specific message
+        // ("Your prepayment credits are depleted… manage your project and billing")
+        // or as FAILED_PRECONDITION. This is an ACCOUNT problem, NOT a per-minute /
+        // per-day rate limit — give it the distinct ACCOUNT_QUOTA_EXCEEDED code
+        // (which DOES page Slack) and a SAFE message.
+        //
+        // ⚠️ NARROWED 2026-06-20 (prod incident): Google's ORDINARY quota 429 also
+        // reads "You exceeded your current quota, please check your plan and billing
+        // details" — so matching bare `billing` / `check your plan and billing` here
+        // mislabeled every Veo rate limit as ACCOUNT_QUOTA_EXCEEDED. That paged Slack
+        // on funded accounts AND (because the key-pool failover deliberately skips
+        // billing faults) suppressed the failover to the second key. Match ONLY the
+        // depletion-specific phrases now; a bare 429/RESOURCE_EXHAUSTED with no
+        // depletion signal falls through to the rate-limit branch below.
+        const billingSignal = /prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|free tier is not available|billing account (?:is )?(?:closed|suspended|disabled|not (?:active|configured))/i.test(msg) || status === "FAILED_PRECONDITION";
         if (billingSignal) {
             return new errors_1.UserFacingError((0, errors_1.friendlyMessageForCode)(errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED), errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED);
         }
@@ -118,28 +126,44 @@ function classifyGoogleApiError(err) {
 // dead account (and to route the whole pipeline onto the healthy key), short
 // enough that a top-up recovers within half an hour. Auth/permission problems
 // (revoked / not-yet-propagated key, disabled API) re-probe faster — they're
-// often a transient config rollout — at 15 min.
+// often a transient config rollout — at 15 min. A per-key rate limit is not an
+// account outage (the key recovers as its minute/day window rolls), so its
+// "cooloff" is short and mostly nominal — the in-job failover skips the key via
+// the `tried` set, and cross-job routing is handled by the per-key counters.
 const BILLING_CIRCUIT_TTL_S = 30 * 60;
 const AUTH_CIRCUIT_TTL_S = 15 * 60;
+const QUOTA_CIRCUIT_TTL_S = 60;
 /**
  * Classifies whether a Veo *submit* failure is a KEY-LEVEL problem with THIS
  * key — billing exhausted, auth revoked, or a per-key RPM/RPD rate limit — as
  * distinct from a content/validation error.
  *
  * Returns a failure descriptor when the pool should fail over to the next key
- * and circuit-break this one; null otherwise (caller handles normally).
+ * (and, for billing/auth, circuit-break this one); null otherwise (a genuine
+ * content/param error the caller surfaces as-is).
  *
  * IMPORTANT (cost safety): this is only consulted on a THROWN submit — i.e. no
  * Veo operation was created, so the account was NOT billed. Failing over to a
  * second billing account therefore cannot double-charge for the same job.
  *
- * Deliberately does NOT match a bare per-minute 429 / RESOURCE_EXHAUSTED with
- * no billing signal: that's our own RPM cap, not an account outage, and the
- * existing quota router + provider-fallback chain already handle it. Failing
- * over on every RPM blip would needlessly drain the reserve account.
+ * ⚠️ The submit path wraps the raw provider error in a UserFacingError BEFORE
+ * this runs (withTransientRetry → classifyGoogleApiError), so we MUST unwrap:
+ * read the original error off `.cause` for the JSON status/code, and use the
+ * UserFacingError's own `.code` as a secondary signal. The previous version
+ * only read `err.message` — which by this point was the friendly canned string
+ * — so it matched nothing and the failover to the second key NEVER fired. That
+ * (plus the billing misclassification above) is why two funded keys behaved
+ * like one and jobs failed instead of retrying on the overflow key.
  */
 function classifyGoogleKeyHealth(err) {
-    const raw = err?.message ?? "";
+    const userFacingCode = err && typeof err === "object" && typeof err.code === "string"
+        ? err.code
+        : undefined;
+    // Prefer the original provider error (attached as `.cause`) for JSON parsing.
+    const rawErr = err && typeof err === "object" && err.cause
+        ? err.cause
+        : err;
+    const raw = rawErr?.message ?? err?.message ?? "";
     let httpCode;
     let status;
     let msg = raw;
@@ -151,14 +175,17 @@ function classifyGoogleKeyHealth(err) {
         msg = inner.message ?? raw;
     }
     catch {
-        // Non-JSON message — match against the raw string below.
+        // Non-JSON message — match against the raw string + the user-facing code.
     }
-    // Billing depletion. Veo surfaces this as RESOURCE_EXHAUSTED / 429 but with a
-    // billing-specific message ("Your prepayment credits are depleted… manage
-    // your project and billing"), OR as FAILED_PRECONDITION (free-tier/billing
-    // not enabled). Match the billing signal, NOT a bare 429.
-    if (/prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|billing|free tier is not available|check your plan and billing|FAILED_PRECONDITION/i.test(msg) ||
-        status === "FAILED_PRECONDITION") {
+    // Billing depletion / disabled — a real ACCOUNT outage, NOT a rate limit.
+    // Narrowed (2026-06-20) to depletion-specific phrases: Google's ordinary quota
+    // 429 also says "check your plan and billing details", so matching bare
+    // `billing` here is exactly what misrouted rate limits into a 30-min circuit
+    // break + Slack page. Keep FAILED_PRECONDITION (free-tier/billing not enabled)
+    // and the upstream classifier's ACCOUNT_QUOTA_EXCEEDED code.
+    if (/prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|free tier is not available|billing account (?:is )?(?:closed|suspended|disabled|not (?:active|configured))/i.test(msg) ||
+        status === "FAILED_PRECONDITION" ||
+        userFacingCode === errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED) {
         return { kind: "billing", reason: `billing: ${msg.slice(0, 120)}`, ttlSeconds: BILLING_CIRCUIT_TTL_S };
     }
     // Auth / permission: key revoked, not yet propagated, API disabled, or the
@@ -168,8 +195,20 @@ function classifyGoogleKeyHealth(err) {
         httpCode === 403 ||
         status === "PERMISSION_DENIED" ||
         status === "UNAUTHENTICATED" ||
+        userFacingCode === errors_1.USER_FACING_ERROR_CODES.PROVIDER_AUTH_ERROR ||
         /API[_ ]key not valid|API_KEY_INVALID|permission denied|SERVICE_DISABLED|has not been used in project|consumer .* (?:suspended|disabled)/i.test(msg)) {
         return { kind: "auth", reason: `auth: ${msg.slice(0, 120)}`, ttlSeconds: AUTH_CIRCUIT_TTL_S };
     }
+    // Per-key rate limit (RPM / RPD). This is the case that was previously
+    // EXCLUDED — and the whole reason the pool never failed over on a 429. A bare
+    // 429 / RESOURCE_EXHAUSTED (no billing signal) means THIS key is out of budget
+    // for the moment; fail over to the next key, which "would mostly always have
+    // limit available" (Ammar, 2026-06-20). No persistent circuit-break: the
+    // per-key routing counters + the in-job `tried` set handle skipping it.
+    if (status === "RESOURCE_EXHAUSTED" ||
+        httpCode === 429 ||
+        userFacingCode === errors_1.USER_FACING_ERROR_CODES.VIDEO_PROVIDER_RATE_LIMITED) {
+        return { kind: "quota", reason: `quota: ${msg.slice(0, 120)}`, ttlSeconds: QUOTA_CIRCUIT_TTL_S };
+    }
     return null;
 }

package/lib/services/aiGen/providers/google/googleKeyPool.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"googleKeyPool.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleKeyPool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,OAAO,EACL,iBAAiB,EAKlB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;;;;;;GAgBG;AAEH,UAAU,QAAQ;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd;~~AAcD~~,cAAM,aAAa;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAa;;IA4BrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,8EAA8E;IAC9E,IAAI,MAAM,IAAI,MAAM,EAAE,CAErB;IAUD,OAAO,CAAC,WAAW;IAInB,kFAAkF;IAC5E,YAAY,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,qFAAqF;YACvE,WAAW;IAiBzB,mDAAmD;IACnD,IAAI,aAAa,IAAI,WAAW,CAE/B;IAED,gFAAgF;IAChF,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS;IAK3D;;;;;;;;OAQG;IACG,aAAa,CACjB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GACjB,OAAO,CAAC,QAAQ,CAAC;~~IA2DpB~~,8EAA8E;YAChE,OAAO;CAUtB;AAID,sCAAsC;AACtC,wBAAgB,gBAAgB,IAAI,aAAa,CAGhD;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAC7B,YAAY,EAAE,aAAa,EAAE,CAAC"}
1	+ {"version":3,"file":"googleKeyPool.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleKeyPool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,OAAO,EACL,iBAAiB,EAKlB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;;;;;;GAgBG;AAEH,UAAU,QAAQ;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd;AAiDD,cAAM,aAAa;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAa;;IA4BrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,8EAA8E;IAC9E,IAAI,MAAM,IAAI,MAAM,EAAE,CAErB;IAUD,OAAO,CAAC,WAAW;IAInB,kFAAkF;IAC5E,YAAY,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,qFAAqF;YACvE,WAAW;IAiBzB,mDAAmD;IACnD,IAAI,aAAa,IAAI,WAAW,CAE/B;IAED,gFAAgF;IAChF,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS;IAK3D;;;;;;;;OAQG;IACG,aAAa,CACjB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GACjB,OAAO,CAAC,QAAQ,CAAC;IA4DpB,8EAA8E;YAChE,OAAO;CAUtB;AAID,sCAAsC;AACtC,wBAAgB,gBAAgB,IAAI,aAAa,CAGhD;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAC7B,YAAY,EAAE,aAAa,EAAE,CAAC"}

package/lib/services/aiGen/providers/google/googleKeyPool.js CHANGED Viewed

@@ -7,6 +7,40 @@ const logger_1 = require("../../../../utils/logger");
 const redis_service_1 = require("../../../redis.service");
 const googleApiKeys_1 = require("./googleApiKeys");
 Object.defineProperty(exports, "googleApiKeyCount", { enumerable: true, get: function () { return googleApiKeys_1.googleApiKeyCount; } });
+/**
+ * Atomic per-key check-and-consume for the routing counters (RPM + RPD).
+ *
+ * Replaces the old GET-then-INCR in pickForSubmit, which was NON-atomic: under
+ * concurrent submits every caller read "this key has room" before any of them
+ * incremented, so they all piled onto the SAME key (usually the tiny Tier-1
+ * key0) past its ceiling while the other key sat idle — then Google 429'd and,
+ * with the failover bug, the job just failed. Checking and consuming in one
+ * Lua round-trip makes the per-key cap a hard, race-free gate.
+ *
+ * Returns { 1, newMin, dayUsed } when a slot was consumed (dayUsed = count read BEFORE this increment), or
+ *         { 0, minUsed, dayUsed } when this key is already at its min/day cap.
+ * A limit <= 0 means "no cap" (skip that gate). The minute counter is a fixed
+ * 60s window (matches the legacy per-key behavior); the day counter expires at
+ * UTC midnight.
+ */
+const CONSUME_KEY_SCRIPT = `
+local minKey = KEYS[1]
+local dayKey = KEYS[2]
+local minLimit = tonumber(ARGV[1])
+local dayLimit = tonumber(ARGV[2])
+local secsTilMidnight = tonumber(ARGV[3])
+local minUsed = tonumber(redis.call('GET', minKey) or '0')
+local dayUsed = tonumber(redis.call('GET', dayKey) or '0')
+if minLimit > 0 and minUsed >= minLimit then return { 0, minUsed, dayUsed } end
+if dayLimit > 0 and dayUsed >= dayLimit then return { 0, minUsed, dayUsed } end
+local newMin = redis.call('INCR', minKey)
+if newMin == 1 then redis.call('EXPIRE', minKey, 60) end
+if dayLimit > 0 then
+  local newDay = redis.call('INCR', dayKey)
+  if newDay == 1 then redis.call('EXPIRE', dayKey, secsTilMidnight) end
+end
+return { 1, newMin, dayUsed }
+`;
 function utcDateKey() {
     return new Date().toISOString().slice(0, 10);
 }
@@ -134,27 +168,21 @@ class GoogleKeyPool {
         if (candidates.length === 1)
             return candidates[0];
         const date = utcDateKey();
+        const secsTilMidnight = secsUntilMidnight();
         for (const entry of candidates) {
             try {
                 const perKeyMinLimit = (0, googleApiKeys_1.scaleLimitForTier)(baselineMin, entry.tier, "rpm");
                 const perKeyDayLimit = (0, googleApiKeys_1.scaleLimitForTier)(baselineDay, entry.tier, "rpd");
                 const dayKey = `gkpool:${entry.id}:${modelId}:day:${date}`;
                 const minKey = `gkpool:${entry.id}:${modelId}:min`;
-                const [dayRaw, minRaw] = await Promise.all([
-                    client.get(dayKey),
-                    client.get(minKey),
-                ]);
-                const dayUsed = dayRaw ? Number(dayRaw) : 0;
-                const minUsed = minRaw ? Number(minRaw) : 0;
-                const dayOk = perKeyDayLimit <= 0 || dayUsed < perKeyDayLimit;
-                const minOk = perKeyMinLimit <= 0 || minUsed < perKeyMinLimit;
-                if (dayOk && minOk) {
-                    await this.consume(entry, modelId, date);
+                // Atomic check-and-consume: no GET-then-INCR race, so two concurrent
+                // submits can't both think key0 has room and oversubscribe it.
+                const res = (await client.eval(CONSUME_KEY_SCRIPT, 2, minKey, dayKey, String(perKeyMinLimit), String(perKeyDayLimit), String(secsTilMidnight)));
+                if (res[0] === 1)
                     return entry;
-                }
             }
             catch (err) {
-                logger_1.logger.warn("googleKeyPool: routing read failed, trying next key", {
+                logger_1.logger.warn("googleKeyPool: routing eval failed, trying next key", {
                     keyId: entry.id,
                     err: err instanceof Error ? err.message : String(err),
                 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vidspotai-shared",
-  "version": "1.0.90",
+  "version": "1.0.92",
   "main": "lib/index.js",
   "types": "lib/index.d.ts",
   "exports": {