vidspotai-shared 1.0.90 → 1.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;IA+B1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IA0M3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAiH3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BpJ"}
1
+ {"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;IAsC1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IA0O3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAiH3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BpJ"}
@@ -122,8 +122,14 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
122
122
  // typed UserFacingError so the scene processor stores a translatable
123
123
  // code instead of the raw provider JSON.
124
124
  const userFacing = (0, googleErrors_1.classifyGoogleApiError)(err);
125
- if (userFacing)
125
+ if (userFacing) {
126
+ // Preserve the ORIGINAL provider error so the key-pool failover
127
+ // (classifyGoogleKeyHealth) can still read the underlying
128
+ // status/httpCode after this conversion. Without it the failover saw
129
+ // only the friendly string and never failed over to the second key.
130
+ userFacing.cause = err;
126
131
  throw userFacing;
132
+ }
127
133
  throw err;
128
134
  }
129
135
  const backoffMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s
@@ -269,32 +275,62 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
269
275
  catch (err) {
270
276
  lastErr = err;
271
277
  const keyFailure = (0, googleErrors_1.classifyGoogleKeyHealth)(err);
272
- // Not an account-level failure (e.g. content rejection, bad params,
273
- // transient RPM 429) → don't burn other accounts on it; surface as-is.
278
+ // Not a fail-over-able failure (e.g. content rejection, bad params) →
279
+ // don't burn other accounts on it; surface as-is.
274
280
  if (!keyFailure)
275
281
  throw err;
276
- // Account-level failure: circuit-break this key and try the next one.
277
- // (No operation was created above, so this account wasn't charged.)
278
- await this.keyPool.markDepleted(keyEntry.id, keyFailure.ttlSeconds, keyFailure.reason);
279
- logger_1.logger.warn("Google Veo: key account failure — failing over", {
282
+ // Observability: log the RAW provider error once so we can see whether
283
+ // this was an RPM vs RPD limit and on which key (AI Studio's dashboard
284
+ // lags real-time, so this is our authoritative signal — Ammar 2026-06-20).
285
+ const rawCause = err.cause;
286
+ logger_1.logger.warn("Google Veo: key submit failed — failing over to next key", {
280
287
  modelKey: params.modelKey,
281
288
  keyId: keyEntry.id,
282
289
  kind: keyFailure.kind,
283
290
  attempt: attempt + 1,
284
291
  poolSize: this.keyPool.size,
292
+ rawError: rawCause instanceof Error
293
+ ? rawCause.message.slice(0, 300)
294
+ : err instanceof Error
295
+ ? err.message.slice(0, 300)
296
+ : String(err),
285
297
  });
298
+ // Circuit-break ONLY persistent account faults (billing depleted / auth
299
+ // revoked) — those keep failing on every call until a human fixes them.
300
+ // A `quota` (RPM/RPD) limit is transient and key-specific: the per-key
301
+ // routing counters already steer new jobs away until it resets, so a
302
+ // persistent circuit-break would needlessly idle the key. We still fail
303
+ // over within THIS job via the `tried` set below.
304
+ if (keyFailure.kind !== "quota") {
305
+ await this.keyPool.markDepleted(keyEntry.id, keyFailure.ttlSeconds, keyFailure.reason);
306
+ }
286
307
  if (isLastUsableKey || tried.size >= this.keyPool.size) {
287
- // No untried key left — EVERY Google billing account is down. This is
288
- // a platform-wide outage (the 2026-06-18 Veo-depletion class), not a
289
- // per-job blip: page Slack at the source with full context so on-call
290
- // sees it immediately rather than relying on downstream classification.
291
- logger_1.logger.error("Google Veo: ALL keys exhausted every account failing over", {
292
- modelKey: params.modelKey,
293
- kind: keyFailure.kind,
294
- reason: keyFailure.reason,
295
- triedKeys: [...tried],
296
- poolSize: this.keyPool.size,
297
- });
308
+ // No untried key left.
309
+ if (keyFailure.kind === "quota") {
310
+ // Every key is momentarily rate-limited — expected capacity pressure,
311
+ // NOT a platform outage. Stay at warn (non-paging) and surface a
312
+ // VIDEO_PROVIDER_RATE_LIMITED so the scene fails cleanly + refunds
313
+ // (and the job-start capacity selector already had its chance to
314
+ // spill to another provider). Do NOT page on-call for this.
315
+ logger_1.logger.warn("Google Veo: all keys rate-limited — no key with budget right now", {
316
+ modelKey: params.modelKey,
317
+ reason: keyFailure.reason,
318
+ triedKeys: [...tried],
319
+ poolSize: this.keyPool.size,
320
+ });
321
+ }
322
+ else {
323
+ // EVERY Google billing account is down (depleted / revoked). This is
324
+ // a platform-wide outage (the 2026-06-18 Veo-depletion class): page
325
+ // Slack at the source with full context so on-call sees it immediately.
326
+ logger_1.logger.error("Google Veo: ALL keys exhausted — every account failing over", {
327
+ modelKey: params.modelKey,
328
+ kind: keyFailure.kind,
329
+ reason: keyFailure.reason,
330
+ triedKeys: [...tried],
331
+ poolSize: this.keyPool.size,
332
+ });
333
+ }
298
334
  // Propagate so the job surfaces a friendly error + refunds, and the
299
335
  // job-level provider fallback can spill to another provider.
300
336
  throw err;
@@ -12,13 +12,17 @@
12
12
  * - key[0] = the NEW key (vidspotai project), currently **Tier 1** (2/min,
13
13
  * 10/day per Veo model). Used FIRST, deliberately, to drive usage and
14
14
  * promote its billing account up the tier ladder.
15
- * - key[1] = the CURRENT key, **Tier 2** (4/min, 50/day). Used once key[0]
16
- * is out of per-minute / per-day budget.
17
- * Aggregate Veo budget = T1 + T2 = 6/min, 60/day. When BOTH are exhausted, the
18
- * job-start capacity selector (videoJobProcessor) spills to another provider.
15
+ * - key[1] = the overflow key, **Tier 3** (10/min, 500/day) as of 2026-06-20
16
+ * (was Tier 2 4/50; its billing account was promoted). Used once key[0]
17
+ * is out of per-minute / per-day budget, OR immediately as failover when a
18
+ * key[0] submit returns a per-key 429 (see google.service generateVideo).
19
+ * Aggregate Veo budget = T1 + T3 = 12/min, 510/day (GOOGLE_API_KEY_TIERS=1,3 on
20
+ * Doppler prd + Railway). When BOTH are exhausted, the job-start capacity
21
+ * selector (videoJobProcessor) spills to another provider, else the scene fails
22
+ * cleanly with VIDEO_PROVIDER_RATE_LIMITED (refunded, non-paging).
19
23
  *
20
- * When key[1]'s account is billed it moves to Tier 3; bump its tier in
21
- * GOOGLE_API_KEY_TIERS then (no code change — the ladder below handles it).
24
+ * Tier changes need NO code change — bump GOOGLE_API_KEY_TIERS and the ladder
25
+ * below rescales both the per-key router caps and the aggregate model-level gate.
22
26
  *
23
27
  * Vertex AI (DEFERRED, on record for the future): Veo is also available via
24
28
  * Vertex, where quota is **per-project** and the current billing account is
@@ -1 +1 @@
1
- {"version":3,"file":"googleApiKeys.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleApiKeys.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AAoBH,8EAA8E;AAC9E,wBAAgB,kBAAkB,IAAI,MAAM,EAAE,CAa7C;AAED,yEAAyE;AACzE,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAUjD;AAED,8EAA8E;AAC9E,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,KAAK,GAAG,KAAK,GAClB,MAAM,CAGR;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,CAIjE;AAED,uDAAuD;AACvD,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEjD;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAE5F;AAED,iFAAiF;AACjF,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAIrF"}
1
+ {"version":3,"file":"googleApiKeys.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleApiKeys.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAoBH,8EAA8E;AAC9E,wBAAgB,kBAAkB,IAAI,MAAM,EAAE,CAa7C;AAED,yEAAyE;AACzE,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAUjD;AAED,8EAA8E;AAC9E,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,KAAK,GAAG,KAAK,GAClB,MAAM,CAGR;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,CAIjE;AAED,uDAAuD;AACvD,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEjD;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAE5F;AAED,iFAAiF;AACjF,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAIrF"}
@@ -13,13 +13,17 @@
13
13
  * - key[0] = the NEW key (vidspotai project), currently **Tier 1** (2/min,
14
14
  * 10/day per Veo model). Used FIRST, deliberately, to drive usage and
15
15
  * promote its billing account up the tier ladder.
16
- * - key[1] = the CURRENT key, **Tier 2** (4/min, 50/day). Used once key[0]
17
- * is out of per-minute / per-day budget.
18
- * Aggregate Veo budget = T1 + T2 = 6/min, 60/day. When BOTH are exhausted, the
19
- * job-start capacity selector (videoJobProcessor) spills to another provider.
16
+ * - key[1] = the overflow key, **Tier 3** (10/min, 500/day) as of 2026-06-20
17
+ * (was Tier 2 — 4/50; its billing account was promoted). Used once key[0]
18
+ * is out of per-minute / per-day budget, OR immediately as failover when a
19
+ * key[0] submit returns a per-key 429 (see google.service generateVideo).
20
+ * Aggregate Veo budget = T1 + T3 = 12/min, 510/day (GOOGLE_API_KEY_TIERS=1,3 on
21
+ * Doppler prd + Railway). When BOTH are exhausted, the job-start capacity
22
+ * selector (videoJobProcessor) spills to another provider, else the scene fails
23
+ * cleanly with VIDEO_PROVIDER_RATE_LIMITED (refunded, non-paging).
20
24
  *
21
- * When key[1]'s account is billed it moves to Tier 3; bump its tier in
22
- * GOOGLE_API_KEY_TIERS then (no code change — the ladder below handles it).
25
+ * Tier changes need NO code change — bump GOOGLE_API_KEY_TIERS and the ladder
26
+ * below rescales both the per-key router caps and the aggregate model-level gate.
23
27
  *
24
28
  * Vertex AI (DEFERRED, on record for the future): Veo is also available via
25
29
  * Vertex, where quota is **per-project** and the current billing account is
@@ -10,14 +10,16 @@ import { UserFacingError } from "../../../../utils/errors";
10
10
  * error instead of raw provider JSON that pages Slack as a platform bug.
11
11
  */
12
12
  export declare function classifyGoogleApiError(err: any): UserFacingError | null;
13
- /** A key-level (billing / auth) failure that warrants failing over to the next
14
- * key in the pool and circuit-breaking the failed one for `ttlSeconds`. */
13
+ /** A key-level failure that warrants failing over to the next key in the pool.
14
+ * `billing`/`auth` also circuit-break the failed key for `ttlSeconds`; `quota`
15
+ * (a per-key RPM/RPD limit) fails over WITHOUT a persistent circuit-break —
16
+ * the per-key routing counters already steer traffic away until it resets. */
15
17
  export interface GoogleKeyHealthFailure {
16
- /** "billing" | "auth" — the class of account problem, for logs + TTL choice. */
17
- kind: "billing" | "auth";
18
+ /** Class of key problem, for logs + TTL + paging policy. */
19
+ kind: "billing" | "auth" | "quota";
18
20
  /** Short human reason, persisted as the circuit-breaker value (Loki/debug). */
19
21
  reason: string;
20
- /** How long to skip this key before re-probing it. */
22
+ /** How long to skip this key before re-probing it (only used for billing/auth). */
21
23
  ttlSeconds: number;
22
24
  }
23
25
  /**
@@ -26,16 +28,21 @@ export interface GoogleKeyHealthFailure {
26
28
  * per-minute rate limit or a content/validation error.
27
29
  *
28
30
  * Returns a failure descriptor when the pool should fail over to the next key
29
- * and circuit-break this one; null otherwise (caller handles normally).
31
+ * (and, for billing/auth, circuit-break this one); null otherwise (a genuine
32
+ * content/param error the caller surfaces as-is).
30
33
  *
31
34
  * IMPORTANT (cost safety): this is only consulted on a THROWN submit — i.e. no
32
35
  * Veo operation was created, so the account was NOT billed. Failing over to a
33
36
  * second billing account therefore cannot double-charge for the same job.
34
37
  *
35
- * Deliberately does NOT match a bare per-minute 429 / RESOURCE_EXHAUSTED with
36
- * no billing signal: that's our own RPM cap, not an account outage, and the
37
- * existing quota router + provider-fallback chain already handle it. Failing
38
- * over on every RPM blip would needlessly drain the reserve account.
38
+ * ⚠️ The submit path wraps the raw provider error in a UserFacingError BEFORE
39
+ * this runs (withTransientRetry → classifyGoogleApiError), so we MUST unwrap:
40
+ * read the original error off `.cause` for the JSON status/code, and use the
41
+ * UserFacingError's own `.code` as a secondary signal. The previous version
42
+ * only read `err.message` — which by this point was the friendly canned string
43
+ * — so it matched nothing and the failover to the second key NEVER fired. That
44
+ * (plus the billing misclassification above) is why two funded keys behaved
45
+ * like one and jobs failed instead of retrying on the overflow key.
39
46
  */
40
47
  export declare function classifyGoogleKeyHealth(err: any): GoogleKeyHealthFailure | null;
41
48
  //# sourceMappingURL=googleErrors.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"googleErrors.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleErrors.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EAGhB,MAAM,0BAA0B,CAAC;AAElC;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,GAAG,GAAG,eAAe,GAAG,IAAI,CA4IvE;AAED;4EAC4E;AAC5E,MAAM,WAAW,sBAAsB;IACrC,gFAAgF;IAChF,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;IACzB,+EAA+E;IAC/E,MAAM,EAAE,MAAM,CAAC;IACf,sDAAsD;IACtD,UAAU,EAAE,MAAM,CAAC;CACpB;AAUD;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,GAAG,GAAG,sBAAsB,GAAG,IAAI,CA4C/E"}
1
+ {"version":3,"file":"googleErrors.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleErrors.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EAGhB,MAAM,0BAA0B,CAAC;AAElC;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,GAAG,GAAG,eAAe,GAAG,IAAI,CAoJvE;AAED;;;+EAG+E;AAC/E,MAAM,WAAW,sBAAsB;IACrC,4DAA4D;IAC5D,IAAI,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IACnC,+EAA+E;IAC/E,MAAM,EAAE,MAAM,CAAC;IACf,mFAAmF;IACnF,UAAU,EAAE,MAAM,CAAC;CACpB;AAcD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,GAAG,GAAG,sBAAsB,GAAG,IAAI,CAuE/E"}
@@ -22,13 +22,21 @@ function classifyGoogleApiError(err) {
22
22
  const status = inner.status;
23
23
  const msg = inner.message ?? raw;
24
24
  // Billing depletion / billing-not-enabled. Veo surfaces this as
25
- // RESOURCE_EXHAUSTED / 429 but with a billing-specific message ("Your
26
- // prepayment credits are depleted… manage your project and billing") or as
27
- // FAILED_PRECONDITION. This is an ACCOUNT problem, NOT a per-minute rate
28
- // limit — give it the distinct ACCOUNT_QUOTA_EXCEEDED code (which DOES page
29
- // Slack) and a SAFE message. Never echo the raw provider text: it tells the
30
- // end user to go manage OUR Google AI Studio billing (the bug we're fixing).
31
- const billingSignal = /prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|\bbilling\b|free tier is not available|check your plan and billing/i.test(msg) || status === "FAILED_PRECONDITION";
25
+ // RESOURCE_EXHAUSTED / 429 but with a billing-DEPLETION-specific message
26
+ // ("Your prepayment credits are depleted… manage your project and billing")
27
+ // or as FAILED_PRECONDITION. This is an ACCOUNT problem, NOT a per-minute /
28
+ // per-day rate limit — give it the distinct ACCOUNT_QUOTA_EXCEEDED code
29
+ // (which DOES page Slack) and a SAFE message.
30
+ //
31
+ // ⚠️ NARROWED 2026-06-20 (prod incident): Google's ORDINARY quota 429 also
32
+ // reads "You exceeded your current quota, please check your plan and billing
33
+ // details" — so matching bare `billing` / `check your plan and billing` here
34
+ // mislabeled every Veo rate limit as ACCOUNT_QUOTA_EXCEEDED. That paged Slack
35
+ // on funded accounts AND (because the key-pool failover deliberately skips
36
+ // billing faults) suppressed the failover to the second key. Match ONLY the
37
+ // depletion-specific phrases now; a bare 429/RESOURCE_EXHAUSTED with no
38
+ // depletion signal falls through to the rate-limit branch below.
39
+ const billingSignal = /prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|free tier is not available|billing account (?:is )?(?:closed|suspended|disabled|not (?:active|configured))/i.test(msg) || status === "FAILED_PRECONDITION";
32
40
  if (billingSignal) {
33
41
  return new errors_1.UserFacingError((0, errors_1.friendlyMessageForCode)(errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED), errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED);
34
42
  }
@@ -118,28 +126,44 @@ function classifyGoogleApiError(err) {
118
126
  // dead account (and to route the whole pipeline onto the healthy key), short
119
127
  // enough that a top-up recovers within half an hour. Auth/permission problems
120
128
  // (revoked / not-yet-propagated key, disabled API) re-probe faster — they're
121
- // often a transient config rollout — at 15 min.
129
+ // often a transient config rollout — at 15 min. A per-key rate limit is not an
130
+ // account outage (the key recovers as its minute/day window rolls), so its
131
+ // "cooloff" is short and mostly nominal — the in-job failover skips the key via
132
+ // the `tried` set, and cross-job routing is handled by the per-key counters.
122
133
  const BILLING_CIRCUIT_TTL_S = 30 * 60;
123
134
  const AUTH_CIRCUIT_TTL_S = 15 * 60;
135
+ const QUOTA_CIRCUIT_TTL_S = 60;
124
136
  /**
125
137
  * Classifies whether a Veo *submit* failure is an ACCOUNT-LEVEL problem with
126
138
  * THIS key (billing exhausted / auth revoked) — distinct from a transient
127
139
  * per-minute rate limit or a content/validation error.
128
140
  *
129
141
  * Returns a failure descriptor when the pool should fail over to the next key
130
- * and circuit-break this one; null otherwise (caller handles normally).
142
+ * (and, for billing/auth, circuit-break this one); null otherwise (a genuine
143
+ * content/param error the caller surfaces as-is).
131
144
  *
132
145
  * IMPORTANT (cost safety): this is only consulted on a THROWN submit — i.e. no
133
146
  * Veo operation was created, so the account was NOT billed. Failing over to a
134
147
  * second billing account therefore cannot double-charge for the same job.
135
148
  *
136
- * Deliberately does NOT match a bare per-minute 429 / RESOURCE_EXHAUSTED with
137
- * no billing signal: that's our own RPM cap, not an account outage, and the
138
- * existing quota router + provider-fallback chain already handle it. Failing
139
- * over on every RPM blip would needlessly drain the reserve account.
149
+ * ⚠️ The submit path wraps the raw provider error in a UserFacingError BEFORE
150
+ * this runs (withTransientRetry → classifyGoogleApiError), so we MUST unwrap:
151
+ * read the original error off `.cause` for the JSON status/code, and use the
152
+ * UserFacingError's own `.code` as a secondary signal. The previous version
153
+ * only read `err.message` — which by this point was the friendly canned string
154
+ * — so it matched nothing and the failover to the second key NEVER fired. That
155
+ * (plus the billing misclassification above) is why two funded keys behaved
156
+ * like one and jobs failed instead of retrying on the overflow key.
140
157
  */
141
158
  function classifyGoogleKeyHealth(err) {
142
- const raw = err?.message ?? "";
159
+ const userFacingCode = err && typeof err === "object" && typeof err.code === "string"
160
+ ? err.code
161
+ : undefined;
162
+ // Prefer the original provider error (attached as `.cause`) for JSON parsing.
163
+ const rawErr = err && typeof err === "object" && err.cause
164
+ ? err.cause
165
+ : err;
166
+ const raw = rawErr?.message ?? err?.message ?? "";
143
167
  let httpCode;
144
168
  let status;
145
169
  let msg = raw;
@@ -151,14 +175,17 @@ function classifyGoogleKeyHealth(err) {
151
175
  msg = inner.message ?? raw;
152
176
  }
153
177
  catch {
154
- // Non-JSON message — match against the raw string below.
178
+ // Non-JSON message — match against the raw string + the user-facing code.
155
179
  }
156
- // Billing depletion. Veo surfaces this as RESOURCE_EXHAUSTED / 429 but with a
157
- // billing-specific message ("Your prepayment credits are depleted… manage
158
- // your project and billing"), OR as FAILED_PRECONDITION (free-tier/billing
159
- // not enabled). Match the billing signal, NOT a bare 429.
160
- if (/prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|billing|free tier is not available|check your plan and billing|FAILED_PRECONDITION/i.test(msg) ||
161
- status === "FAILED_PRECONDITION") {
180
+ // Billing depletion / disabled — a real ACCOUNT outage, NOT a rate limit.
181
+ // Narrowed (2026-06-20) to depletion-specific phrases: Google's ordinary quota
182
+ // 429 also says "check your plan and billing details", so matching bare
183
+ // `billing` here is exactly what misrouted rate limits into a 30-min circuit
184
+ // break + Slack page. Keep FAILED_PRECONDITION (free-tier/billing not enabled)
185
+ // and the upstream classifier's ACCOUNT_QUOTA_EXCEEDED code.
186
+ if (/prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|free tier is not available|billing account (?:is )?(?:closed|suspended|disabled|not (?:active|configured))/i.test(msg) ||
187
+ status === "FAILED_PRECONDITION" ||
188
+ userFacingCode === errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED) {
162
189
  return { kind: "billing", reason: `billing: ${msg.slice(0, 120)}`, ttlSeconds: BILLING_CIRCUIT_TTL_S };
163
190
  }
164
191
  // Auth / permission: key revoked, not yet propagated, API disabled, or the
@@ -168,8 +195,20 @@ function classifyGoogleKeyHealth(err) {
168
195
  httpCode === 403 ||
169
196
  status === "PERMISSION_DENIED" ||
170
197
  status === "UNAUTHENTICATED" ||
198
+ userFacingCode === errors_1.USER_FACING_ERROR_CODES.PROVIDER_AUTH_ERROR ||
171
199
  /API[_ ]key not valid|API_KEY_INVALID|permission denied|SERVICE_DISABLED|has not been used in project|consumer .* (?:suspended|disabled)/i.test(msg)) {
172
200
  return { kind: "auth", reason: `auth: ${msg.slice(0, 120)}`, ttlSeconds: AUTH_CIRCUIT_TTL_S };
173
201
  }
202
+ // Per-key rate limit (RPM / RPD). This is the case that was previously
203
+ // EXCLUDED — and the whole reason the pool never failed over on a 429. A bare
204
+ // 429 / RESOURCE_EXHAUSTED (no billing signal) means THIS key is out of budget
205
+ // for the moment; fail over to the next key, which "would mostly always have
206
+ // limit available" (Ammar, 2026-06-20). No persistent circuit-break: the
207
+ // per-key routing counters + the in-job `tried` set handle skipping it.
208
+ if (status === "RESOURCE_EXHAUSTED" ||
209
+ httpCode === 429 ||
210
+ userFacingCode === errors_1.USER_FACING_ERROR_CODES.VIDEO_PROVIDER_RATE_LIMITED) {
211
+ return { kind: "quota", reason: `quota: ${msg.slice(0, 120)}`, ttlSeconds: QUOTA_CIRCUIT_TTL_S };
212
+ }
174
213
  return null;
175
214
  }
@@ -1 +1 @@
1
- {"version":3,"file":"googleKeyPool.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleKeyPool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,OAAO,EACL,iBAAiB,EAKlB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;;;;;;GAgBG;AAEH,UAAU,QAAQ;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd;AAcD,cAAM,aAAa;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAa;;IA4BrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,8EAA8E;IAC9E,IAAI,MAAM,IAAI,MAAM,EAAE,CAErB;IAUD,OAAO,CAAC,WAAW;IAInB,kFAAkF;IAC5E,YAAY,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,qFAAqF;YACvE,WAAW;IAiBzB,mDAAmD;IACnD,IAAI,aAAa,IAAI,WAAW,CAE/B;IAED,gFAAgF;IAChF,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS;IAK3D;;;;;;;;OAQG;IACG,aAAa,CACjB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GACjB,OAAO,CAAC,QAAQ,CAAC;IA2DpB,8EAA8E;YAChE,OAAO;CAUtB;AAID,sCAAsC;AACtC,wBAAgB,gBAAgB,IAAI,aAAa,CAGhD;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAC7B,YAAY,EAAE,aAAa,EAAE,CAAC"}
1
+ {"version":3,"file":"googleKeyPool.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleKeyPool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,OAAO,EACL,iBAAiB,EAKlB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;;;;;;GAgBG;AAEH,UAAU,QAAQ;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd;AAiDD,cAAM,aAAa;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAa;;IA4BrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,8EAA8E;IAC9E,IAAI,MAAM,IAAI,MAAM,EAAE,CAErB;IAUD,OAAO,CAAC,WAAW;IAInB,kFAAkF;IAC5E,YAAY,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,qFAAqF;YACvE,WAAW;IAiBzB,mDAAmD;IACnD,IAAI,aAAa,IAAI,WAAW,CAE/B;IAED,gFAAgF;IAChF,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS;IAK3D;;;;;;;;OAQG;IACG,aAAa,CACjB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GACjB,OAAO,CAAC,QAAQ,CAAC;IA4DpB,8EAA8E;YAChE,OAAO;CAUtB;AAID,sCAAsC;AACtC,wBAAgB,gBAAgB,IAAI,aAAa,CAGhD;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAC7B,YAAY,EAAE,aAAa,EAAE,CAAC"}
@@ -7,6 +7,40 @@ const logger_1 = require("../../../../utils/logger");
7
7
  const redis_service_1 = require("../../../redis.service");
8
8
  const googleApiKeys_1 = require("./googleApiKeys");
9
9
  Object.defineProperty(exports, "googleApiKeyCount", { enumerable: true, get: function () { return googleApiKeys_1.googleApiKeyCount; } });
10
+ /**
11
+ * Atomic per-key check-and-consume for the routing counters (RPM + RPD).
12
+ *
13
+ * Replaces the old GET-then-INCR in pickForSubmit, which was NON-atomic: under
14
+ * concurrent submits every caller read "this key has room" before any of them
15
+ * incremented, so they all piled onto the SAME key (usually the tiny Tier-1
16
+ * key0) past its ceiling while the other key sat idle — then Google 429'd and,
17
+ * with the failover bug, the job just failed. Checking and consuming in one
18
+ * Lua round-trip makes the per-key cap a hard, race-free gate.
19
+ *
20
+ * Returns { 1, newMin, dayUsed } when a slot was consumed, or
21
+ * { 0, minUsed, dayUsed } when this key is already at its min/day cap.
22
+ * A limit of 0 means "no cap" (skip that gate). The minute counter is a fixed
23
+ * 60s window (matches the legacy per-key behavior); the day counter expires at
24
+ * UTC midnight.
25
+ */
26
+ const CONSUME_KEY_SCRIPT = `
27
+ local minKey = KEYS[1]
28
+ local dayKey = KEYS[2]
29
+ local minLimit = tonumber(ARGV[1])
30
+ local dayLimit = tonumber(ARGV[2])
31
+ local secsTilMidnight = tonumber(ARGV[3])
32
+ local minUsed = tonumber(redis.call('GET', minKey) or '0')
33
+ local dayUsed = tonumber(redis.call('GET', dayKey) or '0')
34
+ if minLimit > 0 and minUsed >= minLimit then return { 0, minUsed, dayUsed } end
35
+ if dayLimit > 0 and dayUsed >= dayLimit then return { 0, minUsed, dayUsed } end
36
+ local newMin = redis.call('INCR', minKey)
37
+ if newMin == 1 then redis.call('EXPIRE', minKey, 60) end
38
+ if dayLimit > 0 then
39
+ local newDay = redis.call('INCR', dayKey)
40
+ if newDay == 1 then redis.call('EXPIRE', dayKey, secsTilMidnight) end
41
+ end
42
+ return { 1, newMin, dayUsed }
43
+ `;
10
44
  function utcDateKey() {
11
45
  return new Date().toISOString().slice(0, 10);
12
46
  }
@@ -134,27 +168,21 @@ class GoogleKeyPool {
134
168
  if (candidates.length === 1)
135
169
  return candidates[0];
136
170
  const date = utcDateKey();
171
+ const secsTilMidnight = secsUntilMidnight();
137
172
  for (const entry of candidates) {
138
173
  try {
139
174
  const perKeyMinLimit = (0, googleApiKeys_1.scaleLimitForTier)(baselineMin, entry.tier, "rpm");
140
175
  const perKeyDayLimit = (0, googleApiKeys_1.scaleLimitForTier)(baselineDay, entry.tier, "rpd");
141
176
  const dayKey = `gkpool:${entry.id}:${modelId}:day:${date}`;
142
177
  const minKey = `gkpool:${entry.id}:${modelId}:min`;
143
- const [dayRaw, minRaw] = await Promise.all([
144
- client.get(dayKey),
145
- client.get(minKey),
146
- ]);
147
- const dayUsed = dayRaw ? Number(dayRaw) : 0;
148
- const minUsed = minRaw ? Number(minRaw) : 0;
149
- const dayOk = perKeyDayLimit <= 0 || dayUsed < perKeyDayLimit;
150
- const minOk = perKeyMinLimit <= 0 || minUsed < perKeyMinLimit;
151
- if (dayOk && minOk) {
152
- await this.consume(entry, modelId, date);
178
+ // Atomic check-and-consume: no GET-then-INCR race, so two concurrent
179
+ // submits can't both think key0 has room and oversubscribe it.
180
+ const res = (await client.eval(CONSUME_KEY_SCRIPT, 2, minKey, dayKey, String(perKeyMinLimit), String(perKeyDayLimit), String(secsTilMidnight)));
181
+ if (res[0] === 1)
153
182
  return entry;
154
- }
155
183
  }
156
184
  catch (err) {
157
- logger_1.logger.warn("googleKeyPool: routing read failed, trying next key", {
185
+ logger_1.logger.warn("googleKeyPool: routing eval failed, trying next key", {
158
186
  keyId: entry.id,
159
187
  err: err instanceof Error ? err.message : String(err),
160
188
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidspotai-shared",
3
- "version": "1.0.90",
3
+ "version": "1.0.92",
4
4
  "main": "lib/index.js",
5
5
  "types": "lib/index.d.ts",
6
6
  "exports": {