vidspotai-shared 1.0.91 → 1.0.92
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/services/aiGen/providers/google/google.service.d.ts.map +1 -1
- package/lib/services/aiGen/providers/google/google.service.js +54 -18
- package/lib/services/aiGen/providers/google/googleApiKeys.d.ts +10 -6
- package/lib/services/aiGen/providers/google/googleApiKeys.d.ts.map +1 -1
- package/lib/services/aiGen/providers/google/googleApiKeys.js +10 -6
- package/lib/services/aiGen/providers/google/googleErrors.d.ts +17 -10
- package/lib/services/aiGen/providers/google/googleErrors.d.ts.map +1 -1
- package/lib/services/aiGen/providers/google/googleErrors.js +60 -21
- package/lib/services/aiGen/providers/google/googleKeyPool.d.ts.map +1 -1
- package/lib/services/aiGen/providers/google/googleKeyPool.js +40 -12
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;
|
|
1
|
+
{"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;IAsC1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IA0O3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAiH3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BpJ"}
|
|
@@ -122,8 +122,14 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
|
|
|
122
122
|
// typed UserFacingError so the scene processor stores a translatable
|
|
123
123
|
// code instead of the raw provider JSON.
|
|
124
124
|
const userFacing = (0, googleErrors_1.classifyGoogleApiError)(err);
|
|
125
|
-
if (userFacing)
|
|
125
|
+
if (userFacing) {
|
|
126
|
+
// Preserve the ORIGINAL provider error so the key-pool failover
|
|
127
|
+
// (classifyGoogleKeyHealth) can still read the underlying
|
|
128
|
+
// status/httpCode after this conversion. Without it the failover saw
|
|
129
|
+
// only the friendly string and never failed over to the second key.
|
|
130
|
+
userFacing.cause = err;
|
|
126
131
|
throw userFacing;
|
|
132
|
+
}
|
|
127
133
|
throw err;
|
|
128
134
|
}
|
|
129
135
|
const backoffMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s
|
|
@@ -269,32 +275,62 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
|
|
|
269
275
|
catch (err) {
|
|
270
276
|
lastErr = err;
|
|
271
277
|
const keyFailure = (0, googleErrors_1.classifyGoogleKeyHealth)(err);
|
|
272
|
-
// Not
|
|
273
|
-
//
|
|
278
|
+
// Not a fail-over-able failure (e.g. content rejection, bad params) →
|
|
279
|
+
// don't burn other accounts on it; surface as-is.
|
|
274
280
|
if (!keyFailure)
|
|
275
281
|
throw err;
|
|
276
|
-
//
|
|
277
|
-
//
|
|
278
|
-
|
|
279
|
-
|
|
282
|
+
// Observability: log the RAW provider error once so we can see whether
|
|
283
|
+
// this was an RPM vs RPD limit and on which key (AI Studio's dashboard
|
|
284
|
+
// lags real-time, so this is our authoritative signal — Ammar 2026-06-20).
|
|
285
|
+
const rawCause = err.cause;
|
|
286
|
+
logger_1.logger.warn("Google Veo: key submit failed — failing over to next key", {
|
|
280
287
|
modelKey: params.modelKey,
|
|
281
288
|
keyId: keyEntry.id,
|
|
282
289
|
kind: keyFailure.kind,
|
|
283
290
|
attempt: attempt + 1,
|
|
284
291
|
poolSize: this.keyPool.size,
|
|
292
|
+
rawError: rawCause instanceof Error
|
|
293
|
+
? rawCause.message.slice(0, 300)
|
|
294
|
+
: err instanceof Error
|
|
295
|
+
? err.message.slice(0, 300)
|
|
296
|
+
: String(err),
|
|
285
297
|
});
|
|
298
|
+
// Circuit-break ONLY persistent account faults (billing depleted / auth
|
|
299
|
+
// revoked) — those keep failing on every call until a human fixes them.
|
|
300
|
+
// A `quota` (RPM/RPD) limit is transient and key-specific: the per-key
|
|
301
|
+
// routing counters already steer new jobs away until it resets, so a
|
|
302
|
+
// persistent circuit-break would needlessly idle the key. We still fail
|
|
303
|
+
// over within THIS job via the `tried` set below.
|
|
304
|
+
if (keyFailure.kind !== "quota") {
|
|
305
|
+
await this.keyPool.markDepleted(keyEntry.id, keyFailure.ttlSeconds, keyFailure.reason);
|
|
306
|
+
}
|
|
286
307
|
if (isLastUsableKey || tried.size >= this.keyPool.size) {
|
|
287
|
-
// No untried key left
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
308
|
+
// No untried key left.
|
|
309
|
+
if (keyFailure.kind === "quota") {
|
|
310
|
+
// Every key is momentarily rate-limited — expected capacity pressure,
|
|
311
|
+
// NOT a platform outage. Stay at warn (non-paging) and surface a
|
|
312
|
+
// VIDEO_PROVIDER_RATE_LIMITED so the scene fails cleanly + refunds
|
|
313
|
+
// (and the job-start capacity selector already had its chance to
|
|
314
|
+
// spill to another provider). Do NOT page on-call for this.
|
|
315
|
+
logger_1.logger.warn("Google Veo: all keys rate-limited — no key with budget right now", {
|
|
316
|
+
modelKey: params.modelKey,
|
|
317
|
+
reason: keyFailure.reason,
|
|
318
|
+
triedKeys: [...tried],
|
|
319
|
+
poolSize: this.keyPool.size,
|
|
320
|
+
});
|
|
321
|
+
}
|
|
322
|
+
else {
|
|
323
|
+
// EVERY Google billing account is down (depleted / revoked). This is
|
|
324
|
+
// a platform-wide outage (the 2026-06-18 Veo-depletion class): page
|
|
325
|
+
// Slack at the source with full context so on-call sees it immediately.
|
|
326
|
+
logger_1.logger.error("Google Veo: ALL keys exhausted — every account failing over", {
|
|
327
|
+
modelKey: params.modelKey,
|
|
328
|
+
kind: keyFailure.kind,
|
|
329
|
+
reason: keyFailure.reason,
|
|
330
|
+
triedKeys: [...tried],
|
|
331
|
+
poolSize: this.keyPool.size,
|
|
332
|
+
});
|
|
333
|
+
}
|
|
298
334
|
// Propagate so the job surfaces a friendly error + refunds, and the
|
|
299
335
|
// job-level provider fallback can spill to another provider.
|
|
300
336
|
throw err;
|
|
@@ -12,13 +12,17 @@
|
|
|
12
12
|
* - key[0] = the NEW key (vidspotai project), currently **Tier 1** (2/min,
|
|
13
13
|
* 10/day per Veo model). Used FIRST, deliberately, to drive usage and
|
|
14
14
|
* promote its billing account up the tier ladder.
|
|
15
|
-
* - key[1] = the
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
15
|
+
* - key[1] = the overflow key, **Tier 3** (10/min, 500/day) as of 2026-06-20
|
|
16
|
+
* (was Tier 2 4/50; its billing account was promoted). Used once key[0]
|
|
17
|
+
* is out of per-minute / per-day budget, OR immediately as failover when a
|
|
18
|
+
* key[0] submit returns a per-key 429 (see google.service generateVideo).
|
|
19
|
+
* Aggregate Veo budget = T1 + T3 = 12/min, 510/day (GOOGLE_API_KEY_TIERS=1,3 on
|
|
20
|
+
* Doppler prd + Railway). When BOTH are exhausted, the job-start capacity
|
|
21
|
+
* selector (videoJobProcessor) spills to another provider, else the scene fails
|
|
22
|
+
* cleanly with VIDEO_PROVIDER_RATE_LIMITED (refunded, non-paging).
|
|
19
23
|
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
24
|
+
* Tier changes need NO code change — bump GOOGLE_API_KEY_TIERS and the ladder
|
|
25
|
+
* below rescales both the per-key router caps and the aggregate model-level gate.
|
|
22
26
|
*
|
|
23
27
|
* Vertex AI (DEFERRED, on record for the future): Veo is also available via
|
|
24
28
|
* Vertex, where quota is **per-project** and the current billing account is
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"googleApiKeys.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleApiKeys.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"googleApiKeys.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleApiKeys.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAoBH,8EAA8E;AAC9E,wBAAgB,kBAAkB,IAAI,MAAM,EAAE,CAa7C;AAED,yEAAyE;AACzE,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAUjD;AAED,8EAA8E;AAC9E,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,KAAK,GAAG,KAAK,GAClB,MAAM,CAGR;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,CAIjE;AAED,uDAAuD;AACvD,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEjD;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAE5F;AAED,iFAAiF;AACjF,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAIrF"}
|
|
@@ -13,13 +13,17 @@
|
|
|
13
13
|
* - key[0] = the NEW key (vidspotai project), currently **Tier 1** (2/min,
|
|
14
14
|
* 10/day per Veo model). Used FIRST, deliberately, to drive usage and
|
|
15
15
|
* promote its billing account up the tier ladder.
|
|
16
|
-
* - key[1] = the
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
16
|
+
* - key[1] = the overflow key, **Tier 3** (10/min, 500/day) as of 2026-06-20
|
|
17
|
+
* (was Tier 2 4/50; its billing account was promoted). Used once key[0]
|
|
18
|
+
* is out of per-minute / per-day budget, OR immediately as failover when a
|
|
19
|
+
* key[0] submit returns a per-key 429 (see google.service generateVideo).
|
|
20
|
+
* Aggregate Veo budget = T1 + T3 = 12/min, 510/day (GOOGLE_API_KEY_TIERS=1,3 on
|
|
21
|
+
* Doppler prd + Railway). When BOTH are exhausted, the job-start capacity
|
|
22
|
+
* selector (videoJobProcessor) spills to another provider, else the scene fails
|
|
23
|
+
* cleanly with VIDEO_PROVIDER_RATE_LIMITED (refunded, non-paging).
|
|
20
24
|
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
25
|
+
* Tier changes need NO code change — bump GOOGLE_API_KEY_TIERS and the ladder
|
|
26
|
+
* below rescales both the per-key router caps and the aggregate model-level gate.
|
|
23
27
|
*
|
|
24
28
|
* Vertex AI (DEFERRED, on record for the future): Veo is also available via
|
|
25
29
|
* Vertex, where quota is **per-project** and the current billing account is
|
|
@@ -10,14 +10,16 @@ import { UserFacingError } from "../../../../utils/errors";
|
|
|
10
10
|
* error instead of raw provider JSON that pages Slack as a platform bug.
|
|
11
11
|
*/
|
|
12
12
|
export declare function classifyGoogleApiError(err: any): UserFacingError | null;
|
|
13
|
-
/** A key-level
|
|
14
|
-
*
|
|
13
|
+
/** A key-level failure that warrants failing over to the next key in the pool.
|
|
14
|
+
* `billing`/`auth` also circuit-break the failed key for `ttlSeconds`; `quota`
|
|
15
|
+
* (a per-key RPM/RPD limit) fails over WITHOUT a persistent circuit-break —
|
|
16
|
+
* the per-key routing counters already steer traffic away until it resets. */
|
|
15
17
|
export interface GoogleKeyHealthFailure {
|
|
16
|
-
/**
|
|
17
|
-
kind: "billing" | "auth";
|
|
18
|
+
/** Class of key problem, for logs + TTL + paging policy. */
|
|
19
|
+
kind: "billing" | "auth" | "quota";
|
|
18
20
|
/** Short human reason, persisted as the circuit-breaker value (Loki/debug). */
|
|
19
21
|
reason: string;
|
|
20
|
-
/** How long to skip this key before re-probing it. */
|
|
22
|
+
/** How long to skip this key before re-probing it (only used for billing/auth). */
|
|
21
23
|
ttlSeconds: number;
|
|
22
24
|
}
|
|
23
25
|
/**
|
|
@@ -26,16 +28,21 @@ export interface GoogleKeyHealthFailure {
|
|
|
26
28
|
* per-minute rate limit or a content/validation error.
|
|
27
29
|
*
|
|
28
30
|
* Returns a failure descriptor when the pool should fail over to the next key
|
|
29
|
-
* and circuit-break this one; null otherwise (
|
|
31
|
+
* (and, for billing/auth, circuit-break this one); null otherwise (a genuine
|
|
32
|
+
* content/param error the caller surfaces as-is).
|
|
30
33
|
*
|
|
31
34
|
* IMPORTANT (cost safety): this is only consulted on a THROWN submit — i.e. no
|
|
32
35
|
* Veo operation was created, so the account was NOT billed. Failing over to a
|
|
33
36
|
* second billing account therefore cannot double-charge for the same job.
|
|
34
37
|
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
38
|
+
* ⚠️ The submit path wraps the raw provider error in a UserFacingError BEFORE
|
|
39
|
+
* this runs (withTransientRetry → classifyGoogleApiError), so we MUST unwrap:
|
|
40
|
+
* read the original error off `.cause` for the JSON status/code, and use the
|
|
41
|
+
* UserFacingError's own `.code` as a secondary signal. The previous version
|
|
42
|
+
* only read `err.message` — which by this point was the friendly canned string
|
|
43
|
+
* — so it matched nothing and the failover to the second key NEVER fired. That
|
|
44
|
+
* (plus the billing misclassification above) is why two funded keys behaved
|
|
45
|
+
* like one and jobs failed instead of retrying on the overflow key.
|
|
39
46
|
*/
|
|
40
47
|
export declare function classifyGoogleKeyHealth(err: any): GoogleKeyHealthFailure | null;
|
|
41
48
|
//# sourceMappingURL=googleErrors.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"googleErrors.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleErrors.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EAGhB,MAAM,0BAA0B,CAAC;AAElC;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,GAAG,GAAG,eAAe,GAAG,IAAI,
|
|
1
|
+
{"version":3,"file":"googleErrors.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleErrors.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EAGhB,MAAM,0BAA0B,CAAC;AAElC;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,GAAG,GAAG,eAAe,GAAG,IAAI,CAoJvE;AAED;;;+EAG+E;AAC/E,MAAM,WAAW,sBAAsB;IACrC,4DAA4D;IAC5D,IAAI,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IACnC,+EAA+E;IAC/E,MAAM,EAAE,MAAM,CAAC;IACf,mFAAmF;IACnF,UAAU,EAAE,MAAM,CAAC;CACpB;AAcD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,GAAG,GAAG,sBAAsB,GAAG,IAAI,CAuE/E"}
|
|
@@ -22,13 +22,21 @@ function classifyGoogleApiError(err) {
|
|
|
22
22
|
const status = inner.status;
|
|
23
23
|
const msg = inner.message ?? raw;
|
|
24
24
|
// Billing depletion / billing-not-enabled. Veo surfaces this as
|
|
25
|
-
// RESOURCE_EXHAUSTED / 429 but with a billing-specific message
|
|
26
|
-
// prepayment credits are depleted… manage your project and billing")
|
|
27
|
-
// FAILED_PRECONDITION. This is an ACCOUNT problem, NOT a per-minute
|
|
28
|
-
// limit — give it the distinct ACCOUNT_QUOTA_EXCEEDED code
|
|
29
|
-
// Slack) and a SAFE message.
|
|
30
|
-
//
|
|
31
|
-
|
|
25
|
+
// RESOURCE_EXHAUSTED / 429 but with a billing-DEPLETION-specific message
|
|
26
|
+
// ("Your prepayment credits are depleted… manage your project and billing")
|
|
27
|
+
// or as FAILED_PRECONDITION. This is an ACCOUNT problem, NOT a per-minute /
|
|
28
|
+
// per-day rate limit — give it the distinct ACCOUNT_QUOTA_EXCEEDED code
|
|
29
|
+
// (which DOES page Slack) and a SAFE message.
|
|
30
|
+
//
|
|
31
|
+
// ⚠️ NARROWED 2026-06-20 (prod incident): Google's ORDINARY quota 429 also
|
|
32
|
+
// reads "You exceeded your current quota, please check your plan and billing
|
|
33
|
+
// details" — so matching bare `billing` / `check your plan and billing` here
|
|
34
|
+
// mislabeled every Veo rate limit as ACCOUNT_QUOTA_EXCEEDED. That paged Slack
|
|
35
|
+
// on funded accounts AND (because the key-pool failover deliberately skips
|
|
36
|
+
// billing faults) suppressed the failover to the second key. Match ONLY the
|
|
37
|
+
// depletion-specific phrases now; a bare 429/RESOURCE_EXHAUSTED with no
|
|
38
|
+
// depletion signal falls through to the rate-limit branch below.
|
|
39
|
+
const billingSignal = /prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|free tier is not available|billing account (?:is )?(?:closed|suspended|disabled|not (?:active|configured))/i.test(msg) || status === "FAILED_PRECONDITION";
|
|
32
40
|
if (billingSignal) {
|
|
33
41
|
return new errors_1.UserFacingError((0, errors_1.friendlyMessageForCode)(errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED), errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED);
|
|
34
42
|
}
|
|
@@ -118,28 +126,44 @@ function classifyGoogleApiError(err) {
|
|
|
118
126
|
// dead account (and to route the whole pipeline onto the healthy key), short
|
|
119
127
|
// enough that a top-up recovers within half an hour. Auth/permission problems
|
|
120
128
|
// (revoked / not-yet-propagated key, disabled API) re-probe faster — they're
|
|
121
|
-
// often a transient config rollout — at 15 min.
|
|
129
|
+
// often a transient config rollout — at 15 min. A per-key rate limit is not an
|
|
130
|
+
// account outage (the key recovers as its minute/day window rolls), so its
|
|
131
|
+
// "cooloff" is short and mostly nominal — the in-job failover skips the key via
|
|
132
|
+
// the `tried` set, and cross-job routing is handled by the per-key counters.
|
|
122
133
|
const BILLING_CIRCUIT_TTL_S = 30 * 60;
|
|
123
134
|
const AUTH_CIRCUIT_TTL_S = 15 * 60;
|
|
135
|
+
const QUOTA_CIRCUIT_TTL_S = 60;
|
|
124
136
|
/**
|
|
125
137
|
* Classifies whether a Veo *submit* failure is a KEY-LEVEL problem with THIS
|
|
126
138
|
* key (billing exhausted / auth revoked / per-key RPM/RPD quota 429) — as
|
|
127
139
|
* opposed to a content/validation error, which is surfaced as-is.
|
|
128
140
|
*
|
|
129
141
|
* Returns a failure descriptor when the pool should fail over to the next key
|
|
130
|
-
* and circuit-break this one; null otherwise (
|
|
142
|
+
* (and, for billing/auth, circuit-break this one); null otherwise (a genuine
|
|
143
|
+
* content/param error the caller surfaces as-is).
|
|
131
144
|
*
|
|
132
145
|
* IMPORTANT (cost safety): this is only consulted on a THROWN submit — i.e. no
|
|
133
146
|
* Veo operation was created, so the account was NOT billed. Failing over to a
|
|
134
147
|
* second billing account therefore cannot double-charge for the same job.
|
|
135
148
|
*
|
|
136
|
-
*
|
|
137
|
-
*
|
|
138
|
-
*
|
|
139
|
-
*
|
|
149
|
+
* ⚠️ The submit path wraps the raw provider error in a UserFacingError BEFORE
|
|
150
|
+
* this runs (withTransientRetry → classifyGoogleApiError), so we MUST unwrap:
|
|
151
|
+
* read the original error off `.cause` for the JSON status/code, and use the
|
|
152
|
+
* UserFacingError's own `.code` as a secondary signal. The previous version
|
|
153
|
+
* only read `err.message` — which by this point was the friendly canned string
|
|
154
|
+
* — so it matched nothing and the failover to the second key NEVER fired. That
|
|
155
|
+
* (plus the billing misclassification above) is why two funded keys behaved
|
|
156
|
+
* like one and jobs failed instead of retrying on the overflow key.
|
|
140
157
|
*/
|
|
141
158
|
function classifyGoogleKeyHealth(err) {
|
|
142
|
-
const
|
|
159
|
+
const userFacingCode = err && typeof err === "object" && typeof err.code === "string"
|
|
160
|
+
? err.code
|
|
161
|
+
: undefined;
|
|
162
|
+
// Prefer the original provider error (attached as `.cause`) for JSON parsing.
|
|
163
|
+
const rawErr = err && typeof err === "object" && err.cause
|
|
164
|
+
? err.cause
|
|
165
|
+
: err;
|
|
166
|
+
const raw = rawErr?.message ?? err?.message ?? "";
|
|
143
167
|
let httpCode;
|
|
144
168
|
let status;
|
|
145
169
|
let msg = raw;
|
|
@@ -151,14 +175,17 @@ function classifyGoogleKeyHealth(err) {
|
|
|
151
175
|
msg = inner.message ?? raw;
|
|
152
176
|
}
|
|
153
177
|
catch {
|
|
154
|
-
// Non-JSON message — match against the raw string
|
|
178
|
+
// Non-JSON message — match against the raw string + the user-facing code.
|
|
155
179
|
}
|
|
156
|
-
// Billing depletion
|
|
157
|
-
//
|
|
158
|
-
// your
|
|
159
|
-
//
|
|
160
|
-
|
|
161
|
-
|
|
180
|
+
// Billing depletion / disabled — a real ACCOUNT outage, NOT a rate limit.
|
|
181
|
+
// Narrowed (2026-06-20) to depletion-specific phrases: Google's ordinary quota
|
|
182
|
+
// 429 also says "check your plan and billing details", so matching bare
|
|
183
|
+
// `billing` here is exactly what misrouted rate limits into a 30-min circuit
|
|
184
|
+
// break + Slack page. Keep FAILED_PRECONDITION (free-tier/billing not enabled)
|
|
185
|
+
// and the upstream classifier's ACCOUNT_QUOTA_EXCEEDED code.
|
|
186
|
+
if (/prepayment credits? (?:are|is) depleted|credits? (?:are|is) depleted|free tier is not available|billing account (?:is )?(?:closed|suspended|disabled|not (?:active|configured))/i.test(msg) ||
|
|
187
|
+
status === "FAILED_PRECONDITION" ||
|
|
188
|
+
userFacingCode === errors_1.USER_FACING_ERROR_CODES.ACCOUNT_QUOTA_EXCEEDED) {
|
|
162
189
|
return { kind: "billing", reason: `billing: ${msg.slice(0, 120)}`, ttlSeconds: BILLING_CIRCUIT_TTL_S };
|
|
163
190
|
}
|
|
164
191
|
// Auth / permission: key revoked, not yet propagated, API disabled, or the
|
|
@@ -168,8 +195,20 @@ function classifyGoogleKeyHealth(err) {
|
|
|
168
195
|
httpCode === 403 ||
|
|
169
196
|
status === "PERMISSION_DENIED" ||
|
|
170
197
|
status === "UNAUTHENTICATED" ||
|
|
198
|
+
userFacingCode === errors_1.USER_FACING_ERROR_CODES.PROVIDER_AUTH_ERROR ||
|
|
171
199
|
/API[_ ]key not valid|API_KEY_INVALID|permission denied|SERVICE_DISABLED|has not been used in project|consumer .* (?:suspended|disabled)/i.test(msg)) {
|
|
172
200
|
return { kind: "auth", reason: `auth: ${msg.slice(0, 120)}`, ttlSeconds: AUTH_CIRCUIT_TTL_S };
|
|
173
201
|
}
|
|
202
|
+
// Per-key rate limit (RPM / RPD). This is the case that was previously
|
|
203
|
+
// EXCLUDED — and the whole reason the pool never failed over on a 429. A bare
|
|
204
|
+
// 429 / RESOURCE_EXHAUSTED (no billing signal) means THIS key is out of budget
|
|
205
|
+
// for the moment; fail over to the next key, which "would mostly always have
|
|
206
|
+
// limit available" (Ammar, 2026-06-20). No persistent circuit-break: the
|
|
207
|
+
// per-key routing counters + the in-job `tried` set handle skipping it.
|
|
208
|
+
if (status === "RESOURCE_EXHAUSTED" ||
|
|
209
|
+
httpCode === 429 ||
|
|
210
|
+
userFacingCode === errors_1.USER_FACING_ERROR_CODES.VIDEO_PROVIDER_RATE_LIMITED) {
|
|
211
|
+
return { kind: "quota", reason: `quota: ${msg.slice(0, 120)}`, ttlSeconds: QUOTA_CIRCUIT_TTL_S };
|
|
212
|
+
}
|
|
174
213
|
return null;
|
|
175
214
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"googleKeyPool.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleKeyPool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,OAAO,EACL,iBAAiB,EAKlB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;;;;;;GAgBG;AAEH,UAAU,QAAQ;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd;
|
|
1
|
+
{"version":3,"file":"googleKeyPool.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/googleKeyPool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,OAAO,EACL,iBAAiB,EAKlB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;;;;;;GAgBG;AAEH,UAAU,QAAQ;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd;AAiDD,cAAM,aAAa;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAa;;IA4BrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,8EAA8E;IAC9E,IAAI,MAAM,IAAI,MAAM,EAAE,CAErB;IAUD,OAAO,CAAC,WAAW;IAInB,kFAAkF;IAC5E,YAAY,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,qFAAqF;YACvE,WAAW;IAiBzB,mDAAmD;IACnD,IAAI,aAAa,IAAI,WAAW,CAE/B;IAED,gFAAgF;IAChF,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS;IAK3D;;;;;;;;OAQG;IACG,aAAa,CACjB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GACjB,OAAO,CAAC,QAAQ,CAAC;IA4DpB,8EAA8E;YAChE,OAAO;CAUtB;AAID,sCAAsC;AACtC,wBAAgB,gBAAgB,IAAI,aAAa,CAGhD;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAC7B,YAAY,EAAE,aAAa,EAAE,CAAC"}
|
|
@@ -7,6 +7,40 @@ const logger_1 = require("../../../../utils/logger");
|
|
|
7
7
|
const redis_service_1 = require("../../../redis.service");
|
|
8
8
|
const googleApiKeys_1 = require("./googleApiKeys");
|
|
9
9
|
Object.defineProperty(exports, "googleApiKeyCount", { enumerable: true, get: function () { return googleApiKeys_1.googleApiKeyCount; } });
|
|
10
|
+
/**
|
|
11
|
+
* Atomic per-key check-and-consume for the routing counters (RPM + RPD).
|
|
12
|
+
*
|
|
13
|
+
* Replaces the old GET-then-INCR in pickForSubmit, which was NON-atomic: under
|
|
14
|
+
* concurrent submits every caller read "this key has room" before any of them
|
|
15
|
+
* incremented, so they all piled onto the SAME key (usually the tiny Tier-1
|
|
16
|
+
* key0) past its ceiling while the other key sat idle — then Google 429'd and,
|
|
17
|
+
* with the failover bug, the job just failed. Checking and consuming in one
|
|
18
|
+
* Lua round-trip makes the per-key cap a hard, race-free gate.
|
|
19
|
+
*
|
|
20
|
+
* Returns { 1, newMin, dayUsed } when a slot was consumed (dayUsed is the count read BEFORE this call's increment), or
|
|
21
|
+
* { 0, minUsed, dayUsed } when this key is already at its min/day cap.
|
|
22
|
+
* A limit of 0 means "no cap" (skip that gate). The minute counter is a fixed
|
|
23
|
+
* 60s window (matches the legacy per-key behavior); the day counter expires at
|
|
24
|
+
* UTC midnight.
|
|
25
|
+
*/
|
|
26
|
+
const CONSUME_KEY_SCRIPT = `
|
|
27
|
+
local minKey = KEYS[1]
|
|
28
|
+
local dayKey = KEYS[2]
|
|
29
|
+
local minLimit = tonumber(ARGV[1])
|
|
30
|
+
local dayLimit = tonumber(ARGV[2])
|
|
31
|
+
local secsTilMidnight = tonumber(ARGV[3])
|
|
32
|
+
local minUsed = tonumber(redis.call('GET', minKey) or '0')
|
|
33
|
+
local dayUsed = tonumber(redis.call('GET', dayKey) or '0')
|
|
34
|
+
if minLimit > 0 and minUsed >= minLimit then return { 0, minUsed, dayUsed } end
|
|
35
|
+
if dayLimit > 0 and dayUsed >= dayLimit then return { 0, minUsed, dayUsed } end
|
|
36
|
+
local newMin = redis.call('INCR', minKey)
|
|
37
|
+
if newMin == 1 then redis.call('EXPIRE', minKey, 60) end
|
|
38
|
+
if dayLimit > 0 then
|
|
39
|
+
local newDay = redis.call('INCR', dayKey)
|
|
40
|
+
if newDay == 1 then redis.call('EXPIRE', dayKey, secsTilMidnight) end
|
|
41
|
+
end
|
|
42
|
+
return { 1, newMin, dayUsed }
|
|
43
|
+
`;
|
|
10
44
|
function utcDateKey() {
|
|
11
45
|
return new Date().toISOString().slice(0, 10);
|
|
12
46
|
}
|
|
@@ -134,27 +168,21 @@ class GoogleKeyPool {
|
|
|
134
168
|
if (candidates.length === 1)
|
|
135
169
|
return candidates[0];
|
|
136
170
|
const date = utcDateKey();
|
|
171
|
+
const secsTilMidnight = secsUntilMidnight();
|
|
137
172
|
for (const entry of candidates) {
|
|
138
173
|
try {
|
|
139
174
|
const perKeyMinLimit = (0, googleApiKeys_1.scaleLimitForTier)(baselineMin, entry.tier, "rpm");
|
|
140
175
|
const perKeyDayLimit = (0, googleApiKeys_1.scaleLimitForTier)(baselineDay, entry.tier, "rpd");
|
|
141
176
|
const dayKey = `gkpool:${entry.id}:${modelId}:day:${date}`;
|
|
142
177
|
const minKey = `gkpool:${entry.id}:${modelId}:min`;
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
])
|
|
147
|
-
const dayUsed = dayRaw ? Number(dayRaw) : 0;
|
|
148
|
-
const minUsed = minRaw ? Number(minRaw) : 0;
|
|
149
|
-
const dayOk = perKeyDayLimit <= 0 || dayUsed < perKeyDayLimit;
|
|
150
|
-
const minOk = perKeyMinLimit <= 0 || minUsed < perKeyMinLimit;
|
|
151
|
-
if (dayOk && minOk) {
|
|
152
|
-
await this.consume(entry, modelId, date);
|
|
178
|
+
// Atomic check-and-consume: no GET-then-INCR race, so two concurrent
|
|
179
|
+
// submits can't both think key0 has room and oversubscribe it.
|
|
180
|
+
const res = (await client.eval(CONSUME_KEY_SCRIPT, 2, minKey, dayKey, String(perKeyMinLimit), String(perKeyDayLimit), String(secsTilMidnight)));
|
|
181
|
+
if (res[0] === 1)
|
|
153
182
|
return entry;
|
|
154
|
-
}
|
|
155
183
|
}
|
|
156
184
|
catch (err) {
|
|
157
|
-
logger_1.logger.warn("googleKeyPool: routing
|
|
185
|
+
logger_1.logger.warn("googleKeyPool: routing eval failed, trying next key", {
|
|
158
186
|
keyId: entry.id,
|
|
159
187
|
err: err instanceof Error ? err.message : String(err),
|
|
160
188
|
});
|