@tstdl/base 0.93.160 → 0.93.162

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those changes as the versions appear in their respective public registries.
@@ -1,12 +1,20 @@
1
1
  import { vertexAI } from '@genkit-ai/google-genai';
2
2
  import { GenkitError, modelRef } from 'genkit';
3
3
  import { genkitPlugin } from 'genkit/plugin';
4
- import { shuffle } from '../../utils/array/index.js';
4
+ import { distinct, shuffle } from '../../utils/array/index.js';
5
5
  import { isInstanceOf, isNullOrUndefined } from '../../utils/type-guards.js';
6
6
  import { millisecondsPerMinute, millisecondsPerSecond } from '../../utils/units.js';
7
7
  const pluginKey = 'vertexai-multi-location';
8
8
  const geminiModelReference = vertexAI.model('gemini-2.5-flash');
9
+ const defaultTokenLimitThreshold = 131_072;
9
10
  export function vertexAiMultiLocation(options) {
11
+ if (options.locations.length == 0) {
12
+ throw new GenkitError({
13
+ status: 'INVALID_ARGUMENT',
14
+ message: 'At least one location must be provided for vertexAiMultiLocation',
15
+ });
16
+ }
17
+ const tokenLimitThreshold = options.tokenLimitThreshold ?? defaultTokenLimitThreshold;
10
18
  const locationConfigs = options.locations.map((location) => {
11
19
  const circuitBreakerKey = `genkit:vertex-ai:location:${location}`;
12
20
  const tokenLimitCircuitBreakerKey = `${circuitBreakerKey}:token-limit`;
@@ -41,16 +49,19 @@ export function vertexAiMultiLocation(options) {
41
49
  const shuffledConfigs = shuffle([...locationConfigs]);
42
50
  let lastError;
43
51
  let isLargeRequest = false;
52
+ const skippedLocations = [];
44
53
  for (const { location, circuitBreaker, tokenLimitCircuitBreaker } of shuffledConfigs) {
45
54
  const check = await circuitBreaker.check();
46
55
  if (!check.allowed) {
47
56
  options.logger.warn(`Location ${location} is currently unhealthy. Skipping...`);
57
+ skippedLocations.push({ location, reason: 'unhealthy' });
48
58
  continue;
49
59
  }
50
60
  if (isLargeRequest) {
51
61
  const tokenCheck = await tokenLimitCircuitBreaker.check();
52
62
  if (!tokenCheck.allowed) {
53
63
  options.logger.warn(`Location ${location} is known to have a low token limit. Skipping for this large request...`);
64
+ skippedLocations.push({ location, reason: 'known to have low token limits' });
54
65
  continue;
55
66
  }
56
67
  }
@@ -65,6 +76,10 @@ export function vertexAiMultiLocation(options) {
65
76
  onChunk: streamingCallback,
66
77
  });
67
78
  await circuitBreaker.recordSuccess();
79
+ const isLargeSuccess = isLargeRequest || ((result.usage?.inputTokens ?? 0) > tokenLimitThreshold);
80
+ if (isLargeSuccess) {
81
+ await tokenLimitCircuitBreaker.recordSuccess();
82
+ }
68
83
  return result;
69
84
  }
70
85
  catch (error) {
@@ -88,6 +103,14 @@ export function vertexAiMultiLocation(options) {
88
103
  }
89
104
  }
90
105
  }
106
+ if (isNullOrUndefined(lastError)) {
107
+ const uniqueReasons = distinct(skippedLocations.map((s) => s.reason));
108
+ const reasonsString = uniqueReasons.join(' or ');
109
+ throw new GenkitError({
110
+ status: 'UNAVAILABLE',
111
+ message: `All locations were skipped because they are ${reasonsString}`,
112
+ });
113
+ }
91
114
  throw lastError;
92
115
  });
93
116
  };
@@ -138,4 +138,42 @@ describe('Genkit vertexai-multi-location Plugin Tests', () => {
138
138
  });
139
139
  expect(response2.text).toBe('success from cb-success');
140
140
  });
141
+ it('should throw if no locations are provided', async () => {
142
+ expect(() => {
143
+ vertexAiMultiLocation({
144
+ locations: [],
145
+ circuitBreakerProvider: cbProvider,
146
+ logger,
147
+ });
148
+ }).toThrow('At least one location must be provided');
149
+ });
150
+ it('should throw if all locations are unhealthy', async () => {
151
+ const ai2 = genkit({
152
+ plugins: [
153
+ vertexAiMultiLocation({
154
+ locations: ['unhealthy-1'],
155
+ circuitBreakerProvider: cbProvider,
156
+ logger,
157
+ circuitBreakerConfig: { resetTimeout: 1000000, threshold: 1 },
158
+ }),
159
+ ],
160
+ });
161
+ // Manually trip the circuit breaker
162
+ const cb = cbProvider.provide('genkit:vertex-ai:location:unhealthy-1', { threshold: 1, resetTimeout: 1000000 });
163
+ await cb.recordFailure();
164
+ ai2.defineModel({
165
+ name: 'vertexai/gemini-2.5-flash',
166
+ }, async () => {
167
+ return {
168
+ message: {
169
+ role: 'model',
170
+ content: [{ text: 'success' }],
171
+ },
172
+ };
173
+ });
174
+ await expect(ai2.generate({
175
+ model: 'vertexai-multi-location/gemini-2.5-flash',
176
+ prompt: 'test',
177
+ })).rejects.toThrow('All locations were skipped because they are unhealthy');
178
+ });
141
179
  });
@@ -5,6 +5,7 @@ import { CircuitBreakerState } from '../../../circuit-breaker/index.js';
5
5
  import { CircuitBreakerProvider } from '../../../circuit-breaker/provider.js';
6
6
  import { Logger } from '../../../logger/logger.js';
7
7
  import { setupIntegrationTest } from '../../../testing/index.js';
8
+ import { timeout } from '../../../utils/timing.js';
8
9
  import { vertexAiMultiLocation } from '../multi-region.plugin.js';
9
10
  vi.mock('#/utils/array/index.js', async (importOriginal) => {
10
11
  const actual = await importOriginal();
@@ -43,6 +44,7 @@ describe('Genkit vertexai-multi-location Token Limit Fallback Tests', () => {
43
44
  circuitBreakerProvider: cbProvider,
44
45
  logger,
45
46
  circuitBreakerConfig: { resetTimeout: 1_000_000, threshold: 1 },
47
+ tokenLimitCircuitBreakerConfig: { resetTimeout: 10, threshold: 1 },
46
48
  }),
47
49
  ],
48
50
  });
@@ -135,4 +137,73 @@ describe('Genkit vertexai-multi-location Token Limit Fallback Tests', () => {
135
137
  expect(region2Called).toBe(false); // Should be skipped because it is known to have a low limit
136
138
  expect(region3Called).toBe(true); // Should be tried as it is not known to be limited
137
139
  });
140
+ it('should record success for token limit breaker when it succeeds after being known to be large', async () => {
141
+ const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.';
142
+ let region1Called = false;
143
+ let region2Called = false;
144
+ let region3Called = false;
145
+ ai.defineModel({ name: 'vertexai/gemini-2.5-flash' }, async (request) => {
146
+ if (request.config?.location === 'region-1') {
147
+ region1Called = true;
148
+ throw new GenkitError({
149
+ status: 'INVALID_ARGUMENT',
150
+ message: tokenLimitErrorMessage,
151
+ });
152
+ }
153
+ if (request.config?.location === 'region-2') {
154
+ region2Called = true;
155
+ return { message: { role: 'model', content: [{ text: 'success from region-2' }] } };
156
+ }
157
+ if (request.config?.location === 'region-3') {
158
+ region3Called = true;
159
+ return { message: { role: 'model', content: [{ text: 'success from region-3' }] } };
160
+ }
161
+ throw new Error(`Unexpected location: ${request.config?.location}`);
162
+ });
163
+ // Initial shuffle is [region-1, region-2, region-3].
164
+ // Let's use a very short reset timeout for region-2.
165
+ const shortConfig = { threshold: 1, resetTimeout: 10 };
166
+ const tokenLimitCBShort = cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', shortConfig);
167
+ await tokenLimitCBShort.recordFailure();
168
+ // Small delay to ensure it goes to Half-Open
169
+ await timeout(50);
170
+ await ai.generate({
171
+ model: 'vertexai-multi-location/gemini-2.5-flash',
172
+ prompt: 'test',
173
+ });
174
+ expect(region1Called).toBe(true);
175
+ expect(region2Called).toBe(true); // Should NOT be skipped because it is Half-Open
176
+ expect(region3Called).toBe(false);
177
+ const status = await tokenLimitCBShort.check();
178
+ expect(status.state).toBe(CircuitBreakerState.Closed);
179
+ });
180
+ it('should record success for token limit breaker via usage-based learning (proactive success)', async () => {
181
+ // We want to verify that if a request has many tokens, it records success even if isLargeRequest was false.
182
+ const tokenLimitThreshold = 131_072;
183
+ const aiWithThreshold = genkit({
184
+ plugins: [
185
+ vertexAiMultiLocation({
186
+ locations: ['region-1'],
187
+ circuitBreakerProvider: cbProvider,
188
+ logger,
189
+ tokenLimitThreshold,
190
+ }),
191
+ ],
192
+ });
193
+ // Trip the breaker first
194
+ const tokenLimitCB = cbProvider.provide('genkit:vertex-ai:location:region-1:token-limit', { threshold: 1, resetTimeout: 1000000 });
195
+ await tokenLimitCB.recordFailure();
196
+ aiWithThreshold.defineModel({ name: 'vertexai/gemini-2.5-flash' }, async () => ({
197
+ message: { role: 'model', content: [{ text: 'success' }] },
198
+ usage: { inputTokens: tokenLimitThreshold + 1 },
199
+ }));
200
+ // This request is NOT known to be large initially.
201
+ // But it returns usage > threshold.
202
+ await aiWithThreshold.generate({
203
+ model: 'vertexai-multi-location/gemini-2.5-flash',
204
+ prompt: 'test',
205
+ });
206
+ const status = await tokenLimitCB.check();
207
+ expect(status.state).toBe(CircuitBreakerState.Closed);
208
+ });
138
209
  });
@@ -12,4 +12,10 @@ export declare abstract class VertexAiMultiLocationOptions {
12
12
  * By default, a threshold of 1 and a reset timeout of 15 minutes is used.
13
13
  */
14
14
  tokenLimitCircuitBreakerConfig?: Partial<CircuitBreakerConfig>;
15
+ /**
16
+ * Threshold of input tokens after which a request is considered large.
17
+ * A successful request with more tokens than this threshold will record a success for the token limit circuit breaker.
18
+ * Defaults to 131,072.
19
+ */
20
+ tokenLimitThreshold?: number;
15
21
  }
@@ -11,4 +11,10 @@ export class VertexAiMultiLocationOptions {
11
11
  * By default, a threshold of 1 and a reset timeout of 15 minutes is used.
12
12
  */
13
13
  tokenLimitCircuitBreakerConfig;
14
+ /**
15
+ * Threshold of input tokens after which a request is considered large.
16
+ * A successful request with more tokens than this threshold will record a success for the token limit circuit breaker.
17
+ * Defaults to 131,072.
18
+ */
19
+ tokenLimitThreshold;
14
20
  }
@@ -13,7 +13,7 @@ export type CircuitBreakerConfig = {
13
13
  export type CircuitBreakerCheckResult = {
14
14
  allowed: boolean;
15
15
  state: CircuitBreakerState;
16
- isProbe?: boolean;
16
+ isProbe: boolean;
17
17
  };
18
18
  export type CircuitBreakerArgument = string | (CircuitBreakerConfig & {
19
19
  key: string;
@@ -1,8 +1,12 @@
1
+ import { afterResolve } from '../../injector/index.js';
1
2
  import { CircuitBreaker, type CircuitBreakerCheckResult } from '../circuit-breaker.js';
2
3
  export declare class PostgresCircuitBreakerService extends CircuitBreaker {
3
4
  #private;
5
+ private static checkStatement;
6
+ [afterResolve](): void;
4
7
  check(): Promise<CircuitBreakerCheckResult>;
5
8
  recordSuccess(): Promise<void>;
6
9
  recordFailure(): Promise<void>;
7
10
  recordFailures(count: number): Promise<void>;
11
+ private getPreparedCheckStatement;
8
12
  }
@@ -4,40 +4,42 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
4
4
  else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
5
5
  return c > 3 && r && Object.defineProperty(target, key, r), r;
6
6
  };
7
- import { sql } from 'drizzle-orm';
8
- import { injectArgument, provide, Singleton } from '../../injector/index.js';
9
- import { interval, TRANSACTION_TIMESTAMP } from '../../orm/index.js';
7
+ var PostgresCircuitBreakerService_1;
8
+ import { and, eq, lte, sql, isNotNull as sqlIsNotNull } from 'drizzle-orm';
9
+ import { afterResolve, injectArgument, provide, Singleton } from '../../injector/index.js';
10
+ import { coalesce, interval, TRANSACTION_TIMESTAMP } from '../../orm/index.js';
10
11
  import { DatabaseConfig, injectRepository } from '../../orm/server/index.js';
11
- import { currentTimestamp } from '../../utils/date-time.js';
12
- import { isDefined, isString, isUndefined } from '../../utils/type-guards.js';
12
+ import { isString, isUndefined } from '../../utils/type-guards.js';
13
13
  import { millisecondsPerSecond } from '../../utils/units.js';
14
14
  import { CircuitBreaker, CircuitBreakerState } from '../circuit-breaker.js';
15
15
  import { PostgresCircuitBreaker } from './model.js';
16
16
  import { PostgresCircuitBreakerModuleConfig } from './module.js';
17
+ import { circuitBreaker } from './schemas.js';
17
18
  let PostgresCircuitBreakerService = class PostgresCircuitBreakerService extends CircuitBreaker {
19
+ static { PostgresCircuitBreakerService_1 = this; }
20
+ static checkStatement;
18
21
  #repository = injectRepository(PostgresCircuitBreaker);
19
22
  #arg = injectArgument(this);
20
23
  #key = isString(this.#arg) ? this.#arg : this.#arg.key;
21
24
  #threshold = (isString(this.#arg) ? undefined : this.#arg.threshold) ?? 5;
22
25
  #resetTimeout = (isString(this.#arg) ? undefined : this.#arg.resetTimeout) ?? 30 * millisecondsPerSecond;
26
+ [afterResolve]() {
27
+ PostgresCircuitBreakerService_1.checkStatement ??= this.getPreparedCheckStatement();
28
+ }
23
29
  async check() {
24
- return await this.#repository.transaction(async (tx) => {
25
- const breaker = await this.#repository.withTransaction(tx).tryLoadByQuery({ key: this.#key });
26
- if (isUndefined(breaker) || breaker.state === CircuitBreakerState.Closed) {
27
- return { allowed: true, state: CircuitBreakerState.Closed };
28
- }
29
- if (breaker.state === CircuitBreakerState.HalfOpen) {
30
- return { allowed: true, state: CircuitBreakerState.HalfOpen, isProbe: false };
31
- }
32
- // State is Open
33
- if (currentTimestamp() < (breaker.resetTimestamp ?? 0)) {
34
- return { allowed: false, state: CircuitBreakerState.Open };
35
- }
36
- // Atomic transition from Open -> HalfOpen (The Probe)
37
- const updated = await this.#repository.withTransaction(tx).tryUpdateByQuery({ key: this.#key, state: CircuitBreakerState.Open }, { state: CircuitBreakerState.HalfOpen });
38
- const isProbe = isDefined(updated);
39
- return { allowed: isProbe, state: isProbe ? CircuitBreakerState.HalfOpen : CircuitBreakerState.Open, isProbe };
40
- });
30
+ const [result] = await PostgresCircuitBreakerService_1.checkStatement.execute({ key: this.#key });
31
+ // 1. Breaker doesn't exist or is Closed
32
+ if (isUndefined(result) || (result.state === CircuitBreakerState.Closed)) {
33
+ return { allowed: true, state: CircuitBreakerState.Closed, isProbe: false };
34
+ }
35
+ // 2. Atomic transition to HalfOpen succeeded. This request is the chosen probe!
36
+ if (result.isProbe) {
37
+ return { allowed: true, state: CircuitBreakerState.HalfOpen, isProbe: true };
38
+ }
39
+ // 3. Fallback: Catch-all for failed transitions.
40
+ // - If state is HalfOpen, someone else is probing. Reject.
41
+ // - If state is Open, timeout hasn't expired. Reject
42
+ return { allowed: false, state: result.state, isProbe: false };
41
43
  }
42
44
  async recordSuccess() {
43
45
  await this.#repository.tryDeleteByQuery({ key: this.#key });
@@ -46,7 +48,6 @@ let PostgresCircuitBreakerService = class PostgresCircuitBreakerService extends
46
48
  await this.recordFailures(1);
47
49
  }
48
50
  async recordFailures(count) {
49
- const table = this.#repository.table;
50
51
  const initialTrip = count >= this.#threshold;
51
52
  const initialState = initialTrip ? CircuitBreakerState.Open : CircuitBreakerState.Closed;
52
53
  const initialResetTimestamp = initialTrip
@@ -58,19 +59,40 @@ let PostgresCircuitBreakerService = class PostgresCircuitBreakerService extends
58
59
  failureCount: count,
59
60
  resetTimestamp: initialResetTimestamp,
60
61
  }, {
61
- failureCount: sql `${table.failureCount} + ${count}`,
62
+ failureCount: sql `${circuitBreaker.failureCount} + ${count}`,
62
63
  state: sql `CASE
63
- WHEN ${table.failureCount} + ${count} >= ${this.#threshold} THEN ${CircuitBreakerState.Open}
64
- ELSE ${table.state}
64
+ WHEN ${circuitBreaker.failureCount} + ${count} >= ${this.#threshold} THEN ${CircuitBreakerState.Open}
65
+ ELSE ${circuitBreaker.state}
65
66
  END`,
66
67
  resetTimestamp: sql `CASE
67
- WHEN ${table.failureCount} + ${count} >= ${this.#threshold} THEN ${TRANSACTION_TIMESTAMP} + ${interval(this.#resetTimeout, 'milliseconds')}
68
- ELSE ${table.resetTimestamp}
68
+ WHEN ${circuitBreaker.failureCount} + ${count} >= ${this.#threshold} THEN ${TRANSACTION_TIMESTAMP} + ${interval(this.#resetTimeout, 'milliseconds')}
69
+ ELSE ${circuitBreaker.resetTimestamp}
69
70
  END`,
70
71
  });
71
72
  }
73
+ getPreparedCheckStatement() {
74
+ const session = this.#repository.session;
75
+ const attemptUpdate = session.$with('attempt_update').as(() => session
76
+ .update(circuitBreaker)
77
+ .set({ state: CircuitBreakerState.HalfOpen })
78
+ .where(and(eq(circuitBreaker.key, sql.placeholder('key')), eq(circuitBreaker.state, CircuitBreakerState.Open), lte(circuitBreaker.resetTimestamp, TRANSACTION_TIMESTAMP)))
79
+ .returning({
80
+ key: circuitBreaker.key,
81
+ state: circuitBreaker.state,
82
+ }));
83
+ return session
84
+ .with(attemptUpdate)
85
+ .select({
86
+ state: coalesce(attemptUpdate.state, circuitBreaker.state),
87
+ isProbe: sqlIsNotNull(attemptUpdate.key),
88
+ })
89
+ .from(circuitBreaker)
90
+ .leftJoin(attemptUpdate, eq(circuitBreaker.key, attemptUpdate.key))
91
+ .where(eq(circuitBreaker.key, sql.placeholder('key')))
92
+ .prepare('circuit_breaker_check');
93
+ }
72
94
  };
73
- PostgresCircuitBreakerService = __decorate([
95
+ PostgresCircuitBreakerService = PostgresCircuitBreakerService_1 = __decorate([
74
96
  Singleton({
75
97
  argumentIdentityProvider: (arg) => isString(arg) ? arg : arg.key,
76
98
  providers: [
@@ -58,9 +58,7 @@ describe('Circuit Breaker (Standalone) Tests', () => {
58
58
  expect(probe.isProbe).toBe(true);
59
59
  // Subsequent check should be denied (Half-Open wait)
60
60
  const subsequent = await breaker.check();
61
- expect(subsequent.allowed).toBe(true); // check() allows HalfOpen, but queue logic restricts it.
62
- // Wait, let's verify PostgresCircuitBreakerService logic:
63
- // if (breaker.state === CircuitBreakerState.HalfOpen) return { allowed: true, state: CircuitBreakerState.HalfOpen, isProbe: false };
61
+ expect(subsequent.allowed).toBe(false);
64
62
  expect(subsequent.state).toBe(CircuitBreakerState.HalfOpen);
65
63
  expect(subsequent.isProbe).toBe(false);
66
64
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tstdl/base",
3
- "version": "0.93.160",
3
+ "version": "0.93.162",
4
4
  "author": "Patrick Hein",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -152,8 +152,8 @@
152
152
  "type-fest": "^5.4"
153
153
  },
154
154
  "peerDependencies": {
155
- "@aws-sdk/client-s3": "^3.1002",
156
- "@aws-sdk/s3-request-presigner": "^3.1002",
155
+ "@aws-sdk/client-s3": "^3.1003",
156
+ "@aws-sdk/s3-request-presigner": "^3.1003",
157
157
  "@genkit-ai/google-genai": "^1.29",
158
158
  "@google-cloud/storage": "^7.19",
159
159
  "@toon-format/toon": "^2.1.0",
@@ -184,7 +184,7 @@
184
184
  },
185
185
  "devDependencies": {
186
186
  "@biomejs/biome": "2.4",
187
- "@stylistic/eslint-plugin": "5.9",
187
+ "@stylistic/eslint-plugin": "5.10",
188
188
  "@types/koa__router": "12.0",
189
189
  "@types/luxon": "3.7",
190
190
  "@types/mjml": "4.7",