@tstdl/base 0.93.153 → 0.93.154

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,9 +3,27 @@ import { GenkitError, modelRef } from 'genkit';
3
3
  import { genkitPlugin } from 'genkit/plugin';
4
4
  import { shuffle } from '../../utils/array/index.js';
5
5
  import { isInstanceOf, isNullOrUndefined } from '../../utils/type-guards.js';
6
+ import { millisecondsPerMinute, millisecondsPerSecond } from '../../utils/units.js';
6
7
  const pluginKey = 'vertexai-multi-location';
7
8
  const geminiModelReference = vertexAI.model('gemini-2.5-flash');
8
9
  export function vertexAiMultiLocation(options) {
10
+ const locationConfigs = options.locations.map((location) => {
11
+ const circuitBreakerKey = `genkit:vertex-ai:location:${location}`;
12
+ const tokenLimitCircuitBreakerKey = `${circuitBreakerKey}:token-limit`;
13
+ return {
14
+ location,
15
+ circuitBreaker: options.circuitBreakerProvider.provide(circuitBreakerKey, {
16
+ threshold: 1,
17
+ resetTimeout: 30 * millisecondsPerSecond,
18
+ ...options.circuitBreakerConfig,
19
+ }),
20
+ tokenLimitCircuitBreaker: options.circuitBreakerProvider.provide(tokenLimitCircuitBreakerKey, {
21
+ threshold: 1,
22
+ resetTimeout: 15 * millisecondsPerMinute,
23
+ ...options.tokenLimitCircuitBreakerConfig,
24
+ }),
25
+ };
26
+ });
9
27
  const createVirtualizedModelAction = async (ai, modelName) => {
10
28
  const baseModelName = `vertexai/${modelName}`;
11
29
  const target = modelName;
@@ -20,20 +38,22 @@ export function vertexAiMultiLocation(options) {
20
38
  configSchema: baseModelAction.__action.inputSchema?.shape?.config,
21
39
  label: `${baseModelAction.__action.description ?? baseModelAction.__action.name} (Multi-Location Routing)`,
22
40
  }, async (request, streamingCallback) => {
23
- const shuffledLocations = shuffle([...options.locations]);
41
+ const shuffledConfigs = shuffle([...locationConfigs]);
24
42
  let lastError;
25
- for (const location of shuffledLocations) {
26
- const circuitBreakerKey = `genkit:vertex-ai:location:${location}`;
27
- const circuitBreaker = options.circuitBreakerProvider.provide(circuitBreakerKey, {
28
- threshold: 1, // Aggressive for 429
29
- resetTimeout: options.circuitBreakerConfig?.resetTimeout ?? 30000,
30
- ...options.circuitBreakerConfig,
31
- });
43
+ let isLargeRequest = false;
44
+ for (const { location, circuitBreaker, tokenLimitCircuitBreaker } of shuffledConfigs) {
32
45
  const check = await circuitBreaker.check();
33
46
  if (!check.allowed) {
34
47
  options.logger.warn(`Location ${location} is currently unhealthy. Skipping...`);
35
48
  continue;
36
49
  }
50
+ if (isLargeRequest) {
51
+ const tokenCheck = await tokenLimitCircuitBreaker.check();
52
+ if (!tokenCheck.allowed) {
53
+ options.logger.warn(`Location ${location} is known to have a low token limit. Skipping for this large request...`);
54
+ continue;
55
+ }
56
+ }
37
57
  try {
38
58
  const result = await baseModelAction({
39
59
  ...request,
@@ -52,12 +72,20 @@ export function vertexAiMultiLocation(options) {
52
72
  if (!isInstanceOf(error, GenkitError)) {
53
73
  throw error;
54
74
  }
55
- const isRetryable = ((error.status == 'RESOURCE_EXHAUSTED') || (error.status == 'UNAVAILABLE') || error.message.includes('quota'));
75
+ const isTokenLimitError = (error.status == 'INVALID_ARGUMENT') && error.message.includes('input token count') && error.message.includes('model only supports up to');
76
+ const isRetryable = isTokenLimitError || ((error.status == 'RESOURCE_EXHAUSTED') || (error.status == 'UNAVAILABLE') || error.message.includes('quota'));
56
77
  if (!isRetryable) {
57
78
  throw error;
58
79
  }
59
- options.logger.warn(`Location ${location} responded with ${error.status}. Tripping circuit breaker and trying next location...`);
60
- await circuitBreaker.recordFailure();
80
+ if (isTokenLimitError) {
81
+ options.logger.warn(`Location ${location} responded with token limit error. Trying next location...`);
82
+ isLargeRequest = true;
83
+ await tokenLimitCircuitBreaker.recordFailure();
84
+ }
85
+ else {
86
+ options.logger.warn(`Location ${location} responded with ${error.status}. Tripping circuit breaker and trying next location...`);
87
+ await circuitBreaker.recordFailure();
88
+ }
61
89
  }
62
90
  }
63
91
  throw lastError;
@@ -0,0 +1,2 @@
1
+ /** biome-ignore-all lint/suspicious/useAwait: defineModel requires async */
2
+ export {};
@@ -0,0 +1,138 @@
1
+ /** biome-ignore-all lint/suspicious/useAwait: defineModel requires async */
2
+ import { genkit, GenkitError, z } from 'genkit';
3
+ import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
4
+ import { CircuitBreakerState } from '../../../circuit-breaker/index.js';
5
+ import { CircuitBreakerProvider } from '../../../circuit-breaker/provider.js';
6
+ import { Logger } from '../../../logger/logger.js';
7
+ import { setupIntegrationTest } from '../../../testing/index.js';
8
+ import { vertexAiMultiLocation } from '../multi-region.plugin.js';
9
+ vi.mock('#/utils/array/index.js', async (importOriginal) => { // Partial mock: keeps the real array utilities and overrides only shuffle. NOTE(review): the plugin imports '../../utils/array/index.js'; presumably '#/…' resolves to the same module — confirm.
10
+ const actual = await importOriginal();
11
+ return {
12
+ ...actual,
13
+ shuffle: vi.fn((items) => [...items]), // order-preserving copy, so the tests can rely on the configured location order
14
+ };
15
+ });
16
+ vi.mock('@genkit-ai/google-genai', () => ({ // Stub of the Google GenAI module: vertexAI.model returns a plain object with name, info and configSchema; googleAI is a bare mock function.
17
+ // biome-ignore lint/style/useNamingConvention: given
18
+ vertexAI: {
19
+ model: vi.fn((name) => ({
20
+ name: `vertexai/${name}`,
21
+ info: { label: 'mock' },
22
+ configSchema: z.object({}),
23
+ })),
24
+ },
25
+ // biome-ignore lint/style/useNamingConvention: given
26
+ googleAI: vi.fn(),
27
+ }));
28
+ describe('Genkit vertexai-multi-location Token Limit Fallback Tests', () => { // Covers the plugin's fallback between locations when one of them rejects a request for exceeding its input token limit.
29
+ let ai;
30
+ let cbProvider;
31
+ let logger;
32
+ beforeAll(async () => { // resolve the shared circuit breaker provider and logger once from the integration test injector
33
+ const { injector } = await setupIntegrationTest({ modules: { circuitBreaker: true } });
34
+ cbProvider = injector.resolve(CircuitBreakerProvider);
35
+ logger = injector.resolve(Logger, 'Test');
36
+ });
37
+ beforeEach(async () => {
38
+ vi.clearAllMocks();
39
+ ai = genkit({ // fresh Genkit instance per test, with the plugin configured for three fake locations
40
+ plugins: [
41
+ vertexAiMultiLocation({
42
+ locations: ['region-1', 'region-2', 'region-3'],
43
+ circuitBreakerProvider: cbProvider,
44
+ logger,
45
+ circuitBreakerConfig: { resetTimeout: 1_000_000, threshold: 1 },
46
+ }),
47
+ ],
48
+ });
49
+ const config = { threshold: 1, resetTimeout: 1_000_000 }; // presumably recordSuccess() below returns each breaker to its closed state so a breaker tripped by one test does not leak into the next — confirm
50
+ await cbProvider.provide('genkit:vertex-ai:location:region-1', config).recordSuccess();
51
+ await cbProvider.provide('genkit:vertex-ai:location:region-2', config).recordSuccess();
52
+ await cbProvider.provide('genkit:vertex-ai:location:region-3', config).recordSuccess();
53
+ await cbProvider.provide('genkit:vertex-ai:location:region-1:token-limit', config).recordSuccess();
54
+ await cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', config).recordSuccess();
55
+ await cbProvider.provide('genkit:vertex-ai:location:region-3:token-limit', config).recordSuccess();
56
+ });
57
+ it('should fallback on token limit error but NOT trip main circuit breaker', async () => { // scenario: region-1 rejects with a token limit error, region-2 answers
58
+ const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.'; // contains both substrings the plugin looks for: 'input token count' and 'model only supports up to'
59
+ let region1Called = false;
60
+ let region2Called = false;
61
+ ai.defineModel({
62
+ name: 'vertexai/gemini-2.5-flash',
63
+ }, async (request) => { // fake base model: its behavior is selected by request.config.location
64
+ if (request.config?.location === 'region-1') {
65
+ region1Called = true;
66
+ throw new GenkitError({
67
+ status: 'INVALID_ARGUMENT',
68
+ message: tokenLimitErrorMessage,
69
+ });
70
+ }
71
+ if (request.config?.location === 'region-2') {
72
+ region2Called = true;
73
+ return {
74
+ message: {
75
+ role: 'model',
76
+ content: [{ text: 'success from region-2' }],
77
+ },
78
+ };
79
+ }
80
+ throw new Error('Unexpected location'); // a plain Error (not a GenkitError) is rethrown by the plugin, so reaching this fails the test
81
+ });
82
+ const response = await ai.generate({
83
+ model: 'vertexai-multi-location/gemini-2.5-flash',
84
+ prompt: 'test',
85
+ });
86
+ expect(response.text).toBe('success from region-2');
87
+ expect(region1Called).toBe(true);
88
+ expect(region2Called).toBe(true);
89
+ // Verify main circuit breaker for region-1 is still CLOSED (allowed)
90
+ const cb = cbProvider.provide('genkit:vertex-ai:location:region-1', { threshold: 1, resetTimeout: 1000000 });
91
+ const status = await cb.check();
92
+ expect(status.state).toBe(CircuitBreakerState.Closed);
93
+ expect(status.allowed).toBe(true);
94
+ });
95
+ it('should skip locations with known token limits within the same request once it is known to be large', async () => { // scenario: region-2's token limit breaker is already tripped, region-1 rejects, region-3 answers
96
+ const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.';
97
+ // First, trip the token limit breaker for region-2
98
+ const tokenLimitCB2 = cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', { threshold: 1, resetTimeout: 1000000 });
99
+ await tokenLimitCB2.recordFailure();
100
+ let region1Called = false;
101
+ let region2Called = false;
102
+ let region3Called = false;
103
+ ai.defineModel({
104
+ name: 'vertexai/gemini-2.5-flash',
105
+ }, async (request) => { // fake base model: its behavior is selected by request.config.location
106
+ if (request.config?.location === 'region-1') {
107
+ region1Called = true;
108
+ throw new GenkitError({
109
+ status: 'INVALID_ARGUMENT',
110
+ message: tokenLimitErrorMessage,
111
+ });
112
+ }
113
+ if (request.config?.location === 'region-2') {
114
+ region2Called = true;
115
+ return { message: { role: 'model', content: [{ text: 'success from region-2' }] } }; // would succeed if called; the assertions below expect it never is
116
+ }
117
+ if (request.config?.location === 'region-3') {
118
+ region3Called = true;
119
+ return {
120
+ message: {
121
+ role: 'model',
122
+ content: [{ text: 'success from region-3' }],
123
+ },
124
+ };
125
+ }
126
+ throw new Error('Unexpected location');
127
+ });
128
+ // Initial shuffle is mocked to [region-1, region-2, region-3]
129
+ const response = await ai.generate({
130
+ model: 'vertexai-multi-location/gemini-2.5-flash',
131
+ prompt: 'test',
132
+ });
133
+ expect(response.text).toBe('success from region-3');
134
+ expect(region1Called).toBe(true); // Fails, makes request "known to be large"
135
+ expect(region2Called).toBe(false); // Should be skipped because it is known to have a low limit
136
+ expect(region3Called).toBe(true); // Should be tried as it is not known to be limited
137
+ });
138
+ });
@@ -1,5 +1,5 @@
1
1
  import type { CircuitBreakerConfig } from '../../circuit-breaker/circuit-breaker.js';
2
- export interface VertexAiMultiLocationOptions {
2
+ export declare abstract class VertexAiMultiLocationOptions {
3
3
  /** The Google Cloud locations to use for routing. */
4
4
  locations: string[];
5
5
  /**
@@ -7,4 +7,9 @@ export interface VertexAiMultiLocationOptions {
7
7
  * By default, a threshold of 1 is used for 429 errors.
8
8
  */
9
9
  circuitBreakerConfig?: Partial<CircuitBreakerConfig>;
10
+ /**
11
+ * Optional token limit circuit breaker configuration.
12
+ * By default, a threshold of 1 and a reset timeout of 15 minutes is used.
13
+ */
14
+ tokenLimitCircuitBreakerConfig?: Partial<CircuitBreakerConfig>;
10
15
  }
@@ -1 +1,14 @@
1
- export {};
1
+ export class VertexAiMultiLocationOptions { // Options for the vertexAiMultiLocation plugin. NOTE(review): the plugin also reads options.circuitBreakerProvider and options.logger, which are not declared on this class — confirm where they are typed.
2
+ /** The Google Cloud locations to use for routing; the plugin creates a pair of circuit breakers per location and tries the locations in shuffled order. */
3
+ locations;
4
+ /**
5
+ * Optional overrides for the per-location circuit breaker.
6
+ * By default, a threshold of 1 (used for 429 errors) and a reset timeout of 30 seconds are used; values given here take precedence.
7
+ */
8
+ circuitBreakerConfig;
9
+ /**
10
+ * Optional overrides for the per-location token limit circuit breaker.
11
+ * By default, a threshold of 1 and a reset timeout of 15 minutes are used; values given here take precedence.
12
+ */
13
+ tokenLimitCircuitBreakerConfig;
14
+ }
@@ -34,7 +34,7 @@ import { TstdlCategoryParents, TstdlDocumentCategoryLabels, TstdlDocumentPropert
34
34
  const config = {
35
35
  database: {
36
36
  host: string('DATABASE_HOST', '127.0.0.1'),
37
- port: positiveInteger('DATABASE_PORT', 5432),
37
+ port: positiveInteger('DATABASE_PORT', 15433),
38
38
  user: string('DATABASE_USER', 'tstdl'),
39
39
  pass: string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
40
40
  database: string('DATABASE_NAME', 'tstdl'),
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,52 @@
1
+ import { configurePostgresRateLimiter, migratePostgresRateLimiterSchema } from '../../rate-limit/postgres/index.js';
2
+ import { RateLimiterProvider } from '../../rate-limit/index.js';
3
+ import { Injector, runInInjectionContext } from '../../injector/index.js';
4
+ import { configureOrm } from '../../orm/server/index.js';
5
+ import { ConsoleLogTransport, LogFormatter, Logger, LogTransport, PrettyPrintLogFormatter } from '../../logger/index.js';
6
+ import * as configParser from '../../utils/config-parser.js';
7
+ import { timeout } from '../../utils/timing.js';
8
+ async function main() { // Example: registers logging, configures the ORM and the Postgres rate limiter, runs the rate limiter migration, then attempts 15 acquisitions for a single resource.
9
+ const injector = new Injector('ExampleInjector');
10
+ // 1. Configure Logging
11
+ injector.register(LogFormatter, { useToken: PrettyPrintLogFormatter });
12
+ injector.register(LogTransport, { useToken: ConsoleLogTransport });
13
+ const logger = injector.resolve(Logger);
14
+ // 2. Configure Database
15
+ configureOrm({
16
+ connection: { // each value comes from the named environment variable via configParser, with the second argument as the fallback
17
+ host: configParser.string('DATABASE_HOST', '127.0.0.1'),
18
+ port: configParser.positiveInteger('DATABASE_PORT', 15433),
19
+ user: configParser.string('DATABASE_USER', 'tstdl'),
20
+ password: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
21
+ database: configParser.string('DATABASE_NAME', 'tstdl'),
22
+ },
23
+ });
24
+ // 3. Configure Rate Limiter
25
+ configurePostgresRateLimiter();
26
+ // 4. Run Migrations (for setup)
27
+ logger.info('Running migrations...');
28
+ await runInInjectionContext(injector, migratePostgresRateLimiterSchema);
29
+ // 5. Get a Rate Limiter Instance
30
+ const provider = injector.resolve(RateLimiterProvider);
31
+ const limiter = provider.get('api-limiter', {
32
+ burstCapacity: 10,
33
+ refillInterval: 1000, // NOTE(review): presumably milliseconds; the original note says this yields 10 tokens per second — confirm against the rate limiter's refill semantics
34
+ });
35
+ const resource = 'user-123';
36
+ // 6. Simulate Traffic
37
+ logger.info('Starting simulation...');
38
+ for (let i = 0; i < 15; i++) { // 15 attempts against a burstCapacity of 10
39
+ const success = await limiter.tryAcquire(resource);
40
+ if (success) {
41
+ logger.info(`Request ${i + 1}: Allowed`);
42
+ }
43
+ else {
44
+ logger.warn(`Request ${i + 1}: Throttled`);
45
+ }
46
+ // Small delay to simulate some processing/network time
47
+ await timeout(50);
48
+ }
49
+ // 7. Cleanup
50
+ await injector.dispose(); // not reached if anything above throws: there is no try/finally around the steps
51
+ }
52
+ void main(); // the returned promise is deliberately discarded, so a rejection from main is not handled here
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tstdl/base",
3
- "version": "0.93.153",
3
+ "version": "0.93.154",
4
4
  "author": "Patrick Hein",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -152,8 +152,8 @@
152
152
  "type-fest": "^5.4"
153
153
  },
154
154
  "peerDependencies": {
155
- "@aws-sdk/client-s3": "^3.1000",
156
- "@aws-sdk/s3-request-presigner": "^3.1000",
155
+ "@aws-sdk/client-s3": "^3.1001",
156
+ "@aws-sdk/s3-request-presigner": "^3.1001",
157
157
  "@genkit-ai/google-genai": "^1.29",
158
158
  "@google-cloud/storage": "^7.19",
159
159
  "@toon-format/toon": "^2.1.0",
package/test1.js CHANGED
@@ -14,7 +14,7 @@ import { assert } from './utils/type-guards.js';
14
14
  const config = {
15
15
  database: {
16
16
  host: configParser.string('DATABASE_HOST', '127.0.0.1'),
17
- port: configParser.positiveInteger('DATABASE_PORT', 5432),
17
+ port: configParser.positiveInteger('DATABASE_PORT', 15433),
18
18
  user: configParser.string('DATABASE_USER', 'tstdl'),
19
19
  pass: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
20
20
  database: configParser.string('DATABASE_NAME', 'tstdl'),
package/test4.js CHANGED
@@ -8,7 +8,7 @@ import { boolean, positiveInteger, string } from './utils/config-parser.js';
8
8
  const config = {
9
9
  database: {
10
10
  host: string('DATABASE_HOST', '127.0.0.1'),
11
- port: positiveInteger('DATABASE_PORT', 5432),
11
+ port: positiveInteger('DATABASE_PORT', 15433),
12
12
  user: string('DATABASE_USER', 'tstdl'),
13
13
  pass: string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
14
14
  database: string('DATABASE_NAME', 'tstdl'),
@@ -48,7 +48,7 @@ export async function setupIntegrationTest(options = {}) {
48
48
  // 2. Database Config
49
49
  const dbConfig = {
50
50
  host: configParser.string('DATABASE_HOST', '127.0.0.1'),
51
- port: configParser.positiveInteger('DATABASE_PORT', 5432),
51
+ port: configParser.positiveInteger('DATABASE_PORT', 15433),
52
52
  user: configParser.string('DATABASE_USER', 'tstdl'),
53
53
  password: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
54
54
  database: configParser.string('DATABASE_NAME', 'tstdl'),