@tstdl/base 0.93.153 → 0.93.154
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai/genkit/multi-region.plugin.js +39 -11
- package/ai/genkit/tests/token-limit-fallback.test.d.ts +2 -0
- package/ai/genkit/tests/token-limit-fallback.test.js +138 -0
- package/ai/genkit/types.d.ts +6 -1
- package/ai/genkit/types.js +14 -1
- package/examples/document-management/main.js +1 -1
- package/examples/rate-limit/basic-usage.d.ts +1 -0
- package/examples/rate-limit/basic-usage.js +52 -0
- package/package.json +3 -3
- package/test1.js +1 -1
- package/test4.js +1 -1
- package/testing/integration-setup.js +1 -1
|
@@ -3,9 +3,27 @@ import { GenkitError, modelRef } from 'genkit';
|
|
|
3
3
|
import { genkitPlugin } from 'genkit/plugin';
|
|
4
4
|
import { shuffle } from '../../utils/array/index.js';
|
|
5
5
|
import { isInstanceOf, isNullOrUndefined } from '../../utils/type-guards.js';
|
|
6
|
+
import { millisecondsPerMinute, millisecondsPerSecond } from '../../utils/units.js';
|
|
6
7
|
const pluginKey = 'vertexai-multi-location';
|
|
7
8
|
const geminiModelReference = vertexAI.model('gemini-2.5-flash');
|
|
8
9
|
export function vertexAiMultiLocation(options) {
|
|
10
|
+
const locationConfigs = options.locations.map((location) => {
|
|
11
|
+
const circuitBreakerKey = `genkit:vertex-ai:location:${location}`;
|
|
12
|
+
const tokenLimitCircuitBreakerKey = `${circuitBreakerKey}:token-limit`;
|
|
13
|
+
return {
|
|
14
|
+
location,
|
|
15
|
+
circuitBreaker: options.circuitBreakerProvider.provide(circuitBreakerKey, {
|
|
16
|
+
threshold: 1,
|
|
17
|
+
resetTimeout: 30 * millisecondsPerSecond,
|
|
18
|
+
...options.circuitBreakerConfig,
|
|
19
|
+
}),
|
|
20
|
+
tokenLimitCircuitBreaker: options.circuitBreakerProvider.provide(tokenLimitCircuitBreakerKey, {
|
|
21
|
+
threshold: 1,
|
|
22
|
+
resetTimeout: 15 * millisecondsPerMinute,
|
|
23
|
+
...options.tokenLimitCircuitBreakerConfig,
|
|
24
|
+
}),
|
|
25
|
+
};
|
|
26
|
+
});
|
|
9
27
|
const createVirtualizedModelAction = async (ai, modelName) => {
|
|
10
28
|
const baseModelName = `vertexai/${modelName}`;
|
|
11
29
|
const target = modelName;
|
|
@@ -20,20 +38,22 @@ export function vertexAiMultiLocation(options) {
|
|
|
20
38
|
configSchema: baseModelAction.__action.inputSchema?.shape?.config,
|
|
21
39
|
label: `${baseModelAction.__action.description ?? baseModelAction.__action.name} (Multi-Location Routing)`,
|
|
22
40
|
}, async (request, streamingCallback) => {
|
|
23
|
-
const
|
|
41
|
+
const shuffledConfigs = shuffle([...locationConfigs]);
|
|
24
42
|
let lastError;
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
const circuitBreaker = options.circuitBreakerProvider.provide(circuitBreakerKey, {
|
|
28
|
-
threshold: 1, // Aggressive for 429
|
|
29
|
-
resetTimeout: options.circuitBreakerConfig?.resetTimeout ?? 30000,
|
|
30
|
-
...options.circuitBreakerConfig,
|
|
31
|
-
});
|
|
43
|
+
let isLargeRequest = false;
|
|
44
|
+
for (const { location, circuitBreaker, tokenLimitCircuitBreaker } of shuffledConfigs) {
|
|
32
45
|
const check = await circuitBreaker.check();
|
|
33
46
|
if (!check.allowed) {
|
|
34
47
|
options.logger.warn(`Location ${location} is currently unhealthy. Skipping...`);
|
|
35
48
|
continue;
|
|
36
49
|
}
|
|
50
|
+
if (isLargeRequest) {
|
|
51
|
+
const tokenCheck = await tokenLimitCircuitBreaker.check();
|
|
52
|
+
if (!tokenCheck.allowed) {
|
|
53
|
+
options.logger.warn(`Location ${location} is known to have a low token limit. Skipping for this large request...`);
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
37
57
|
try {
|
|
38
58
|
const result = await baseModelAction({
|
|
39
59
|
...request,
|
|
@@ -52,12 +72,20 @@ export function vertexAiMultiLocation(options) {
|
|
|
52
72
|
if (!isInstanceOf(error, GenkitError)) {
|
|
53
73
|
throw error;
|
|
54
74
|
}
|
|
55
|
-
const
|
|
75
|
+
const isTokenLimitError = (error.status == 'INVALID_ARGUMENT') && error.message.includes('input token count') && error.message.includes('model only supports up to');
|
|
76
|
+
const isRetryable = isTokenLimitError || ((error.status == 'RESOURCE_EXHAUSTED') || (error.status == 'UNAVAILABLE') || error.message.includes('quota'));
|
|
56
77
|
if (!isRetryable) {
|
|
57
78
|
throw error;
|
|
58
79
|
}
|
|
59
|
-
|
|
60
|
-
|
|
80
|
+
if (isTokenLimitError) {
|
|
81
|
+
options.logger.warn(`Location ${location} responded with token limit error. Trying next location...`);
|
|
82
|
+
isLargeRequest = true;
|
|
83
|
+
await tokenLimitCircuitBreaker.recordFailure();
|
|
84
|
+
}
|
|
85
|
+
else {
|
|
86
|
+
options.logger.warn(`Location ${location} responded with ${error.status}. Tripping circuit breaker and trying next location...`);
|
|
87
|
+
await circuitBreaker.recordFailure();
|
|
88
|
+
}
|
|
61
89
|
}
|
|
62
90
|
}
|
|
63
91
|
throw lastError;
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/** biome-ignore-all lint/suspicious/useAwait: defineModel requires async */
|
|
2
|
+
import { genkit, GenkitError, z } from 'genkit';
|
|
3
|
+
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
4
|
+
import { CircuitBreakerState } from '../../../circuit-breaker/index.js';
|
|
5
|
+
import { CircuitBreakerProvider } from '../../../circuit-breaker/provider.js';
|
|
6
|
+
import { Logger } from '../../../logger/logger.js';
|
|
7
|
+
import { setupIntegrationTest } from '../../../testing/index.js';
|
|
8
|
+
import { vertexAiMultiLocation } from '../multi-region.plugin.js';
|
|
9
|
+
vi.mock('#/utils/array/index.js', async (importOriginal) => {
|
|
10
|
+
const actual = await importOriginal();
|
|
11
|
+
return {
|
|
12
|
+
...actual,
|
|
13
|
+
shuffle: vi.fn((items) => [...items]),
|
|
14
|
+
};
|
|
15
|
+
});
|
|
16
|
+
vi.mock('@genkit-ai/google-genai', () => ({
|
|
17
|
+
// biome-ignore lint/style/useNamingConvention: given
|
|
18
|
+
vertexAI: {
|
|
19
|
+
model: vi.fn((name) => ({
|
|
20
|
+
name: `vertexai/${name}`,
|
|
21
|
+
info: { label: 'mock' },
|
|
22
|
+
configSchema: z.object({}),
|
|
23
|
+
})),
|
|
24
|
+
},
|
|
25
|
+
// biome-ignore lint/style/useNamingConvention: given
|
|
26
|
+
googleAI: vi.fn(),
|
|
27
|
+
}));
|
|
28
|
+
describe('Genkit vertexai-multi-location Token Limit Fallback Tests', () => {
|
|
29
|
+
let ai;
|
|
30
|
+
let cbProvider;
|
|
31
|
+
let logger;
|
|
32
|
+
beforeAll(async () => {
|
|
33
|
+
const { injector } = await setupIntegrationTest({ modules: { circuitBreaker: true } });
|
|
34
|
+
cbProvider = injector.resolve(CircuitBreakerProvider);
|
|
35
|
+
logger = injector.resolve(Logger, 'Test');
|
|
36
|
+
});
|
|
37
|
+
beforeEach(async () => {
|
|
38
|
+
vi.clearAllMocks();
|
|
39
|
+
ai = genkit({
|
|
40
|
+
plugins: [
|
|
41
|
+
vertexAiMultiLocation({
|
|
42
|
+
locations: ['region-1', 'region-2', 'region-3'],
|
|
43
|
+
circuitBreakerProvider: cbProvider,
|
|
44
|
+
logger,
|
|
45
|
+
circuitBreakerConfig: { resetTimeout: 1_000_000, threshold: 1 },
|
|
46
|
+
}),
|
|
47
|
+
],
|
|
48
|
+
});
|
|
49
|
+
const config = { threshold: 1, resetTimeout: 1_000_000 };
|
|
50
|
+
await cbProvider.provide('genkit:vertex-ai:location:region-1', config).recordSuccess();
|
|
51
|
+
await cbProvider.provide('genkit:vertex-ai:location:region-2', config).recordSuccess();
|
|
52
|
+
await cbProvider.provide('genkit:vertex-ai:location:region-3', config).recordSuccess();
|
|
53
|
+
await cbProvider.provide('genkit:vertex-ai:location:region-1:token-limit', config).recordSuccess();
|
|
54
|
+
await cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', config).recordSuccess();
|
|
55
|
+
await cbProvider.provide('genkit:vertex-ai:location:region-3:token-limit', config).recordSuccess();
|
|
56
|
+
});
|
|
57
|
+
it('should fallback on token limit error but NOT trip main circuit breaker', async () => {
|
|
58
|
+
const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.';
|
|
59
|
+
let region1Called = false;
|
|
60
|
+
let region2Called = false;
|
|
61
|
+
ai.defineModel({
|
|
62
|
+
name: 'vertexai/gemini-2.5-flash',
|
|
63
|
+
}, async (request) => {
|
|
64
|
+
if (request.config?.location === 'region-1') {
|
|
65
|
+
region1Called = true;
|
|
66
|
+
throw new GenkitError({
|
|
67
|
+
status: 'INVALID_ARGUMENT',
|
|
68
|
+
message: tokenLimitErrorMessage,
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
if (request.config?.location === 'region-2') {
|
|
72
|
+
region2Called = true;
|
|
73
|
+
return {
|
|
74
|
+
message: {
|
|
75
|
+
role: 'model',
|
|
76
|
+
content: [{ text: 'success from region-2' }],
|
|
77
|
+
},
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
throw new Error('Unexpected location');
|
|
81
|
+
});
|
|
82
|
+
const response = await ai.generate({
|
|
83
|
+
model: 'vertexai-multi-location/gemini-2.5-flash',
|
|
84
|
+
prompt: 'test',
|
|
85
|
+
});
|
|
86
|
+
expect(response.text).toBe('success from region-2');
|
|
87
|
+
expect(region1Called).toBe(true);
|
|
88
|
+
expect(region2Called).toBe(true);
|
|
89
|
+
// Verify main circuit breaker for region-1 is still CLOSED (allowed)
|
|
90
|
+
const cb = cbProvider.provide('genkit:vertex-ai:location:region-1', { threshold: 1, resetTimeout: 1000000 });
|
|
91
|
+
const status = await cb.check();
|
|
92
|
+
expect(status.state).toBe(CircuitBreakerState.Closed);
|
|
93
|
+
expect(status.allowed).toBe(true);
|
|
94
|
+
});
|
|
95
|
+
it('should skip locations with known token limits within the same request once it is known to be large', async () => {
|
|
96
|
+
const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.';
|
|
97
|
+
// First, trip the token limit breaker for region-2
|
|
98
|
+
const tokenLimitCB2 = cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', { threshold: 1, resetTimeout: 1000000 });
|
|
99
|
+
await tokenLimitCB2.recordFailure();
|
|
100
|
+
let region1Called = false;
|
|
101
|
+
let region2Called = false;
|
|
102
|
+
let region3Called = false;
|
|
103
|
+
ai.defineModel({
|
|
104
|
+
name: 'vertexai/gemini-2.5-flash',
|
|
105
|
+
}, async (request) => {
|
|
106
|
+
if (request.config?.location === 'region-1') {
|
|
107
|
+
region1Called = true;
|
|
108
|
+
throw new GenkitError({
|
|
109
|
+
status: 'INVALID_ARGUMENT',
|
|
110
|
+
message: tokenLimitErrorMessage,
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
if (request.config?.location === 'region-2') {
|
|
114
|
+
region2Called = true;
|
|
115
|
+
return { message: { role: 'model', content: [{ text: 'success from region-2' }] } };
|
|
116
|
+
}
|
|
117
|
+
if (request.config?.location === 'region-3') {
|
|
118
|
+
region3Called = true;
|
|
119
|
+
return {
|
|
120
|
+
message: {
|
|
121
|
+
role: 'model',
|
|
122
|
+
content: [{ text: 'success from region-3' }],
|
|
123
|
+
},
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
throw new Error('Unexpected location');
|
|
127
|
+
});
|
|
128
|
+
// Initial shuffle is mocked to [region-1, region-2, region-3]
|
|
129
|
+
const response = await ai.generate({
|
|
130
|
+
model: 'vertexai-multi-location/gemini-2.5-flash',
|
|
131
|
+
prompt: 'test',
|
|
132
|
+
});
|
|
133
|
+
expect(response.text).toBe('success from region-3');
|
|
134
|
+
expect(region1Called).toBe(true); // Fails, makes request "known to be large"
|
|
135
|
+
expect(region2Called).toBe(false); // Should be skipped because it is known to have a low limit
|
|
136
|
+
expect(region3Called).toBe(true); // Should be tried as it is not known to be limited
|
|
137
|
+
});
|
|
138
|
+
});
|
package/ai/genkit/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { CircuitBreakerConfig } from '../../circuit-breaker/circuit-breaker.js';
|
|
2
|
-
export interface VertexAiMultiLocationOptions {
|
|
2
|
+
export declare abstract class VertexAiMultiLocationOptions {
|
|
3
3
|
/** The Google Cloud locations to use for routing. */
|
|
4
4
|
locations: string[];
|
|
5
5
|
/**
|
|
@@ -7,4 +7,9 @@ export interface VertexAiMultiLocationOptions {
|
|
|
7
7
|
* By default, a threshold of 1 is used for 429 errors.
|
|
8
8
|
*/
|
|
9
9
|
circuitBreakerConfig?: Partial<CircuitBreakerConfig>;
|
|
10
|
+
/**
|
|
11
|
+
* Optional token limit circuit breaker configuration.
|
|
12
|
+
* By default, a threshold of 1 and a reset timeout of 15 minutes is used.
|
|
13
|
+
*/
|
|
14
|
+
tokenLimitCircuitBreakerConfig?: Partial<CircuitBreakerConfig>;
|
|
10
15
|
}
|
package/ai/genkit/types.js
CHANGED
|
@@ -1 +1,14 @@
|
|
|
1
|
-
export {};
|
|
1
|
+
export class VertexAiMultiLocationOptions {
|
|
2
|
+
/** The Google Cloud locations to use for routing. */
|
|
3
|
+
locations;
|
|
4
|
+
/**
|
|
5
|
+
* Optional circuit breaker configuration.
|
|
6
|
+
* By default, a threshold of 1 is used for 429 errors.
|
|
7
|
+
*/
|
|
8
|
+
circuitBreakerConfig;
|
|
9
|
+
/**
|
|
10
|
+
* Optional token limit circuit breaker configuration.
|
|
11
|
+
* By default, a threshold of 1 and a reset timeout of 15 minutes is used.
|
|
12
|
+
*/
|
|
13
|
+
tokenLimitCircuitBreakerConfig;
|
|
14
|
+
}
|
|
@@ -34,7 +34,7 @@ import { TstdlCategoryParents, TstdlDocumentCategoryLabels, TstdlDocumentPropert
|
|
|
34
34
|
const config = {
|
|
35
35
|
database: {
|
|
36
36
|
host: string('DATABASE_HOST', '127.0.0.1'),
|
|
37
|
-
port: positiveInteger('DATABASE_PORT',
|
|
37
|
+
port: positiveInteger('DATABASE_PORT', 15433),
|
|
38
38
|
user: string('DATABASE_USER', 'tstdl'),
|
|
39
39
|
pass: string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
|
|
40
40
|
database: string('DATABASE_NAME', 'tstdl'),
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { configurePostgresRateLimiter, migratePostgresRateLimiterSchema } from '../../rate-limit/postgres/index.js';
|
|
2
|
+
import { RateLimiterProvider } from '../../rate-limit/index.js';
|
|
3
|
+
import { Injector, runInInjectionContext } from '../../injector/index.js';
|
|
4
|
+
import { configureOrm } from '../../orm/server/index.js';
|
|
5
|
+
import { ConsoleLogTransport, LogFormatter, Logger, LogTransport, PrettyPrintLogFormatter } from '../../logger/index.js';
|
|
6
|
+
import * as configParser from '../../utils/config-parser.js';
|
|
7
|
+
import { timeout } from '../../utils/timing.js';
|
|
8
|
+
async function main() {
|
|
9
|
+
const injector = new Injector('ExampleInjector');
|
|
10
|
+
// 1. Configure Logging
|
|
11
|
+
injector.register(LogFormatter, { useToken: PrettyPrintLogFormatter });
|
|
12
|
+
injector.register(LogTransport, { useToken: ConsoleLogTransport });
|
|
13
|
+
const logger = injector.resolve(Logger);
|
|
14
|
+
// 2. Configure Database
|
|
15
|
+
configureOrm({
|
|
16
|
+
connection: {
|
|
17
|
+
host: configParser.string('DATABASE_HOST', '127.0.0.1'),
|
|
18
|
+
port: configParser.positiveInteger('DATABASE_PORT', 15433),
|
|
19
|
+
user: configParser.string('DATABASE_USER', 'tstdl'),
|
|
20
|
+
password: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
|
|
21
|
+
database: configParser.string('DATABASE_NAME', 'tstdl'),
|
|
22
|
+
},
|
|
23
|
+
});
|
|
24
|
+
// 3. Configure Rate Limiter
|
|
25
|
+
configurePostgresRateLimiter();
|
|
26
|
+
// 4. Run Migrations (for setup)
|
|
27
|
+
logger.info('Running migrations...');
|
|
28
|
+
await runInInjectionContext(injector, migratePostgresRateLimiterSchema);
|
|
29
|
+
// 5. Get a Rate Limiter Instance
|
|
30
|
+
const provider = injector.resolve(RateLimiterProvider);
|
|
31
|
+
const limiter = provider.get('api-limiter', {
|
|
32
|
+
burstCapacity: 10,
|
|
33
|
+
refillInterval: 1000, // 10 tokens per second
|
|
34
|
+
});
|
|
35
|
+
const resource = 'user-123';
|
|
36
|
+
// 6. Simulate Traffic
|
|
37
|
+
logger.info('Starting simulation...');
|
|
38
|
+
for (let i = 0; i < 15; i++) {
|
|
39
|
+
const success = await limiter.tryAcquire(resource);
|
|
40
|
+
if (success) {
|
|
41
|
+
logger.info(`Request ${i + 1}: Allowed`);
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
logger.warn(`Request ${i + 1}: Throttled`);
|
|
45
|
+
}
|
|
46
|
+
// Small delay to simulate some processing/network time
|
|
47
|
+
await timeout(50);
|
|
48
|
+
}
|
|
49
|
+
// 7. Cleanup
|
|
50
|
+
await injector.dispose();
|
|
51
|
+
}
|
|
52
|
+
void main();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tstdl/base",
|
|
3
|
-
"version": "0.93.153",
|
|
3
|
+
"version": "0.93.154",
|
|
4
4
|
"author": "Patrick Hein",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -152,8 +152,8 @@
|
|
|
152
152
|
"type-fest": "^5.4"
|
|
153
153
|
},
|
|
154
154
|
"peerDependencies": {
|
|
155
|
-
"@aws-sdk/client-s3": "^3.
|
|
156
|
-
"@aws-sdk/s3-request-presigner": "^3.
|
|
155
|
+
"@aws-sdk/client-s3": "^3.1001",
|
|
156
|
+
"@aws-sdk/s3-request-presigner": "^3.1001",
|
|
157
157
|
"@genkit-ai/google-genai": "^1.29",
|
|
158
158
|
"@google-cloud/storage": "^7.19",
|
|
159
159
|
"@toon-format/toon": "^2.1.0",
|
package/test1.js
CHANGED
|
@@ -14,7 +14,7 @@ import { assert } from './utils/type-guards.js';
|
|
|
14
14
|
const config = {
|
|
15
15
|
database: {
|
|
16
16
|
host: configParser.string('DATABASE_HOST', '127.0.0.1'),
|
|
17
|
-
port: configParser.positiveInteger('DATABASE_PORT',
|
|
17
|
+
port: configParser.positiveInteger('DATABASE_PORT', 15433),
|
|
18
18
|
user: configParser.string('DATABASE_USER', 'tstdl'),
|
|
19
19
|
pass: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
|
|
20
20
|
database: configParser.string('DATABASE_NAME', 'tstdl'),
|
package/test4.js
CHANGED
|
@@ -8,7 +8,7 @@ import { boolean, positiveInteger, string } from './utils/config-parser.js';
|
|
|
8
8
|
const config = {
|
|
9
9
|
database: {
|
|
10
10
|
host: string('DATABASE_HOST', '127.0.0.1'),
|
|
11
|
-
port: positiveInteger('DATABASE_PORT',
|
|
11
|
+
port: positiveInteger('DATABASE_PORT', 15433),
|
|
12
12
|
user: string('DATABASE_USER', 'tstdl'),
|
|
13
13
|
pass: string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
|
|
14
14
|
database: string('DATABASE_NAME', 'tstdl'),
|
|
@@ -48,7 +48,7 @@ export async function setupIntegrationTest(options = {}) {
|
|
|
48
48
|
// 2. Database Config
|
|
49
49
|
const dbConfig = {
|
|
50
50
|
host: configParser.string('DATABASE_HOST', '127.0.0.1'),
|
|
51
|
-
port: configParser.positiveInteger('DATABASE_PORT',
|
|
51
|
+
port: configParser.positiveInteger('DATABASE_PORT', 15433),
|
|
52
52
|
user: configParser.string('DATABASE_USER', 'tstdl'),
|
|
53
53
|
password: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
|
|
54
54
|
database: configParser.string('DATABASE_NAME', 'tstdl'),
|