npm - @tstdl/base - Versions diffs - 0.93.153 → 0.93.154 - Mend

@tstdl/base 0.93.153 → 0.93.154

This diff compares the contents of two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (12)

package/ai/genkit/multi-region.plugin.js +39 -11
package/ai/genkit/tests/token-limit-fallback.test.d.ts +2 -0
package/ai/genkit/tests/token-limit-fallback.test.js +138 -0
package/ai/genkit/types.d.ts +6 -1
package/ai/genkit/types.js +14 -1
package/examples/document-management/main.js +1 -1
package/examples/rate-limit/basic-usage.d.ts +1 -0
package/examples/rate-limit/basic-usage.js +52 -0
package/package.json +3 -3
package/test1.js +1 -1
package/test4.js +1 -1
package/testing/integration-setup.js +1 -1

package/ai/genkit/multi-region.plugin.js CHANGED Viewed

@@ -3,9 +3,27 @@ import { GenkitError, modelRef } from 'genkit';
 import { genkitPlugin } from 'genkit/plugin';
 import { shuffle } from '../../utils/array/index.js';
 import { isInstanceOf, isNullOrUndefined } from '../../utils/type-guards.js';
+import { millisecondsPerMinute, millisecondsPerSecond } from '../../utils/units.js';
 const pluginKey = 'vertexai-multi-location';
 const geminiModelReference = vertexAI.model('gemini-2.5-flash');
 export function vertexAiMultiLocation(options) {
+    const locationConfigs = options.locations.map((location) => {
+        const circuitBreakerKey = `genkit:vertex-ai:location:${location}`;
+        const tokenLimitCircuitBreakerKey = `${circuitBreakerKey}:token-limit`;
+        return {
+            location,
+            circuitBreaker: options.circuitBreakerProvider.provide(circuitBreakerKey, {
+                threshold: 1,
+                resetTimeout: 30 * millisecondsPerSecond,
+                ...options.circuitBreakerConfig,
+            }),
+            tokenLimitCircuitBreaker: options.circuitBreakerProvider.provide(tokenLimitCircuitBreakerKey, {
+                threshold: 1,
+                resetTimeout: 15 * millisecondsPerMinute,
+                ...options.tokenLimitCircuitBreakerConfig,
+            }),
+        };
+    });
     const createVirtualizedModelAction = async (ai, modelName) => {
         const baseModelName = `vertexai/${modelName}`;
         const target = modelName;
@@ -20,20 +38,22 @@ export function vertexAiMultiLocation(options) {
             configSchema: baseModelAction.__action.inputSchema?.shape?.config,
             label: `${baseModelAction.__action.description ?? baseModelAction.__action.name} (Multi-Location Routing)`,
         }, async (request, streamingCallback) => {
-            const shuffledLocations = shuffle([...options.locations]);
+            const shuffledConfigs = shuffle([...locationConfigs]);
             let lastError;
-            for (const location of shuffledLocations) {
-                const circuitBreakerKey = `genkit:vertex-ai:location:${location}`;
-                const circuitBreaker = options.circuitBreakerProvider.provide(circuitBreakerKey, {
-                    threshold: 1, // Aggressive for 429
-                    resetTimeout: options.circuitBreakerConfig?.resetTimeout ?? 30000,
-                    ...options.circuitBreakerConfig,
-                });
+            let isLargeRequest = false;
+            for (const { location, circuitBreaker, tokenLimitCircuitBreaker } of shuffledConfigs) {
                 const check = await circuitBreaker.check();
                 if (!check.allowed) {
                     options.logger.warn(`Location ${location} is currently unhealthy. Skipping...`);
                     continue;
                 }
+                if (isLargeRequest) {
+                    const tokenCheck = await tokenLimitCircuitBreaker.check();
+                    if (!tokenCheck.allowed) {
+                        options.logger.warn(`Location ${location} is known to have a low token limit. Skipping for this large request...`);
+                        continue;
+                    }
+                }
                 try {
                     const result = await baseModelAction({
                         ...request,
@@ -52,12 +72,20 @@ export function vertexAiMultiLocation(options) {
                     if (!isInstanceOf(error, GenkitError)) {
                         throw error;
                     }
-                    const isRetryable = ((error.status == 'RESOURCE_EXHAUSTED') || (error.status == 'UNAVAILABLE') || error.message.includes('quota'));
+                    const isTokenLimitError = (error.status == 'INVALID_ARGUMENT') && error.message.includes('input token count') && error.message.includes('model only supports up to');
+                    const isRetryable = isTokenLimitError || ((error.status == 'RESOURCE_EXHAUSTED') || (error.status == 'UNAVAILABLE') || error.message.includes('quota'));
                     if (!isRetryable) {
                         throw error;
                     }
-                    options.logger.warn(`Location ${location} responded with ${error.status}. Tripping circuit breaker and trying next location...`);
-                    await circuitBreaker.recordFailure();
+                    if (isTokenLimitError) {
+                        options.logger.warn(`Location ${location} responded with token limit error. Trying next location...`);
+                        isLargeRequest = true;
+                        await tokenLimitCircuitBreaker.recordFailure();
+                    }
+                    else {
+                        options.logger.warn(`Location ${location} responded with ${error.status}. Tripping circuit breaker and trying next location...`);
+                        await circuitBreaker.recordFailure();
+                    }
                 }
             }
             throw lastError;

package/ai/genkit/tests/token-limit-fallback.test.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+/** biome-ignore-all lint/suspicious/useAwait: defineModel requires async */
+export {};

package/ai/genkit/tests/token-limit-fallback.test.js ADDED Viewed

@@ -0,0 +1,138 @@
+/** biome-ignore-all lint/suspicious/useAwait: defineModel requires async */
+import { genkit, GenkitError, z } from 'genkit';
+import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
+import { CircuitBreakerState } from '../../../circuit-breaker/index.js';
+import { CircuitBreakerProvider } from '../../../circuit-breaker/provider.js';
+import { Logger } from '../../../logger/logger.js';
+import { setupIntegrationTest } from '../../../testing/index.js';
+import { vertexAiMultiLocation } from '../multi-region.plugin.js';
+vi.mock('#/utils/array/index.js', async (importOriginal) => {
+    const actual = await importOriginal();
+    return {
+        ...actual,
+        shuffle: vi.fn((items) => [...items]),
+    };
+});
+vi.mock('@genkit-ai/google-genai', () => ({
+    // biome-ignore lint/style/useNamingConvention: given
+    vertexAI: {
+        model: vi.fn((name) => ({
+            name: `vertexai/${name}`,
+            info: { label: 'mock' },
+            configSchema: z.object({}),
+        })),
+    },
+    // biome-ignore lint/style/useNamingConvention: given
+    googleAI: vi.fn(),
+}));
+describe('Genkit vertexai-multi-location Token Limit Fallback Tests', () => {
+    let ai;
+    let cbProvider;
+    let logger;
+    beforeAll(async () => {
+        const { injector } = await setupIntegrationTest({ modules: { circuitBreaker: true } });
+        cbProvider = injector.resolve(CircuitBreakerProvider);
+        logger = injector.resolve(Logger, 'Test');
+    });
+    beforeEach(async () => {
+        vi.clearAllMocks();
+        ai = genkit({
+            plugins: [
+                vertexAiMultiLocation({
+                    locations: ['region-1', 'region-2', 'region-3'],
+                    circuitBreakerProvider: cbProvider,
+                    logger,
+                    circuitBreakerConfig: { resetTimeout: 1_000_000, threshold: 1 },
+                }),
+            ],
+        });
+        const config = { threshold: 1, resetTimeout: 1_000_000 };
+        await cbProvider.provide('genkit:vertex-ai:location:region-1', config).recordSuccess();
+        await cbProvider.provide('genkit:vertex-ai:location:region-2', config).recordSuccess();
+        await cbProvider.provide('genkit:vertex-ai:location:region-3', config).recordSuccess();
+        await cbProvider.provide('genkit:vertex-ai:location:region-1:token-limit', config).recordSuccess();
+        await cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', config).recordSuccess();
+        await cbProvider.provide('genkit:vertex-ai:location:region-3:token-limit', config).recordSuccess();
+    });
+    it('should fallback on token limit error but NOT trip main circuit breaker', async () => {
+        const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.';
+        let region1Called = false;
+        let region2Called = false;
+        ai.defineModel({
+            name: 'vertexai/gemini-2.5-flash',
+        }, async (request) => {
+            if (request.config?.location === 'region-1') {
+                region1Called = true;
+                throw new GenkitError({
+                    status: 'INVALID_ARGUMENT',
+                    message: tokenLimitErrorMessage,
+                });
+            }
+            if (request.config?.location === 'region-2') {
+                region2Called = true;
+                return {
+                    message: {
+                        role: 'model',
+                        content: [{ text: 'success from region-2' }],
+                    },
+                };
+            }
+            throw new Error('Unexpected location');
+        });
+        const response = await ai.generate({
+            model: 'vertexai-multi-location/gemini-2.5-flash',
+            prompt: 'test',
+        });
+        expect(response.text).toBe('success from region-2');
+        expect(region1Called).toBe(true);
+        expect(region2Called).toBe(true);
+        // Verify main circuit breaker for region-1 is still CLOSED (allowed)
+        const cb = cbProvider.provide('genkit:vertex-ai:location:region-1', { threshold: 1, resetTimeout: 1000000 });
+        const status = await cb.check();
+        expect(status.state).toBe(CircuitBreakerState.Closed);
+        expect(status.allowed).toBe(true);
+    });
+    it('should skip locations with known token limits within the same request once it is known to be large', async () => {
+        const tokenLimitErrorMessage = 'Unable to submit request because the input token count is 135224 but model only supports up to 131072.';
+        // First, trip the token limit breaker for region-2
+        const tokenLimitCB2 = cbProvider.provide('genkit:vertex-ai:location:region-2:token-limit', { threshold: 1, resetTimeout: 1000000 });
+        await tokenLimitCB2.recordFailure();
+        let region1Called = false;
+        let region2Called = false;
+        let region3Called = false;
+        ai.defineModel({
+            name: 'vertexai/gemini-2.5-flash',
+        }, async (request) => {
+            if (request.config?.location === 'region-1') {
+                region1Called = true;
+                throw new GenkitError({
+                    status: 'INVALID_ARGUMENT',
+                    message: tokenLimitErrorMessage,
+                });
+            }
+            if (request.config?.location === 'region-2') {
+                region2Called = true;
+                return { message: { role: 'model', content: [{ text: 'success from region-2' }] } };
+            }
+            if (request.config?.location === 'region-3') {
+                region3Called = true;
+                return {
+                    message: {
+                        role: 'model',
+                        content: [{ text: 'success from region-3' }],
+                    },
+                };
+            }
+            throw new Error('Unexpected location');
+        });
+        // Initial shuffle is mocked to [region-1, region-2, region-3]
+        const response = await ai.generate({
+            model: 'vertexai-multi-location/gemini-2.5-flash',
+            prompt: 'test',
+        });
+        expect(response.text).toBe('success from region-3');
+        expect(region1Called).toBe(true); // Fails, makes request "known to be large"
+        expect(region2Called).toBe(false); // Should be skipped because it is known to have a low limit
+        expect(region3Called).toBe(true); // Should be tried as it is not known to be limited
+    });
+});

package/ai/genkit/types.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { CircuitBreakerConfig } from '../../circuit-breaker/circuit-breaker.js';
-export interface VertexAiMultiLocationOptions {
+export declare abstract class VertexAiMultiLocationOptions {
     /** The Google Cloud locations to use for routing. */
     locations: string[];
     /**
@@ -7,4 +7,9 @@ export interface VertexAiMultiLocationOptions {
      * By default, a threshold of 1 is used for 429 errors.
      */
     circuitBreakerConfig?: Partial<CircuitBreakerConfig>;
+    /**
+     * Optional token limit circuit breaker configuration.
+     * By default, a threshold of 1 and a reset timeout of 15 minutes is used.
+     */
+    tokenLimitCircuitBreakerConfig?: Partial<CircuitBreakerConfig>;
 }

package/ai/genkit/types.js CHANGED Viewed

@@ -1 +1,14 @@
-export {};
+export class VertexAiMultiLocationOptions {
+    /** The Google Cloud locations to use for routing. */
+    locations;
+    /**
+     * Optional circuit breaker configuration.
+     * By default, a threshold of 1 is used for 429 errors.
+     */
+    circuitBreakerConfig;
+    /**
+     * Optional token limit circuit breaker configuration.
+     * By default, a threshold of 1 and a reset timeout of 15 minutes is used.
+     */
+    tokenLimitCircuitBreakerConfig;
+}

package/examples/document-management/main.js CHANGED Viewed

@@ -34,7 +34,7 @@ import { TstdlCategoryParents, TstdlDocumentCategoryLabels, TstdlDocumentPropert
 const config = {
     database: {
         host: string('DATABASE_HOST', '127.0.0.1'),
-        port: positiveInteger('DATABASE_PORT', 5432),
+        port: positiveInteger('DATABASE_PORT', 15433),
         user: string('DATABASE_USER', 'tstdl'),
         pass: string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
         database: string('DATABASE_NAME', 'tstdl'),

package/examples/rate-limit/basic-usage.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/examples/rate-limit/basic-usage.js ADDED Viewed

@@ -0,0 +1,52 @@
+import { configurePostgresRateLimiter, migratePostgresRateLimiterSchema } from '../../rate-limit/postgres/index.js';
+import { RateLimiterProvider } from '../../rate-limit/index.js';
+import { Injector, runInInjectionContext } from '../../injector/index.js';
+import { configureOrm } from '../../orm/server/index.js';
+import { ConsoleLogTransport, LogFormatter, Logger, LogTransport, PrettyPrintLogFormatter } from '../../logger/index.js';
+import * as configParser from '../../utils/config-parser.js';
+import { timeout } from '../../utils/timing.js';
+async function main() {
+    const injector = new Injector('ExampleInjector');
+    // 1. Configure Logging
+    injector.register(LogFormatter, { useToken: PrettyPrintLogFormatter });
+    injector.register(LogTransport, { useToken: ConsoleLogTransport });
+    const logger = injector.resolve(Logger);
+    // 2. Configure Database
+    configureOrm({
+        connection: {
+            host: configParser.string('DATABASE_HOST', '127.0.0.1'),
+            port: configParser.positiveInteger('DATABASE_PORT', 15433),
+            user: configParser.string('DATABASE_USER', 'tstdl'),
+            password: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
+            database: configParser.string('DATABASE_NAME', 'tstdl'),
+        },
+    });
+    // 3. Configure Rate Limiter
+    configurePostgresRateLimiter();
+    // 4. Run Migrations (for setup)
+    logger.info('Running migrations...');
+    await runInInjectionContext(injector, migratePostgresRateLimiterSchema);
+    // 5. Get a Rate Limiter Instance
+    const provider = injector.resolve(RateLimiterProvider);
+    const limiter = provider.get('api-limiter', {
+        burstCapacity: 10,
+        refillInterval: 1000, // 10 tokens per second
+    });
+    const resource = 'user-123';
+    // 6. Simulate Traffic
+    logger.info('Starting simulation...');
+    for (let i = 0; i < 15; i++) {
+        const success = await limiter.tryAcquire(resource);
+        if (success) {
+            logger.info(`Request ${i + 1}: Allowed`);
+        }
+        else {
+            logger.warn(`Request ${i + 1}: Throttled`);
+        }
+        // Small delay to simulate some processing/network time
+        await timeout(50);
+    }
+    // 7. Cleanup
+    await injector.dispose();
+}
+void main();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tstdl/base",
-  "version": "0.93.153",
+  "version": "0.93.154",
   "author": "Patrick Hein",
   "publishConfig": {
     "access": "public"
@@ -152,8 +152,8 @@
     "type-fest": "^5.4"
   },
   "peerDependencies": {
-    "@aws-sdk/client-s3": "^3.1000",
-    "@aws-sdk/s3-request-presigner": "^3.1000",
+    "@aws-sdk/client-s3": "^3.1001",
+    "@aws-sdk/s3-request-presigner": "^3.1001",
     "@genkit-ai/google-genai": "^1.29",
     "@google-cloud/storage": "^7.19",
     "@toon-format/toon": "^2.1.0",

package/test1.js CHANGED Viewed

@@ -14,7 +14,7 @@ import { assert } from './utils/type-guards.js';
 const config = {
     database: {
         host: configParser.string('DATABASE_HOST', '127.0.0.1'),
-        port: configParser.positiveInteger('DATABASE_PORT', 5432),
+        port: configParser.positiveInteger('DATABASE_PORT', 15433),
         user: configParser.string('DATABASE_USER', 'tstdl'),
         pass: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
         database: configParser.string('DATABASE_NAME', 'tstdl'),

package/test4.js CHANGED Viewed

@@ -8,7 +8,7 @@ import { boolean, positiveInteger, string } from './utils/config-parser.js';
 const config = {
     database: {
         host: string('DATABASE_HOST', '127.0.0.1'),
-        port: positiveInteger('DATABASE_PORT', 5432),
+        port: positiveInteger('DATABASE_PORT', 15433),
         user: string('DATABASE_USER', 'tstdl'),
         pass: string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
         database: string('DATABASE_NAME', 'tstdl'),

package/testing/integration-setup.js CHANGED Viewed

@@ -48,7 +48,7 @@ export async function setupIntegrationTest(options = {}) {
     // 2. Database Config
     const dbConfig = {
         host: configParser.string('DATABASE_HOST', '127.0.0.1'),
-        port: configParser.positiveInteger('DATABASE_PORT', 5432),
+        port: configParser.positiveInteger('DATABASE_PORT', 15433),
         user: configParser.string('DATABASE_USER', 'tstdl'),
         password: configParser.string('DATABASE_PASS', 'wf7rq6glrk5jykne'),
         database: configParser.string('DATABASE_NAME', 'tstdl'),