@sw4rm/js-sdk 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +178 -1
- package/dist/cjs/index.cjs +2819 -292
- package/dist/esm/index.js +2736 -284
- package/dist/types/agentConfig.d.ts +245 -0
- package/dist/types/audit.d.ts +214 -0
- package/dist/types/clients/handoff.d.ts +44 -1
- package/dist/types/clients/negotiationRoom.d.ts +81 -5
- package/dist/types/clients/negotiationRoomStore.d.ts +155 -0
- package/dist/types/clients/workflow.d.ts +15 -0
- package/dist/types/constants/index.d.ts +100 -0
- package/dist/types/index.d.ts +14 -5
- package/dist/types/internal/baseClient.d.ts +6 -0
- package/dist/types/internal/envelope.d.ts +16 -0
- package/dist/types/internal/errorMapping.d.ts +116 -0
- package/dist/types/internal/worktreeState.d.ts +60 -0
- package/dist/types/llm/anthropic.d.ts +83 -0
- package/dist/types/llm/client.d.ts +107 -0
- package/dist/types/llm/factory.d.ts +69 -0
- package/dist/types/llm/groq.d.ts +79 -0
- package/dist/types/llm/index.d.ts +45 -0
- package/dist/types/llm/mock.d.ts +89 -0
- package/dist/types/llm/rateLimiter.d.ts +101 -0
- package/dist/types/persistentActivityBuffer.d.ts +94 -0
- package/dist/types/runtime/cancellation.d.ts +41 -0
- package/dist/types/runtime/delegation.d.ts +20 -0
- package/dist/types/runtime/gateway.d.ts +80 -0
- package/package.json +4 -2
- package/protos/activity.proto +24 -0
- package/protos/common.proto +141 -0
- package/protos/connector.proto +29 -0
- package/protos/handoff.proto +105 -0
- package/protos/hitl.proto +23 -0
- package/protos/logging.proto +20 -0
- package/protos/negotiation.proto +57 -0
- package/protos/negotiation_room.proto +220 -0
- package/protos/policy.proto +55 -0
- package/protos/reasoning.proto +41 -0
- package/protos/registry.proto +47 -0
- package/protos/router.proto +16 -0
- package/protos/scheduler.proto +52 -0
- package/protos/scheduler_policy.proto +36 -0
- package/protos/tool.proto +47 -0
- package/protos/workflow.proto +116 -0
- package/protos/worktree.proto +33 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract LLM client interface.
|
|
3
|
+
*
|
|
4
|
+
* This module provides the base interface for LLM clients used by SW4RM agents.
|
|
5
|
+
* Implementations can use Anthropic API directly, Groq API, or mock clients
|
|
6
|
+
* for testing.
|
|
7
|
+
*
|
|
8
|
+
* @module llm/client
|
|
9
|
+
*/
|
|
10
|
+
/** Response from an LLM API call. */
|
|
11
|
+
export interface LlmResponse {
|
|
12
|
+
/** The generated text content. */
|
|
13
|
+
content: string;
|
|
14
|
+
/** The model that generated the response. */
|
|
15
|
+
model: string;
|
|
16
|
+
/** Token usage statistics (if available). */
|
|
17
|
+
usage?: {
|
|
18
|
+
/** Input-side token count. NOTE(review): presumably the provider-reported prompt token count -- confirm in each client implementation. */ input_tokens: number;
|
|
19
|
+
/** Output-side token count. NOTE(review): presumably the provider-reported completion token count -- confirm in each client implementation. */ output_tokens: number;
|
|
20
|
+
};
|
|
21
|
+
/** Additional response metadata. */
|
|
22
|
+
metadata?: Record<string, unknown>;
|
|
23
|
+
}
|
|
24
|
+
/** Options for LLM query calls. */
|
|
25
|
+
export interface QueryOptions {
|
|
26
|
+
/** Optional system prompt for context. */
|
|
27
|
+
systemPrompt?: string;
|
|
28
|
+
/** Maximum tokens to generate (default 4096). */
|
|
29
|
+
maxTokens?: number;
|
|
30
|
+
/** Sampling temperature 0.0-2.0 (default 1.0). */
|
|
31
|
+
temperature?: number;
|
|
32
|
+
/** Override the default model. */
|
|
33
|
+
model?: string;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Abstract interface for LLM clients.
|
|
37
|
+
*
|
|
38
|
+
* All LLM client implementations must implement this interface.
|
|
39
|
+
* This allows SW4RM agents to be provider-agnostic.
|
|
40
|
+
*
|
|
41
|
+
* Implementations:
|
|
42
|
+
* - GroqClient: Uses Groq API directly (API key)
|
|
43
|
+
* - AnthropicClient: Uses Anthropic API directly (API key)
|
|
44
|
+
* - MockLlmClient: For testing without API calls
|
|
45
|
+
*/
|
|
46
|
+
export interface LlmClient {
|
|
47
|
+
/**
|
|
48
|
+
* Send a query to the LLM and get a complete response.
|
|
49
|
+
*
|
|
50
|
+
* @param prompt - The user prompt/query to send.
|
|
51
|
+
* @param opts - Optional query configuration.
|
|
52
|
+
* @returns LlmResponse with the generated content and metadata.
|
|
53
|
+
* @throws LlmError on API errors or failures.
|
|
54
|
+
* @throws LlmAuthenticationError on authentication failures.
|
|
55
|
+
* @throws LlmRateLimitError when rate limits are exceeded.
|
|
56
|
+
* @throws LlmTimeoutError when the request times out.
|
|
57
|
+
*/
|
|
58
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
59
|
+
/**
|
|
60
|
+
* Stream a query response chunk by chunk.
|
|
61
|
+
*
|
|
62
|
+
* @param prompt - The user prompt/query to send.
|
|
63
|
+
* @param opts - Optional query configuration.
|
|
64
|
+
* @yields Text chunks as they arrive from the API.
|
|
65
|
+
* @throws LlmError on API errors or failures.
|
|
66
|
+
*/
|
|
67
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
68
|
+
}
|
|
69
|
+
/** Base exception for all LLM client errors. */
|
|
70
|
+
export declare class LlmError extends Error {
|
|
71
|
+
constructor(message: string);
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Raised when API authentication fails.
|
|
75
|
+
*
|
|
76
|
+
* Common causes:
|
|
77
|
+
* - Invalid API key
|
|
78
|
+
* - Expired credentials
|
|
79
|
+
* - Missing credentials
|
|
80
|
+
*/
|
|
81
|
+
export declare class LlmAuthenticationError extends LlmError {
|
|
82
|
+
constructor(message: string);
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Raised when API rate limits are exceeded.
|
|
86
|
+
*
|
|
87
|
+
* The caller should implement exponential backoff when handling this.
|
|
88
|
+
*/
|
|
89
|
+
export declare class LlmRateLimitError extends LlmError {
|
|
90
|
+
constructor(message: string);
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Raised when an API request times out.
|
|
94
|
+
*
|
|
95
|
+
* Consider increasing the timeout or simplifying the prompt.
|
|
96
|
+
*/
|
|
97
|
+
export declare class LlmTimeoutError extends LlmError {
|
|
98
|
+
constructor(message: string);
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Raised when the prompt exceeds the model's context length.
|
|
102
|
+
*
|
|
103
|
+
* Consider truncating the prompt or using a model with larger context.
|
|
104
|
+
*/
|
|
105
|
+
export declare class LlmContextLengthError extends LlmError {
|
|
106
|
+
constructor(message: string);
|
|
107
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory for creating LLM clients.
|
|
3
|
+
*
|
|
4
|
+
* Provides a simple way to create the appropriate LLM client based on
|
|
5
|
+
* environment configuration or explicit parameters.
|
|
6
|
+
*
|
|
7
|
+
* @module llm/factory
|
|
8
|
+
*/
|
|
9
|
+
import type { LlmClient } from './client.js';
|
|
10
|
+
/** Options for the LLM client factory. */
|
|
11
|
+
export interface CreateLlmClientOptions {
|
|
12
|
+
/**
|
|
13
|
+
* The type of client to create.
|
|
14
|
+
*
|
|
15
|
+
* Valid types: "groq", "anthropic", "mock".
|
|
16
|
+
*
|
|
17
|
+
* If not specified, reads from the `LLM_CLIENT_TYPE` environment variable.
|
|
18
|
+
* Defaults to "mock" if neither is set.
|
|
19
|
+
*/
|
|
20
|
+
clientType?: string;
|
|
21
|
+
/**
|
|
22
|
+
* Default model to use for queries.
|
|
23
|
+
*
|
|
24
|
+
* If not specified, reads from the `LLM_DEFAULT_MODEL` environment variable.
|
|
25
|
+
* Falls back to each provider's default model.
|
|
26
|
+
*/
|
|
27
|
+
model?: string;
|
|
28
|
+
/** API key override (passed to the provider constructor). */
|
|
29
|
+
apiKey?: string;
|
|
30
|
+
/** Request timeout in milliseconds (passed to the provider constructor). */
|
|
31
|
+
timeoutMs?: number;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Create an LLM client based on environment or explicit type.
|
|
35
|
+
*
|
|
36
|
+
* Resolution order for client type:
|
|
37
|
+
* 1. Explicit `clientType` option
|
|
38
|
+
* 2. `LLM_CLIENT_TYPE` environment variable
|
|
39
|
+
* 3. Default: "mock"
|
|
40
|
+
*
|
|
41
|
+
* Environment variables:
|
|
42
|
+
* - `LLM_CLIENT_TYPE`: "groq", "anthropic", or "mock" (default: "mock")
|
|
43
|
+
* - `LLM_DEFAULT_MODEL`: Default model override
|
|
44
|
+
*
|
|
45
|
+
* @param opts - Optional factory configuration.
|
|
46
|
+
* @returns An LlmClient instance of the requested type.
|
|
47
|
+
* @throws Error if an invalid client type is specified.
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* ```typescript
|
|
51
|
+
* import { createLlmClient } from '@sw4rm/js-sdk';
|
|
52
|
+
*
|
|
53
|
+
* // Auto-detect from env (default: mock)
|
|
54
|
+
* const client = createLlmClient();
|
|
55
|
+
*
|
|
56
|
+
* // Explicit Groq client
|
|
57
|
+
* const groq = createLlmClient({ clientType: 'groq' });
|
|
58
|
+
*
|
|
59
|
+
* // Mock client for testing
|
|
60
|
+
* const mock = createLlmClient({ clientType: 'mock' });
|
|
61
|
+
*
|
|
62
|
+
* // Anthropic client with model override
|
|
63
|
+
* const claude = createLlmClient({
|
|
64
|
+
* clientType: 'anthropic',
|
|
65
|
+
* model: 'claude-sonnet-4-20250514',
|
|
66
|
+
* });
|
|
67
|
+
* ```
|
|
68
|
+
*/
|
|
69
|
+
export declare function createLlmClient(opts?: CreateLlmClientOptions): LlmClient;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Groq LLM client for SW4RM agents.
|
|
3
|
+
*
|
|
4
|
+
* Thin wrapper around the Groq REST API using native `fetch()`.
|
|
5
|
+
* No tool-calling or diagnostic mode -- just prompt in, text out,
|
|
6
|
+
* with rate limiting.
|
|
7
|
+
*
|
|
8
|
+
* Credentials (in order):
|
|
9
|
+
* 1. `apiKey` constructor parameter
|
|
10
|
+
* 2. `GROQ_API_KEY` environment variable
|
|
11
|
+
* 3. `~/.groq` file (plain text, one line)
|
|
12
|
+
*
|
|
13
|
+
* @module llm/groq
|
|
14
|
+
*/
|
|
15
|
+
import type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
16
|
+
/** Options for constructing a GroqClient. */
|
|
17
|
+
export interface GroqClientOptions {
|
|
18
|
+
/** API key override (takes precedence over env / dotfile). */
|
|
19
|
+
apiKey?: string;
|
|
20
|
+
/** Default model to use for queries. */
|
|
21
|
+
defaultModel?: string;
|
|
22
|
+
/** Request timeout in milliseconds. */
|
|
23
|
+
timeoutMs?: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* LLM client for the Groq API (OpenAI-compatible chat completions).
|
|
27
|
+
*
|
|
28
|
+
* Credentials are resolved from (in order):
|
|
29
|
+
* 1. `apiKey` constructor option
|
|
30
|
+
* 2. `GROQ_API_KEY` environment variable
|
|
31
|
+
* 3. `~/.groq` file (plain text, one line)
|
|
32
|
+
*
|
|
33
|
+
* Environment variables:
|
|
34
|
+
* - `GROQ_API_KEY`: API key override
|
|
35
|
+
* - `GROQ_DEFAULT_MODEL`: Default model override
|
|
36
|
+
*/
|
|
37
|
+
export declare class GroqClient implements LlmClient {
|
|
38
|
+
/** The resolved API key. */
|
|
39
|
+
readonly apiKey: string;
|
|
40
|
+
/** The default model used when none is specified per-call. */
|
|
41
|
+
readonly defaultModel: string;
|
|
42
|
+
private readonly timeoutMs;
|
|
43
|
+
private readonly rateLimiter;
|
|
44
|
+
constructor(opts?: GroqClientOptions);
|
|
45
|
+
/**
|
|
46
|
+
* Load API key from ~/.groq file.
|
|
47
|
+
*
|
|
48
|
+
* @returns The key string, or null if the file does not exist.
|
|
49
|
+
*/
|
|
50
|
+
static loadKeyFile(): string | null;
|
|
51
|
+
private estimateTokens;
|
|
52
|
+
private buildMessages;
|
|
53
|
+
/**
|
|
54
|
+
* Map an HTTP error response to the appropriate LlmError subclass.
|
|
55
|
+
*
|
|
56
|
+
* @param status - HTTP status code.
|
|
57
|
+
* @param body - Parsed response body (if available).
|
|
58
|
+
* @param message - Raw error message.
|
|
59
|
+
*/
|
|
60
|
+
private mapError;
|
|
61
|
+
/**
|
|
62
|
+
* Send a query to Groq and get a complete response.
|
|
63
|
+
*
|
|
64
|
+
* @param prompt - The user prompt/query.
|
|
65
|
+
* @param opts - Optional query configuration.
|
|
66
|
+
* @returns LlmResponse with generated content and metadata.
|
|
67
|
+
*/
|
|
68
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
69
|
+
/**
|
|
70
|
+
* Stream a query response chunk by chunk.
|
|
71
|
+
*
|
|
72
|
+
* Uses Server-Sent Events (SSE) streaming from the Groq API.
|
|
73
|
+
*
|
|
74
|
+
* @param prompt - The user prompt/query.
|
|
75
|
+
* @param opts - Optional query configuration.
|
|
76
|
+
* @yields Text chunks as they arrive from the API.
|
|
77
|
+
*/
|
|
78
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
79
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SW4RM LLM -- LLM client abstraction for SW4RM agents.
|
|
3
|
+
*
|
|
4
|
+
* This module provides a unified interface for LLM interactions, supporting:
|
|
5
|
+
* - Groq API (API key from ~/.groq or GROQ_API_KEY)
|
|
6
|
+
* - Anthropic API (API key from ~/.anthropic or ANTHROPIC_API_KEY)
|
|
7
|
+
* - Mock client for testing
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { createLlmClient } from '@sw4rm/js-sdk';
|
|
12
|
+
*
|
|
13
|
+
* // Create client via environment (LLM_CLIENT_TYPE=groq|anthropic|mock)
|
|
14
|
+
* const client = createLlmClient();
|
|
15
|
+
*
|
|
16
|
+
* // Or explicitly
|
|
17
|
+
* const client = createLlmClient({ clientType: 'groq' });
|
|
18
|
+
*
|
|
19
|
+
* // Query the LLM
|
|
20
|
+
* const response = await client.query(
|
|
21
|
+
* 'Analyze this task and suggest next steps',
|
|
22
|
+
* { systemPrompt: 'You are a helpful task analysis agent.' },
|
|
23
|
+
* );
|
|
24
|
+
* console.log(response.content);
|
|
25
|
+
*
|
|
26
|
+
* // Stream responses
|
|
27
|
+
* for await (const chunk of client.streamQuery('Generate a report')) {
|
|
28
|
+
* process.stdout.write(chunk);
|
|
29
|
+
* }
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* @module llm
|
|
33
|
+
*/
|
|
34
|
+
export type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
35
|
+
export { LlmError, LlmAuthenticationError, LlmRateLimitError, LlmTimeoutError, LlmContextLengthError, } from './client.js';
|
|
36
|
+
export type { RateLimiterConfig } from './rateLimiter.js';
|
|
37
|
+
export { buildRateLimiterConfig, TokenBucket, getGlobalRateLimiter, resetGlobalRateLimiter, } from './rateLimiter.js';
|
|
38
|
+
export { MockLlmClient } from './mock.js';
|
|
39
|
+
export type { MockCallRecord } from './mock.js';
|
|
40
|
+
export { GroqClient } from './groq.js';
|
|
41
|
+
export type { GroqClientOptions } from './groq.js';
|
|
42
|
+
export { AnthropicClient } from './anthropic.js';
|
|
43
|
+
export type { AnthropicClientOptions } from './anthropic.js';
|
|
44
|
+
export { createLlmClient } from './factory.js';
|
|
45
|
+
export type { CreateLlmClientOptions } from './factory.js';
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock LLM client for testing.
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic responses without making actual API calls.
|
|
5
|
+
* Cycles through a canned response array and records call history.
|
|
6
|
+
*
|
|
7
|
+
* @module llm/mock
|
|
8
|
+
*/
|
|
9
|
+
import type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
10
|
+
/** A recorded call entry from the mock client. */
|
|
11
|
+
export interface MockCallRecord {
|
|
12
|
+
/** The prompt the mock client was called with. */ prompt: string;
|
|
13
|
+
/** System prompt for the call, if any. NOTE(review): presumably copied from QueryOptions.systemPrompt -- confirm. */ systemPrompt?: string;
|
|
14
|
+
/** Max-token setting for the call. Required here although optional in QueryOptions, so presumably the resolved value after defaults -- confirm. */ maxTokens: number;
|
|
15
|
+
/** Sampling temperature for the call. Required here although optional in QueryOptions, so presumably the resolved value after defaults -- confirm. */ temperature: number;
|
|
16
|
+
/** Model name for the call. NOTE(review): presumably the per-call override or the client's defaultModel -- confirm. */ model: string;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Mock LLM client for testing without API calls.
|
|
20
|
+
*
|
|
21
|
+
* Returns configurable responses for testing purposes. If no responses are
|
|
22
|
+
* provided, returns a default echo of the prompt.
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* ```typescript
|
|
26
|
+
* import { MockLlmClient } from '@sw4rm/js-sdk';
|
|
27
|
+
*
|
|
28
|
+
* // Simple usage -- returns echo of prompt
|
|
29
|
+
* const client = new MockLlmClient();
|
|
30
|
+
* const response = await client.query('Hello');
|
|
31
|
+
* console.log(response.content); // "Mock response to: Hello"
|
|
32
|
+
*
|
|
33
|
+
* // Custom responses (cycles)
|
|
34
|
+
* const client2 = new MockLlmClient({
|
|
35
|
+
* responses: ['First response', 'Second response'],
|
|
36
|
+
* });
|
|
37
|
+
*
|
|
38
|
+
* // Custom response generator
|
|
39
|
+
* const client3 = new MockLlmClient({
|
|
40
|
+
* responseGenerator: (prompt) => `Processed: ${prompt}`,
|
|
41
|
+
* });
|
|
42
|
+
* ```
|
|
43
|
+
*/
|
|
44
|
+
export declare class MockLlmClient implements LlmClient {
|
|
45
|
+
/** The model name returned in responses. */
|
|
46
|
+
readonly defaultModel: string;
|
|
47
|
+
private readonly responses;
|
|
48
|
+
private responseIndex;
|
|
49
|
+
private readonly responseGenerator;
|
|
50
|
+
private _callCount;
|
|
51
|
+
private readonly _callHistory;
|
|
52
|
+
constructor(opts?: {
|
|
53
|
+
/** Model name to return in responses. */
|
|
54
|
+
defaultModel?: string;
|
|
55
|
+
/** List of responses to return in order (cycles when exhausted). */
|
|
56
|
+
responses?: string[];
|
|
57
|
+
/** Function to generate responses from prompts. */
|
|
58
|
+
responseGenerator?: (prompt: string) => string;
|
|
59
|
+
});
|
|
60
|
+
/** Number of queries made to this client. */
|
|
61
|
+
get callCount(): number;
|
|
62
|
+
/** History of all calls made to this client. */
|
|
63
|
+
get callHistory(): readonly MockCallRecord[];
|
|
64
|
+
/** Reset call count, history, and response index. */
|
|
65
|
+
reset(): void;
|
|
66
|
+
/**
|
|
67
|
+
* Return a mock response.
|
|
68
|
+
*
|
|
69
|
+
* Priority for determining response content:
|
|
70
|
+
* 1. `responseGenerator` function (if provided)
|
|
71
|
+
* 2. `responses` array (cycles through entries)
|
|
72
|
+
* 3. Default echo: "Mock response to: <prompt>"
|
|
73
|
+
*
|
|
74
|
+
* @param prompt - The prompt (recorded in history).
|
|
75
|
+
* @param opts - Optional query configuration (recorded in history).
|
|
76
|
+
* @returns LlmResponse with mock content.
|
|
77
|
+
*/
|
|
78
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
79
|
+
/**
|
|
80
|
+
* Stream a mock response.
|
|
81
|
+
*
|
|
82
|
+
* Yields the response word by word to simulate streaming.
|
|
83
|
+
*
|
|
84
|
+
* @param prompt - The prompt (recorded in history).
|
|
85
|
+
* @param opts - Optional query configuration.
|
|
86
|
+
* @yields Words from the mock response.
|
|
87
|
+
*/
|
|
88
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
89
|
+
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token bucket rate limiter for LLM API requests.
|
|
3
|
+
*
|
|
4
|
+
* Provides proactive rate limiting to avoid 429 errors. All LLM clients
|
|
5
|
+
* in the process share a global singleton bucket.
|
|
6
|
+
*
|
|
7
|
+
* Configuration via environment variables:
|
|
8
|
+
* LLM_RATE_LIMIT_ENABLED: "1" (default) or "0"
|
|
9
|
+
* LLM_RATE_LIMIT_TOKENS_PER_MIN: 250000 (default, matches Groq free tier)
|
|
10
|
+
* LLM_RATE_LIMIT_ADAPTIVE: "1" (default) -- reduce budget on 429, recover on success
|
|
11
|
+
* LLM_RATE_LIMIT_REDUCTION_FACTOR: 0.7
|
|
12
|
+
* LLM_RATE_LIMIT_RECOVERY_FACTOR: 1.1
|
|
13
|
+
* LLM_RATE_LIMIT_COOLDOWN_SECONDS: 30
|
|
14
|
+
* LLM_RATE_LIMIT_RECOVERY_SUCCESS_THRESHOLD: 20
|
|
15
|
+
*
|
|
16
|
+
* @module llm/rateLimiter
|
|
17
|
+
*/
|
|
18
|
+
/** Configuration for the token bucket rate limiter. */
|
|
19
|
+
export interface RateLimiterConfig {
|
|
20
|
+
/** Token budget per minute. */
|
|
21
|
+
tokensPerMinute: number;
|
|
22
|
+
/** Burst allowance multiplier (1.0 = no burst above budget). */
|
|
23
|
+
burstAllowance: number;
|
|
24
|
+
/** Minimum tokens assumed per request. */
|
|
25
|
+
minTokensPerRequest: number;
|
|
26
|
+
/** Maximum seconds to wait before throwing a timeout. */
|
|
27
|
+
maxWaitSeconds: number;
|
|
28
|
+
/** Whether the rate limiter is enabled at all. */
|
|
29
|
+
enabled: boolean;
|
|
30
|
+
/** Whether adaptive throttling is enabled. */
|
|
31
|
+
adaptiveEnabled: boolean;
|
|
32
|
+
/** Factor to multiply budget by after a 429 event. */
|
|
33
|
+
reductionFactor: number;
|
|
34
|
+
/** Factor to multiply budget by during recovery. */
|
|
35
|
+
recoveryFactor: number;
|
|
36
|
+
/** Seconds to wait after last 429 before allowing recovery. */
|
|
37
|
+
cooldownSeconds: number;
|
|
38
|
+
/** Number of consecutive successes required before recovery. */
|
|
39
|
+
successesForRecovery: number;
|
|
40
|
+
}
|
|
41
|
+
/** Build a RateLimiterConfig from environment variables and optional overrides. */
|
|
42
|
+
export declare function buildRateLimiterConfig(overrides?: Partial<RateLimiterConfig>): RateLimiterConfig;
|
|
43
|
+
/**
|
|
44
|
+
* Token bucket rate limiter with adaptive throttling.
|
|
45
|
+
*
|
|
46
|
+
* Refills tokens at a steady rate. When a 429 is reported via
|
|
47
|
+
* {@link recordRateLimit}, the budget is reduced. After enough successes
|
|
48
|
+
* and a cooldown period, the budget recovers.
|
|
49
|
+
*/
|
|
50
|
+
export declare class TokenBucket {
|
|
51
|
+
private readonly config;
|
|
52
|
+
private readonly baseTpm;
|
|
53
|
+
private currentTpm;
|
|
54
|
+
private readonly minTpm;
|
|
55
|
+
private tokens;
|
|
56
|
+
private lastRefill;
|
|
57
|
+
private lastRateLimitTime;
|
|
58
|
+
private successesSinceLimit;
|
|
59
|
+
constructor(config?: Partial<RateLimiterConfig>);
|
|
60
|
+
private refill;
|
|
61
|
+
private maybeRecover;
|
|
62
|
+
/**
|
|
63
|
+
* Acquire tokens, waiting if necessary.
|
|
64
|
+
*
|
|
65
|
+
* @param estimatedTokens - Estimated token count for the request.
|
|
66
|
+
* @returns Time spent waiting in milliseconds (0 if immediate).
|
|
67
|
+
* @throws Error if waiting exceeds {@link RateLimiterConfig.maxWaitSeconds}.
|
|
68
|
+
*/
|
|
69
|
+
acquire(estimatedTokens: number): Promise<number>;
|
|
70
|
+
/**
|
|
71
|
+
* Record a 429 event -- adaptively reduce budget.
|
|
72
|
+
*
|
|
73
|
+
* Call this when the upstream API returns HTTP 429 or an equivalent
|
|
74
|
+
* rate-limit error.
|
|
75
|
+
*/
|
|
76
|
+
recordRateLimit(): void;
|
|
77
|
+
/**
|
|
78
|
+
* Record a successful request for adaptive recovery.
|
|
79
|
+
*
|
|
80
|
+
* Call this after each successful API response.
|
|
81
|
+
*/
|
|
82
|
+
recordSuccess(): void;
|
|
83
|
+
/** Current available token count. */
|
|
84
|
+
get availableTokens(): number;
|
|
85
|
+
/** Current tokens-per-minute budget (may be reduced after 429). */
|
|
86
|
+
get currentTokensPerMinute(): number;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Get or create the global rate limiter singleton.
|
|
90
|
+
*
|
|
91
|
+
* @param config - Optional configuration overrides (only used on first call).
|
|
92
|
+
* @returns The shared TokenBucket instance.
|
|
93
|
+
*/
|
|
94
|
+
export declare function getGlobalRateLimiter(config?: Partial<RateLimiterConfig>): TokenBucket;
|
|
95
|
+
/**
|
|
96
|
+
* Reset the global rate limiter (for testing).
|
|
97
|
+
*
|
|
98
|
+
* Clears the singleton so the next call to {@link getGlobalRateLimiter}
|
|
99
|
+
* creates a fresh instance.
|
|
100
|
+
*/
|
|
101
|
+
export declare function resetGlobalRateLimiter(): void;
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { EnvelopeState } from './constants/index.js';
|
|
2
|
+
/** One tracked envelope plus its ACK bookkeeping, as returned by PersistentActivityBuffer and exchanged with PersistenceBackend. */ export interface EnvelopeRecord {
|
|
3
|
+
/** Message identifier; PersistentActivityBuffer.get() looks records up by this value. */ message_id: string;
|
|
4
|
+
/** Envelope direction. NOTE(review): presumably 'in' for recordIncoming() and 'out' for recordOutgoing() -- confirm. */ direction: 'in' | 'out';
|
|
5
|
+
/** The envelope payload as an untyped key/value object (same shape accepted by recordIncoming/recordOutgoing). */ envelope: Record<string, unknown>;
|
|
6
|
+
/** Timestamp in milliseconds. NOTE(review): presumably the time the record was created -- confirm epoch/clock source. */ ts_ms: number;
|
|
7
|
+
/** ACK stage reached so far. NOTE(review): numeric encoding is not visible here; presumably updated by ack() -- confirm. */ ack_stage: number;
|
|
8
|
+
/** Error code attached to the record. NOTE(review): presumably set from the ACK's error_code -- confirm. */ error_code: number;
|
|
9
|
+
/** Free-form note attached to the record. NOTE(review): presumably set from the ACK's `note` field -- confirm. */ ack_note: string;
|
|
10
|
+
}
|
|
11
|
+
/** Synchronous storage contract used by PersistentActivityBuffer to load, save and clear its records. */ export interface PersistenceBackend {
|
|
12
|
+
/** Load the stored state and return it as a records map plus an ordering list. */ load(): {
|
|
13
|
+
/** Stored records keyed by string. NOTE(review): presumably keyed by message_id -- confirm. */ records: Record<string, EnvelopeRecord>;
|
|
14
|
+
/** Ordering of record keys. NOTE(review): presumably insertion order of message_ids -- confirm. */ order: string[];
|
|
15
|
+
};
|
|
16
|
+
/** Store the given records map and ordering list; returns nothing. */ save(records: Record<string, EnvelopeRecord>, order: string[]): void;
|
|
17
|
+
/** Discard stored state; returns nothing. */ clear(): void;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* JSON file persistence backend.
|
|
21
|
+
*/
|
|
22
|
+
export declare class JSONFilePersistence implements PersistenceBackend {
|
|
23
|
+
/** Path of the backing file (type elided in the emitted declaration). */ private filePath;
|
|
24
|
+
/** @param filePath - Optional file path. NOTE(review): the default used when omitted is not visible in this declaration. */ constructor(filePath?: string);
|
|
25
|
+
/** PersistenceBackend.load implementation. NOTE(review): behaviour for a missing or corrupt file is not visible here. */ load(): {
|
|
26
|
+
/** Stored records keyed by string. NOTE(review): presumably keyed by message_id -- confirm. */ records: Record<string, EnvelopeRecord>;
|
|
27
|
+
/** Ordering of record keys. */ order: string[];
|
|
28
|
+
};
|
|
29
|
+
/** PersistenceBackend.save implementation; synchronous (returns void). */ save(records: Record<string, EnvelopeRecord>, order: string[]): void;
|
|
30
|
+
/** PersistenceBackend.clear implementation. NOTE(review): whether the file is deleted or emptied is not visible here. */ clear(): void;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Persistent Activity Buffer with Three-ID model support.
|
|
34
|
+
*
|
|
35
|
+
* Tracks inbound/outbound envelopes by message_id and records ACK progression.
|
|
36
|
+
* Supports multiple persistence backends (JSON file, etc.) and provides
|
|
37
|
+
* reconciliation on startup to restore previous state.
|
|
38
|
+
*
|
|
39
|
+
* When the buffer is full, new records are REJECTED with BufferFullError
|
|
40
|
+
* per spec compliance (not silently pruned).
|
|
41
|
+
*/
|
|
42
|
+
export declare class PersistentActivityBuffer {
|
|
43
|
+
/** Record index used by get(). Type elided in the emitted declaration. */ private byId;
|
|
44
|
+
/** Record index used by getByIdempotencyToken(). Type elided in the emitted declaration. */ private byIdempotencyToken;
|
|
45
|
+
/** Record ordering. NOTE(review): presumably message_ids in insertion order -- confirm. */ private order;
|
|
46
|
+
/** Configured capacity (see constructor option of the same name). */ private maxItems;
|
|
47
|
+
/** Configured persistence backend (see constructor option of the same name). */ private persistence;
|
|
48
|
+
/** Configured dedup window (see constructor option of the same name). */ private dedupWindowS;
|
|
49
|
+
/** NOTE(review): presumably marks unsaved changes pending a flush -- confirm. */ private dirty;
|
|
50
|
+
/** @param opts - Optional configuration; defaults for omitted fields are not visible in this declaration. */ constructor(opts?: {
|
|
51
|
+
/** Buffer capacity; recordIncoming/recordOutgoing throw BufferFullError once the buffer is at capacity. */ maxItems?: number;
|
|
52
|
+
/** Backend used to persist and restore records. */ persistence?: PersistenceBackend;
|
|
53
|
+
/** Deduplication window. NOTE(review): presumably in seconds (the `S` suffix) and applied to idempotency tokens -- confirm. */ dedupWindowS?: number;
|
|
54
|
+
});
|
|
55
|
+
private loadFromPersistence;
|
|
56
|
+
private saveToPersistence;
|
|
57
|
+
private checkCapacity;
|
|
58
|
+
private cleanupExpiredDedupEntries;
|
|
59
|
+
/**
|
|
60
|
+
* Record an incoming envelope. Throws BufferFullError if buffer is at capacity.
|
|
61
|
+
*/
|
|
62
|
+
recordIncoming(envelope: Record<string, unknown>): EnvelopeRecord;
|
|
63
|
+
/**
|
|
64
|
+
* Record an outgoing envelope. Throws BufferFullError if buffer is at capacity.
|
|
65
|
+
*/
|
|
66
|
+
recordOutgoing(envelope: Record<string, unknown>): EnvelopeRecord;
|
|
67
|
+
/**
|
|
68
|
+
* Process an ACK for a previously recorded message. NOTE(review): presumably returns undefined when no record matches ack_for_message_id -- confirm.
|
|
69
|
+
*/
|
|
70
|
+
ack(ackMsg: {
|
|
71
|
+
/** message_id of the previously recorded message being acknowledged. */ ack_for_message_id: string;
|
|
72
|
+
/** ACK stage to record. NOTE(review): numeric encoding and the default when omitted are not visible here. */ ack_stage?: number;
|
|
73
|
+
/** Error code to record. NOTE(review): the default when omitted is not visible here. */ error_code?: number;
|
|
74
|
+
/** Free-form note. NOTE(review): presumably stored as EnvelopeRecord.ack_note -- confirm. */ note?: string;
|
|
75
|
+
}): EnvelopeRecord | undefined;
|
|
76
|
+
/** Get record by message ID. */
|
|
77
|
+
get(messageId: string): EnvelopeRecord | undefined;
|
|
78
|
+
/** Get record by idempotency token (for deduplication). */
|
|
79
|
+
getByIdempotencyToken(token: string): EnvelopeRecord | undefined;
|
|
80
|
+
/** Get all un-ACKed records. */
|
|
81
|
+
unacked(): EnvelopeRecord[];
|
|
82
|
+
/** Get N most recent records. NOTE(review): the default for `n` is not visible in this declaration. */
|
|
83
|
+
recent(n?: number): EnvelopeRecord[];
|
|
84
|
+
/** Update envelope state for a message. */
|
|
85
|
+
updateState(messageId: string, newState: EnvelopeState): EnvelopeRecord | undefined;
|
|
86
|
+
/** Return unacked outgoing messages for reconciliation. */
|
|
87
|
+
reconcile(): EnvelopeRecord[];
|
|
88
|
+
/** Force save to persistence. */
|
|
89
|
+
flush(): void;
|
|
90
|
+
/** Clear all records. */
|
|
91
|
+
clear(): void;
|
|
92
|
+
/** Get the count of records. */
|
|
93
|
+
get size(): number;
|
|
94
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { ErrorCode } from '../internal/errorMapping.js';
|
|
2
|
+
/** Minimum grace period in milliseconds (5000). NOTE(review): whether shorter requests are clamped or rejected is not visible in this declaration. */ export declare const MIN_GRACE_PERIOD_MS = 5000;
|
|
3
|
+
/** A single cancellation-metadata value: a JSON-style primitive (string, number, boolean or null). */ export type CancellationMetadataValue = string | number | boolean | null;
|
|
4
|
+
/** Flat string-keyed map of primitive cancellation-metadata values. */ export type CancellationMetadata = Record<string, CancellationMetadataValue>;
|
|
5
|
+
/** Input accepted by CancellationManager.handleCancelDelegation. */ export interface CancelDelegationRequest {
|
|
6
|
+
/** Correlation ID of the delegation to cancel. */ correlationId: string;
|
|
7
|
+
/** Optional human-readable reason for the cancellation. */ reason?: string;
|
|
8
|
+
/** Optional grace period in milliseconds. NOTE(review): default and interaction with MIN_GRACE_PERIOD_MS are not visible here. */ gracePeriodMs?: number;
|
|
9
|
+
/** Optional caller-supplied metadata. Typed more loosely (unknown values) than the CancellationMetadata returned in the response; NOTE(review): presumably validated or filtered -- confirm. */ metadata?: Record<string, unknown>;
|
|
10
|
+
}
|
|
11
|
+
/** Result returned by CancellationManager.handleCancelDelegation. */ export interface CancelDelegationResponse {
|
|
12
|
+
/** Whether the cancellation request was acknowledged. */ acknowledged: boolean;
|
|
13
|
+
/** Correlation ID the response refers to. */ correlationId: string;
|
|
14
|
+
/** Grace period in milliseconds. NOTE(review): presumably the effective value after defaults/minimum are applied -- confirm. */ gracePeriodMs: number;
|
|
15
|
+
/** Human-readable status message. */ message: string;
|
|
16
|
+
/** Metadata restricted to primitive values (see CancellationMetadata). */ metadata: CancellationMetadata;
|
|
17
|
+
}
|
|
18
|
+
/** Per-correlation cancellation state; the value type of CancellationManager.cancellationFlags. */ export interface CancellationFlag {
|
|
19
|
+
/** Whether cancellation has been requested. */ cancelled: boolean;
|
|
20
|
+
/** Grace period in milliseconds associated with the cancellation. */ gracePeriodMs: number;
|
|
21
|
+
/** Cancellation time in milliseconds. NOTE(review): presumably taken from the manager's clock (nowMsFn) when the cancel was handled -- confirm. */ cancelTimeMs: number;
|
|
22
|
+
/** Metadata restricted to primitive values (see CancellationMetadata). */ metadata: CancellationMetadata;
|
|
23
|
+
}
|
|
24
|
+
/** Constructor options for CancellationManager. */ export interface CancellationManagerOptions {
|
|
25
|
+
/** Optional clock returning the current time in milliseconds. NOTE(review): presumably an injection point for tests with a wall-clock default -- confirm. */ nowMsFn?: () => number;
|
|
26
|
+
}
|
|
27
|
+
/** Error subclass for cancellation validation failures. NOTE(review): the conditions that raise it are not visible in this declaration. */ export declare class CancellationValidationError extends Error {
|
|
28
|
+
/** @param message - Human-readable error message. */ constructor(message: string);
|
|
29
|
+
}
|
|
30
|
+
/** Tracks delegation cancellation flags and parent/child delegation links, keyed by correlation ID strings. */ export declare class CancellationManager {
|
|
31
|
+
/** Map from a string key to a set of strings. NOTE(review): presumably parent correlation ID -> child correlation IDs, populated by registerChildDelegation -- confirm. */ readonly childDelegations: Map<string, Set<string>>;
|
|
32
|
+
/** Map from a string key to its CancellationFlag. NOTE(review): presumably keyed by correlation ID -- confirm. */ readonly cancellationFlags: Map<string, CancellationFlag>;
|
|
33
|
+
/** Internal clock (type elided in the emitted declaration). NOTE(review): presumably options.nowMsFn or a default -- confirm. */ private readonly nowMs;
|
|
34
|
+
/** @param options - Optional configuration (see CancellationManagerOptions). */ constructor(options?: CancellationManagerOptions);
|
|
35
|
+
/** Register a child delegation under a parent delegation. NOTE(review): whether cancelling the parent cascades to children is not visible here. */ registerChildDelegation(parentCorrelationId: string, childCorrelationId: string): void;
|
|
36
|
+
/** Handle a cancel request and return the response synchronously. NOTE(review): presumably throws CancellationValidationError on invalid input -- confirm. */ handleCancelDelegation(cancel: CancelDelegationRequest): CancelDelegationResponse;
|
|
37
|
+
/** Report whether the given correlation ID is cancelled. */ isCancelled(correlationId: string): boolean;
|
|
38
|
+
/** Report whether the grace period for the given correlation ID has expired. @param nowMs - Optional time override in milliseconds; NOTE(review): presumably falls back to the internal clock -- confirm. */ isGraceExpired(correlationId: string, nowMs?: number): boolean;
|
|
39
|
+
/** Return the ErrorCode associated with forced preemption of the given correlation ID. NOTE(review): the value returned when no preemption applies is not visible here. */ forcedPreemptionErrorCode(correlationId: string, nowMs?: number): ErrorCode;
|
|
40
|
+
/** Return a set of correlation IDs drawn from the given iterable. NOTE(review): presumably those whose grace period has expired and must be force-preempted -- confirm. */ collectForcedPreemptions(correlationIds: Iterable<string>, nowMs?: number): Set<string>;
|
|
41
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { type BudgetEnvelope, type HandoffRequest, type HandoffResponse, type SwarmDelegationPolicy } from '../clients/handoff.js';
|
|
2
|
+
/** Jitter ratio constant (0.2). NOTE(review): presumably the fraction of jitter applied to retry-after delays in delegateToSwarm -- confirm. */ export declare const RETRY_AFTER_JITTER_RATIO = 0.2;
|
|
3
|
+
/** Default redirect limit constant (2). NOTE(review): presumably used when the delegation policy does not set a maximum -- confirm. */ export declare const DEFAULT_EFFECTIVE_MAX_REDIRECTS = 2;
|
|
4
|
+
/** Options accepted by delegateToSwarm. */ export interface DelegateToSwarmOptions {
|
|
5
|
+
/** Callback that sends a HandoffRequest and yields a HandoffResponse, either synchronously or as a promise. */ sendHandoffFn: (request: HandoffRequest) => Promise<HandoffResponse> | HandoffResponse;
|
|
6
|
+
/** Identifier of the delegating agent. */ fromAgent: string;
|
|
7
|
+
/** Identifier of the target agent. */ toAgent: string;
|
|
8
|
+
/** Reason for the delegation. */ reason: string;
|
|
9
|
+
/** Budget envelope for the delegation (type defined in clients/handoff). */ budget: BudgetEnvelope;
|
|
10
|
+
/** Optional delegation policy (type defined in clients/handoff). */ delegationPolicy?: SwarmDelegationPolicy;
|
|
11
|
+
/** Optional request ID. NOTE(review): presumably generated when omitted -- confirm. */ requestId?: string;
|
|
12
|
+
/** Optional context snapshot as raw bytes. */ contextSnapshot?: Uint8Array;
|
|
13
|
+
/** Optional list of required capability names. */ capabilitiesRequired?: string[];
|
|
14
|
+
/** Optional priority. NOTE(review): scale and ordering are not visible in this declaration. */ priority?: number;
|
|
15
|
+
/** Optional timeout in milliseconds. NOTE(review): whether it bounds one attempt or the whole delegation is not visible here. */ timeoutMs?: number;
|
|
16
|
+
/** Optional clock returning the current time in milliseconds. NOTE(review): presumably a test injection point -- confirm. */ nowMsFn?: () => number;
|
|
17
|
+
/** Optional sleep function taking a duration in milliseconds; may be synchronous or return a promise. */ sleepMsFn?: (milliseconds: number) => Promise<void> | void;
|
|
18
|
+
/** Optional random-number source taking a low and a high bound. NOTE(review): presumably uniform and used for retry jitter -- confirm. */ randUniformFn?: (low: number, high: number) => number;
|
|
19
|
+
}
|
|
20
|
+
/** Delegate work to another agent using the supplied options; resolves to the HandoffResponse. NOTE(review): retry and redirect behaviour (suggested by RETRY_AFTER_JITTER_RATIO and DEFAULT_EFFECTIVE_MAX_REDIRECTS) is not visible in this declaration. */ export declare function delegateToSwarm(options: DelegateToSwarmOptions): Promise<HandoffResponse>;
|