@sw4rm/js-sdk 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -1
- package/dist/cjs/index.cjs +1573 -88
- package/dist/esm/index.js +1530 -80
- package/dist/types/clients/handoff.d.ts +44 -1
- package/dist/types/index.d.ts +5 -1
- package/dist/types/internal/errorMapping.d.ts +2 -0
- package/dist/types/llm/anthropic.d.ts +83 -0
- package/dist/types/llm/client.d.ts +107 -0
- package/dist/types/llm/factory.d.ts +69 -0
- package/dist/types/llm/groq.d.ts +79 -0
- package/dist/types/llm/index.d.ts +45 -0
- package/dist/types/llm/mock.d.ts +89 -0
- package/dist/types/llm/rateLimiter.d.ts +101 -0
- package/dist/types/runtime/cancellation.d.ts +41 -0
- package/dist/types/runtime/delegation.d.ts +20 -0
- package/dist/types/runtime/gateway.d.ts +80 -0
- package/package.json +1 -1
- package/protos/common.proto +7 -0
- package/protos/handoff.proto +42 -0
- package/protos/registry.proto +11 -0
|
package/dist/types/clients/handoff.d.ts
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
*
|
|
10
10
|
* Based on handoff.proto definitions.
|
|
11
11
|
*/
|
|
12
|
+
import { ErrorCode } from '../internal/errorMapping.js';
|
|
12
13
|
/**
|
|
13
14
|
* Status of a handoff request.
|
|
14
15
|
*/
|
|
@@ -20,6 +21,33 @@ export declare enum HandoffStatus {
|
|
|
20
21
|
COMPLETED = 4,
|
|
21
22
|
EXPIRED = 5
|
|
22
23
|
}
|
|
24
|
+
export declare const DEFAULT_MAX_RETRIES_ON_OVERLOADED = 2;
|
|
25
|
+
export declare const DEFAULT_INITIAL_BACKOFF_MS = 250;
|
|
26
|
+
export declare const DEFAULT_BACKOFF_MULTIPLIER = 2;
|
|
27
|
+
export declare const DEFAULT_MAX_BACKOFF_MS = 2000;
|
|
28
|
+
export declare const DEFAULT_MAX_REDIRECTS = 0;
|
|
29
|
+
/**
|
|
30
|
+
* SW4-004 budget envelope for cross-swarm delegation.
|
|
31
|
+
*/
|
|
32
|
+
export interface BudgetEnvelope {
|
|
33
|
+
tokenBudgetRemaining?: number;
|
|
34
|
+
wallTimeRemainingMs?: number;
|
|
35
|
+
/** Required for cross-swarm delegation. */
|
|
36
|
+
deadlineEpochMs: number;
|
|
37
|
+
currentDepth?: number;
|
|
38
|
+
maxDelegationDepth?: number;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* SW4-004/SW4-005 delegation policy envelope.
|
|
42
|
+
*/
|
|
43
|
+
export interface SwarmDelegationPolicy {
|
|
44
|
+
maxRetriesOnOverloaded?: number;
|
|
45
|
+
initialBackoffMs?: number;
|
|
46
|
+
backoffMultiplier?: number;
|
|
47
|
+
maxBackoffMs?: number;
|
|
48
|
+
allowSpilloverRouting?: boolean;
|
|
49
|
+
maxRedirects?: number;
|
|
50
|
+
}
|
|
23
51
|
/**
|
|
24
52
|
* A request to hand off work from one agent to another.
|
|
25
53
|
*
|
|
@@ -43,6 +71,10 @@ export interface HandoffRequest {
|
|
|
43
71
|
priority: number;
|
|
44
72
|
/** Timeout in milliseconds for the handoff to be accepted */
|
|
45
73
|
timeoutMs?: number;
|
|
74
|
+
/** SW4-004 budget envelope for cross-swarm handoff */
|
|
75
|
+
budget?: BudgetEnvelope;
|
|
76
|
+
/** SW4-004/SW4-005 delegation policy envelope */
|
|
77
|
+
delegationPolicy?: SwarmDelegationPolicy;
|
|
46
78
|
/** Timestamp when the request was created (ISO-8601 string) */
|
|
47
79
|
createdAt?: string;
|
|
48
80
|
}
|
|
@@ -61,6 +93,17 @@ export interface HandoffResponse {
|
|
|
61
93
|
acceptingAgent?: string;
|
|
62
94
|
/** Reason for rejection (if not accepted) */
|
|
63
95
|
rejectionReason?: string;
|
|
96
|
+
/** SW4-004/SW4-005 rejection code */
|
|
97
|
+
rejectionCode?: ErrorCode;
|
|
98
|
+
/** SW4-004 retry hint in milliseconds (for OVERLOADED) */
|
|
99
|
+
retryAfterMs?: number;
|
|
100
|
+
/** SW4-005 redirect target agent ID */
|
|
101
|
+
redirectToAgentId?: string;
|
|
102
|
+
}
|
|
103
|
+
export interface RejectHandoffOptions {
|
|
104
|
+
rejectionCode?: ErrorCode;
|
|
105
|
+
retryAfterMs?: number;
|
|
106
|
+
redirectToAgentId?: string;
|
|
64
107
|
}
|
|
65
108
|
/**
|
|
66
109
|
* Error thrown when a handoff operation fails validation.
|
|
@@ -148,7 +191,7 @@ export declare class HandoffClient {
|
|
|
148
191
|
* await client.rejectHandoff("handoff-123", "Agent at capacity");
|
|
149
192
|
* ```
|
|
150
193
|
*/
|
|
151
|
-
rejectHandoff(handoffId: string, reason: string): Promise<void>;
|
|
194
|
+
rejectHandoff(handoffId: string, reason: string, options?: RejectHandoffOptions): Promise<void>;
|
|
152
195
|
/**
|
|
153
196
|
* Get all pending handoff requests for an agent.
|
|
154
197
|
*
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export declare const version = "0.5.0";
|
|
1
|
+
export declare const version = "0.6.0";
|
|
2
2
|
export * from './clients/router.js';
|
|
3
3
|
export * from './clients/scheduler.js';
|
|
4
4
|
export * from './clients/schedulerPolicy.js';
|
|
@@ -29,6 +29,9 @@ export * from './runtime/ackHelpers.js';
|
|
|
29
29
|
export * from './runtime/activitySync.js';
|
|
30
30
|
export * from './runtime/streams.js';
|
|
31
31
|
export * from './runtime/negotiationEvents.js';
|
|
32
|
+
export * from './runtime/delegation.js';
|
|
33
|
+
export * from './runtime/cancellation.js';
|
|
34
|
+
export * from './runtime/gateway.js';
|
|
32
35
|
export * from './persistence/persistence.js';
|
|
33
36
|
export * from './runtime/voting.js';
|
|
34
37
|
export * from './runtime/policyStore.js';
|
|
@@ -50,3 +53,4 @@ export * from './secrets/resolver.js';
|
|
|
50
53
|
export * from './secrets/backends/file.js';
|
|
51
54
|
export * from './secrets/backends/keyring.js';
|
|
52
55
|
export * from './secrets/factory.js';
|
|
56
|
+
export * from './llm/index.js';
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic LLM client for SW4RM agents.
|
|
3
|
+
*
|
|
4
|
+
* Thin wrapper around the Anthropic Messages REST API using native `fetch()`.
|
|
5
|
+
* Handles system-message normalization (Anthropic uses a top-level `system`
|
|
6
|
+
* param, not an in-band system message).
|
|
7
|
+
*
|
|
8
|
+
* Credentials (in order):
|
|
9
|
+
* 1. `apiKey` constructor parameter
|
|
10
|
+
* 2. `ANTHROPIC_API_KEY` environment variable
|
|
11
|
+
* 3. `~/.anthropic` file (plain text, one line)
|
|
12
|
+
*
|
|
13
|
+
* @module llm/anthropic
|
|
14
|
+
*/
|
|
15
|
+
import type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
16
|
+
/** Options for constructing an AnthropicClient. */
|
|
17
|
+
export interface AnthropicClientOptions {
|
|
18
|
+
/** API key override (takes precedence over env / dotfile). */
|
|
19
|
+
apiKey?: string;
|
|
20
|
+
/** Default model to use for queries. */
|
|
21
|
+
defaultModel?: string;
|
|
22
|
+
/** Request timeout in milliseconds. */
|
|
23
|
+
timeoutMs?: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* LLM client for the Anthropic Messages API.
|
|
27
|
+
*
|
|
28
|
+
* Credentials are resolved from (in order):
|
|
29
|
+
* 1. `apiKey` constructor option
|
|
30
|
+
* 2. `ANTHROPIC_API_KEY` environment variable
|
|
31
|
+
* 3. `~/.anthropic` file (plain text, one line)
|
|
32
|
+
*
|
|
33
|
+
* Environment variables:
|
|
34
|
+
* - `ANTHROPIC_API_KEY`: API key override
|
|
35
|
+
* - `ANTHROPIC_DEFAULT_MODEL`: Default model override
|
|
36
|
+
*/
|
|
37
|
+
export declare class AnthropicClient implements LlmClient {
|
|
38
|
+
/** The resolved API key. */
|
|
39
|
+
readonly apiKey: string;
|
|
40
|
+
/** The default model used when none is specified per-call. */
|
|
41
|
+
readonly defaultModel: string;
|
|
42
|
+
private readonly timeoutMs;
|
|
43
|
+
private readonly rateLimiter;
|
|
44
|
+
constructor(opts?: AnthropicClientOptions);
|
|
45
|
+
/**
|
|
46
|
+
* Load API key from ~/.anthropic file.
|
|
47
|
+
*
|
|
48
|
+
* @returns The key string, or null if the file does not exist.
|
|
49
|
+
*/
|
|
50
|
+
static loadKeyFile(): string | null;
|
|
51
|
+
private estimateTokens;
|
|
52
|
+
/**
|
|
53
|
+
* Map an HTTP error response to the appropriate LlmError subclass.
|
|
54
|
+
*
|
|
55
|
+
* @param status - HTTP status code.
|
|
56
|
+
* @param body - Parsed response body (if available).
|
|
57
|
+
* @param message - Raw error message.
|
|
58
|
+
*/
|
|
59
|
+
private mapError;
|
|
60
|
+
/**
|
|
61
|
+
* Send a query to Anthropic and get a complete response.
|
|
62
|
+
*
|
|
63
|
+
* System prompts are sent via the top-level `system` parameter rather
|
|
64
|
+
* than as a system message in the messages array, per the Anthropic API
|
|
65
|
+
* specification.
|
|
66
|
+
*
|
|
67
|
+
* @param prompt - The user prompt/query.
|
|
68
|
+
* @param opts - Optional query configuration.
|
|
69
|
+
* @returns LlmResponse with generated content and metadata.
|
|
70
|
+
*/
|
|
71
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
72
|
+
/**
|
|
73
|
+
* Stream a query response chunk by chunk.
|
|
74
|
+
*
|
|
75
|
+
* Uses Server-Sent Events (SSE) streaming from the Anthropic API.
|
|
76
|
+
* System prompts are sent via the top-level `system` parameter.
|
|
77
|
+
*
|
|
78
|
+
* @param prompt - The user prompt/query.
|
|
79
|
+
* @param opts - Optional query configuration.
|
|
80
|
+
* @yields Text chunks as they arrive from the API.
|
|
81
|
+
*/
|
|
82
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
83
|
+
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract LLM client interface.
|
|
3
|
+
*
|
|
4
|
+
* This module provides the base interface for LLM clients used by SW4RM agents.
|
|
5
|
+
* Implementations can use Anthropic API directly, Groq API, or mock clients
|
|
6
|
+
* for testing.
|
|
7
|
+
*
|
|
8
|
+
* @module llm/client
|
|
9
|
+
*/
|
|
10
|
+
/** Response from an LLM API call. */
|
|
11
|
+
export interface LlmResponse {
|
|
12
|
+
/** The generated text content. */
|
|
13
|
+
content: string;
|
|
14
|
+
/** The model that generated the response. */
|
|
15
|
+
model: string;
|
|
16
|
+
/** Token usage statistics (if available). */
|
|
17
|
+
usage?: {
|
|
18
|
+
input_tokens: number;
|
|
19
|
+
output_tokens: number;
|
|
20
|
+
};
|
|
21
|
+
/** Additional response metadata. */
|
|
22
|
+
metadata?: Record<string, unknown>;
|
|
23
|
+
}
|
|
24
|
+
/** Options for LLM query calls. */
|
|
25
|
+
export interface QueryOptions {
|
|
26
|
+
/** Optional system prompt for context. */
|
|
27
|
+
systemPrompt?: string;
|
|
28
|
+
/** Maximum tokens to generate (default 4096). */
|
|
29
|
+
maxTokens?: number;
|
|
30
|
+
/** Sampling temperature 0.0-2.0 (default 1.0). */
|
|
31
|
+
temperature?: number;
|
|
32
|
+
/** Override the default model. */
|
|
33
|
+
model?: string;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Abstract interface for LLM clients.
|
|
37
|
+
*
|
|
38
|
+
* All LLM client implementations must implement this interface.
|
|
39
|
+
* This allows SW4RM agents to be provider-agnostic.
|
|
40
|
+
*
|
|
41
|
+
* Implementations:
|
|
42
|
+
* - GroqClient: Uses Groq API directly (API key)
|
|
43
|
+
* - AnthropicClient: Uses Anthropic API directly (API key)
|
|
44
|
+
* - MockLlmClient: For testing without API calls
|
|
45
|
+
*/
|
|
46
|
+
export interface LlmClient {
|
|
47
|
+
/**
|
|
48
|
+
* Send a query to the LLM and get a complete response.
|
|
49
|
+
*
|
|
50
|
+
* @param prompt - The user prompt/query to send.
|
|
51
|
+
* @param opts - Optional query configuration.
|
|
52
|
+
* @returns LlmResponse with the generated content and metadata.
|
|
53
|
+
* @throws LlmError on API errors or failures.
|
|
54
|
+
* @throws LlmAuthenticationError on authentication failures.
|
|
55
|
+
* @throws LlmRateLimitError when rate limits are exceeded.
|
|
56
|
+
* @throws LlmTimeoutError when the request times out.
|
|
57
|
+
*/
|
|
58
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
59
|
+
/**
|
|
60
|
+
* Stream a query response chunk by chunk.
|
|
61
|
+
*
|
|
62
|
+
* @param prompt - The user prompt/query to send.
|
|
63
|
+
* @param opts - Optional query configuration.
|
|
64
|
+
* @yields Text chunks as they arrive from the API.
|
|
65
|
+
* @throws LlmError on API errors or failures.
|
|
66
|
+
*/
|
|
67
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
68
|
+
}
|
|
69
|
+
/** Base exception for all LLM client errors. */
|
|
70
|
+
export declare class LlmError extends Error {
|
|
71
|
+
constructor(message: string);
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Raised when API authentication fails.
|
|
75
|
+
*
|
|
76
|
+
* Common causes:
|
|
77
|
+
* - Invalid API key
|
|
78
|
+
* - Expired credentials
|
|
79
|
+
* - Missing credentials
|
|
80
|
+
*/
|
|
81
|
+
export declare class LlmAuthenticationError extends LlmError {
|
|
82
|
+
constructor(message: string);
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Raised when API rate limits are exceeded.
|
|
86
|
+
*
|
|
87
|
+
* The caller should implement exponential backoff when handling this.
|
|
88
|
+
*/
|
|
89
|
+
export declare class LlmRateLimitError extends LlmError {
|
|
90
|
+
constructor(message: string);
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Raised when an API request times out.
|
|
94
|
+
*
|
|
95
|
+
* Consider increasing the timeout or simplifying the prompt.
|
|
96
|
+
*/
|
|
97
|
+
export declare class LlmTimeoutError extends LlmError {
|
|
98
|
+
constructor(message: string);
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Raised when the prompt exceeds the model's context length.
|
|
102
|
+
*
|
|
103
|
+
* Consider truncating the prompt or using a model with larger context.
|
|
104
|
+
*/
|
|
105
|
+
export declare class LlmContextLengthError extends LlmError {
|
|
106
|
+
constructor(message: string);
|
|
107
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory for creating LLM clients.
|
|
3
|
+
*
|
|
4
|
+
* Provides a simple way to create the appropriate LLM client based on
|
|
5
|
+
* environment configuration or explicit parameters.
|
|
6
|
+
*
|
|
7
|
+
* @module llm/factory
|
|
8
|
+
*/
|
|
9
|
+
import type { LlmClient } from './client.js';
|
|
10
|
+
/** Options for the LLM client factory. */
|
|
11
|
+
export interface CreateLlmClientOptions {
|
|
12
|
+
/**
|
|
13
|
+
* The type of client to create.
|
|
14
|
+
*
|
|
15
|
+
* Valid types: "groq", "anthropic", "mock".
|
|
16
|
+
*
|
|
17
|
+
* If not specified, reads from the `LLM_CLIENT_TYPE` environment variable.
|
|
18
|
+
* Defaults to "mock" if neither is set.
|
|
19
|
+
*/
|
|
20
|
+
clientType?: string;
|
|
21
|
+
/**
|
|
22
|
+
* Default model to use for queries.
|
|
23
|
+
*
|
|
24
|
+
* If not specified, reads from the `LLM_DEFAULT_MODEL` environment variable.
|
|
25
|
+
* Falls back to each provider's default model.
|
|
26
|
+
*/
|
|
27
|
+
model?: string;
|
|
28
|
+
/** API key override (passed to the provider constructor). */
|
|
29
|
+
apiKey?: string;
|
|
30
|
+
/** Request timeout in milliseconds (passed to the provider constructor). */
|
|
31
|
+
timeoutMs?: number;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Create an LLM client based on environment or explicit type.
|
|
35
|
+
*
|
|
36
|
+
* Resolution order for client type:
|
|
37
|
+
* 1. Explicit `clientType` option
|
|
38
|
+
* 2. `LLM_CLIENT_TYPE` environment variable
|
|
39
|
+
* 3. Default: "mock"
|
|
40
|
+
*
|
|
41
|
+
* Environment variables:
|
|
42
|
+
* - `LLM_CLIENT_TYPE`: "groq", "anthropic", or "mock" (default: "mock")
|
|
43
|
+
* - `LLM_DEFAULT_MODEL`: Default model override
|
|
44
|
+
*
|
|
45
|
+
* @param opts - Optional factory configuration.
|
|
46
|
+
* @returns An LlmClient instance of the requested type.
|
|
47
|
+
* @throws Error if an invalid client type is specified.
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* ```typescript
|
|
51
|
+
* import { createLlmClient } from '@sw4rm/js-sdk';
|
|
52
|
+
*
|
|
53
|
+
* // Auto-detect from env (default: mock)
|
|
54
|
+
* const client = createLlmClient();
|
|
55
|
+
*
|
|
56
|
+
* // Explicit Groq client
|
|
57
|
+
* const groq = createLlmClient({ clientType: 'groq' });
|
|
58
|
+
*
|
|
59
|
+
* // Mock client for testing
|
|
60
|
+
* const mock = createLlmClient({ clientType: 'mock' });
|
|
61
|
+
*
|
|
62
|
+
* // Anthropic client with model override
|
|
63
|
+
* const claude = createLlmClient({
|
|
64
|
+
* clientType: 'anthropic',
|
|
65
|
+
* model: 'claude-sonnet-4-20250514',
|
|
66
|
+
* });
|
|
67
|
+
* ```
|
|
68
|
+
*/
|
|
69
|
+
export declare function createLlmClient(opts?: CreateLlmClientOptions): LlmClient;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Groq LLM client for SW4RM agents.
|
|
3
|
+
*
|
|
4
|
+
* Thin wrapper around the Groq REST API using native `fetch()`.
|
|
5
|
+
* No tool-calling or diagnostic mode -- just prompt in, text out,
|
|
6
|
+
* with rate limiting.
|
|
7
|
+
*
|
|
8
|
+
* Credentials (in order):
|
|
9
|
+
* 1. `apiKey` constructor parameter
|
|
10
|
+
* 2. `GROQ_API_KEY` environment variable
|
|
11
|
+
* 3. `~/.groq` file (plain text, one line)
|
|
12
|
+
*
|
|
13
|
+
* @module llm/groq
|
|
14
|
+
*/
|
|
15
|
+
import type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
16
|
+
/** Options for constructing a GroqClient. */
|
|
17
|
+
export interface GroqClientOptions {
|
|
18
|
+
/** API key override (takes precedence over env / dotfile). */
|
|
19
|
+
apiKey?: string;
|
|
20
|
+
/** Default model to use for queries. */
|
|
21
|
+
defaultModel?: string;
|
|
22
|
+
/** Request timeout in milliseconds. */
|
|
23
|
+
timeoutMs?: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* LLM client for the Groq API (OpenAI-compatible chat completions).
|
|
27
|
+
*
|
|
28
|
+
* Credentials are resolved from (in order):
|
|
29
|
+
* 1. `apiKey` constructor option
|
|
30
|
+
* 2. `GROQ_API_KEY` environment variable
|
|
31
|
+
* 3. `~/.groq` file (plain text, one line)
|
|
32
|
+
*
|
|
33
|
+
* Environment variables:
|
|
34
|
+
* - `GROQ_API_KEY`: API key override
|
|
35
|
+
* - `GROQ_DEFAULT_MODEL`: Default model override
|
|
36
|
+
*/
|
|
37
|
+
export declare class GroqClient implements LlmClient {
|
|
38
|
+
/** The resolved API key. */
|
|
39
|
+
readonly apiKey: string;
|
|
40
|
+
/** The default model used when none is specified per-call. */
|
|
41
|
+
readonly defaultModel: string;
|
|
42
|
+
private readonly timeoutMs;
|
|
43
|
+
private readonly rateLimiter;
|
|
44
|
+
constructor(opts?: GroqClientOptions);
|
|
45
|
+
/**
|
|
46
|
+
* Load API key from ~/.groq file.
|
|
47
|
+
*
|
|
48
|
+
* @returns The key string, or null if the file does not exist.
|
|
49
|
+
*/
|
|
50
|
+
static loadKeyFile(): string | null;
|
|
51
|
+
private estimateTokens;
|
|
52
|
+
private buildMessages;
|
|
53
|
+
/**
|
|
54
|
+
* Map an HTTP error response to the appropriate LlmError subclass.
|
|
55
|
+
*
|
|
56
|
+
* @param status - HTTP status code.
|
|
57
|
+
* @param body - Parsed response body (if available).
|
|
58
|
+
* @param message - Raw error message.
|
|
59
|
+
*/
|
|
60
|
+
private mapError;
|
|
61
|
+
/**
|
|
62
|
+
* Send a query to Groq and get a complete response.
|
|
63
|
+
*
|
|
64
|
+
* @param prompt - The user prompt/query.
|
|
65
|
+
* @param opts - Optional query configuration.
|
|
66
|
+
* @returns LlmResponse with generated content and metadata.
|
|
67
|
+
*/
|
|
68
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
69
|
+
/**
|
|
70
|
+
* Stream a query response chunk by chunk.
|
|
71
|
+
*
|
|
72
|
+
* Uses Server-Sent Events (SSE) streaming from the Groq API.
|
|
73
|
+
*
|
|
74
|
+
* @param prompt - The user prompt/query.
|
|
75
|
+
* @param opts - Optional query configuration.
|
|
76
|
+
* @yields Text chunks as they arrive from the API.
|
|
77
|
+
*/
|
|
78
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
79
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SW4RM LLM -- LLM client abstraction for SW4RM agents.
|
|
3
|
+
*
|
|
4
|
+
* This module provides a unified interface for LLM interactions, supporting:
|
|
5
|
+
* - Groq API (API key from ~/.groq or GROQ_API_KEY)
|
|
6
|
+
* - Anthropic API (API key from ~/.anthropic or ANTHROPIC_API_KEY)
|
|
7
|
+
* - Mock client for testing
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { createLlmClient } from '@sw4rm/js-sdk';
|
|
12
|
+
*
|
|
13
|
+
* // Create client via environment (LLM_CLIENT_TYPE=groq|anthropic|mock)
|
|
14
|
+
* const client = createLlmClient();
|
|
15
|
+
*
|
|
16
|
+
* // Or explicitly
|
|
17
|
+
* const client = createLlmClient({ clientType: 'groq' });
|
|
18
|
+
*
|
|
19
|
+
* // Query the LLM
|
|
20
|
+
* const response = await client.query(
|
|
21
|
+
* 'Analyze this task and suggest next steps',
|
|
22
|
+
* { systemPrompt: 'You are a helpful task analysis agent.' },
|
|
23
|
+
* );
|
|
24
|
+
* console.log(response.content);
|
|
25
|
+
*
|
|
26
|
+
* // Stream responses
|
|
27
|
+
* for await (const chunk of client.streamQuery('Generate a report')) {
|
|
28
|
+
* process.stdout.write(chunk);
|
|
29
|
+
* }
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* @module llm
|
|
33
|
+
*/
|
|
34
|
+
export type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
35
|
+
export { LlmError, LlmAuthenticationError, LlmRateLimitError, LlmTimeoutError, LlmContextLengthError, } from './client.js';
|
|
36
|
+
export type { RateLimiterConfig } from './rateLimiter.js';
|
|
37
|
+
export { buildRateLimiterConfig, TokenBucket, getGlobalRateLimiter, resetGlobalRateLimiter, } from './rateLimiter.js';
|
|
38
|
+
export { MockLlmClient } from './mock.js';
|
|
39
|
+
export type { MockCallRecord } from './mock.js';
|
|
40
|
+
export { GroqClient } from './groq.js';
|
|
41
|
+
export type { GroqClientOptions } from './groq.js';
|
|
42
|
+
export { AnthropicClient } from './anthropic.js';
|
|
43
|
+
export type { AnthropicClientOptions } from './anthropic.js';
|
|
44
|
+
export { createLlmClient } from './factory.js';
|
|
45
|
+
export type { CreateLlmClientOptions } from './factory.js';
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock LLM client for testing.
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic responses without making actual API calls.
|
|
5
|
+
* Cycles through a canned response array and records call history.
|
|
6
|
+
*
|
|
7
|
+
* @module llm/mock
|
|
8
|
+
*/
|
|
9
|
+
import type { LlmClient, LlmResponse, QueryOptions } from './client.js';
|
|
10
|
+
/** A recorded call entry from the mock client. */
|
|
11
|
+
export interface MockCallRecord {
|
|
12
|
+
prompt: string;
|
|
13
|
+
systemPrompt?: string;
|
|
14
|
+
maxTokens: number;
|
|
15
|
+
temperature: number;
|
|
16
|
+
model: string;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Mock LLM client for testing without API calls.
|
|
20
|
+
*
|
|
21
|
+
* Returns configurable responses for testing purposes. If no responses are
|
|
22
|
+
* provided, returns a default echo of the prompt.
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* ```typescript
|
|
26
|
+
* import { MockLlmClient } from '@sw4rm/js-sdk';
|
|
27
|
+
*
|
|
28
|
+
* // Simple usage -- returns echo of prompt
|
|
29
|
+
* const client = new MockLlmClient();
|
|
30
|
+
* const response = await client.query('Hello');
|
|
31
|
+
* console.log(response.content); // "Mock response to: Hello"
|
|
32
|
+
*
|
|
33
|
+
* // Custom responses (cycles)
|
|
34
|
+
* const client2 = new MockLlmClient({
|
|
35
|
+
* responses: ['First response', 'Second response'],
|
|
36
|
+
* });
|
|
37
|
+
*
|
|
38
|
+
* // Custom response generator
|
|
39
|
+
* const client3 = new MockLlmClient({
|
|
40
|
+
* responseGenerator: (prompt) => `Processed: ${prompt}`,
|
|
41
|
+
* });
|
|
42
|
+
* ```
|
|
43
|
+
*/
|
|
44
|
+
export declare class MockLlmClient implements LlmClient {
|
|
45
|
+
/** The model name returned in responses. */
|
|
46
|
+
readonly defaultModel: string;
|
|
47
|
+
private readonly responses;
|
|
48
|
+
private responseIndex;
|
|
49
|
+
private readonly responseGenerator;
|
|
50
|
+
private _callCount;
|
|
51
|
+
private readonly _callHistory;
|
|
52
|
+
constructor(opts?: {
|
|
53
|
+
/** Model name to return in responses. */
|
|
54
|
+
defaultModel?: string;
|
|
55
|
+
/** List of responses to return in order (cycles when exhausted). */
|
|
56
|
+
responses?: string[];
|
|
57
|
+
/** Function to generate responses from prompts. */
|
|
58
|
+
responseGenerator?: (prompt: string) => string;
|
|
59
|
+
});
|
|
60
|
+
/** Number of queries made to this client. */
|
|
61
|
+
get callCount(): number;
|
|
62
|
+
/** History of all calls made to this client. */
|
|
63
|
+
get callHistory(): readonly MockCallRecord[];
|
|
64
|
+
/** Reset call count, history, and response index. */
|
|
65
|
+
reset(): void;
|
|
66
|
+
/**
|
|
67
|
+
* Return a mock response.
|
|
68
|
+
*
|
|
69
|
+
* Priority for determining response content:
|
|
70
|
+
* 1. `responseGenerator` function (if provided)
|
|
71
|
+
* 2. `responses` array (cycles through entries)
|
|
72
|
+
* 3. Default echo: "Mock response to: <prompt>"
|
|
73
|
+
*
|
|
74
|
+
* @param prompt - The prompt (recorded in history).
|
|
75
|
+
* @param opts - Optional query configuration (recorded in history).
|
|
76
|
+
* @returns LlmResponse with mock content.
|
|
77
|
+
*/
|
|
78
|
+
query(prompt: string, opts?: QueryOptions): Promise<LlmResponse>;
|
|
79
|
+
/**
|
|
80
|
+
* Stream a mock response.
|
|
81
|
+
*
|
|
82
|
+
* Yields the response word by word to simulate streaming.
|
|
83
|
+
*
|
|
84
|
+
* @param prompt - The prompt (recorded in history).
|
|
85
|
+
* @param opts - Optional query configuration.
|
|
86
|
+
* @yields Words from the mock response.
|
|
87
|
+
*/
|
|
88
|
+
streamQuery(prompt: string, opts?: QueryOptions): AsyncGenerator<string>;
|
|
89
|
+
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token bucket rate limiter for LLM API requests.
|
|
3
|
+
*
|
|
4
|
+
* Provides proactive rate limiting to avoid 429 errors. All LLM clients
|
|
5
|
+
* in the process share a global singleton bucket.
|
|
6
|
+
*
|
|
7
|
+
* Configuration via environment variables:
|
|
8
|
+
* LLM_RATE_LIMIT_ENABLED: "1" (default) or "0"
|
|
9
|
+
* LLM_RATE_LIMIT_TOKENS_PER_MIN: 250000 (default, matches Groq free tier)
|
|
10
|
+
* LLM_RATE_LIMIT_ADAPTIVE: "1" (default) -- reduce budget on 429, recover on success
|
|
11
|
+
* LLM_RATE_LIMIT_REDUCTION_FACTOR: 0.7
|
|
12
|
+
* LLM_RATE_LIMIT_RECOVERY_FACTOR: 1.1
|
|
13
|
+
* LLM_RATE_LIMIT_COOLDOWN_SECONDS: 30
|
|
14
|
+
* LLM_RATE_LIMIT_RECOVERY_SUCCESS_THRESHOLD: 20
|
|
15
|
+
*
|
|
16
|
+
* @module llm/rateLimiter
|
|
17
|
+
*/
|
|
18
|
+
/** Configuration for the token bucket rate limiter. */
|
|
19
|
+
export interface RateLimiterConfig {
|
|
20
|
+
/** Token budget per minute. */
|
|
21
|
+
tokensPerMinute: number;
|
|
22
|
+
/** Burst allowance multiplier (1.0 = no burst above budget). */
|
|
23
|
+
burstAllowance: number;
|
|
24
|
+
/** Minimum tokens assumed per request. */
|
|
25
|
+
minTokensPerRequest: number;
|
|
26
|
+
/** Maximum seconds to wait before throwing a timeout. */
|
|
27
|
+
maxWaitSeconds: number;
|
|
28
|
+
/** Whether the rate limiter is enabled at all. */
|
|
29
|
+
enabled: boolean;
|
|
30
|
+
/** Whether adaptive throttling is enabled. */
|
|
31
|
+
adaptiveEnabled: boolean;
|
|
32
|
+
/** Factor to multiply budget by after a 429 event. */
|
|
33
|
+
reductionFactor: number;
|
|
34
|
+
/** Factor to multiply budget by during recovery. */
|
|
35
|
+
recoveryFactor: number;
|
|
36
|
+
/** Seconds to wait after last 429 before allowing recovery. */
|
|
37
|
+
cooldownSeconds: number;
|
|
38
|
+
/** Number of consecutive successes required before recovery. */
|
|
39
|
+
successesForRecovery: number;
|
|
40
|
+
}
|
|
41
|
+
/** Build a RateLimiterConfig from environment variables and optional overrides. */
|
|
42
|
+
export declare function buildRateLimiterConfig(overrides?: Partial<RateLimiterConfig>): RateLimiterConfig;
|
|
43
|
+
/**
|
|
44
|
+
* Token bucket rate limiter with adaptive throttling.
|
|
45
|
+
*
|
|
46
|
+
* Refills tokens at a steady rate. When a 429 is reported via
|
|
47
|
+
* {@link recordRateLimit}, the budget is reduced. After enough successes
|
|
48
|
+
* and a cooldown period, the budget recovers.
|
|
49
|
+
*/
|
|
50
|
+
export declare class TokenBucket {
|
|
51
|
+
private readonly config;
|
|
52
|
+
private readonly baseTpm;
|
|
53
|
+
private currentTpm;
|
|
54
|
+
private readonly minTpm;
|
|
55
|
+
private tokens;
|
|
56
|
+
private lastRefill;
|
|
57
|
+
private lastRateLimitTime;
|
|
58
|
+
private successesSinceLimit;
|
|
59
|
+
constructor(config?: Partial<RateLimiterConfig>);
|
|
60
|
+
private refill;
|
|
61
|
+
private maybeRecover;
|
|
62
|
+
/**
|
|
63
|
+
* Acquire tokens, waiting if necessary.
|
|
64
|
+
*
|
|
65
|
+
* @param estimatedTokens - Estimated token count for the request.
|
|
66
|
+
* @returns Time spent waiting in milliseconds (0 if immediate).
|
|
67
|
+
* @throws Error if waiting exceeds {@link RateLimiterConfig.maxWaitSeconds}.
|
|
68
|
+
*/
|
|
69
|
+
acquire(estimatedTokens: number): Promise<number>;
|
|
70
|
+
/**
|
|
71
|
+
* Record a 429 event -- adaptively reduce budget.
|
|
72
|
+
*
|
|
73
|
+
* Call this when the upstream API returns HTTP 429 or an equivalent
|
|
74
|
+
* rate-limit error.
|
|
75
|
+
*/
|
|
76
|
+
recordRateLimit(): void;
|
|
77
|
+
/**
|
|
78
|
+
* Record a successful request for adaptive recovery.
|
|
79
|
+
*
|
|
80
|
+
* Call this after each successful API response.
|
|
81
|
+
*/
|
|
82
|
+
recordSuccess(): void;
|
|
83
|
+
/** Current available token count. */
|
|
84
|
+
get availableTokens(): number;
|
|
85
|
+
/** Current tokens-per-minute budget (may be reduced after 429). */
|
|
86
|
+
get currentTokensPerMinute(): number;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Get or create the global rate limiter singleton.
|
|
90
|
+
*
|
|
91
|
+
* @param config - Optional configuration overrides (only used on first call).
|
|
92
|
+
* @returns The shared TokenBucket instance.
|
|
93
|
+
*/
|
|
94
|
+
export declare function getGlobalRateLimiter(config?: Partial<RateLimiterConfig>): TokenBucket;
|
|
95
|
+
/**
|
|
96
|
+
* Reset the global rate limiter (for testing).
|
|
97
|
+
*
|
|
98
|
+
* Clears the singleton so the next call to {@link getGlobalRateLimiter}
|
|
99
|
+
* creates a fresh instance.
|
|
100
|
+
*/
|
|
101
|
+
export declare function resetGlobalRateLimiter(): void;
|