@compilr-dev/agents 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1277 -0
- package/dist/agent.d.ts +1272 -0
- package/dist/agent.js +1912 -0
- package/dist/anchors/builtin.d.ts +24 -0
- package/dist/anchors/builtin.js +61 -0
- package/dist/anchors/index.d.ts +6 -0
- package/dist/anchors/index.js +5 -0
- package/dist/anchors/manager.d.ts +115 -0
- package/dist/anchors/manager.js +412 -0
- package/dist/anchors/types.d.ts +168 -0
- package/dist/anchors/types.js +10 -0
- package/dist/context/index.d.ts +12 -0
- package/dist/context/index.js +10 -0
- package/dist/context/manager.d.ts +224 -0
- package/dist/context/manager.js +770 -0
- package/dist/context/types.d.ts +377 -0
- package/dist/context/types.js +7 -0
- package/dist/costs/index.d.ts +8 -0
- package/dist/costs/index.js +7 -0
- package/dist/costs/tracker.d.ts +121 -0
- package/dist/costs/tracker.js +295 -0
- package/dist/costs/types.d.ts +157 -0
- package/dist/costs/types.js +8 -0
- package/dist/errors.d.ts +178 -0
- package/dist/errors.js +249 -0
- package/dist/guardrails/builtin.d.ts +27 -0
- package/dist/guardrails/builtin.js +223 -0
- package/dist/guardrails/index.d.ts +6 -0
- package/dist/guardrails/index.js +5 -0
- package/dist/guardrails/manager.d.ts +117 -0
- package/dist/guardrails/manager.js +288 -0
- package/dist/guardrails/types.d.ts +159 -0
- package/dist/guardrails/types.js +7 -0
- package/dist/hooks/index.d.ts +31 -0
- package/dist/hooks/index.js +29 -0
- package/dist/hooks/manager.d.ts +147 -0
- package/dist/hooks/manager.js +600 -0
- package/dist/hooks/types.d.ts +368 -0
- package/dist/hooks/types.js +12 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.js +73 -0
- package/dist/mcp/client.d.ts +93 -0
- package/dist/mcp/client.js +287 -0
- package/dist/mcp/errors.d.ts +60 -0
- package/dist/mcp/errors.js +78 -0
- package/dist/mcp/index.d.ts +43 -0
- package/dist/mcp/index.js +45 -0
- package/dist/mcp/manager.d.ts +120 -0
- package/dist/mcp/manager.js +276 -0
- package/dist/mcp/tools.d.ts +54 -0
- package/dist/mcp/tools.js +99 -0
- package/dist/mcp/types.d.ts +150 -0
- package/dist/mcp/types.js +40 -0
- package/dist/memory/index.d.ts +8 -0
- package/dist/memory/index.js +7 -0
- package/dist/memory/loader.d.ts +114 -0
- package/dist/memory/loader.js +463 -0
- package/dist/memory/types.d.ts +182 -0
- package/dist/memory/types.js +8 -0
- package/dist/messages/index.d.ts +82 -0
- package/dist/messages/index.js +155 -0
- package/dist/permissions/index.d.ts +5 -0
- package/dist/permissions/index.js +4 -0
- package/dist/permissions/manager.d.ts +125 -0
- package/dist/permissions/manager.js +379 -0
- package/dist/permissions/types.d.ts +162 -0
- package/dist/permissions/types.js +7 -0
- package/dist/providers/claude.d.ts +90 -0
- package/dist/providers/claude.js +348 -0
- package/dist/providers/index.d.ts +8 -0
- package/dist/providers/index.js +11 -0
- package/dist/providers/mock.d.ts +133 -0
- package/dist/providers/mock.js +204 -0
- package/dist/providers/types.d.ts +168 -0
- package/dist/providers/types.js +4 -0
- package/dist/rate-limit/index.d.ts +45 -0
- package/dist/rate-limit/index.js +47 -0
- package/dist/rate-limit/limiter.d.ts +104 -0
- package/dist/rate-limit/limiter.js +326 -0
- package/dist/rate-limit/provider-wrapper.d.ts +112 -0
- package/dist/rate-limit/provider-wrapper.js +201 -0
- package/dist/rate-limit/retry.d.ts +108 -0
- package/dist/rate-limit/retry.js +287 -0
- package/dist/rate-limit/types.d.ts +181 -0
- package/dist/rate-limit/types.js +22 -0
- package/dist/rehearsal/file-analyzer.d.ts +22 -0
- package/dist/rehearsal/file-analyzer.js +351 -0
- package/dist/rehearsal/git-analyzer.d.ts +22 -0
- package/dist/rehearsal/git-analyzer.js +472 -0
- package/dist/rehearsal/index.d.ts +35 -0
- package/dist/rehearsal/index.js +36 -0
- package/dist/rehearsal/manager.d.ts +100 -0
- package/dist/rehearsal/manager.js +290 -0
- package/dist/rehearsal/types.d.ts +235 -0
- package/dist/rehearsal/types.js +8 -0
- package/dist/skills/index.d.ts +160 -0
- package/dist/skills/index.js +282 -0
- package/dist/state/agent-state.d.ts +41 -0
- package/dist/state/agent-state.js +88 -0
- package/dist/state/checkpointer.d.ts +110 -0
- package/dist/state/checkpointer.js +362 -0
- package/dist/state/errors.d.ts +66 -0
- package/dist/state/errors.js +88 -0
- package/dist/state/index.d.ts +35 -0
- package/dist/state/index.js +37 -0
- package/dist/state/serializer.d.ts +55 -0
- package/dist/state/serializer.js +172 -0
- package/dist/state/types.d.ts +312 -0
- package/dist/state/types.js +14 -0
- package/dist/tools/builtin/bash-output.d.ts +61 -0
- package/dist/tools/builtin/bash-output.js +90 -0
- package/dist/tools/builtin/bash.d.ts +150 -0
- package/dist/tools/builtin/bash.js +354 -0
- package/dist/tools/builtin/edit.d.ts +50 -0
- package/dist/tools/builtin/edit.js +215 -0
- package/dist/tools/builtin/glob.d.ts +62 -0
- package/dist/tools/builtin/glob.js +244 -0
- package/dist/tools/builtin/grep.d.ts +74 -0
- package/dist/tools/builtin/grep.js +363 -0
- package/dist/tools/builtin/index.d.ts +44 -0
- package/dist/tools/builtin/index.js +69 -0
- package/dist/tools/builtin/kill-shell.d.ts +44 -0
- package/dist/tools/builtin/kill-shell.js +80 -0
- package/dist/tools/builtin/read-file.d.ts +57 -0
- package/dist/tools/builtin/read-file.js +184 -0
- package/dist/tools/builtin/shell-manager.d.ts +176 -0
- package/dist/tools/builtin/shell-manager.js +337 -0
- package/dist/tools/builtin/task.d.ts +202 -0
- package/dist/tools/builtin/task.js +350 -0
- package/dist/tools/builtin/todo.d.ts +207 -0
- package/dist/tools/builtin/todo.js +453 -0
- package/dist/tools/builtin/utils.d.ts +27 -0
- package/dist/tools/builtin/utils.js +70 -0
- package/dist/tools/builtin/web-fetch.d.ts +96 -0
- package/dist/tools/builtin/web-fetch.js +290 -0
- package/dist/tools/builtin/write-file.d.ts +54 -0
- package/dist/tools/builtin/write-file.js +147 -0
- package/dist/tools/define.d.ts +60 -0
- package/dist/tools/define.js +65 -0
- package/dist/tools/index.d.ts +10 -0
- package/dist/tools/index.js +37 -0
- package/dist/tools/registry.d.ts +79 -0
- package/dist/tools/registry.js +151 -0
- package/dist/tools/types.d.ts +59 -0
- package/dist/tools/types.js +4 -0
- package/dist/tracing/hooks.d.ts +58 -0
- package/dist/tracing/hooks.js +377 -0
- package/dist/tracing/index.d.ts +51 -0
- package/dist/tracing/index.js +55 -0
- package/dist/tracing/logging.d.ts +78 -0
- package/dist/tracing/logging.js +310 -0
- package/dist/tracing/manager.d.ts +160 -0
- package/dist/tracing/manager.js +468 -0
- package/dist/tracing/otel.d.ts +102 -0
- package/dist/tracing/otel.js +246 -0
- package/dist/tracing/types.d.ts +346 -0
- package/dist/tracing/types.js +38 -0
- package/dist/utils/index.d.ts +23 -0
- package/dist/utils/index.js +44 -0
- package/package.json +79 -0
package/dist/providers/types.d.ts

@@ -0,0 +1,168 @@

````typescript
/**
 * Core types for LLM providers
 */
/**
 * Message roles in a conversation
 */
export type MessageRole = 'user' | 'assistant' | 'system';
/**
 * Content block types
 */
export type ContentBlockType = 'text' | 'tool_use' | 'tool_result' | 'thinking';
/**
 * Text content block
 */
export interface TextBlock {
    type: 'text';
    text: string;
}
/**
 * Tool use content block (AI wants to call a tool)
 */
export interface ToolUseBlock {
    type: 'tool_use';
    id: string;
    name: string;
    input: Record<string, unknown>;
}
/**
 * Tool result content block (result of a tool call)
 */
export interface ToolResultBlock {
    type: 'tool_result';
    toolUseId: string;
    content: string;
    isError?: boolean;
}
/**
 * Thinking content block (Claude's reasoning process)
 */
export interface ThinkingBlock {
    type: 'thinking';
    thinking: string;
    /**
     * Encrypted signature for verification when passing back to API
     */
    signature?: string;
}
/**
 * Union of all content block types
 */
export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock | ThinkingBlock;
/**
 * A message in a conversation
 */
export interface Message {
    role: MessageRole;
    content: string | ContentBlock[];
}
/**
 * Token usage from an LLM response (returned on 'done' chunks)
 */
export interface LLMUsage {
    inputTokens: number;
    outputTokens: number;
    cacheReadTokens?: number;
    cacheCreationTokens?: number;
}
/**
 * Streaming chunk types
 */
export interface StreamChunk {
    type: 'text' | 'tool_use_start' | 'tool_use_delta' | 'tool_use_end' | 'thinking_start' | 'thinking_delta' | 'thinking_end' | 'done';
    text?: string;
    toolUse?: {
        id: string;
        name: string;
        input?: Record<string, unknown>;
    };
    /**
     * Thinking block data (for thinking_start/thinking_end)
     */
    thinking?: {
        thinking?: string;
        signature?: string;
    };
    /**
     * Token usage (only present on 'done' chunks)
     */
    usage?: LLMUsage;
    /**
     * Model that generated this response (only present on 'done' chunks)
     */
    model?: string;
}
/**
 * Extended thinking configuration
 *
 * @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
 */
export interface ThinkingConfig {
    /**
     * Enable or disable extended thinking
     */
    type: 'enabled' | 'disabled';
    /**
     * Token budget for thinking (minimum 1024, must be less than maxTokens)
     */
    budgetTokens: number;
}
/**
 * Options for chat requests
 */
export interface ChatOptions {
    model?: string;
    maxTokens?: number;
    temperature?: number;
    stopSequences?: string[];
    tools?: ToolDefinition[];
    /**
     * Extended thinking configuration (Claude-specific)
     *
     * When enabled, Claude will show its reasoning process before
     * providing the final response. Requires budget_tokens >= 1024.
     *
     * @example
     * ```typescript
     * thinking: { type: 'enabled', budgetTokens: 10000 }
     * ```
     */
    thinking?: ThinkingConfig;
}
/**
 * Tool definition for the LLM
 */
export interface ToolDefinition {
    name: string;
    description: string;
    inputSchema: {
        type: 'object';
        properties: Record<string, unknown>;
        required?: string[];
    };
}
/**
 * Result of a tool execution
 */
export interface ToolResult {
    toolUseId: string;
    result: unknown;
    isError?: boolean;
}
/**
 * LLM Provider interface - all providers must implement this
 */
export interface LLMProvider {
    /**
     * Provider name (e.g., 'claude', 'openai', 'gemini')
     */
    readonly name: string;
    /**
     * Send messages and get a streaming response
     */
    chat(messages: Message[], options?: ChatOptions): AsyncIterable<StreamChunk>;
    /**
     * Count tokens in messages (optional, provider-specific)
     */
    countTokens?(messages: Message[]): Promise<number>;
}
````
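For orientation, here is a minimal sketch of a custom provider built against these types: a hypothetical `EchoProvider` that streams the last user message back as one text chunk and then emits a `done` chunk with usage. It assumes the provider types are re-exported from the package root; adjust the import path if they are not.

```typescript
import type { LLMProvider, Message, ChatOptions, StreamChunk } from '@compilr-dev/agents';

// Hypothetical provider, for illustration only: implements the LLMProvider
// contract by yielding one 'text' chunk followed by a 'done' chunk with usage.
class EchoProvider implements LLMProvider {
  readonly name = 'echo';

  async *chat(messages: Message[], _options?: ChatOptions): AsyncIterable<StreamChunk> {
    const last = messages.at(-1);
    const text = typeof last?.content === 'string' ? last.content : '[non-text content]';
    yield { type: 'text', text };
    yield { type: 'done', usage: { inputTokens: 0, outputTokens: 0 }, model: 'echo-1' };
  }
}

// Usage: consume the stream exactly as you would from a real provider.
for await (const chunk of new EchoProvider().chat([{ role: 'user', content: 'hello' }])) {
  if (chunk.type === 'text') console.log(chunk.text);
}
```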
package/dist/rate-limit/index.d.ts

@@ -0,0 +1,45 @@

````typescript
/**
 * Rate Limiting & Automatic Retry
 *
 * This module provides:
 * - Token bucket rate limiter for API calls
 * - Automatic retry with exponential backoff
 * - Provider wrapper combining both features
 *
 * @example
 * ```typescript
 * import {
 *   createRateLimiter,
 *   withRetry,
 *   wrapWithRateLimit,
 *   RetryPresets,
 *   ProviderRateLimits,
 * } from '@compilr-dev/agents';
 *
 * // Use rate limiter directly
 * const limiter = createRateLimiter({
 *   requestsPerMinute: 60,
 *   tokensPerMinute: 100000,
 * });
 *
 * // Wrap existing provider
 * const provider = createClaudeProvider();
 * const rateLimited = wrapWithRateLimit(provider, {
 *   rateLimit: ProviderRateLimits.claude.tier1,
 *   retry: RetryPresets.respectful(),
 * });
 *
 * // Use with retry only
 * const result = await withRetry(
 *   () => someApiCall(),
 *   { maxRetries: 3, baseDelayMs: 1000 }
 * );
 * ```
 *
 * @module rate-limit
 */
export type { RateLimiterConfig, RateLimiterStats, RetryConfig, RateLimitRetryConfig, AcquireResult, RateLimiter, } from './types.js';
export { RateLimitExceededError, isRateLimitExceededError } from './types.js';
export { TokenBucketRateLimiter, createRateLimiter, createNoopRateLimiter } from './limiter.js';
export { withRetry, createRetryWithRateLimit, RetryPresets, RetryStats } from './retry.js';
export { RateLimitedProvider, wrapWithRateLimit, ProviderRateLimits } from './provider-wrapper.js';
````
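These exports also cover the failure path: when a limiter is configured with `throwOnLimit`, `acquire()` rejects instead of queueing, and `isRateLimitExceededError` distinguishes that rejection from other errors. A sketch of the pattern, assuming both functions are re-exported from the package root as in the module's own example (the wrapper function itself is illustrative):

```typescript
import { createRateLimiter, isRateLimitExceededError } from '@compilr-dev/agents';

const limiter = createRateLimiter({ requestsPerMinute: 30, throwOnLimit: true });

// Illustrative wrapper: fail fast on rate limits instead of queueing behind them.
async function guardedCall<T>(estimatedTokens: number, fn: () => Promise<T>): Promise<T | undefined> {
  try {
    await limiter.acquire(estimatedTokens); // rejects when the bucket is empty
  } catch (err) {
    if (isRateLimitExceededError(err)) {
      console.warn('Rate limited, skipping call', err);
      return undefined; // caller decides whether to retry later or drop the work
    }
    throw err;
  }
  try {
    return await fn();
  } finally {
    limiter.release(); // always free the concurrent slot
  }
}
```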
package/dist/rate-limit/index.js

@@ -0,0 +1,47 @@

````javascript
/**
 * Rate Limiting & Automatic Retry
 *
 * This module provides:
 * - Token bucket rate limiter for API calls
 * - Automatic retry with exponential backoff
 * - Provider wrapper combining both features
 *
 * @example
 * ```typescript
 * import {
 *   createRateLimiter,
 *   withRetry,
 *   wrapWithRateLimit,
 *   RetryPresets,
 *   ProviderRateLimits,
 * } from '@compilr-dev/agents';
 *
 * // Use rate limiter directly
 * const limiter = createRateLimiter({
 *   requestsPerMinute: 60,
 *   tokensPerMinute: 100000,
 * });
 *
 * // Wrap existing provider
 * const provider = createClaudeProvider();
 * const rateLimited = wrapWithRateLimit(provider, {
 *   rateLimit: ProviderRateLimits.claude.tier1,
 *   retry: RetryPresets.respectful(),
 * });
 *
 * // Use with retry only
 * const result = await withRetry(
 *   () => someApiCall(),
 *   { maxRetries: 3, baseDelayMs: 1000 }
 * );
 * ```
 *
 * @module rate-limit
 */
export { RateLimitExceededError, isRateLimitExceededError } from './types.js';
// Rate Limiter
export { TokenBucketRateLimiter, createRateLimiter, createNoopRateLimiter } from './limiter.js';
// Retry
export { withRetry, createRetryWithRateLimit, RetryPresets, RetryStats } from './retry.js';
// Provider Wrapper
export { RateLimitedProvider, wrapWithRateLimit, ProviderRateLimits } from './provider-wrapper.js';
````
package/dist/rate-limit/limiter.d.ts

@@ -0,0 +1,104 @@

````typescript
/**
 * Token Bucket Rate Limiter
 *
 * Implements a token bucket algorithm for rate limiting API requests.
 * Supports both request-based and token-based limits.
 */
import type { RateLimiterConfig, RateLimiterStats, RateLimiter, AcquireResult } from './types.js';
/**
 * Token bucket rate limiter implementation
 *
 * Uses the token bucket algorithm:
 * - Tokens are added at a constant rate (refill rate)
 * - Each request consumes tokens
 * - If insufficient tokens, request waits or fails
 *
 * @example
 * ```typescript
 * const limiter = new TokenBucketRateLimiter({
 *   requestsPerMinute: 60,
 *   tokensPerMinute: 100000,
 *   maxConcurrent: 5,
 * });
 *
 * // Before making a request
 * await limiter.acquire(1000); // estimated tokens
 *
 * try {
 *   const result = await makeRequest();
 *   limiter.reportUsage(actualTokens);
 * } finally {
 *   limiter.release();
 * }
 * ```
 */
export declare class TokenBucketRateLimiter implements RateLimiter {
    private readonly config;
    private requestTokens;
    private lastRequestRefill;
    private llmTokens;
    private lastTokenRefill;
    private concurrent;
    private totalRequests;
    private totalTokens;
    private rateLimitHits;
    private totalWaitTimeMs;
    private waiters;
    constructor(config?: RateLimiterConfig);
    /**
     * Acquire permission to make a request
     */
    acquire(estimatedTokens?: number): Promise<AcquireResult>;
    /**
     * Release a concurrent request slot
     */
    release(): void;
    /**
     * Report actual token usage
     */
    reportUsage(tokens: number): void;
    /**
     * Get current statistics
     */
    getStats(): RateLimiterStats;
    /**
     * Reset the rate limiter
     */
    reset(): void;
    /**
     * Check if a request can be made immediately
     */
    canAcquire(estimatedTokens?: number): boolean;
    /**
     * Refill token buckets based on elapsed time
     */
    private refill;
    /**
     * Check if can acquire without refilling
     */
    private canAcquireNow;
    /**
     * Actually acquire the tokens
     */
    private doAcquire;
    /**
     * Estimate wait time until tokens are available
     */
    private estimateWaitTime;
    /**
     * Wait and try to acquire
     */
    private waitAndAcquire;
    /**
     * Process waiting requests
     */
    private processWaiters;
}
/**
 * Create a rate limiter with the given configuration
 */
export declare function createRateLimiter(config?: RateLimiterConfig): RateLimiter;
/**
 * Create a no-op rate limiter (for testing or unlimited access)
 */
export declare function createNoopRateLimiter(): RateLimiter;
````
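Besides the blocking `acquire()` flow shown in the class's own example, the declaration exposes `canAcquire()` for a non-blocking check and `getStats()` for monitoring. A short sketch of both; the numbers and logging are illustrative, and `createRateLimiter` is assumed to be re-exported from the package root as in the module example.

```typescript
import { createRateLimiter } from '@compilr-dev/agents';

const limiter = createRateLimiter({ requestsPerMinute: 60, tokensPerMinute: 100_000 });

// Non-blocking check: skip low-priority work instead of queueing it.
if (limiter.canAcquire(2_000)) {
  await limiter.acquire(2_000);
  try {
    // ... make the request, then report what it actually consumed
    limiter.reportUsage(1_742);
  } finally {
    limiter.release();
  }
}

// Periodic monitoring of the shared limiter.
const stats = limiter.getStats();
console.log(
  `requests left: ${stats.availableRequests}, tokens left: ${stats.availableTokens}, ` +
  `hits: ${stats.rateLimitHits}, total wait: ${stats.totalWaitTimeMs}ms`,
);
```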
package/dist/rate-limit/limiter.js

@@ -0,0 +1,326 @@

````javascript
/**
 * Token Bucket Rate Limiter
 *
 * Implements a token bucket algorithm for rate limiting API requests.
 * Supports both request-based and token-based limits.
 */
import { RateLimitExceededError } from './types.js';
/**
 * Default configuration values
 */
const DEFAULTS = {
    requestsPerMinute: 60,
    tokensPerMinute: 0,
    maxConcurrent: 0,
    throwOnLimit: false,
};
/**
 * Token bucket rate limiter implementation
 *
 * Uses the token bucket algorithm:
 * - Tokens are added at a constant rate (refill rate)
 * - Each request consumes tokens
 * - If insufficient tokens, request waits or fails
 *
 * @example
 * ```typescript
 * const limiter = new TokenBucketRateLimiter({
 *   requestsPerMinute: 60,
 *   tokensPerMinute: 100000,
 *   maxConcurrent: 5,
 * });
 *
 * // Before making a request
 * await limiter.acquire(1000); // estimated tokens
 *
 * try {
 *   const result = await makeRequest();
 *   limiter.reportUsage(actualTokens);
 * } finally {
 *   limiter.release();
 * }
 * ```
 */
export class TokenBucketRateLimiter {
    config;
    // Request bucket
    requestTokens;
    lastRequestRefill;
    // LLM token bucket
    llmTokens;
    lastTokenRefill;
    // Concurrent request tracking
    concurrent = 0;
    // Statistics
    totalRequests = 0;
    totalTokens = 0;
    rateLimitHits = 0;
    totalWaitTimeMs = 0;
    // Pending waiters queue
    waiters = [];
    constructor(config = {}) {
        this.config = {
            requestsPerMinute: config.requestsPerMinute ?? DEFAULTS.requestsPerMinute,
            tokensPerMinute: config.tokensPerMinute ?? DEFAULTS.tokensPerMinute,
            maxConcurrent: config.maxConcurrent ?? DEFAULTS.maxConcurrent,
            throwOnLimit: config.throwOnLimit ?? DEFAULTS.throwOnLimit,
        };
        // Initialize buckets to full capacity
        this.requestTokens = this.config.requestsPerMinute;
        this.llmTokens = this.config.tokensPerMinute;
        this.lastRequestRefill = Date.now();
        this.lastTokenRefill = Date.now();
    }
    /**
     * Acquire permission to make a request
     */
    async acquire(estimatedTokens = 0) {
        const startTime = Date.now();
        // Refill buckets
        this.refill();
        // Check if we can acquire immediately
        if (this.canAcquireNow(estimatedTokens)) {
            return this.doAcquire(estimatedTokens, startTime);
        }
        // Calculate wait time
        const waitMs = this.estimateWaitTime(estimatedTokens);
        // If throwOnLimit is true, throw immediately
        if (this.config.throwOnLimit) {
            this.rateLimitHits++;
            throw new RateLimitExceededError(`Rate limit exceeded. Estimated wait: ${String(waitMs)}ms`, waitMs);
        }
        // Wait and retry
        return this.waitAndAcquire(estimatedTokens, startTime);
    }
    /**
     * Release a concurrent request slot
     */
    release() {
        if (this.concurrent > 0) {
            this.concurrent--;
        }
        // Process waiting requests
        this.processWaiters();
    }
    /**
     * Report actual token usage
     */
    reportUsage(tokens) {
        this.totalTokens += tokens;
        // Deduct from token bucket if tracking
        if (this.config.tokensPerMinute > 0) {
            // Already deducted estimated, adjust for actual
            // This is a simplified approach - actual might be different from estimated
            this.llmTokens = Math.max(0, this.llmTokens);
        }
    }
    /**
     * Get current statistics
     */
    getStats() {
        this.refill();
        return {
            availableRequests: Math.floor(this.requestTokens),
            availableTokens: Math.floor(this.llmTokens),
            currentConcurrent: this.concurrent,
            totalRequests: this.totalRequests,
            totalTokens: this.totalTokens,
            rateLimitHits: this.rateLimitHits,
            totalWaitTimeMs: this.totalWaitTimeMs,
        };
    }
    /**
     * Reset the rate limiter
     */
    reset() {
        this.requestTokens = this.config.requestsPerMinute;
        this.llmTokens = this.config.tokensPerMinute;
        this.lastRequestRefill = Date.now();
        this.lastTokenRefill = Date.now();
        this.concurrent = 0;
        this.totalRequests = 0;
        this.totalTokens = 0;
        this.rateLimitHits = 0;
        this.totalWaitTimeMs = 0;
        // Reject all waiters
        for (const waiter of this.waiters) {
            waiter.resolve({
                acquired: false,
                waitedMs: Date.now() - waiter.startTime,
                estimatedWaitMs: 0,
            });
        }
        this.waiters = [];
    }
    /**
     * Check if a request can be made immediately
     */
    canAcquire(estimatedTokens = 0) {
        this.refill();
        return this.canAcquireNow(estimatedTokens);
    }
    /**
     * Refill token buckets based on elapsed time
     */
    refill() {
        const now = Date.now();
        // Refill request tokens
        const requestElapsed = now - this.lastRequestRefill;
        const requestRefillRate = this.config.requestsPerMinute / 60000; // per ms
        this.requestTokens = Math.min(this.config.requestsPerMinute, this.requestTokens + requestElapsed * requestRefillRate);
        this.lastRequestRefill = now;
        // Refill LLM tokens
        if (this.config.tokensPerMinute > 0) {
            const tokenElapsed = now - this.lastTokenRefill;
            const tokenRefillRate = this.config.tokensPerMinute / 60000; // per ms
            this.llmTokens = Math.min(this.config.tokensPerMinute, this.llmTokens + tokenElapsed * tokenRefillRate);
            this.lastTokenRefill = now;
        }
    }
    /**
     * Check if can acquire without refilling
     */
    canAcquireNow(estimatedTokens) {
        // Check request limit
        if (this.requestTokens < 1) {
            return false;
        }
        // Check token limit
        if (this.config.tokensPerMinute > 0 && this.llmTokens < estimatedTokens) {
            return false;
        }
        // Check concurrent limit
        if (this.config.maxConcurrent > 0 && this.concurrent >= this.config.maxConcurrent) {
            return false;
        }
        return true;
    }
    /**
     * Actually acquire the tokens
     */
    doAcquire(estimatedTokens, startTime) {
        this.requestTokens--;
        if (this.config.tokensPerMinute > 0) {
            this.llmTokens -= estimatedTokens;
        }
        this.concurrent++;
        this.totalRequests++;
        const waitedMs = Date.now() - startTime;
        this.totalWaitTimeMs += waitedMs;
        return {
            acquired: true,
            waitedMs,
        };
    }
    /**
     * Estimate wait time until tokens are available
     */
    estimateWaitTime(estimatedTokens) {
        let waitMs = 0;
        // Time to wait for request token
        if (this.requestTokens < 1) {
            const requestRefillRate = this.config.requestsPerMinute / 60000;
            const requestWait = (1 - this.requestTokens) / requestRefillRate;
            waitMs = Math.max(waitMs, requestWait);
        }
        // Time to wait for LLM tokens
        if (this.config.tokensPerMinute > 0 && this.llmTokens < estimatedTokens) {
            const tokenRefillRate = this.config.tokensPerMinute / 60000;
            const tokenWait = (estimatedTokens - this.llmTokens) / tokenRefillRate;
            waitMs = Math.max(waitMs, tokenWait);
        }
        // For concurrent limit, we can't estimate well - use a fixed delay
        if (this.config.maxConcurrent > 0 && this.concurrent >= this.config.maxConcurrent) {
            waitMs = Math.max(waitMs, 100); // Minimum wait for concurrent
        }
        return Math.ceil(waitMs);
    }
    /**
     * Wait and try to acquire
     */
    waitAndAcquire(estimatedTokens, startTime) {
        this.rateLimitHits++;
        return new Promise((resolve) => {
            this.waiters.push({
                resolve,
                estimatedTokens,
                startTime,
            });
            // Set up a timer to check periodically
            const checkInterval = setInterval(() => {
                this.refill();
                this.processWaiters();
            }, 100);
            // Timeout after 5 minutes max wait
            const timeout = setTimeout(() => {
                clearInterval(checkInterval);
                const index = this.waiters.findIndex((w) => w.resolve === resolve);
                if (index !== -1) {
                    this.waiters.splice(index, 1);
                    resolve({
                        acquired: false,
                        waitedMs: Date.now() - startTime,
                        estimatedWaitMs: this.estimateWaitTime(estimatedTokens),
                    });
                }
            }, 5 * 60 * 1000);
            // Store cleanup function
            const originalResolve = resolve;
            const wrappedResolve = (result) => {
                clearInterval(checkInterval);
                clearTimeout(timeout);
                originalResolve(result);
            };
            // Update the waiter with wrapped resolve
            const waiterIndex = this.waiters.findIndex((w) => w.resolve === resolve);
            if (waiterIndex !== -1) {
                this.waiters[waiterIndex].resolve = wrappedResolve;
            }
        });
    }
    /**
     * Process waiting requests
     */
    processWaiters() {
        // Process waiters in FIFO order
        while (this.waiters.length > 0) {
            const waiter = this.waiters[0];
            this.refill();
            if (this.canAcquireNow(waiter.estimatedTokens)) {
                this.waiters.shift();
                waiter.resolve(this.doAcquire(waiter.estimatedTokens, waiter.startTime));
            }
            else {
                // Can't process more waiters yet
                break;
            }
        }
    }
}
/**
 * Create a rate limiter with the given configuration
 */
export function createRateLimiter(config) {
    return new TokenBucketRateLimiter(config);
}
/**
 * Create a no-op rate limiter (for testing or unlimited access)
 */
export function createNoopRateLimiter() {
    return {
        acquire: () => Promise.resolve({ acquired: true, waitedMs: 0 }),
        release: () => { },
        reportUsage: () => { },
        getStats: () => ({
            availableRequests: Infinity,
            availableTokens: Infinity,
            currentConcurrent: 0,
            totalRequests: 0,
            totalTokens: 0,
            rateLimitHits: 0,
            totalWaitTimeMs: 0,
        }),
        reset: () => { },
        canAcquire: () => true,
    };
}
````
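The wait estimates above follow directly from the refill rate of `limit / 60000` tokens per millisecond: with `requestsPerMinute: 120` the request bucket regains one slot every 500 ms, so once the initial burst capacity is drained the next `acquire()` resolves roughly half a second later (the 100 ms polling interval adds some slack). A rough, illustrative check of that behaviour, assuming `createRateLimiter` is re-exported from the package root:

```typescript
import { createRateLimiter } from '@compilr-dev/agents';

// 120 requests/min refills at 120 / 60000 = 0.002 requests per ms,
// i.e. one request every 500 ms once the bucket is drained.
const limiter = createRateLimiter({ requestsPerMinute: 120 });

// The bucket starts full, so the initial burst is not delayed.
for (let i = 0; i < 120; i++) {
  await limiter.acquire();
  limiter.release();
}

// The next call has to wait for the bucket to refill.
const result = await limiter.acquire();
limiter.release();
console.log(`waited ~${result.waitedMs}ms`);   // expect roughly 500-600 ms
console.log(limiter.getStats().rateLimitHits); // expect 1 queued acquire
```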