@reactive-agents/llm-provider 0.7.8 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +545 -14
- package/dist/index.js +979 -169
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -3,6 +3,27 @@ import * as effect_Cause from 'effect/Cause';
|
|
|
3
3
|
import * as effect_Types from 'effect/Types';
|
|
4
4
|
import * as effect_Duration from 'effect/Duration';
|
|
5
5
|
|
|
6
|
+
/**
|
|
7
|
+
* ProviderCapabilities — static capability declaration for each LLM provider.
|
|
8
|
+
*
|
|
9
|
+
* These are coarse-grained, model-agnostic flags that reflect what the
|
|
10
|
+
* provider's API reliably supports. They let the framework choose between
|
|
11
|
+
* native function calling and structured-output fallback paths without
|
|
12
|
+
* querying the provider at runtime.
|
|
13
|
+
*/
|
|
14
|
+
interface ProviderCapabilities {
|
|
15
|
+
/** Provider supports native function / tool calling (structured tool_use). */
|
|
16
|
+
readonly supportsToolCalling: boolean;
|
|
17
|
+
/** Provider supports streaming completions. */
|
|
18
|
+
readonly supportsStreaming: boolean;
|
|
19
|
+
/** Provider supports structured / JSON output modes natively. */
|
|
20
|
+
readonly supportsStructuredOutput: boolean;
|
|
21
|
+
/** Provider can return per-token log probabilities. */
|
|
22
|
+
readonly supportsLogprobs: boolean;
|
|
23
|
+
}
|
|
24
|
+
/** Safe defaults — assumes minimal capabilities for unknown providers. */
|
|
25
|
+
declare const DEFAULT_CAPABILITIES: ProviderCapabilities;
|
|
26
|
+
|
|
6
27
|
/**
|
|
7
28
|
* Schema for LLM provider selection.
|
|
8
29
|
* Supported providers: anthropic, openai, ollama, gemini, litellm, custom.
|
|
@@ -482,6 +503,8 @@ type LLMMessage = {
|
|
|
482
503
|
readonly role: "tool";
|
|
483
504
|
/** Tool call ID this result corresponds to */
|
|
484
505
|
readonly toolCallId: string;
|
|
506
|
+
/** Name of the tool that produced this result */
|
|
507
|
+
readonly toolName?: string;
|
|
485
508
|
/** Plain text result/output */
|
|
486
509
|
readonly content: string;
|
|
487
510
|
};
|
|
@@ -499,6 +522,10 @@ type LLMMessage = {
|
|
|
499
522
|
* };
|
|
500
523
|
* ```
|
|
501
524
|
*/
|
|
525
|
+
/**
|
|
526
|
+
* Schema for token usage statistics from an LLM response.
|
|
527
|
+
* Used for cost tracking, budget enforcement, and observability.
|
|
528
|
+
*/
|
|
502
529
|
declare const TokenUsageSchema: Schema.Struct<{
|
|
503
530
|
/** Tokens consumed by the input (messages + system prompt) */
|
|
504
531
|
inputTokens: typeof Schema.Number;
|
|
@@ -590,6 +617,30 @@ declare const ToolCallSchema: Schema.Struct<{
|
|
|
590
617
|
* When the model decides to call a tool, this describes which tool and with what inputs.
|
|
591
618
|
*/
|
|
592
619
|
type ToolCall = Schema.Schema.Type<typeof ToolCallSchema>;
|
|
620
|
+
/**
|
|
621
|
+
* Log probability information for a single token.
|
|
622
|
+
* Returned by providers that support logprobs (OpenAI, Ollama).
|
|
623
|
+
*
|
|
624
|
+
* @example
|
|
625
|
+
* ```typescript
|
|
626
|
+
* const logprob: TokenLogprob = {
|
|
627
|
+
* token: "Paris",
|
|
628
|
+
* logprob: -0.0234,
|
|
629
|
+
* topLogprobs: [
|
|
630
|
+
* { token: "Paris", logprob: -0.0234 },
|
|
631
|
+
* { token: "London", logprob: -3.89 },
|
|
632
|
+
* ]
|
|
633
|
+
* };
|
|
634
|
+
* ```
|
|
635
|
+
*/
|
|
636
|
+
type TokenLogprob = {
|
|
637
|
+
readonly token: string;
|
|
638
|
+
readonly logprob: number;
|
|
639
|
+
readonly topLogprobs?: readonly {
|
|
640
|
+
token: string;
|
|
641
|
+
logprob: number;
|
|
642
|
+
}[];
|
|
643
|
+
};
|
|
593
644
|
/**
|
|
594
645
|
* Request to the LLM for a completion.
|
|
595
646
|
* Includes messages, model configuration, tool definitions, and sampling parameters.
|
|
@@ -630,6 +681,10 @@ type CompletionRequest = {
|
|
|
630
681
|
readonly tools?: readonly ToolDefinition[];
|
|
631
682
|
/** System prompt (optional, prepended to user messages) */
|
|
632
683
|
readonly systemPrompt?: string;
|
|
684
|
+
/** Request log probabilities for each output token (optional) */
|
|
685
|
+
readonly logprobs?: boolean;
|
|
686
|
+
/** Number of most likely tokens to return log probabilities for (optional, 1-20) */
|
|
687
|
+
readonly topLogprobs?: number;
|
|
633
688
|
};
|
|
634
689
|
/**
|
|
635
690
|
* Schema for LLM response.
|
|
@@ -675,6 +730,15 @@ declare const CompletionResponseSchema: Schema.Struct<{
|
|
|
675
730
|
}>>>;
|
|
676
731
|
/** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
|
|
677
732
|
thinking: Schema.optional<typeof Schema.String>;
|
|
733
|
+
/** Token-level log probabilities (when requested via logprobs in CompletionRequest) */
|
|
734
|
+
logprobs: Schema.optional<Schema.Array$<Schema.Struct<{
|
|
735
|
+
token: typeof Schema.String;
|
|
736
|
+
logprob: typeof Schema.Number;
|
|
737
|
+
topLogprobs: Schema.optional<Schema.Array$<Schema.Struct<{
|
|
738
|
+
token: typeof Schema.String;
|
|
739
|
+
logprob: typeof Schema.Number;
|
|
740
|
+
}>>>;
|
|
741
|
+
}>>>;
|
|
678
742
|
}>;
|
|
679
743
|
/**
|
|
680
744
|
* LLM response to a completion request.
|
|
@@ -729,6 +793,11 @@ type StreamEvent = {
|
|
|
729
793
|
readonly type: "usage";
|
|
730
794
|
/** Final token usage for the request */
|
|
731
795
|
readonly usage: TokenUsage;
|
|
796
|
+
} | {
|
|
797
|
+
/** Token-level log probabilities (accumulated over the full response) */
|
|
798
|
+
readonly type: "logprobs";
|
|
799
|
+
/** Per-token logprob data */
|
|
800
|
+
readonly logprobs: readonly TokenLogprob[];
|
|
732
801
|
} | {
|
|
733
802
|
/** Error occurred during streaming */
|
|
734
803
|
readonly type: "error";
|
|
@@ -923,8 +992,21 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
|
|
|
923
992
|
/**
|
|
924
993
|
* Report structured output capabilities for this provider.
|
|
925
994
|
* Used by the structured output pipeline to select optimal JSON extraction strategy.
|
|
995
|
+
*
|
|
996
|
+
* @deprecated Superseded by `capabilities()`. This method is retained for backward
|
|
997
|
+
* compatibility. New code should use `capabilities()` and read
|
|
998
|
+
* `supportsStructuredOutput` from `ProviderCapabilities`.
|
|
926
999
|
*/
|
|
927
1000
|
readonly getStructuredOutputCapabilities: () => Effect.Effect<StructuredOutputCapabilities, never>;
|
|
1001
|
+
/**
|
|
1002
|
+
* Declare the provider's runtime capabilities.
|
|
1003
|
+
* Returns a static, pure value — no API calls are made.
|
|
1004
|
+
*
|
|
1005
|
+
* Subsumes `getStructuredOutputCapabilities()`. Use this method for all
|
|
1006
|
+
* new provider-capability checks (tool calling, streaming, structured output,
|
|
1007
|
+
* logprobs).
|
|
1008
|
+
*/
|
|
1009
|
+
readonly capabilities: () => Effect.Effect<ProviderCapabilities, never>;
|
|
928
1010
|
}>;
|
|
929
1011
|
/**
|
|
930
1012
|
* Core LLM service — all LLM interactions go through this.
|
|
@@ -1054,6 +1136,22 @@ declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
|
|
|
1054
1136
|
* ```
|
|
1055
1137
|
*/
|
|
1056
1138
|
readonly observabilityVerbosity: ObservabilityVerbosity;
|
|
1139
|
+
/**
|
|
1140
|
+
* Custom pricing registry for calculating token costs.
|
|
1141
|
+
* Maps model identifiers to input/output token costs per 1 million tokens.
|
|
1142
|
+
* Overrides built-in framework pricing if an exact match is found.
|
|
1143
|
+
*
|
|
1144
|
+
* @example
|
|
1145
|
+
* ```typescript
|
|
1146
|
+
* pricingRegistry: {
|
|
1147
|
+
* "my-fine-tuned-model": { input: 0.5, output: 1.5 }
|
|
1148
|
+
* }
|
|
1149
|
+
* ```
|
|
1150
|
+
*/
|
|
1151
|
+
readonly pricingRegistry?: Record<string, {
|
|
1152
|
+
readonly input: number;
|
|
1153
|
+
readonly output: number;
|
|
1154
|
+
}>;
|
|
1057
1155
|
}>;
|
|
1058
1156
|
/**
|
|
1059
1157
|
* LLM service configuration.
|
|
@@ -1226,6 +1324,22 @@ declare const llmConfigFromEnv: {
|
|
|
1226
1324
|
* ```
|
|
1227
1325
|
*/
|
|
1228
1326
|
readonly observabilityVerbosity: ObservabilityVerbosity;
|
|
1327
|
+
/**
|
|
1328
|
+
* Custom pricing registry for calculating token costs.
|
|
1329
|
+
* Maps model identifiers to input/output token costs per 1 million tokens.
|
|
1330
|
+
* Overrides built-in framework pricing if an exact match is found.
|
|
1331
|
+
*
|
|
1332
|
+
* @example
|
|
1333
|
+
* ```typescript
|
|
1334
|
+
* pricingRegistry: {
|
|
1335
|
+
* "my-fine-tuned-model": { input: 0.5, output: 1.5 }
|
|
1336
|
+
* }
|
|
1337
|
+
* ```
|
|
1338
|
+
*/
|
|
1339
|
+
readonly pricingRegistry?: Record<string, {
|
|
1340
|
+
readonly input: number;
|
|
1341
|
+
readonly output: number;
|
|
1342
|
+
}>;
|
|
1229
1343
|
};
|
|
1230
1344
|
/**
|
|
1231
1345
|
* Effect-TS Layer that provides LLMConfig from environment variables.
|
|
@@ -1286,23 +1400,49 @@ declare const GeminiProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
|
|
|
1286
1400
|
|
|
1287
1401
|
declare const LiteLLMProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
|
|
1288
1402
|
|
|
1403
|
+
interface ToolCallSpec {
|
|
1404
|
+
name: string;
|
|
1405
|
+
args: Record<string, unknown>;
|
|
1406
|
+
id?: string;
|
|
1407
|
+
}
|
|
1408
|
+
type TestTurn = {
|
|
1409
|
+
text: string;
|
|
1410
|
+
match?: string;
|
|
1411
|
+
} | {
|
|
1412
|
+
json: unknown;
|
|
1413
|
+
match?: string;
|
|
1414
|
+
} | {
|
|
1415
|
+
toolCall: ToolCallSpec;
|
|
1416
|
+
match?: string;
|
|
1417
|
+
} | {
|
|
1418
|
+
toolCalls: ToolCallSpec[];
|
|
1419
|
+
match?: string;
|
|
1420
|
+
} | {
|
|
1421
|
+
error: string;
|
|
1422
|
+
match?: string;
|
|
1423
|
+
};
|
|
1289
1424
|
/**
|
|
1290
|
-
* Create a deterministic test LLM service.
|
|
1291
|
-
*
|
|
1425
|
+
* Create a deterministic test LLM service using a scenario of sequential turns.
|
|
1426
|
+
*
|
|
1427
|
+
* Turns are consumed in order. Each LLM call scans forward from the current
|
|
1428
|
+
* position for the first matching turn (or unconditional turn). The last turn
|
|
1429
|
+
* repeats when the scenario is exhausted, so single-turn tests need no special
|
|
1430
|
+
* handling.
|
|
1292
1431
|
*
|
|
1293
1432
|
* Usage:
|
|
1294
1433
|
* ```ts
|
|
1295
|
-
* const layer = TestLLMServiceLayer(
|
|
1296
|
-
*
|
|
1297
|
-
*
|
|
1298
|
-
*
|
|
1434
|
+
* const layer = TestLLMServiceLayer([
|
|
1435
|
+
* { toolCall: { name: "web-search", args: { query: "AI news" } } },
|
|
1436
|
+
* { text: "Here is the summary..." },
|
|
1437
|
+
* ]);
|
|
1299
1438
|
* ```
|
|
1300
1439
|
*/
|
|
1301
|
-
declare const TestLLMService: (
|
|
1440
|
+
declare const TestLLMService: (scenario: TestTurn[]) => typeof LLMService.Service;
|
|
1302
1441
|
/**
|
|
1303
|
-
* Create a test Layer for LLMService with
|
|
1442
|
+
* Create a test Layer for LLMService with a deterministic turn scenario.
|
|
1443
|
+
* Turns are consumed sequentially; the last turn repeats when exhausted.
|
|
1304
1444
|
*/
|
|
1305
|
-
declare const TestLLMServiceLayer: (
|
|
1445
|
+
declare const TestLLMServiceLayer: (scenario?: TestTurn[]) => Layer.Layer<LLMService, never, never>;
|
|
1306
1446
|
|
|
1307
1447
|
/**
|
|
1308
1448
|
* Estimate token count for messages.
|
|
@@ -1310,10 +1450,44 @@ declare const TestLLMServiceLayer: (responses?: Record<string, string>) => Layer
|
|
|
1310
1450
|
* This is used as a fallback when the provider's token counting API is unavailable.
|
|
1311
1451
|
*/
|
|
1312
1452
|
declare const estimateTokenCount: (messages: readonly LLMMessage[]) => Effect.Effect<number, never>;
|
|
1453
|
+
/**
|
|
1454
|
+
* Provider-specific caching discount information.
|
|
1455
|
+
* Each provider handles cached tokens differently:
|
|
1456
|
+
* - Anthropic: cache_read = 0.1× base, cache_write = 1.25× base
|
|
1457
|
+
* - OpenAI: cached = 0.5× base (automatic for >1024 token prompts)
|
|
1458
|
+
* - Gemini: cached = ~0.25× base (context caching)
|
|
1459
|
+
*/
|
|
1460
|
+
interface CacheUsage {
|
|
1461
|
+
/** Tokens read from Anthropic prompt cache (billed at 10% of base input price) */
|
|
1462
|
+
readonly cache_read_input_tokens?: number;
|
|
1463
|
+
/** Tokens written to Anthropic prompt cache (billed at 125% of base input price) */
|
|
1464
|
+
readonly cache_creation_input_tokens?: number;
|
|
1465
|
+
/** Tokens served from OpenAI's automatic prompt cache (billed at 50% of base input price) */
|
|
1466
|
+
readonly cached_tokens?: number;
|
|
1467
|
+
/** Tokens served from Gemini context cache (billed at ~25% of base input price) */
|
|
1468
|
+
readonly cached_content_token_count?: number;
|
|
1469
|
+
}
|
|
1313
1470
|
/**
|
|
1314
1471
|
* Calculate cost in USD given token counts and model name.
|
|
1315
|
-
|
|
1316
|
-
|
|
1472
|
+
* Supports provider-specific caching discounts:
|
|
1473
|
+
* - Anthropic: prompt caching (10% read cost, 125% write cost)
|
|
1474
|
+
* - OpenAI: automatic caching (50% cost for cached tokens)
|
|
1475
|
+
* - Gemini: context caching (~25% cost for cached tokens)
|
|
1476
|
+
*
|
|
1477
|
+
* @param inputTokens - Total input tokens from provider response
|
|
1478
|
+
* @param outputTokens - Total output tokens from provider response
|
|
1479
|
+
* @param model - Model identifier for pricing lookup
|
|
1480
|
+
* @param usage - Provider-specific cache token breakdown
|
|
1481
|
+
* @param registry - Custom pricing overrides
|
|
1482
|
+
* @param pricing - Direct per-1M pricing (e.g. from LiteLLM proxy)
|
|
1483
|
+
*/
|
|
1484
|
+
declare const calculateCost: (inputTokens: number, outputTokens: number, model: string, usage?: CacheUsage, registry?: Record<string, {
|
|
1485
|
+
input: number;
|
|
1486
|
+
output: number;
|
|
1487
|
+
}>, pricing?: {
|
|
1488
|
+
input?: number;
|
|
1489
|
+
output?: number;
|
|
1490
|
+
}) => number;
|
|
1317
1491
|
|
|
1318
1492
|
/**
|
|
1319
1493
|
* Retry policy for LLM calls.
|
|
@@ -1328,6 +1502,44 @@ type CircuitBreakerConfig = {
|
|
|
1328
1502
|
};
|
|
1329
1503
|
declare const defaultCircuitBreakerConfig: CircuitBreakerConfig;
|
|
1330
1504
|
|
|
1505
|
+
/**
|
|
1506
|
+
* Token costs per 1 million tokens in USD.
|
|
1507
|
+
*/
|
|
1508
|
+
interface ModelPricing {
|
|
1509
|
+
readonly input: number;
|
|
1510
|
+
readonly output: number;
|
|
1511
|
+
}
|
|
1512
|
+
/**
|
|
1513
|
+
* Normalized interface for dynamic LLM pricing providers.
|
|
1514
|
+
* Implement this interface to fetch live pricing from APIs or custom sources.
|
|
1515
|
+
*/
|
|
1516
|
+
interface PricingProvider {
|
|
1517
|
+
/**
|
|
1518
|
+
* Fetch and return a pricing registry for all available models.
|
|
1519
|
+
* Maps model identifiers (e.g. "gpt-4o", "anthropic/claude-3-opus") to their USD cost per 1M tokens.
|
|
1520
|
+
*/
|
|
1521
|
+
fetchPricing(): Effect.Effect<Record<string, ModelPricing>, Error, never>;
|
|
1522
|
+
}
|
|
1523
|
+
/**
|
|
1524
|
+
* OpenRouter Pricing Provider.
|
|
1525
|
+
* Fetches the latest live pricing for all 100+ models available on OpenRouter.
|
|
1526
|
+
* OpenRouter model IDs look like: "openai/gpt-4o", "meta-llama/llama-3-70b-instruct"
|
|
1527
|
+
*/
|
|
1528
|
+
declare const openRouterPricingProvider: PricingProvider;
|
|
1529
|
+
/**
|
|
1530
|
+
* Custom URL Pricing Provider.
|
|
1531
|
+
* Fetches pricing from a custom HTTP endpoint (like a GitHub Gist) that returns a JSON record.
|
|
1532
|
+
*
|
|
1533
|
+
* @example
|
|
1534
|
+
* ```json
|
|
1535
|
+
* {
|
|
1536
|
+
* "my-fine-tuned-model": { "input": 0.5, "output": 1.5 },
|
|
1537
|
+
* "another-model": { "input": 2.0, "output": 4.0 }
|
|
1538
|
+
* }
|
|
1539
|
+
* ```
|
|
1540
|
+
*/
|
|
1541
|
+
declare const urlPricingProvider: (url: string) => PricingProvider;
|
|
1542
|
+
|
|
1331
1543
|
/**
|
|
1332
1544
|
* Schema for ReAct action parsing.
|
|
1333
1545
|
*/
|
|
@@ -1422,11 +1634,14 @@ declare function getProviderDefaultModel(provider: string): string | undefined;
|
|
|
1422
1634
|
* Create the LLM provider layer for a specific provider.
|
|
1423
1635
|
* Uses env vars for configuration by default.
|
|
1424
1636
|
*/
|
|
1425
|
-
declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test",
|
|
1637
|
+
declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testScenario?: TestTurn[], model?: string, modelParams?: {
|
|
1426
1638
|
thinking?: boolean;
|
|
1427
1639
|
temperature?: number;
|
|
1428
1640
|
maxTokens?: number;
|
|
1429
|
-
}, circuitBreaker?: Partial<CircuitBreakerConfig
|
|
1641
|
+
}, circuitBreaker?: Partial<CircuitBreakerConfig>, pricingRegistry?: Record<string, {
|
|
1642
|
+
readonly input: number;
|
|
1643
|
+
readonly output: number;
|
|
1644
|
+
}>) => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1430
1645
|
/**
|
|
1431
1646
|
* LLM layer with custom config (for programmatic use).
|
|
1432
1647
|
*/
|
|
@@ -1476,4 +1691,320 @@ interface CircuitBreaker {
|
|
|
1476
1691
|
*/
|
|
1477
1692
|
declare const makeCircuitBreaker: (config?: Partial<CircuitBreakerConfig>) => CircuitBreaker;
|
|
1478
1693
|
|
|
1479
|
-
|
|
1694
|
+
/**
|
|
1695
|
+
* Rate Limiter — throttles LLM requests BEFORE they hit the API to prevent
|
|
1696
|
+
* 429 errors. Uses a sliding window algorithm for both requests-per-minute
|
|
1697
|
+
* and tokens-per-minute limits, plus a concurrency semaphore.
|
|
1698
|
+
*/
|
|
1699
|
+
|
|
1700
|
+
/**
|
|
1701
|
+
* Configuration for the rate limiter.
|
|
1702
|
+
*
|
|
1703
|
+
* @example
|
|
1704
|
+
* ```typescript
|
|
1705
|
+
* const config: RateLimiterConfig = {
|
|
1706
|
+
* requestsPerMinute: 60,
|
|
1707
|
+
* tokensPerMinute: 100_000,
|
|
1708
|
+
* maxConcurrent: 10,
|
|
1709
|
+
* };
|
|
1710
|
+
* ```
|
|
1711
|
+
*/
|
|
1712
|
+
interface RateLimiterConfig {
|
|
1713
|
+
/** Maximum requests per minute (sliding window). Default: 60 */
|
|
1714
|
+
readonly requestsPerMinute?: number;
|
|
1715
|
+
/** Maximum estimated input tokens per minute (sliding window). Default: 100,000 */
|
|
1716
|
+
readonly tokensPerMinute?: number;
|
|
1717
|
+
/** Maximum concurrent in-flight requests. Default: 10 */
|
|
1718
|
+
readonly maxConcurrent?: number;
|
|
1719
|
+
}
|
|
1720
|
+
interface RateLimiter {
|
|
1721
|
+
/**
|
|
1722
|
+
* Acquire a rate limiter slot. Returns an Effect that resolves when a slot
|
|
1723
|
+
* is available. If the limit is hit, the Effect will delay until the oldest
|
|
1724
|
+
* entry in the sliding window expires.
|
|
1725
|
+
*
|
|
1726
|
+
* @param messages - Optional messages to estimate token count for token-based limiting.
|
|
1727
|
+
* When omitted, only request-count and concurrency limits apply.
|
|
1728
|
+
*/
|
|
1729
|
+
readonly acquire: (messages?: readonly LLMMessage[]) => Effect.Effect<void, never>;
|
|
1730
|
+
/**
|
|
1731
|
+
* Signal that a request has completed (decrements concurrent count).
|
|
1732
|
+
* Must be called after every `acquire()` once the request finishes.
|
|
1733
|
+
*/
|
|
1734
|
+
readonly release: () => void;
|
|
1735
|
+
/**
|
|
1736
|
+
* Current number of in-flight requests.
|
|
1737
|
+
*/
|
|
1738
|
+
readonly concurrentCount: () => number;
|
|
1739
|
+
/**
|
|
1740
|
+
* Number of requests recorded in the current sliding window.
|
|
1741
|
+
*/
|
|
1742
|
+
readonly windowRequestCount: () => number;
|
|
1743
|
+
/**
|
|
1744
|
+
* Number of estimated tokens recorded in the current sliding window.
|
|
1745
|
+
*/
|
|
1746
|
+
readonly windowTokenCount: () => number;
|
|
1747
|
+
}
|
|
1748
|
+
/**
|
|
1749
|
+
* Create a rate limiter with configurable thresholds.
|
|
1750
|
+
*
|
|
1751
|
+
* Uses a sliding window algorithm: timestamps of recent requests are stored
|
|
1752
|
+
* in an array. On `acquire()`, expired entries (older than 60s) are pruned.
|
|
1753
|
+
* If the remaining count >= limit, the caller waits until the oldest entry
|
|
1754
|
+
* would expire from the window.
|
|
1755
|
+
*
|
|
1756
|
+
* @example
|
|
1757
|
+
* ```typescript
|
|
1758
|
+
* const limiter = makeRateLimiter({ requestsPerMinute: 30 });
|
|
1759
|
+
* // In an Effect pipeline:
|
|
1760
|
+
* yield* limiter.acquire(messages);
|
|
1761
|
+
* try {
|
|
1762
|
+
* yield* llm.complete(request);
|
|
1763
|
+
* } finally {
|
|
1764
|
+
* limiter.release();
|
|
1765
|
+
* }
|
|
1766
|
+
* ```
|
|
1767
|
+
*/
|
|
1768
|
+
declare const makeRateLimiter: (config?: RateLimiterConfig) => RateLimiter;
|
|
1769
|
+
|
|
1770
|
+
/**
|
|
1771
|
+
* Rate-Limited Provider — wraps an existing LLMService with rate limiting.
|
|
1772
|
+
*
|
|
1773
|
+
* Intercepts `complete()`, `stream()`, and `completeStructured()` calls,
|
|
1774
|
+
* acquiring a rate limiter slot before each request and releasing it afterward.
|
|
1775
|
+
* Passthrough for `embed()`, `countTokens()`, `getModelConfig()`, and
|
|
1776
|
+
* `getStructuredOutputCapabilities()`.
|
|
1777
|
+
*/
|
|
1778
|
+
|
|
1779
|
+
/**
|
|
1780
|
+
* Create a Layer that wraps the existing LLMService with rate limiting.
|
|
1781
|
+
*
|
|
1782
|
+
* The returned layer depends on an upstream LLMService (i.e., you must apply
|
|
1783
|
+
* `.pipe(Layer.provide(baseLlmLayer))` to resolve the dependency).
|
|
1784
|
+
*
|
|
1785
|
+
* @example
|
|
1786
|
+
* ```typescript
|
|
1787
|
+
* const rateLimitedLlm = makeRateLimitedProvider({ requestsPerMinute: 30 })
|
|
1788
|
+
* .pipe(Layer.provide(AnthropicProviderLive));
|
|
1789
|
+
* ```
|
|
1790
|
+
*/
|
|
1791
|
+
declare const makeRateLimitedProvider: (config?: RateLimiterConfig) => Layer.Layer<LLMService, never, LLMService>;
|
|
1792
|
+
|
|
1793
|
+
/**
|
|
1794
|
+
* Configuration for the FallbackChain graceful degradation strategy.
|
|
1795
|
+
*
|
|
1796
|
+
* Specifies ordered lists of fallback providers and models, along with
|
|
1797
|
+
* the error threshold that triggers switching to the next provider.
|
|
1798
|
+
*
|
|
1799
|
+
* @example
|
|
1800
|
+
* ```typescript
|
|
1801
|
+
* const config: FallbackConfig = {
|
|
1802
|
+
* providers: ["anthropic", "openai", "gemini"],
|
|
1803
|
+
* models: ["claude-sonnet-4-20250514", "claude-haiku-3-20250520"],
|
|
1804
|
+
* errorThreshold: 3,
|
|
1805
|
+
* };
|
|
1806
|
+
* ```
|
|
1807
|
+
*/
|
|
1808
|
+
interface FallbackConfig {
|
|
1809
|
+
/** Ordered list of provider names to try in sequence. */
|
|
1810
|
+
readonly providers: string[];
|
|
1811
|
+
/** Ordered list of model names to try within the same provider. */
|
|
1812
|
+
readonly models?: string[];
|
|
1813
|
+
/** Consecutive errors on a provider before switching to next. Default: 3 */
|
|
1814
|
+
readonly errorThreshold?: number;
|
|
1815
|
+
}
|
|
1816
|
+
/**
|
|
1817
|
+
* FallbackChain manages graceful degradation when LLM providers or models fail.
|
|
1818
|
+
*
|
|
1819
|
+
* Tracks consecutive errors per provider and automatically switches to the next
|
|
1820
|
+
* provider when the error threshold is reached. On rate limits (429), falls back
|
|
1821
|
+
* to a cheaper model within the same provider.
|
|
1822
|
+
*
|
|
1823
|
+
* Use case: Deploy with Anthropic as primary, OpenAI as secondary, Gemini as
|
|
1824
|
+
* fallback. If Claude API goes down, automatically route to GPT. If quota exceeded,
|
|
1825
|
+
* switch from claude-sonnet to claude-haiku to reduce cost/load.
|
|
1826
|
+
*
|
|
1827
|
+
* @example
|
|
1828
|
+
* ```typescript
|
|
1829
|
+
* const chain = new FallbackChain({
|
|
1830
|
+
* providers: ["anthropic", "openai"],
|
|
1831
|
+
* models: ["claude-sonnet-4-20250514", "claude-haiku-3-20250520"],
|
|
1832
|
+
* errorThreshold: 3,
|
|
1833
|
+
* });
|
|
1834
|
+
*
|
|
1835
|
+
* // Record errors
|
|
1836
|
+
* chain.recordError("anthropic");
|
|
1837
|
+
* chain.recordError("anthropic");
|
|
1838
|
+
* chain.recordError("anthropic"); // threshold met, switch to openai
|
|
1839
|
+
*
|
|
1840
|
+
* console.log(chain.currentProvider()); // "openai"
|
|
1841
|
+
*
|
|
1842
|
+
* // Record rate limit, fall back to cheaper model
|
|
1843
|
+
* chain.recordRateLimit("openai");
|
|
1844
|
+
* console.log(chain.currentModel()); // "claude-haiku-3-20250520"
|
|
1845
|
+
*
|
|
1846
|
+
* // Successful call resets error count
|
|
1847
|
+
* chain.recordSuccess("openai");
|
|
1848
|
+
*
|
|
1849
|
+
* // Check if more fallbacks available
|
|
1850
|
+
* if (!chain.hasFallback()) {
|
|
1851
|
+
* console.log("All providers exhausted!");
|
|
1852
|
+
* }
|
|
1853
|
+
* ```
|
|
1854
|
+
*/
|
|
1855
|
+
declare class FallbackChain {
|
|
1856
|
+
private readonly config;
|
|
1857
|
+
/** Error count per provider. */
|
|
1858
|
+
private readonly errorCounts;
|
|
1859
|
+
/** Current index in the providers list. */
|
|
1860
|
+
private currentProviderIndex;
|
|
1861
|
+
/** Current index in the models list. */
|
|
1862
|
+
private currentModelIndex;
|
|
1863
|
+
/** Threshold for switching to next provider. */
|
|
1864
|
+
private readonly threshold;
|
|
1865
|
+
constructor(config: FallbackConfig);
|
|
1866
|
+
/**
|
|
1867
|
+
* Record an error for the given provider.
|
|
1868
|
+
* Increments the error count and switches to the next provider if threshold is met.
|
|
1869
|
+
*
|
|
1870
|
+
* @param provider - Provider name that errored
|
|
1871
|
+
*/
|
|
1872
|
+
recordError(provider: string): void;
|
|
1873
|
+
/**
|
|
1874
|
+
* Record a rate limit error (429) for the given provider.
|
|
1875
|
+
* Falls back to the next model in the chain.
|
|
1876
|
+
*
|
|
1877
|
+
* @param _provider - Provider name that was rate limited (the leading underscore marks the parameter as unused)
|
|
1878
|
+
*/
|
|
1879
|
+
recordRateLimit(_provider: string): void;
|
|
1880
|
+
/**
|
|
1881
|
+
* Record a successful call for the given provider.
|
|
1882
|
+
* Resets the error count for that provider.
|
|
1883
|
+
*
|
|
1884
|
+
* @param provider - Provider name that succeeded
|
|
1885
|
+
*/
|
|
1886
|
+
recordSuccess(provider: string): void;
|
|
1887
|
+
/**
|
|
1888
|
+
* Get the currently active provider.
|
|
1889
|
+
*
|
|
1890
|
+
* @returns Name of the provider to use
|
|
1891
|
+
*/
|
|
1892
|
+
currentProvider(): string;
|
|
1893
|
+
/**
|
|
1894
|
+
* Get the currently active model.
|
|
1895
|
+
* Returns undefined if no models are configured.
|
|
1896
|
+
*
|
|
1897
|
+
* @returns Name of the model to use, or undefined if no models configured
|
|
1898
|
+
*/
|
|
1899
|
+
currentModel(): string | undefined;
|
|
1900
|
+
/**
|
|
1901
|
+
* Check if there are more fallbacks available (provider or model).
|
|
1902
|
+
*
|
|
1903
|
+
* @returns true if there are unused fallback providers or models, false if all exhausted
|
|
1904
|
+
*/
|
|
1905
|
+
hasFallback(): boolean;
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
/**
|
|
1909
|
+
* Validates and auto-repairs a message array before sending to any LLM provider.
|
|
1910
|
+
* Silent — logs warnings in debug mode, never throws.
|
|
1911
|
+
*/
|
|
1912
|
+
declare function validateAndRepairMessages(messages: readonly LLMMessage[]): readonly LLMMessage[];
|
|
1913
|
+
|
|
1914
|
+
/**
|
|
1915
|
+
* Provider Behavior Adapters — lightweight hooks that compensate for
|
|
1916
|
+
* model-specific behavior differences without polluting the core kernel.
|
|
1917
|
+
*
|
|
1918
|
+
* The kernel calls adapter methods at well-defined hook points.
|
|
1919
|
+
* Frontier models return undefined (no intervention needed).
|
|
1920
|
+
* Local/mid models return explicit guidance to improve task completion rates.
|
|
1921
|
+
*
|
|
1922
|
+
* Hook call sites in react-kernel.ts:
|
|
1923
|
+
* systemPromptPatch — once, when building the static system prompt
|
|
1924
|
+
* taskFraming — once, wrapping the initial user task message
|
|
1925
|
+
* toolGuidance — once, appended to system prompt after tool schema block
|
|
1926
|
+
* continuationHint — each iteration, injected as user message after tool results
|
|
1927
|
+
* errorRecovery — when a tool returns a failed result
|
|
1928
|
+
* synthesisPrompt — when transitioning from research → produce phase
|
|
1929
|
+
* qualityCheck — optional self-eval prompt injected before final answer
|
|
1930
|
+
*/
|
|
1931
|
+
interface ProviderAdapter {
|
|
1932
|
+
/**
|
|
1933
|
+
* Patch the system prompt for model-specific needs.
|
|
1934
|
+
* Called once when building the system prompt.
|
|
1935
|
+
*/
|
|
1936
|
+
systemPromptPatch?(basePrompt: string, tier: string): string | undefined;
|
|
1937
|
+
/**
|
|
1938
|
+
* Wrap or annotate the initial task message.
|
|
1939
|
+
* Called once when the first user message is constructed.
|
|
1940
|
+
* Return undefined to use the task as-is.
|
|
1941
|
+
*/
|
|
1942
|
+
taskFraming?(context: {
|
|
1943
|
+
task: string;
|
|
1944
|
+
requiredTools: readonly string[];
|
|
1945
|
+
tier: string;
|
|
1946
|
+
}): string | undefined;
|
|
1947
|
+
/**
|
|
1948
|
+
* Append inline tool usage guidance after the tool schema block in the system prompt.
|
|
1949
|
+
* Helps local models that ignore JSON schema descriptions.
|
|
1950
|
+
* Return undefined to add nothing.
|
|
1951
|
+
*/
|
|
1952
|
+
toolGuidance?(context: {
|
|
1953
|
+
toolNames: readonly string[];
|
|
1954
|
+
requiredTools: readonly string[];
|
|
1955
|
+
tier: string;
|
|
1956
|
+
}): string | undefined;
|
|
1957
|
+
/**
|
|
1958
|
+
* Generate a continuation hint injected as a user message after tool results.
|
|
1959
|
+
* Called each iteration when required tools are still pending.
|
|
1960
|
+
*/
|
|
1961
|
+
continuationHint?(context: {
|
|
1962
|
+
toolsUsed: ReadonlySet<string>;
|
|
1963
|
+
requiredTools: readonly string[];
|
|
1964
|
+
missingTools: readonly string[];
|
|
1965
|
+
iteration: number;
|
|
1966
|
+
maxIterations: number;
|
|
1967
|
+
lastToolName?: string;
|
|
1968
|
+
lastToolResultPreview?: string;
|
|
1969
|
+
}): string | undefined;
|
|
1970
|
+
/**
|
|
1971
|
+
* Generate recovery guidance when a tool call fails or returns an error.
|
|
1972
|
+
* Called after a failed tool execution. Return undefined to skip.
|
|
1973
|
+
*/
|
|
1974
|
+
errorRecovery?(context: {
|
|
1975
|
+
toolName: string;
|
|
1976
|
+
errorContent: string;
|
|
1977
|
+
missingTools: readonly string[];
|
|
1978
|
+
tier: string;
|
|
1979
|
+
}): string | undefined;
|
|
1980
|
+
/**
|
|
1981
|
+
* Generate a synthesis prompt injected when the model has gathered enough data
|
|
1982
|
+
* and needs to transition to producing the output.
|
|
1983
|
+
* Called when all research tools are satisfied and only output tools remain.
|
|
1984
|
+
* Return undefined to skip.
|
|
1985
|
+
*/
|
|
1986
|
+
synthesisPrompt?(context: {
|
|
1987
|
+
toolsUsed: ReadonlySet<string>;
|
|
1988
|
+
missingOutputTools: readonly string[];
|
|
1989
|
+
observationCount: number;
|
|
1990
|
+
tier: string;
|
|
1991
|
+
}): string | undefined;
|
|
1992
|
+
/**
|
|
1993
|
+
* Generate a self-evaluation prompt injected just before the model declares
|
|
1994
|
+
* a final answer. Return undefined to skip the quality check.
|
|
1995
|
+
*/
|
|
1996
|
+
qualityCheck?(context: {
|
|
1997
|
+
task: string;
|
|
1998
|
+
requiredTools: readonly string[];
|
|
1999
|
+
toolsUsed: ReadonlySet<string>;
|
|
2000
|
+
tier: string;
|
|
2001
|
+
}): string | undefined;
|
|
2002
|
+
}
|
|
2003
|
+
declare const defaultAdapter: ProviderAdapter;
|
|
2004
|
+
declare const localModelAdapter: ProviderAdapter;
|
|
2005
|
+
declare function selectAdapter(_capabilities: {
|
|
2006
|
+
supportsToolCalling: boolean;
|
|
2007
|
+
}, tier?: string): ProviderAdapter;
|
|
2008
|
+
declare function recommendStrategyForTier(_tier: string | undefined, _configuredStrategy: string, _requiredTools?: readonly string[]): string | undefined;
|
|
2009
|
+
|
|
2010
|
+
export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheUsage, type CacheableContentBlock, type CircuitBreaker, type CircuitBreakerConfig, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DEFAULT_CAPABILITIES, DefaultEmbeddingConfig, type EmbeddingCache, type EmbeddingConfig, EmbeddingConfigSchema, FallbackChain, type FallbackConfig, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, type ModelPricing, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, type PricingProvider, PromptManager, PromptManagerLive, type ProviderAdapter, type ProviderCapabilities, type RateLimiter, type RateLimiterConfig, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, type TestTurn, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenLogprob, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolCallSpec, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, defaultAdapter, defaultCircuitBreakerConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, localModelAdapter, makeCacheable, makeCircuitBreaker, makeEmbeddingCache, makeRateLimitedProvider, makeRateLimiter, 
openRouterPricingProvider, recommendStrategyForTier, retryPolicy, selectAdapter, urlPricingProvider, validateAndRepairMessages };
|