@reactive-agents/llm-provider 0.6.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -10
- package/dist/index.d.ts +54 -4
- package/dist/index.js +248 -71
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ Provides a unified `LLMService` interface with adapters for Anthropic, OpenAI, G
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
9
9
|
```bash
|
|
10
|
-
bun add @reactive-agents/llm-provider
|
|
10
|
+
bun add @reactive-agents/llm-provider
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
Install the SDK for your chosen provider:
|
|
@@ -20,20 +20,23 @@ bun add @google/genai # Google Gemini
|
|
|
20
20
|
|
|
21
21
|
## Supported Providers
|
|
22
22
|
|
|
23
|
-
| Provider
|
|
24
|
-
|
|
25
|
-
| `anthropic` | claude-haiku, claude-sonnet, claude-opus | ✓
|
|
26
|
-
| `openai`
|
|
27
|
-
| `gemini`
|
|
28
|
-
| `ollama`
|
|
29
|
-
| `test`
|
|
23
|
+
| Provider | Models | Streaming | Embeddings | Structured Output |
|
|
24
|
+
| ----------- | ---------------------------------------- | --------- | ---------- | ----------------- |
|
|
25
|
+
| `anthropic` | claude-haiku, claude-sonnet, claude-opus | ✓ | — | ✓ |
|
|
26
|
+
| `openai` | gpt-4o, gpt-4o-mini, o1-\* | ✓ | ✓ | ✓ |
|
|
27
|
+
| `gemini` | gemini-2.0-flash, gemini-2.5-pro | ✓ | ✓ | ✓ |
|
|
28
|
+
| `ollama` | any local model | ✓ | ✓ | ✓ |
|
|
29
|
+
| `test` | deterministic mock | ✓ | ✓ | — |
|
|
30
30
|
|
|
31
31
|
## Usage
|
|
32
32
|
|
|
33
33
|
### Anthropic
|
|
34
34
|
|
|
35
35
|
```typescript
|
|
36
|
-
import {
|
|
36
|
+
import {
|
|
37
|
+
createLLMProviderLayer,
|
|
38
|
+
LLMService,
|
|
39
|
+
} from "@reactive-agents/llm-provider";
|
|
37
40
|
import { Effect } from "effect";
|
|
38
41
|
|
|
39
42
|
const layer = createLLMProviderLayer("anthropic");
|
|
@@ -51,7 +54,10 @@ const result = await Effect.runPromise(
|
|
|
51
54
|
### Google Gemini
|
|
52
55
|
|
|
53
56
|
```typescript
|
|
54
|
-
import {
|
|
57
|
+
import {
|
|
58
|
+
createLLMProviderLayer,
|
|
59
|
+
LLMService,
|
|
60
|
+
} from "@reactive-agents/llm-provider";
|
|
55
61
|
import { Effect } from "effect";
|
|
56
62
|
|
|
57
63
|
// Set GOOGLE_API_KEY in your environment
|
package/dist/index.d.ts
CHANGED
|
@@ -910,7 +910,7 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
|
|
|
910
910
|
* Anthropic has no embeddings API — routes to OpenAI or Ollama
|
|
911
911
|
* per LLMConfig.embeddingConfig.
|
|
912
912
|
*/
|
|
913
|
-
readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly number[][], LLMErrors>;
|
|
913
|
+
readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>;
|
|
914
914
|
/**
|
|
915
915
|
* Count tokens for a set of messages.
|
|
916
916
|
* Used for context window management.
|
|
@@ -1306,7 +1306,7 @@ declare const TestLLMServiceLayer: (responses?: Record<string, string>) => Layer
|
|
|
1306
1306
|
|
|
1307
1307
|
/**
|
|
1308
1308
|
* Estimate token count for messages.
|
|
1309
|
-
* Uses
|
|
1309
|
+
* Uses content-aware heuristics: ~3 chars/token for code/JSON-dense text, ~3.5 for mixed content, ~4 for plain English text.
|
|
1310
1310
|
* This is used as a fallback when the provider's token counting API is unavailable.
|
|
1311
1311
|
*/
|
|
1312
1312
|
declare const estimateTokenCount: (messages: readonly LLMMessage[]) => Effect.Effect<number, never>;
|
|
@@ -1321,6 +1321,12 @@ declare const calculateCost: (inputTokens: number, outputTokens: number, model:
|
|
|
1321
1321
|
* Only retries on rate limit and timeout errors.
|
|
1322
1322
|
*/
|
|
1323
1323
|
declare const retryPolicy: Schedule.Schedule<[number, effect_Duration.Duration], LLMErrors, never>;
|
|
1324
|
+
/** Tunable thresholds for the circuit breaker created by `makeCircuitBreaker`. */
type CircuitBreakerConfig = {
|
|
1325
|
+
/** Number of consecutive failures after which the breaker opens and fast-fails. */
readonly failureThreshold: number;
|
|
1326
|
+
/** Milliseconds the breaker stays open before it moves to half-open. */
readonly cooldownMs: number;
|
|
1327
|
+
/**
 * Number of trial requests associated with the half-open state (the shipped default is 1).
 * NOTE(review): confirm against the implementation how this limit is enforced.
 */
readonly halfOpenRequests: number;
|
|
1328
|
+
};
|
|
1329
|
+
declare const defaultCircuitBreakerConfig: CircuitBreakerConfig;
|
|
1324
1330
|
|
|
1325
1331
|
/**
|
|
1326
1332
|
* Schema for ReAct action parsing.
|
|
@@ -1420,10 +1426,54 @@ declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "olla
|
|
|
1420
1426
|
thinking?: boolean;
|
|
1421
1427
|
temperature?: number;
|
|
1422
1428
|
maxTokens?: number;
|
|
1423
|
-
}) => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1429
|
+
}, circuitBreaker?: Partial<CircuitBreakerConfig>) => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1424
1430
|
/**
|
|
1425
1431
|
* LLM layer with custom config (for programmatic use).
|
|
1426
1432
|
*/
|
|
1427
1433
|
declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1428
1434
|
|
|
1429
|
-
|
|
1435
|
+
/**
|
|
1436
|
+
* Content-hash embedding cache — deduplicates embed() calls per text.
|
|
1437
|
+
* Cache is keyed by Bun.hash(text) and avoids re-embedding identical strings.
|
|
1438
|
+
*/
|
|
1439
|
+
|
|
1440
|
+
interface EmbeddingCache {
|
|
1441
|
+
/**
 * Embed `texts`, returning one vector per input in the same order.
 * Texts already cached for the given model are served from the cache; the
 * remaining texts are forwarded to the wrapped embed function.
 */
|
|
1442
|
+
readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>;
|
|
1443
|
+
/** Number of cached embeddings, summed across all per-model caches. */
|
|
1444
|
+
readonly size: () => number;
|
|
1445
|
+
/** Clear all cached entries for every model. */
|
|
1446
|
+
readonly clear: () => void;
|
|
1447
|
+
}
|
|
1448
|
+
/**
|
|
1449
|
+
* Create an embedding cache that wraps an underlying embed function.
|
|
1450
|
+
* Each text is hashed individually; only cache-misses are sent to the LLM.
|
|
1451
|
+
*/
|
|
1452
|
+
declare const makeEmbeddingCache: (underlying: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>) => EmbeddingCache;
|
|
1453
|
+
|
|
1454
|
+
/**
|
|
1455
|
+
* Circuit Breaker — prevents cascading failures by fast-failing when
|
|
1456
|
+
* the underlying LLM provider is consistently erroring.
|
|
1457
|
+
*
|
|
1458
|
+
* States: CLOSED (normal) → OPEN (fast-fail) → HALF_OPEN (test one request)
|
|
1459
|
+
*/
|
|
1460
|
+
|
|
1461
|
+
type State = "closed" | "open" | "half_open";
|
|
1462
|
+
interface CircuitBreaker {
|
|
1463
|
+
/** Wrap an Effect with circuit breaker protection. */
|
|
1464
|
+
readonly protect: <A>(effect: Effect.Effect<A, LLMErrors>) => Effect.Effect<A, LLMErrors>;
|
|
1465
|
+
/** Current state. */
|
|
1466
|
+
readonly state: () => State;
|
|
1467
|
+
/** Reset to closed. */
|
|
1468
|
+
readonly reset: () => void;
|
|
1469
|
+
}
|
|
1470
|
+
/**
|
|
1471
|
+
* Create a circuit breaker with configurable thresholds.
|
|
1472
|
+
*
|
|
1473
|
+
* - After `failureThreshold` consecutive failures → OPEN (fast-fail).
|
|
1474
|
+
* - After `cooldownMs` → HALF_OPEN (allow one test request).
|
|
1475
|
+
* - If test request succeeds → CLOSED. If it fails → OPEN again.
|
|
1476
|
+
*/
|
|
1477
|
+
declare const makeCircuitBreaker: (config?: Partial<CircuitBreakerConfig>) => CircuitBreaker;
|
|
1478
|
+
|
|
1479
|
+
export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CircuitBreaker, type CircuitBreakerConfig, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingCache, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, defaultCircuitBreakerConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, makeCircuitBreaker, makeEmbeddingCache, retryPolicy };
|
package/dist/index.js
CHANGED
|
@@ -1432,25 +1432,36 @@ import { Effect as Effect3, Context as Context3, Layer as Layer2 } from "effect"
|
|
|
1432
1432
|
|
|
1433
1433
|
// src/token-counter.ts
|
|
1434
1434
|
import { Effect as Effect2 } from "effect";
|
|
1435
|
+
/** Number of leading characters inspected when classifying a text. */
var CHARS_PER_TOKEN_SAMPLE_SIZE = 2e3;
/** Punctuation that appears densely in source code and structured data. */
var CODE_SIGNAL_PATTERN = /[{}();=<>\[\]]/g;
/** Quoted keys followed by a colon, as in JSON objects (`"name":`). */
var JSON_KEY_PATTERN = /"\w+"\s*:/g;
/**
 * Pick a characters-per-token divisor for `text`.
 *
 * Only the first CHARS_PER_TOKEN_SAMPLE_SIZE characters are inspected. The
 * density of code punctuation plus JSON-style keys in that sample selects the
 * divisor: above 8% returns 3, above 4% returns 3.5, otherwise 4.
 *
 * @param text Text to classify.
 * @returns 3, 3.5 or 4. An empty string returns 4.
 */
function charsPerToken(text) {
  if (text.length === 0) return 4;
  const sample = text.slice(0, CHARS_PER_TOKEN_SAMPLE_SIZE);
  // String.prototype.match restarts global patterns from index 0, so the
  // shared regex objects carry no state from one call to the next.
  const codeSignals = (sample.match(CODE_SIGNAL_PATTERN) ?? []).length;
  const jsonSignals = (sample.match(JSON_KEY_PATTERN) ?? []).length;
  const ratio = (codeSignals + jsonSignals) / sample.length;
  if (ratio > 0.08) return 3;
  if (ratio > 0.04) return 3.5;
  return 4;
}
|
|
1435
1445
|
/**
 * Estimate the token count of a list of messages without calling a provider.
 *
 * String content and text / tool_result blocks are divided by the divisor
 * chosen by `charsPerToken`; tool_use inputs are serialized to JSON and
 * divided by 3. Every message adds a flat 4 tokens on top of its content.
 *
 * @param messages Messages whose `content` is a string or an array of blocks.
 * @returns An Effect that synchronously yields the estimated token total.
 */
var estimateTokenCount = (messages) => Effect2.sync(() => {
  let totalTokens = 0;
  for (const msg of messages) {
    if (typeof msg.content === "string") {
      totalTokens += Math.ceil(msg.content.length / charsPerToken(msg.content));
    } else {
      for (const block of msg.content) {
        if (block.type === "text") {
          totalTokens += Math.ceil(block.text.length / charsPerToken(block.text));
        } else if (block.type === "tool_result") {
          // Assumes tool_result content is a string — TODO confirm against the block schema.
          totalTokens += Math.ceil(block.content.length / charsPerToken(block.content));
        } else if (block.type === "tool_use") {
          // JSON.stringify returns undefined for an undefined input; fall back
          // to an empty string so this never-failing estimator cannot throw.
          const json = JSON.stringify(block.input) ?? "";
          totalTokens += Math.ceil(json.length / 3);
        }
      }
    }
    // Flat per-message overhead.
    totalTokens += 4;
  }
  return totalTokens;
});
|
|
1455
1466
|
var calculateCost = (inputTokens, outputTokens, model) => {
|
|
1456
1467
|
const costMap = {
|
|
@@ -1578,6 +1589,11 @@ var retryPolicy = Schedule.intersect(
|
|
|
1578
1589
|
(error) => error._tag === "LLMRateLimitError" || error._tag === "LLMTimeoutError"
|
|
1579
1590
|
)
|
|
1580
1591
|
);
|
|
1592
|
+
/**
 * Default circuit breaker settings: 5 consecutive failures, a 30000 ms
 * cooldown, and 1 half-open request.
 *
 * Frozen because this object is exported and shared; the declared type marks
 * every field readonly, and freezing stops one consumer from changing the
 * defaults seen by every other consumer.
 */
var defaultCircuitBreakerConfig = Object.freeze({
  failureThreshold: 5,
  cooldownMs: 3e4,
  halfOpenRequests: 1
});
|
|
1581
1597
|
|
|
1582
1598
|
// src/providers/anthropic.ts
|
|
1583
1599
|
var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "system").map((m) => {
|
|
@@ -1622,6 +1638,16 @@ var toEffectError = (error, provider) => {
|
|
|
1622
1638
|
cause: error
|
|
1623
1639
|
});
|
|
1624
1640
|
};
|
|
1641
|
+
/** System prompts shorter than this many characters are sent as a plain string. */
var MIN_SYSTEM_CACHE_CHARS = 4096;
/**
 * Build the `system` request field from an optional system prompt.
 *
 * @param systemPrompt Prompt text; may be undefined or empty.
 * @returns `undefined` when there is no prompt, the prompt itself when it is
 *   shorter than MIN_SYSTEM_CACHE_CHARS, otherwise a single text block that
 *   carries an ephemeral `cache_control` marker.
 */
var buildSystemParam = (systemPrompt) => {
  const hasPrompt = Boolean(systemPrompt);
  if (!hasPrompt) {
    return void 0;
  }
  const longEnoughToCache = systemPrompt.length >= MIN_SYSTEM_CACHE_CHARS;
  if (longEnoughToCache) {
    const cachedBlock = {
      type: "text",
      text: systemPrompt,
      cache_control: { type: "ephemeral" }
    };
    return [cachedBlock];
  }
  return systemPrompt;
};
|
|
1625
1651
|
var AnthropicProviderLive = Layer3.effect(
|
|
1626
1652
|
LLMService,
|
|
1627
1653
|
Effect4.gen(function* () {
|
|
@@ -1644,7 +1670,7 @@ var AnthropicProviderLive = Layer3.effect(
|
|
|
1644
1670
|
model,
|
|
1645
1671
|
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
1646
1672
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
1647
|
-
system: request.systemPrompt,
|
|
1673
|
+
system: buildSystemParam(request.systemPrompt),
|
|
1648
1674
|
messages: toAnthropicMessages(request.messages),
|
|
1649
1675
|
stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
|
|
1650
1676
|
tools: request.tools?.map(toAnthropicTool)
|
|
@@ -1674,7 +1700,7 @@ var AnthropicProviderLive = Layer3.effect(
|
|
|
1674
1700
|
model,
|
|
1675
1701
|
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
1676
1702
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
1677
|
-
system: request.systemPrompt,
|
|
1703
|
+
system: buildSystemParam(request.systemPrompt),
|
|
1678
1704
|
messages: toAnthropicMessages(request.messages)
|
|
1679
1705
|
});
|
|
1680
1706
|
stream.on("text", (text) => {
|
|
@@ -1714,17 +1740,13 @@ var AnthropicProviderLive = Layer3.effect(
|
|
|
1714
1740
|
});
|
|
1715
1741
|
}),
|
|
1716
1742
|
completeStructured: (request) => Effect4.gen(function* () {
|
|
1717
|
-
const
|
|
1718
|
-
|
|
1719
|
-
null,
|
|
1720
|
-
2
|
|
1721
|
-
);
|
|
1743
|
+
const jsonSchema = Schema2.encodedSchema(request.outputSchema);
|
|
1744
|
+
const schemaStr = JSON.stringify(jsonSchema, null, 2);
|
|
1722
1745
|
const messagesWithFormat = [
|
|
1723
1746
|
...request.messages,
|
|
1724
1747
|
{
|
|
1725
1748
|
role: "user",
|
|
1726
|
-
content: `
|
|
1727
|
-
Respond with ONLY valid JSON matching this schema:
|
|
1749
|
+
content: `Respond with ONLY valid JSON matching this schema:
|
|
1728
1750
|
${schemaStr}
|
|
1729
1751
|
|
|
1730
1752
|
No markdown, no code fences, just raw JSON.`
|
|
@@ -1741,9 +1763,11 @@ No markdown, no code fences, just raw JSON.`
|
|
|
1741
1763
|
},
|
|
1742
1764
|
{
|
|
1743
1765
|
role: "user",
|
|
1744
|
-
content: `That response
|
|
1766
|
+
content: `That response did not match the schema. Error: ${String(lastError)}. Please try again with valid JSON only.`
|
|
1745
1767
|
}
|
|
1746
1768
|
];
|
|
1769
|
+
const anthropicMsgs = toAnthropicMessages(msgs);
|
|
1770
|
+
anthropicMsgs.push({ role: "assistant", content: "{" });
|
|
1747
1771
|
const completeResult = yield* Effect4.tryPromise({
|
|
1748
1772
|
try: () => {
|
|
1749
1773
|
const client = getClient();
|
|
@@ -1751,8 +1775,8 @@ No markdown, no code fences, just raw JSON.`
|
|
|
1751
1775
|
model: typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel,
|
|
1752
1776
|
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
1753
1777
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
1754
|
-
system: request.systemPrompt,
|
|
1755
|
-
messages:
|
|
1778
|
+
system: buildSystemParam(request.systemPrompt),
|
|
1779
|
+
messages: anthropicMsgs
|
|
1756
1780
|
});
|
|
1757
1781
|
},
|
|
1758
1782
|
catch: (error) => toEffectError(error, "anthropic")
|
|
@@ -1761,8 +1785,9 @@ No markdown, no code fences, just raw JSON.`
|
|
|
1761
1785
|
completeResult,
|
|
1762
1786
|
typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel
|
|
1763
1787
|
);
|
|
1788
|
+
const fullContent = "{" + response.content;
|
|
1764
1789
|
try {
|
|
1765
|
-
const parsed = JSON.parse(
|
|
1790
|
+
const parsed = JSON.parse(fullContent);
|
|
1766
1791
|
const decoded = Schema2.decodeUnknownEither(
|
|
1767
1792
|
request.outputSchema
|
|
1768
1793
|
)(parsed);
|
|
@@ -2031,49 +2056,56 @@ var OpenAIProviderLive = Layer4.effect(
|
|
|
2031
2056
|
});
|
|
2032
2057
|
}),
|
|
2033
2058
|
completeStructured: (request) => Effect5.gen(function* () {
|
|
2034
|
-
const
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
);
|
|
2039
|
-
const
|
|
2059
|
+
const jsonSchema = Schema3.encodedSchema(request.outputSchema);
|
|
2060
|
+
const schemaObj = JSON.parse(JSON.stringify(jsonSchema));
|
|
2061
|
+
const schemaStr = JSON.stringify(schemaObj, null, 2);
|
|
2062
|
+
const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
|
|
2063
|
+
const client = getClient();
|
|
2064
|
+
const maxRetries = request.maxParseRetries ?? 2;
|
|
2065
|
+
const requestBody = {
|
|
2066
|
+
model,
|
|
2067
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
2068
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
2069
|
+
response_format: {
|
|
2070
|
+
type: "json_schema",
|
|
2071
|
+
json_schema: {
|
|
2072
|
+
name: "structured_output",
|
|
2073
|
+
strict: true,
|
|
2074
|
+
schema: schemaObj
|
|
2075
|
+
}
|
|
2076
|
+
}
|
|
2077
|
+
};
|
|
2078
|
+
const messages = [
|
|
2040
2079
|
...request.messages,
|
|
2041
2080
|
{
|
|
2042
2081
|
role: "user",
|
|
2043
|
-
content: `
|
|
2044
|
-
|
|
2045
|
-
${schemaStr}
|
|
2046
|
-
|
|
2047
|
-
No markdown, no code fences, just raw JSON.`
|
|
2082
|
+
content: `Respond with JSON matching this schema:
|
|
2083
|
+
${schemaStr}`
|
|
2048
2084
|
}
|
|
2049
2085
|
];
|
|
2050
2086
|
let lastError = null;
|
|
2051
|
-
const maxRetries = request.maxParseRetries ?? 2;
|
|
2052
2087
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
2053
|
-
const msgs = attempt === 0 ?
|
|
2054
|
-
...
|
|
2088
|
+
const msgs = attempt === 0 ? messages : [
|
|
2089
|
+
...messages,
|
|
2055
2090
|
{
|
|
2056
2091
|
role: "assistant",
|
|
2057
2092
|
content: String(lastError)
|
|
2058
2093
|
},
|
|
2059
2094
|
{
|
|
2060
2095
|
role: "user",
|
|
2061
|
-
content: `That response
|
|
2096
|
+
content: `That response did not match the schema. Error: ${String(lastError)}. Please try again.`
|
|
2062
2097
|
}
|
|
2063
2098
|
];
|
|
2064
|
-
const client = getClient();
|
|
2065
2099
|
const completeResult = yield* Effect5.tryPromise({
|
|
2066
2100
|
try: () => client.chat.completions.create({
|
|
2067
|
-
|
|
2068
|
-
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
2069
|
-
temperature: request.temperature ?? config.defaultTemperature,
|
|
2101
|
+
...requestBody,
|
|
2070
2102
|
messages: toOpenAIMessages(msgs)
|
|
2071
2103
|
}),
|
|
2072
2104
|
catch: (error) => toEffectError2(error, "openai")
|
|
2073
2105
|
});
|
|
2074
2106
|
const response = mapOpenAIResponse(
|
|
2075
2107
|
completeResult,
|
|
2076
|
-
|
|
2108
|
+
model
|
|
2077
2109
|
);
|
|
2078
2110
|
try {
|
|
2079
2111
|
const parsed = JSON.parse(response.content);
|
|
@@ -2423,11 +2455,10 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2423
2455
|
});
|
|
2424
2456
|
}),
|
|
2425
2457
|
completeStructured: (request) => Effect6.gen(function* () {
|
|
2426
|
-
const
|
|
2427
|
-
|
|
2428
|
-
|
|
2429
|
-
|
|
2430
|
-
);
|
|
2458
|
+
const encodedSchema = Schema4.encodedSchema(request.outputSchema);
|
|
2459
|
+
const schemaObj = JSON.parse(JSON.stringify(encodedSchema));
|
|
2460
|
+
const schemaStr = JSON.stringify(schemaObj, null, 2);
|
|
2461
|
+
const ollamaFormat = schemaObj && typeof schemaObj === "object" && schemaObj.properties ? schemaObj : "json";
|
|
2431
2462
|
const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
|
|
2432
2463
|
let lastError = null;
|
|
2433
2464
|
const maxRetries = request.maxParseRetries ?? 2;
|
|
@@ -2473,7 +2504,7 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2473
2504
|
model,
|
|
2474
2505
|
messages: msgs,
|
|
2475
2506
|
stream: false,
|
|
2476
|
-
format:
|
|
2507
|
+
format: ollamaFormat,
|
|
2477
2508
|
keep_alive: "5m",
|
|
2478
2509
|
options: {
|
|
2479
2510
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
@@ -2529,7 +2560,7 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2529
2560
|
}),
|
|
2530
2561
|
getStructuredOutputCapabilities: () => Effect6.succeed({
|
|
2531
2562
|
nativeJsonMode: true,
|
|
2532
|
-
jsonSchemaEnforcement:
|
|
2563
|
+
jsonSchemaEnforcement: true,
|
|
2533
2564
|
prefillSupport: false,
|
|
2534
2565
|
grammarConstraints: true
|
|
2535
2566
|
})
|
|
@@ -2656,6 +2687,8 @@ var GeminiProviderLive = Layer6.effect(
|
|
|
2656
2687
|
if (opts.tools?.length) {
|
|
2657
2688
|
cfg.tools = toGeminiTools([...opts.tools]);
|
|
2658
2689
|
}
|
|
2690
|
+
if (opts.responseMimeType) cfg.responseMimeType = opts.responseMimeType;
|
|
2691
|
+
if (opts.responseSchema) cfg.responseSchema = opts.responseSchema;
|
|
2659
2692
|
return cfg;
|
|
2660
2693
|
};
|
|
2661
2694
|
return LLMService.of({
|
|
@@ -2754,20 +2787,20 @@ var GeminiProviderLive = Layer6.effect(
|
|
|
2754
2787
|
});
|
|
2755
2788
|
}),
|
|
2756
2789
|
completeStructured: (request) => Effect7.gen(function* () {
|
|
2757
|
-
const
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2790
|
+
const jsonSchema = Schema5.encodedSchema(request.outputSchema);
|
|
2791
|
+
const schemaObj = JSON.parse(JSON.stringify(jsonSchema));
|
|
2792
|
+
const schemaStr = JSON.stringify(schemaObj, null, 2);
|
|
2793
|
+
const client = yield* Effect7.promise(() => getClient());
|
|
2794
|
+
let model = typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel;
|
|
2795
|
+
if (!model || model.startsWith("claude") || model.startsWith("gpt-")) {
|
|
2796
|
+
model = GEMINI_DEFAULT_MODEL;
|
|
2797
|
+
}
|
|
2762
2798
|
const messagesWithFormat = [
|
|
2763
2799
|
...request.messages,
|
|
2764
2800
|
{
|
|
2765
2801
|
role: "user",
|
|
2766
|
-
content: `
|
|
2767
|
-
|
|
2768
|
-
${schemaStr}
|
|
2769
|
-
|
|
2770
|
-
No markdown, no code fences, just raw JSON.`
|
|
2802
|
+
content: `Respond with JSON matching this schema:
|
|
2803
|
+
${schemaStr}`
|
|
2771
2804
|
}
|
|
2772
2805
|
];
|
|
2773
2806
|
let lastError = null;
|
|
@@ -2781,14 +2814,9 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2781
2814
|
},
|
|
2782
2815
|
{
|
|
2783
2816
|
role: "user",
|
|
2784
|
-
content: `That response
|
|
2817
|
+
content: `That response did not match the schema. Error: ${String(lastError)}. Please try again.`
|
|
2785
2818
|
}
|
|
2786
2819
|
];
|
|
2787
|
-
const client = yield* Effect7.promise(() => getClient());
|
|
2788
|
-
let model = typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel;
|
|
2789
|
-
if (!model || model.startsWith("claude") || model.startsWith("gpt-")) {
|
|
2790
|
-
model = GEMINI_DEFAULT_MODEL;
|
|
2791
|
-
}
|
|
2792
2820
|
const response = yield* Effect7.tryPromise({
|
|
2793
2821
|
try: () => client.models.generateContent({
|
|
2794
2822
|
model,
|
|
@@ -2796,7 +2824,9 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2796
2824
|
config: buildGeminiConfig({
|
|
2797
2825
|
maxTokens: request.maxTokens,
|
|
2798
2826
|
temperature: request.temperature,
|
|
2799
|
-
systemPrompt: request.systemPrompt
|
|
2827
|
+
systemPrompt: request.systemPrompt,
|
|
2828
|
+
responseMimeType: "application/json",
|
|
2829
|
+
responseSchema: schemaObj
|
|
2800
2830
|
})
|
|
2801
2831
|
}),
|
|
2802
2832
|
catch: toEffectError3
|
|
@@ -3375,8 +3405,147 @@ var ComplexityAnalysisSchema = Schema8.Struct({
|
|
|
3375
3405
|
});
|
|
3376
3406
|
|
|
3377
3407
|
// src/runtime.ts
|
|
3378
|
-
import { Layer as Layer9 } from "effect";
|
|
3379
|
-
|
|
3408
|
+
import { Effect as Effect12, Layer as Layer9 } from "effect";
|
|
3409
|
+
|
|
3410
|
+
// src/embedding-cache.ts
|
|
3411
|
+
import { Effect as Effect10 } from "effect";
|
|
3412
|
+
/** Upper bound on cached embeddings kept per model. */
var MAX_ENTRIES = 5e3;
/**
 * Wrap an embed function with a per-model, per-text embedding cache.
 *
 * @param underlying Embed function that receives the uncached texts (and the
 *   optional model) and yields one vector per text, in order.
 * @returns An object with `embed` (cached embedding), `size` (total cached
 *   entries across models) and `clear` (drop every cached entry).
 */
var makeEmbeddingCache = (underlying) => {
  // model key -> (text key -> embedding)
  const caches = new Map();
  const getModelCache = (model) => {
    let c = caches.get(model);
    if (!c) {
      c = new Map();
      caches.set(model, c);
    }
    return c;
  };
  // Use Bun's content hash when it is available; on other runtimes the `Bun`
  // global does not exist, so fall back to keying by the text itself instead
  // of throwing a ReferenceError on every embed() call.
  const cacheKey = (text) => typeof Bun !== "undefined" ? Bun.hash(text).toString(36) : text;
  // Drop the oldest entries (Map iterates in insertion order) once the cache
  // exceeds MAX_ENTRIES: at least 20% of the limit, and always enough to get
  // back under the limit even after a very large batch.
  const evictIfNeeded = (cache) => {
    if (cache.size <= MAX_ENTRIES) return;
    const evictCount = Math.max(Math.floor(MAX_ENTRIES * 0.2), cache.size - MAX_ENTRIES);
    const keys = cache.keys();
    for (let i = 0; i < evictCount; i++) {
      const next = keys.next();
      if (next.done) break;
      cache.delete(next.value);
    }
  };
  return {
    embed: (texts, model) => Effect10.gen(function* () {
      const modelKey = model ?? "__default__";
      const cache = getModelCache(modelKey);
      const results = new Array(texts.length);
      // Distinct uncached texts in first-seen order, each with its cache key
      // and every position it occupies in `texts`, so a text repeated within
      // one batch is sent to the provider only once.
      const pending = new Map();
      for (let i = 0; i < texts.length; i++) {
        const text = texts[i];
        const known = pending.get(text);
        if (known) {
          results[i] = null;
          known.indices.push(i);
          continue;
        }
        const key = cacheKey(text);
        const cached = cache.get(key);
        if (cached) {
          results[i] = cached;
        } else {
          results[i] = null;
          pending.set(text, { key, indices: [i] });
        }
      }
      if (pending.size === 0) {
        return results;
      }
      const misses = [...pending.entries()];
      const embeddings = yield* underlying(misses.map(([text]) => text), model);
      for (let j = 0; j < misses.length; j++) {
        const { key, indices } = misses[j][1];
        const embedding = embeddings[j];
        // Never cache a missing vector if the provider returned fewer results
        // than requested.
        if (embedding !== void 0) cache.set(key, embedding);
        for (const index2 of indices) results[index2] = embedding;
      }
      evictIfNeeded(cache);
      return results;
    }),
    size: () => {
      let total = 0;
      for (const c of caches.values()) total += c.size;
      return total;
    },
    clear: () => caches.clear()
  };
};
|
|
3473
|
+
|
|
3474
|
+
// src/circuit-breaker.ts
|
|
3475
|
+
import { Effect as Effect11 } from "effect";
|
|
3476
|
+
/**
 * Create a circuit breaker for LLM effects.
 *
 * - closed: effects run normally; `failureThreshold` consecutive failures
 *   open the breaker.
 * - open: effects fail immediately with an LLMError until `cooldownMs` has
 *   elapsed since the breaker opened.
 * - half_open: at most `halfOpenRequests` probe effects run concurrently and
 *   further calls fail fast. A successful probe closes the breaker; a failed
 *   probe opens it again.
 *
 * @param config Partial overrides merged over `defaultCircuitBreakerConfig`.
 * @returns `protect` (wrap an effect), `state` (current state string) and
 *   `reset` (force the breaker back to closed).
 */
var makeCircuitBreaker = (config = {}) => {
  const { failureThreshold, cooldownMs, halfOpenRequests } = {
    ...defaultCircuitBreakerConfig,
    ...config
  };
  // Always allow at least one probe, otherwise an open breaker could never close.
  const maxProbes = Math.max(1, halfOpenRequests);
  let currentState = "closed";
  let consecutiveFailures = 0;
  let openedAt = 0;
  let probesInFlight = 0;
  const reject = (message) => Effect11.fail(
    new LLMError({
      message,
      provider: "custom",
      cause: void 0
    })
  );
  const onSuccess = () => {
    consecutiveFailures = 0;
    currentState = "closed";
  };
  const onFailure = () => {
    consecutiveFailures++;
    // A failure while probing re-opens immediately; otherwise open at the threshold.
    if (currentState === "half_open" || consecutiveFailures >= failureThreshold) {
      currentState = "open";
      openedAt = Date.now();
    }
  };
  // Run the effect, record the outcome, and re-raise the original cause on failure.
  const settle = (effect) => Effect11.gen(function* () {
    const result = yield* Effect11.exit(effect);
    if (result._tag === "Success") {
      onSuccess();
      return result.value;
    }
    onFailure();
    return yield* Effect11.failCause(result.cause);
  });
  // Check and claim a probe slot in one synchronous step.
  const admitProbe = Effect11.sync(() => {
    if (probesInFlight >= maxProbes) return false;
    probesInFlight++;
    return true;
  });
  // Release the slot even when the probe is interrupted, so the breaker
  // cannot get stuck rejecting every call in half_open.
  const releaseProbe = (admitted) => Effect11.sync(() => {
    if (admitted) probesInFlight = Math.max(0, probesInFlight - 1);
  });
  return {
    protect: (effect) => Effect11.gen(function* () {
      if (currentState === "open") {
        if (Date.now() - openedAt >= cooldownMs) {
          currentState = "half_open";
        } else {
          return yield* reject(
            `Circuit breaker OPEN \u2014 ${consecutiveFailures} consecutive failures. Retry after ${Math.ceil((cooldownMs - (Date.now() - openedAt)) / 1e3)}s cooldown.`
          );
        }
      }
      if (currentState === "half_open") {
        return yield* Effect11.acquireUseRelease(
          admitProbe,
          (admitted) => admitted ? settle(effect) : reject(
            `Circuit breaker HALF_OPEN \u2014 a probe request is already in flight. Retry shortly.`
          ),
          releaseProbe
        );
      }
      return yield* settle(effect);
    }),
    state: () => currentState,
    reset: () => {
      currentState = "closed";
      consecutiveFailures = 0;
      openedAt = 0;
      probesInFlight = 0;
    }
  };
};
|
|
3526
|
+
|
|
3527
|
+
// src/runtime.ts
|
|
3528
|
+
/**
 * Layer that reads the LLMService from its environment and provides it again
 * with `embed` replaced by a cached version built by `makeEmbeddingCache`.
 * Every other service member is passed through unchanged.
 */
var EmbeddingCacheLayer = Layer9.effect(
  LLMService,
  Effect12.gen(function* () {
    const inner = yield* LLMService;
    const cachedEmbed = makeEmbeddingCache(inner.embed).embed;
    return LLMService.of({ ...inner, embed: cachedEmbed });
  })
);
|
|
3536
|
+
/**
 * Build a layer that re-provides the LLMService from its environment with
 * `complete` and `stream` routed through one circuit breaker created from
 * `config`.
 *
 * NOTE(review): only `complete` and `stream` are guarded here; every other
 * member is passed through as-is — confirm that this is intended.
 *
 * @param config Partial circuit breaker settings passed to `makeCircuitBreaker`.
 */
var makeCircuitBreakerLayer = (config) => {
  const guardedService = Effect12.gen(function* () {
    const inner = yield* LLMService;
    const cb = makeCircuitBreaker(config);
    const overrides = {
      complete: (req) => cb.protect(inner.complete(req)),
      stream: (req) => cb.protect(inner.stream(req))
    };
    return LLMService.of({ ...inner, ...overrides });
  });
  return Layer9.effect(LLMService, guardedService);
};
|
|
3548
|
+
var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
|
|
3380
3549
|
if (provider === "test") {
|
|
3381
3550
|
return Layer9.mergeAll(
|
|
3382
3551
|
TestLLMServiceLayer(testResponses ?? {}),
|
|
@@ -3390,16 +3559,21 @@ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, mode
|
|
|
3390
3559
|
if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
|
|
3391
3560
|
const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
|
|
3392
3561
|
const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3562
|
+
const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
|
|
3563
|
+
let llmLayer = EmbeddingCacheLayer.pipe(Layer9.provide(baseProviderLayer));
|
|
3564
|
+
if (circuitBreaker) {
|
|
3565
|
+
llmLayer = EmbeddingCacheLayer.pipe(
|
|
3566
|
+
Layer9.provide(makeCircuitBreakerLayer(circuitBreaker).pipe(Layer9.provide(baseProviderLayer)))
|
|
3567
|
+
);
|
|
3568
|
+
}
|
|
3569
|
+
return Layer9.mergeAll(llmLayer, PromptManagerLive);
|
|
3397
3570
|
};
|
|
3398
3571
|
/**
 * Build the LLM layer from an explicit config value instead of the environment.
 *
 * @param config LLMConfig service value provided to the selected provider.
 * @param provider Provider name; any value other than "anthropic", "openai",
 *   "gemini" or "litellm" selects the local provider. Defaults to "anthropic".
 * @returns The embedding-cached provider layer merged with PromptManagerLive.
 */
var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
  const configLayer = Layer9.succeed(LLMConfig, config);
  let providerLayer;
  switch (provider) {
    case "anthropic":
      providerLayer = AnthropicProviderLive;
      break;
    case "openai":
      providerLayer = OpenAIProviderLive;
      break;
    case "gemini":
      providerLayer = GeminiProviderLive;
      break;
    case "litellm":
      providerLayer = LiteLLMProviderLive;
      break;
    default:
      providerLayer = LocalProviderLive;
  }
  const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
  const cachedLlmLayer = EmbeddingCacheLayer.pipe(Layer9.provide(baseProviderLayer));
  return Layer9.mergeAll(cachedLlmLayer, PromptManagerLive);
};
|
|
@@ -3447,10 +3621,13 @@ export {
|
|
|
3447
3621
|
calculateCost,
|
|
3448
3622
|
createLLMProviderLayer,
|
|
3449
3623
|
createLLMProviderLayerWithConfig,
|
|
3624
|
+
defaultCircuitBreakerConfig,
|
|
3450
3625
|
estimateTokenCount,
|
|
3451
3626
|
getProviderDefaultModel,
|
|
3452
3627
|
llmConfigFromEnv,
|
|
3453
3628
|
makeCacheable,
|
|
3629
|
+
makeCircuitBreaker,
|
|
3630
|
+
makeEmbeddingCache,
|
|
3454
3631
|
retryPolicy
|
|
3455
3632
|
};
|
|
3456
3633
|
//# sourceMappingURL=index.js.map
|