@reactive-agents/llm-provider 0.7.8 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +545 -14
- package/dist/index.js +979 -169
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1120,6 +1120,14 @@ var init_dist = __esm({
|
|
|
1120
1120
|
}
|
|
1121
1121
|
});
|
|
1122
1122
|
|
|
1123
|
+
// src/capabilities.ts
|
|
1124
|
+
var DEFAULT_CAPABILITIES = {
|
|
1125
|
+
supportsToolCalling: false,
|
|
1126
|
+
supportsStreaming: true,
|
|
1127
|
+
supportsStructuredOutput: false,
|
|
1128
|
+
supportsLogprobs: false
|
|
1129
|
+
};
|
|
1130
|
+
|
|
1123
1131
|
// src/types.ts
|
|
1124
1132
|
import { Schema } from "effect";
|
|
1125
1133
|
var LLMProviderType = Schema.Literal(
|
|
@@ -1378,7 +1386,24 @@ var CompletionResponseSchema = Schema.Struct({
|
|
|
1378
1386
|
/** Tool calls emitted by the model (if any) */
|
|
1379
1387
|
toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
|
|
1380
1388
|
/** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
|
|
1381
|
-
thinking: Schema.optional(Schema.String)
|
|
1389
|
+
thinking: Schema.optional(Schema.String),
|
|
1390
|
+
/** Token-level log probabilities (when requested via logprobs in CompletionRequest) */
|
|
1391
|
+
logprobs: Schema.optional(
|
|
1392
|
+
Schema.Array(
|
|
1393
|
+
Schema.Struct({
|
|
1394
|
+
token: Schema.String,
|
|
1395
|
+
logprob: Schema.Number,
|
|
1396
|
+
topLogprobs: Schema.optional(
|
|
1397
|
+
Schema.Array(
|
|
1398
|
+
Schema.Struct({
|
|
1399
|
+
token: Schema.String,
|
|
1400
|
+
logprob: Schema.Number
|
|
1401
|
+
})
|
|
1402
|
+
)
|
|
1403
|
+
)
|
|
1404
|
+
})
|
|
1405
|
+
)
|
|
1406
|
+
)
|
|
1382
1407
|
});
|
|
1383
1408
|
|
|
1384
1409
|
// src/errors.ts
|
|
@@ -1418,12 +1443,16 @@ var llmConfigFromEnv = LLMConfig.of({
|
|
|
1418
1443
|
provider: process.env.EMBEDDING_PROVIDER ?? "openai",
|
|
1419
1444
|
batchSize: 100
|
|
1420
1445
|
},
|
|
1421
|
-
supportsPromptCaching: (
|
|
1446
|
+
supportsPromptCaching: (() => {
|
|
1447
|
+
const m = process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514";
|
|
1448
|
+
return m.startsWith("claude") || m.startsWith("gemini") || m.startsWith("gpt");
|
|
1449
|
+
})(),
|
|
1422
1450
|
maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
|
|
1423
1451
|
timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
|
|
1424
1452
|
defaultMaxTokens: 4096,
|
|
1425
1453
|
defaultTemperature: Number(process.env.LLM_DEFAULT_TEMPERATURE ?? 0.7),
|
|
1426
|
-
observabilityVerbosity: process.env.LLM_OBSERVABILITY_VERBOSITY ?? "full"
|
|
1454
|
+
observabilityVerbosity: process.env.LLM_OBSERVABILITY_VERBOSITY ?? "full",
|
|
1455
|
+
pricingRegistry: {}
|
|
1427
1456
|
});
|
|
1428
1457
|
var LLMConfigFromEnv = Layer.succeed(LLMConfig, llmConfigFromEnv);
|
|
1429
1458
|
|
|
@@ -1463,20 +1492,84 @@ var estimateTokenCount = (messages) => Effect2.sync(() => {
|
|
|
1463
1492
|
}
|
|
1464
1493
|
return totalTokens;
|
|
1465
1494
|
});
|
|
1466
|
-
|
|
1495
|
+
function getPricing(model, registry, pricing) {
|
|
1496
|
+
if (pricing?.input !== void 0 && pricing?.output !== void 0) {
|
|
1497
|
+
return { input: pricing.input, output: pricing.output };
|
|
1498
|
+
}
|
|
1499
|
+
if (registry && registry[model]) return registry[model];
|
|
1467
1500
|
const costMap = {
|
|
1468
|
-
|
|
1501
|
+
// ── Anthropic ──
|
|
1502
|
+
"claude-3-5-haiku-20241022": { input: 0.8, output: 4 },
|
|
1503
|
+
"claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
|
|
1469
1504
|
"claude-sonnet-4-20250514": { input: 3, output: 15 },
|
|
1470
1505
|
"claude-sonnet-4-5-20250929": { input: 3, output: 15 },
|
|
1471
1506
|
"claude-opus-4-20250514": { input: 15, output: 75 },
|
|
1472
|
-
"
|
|
1507
|
+
"claude-3-5-sonnet-20241022": { input: 3, output: 15 },
|
|
1508
|
+
"claude-3-5-sonnet-20240620": { input: 3, output: 15 },
|
|
1509
|
+
"claude-3-opus-20240229": { input: 15, output: 75 },
|
|
1510
|
+
"claude-3-sonnet-20240229": { input: 3, output: 15 },
|
|
1511
|
+
// ── OpenAI ──
|
|
1473
1512
|
"gpt-4o": { input: 2.5, output: 10 },
|
|
1513
|
+
"gpt-4o-2024-11-20": { input: 2.5, output: 10 },
|
|
1514
|
+
"gpt-4o-2024-08-06": { input: 2.5, output: 10 },
|
|
1515
|
+
"gpt-4o-2024-05-13": { input: 5, output: 15 },
|
|
1516
|
+
"gpt-4o-mini": { input: 0.15, output: 0.6 },
|
|
1517
|
+
"gpt-4o-mini-2024-07-18": { input: 0.15, output: 0.6 },
|
|
1518
|
+
"gpt-4-turbo": { input: 10, output: 30 },
|
|
1519
|
+
"gpt-4-turbo-2024-04-09": { input: 10, output: 30 },
|
|
1520
|
+
"gpt-4": { input: 30, output: 60 },
|
|
1521
|
+
"gpt-4-0613": { input: 30, output: 60 },
|
|
1522
|
+
"gpt-3.5-turbo": { input: 0.5, output: 1.5 },
|
|
1523
|
+
"o1": { input: 15, output: 60 },
|
|
1524
|
+
"o1-mini": { input: 3, output: 12 },
|
|
1525
|
+
"o1-preview": { input: 15, output: 60 },
|
|
1526
|
+
"o3": { input: 10, output: 40 },
|
|
1527
|
+
"o3-mini": { input: 1.1, output: 4.4 },
|
|
1528
|
+
"o4-mini": { input: 1.1, output: 4.4 },
|
|
1529
|
+
// ── Google Gemini ──
|
|
1474
1530
|
"gemini-2.0-flash": { input: 0.1, output: 0.4 },
|
|
1531
|
+
"gemini-2.5-flash": { input: 0.15, output: 0.6 },
|
|
1532
|
+
"gemini-2.5-flash-preview-05-20": { input: 0.15, output: 0.6 },
|
|
1533
|
+
"gemini-2.5-pro": { input: 1.25, output: 10 },
|
|
1475
1534
|
"gemini-2.5-pro-preview-03-25": { input: 1.25, output: 10 },
|
|
1476
|
-
"gemini-
|
|
1535
|
+
"gemini-2.5-pro-preview-05-06": { input: 1.25, output: 10 },
|
|
1536
|
+
"gemini-1.5-pro": { input: 1.25, output: 5 },
|
|
1537
|
+
"gemini-1.5-flash": { input: 0.075, output: 0.3 },
|
|
1538
|
+
"gemini-embedding-001": { input: 0, output: 0 },
|
|
1539
|
+
// ── Meta Llama (via LiteLLM / cloud providers) ──
|
|
1540
|
+
"llama-3.1-405b": { input: 3, output: 3 },
|
|
1541
|
+
"llama-3.1-70b": { input: 0.88, output: 0.88 },
|
|
1542
|
+
"llama-3.1-8b": { input: 0.18, output: 0.18 },
|
|
1543
|
+
"llama-3.3-70b": { input: 0.88, output: 0.88 },
|
|
1544
|
+
// ── Mistral ──
|
|
1545
|
+
"mistral-large-latest": { input: 2, output: 6 },
|
|
1546
|
+
"mistral-small-latest": { input: 0.2, output: 0.6 },
|
|
1547
|
+
"codestral-latest": { input: 0.3, output: 0.9 }
|
|
1477
1548
|
};
|
|
1478
|
-
|
|
1479
|
-
|
|
1549
|
+
if (costMap[model]) return costMap[model];
|
|
1550
|
+
const m = model.toLowerCase();
|
|
1551
|
+
if (m.includes("haiku") || m.includes("flash") || m.includes("mini") || m.includes("small") || m.includes("8b") || m.includes("7b") || m.includes("lite")) {
|
|
1552
|
+
return { input: 0.15, output: 0.6 };
|
|
1553
|
+
}
|
|
1554
|
+
if (m.includes("opus") || m.includes("large") || m.includes("405b") || m.includes("gpt-4") && !m.includes("turbo") && !m.includes("o-") && !m.includes("mini")) {
|
|
1555
|
+
return { input: 15, output: 75 };
|
|
1556
|
+
}
|
|
1557
|
+
return { input: 3, output: 15 };
|
|
1558
|
+
}
|
|
1559
|
+
var calculateCost = (inputTokens, outputTokens, model, usage, registry, pricing) => {
|
|
1560
|
+
const costs = getPricing(model, registry, pricing);
|
|
1561
|
+
const anthropicCacheRead = usage?.cache_read_input_tokens ?? 0;
|
|
1562
|
+
const anthropicCacheWrite = usage?.cache_creation_input_tokens ?? 0;
|
|
1563
|
+
const openaiCached = usage?.cached_tokens ?? 0;
|
|
1564
|
+
const geminiCached = usage?.cached_content_token_count ?? 0;
|
|
1565
|
+
const baseInputTokens = inputTokens - anthropicCacheRead - anthropicCacheWrite - openaiCached - geminiCached;
|
|
1566
|
+
const inputCost = baseInputTokens / 1e6 * costs.input;
|
|
1567
|
+
const outputCost = outputTokens / 1e6 * costs.output;
|
|
1568
|
+
const anthropicCacheWriteCost = anthropicCacheWrite / 1e6 * costs.input * 1.25;
|
|
1569
|
+
const anthropicCacheReadCost = anthropicCacheRead / 1e6 * costs.input * 0.1;
|
|
1570
|
+
const openaiCachedCost = openaiCached / 1e6 * costs.input * 0.5;
|
|
1571
|
+
const geminiCachedCost = geminiCached / 1e6 * costs.input * 0.25;
|
|
1572
|
+
return inputCost + outputCost + anthropicCacheWriteCost + anthropicCacheReadCost + openaiCachedCost + geminiCachedCost;
|
|
1480
1573
|
};
|
|
1481
1574
|
|
|
1482
1575
|
// src/prompt-manager.ts
|
|
@@ -1614,13 +1707,14 @@ var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "syste
|
|
|
1614
1707
|
)
|
|
1615
1708
|
};
|
|
1616
1709
|
});
|
|
1617
|
-
var toAnthropicTool = (tool) => ({
|
|
1710
|
+
var toAnthropicTool = (tool, cached = false) => ({
|
|
1618
1711
|
name: tool.name,
|
|
1619
1712
|
description: tool.description,
|
|
1620
1713
|
input_schema: {
|
|
1621
1714
|
type: "object",
|
|
1622
1715
|
...tool.inputSchema
|
|
1623
|
-
}
|
|
1716
|
+
},
|
|
1717
|
+
...cached ? { cache_control: { type: "ephemeral" } } : {}
|
|
1624
1718
|
});
|
|
1625
1719
|
var toEffectError = (error, provider) => {
|
|
1626
1720
|
const err = error;
|
|
@@ -1673,11 +1767,13 @@ var AnthropicProviderLive = Layer3.effect(
|
|
|
1673
1767
|
system: buildSystemParam(request.systemPrompt),
|
|
1674
1768
|
messages: toAnthropicMessages(request.messages),
|
|
1675
1769
|
stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
|
|
1676
|
-
tools: request.tools?.map(
|
|
1770
|
+
tools: request.tools?.map(
|
|
1771
|
+
(t, i) => toAnthropicTool(t, i === (request.tools?.length ?? 0) - 1)
|
|
1772
|
+
)
|
|
1677
1773
|
}),
|
|
1678
1774
|
catch: (error) => toEffectError(error, "anthropic")
|
|
1679
1775
|
});
|
|
1680
|
-
return mapAnthropicResponse(response, model);
|
|
1776
|
+
return mapAnthropicResponse(response, model, config.pricingRegistry);
|
|
1681
1777
|
}).pipe(
|
|
1682
1778
|
Effect4.retry(retryPolicy),
|
|
1683
1779
|
Effect4.timeout("30 seconds"),
|
|
@@ -1701,10 +1797,24 @@ var AnthropicProviderLive = Layer3.effect(
|
|
|
1701
1797
|
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
1702
1798
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
1703
1799
|
system: buildSystemParam(request.systemPrompt),
|
|
1704
|
-
messages: toAnthropicMessages(request.messages)
|
|
1800
|
+
messages: toAnthropicMessages(request.messages),
|
|
1801
|
+
tools: request.tools?.map(
|
|
1802
|
+
(t, i) => toAnthropicTool(t, i === (request.tools?.length ?? 0) - 1)
|
|
1803
|
+
)
|
|
1705
1804
|
});
|
|
1706
|
-
stream.on("
|
|
1707
|
-
|
|
1805
|
+
stream.on("streamEvent", (event) => {
|
|
1806
|
+
const e = event;
|
|
1807
|
+
if (e.type === "content_block_delta") {
|
|
1808
|
+
if (e.delta?.type === "text_delta" && e.delta.text) {
|
|
1809
|
+
emit.single({ type: "text_delta", text: e.delta.text });
|
|
1810
|
+
} else if (e.delta?.type === "input_json_delta" && e.delta.partial_json) {
|
|
1811
|
+
emit.single({ type: "tool_use_delta", input: e.delta.partial_json });
|
|
1812
|
+
}
|
|
1813
|
+
} else if (e.type === "content_block_start") {
|
|
1814
|
+
if (e.content_block?.type === "tool_use" && e.content_block.id && e.content_block.name) {
|
|
1815
|
+
emit.single({ type: "tool_use_start", id: e.content_block.id, name: e.content_block.name });
|
|
1816
|
+
}
|
|
1817
|
+
}
|
|
1708
1818
|
});
|
|
1709
1819
|
stream.on("finalMessage", (message) => {
|
|
1710
1820
|
const msg = message;
|
|
@@ -1721,7 +1831,12 @@ var AnthropicProviderLive = Layer3.effect(
|
|
|
1721
1831
|
estimatedCost: calculateCost(
|
|
1722
1832
|
msg.usage.input_tokens,
|
|
1723
1833
|
msg.usage.output_tokens,
|
|
1724
|
-
model
|
|
1834
|
+
model,
|
|
1835
|
+
{
|
|
1836
|
+
cache_creation_input_tokens: msg.usage.cache_creation_input_tokens,
|
|
1837
|
+
cache_read_input_tokens: msg.usage.cache_read_input_tokens
|
|
1838
|
+
},
|
|
1839
|
+
config.pricingRegistry
|
|
1725
1840
|
)
|
|
1726
1841
|
}
|
|
1727
1842
|
});
|
|
@@ -1867,11 +1982,17 @@ No markdown, no code fences, just raw JSON.`
|
|
|
1867
1982
|
jsonSchemaEnforcement: false,
|
|
1868
1983
|
prefillSupport: true,
|
|
1869
1984
|
grammarConstraints: false
|
|
1985
|
+
}),
|
|
1986
|
+
capabilities: () => Effect4.succeed({
|
|
1987
|
+
supportsToolCalling: true,
|
|
1988
|
+
supportsStreaming: true,
|
|
1989
|
+
supportsStructuredOutput: true,
|
|
1990
|
+
supportsLogprobs: false
|
|
1870
1991
|
})
|
|
1871
1992
|
});
|
|
1872
1993
|
})
|
|
1873
1994
|
);
|
|
1874
|
-
var mapAnthropicResponse = (response, model) => {
|
|
1995
|
+
var mapAnthropicResponse = (response, model, registry) => {
|
|
1875
1996
|
const textContent = response.content.filter(
|
|
1876
1997
|
(b) => b.type === "text"
|
|
1877
1998
|
).map((b) => b.text).join("");
|
|
@@ -1893,7 +2014,12 @@ var mapAnthropicResponse = (response, model) => {
|
|
|
1893
2014
|
estimatedCost: calculateCost(
|
|
1894
2015
|
response.usage.input_tokens,
|
|
1895
2016
|
response.usage.output_tokens,
|
|
1896
|
-
model
|
|
2017
|
+
model,
|
|
2018
|
+
{
|
|
2019
|
+
cache_creation_input_tokens: response.usage.cache_creation_input_tokens,
|
|
2020
|
+
cache_read_input_tokens: response.usage.cache_read_input_tokens
|
|
2021
|
+
},
|
|
2022
|
+
registry
|
|
1897
2023
|
)
|
|
1898
2024
|
},
|
|
1899
2025
|
model: response.model ?? model,
|
|
@@ -1911,6 +2037,28 @@ var toOpenAIMessages = (messages) => messages.map((m) => {
|
|
|
1911
2037
|
content: m.content
|
|
1912
2038
|
};
|
|
1913
2039
|
}
|
|
2040
|
+
if (m.role === "assistant" && typeof m.content !== "string") {
|
|
2041
|
+
const blocks = m.content;
|
|
2042
|
+
const textParts = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
|
|
2043
|
+
const toolUseBlocks = blocks.filter(
|
|
2044
|
+
(b) => b.type === "tool_use"
|
|
2045
|
+
);
|
|
2046
|
+
if (toolUseBlocks.length > 0) {
|
|
2047
|
+
return {
|
|
2048
|
+
role: "assistant",
|
|
2049
|
+
content: textParts || "",
|
|
2050
|
+
tool_calls: toolUseBlocks.map((tc) => ({
|
|
2051
|
+
id: tc.id,
|
|
2052
|
+
type: "function",
|
|
2053
|
+
function: {
|
|
2054
|
+
name: tc.name,
|
|
2055
|
+
arguments: typeof tc.input === "string" ? tc.input : JSON.stringify(tc.input)
|
|
2056
|
+
}
|
|
2057
|
+
}))
|
|
2058
|
+
};
|
|
2059
|
+
}
|
|
2060
|
+
return { role: "assistant", content: textParts };
|
|
2061
|
+
}
|
|
1914
2062
|
return {
|
|
1915
2063
|
role: m.role,
|
|
1916
2064
|
content: typeof m.content === "string" ? m.content : m.content.filter(
|
|
@@ -1933,12 +2081,49 @@ var toEffectError2 = (error, provider) => {
|
|
|
1933
2081
|
cause: error
|
|
1934
2082
|
});
|
|
1935
2083
|
};
|
|
1936
|
-
var
|
|
2084
|
+
var isStrictToolCallingSupported = (model) => {
|
|
2085
|
+
const m = model.toLowerCase();
|
|
2086
|
+
return m.includes("gpt-4o") && (m.includes("2024-08-06") || m.includes("2024-11-20") || !m.includes("2024-05-13")) || m.includes("gpt-4o-mini") || m.startsWith("o1") || m.startsWith("o3") || m.startsWith("o4");
|
|
2087
|
+
};
|
|
2088
|
+
var toStrictToolSchema = (schema) => {
|
|
2089
|
+
if (!schema || typeof schema !== "object") return schema;
|
|
2090
|
+
const newSchema = JSON.parse(JSON.stringify(schema));
|
|
2091
|
+
if (newSchema.type === "object" && newSchema.properties) {
|
|
2092
|
+
const originalRequired = new Set(newSchema.required ?? []);
|
|
2093
|
+
newSchema.additionalProperties = false;
|
|
2094
|
+
newSchema.required = Object.keys(newSchema.properties);
|
|
2095
|
+
for (const key of Object.keys(newSchema.properties)) {
|
|
2096
|
+
const prop = newSchema.properties[key];
|
|
2097
|
+
if (typeof prop === "object" && prop !== null) {
|
|
2098
|
+
delete prop.default;
|
|
2099
|
+
}
|
|
2100
|
+
if (!originalRequired.has(key) && prop && typeof prop === "object") {
|
|
2101
|
+
if (prop.type && prop.type !== "null" && !prop.anyOf) {
|
|
2102
|
+
prop.anyOf = [{ type: prop.type }, { type: "null" }];
|
|
2103
|
+
delete prop.type;
|
|
2104
|
+
}
|
|
2105
|
+
}
|
|
2106
|
+
if (prop.type === "object" && prop.properties) {
|
|
2107
|
+
newSchema.properties[key] = toStrictToolSchema(prop);
|
|
2108
|
+
} else if (prop.anyOf) {
|
|
2109
|
+
prop.anyOf = prop.anyOf.map(
|
|
2110
|
+
(variant) => variant && variant.type === "object" ? { ...variant, additionalProperties: false } : variant
|
|
2111
|
+
);
|
|
2112
|
+
}
|
|
2113
|
+
if (prop.type === "array" && prop.items && prop.items.type === "object") {
|
|
2114
|
+
newSchema.properties[key].items = toStrictToolSchema(prop.items);
|
|
2115
|
+
}
|
|
2116
|
+
}
|
|
2117
|
+
}
|
|
2118
|
+
return newSchema;
|
|
2119
|
+
};
|
|
2120
|
+
var toOpenAITool = (tool, strict) => ({
|
|
1937
2121
|
type: "function",
|
|
1938
2122
|
function: {
|
|
1939
2123
|
name: tool.name,
|
|
1940
2124
|
description: tool.description,
|
|
1941
|
-
parameters: tool.inputSchema
|
|
2125
|
+
parameters: strict ? toStrictToolSchema(tool.inputSchema) : tool.inputSchema,
|
|
2126
|
+
strict: strict || void 0
|
|
1942
2127
|
}
|
|
1943
2128
|
});
|
|
1944
2129
|
var OpenAIProviderLive = Layer4.effect(
|
|
@@ -1970,14 +2155,21 @@ var OpenAIProviderLive = Layer4.effect(
|
|
|
1970
2155
|
messages,
|
|
1971
2156
|
stop: request.stopSequences ? [...request.stopSequences] : void 0
|
|
1972
2157
|
};
|
|
2158
|
+
if (request.logprobs) {
|
|
2159
|
+
requestBody.logprobs = true;
|
|
2160
|
+
if (request.topLogprobs != null) {
|
|
2161
|
+
requestBody.top_logprobs = request.topLogprobs;
|
|
2162
|
+
}
|
|
2163
|
+
}
|
|
1973
2164
|
if (request.tools && request.tools.length > 0) {
|
|
1974
|
-
|
|
2165
|
+
const strict = isStrictToolCallingSupported(model);
|
|
2166
|
+
requestBody.tools = request.tools.map((t) => toOpenAITool(t, strict));
|
|
1975
2167
|
}
|
|
1976
2168
|
const response = yield* Effect5.tryPromise({
|
|
1977
2169
|
try: () => client.chat.completions.create(requestBody),
|
|
1978
2170
|
catch: (error) => toEffectError2(error, "openai")
|
|
1979
2171
|
});
|
|
1980
|
-
return mapOpenAIResponse(response, model);
|
|
2172
|
+
return mapOpenAIResponse(response, model, config.pricingRegistry);
|
|
1981
2173
|
}).pipe(
|
|
1982
2174
|
Effect5.retry(retryPolicy),
|
|
1983
2175
|
Effect5.timeout("30 seconds"),
|
|
@@ -2009,38 +2201,71 @@ var OpenAIProviderLive = Layer4.effect(
|
|
|
2009
2201
|
}
|
|
2010
2202
|
return msgs;
|
|
2011
2203
|
})(),
|
|
2012
|
-
|
|
2204
|
+
tools: request.tools && request.tools.length > 0 ? request.tools.map((t) => toOpenAITool(t, isStrictToolCallingSupported(model))) : void 0,
|
|
2205
|
+
stream: true,
|
|
2206
|
+
stream_options: { include_usage: true }
|
|
2013
2207
|
});
|
|
2014
2208
|
let fullContent = "";
|
|
2209
|
+
const toolCallAccum = /* @__PURE__ */ new Map();
|
|
2210
|
+
let finalUsage;
|
|
2015
2211
|
for await (const chunk of stream) {
|
|
2016
2212
|
const delta = chunk.choices[0]?.delta?.content;
|
|
2017
2213
|
if (delta) {
|
|
2018
2214
|
fullContent += delta;
|
|
2019
2215
|
emit.single({ type: "text_delta", text: delta });
|
|
2020
2216
|
}
|
|
2217
|
+
const toolDeltas = chunk.choices[0]?.delta?.tool_calls;
|
|
2218
|
+
if (toolDeltas) {
|
|
2219
|
+
for (const tc of toolDeltas) {
|
|
2220
|
+
const existing = toolCallAccum.get(tc.index);
|
|
2221
|
+
if (existing) {
|
|
2222
|
+
if (tc.function?.arguments) existing.arguments += tc.function.arguments;
|
|
2223
|
+
} else {
|
|
2224
|
+
toolCallAccum.set(tc.index, {
|
|
2225
|
+
id: tc.id ?? "",
|
|
2226
|
+
name: tc.function?.name ?? "",
|
|
2227
|
+
arguments: tc.function?.arguments ?? ""
|
|
2228
|
+
});
|
|
2229
|
+
if (tc.id && tc.function?.name) {
|
|
2230
|
+
emit.single({ type: "tool_use_start", id: tc.id, name: tc.function.name });
|
|
2231
|
+
}
|
|
2232
|
+
}
|
|
2233
|
+
if (tc.function?.arguments) {
|
|
2234
|
+
emit.single({ type: "tool_use_delta", input: tc.function.arguments });
|
|
2235
|
+
}
|
|
2236
|
+
}
|
|
2237
|
+
}
|
|
2238
|
+
if (chunk.usage) {
|
|
2239
|
+
finalUsage = chunk.usage;
|
|
2240
|
+
}
|
|
2021
2241
|
if (chunk.choices[0]?.finish_reason) {
|
|
2022
2242
|
emit.single({
|
|
2023
2243
|
type: "content_complete",
|
|
2024
2244
|
content: fullContent
|
|
2025
2245
|
});
|
|
2026
|
-
const inputTokens = chunk.usage?.prompt_tokens ?? 0;
|
|
2027
|
-
const outputTokens = chunk.usage?.completion_tokens ?? 0;
|
|
2028
|
-
emit.single({
|
|
2029
|
-
type: "usage",
|
|
2030
|
-
usage: {
|
|
2031
|
-
inputTokens,
|
|
2032
|
-
outputTokens,
|
|
2033
|
-
totalTokens: inputTokens + outputTokens,
|
|
2034
|
-
estimatedCost: calculateCost(
|
|
2035
|
-
inputTokens,
|
|
2036
|
-
outputTokens,
|
|
2037
|
-
model
|
|
2038
|
-
)
|
|
2039
|
-
}
|
|
2040
|
-
});
|
|
2041
|
-
emit.end();
|
|
2042
2246
|
}
|
|
2043
2247
|
}
|
|
2248
|
+
const inputTokens = finalUsage?.prompt_tokens ?? 0;
|
|
2249
|
+
const outputTokens = finalUsage?.completion_tokens ?? 0;
|
|
2250
|
+
const cacheUsage = {
|
|
2251
|
+
cached_tokens: finalUsage?.prompt_tokens_details?.cached_tokens
|
|
2252
|
+
};
|
|
2253
|
+
emit.single({
|
|
2254
|
+
type: "usage",
|
|
2255
|
+
usage: {
|
|
2256
|
+
inputTokens,
|
|
2257
|
+
outputTokens,
|
|
2258
|
+
totalTokens: inputTokens + outputTokens,
|
|
2259
|
+
estimatedCost: calculateCost(
|
|
2260
|
+
inputTokens,
|
|
2261
|
+
outputTokens,
|
|
2262
|
+
model,
|
|
2263
|
+
cacheUsage,
|
|
2264
|
+
config.pricingRegistry
|
|
2265
|
+
)
|
|
2266
|
+
}
|
|
2267
|
+
});
|
|
2268
|
+
emit.end();
|
|
2044
2269
|
} catch (error) {
|
|
2045
2270
|
const err = error;
|
|
2046
2271
|
emit.fail(
|
|
@@ -2105,7 +2330,8 @@ ${schemaStr}`
|
|
|
2105
2330
|
});
|
|
2106
2331
|
const response = mapOpenAIResponse(
|
|
2107
2332
|
completeResult,
|
|
2108
|
-
model
|
|
2333
|
+
model,
|
|
2334
|
+
config.pricingRegistry
|
|
2109
2335
|
);
|
|
2110
2336
|
try {
|
|
2111
2337
|
const parsed = JSON.parse(response.content);
|
|
@@ -2167,11 +2393,17 @@ ${schemaStr}`
|
|
|
2167
2393
|
jsonSchemaEnforcement: true,
|
|
2168
2394
|
prefillSupport: false,
|
|
2169
2395
|
grammarConstraints: false
|
|
2396
|
+
}),
|
|
2397
|
+
capabilities: () => Effect5.succeed({
|
|
2398
|
+
supportsToolCalling: true,
|
|
2399
|
+
supportsStreaming: true,
|
|
2400
|
+
supportsStructuredOutput: true,
|
|
2401
|
+
supportsLogprobs: true
|
|
2170
2402
|
})
|
|
2171
2403
|
});
|
|
2172
2404
|
})
|
|
2173
2405
|
);
|
|
2174
|
-
var mapOpenAIResponse = (response, model) => {
|
|
2406
|
+
var mapOpenAIResponse = (response, model, registry) => {
|
|
2175
2407
|
const message = response.choices[0]?.message;
|
|
2176
2408
|
const content = message?.content ?? "";
|
|
2177
2409
|
const rawToolCalls = message?.tool_calls;
|
|
@@ -2190,6 +2422,17 @@ var mapOpenAIResponse = (response, model) => {
|
|
|
2190
2422
|
input
|
|
2191
2423
|
};
|
|
2192
2424
|
}) : void 0;
|
|
2425
|
+
const rawLogprobs = response.choices[0]?.logprobs?.content;
|
|
2426
|
+
const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
|
|
2427
|
+
token: lp.token,
|
|
2428
|
+
logprob: lp.logprob,
|
|
2429
|
+
...lp.top_logprobs ? {
|
|
2430
|
+
topLogprobs: lp.top_logprobs.map((tlp) => ({
|
|
2431
|
+
token: tlp.token,
|
|
2432
|
+
logprob: tlp.logprob
|
|
2433
|
+
}))
|
|
2434
|
+
} : {}
|
|
2435
|
+
})) : void 0;
|
|
2193
2436
|
return {
|
|
2194
2437
|
content,
|
|
2195
2438
|
stopReason,
|
|
@@ -2200,11 +2443,16 @@ var mapOpenAIResponse = (response, model) => {
|
|
|
2200
2443
|
estimatedCost: calculateCost(
|
|
2201
2444
|
response.usage?.prompt_tokens ?? 0,
|
|
2202
2445
|
response.usage?.completion_tokens ?? 0,
|
|
2203
|
-
model
|
|
2446
|
+
model,
|
|
2447
|
+
{
|
|
2448
|
+
cached_tokens: response.usage?.prompt_tokens_details?.cached_tokens
|
|
2449
|
+
},
|
|
2450
|
+
registry
|
|
2204
2451
|
)
|
|
2205
2452
|
},
|
|
2206
2453
|
model: response.model ?? model,
|
|
2207
|
-
toolCalls
|
|
2454
|
+
toolCalls,
|
|
2455
|
+
...logprobs ? { logprobs } : {}
|
|
2208
2456
|
};
|
|
2209
2457
|
};
|
|
2210
2458
|
|
|
@@ -2216,7 +2464,7 @@ var PROVIDER_DEFAULT_MODELS = {
|
|
|
2216
2464
|
anthropic: "claude-sonnet-4-20250514",
|
|
2217
2465
|
openai: "gpt-4o",
|
|
2218
2466
|
ollama: "cogito:14b",
|
|
2219
|
-
gemini: "gemini-2.
|
|
2467
|
+
gemini: "gemini-2.5-flash",
|
|
2220
2468
|
litellm: "gpt-4o",
|
|
2221
2469
|
test: "test-model"
|
|
2222
2470
|
};
|
|
@@ -2349,7 +2597,9 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2349
2597
|
options: {
|
|
2350
2598
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
2351
2599
|
num_predict: request.maxTokens ?? config.defaultMaxTokens,
|
|
2352
|
-
stop: request.stopSequences ? [...request.stopSequences] : void 0
|
|
2600
|
+
stop: request.stopSequences ? [...request.stopSequences] : void 0,
|
|
2601
|
+
...request.logprobs ? { logprobs: true } : {},
|
|
2602
|
+
...request.topLogprobs != null ? { top_logprobs: request.topLogprobs } : {}
|
|
2353
2603
|
}
|
|
2354
2604
|
});
|
|
2355
2605
|
},
|
|
@@ -2363,6 +2613,17 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2363
2613
|
response.message?.tool_calls
|
|
2364
2614
|
);
|
|
2365
2615
|
const hasToolCalls = toolCalls && toolCalls.length > 0;
|
|
2616
|
+
const rawLogprobs = response.logprobs;
|
|
2617
|
+
const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
|
|
2618
|
+
token: lp.token,
|
|
2619
|
+
logprob: lp.logprob,
|
|
2620
|
+
...lp.top_logprobs ? {
|
|
2621
|
+
topLogprobs: lp.top_logprobs.map((tlp) => ({
|
|
2622
|
+
token: tlp.token,
|
|
2623
|
+
logprob: tlp.logprob
|
|
2624
|
+
}))
|
|
2625
|
+
} : {}
|
|
2626
|
+
})) : void 0;
|
|
2366
2627
|
return {
|
|
2367
2628
|
content,
|
|
2368
2629
|
stopReason: hasToolCalls ? "tool_use" : response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
|
|
@@ -2375,7 +2636,8 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2375
2636
|
},
|
|
2376
2637
|
model: response.model ?? model,
|
|
2377
2638
|
toolCalls,
|
|
2378
|
-
...thinkingContent ? { thinking: thinkingContent } : {}
|
|
2639
|
+
...thinkingContent ? { thinking: thinkingContent } : {},
|
|
2640
|
+
...logprobs ? { logprobs } : {}
|
|
2379
2641
|
};
|
|
2380
2642
|
}).pipe(
|
|
2381
2643
|
Effect6.retry(retryPolicy),
|
|
@@ -2409,6 +2671,7 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2409
2671
|
model,
|
|
2410
2672
|
config.thinking
|
|
2411
2673
|
);
|
|
2674
|
+
const wantLogprobs = request.logprobs ?? false;
|
|
2412
2675
|
const stream = await client.chat({
|
|
2413
2676
|
model,
|
|
2414
2677
|
messages: msgs,
|
|
@@ -2418,10 +2681,13 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2418
2681
|
keep_alive: "5m",
|
|
2419
2682
|
options: {
|
|
2420
2683
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
2421
|
-
num_predict: request.maxTokens ?? config.defaultMaxTokens
|
|
2684
|
+
num_predict: request.maxTokens ?? config.defaultMaxTokens,
|
|
2685
|
+
...wantLogprobs ? { logprobs: true } : {}
|
|
2422
2686
|
}
|
|
2423
2687
|
});
|
|
2424
2688
|
let fullContent = "";
|
|
2689
|
+
const accumulatedLogprobs = [];
|
|
2690
|
+
const accumulatedToolCalls = [];
|
|
2425
2691
|
for await (const chunk of stream) {
|
|
2426
2692
|
if (chunk.message?.content) {
|
|
2427
2693
|
fullContent += chunk.message.content;
|
|
@@ -2430,11 +2696,53 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2430
2696
|
text: chunk.message.content
|
|
2431
2697
|
});
|
|
2432
2698
|
}
|
|
2699
|
+
if (chunk.message?.tool_calls && Array.isArray(chunk.message.tool_calls)) {
|
|
2700
|
+
for (const tc of chunk.message.tool_calls) {
|
|
2701
|
+
const toolCall = {
|
|
2702
|
+
id: `ollama-tc-${Date.now()}-${accumulatedToolCalls.length}`,
|
|
2703
|
+
name: tc.function.name,
|
|
2704
|
+
input: tc.function.arguments
|
|
2705
|
+
};
|
|
2706
|
+
accumulatedToolCalls.push(toolCall);
|
|
2707
|
+
emit.single({
|
|
2708
|
+
type: "tool_use_start",
|
|
2709
|
+
id: toolCall.id,
|
|
2710
|
+
name: toolCall.name
|
|
2711
|
+
});
|
|
2712
|
+
emit.single({
|
|
2713
|
+
type: "tool_use_delta",
|
|
2714
|
+
input: JSON.stringify(tc.function.arguments)
|
|
2715
|
+
});
|
|
2716
|
+
}
|
|
2717
|
+
}
|
|
2718
|
+
if (wantLogprobs) {
|
|
2719
|
+
const chunkLp = chunk.logprobs;
|
|
2720
|
+
if (Array.isArray(chunkLp)) {
|
|
2721
|
+
for (const lp of chunkLp) {
|
|
2722
|
+
accumulatedLogprobs.push({
|
|
2723
|
+
token: lp.token,
|
|
2724
|
+
logprob: lp.logprob,
|
|
2725
|
+
...lp.top_logprobs ? { topLogprobs: lp.top_logprobs.map((t) => ({ token: t.token, logprob: t.logprob })) } : {}
|
|
2726
|
+
});
|
|
2727
|
+
}
|
|
2728
|
+
}
|
|
2729
|
+
}
|
|
2433
2730
|
if (chunk.done) {
|
|
2731
|
+
const hasToolCalls = accumulatedToolCalls.length > 0;
|
|
2732
|
+
const doneReason = chunk.done_reason;
|
|
2434
2733
|
emit.single({
|
|
2435
2734
|
type: "content_complete",
|
|
2436
|
-
content: fullContent
|
|
2735
|
+
content: fullContent,
|
|
2736
|
+
...hasToolCalls ? { stopReason: "tool_use" } : {
|
|
2737
|
+
stopReason: doneReason === "stop" ? "end_turn" : doneReason === "length" ? "max_tokens" : "end_turn"
|
|
2738
|
+
}
|
|
2437
2739
|
});
|
|
2740
|
+
if (accumulatedLogprobs.length > 0) {
|
|
2741
|
+
emit.single({
|
|
2742
|
+
type: "logprobs",
|
|
2743
|
+
logprobs: accumulatedLogprobs
|
|
2744
|
+
});
|
|
2745
|
+
}
|
|
2438
2746
|
emit.single({
|
|
2439
2747
|
type: "usage",
|
|
2440
2748
|
usage: {
|
|
@@ -2563,6 +2871,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2563
2871
|
jsonSchemaEnforcement: true,
|
|
2564
2872
|
prefillSupport: false,
|
|
2565
2873
|
grammarConstraints: true
|
|
2874
|
+
}),
|
|
2875
|
+
capabilities: () => Effect6.succeed({
|
|
2876
|
+
supportsToolCalling: true,
|
|
2877
|
+
supportsStreaming: true,
|
|
2878
|
+
supportsStructuredOutput: true,
|
|
2879
|
+
supportsLogprobs: false
|
|
2566
2880
|
})
|
|
2567
2881
|
});
|
|
2568
2882
|
})
|
|
@@ -2579,7 +2893,7 @@ var toGeminiContents = (messages) => {
|
|
|
2579
2893
|
role: "user",
|
|
2580
2894
|
parts: [{
|
|
2581
2895
|
functionResponse: {
|
|
2582
|
-
name: "
|
|
2896
|
+
name: msg.toolName ?? "unknown_tool",
|
|
2583
2897
|
response: { content: msg.content }
|
|
2584
2898
|
}
|
|
2585
2899
|
}]
|
|
@@ -2601,7 +2915,7 @@ var toGeminiContents = (messages) => {
|
|
|
2601
2915
|
} else if (block.type === "tool_result") {
|
|
2602
2916
|
parts.push({
|
|
2603
2917
|
functionResponse: {
|
|
2604
|
-
name: "
|
|
2918
|
+
name: block.name ?? "unknown_tool",
|
|
2605
2919
|
response: { content: block.content }
|
|
2606
2920
|
}
|
|
2607
2921
|
});
|
|
@@ -2643,7 +2957,7 @@ var toEffectError3 = (error) => {
|
|
|
2643
2957
|
cause: error
|
|
2644
2958
|
});
|
|
2645
2959
|
};
|
|
2646
|
-
var mapGeminiResponse = (response, model) => {
|
|
2960
|
+
var mapGeminiResponse = (response, model, registry) => {
|
|
2647
2961
|
const toolCalls = response.functionCalls?.map((fc, i) => ({
|
|
2648
2962
|
id: `call_${i}`,
|
|
2649
2963
|
name: fc.name,
|
|
@@ -2658,7 +2972,15 @@ var mapGeminiResponse = (response, model) => {
|
|
|
2658
2972
|
inputTokens,
|
|
2659
2973
|
outputTokens,
|
|
2660
2974
|
totalTokens: inputTokens + outputTokens,
|
|
2661
|
-
estimatedCost: calculateCost(
|
|
2975
|
+
estimatedCost: calculateCost(
|
|
2976
|
+
inputTokens,
|
|
2977
|
+
outputTokens,
|
|
2978
|
+
model,
|
|
2979
|
+
{
|
|
2980
|
+
cached_content_token_count: response.usageMetadata?.cachedContentTokenCount
|
|
2981
|
+
},
|
|
2982
|
+
registry
|
|
2983
|
+
)
|
|
2662
2984
|
},
|
|
2663
2985
|
model,
|
|
2664
2986
|
toolCalls: toolCalls?.length ? toolCalls : void 0
|
|
@@ -2714,7 +3036,7 @@ var GeminiProviderLive = Layer6.effect(
|
|
|
2714
3036
|
}),
|
|
2715
3037
|
catch: toEffectError3
|
|
2716
3038
|
});
|
|
2717
|
-
return mapGeminiResponse(response, model);
|
|
3039
|
+
return mapGeminiResponse(response, model, config.pricingRegistry);
|
|
2718
3040
|
}).pipe(
|
|
2719
3041
|
Effect7.retry(retryPolicy),
|
|
2720
3042
|
Effect7.timeout("30 seconds"),
|
|
@@ -2746,30 +3068,56 @@ var GeminiProviderLive = Layer6.effect(
|
|
|
2746
3068
|
config: buildGeminiConfig({
|
|
2747
3069
|
maxTokens: request.maxTokens,
|
|
2748
3070
|
temperature: request.temperature,
|
|
2749
|
-
systemPrompt
|
|
3071
|
+
systemPrompt,
|
|
3072
|
+
tools: request.tools
|
|
2750
3073
|
})
|
|
2751
3074
|
});
|
|
2752
3075
|
let fullContent = "";
|
|
2753
3076
|
let inputTokens = 0;
|
|
2754
3077
|
let outputTokens = 0;
|
|
3078
|
+
let cachedContentTokens = 0;
|
|
3079
|
+
const accumulatedToolCalls = [];
|
|
2755
3080
|
for await (const chunk of stream) {
|
|
2756
3081
|
if (chunk.text) {
|
|
2757
3082
|
emit.single({ type: "text_delta", text: chunk.text });
|
|
2758
3083
|
fullContent += chunk.text;
|
|
2759
3084
|
}
|
|
3085
|
+
const fcs = chunk.functionCalls;
|
|
3086
|
+
if (fcs && fcs.length > 0) {
|
|
3087
|
+
for (const fc of fcs) {
|
|
3088
|
+
const tcId = `gemini-tc-${Date.now()}-${accumulatedToolCalls.length}`;
|
|
3089
|
+
accumulatedToolCalls.push({ id: tcId, name: fc.name, input: fc.args });
|
|
3090
|
+
emit.single({ type: "tool_use_start", id: tcId, name: fc.name });
|
|
3091
|
+
emit.single({ type: "tool_use_delta", input: JSON.stringify(fc.args) });
|
|
3092
|
+
}
|
|
3093
|
+
}
|
|
2760
3094
|
if (chunk.usageMetadata) {
|
|
2761
3095
|
inputTokens = chunk.usageMetadata.promptTokenCount ?? 0;
|
|
2762
3096
|
outputTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;
|
|
3097
|
+
cachedContentTokens = chunk.usageMetadata.cachedContentTokenCount ?? 0;
|
|
2763
3098
|
}
|
|
2764
3099
|
}
|
|
2765
|
-
|
|
3100
|
+
const hasToolCalls = accumulatedToolCalls.length > 0;
|
|
3101
|
+
emit.single({
|
|
3102
|
+
type: "content_complete",
|
|
3103
|
+
content: fullContent,
|
|
3104
|
+
...hasToolCalls ? { stopReason: "tool_use", toolCalls: accumulatedToolCalls } : {}
|
|
3105
|
+
});
|
|
2766
3106
|
emit.single({
|
|
2767
3107
|
type: "usage",
|
|
2768
3108
|
usage: {
|
|
2769
3109
|
inputTokens,
|
|
2770
3110
|
outputTokens,
|
|
2771
3111
|
totalTokens: inputTokens + outputTokens,
|
|
2772
|
-
estimatedCost: calculateCost(
|
|
3112
|
+
estimatedCost: calculateCost(
|
|
3113
|
+
inputTokens,
|
|
3114
|
+
outputTokens,
|
|
3115
|
+
model,
|
|
3116
|
+
{
|
|
3117
|
+
cached_content_token_count: cachedContentTokens || void 0
|
|
3118
|
+
},
|
|
3119
|
+
config.pricingRegistry
|
|
3120
|
+
)
|
|
2773
3121
|
}
|
|
2774
3122
|
});
|
|
2775
3123
|
emit.end();
|
|
@@ -2831,7 +3179,7 @@ ${schemaStr}`
|
|
|
2831
3179
|
}),
|
|
2832
3180
|
catch: toEffectError3
|
|
2833
3181
|
});
|
|
2834
|
-
const mapped = mapGeminiResponse(response, model);
|
|
3182
|
+
const mapped = mapGeminiResponse(response, model, config.pricingRegistry);
|
|
2835
3183
|
try {
|
|
2836
3184
|
const parsed = JSON.parse(mapped.content);
|
|
2837
3185
|
const decoded = Schema5.decodeUnknownEither(
|
|
@@ -2884,6 +3232,12 @@ ${schemaStr}`
|
|
|
2884
3232
|
jsonSchemaEnforcement: false,
|
|
2885
3233
|
prefillSupport: false,
|
|
2886
3234
|
grammarConstraints: false
|
|
3235
|
+
}),
|
|
3236
|
+
capabilities: () => Effect7.succeed({
|
|
3237
|
+
supportsToolCalling: true,
|
|
3238
|
+
supportsStreaming: true,
|
|
3239
|
+
supportsStructuredOutput: true,
|
|
3240
|
+
supportsLogprobs: false
|
|
2887
3241
|
})
|
|
2888
3242
|
});
|
|
2889
3243
|
})
|
|
@@ -2929,7 +3283,7 @@ var toLiteLLMTool = (tool) => ({
|
|
|
2929
3283
|
parameters: tool.inputSchema
|
|
2930
3284
|
}
|
|
2931
3285
|
});
|
|
2932
|
-
var mapLiteLLMResponse = (response, model) => {
|
|
3286
|
+
var mapLiteLLMResponse = (response, model, registry) => {
|
|
2933
3287
|
const message = response.choices[0]?.message;
|
|
2934
3288
|
const content = message?.content ?? "";
|
|
2935
3289
|
const rawToolCalls = message?.tool_calls;
|
|
@@ -2954,7 +3308,14 @@ var mapLiteLLMResponse = (response, model) => {
|
|
|
2954
3308
|
estimatedCost: calculateCost(
|
|
2955
3309
|
response.usage?.prompt_tokens ?? 0,
|
|
2956
3310
|
response.usage?.completion_tokens ?? 0,
|
|
2957
|
-
model
|
|
3311
|
+
model,
|
|
3312
|
+
void 0,
|
|
3313
|
+
registry,
|
|
3314
|
+
// Prioritize costs returned directly from the proxy if available
|
|
3315
|
+
response.usage?.input_cost !== void 0 && response.usage?.output_cost !== void 0 ? {
|
|
3316
|
+
input: response.usage.input_cost / (response.usage.prompt_tokens || 1) * 1e6,
|
|
3317
|
+
output: response.usage.output_cost / (response.usage.completion_tokens || 1) * 1e6
|
|
3318
|
+
} : void 0
|
|
2958
3319
|
)
|
|
2959
3320
|
},
|
|
2960
3321
|
model: response.model ?? model,
|
|
@@ -3008,7 +3369,11 @@ var LiteLLMProviderLive = Layer7.effect(
|
|
|
3008
3369
|
try: () => liteLLMFetch(baseURL, "/chat/completions", requestBody, apiKey),
|
|
3009
3370
|
catch: (error) => toEffectError4(error)
|
|
3010
3371
|
});
|
|
3011
|
-
return mapLiteLLMResponse(
|
|
3372
|
+
return mapLiteLLMResponse(
|
|
3373
|
+
response,
|
|
3374
|
+
model,
|
|
3375
|
+
config.pricingRegistry
|
|
3376
|
+
);
|
|
3012
3377
|
}).pipe(
|
|
3013
3378
|
Effect8.retry(retryPolicy),
|
|
3014
3379
|
Effect8.timeout("30 seconds"),
|
|
@@ -3094,7 +3459,9 @@ var LiteLLMProviderLive = Layer7.effect(
|
|
|
3094
3459
|
estimatedCost: calculateCost(
|
|
3095
3460
|
inputTokens,
|
|
3096
3461
|
outputTokens,
|
|
3097
|
-
model
|
|
3462
|
+
model,
|
|
3463
|
+
void 0,
|
|
3464
|
+
config.pricingRegistry
|
|
3098
3465
|
)
|
|
3099
3466
|
}
|
|
3100
3467
|
});
|
|
@@ -3165,7 +3532,8 @@ No markdown, no code fences, just raw JSON.`
|
|
|
3165
3532
|
});
|
|
3166
3533
|
const response = mapLiteLLMResponse(
|
|
3167
3534
|
completeResult,
|
|
3168
|
-
model
|
|
3535
|
+
model,
|
|
3536
|
+
config.pricingRegistry
|
|
3169
3537
|
);
|
|
3170
3538
|
try {
|
|
3171
3539
|
const parsed = JSON.parse(response.content);
|
|
@@ -3229,6 +3597,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
3229
3597
|
jsonSchemaEnforcement: false,
|
|
3230
3598
|
prefillSupport: false,
|
|
3231
3599
|
grammarConstraints: false
|
|
3600
|
+
}),
|
|
3601
|
+
capabilities: () => Effect8.succeed({
|
|
3602
|
+
supportsToolCalling: true,
|
|
3603
|
+
supportsStreaming: true,
|
|
3604
|
+
supportsStructuredOutput: true,
|
|
3605
|
+
supportsLogprobs: false
|
|
3232
3606
|
})
|
|
3233
3607
|
});
|
|
3234
3608
|
})
|
|
@@ -3236,109 +3610,217 @@ No markdown, no code fences, just raw JSON.`
|
|
|
3236
3610
|
|
|
3237
3611
|
// src/testing.ts
|
|
3238
3612
|
import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
3243
|
-
|
|
3244
|
-
|
|
3245
|
-
|
|
3246
|
-
|
|
3613
|
+
function fakeUsage(inputLen, outputLen) {
|
|
3614
|
+
return {
|
|
3615
|
+
inputTokens: Math.ceil(inputLen / 4),
|
|
3616
|
+
outputTokens: Math.ceil(outputLen / 4),
|
|
3617
|
+
totalTokens: Math.ceil(inputLen / 4) + Math.ceil(outputLen / 4),
|
|
3618
|
+
estimatedCost: 0
|
|
3619
|
+
};
|
|
3620
|
+
}
|
|
3621
|
+
function extractSearchText(messages, request) {
|
|
3622
|
+
const lastMessage = messages[messages.length - 1];
|
|
3623
|
+
const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
|
|
3624
|
+
const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
|
|
3625
|
+
return `${content} ${systemPrompt}`.trim();
|
|
3626
|
+
}
|
|
3627
|
+
function resolveTurn(scenario, callIndex, searchText) {
|
|
3628
|
+
for (let i = callIndex.value; i < scenario.length; i++) {
|
|
3629
|
+
const turn = scenario[i];
|
|
3630
|
+
const guard = turn.match;
|
|
3631
|
+
if (!guard || new RegExp(guard, "i").test(searchText)) {
|
|
3632
|
+
callIndex.value = Math.min(i + 1, scenario.length - 1);
|
|
3633
|
+
return { turn, matchedIndex: i };
|
|
3634
|
+
}
|
|
3635
|
+
}
|
|
3636
|
+
return { turn: scenario[scenario.length - 1], matchedIndex: scenario.length - 1 };
|
|
3637
|
+
}
|
|
3638
|
+
function buildToolCalls(specs, matchedIndex) {
|
|
3639
|
+
return specs.map((spec, i) => ({
|
|
3640
|
+
id: spec.id ?? `call-${matchedIndex}-${i}`,
|
|
3641
|
+
name: spec.name,
|
|
3642
|
+
input: spec.args
|
|
3643
|
+
}));
|
|
3644
|
+
}
|
|
3645
|
+
var TestLLMService = (scenario) => {
|
|
3646
|
+
const callIndex = { value: 0 };
|
|
3647
|
+
return {
|
|
3648
|
+
complete: (request) => Effect9.gen(function* () {
|
|
3649
|
+
const searchText = extractSearchText(request.messages, request);
|
|
3650
|
+
const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
|
|
3651
|
+
if ("error" in turn) {
|
|
3652
|
+
throw new Error(turn.error);
|
|
3653
|
+
}
|
|
3654
|
+
if ("toolCall" in turn) {
|
|
3247
3655
|
return {
|
|
3248
|
-
content:
|
|
3249
|
-
stopReason: "
|
|
3250
|
-
usage:
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
totalTokens: Math.ceil(content.length / 4) + Math.ceil(response.length / 4),
|
|
3254
|
-
estimatedCost: 0
|
|
3255
|
-
},
|
|
3256
|
-
model: "test-model"
|
|
3656
|
+
content: "",
|
|
3657
|
+
stopReason: "tool_use",
|
|
3658
|
+
usage: fakeUsage(searchText.length, 0),
|
|
3659
|
+
model: "test-model",
|
|
3660
|
+
toolCalls: buildToolCalls([turn.toolCall], matchedIndex)
|
|
3257
3661
|
};
|
|
3258
3662
|
}
|
|
3259
|
-
|
|
3260
|
-
|
|
3261
|
-
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
estimatedCost: 0
|
|
3268
|
-
},
|
|
3269
|
-
model: "test-model"
|
|
3270
|
-
};
|
|
3271
|
-
}),
|
|
3272
|
-
stream: (request) => {
|
|
3273
|
-
const lastMessage = request.messages[request.messages.length - 1];
|
|
3274
|
-
const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
|
|
3275
|
-
const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
|
|
3276
|
-
const searchText = `${content} ${systemPrompt}`;
|
|
3277
|
-
let matchedResponse = "Test response";
|
|
3278
|
-
for (const [pattern, response] of Object.entries(responses)) {
|
|
3279
|
-
if (pattern.length > 0 && searchText.includes(pattern)) {
|
|
3280
|
-
matchedResponse = response;
|
|
3281
|
-
break;
|
|
3663
|
+
if ("toolCalls" in turn) {
|
|
3664
|
+
return {
|
|
3665
|
+
content: "",
|
|
3666
|
+
stopReason: "tool_use",
|
|
3667
|
+
usage: fakeUsage(searchText.length, 0),
|
|
3668
|
+
model: "test-model",
|
|
3669
|
+
toolCalls: buildToolCalls(turn.toolCalls, matchedIndex)
|
|
3670
|
+
};
|
|
3282
3671
|
}
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
|
|
3286
|
-
|
|
3287
|
-
|
|
3288
|
-
|
|
3289
|
-
|
|
3290
|
-
|
|
3291
|
-
|
|
3292
|
-
|
|
3293
|
-
|
|
3294
|
-
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3672
|
+
const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
|
|
3673
|
+
return {
|
|
3674
|
+
content,
|
|
3675
|
+
stopReason: "end_turn",
|
|
3676
|
+
usage: fakeUsage(searchText.length, content.length),
|
|
3677
|
+
model: "test-model"
|
|
3678
|
+
};
|
|
3679
|
+
}),
|
|
3680
|
+
stream: (request) => {
|
|
3681
|
+
const searchText = extractSearchText(request.messages, request);
|
|
3682
|
+
const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
|
|
3683
|
+
if ("error" in turn) {
|
|
3684
|
+
return Effect9.succeed(
|
|
3685
|
+
Stream6.make(
|
|
3686
|
+
{ type: "error", error: turn.error }
|
|
3687
|
+
)
|
|
3688
|
+
);
|
|
3689
|
+
}
|
|
3690
|
+
const specs = "toolCall" in turn ? [turn.toolCall] : "toolCalls" in turn ? turn.toolCalls : null;
|
|
3691
|
+
if (specs) {
|
|
3692
|
+
const events = [
|
|
3693
|
+
...specs.flatMap((spec, i) => [
|
|
3694
|
+
{
|
|
3695
|
+
type: "tool_use_start",
|
|
3696
|
+
id: spec.id ?? `call-${matchedIndex}-${i}`,
|
|
3697
|
+
name: spec.name
|
|
3698
|
+
},
|
|
3699
|
+
{
|
|
3700
|
+
type: "tool_use_delta",
|
|
3701
|
+
input: JSON.stringify(spec.args)
|
|
3702
|
+
}
|
|
3703
|
+
]),
|
|
3704
|
+
{ type: "content_complete", content: "" },
|
|
3705
|
+
{ type: "usage", usage: fakeUsage(searchText.length, 0) }
|
|
3706
|
+
];
|
|
3707
|
+
return Effect9.succeed(
|
|
3708
|
+
Stream6.fromIterable(events)
|
|
3709
|
+
);
|
|
3710
|
+
}
|
|
3711
|
+
const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
|
|
3712
|
+
const inputTokens = Math.ceil(searchText.length / 4);
|
|
3713
|
+
const outputTokens = Math.ceil(content.length / 4);
|
|
3714
|
+
return Effect9.succeed(
|
|
3715
|
+
Stream6.make(
|
|
3716
|
+
{ type: "text_delta", text: content },
|
|
3717
|
+
{ type: "content_complete", content },
|
|
3718
|
+
{
|
|
3719
|
+
type: "usage",
|
|
3720
|
+
usage: {
|
|
3721
|
+
inputTokens,
|
|
3722
|
+
outputTokens,
|
|
3723
|
+
totalTokens: inputTokens + outputTokens,
|
|
3724
|
+
estimatedCost: 0
|
|
3725
|
+
}
|
|
3303
3726
|
}
|
|
3304
|
-
|
|
3727
|
+
)
|
|
3728
|
+
);
|
|
3729
|
+
},
|
|
3730
|
+
completeStructured: (request) => Effect9.gen(function* () {
|
|
3731
|
+
const searchText = extractSearchText(request.messages, request);
|
|
3732
|
+
const { turn } = resolveTurn(scenario, callIndex, searchText);
|
|
3733
|
+
if ("error" in turn) {
|
|
3734
|
+
throw new Error(turn.error);
|
|
3735
|
+
}
|
|
3736
|
+
if ("json" in turn) {
|
|
3737
|
+
return turn.json;
|
|
3738
|
+
}
|
|
3739
|
+
const responseContent = "text" in turn ? turn.text : "{}";
|
|
3740
|
+
const parsed = JSON.parse(responseContent);
|
|
3741
|
+
return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
|
|
3742
|
+
}),
|
|
3743
|
+
embed: (texts) => Effect9.succeed(
|
|
3744
|
+
texts.map(() => new Array(768).fill(0).map(() => Math.random()))
|
|
3745
|
+
),
|
|
3746
|
+
countTokens: (messages) => Effect9.succeed(
|
|
3747
|
+
messages.reduce(
|
|
3748
|
+
(sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
|
|
3749
|
+
0
|
|
3305
3750
|
)
|
|
3306
|
-
)
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3751
|
+
),
|
|
3752
|
+
getModelConfig: () => Effect9.succeed({
|
|
3753
|
+
provider: "anthropic",
|
|
3754
|
+
model: "test-model"
|
|
3755
|
+
}),
|
|
3756
|
+
getStructuredOutputCapabilities: () => Effect9.succeed({
|
|
3757
|
+
nativeJsonMode: true,
|
|
3758
|
+
jsonSchemaEnforcement: false,
|
|
3759
|
+
prefillSupport: false,
|
|
3760
|
+
grammarConstraints: false
|
|
3761
|
+
}),
|
|
3762
|
+
capabilities: () => Effect9.succeed({
|
|
3763
|
+
...DEFAULT_CAPABILITIES,
|
|
3764
|
+
supportsToolCalling: true,
|
|
3765
|
+
// Test provider emits native FC stream events (tool_use_start/tool_use_delta)
|
|
3766
|
+
supportsStreaming: true
|
|
3767
|
+
})
|
|
3768
|
+
};
|
|
3769
|
+
};
|
|
3770
|
+
var TestLLMServiceLayer = (scenario = [{ text: "" }]) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(scenario)));
|
|
3771
|
+
|
|
3772
|
+
// src/pricing.ts
|
|
3773
|
+
import { Effect as Effect10 } from "effect";
|
|
3774
|
+
var openRouterPricingProvider = {
|
|
3775
|
+
fetchPricing: () => Effect10.gen(function* () {
|
|
3776
|
+
const res = yield* Effect10.tryPromise({
|
|
3777
|
+
try: () => fetch("https://openrouter.ai/api/v1/models"),
|
|
3778
|
+
catch: (e) => new Error(`Fetch failed: ${e}`)
|
|
3779
|
+
});
|
|
3780
|
+
if (!res.ok) {
|
|
3781
|
+
return yield* Effect10.fail(new Error(`OpenRouter API returned ${res.status}`));
|
|
3782
|
+
}
|
|
3783
|
+
const json = yield* Effect10.tryPromise({
|
|
3784
|
+
try: () => res.json(),
|
|
3785
|
+
catch: (e) => new Error(`JSON parse failed: ${e}`)
|
|
3786
|
+
});
|
|
3787
|
+
const registry = {};
|
|
3788
|
+
for (const model of json.data) {
|
|
3789
|
+
registry[model.id] = {
|
|
3790
|
+
input: parseFloat(model.pricing.prompt) * 1e6,
|
|
3791
|
+
output: parseFloat(model.pricing.completion) * 1e6
|
|
3792
|
+
};
|
|
3793
|
+
const shortName = model.id.split("/").pop();
|
|
3794
|
+
if (shortName && !registry[shortName]) {
|
|
3795
|
+
registry[shortName] = registry[model.id];
|
|
3316
3796
|
}
|
|
3317
3797
|
}
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3798
|
+
return registry;
|
|
3799
|
+
})
|
|
3800
|
+
};
|
|
3801
|
+
var urlPricingProvider = (url) => ({
|
|
3802
|
+
fetchPricing: () => Effect10.gen(function* () {
|
|
3803
|
+
const res = yield* Effect10.tryPromise({
|
|
3804
|
+
try: () => fetch(url),
|
|
3805
|
+
catch: (e) => new Error(`Fetch failed: ${e}`)
|
|
3806
|
+
});
|
|
3807
|
+
if (!res.ok) {
|
|
3808
|
+
return yield* Effect10.fail(new Error(`Custom pricing URL returned ${res.status}`));
|
|
3809
|
+
}
|
|
3810
|
+
const json = yield* Effect10.tryPromise({
|
|
3811
|
+
try: () => res.json(),
|
|
3812
|
+
catch: (e) => new Error(`JSON parse failed: ${e}`)
|
|
3813
|
+
});
|
|
3814
|
+
const registry = {};
|
|
3815
|
+
for (const [key, value] of Object.entries(json)) {
|
|
3816
|
+
registry[key] = {
|
|
3817
|
+
input: Number(value.input),
|
|
3818
|
+
output: Number(value.output)
|
|
3819
|
+
};
|
|
3820
|
+
}
|
|
3821
|
+
return registry;
|
|
3339
3822
|
})
|
|
3340
3823
|
});
|
|
3341
|
-
var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
|
|
3342
3824
|
|
|
3343
3825
|
// src/structured-output.ts
|
|
3344
3826
|
import { Schema as Schema8 } from "effect";
|
|
@@ -3405,10 +3887,10 @@ var ComplexityAnalysisSchema = Schema8.Struct({
|
|
|
3405
3887
|
});
|
|
3406
3888
|
|
|
3407
3889
|
// src/runtime.ts
|
|
3408
|
-
import { Effect as
|
|
3890
|
+
import { Effect as Effect13, Layer as Layer9 } from "effect";
|
|
3409
3891
|
|
|
3410
3892
|
// src/embedding-cache.ts
|
|
3411
|
-
import { Effect as
|
|
3893
|
+
import { Effect as Effect11 } from "effect";
|
|
3412
3894
|
var MAX_ENTRIES = 5e3;
|
|
3413
3895
|
var makeEmbeddingCache = (underlying) => {
|
|
3414
3896
|
const caches = /* @__PURE__ */ new Map();
|
|
@@ -3432,7 +3914,7 @@ var makeEmbeddingCache = (underlying) => {
|
|
|
3432
3914
|
}
|
|
3433
3915
|
};
|
|
3434
3916
|
return {
|
|
3435
|
-
embed: (texts, model) =>
|
|
3917
|
+
embed: (texts, model) => Effect11.gen(function* () {
|
|
3436
3918
|
const modelKey = model ?? "__default__";
|
|
3437
3919
|
const cache = getModelCache(modelKey);
|
|
3438
3920
|
const results = new Array(texts.length);
|
|
@@ -3472,7 +3954,7 @@ var makeEmbeddingCache = (underlying) => {
|
|
|
3472
3954
|
};
|
|
3473
3955
|
|
|
3474
3956
|
// src/circuit-breaker.ts
|
|
3475
|
-
import { Effect as
|
|
3957
|
+
import { Effect as Effect12 } from "effect";
|
|
3476
3958
|
var makeCircuitBreaker = (config = {}) => {
|
|
3477
3959
|
const { failureThreshold, cooldownMs } = {
|
|
3478
3960
|
...defaultCircuitBreakerConfig,
|
|
@@ -3493,12 +3975,12 @@ var makeCircuitBreaker = (config = {}) => {
|
|
|
3493
3975
|
}
|
|
3494
3976
|
};
|
|
3495
3977
|
return {
|
|
3496
|
-
protect: (effect) =>
|
|
3978
|
+
protect: (effect) => Effect12.gen(function* () {
|
|
3497
3979
|
if (currentState === "open") {
|
|
3498
3980
|
if (Date.now() - openedAt >= cooldownMs) {
|
|
3499
3981
|
currentState = "half_open";
|
|
3500
3982
|
} else {
|
|
3501
|
-
return yield*
|
|
3983
|
+
return yield* Effect12.fail(
|
|
3502
3984
|
new LLMError({
|
|
3503
3985
|
message: `Circuit breaker OPEN \u2014 ${consecutiveFailures} consecutive failures. Retry after ${Math.ceil((cooldownMs - (Date.now() - openedAt)) / 1e3)}s cooldown.`,
|
|
3504
3986
|
provider: "custom",
|
|
@@ -3507,13 +3989,13 @@ var makeCircuitBreaker = (config = {}) => {
|
|
|
3507
3989
|
);
|
|
3508
3990
|
}
|
|
3509
3991
|
}
|
|
3510
|
-
const result = yield*
|
|
3992
|
+
const result = yield* Effect12.exit(effect);
|
|
3511
3993
|
if (result._tag === "Success") {
|
|
3512
3994
|
onSuccess();
|
|
3513
3995
|
return result.value;
|
|
3514
3996
|
}
|
|
3515
3997
|
onFailure();
|
|
3516
|
-
return yield*
|
|
3998
|
+
return yield* Effect12.failCause(result.cause);
|
|
3517
3999
|
}),
|
|
3518
4000
|
state: () => currentState,
|
|
3519
4001
|
reset: () => {
|
|
@@ -3527,7 +4009,7 @@ var makeCircuitBreaker = (config = {}) => {
|
|
|
3527
4009
|
// src/runtime.ts
|
|
3528
4010
|
var EmbeddingCacheLayer = Layer9.effect(
|
|
3529
4011
|
LLMService,
|
|
3530
|
-
|
|
4012
|
+
Effect13.gen(function* () {
|
|
3531
4013
|
const llm = yield* LLMService;
|
|
3532
4014
|
const cache = makeEmbeddingCache(llm.embed);
|
|
3533
4015
|
return LLMService.of({ ...llm, embed: cache.embed });
|
|
@@ -3535,7 +4017,7 @@ var EmbeddingCacheLayer = Layer9.effect(
|
|
|
3535
4017
|
);
|
|
3536
4018
|
var makeCircuitBreakerLayer = (config) => Layer9.effect(
|
|
3537
4019
|
LLMService,
|
|
3538
|
-
|
|
4020
|
+
Effect13.gen(function* () {
|
|
3539
4021
|
const llm = yield* LLMService;
|
|
3540
4022
|
const breaker = makeCircuitBreaker(config);
|
|
3541
4023
|
return LLMService.of({
|
|
@@ -3545,10 +4027,10 @@ var makeCircuitBreakerLayer = (config) => Layer9.effect(
|
|
|
3545
4027
|
});
|
|
3546
4028
|
})
|
|
3547
4029
|
);
|
|
3548
|
-
var createLLMProviderLayer = (provider = "anthropic",
|
|
4030
|
+
var createLLMProviderLayer = (provider = "anthropic", testScenario, model, modelParams, circuitBreaker, pricingRegistry) => {
|
|
3549
4031
|
if (provider === "test") {
|
|
3550
4032
|
return Layer9.mergeAll(
|
|
3551
|
-
TestLLMServiceLayer(
|
|
4033
|
+
TestLLMServiceLayer(testScenario ?? [{ text: "" }]),
|
|
3552
4034
|
PromptManagerLive
|
|
3553
4035
|
);
|
|
3554
4036
|
}
|
|
@@ -3557,6 +4039,7 @@ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, mode
|
|
|
3557
4039
|
if (modelParams?.thinking !== void 0) configOverrides.thinking = modelParams.thinking;
|
|
3558
4040
|
if (modelParams?.temperature !== void 0) configOverrides.defaultTemperature = modelParams.temperature;
|
|
3559
4041
|
if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
|
|
4042
|
+
if (pricingRegistry) configOverrides.pricingRegistry = pricingRegistry;
|
|
3560
4043
|
const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
|
|
3561
4044
|
const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
|
|
3562
4045
|
const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
|
|
@@ -3577,13 +4060,331 @@ var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
|
|
|
3577
4060
|
PromptManagerLive
|
|
3578
4061
|
);
|
|
3579
4062
|
};
|
|
4063
|
+
|
|
4064
|
+
// src/rate-limiter.ts
|
|
4065
|
+
import { Effect as Effect14 } from "effect";
|
|
4066
|
+
var DEFAULT_CONFIG = {
|
|
4067
|
+
requestsPerMinute: 60,
|
|
4068
|
+
tokensPerMinute: 1e5,
|
|
4069
|
+
maxConcurrent: 10
|
|
4070
|
+
};
|
|
4071
|
+
var makeRateLimiter = (config = {}) => {
|
|
4072
|
+
const resolved = { ...DEFAULT_CONFIG, ...config };
|
|
4073
|
+
const WINDOW_MS = 6e4;
|
|
4074
|
+
const window2 = [];
|
|
4075
|
+
let concurrent = 0;
|
|
4076
|
+
const prune = (now) => {
|
|
4077
|
+
const cutoff = now - WINDOW_MS;
|
|
4078
|
+
while (window2.length > 0 && window2[0].ts <= cutoff) {
|
|
4079
|
+
window2.shift();
|
|
4080
|
+
}
|
|
4081
|
+
};
|
|
4082
|
+
const currentTokens = () => {
|
|
4083
|
+
return window2.reduce((sum, entry) => sum + entry.tokens, 0);
|
|
4084
|
+
};
|
|
4085
|
+
return {
|
|
4086
|
+
acquire: (messages) => Effect14.gen(function* () {
|
|
4087
|
+
const estimatedTokens = messages ? yield* estimateTokenCount(messages) : 0;
|
|
4088
|
+
while (true) {
|
|
4089
|
+
const now = Date.now();
|
|
4090
|
+
prune(now);
|
|
4091
|
+
if (concurrent >= resolved.maxConcurrent) {
|
|
4092
|
+
yield* Effect14.sleep("100 millis");
|
|
4093
|
+
continue;
|
|
4094
|
+
}
|
|
4095
|
+
if (window2.length >= resolved.requestsPerMinute) {
|
|
4096
|
+
const oldestTs = window2[0].ts;
|
|
4097
|
+
const waitMs = oldestTs + WINDOW_MS - now;
|
|
4098
|
+
if (waitMs > 0) {
|
|
4099
|
+
yield* Effect14.sleep(`${waitMs} millis`);
|
|
4100
|
+
continue;
|
|
4101
|
+
}
|
|
4102
|
+
}
|
|
4103
|
+
if (estimatedTokens > 0 && currentTokens() + estimatedTokens > resolved.tokensPerMinute && window2.length > 0) {
|
|
4104
|
+
const oldestTs = window2[0].ts;
|
|
4105
|
+
const waitMs = oldestTs + WINDOW_MS - now;
|
|
4106
|
+
if (waitMs > 0) {
|
|
4107
|
+
yield* Effect14.sleep(`${waitMs} millis`);
|
|
4108
|
+
continue;
|
|
4109
|
+
}
|
|
4110
|
+
}
|
|
4111
|
+
window2.push({ ts: now, tokens: estimatedTokens });
|
|
4112
|
+
concurrent++;
|
|
4113
|
+
return;
|
|
4114
|
+
}
|
|
4115
|
+
}),
|
|
4116
|
+
release: () => {
|
|
4117
|
+
if (concurrent > 0) concurrent--;
|
|
4118
|
+
},
|
|
4119
|
+
concurrentCount: () => concurrent,
|
|
4120
|
+
windowRequestCount: () => {
|
|
4121
|
+
prune(Date.now());
|
|
4122
|
+
return window2.length;
|
|
4123
|
+
},
|
|
4124
|
+
windowTokenCount: () => {
|
|
4125
|
+
prune(Date.now());
|
|
4126
|
+
return currentTokens();
|
|
4127
|
+
}
|
|
4128
|
+
};
|
|
4129
|
+
};
|
|
4130
|
+
|
|
4131
|
+
// src/rate-limited-provider.ts
|
|
4132
|
+
import { Effect as Effect15, Layer as Layer10 } from "effect";
|
|
4133
|
+
var makeRateLimitedProvider = (config = {}) => Layer10.effect(
|
|
4134
|
+
LLMService,
|
|
4135
|
+
Effect15.gen(function* () {
|
|
4136
|
+
const svc = yield* LLMService;
|
|
4137
|
+
const limiter = makeRateLimiter(config);
|
|
4138
|
+
return {
|
|
4139
|
+
complete: (req) => Effect15.gen(function* () {
|
|
4140
|
+
yield* limiter.acquire(req.messages);
|
|
4141
|
+
try {
|
|
4142
|
+
return yield* svc.complete(req);
|
|
4143
|
+
} finally {
|
|
4144
|
+
limiter.release();
|
|
4145
|
+
}
|
|
4146
|
+
}),
|
|
4147
|
+
stream: (req) => Effect15.gen(function* () {
|
|
4148
|
+
yield* limiter.acquire(req.messages);
|
|
4149
|
+
try {
|
|
4150
|
+
return yield* svc.stream(req);
|
|
4151
|
+
} finally {
|
|
4152
|
+
limiter.release();
|
|
4153
|
+
}
|
|
4154
|
+
}),
|
|
4155
|
+
completeStructured: (req) => Effect15.gen(function* () {
|
|
4156
|
+
yield* limiter.acquire(req.messages);
|
|
4157
|
+
try {
|
|
4158
|
+
return yield* svc.completeStructured(req);
|
|
4159
|
+
} finally {
|
|
4160
|
+
limiter.release();
|
|
4161
|
+
}
|
|
4162
|
+
}),
|
|
4163
|
+
// Passthrough — embedding, token counting, config, and capabilities are not rate-limited
|
|
4164
|
+
embed: svc.embed,
|
|
4165
|
+
countTokens: svc.countTokens,
|
|
4166
|
+
getModelConfig: svc.getModelConfig,
|
|
4167
|
+
getStructuredOutputCapabilities: svc.getStructuredOutputCapabilities,
|
|
4168
|
+
capabilities: svc.capabilities
|
|
4169
|
+
};
|
|
4170
|
+
})
|
|
4171
|
+
);
|
|
4172
|
+
|
|
4173
|
+
// src/fallback-chain.ts
|
|
4174
|
+
var FallbackChain = class {
|
|
4175
|
+
constructor(config) {
|
|
4176
|
+
this.config = config;
|
|
4177
|
+
this.threshold = config.errorThreshold ?? 3;
|
|
4178
|
+
}
|
|
4179
|
+
/** Error count per provider. */
|
|
4180
|
+
errorCounts = /* @__PURE__ */ new Map();
|
|
4181
|
+
/** Current index in the providers list. */
|
|
4182
|
+
currentProviderIndex = 0;
|
|
4183
|
+
/** Current index in the models list. */
|
|
4184
|
+
currentModelIndex = 0;
|
|
4185
|
+
/** Threshold for switching to next provider. */
|
|
4186
|
+
threshold;
|
|
4187
|
+
/**
|
|
4188
|
+
* Record an error for the given provider.
|
|
4189
|
+
* Increments the error count and switches to the next provider if threshold is met.
|
|
4190
|
+
*
|
|
4191
|
+
* @param provider - Provider name that errored
|
|
4192
|
+
*/
|
|
4193
|
+
recordError(provider) {
|
|
4194
|
+
const count = (this.errorCounts.get(provider) ?? 0) + 1;
|
|
4195
|
+
this.errorCounts.set(provider, count);
|
|
4196
|
+
if (count >= this.threshold && this.currentProviderIndex < this.config.providers.length - 1) {
|
|
4197
|
+
this.currentProviderIndex++;
|
|
4198
|
+
}
|
|
4199
|
+
}
|
|
4200
|
+
/**
|
|
4201
|
+
* Record a rate limit error (429) for the given provider.
|
|
4202
|
+
* Falls back to the next model in the chain.
|
|
4203
|
+
*
|
|
4204
|
+
* @param _provider - Provider name that was rate limited (parameter name _ to indicate unused)
|
|
4205
|
+
*/
|
|
4206
|
+
recordRateLimit(_provider) {
|
|
4207
|
+
if (this.config.models && this.currentModelIndex < this.config.models.length - 1) {
|
|
4208
|
+
this.currentModelIndex++;
|
|
4209
|
+
}
|
|
4210
|
+
}
|
|
4211
|
+
/**
|
|
4212
|
+
* Record a successful call for the given provider.
|
|
4213
|
+
* Resets the error count for that provider.
|
|
4214
|
+
*
|
|
4215
|
+
* @param provider - Provider name that succeeded
|
|
4216
|
+
*/
|
|
4217
|
+
recordSuccess(provider) {
|
|
4218
|
+
this.errorCounts.set(provider, 0);
|
|
4219
|
+
}
|
|
4220
|
+
/**
|
|
4221
|
+
* Get the currently active provider.
|
|
4222
|
+
*
|
|
4223
|
+
* @returns Name of the provider to use
|
|
4224
|
+
*/
|
|
4225
|
+
currentProvider() {
|
|
4226
|
+
const provider = this.config.providers[this.currentProviderIndex];
|
|
4227
|
+
if (!provider) {
|
|
4228
|
+
throw new Error(`FallbackChain: Invalid provider index ${this.currentProviderIndex}`);
|
|
4229
|
+
}
|
|
4230
|
+
return provider;
|
|
4231
|
+
}
|
|
4232
|
+
/**
|
|
4233
|
+
* Get the currently active model.
|
|
4234
|
+
* Returns undefined if no models are configured.
|
|
4235
|
+
*
|
|
4236
|
+
* @returns Name of the model to use, or undefined if no models configured
|
|
4237
|
+
*/
|
|
4238
|
+
currentModel() {
|
|
4239
|
+
return this.config.models?.[this.currentModelIndex];
|
|
4240
|
+
}
|
|
4241
|
+
/**
|
|
4242
|
+
* Check if there are more fallbacks available (provider or model).
|
|
4243
|
+
*
|
|
4244
|
+
* @returns true if there are unused fallback providers or models, false if all exhausted
|
|
4245
|
+
*/
|
|
4246
|
+
hasFallback() {
|
|
4247
|
+
const hasProviderFallback = this.currentProviderIndex < this.config.providers.length - 1;
|
|
4248
|
+
const hasModelFallback = this.config.models !== void 0 && this.currentModelIndex < this.config.models.length - 1;
|
|
4249
|
+
return hasProviderFallback || hasModelFallback;
|
|
4250
|
+
}
|
|
4251
|
+
};
|
|
4252
|
+
|
|
4253
|
+
// src/validation.ts
|
|
4254
|
+
function validateAndRepairMessages(messages) {
|
|
4255
|
+
if (messages.length === 0) return messages;
|
|
4256
|
+
const repaired = [];
|
|
4257
|
+
const toolCallIds = /* @__PURE__ */ new Set();
|
|
4258
|
+
for (let i = 0; i < messages.length; i++) {
|
|
4259
|
+
const msg = messages[i];
|
|
4260
|
+
if (msg.role === "assistant") {
|
|
4261
|
+
const toolCalls = msg.tool_calls ?? msg.toolCalls ?? [];
|
|
4262
|
+
for (const tc of toolCalls) {
|
|
4263
|
+
if (tc.id) toolCallIds.add(tc.id);
|
|
4264
|
+
}
|
|
4265
|
+
const content = typeof msg.content === "string" ? msg.content : "";
|
|
4266
|
+
repaired.push({ ...msg, content: content || "" });
|
|
4267
|
+
continue;
|
|
4268
|
+
}
|
|
4269
|
+
if (msg.role === "tool") {
|
|
4270
|
+
const callId = msg.tool_call_id ?? msg.toolCallId;
|
|
4271
|
+
if (callId && !toolCallIds.has(callId)) {
|
|
4272
|
+
continue;
|
|
4273
|
+
}
|
|
4274
|
+
repaired.push(msg);
|
|
4275
|
+
continue;
|
|
4276
|
+
}
|
|
4277
|
+
if (msg.role === "user" || msg.role === "system") {
|
|
4278
|
+
const content = typeof msg.content === "string" ? msg.content : "";
|
|
4279
|
+
if (!content.trim()) {
|
|
4280
|
+
repaired.push({ ...msg, content: "..." });
|
|
4281
|
+
continue;
|
|
4282
|
+
}
|
|
4283
|
+
}
|
|
4284
|
+
repaired.push(msg);
|
|
4285
|
+
}
|
|
4286
|
+
return repaired;
|
|
4287
|
+
}
|
|
4288
|
+
|
|
4289
|
+
// src/adapter.ts
|
|
4290
|
+
var defaultAdapter = {
|
|
4291
|
+
continuationHint({ missingTools, toolsUsed, iteration, maxIterations }) {
|
|
4292
|
+
if (missingTools.length === 0) {
|
|
4293
|
+
return toolsUsed.size > 0 ? "You have completed all required tool calls. Now synthesize the results and provide your FINAL ANSWER." : void 0;
|
|
4294
|
+
}
|
|
4295
|
+
const toolList = missingTools.join(", ");
|
|
4296
|
+
const urgency = iteration >= maxIterations - 3 ? ` You have ${maxIterations - iteration} iterations left.` : "";
|
|
4297
|
+
return `You must still call: ${toolList}. Call the next required tool now.${urgency}`;
|
|
4298
|
+
}
|
|
4299
|
+
};
|
|
4300
|
+
// Adapter for small local models: adds heavy-handed scaffolding (checklists,
// explicit tool reminders, recovery hints) to keep weaker models on task.
// Every hook returns undefined when it does not apply to the given tier/input.
var localModelAdapter = {
  // Appends a step-completion directive to the base system prompt (local tier only).
  systemPromptPatch(basePrompt, tier) {
    if (tier === "local") {
      return basePrompt + "\n\nIMPORTANT: When given a multi-step task, complete ALL steps in sequence. After gathering information, immediately proceed to the next step. Never stop after only searching \u2014 always produce the deliverable.";
    }
    return void 0;
  },
  // Rewrites the task as an explicit numbered checklist of required tool calls.
  taskFraming({ task, requiredTools, tier }) {
    if (tier !== "local") return void 0;
    if (requiredTools.length === 0) return void 0;
    const checklist = requiredTools
      .map((name, idx) => `${idx + 1}. Call ${name}`)
      .join("\n");
    return `${task}

Complete these steps in order:
${checklist}
Do not stop until all steps are done.`;
  },
  // Extra tool-usage reminder appended to the prompt (local tier only).
  toolGuidance({ requiredTools, tier }) {
    if (tier !== "local") return void 0;
    if (requiredTools.length === 0) return void 0;
    return `
Required tools for this task: ${requiredTools.join(", ")}. You MUST call all of them before giving a final answer.`;
  },
  // Nudges the model toward the next missing tool; special-cases the common
  // local-model failure of looping on search instead of writing the output.
  continuationHint({ toolsUsed, missingTools, iteration, maxIterations, lastToolName }) {
    if (missingTools.length === 0) return void 0;
    // Escalate tone once the loop is within 2 iterations of its budget.
    let pressure = "";
    if (iteration >= maxIterations - 2) {
      pressure = " This is urgent \u2014 you are running low on iterations.";
    }
    const justResearched = Boolean(lastToolName) && (lastToolName.includes("search") || lastToolName.includes("http"));
    if (justResearched) {
      const outputCandidates = missingTools.filter((name) => name.includes("write") || name.includes("file"));
      if (outputCandidates.length > 0) {
        return `You have gathered research data. Synthesize the findings and call ${outputCandidates[0]} to save the output.${pressure} Do NOT search again.`;
      }
    }
    if (missingTools.length === 1) {
      return `Your next step: call ${missingTools[0]}. You have all the information you need.${pressure}`;
    }
    return `Complete these steps in order: ${missingTools.join(" \u2192 ")}.${pressure} Proceed with the first one now.`;
  },
  // Maps a failed tool call to a concrete recovery suggestion (local tier only).
  // Error classification is a substring sniff on the error text: "404"/"Not Found"
  // first, then "timeout" (case-insensitive), otherwise a generic fallback.
  errorRecovery({ toolName, errorContent, missingTools, tier }) {
    if (tier !== "local") return void 0;
    if (errorContent.includes("404") || errorContent.includes("Not Found")) {
      const reminder = missingTools.length > 0 ? ` You still need to call: ${missingTools.join(", ")}.` : "";
      return `${toolName} returned 404 \u2014 that URL doesn't exist. Try a different URL or use web-search to find the correct one.${reminder}`;
    }
    if (errorContent.toLowerCase().includes("timeout")) {
      return `${toolName} timed out. Try again with a simpler request, or skip this step and proceed with what you have.`;
    }
    return `${toolName} failed. Try an alternative approach or use a different tool to get the information you need.`;
  },
  // Tells the model to stop gathering and produce output via the first missing
  // output tool (local tier only).
  synthesisPrompt({ missingOutputTools, observationCount, tier }) {
    if (tier !== "local") return void 0;
    if (missingOutputTools.length === 0) return void 0;
    const plural = observationCount !== 1 ? "s" : "";
    return `You have gathered ${observationCount} piece${plural} of information. That is enough. Do NOT search again. Now call ${missingOutputTools[0]} to produce the final output. Synthesize everything you have learned into a complete, well-structured response.`;
  },
  // Final gate before the answer is accepted: flags any required tool that was
  // never called, otherwise asks the model to self-review (local tier only).
  qualityCheck({ task, requiredTools, toolsUsed, tier }) {
    if (tier !== "local") return void 0;
    const outstanding = [];
    for (const name of requiredTools) {
      if (!toolsUsed.has(name)) outstanding.push(name);
    }
    if (outstanding.length > 0) {
      return `Before finishing: you have not yet called ${outstanding.join(", ")}. Call ${outstanding[0]} now.`;
    }
    // Only the first 120 chars of the task are echoed back to keep the prompt short.
    return `Review your answer: does it fully address the task "${task.slice(0, 120)}"? If yes, give it. If not, complete the missing parts first.`;
  }
};
|
4358
|
+
// Adapter for mid-tier models: lighter-touch prompts than the local adapter,
// but still reminds the model of pending tools and the synthesis step.
var midModelAdapter = {
  // Short reminder of any still-missing tools, or a synthesis prompt when done.
  continuationHint({ missingTools, toolsUsed, iteration, maxIterations }) {
    if (missingTools.length === 0) {
      if (toolsUsed.size > 0) {
        return "All required tools called. Synthesize and give your final answer.";
      }
      return void 0;
    }
    // Surface remaining budget once within 2 iterations of the limit.
    let budgetNote = "";
    if (iteration >= maxIterations - 2) {
      budgetNote = ` (${maxIterations - iteration} steps left)`;
    }
    return `Still needed: ${missingTools.join(", ")}. Call the next one now.${budgetNote}`;
  },
  // Directs the model to produce output via the first missing output tool (mid tier only).
  synthesisPrompt({ missingOutputTools, tier }) {
    if (tier !== "mid") return void 0;
    if (missingOutputTools.length === 0) return void 0;
    return `Research complete. Now call ${missingOutputTools[0]} to produce the output.`;
  }
};
|
4371
|
+
/**
 * Picks the prompting adapter for a model tier.
 * The capabilities argument is currently unused (kept for interface stability).
 * "local" and "mid" tiers get their specialized adapters; anything else falls
 * back to the default adapter.
 */
function selectAdapter(_capabilities, tier) {
  switch (tier) {
    case "local":
      return localModelAdapter;
    case "mid":
      return midModelAdapter;
    default:
      return defaultAdapter;
  }
}
|
4376
|
+
/**
 * Recommends a strategy override for a model tier.
 * Currently a stub: no tier-specific override is ever recommended, so callers
 * always receive undefined and fall back to their configured strategy.
 * Parameters are kept (underscore-prefixed) for interface stability.
 */
function recommendStrategyForTier(_tier, _configuredStrategy, _requiredTools) {
  return undefined;
}
|
3580
4379
|
export {
|
|
3581
4380
|
AnthropicProviderLive,
|
|
3582
4381
|
CacheControlSchema,
|
|
3583
4382
|
CompletionResponseSchema,
|
|
3584
4383
|
ComplexityAnalysisSchema,
|
|
4384
|
+
DEFAULT_CAPABILITIES,
|
|
3585
4385
|
DefaultEmbeddingConfig,
|
|
3586
4386
|
EmbeddingConfigSchema,
|
|
4387
|
+
FallbackChain,
|
|
3587
4388
|
GeminiProviderLive,
|
|
3588
4389
|
ImageContentBlockSchema,
|
|
3589
4390
|
ImageSourceSchema,
|
|
@@ -3621,13 +4422,22 @@ export {
|
|
|
3621
4422
|
calculateCost,
|
|
3622
4423
|
createLLMProviderLayer,
|
|
3623
4424
|
createLLMProviderLayerWithConfig,
|
|
4425
|
+
defaultAdapter,
|
|
3624
4426
|
defaultCircuitBreakerConfig,
|
|
3625
4427
|
estimateTokenCount,
|
|
3626
4428
|
getProviderDefaultModel,
|
|
3627
4429
|
llmConfigFromEnv,
|
|
4430
|
+
localModelAdapter,
|
|
3628
4431
|
makeCacheable,
|
|
3629
4432
|
makeCircuitBreaker,
|
|
3630
4433
|
makeEmbeddingCache,
|
|
3631
|
-
|
|
4434
|
+
makeRateLimitedProvider,
|
|
4435
|
+
makeRateLimiter,
|
|
4436
|
+
openRouterPricingProvider,
|
|
4437
|
+
recommendStrategyForTier,
|
|
4438
|
+
retryPolicy,
|
|
4439
|
+
selectAdapter,
|
|
4440
|
+
urlPricingProvider,
|
|
4441
|
+
validateAndRepairMessages
|
|
3632
4442
|
};
|
|
3633
4443
|
//# sourceMappingURL=index.js.map
|