@reactive-agents/llm-provider 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +57 -2
- package/dist/index.js +174 -54
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -673,6 +673,8 @@ declare const CompletionResponseSchema: Schema.Struct<{
|
|
|
673
673
|
/** Tool input parameters (arbitrary JSON-compatible object) */
|
|
674
674
|
input: typeof Schema.Unknown;
|
|
675
675
|
}>>>;
|
|
676
|
+
/** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
|
|
677
|
+
thinking: Schema.optional<typeof Schema.String>;
|
|
676
678
|
}>;
|
|
677
679
|
/**
|
|
678
680
|
* LLM response to a completion request.
|
|
@@ -804,6 +806,20 @@ type ObservabilityVerbosity =
|
|
|
804
806
|
"metadata"
|
|
805
807
|
/** Capture complete request/response payloads — higher overhead, useful for debugging. */
|
|
806
808
|
| "full";
|
|
809
|
+
/**
|
|
810
|
+
* Provider-reported capabilities for structured JSON output.
|
|
811
|
+
* Used by the structured output pipeline to select the optimal extraction strategy.
|
|
812
|
+
*/
|
|
813
|
+
type StructuredOutputCapabilities = {
|
|
814
|
+
/** Provider supports forcing JSON-only output (OpenAI, Gemini, Ollama) */
|
|
815
|
+
readonly nativeJsonMode: boolean;
|
|
816
|
+
/** Provider can enforce a JSON Schema on the output (OpenAI structured outputs) */
|
|
817
|
+
readonly jsonSchemaEnforcement: boolean;
|
|
818
|
+
/** Provider supports assistant message prefill to start response with "{" (Anthropic) */
|
|
819
|
+
readonly prefillSupport: boolean;
|
|
820
|
+
/** Provider supports GBNF grammar constraints for exact schema matching (Ollama/llama.cpp) */
|
|
821
|
+
readonly grammarConstraints: boolean;
|
|
822
|
+
};
|
|
807
823
|
|
|
808
824
|
declare const LLMError_base: new <A extends Record<string, any> = {}>(args: effect_Types.Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => effect_Cause.YieldableError & {
|
|
809
825
|
readonly _tag: "LLMError";
|
|
@@ -904,6 +920,11 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
|
|
|
904
920
|
* Get current model configuration.
|
|
905
921
|
*/
|
|
906
922
|
readonly getModelConfig: () => Effect.Effect<ModelConfig, never>;
|
|
923
|
+
/**
|
|
924
|
+
* Report structured output capabilities for this provider.
|
|
925
|
+
* Used by the structured output pipeline to select the optimal JSON extraction strategy.
|
|
926
|
+
*/
|
|
927
|
+
readonly getStructuredOutputCapabilities: () => Effect.Effect<StructuredOutputCapabilities, never>;
|
|
907
928
|
}>;
|
|
908
929
|
/**
|
|
909
930
|
* Core LLM service — all LLM interactions go through this.
|
|
@@ -992,6 +1013,15 @@ declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
|
|
|
992
1013
|
* @default 30000 (30 seconds)
|
|
993
1014
|
*/
|
|
994
1015
|
readonly timeoutMs: number;
|
|
1016
|
+
/**
|
|
1017
|
+
* Enable/disable thinking mode for thinking-capable models.
|
|
1018
|
+
* - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
|
|
1019
|
+
* - `false` — Always disable thinking (e.g., cogito:14b, which crashes with think:true)
|
|
1020
|
+
* - `undefined` — Auto-detect based on model capabilities (Ollama only)
|
|
1021
|
+
*
|
|
1022
|
+
* @default undefined (auto-detect)
|
|
1023
|
+
*/
|
|
1024
|
+
readonly thinking?: boolean;
|
|
995
1025
|
/**
|
|
996
1026
|
* Default maximum output tokens for LLM responses.
|
|
997
1027
|
* Used if a CompletionRequest does not specify maxTokens.
|
|
@@ -1155,6 +1185,15 @@ declare const llmConfigFromEnv: {
|
|
|
1155
1185
|
* @default 30000 (30 seconds)
|
|
1156
1186
|
*/
|
|
1157
1187
|
readonly timeoutMs: number;
|
|
1188
|
+
/**
|
|
1189
|
+
* Enable/disable thinking mode for thinking-capable models.
|
|
1190
|
+
* - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
|
|
1191
|
+
* - `false` — Always disable thinking (e.g., cogito:14b, which crashes with think:true)
|
|
1192
|
+
* - `undefined` — Auto-detect based on model capabilities (Ollama only)
|
|
1193
|
+
*
|
|
1194
|
+
* @default undefined (auto-detect)
|
|
1195
|
+
*/
|
|
1196
|
+
readonly thinking?: boolean;
|
|
1158
1197
|
/**
|
|
1159
1198
|
* Default maximum output tokens for LLM responses.
|
|
1160
1199
|
* Used if a CompletionRequest does not specify maxTokens.
|
|
@@ -1361,14 +1400,30 @@ declare const ComplexityAnalysisSchema: Schema.Struct<{
|
|
|
1361
1400
|
}>;
|
|
1362
1401
|
type ComplexityAnalysis = Schema.Schema.Type<typeof ComplexityAnalysisSchema>;
|
|
1363
1402
|
|
|
1403
|
+
/**
|
|
1404
|
+
* Default model constants for each LLM provider.
|
|
1405
|
+
* Single source of truth — used by providers at construction time
|
|
1406
|
+
* and by the runtime to resolve model names for display/metrics.
|
|
1407
|
+
*/
|
|
1408
|
+
declare const PROVIDER_DEFAULT_MODELS: Record<string, string>;
|
|
1409
|
+
/**
|
|
1410
|
+
* Get the default model for a given provider.
|
|
1411
|
+
* Returns undefined if the provider is not recognized.
|
|
1412
|
+
*/
|
|
1413
|
+
declare function getProviderDefaultModel(provider: string): string | undefined;
|
|
1414
|
+
|
|
1364
1415
|
/**
|
|
1365
1416
|
* Create the LLM provider layer for a specific provider.
|
|
1366
1417
|
* Uses env vars for configuration by default.
|
|
1367
1418
|
*/
|
|
1368
|
-
declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string
|
|
1419
|
+
declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string, modelParams?: {
|
|
1420
|
+
thinking?: boolean;
|
|
1421
|
+
temperature?: number;
|
|
1422
|
+
maxTokens?: number;
|
|
1423
|
+
}) => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1369
1424
|
/**
|
|
1370
1425
|
* LLM layer with custom config (for programmatic use).
|
|
1371
1426
|
*/
|
|
1372
1427
|
declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1373
1428
|
|
|
1374
|
-
export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
|
|
1429
|
+
export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, retryPolicy };
|
package/dist/index.js
CHANGED
|
@@ -1376,7 +1376,9 @@ var CompletionResponseSchema = Schema.Struct({
|
|
|
1376
1376
|
/** Actual model identifier used (may differ from request) */
|
|
1377
1377
|
model: Schema.String,
|
|
1378
1378
|
/** Tool calls emitted by the model (if any) */
|
|
1379
|
-
toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
|
|
1379
|
+
toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
|
|
1380
|
+
/** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
|
|
1381
|
+
thinking: Schema.optional(Schema.String)
|
|
1380
1382
|
});
|
|
1381
1383
|
|
|
1382
1384
|
// src/errors.ts
|
|
@@ -1405,7 +1407,7 @@ var LLMConfig = class extends Context2.Tag("LLMConfig")() {
|
|
|
1405
1407
|
};
|
|
1406
1408
|
var llmConfigFromEnv = LLMConfig.of({
|
|
1407
1409
|
defaultProvider: "anthropic",
|
|
1408
|
-
defaultModel: process.env.LLM_DEFAULT_MODEL
|
|
1410
|
+
defaultModel: process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514",
|
|
1409
1411
|
anthropicApiKey: process.env.ANTHROPIC_API_KEY,
|
|
1410
1412
|
openaiApiKey: process.env.OPENAI_API_KEY,
|
|
1411
1413
|
googleApiKey: process.env.GOOGLE_API_KEY,
|
|
@@ -1416,7 +1418,7 @@ var llmConfigFromEnv = LLMConfig.of({
|
|
|
1416
1418
|
provider: process.env.EMBEDDING_PROVIDER ?? "openai",
|
|
1417
1419
|
batchSize: 100
|
|
1418
1420
|
},
|
|
1419
|
-
supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL
|
|
1421
|
+
supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514").startsWith("claude"),
|
|
1420
1422
|
maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
|
|
1421
1423
|
timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
|
|
1422
1424
|
defaultMaxTokens: 4096,
|
|
@@ -1834,6 +1836,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
1834
1836
|
getModelConfig: () => Effect4.succeed({
|
|
1835
1837
|
provider: "anthropic",
|
|
1836
1838
|
model: config.defaultModel
|
|
1839
|
+
}),
|
|
1840
|
+
getStructuredOutputCapabilities: () => Effect4.succeed({
|
|
1841
|
+
nativeJsonMode: false,
|
|
1842
|
+
jsonSchemaEnforcement: false,
|
|
1843
|
+
prefillSupport: true,
|
|
1844
|
+
grammarConstraints: false
|
|
1837
1845
|
})
|
|
1838
1846
|
});
|
|
1839
1847
|
})
|
|
@@ -2121,6 +2129,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2121
2129
|
getModelConfig: () => Effect5.succeed({
|
|
2122
2130
|
provider: "openai",
|
|
2123
2131
|
model: defaultModel
|
|
2132
|
+
}),
|
|
2133
|
+
getStructuredOutputCapabilities: () => Effect5.succeed({
|
|
2134
|
+
nativeJsonMode: true,
|
|
2135
|
+
jsonSchemaEnforcement: true,
|
|
2136
|
+
prefillSupport: false,
|
|
2137
|
+
grammarConstraints: false
|
|
2124
2138
|
})
|
|
2125
2139
|
});
|
|
2126
2140
|
})
|
|
@@ -2164,12 +2178,29 @@ var mapOpenAIResponse = (response, model) => {
|
|
|
2164
2178
|
|
|
2165
2179
|
// src/providers/local.ts
|
|
2166
2180
|
import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
|
|
2181
|
+
|
|
2182
|
+
// src/provider-defaults.ts
|
|
2183
|
+
var PROVIDER_DEFAULT_MODELS = {
|
|
2184
|
+
anthropic: "claude-sonnet-4-20250514",
|
|
2185
|
+
openai: "gpt-4o",
|
|
2186
|
+
ollama: "cogito:14b",
|
|
2187
|
+
gemini: "gemini-2.0-flash",
|
|
2188
|
+
litellm: "gpt-4o",
|
|
2189
|
+
test: "test-model"
|
|
2190
|
+
};
|
|
2191
|
+
function getProviderDefaultModel(provider) {
|
|
2192
|
+
return PROVIDER_DEFAULT_MODELS[provider];
|
|
2193
|
+
}
|
|
2194
|
+
|
|
2195
|
+
// src/providers/local.ts
|
|
2167
2196
|
var toOllamaMessages = (messages) => messages.map((m) => {
|
|
2168
2197
|
if (m.role === "tool") {
|
|
2169
2198
|
return { role: "tool", content: m.content };
|
|
2170
2199
|
}
|
|
2171
2200
|
if (m.role === "assistant") {
|
|
2172
|
-
const textContent = typeof m.content === "string" ? m.content : m.content.filter(
|
|
2201
|
+
const textContent = typeof m.content === "string" ? m.content : m.content.filter(
|
|
2202
|
+
(b) => b.type === "text"
|
|
2203
|
+
).map((b) => b.text).join("");
|
|
2173
2204
|
const toolUseBlocks = typeof m.content !== "string" ? m.content.filter(
|
|
2174
2205
|
(b) => b.type === "tool_use"
|
|
2175
2206
|
) : [];
|
|
@@ -2189,7 +2220,9 @@ var toOllamaMessages = (messages) => messages.map((m) => {
|
|
|
2189
2220
|
}
|
|
2190
2221
|
return {
|
|
2191
2222
|
role: m.role,
|
|
2192
|
-
content: typeof m.content === "string" ? m.content : m.content.filter(
|
|
2223
|
+
content: typeof m.content === "string" ? m.content : m.content.filter(
|
|
2224
|
+
(b) => b.type === "text"
|
|
2225
|
+
).map((b) => b.text).join("")
|
|
2193
2226
|
};
|
|
2194
2227
|
});
|
|
2195
2228
|
var toOllamaTools = (tools) => {
|
|
@@ -2211,12 +2244,50 @@ var parseToolCalls = (toolCalls) => {
|
|
|
2211
2244
|
input: tc.function.arguments
|
|
2212
2245
|
}));
|
|
2213
2246
|
};
|
|
2247
|
+
var thinkingCapabilityCache = /* @__PURE__ */ new Map();
|
|
2248
|
+
async function supportsThinking(client, model) {
|
|
2249
|
+
const cached = thinkingCapabilityCache.get(model);
|
|
2250
|
+
if (cached !== void 0) return cached;
|
|
2251
|
+
try {
|
|
2252
|
+
const info = await client.show({ model });
|
|
2253
|
+
const template = info.template ?? "";
|
|
2254
|
+
const result = template.includes("think") || template.includes("<|thinking|>");
|
|
2255
|
+
thinkingCapabilityCache.set(model, result);
|
|
2256
|
+
return result;
|
|
2257
|
+
} catch {
|
|
2258
|
+
thinkingCapabilityCache.set(model, false);
|
|
2259
|
+
return false;
|
|
2260
|
+
}
|
|
2261
|
+
}
|
|
2262
|
+
async function resolveThinking(client, model, configThinking) {
|
|
2263
|
+
if (configThinking === false) return void 0;
|
|
2264
|
+
if (configThinking === true) return true;
|
|
2265
|
+
const capable = await supportsThinking(client, model);
|
|
2266
|
+
return capable ? true : void 0;
|
|
2267
|
+
}
|
|
2268
|
+
function ollamaError(error, model) {
|
|
2269
|
+
const msg = error?.message ?? String(error);
|
|
2270
|
+
const status = error?.status_code ?? error?.statusCode;
|
|
2271
|
+
if (status === 404 || /model\s+['"]?\S+['"]?\s+not found/i.test(msg)) {
|
|
2272
|
+
const modelName = model ?? msg.match(/model\s+['"]?(\S+?)['"]?\s+not found/i)?.[1] ?? "unknown";
|
|
2273
|
+
return new LLMError({
|
|
2274
|
+
message: `Model "${modelName}" not found locally. Run: ollama pull ${modelName}`,
|
|
2275
|
+
provider: "ollama",
|
|
2276
|
+
cause: error
|
|
2277
|
+
});
|
|
2278
|
+
}
|
|
2279
|
+
return new LLMError({
|
|
2280
|
+
message: `Ollama request failed: ${msg}`,
|
|
2281
|
+
provider: "ollama",
|
|
2282
|
+
cause: error
|
|
2283
|
+
});
|
|
2284
|
+
}
|
|
2214
2285
|
var LocalProviderLive = Layer5.effect(
|
|
2215
2286
|
LLMService,
|
|
2216
2287
|
Effect6.gen(function* () {
|
|
2217
2288
|
const config = yield* LLMConfig;
|
|
2218
2289
|
const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
|
|
2219
|
-
const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? "
|
|
2290
|
+
const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? getProviderDefaultModel("ollama") ?? "cogito:14b" : config.defaultModel;
|
|
2220
2291
|
const getClient = async () => {
|
|
2221
2292
|
const { Ollama: Ollama3 } = await Promise.resolve().then(() => (init_dist(), dist_exports));
|
|
2222
2293
|
return new Ollama3({ host: endpoint });
|
|
@@ -2231,11 +2302,17 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2231
2302
|
if (request.systemPrompt) {
|
|
2232
2303
|
msgs.unshift({ role: "system", content: request.systemPrompt });
|
|
2233
2304
|
}
|
|
2305
|
+
const think = await resolveThinking(
|
|
2306
|
+
client,
|
|
2307
|
+
model,
|
|
2308
|
+
config.thinking
|
|
2309
|
+
);
|
|
2234
2310
|
return client.chat({
|
|
2235
2311
|
model,
|
|
2236
2312
|
messages: msgs,
|
|
2237
2313
|
tools: toOllamaTools(request.tools),
|
|
2238
2314
|
stream: false,
|
|
2315
|
+
...think !== void 0 ? { think } : {},
|
|
2239
2316
|
keep_alive: "5m",
|
|
2240
2317
|
options: {
|
|
2241
2318
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
@@ -2244,13 +2321,10 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2244
2321
|
}
|
|
2245
2322
|
});
|
|
2246
2323
|
},
|
|
2247
|
-
catch: (error) =>
|
|
2248
|
-
message: `Ollama request failed: ${error}`,
|
|
2249
|
-
provider: "ollama",
|
|
2250
|
-
cause: error
|
|
2251
|
-
})
|
|
2324
|
+
catch: (error) => ollamaError(error, model)
|
|
2252
2325
|
});
|
|
2253
2326
|
const content = response.message?.content ?? "";
|
|
2327
|
+
const thinkingContent = response.message?.thinking || void 0;
|
|
2254
2328
|
const inputTokens = response.prompt_eval_count ?? 0;
|
|
2255
2329
|
const outputTokens = response.eval_count ?? 0;
|
|
2256
2330
|
const toolCalls = parseToolCalls(
|
|
@@ -2268,7 +2342,8 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2268
2342
|
// Local models are free
|
|
2269
2343
|
},
|
|
2270
2344
|
model: response.model ?? model,
|
|
2271
|
-
toolCalls
|
|
2345
|
+
toolCalls,
|
|
2346
|
+
...thinkingContent ? { thinking: thinkingContent } : {}
|
|
2272
2347
|
};
|
|
2273
2348
|
}).pipe(
|
|
2274
2349
|
Effect6.retry(retryPolicy),
|
|
@@ -2292,13 +2367,22 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2292
2367
|
const client = await getClient();
|
|
2293
2368
|
const msgs = toOllamaMessages(request.messages);
|
|
2294
2369
|
if (request.systemPrompt) {
|
|
2295
|
-
msgs.unshift({
|
|
2370
|
+
msgs.unshift({
|
|
2371
|
+
role: "system",
|
|
2372
|
+
content: request.systemPrompt
|
|
2373
|
+
});
|
|
2296
2374
|
}
|
|
2375
|
+
const think = await resolveThinking(
|
|
2376
|
+
client,
|
|
2377
|
+
model,
|
|
2378
|
+
config.thinking
|
|
2379
|
+
);
|
|
2297
2380
|
const stream = await client.chat({
|
|
2298
2381
|
model,
|
|
2299
2382
|
messages: msgs,
|
|
2300
2383
|
tools: toOllamaTools(request.tools),
|
|
2301
2384
|
stream: true,
|
|
2385
|
+
...think !== void 0 ? { think } : {},
|
|
2302
2386
|
keep_alive: "5m",
|
|
2303
2387
|
options: {
|
|
2304
2388
|
temperature: request.temperature ?? config.defaultTemperature,
|
|
@@ -2332,14 +2416,7 @@ var LocalProviderLive = Layer5.effect(
|
|
|
2332
2416
|
}
|
|
2333
2417
|
}
|
|
2334
2418
|
} catch (error) {
|
|
2335
|
-
|
|
2336
|
-
emit.fail(
|
|
2337
|
-
new LLMError({
|
|
2338
|
-
message: err.message ?? String(error),
|
|
2339
|
-
provider: "ollama",
|
|
2340
|
-
cause: error
|
|
2341
|
-
})
|
|
2342
|
-
);
|
|
2419
|
+
emit.fail(ollamaError(error, model));
|
|
2343
2420
|
}
|
|
2344
2421
|
};
|
|
2345
2422
|
void doStream();
|
|
@@ -2404,18 +2481,14 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2404
2481
|
}
|
|
2405
2482
|
});
|
|
2406
2483
|
},
|
|
2407
|
-
catch: (error) =>
|
|
2408
|
-
message: `Ollama request failed: ${error}`,
|
|
2409
|
-
provider: "ollama",
|
|
2410
|
-
cause: error
|
|
2411
|
-
})
|
|
2484
|
+
catch: (error) => ollamaError(error, model)
|
|
2412
2485
|
});
|
|
2413
2486
|
const content = response.message?.content ?? "";
|
|
2414
2487
|
try {
|
|
2415
2488
|
const parsed = JSON.parse(content);
|
|
2416
|
-
const decoded = Schema4.decodeUnknownEither(
|
|
2417
|
-
|
|
2418
|
-
)
|
|
2489
|
+
const decoded = Schema4.decodeUnknownEither(request.outputSchema)(
|
|
2490
|
+
parsed
|
|
2491
|
+
);
|
|
2419
2492
|
if (decoded._tag === "Right") {
|
|
2420
2493
|
return decoded.right;
|
|
2421
2494
|
}
|
|
@@ -2442,11 +2515,10 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2442
2515
|
});
|
|
2443
2516
|
return response.embeddings;
|
|
2444
2517
|
},
|
|
2445
|
-
catch: (error) =>
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
})
|
|
2518
|
+
catch: (error) => ollamaError(
|
|
2519
|
+
error,
|
|
2520
|
+
model ?? config.embeddingConfig.model ?? "nomic-embed-text"
|
|
2521
|
+
)
|
|
2450
2522
|
}),
|
|
2451
2523
|
countTokens: (messages) => Effect6.gen(function* () {
|
|
2452
2524
|
return yield* estimateTokenCount(messages);
|
|
@@ -2454,6 +2526,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2454
2526
|
getModelConfig: () => Effect6.succeed({
|
|
2455
2527
|
provider: "ollama",
|
|
2456
2528
|
model: defaultModel
|
|
2529
|
+
}),
|
|
2530
|
+
getStructuredOutputCapabilities: () => Effect6.succeed({
|
|
2531
|
+
nativeJsonMode: true,
|
|
2532
|
+
jsonSchemaEnforcement: false,
|
|
2533
|
+
prefillSupport: false,
|
|
2534
|
+
grammarConstraints: true
|
|
2457
2535
|
})
|
|
2458
2536
|
});
|
|
2459
2537
|
})
|
|
@@ -2770,6 +2848,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2770
2848
|
getModelConfig: () => Effect7.succeed({
|
|
2771
2849
|
provider: "gemini",
|
|
2772
2850
|
model: config.defaultModel
|
|
2851
|
+
}),
|
|
2852
|
+
getStructuredOutputCapabilities: () => Effect7.succeed({
|
|
2853
|
+
nativeJsonMode: true,
|
|
2854
|
+
jsonSchemaEnforcement: false,
|
|
2855
|
+
prefillSupport: false,
|
|
2856
|
+
grammarConstraints: false
|
|
2773
2857
|
})
|
|
2774
2858
|
});
|
|
2775
2859
|
})
|
|
@@ -3109,6 +3193,12 @@ No markdown, no code fences, just raw JSON.`
|
|
|
3109
3193
|
getModelConfig: () => Effect8.succeed({
|
|
3110
3194
|
provider: "litellm",
|
|
3111
3195
|
model: defaultModel
|
|
3196
|
+
}),
|
|
3197
|
+
getStructuredOutputCapabilities: () => Effect8.succeed({
|
|
3198
|
+
nativeJsonMode: false,
|
|
3199
|
+
jsonSchemaEnforcement: false,
|
|
3200
|
+
prefillSupport: false,
|
|
3201
|
+
grammarConstraints: false
|
|
3112
3202
|
})
|
|
3113
3203
|
});
|
|
3114
3204
|
})
|
|
@@ -3149,25 +3239,42 @@ var TestLLMService = (responses) => ({
|
|
|
3149
3239
|
model: "test-model"
|
|
3150
3240
|
};
|
|
3151
3241
|
}),
|
|
3152
|
-
stream: (
|
|
3153
|
-
|
|
3154
|
-
|
|
3155
|
-
|
|
3156
|
-
|
|
3157
|
-
|
|
3158
|
-
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
|
|
3162
|
-
usage: {
|
|
3163
|
-
inputTokens: 0,
|
|
3164
|
-
outputTokens: 0,
|
|
3165
|
-
totalTokens: 0,
|
|
3166
|
-
estimatedCost: 0
|
|
3167
|
-
}
|
|
3242
|
+
stream: (request) => {
|
|
3243
|
+
const lastMessage = request.messages[request.messages.length - 1];
|
|
3244
|
+
const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
|
|
3245
|
+
const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
|
|
3246
|
+
const searchText = `${content} ${systemPrompt}`;
|
|
3247
|
+
let matchedResponse = "Test response";
|
|
3248
|
+
for (const [pattern, response] of Object.entries(responses)) {
|
|
3249
|
+
if (pattern.length > 0 && searchText.includes(pattern)) {
|
|
3250
|
+
matchedResponse = response;
|
|
3251
|
+
break;
|
|
3168
3252
|
}
|
|
3169
|
-
|
|
3170
|
-
|
|
3253
|
+
}
|
|
3254
|
+
const inputTokens = Math.ceil(content.length / 4);
|
|
3255
|
+
const outputTokens = Math.ceil(matchedResponse.length / 4);
|
|
3256
|
+
return Effect9.succeed(
|
|
3257
|
+
Stream6.make(
|
|
3258
|
+
{
|
|
3259
|
+
type: "text_delta",
|
|
3260
|
+
text: matchedResponse
|
|
3261
|
+
},
|
|
3262
|
+
{
|
|
3263
|
+
type: "content_complete",
|
|
3264
|
+
content: matchedResponse
|
|
3265
|
+
},
|
|
3266
|
+
{
|
|
3267
|
+
type: "usage",
|
|
3268
|
+
usage: {
|
|
3269
|
+
inputTokens,
|
|
3270
|
+
outputTokens,
|
|
3271
|
+
totalTokens: inputTokens + outputTokens,
|
|
3272
|
+
estimatedCost: 0
|
|
3273
|
+
}
|
|
3274
|
+
}
|
|
3275
|
+
)
|
|
3276
|
+
);
|
|
3277
|
+
},
|
|
3171
3278
|
completeStructured: (request) => Effect9.gen(function* () {
|
|
3172
3279
|
const lastMessage = request.messages[request.messages.length - 1];
|
|
3173
3280
|
const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
|
|
@@ -3193,6 +3300,12 @@ var TestLLMService = (responses) => ({
|
|
|
3193
3300
|
getModelConfig: () => Effect9.succeed({
|
|
3194
3301
|
provider: "anthropic",
|
|
3195
3302
|
model: "test-model"
|
|
3303
|
+
}),
|
|
3304
|
+
getStructuredOutputCapabilities: () => Effect9.succeed({
|
|
3305
|
+
nativeJsonMode: true,
|
|
3306
|
+
jsonSchemaEnforcement: false,
|
|
3307
|
+
prefillSupport: false,
|
|
3308
|
+
grammarConstraints: false
|
|
3196
3309
|
})
|
|
3197
3310
|
});
|
|
3198
3311
|
var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
|
|
@@ -3263,14 +3376,19 @@ var ComplexityAnalysisSchema = Schema8.Struct({
|
|
|
3263
3376
|
|
|
3264
3377
|
// src/runtime.ts
|
|
3265
3378
|
import { Layer as Layer9 } from "effect";
|
|
3266
|
-
var createLLMProviderLayer = (provider = "anthropic", testResponses, model) => {
|
|
3379
|
+
var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams) => {
|
|
3267
3380
|
if (provider === "test") {
|
|
3268
3381
|
return Layer9.mergeAll(
|
|
3269
3382
|
TestLLMServiceLayer(testResponses ?? {}),
|
|
3270
3383
|
PromptManagerLive
|
|
3271
3384
|
);
|
|
3272
3385
|
}
|
|
3273
|
-
const
|
|
3386
|
+
const configOverrides = {};
|
|
3387
|
+
if (model) configOverrides.defaultModel = model;
|
|
3388
|
+
if (modelParams?.thinking !== void 0) configOverrides.thinking = modelParams.thinking;
|
|
3389
|
+
if (modelParams?.temperature !== void 0) configOverrides.defaultTemperature = modelParams.temperature;
|
|
3390
|
+
if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
|
|
3391
|
+
const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
|
|
3274
3392
|
const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
|
|
3275
3393
|
return Layer9.mergeAll(
|
|
3276
3394
|
providerLayer.pipe(Layer9.provide(configLayer)),
|
|
@@ -3309,6 +3427,7 @@ export {
|
|
|
3309
3427
|
ModelConfigSchema,
|
|
3310
3428
|
ModelPresets,
|
|
3311
3429
|
OpenAIProviderLive,
|
|
3430
|
+
PROVIDER_DEFAULT_MODELS,
|
|
3312
3431
|
PlanSchema,
|
|
3313
3432
|
PromptManager,
|
|
3314
3433
|
PromptManagerLive,
|
|
@@ -3329,6 +3448,7 @@ export {
|
|
|
3329
3448
|
createLLMProviderLayer,
|
|
3330
3449
|
createLLMProviderLayerWithConfig,
|
|
3331
3450
|
estimateTokenCount,
|
|
3451
|
+
getProviderDefaultModel,
|
|
3332
3452
|
llmConfigFromEnv,
|
|
3333
3453
|
makeCacheable,
|
|
3334
3454
|
retryPolicy
|