@reactive-agents/llm-provider 0.7.8 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1378,7 +1378,24 @@ var CompletionResponseSchema = Schema.Struct({
1378
1378
  /** Tool calls emitted by the model (if any) */
1379
1379
  toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
1380
1380
  /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
1381
- thinking: Schema.optional(Schema.String)
1381
+ thinking: Schema.optional(Schema.String),
1382
+ /** Token-level log probabilities (when requested via logprobs in CompletionRequest) */
1383
+ logprobs: Schema.optional(
1384
+ Schema.Array(
1385
+ Schema.Struct({
1386
+ token: Schema.String,
1387
+ logprob: Schema.Number,
1388
+ topLogprobs: Schema.optional(
1389
+ Schema.Array(
1390
+ Schema.Struct({
1391
+ token: Schema.String,
1392
+ logprob: Schema.Number
1393
+ })
1394
+ )
1395
+ )
1396
+ })
1397
+ )
1398
+ )
1382
1399
  });
1383
1400
 
1384
1401
  // src/errors.ts
@@ -1970,6 +1987,12 @@ var OpenAIProviderLive = Layer4.effect(
1970
1987
  messages,
1971
1988
  stop: request.stopSequences ? [...request.stopSequences] : void 0
1972
1989
  };
1990
+ if (request.logprobs) {
1991
+ requestBody.logprobs = true;
1992
+ if (request.topLogprobs != null) {
1993
+ requestBody.top_logprobs = request.topLogprobs;
1994
+ }
1995
+ }
1973
1996
  if (request.tools && request.tools.length > 0) {
1974
1997
  requestBody.tools = request.tools.map(toOpenAITool);
1975
1998
  }
@@ -2190,6 +2213,17 @@ var mapOpenAIResponse = (response, model) => {
2190
2213
  input
2191
2214
  };
2192
2215
  }) : void 0;
2216
+ const rawLogprobs = response.choices[0]?.logprobs?.content;
2217
+ const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
2218
+ token: lp.token,
2219
+ logprob: lp.logprob,
2220
+ ...lp.top_logprobs ? {
2221
+ topLogprobs: lp.top_logprobs.map((tlp) => ({
2222
+ token: tlp.token,
2223
+ logprob: tlp.logprob
2224
+ }))
2225
+ } : {}
2226
+ })) : void 0;
2193
2227
  return {
2194
2228
  content,
2195
2229
  stopReason,
@@ -2204,7 +2238,8 @@ var mapOpenAIResponse = (response, model) => {
2204
2238
  )
2205
2239
  },
2206
2240
  model: response.model ?? model,
2207
- toolCalls
2241
+ toolCalls,
2242
+ ...logprobs ? { logprobs } : {}
2208
2243
  };
2209
2244
  };
2210
2245
 
@@ -2349,7 +2384,9 @@ var LocalProviderLive = Layer5.effect(
2349
2384
  options: {
2350
2385
  temperature: request.temperature ?? config.defaultTemperature,
2351
2386
  num_predict: request.maxTokens ?? config.defaultMaxTokens,
2352
- stop: request.stopSequences ? [...request.stopSequences] : void 0
2387
+ stop: request.stopSequences ? [...request.stopSequences] : void 0,
2388
+ ...request.logprobs ? { logprobs: true } : {},
2389
+ ...request.topLogprobs != null ? { top_logprobs: request.topLogprobs } : {}
2353
2390
  }
2354
2391
  });
2355
2392
  },
@@ -2363,6 +2400,17 @@ var LocalProviderLive = Layer5.effect(
2363
2400
  response.message?.tool_calls
2364
2401
  );
2365
2402
  const hasToolCalls = toolCalls && toolCalls.length > 0;
2403
+ const rawLogprobs = response.logprobs;
2404
+ const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
2405
+ token: lp.token,
2406
+ logprob: lp.logprob,
2407
+ ...lp.top_logprobs ? {
2408
+ topLogprobs: lp.top_logprobs.map((tlp) => ({
2409
+ token: tlp.token,
2410
+ logprob: tlp.logprob
2411
+ }))
2412
+ } : {}
2413
+ })) : void 0;
2366
2414
  return {
2367
2415
  content,
2368
2416
  stopReason: hasToolCalls ? "tool_use" : response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
@@ -2375,7 +2423,8 @@ var LocalProviderLive = Layer5.effect(
2375
2423
  },
2376
2424
  model: response.model ?? model,
2377
2425
  toolCalls,
2378
- ...thinkingContent ? { thinking: thinkingContent } : {}
2426
+ ...thinkingContent ? { thinking: thinkingContent } : {},
2427
+ ...logprobs ? { logprobs } : {}
2379
2428
  };
2380
2429
  }).pipe(
2381
2430
  Effect6.retry(retryPolicy),
@@ -2409,6 +2458,7 @@ var LocalProviderLive = Layer5.effect(
2409
2458
  model,
2410
2459
  config.thinking
2411
2460
  );
2461
+ const wantLogprobs = request.logprobs ?? false;
2412
2462
  const stream = await client.chat({
2413
2463
  model,
2414
2464
  messages: msgs,
@@ -2418,10 +2468,12 @@ var LocalProviderLive = Layer5.effect(
2418
2468
  keep_alive: "5m",
2419
2469
  options: {
2420
2470
  temperature: request.temperature ?? config.defaultTemperature,
2421
- num_predict: request.maxTokens ?? config.defaultMaxTokens
2471
+ num_predict: request.maxTokens ?? config.defaultMaxTokens,
2472
+ ...wantLogprobs ? { logprobs: true } : {}
2422
2473
  }
2423
2474
  });
2424
2475
  let fullContent = "";
2476
+ const accumulatedLogprobs = [];
2425
2477
  for await (const chunk of stream) {
2426
2478
  if (chunk.message?.content) {
2427
2479
  fullContent += chunk.message.content;
@@ -2430,11 +2482,29 @@ var LocalProviderLive = Layer5.effect(
2430
2482
  text: chunk.message.content
2431
2483
  });
2432
2484
  }
2485
+ if (wantLogprobs) {
2486
+ const chunkLp = chunk.logprobs;
2487
+ if (Array.isArray(chunkLp)) {
2488
+ for (const lp of chunkLp) {
2489
+ accumulatedLogprobs.push({
2490
+ token: lp.token,
2491
+ logprob: lp.logprob,
2492
+ ...lp.top_logprobs ? { topLogprobs: lp.top_logprobs.map((t) => ({ token: t.token, logprob: t.logprob })) } : {}
2493
+ });
2494
+ }
2495
+ }
2496
+ }
2433
2497
  if (chunk.done) {
2434
2498
  emit.single({
2435
2499
  type: "content_complete",
2436
2500
  content: fullContent
2437
2501
  });
2502
+ if (accumulatedLogprobs.length > 0) {
2503
+ emit.single({
2504
+ type: "logprobs",
2505
+ logprobs: accumulatedLogprobs
2506
+ });
2507
+ }
2438
2508
  emit.single({
2439
2509
  type: "usage",
2440
2510
  usage: {
@@ -3236,109 +3306,158 @@ No markdown, no code fences, just raw JSON.`
3236
3306
 
3237
3307
  // src/testing.ts
3238
3308
  import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
3239
- var TestLLMService = (responses) => ({
3240
- complete: (request) => Effect9.gen(function* () {
3241
- const lastMessage = request.messages[request.messages.length - 1];
3242
- const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
3243
- const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
3244
- const searchText = `${content} ${systemPrompt}`;
3245
- for (const [pattern, response] of Object.entries(responses)) {
3246
- if (pattern.length > 0 && searchText.includes(pattern)) {
3309
/**
 * Build a synthetic usage record for the test LLM service.
 *
 * Token counts are approximated as one token per four units of length,
 * rounded up; the cost is always reported as zero.
 *
 * @param {number} inputLen - Length of the text sent to the fake model.
 * @param {number} outputLen - Length of the text the fake model returned.
 * @returns {{inputTokens: number, outputTokens: number, totalTokens: number, estimatedCost: number}}
 */
function fakeUsage(inputLen, outputLen) {
  const toTokens = (length) => Math.ceil(length / 4);
  const inputTokens = toTokens(inputLen);
  const outputTokens = toTokens(outputLen);
  return {
    inputTokens,
    outputTokens,
    totalTokens: inputTokens + outputTokens,
    estimatedCost: 0
  };
}
3317
/**
 * Derive the text that scenario `match` guards are tested against.
 *
 * Joins the content of the final message with the request's system prompt,
 * separated by a single space, and trims the result. Either part contributes
 * an empty string when it is missing or is not a plain string.
 *
 * @param {Array<{content?: unknown}>} messages - Conversation messages; only the last one is read.
 * @param {{systemPrompt?: unknown}} request - Request whose `systemPrompt` is appended.
 * @returns {string} Trimmed "<last message> <system prompt>" text.
 */
function extractSearchText(messages, request) {
  const asText = (value) => (typeof value === "string" ? value : "");
  const finalMessage = messages[messages.length - 1];
  const parts = [asText(finalMessage?.content), asText(request.systemPrompt)];
  return parts.join(" ").trim();
}
3323
/**
 * Select the scenario turn that should answer the current call.
 *
 * Scans forward from `callIndex.value` and takes the first turn that either
 * has no `match` guard or whose guard, compiled as a case-insensitive regular
 * expression, matches `searchText`. On a hit the cursor moves to the following
 * turn, clamped to the final index so the last turn keeps repeating once the
 * scenario is used up. When nothing matches, the last turn is returned and the
 * cursor is left where it was.
 *
 * @param {Array<{match?: string}>} scenario - Ordered turns; must not be empty.
 * @param {{value: number}} callIndex - Mutable cursor shared across calls.
 * @param {string} searchText - Text the `match` guards are tested against.
 * @returns {{turn: object, matchedIndex: number}} The chosen turn and its position.
 * @throws {Error} When `scenario` has no turns.
 * @throws {SyntaxError} When a `match` guard is not a valid regular expression.
 */
function resolveTurn(scenario, callIndex, searchText) {
  // Without this guard an empty scenario yields `turn: undefined`, and callers
  // then fail with an unrelated TypeError on `"error" in turn`.
  if (scenario.length === 0) {
    throw new Error("TestLLMService: scenario must contain at least one turn");
  }
  const lastIndex = scenario.length - 1;
  for (let i = callIndex.value; i <= lastIndex; i++) {
    const turn = scenario[i];
    const guard = turn.match;
    if (!guard || new RegExp(guard, "i").test(searchText)) {
      // Clamp so the final turn is replayed for any calls beyond the script.
      callIndex.value = Math.min(i + 1, lastIndex);
      return { turn, matchedIndex: i };
    }
  }
  return { turn: scenario[lastIndex], matchedIndex: lastIndex };
}
3334
/**
 * Convert scenario tool-call specs into tool-call records.
 *
 * Each record carries the spec's `name`, exposes the spec's `args` as `input`,
 * and uses the spec's `id` when present; otherwise an id of the form
 * `call-<matchedIndex>-<position>` is generated.
 *
 * @param {Array<{id?: string, name: string, args: unknown}>} specs - Tool calls declared on the turn.
 * @param {number} matchedIndex - Index of the scenario turn the specs came from.
 * @returns {Array<{id: string, name: string, input: unknown}>} One record per spec, in order.
 */
function buildToolCalls(specs, matchedIndex) {
  const toRecord = ({ id, name, args }, position) => {
    const resolvedId = id ?? `call-${matchedIndex}-${position}`;
    return { id: resolvedId, name, input: args };
  };
  return specs.map(toRecord);
}
3341
+ var TestLLMService = (scenario) => {
3342
+ const callIndex = { value: 0 };
3343
+ return {
3344
+ complete: (request) => Effect9.gen(function* () {
3345
+ const searchText = extractSearchText(request.messages, request);
3346
+ const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
3347
+ if ("error" in turn) {
3348
+ throw new Error(turn.error);
3349
+ }
3350
+ if ("toolCall" in turn) {
3247
3351
  return {
3248
- content: response,
3249
- stopReason: "end_turn",
3250
- usage: {
3251
- inputTokens: Math.ceil(content.length / 4),
3252
- outputTokens: Math.ceil(response.length / 4),
3253
- totalTokens: Math.ceil(content.length / 4) + Math.ceil(response.length / 4),
3254
- estimatedCost: 0
3255
- },
3256
- model: "test-model"
3352
+ content: "",
3353
+ stopReason: "tool_use",
3354
+ usage: fakeUsage(searchText.length, 0),
3355
+ model: "test-model",
3356
+ toolCalls: buildToolCalls([turn.toolCall], matchedIndex)
3257
3357
  };
3258
3358
  }
3259
- }
3260
- return {
3261
- content: "Test response",
3262
- stopReason: "end_turn",
3263
- usage: {
3264
- inputTokens: 0,
3265
- outputTokens: 0,
3266
- totalTokens: 0,
3267
- estimatedCost: 0
3268
- },
3269
- model: "test-model"
3270
- };
3271
- }),
3272
- stream: (request) => {
3273
- const lastMessage = request.messages[request.messages.length - 1];
3274
- const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
3275
- const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
3276
- const searchText = `${content} ${systemPrompt}`;
3277
- let matchedResponse = "Test response";
3278
- for (const [pattern, response] of Object.entries(responses)) {
3279
- if (pattern.length > 0 && searchText.includes(pattern)) {
3280
- matchedResponse = response;
3281
- break;
3359
+ if ("toolCalls" in turn) {
3360
+ return {
3361
+ content: "",
3362
+ stopReason: "tool_use",
3363
+ usage: fakeUsage(searchText.length, 0),
3364
+ model: "test-model",
3365
+ toolCalls: buildToolCalls(turn.toolCalls, matchedIndex)
3366
+ };
3282
3367
  }
3283
- }
3284
- const inputTokens = Math.ceil(content.length / 4);
3285
- const outputTokens = Math.ceil(matchedResponse.length / 4);
3286
- return Effect9.succeed(
3287
- Stream6.make(
3288
- {
3289
- type: "text_delta",
3290
- text: matchedResponse
3291
- },
3292
- {
3293
- type: "content_complete",
3294
- content: matchedResponse
3295
- },
3296
- {
3297
- type: "usage",
3298
- usage: {
3299
- inputTokens,
3300
- outputTokens,
3301
- totalTokens: inputTokens + outputTokens,
3302
- estimatedCost: 0
3368
+ const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
3369
+ return {
3370
+ content,
3371
+ stopReason: "end_turn",
3372
+ usage: fakeUsage(searchText.length, content.length),
3373
+ model: "test-model"
3374
+ };
3375
+ }),
3376
+ stream: (request) => {
3377
+ const searchText = extractSearchText(request.messages, request);
3378
+ const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
3379
+ if ("error" in turn) {
3380
+ return Effect9.succeed(
3381
+ Stream6.make(
3382
+ { type: "error", error: turn.error }
3383
+ )
3384
+ );
3385
+ }
3386
+ const specs = "toolCall" in turn ? [turn.toolCall] : "toolCalls" in turn ? turn.toolCalls : null;
3387
+ if (specs) {
3388
+ const events = [
3389
+ ...specs.flatMap((spec, i) => [
3390
+ {
3391
+ type: "tool_use_start",
3392
+ id: spec.id ?? `call-${matchedIndex}-${i}`,
3393
+ name: spec.name
3394
+ },
3395
+ {
3396
+ type: "tool_use_delta",
3397
+ input: JSON.stringify(spec.args)
3398
+ }
3399
+ ]),
3400
+ { type: "content_complete", content: "" },
3401
+ { type: "usage", usage: fakeUsage(searchText.length, 0) }
3402
+ ];
3403
+ return Effect9.succeed(
3404
+ Stream6.fromIterable(events)
3405
+ );
3406
+ }
3407
+ const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
3408
+ const inputTokens = Math.ceil(searchText.length / 4);
3409
+ const outputTokens = Math.ceil(content.length / 4);
3410
+ return Effect9.succeed(
3411
+ Stream6.make(
3412
+ { type: "text_delta", text: content },
3413
+ { type: "content_complete", content },
3414
+ {
3415
+ type: "usage",
3416
+ usage: {
3417
+ inputTokens,
3418
+ outputTokens,
3419
+ totalTokens: inputTokens + outputTokens,
3420
+ estimatedCost: 0
3421
+ }
3303
3422
  }
3304
- }
3423
+ )
3424
+ );
3425
+ },
3426
+ completeStructured: (request) => Effect9.gen(function* () {
3427
+ const searchText = extractSearchText(request.messages, request);
3428
+ const { turn } = resolveTurn(scenario, callIndex, searchText);
3429
+ if ("error" in turn) {
3430
+ throw new Error(turn.error);
3431
+ }
3432
+ if ("json" in turn) {
3433
+ return turn.json;
3434
+ }
3435
+ const responseContent = "text" in turn ? turn.text : "{}";
3436
+ const parsed = JSON.parse(responseContent);
3437
+ return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
3438
+ }),
3439
+ embed: (texts) => Effect9.succeed(
3440
+ texts.map(() => new Array(768).fill(0).map(() => Math.random()))
3441
+ ),
3442
+ countTokens: (messages) => Effect9.succeed(
3443
+ messages.reduce(
3444
+ (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
3445
+ 0
3305
3446
  )
3306
- );
3307
- },
3308
- completeStructured: (request) => Effect9.gen(function* () {
3309
- const lastMessage = request.messages[request.messages.length - 1];
3310
- const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
3311
- let responseContent = "Test response";
3312
- for (const [pattern, response] of Object.entries(responses)) {
3313
- if (content.includes(pattern)) {
3314
- responseContent = response;
3315
- break;
3316
- }
3317
- }
3318
- const parsed = JSON.parse(responseContent);
3319
- return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
3320
- }),
3321
- embed: (texts) => Effect9.succeed(
3322
- texts.map(() => new Array(768).fill(0).map(() => Math.random()))
3323
- ),
3324
- countTokens: (messages) => Effect9.succeed(
3325
- messages.reduce(
3326
- (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
3327
- 0
3328
- )
3329
- ),
3330
- getModelConfig: () => Effect9.succeed({
3331
- provider: "anthropic",
3332
- model: "test-model"
3333
- }),
3334
- getStructuredOutputCapabilities: () => Effect9.succeed({
3335
- nativeJsonMode: true,
3336
- jsonSchemaEnforcement: false,
3337
- prefillSupport: false,
3338
- grammarConstraints: false
3339
- })
3340
- });
3341
- var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
3447
+ ),
3448
+ getModelConfig: () => Effect9.succeed({
3449
+ provider: "anthropic",
3450
+ model: "test-model"
3451
+ }),
3452
+ getStructuredOutputCapabilities: () => Effect9.succeed({
3453
+ nativeJsonMode: true,
3454
+ jsonSchemaEnforcement: false,
3455
+ prefillSupport: false,
3456
+ grammarConstraints: false
3457
+ })
3458
+ };
3459
+ };
3460
+ var TestLLMServiceLayer = (scenario = [{ text: "" }]) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(scenario)));
3342
3461
 
3343
3462
  // src/structured-output.ts
3344
3463
  import { Schema as Schema8 } from "effect";
@@ -3545,10 +3664,10 @@ var makeCircuitBreakerLayer = (config) => Layer9.effect(
3545
3664
  });
3546
3665
  })
3547
3666
  );
3548
- var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
3667
+ var createLLMProviderLayer = (provider = "anthropic", testScenario, model, modelParams, circuitBreaker) => {
3549
3668
  if (provider === "test") {
3550
3669
  return Layer9.mergeAll(
3551
- TestLLMServiceLayer(testResponses ?? {}),
3670
+ TestLLMServiceLayer(testScenario ?? [{ text: "" }]),
3552
3671
  PromptManagerLive
3553
3672
  );
3554
3673
  }
@@ -3577,6 +3696,86 @@ var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
3577
3696
  PromptManagerLive
3578
3697
  );
3579
3698
  };
3699
+
3700
+ // src/fallback-chain.ts
3701
/**
 * Tracks which provider and model to use as calls fail.
 *
 * Providers are abandoned after `errorThreshold` recorded errors (default 3);
 * models are stepped through on rate limits. Both cursors only move forward
 * and stop at the last configured entry.
 */
var FallbackChain = class {
  /** Error count per provider. */
  errorCounts = /* @__PURE__ */ new Map();
  /** Current index in the providers list. */
  currentProviderIndex = 0;
  /** Current index in the models list. */
  currentModelIndex = 0;
  /** Threshold for switching to next provider. */
  threshold;
  /**
   * @param config - Chain configuration: `providers` (ordered list),
   *   optional `models` (ordered list) and optional `errorThreshold`.
   */
  constructor(config) {
    this.config = config;
    this.threshold = config.errorThreshold ?? 3;
  }
  /**
   * Record an error for the given provider.
   * Increments the provider's error count and, when that provider is the
   * active one and its count has reached the threshold, switches to the next
   * provider if one exists.
   *
   * @param provider - Provider name that errored
   */
  recordError(provider) {
    const count = (this.errorCounts.get(provider) ?? 0) + 1;
    this.errorCounts.set(provider, count);
    // Only failures of the active provider may trigger a switch. A late error
    // from an already-abandoned provider (whose count stays at or above the
    // threshold) must not skip over a fallback that has not failed.
    const isActive = provider === this.config.providers[this.currentProviderIndex];
    const hasNext = this.currentProviderIndex < this.config.providers.length - 1;
    if (isActive && hasNext && count >= this.threshold) {
      this.currentProviderIndex++;
    }
  }
  /**
   * Record a rate limit error (429) for the given provider.
   * Falls back to the next model in the chain, if any.
   *
   * @param _provider - Provider name that was rate limited. Currently unused;
   *   the underscore prefix marks it as intentionally ignored.
   */
  recordRateLimit(_provider) {
    if (this.config.models && this.currentModelIndex < this.config.models.length - 1) {
      this.currentModelIndex++;
    }
  }
  /**
   * Record a successful call for the given provider.
   * Resets the error count for that provider.
   *
   * @param provider - Provider name that succeeded
   */
  recordSuccess(provider) {
    this.errorCounts.set(provider, 0);
  }
  /**
   * Get the currently active provider.
   *
   * @returns Name of the provider to use
   * @throws {Error} When no provider exists at the current index
   *   (for example, an empty `providers` list).
   */
  currentProvider() {
    const provider = this.config.providers[this.currentProviderIndex];
    if (!provider) {
      throw new Error(`FallbackChain: Invalid provider index ${this.currentProviderIndex}`);
    }
    return provider;
  }
  /**
   * Get the currently active model.
   *
   * @returns Name of the model to use, or undefined if no models configured
   */
  currentModel() {
    return this.config.models?.[this.currentModelIndex];
  }
  /**
   * Check if there are more fallbacks available (provider or model).
   *
   * @returns true if there are unused fallback providers or models, false if all exhausted
   */
  hasFallback() {
    const hasProviderFallback = this.currentProviderIndex < this.config.providers.length - 1;
    const hasModelFallback = this.config.models !== void 0 && this.currentModelIndex < this.config.models.length - 1;
    return hasProviderFallback || hasModelFallback;
  }
};
3580
3779
  export {
3581
3780
  AnthropicProviderLive,
3582
3781
  CacheControlSchema,
@@ -3584,6 +3783,7 @@ export {
3584
3783
  ComplexityAnalysisSchema,
3585
3784
  DefaultEmbeddingConfig,
3586
3785
  EmbeddingConfigSchema,
3786
+ FallbackChain,
3587
3787
  GeminiProviderLive,
3588
3788
  ImageContentBlockSchema,
3589
3789
  ImageSourceSchema,