npm - @reactive-agents/llm-provider - Versions diffs - 0.7.8 → 0.8.0 - Mend

@reactive-agents/llm-provider 0.7.8 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -1378,7 +1378,24 @@ var CompletionResponseSchema = Schema.Struct({
   /** Tool calls emitted by the model (if any) */
   toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
   /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
-  thinking: Schema.optional(Schema.String)
+  thinking: Schema.optional(Schema.String),
+  /** Token-level log probabilities (when requested via logprobs in CompletionRequest) */
+  logprobs: Schema.optional(
+    Schema.Array(
+      Schema.Struct({
+        token: Schema.String,
+        logprob: Schema.Number,
+        topLogprobs: Schema.optional(
+          Schema.Array(
+            Schema.Struct({
+              token: Schema.String,
+              logprob: Schema.Number
+            })
+          )
+        )
+      })
+    )
+  )
 });
 // src/errors.ts
@@ -1970,6 +1987,12 @@ var OpenAIProviderLive = Layer4.effect(
           messages,
           stop: request.stopSequences ? [...request.stopSequences] : void 0
         };
+        if (request.logprobs) {
+          requestBody.logprobs = true;
+          if (request.topLogprobs != null) {
+            requestBody.top_logprobs = request.topLogprobs;
+          }
+        }
         if (request.tools && request.tools.length > 0) {
           requestBody.tools = request.tools.map(toOpenAITool);
         }
@@ -2190,6 +2213,17 @@ var mapOpenAIResponse = (response, model) => {
       input
     };
   }) : void 0;
+  const rawLogprobs = response.choices[0]?.logprobs?.content;
+  const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
+    token: lp.token,
+    logprob: lp.logprob,
+    ...lp.top_logprobs ? {
+      topLogprobs: lp.top_logprobs.map((tlp) => ({
+        token: tlp.token,
+        logprob: tlp.logprob
+      }))
+    } : {}
+  })) : void 0;
   return {
     content,
     stopReason,
@@ -2204,7 +2238,8 @@ var mapOpenAIResponse = (response, model) => {
       )
     },
     model: response.model ?? model,
-    toolCalls
+    toolCalls,
+    ...logprobs ? { logprobs } : {}
   };
 };
@@ -2349,7 +2384,9 @@ var LocalProviderLive = Layer5.effect(
               options: {
                 temperature: request.temperature ?? config.defaultTemperature,
                 num_predict: request.maxTokens ?? config.defaultMaxTokens,
-                stop: request.stopSequences ? [...request.stopSequences] : void 0
+                stop: request.stopSequences ? [...request.stopSequences] : void 0,
+                ...request.logprobs ? { logprobs: true } : {},
+                ...request.topLogprobs != null ? { top_logprobs: request.topLogprobs } : {}
               }
             });
           },
@@ -2363,6 +2400,17 @@ var LocalProviderLive = Layer5.effect(
           response.message?.tool_calls
         );
         const hasToolCalls = toolCalls && toolCalls.length > 0;
+        const rawLogprobs = response.logprobs;
+        const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
+          token: lp.token,
+          logprob: lp.logprob,
+          ...lp.top_logprobs ? {
+            topLogprobs: lp.top_logprobs.map((tlp) => ({
+              token: tlp.token,
+              logprob: tlp.logprob
+            }))
+          } : {}
+        })) : void 0;
         return {
           content,
           stopReason: hasToolCalls ? "tool_use" : response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
@@ -2375,7 +2423,8 @@ var LocalProviderLive = Layer5.effect(
           },
           model: response.model ?? model,
           toolCalls,
-          ...thinkingContent ? { thinking: thinkingContent } : {}
+          ...thinkingContent ? { thinking: thinkingContent } : {},
+          ...logprobs ? { logprobs } : {}
         };
       }).pipe(
         Effect6.retry(retryPolicy),
@@ -2409,6 +2458,7 @@ var LocalProviderLive = Layer5.effect(
                 model,
                 config.thinking
               );
+              const wantLogprobs = request.logprobs ?? false;
               const stream = await client.chat({
                 model,
                 messages: msgs,
@@ -2418,10 +2468,12 @@ var LocalProviderLive = Layer5.effect(
                 keep_alive: "5m",
                 options: {
                   temperature: request.temperature ?? config.defaultTemperature,
-                  num_predict: request.maxTokens ?? config.defaultMaxTokens
+                  num_predict: request.maxTokens ?? config.defaultMaxTokens,
+                  ...wantLogprobs ? { logprobs: true } : {}
                 }
               });
               let fullContent = "";
+              const accumulatedLogprobs = [];
               for await (const chunk of stream) {
                 if (chunk.message?.content) {
                   fullContent += chunk.message.content;
@@ -2430,11 +2482,29 @@ var LocalProviderLive = Layer5.effect(
                     text: chunk.message.content
                   });
                 }
+                if (wantLogprobs) {
+                  const chunkLp = chunk.logprobs;
+                  if (Array.isArray(chunkLp)) {
+                    for (const lp of chunkLp) {
+                      accumulatedLogprobs.push({
+                        token: lp.token,
+                        logprob: lp.logprob,
+                        ...lp.top_logprobs ? { topLogprobs: lp.top_logprobs.map((t) => ({ token: t.token, logprob: t.logprob })) } : {}
+                      });
+                    }
+                  }
+                }
                 if (chunk.done) {
                   emit.single({
                     type: "content_complete",
                     content: fullContent
                   });
+                  if (accumulatedLogprobs.length > 0) {
+                    emit.single({
+                      type: "logprobs",
+                      logprobs: accumulatedLogprobs
+                    });
+                  }
                   emit.single({
                     type: "usage",
                     usage: {
@@ -3236,109 +3306,158 @@ No markdown, no code fences, just raw JSON.`
 // src/testing.ts
 import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
-var TestLLMService = (responses) => ({
-  complete: (request) => Effect9.gen(function* () {
-    const lastMessage = request.messages[request.messages.length - 1];
-    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
-    const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
-    const searchText = `${content} ${systemPrompt}`;
-    for (const [pattern, response] of Object.entries(responses)) {
-      if (pattern.length > 0 && searchText.includes(pattern)) {
/**
 * Build a synthetic usage record for the test LLM service.
 *
 * Token counts are approximated as one token per four characters (rounded up);
 * the estimated cost is always zero.
 *
 * @param inputLen - Character length of the prompt text
 * @param outputLen - Character length of the generated text
 * @returns Object with inputTokens, outputTokens, totalTokens and estimatedCost
 */
function fakeUsage(inputLen, outputLen) {
  const charsPerToken = 4;
  const inputTokens = Math.ceil(inputLen / charsPerToken);
  const outputTokens = Math.ceil(outputLen / charsPerToken);
  const totalTokens = inputTokens + outputTokens;
  return { inputTokens, outputTokens, totalTokens, estimatedCost: 0 };
}
/**
 * Produce the text that scenario guards are matched against.
 *
 * The result is the last message's content followed by the request's system
 * prompt, separated by a single space and trimmed. Either part contributes an
 * empty string when it is missing or is not a plain string.
 *
 * @param messages - Conversation messages; only the final entry is inspected
 * @param request - Request object whose `systemPrompt` is read
 * @returns Trimmed "<last message> <system prompt>" text
 */
function extractSearchText(messages, request) {
  const last = messages[messages.length - 1];
  let lastText = "";
  if (last && typeof last.content === "string") {
    lastText = last.content;
  }
  let systemText = "";
  if (typeof request.systemPrompt === "string") {
    systemText = request.systemPrompt;
  }
  return [lastText, systemText].join(" ").trim();
}
/**
 * Pick the scenario turn that answers the current call.
 *
 * Scanning starts at `callIndex.value`. A turn is selected when it has no
 * `match` guard, or when its guard (used as a case-insensitive regular
 * expression source) matches `searchText`. On selection the cursor moves to
 * the following turn, clamped to the final index so the last turn keeps
 * repeating once the scenario is exhausted.
 *
 * When no remaining turn qualifies, the final turn is returned and the cursor
 * is left untouched. An empty scenario yields an empty-text turn instead of
 * `undefined`, so callers that probe the turn with the `in` operator do not
 * fail with a TypeError.
 *
 * @param scenario - Ordered list of scripted turns
 * @param callIndex - Mutable cursor object with a numeric `value` field
 * @param searchText - Text the `match` guards are tested against
 * @returns The selected turn and the index it was found at
 */
function resolveTurn(scenario, callIndex, searchText) {
  if (scenario.length === 0) {
    // Nothing scripted: behave like a scenario holding one empty-text turn.
    return { turn: { text: "" }, matchedIndex: 0 };
  }
  const lastIndex = scenario.length - 1;
  for (let i = callIndex.value; i <= lastIndex; i++) {
    const turn = scenario[i];
    const guard = turn.match;
    if (!guard || new RegExp(guard, "i").test(searchText)) {
      // Clamp so the final turn is reused for every later call.
      callIndex.value = Math.min(i + 1, lastIndex);
      return { turn, matchedIndex: i };
    }
  }
  return { turn: scenario[lastIndex], matchedIndex: lastIndex };
}
/**
 * Convert scripted tool-call specs into tool-call records.
 *
 * Each record carries the spec's `name`, its `args` under the `input` key, and
 * an `id`. When a spec has no `id`, one is generated as
 * `call-<matchedIndex>-<position>`.
 *
 * @param specs - Tool-call specs taken from a scenario turn
 * @param matchedIndex - Index of the scenario turn the specs belong to
 * @returns One `{ id, name, input }` record per spec, in the same order
 */
function buildToolCalls(specs, matchedIndex) {
  const toCall = (spec, position) => {
    const generatedId = `call-${matchedIndex}-${position}`;
    return {
      id: spec.id ?? generatedId,
      name: spec.name,
      input: spec.args
    };
  };
  return specs.map(toCall);
}
+var TestLLMService = (scenario) => {
+  const callIndex = { value: 0 };
+  return {
+    complete: (request) => Effect9.gen(function* () {
+      const searchText = extractSearchText(request.messages, request);
+      const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
+      if ("error" in turn) {
+        throw new Error(turn.error);
+      }
+      if ("toolCall" in turn) {
         return {
-          content: response,
-          stopReason: "end_turn",
-          usage: {
-            inputTokens: Math.ceil(content.length / 4),
-            outputTokens: Math.ceil(response.length / 4),
-            totalTokens: Math.ceil(content.length / 4) + Math.ceil(response.length / 4),
-            estimatedCost: 0
-          },
-          model: "test-model"
+          content: "",
+          stopReason: "tool_use",
+          usage: fakeUsage(searchText.length, 0),
+          model: "test-model",
+          toolCalls: buildToolCalls([turn.toolCall], matchedIndex)
         };
       }
-    }
-    return {
-      content: "Test response",
-      stopReason: "end_turn",
-      usage: {
-        inputTokens: 0,
-        outputTokens: 0,
-        totalTokens: 0,
-        estimatedCost: 0
-      },
-      model: "test-model"
-    };
-  }),
-  stream: (request) => {
-    const lastMessage = request.messages[request.messages.length - 1];
-    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
-    const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
-    const searchText = `${content} ${systemPrompt}`;
-    let matchedResponse = "Test response";
-    for (const [pattern, response] of Object.entries(responses)) {
-      if (pattern.length > 0 && searchText.includes(pattern)) {
-        matchedResponse = response;
-        break;
+      if ("toolCalls" in turn) {
+        return {
+          content: "",
+          stopReason: "tool_use",
+          usage: fakeUsage(searchText.length, 0),
+          model: "test-model",
+          toolCalls: buildToolCalls(turn.toolCalls, matchedIndex)
+        };
       }
-    }
-    const inputTokens = Math.ceil(content.length / 4);
-    const outputTokens = Math.ceil(matchedResponse.length / 4);
-    return Effect9.succeed(
-      Stream6.make(
-        {
-          type: "text_delta",
-          text: matchedResponse
-        },
-        {
-          type: "content_complete",
-          content: matchedResponse
-        },
-        {
-          type: "usage",
-          usage: {
-            inputTokens,
-            outputTokens,
-            totalTokens: inputTokens + outputTokens,
-            estimatedCost: 0
+      const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
+      return {
+        content,
+        stopReason: "end_turn",
+        usage: fakeUsage(searchText.length, content.length),
+        model: "test-model"
+      };
+    }),
+    stream: (request) => {
+      const searchText = extractSearchText(request.messages, request);
+      const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
+      if ("error" in turn) {
+        return Effect9.succeed(
+          Stream6.make(
+            { type: "error", error: turn.error }
+          )
+        );
+      }
+      const specs = "toolCall" in turn ? [turn.toolCall] : "toolCalls" in turn ? turn.toolCalls : null;
+      if (specs) {
+        const events = [
+          ...specs.flatMap((spec, i) => [
+            {
+              type: "tool_use_start",
+              id: spec.id ?? `call-${matchedIndex}-${i}`,
+              name: spec.name
+            },
+            {
+              type: "tool_use_delta",
+              input: JSON.stringify(spec.args)
+            }
+          ]),
+          { type: "content_complete", content: "" },
+          { type: "usage", usage: fakeUsage(searchText.length, 0) }
+        ];
+        return Effect9.succeed(
+          Stream6.fromIterable(events)
+        );
+      }
+      const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
+      const inputTokens = Math.ceil(searchText.length / 4);
+      const outputTokens = Math.ceil(content.length / 4);
+      return Effect9.succeed(
+        Stream6.make(
+          { type: "text_delta", text: content },
+          { type: "content_complete", content },
+          {
+            type: "usage",
+            usage: {
+              inputTokens,
+              outputTokens,
+              totalTokens: inputTokens + outputTokens,
+              estimatedCost: 0
+            }
           }
-        }
+        )
+      );
+    },
+    completeStructured: (request) => Effect9.gen(function* () {
+      const searchText = extractSearchText(request.messages, request);
+      const { turn } = resolveTurn(scenario, callIndex, searchText);
+      if ("error" in turn) {
+        throw new Error(turn.error);
+      }
+      if ("json" in turn) {
+        return turn.json;
+      }
+      const responseContent = "text" in turn ? turn.text : "{}";
+      const parsed = JSON.parse(responseContent);
+      return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
+    }),
+    embed: (texts) => Effect9.succeed(
+      texts.map(() => new Array(768).fill(0).map(() => Math.random()))
+    ),
+    countTokens: (messages) => Effect9.succeed(
+      messages.reduce(
+        (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
+        0
       )
-    );
-  },
-  completeStructured: (request) => Effect9.gen(function* () {
-    const lastMessage = request.messages[request.messages.length - 1];
-    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
-    let responseContent = "Test response";
-    for (const [pattern, response] of Object.entries(responses)) {
-      if (content.includes(pattern)) {
-        responseContent = response;
-        break;
-      }
-    }
-    const parsed = JSON.parse(responseContent);
-    return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
-  }),
-  embed: (texts) => Effect9.succeed(
-    texts.map(() => new Array(768).fill(0).map(() => Math.random()))
-  ),
-  countTokens: (messages) => Effect9.succeed(
-    messages.reduce(
-      (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
-      0
-    )
-  ),
-  getModelConfig: () => Effect9.succeed({
-    provider: "anthropic",
-    model: "test-model"
-  }),
-  getStructuredOutputCapabilities: () => Effect9.succeed({
-    nativeJsonMode: true,
-    jsonSchemaEnforcement: false,
-    prefillSupport: false,
-    grammarConstraints: false
-  })
-});
-var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
+    ),
+    getModelConfig: () => Effect9.succeed({
+      provider: "anthropic",
+      model: "test-model"
+    }),
+    getStructuredOutputCapabilities: () => Effect9.succeed({
+      nativeJsonMode: true,
+      jsonSchemaEnforcement: false,
+      prefillSupport: false,
+      grammarConstraints: false
+    })
+  };
+};
/**
 * Create a Layer that provides LLMService backed by the scripted test service.
 *
 * @param scenario - Scripted turns handed to TestLLMService; defaults to a
 *   single turn with empty text
 * @returns Layer built with Layer.succeed for the LLMService tag
 */
var TestLLMServiceLayer = (scenario = [{ text: "" }]) => {
  const service = LLMService.of(TestLLMService(scenario));
  return Layer8.succeed(LLMService, service);
};
 // src/structured-output.ts
 import { Schema as Schema8 } from "effect";
@@ -3545,10 +3664,10 @@ var makeCircuitBreakerLayer = (config) => Layer9.effect(
     });
   })
 );
-var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
+var createLLMProviderLayer = (provider = "anthropic", testScenario, model, modelParams, circuitBreaker) => {
   if (provider === "test") {
     return Layer9.mergeAll(
-      TestLLMServiceLayer(testResponses ?? {}),
+      TestLLMServiceLayer(testScenario ?? [{ text: "" }]),
       PromptManagerLive
     );
   }
@@ -3577,6 +3696,86 @@ var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
     PromptManagerLive
   );
 };
+// src/fallback-chain.ts
/**
 * Tracks provider failures and rate limits and reports which provider and
 * model should be used next.
 *
 * Providers and models are walked independently and only ever forward:
 * repeated errors on the active provider advance the provider cursor, while a
 * rate limit advances the model cursor. Neither cursor moves back.
 */
var FallbackChain = class {
  /**
   * @param config - Chain configuration. Reads `providers` (ordered provider
   *   names), the optional `models` (ordered model names) and the optional
   *   `errorThreshold` (defaults to 3).
   */
  constructor(config) {
    this.config = config;
    this.threshold = config.errorThreshold ?? 3;
  }
  /** Error count per provider. */
  errorCounts = /* @__PURE__ */ new Map();
  /** Current index in the providers list. */
  currentProviderIndex = 0;
  /** Current index in the models list. */
  currentModelIndex = 0;
  /** Threshold for switching to next provider. */
  threshold;
  /** Configuration object passed to the constructor. */
  config;
  /**
   * Record an error for the given provider.
   *
   * The provider's error count is always incremented. The chain advances to
   * the next provider only when the erroring provider is the active one, its
   * count has reached the threshold, and a later provider exists. Errors
   * reported for a provider that is no longer active are counted but do not
   * move the chain, so a late failure cannot skip an untried fallback.
   *
   * @param provider - Provider name that errored
   */
  recordError(provider) {
    const count = (this.errorCounts.get(provider) ?? 0) + 1;
    this.errorCounts.set(provider, count);
    const isActive = this.config.providers[this.currentProviderIndex] === provider;
    const hasNext = this.currentProviderIndex < this.config.providers.length - 1;
    if (isActive && hasNext && count >= this.threshold) {
      this.currentProviderIndex++;
    }
  }
  /**
   * Record a rate limit error (429).
   * Moves to the next model in the chain when one is configured and available.
   *
   * @param _provider - Provider name that was rate limited (unused)
   */
  recordRateLimit(_provider) {
    if (this.config.models && this.currentModelIndex < this.config.models.length - 1) {
      this.currentModelIndex++;
    }
  }
  /**
   * Record a successful call for the given provider.
   * Resets the error count for that provider; the active provider is unchanged.
   *
   * @param provider - Provider name that succeeded
   */
  recordSuccess(provider) {
    this.errorCounts.set(provider, 0);
  }
  /**
   * Get the currently active provider.
   *
   * @returns Name of the provider to use
   * @throws Error when no provider exists at the current index (for example,
   *   an empty providers list)
   */
  currentProvider() {
    const provider = this.config.providers[this.currentProviderIndex];
    if (!provider) {
      throw new Error(`FallbackChain: Invalid provider index ${this.currentProviderIndex}`);
    }
    return provider;
  }
  /**
   * Get the currently active model.
   *
   * @returns Name of the model to use, or undefined if no models are configured
   */
  currentModel() {
    return this.config.models?.[this.currentModelIndex];
  }
  /**
   * Check if there are more fallbacks available (provider or model).
   *
   * @returns true if an unused provider or model remains, false otherwise
   */
  hasFallback() {
    const hasProviderFallback = this.currentProviderIndex < this.config.providers.length - 1;
    const hasModelFallback = this.config.models !== void 0 && this.currentModelIndex < this.config.models.length - 1;
    return hasProviderFallback || hasModelFallback;
  }
};
 export {
   AnthropicProviderLive,
   CacheControlSchema,
@@ -3584,6 +3783,7 @@ export {
   ComplexityAnalysisSchema,
   DefaultEmbeddingConfig,
   EmbeddingConfigSchema,
+  FallbackChain,
   GeminiProviderLive,
   ImageContentBlockSchema,
   ImageSourceSchema,