@lobehub/lobehub 2.0.0-next.100 → 2.0.0-next.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -28,6 +28,7 @@ The project follows a well-organized monorepo structure:

  ### Git Workflow

+ - The current release branch is `next` instead of `main` until v2.0.0 is officially released
  - Use rebase for git pull
  - Git commit messages should prefix with gitmoji
  - Git branch name format: `username/feat/feature-name`
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@

  # Changelog

+ ## [Version 2.0.0-next.101](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.100...v2.0.0-next.101)
+
+ <sup>Released on **2025-11-22**</sup>
+
+ #### ✨ Features
+
+ - **misc**: Support bedrok prompt cache and usage compute.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's improved
+
+ - **misc**: Support bedrok prompt cache and usage compute, closes [#10337](https://github.com/lobehub/lobe-chat/issues/10337) ([beb9471](https://github.com/lobehub/lobe-chat/commit/beb9471))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ## [Version 2.0.0-next.100](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.99...v2.0.0-next.100)

  <sup>Released on **2025-11-21**</sup>
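
The headline change in this release is prompt caching and usage computation for Bedrock's Anthropic (Claude) models. As a quick orientation before the code hunks below, here is a minimal sketch of the request-body shape the updated Bedrock tests expect: an Anthropic-style body whose text blocks carry an ephemeral `cache_control` marker. The concrete values are illustrative only.

```ts
// Illustrative only — mirrors the expected bodies in the Bedrock tests further down this diff.
const body = {
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
  messages: [
    {
      content: [
        {
          // Marks the block as cacheable; omitted when context caching is disabled.
          cache_control: { type: 'ephemeral' },
          text: 'Hello',
          type: 'text',
        },
      ],
      role: 'user',
    },
  ],
  temperature: 0,
  top_p: 1,
};
```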
package/CLAUDE.md CHANGED
@@ -14,6 +14,7 @@ read @.cursor/rules/project-structure.mdc

  ### Git Workflow

+ - The current release branch is `next` instead of `main` until v2.0.0 is officially released
  - use rebase for git pull
  - git commit message should prefix with gitmoji
  - git branch name format example: tj/feat/feature-name
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
  [
+ {
+ "children": {
+ "features": [
+ "Support bedrok prompt cache and usage compute."
+ ]
+ },
+ "date": "2025-11-22",
+ "version": "2.0.0-next.101"
+ },
  {
  "children": {
  "fixes": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@lobehub/lobehub",
- "version": "2.0.0-next.100",
+ "version": "2.0.0-next.101",
  "description": "LobeHub - an open-source,comprehensive AI Agent framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
  "keywords": [
  "framework",
@@ -148,10 +148,9 @@ export const createRouterRuntime = ({
  }

  /**
- * TODO: 考虑添加缓存机制,避免重复创建相同配置的 runtimes
+ * Resolve routers configuration and validate
  */
- private async createRuntimesByRouters(model?: string): Promise<RuntimeItem[]> {
- // 动态获取 routers,支持传入 model
+ private async resolveRouters(model?: string): Promise<RouterInstance[]> {
  const resolvedRouters =
  typeof this._routers === 'function'
  ? await this._routers(this._options, { model })
@@ -161,6 +160,41 @@ export const createRouterRuntime = ({
  throw new Error('empty providers');
  }

+ return resolvedRouters;
+ }
+
+ /**
+ * Create runtime for inference requests (chat, generateObject, etc.)
+ * Finds the router that matches the model, or uses the last router as fallback
+ */
+ private async createRuntimeForInference(model: string): Promise<RuntimeItem> {
+ const resolvedRouters = await this.resolveRouters(model);
+
+ const matchedRouter =
+ resolvedRouters.find((router) => {
+ if (router.models && router.models.length > 0) {
+ return router.models.includes(model);
+ }
+ return false;
+ }) ?? resolvedRouters.at(-1)!;
+
+ const providerAI =
+ matchedRouter.runtime ?? baseRuntimeMap[matchedRouter.apiType] ?? LobeOpenAI;
+ const finalOptions = { ...this._params, ...this._options, ...matchedRouter.options };
+ const runtime: LobeRuntimeAI = new providerAI({ ...finalOptions, id: this._id });
+
+ return {
+ id: matchedRouter.apiType,
+ models: matchedRouter.models,
+ runtime,
+ };
+ }
+
+ /**
+ * Create all runtimes for listing models
+ */
+ private async createRuntimes(): Promise<RuntimeItem[]> {
+ const resolvedRouters = await this.resolveRouters();
  return resolvedRouters.map((router) => {
  const providerAI = router.runtime ?? baseRuntimeMap[router.apiType] ?? LobeOpenAI;
  const finalOptions = { ...this._params, ...this._options, ...router.options };
@@ -176,16 +210,8 @@ export const createRouterRuntime = ({

  // Check if it can match a specific model, otherwise default to using the last runtime
  async getRuntimeByModel(model: string) {
- const runtimes = await this.createRuntimesByRouters(model);
-
- for (const runtimeItem of runtimes) {
- const models = runtimeItem.models || [];
- if (models.includes(model)) {
- return runtimeItem.runtime;
- }
- }
-
- return runtimes.at(-1)!.runtime;
+ const runtimeItem = await this.createRuntimeForInference(model);
+ return runtimeItem.runtime;
  }

  async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) {
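
The refactor above splits the old `createRuntimesByRouters` into `resolveRouters`, `createRuntimeForInference`, and `createRuntimes`, so an inference call instantiates only the router that matches the requested model instead of every configured runtime. A simplified sketch of the matching rule (the `RouterLike` shape here is an assumption, not the package's full `RouterInstance` type):

```ts
// Simplified sketch of the rule used by createRuntimeForInference: pick the first
// router whose models list contains the requested model, otherwise fall back to the last one.
interface RouterLike {
  apiType: string;
  models?: string[];
}

const pickRouter = (routers: RouterLike[], model: string): RouterLike =>
  routers.find((r) => (r.models && r.models.length > 0 ? r.models.includes(model) : false)) ??
  routers.at(-1)!;

const routers: RouterLike[] = [
  { apiType: 'openai', models: ['gpt-4o'] },
  { apiType: 'anthropic', models: [] },
];

console.log(pickRouter(routers, 'gpt-4o').apiType); // 'openai'
console.log(pickRouter(routers, 'unknown-model').apiType); // 'anthropic' (fallback to last)
```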
@@ -222,9 +248,8 @@ export const createRouterRuntime = ({

  async models() {
  if (modelsOption && typeof modelsOption === 'function') {
- // 延迟创建 runtimes
- const runtimes = await this.createRuntimesByRouters();
- // 如果是函数式配置,使用最后一个运行时的客户端来调用函数
+ const runtimes = await this.createRuntimes();
+ // If it's a functional configuration, use the last runtime's client to call the function
  const lastRuntime = runtimes.at(-1)?.runtime;
  if (lastRuntime && 'client' in lastRuntime) {
  const modelList = await modelsOption({ client: (lastRuntime as any).client });
@@ -232,8 +257,7 @@ export const createRouterRuntime = ({
  }
  }

- // 延迟创建 runtimes
- const runtimes = await this.createRuntimesByRouters();
+ const runtimes = await this.createRuntimes();
  return runtimes.at(-1)?.runtime.models?.();
  }

@@ -7,18 +7,32 @@ import {
  StreamContext,
  createCallbacksTransformer,
  createSSEProtocolTransformer,
+ createTokenSpeedCalculator,
  } from '../protocol';
  import { createBedrockStream } from './common';

  export const AWSBedrockClaudeStream = (
  res: InvokeModelWithResponseStreamResponse | ReadableStream,
- cb?: ChatStreamCallbacks,
+ options?: {
+ callbacks?: ChatStreamCallbacks;
+ inputStartAt?: number;
+ payload?: Parameters<typeof transformAnthropicStream>[2];
+ },
  ): ReadableStream<string> => {
  const streamStack: StreamContext = { id: 'chat_' + nanoid() };

  const stream = res instanceof ReadableStream ? res : createBedrockStream(res);

+ const transformWithPayload: typeof transformAnthropicStream = (chunk, ctx) =>
+ transformAnthropicStream(chunk, ctx, options?.payload);
+
  return stream
- .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack))
- .pipeThrough(createCallbacksTransformer(cb));
+ .pipeThrough(
+ createTokenSpeedCalculator(transformWithPayload, {
+ inputStartAt: options?.inputStartAt,
+ streamStack,
+ }),
+ )
+ .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
+ .pipeThrough(createCallbacksTransformer(options?.callbacks));
  };
@@ -21,6 +21,7 @@ import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
  import { StreamingResponse } from '../../utils/response';
  import { createAnthropicGenerateObject } from './generateObject';
  import { handleAnthropicError } from './handleAnthropicError';
+ import { resolveCacheTTL } from './resolveCacheTTL';

  export interface AnthropicModelCard {
  created_at: string;
@@ -33,44 +34,6 @@ type anthropicTools = Anthropic.Tool | Anthropic.WebSearchTool20250305;
  const modelsWithSmallContextWindow = new Set(['claude-3-opus-20240229', 'claude-3-haiku-20240307']);

  const DEFAULT_BASE_URL = 'https://api.anthropic.com';
- const DEFAULT_CACHE_TTL = '5m' as const;
-
- type CacheTTL = Anthropic.Messages.CacheControlEphemeral['ttl'];
-
- /**
- * Resolves cache TTL from Anthropic payload or request settings
- * Returns the first valid TTL found in system messages or content blocks
- */
- const resolveCacheTTL = (
- requestPayload: ChatStreamPayload,
- anthropicPayload: Anthropic.MessageCreateParams,
- ): CacheTTL | undefined => {
- // Check system messages for cache TTL
- if (Array.isArray(anthropicPayload.system)) {
- for (const block of anthropicPayload.system) {
- const ttl = block.cache_control?.ttl;
- if (ttl) return ttl;
- }
- }
-
- // Check message content blocks for cache TTL
- for (const message of anthropicPayload.messages ?? []) {
- if (!Array.isArray(message.content)) continue;
-
- for (const block of message.content) {
- // Message content blocks might have cache_control property
- const ttl = ('cache_control' in block && block.cache_control?.ttl) as CacheTTL | undefined;
- if (ttl) return ttl;
- }
- }
-
- // Use default TTL if context caching is enabled
- if (requestPayload.enabledContextCaching) {
- return DEFAULT_CACHE_TTL;
- }
-
- return undefined;
- };

  interface AnthropicAIParams extends ClientOptions {
  id?: string;
@@ -0,0 +1,44 @@
+ import Anthropic from '@anthropic-ai/sdk';
+
+ import { ChatStreamPayload } from '../../types';
+
+ type CacheTTL = Anthropic.Messages.CacheControlEphemeral['ttl'];
+
+ const DEFAULT_CACHE_TTL = '5m' as const;
+
+ /**
+ * Resolves cache TTL from Anthropic payload or request settings.
+ * Returns the first valid TTL found in system messages or content blocks.
+ */
+ export const resolveCacheTTL = (
+ requestPayload: ChatStreamPayload,
+ anthropicPayload: {
+ messages: Anthropic.MessageCreateParams['messages'];
+ system: Anthropic.MessageCreateParams['system'];
+ },
+ ): CacheTTL | undefined => {
+ // Check system messages for cache TTL
+ if (Array.isArray(anthropicPayload.system)) {
+ for (const block of anthropicPayload.system) {
+ const ttl = block.cache_control?.ttl;
+ if (ttl) return ttl;
+ }
+ }
+
+ // Check message content blocks for cache TTL
+ for (const message of anthropicPayload.messages ?? []) {
+ if (!Array.isArray(message.content)) continue;
+
+ for (const block of message.content) {
+ const ttl = ('cache_control' in block && block.cache_control?.ttl) as CacheTTL | undefined;
+ if (ttl) return ttl;
+ }
+ }
+
+ // Use default TTL if context caching is enabled
+ if (requestPayload.enabledContextCaching) {
+ return DEFAULT_CACHE_TTL;
+ }
+
+ return undefined;
+ };
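
`resolveCacheTTL` now lives in its own module so both the Anthropic and Bedrock providers can share it. A hedged usage sketch, with illustrative payloads shaped like the parameters above (the values themselves are made up):

```ts
import { resolveCacheTTL } from './resolveCacheTTL';

// Illustrative payloads only; the shapes mirror the function's parameters above.
const requestPayload = { enabledContextCaching: true, messages: [], model: 'claude-sonnet' } as any;

const anthropicPayload = {
  messages: [{ content: [{ text: 'Hello', type: 'text' }], role: 'user' }],
  system: [{ cache_control: { ttl: '1h', type: 'ephemeral' }, text: 'Be helpful', type: 'text' }],
} as any;

// The explicit TTL on the system block wins, so this returns '1h'. With no explicit TTL
// anywhere, the default '5m' is returned when enabledContextCaching is true, else undefined.
const ttl = resolveCacheTTL(requestPayload, anthropicPayload);
```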
@@ -173,7 +173,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0,
  top_p: 1,
  }),
@@ -211,8 +222,25 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
- system: 'You are an awesome greeter',
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
+ system: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'You are an awesome greeter',
+ type: 'text',
+ },
+ ],
  temperature: 0,
  top_p: 1,
  }),
@@ -248,7 +276,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 2048,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.25,
  top_p: 1,
  }),
@@ -327,7 +366,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0,
  }),
  contentType: 'application/json',
@@ -363,7 +413,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 2048,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.25,
  top_p: 1,
  }),
@@ -418,7 +479,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.4, // temperature / 2, top_p omitted due to conflict
  }),
  contentType: 'application/json',
@@ -450,7 +522,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  top_p: 0.9, // temperature omitted since not provided
  }),
  contentType: 'application/json',
@@ -483,7 +566,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.4, // temperature / 2
  top_p: 0.9, // both parameters allowed for older models
  }),
@@ -517,7 +611,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.3, // temperature / 2, top_p omitted due to conflict
  }),
  contentType: 'application/json',
@@ -550,7 +655,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.35, // temperature / 2, top_p omitted due to conflict
  }),
  contentType: 'application/json',
@@ -23,7 +23,9 @@ import {
  import { AgentRuntimeErrorType } from '../../types/error';
  import { AgentRuntimeError } from '../../utils/createError';
  import { debugStream } from '../../utils/debugStream';
+ import { getModelPricing } from '../../utils/getModelPricing';
  import { StreamingResponse } from '../../utils/response';
+ import { resolveCacheTTL } from '../anthropic/resolveCacheTTL';

  /**
  * A prompt constructor for HuggingFace LLama 2 chat models.
@@ -148,7 +150,16 @@ export class LobeBedrockAI implements LobeRuntimeAI {
  payload: ChatStreamPayload,
  options?: ChatMethodOptions,
  ): Promise<Response> => {
- const { max_tokens, messages, model, temperature, top_p, tools } = payload;
+ const {
+ enabledContextCaching = true,
+ max_tokens,
+ messages,
+ model,
+ temperature,
+ top_p,
+ tools,
+ } = payload;
+ const inputStartAt = Date.now();
  const system_message = messages.find((m) => m.role === 'system');
  const user_messages = messages.filter((m) => m.role !== 'system');

@@ -159,17 +170,29 @@ export class LobeBedrockAI implements LobeRuntimeAI {
  { hasConflict, normalizeTemperature: true, preferTemperature: true },
  );

+ const systemPrompts = !!system_message?.content
+ ? ([
+ {
+ cache_control: enabledContextCaching ? { type: 'ephemeral' } : undefined,
+ text: system_message.content as string,
+ type: 'text',
+ },
+ ] as any)
+ : undefined;
+
+ const anthropicPayload = {
+ anthropic_version: 'bedrock-2023-05-31',
+ max_tokens: max_tokens || 4096,
+ messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
+ system: systemPrompts,
+ temperature: resolvedParams.temperature,
+ tools: buildAnthropicTools(tools, { enabledContextCaching }),
+ top_p: resolvedParams.top_p,
+ };
+
  const command = new InvokeModelWithResponseStreamCommand({
  accept: 'application/json',
- body: JSON.stringify({
- anthropic_version: 'bedrock-2023-05-31',
- max_tokens: max_tokens || 4096,
- messages: await buildAnthropicMessages(user_messages),
- system: system_message?.content as string,
- temperature: resolvedParams.temperature,
- tools: buildAnthropicTools(tools),
- top_p: resolvedParams.top_p,
- }),
+ body: JSON.stringify(anthropicPayload),
  contentType: 'application/json',
  modelId: model,
  });
@@ -186,10 +209,21 @@ export class LobeBedrockAI implements LobeRuntimeAI {
  debugStream(debug).catch(console.error);
  }

+ const pricing = await getModelPricing(payload.model, ModelProvider.Bedrock);
+ const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
+ const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
+
  // Respond with the stream
- return StreamingResponse(AWSBedrockClaudeStream(prod, options?.callback), {
- headers: options?.headers,
- });
+ return StreamingResponse(
+ AWSBedrockClaudeStream(prod, {
+ callbacks: options?.callback,
+ inputStartAt,
+ payload: { model, pricing, pricingOptions, provider: ModelProvider.Bedrock },
+ }),
+ {
+ headers: options?.headers,
+ },
+ );
  } catch (e) {
  const err = e as Error & { $metadata: any };
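
With pricing and a cache TTL threaded into the stream payload, downstream usage accounting can price cache reads differently from fresh input tokens. As a loose illustration of the idea (a hypothetical helper, not the package's `getModelPricing` output or its usage schema):

```ts
// Hypothetical cost computation: cached input tokens are billed at a discounted rate.
interface UsageLike {
  cachedInputTokens: number;
  inputTokens: number;
  outputTokens: number;
}

interface PricingLike {
  cachedInput: number; // USD per 1M cache-read input tokens
  input: number; // USD per 1M fresh input tokens
  output: number; // USD per 1M output tokens
}

const computeCost = (usage: UsageLike, pricing: PricingLike): number =>
  (usage.cachedInputTokens * pricing.cachedInput +
    usage.inputTokens * pricing.input +
    usage.outputTokens * pricing.output) /
  1_000_000;

// Example: 90k cached + 10k fresh input tokens and 2k output tokens.
console.log(
  computeCost(
    { cachedInputTokens: 90_000, inputTokens: 10_000, outputTokens: 2_000 },
    { cachedInput: 0.3, input: 3, output: 15 },
  ),
); // ≈ 0.087
```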