@lobehub/lobehub 2.0.0-next.100 → 2.0.0-next.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -28,6 +28,7 @@ The project follows a well-organized monorepo structure:

  ### Git Workflow

+ - The current release branch is `next` instead of `main` until v2.0.0 is officially released
  - Use rebase for git pull
  - Git commit messages should prefix with gitmoji
  - Git branch name format: `username/feat/feature-name`
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@

  # Changelog

+ ## [Version 2.0.0-next.101](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.100...v2.0.0-next.101)
+
+ <sup>Released on **2025-11-22**</sup>
+
+ #### ✨ Features
+
+ - **misc**: Support bedrok prompt cache and usage compute.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's improved
+
+ - **misc**: Support bedrok prompt cache and usage compute, closes [#10337](https://github.com/lobehub/lobe-chat/issues/10337) ([beb9471](https://github.com/lobehub/lobe-chat/commit/beb9471))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ## [Version 2.0.0-next.100](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.99...v2.0.0-next.100)

  <sup>Released on **2025-11-21**</sup>
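
The headline change in this release is prompt caching and usage computation for Bedrock's Anthropic (Claude) models. As a quick orientation before the code hunks below, here is a minimal sketch of the request-body shape the updated Bedrock tests expect: an Anthropic-style body whose text blocks carry an ephemeral `cache_control` marker. The concrete values are illustrative only.

```ts
// Illustrative only — mirrors the expected bodies in the Bedrock tests further down this diff.
const body = {
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
  messages: [
    {
      content: [
        {
          // Marks the block as cacheable; omitted when context caching is disabled.
          cache_control: { type: 'ephemeral' },
          text: 'Hello',
          type: 'text',
        },
      ],
      role: 'user',
    },
  ],
  temperature: 0,
  top_p: 1,
};
```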
package/CLAUDE.md CHANGED
@@ -14,6 +14,7 @@ read @.cursor/rules/project-structure.mdc

  ### Git Workflow

+ - The current release branch is `next` instead of `main` until v2.0.0 is officially released
  - use rebase for git pull
  - git commit message should prefix with gitmoji
  - git branch name format example: tj/feat/feature-name
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
  [
+ {
+ "children": {
+ "features": [
+ "Support bedrok prompt cache and usage compute."
+ ]
+ },
+ "date": "2025-11-22",
+ "version": "2.0.0-next.101"
+ },
  {
  "children": {
  "fixes": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@lobehub/lobehub",
- "version": "2.0.0-next.100",
+ "version": "2.0.0-next.101",
  "description": "LobeHub - an open-source,comprehensive AI Agent framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
  "keywords": [
  "framework",
@@ -148,10 +148,9 @@ export const createRouterRuntime = ({
  }

  /**
- * TODO: 考虑添加缓存机制,避免重复创建相同配置的 runtimes
+ * Resolve routers configuration and validate
  */
- private async createRuntimesByRouters(model?: string): Promise<RuntimeItem[]> {
- // 动态获取 routers,支持传入 model
+ private async resolveRouters(model?: string): Promise<RouterInstance[]> {
  const resolvedRouters =
  typeof this._routers === 'function'
  ? await this._routers(this._options, { model })
@@ -161,6 +160,41 @@ export const createRouterRuntime = ({
  throw new Error('empty providers');
  }

+ return resolvedRouters;
+ }
+
+ /**
+ * Create runtime for inference requests (chat, generateObject, etc.)
+ * Finds the router that matches the model, or uses the last router as fallback
+ */
+ private async createRuntimeForInference(model: string): Promise<RuntimeItem> {
+ const resolvedRouters = await this.resolveRouters(model);
+
+ const matchedRouter =
+ resolvedRouters.find((router) => {
+ if (router.models && router.models.length > 0) {
+ return router.models.includes(model);
+ }
+ return false;
+ }) ?? resolvedRouters.at(-1)!;
+
+ const providerAI =
+ matchedRouter.runtime ?? baseRuntimeMap[matchedRouter.apiType] ?? LobeOpenAI;
+ const finalOptions = { ...this._params, ...this._options, ...matchedRouter.options };
+ const runtime: LobeRuntimeAI = new providerAI({ ...finalOptions, id: this._id });
+
+ return {
+ id: matchedRouter.apiType,
+ models: matchedRouter.models,
+ runtime,
+ };
+ }
+
+ /**
+ * Create all runtimes for listing models
+ */
+ private async createRuntimes(): Promise<RuntimeItem[]> {
+ const resolvedRouters = await this.resolveRouters();
  return resolvedRouters.map((router) => {
  const providerAI = router.runtime ?? baseRuntimeMap[router.apiType] ?? LobeOpenAI;
  const finalOptions = { ...this._params, ...this._options, ...router.options };
@@ -176,16 +210,8 @@ export const createRouterRuntime = ({

  // Check if it can match a specific model, otherwise default to using the last runtime
  async getRuntimeByModel(model: string) {
- const runtimes = await this.createRuntimesByRouters(model);
-
- for (const runtimeItem of runtimes) {
- const models = runtimeItem.models || [];
- if (models.includes(model)) {
- return runtimeItem.runtime;
- }
- }
-
- return runtimes.at(-1)!.runtime;
+ const runtimeItem = await this.createRuntimeForInference(model);
+ return runtimeItem.runtime;
  }

  async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) {
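
The refactor above splits the old `createRuntimesByRouters` into `resolveRouters`, `createRuntimeForInference`, and `createRuntimes`, so an inference call instantiates only the router that matches the requested model instead of every configured runtime. A simplified sketch of the matching rule (the `RouterLike` shape here is an assumption, not the package's full `RouterInstance` type):

```ts
// Simplified sketch of the rule used by createRuntimeForInference: pick the first
// router whose models list contains the requested model, otherwise fall back to the last one.
interface RouterLike {
  apiType: string;
  models?: string[];
}

const pickRouter = (routers: RouterLike[], model: string): RouterLike =>
  routers.find((r) => (r.models && r.models.length > 0 ? r.models.includes(model) : false)) ??
  routers.at(-1)!;

const routers: RouterLike[] = [
  { apiType: 'openai', models: ['gpt-4o'] },
  { apiType: 'anthropic', models: [] },
];

console.log(pickRouter(routers, 'gpt-4o').apiType); // 'openai'
console.log(pickRouter(routers, 'unknown-model').apiType); // 'anthropic' (fallback to last)
```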
@@ -222,9 +248,8 @@ export const createRouterRuntime = ({

  async models() {
  if (modelsOption && typeof modelsOption === 'function') {
- // 延迟创建 runtimes
- const runtimes = await this.createRuntimesByRouters();
- // 如果是函数式配置,使用最后一个运行时的客户端来调用函数
+ const runtimes = await this.createRuntimes();
+ // If it's a functional configuration, use the last runtime's client to call the function
  const lastRuntime = runtimes.at(-1)?.runtime;
  if (lastRuntime && 'client' in lastRuntime) {
  const modelList = await modelsOption({ client: (lastRuntime as any).client });
@@ -232,8 +257,7 @@ export const createRouterRuntime = ({
  }
  }

- // 延迟创建 runtimes
- const runtimes = await this.createRuntimesByRouters();
+ const runtimes = await this.createRuntimes();
  return runtimes.at(-1)?.runtime.models?.();
  }

@@ -7,18 +7,32 @@ import {
  StreamContext,
  createCallbacksTransformer,
  createSSEProtocolTransformer,
+ createTokenSpeedCalculator,
  } from '../protocol';
  import { createBedrockStream } from './common';

  export const AWSBedrockClaudeStream = (
  res: InvokeModelWithResponseStreamResponse | ReadableStream,
- cb?: ChatStreamCallbacks,
+ options?: {
+ callbacks?: ChatStreamCallbacks;
+ inputStartAt?: number;
+ payload?: Parameters<typeof transformAnthropicStream>[2];
+ },
  ): ReadableStream<string> => {
  const streamStack: StreamContext = { id: 'chat_' + nanoid() };

  const stream = res instanceof ReadableStream ? res : createBedrockStream(res);

+ const transformWithPayload: typeof transformAnthropicStream = (chunk, ctx) =>
+ transformAnthropicStream(chunk, ctx, options?.payload);
+
  return stream
- .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack))
- .pipeThrough(createCallbacksTransformer(cb));
+ .pipeThrough(
+ createTokenSpeedCalculator(transformWithPayload, {
+ inputStartAt: options?.inputStartAt,
+ streamStack,
+ }),
+ )
+ .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
+ .pipeThrough(createCallbacksTransformer(options?.callbacks));
  };
@@ -21,6 +21,7 @@ import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
  import { StreamingResponse } from '../../utils/response';
  import { createAnthropicGenerateObject } from './generateObject';
  import { handleAnthropicError } from './handleAnthropicError';
+ import { resolveCacheTTL } from './resolveCacheTTL';

  export interface AnthropicModelCard {
  created_at: string;
@@ -33,44 +34,6 @@ type anthropicTools = Anthropic.Tool | Anthropic.WebSearchTool20250305;
  const modelsWithSmallContextWindow = new Set(['claude-3-opus-20240229', 'claude-3-haiku-20240307']);

  const DEFAULT_BASE_URL = 'https://api.anthropic.com';
- const DEFAULT_CACHE_TTL = '5m' as const;
-
- type CacheTTL = Anthropic.Messages.CacheControlEphemeral['ttl'];
-
- /**
- * Resolves cache TTL from Anthropic payload or request settings
- * Returns the first valid TTL found in system messages or content blocks
- */
- const resolveCacheTTL = (
- requestPayload: ChatStreamPayload,
- anthropicPayload: Anthropic.MessageCreateParams,
- ): CacheTTL | undefined => {
- // Check system messages for cache TTL
- if (Array.isArray(anthropicPayload.system)) {
- for (const block of anthropicPayload.system) {
- const ttl = block.cache_control?.ttl;
- if (ttl) return ttl;
- }
- }
-
- // Check message content blocks for cache TTL
- for (const message of anthropicPayload.messages ?? []) {
- if (!Array.isArray(message.content)) continue;
-
- for (const block of message.content) {
- // Message content blocks might have cache_control property
- const ttl = ('cache_control' in block && block.cache_control?.ttl) as CacheTTL | undefined;
- if (ttl) return ttl;
- }
- }
-
- // Use default TTL if context caching is enabled
- if (requestPayload.enabledContextCaching) {
- return DEFAULT_CACHE_TTL;
- }
-
- return undefined;
- };

  interface AnthropicAIParams extends ClientOptions {
  id?: string;
@@ -0,0 +1,44 @@
+ import Anthropic from '@anthropic-ai/sdk';
+
+ import { ChatStreamPayload } from '../../types';
+
+ type CacheTTL = Anthropic.Messages.CacheControlEphemeral['ttl'];
+
+ const DEFAULT_CACHE_TTL = '5m' as const;
+
+ /**
+ * Resolves cache TTL from Anthropic payload or request settings.
+ * Returns the first valid TTL found in system messages or content blocks.
+ */
+ export const resolveCacheTTL = (
+ requestPayload: ChatStreamPayload,
+ anthropicPayload: {
+ messages: Anthropic.MessageCreateParams['messages'];
+ system: Anthropic.MessageCreateParams['system'];
+ },
+ ): CacheTTL | undefined => {
+ // Check system messages for cache TTL
+ if (Array.isArray(anthropicPayload.system)) {
+ for (const block of anthropicPayload.system) {
+ const ttl = block.cache_control?.ttl;
+ if (ttl) return ttl;
+ }
+ }
+
+ // Check message content blocks for cache TTL
+ for (const message of anthropicPayload.messages ?? []) {
+ if (!Array.isArray(message.content)) continue;
+
+ for (const block of message.content) {
+ const ttl = ('cache_control' in block && block.cache_control?.ttl) as CacheTTL | undefined;
+ if (ttl) return ttl;
+ }
+ }
+
+ // Use default TTL if context caching is enabled
+ if (requestPayload.enabledContextCaching) {
+ return DEFAULT_CACHE_TTL;
+ }
+
+ return undefined;
+ };
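
`resolveCacheTTL` now lives in its own module so both the Anthropic and Bedrock providers can share it. A hedged usage sketch, with illustrative payloads shaped like the parameters above (the values themselves are made up):

```ts
import { resolveCacheTTL } from './resolveCacheTTL';

// Illustrative payloads only; the shapes mirror the function's parameters above.
const requestPayload = { enabledContextCaching: true, messages: [], model: 'claude-sonnet' } as any;

const anthropicPayload = {
  messages: [{ content: [{ text: 'Hello', type: 'text' }], role: 'user' }],
  system: [{ cache_control: { ttl: '1h', type: 'ephemeral' }, text: 'Be helpful', type: 'text' }],
} as any;

// The explicit TTL on the system block wins, so this returns '1h'. With no explicit TTL
// anywhere, the default '5m' is returned when enabledContextCaching is true, else undefined.
const ttl = resolveCacheTTL(requestPayload, anthropicPayload);
```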
@@ -173,7 +173,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0,
  top_p: 1,
  }),
@@ -211,8 +222,25 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
- system: 'You are an awesome greeter',
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
+ system: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'You are an awesome greeter',
+ type: 'text',
+ },
+ ],
  temperature: 0,
  top_p: 1,
  }),
@@ -248,7 +276,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 2048,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.25,
  top_p: 1,
  }),
@@ -327,7 +366,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0,
  }),
  contentType: 'application/json',
@@ -363,7 +413,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 2048,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.25,
  top_p: 1,
  }),
@@ -418,7 +479,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.4, // temperature / 2, top_p omitted due to conflict
  }),
  contentType: 'application/json',
@@ -450,7 +522,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  top_p: 0.9, // temperature omitted since not provided
  }),
  contentType: 'application/json',
@@ -483,7 +566,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.4, // temperature / 2
  top_p: 0.9, // both parameters allowed for older models
  }),
@@ -517,7 +611,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.3, // temperature / 2, top_p omitted due to conflict
  }),
  contentType: 'application/json',
@@ -550,7 +655,18 @@ describe('LobeBedrockAI', () => {
  body: JSON.stringify({
  anthropic_version: 'bedrock-2023-05-31',
  max_tokens: 4096,
- messages: [{ content: 'Hello', role: 'user' }],
+ messages: [
+ {
+ content: [
+ {
+ cache_control: { type: 'ephemeral' },
+ text: 'Hello',
+ type: 'text',
+ },
+ ],
+ role: 'user',
+ },
+ ],
  temperature: 0.35, // temperature / 2, top_p omitted due to conflict
  }),
  contentType: 'application/json',
@@ -23,7 +23,9 @@ import {
  import { AgentRuntimeErrorType } from '../../types/error';
  import { AgentRuntimeError } from '../../utils/createError';
  import { debugStream } from '../../utils/debugStream';
+ import { getModelPricing } from '../../utils/getModelPricing';
  import { StreamingResponse } from '../../utils/response';
+ import { resolveCacheTTL } from '../anthropic/resolveCacheTTL';

  /**
  * A prompt constructor for HuggingFace LLama 2 chat models.
@@ -148,7 +150,16 @@ export class LobeBedrockAI implements LobeRuntimeAI {
  payload: ChatStreamPayload,
  options?: ChatMethodOptions,
  ): Promise<Response> => {
- const { max_tokens, messages, model, temperature, top_p, tools } = payload;
+ const {
+ enabledContextCaching = true,
+ max_tokens,
+ messages,
+ model,
+ temperature,
+ top_p,
+ tools,
+ } = payload;
+ const inputStartAt = Date.now();
  const system_message = messages.find((m) => m.role === 'system');
  const user_messages = messages.filter((m) => m.role !== 'system');

@@ -159,17 +170,29 @@ export class LobeBedrockAI implements LobeRuntimeAI {
  { hasConflict, normalizeTemperature: true, preferTemperature: true },
  );

+ const systemPrompts = !!system_message?.content
+ ? ([
+ {
+ cache_control: enabledContextCaching ? { type: 'ephemeral' } : undefined,
+ text: system_message.content as string,
+ type: 'text',
+ },
+ ] as any)
+ : undefined;
+
+ const anthropicPayload = {
+ anthropic_version: 'bedrock-2023-05-31',
+ max_tokens: max_tokens || 4096,
+ messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
+ system: systemPrompts,
+ temperature: resolvedParams.temperature,
+ tools: buildAnthropicTools(tools, { enabledContextCaching }),
+ top_p: resolvedParams.top_p,
+ };
+
  const command = new InvokeModelWithResponseStreamCommand({
  accept: 'application/json',
- body: JSON.stringify({
- anthropic_version: 'bedrock-2023-05-31',
- max_tokens: max_tokens || 4096,
- messages: await buildAnthropicMessages(user_messages),
- system: system_message?.content as string,
- temperature: resolvedParams.temperature,
- tools: buildAnthropicTools(tools),
- top_p: resolvedParams.top_p,
- }),
+ body: JSON.stringify(anthropicPayload),
  contentType: 'application/json',
  modelId: model,
  });
@@ -186,10 +209,21 @@ export class LobeBedrockAI implements LobeRuntimeAI {
  debugStream(debug).catch(console.error);
  }

+ const pricing = await getModelPricing(payload.model, ModelProvider.Bedrock);
+ const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
+ const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
+
  // Respond with the stream
- return StreamingResponse(AWSBedrockClaudeStream(prod, options?.callback), {
- headers: options?.headers,
- });
+ return StreamingResponse(
+ AWSBedrockClaudeStream(prod, {
+ callbacks: options?.callback,
+ inputStartAt,
+ payload: { model, pricing, pricingOptions, provider: ModelProvider.Bedrock },
+ }),
+ {
+ headers: options?.headers,
+ },
+ );
  } catch (e) {
  const err = e as Error & { $metadata: any };
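
With pricing and a cache TTL threaded into the stream payload, downstream usage accounting can price cache reads differently from fresh input tokens. As a loose illustration of the idea (a hypothetical helper, not the package's `getModelPricing` output or its usage schema):

```ts
// Hypothetical cost computation: cached input tokens are billed at a discounted rate.
interface UsageLike {
  cachedInputTokens: number;
  inputTokens: number;
  outputTokens: number;
}

interface PricingLike {
  cachedInput: number; // USD per 1M cache-read input tokens
  input: number; // USD per 1M fresh input tokens
  output: number; // USD per 1M output tokens
}

const computeCost = (usage: UsageLike, pricing: PricingLike): number =>
  (usage.cachedInputTokens * pricing.cachedInput +
    usage.inputTokens * pricing.input +
    usage.outputTokens * pricing.output) /
  1_000_000;

// Example: 90k cached + 10k fresh input tokens and 2k output tokens.
console.log(
  computeCost(
    { cachedInputTokens: 90_000, inputTokens: 10_000, outputTokens: 2_000 },
    { cachedInput: 0.3, input: 3, output: 15 },
  ),
); // ≈ 0.087
```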