@lobehub/lobehub 2.0.0-next.100 → 2.0.0-next.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +25 -0
- package/CLAUDE.md +1 -0
- package/changelog/v1.json +9 -0
- package/package.json +1 -1
- package/packages/model-runtime/src/core/RouterRuntime/createRuntime.ts +42 -18
- package/packages/model-runtime/src/core/streams/bedrock/claude.ts +17 -3
- package/packages/model-runtime/src/providers/anthropic/index.ts +1 -38
- package/packages/model-runtime/src/providers/anthropic/resolveCacheTTL.ts +44 -0
- package/packages/model-runtime/src/providers/bedrock/index.test.ts +127 -11
- package/packages/model-runtime/src/providers/bedrock/index.ts +47 -13
package/AGENTS.md
CHANGED

@@ -28,6 +28,7 @@ The project follows a well-organized monorepo structure:
 
 ### Git Workflow
 
+- The current release branch is `next` instead of `main` until v2.0.0 is officially released
 - Use rebase for git pull
 - Git commit messages should prefix with gitmoji
 - Git branch name format: `username/feat/feature-name`
package/CHANGELOG.md
CHANGED

@@ -2,6 +2,31 @@
 
 # Changelog
 
+## [Version 2.0.0-next.101](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.100...v2.0.0-next.101)
+
+<sup>Released on **2025-11-22**</sup>
+
+#### ✨ Features
+
+- **misc**: Support bedrok prompt cache and usage compute.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's improved
+
+- **misc**: Support bedrok prompt cache and usage compute, closes [#10337](https://github.com/lobehub/lobe-chat/issues/10337) ([beb9471](https://github.com/lobehub/lobe-chat/commit/beb9471))
+
+</details>
+
+<div align="right">
+
+[](#readme-top)
+
+</div>
+
 ## [Version 2.0.0-next.100](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.99...v2.0.0-next.100)
 
 <sup>Released on **2025-11-21**</sup>
package/CLAUDE.md
CHANGED

@@ -14,6 +14,7 @@ read @.cursor/rules/project-structure.mdc
 
 ### Git Workflow
 
+- The current release branch is `next` instead of `main` until v2.0.0 is officially released
 - use rebase for git pull
 - git commit message should prefix with gitmoji
 - git branch name format example: tj/feat/feature-name
package/changelog/v1.json
CHANGED
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@lobehub/lobehub",
-  "version": "2.0.0-next.100",
+  "version": "2.0.0-next.101",
   "description": "LobeHub - an open-source,comprehensive AI Agent framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
   "keywords": [
     "framework",
package/packages/model-runtime/src/core/RouterRuntime/createRuntime.ts
CHANGED

@@ -148,10 +148,9 @@ export const createRouterRuntime = ({
   }
 
   /**
-   *
+   * Resolve routers configuration and validate
    */
-  private async
-  // 动态获取 routers,支持传入 model
+  private async resolveRouters(model?: string): Promise<RouterInstance[]> {
     const resolvedRouters =
       typeof this._routers === 'function'
         ? await this._routers(this._options, { model })

@@ -161,6 +160,41 @@
       throw new Error('empty providers');
     }
 
+    return resolvedRouters;
+  }
+
+  /**
+   * Create runtime for inference requests (chat, generateObject, etc.)
+   * Finds the router that matches the model, or uses the last router as fallback
+   */
+  private async createRuntimeForInference(model: string): Promise<RuntimeItem> {
+    const resolvedRouters = await this.resolveRouters(model);
+
+    const matchedRouter =
+      resolvedRouters.find((router) => {
+        if (router.models && router.models.length > 0) {
+          return router.models.includes(model);
+        }
+        return false;
+      }) ?? resolvedRouters.at(-1)!;
+
+    const providerAI =
+      matchedRouter.runtime ?? baseRuntimeMap[matchedRouter.apiType] ?? LobeOpenAI;
+    const finalOptions = { ...this._params, ...this._options, ...matchedRouter.options };
+    const runtime: LobeRuntimeAI = new providerAI({ ...finalOptions, id: this._id });
+
+    return {
+      id: matchedRouter.apiType,
+      models: matchedRouter.models,
+      runtime,
+    };
+  }
+
+  /**
+   * Create all runtimes for listing models
+   */
+  private async createRuntimes(): Promise<RuntimeItem[]> {
+    const resolvedRouters = await this.resolveRouters();
     return resolvedRouters.map((router) => {
       const providerAI = router.runtime ?? baseRuntimeMap[router.apiType] ?? LobeOpenAI;
       const finalOptions = { ...this._params, ...this._options, ...router.options };

@@ -176,16 +210,8 @@
 
   // Check if it can match a specific model, otherwise default to using the last runtime
   async getRuntimeByModel(model: string) {
-    const
-
-    for (const runtimeItem of runtimes) {
-      const models = runtimeItem.models || [];
-      if (models.includes(model)) {
-        return runtimeItem.runtime;
-      }
-    }
-
-    return runtimes.at(-1)!.runtime;
+    const runtimeItem = await this.createRuntimeForInference(model);
+    return runtimeItem.runtime;
   }
 
   async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) {

@@ -222,9 +248,8 @@
 
   async models() {
     if (modelsOption && typeof modelsOption === 'function') {
-
-
-      // 如果是函数式配置,使用最后一个运行时的客户端来调用函数
+      const runtimes = await this.createRuntimes();
+      // If it's a functional configuration, use the last runtime's client to call the function
       const lastRuntime = runtimes.at(-1)?.runtime;
       if (lastRuntime && 'client' in lastRuntime) {
         const modelList = await modelsOption({ client: (lastRuntime as any).client });

@@ -232,8 +257,7 @@
       }
     }
 
-
-    const runtimes = await this.createRuntimesByRouters();
+    const runtimes = await this.createRuntimes();
     return runtimes.at(-1)?.runtime.models?.();
   }
 
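For orientation, the net effect of the createRuntime.ts refactor is that runtimes are no longer prebuilt as a list: `getRuntimeByModel` resolves the routers per request and instantiates only the matched router (falling back to the last one). A minimal configuration sketch, assuming a function-style `routers` option and the `apiType`/`models`/`options` fields visible in this diff; the import path, runtime id, and model names are illustrative, not taken from the package:

```ts
// Illustrative sketch only; identifiers other than createRouterRuntime are made up.
import { createRouterRuntime } from './createRuntime';

const ExampleRouterRuntime = createRouterRuntime({
  id: 'example-provider',
  // Function-style routers receive the requested model, so resolveRouters(model)
  // can build a model-aware router list per request.
  routers: async (options, { model }) => [
    { apiType: 'anthropic', models: ['claude-sonnet-4-5'], options },
    // The last router acts as the fallback when no `models` list matches.
    { apiType: 'openai', options },
  ],
});

// getRuntimeByModel(model) now calls createRuntimeForInference(model) internally,
// constructing a single runtime instead of instantiating every router up front.
```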
package/packages/model-runtime/src/core/streams/bedrock/claude.ts
CHANGED

@@ -7,18 +7,32 @@ import {
   StreamContext,
   createCallbacksTransformer,
   createSSEProtocolTransformer,
+  createTokenSpeedCalculator,
 } from '../protocol';
 import { createBedrockStream } from './common';
 
 export const AWSBedrockClaudeStream = (
   res: InvokeModelWithResponseStreamResponse | ReadableStream,
-
+  options?: {
+    callbacks?: ChatStreamCallbacks;
+    inputStartAt?: number;
+    payload?: Parameters<typeof transformAnthropicStream>[2];
+  },
 ): ReadableStream<string> => {
   const streamStack: StreamContext = { id: 'chat_' + nanoid() };
 
   const stream = res instanceof ReadableStream ? res : createBedrockStream(res);
 
+  const transformWithPayload: typeof transformAnthropicStream = (chunk, ctx) =>
+    transformAnthropicStream(chunk, ctx, options?.payload);
+
   return stream
-    .pipeThrough(
-
+    .pipeThrough(
+      createTokenSpeedCalculator(transformWithPayload, {
+        inputStartAt: options?.inputStartAt,
+        streamStack,
+      }),
+    )
+    .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
+    .pipeThrough(createCallbacksTransformer(options?.callbacks));
 };
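A short usage sketch of the widened `AWSBedrockClaudeStream` signature: the second argument is now an options bag rather than a bare callbacks value. The `payload` fields below are only indicative of the third parameter of `transformAnthropicStream`; the concrete shape passed by `bedrock/index.ts` (model, pricing, pricingOptions, provider) appears later in this diff, and the import path and response variable are assumptions:

```ts
// Illustrative: stream a Bedrock response with token-speed tracking enabled.
import { AWSBedrockClaudeStream } from './claude';

declare const bedrockResponse: ReadableStream; // e.g. the stream produced by createBedrockStream

const sseStream = AWSBedrockClaudeStream(bedrockResponse, {
  inputStartAt: Date.now(),
  // Loosely mirrors the call site in bedrock/index.ts; typed as any because the exact
  // payload type (Parameters<typeof transformAnthropicStream>[2]) is not shown here.
  payload: { model: 'anthropic.claude-3-haiku-20240307-v1:0', provider: 'bedrock' } as any,
});
```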
package/packages/model-runtime/src/providers/anthropic/index.ts
CHANGED

@@ -21,6 +21,7 @@ import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
 import { StreamingResponse } from '../../utils/response';
 import { createAnthropicGenerateObject } from './generateObject';
 import { handleAnthropicError } from './handleAnthropicError';
+import { resolveCacheTTL } from './resolveCacheTTL';
 
 export interface AnthropicModelCard {
   created_at: string;

@@ -33,44 +34,6 @@ type anthropicTools = Anthropic.Tool | Anthropic.WebSearchTool20250305;
 const modelsWithSmallContextWindow = new Set(['claude-3-opus-20240229', 'claude-3-haiku-20240307']);
 
 const DEFAULT_BASE_URL = 'https://api.anthropic.com';
-const DEFAULT_CACHE_TTL = '5m' as const;
-
-type CacheTTL = Anthropic.Messages.CacheControlEphemeral['ttl'];
-
-/**
- * Resolves cache TTL from Anthropic payload or request settings
- * Returns the first valid TTL found in system messages or content blocks
- */
-const resolveCacheTTL = (
-  requestPayload: ChatStreamPayload,
-  anthropicPayload: Anthropic.MessageCreateParams,
-): CacheTTL | undefined => {
-  // Check system messages for cache TTL
-  if (Array.isArray(anthropicPayload.system)) {
-    for (const block of anthropicPayload.system) {
-      const ttl = block.cache_control?.ttl;
-      if (ttl) return ttl;
-    }
-  }
-
-  // Check message content blocks for cache TTL
-  for (const message of anthropicPayload.messages ?? []) {
-    if (!Array.isArray(message.content)) continue;
-
-    for (const block of message.content) {
-      // Message content blocks might have cache_control property
-      const ttl = ('cache_control' in block && block.cache_control?.ttl) as CacheTTL | undefined;
-      if (ttl) return ttl;
-    }
-  }
-
-  // Use default TTL if context caching is enabled
-  if (requestPayload.enabledContextCaching) {
-    return DEFAULT_CACHE_TTL;
-  }
-
-  return undefined;
-};
 
 interface AnthropicAIParams extends ClientOptions {
   id?: string;
package/packages/model-runtime/src/providers/anthropic/resolveCacheTTL.ts
ADDED

@@ -0,0 +1,44 @@
+import Anthropic from '@anthropic-ai/sdk';
+
+import { ChatStreamPayload } from '../../types';
+
+type CacheTTL = Anthropic.Messages.CacheControlEphemeral['ttl'];
+
+const DEFAULT_CACHE_TTL = '5m' as const;
+
+/**
+ * Resolves cache TTL from Anthropic payload or request settings.
+ * Returns the first valid TTL found in system messages or content blocks.
+ */
+export const resolveCacheTTL = (
+  requestPayload: ChatStreamPayload,
+  anthropicPayload: {
+    messages: Anthropic.MessageCreateParams['messages'];
+    system: Anthropic.MessageCreateParams['system'];
+  },
+): CacheTTL | undefined => {
+  // Check system messages for cache TTL
+  if (Array.isArray(anthropicPayload.system)) {
+    for (const block of anthropicPayload.system) {
+      const ttl = block.cache_control?.ttl;
+      if (ttl) return ttl;
+    }
+  }
+
+  // Check message content blocks for cache TTL
+  for (const message of anthropicPayload.messages ?? []) {
+    if (!Array.isArray(message.content)) continue;
+
+    for (const block of message.content) {
+      const ttl = ('cache_control' in block && block.cache_control?.ttl) as CacheTTL | undefined;
+      if (ttl) return ttl;
+    }
+  }
+
+  // Use default TTL if context caching is enabled
+  if (requestPayload.enabledContextCaching) {
+    return DEFAULT_CACHE_TTL;
+  }
+
+  return undefined;
+};
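A small usage sketch of the extracted helper, mirroring the call site added in `bedrock/index.ts` later in this diff; the payload values and the partial `ChatStreamPayload` cast are illustrative only:

```ts
import { ChatStreamPayload } from '../../types';
import { resolveCacheTTL } from './resolveCacheTTL';

// No block specifies an explicit cache_control.ttl and context caching is enabled,
// so the helper falls through to the default '5m' TTL.
const ttl = resolveCacheTTL(
  { enabledContextCaching: true, messages: [], model: 'claude-sonnet-4-5', temperature: 1 } as ChatStreamPayload,
  {
    messages: [
      {
        content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
        role: 'user',
      },
    ],
    system: undefined,
  },
);
// ttl === '5m'
```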
package/packages/model-runtime/src/providers/bedrock/index.test.ts
CHANGED

@@ -173,7 +173,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0,
           top_p: 1,
         }),
@@ -211,8 +222,25 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
-
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
+          system: [
+            {
+              cache_control: { type: 'ephemeral' },
+              text: 'You are an awesome greeter',
+              type: 'text',
+            },
+          ],
           temperature: 0,
           top_p: 1,
         }),
@@ -248,7 +276,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 2048,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0.25,
           top_p: 1,
         }),
@@ -327,7 +366,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0,
         }),
         contentType: 'application/json',
@@ -363,7 +413,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 2048,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0.25,
           top_p: 1,
         }),
@@ -418,7 +479,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0.4, // temperature / 2, top_p omitted due to conflict
         }),
         contentType: 'application/json',
@@ -450,7 +522,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           top_p: 0.9, // temperature omitted since not provided
         }),
         contentType: 'application/json',
@@ -483,7 +566,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0.4, // temperature / 2
           top_p: 0.9, // both parameters allowed for older models
         }),
@@ -517,7 +611,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0.3, // temperature / 2, top_p omitted due to conflict
         }),
         contentType: 'application/json',
@@ -550,7 +655,18 @@ describe('LobeBedrockAI', () => {
         body: JSON.stringify({
           anthropic_version: 'bedrock-2023-05-31',
           max_tokens: 4096,
-          messages: [
+          messages: [
+            {
+              content: [
+                {
+                  cache_control: { type: 'ephemeral' },
+                  text: 'Hello',
+                  type: 'text',
+                },
+              ],
+              role: 'user',
+            },
+          ],
           temperature: 0.35, // temperature / 2, top_p omitted due to conflict
         }),
         contentType: 'application/json',
package/packages/model-runtime/src/providers/bedrock/index.ts
CHANGED

@@ -23,7 +23,9 @@ import {
 import { AgentRuntimeErrorType } from '../../types/error';
 import { AgentRuntimeError } from '../../utils/createError';
 import { debugStream } from '../../utils/debugStream';
+import { getModelPricing } from '../../utils/getModelPricing';
 import { StreamingResponse } from '../../utils/response';
+import { resolveCacheTTL } from '../anthropic/resolveCacheTTL';
 
 /**
  * A prompt constructor for HuggingFace LLama 2 chat models.

@@ -148,7 +150,16 @@ export class LobeBedrockAI implements LobeRuntimeAI {
     payload: ChatStreamPayload,
     options?: ChatMethodOptions,
   ): Promise<Response> => {
-    const {
+    const {
+      enabledContextCaching = true,
+      max_tokens,
+      messages,
+      model,
+      temperature,
+      top_p,
+      tools,
+    } = payload;
+    const inputStartAt = Date.now();
     const system_message = messages.find((m) => m.role === 'system');
     const user_messages = messages.filter((m) => m.role !== 'system');
 
@@ -159,17 +170,29 @@ export class LobeBedrockAI implements LobeRuntimeAI {
       { hasConflict, normalizeTemperature: true, preferTemperature: true },
     );
 
+    const systemPrompts = !!system_message?.content
+      ? ([
+          {
+            cache_control: enabledContextCaching ? { type: 'ephemeral' } : undefined,
+            text: system_message.content as string,
+            type: 'text',
+          },
+        ] as any)
+      : undefined;
+
+    const anthropicPayload = {
+      anthropic_version: 'bedrock-2023-05-31',
+      max_tokens: max_tokens || 4096,
+      messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
+      system: systemPrompts,
+      temperature: resolvedParams.temperature,
+      tools: buildAnthropicTools(tools, { enabledContextCaching }),
+      top_p: resolvedParams.top_p,
+    };
+
     const command = new InvokeModelWithResponseStreamCommand({
       accept: 'application/json',
-      body: JSON.stringify({
-        anthropic_version: 'bedrock-2023-05-31',
-        max_tokens: max_tokens || 4096,
-        messages: await buildAnthropicMessages(user_messages),
-        system: system_message?.content as string,
-        temperature: resolvedParams.temperature,
-        tools: buildAnthropicTools(tools),
-        top_p: resolvedParams.top_p,
-      }),
+      body: JSON.stringify(anthropicPayload),
       contentType: 'application/json',
       modelId: model,
     });
@@ -186,10 +209,21 @@ export class LobeBedrockAI implements LobeRuntimeAI {
         debugStream(debug).catch(console.error);
       }
 
+      const pricing = await getModelPricing(payload.model, ModelProvider.Bedrock);
+      const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
+      const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
+
       // Respond with the stream
-      return StreamingResponse(
-
-
+      return StreamingResponse(
+        AWSBedrockClaudeStream(prod, {
+          callbacks: options?.callback,
+          inputStartAt,
+          payload: { model, pricing, pricingOptions, provider: ModelProvider.Bedrock },
+        }),
+        {
+          headers: options?.headers,
+        },
+      );
     } catch (e) {
       const err = e as Error & { $metadata: any };
 
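Taken together, Bedrock Claude requests now attach `cache_control` markers by default and compute pricing-aware usage through the token-speed calculator. A hedged sketch of the per-request opt-out this diff introduces (`enabledContextCaching` defaults to `true` in the destructuring above); the constructor arguments and model id below are illustrative:

```ts
// Illustrative sketch, assuming the LobeBedrockAI constructor options shown in the tests.
import { LobeBedrockAI } from './index';

const bedrockRuntime = new LobeBedrockAI({
  accessKeyId: 'test-access-key',
  region: 'us-east-1',
  secretAccessKey: 'test-secret',
});

// With enabledContextCaching: false, the system prompt block is built without
// { type: 'ephemeral' } cache_control, and the same flag is forwarded to
// buildAnthropicMessages / buildAnthropicTools.
await bedrockRuntime.chat({
  enabledContextCaching: false,
  messages: [{ content: 'Hello', role: 'user' }],
  model: 'anthropic.claude-3-haiku-20240307-v1:0',
  temperature: 0.5,
});
```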