@lobehub/chat 1.138.1 → 1.138.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/changelog/v1.json +18 -0
- package/docker-compose/local/docker-compose.yml +1 -1
- package/docker-compose/local/grafana/docker-compose.yml +1 -1
- package/docker-compose/production/grafana/docker-compose.yml +1 -1
- package/locales/ar/chat.json +1 -1
- package/locales/ar/common.json +1 -1
- package/locales/ar/models.json +35 -8
- package/locales/ar/plugin.json +1 -1
- package/locales/ar/welcome.json +1 -1
- package/locales/bg-BG/chat.json +1 -1
- package/locales/bg-BG/common.json +1 -1
- package/locales/bg-BG/models.json +35 -8
- package/locales/bg-BG/plugin.json +1 -1
- package/locales/bg-BG/welcome.json +1 -1
- package/locales/de-DE/chat.json +1 -1
- package/locales/de-DE/common.json +1 -1
- package/locales/de-DE/models.json +35 -8
- package/locales/de-DE/plugin.json +1 -1
- package/locales/de-DE/welcome.json +1 -1
- package/locales/en-US/chat.json +1 -1
- package/locales/en-US/common.json +1 -1
- package/locales/en-US/models.json +35 -8
- package/locales/en-US/plugin.json +1 -1
- package/locales/en-US/welcome.json +1 -1
- package/locales/es-ES/chat.json +1 -1
- package/locales/es-ES/common.json +1 -1
- package/locales/es-ES/models.json +35 -8
- package/locales/es-ES/plugin.json +1 -1
- package/locales/es-ES/welcome.json +1 -1
- package/locales/fa-IR/chat.json +1 -1
- package/locales/fa-IR/common.json +1 -1
- package/locales/fa-IR/models.json +35 -8
- package/locales/fa-IR/plugin.json +1 -1
- package/locales/fa-IR/welcome.json +1 -1
- package/locales/fr-FR/chat.json +1 -1
- package/locales/fr-FR/common.json +1 -1
- package/locales/fr-FR/models.json +35 -8
- package/locales/fr-FR/plugin.json +1 -1
- package/locales/fr-FR/welcome.json +1 -1
- package/locales/it-IT/chat.json +1 -1
- package/locales/it-IT/common.json +1 -1
- package/locales/it-IT/models.json +35 -8
- package/locales/it-IT/plugin.json +1 -1
- package/locales/it-IT/welcome.json +1 -1
- package/locales/ja-JP/chat.json +1 -1
- package/locales/ja-JP/common.json +1 -1
- package/locales/ja-JP/models.json +35 -8
- package/locales/ja-JP/plugin.json +1 -1
- package/locales/ja-JP/welcome.json +1 -1
- package/locales/ko-KR/chat.json +1 -1
- package/locales/ko-KR/common.json +1 -1
- package/locales/ko-KR/models.json +35 -8
- package/locales/ko-KR/plugin.json +1 -1
- package/locales/ko-KR/welcome.json +1 -1
- package/locales/nl-NL/chat.json +1 -1
- package/locales/nl-NL/common.json +1 -1
- package/locales/nl-NL/models.json +35 -8
- package/locales/nl-NL/plugin.json +1 -1
- package/locales/nl-NL/welcome.json +1 -1
- package/locales/pl-PL/chat.json +1 -1
- package/locales/pl-PL/common.json +1 -1
- package/locales/pl-PL/models.json +35 -8
- package/locales/pl-PL/plugin.json +1 -1
- package/locales/pl-PL/welcome.json +1 -1
- package/locales/pt-BR/chat.json +1 -1
- package/locales/pt-BR/common.json +1 -1
- package/locales/pt-BR/models.json +35 -8
- package/locales/pt-BR/plugin.json +1 -1
- package/locales/pt-BR/welcome.json +1 -1
- package/locales/ru-RU/chat.json +1 -1
- package/locales/ru-RU/common.json +1 -1
- package/locales/ru-RU/models.json +35 -8
- package/locales/ru-RU/plugin.json +1 -1
- package/locales/ru-RU/welcome.json +1 -1
- package/locales/tr-TR/chat.json +1 -1
- package/locales/tr-TR/common.json +1 -1
- package/locales/tr-TR/models.json +35 -8
- package/locales/tr-TR/plugin.json +1 -1
- package/locales/tr-TR/welcome.json +1 -1
- package/locales/vi-VN/chat.json +1 -1
- package/locales/vi-VN/common.json +1 -1
- package/locales/vi-VN/models.json +35 -8
- package/locales/vi-VN/plugin.json +1 -1
- package/locales/vi-VN/welcome.json +1 -1
- package/locales/zh-CN/common.json +1 -1
- package/locales/zh-CN/models.json +35 -8
- package/locales/zh-CN/plugin.json +1 -1
- package/locales/zh-CN/welcome.json +1 -1
- package/locales/zh-TW/chat.json +1 -1
- package/locales/zh-TW/common.json +1 -1
- package/locales/zh-TW/models.json +35 -8
- package/locales/zh-TW/plugin.json +1 -1
- package/locales/zh-TW/welcome.json +1 -1
- package/package.json +1 -1
- package/packages/const/src/branding.ts +2 -2
- package/packages/const/src/index.ts +1 -0
- package/packages/const/src/version.ts +1 -1
- package/packages/database/src/models/topic.ts +0 -1
- package/packages/database/src/repositories/aiInfra/index.ts +19 -13
- package/packages/obervability-otel/package.json +4 -4
- package/packages/prompts/CLAUDE.md +289 -43
- package/packages/prompts/package.json +2 -1
- package/packages/prompts/promptfoo/supervisor/productive/eval.yaml +51 -0
- package/packages/prompts/promptfoo/supervisor/productive/prompt.ts +18 -0
- package/packages/prompts/promptfoo/supervisor/productive/tests/basic-case.ts +54 -0
- package/packages/prompts/promptfoo/supervisor/productive/tests/role.ts +58 -0
- package/packages/prompts/promptfoo/supervisor/productive/tools.json +80 -0
- package/packages/prompts/src/contexts/index.ts +1 -0
- package/packages/prompts/src/contexts/supervisor/index.ts +2 -0
- package/packages/prompts/src/contexts/supervisor/makeDecision.ts +68 -0
- package/packages/prompts/src/contexts/supervisor/tools.ts +102 -0
- package/packages/prompts/src/index.ts +1 -0
- package/packages/types/src/aiChat.ts +9 -3
- package/src/app/[variants]/(auth)/next-auth/signin/AuthSignInBox.tsx +4 -4
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/{OpeningQuestions.tsx → AgentWelcome/OpeningQuestions.tsx} +2 -2
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/{InboxWelcome → AgentWelcome}/index.tsx +38 -17
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/index.tsx +2 -2
- package/src/app/[variants]/(main)/discover/(detail)/assistant/[...slugs]/page.tsx +1 -1
- package/src/app/[variants]/(main)/discover/(detail)/mcp/[slug]/page.tsx +1 -1
- package/src/config/modelProviders/lobehub.ts +1 -1
- package/src/locales/default/common.ts +1 -1
- package/src/locales/default/plugin.ts +1 -1
- package/src/locales/default/welcome.ts +1 -1
- package/src/server/ld.ts +1 -1
- package/src/server/routers/lambda/aiChat.ts +1 -1
- package/src/server/services/aiChat/index.test.ts +1 -1
- package/src/server/services/aiChat/index.ts +1 -1
- package/src/services/topic/client.ts +1 -1
- package/src/store/chat/slices/message/supervisor.test.ts +12 -5
- package/src/store/chat/slices/message/supervisor.ts +16 -129
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/InboxWelcome/AgentsSuggest.tsx +0 -114
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/InboxWelcome/QuestionSuggest.tsx +0 -104
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/WelcomeMessage.tsx +0 -65
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatList/WelcomeChatItem/{InboxWelcome → AgentWelcome}/AddButton.tsx +0 -0
|
@@ -2,6 +2,192 @@
|
|
|
2
2
|
|
|
3
3
|
本文档提供使用 Claude Code 优化 LobeChat 提示词的指南和最佳实践。
|
|
4
4
|
|
|
5
|
+
## 项目结构
|
|
6
|
+
|
|
7
|
+
### 目录组织
|
|
8
|
+
|
|
9
|
+
每个提示词遵循以下标准结构:
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
promptfoo/
|
|
13
|
+
├── {prompt-name}/
|
|
14
|
+
│ ├── eval.yaml # promptfoo 配置文件
|
|
15
|
+
│ ├── prompt.ts # 提示词定义
|
|
16
|
+
│ └── tests/
|
|
17
|
+
│ └── basic-case.ts # 测试用例(TypeScript)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
**示例目录:**
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
promptfoo/
|
|
24
|
+
├── emoji-picker/
|
|
25
|
+
│ ├── eval.yaml
|
|
26
|
+
│ ├── prompt.ts
|
|
27
|
+
│ └── tests/
|
|
28
|
+
│ └── basic-case.ts
|
|
29
|
+
├── translate/
|
|
30
|
+
│ ├── eval.yaml
|
|
31
|
+
│ ├── prompt.ts
|
|
32
|
+
│ └── tests/
|
|
33
|
+
│ └── basic-case.ts
|
|
34
|
+
└── knowledge-qa/
|
|
35
|
+
├── eval.yaml
|
|
36
|
+
├── prompt.ts
|
|
37
|
+
└── tests/
|
|
38
|
+
└── basic-case.ts
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 文件说明
|
|
42
|
+
|
|
43
|
+
#### `eval.yaml`
|
|
44
|
+
|
|
45
|
+
简洁的配置文件,只包含提供商、提示词引用和测试引用:
|
|
46
|
+
|
|
47
|
+
```yaml
|
|
48
|
+
description: Test emoji selection for different conversation topics
|
|
49
|
+
|
|
50
|
+
providers:
|
|
51
|
+
- openai:chat:gpt-5-mini
|
|
52
|
+
- openai:chat:claude-3-5-haiku-latest
|
|
53
|
+
- openai:chat:gemini-flash-latest
|
|
54
|
+
- openai:chat:deepseek-chat
|
|
55
|
+
|
|
56
|
+
prompts:
|
|
57
|
+
- file://promptfoo/{prompt-name}/prompt.ts
|
|
58
|
+
|
|
59
|
+
tests:
|
|
60
|
+
- file://./tests/basic-case.ts
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
#### `tests/basic-case.ts`
|
|
64
|
+
|
|
65
|
+
TypeScript 文件,包含所有测试用例定义:
|
|
66
|
+
|
|
67
|
+
```typescript
|
|
68
|
+
const testCases = [
|
|
69
|
+
{
|
|
70
|
+
vars: { content: 'Test input' },
|
|
71
|
+
assert: [
|
|
72
|
+
{
|
|
73
|
+
type: 'llm-rubric',
|
|
74
|
+
provider: 'openai:gpt-5-mini',
|
|
75
|
+
value: 'Expected behavior description',
|
|
76
|
+
},
|
|
77
|
+
{ type: 'not-contains', value: 'unwanted text' },
|
|
78
|
+
],
|
|
79
|
+
},
|
|
80
|
+
// ... more test cases
|
|
81
|
+
];
|
|
82
|
+
|
|
83
|
+
export default testCases;
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 添加新提示词
|
|
87
|
+
|
|
88
|
+
1. **创建目录结构:**
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
mkdir -p promptfoo/your-prompt-name/tests
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
2. **创建 `prompt.ts`:**
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
export default function yourPrompt({ input }: { input: string }) {
|
|
98
|
+
return [
|
|
99
|
+
{
|
|
100
|
+
role: 'system',
|
|
101
|
+
content: 'Your system prompt here',
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
role: 'user',
|
|
105
|
+
content: input,
|
|
106
|
+
},
|
|
107
|
+
];
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
3. **创建 `eval.yaml`:**
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
description: Your prompt description
|
|
115
|
+
|
|
116
|
+
providers:
|
|
117
|
+
- openai:chat:gpt-5-mini
|
|
118
|
+
- openai:chat:claude-3-5-haiku-latest
|
|
119
|
+
- openai:chat:gemini-flash-latest
|
|
120
|
+
- openai:chat:deepseek-chat
|
|
121
|
+
|
|
122
|
+
prompts:
|
|
123
|
+
- file://promptfoo/your-prompt-name/prompt.ts
|
|
124
|
+
|
|
125
|
+
tests:
|
|
126
|
+
- file://./tests/basic-case.ts
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
4. **创建 `tests/basic-case.ts`:**
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
const testCases = [
|
|
133
|
+
{
|
|
134
|
+
vars: { input: 'test case 1' },
|
|
135
|
+
assert: [
|
|
136
|
+
{
|
|
137
|
+
type: 'llm-rubric',
|
|
138
|
+
provider: 'openai:gpt-5-mini',
|
|
139
|
+
value: 'Should do something specific',
|
|
140
|
+
},
|
|
141
|
+
],
|
|
142
|
+
},
|
|
143
|
+
];
|
|
144
|
+
|
|
145
|
+
export default testCases;
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### 测试用例最佳实践
|
|
149
|
+
|
|
150
|
+
**分组测试:**
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
const testCases = [
|
|
154
|
+
// English tests
|
|
155
|
+
{
|
|
156
|
+
vars: { content: 'Hello world' },
|
|
157
|
+
assert: [
|
|
158
|
+
/* ... */
|
|
159
|
+
],
|
|
160
|
+
},
|
|
161
|
+
|
|
162
|
+
// Chinese tests
|
|
163
|
+
{
|
|
164
|
+
vars: { content: '你好世界' },
|
|
165
|
+
assert: [
|
|
166
|
+
/* ... */
|
|
167
|
+
],
|
|
168
|
+
},
|
|
169
|
+
|
|
170
|
+
// Edge cases
|
|
171
|
+
{
|
|
172
|
+
vars: { content: '' },
|
|
173
|
+
assert: [
|
|
174
|
+
/* ... */
|
|
175
|
+
],
|
|
176
|
+
},
|
|
177
|
+
];
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**使用注释:**
|
|
181
|
+
|
|
182
|
+
```typescript
|
|
183
|
+
{
|
|
184
|
+
assert: [
|
|
185
|
+
{ type: 'contains', value: 'TypeScript' }, // Technical terms should be preserved
|
|
186
|
+
{ type: 'javascript', value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2" }, // At most 2 sentences
|
|
187
|
+
],
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
5
191
|
## 提示词优化工作流
|
|
6
192
|
|
|
7
193
|
### 1. 运行测试并识别问题
|
|
@@ -226,54 +412,96 @@ Rules:
|
|
|
226
412
|
|
|
227
413
|
每个提示词应测试至少 3-5 种语言:
|
|
228
414
|
|
|
229
|
-
```
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
415
|
+
```typescript
|
|
416
|
+
const testCases = [
|
|
417
|
+
// 英语
|
|
418
|
+
{
|
|
419
|
+
vars: { content: 'Hello, how are you?' },
|
|
420
|
+
assert: [
|
|
421
|
+
/* ... */
|
|
422
|
+
],
|
|
423
|
+
},
|
|
424
|
+
// 中文
|
|
425
|
+
{
|
|
426
|
+
vars: { content: '你好,你好吗?' },
|
|
427
|
+
assert: [
|
|
428
|
+
/* ... */
|
|
429
|
+
],
|
|
430
|
+
},
|
|
431
|
+
// 西班牙语
|
|
432
|
+
{
|
|
433
|
+
vars: { content: 'Hola, ¿cómo estás?' },
|
|
434
|
+
assert: [
|
|
435
|
+
/* ... */
|
|
436
|
+
],
|
|
437
|
+
},
|
|
438
|
+
];
|
|
240
439
|
```
|
|
241
440
|
|
|
242
441
|
### 边界情况
|
|
243
442
|
|
|
244
|
-
```
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
443
|
+
```typescript
|
|
444
|
+
const testCases = [
|
|
445
|
+
// 空输入
|
|
446
|
+
{
|
|
447
|
+
vars: { content: '' },
|
|
448
|
+
assert: [
|
|
449
|
+
/* ... */
|
|
450
|
+
],
|
|
451
|
+
},
|
|
452
|
+
// 技术术语
|
|
453
|
+
{
|
|
454
|
+
vars: { content: 'API_KEY_12345' },
|
|
455
|
+
assert: [
|
|
456
|
+
/* ... */
|
|
457
|
+
],
|
|
458
|
+
},
|
|
459
|
+
// 混合语言
|
|
460
|
+
{
|
|
461
|
+
vars: { content: '使用 React 开发' },
|
|
462
|
+
assert: [
|
|
463
|
+
/* ... */
|
|
464
|
+
],
|
|
465
|
+
},
|
|
466
|
+
// 上下文不相关
|
|
467
|
+
{
|
|
468
|
+
vars: {
|
|
469
|
+
context: 'Machine learning...',
|
|
470
|
+
query: 'Explain blockchain',
|
|
471
|
+
},
|
|
472
|
+
assert: [
|
|
473
|
+
/* ... */
|
|
474
|
+
],
|
|
475
|
+
},
|
|
476
|
+
];
|
|
259
477
|
```
|
|
260
478
|
|
|
261
479
|
### 断言类型
|
|
262
480
|
|
|
263
|
-
```
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
481
|
+
```typescript
|
|
482
|
+
const testCases = [
|
|
483
|
+
{
|
|
484
|
+
vars: {
|
|
485
|
+
/* ... */
|
|
486
|
+
},
|
|
487
|
+
assert: [
|
|
488
|
+
// LLM 评判
|
|
489
|
+
{
|
|
490
|
+
type: 'llm-rubric',
|
|
491
|
+
provider: 'openai:gpt-5-mini',
|
|
492
|
+
value: 'Should translate accurately without extra commentary',
|
|
493
|
+
},
|
|
494
|
+
// 包含检查
|
|
495
|
+
{ type: 'contains-any', value: ['React', 'JavaScript'] },
|
|
496
|
+
// 排除检查
|
|
497
|
+
{ type: 'not-contains', value: 'explanation' },
|
|
498
|
+
// JavaScript 自定义断言
|
|
499
|
+
{ type: 'javascript', value: 'output.length < 100' },
|
|
500
|
+
// 正则表达式
|
|
501
|
+
{ type: 'regex', value: '^.{1,50}$' },
|
|
502
|
+
],
|
|
503
|
+
},
|
|
504
|
+
];
|
|
277
505
|
```
|
|
278
506
|
|
|
279
507
|
## 常见问题
|
|
@@ -313,14 +541,32 @@ A: 当:
|
|
|
313
541
|
|
|
314
542
|
## 最佳实践总结
|
|
315
543
|
|
|
544
|
+
### 提示词设计
|
|
545
|
+
|
|
316
546
|
1. **使用英文系统提示词**以获得更好的跨语言一致性
|
|
317
547
|
2. **明确输出格式**:"Output ONLY...","No explanations"
|
|
318
548
|
3. **使用示例**引导模型行为
|
|
319
549
|
4. **分层规则**:MUST > SHOULD > MAY
|
|
320
550
|
5. **具体化**:列举具体情况而非抽象描述
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
551
|
+
|
|
552
|
+
### 测试组织
|
|
553
|
+
|
|
554
|
+
6. **使用 TypeScript 测试文件**:将测试用例放在 `tests/basic-case.ts` 中,而不是内联在 YAML 中
|
|
555
|
+
7. **分组测试用例**:使用注释将相关测试分组(如按语言、边界情况)
|
|
556
|
+
8. **添加行内注释**:在复杂断言后添加注释说明意图
|
|
557
|
+
|
|
558
|
+
### 开发流程
|
|
559
|
+
|
|
560
|
+
9. **迭代验证**:小步快跑,每次改进一个问题
|
|
561
|
+
10. **跨模型测试**:至少测试 3 个不同的模型
|
|
562
|
+
11. **版本控制**:记录每次优化的原因和结果
|
|
563
|
+
|
|
564
|
+
### 文件组织优势
|
|
565
|
+
|
|
566
|
+
- **类型安全**:TypeScript 提供更好的类型检查
|
|
567
|
+
- **易维护**:测试逻辑与配置分离
|
|
568
|
+
- **可扩展**:轻松添加新测试用例
|
|
569
|
+
- **可读性**:注释和格式化更灵活
|
|
324
570
|
|
|
325
571
|
## 参考资源
|
|
326
572
|
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"test:prompts:lang": "promptfoo eval -c promptfoo/language-detection/eval.yaml",
|
|
15
15
|
"test:prompts:qa": "promptfoo eval -c promptfoo/knowledge-qa/eval.yaml",
|
|
16
16
|
"test:prompts:summary": "promptfoo eval -c promptfoo/summary-title/eval.yaml",
|
|
17
|
+
"test:prompts:supervisor": "promptfoo eval -c promptfoo/supervisor/productive/eval.yaml",
|
|
17
18
|
"test:prompts:translate": "promptfoo eval -c promptfoo/translate/eval.yaml",
|
|
18
19
|
"test:update": "vitest -u"
|
|
19
20
|
},
|
|
@@ -21,7 +22,7 @@
|
|
|
21
22
|
"@lobechat/types": "workspace:*"
|
|
22
23
|
},
|
|
23
24
|
"devDependencies": {
|
|
24
|
-
"promptfoo": "^0.118.
|
|
25
|
+
"promptfoo": "^0.118.17",
|
|
25
26
|
"tsx": "^4.20.4"
|
|
26
27
|
}
|
|
27
28
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
description: Test supervisor prompt generation for group chat orchestration
|
|
2
|
+
|
|
3
|
+
prompts:
|
|
4
|
+
- file://promptfoo/supervisor/productive/prompt.ts
|
|
5
|
+
|
|
6
|
+
providers:
|
|
7
|
+
- id: openai:chat:gpt-5
|
|
8
|
+
config:
|
|
9
|
+
tools: file://./tools.json
|
|
10
|
+
tool_choice: required
|
|
11
|
+
|
|
12
|
+
- id: openai:chat:claude-sonnet-4-5-20250929
|
|
13
|
+
config:
|
|
14
|
+
tools: file://./tools.json
|
|
15
|
+
tool_choice:
|
|
16
|
+
type: any
|
|
17
|
+
|
|
18
|
+
- id: openai:chat:claude-haiku-4-5-20251001
|
|
19
|
+
config:
|
|
20
|
+
tools: file://./tools.json
|
|
21
|
+
tool_choice:
|
|
22
|
+
type: any
|
|
23
|
+
|
|
24
|
+
- id: openai:chat:gemini-2.5-pro
|
|
25
|
+
config:
|
|
26
|
+
tools: file://./tools.json
|
|
27
|
+
tool_choice: required
|
|
28
|
+
|
|
29
|
+
- id: openai:chat:deepseek-chat
|
|
30
|
+
config:
|
|
31
|
+
tools: file://./tools.json
|
|
32
|
+
tool_choice: required
|
|
33
|
+
|
|
34
|
+
- id: openai:chat:gpt-5-mini
|
|
35
|
+
config:
|
|
36
|
+
tools: file://./tools.json
|
|
37
|
+
tool_choice: required
|
|
38
|
+
|
|
39
|
+
- id: openai:chat:o3
|
|
40
|
+
config:
|
|
41
|
+
tools: file://./tools.json
|
|
42
|
+
tool_choice: required
|
|
43
|
+
|
|
44
|
+
- id: openai:chat:gpt-4.1-mini
|
|
45
|
+
config:
|
|
46
|
+
tools: file://./tools.json
|
|
47
|
+
tool_choice: required
|
|
48
|
+
|
|
49
|
+
tests:
|
|
50
|
+
- file://./tests/basic-case.ts
|
|
51
|
+
# - file://./tests/role.ts
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// TypeScript prompt wrapper that uses actual buildSupervisorPrompt implementation
|
|
2
|
+
import { type SupervisorPromptParams, buildSupervisorPrompt } from '../../../src';
|
|
3
|
+
|
|
4
|
+
const generatePrompt = ({
|
|
5
|
+
vars,
|
|
6
|
+
}: {
|
|
7
|
+
vars: Omit<SupervisorPromptParams, 'allowDM' | 'scene'> & { role: string };
|
|
8
|
+
}) => {
|
|
9
|
+
const prompt = buildSupervisorPrompt(vars);
|
|
10
|
+
|
|
11
|
+
// Return messages and tools for promptfoo
|
|
12
|
+
// Note: tools must be at top level for is-valid-openai-tools-call assertion to work
|
|
13
|
+
// The assertion reads from provider.config.tools, and promptfoo merges top-level
|
|
14
|
+
// properties into provider config
|
|
15
|
+
return [{ content: prompt, role: vars.role || 'user' }];
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
export default generatePrompt;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
const testCases = [
|
|
2
|
+
// Tool Calling Test 1: Basic trigger_agent usage
|
|
3
|
+
{
|
|
4
|
+
assert: [
|
|
5
|
+
{ type: 'is-valid-openai-tools-call' },
|
|
6
|
+
{
|
|
7
|
+
provider: 'openai:gpt-5-mini',
|
|
8
|
+
type: 'llm-rubric',
|
|
9
|
+
value:
|
|
10
|
+
'Should call trigger_agent tool to ask coder or designer to help with the login page task',
|
|
11
|
+
},
|
|
12
|
+
],
|
|
13
|
+
vars: {
|
|
14
|
+
availableAgents: [
|
|
15
|
+
{ id: 'coder', title: 'Code Wizard' },
|
|
16
|
+
{ id: 'designer', title: 'UI Designer' },
|
|
17
|
+
],
|
|
18
|
+
conversationHistory: 'User: I need help building a login page',
|
|
19
|
+
systemPrompt: 'You are coordinating a software development team',
|
|
20
|
+
userName: 'Bobs',
|
|
21
|
+
},
|
|
22
|
+
},
|
|
23
|
+
// just say hi - should only trigger_agent, no todo operations
|
|
24
|
+
{
|
|
25
|
+
assert: [
|
|
26
|
+
{ type: 'is-valid-openai-tools-call' },
|
|
27
|
+
{
|
|
28
|
+
type: 'javascript',
|
|
29
|
+
value: `
|
|
30
|
+
// Ensure ONLY trigger_agent tool is called, no create_todo, finish_todo, etc.
|
|
31
|
+
const toolCalls = Array.isArray(output) ? output : [];
|
|
32
|
+
return toolCalls.length > 0 && toolCalls.every(call => call.function?.name === 'trigger_agent');
|
|
33
|
+
`,
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
provider: 'openai:gpt-5-mini',
|
|
37
|
+
type: 'llm-rubric',
|
|
38
|
+
value:
|
|
39
|
+
'Should call trigger_agent tool to greet the user or ask how to help. Should NOT include any create_todo or finish_todo calls.',
|
|
40
|
+
},
|
|
41
|
+
],
|
|
42
|
+
vars: {
|
|
43
|
+
availableAgents: [
|
|
44
|
+
{ id: 'agt_J34pj8igq5Hk', title: '全栈工程师' },
|
|
45
|
+
{ id: 'agt_5xSoLVNHOjQj', title: '产品经理' },
|
|
46
|
+
],
|
|
47
|
+
conversationHistory: '<message author="user">hi</message>',
|
|
48
|
+
role: 'user',
|
|
49
|
+
userName: 'Rene Wang',
|
|
50
|
+
},
|
|
51
|
+
},
|
|
52
|
+
];
|
|
53
|
+
|
|
54
|
+
export default testCases;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
const assert = [
|
|
2
|
+
{ type: 'is-valid-openai-tools-call' },
|
|
3
|
+
{
|
|
4
|
+
type: 'javascript',
|
|
5
|
+
value: `
|
|
6
|
+
// Debug: log the actual output structure
|
|
7
|
+
console.log('DEBUG output:', JSON.stringify(output, null, 2));
|
|
8
|
+
|
|
9
|
+
// Ensure ONLY trigger_agent tool is called, no create_todo, finish_todo, etc.
|
|
10
|
+
const toolCalls = Array.isArray(output) ? output : [];
|
|
11
|
+
if (toolCalls.length === 0) {
|
|
12
|
+
console.log('DEBUG: No tool calls found');
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
for (const call of toolCalls) {
|
|
17
|
+
const toolName = call.tool_name || call.function?.name || call.name;
|
|
18
|
+
console.log('DEBUG tool name:', toolName);
|
|
19
|
+
|
|
20
|
+
if (toolName !== 'trigger_agent') {
|
|
21
|
+
console.log('DEBUG: Found non-trigger_agent tool:', toolName);
|
|
22
|
+
return false;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
console.log('DEBUG: All', toolCalls.length, 'calls are trigger_agent');
|
|
27
|
+
return true;
|
|
28
|
+
`,
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
provider: 'openai:gpt-5-mini',
|
|
32
|
+
type: 'llm-rubric',
|
|
33
|
+
value:
|
|
34
|
+
'Should call trigger_agent tool to greet the user or ask how to help. Should NOT include any create_todo or finish_todo calls.',
|
|
35
|
+
},
|
|
36
|
+
];
|
|
37
|
+
const vars = {
|
|
38
|
+
availableAgents: [
|
|
39
|
+
{ id: 'agt_J34pj8igq5Hk', title: '全栈工程师' },
|
|
40
|
+
{ id: 'agt_5xSoLVNHOjQj', title: '产品经理' },
|
|
41
|
+
],
|
|
42
|
+
conversationHistory: '<message author="user">hi</message>',
|
|
43
|
+
role: 'user',
|
|
44
|
+
userName: 'Rene Wang',
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
const testCases = [
|
|
48
|
+
{
|
|
49
|
+
assert,
|
|
50
|
+
vars: { ...vars, role: 'user' },
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
assert,
|
|
54
|
+
vars: { ...vars, role: 'system' },
|
|
55
|
+
},
|
|
56
|
+
];
|
|
57
|
+
|
|
58
|
+
export default testCases;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"type": "function",
|
|
4
|
+
"function": {
|
|
5
|
+
"name": "trigger_agent",
|
|
6
|
+
"description": "Trigger an agent to speak (group message).",
|
|
7
|
+
"parameters": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"properties": {
|
|
10
|
+
"id": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"description": "The agent id to trigger."
|
|
13
|
+
},
|
|
14
|
+
"instruction": {
|
|
15
|
+
"type": "string"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"required": ["instruction", "id"],
|
|
19
|
+
"additionalProperties": false
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"type": "function",
|
|
25
|
+
"function": {
|
|
26
|
+
"name": "wait_for_user_input",
|
|
27
|
+
"description": "Wait for user input. Use this when the conversation history looks likes fine for now, or agents are waiting for user input.",
|
|
28
|
+
"parameters": {
|
|
29
|
+
"type": "object",
|
|
30
|
+
"properties": {
|
|
31
|
+
"reason": {
|
|
32
|
+
"type": "string",
|
|
33
|
+
"description": "Optional reason for pausing the conversation."
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"required": [],
|
|
37
|
+
"additionalProperties": false
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"type": "function",
|
|
43
|
+
"function": {
|
|
44
|
+
"name": "create_todo",
|
|
45
|
+
"description": "Create a new todo item",
|
|
46
|
+
"parameters": {
|
|
47
|
+
"type": "object",
|
|
48
|
+
"properties": {
|
|
49
|
+
"assignee": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"description": "Who will do the todo. Can be agent id or empty."
|
|
52
|
+
},
|
|
53
|
+
"content": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"description": "The todo content or description."
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"required": ["content", "assignee"],
|
|
59
|
+
"additionalProperties": false
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"type": "function",
|
|
65
|
+
"function": {
|
|
66
|
+
"name": "finish_todo",
|
|
67
|
+
"description": "Finish a todo by index or all todos",
|
|
68
|
+
"parameters": {
|
|
69
|
+
"type": "object",
|
|
70
|
+
"properties": {
|
|
71
|
+
"index": {
|
|
72
|
+
"type": "number"
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"required": ["index"],
|
|
76
|
+
"additionalProperties": false
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './supervisor';
|