@lobehub/chat 1.134.3 → 1.134.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/.github/workflows/docker-database.yml +2 -0
  2. package/.github/workflows/docker-pglite.yml +2 -0
  3. package/.github/workflows/docker.yml +2 -0
  4. package/.github/workflows/release.yml +6 -0
  5. package/.github/workflows/sync-database-schema.yml +2 -0
  6. package/CHANGELOG.md +25 -0
  7. package/changelog/v1.json +9 -0
  8. package/package.json +3 -2
  9. package/packages/prompts/CLAUDE.md +329 -0
  10. package/packages/prompts/README.md +224 -0
  11. package/packages/prompts/package.json +14 -1
  12. package/packages/prompts/promptfoo/emoji-picker/eval.yaml +170 -0
  13. package/packages/prompts/promptfoo/emoji-picker/prompt.ts +16 -0
  14. package/packages/prompts/promptfoo/knowledge-qa/eval.yaml +89 -0
  15. package/packages/prompts/promptfoo/knowledge-qa/prompt.ts +26 -0
  16. package/packages/prompts/promptfoo/language-detection/eval.yaml +65 -0
  17. package/packages/prompts/promptfoo/language-detection/prompt.ts +16 -0
  18. package/packages/prompts/promptfoo/summary-title/eval.yaml +85 -0
  19. package/packages/prompts/promptfoo/summary-title/prompt.ts +18 -0
  20. package/packages/prompts/promptfoo/translate/eval.yaml +79 -0
  21. package/packages/prompts/promptfoo/translate/prompt.ts +18 -0
  22. package/packages/prompts/promptfooconfig.yaml +35 -0
  23. package/packages/prompts/src/chains/__tests__/__snapshots__/answerWithContext.test.ts.snap +164 -0
  24. package/packages/prompts/src/chains/__tests__/__snapshots__/pickEmoji.test.ts.snap +58 -0
  25. package/packages/prompts/src/chains/__tests__/__snapshots__/summaryTitle.test.ts.snap +26 -0
  26. package/packages/prompts/src/chains/__tests__/__snapshots__/translate.test.ts.snap +22 -0
  27. package/packages/prompts/src/chains/__tests__/answerWithContext.test.ts +18 -63
  28. package/packages/prompts/src/chains/__tests__/pickEmoji.test.ts +2 -37
  29. package/packages/prompts/src/chains/__tests__/summaryTitle.test.ts +2 -16
  30. package/packages/prompts/src/chains/__tests__/translate.test.ts +1 -12
  31. package/packages/prompts/src/chains/answerWithContext.ts +45 -21
  32. package/packages/prompts/src/chains/pickEmoji.ts +20 -6
  33. package/packages/prompts/src/chains/summaryTitle.ts +20 -15
  34. package/packages/prompts/src/chains/translate.ts +8 -2
@@ -1,4 +1,6 @@
1
1
  name: Publish Database Docker Image
2
+ permissions:
3
+ contents: read
2
4
 
3
5
  on:
4
6
  workflow_dispatch:
@@ -1,4 +1,6 @@
1
1
  name: Publish Docker Pglite Image
2
+ permissions:
3
+ contents: read
2
4
 
3
5
  on:
4
6
  workflow_dispatch:
@@ -1,4 +1,6 @@
1
1
  name: Publish Docker Image
2
+ permissions:
3
+ contents: read
2
4
 
3
5
  on:
4
6
  workflow_dispatch:
@@ -1,4 +1,10 @@
1
1
  name: Release CI
2
+
3
+ permissions:
4
+ contents: write
5
+ issues: write
6
+ pull-requests: write
7
+
2
8
  on:
3
9
  push:
4
10
  branches:
@@ -1,4 +1,6 @@
1
1
  name: Database Schema Visualization CI
2
+ permissions:
3
+ contents: read
2
4
 
3
5
  on:
4
6
  push:
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 1.134.4](https://github.com/lobehub/lobe-chat/compare/v1.134.3...v1.134.4)
6
+
7
+ <sup>Released on **2025-10-05**</sup>
8
+
9
+ #### 💄 Styles
10
+
11
+ - **misc**: Add promptfoo to improve prompts quality.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### Styles
19
+
20
+ - **misc**: Add promptfoo to improve prompts quality, closes [#9568](https://github.com/lobehub/lobe-chat/issues/9568) ([33874c2](https://github.com/lobehub/lobe-chat/commit/33874c2))
21
+
22
+ </details>
23
+
24
+ <div align="right">
25
+
26
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
27
+
28
+ </div>
29
+
5
30
  ### [Version 1.134.3](https://github.com/lobehub/lobe-chat/compare/v1.134.2...v1.134.3)
6
31
 
7
32
  <sup>Released on **2025-10-05**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
1
1
  [
2
+ {
3
+ "children": {
4
+ "improvements": [
5
+ "Add promptfoo to improve prompts quality."
6
+ ]
7
+ },
8
+ "date": "2025-10-05",
9
+ "version": "1.134.4"
10
+ },
2
11
  {
3
12
  "children": {
4
13
  "fixes": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.134.3",
3
+ "version": "1.134.4",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -379,7 +379,8 @@
379
379
  },
380
380
  "pnpm": {
381
381
  "onlyBuiltDependencies": [
382
- "@vercel/speed-insights"
382
+ "@vercel/speed-insights",
383
+ "better-sqlite3"
383
384
  ],
384
385
  "overrides": {
385
386
  "mdast-util-gfm-autolink-literal": "2.0.0"
@@ -0,0 +1,329 @@
1
+ # Prompt Engineering Guide for @lobechat/prompts
2
+
3
+ 本文档提供使用 Claude Code 优化 LobeChat 提示词的指南和最佳实践。
4
+
5
+ ## 提示词优化工作流
6
+
7
+ ### 1. 运行测试并识别问题
8
+
9
+ ```bash
10
+ # 运行特定提示词测试
11
+ pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml
12
+
13
+ # 查看失败的测试详情
14
+ pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml 2>&1 | grep -A 20 "FAIL"
15
+ ```
16
+
17
+ **关注点:**
18
+
19
+ - 失败率和失败模式
20
+ - 不同模型的行为差异
21
+ - 具体的失败原因(来自 llm-rubric 的评价)
22
+
23
+ ### 2. 分析失败原因
24
+
25
+ **常见问题模式:**
26
+
27
+ - **输出格式问题**:模型添加了不需要的解释或上下文
28
+ - **语言混淆**:在多语言场景下使用了错误的语言
29
+ - **过度 / 不足翻译**:技术术语被翻译或保留不当
30
+ - **上下文理解**:未正确理解何时使用 / 忽略上下文
31
+ - **一致性问题**:不同模型间的行为不一致
32
+
33
+ ### 3. 更新提示词
34
+
35
+ **优化策略:**
36
+
37
+ #### 使用英文提示词
38
+
39
+ ```typescript
40
+ // ❌ 不好 - 中文提示词在多语言场景下容易混淆
41
+ content: '你是一名翻译助手,请将内容翻译为...';
42
+
43
+ // ✅ 好 - 英文提示词更通用
44
+ content: 'You are a translation assistant. Translate the content to...';
45
+ ```
46
+
47
+ #### 明确输出要求
48
+
49
+ ```typescript
50
+ // ❌ 不好 - 模糊的指令
51
+ content: 'Please translate the text';
52
+
53
+ // ✅ 好 - 具体的规则
54
+ content: `Translate the text.
55
+
56
+ Rules:
57
+ - Output ONLY the translated text, no explanations
58
+ - Preserve technical terms exactly as they appear
59
+ - No additional commentary`;
60
+ ```
61
+
62
+ #### 使用示例指导行为
63
+
64
+ ```typescript
65
+ // ✅ 提供具体示例
66
+ content: `Select an emoji for the content.
67
+
68
+ Examples:
69
+ - "I got a promotion" → 🎉
70
+ - "Code wizard" → 🧙‍♂️
71
+ - "Business plan" → 🚀`;
72
+ ```
73
+
74
+ #### 使用 MUST/SHOULD/MAY 表达优先级
75
+
76
+ ```typescript
77
+ // ✅ 明确的优先级
78
+ content: `Answer based on context.
79
+
80
+ Rules:
81
+ - MUST use context information as foundation
82
+ - SHOULD supplement with general knowledge
83
+ - MAY provide additional examples`;
84
+ ```
85
+
86
+ ### 4. 迭代验证
87
+
88
+ 每次修改后重新运行测试:
89
+
90
+ ```bash
91
+ pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml
92
+ ```
93
+
94
+ **目标:**
95
+
96
+ - 每轮优化应提升 5-10% 通过率
97
+ - 通常需要 3-5 轮迭代达到 100%
98
+ - 关注不同模型间的一致性
99
+
100
+ ## 提示词模式库
101
+
102
+ ### 翻译 (Translation)
103
+
104
+ ```typescript
105
+ export const chainTranslate = (content: string, targetLang: string) => ({
106
+ messages: [
107
+ {
108
+ content: `You are a professional translator. Translate to ${targetLang}.
109
+
110
+ Rules:
111
+ - Output ONLY the translated text, no explanations
112
+ - Preserve technical terms, code identifiers, API keys exactly
113
+ - Maintain original formatting
114
+ - Use natural, idiomatic expressions`,
115
+ role: 'system',
116
+ },
117
+ {
118
+ content,
119
+ role: 'user',
120
+ },
121
+ ],
122
+ });
123
+ ```
124
+
125
+ **关键点:**
126
+
127
+ - 使用英文系统提示词
128
+ - 明确 "仅输出翻译内容"
129
+ - 列举需要保留的内容类型
130
+
131
+ ### 知识库问答 (Knowledge Q\&A)
132
+
133
+ ```typescript
134
+ export const chainAnswerWithContext = ({ context, question }) => {
135
+ const hasContext = context.filter((c) => c.trim()).length > 0;
136
+
137
+ return {
138
+ messages: [
139
+ {
140
+ content: hasContext
141
+ ? `Answer based on provided context.
142
+
143
+ Rules:
144
+ - If context is COMPLETELY DIFFERENT topic: state this and do NOT answer
145
+ - If context is related (even if limited):
146
+ * MUST use context as foundation
147
+ * SHOULD supplement with general knowledge
148
+ * For "how to" questions, provide actionable steps
149
+ * Example: Context about "Docker containerization" + "How to deploy?"
150
+ → Explain deployment steps using your knowledge`
151
+ : `Answer using your knowledge.`,
152
+ role: 'user',
153
+ },
154
+ ],
155
+ };
156
+ };
157
+ ```
158
+
159
+ **关键点:**
160
+
161
+ - 区分 "无上下文" 和 "不相关上下文"
162
+ - 明确何时可以补充通用知识
163
+ - 提供具体示例说明预期行为
164
+
165
+ ### Emoji 选择 (Emoji Picker)
166
+
167
+ ```typescript
168
+ export const chainPickEmoji = (content: string) => ({
169
+ messages: [
170
+ {
171
+ content: `You are an emoji expert.
172
+
173
+ Rules:
174
+ - Output ONLY a single emoji (1-2 characters)
175
+ - Focus on CONTENT meaning, not language
176
+ - Prioritize topic-specific emojis over generic emotions
177
+ - For work/projects, use work-related emojis not cultural symbols`,
178
+ role: 'system',
179
+ },
180
+ { content: 'I got a promotion', role: 'user' },
181
+ { content: '🎉', role: 'assistant' },
182
+ { content, role: 'user' },
183
+ ],
184
+ });
185
+ ```
186
+
187
+ **关键点:**
188
+
189
+ - 使用示例引导行为
190
+ - 明确优先级(主题 > 情绪)
191
+ - 避免文化符号混淆
192
+
193
+ ### 标题生成 (Summary Title)
194
+
195
+ ```typescript
196
+ export const chainSummaryTitle = (messages, locale) => ({
197
+ messages: [
198
+ {
199
+ content: `Generate a concise title.
200
+
201
+ Rules:
202
+ - Maximum 10 words
203
+ - Maximum 50 characters
204
+ - No punctuation marks
205
+ - Use language: ${locale}
206
+ - Keep it short and to the point`,
207
+ role: 'system',
208
+ },
209
+ {
210
+ content: messages.map((m) => `${m.role}: ${m.content}`).join('\n'),
211
+ role: 'user',
212
+ },
213
+ ],
214
+ });
215
+ ```
216
+
217
+ **关键点:**
218
+
219
+ - 同时限制词数和字符数
220
+ - 明确输出语言
221
+ - 简洁明了的规则
222
+
223
+ ## 测试策略
224
+
225
+ ### 多语言测试
226
+
227
+ 每个提示词应测试至少 3-5 种语言:
228
+
229
+ ```yaml
230
+ tests:
231
+ # 英语
232
+ - vars:
233
+ content: 'Hello, how are you?'
234
+ # 中文
235
+ - vars:
236
+ content: '你好,你好吗?'
237
+ # 西班牙语
238
+ - vars:
239
+ content: 'Hola, ¿cómo estás?'
240
+ ```
241
+
242
+ ### 边界情况
243
+
244
+ ```yaml
245
+ tests:
246
+ # 空输入
247
+ - vars:
248
+ content: ''
249
+ # 技术术语
250
+ - vars:
251
+ content: 'API_KEY_12345'
252
+ # 混合语言
253
+ - vars:
254
+ content: '使用 React 开发'
255
+ # 上下文不相关
256
+ - vars:
257
+ context: 'Machine learning...'
258
+ query: 'Explain blockchain'
259
+ ```
260
+
261
+ ### 断言类型
262
+
263
+ ```yaml
264
+ assert:
265
+ # LLM 评判
266
+ - type: llm-rubric
267
+ provider: openai:gpt-5-mini
268
+ value: 'Should translate accurately without extra commentary'
269
+
270
+ # 包含检查
271
+ - type: contains-any
272
+ value: ['React', 'JavaScript']
273
+
274
+ # 排除检查
275
+ - type: not-contains
276
+ value: 'explanation'
277
+ ```
278
+
279
+ ## 常见问题
280
+
281
+ ### Q: 如何处理不同模型的差异行为?
282
+
283
+ A: 使用更明确的指令和示例。如果某个模型持续失败,考虑:
284
+
285
+ 1. 添加该模型的具体示例
286
+ 2. 使用更强的指令(MUST 而非 SHOULD)
287
+ 3. 在提示词中明确该场景
288
+
289
+ ### Q: 何时使用中文 vs 英文提示词?
290
+
291
+ A:
292
+
293
+ - **英文**:多语言场景、技术内容、跨模型一致性
294
+ - **中文**:纯中文输入输出、中文特定的语言理解任务
295
+
296
+ ### Q: 如何达到 100% 通过率?
297
+
298
+ A: 迭代流程:
299
+
300
+ 1. 运行测试 → 2. 分析失败 → 3. 更新提示词 → 4. 重新测试
301
+
302
+ - 通常需要 3-5 轮
303
+ - 关注最后 5% 的边界情况
304
+ - 考虑调整测试断言(如果过于严格)
305
+
306
+ ### Q: 什么时候应该修改测试而非提示词?
307
+
308
+ A: 当:
309
+
310
+ - 测试期望不合理(如要求模型做不到的事)
311
+ - 断言过于严格(如精确匹配特定词语)
312
+ - 多个模型都以不同但合理的方式回答
313
+
314
+ ## 最佳实践总结
315
+
316
+ 1. **使用英文系统提示词**以获得更好的跨语言一致性
317
+ 2. **明确输出格式**:"Output ONLY...","No explanations"
318
+ 3. **使用示例**引导模型行为
319
+ 4. **分层规则**:MUST > SHOULD > MAY
320
+ 5. **具体化**:列举具体情况而非抽象描述
321
+ 6. **迭代验证**:小步快跑,每次改进一个问题
322
+ 7. **跨模型测试**:至少测试 3 个不同的模型
323
+ 8. **版本控制**:记录每次优化的原因和结果
324
+
325
+ ## 参考资源
326
+
327
+ - [promptfoo 文档](https://promptfoo.dev)
328
+ - [OpenAI Prompt Engineering Guide](https://platform.openai.com/docs/guides/prompt-engineering)
329
+ - [Anthropic Prompt Engineering](https://docs.anthropic.com/claude/docs/prompt-engineering)
@@ -0,0 +1,224 @@
1
+ # @lobechat/prompts
2
+
3
+ This package contains prompt chains and templates for the LobeChat application, with comprehensive testing using promptfoo.
4
+
5
+ ## Features
6
+
7
+ - **Prompt Chains**: Reusable prompt templates for various AI tasks
8
+ - **AI Testing**: Comprehensive testing using promptfoo for prompt quality assurance
9
+ - **Multi-language Support**: Prompts and tests for multiple languages
10
+ - **Type Safety**: Full TypeScript support with proper type definitions
11
+
12
+ ## Available Prompt Chains
13
+
14
+ - `chainSummaryTitle` - Generate conversation titles
15
+ - `chainLangDetect` - Detect language of input text
16
+ - `chainTranslate` - Translate content between languages
17
+ - `chainPickEmoji` - Select appropriate emojis for content
18
+ - `chainAnswerWithContext` - Answer questions using knowledge base context
19
+
20
+ ## Testing with promptfoo
21
+
22
+ This package uses [promptfoo](https://promptfoo.dev) for AI-powered testing of prompts. The testing suite evaluates prompt quality, consistency, and performance across different AI models.
23
+
24
+ ### Prerequisites
25
+
26
+ Set up your API keys in your environment:
27
+
28
+ ```bash
29
+ export OPENAI_API_KEY="your-openai-key"
30
+ export ANTHROPIC_API_KEY="your-anthropic-key" # optional
31
+ ```
32
+
33
+ ### Running Tests
34
+
35
+ ```bash
36
+ # Run all prompt tests
37
+ pnpm test:prompts
38
+
39
+ # Run tests in watch mode for development
40
+ pnpm test:prompts:watch
41
+
42
+ # Run only the summary-title prompt tests
43
+ pnpm test:prompts:summary
44
+
45
+ # Run tests for CI (no cache, structured output)
46
+ pnpm test:prompts:ci
47
+
48
+ # View test results in web UI
49
+ pnpm promptfoo:view
50
+ ```
51
+
52
+ ### Test Configuration
53
+
54
+ Tests are organized by prompt type in the `promptfoo/` directory:
55
+
56
+ ```
57
+ promptfoo/
58
+ ├── summary-title/
59
+ │ ├── eval.yaml # Test configuration
60
+ │ └── prompt.ts # Prompt wrapper
61
+ ├── translate/
62
+ │ ├── eval.yaml
63
+ │ └── prompt.ts
64
+ ├── language-detection/
65
+ │ ├── eval.yaml
66
+ │ └── prompt.ts
67
+ ├── emoji-picker/
68
+ │ ├── eval.yaml
69
+ │ └── prompt.ts
70
+ └── knowledge-qa/
71
+ ├── eval.yaml
72
+ └── prompt.ts
73
+ ```
74
+
75
+ Each test configuration includes:
76
+
77
+ - Multiple test cases with different inputs
78
+ - Assertions for output validation (regex, JSON, custom logic)
79
+ - LLM-based rubric evaluation for semantic correctness
80
+ - Performance and cost monitoring
81
+
82
+ ### Test Structure
83
+
84
+ Tests directly use the actual prompt chain functions from `src/chains/`. Each TypeScript wrapper file (`promptfoo/<prompt-name>/prompt.ts`) imports and calls the real chain function, so the tests always stay in sync with the source.
85
+
86
+ ```yaml
87
+ description: Test description
88
+ providers:
89
+ - openai:gpt-4o-mini
90
+ - anthropic:claude-3-5-haiku-latest
91
+ prompts:
92
+ - file://prompts/summary-title.ts # Imports and uses src/chains/summaryTitle.ts
93
+ tests:
94
+ - vars:
95
+ messages: [...]
96
+ locale: 'en-US'
97
+ assert:
98
+ - type: llm-rubric
99
+ value: 'Expected behavior description'
100
+ provider: openai:gpt-4o # Specify grader model for LLM rubric
101
+ - type: contains
102
+ value: 'expected text'
103
+ - type: not-contains
104
+ value: 'unwanted text'
105
+ ```
106
+
107
+ ### Adding New Tests
108
+
109
+ 1. Create a test configuration file at `promptfoo/<prompt-name>/eval.yaml`
110
+ 2. Create a TypeScript wrapper at `promptfoo/<prompt-name>/prompt.ts` that imports and calls your chain function from `src/chains/`
111
+ 3. Add the test to `promptfooconfig.yaml`
112
+ 4. Run tests to validate
113
+
114
+ **Advantage**: The wrapper files automatically stay in sync with source code changes since they directly import and use the actual chain functions.
115
+
116
+ ### Performance Monitoring
117
+
118
+ Tests include performance monitoring:
119
+
120
+ - Response time tracking
121
+ - Cost per request monitoring
122
+ - Quality score evaluation
123
+ - Cross-model consistency checks
124
+
125
+ ### CI Integration
126
+
127
+ The `test:prompts:ci` script is designed for continuous integration:
128
+
129
+ - Structured JSON output for parsing
130
+ - No interactive prompts
131
+ - Clear pass/fail status codes
132
+ - Detailed error reporting
133
+
134
+ ## Development
135
+
136
+ ```bash
137
+ # Install dependencies
138
+ pnpm install
139
+
140
+ # Run unit tests
141
+ pnpm test
142
+
143
+ # Run prompt tests
144
+ pnpm test:prompts
145
+
146
+ # Run all tests
147
+ pnpm test && pnpm test:prompts
148
+ ```
149
+
150
+ ## Contributing
151
+
152
+ When adding new prompt chains:
153
+
154
+ 1. Implement the prompt function in `src/chains/`
155
+ 2. Add unit tests in `src/chains/__tests__/`
156
+ 3. Create promptfoo tests in `promptfoo/`
157
+ 4. Update this README with the new chain description
158
+
159
+ ## Architecture
160
+
161
+ The package follows a layered architecture:
162
+
163
+ ```
164
+ src/
165
+ ├── chains/ # Prompt chain implementations
166
+ ├── prompts/ # Prompt templates and utilities
167
+ └── index.ts # Main exports
168
+
169
+ promptfoo/
170
+ ├── prompts/ # Prompt implementations for testing
171
+ ├── *.yaml # Test configurations
172
+ └── results/ # Test output directory
173
+ ```
174
+
175
+ ## Best Practices
176
+
177
+ 1. **Test Coverage**: Every prompt chain should have comprehensive promptfoo tests
178
+ 2. **Multi-language**: Test prompts with multiple languages when applicable
179
+ 3. **Edge Cases**: Include tests for edge cases and error conditions
180
+ 4. **Performance**: Monitor cost and response time in tests
181
+ 5. **Consistency**: Use consistent assertion patterns across tests
182
+ 6. **Prompt Optimization**: Use test results to iteratively improve prompts (see CLAUDE.md for optimization workflow)
183
+
184
+ ## Prompt Optimization Workflow
185
+
186
+ This package follows an iterative prompt optimization process using promptfoo test results:
187
+
188
+ ### Example: Translation Prompt Optimization
189
+
190
+ **Initial State**: 85% pass rate with issues:
191
+
192
+ - Claude models added explanatory text ("以下是翻译...")
193
+ - GPT models over-translated technical terms (`API_KEY_12345` → `API 密钥_12345`)
194
+
195
+ **Optimization Process**:
196
+
197
+ 1. **Identify Failures**: Run tests and analyze specific failure patterns
198
+ 2. **Update Prompts**: Modify prompt rules based on failure analysis
199
+ - Added: "Output ONLY the translated text, no explanations"
200
+ - Added: "Preserve technical terms, code identifiers, API keys exactly as they appear"
201
+ 3. **Re-run Tests**: Validate improvements across all models
202
+ 4. **Iterate**: Repeat until 100% pass rate achieved
203
+
204
+ **Final Result**: 100% pass rate (14/14 tests) across GPT-5-mini, Claude-3.5-Haiku, and Gemini-Flash
205
+
206
+ ### Example: Knowledge Q\&A Optimization
207
+
208
+ **Initial State**: 71.43% pass rate with context handling issues
209
+
210
+ **Optimization Journey**:
211
+
212
+ - **Round 1** (80.95%): Clarified context relevance checking
213
+ - **Round 2** (90.48%): Distinguished between "no context" vs "irrelevant context"
214
+ - **Round 3** (92.86%): Added explicit rules for partial context
215
+ - **Round 4** (96.43%): Emphasized supplementing with general knowledge
216
+ - **Final** (100%): Added concrete example and MUST/SHOULD directives
217
+
218
+ **Key Learning**: When context is topic-relevant but information-limited, models should:
219
+
220
+ - Use context as foundation
221
+ - Supplement with general knowledge
222
+ - Provide practical, actionable guidance
223
+
224
+ See `CLAUDE.md` for detailed prompt engineering guidelines.
@@ -4,10 +4,23 @@
4
4
  "private": true,
5
5
  "main": "./src/index.ts",
6
6
  "scripts": {
7
+ "promptfoo:init": "promptfoo init",
8
+ "promptfoo:view": "promptfoo view",
7
9
  "test": "vitest",
8
- "test:coverage": "vitest --coverage --silent='passed-only'"
10
+ "test:coverage": "vitest --coverage --silent='passed-only'",
11
+ "test:prompts": "pnpm test:prompts:translate && pnpm test:prompts:summary && pnpm test:prompts:lang && pnpm test:prompts:emoji && pnpm test:prompts:qa",
12
+ "test:prompts:emoji": "promptfoo eval -c promptfoo/emoji-picker/eval.yaml",
13
+ "test:prompts:lang": "promptfoo eval -c promptfoo/language-detection/eval.yaml",
14
+ "test:prompts:qa": "promptfoo eval -c promptfoo/knowledge-qa/eval.yaml",
15
+ "test:prompts:summary": "promptfoo eval -c promptfoo/summary-title/eval.yaml",
16
+ "test:prompts:translate": "promptfoo eval -c promptfoo/translate/eval.yaml",
17
+ "test:update": "vitest -u"
9
18
  },
10
19
  "dependencies": {
11
20
  "@lobechat/types": "workspace:*"
21
+ },
22
+ "devDependencies": {
23
+ "promptfoo": "^0.118.11",
24
+ "tsx": "^4.20.4"
12
25
  }
13
26
  }