@proofhound/optimization-strategy 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE +190 -0
  2. package/dist/error-pattern-analysis/analysis-types.d.ts +45 -0
  3. package/dist/error-pattern-analysis/analysis-types.d.ts.map +1 -0
  4. package/dist/error-pattern-analysis/analysis-types.js +3 -0
  5. package/dist/error-pattern-analysis/analysis-types.js.map +1 -0
  6. package/dist/error-pattern-analysis/analyze.d.ts +81 -0
  7. package/dist/error-pattern-analysis/analyze.d.ts.map +1 -0
  8. package/dist/error-pattern-analysis/analyze.js +423 -0
  9. package/dist/error-pattern-analysis/analyze.js.map +1 -0
  10. package/dist/error-pattern-analysis/config.schema.d.ts +16 -0
  11. package/dist/error-pattern-analysis/config.schema.d.ts.map +1 -0
  12. package/dist/error-pattern-analysis/config.schema.js +26 -0
  13. package/dist/error-pattern-analysis/config.schema.js.map +1 -0
  14. package/dist/error-pattern-analysis/confusion-pairs.d.ts +37 -0
  15. package/dist/error-pattern-analysis/confusion-pairs.d.ts.map +1 -0
  16. package/dist/error-pattern-analysis/confusion-pairs.js +109 -0
  17. package/dist/error-pattern-analysis/confusion-pairs.js.map +1 -0
  18. package/dist/error-pattern-analysis/generate-initial.d.ts +36 -0
  19. package/dist/error-pattern-analysis/generate-initial.d.ts.map +1 -0
  20. package/dist/error-pattern-analysis/generate-initial.js +261 -0
  21. package/dist/error-pattern-analysis/generate-initial.js.map +1 -0
  22. package/dist/error-pattern-analysis/generate.d.ts +57 -0
  23. package/dist/error-pattern-analysis/generate.d.ts.map +1 -0
  24. package/dist/error-pattern-analysis/generate.js +369 -0
  25. package/dist/error-pattern-analysis/generate.js.map +1 -0
  26. package/dist/error-pattern-analysis/index.d.ts +8 -0
  27. package/dist/error-pattern-analysis/index.d.ts.map +1 -0
  28. package/dist/error-pattern-analysis/index.js +29 -0
  29. package/dist/error-pattern-analysis/index.js.map +1 -0
  30. package/dist/error-pattern-analysis/parse.d.ts +92 -0
  31. package/dist/error-pattern-analysis/parse.d.ts.map +1 -0
  32. package/dist/error-pattern-analysis/parse.js +456 -0
  33. package/dist/error-pattern-analysis/parse.js.map +1 -0
  34. package/dist/error-pattern-analysis/prompts/analyze-confusion.system.en-US.md +50 -0
  35. package/dist/error-pattern-analysis/prompts/analyze-confusion.system.md +61 -0
  36. package/dist/error-pattern-analysis/prompts/analyze-regression.system.en-US.md +50 -0
  37. package/dist/error-pattern-analysis/prompts/analyze-regression.system.md +61 -0
  38. package/dist/error-pattern-analysis/prompts/generate-initial.system.en-US.md +43 -0
  39. package/dist/error-pattern-analysis/prompts/generate-initial.system.md +49 -0
  40. package/dist/error-pattern-analysis/prompts/generate.system.en-US.md +53 -0
  41. package/dist/error-pattern-analysis/prompts/generate.system.md +68 -0
  42. package/dist/error-pattern-analysis/prompts/loader.d.ts +46 -0
  43. package/dist/error-pattern-analysis/prompts/loader.d.ts.map +1 -0
  44. package/dist/error-pattern-analysis/prompts/loader.js +109 -0
  45. package/dist/error-pattern-analysis/prompts/loader.js.map +1 -0
  46. package/dist/error-pattern-analysis/prompts/optimization-tips.en-US.md +25 -0
  47. package/dist/error-pattern-analysis/prompts/optimization-tips.md +38 -0
  48. package/dist/error-pattern-analysis/prompts/summarize.system.en-US.md +48 -0
  49. package/dist/error-pattern-analysis/prompts/summarize.system.md +69 -0
  50. package/dist/error-pattern-analysis/prompts.d.ts +79 -0
  51. package/dist/error-pattern-analysis/prompts.d.ts.map +1 -0
  52. package/dist/error-pattern-analysis/prompts.js +659 -0
  53. package/dist/error-pattern-analysis/prompts.js.map +1 -0
  54. package/dist/error-pattern-analysis/token-budget.d.ts +20 -0
  55. package/dist/error-pattern-analysis/token-budget.d.ts.map +1 -0
  56. package/dist/error-pattern-analysis/token-budget.js +88 -0
  57. package/dist/error-pattern-analysis/token-budget.js.map +1 -0
  58. package/dist/index.d.ts +9 -0
  59. package/dist/index.d.ts.map +1 -0
  60. package/dist/index.js +27 -0
  61. package/dist/index.js.map +1 -0
  62. package/dist/loop/best.d.ts +3 -0
  63. package/dist/loop/best.d.ts.map +1 -0
  64. package/dist/loop/best.js +43 -0
  65. package/dist/loop/best.js.map +1 -0
  66. package/dist/loop/goals.d.ts +6 -0
  67. package/dist/loop/goals.d.ts.map +1 -0
  68. package/dist/loop/goals.js +38 -0
  69. package/dist/loop/goals.js.map +1 -0
  70. package/dist/loop/round-outcome.d.ts +14 -0
  71. package/dist/loop/round-outcome.d.ts.map +1 -0
  72. package/dist/loop/round-outcome.js +18 -0
  73. package/dist/loop/round-outcome.js.map +1 -0
  74. package/dist/loop/run-iteration-loop.d.ts +5 -0
  75. package/dist/loop/run-iteration-loop.d.ts.map +1 -0
  76. package/dist/loop/run-iteration-loop.js +247 -0
  77. package/dist/loop/run-iteration-loop.js.map +1 -0
  78. package/dist/loop/types.d.ts +190 -0
  79. package/dist/loop/types.d.ts.map +1 -0
  80. package/dist/loop/types.js +13 -0
  81. package/dist/loop/types.js.map +1 -0
  82. package/dist/registry.d.ts +5 -0
  83. package/dist/registry.d.ts.map +1 -0
  84. package/dist/registry.js +19 -0
  85. package/dist/registry.js.map +1 -0
  86. package/dist/types.d.ts +10 -0
  87. package/dist/types.d.ts.map +1 -0
  88. package/dist/types.js +3 -0
  89. package/dist/types.js.map +1 -0
  90. package/package.json +52 -0
@@ -0,0 +1,659 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getSystemPrompts = exports.getOptimizationTipNames = exports.SYSTEM_PROMPTS_EN = exports.SYSTEM_PROMPTS = exports.SUMMARIZE_SYSTEM_PROMPT = exports.PROMPT_FILES = exports.OPTIMIZATION_TIP_NAMES_EN = exports.OPTIMIZATION_TIP_NAMES = exports.OPTIMIZATION_TIPS = exports.GENERATE_SYSTEM_PROMPT = exports.ANALYZE_REGRESSION_SYSTEM_PROMPT = exports.ANALYZE_CONFUSION_SYSTEM_PROMPT = void 0;
4
+ exports.formatRoundHistory = formatRoundHistory;
5
+ exports.formatToolboxSwitchHint = formatToolboxSwitchHint;
6
+ exports.fitRoundHistoryToBudget = fitRoundHistoryToBudget;
7
+ exports.buildAnalyzeConfusionMessages = buildAnalyzeConfusionMessages;
8
+ exports.buildAnalyzeRegressionMessages = buildAnalyzeRegressionMessages;
9
+ exports.buildSummarizeMessages = buildSummarizeMessages;
10
+ exports.buildGenerateMessages = buildGenerateMessages;
11
+ exports.extractVariableNames = extractVariableNames;
12
+ // LLM prompt construction — system prompts all come from prompts/*.md (loaded by prompts/loader.ts)
13
+ // This file is only responsible for assembling the user prompt (including dynamic variable substitution).
14
+ const goals_1 = require("../loop/goals");
15
+ const shared_1 = require("@proofhound/shared");
16
+ const loader_1 = require("./prompts/loader");
17
+ const token_budget_1 = require("./token-budget");
18
+ // Re-export system prompts + the SYSTEM_PROMPTS map (keeps backward-compatible API)
19
+ var loader_2 = require("./prompts/loader");
20
+ Object.defineProperty(exports, "ANALYZE_CONFUSION_SYSTEM_PROMPT", { enumerable: true, get: function () { return loader_2.ANALYZE_CONFUSION_SYSTEM_PROMPT; } });
21
+ Object.defineProperty(exports, "ANALYZE_REGRESSION_SYSTEM_PROMPT", { enumerable: true, get: function () { return loader_2.ANALYZE_REGRESSION_SYSTEM_PROMPT; } });
22
+ Object.defineProperty(exports, "GENERATE_SYSTEM_PROMPT", { enumerable: true, get: function () { return loader_2.GENERATE_SYSTEM_PROMPT; } });
23
+ Object.defineProperty(exports, "OPTIMIZATION_TIPS", { enumerable: true, get: function () { return loader_2.OPTIMIZATION_TIPS; } });
24
+ Object.defineProperty(exports, "OPTIMIZATION_TIP_NAMES", { enumerable: true, get: function () { return loader_2.OPTIMIZATION_TIP_NAMES; } });
25
+ Object.defineProperty(exports, "OPTIMIZATION_TIP_NAMES_EN", { enumerable: true, get: function () { return loader_2.OPTIMIZATION_TIP_NAMES_EN; } });
26
+ Object.defineProperty(exports, "PROMPT_FILES", { enumerable: true, get: function () { return loader_2.PROMPT_FILES; } });
27
+ Object.defineProperty(exports, "SUMMARIZE_SYSTEM_PROMPT", { enumerable: true, get: function () { return loader_2.SUMMARIZE_SYSTEM_PROMPT; } });
28
+ Object.defineProperty(exports, "SYSTEM_PROMPTS", { enumerable: true, get: function () { return loader_2.SYSTEM_PROMPTS; } });
29
+ Object.defineProperty(exports, "SYSTEM_PROMPTS_EN", { enumerable: true, get: function () { return loader_2.SYSTEM_PROMPTS_EN; } });
30
+ Object.defineProperty(exports, "getOptimizationTipNames", { enumerable: true, get: function () { return loader_2.getOptimizationTipNames; } });
31
+ Object.defineProperty(exports, "getSystemPrompts", { enumerable: true, get: function () { return loader_2.getSystemPrompts; } });
32
+ // =========================
33
+ // Common fragments
34
+ // =========================
35
/**
 * Render a goal scope as a short label in the requested prompt language.
 *
 * A scope whose `kind` is `'overall'` gets the fixed "overall" wording; any
 * other scope is rendered as a class label built from `scope.label`.
 *
 * @param {{kind: string, label?: string}} scope - Scope descriptor of a goal.
 * @param {string} [language] - Prompt language; `'en-US'` selects English, anything else Chinese.
 * @returns {string} The label text.
 */
function scopeLabel(scope, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const isOverall = scope.kind === 'overall';
    if (language === 'en-US') {
        return isOverall ? 'overall' : `class "${scope.label}"`;
    }
    return isOverall ? '整体' : `分类「${scope.label}」`;
}
40
/**
 * Format a metric value with four decimal places, or a localized
 * "missing" marker when the value is null, undefined or NaN.
 *
 * @param {number|null|undefined} n - Value to format.
 * @param {string} [language] - Prompt language; `'en-US'` selects the English marker.
 * @returns {string} Fixed-point text or the missing marker.
 */
function fmtNum(n, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const isMissing = n === null || n === undefined || Number.isNaN(n);
    if (!isMissing) {
        return n.toFixed(4);
    }
    return language === 'en-US' ? '(missing)' : '(缺失)';
}
45
/**
 * Express the distance between an observed value and its target as a signed,
 * four-decimal string, normalized by the direction of the comparison operator.
 *
 * A positive result means the goal is exceeded; a negative result is how much
 * further the metric still has to move.
 *   - `>=` / `>` : gap = observed - target (larger is better)
 *   - `<=` / `<` : gap = target - observed (smaller is better)
 *
 * The strict `<` operator is handled like `<=`, mirroring how `>` is handled
 * like `>=`; previously it fell into the "larger is better" branch and the
 * sign of the gap was inverted.
 *
 * @param {number} observed - Current metric value.
 * @param {number} target - Goal threshold.
 * @param {string} op - Comparison operator of the goal.
 * @returns {string} Gap with an explicit `+` for non-negative values.
 */
function signedGap(observed, target, op) {
    const smallerIsBetter = op === '<=' || op === '<';
    const delta = smallerIsBetter ? target - observed : observed - target;
    const sign = delta >= 0 ? '+' : '';
    return `${sign}${delta.toFixed(4)}`;
}
53
/**
 * Build the "optimization goal vs current actual" list: one bullet per goal
 * showing the target, the observed value, the signed gap and whether the goal
 * is currently achieved.
 *
 * @param {Array<object>} goals - Goals; each is read for `metric`, `scope`, `op` and `value`.
 * @param {object} metrics - Metrics object handed to `readMetric`.
 * @param {string} [language] - Prompt language; `'en-US'` selects English wording.
 * @returns {string} Newline-joined bullets, or a placeholder when no goals are declared.
 */
function formatGoalsWithProgress(goals, metrics, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const english = language === 'en-US';
    if (goals.length === 0) {
        return english ? '(no optimization goals declared)' : '(未声明优化目标)';
    }
    const bullets = [];
    for (const goal of goals) {
        const observed = (0, goals_1.readMetric)(metrics, goal);
        const hasObservation = observed !== null;
        // A goal without an observed value can never count as achieved.
        const achieved = hasObservation && (0, goals_1.compare)(observed, goal.op, goal.value);
        const gapStr = hasObservation ? signedGap(observed, goal.value, goal.op) : '?';
        const scopeText = scopeLabel(goal.scope, language);
        const observedText = fmtNum(observed, language);
        if (english) {
            const status = achieved ? 'achieved' : 'not achieved';
            bullets.push(`- \`${goal.metric}\` for ${scopeText}: target \`${goal.op} ${goal.value}\`; observed \`${observedText}\`; gap \`${gapStr}\`; ${status}`);
        }
        else {
            const status = achieved ? '✅ 已达成' : '❌ 未达成';
            bullets.push(`- ${scopeText} 的 \`${goal.metric}\`:目标 \`${goal.op} ${goal.value}\`;当前实际 \`${observedText}\`;差距 \`${gapStr}\`;${status}`);
        }
    }
    return bullets.join('\n');
}
76
/**
 * Render the full metrics for the scopes the goals cover (overall and/or
 * specific classes) as fenced JSON sections. Classes not referenced by any
 * goal are omitted, which keeps the prompt short and focused.
 *
 * When no goals are declared the overall metrics are shown so the section is
 * never completely empty. A missing `overall` or per-class slice is rendered
 * as `{}` in every branch; the no-goals branch previously emitted an empty
 * JSON fence because `JSON.stringify(undefined)` yields `undefined`.
 *
 * @param {Array<object>} goals - Goals; each is read for `scope.kind` and `scope.label`.
 * @param {object} metrics - Object read for `overall` and `perClass[label]`.
 * @param {string} [language] - Prompt language; `'en-US'` selects English headings.
 * @returns {string} Newline-joined markdown sections.
 */
function formatRelevantMetrics(goals, metrics, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const english = language === 'en-US';
    const overallHeading = english ? '### Overall' : '### 整体';
    // One heading followed by a fenced, pretty-printed JSON dump of the slice.
    const renderSection = (heading, slice) => [
        heading,
        '```json',
        JSON.stringify(slice ?? {}, null, 2),
        '```',
    ];
    if (goals.length === 0) {
        return renderSection(overallHeading, metrics.overall).join('\n');
    }
    const involvedClasses = new Set();
    let needsOverall = false;
    for (const goal of goals) {
        if (goal.scope.kind === 'overall') {
            needsOverall = true;
        }
        else {
            involvedClasses.add(goal.scope.label);
        }
    }
    const sections = [];
    if (needsOverall) {
        sections.push(...renderSection(overallHeading, metrics.overall));
    }
    for (const cls of involvedClasses) {
        const heading = english ? `### Class "${cls}"` : `### 分类「${cls}」`;
        sections.push(...renderSection(heading, metrics.perClass?.[cls]));
    }
    return sections.join('\n');
}
106
/**
 * Render prompt variable names as a markdown bullet list of `{{name}}`
 * placeholders.
 *
 * A missing list (null / undefined) is treated like an empty one, matching
 * the guard in `formatAnalysisOnlyFields`; previously it threw on `.length`.
 *
 * @param {string[]|null|undefined} vars - Variable names.
 * @param {string} [language] - Prompt language; `'en-US'` selects the English placeholder.
 * @returns {string} Bullet list, or a placeholder when there are no variables.
 */
function formatVariableList(vars, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    if (!vars || vars.length === 0) {
        return language === 'en-US'
            ? '(the current prompt references no dataset field variables)'
            : '(当前 prompt 不引用任何数据集字段变量)';
    }
    return vars.map((name) => `- \`{{${name}}}\``).join('\n');
}
114
/**
 * Render analysis-only field names as a markdown bullet list of inline-code
 * entries, or a localized "none" marker when the list is missing or empty.
 *
 * @param {string[]|null|undefined} fields - Field names.
 * @param {string} [language] - Prompt language; `'en-US'` selects the English marker.
 * @returns {string} Bullet list or the "none" marker.
 */
function formatAnalysisOnlyFields(fields, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const hasFields = Boolean(fields) && fields.length !== 0;
    if (hasFields) {
        const bullets = fields.map((field) => `- \`${field}\``);
        return bullets.join('\n');
    }
    return language === 'en-US' ? '(none)' : '(无)';
}
119
+ // =========================
120
+ // Cross-round history section rendering — see docs/specs/25-optimizations.md §11.3 "cross-round history injection"
121
+ // Primary metric taken from goals[0] (consistent with the deltaFromPrev calculation); when there are no goals, degrade to metrics.overall.accuracy
122
+ // =========================
123
/**
 * Format the change of the primary metric relative to the previous round.
 *
 * A missing delta (null, undefined or NaN) renders as a `--` placeholder.
 * Previously only `null` was guarded: `undefined` threw on `.toFixed` and
 * NaN rendered as "Δ NaN", unlike `fmtNum`, which treats all three as missing.
 *
 * @param {number|null|undefined} d - Delta from the previous round.
 * @returns {string} `Δ` followed by a signed four-decimal value, or the placeholder.
 */
function fmtDelta(d) {
    if (d === null || d === undefined || Number.isNaN(d)) {
        return 'Δ -- ';
    }
    const sign = d >= 0 ? '+' : '';
    return `Δ ${sign}${d.toFixed(4)}`;
}
129
/**
 * Read the primary metric value for one round.
 *
 * The primary metric is the one named by the first goal (read through
 * `readMetric`); when no goal is declared it falls back to
 * `metrics.overall.accuracy`, accepted only if it is a number.
 *
 * @param {object} metrics - Metrics of a round.
 * @param {Array<object>} goals - Declared goals; only the first is used.
 * @returns {number|null} The fallback returns a number or null; with a goal, whatever `readMetric` returns.
 */
function readPrimaryMetric(metrics, goals) {
    if (goals[0]) {
        return (0, goals_1.readMetric)(metrics, goals[0]);
    }
    const accuracy = metrics.overall?.accuracy;
    if (typeof accuracy !== 'number') {
        return null;
    }
    return accuracy;
}
138
/**
 * Name of the primary metric: the `metric` of the first goal, or
 * `'accuracy'` when there is no first goal or it declares no metric.
 *
 * @param {Array<object>} goals - Declared goals; only the first is used.
 * @returns {string} Metric name.
 */
function primaryMetricName(goals) {
    const declaredName = goals[0]?.metric;
    return declaredName ?? 'accuracy';
}
141
/**
 * Render the cross-round optimization history as a markdown section: a
 * heading with a reading guide, then one bullet per round carrying the delta,
 * best marker, primary metric value, change summary, applied change ids and
 * applied tips.
 *
 * @param {Array<object>} history - Round entries; each is read for `metrics`, `isBest`,
 *   `appliedChanges`, `changeSummary`, `appliedTips`, `roundIndex` and `deltaFromPrev`.
 * @param {Array<object>} goals - Declared goals; the first one selects the primary metric.
 * @param {string} [language] - Prompt language; `'en-US'` selects English wording.
 * @returns {string} The section text, or `''` when the history is empty.
 */
function formatRoundHistory(history, goals, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    if (history.length === 0) {
        return '';
    }
    const english = language === 'en-US';
    const metricName = primaryMetricName(goals);
    const output = [];
    if (english) {
        output.push(`## Optimization History (${history.length} rounds, chronological; use it to avoid repeated failed directions)`, `> Reading guide: Δ is the change in the primary metric (${metricName}) from the previous round; ★ marks the current best round.`, '');
    }
    else {
        output.push(`## 历史优化轨迹(共 ${history.length} 轮,按时间正序;用于避免重复无效尝试)`, `> 解读:Δ 是与上一轮主指标 (${metricName}) 的差值;★ 标记当前 best 轮;★+ 表示该 best 同时被采用为下一轮 base`, '');
    }
    // Wrap a non-empty list in brackets, otherwise use the given placeholder.
    const bracketed = (items, emptyText) => (items.length > 0 ? `[${items.join(', ')}]` : emptyText);
    for (const entry of history) {
        const primary = readPrimaryMetric(entry.metrics, goals);
        let primaryStr;
        if (primary === null) {
            primaryStr = english ? '(missing)' : '(缺失)';
        }
        else {
            primaryStr = primary.toFixed(4);
        }
        const bestMark = entry.isBest ? '★' : '';
        const changeIds = entry.appliedChanges.map((change) => change.changeId).filter(Boolean);
        const changeIdsStr = bracketed(changeIds, english ? '(none)' : '(无)');
        const trimmedSummary = entry.changeSummary.trim();
        let summary = trimmedSummary;
        if (trimmedSummary.length === 0) {
            summary = english ? '(not provided)' : '(未提供)';
        }
        const tips = entry.appliedTips.filter((tip) => tip.trim().length > 0);
        const tipsStr = bracketed(tips, english ? '(not declared)' : '(未声明)');
        const deltaText = fmtDelta(entry.deltaFromPrev);
        const prefix = english
            ? `- Round ${entry.roundIndex} (${deltaText}) ${bestMark} ${metricName} ${primaryStr}`
            : `- 第 ${entry.roundIndex} 轮 (${deltaText}) ${bestMark} ${metricName} ${primaryStr}`;
        output.push(`${prefix}\n - changeSummary: ${summary}\n - appliedChanges: ${changeIdsStr}\n - appliedTips: ${tipsStr}`);
    }
    return output.join('\n');
}
181
/**
 * Render the toolbox rotation hint section. It lists the techniques already
 * tried, the full toolbox, and a suggestion to try something new.
 *
 * The wording is a soft hint: it suggests, but does not force, a switch of
 * technique. Blank technique names are dropped and duplicates collapsed.
 *
 * @param {string[]} recentlyUsedTips - Technique names used in recent rounds.
 * @param {string[]} allTipNames - Every technique name in the toolbox.
 * @param {string} [language] - Prompt language; `'en-US'` selects English wording.
 * @returns {string} The section text.
 */
function formatToolboxSwitchHint(recentlyUsedTips, allTipNames, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const english = language === 'en-US';
    const asCodeList = (names) => names.map((name) => `\`${name}\``).join(', ');
    const distinctUsed = new Set();
    for (const tip of recentlyUsedTips) {
        if (tip.trim().length > 0) {
            distinctUsed.add(tip);
        }
    }
    let usedList;
    if (distinctUsed.size > 0) {
        usedList = asCodeList([...distinctUsed]);
    }
    else {
        usedList = english ? '(no historical technique names recognized)' : '(无法识别历史技巧名)';
    }
    const toolboxList = asCodeList(allTipNames);
    const lines = english
        ? [
            '## Toolbox Rotation Hint',
            '> The last 2 new versions did not improve the historical best metric. Prefer techniques not already tried.',
            '',
            `- Techniques tried in the last 2 rounds (appliedTips): ${usedList}`,
            `- All toolbox techniques: ${toolboxList}`,
            "- Suggestion: include at least one toolbox item outside the tried list in this round's `appliedTips`, unless the evidence bundle clearly requires staying with the same direction.",
        ]
        : [
            '## 工具箱轮换提示',
            '> 近 2 轮的新版本均未刷新历史最佳指标,可能在某种技巧组合上原地转圈。请优先尝试**未使用过**的优化技巧。',
            '',
            `- 近 2 轮已尝试技巧(appliedTips):${usedList}`,
            `- 工具箱全部技巧(见 optimization-tips.md):${toolboxList}`,
            '- 建议:本轮 `appliedTips` 中至少包含一个上面"已尝试"清单外的工具箱条目;若 evidenceBundle 确无对应方向证据,可在 `changeSummary` 说明仍沿用原方向的理由。',
        ];
    return lines.join('\n');
}
213
/**
 * Produce the lines to spread into a user prompt for the optimization-history
 * section.
 *
 * Returns the formatted history followed by an empty string so the section
 * is followed by the usual blank separator; returns an empty array when there
 * is no history, so nothing is rendered.
 *
 * @param {Array<object>|null|undefined} history - Round history entries.
 * @param {Array<object>} goals - Declared goals, forwarded to `formatRoundHistory`.
 * @param {string} [language] - Prompt language.
 * @returns {string[]} Zero or two prompt lines.
 */
function renderRoundHistorySection(history, goals, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    const hasHistory = Boolean(history) && history.length !== 0;
    return hasHistory ? [formatRoundHistory(history, goals, language), ''] : [];
}
221
/**
 * Produce the lines to spread into the generate user prompt for the toolbox
 * rotation hint.
 *
 * Returns an empty array when no hint is supplied, so nothing is rendered;
 * otherwise the formatted hint followed by an empty string as separator.
 *
 * @param {{recentlyUsedTips: string[], allTipNames: string[]}|null|undefined} hint - Hint data.
 * @param {string} [language] - Prompt language.
 * @returns {string[]} Zero or two prompt lines.
 */
function renderToolboxSwitchHintSection(hint, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    if (hint) {
        const { recentlyUsedTips, allTipNames } = hint;
        return [formatToolboxSwitchHint(recentlyUsedTips, allTipNames, language), ''];
    }
    return [];
}
228
+ // =========================
229
+ // Cross-round history token-budget degradation — see docs/specs/25-optimizations.md §11.3
230
+ // L0 full → L1 early rounds: changeSummary truncated to 200 chars + appliedChanges only keep changeId →
231
+ // L2 early rounds: changeSummary truncated to 50 chars + appliedChanges cleared →
232
+ // L3 only the most recent 1 round contains changeSummary / appliedChanges; the rest are cleared
233
+ // Estimation calibration goes through formatRoundHistory + estimateMessagesTokens (same function as the caller probe, to avoid drift)
234
+ // =========================
235
/** Number of most recent history entries that degradation levels 1 and 2 leave untouched. */
const HISTORY_RECENT_KEEP = 3;
/** Length passed to `truncateLongText` for early-round change summaries at level 1. */
const HISTORY_L1_CHANGE_SUMMARY_CHARS = 200;
/** Length passed to `truncateLongText` for early-round change summaries at level 2. */
const HISTORY_L2_CHANGE_SUMMARY_CHARS = 50;
238
/**
 * Estimate how many input tokens the rendered history section costs.
 *
 * The history is rendered with `formatRoundHistory` and measured with
 * `estimateMessagesTokens` (empty system text, the rendering as user text).
 * An empty history costs zero.
 *
 * @param {Array<object>} history - Round history entries.
 * @param {Array<object>} goals - Declared goals, forwarded to the renderer.
 * @param {string} [language] - Prompt language.
 * @returns {number} The `inputTokens` figure of the estimate.
 */
function estimateRoundHistoryTokens(history, goals, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    if (history.length === 0) {
        return 0;
    }
    const rendered = formatRoundHistory(history, goals, language);
    const { inputTokens } = (0, token_budget_1.estimateMessagesTokens)('', rendered, 0);
    return inputTokens;
}
243
/**
 * Shrink the round history until its rendered form fits a token budget,
 * trying progressively more aggressive degradation levels:
 *
 *   - level 0: history unchanged;
 *   - level 1: early rounds get `changeSummary` truncated to
 *     HISTORY_L1_CHANGE_SUMMARY_CHARS and `appliedChanges` reduced to `changeId`;
 *   - level 2: early rounds get `changeSummary` truncated to
 *     HISTORY_L2_CHANGE_SUMMARY_CHARS and `appliedChanges` emptied;
 *   - level 3: every round except the last gets an empty `changeSummary` and
 *     empty `appliedChanges` (all other fields are kept).
 *
 * "Early rounds" are all entries except the last HISTORY_RECENT_KEEP. Level 3
 * is returned even when it still exceeds the budget.
 *
 * @param {Array<object>|null|undefined} history - Round history entries.
 * @param {number} budgetTokens - Token budget for the history section.
 * @param {Array<object>} goals - Declared goals, forwarded to the estimator.
 * @param {string} [language] - Prompt language.
 * @returns {{fitted: *, level: number, truncated: boolean, entryCount: number,
 *   budgetTokens: number, estimatedTokens: number}} Fitted history and diagnostics.
 */
function fitRoundHistoryToBudget(history, budgetTokens, goals, language = shared_1.DEFAULT_PROMPT_LANGUAGE) {
    // Every exit path reports the same shape; only level 0 counts as untruncated.
    const describe = (fitted, level, entryCount, estimatedTokens) => ({
        fitted,
        level,
        truncated: level > 0,
        entryCount,
        budgetTokens,
        estimatedTokens,
    });
    if (!history || history.length === 0) {
        return describe(history, 0, 0, 0);
    }
    const measure = (entries) => estimateRoundHistoryTokens(entries, goals, language);
    const fullTokens = measure(history);
    if (fullTokens <= budgetTokens) {
        return describe(history, 0, history.length, fullTokens);
    }
    // Entries at or after this index are "recent" and untouched by levels 1 and 2.
    const recentStart = Math.max(0, history.length - HISTORY_RECENT_KEEP);
    const rewriteEarly = (shrink) => history.map((entry, index) => (index < recentStart ? { ...entry, ...shrink(entry) } : entry));
    const earlyLevels = [
        // Level 1: shorter summary, change list slimmed to ids.
        (entry) => ({
            changeSummary: (0, token_budget_1.truncateLongText)(entry.changeSummary, HISTORY_L1_CHANGE_SUMMARY_CHARS),
            appliedChanges: entry.appliedChanges.map((change) => ({ changeId: change.changeId })),
        }),
        // Level 2: even shorter summary, change list dropped.
        (entry) => ({
            changeSummary: (0, token_budget_1.truncateLongText)(entry.changeSummary, HISTORY_L2_CHANGE_SUMMARY_CHARS),
            appliedChanges: [],
        }),
    ];
    for (let step = 0; step < earlyLevels.length; step += 1) {
        const candidate = rewriteEarly(earlyLevels[step]);
        const candidateTokens = measure(candidate);
        if (candidateTokens <= budgetTokens) {
            return describe(candidate, step + 1, candidate.length, candidateTokens);
        }
    }
    // Level 3: only the newest round keeps its summary and change list.
    const newestIndex = history.length - 1;
    const newestOnly = history.map((entry, index) => (index === newestIndex ? entry : { ...entry, changeSummary: '', appliedChanges: [] }));
    return describe(newestOnly, 3, newestOnly.length, measure(newestOnly));
}
324
/**
 * Assemble the system and user messages for analyzing one confusion pair
 * (samples with label `pair.expected` that were predicted as `pair.predicted`).
 *
 * The user prompt contains, in order: the pair header, the current prompt
 * body, the variable whitelists, goals vs current metrics, the optional round
 * history, the relevant metrics and the failed samples as JSON.
 *
 * @param {object} args - Read for `pair`, `currentVersion`, `metrics`, `goals`,
 *   `fieldWhitelist`, `roundHistory` and the optional `promptLanguage`.
 * @returns {{system: *, user: string}} `system` is the `analyzeConfusion` entry
 *   of `getSystemPrompts(language)`; `user` is the assembled prompt.
 */
function buildAnalyzeConfusionMessages(args) {
    const { pair, currentVersion, metrics, goals, fieldWhitelist, roundHistory } = args;
    const language = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    const system = (0, loader_1.getSystemPrompts)(language).analyzeConfusion;
    const english = language === 'en-US';
    // Both languages share one layout; only the wording of each line differs.
    const pick = (enText, zhText) => (english ? enText : zhText);
    const shownCount = pair.samples.length;
    const sections = [
        pick(`## Confusion Pair: \`${pair.expected}\` -> \`${pair.predicted}\` (${pair.count} failed samples; showing ${shownCount})`, `## 本批混淆对:\`${pair.expected}\` → \`${pair.predicted}\`(共 ${pair.count} 条失败样本,本批展示 ${shownCount} 条)`),
        '',
        pick('## Current Prompt', '## 当前提示词全文'),
        '```',
        currentVersion.body,
        '```',
        '',
        pick('## promptVariables (available, immutable)', '## promptVariables(可用、不可改)'),
        formatVariableList(fieldWhitelist.promptVariables, language),
        '',
        pick('## analysisOnlyFields (read-only; forbidden in the final prompt)', '## analysisOnlyFields(仅可阅读、严禁出现在最终 prompt 中)'),
        formatAnalysisOnlyFields(fieldWhitelist.analysisOnlyFields, language),
        '',
        pick('## Optimization Goals vs Current Metrics', '## 优化目标 vs 当前实际'),
        formatGoalsWithProgress(goals, metrics, language),
        '',
        ...renderRoundHistorySection(roundHistory, goals, language),
        pick('## Relevant Metrics', '## 涉及范围的完整指标(仅展示与优化目标相关的范围)'),
        formatRelevantMetrics(goals, metrics, language),
        '',
        pick(`## Failed Samples (${shownCount})`, `## 本批失败样本(共 ${shownCount} 条)`),
        '```json',
        JSON.stringify(pair.samples, null, 2),
        '```',
        '',
        pick('Output JSON according to the system instructions.', '请按 system 指令输出 JSON。'),
    ];
    return { system, user: sections.join('\n') };
}
387
/**
 * Assemble the system and user messages for analyzing one group of
 * regression samples (samples sharing the same `predicted` value).
 *
 * The user prompt contains, in order: the group header, the previous
 * comparable prompt (or a note that none was provided), the current prompt
 * body, the variable whitelists, goals vs current metrics, the optional round
 * history, the relevant metrics and the regression samples as JSON.
 *
 * @param {object} args - Read for `group`, `currentVersion`, `previousVersion`,
 *   `metrics`, `goals`, `fieldWhitelist`, `roundHistory` and the optional `promptLanguage`.
 * @returns {{system: *, user: string}} `system` is the `analyzeRegression` entry
 *   of `getSystemPrompts(language)`; `user` is the assembled prompt.
 */
function buildAnalyzeRegressionMessages(args) {
    const { group, currentVersion, previousVersion, metrics, goals, fieldWhitelist, roundHistory } = args;
    const language = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    const system = (0, loader_1.getSystemPrompts)(language).analyzeRegression;
    const english = language === 'en-US';
    // Both languages share one layout; only the wording of each line differs.
    const pick = (enText, zhText) => (english ? enText : zhText);
    let previousPromptSection;
    if (previousVersion) {
        previousPromptSection = [
            pick(`## Previous Comparable Prompt (v${previousVersion.versionNumber}; for regression attribution only)`, `## 上一可比 prompt 模板(v${previousVersion.versionNumber};仅用于回归归因)`),
            '```',
            previousVersion.body,
            '```',
        ];
    }
    else {
        // Without a previous version the model is told not to attribute the regression to a specific change.
        previousPromptSection = [
            pick('## Previous Comparable Prompt', '## 上一可比 prompt 模板'),
            pick('(not provided; do not claim a specific prompt change caused regression)', '(未提供上一版 prompt / diff;不得声称某段具体改动导致回归,只能基于样本描述当前 prompt 的风险倾向。)'),
        ];
    }
    const sampleCount = group.samples.length;
    const sections = [
        pick(`## Regression Samples (predicted=\`${group.predicted}\`, count ${group.count})`, `## 本批回归样本(predicted=\`${group.predicted}\`,共 ${group.count} 条)`),
        '',
        ...previousPromptSection,
        '',
        pick('## Current Prompt', '## 当前提示词全文'),
        '```',
        currentVersion.body,
        '```',
        '',
        pick('## promptVariables (available, immutable)', '## promptVariables(可用、不可改)'),
        formatVariableList(fieldWhitelist.promptVariables, language),
        '',
        pick('## analysisOnlyFields (read-only; forbidden in the final prompt)', '## analysisOnlyFields(仅可阅读、严禁出现在最终 prompt 中)'),
        formatAnalysisOnlyFields(fieldWhitelist.analysisOnlyFields, language),
        '',
        pick('## Optimization Goals vs Current Metrics', '## 优化目标 vs 当前实际'),
        formatGoalsWithProgress(goals, metrics, language),
        '',
        ...renderRoundHistorySection(roundHistory, goals, language),
        pick('## Relevant Metrics', '## 涉及范围的完整指标(仅展示与优化目标相关的范围)'),
        formatRelevantMetrics(goals, metrics, language),
        '',
        pick(`## Regression Samples (${sampleCount})`, `## 回归样本(共 ${sampleCount} 条)`),
        '```json',
        JSON.stringify(group.samples, null, 2),
        '```',
        '',
        pick('Output JSON according to the system instructions.', '请按 system 指令输出 JSON。'),
    ];
    return { system, user: sections.join('\n') };
}
474
/**
 * Assemble the system and user messages for summarizing the collected
 * per-batch analysis results into one final result.
 *
 * The user prompt contains, in order: goals vs current metrics, the optional
 * round history, the relevant metrics and the collected batches as JSON.
 *
 * @param {object} args - Read for `goals`, `metrics`, `roundHistory`,
 *   `collectedBatches` and the optional `promptLanguage`.
 * @returns {{system: *, user: string}} `system` is the `summarize` entry of
 *   `getSystemPrompts(language)`; `user` is the assembled prompt.
 */
function buildSummarizeMessages(args) {
    const language = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    const system = (0, loader_1.getSystemPrompts)(language).summarize;
    const english = language === 'en-US';
    // Both languages share one layout; only the wording of each line differs.
    const pick = (enText, zhText) => (english ? enText : zhText);
    const batchCount = args.collectedBatches.length;
    const sections = [
        pick('## Optimization Goals vs Current Metrics', '## 优化目标 vs 当前实际'),
        formatGoalsWithProgress(args.goals, args.metrics, language),
        '',
        ...renderRoundHistorySection(args.roundHistory, args.goals, language),
        pick('## Relevant Metrics', '## 涉及范围的完整指标(仅展示与优化目标相关的范围)'),
        formatRelevantMetrics(args.goals, args.metrics, language),
        '',
        pick(`## Child Analysis Results (${batchCount} batches)`, `## 子分析结果汇总(${batchCount} 个 batch)`),
        '```json',
        JSON.stringify(args.collectedBatches, null, 2),
        '```',
        '',
        pick('Summarize these batches into final JSON according to the system instructions.', '请按 system 指令把这些 batch 汇总成最终 JSON。'),
    ];
    return { system, user: sections.join('\n') };
}
510
/**
 * Assemble the system and user messages for generating the next prompt
 * version.
 *
 * The user prompt contains, in order: the current prompt template, the
 * placeholders that must be kept, the structured evidence bundle (or a note
 * that it is absent), the fallback error-analysis text, goals vs current
 * metrics, the optional round history and toolbox rotation hint, the relevant
 * metrics, the variable whitelists, the modifiable sections, the
 * auto-assembled output-format section, the judgment rules and the user's
 * generation guidance.
 *
 * @param {object} args - Read for `currentVersion`, `errorAnalysisText`,
 *   `analysisEvidenceBundle`, `metrics`, `goals`, `fieldWhitelist`,
 *   `optimizationHint`, `roundHistory`, `toolboxSwitchHint` and the optional
 *   `promptLanguage`.
 * @returns {{system: *, user: string}} `system` is the `generate` entry of
 *   `getSystemPrompts(language)`; `user` is the assembled prompt.
 */
function buildGenerateMessages(args) {
    const { currentVersion, errorAnalysisText, analysisEvidenceBundle, metrics, goals, fieldWhitelist, optimizationHint, roundHistory, toolboxSwitchHint, } = args;
    const language = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    const system = (0, loader_1.getSystemPrompts)(language).generate;
    const english = language === 'en-US';
    // Both languages share one layout; only the wording of each line differs.
    const pick = (enText, zhText) => (english ? enText : zhText);
    // The output-format section built from the schema is shown to the model for
    // reference only, so it can see the complete prompt without restating the
    // format inside newPromptBody.
    const autoOutputFormat = (0, shared_1.buildOutputFormatInstruction)(currentVersion.outputSchema, { language });
    const schemaSection = [];
    if (autoOutputFormat) {
        schemaSection.push(pick('## Runtime Output-Format Section (reference only; do not restate it in newPromptBody)', '## 运行时自动拼接的输出格式段(仅供参考;禁止在 newPromptBody 中复述任何输出格式 / JSON schema / 字段说明)'), pick('> The system appends this section to newPromptBody at runtime. The output format is determined only by output schema.', '> 下面这段会由系统在运行时拼接到 newPromptBody 尾部,输出格式由 output schema 唯一决定,不需要你写。'), '', autoOutputFormat);
    }
    const judgmentSection = [];
    if (currentVersion.judgmentRules) {
        judgmentSection.push(pick('## Immutable Judgment Rules', '## 不可改动的 judgment rules'), '```json', JSON.stringify(currentVersion.judgmentRules, null, 2), '```');
    }
    // Placeholders the base prompt already uses and the whitelist allows are
    // listed explicitly, so a full rewrite is less likely to drop them.
    const allowedSet = new Set(fieldWhitelist.promptVariables);
    const requiredVariables = extractVariableNames(currentVersion.body).filter((name) => allowedSet.has(name));
    const requiredVariablesSection = [];
    if (requiredVariables.length > 0) {
        requiredVariablesSection.push(pick('## Required Variable Placeholders (used by base; do not remove)', '## 必须保留的变量占位(base 已使用,禁止删除 — 硬约束 #1)'), pick('> These placeholders must appear exactly in newPromptBody. They are the only runtime path from sample data into the business model.', '> 下列占位**必须逐字、原样**出现在 newPromptBody 中(位置随意)。它们是运行时把样本数据注入业务模型的唯一通道;删掉它们模型推理时根本看不到样本,整批输出会立即塌缩到同一标签。'), '', ...requiredVariables.map((name) => `- \`{{${name}}}\``), '');
    }
    const evidenceText = analysisEvidenceBundle
        ? ['```json', JSON.stringify(analysisEvidenceBundle, null, 2), '```'].join('\n')
        : pick('(no structured evidence bundle; use fallback summary below)', '(无结构化证据包,使用下方旧摘要 fallback)');
    const { modifiableSections } = fieldWhitelist;
    const modifiableText = modifiableSections && modifiableSections.length > 0
        ? modifiableSections.map((section) => `- \`${section}\``).join('\n')
        : pick('(not constrained; rewrite as needed while respecting all other constraints)', '(未限定 — 可在不违反其它约束的前提下整体改写)');
    const hasGuidance = optimizationHint && optimizationHint.trim().length > 0;
    const guidanceText = hasGuidance ? optimizationHint : pick('(none)', '(无)');
    const sections = [
        pick(`## Current Prompt Template (v${currentVersion.versionNumber})`, `## 当前 prompt 模板(v${currentVersion.versionNumber})`),
        '```',
        currentVersion.body,
        '```',
        '',
        ...requiredVariablesSection,
        pick('## Structured Evidence Bundle (primary source)', '## 结构化错误证据包(来自 analyze / summarize 阶段,优先依据)'),
        evidenceText,
        '',
        pick('## Error Analysis Fallback Summary', '## 错误分析摘要 fallback(仅在证据包缺字段时参考)'),
        errorAnalysisText,
        '',
        pick('## Optimization Goals vs Current Metrics', '## 优化目标 vs 当前实际'),
        formatGoalsWithProgress(goals, metrics, language),
        '',
        ...renderRoundHistorySection(roundHistory, goals, language),
        ...renderToolboxSwitchHintSection(toolboxSwitchHint, language),
        pick('## Relevant Metrics', '## 涉及范围的完整指标(仅展示与优化目标相关的范围)'),
        formatRelevantMetrics(goals, metrics, language),
        '',
        pick('## promptVariables (available, immutable)', '## promptVariables(可用、不可改)'),
        formatVariableList(fieldWhitelist.promptVariables, language),
        '',
        pick('## analysisOnlyFields (forbidden in the new prompt)', '## analysisOnlyFields(严禁出现在新 prompt 中)'),
        formatAnalysisOnlyFields(fieldWhitelist.analysisOnlyFields, language),
        '',
        pick('## modifiableSections', '## modifiableSections(仅可在这些段落内改)'),
        modifiableText,
        '',
        ...schemaSection,
        '',
        ...judgmentSection,
        '',
        pick('## User Generation Guidance', '## 用户给的提示词生成指引'),
        guidanceText,
        '',
        pick('Output JSON according to the system instructions.', '请按 system 指令输出 JSON。'),
    ];
    return { system, user: sections.join('\n') };
}
647
+ // =========================
648
+ // Variable name extraction — used to validate "the new prompt can only use a subset of promptVariables"
649
+ // =========================
650
/**
 * Collect the distinct variable names referenced as `{{name}}` placeholders
 * in a prompt body, in order of first appearance.
 *
 * A name must start with an ASCII letter or underscore and continue with
 * ASCII letters, digits or underscores; whitespace inside the braces is
 * tolerated.
 *
 * @param {string} promptBody - Prompt text to scan.
 * @returns {string[]} Unique variable names.
 */
function extractVariableNames(promptBody) {
    const placeholderPattern = /\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}/g;
    // The template literal coerces the input to a string the same way RegExp#exec would.
    const found = Array.from(`${promptBody}`.matchAll(placeholderPattern), (hit) => hit[1]);
    return Array.from(new Set(found));
}
659
+ //# sourceMappingURL=prompts.js.map