@proofhound/optimization-strategy 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/dist/error-pattern-analysis/analysis-types.d.ts +45 -0
- package/dist/error-pattern-analysis/analysis-types.d.ts.map +1 -0
- package/dist/error-pattern-analysis/analysis-types.js +3 -0
- package/dist/error-pattern-analysis/analysis-types.js.map +1 -0
- package/dist/error-pattern-analysis/analyze.d.ts +81 -0
- package/dist/error-pattern-analysis/analyze.d.ts.map +1 -0
- package/dist/error-pattern-analysis/analyze.js +423 -0
- package/dist/error-pattern-analysis/analyze.js.map +1 -0
- package/dist/error-pattern-analysis/config.schema.d.ts +16 -0
- package/dist/error-pattern-analysis/config.schema.d.ts.map +1 -0
- package/dist/error-pattern-analysis/config.schema.js +26 -0
- package/dist/error-pattern-analysis/config.schema.js.map +1 -0
- package/dist/error-pattern-analysis/confusion-pairs.d.ts +37 -0
- package/dist/error-pattern-analysis/confusion-pairs.d.ts.map +1 -0
- package/dist/error-pattern-analysis/confusion-pairs.js +109 -0
- package/dist/error-pattern-analysis/confusion-pairs.js.map +1 -0
- package/dist/error-pattern-analysis/generate-initial.d.ts +36 -0
- package/dist/error-pattern-analysis/generate-initial.d.ts.map +1 -0
- package/dist/error-pattern-analysis/generate-initial.js +261 -0
- package/dist/error-pattern-analysis/generate-initial.js.map +1 -0
- package/dist/error-pattern-analysis/generate.d.ts +57 -0
- package/dist/error-pattern-analysis/generate.d.ts.map +1 -0
- package/dist/error-pattern-analysis/generate.js +369 -0
- package/dist/error-pattern-analysis/generate.js.map +1 -0
- package/dist/error-pattern-analysis/index.d.ts +8 -0
- package/dist/error-pattern-analysis/index.d.ts.map +1 -0
- package/dist/error-pattern-analysis/index.js +29 -0
- package/dist/error-pattern-analysis/index.js.map +1 -0
- package/dist/error-pattern-analysis/parse.d.ts +92 -0
- package/dist/error-pattern-analysis/parse.d.ts.map +1 -0
- package/dist/error-pattern-analysis/parse.js +456 -0
- package/dist/error-pattern-analysis/parse.js.map +1 -0
- package/dist/error-pattern-analysis/prompts/analyze-confusion.system.en-US.md +50 -0
- package/dist/error-pattern-analysis/prompts/analyze-confusion.system.md +61 -0
- package/dist/error-pattern-analysis/prompts/analyze-regression.system.en-US.md +50 -0
- package/dist/error-pattern-analysis/prompts/analyze-regression.system.md +61 -0
- package/dist/error-pattern-analysis/prompts/generate-initial.system.en-US.md +43 -0
- package/dist/error-pattern-analysis/prompts/generate-initial.system.md +49 -0
- package/dist/error-pattern-analysis/prompts/generate.system.en-US.md +53 -0
- package/dist/error-pattern-analysis/prompts/generate.system.md +68 -0
- package/dist/error-pattern-analysis/prompts/loader.d.ts +46 -0
- package/dist/error-pattern-analysis/prompts/loader.d.ts.map +1 -0
- package/dist/error-pattern-analysis/prompts/loader.js +109 -0
- package/dist/error-pattern-analysis/prompts/loader.js.map +1 -0
- package/dist/error-pattern-analysis/prompts/optimization-tips.en-US.md +25 -0
- package/dist/error-pattern-analysis/prompts/optimization-tips.md +38 -0
- package/dist/error-pattern-analysis/prompts/summarize.system.en-US.md +48 -0
- package/dist/error-pattern-analysis/prompts/summarize.system.md +69 -0
- package/dist/error-pattern-analysis/prompts.d.ts +79 -0
- package/dist/error-pattern-analysis/prompts.d.ts.map +1 -0
- package/dist/error-pattern-analysis/prompts.js +659 -0
- package/dist/error-pattern-analysis/prompts.js.map +1 -0
- package/dist/error-pattern-analysis/token-budget.d.ts +20 -0
- package/dist/error-pattern-analysis/token-budget.d.ts.map +1 -0
- package/dist/error-pattern-analysis/token-budget.js +88 -0
- package/dist/error-pattern-analysis/token-budget.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/loop/best.d.ts +3 -0
- package/dist/loop/best.d.ts.map +1 -0
- package/dist/loop/best.js +43 -0
- package/dist/loop/best.js.map +1 -0
- package/dist/loop/goals.d.ts +6 -0
- package/dist/loop/goals.d.ts.map +1 -0
- package/dist/loop/goals.js +38 -0
- package/dist/loop/goals.js.map +1 -0
- package/dist/loop/round-outcome.d.ts +14 -0
- package/dist/loop/round-outcome.d.ts.map +1 -0
- package/dist/loop/round-outcome.js +18 -0
- package/dist/loop/round-outcome.js.map +1 -0
- package/dist/loop/run-iteration-loop.d.ts +5 -0
- package/dist/loop/run-iteration-loop.d.ts.map +1 -0
- package/dist/loop/run-iteration-loop.js +247 -0
- package/dist/loop/run-iteration-loop.js.map +1 -0
- package/dist/loop/types.d.ts +190 -0
- package/dist/loop/types.d.ts.map +1 -0
- package/dist/loop/types.js +13 -0
- package/dist/loop/types.js.map +1 -0
- package/dist/registry.d.ts +5 -0
- package/dist/registry.d.ts.map +1 -0
- package/dist/registry.js +19 -0
- package/dist/registry.js.map +1 -0
- package/dist/types.d.ts +10 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +52 -0
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.analyzeFailures = analyzeFailures;
|
|
4
|
+
exports.buildRunResultForCall = buildRunResultForCall;
|
|
5
|
+
// Error-sample analysis — confusion pairs + regression + multiple LLM calls + a second LLM summary
|
|
6
|
+
// Token budget: before each LLM call, use estimateMessagesTokens to estimate the baseline; degrade when exceeding maxInputTokensPerBatch
|
|
7
|
+
// (fitSamplesToBudget → field truncation → reduce batch count)
|
|
8
|
+
const llm_client_1 = require("@proofhound/llm-client");
|
|
9
|
+
const confusion_pairs_1 = require("./confusion-pairs");
|
|
10
|
+
const prompts_1 = require("./prompts");
|
|
11
|
+
const parse_1 = require("./parse");
|
|
12
|
+
const token_budget_1 = require("./token-budget");
|
|
13
|
+
const shared_1 = require("@proofhound/shared");
|
|
14
|
+
/**
 * Analyzes the failed samples of one optimization round.
 *
 * Steps: fit the cross-round history to a token cap, bucket the failures into
 * confusion pairs and regression groups, run one LLM analysis batch per bucket
 * (one at a time, confusion buckets first), then run a second LLM call that
 * summarizes all batches and wrap the result into an evidence bundle.
 *
 * @param {object} args Round inputs (run results, samples, goals, strategyConfig, roundHistory, ...).
 * @param {object} deps Dependencies forwarded unchanged to the batch and summarize helpers.
 * @returns {Promise<object>} Summary, evidence bundle, batches, buckets, failure totals and the overall truncation flag.
 */
async function analyzeFailures(args, deps) {
    const promptLanguage = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    const { strategyConfig } = args;
    // Share one fitted history across all batches so it is not re-estimated per batch.
    // History may take at most 40% of the per-batch input budget; the rest is left for error samples and evidence.
    const historyCap = Math.floor(strategyConfig.maxInputTokensPerBatch * 0.4);
    const { fitted: fittedHistory } = (0, prompts_1.fitRoundHistoryToBudget)(args.roundHistory, historyCap, args.goals, promptLanguage);
    const batchArgs = { ...args, promptLanguage, roundHistory: fittedHistory };
    const confusionPairs = (0, confusion_pairs_1.buildConfusionPairs)({
        runResults: args.currentRunResults,
        samples: args.samples,
        whitelist: args.fieldWhitelist,
        topN: strategyConfig.topConfusionPairs,
        maxSamplesPerPair: strategyConfig.maxSamplesPerConfusionPair,
    });
    const regressionGroups = (0, confusion_pairs_1.buildRegressionGroups)({
        currentRunResults: args.currentRunResults,
        previousRunResults: args.previousRunResults,
        samples: args.samples,
        whitelist: args.fieldWhitelist,
        maxSamples: strategyConfig.maxRegressionSamples,
    });
    let totalConfusionFailures = 0;
    for (const pair of confusionPairs) {
        totalConfusionFailures += pair.count;
    }
    let totalRegressionSamples = 0;
    for (const group of regressionGroups) {
        totalRegressionSamples += group.count;
    }
    // Batches run strictly one after another: all confusion pairs, then all regression groups.
    const batches = [];
    for (const pair of confusionPairs) {
        batches.push(await runConfusionBatch(pair, batchArgs, deps));
    }
    for (const group of regressionGroups) {
        batches.push(await runRegressionBatch(group, batchArgs, deps));
    }
    let anyTruncated = batches.some((batch) => batch.llmTruncated);
    const summarized = await runSummarize(batchArgs, batches, deps, {
        totalConfusionFailures,
        totalRegressionSamples,
        truncated: anyTruncated,
    });
    const summary = summarized.summary;
    // A truncated summary marks the whole analysis as truncated.
    if (summary.truncated) {
        anyTruncated = true;
    }
    const evidenceBundle = buildAnalysisEvidenceBundle(summary, batches, {
        totalConfusionFailures,
        totalRegressionSamples,
        truncated: anyTruncated,
    });
    return {
        errorAnalysisText: summary.summary,
        summary,
        evidenceBundle,
        batches,
        confusionPairs,
        regressionGroups,
        truncated: anyTruncated,
        totalConfusionFailures,
        totalRegressionSamples,
        summarizeBudget: summarized.budget,
    };
}
|
|
80
|
+
// Per-field character cap applied when even a single sample exceeds the batch budget and has to be force-fitted (in chars, derived from ~4 chars/token)
|
|
81
|
+
const PER_FIELD_TRUNCATE_CHARS = 2_000;
|
|
82
|
+
/**
 * Executes the single LLM analysis call described by `spec` and turns the
 * response into a batch record.
 *
 * @param {object} spec Batch description: message builder, output parser, bucket identity and the sample fit result.
 * @param {object} args Round inputs (model, limiter key, strategyConfig, ids, promptLanguage).
 * @param {object} deps Dependencies forwarded to `invokeLLM`.
 * @returns {Promise<object>} Batch with normalized errorPatterns / suggestedChanges, raw content and budget stats.
 */
async function runAnalysisBatch(spec, args, deps) {
    const prompt = spec.buildMessages();
    const messages = [
        { role: 'system', content: prompt.system },
        { role: 'user', content: prompt.user },
    ];
    const params = {
        temperature: args.strategyConfig.temperature,
        maxTokens: args.strategyConfig.maxAnalysisOutputTokens,
    };
    const context = {
        source: 'optimization_analysis',
        stepName: spec.stepName,
        requestId: `optimization:${args.optimizationId}:r${args.roundNumber}:${spec.source}:${spec.requestKey}`,
        promptVersionId: args.currentVersion.id,
        promptLanguage: args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE,
    };
    const llmResult = await (0, llm_client_1.invokeLLM)({
        model: args.analysisModel,
        limiterKey: args.analysisLimiterKey,
        messages,
        params,
        context,
    }, deps);
    const parsed = spec.parseOutput(llmResult.content, llmResult.finishReason);
    // Normalization receives the bucket identity and the fallback affected count.
    const normalized = (0, parse_1.normalizeEvidenceBundle)({
        errorPatterns: parsed.errorPatterns,
        suggestedChanges: parsed.suggestedChanges,
    }, {
        source: spec.source,
        bucketKey: spec.bucketKey,
        affectedCountFallback: spec.affectedCountFallback,
    });
    const fit = spec.fitResult;
    return {
        source: spec.source,
        title: `${spec.source}: ${spec.bucketKey}`,
        llmTruncated: parsed.truncated,
        errorPatterns: normalized.errorPatterns,
        suggestedChanges: normalized.suggestedChanges,
        rawContent: parsed.rawContent,
        budget: {
            baselineInputTokens: fit.baseline,
            sampleBudgetTokens: fit.sampleBudget,
            estimatedSampleTokens: fit.estimatedSampleTokens,
            originalSampleCount: spec.originalSampleCount,
            fittedSampleCount: fit.fitted.length,
            droppedSampleCount: fit.dropped.length,
            fieldsTruncated: fit.fieldsTruncated,
        },
    };
}
|
|
127
|
+
/**
 * Runs the LLM analysis for one confusion pair (expected → predicted).
 *
 * The pair's samples are first fitted to the token budget; only the fitted
 * samples are sent to the model, while the reported original sample count and
 * fallback affected count still come from the untrimmed pair.
 *
 * @param {object} pair Confusion pair with `expected`, `predicted`, `count` and `samples`.
 * @param {object} args Round inputs used to build the prompt.
 * @param {object} deps Dependencies forwarded to `runAnalysisBatch`.
 * @returns {Promise<object>} The batch produced by `runAnalysisBatch`.
 */
async function runConfusionBatch(pair, args, deps) {
    const { expected, predicted } = pair;
    const fitResult = fitSamplesForConfusion(pair, args);
    const pairWithinBudget = { ...pair, samples: fitResult.fitted };
    // Built lazily: args are read only when runAnalysisBatch asks for the messages.
    const buildMessages = () => (0, prompts_1.buildAnalyzeConfusionMessages)({
        pair: pairWithinBudget,
        currentVersion: args.currentVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const spec = {
        source: 'confusion',
        bucketKey: `${expected}→${predicted}`,
        fitResult,
        originalSampleCount: pair.samples.length,
        affectedCountFallback: pair.count,
        buildMessages,
        parseOutput: parse_1.parseConfusionAnalysisOutput,
        stepName: 'error_pattern_analyze_confusion',
        requestKey: `${expected}_to_${predicted}`,
    };
    return runAnalysisBatch(spec, args, deps);
}
|
|
150
|
+
/**
 * Chooses which samples of a confusion pair fit into the batch token budget.
 *
 * @param {object} pair Confusion pair whose `samples` are candidates for the prompt.
 * @param {object} args Round inputs; `strategyConfig` supplies the input and output token limits.
 * @returns {object} `{ fitted, dropped, baseline, fieldsTruncated, sampleBudget, estimatedSampleTokens }`.
 */
function fitSamplesForConfusion(pair, args) {
    const { strategyConfig } = args;
    // 1) Probe: build the messages with no samples to measure the fixed overhead
    //    (this includes the already-fitted cross-round history).
    const probeMessages = (0, prompts_1.buildAnalyzeConfusionMessages)({
        pair: { ...pair, samples: [] },
        currentVersion: args.currentVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const baseline = (0, token_budget_1.estimateMessagesTokens)(probeMessages.system, probeMessages.user, strategyConfig.maxAnalysisOutputTokens);
    const sampleBudget = (0, token_budget_1.computeSampleBudget)(strategyConfig.maxInputTokensPerBatch, baseline.inputTokens);
    // 2) Regular fit against the remaining budget.
    const regularFit = (0, token_budget_1.fitSamplesToBudget)(pair.samples, sampleBudget, 0);
    const nothingFits = regularFit.fitted.length === 0 && pair.samples.length > 0;
    if (!nothingFits) {
        return {
            fitted: regularFit.fitted,
            dropped: regularFit.dropped,
            baseline: baseline.inputTokens,
            fieldsTruncated: false,
            sampleBudget,
            estimatedSampleTokens: regularFit.estimatedTokens,
        };
    }
    // 3) Samples exist but none fits: force-fit the first one with its string fields truncated.
    const clippedHead = (0, token_budget_1.truncateStringFields)(pair.samples[0], PER_FIELD_TRUNCATE_CHARS);
    return {
        fitted: [clippedHead],
        dropped: pair.samples.slice(1),
        baseline: baseline.inputTokens,
        fieldsTruncated: true,
        sampleBudget,
        estimatedSampleTokens: (0, token_budget_1.estimateTokens)(clippedHead),
    };
}
|
|
185
|
+
/**
 * Runs the LLM analysis for one regression group (bucketed by predicted value).
 *
 * The group's samples are first fitted to the token budget; only the fitted
 * samples are sent to the model, while the reported original sample count and
 * fallback affected count still come from the untrimmed group.
 *
 * @param {object} group Regression group with `predicted`, `count` and `samples`.
 * @param {object} args Round inputs used to build the prompt (including the previous version).
 * @param {object} deps Dependencies forwarded to `runAnalysisBatch`.
 * @returns {Promise<object>} The batch produced by `runAnalysisBatch`.
 */
async function runRegressionBatch(group, args, deps) {
    const fitResult = fitSamplesForRegression(group, args);
    const groupWithinBudget = { ...group, samples: fitResult.fitted };
    // Built lazily: args are read only when runAnalysisBatch asks for the messages.
    const buildMessages = () => (0, prompts_1.buildAnalyzeRegressionMessages)({
        group: groupWithinBudget,
        currentVersion: args.currentVersion,
        previousVersion: args.previousVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const spec = {
        source: 'regression',
        bucketKey: `predicted=${group.predicted}`,
        fitResult,
        originalSampleCount: group.samples.length,
        affectedCountFallback: group.count,
        buildMessages,
        parseOutput: parse_1.parseRegressionAnalysisOutput,
        stepName: 'error_pattern_analyze_regression',
        requestKey: group.predicted,
    };
    return runAnalysisBatch(spec, args, deps);
}
|
|
209
|
+
/**
 * Chooses which samples of a regression group fit into the batch token budget.
 *
 * @param {object} group Regression group whose `samples` are candidates for the prompt.
 * @param {object} args Round inputs; `strategyConfig` supplies the input and output token limits.
 * @returns {object} `{ fitted, dropped, baseline, fieldsTruncated, sampleBudget, estimatedSampleTokens }`.
 */
function fitSamplesForRegression(group, args) {
    const limits = args.strategyConfig;
    // Probe with an empty sample list to measure the fixed prompt overhead.
    const emptyGroup = { ...group, samples: [] };
    const probeMessages = (0, prompts_1.buildAnalyzeRegressionMessages)({
        group: emptyGroup,
        currentVersion: args.currentVersion,
        previousVersion: args.previousVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const baseline = (0, token_budget_1.estimateMessagesTokens)(probeMessages.system, probeMessages.user, limits.maxAnalysisOutputTokens);
    const sampleBudget = (0, token_budget_1.computeSampleBudget)(limits.maxInputTokensPerBatch, baseline.inputTokens);
    const regularFit = (0, token_budget_1.fitSamplesToBudget)(group.samples, sampleBudget, 0);
    const hasSamples = group.samples.length > 0;
    if (regularFit.fitted.length > 0 || !hasSamples) {
        // Either something fits, or there was nothing to fit in the first place.
        return {
            fitted: regularFit.fitted,
            dropped: regularFit.dropped,
            baseline: baseline.inputTokens,
            fieldsTruncated: false,
            sampleBudget,
            estimatedSampleTokens: regularFit.estimatedTokens,
        };
    }
    // Samples exist but none fits: keep only the first one, with its string fields truncated.
    const clippedHead = (0, token_budget_1.truncateStringFields)(group.samples[0], PER_FIELD_TRUNCATE_CHARS);
    return {
        fitted: [clippedHead],
        dropped: group.samples.slice(1),
        baseline: baseline.inputTokens,
        fieldsTruncated: true,
        sampleBudget,
        estimatedSampleTokens: (0, token_budget_1.estimateTokens)(clippedHead),
    };
}
|
|
242
|
+
/**
 * Assembles the version-1 evidence bundle from the parsed summary and the batch statistics.
 *
 * @param {object} summary Parsed summarize output (`summary`, `errorPatterns`, `suggestedChanges`, optional `conflicts`, `truncated`).
 * @param {Array} batches Analysis batches; only their count is recorded.
 * @param {object} stats `{ totalConfusionFailures, totalRegressionSamples, truncated }`.
 * @returns {object} Evidence bundle with a `sourceStats` section.
 */
function buildAnalysisEvidenceBundle(summary, batches, stats) {
    // `a || b` yields the operand itself, so a summary without a `truncated`
    // field would leak `undefined` into sourceStats; always record a real boolean.
    const truncated = Boolean(stats.truncated || summary.truncated);
    return {
        evidenceBundleVersion: 1,
        summary: summary.summary,
        errorPatterns: summary.errorPatterns,
        suggestedChanges: summary.suggestedChanges,
        conflicts: summary.conflicts ?? [],
        sourceStats: {
            batchCount: batches.length,
            totalConfusionFailures: stats.totalConfusionFailures,
            totalRegressionSamples: stats.totalRegressionSamples,
            truncated,
        },
    };
}
|
|
257
|
+
// Character cap applied to every string field of a collected batch when the summarize input is over budget
// (keeps long fields such as reason / change / rationale under control).
const SUMMARIZE_FIELD_TRUNCATE_CHARS = 600;
/**
 * Degrades the collected batch payloads until they fit the summarize token budget.
 *
 * Phase 1 truncates the string fields of every batch; phase 2, if still over
 * budget, keeps batches greedily with confusion batches ahead of the others.
 * The first batch in that order is always kept, even if it alone exceeds the budget.
 *
 * @param {Array} rawCollected Batches reduced to `{ source, title, payload }`.
 * @param {number} budget Token budget left for the collected batches.
 * @returns {object} `{ collected, currentTokens, fieldTruncationApplied, droppedBatchCount }`.
 */
function fitCollectedBatchesToBudget(rawCollected, budget) {
    let collected = rawCollected;
    let fieldTruncationApplied = false;
    let droppedBatchCount = 0;
    let currentTokens = (0, token_budget_1.estimateTokens)(collected);
    // Phase 1: over budget → field truncation
    if (currentTokens > budget) {
        collected = rawCollected.map((b) => (0, token_budget_1.truncateAllStringFieldsInObject)(b, SUMMARIZE_FIELD_TRUNCATE_CHARS));
        fieldTruncationApplied = true;
        currentTokens = (0, token_budget_1.estimateTokens)(collected);
    }
    // Phase 2: still over budget → drop batches (confusion is preferred = placed first).
    // Relies on Array.prototype.sort being stable to keep the original order within each source.
    if (currentTokens > budget) {
        const rank = (b) => (b.source === 'confusion' ? 0 : 1);
        const confusionFirst = [...collected].sort((a, b) => rank(a) - rank(b));
        const kept = [];
        let used = 0;
        for (const b of confusionFirst) {
            const cost = (0, token_budget_1.estimateTokens)(b);
            if (kept.length >= 1 && used + cost > budget) {
                droppedBatchCount++;
                continue;
            }
            kept.push(b);
            used += cost;
        }
        collected = kept;
        currentTokens = used;
    }
    return { collected, currentTokens, fieldTruncationApplied, droppedBatchCount };
}
/**
 * Runs the second LLM call that merges all analysis batches into one summary.
 *
 * With no batches, no LLM call is made: a fixed placeholder summary is returned
 * and the skip is logged. Otherwise the batches are fitted to the token budget,
 * sent to the model, and the response is parsed.
 *
 * @param {object} args Round inputs (with the already-fitted round history).
 * @param {Array} batches Analysis batches produced by `runAnalysisBatch`.
 * @param {object} deps Dependencies; `deps.logger.info` is used and `deps` is forwarded to `invokeLLM`.
 * @param {object} stats `{ totalConfusionFailures, totalRegressionSamples, truncated }` for the evidence bundle.
 * @returns {Promise<object>} `{ summary, budget }`; `budget` is `undefined` when the call was skipped.
 */
async function runSummarize(args, batches, deps, stats) {
    if (batches.length === 0) {
        const summary = {
            summary: '本轮没有失败样本可供分析(confusion + regression 均为空)。建议直接继续下一轮,或检查实验是否真的执行了。',
            errorPatterns: [],
            suggestedChanges: [],
            conflicts: [],
            evidenceBundleVersion: 1,
            truncated: false,
            rawContent: '',
        };
        deps.logger.info({
            optimizationId: args.optimizationId,
            roundNumber: args.roundNumber,
            reason: 'no_batches',
            confusionPairsCount: 0,
            regressionGroupsCount: 0,
            currentFailureCount: args.currentRunResults.filter((r) => r.isCorrect === false).length,
            currentRunResultsCount: args.currentRunResults.length,
            previousRunResultsCount: args.previousRunResults?.length ?? 0,
            hasPreviousRound: args.previousRunResults != null,
            samplesWithExpectedCount: args.samples.filter((s) => s.expected != null).length,
        }, 'analyze_skipped');
        return {
            summary,
            budget: undefined,
        };
    }
    const promptLanguage = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    // Probe: estimate the baseline with empty batches (including the fitted cross-round history)
    const probe = (0, prompts_1.buildSummarizeMessages)({
        goals: args.goals,
        metrics: args.metrics,
        collectedBatches: [],
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const baseline = (0, token_budget_1.estimateMessagesTokens)(probe.system, probe.user, args.strategyConfig.maxSummarizeOutputTokens);
    const budget = (0, token_budget_1.computeSampleBudget)(args.strategyConfig.maxInputTokensPerBatch, baseline.inputTokens);
    // Only the normalized patterns and suggestions of each batch are sent to the summarizer.
    const rawCollected = batches.map((b) => ({
        source: b.source,
        title: b.title,
        payload: { errorPatterns: b.errorPatterns, suggestedChanges: b.suggestedChanges },
    }));
    const { collected, currentTokens, fieldTruncationApplied, droppedBatchCount } = fitCollectedBatchesToBudget(rawCollected, budget);
    const { system, user } = (0, prompts_1.buildSummarizeMessages)({
        goals: args.goals,
        metrics: args.metrics,
        collectedBatches: collected,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const messages = [
        { role: 'system', content: system },
        { role: 'user', content: user },
    ];
    const result = await (0, llm_client_1.invokeLLM)({
        model: args.analysisModel,
        limiterKey: args.analysisLimiterKey,
        messages,
        params: {
            temperature: args.strategyConfig.temperature,
            maxTokens: args.strategyConfig.maxSummarizeOutputTokens,
        },
        context: {
            source: 'optimization_analysis',
            stepName: 'error_pattern_summarize',
            requestId: `optimization:${args.optimizationId}:r${args.roundNumber}:summarize`,
            promptVersionId: args.currentVersion.id,
            // Kept in line with the per-bucket analysis calls, which always report the prompt language.
            promptLanguage,
        },
        runResult: buildRunResultForCall({
            meta: args.runResultMeta,
            runResultId: args.analysisRunResultId,
            source: 'optimization_analysis',
            roundIndex: args.roundNumber,
            messages,
            inputVariables: {
                optimizationId: args.optimizationId,
                roundNumber: args.roundNumber,
                stepName: 'error_pattern_summarize',
                promptLanguage,
            },
        }),
        // run_results.parsed_output feeds the detail-page errorPatterns / suggestedChanges (SPEC 25 §11.3);
        // finishReason is filled when externally re-parsed; here null ensures the key fields are at least available.
        parseResponse: (content) => {
            try {
                const parsed = (0, parse_1.parseSummarizeOutput)(content, null);
                return {
                    ...parsed,
                    evidenceBundle: buildAnalysisEvidenceBundle(parsed, batches, stats),
                };
            }
            catch {
                // Deliberately best-effort: the same content is parsed again below with the
                // real finishReason, where a genuine parse failure is allowed to surface.
                return null;
            }
        },
    }, deps);
    return {
        summary: (0, parse_1.parseSummarizeOutput)(result.content, result.finishReason),
        budget: {
            baselineInputTokens: baseline.inputTokens,
            estimatedBatchesTokens: currentTokens,
            fieldTruncationApplied,
            droppedBatchCount,
        },
    };
}
|
|
397
|
+
/**
 * Builds the RunResultContext passed to invokeLLM by the analyze / generate calls.
 *
 * A context is returned only when the caller supplies both `meta` and
 * `runResultId`; otherwise the result is `undefined`. Per the original note,
 * invokeLLM guards with `runResult && runResultWriter` to decide whether to write the table.
 *
 * Per the original note, `roundIndex` is required: the detail page's
 * listOptimizationLlmRunResults filters by isNotNull(round_index), so a missing
 * value drops the whole row (errorPatterns / suggestedChanges / promptDiff disappear).
 * This function passes `roundIndex` through without validating it.
 *
 * @param {object} input `{ meta, runResultId, source, roundIndex, messages, inputVariables }`.
 * @returns {object|undefined} The run-result context, or `undefined` when `meta` or `runResultId` is missing.
 */
function buildRunResultForCall(input) {
    const { meta, runResultId } = input;
    if (!(meta && runResultId)) {
        return undefined;
    }
    // Fields that do not apply to analysis-level LLM calls are stored as explicit nulls.
    return {
        id: runResultId,
        projectId: meta.projectId,
        source: input.source,
        sourceId: meta.sourceId,
        promptVersionId: meta.promptVersionId,
        modelId: meta.modelId,
        sampleId: null,
        externalId: null,
        renderedPrompt: { messages: input.messages },
        inputVariables: input.inputVariables,
        expectedOutput: null,
        dbosWorkflowId: meta.dbosWorkflowId ?? null,
        bullmqJobId: meta.bullmqJobId ?? null,
        attempt: meta.attempt ?? 0,
        roundIndex: input.roundIndex,
    };
}
|
|
423
|
+
//# sourceMappingURL=analyze.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze.js","sourceRoot":"","sources":["../../src/error-pattern-analysis/analyze.ts"],"names":[],"mappings":";;AAmIA,0CAyEC;AAmbD,sDA0BC;AAzpBD,mGAAmG;AACnG,yIAAyI;AACzI,+DAA+D;AAC/D,uDAMgC;AAWhC,uDAM2B;AAC3B,uCAKmB;AACnB,mCASiB;AACjB,iDAOwB;AACxB,+CAAqF;AAgF9E,KAAK,UAAU,eAAe,CACnC,IAAyB,EACzB,IAA2B;IAE3B,MAAM,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,gCAAuB,CAAC;IACtE,0HAA0H;IAC1H,iGAAiG;IACjG,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,GAAG,GAAG,CAAC,CAAC;IAChF,MAAM,mBAAmB,GAAG,IAAA,iCAAuB,EAAC,IAAI,CAAC,YAAY,EAAE,UAAU,EAAE,IAAI,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;IAC/G,MAAM,qBAAqB,GAAwB;QACjD,GAAG,IAAI;QACP,cAAc;QACd,YAAY,EAAE,mBAAmB,CAAC,MAAM;KACzC,CAAC;IAEF,MAAM,cAAc,GAAG,IAAA,qCAAmB,EAAC;QACzC,UAAU,EAAE,IAAI,CAAC,iBAAiB;QAClC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,SAAS,EAAE,IAAI,CAAC,cAAc;QAC9B,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,iBAAiB;QAC3C,iBAAiB,EAAE,IAAI,CAAC,cAAc,CAAC,0BAA0B;KAClE,CAAC,CAAC;IAEH,MAAM,gBAAgB,GAAG,IAAA,uCAAqB,EAAC;QAC7C,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;QACzC,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;QAC3C,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,SAAS,EAAE,IAAI,CAAC,cAAc;QAC9B,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,oBAAoB;KACrD,CAAC,CAAC;IAEH,MAAM,sBAAsB,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACnF,MAAM,sBAAsB,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAErF,MAAM,OAAO,GAAyB,EAAE,CAAC;IACzC,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,KAAK,MAAM,IAAI,IAAI,cAAc,EAAE,CAAC;QAClC,MAAM,KAAK,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE,qBAAqB,EAAE,IAAI,CAAC,CAAC;QACzE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,YAAY;YAAE,YAAY,GAAG,IAAI,CAAC;IAC9C,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,gBAAgB,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,kBAAkB,CAAC,KAAK,EAAE,qBAAqB,EAAE,IAAI,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,YAAY;YAAE,YAAY,GAAG,IAAI,CAAC;IAC9C,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,YAAY,CAAC,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE;Q
ACpG,sBAAsB;QACtB,sBAAsB;QACtB,SAAS,EAAE,YAAY;KACxB,CAAC,CAAC;IACH,IAAI,OAAO,CAAC,SAAS;QAAE,YAAY,GAAG,IAAI,CAAC;IAC3C,MAAM,cAAc,GAAG,2BAA2B,CAAC,OAAO,EAAE,OAAO,EAAE;QACnE,sBAAsB;QACtB,sBAAsB;QACtB,SAAS,EAAE,YAAY;KACxB,CAAC,CAAC;IAEH,OAAO;QACL,iBAAiB,EAAE,OAAO,CAAC,OAAO;QAClC,OAAO;QACP,cAAc;QACd,OAAO;QACP,cAAc;QACd,gBAAgB;QAChB,SAAS,EAAE,YAAY;QACvB,sBAAsB;QACtB,sBAAsB;QACtB,eAAe;KAChB,CAAC;AACJ,CAAC;AAED,8HAA8H;AAC9H,MAAM,wBAAwB,GAAG,KAAK,CAAC;AAmCvC,KAAK,UAAU,gBAAgB,CAC7B,IAAuB,EACvB,IAAyB,EACzB,IAA2B;IAE3B,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,IAAA,sBAAS,EAC5B;QACE,KAAK,EAAE,IAAI,CAAC,aAAa;QACzB,UAAU,EAAE,IAAI,CAAC,kBAAkB;QACnC,QAAQ,EAAE;YACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;YACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;SAChC;QACD,MAAM,EAAE;YACN,WAAW,EAAE,IAAI,CAAC,cAAc,CAAC,WAAW;YAC5C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,uBAAuB;SACvD;QACD,OAAO,EAAE;YACP,MAAM,EAAE,uBAAuB;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,SAAS,EAAE,gBAAgB,IAAI,CAAC,cAAc,KAAK,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE;YACvG,eAAe,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE;YACvC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,gCAAuB;SAC/D;KACF,EACD,IAAI,CACL,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,YAAY,CAAC,CAAC;IACrE,MAAM,EAAE,aAAa,EAAE,gBAAgB,EAAE,GAAG,IAAA,+BAAuB,EACjE,EAAE,aAAa,EAAE,MAAM,CAAC,aAAa,EAAE,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,EAAE,EAClF;QACE,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,qBAAqB,EAAE,IAAI,CAAC,qBAAqB;KAClD,CACF,CAAC;IACF,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,KAAK,EAAE,GAAG,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,SAAS,EAAE;QAC1C,YAAY,EAAE,MAAM,CAAC,SAAS;QAC9B,aAAa;QACb,gBAAgB;QAChB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,MAAM,EAAE;YACN,mBAAmB,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ;YAC5C,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY;YAC/C,qBAAqB,EAAE,IAAI,CAAC,SAAS,CAAC,qBAAqB;YAC3D,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,iBAAiB,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM;YAC/C,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM;YA
CjD,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,eAAe;SAChD;KACF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,IAAmB,EACnB,IAAyB,EACzB,IAA2B;IAE3B,MAAM,SAAS,GAAG,sBAAsB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrD,MAAM,WAAW,GAAkB,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC;IAC1E,OAAO,gBAAgB,CACrB;QACE,MAAM,EAAE,WAAW;QACnB,SAAS,EAAE,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE;QAC/C,SAAS;QACT,mBAAmB,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;QACxC,qBAAqB,EAAE,IAAI,CAAC,KAAK;QACjC,aAAa,EAAE,GAAG,EAAE,CAClB,IAAA,uCAA6B,EAAC;YAC5B,IAAI,EAAE,WAAW;YACjB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;QACJ,WAAW,EAAE,oCAA4B;QACzC,QAAQ,EAAE,iCAAiC;QAC3C,UAAU,EAAE,GAAG,IAAI,CAAC,QAAQ,OAAO,IAAI,CAAC,SAAS,EAAE;KACpD,EACD,IAAI,EACJ,IAAI,CACL,CAAC;AACJ,CAAC;AAED,SAAS,sBAAsB,CAAC,IAAmB,EAAE,IAAyB;IAC5E,8HAA8H;IAC9H,MAAM,SAAS,GAAkB,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC1D,MAAM,KAAK,GAAG,IAAA,uCAA6B,EAAC;QAC1C,IAAI,EAAE,SAAS;QACf,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAA,qCAAsB,EAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,uBAAuB,CAAC,CAAC;IAC/G,MAAM,YAAY,GAAG,IAAA,kCAAmB,EAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE3G,SAAS;IACT,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,IAAA,iCAAkB,EAAC,IAAI,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC;IAE7F,gGAAgG;IAChG,IAAI,eAAe,GAAG,KAAK,CAAC;IAC5B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAA,mCAAoB,EAAC,IAAI,EAAE,wBAAwB,CAAC,CAAC;QACvE,MAAM,GAAG,CAAC,SAAS,CAAC,CAAC;QACrB,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAChC,eAAe,GAAG,IAAA,6BAAc,EAAC,SAAS,CAAC,CAAC;QAC5C,eAAe,GAAG,IAAI,CAAC;
IACzB,CAAC;IAED,OAAO;QACL,MAAM;QACN,OAAO;QACP,QAAQ,EAAE,QAAQ,CAAC,WAAW;QAC9B,eAAe;QACf,YAAY;QACZ,qBAAqB,EAAE,eAAe;KACvC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,KAAsB,EACtB,IAAyB,EACzB,IAA2B;IAE3B,MAAM,SAAS,GAAG,uBAAuB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACvD,MAAM,YAAY,GAAoB,EAAE,GAAG,KAAK,EAAE,OAAO,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC;IAC9E,OAAO,gBAAgB,CACrB;QACE,MAAM,EAAE,YAAY;QACpB,SAAS,EAAE,aAAa,KAAK,CAAC,SAAS,EAAE;QACzC,SAAS;QACT,mBAAmB,EAAE,KAAK,CAAC,OAAO,CAAC,MAAM;QACzC,qBAAqB,EAAE,KAAK,CAAC,KAAK;QAClC,aAAa,EAAE,GAAG,EAAE,CAClB,IAAA,wCAA8B,EAAC;YAC7B,KAAK,EAAE,YAAY;YACnB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;QACJ,WAAW,EAAE,qCAA6B;QAC1C,QAAQ,EAAE,kCAAkC;QAC5C,UAAU,EAAE,KAAK,CAAC,SAAS;KAC5B,EACD,IAAI,EACJ,IAAI,CACL,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAAC,KAAsB,EAAE,IAAyB;IAChF,MAAM,UAAU,GAAoB,EAAE,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC9D,MAAM,KAAK,GAAG,IAAA,wCAA8B,EAAC;QAC3C,KAAK,EAAE,UAAU;QACjB,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,eAAe,EAAE,IAAI,CAAC,eAAe;QACrC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAA,qCAAsB,EAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,uBAAuB,CAAC,CAAC;IAC/G,MAAM,YAAY,GAAG,IAAA,kCAAmB,EAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE3G,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,IAAA,iCAAkB,EAAC,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC;IAE9F,IAAI,eAAe,GAAG,KAAK,CAAC;IAC5B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAA,mCAAoB,EAAC,IAAI,EAAE,wBAAwB,CAAC,CAAC;QACvE,MAAM,GAAG,CAAC,SAAS,CAAC,CAAC;QACrB,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjC,eAAe,GAAG,IAAA,6BAAc
,EAAC,SAAS,CAAC,CAAC;QAC5C,eAAe,GAAG,IAAI,CAAC;IACzB,CAAC;IAED,OAAO;QACL,MAAM;QACN,OAAO;QACP,QAAQ,EAAE,QAAQ,CAAC,WAAW;QAC9B,eAAe;QACf,YAAY;QACZ,qBAAqB,EAAE,eAAe;KACvC,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAClC,OAAwB,EACxB,OAA6B,EAC7B,KAIC;IAED,OAAO;QACL,qBAAqB,EAAE,CAAC;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,EAAE;QAClC,WAAW,EAAE;YACX,UAAU,EAAE,OAAO,CAAC,MAAM;YAC1B,sBAAsB,EAAE,KAAK,CAAC,sBAAsB;YACpD,sBAAsB,EAAE,KAAK,CAAC,sBAAsB;YACpD,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS;SAChD;KACF,CAAC;AACJ,CAAC;AAED,kHAAkH;AAClH,MAAM,8BAA8B,GAAG,GAAG,CAAC;AAE3C,KAAK,UAAU,YAAY,CACzB,IAAyB,EACzB,OAA6B,EAC7B,IAA2B,EAC3B,KAIC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,OAAO,GAAoB;YAC/B,OAAO,EACL,kEAAkE;YACpE,aAAa,EAAE,EAAE;YACjB,gBAAgB,EAAE,EAAE;YACpB,SAAS,EAAE,EAAE;YACb,qBAAqB,EAAE,CAAC;YACxB,SAAS,EAAE,KAAK;YAChB,UAAU,EAAE,EAAE;SACf,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,IAAI,CACd;YACE,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,MAAM,EAAE,YAAY;YACpB,mBAAmB,EAAE,CAAC;YACtB,qBAAqB,EAAE,CAAC;YACxB,mBAAmB,EAAE,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,KAAK,CAAC,CAAC,MAAM;YACvF,sBAAsB,EAAE,IAAI,CAAC,iBAAiB,CAAC,MAAM;YACrD,uBAAuB,EAAE,IAAI,CAAC,kBAAkB,EAAE,MAAM,IAAI,CAAC;YAC7D,gBAAgB,EAAE,IAAI,CAAC,kBAAkB,IAAI,IAAI;YACjD,wBAAwB,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,MAAM;SAChF,EACD,iBAAiB,CAClB,CAAC;QACF,OAAO;YACL,OAAO;YACP,MAAM,EAAE,SAAS;SAClB,CAAC;IACJ,CAAC;IAED,6FAA6F;IAC7F,MAAM,KAAK,GAAG,IAAA,gCAAsB,EAAC;QACnC,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,gBAAgB,EAAE,EAAE;QACpB,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAA,qCAAsB,EAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,wBAAwB,CAAC,CAAC;IAChH,MAAM,MAAM,GAAG,IAAA,kCAAmB,EAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;IAErG,sDAAsD;
IACtD,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvC,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,OAAO,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC,aAAa,EAAE,gBAAgB,EAAE,CAAC,CAAC,gBAAgB,EAAE;KAClF,CAAC,CAAC,CAAC;IACJ,IAAI,SAAS,GAAG,YAAY,CAAC;IAC7B,IAAI,sBAAsB,GAAG,KAAK,CAAC;IACnC,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,IAAI,aAAa,GAAG,IAAA,6BAAc,EAAC,SAAS,CAAC,CAAC;IAE9C,0CAA0C;IAC1C,IAAI,aAAa,GAAG,MAAM,EAAE,CAAC;QAC3B,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAA,8CAA+B,EAAC,CAAC,EAAE,8BAA8B,CAAC,CAAC,CAAC;QACxG,sBAAsB,GAAG,IAAI,CAAC;QAC9B,aAAa,GAAG,IAAA,6BAAc,EAAC,SAAS,CAAC,CAAC;IAC5C,CAAC;IAED,oFAAoF;IACpF,IAAI,aAAa,GAAG,MAAM,EAAE,CAAC;QAC3B,MAAM,oBAAoB,GAAG,CAAC,GAAG,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACxD,CAAC,CAAC,MAAM,KAAK,WAAW,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW;YAClD,CAAC,CAAC,CAAC,CAAC;YACJ,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW;gBACpD,CAAC,CAAC,CAAC;gBACH,CAAC,CAAC,CAAC,CACR,CAAC;QACF,MAAM,IAAI,GAAqB,EAAE,CAAC;QAClC,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,oBAAoB,EAAE,CAAC;YACrC,MAAM,CAAC,GAAG,IAAA,6BAAc,EAAC,CAAC,CAAC,CAAC;YAC5B,IAAI,IAAI,GAAG,CAAC,GAAG,MAAM,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC1C,iBAAiB,EAAE,CAAC;gBACpB,SAAS;YACX,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACb,IAAI,IAAI,CAAC,CAAC;QACZ,CAAC;QACD,SAAS,GAAG,IAAI,CAAC;QACjB,aAAa,GAAG,IAAI,CAAC;IACvB,CAAC;IAED,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,IAAA,gCAAsB,EAAC;QAC9C,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,gBAAgB,EAAE,SAAS;QAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAiB;QAC7B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;QACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;KAChC,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,IAAA,sBAAS,EAC5B;QACE,KAAK,EAAE,IAAI,CAAC,aAAa;QACzB,UAAU,EAAE,IAAI,CAAC,kBAAkB;QACnC,QAAQ;QACR,MAAM,EAAE;YACN,WAAW,EAAE,IAAI,CAAC,cAAc,CAAC,WAAW;YAC5C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,wBAAwB;SACxD;QACD,OAAO,EAAE;YACP,MAAM,EAAE,uBAA
uB;YAC/B,QAAQ,EAAE,yBAAyB;YACnC,SAAS,EAAE,gBAAgB,IAAI,CAAC,cAAc,KAAK,IAAI,CAAC,WAAW,YAAY;YAC/E,eAAe,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE;SACxC;QACD,SAAS,EAAE,qBAAqB,CAAC;YAC/B,IAAI,EAAE,IAAI,CAAC,aAAa;YACxB,WAAW,EAAE,IAAI,CAAC,mBAAmB;YACrC,MAAM,EAAE,uBAAuB;YAC/B,UAAU,EAAE,IAAI,CAAC,WAAW;YAC5B,QAAQ;YACR,cAAc,EAAE;gBACd,cAAc,EAAE,IAAI,CAAC,cAAc;gBACnC,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,QAAQ,EAAE,yBAAyB;gBACnC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,gCAAuB;aAC/D;SACF,CAAC;QACF,oGAAoG;QACpG,6GAA6G;QAC7G,aAAa,EAAE,CAAC,OAAO,EAAE,EAAE;YACzB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAA,4BAAoB,EAAC,OAAO,EAAE,IAAI,CAAC,CAAC;gBACnD,OAAO;oBACL,GAAG,MAAM;oBACT,cAAc,EAAE,2BAA2B,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC;iBACpE,CAAC;YACJ,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;KACF,EACD,IAAI,CACL,CAAC;IAEF,OAAO;QACL,OAAO,EAAE,IAAA,4BAAoB,EAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,YAAY,CAAC;QAClE,MAAM,EAAE;YACN,mBAAmB,EAAE,QAAQ,CAAC,WAAW;YACzC,sBAAsB,EAAE,aAAa;YACrC,sBAAsB;YACtB,iBAAiB;SAClB;KACF,CAAC;AACJ,CAAC;AAID,yEAAyE;AACzE,0GAA0G;AAC1G,0GAA0G;AAC1G,6GAA6G;AAC7G,qHAAqH;AACrH,SAAgB,qBAAqB,CAAC,KAOrC;IACC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW;QAAE,OAAO,SAAS,CAAC;IACxD,OAAO;QACL,EAAE,EAAE,KAAK,CAAC,WAAW;QACrB,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,SAAS;QAC/B,MAAM,EAAE,KAAK,CAAC,MAAM;QACpB,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,QAAQ;QAC7B,eAAe,EAAE,KAAK,CAAC,IAAI,CAAC,eAAe;QAC3C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO;QAC3B,QAAQ,EAAE,IAAI;QACd,UAAU,EAAE,IAAI;QAChB,cAAc,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE;QAC5C,cAAc,EAAE,KAAK,CAAC,cAAc;QACpC,cAAc,EAAE,IAAI;QACpB,cAAc,EAAE,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI;QACjD,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI;QAC3C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC;QAChC,UAAU,EAAE,KAAK,CAAC,UAAU;KAC7B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Zod object schema for the error-pattern-analysis configuration.
 * Every key is a `ZodNumber` wrapped in `ZodDefault`.
 */
export declare const errorPatternAnalysisConfigSchema: z.ZodObject<
    {
        /** Estimated input-token budget of a single batch. */
        maxInputTokensPerBatch: z.ZodDefault<z.ZodNumber>;
        /** Output-token cap for the analysis stage. */
        maxAnalysisOutputTokens: z.ZodDefault<z.ZodNumber>;
        /** Output-token cap for the summarize stage. */
        maxSummarizeOutputTokens: z.ZodDefault<z.ZodNumber>;
        /** Output-token cap for the generation stage. */
        maxGenerationOutputTokens: z.ZodDefault<z.ZodNumber>;
        /** Sampling temperature. */
        temperature: z.ZodDefault<z.ZodNumber>;
        /** How many of the most frequent confusion pairs are kept. */
        topConfusionPairs: z.ZodDefault<z.ZodNumber>;
        /** Cap on the samples carried by each confusion pair. */
        maxSamplesPerConfusionPair: z.ZodDefault<z.ZodNumber>;
        /** Cap on the regression samples that are collected. */
        maxRegressionSamples: z.ZodDefault<z.ZodNumber>;
        /** Number of independent sampling rounds used for the initial prompt. */
        initialSamplingRounds: z.ZodDefault<z.ZodNumber>;
        /** Number of records sampled from the dataset in each round. */
        initialSamplesPerRound: z.ZodDefault<z.ZodNumber>;
    },
    z.core.$strip
>;
|
|
14
|
+
/** Configuration type obtained by applying `z.infer` to `errorPatternAnalysisConfigSchema`. */
export type ErrorPatternAnalysisConfig = z.infer<typeof errorPatternAnalysisConfigSchema>;
|
|
15
|
+
/** Ready-made `ErrorPatternAnalysisConfig` value exported alongside the schema. */
export declare const DEFAULT_ERROR_PATTERN_ANALYSIS_CONFIG: ErrorPatternAnalysisConfig;
|
|
16
|
+
//# sourceMappingURL=config.schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.schema.d.ts","sourceRoot":"","sources":["../../src/error-pattern-analysis/config.schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,gCAAgC;;;;;;;;;;;iBAmB3C,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gCAAgC,CAAC,CAAC;AAE1F,eAAO,MAAM,qCAAqC,EAAE,0BACR,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_ERROR_PATTERN_ANALYSIS_CONFIG = exports.errorPatternAnalysisConfigSchema = void 0;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
exports.errorPatternAnalysisConfigSchema = zod_1.z.object({
|
|
6
|
+
// Maximum estimated input tokens a single batch can absorb — over this triggers batch splitting
|
|
7
|
+
maxInputTokensPerBatch: zod_1.z.number().int().positive().default(60_000),
|
|
8
|
+
// Output token caps at each analyze stage
|
|
9
|
+
maxAnalysisOutputTokens: zod_1.z.number().int().positive().default(4096),
|
|
10
|
+
maxSummarizeOutputTokens: zod_1.z.number().int().positive().default(4096),
|
|
11
|
+
maxGenerationOutputTokens: zod_1.z.number().int().positive().default(8192),
|
|
12
|
+
temperature: zod_1.z.number().min(0).max(2).default(0.3),
|
|
13
|
+
// Take TOP N confusion pairs
|
|
14
|
+
topConfusionPairs: zod_1.z.number().int().positive().default(5),
|
|
15
|
+
// Maximum number of samples per confusion pair sent to the LLM
|
|
16
|
+
maxSamplesPerConfusionPair: zod_1.z.number().int().positive().default(8),
|
|
17
|
+
// Maximum number of regression samples sent to the LLM (truncated by latest if more)
|
|
18
|
+
maxRegressionSamples: zod_1.z.number().int().positive().default(20),
|
|
19
|
+
// from_dataset_only start: how many samples the analysis LLM uses to generate the initial prompt
|
|
20
|
+
// — initialSamplingRounds: how many independent sampling rounds are combined into observations
|
|
21
|
+
initialSamplingRounds: zod_1.z.number().int().min(1).max(10).default(1),
|
|
22
|
+
// — how many records to randomly sample from the dataset per round
|
|
23
|
+
initialSamplesPerRound: zod_1.z.number().int().min(1).max(200).default(20),
|
|
24
|
+
});
|
|
25
|
+
exports.DEFAULT_ERROR_PATTERN_ANALYSIS_CONFIG = exports.errorPatternAnalysisConfigSchema.parse({});
|
|
26
|
+
//# sourceMappingURL=config.schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.schema.js","sourceRoot":"","sources":["../../src/error-pattern-analysis/config.schema.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;AAEX,QAAA,gCAAgC,GAAG,OAAC,CAAC,MAAM,CAAC;IACvD,gGAAgG;IAChG,sBAAsB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC;IACnE,0CAA0C;IAC1C,uBAAuB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IAClE,wBAAwB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IACnE,yBAAyB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IACpE,WAAW,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;IAClD,6BAA6B;IAC7B,iBAAiB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACzD,+DAA+D;IAC/D,0BAA0B,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAClE,qFAAqF;IACrF,oBAAoB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;IAC7D,iGAAiG;IACjG,+FAA+F;IAC/F,qBAAqB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IACjE,mEAAmE;IACnE,sBAAsB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACrE,CAAC,CAAC;AAIU,QAAA,qCAAqC,GAChD,wCAAgC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { FieldWhitelist, RunResultRecord, SampleRecord } from '../loop/types';
|
|
2
|
+
/** Analysis-facing view of one sample combined with a run result for it. */
export interface SampleView {
    /** Id of the underlying sample. */
    sampleId: string;
    /** Input fields of the sample that are exposed for analysis. */
    inputForAnalysis: Record<string, unknown>;
    /** Expected label, or `null` when none is available. */
    expected: string | null;
    /** Predicted label, or `null` when none is available. */
    predicted: string | null;
    /** Optional error message taken from the run result. */
    errorMessage?: string | null;
}
|
|
9
|
+
/** One (expected label, predicted label) combination observed among incorrect run results. */
export interface ConfusionPair {
    /** Label the sample was expected to receive. */
    expected: string;
    /** Label that was predicted instead. */
    predicted: string;
    /** Number of run results that fell into this pair. */
    count: number;
    /** Ids of the samples counted in this pair. */
    sampleIds: Array<string>;
    /** Sample views attached to this pair. */
    samples: Array<SampleView>;
}
|
|
16
|
+
/** Regression samples that share the same predicted label. */
export interface RegressionGroup {
    /** Predicted label the group is keyed by. */
    predicted: string;
    /** Number of samples in the group. */
    count: number;
    /** Views of the samples in the group. */
    samples: Array<SampleView>;
}
|
|
21
|
+
/** Arguments accepted by `buildConfusionPairs`. */
export interface BuildConfusionPairsArgs {
    /** Run results to scan. */
    runResults: Array<RunResultRecord>;
    /** Samples the run results refer to. */
    samples: Array<SampleRecord>;
    /** Field whitelist applied to sample inputs. */
    whitelist: FieldWhitelist;
    /** Maximum number of pairs to return. */
    topN: number;
    /** Maximum number of sample views kept per pair. */
    maxSamplesPerPair: number;
}
|
|
28
|
+
/** Builds the list of `ConfusionPair` entries from the given run results and samples. */
export declare function buildConfusionPairs(args: BuildConfusionPairsArgs): ConfusionPair[];
|
|
29
|
+
/** Arguments accepted by `buildRegressionGroups`. */
export interface BuildRegressionGroupsArgs {
    /** Run results of the current run. */
    currentRunResults: Array<RunResultRecord>;
    /** Run results of the previous run, or `null` when there is none. */
    previousRunResults: Array<RunResultRecord> | null;
    /** Samples the run results refer to. */
    samples: Array<SampleRecord>;
    /** Field whitelist applied to sample inputs. */
    whitelist: FieldWhitelist;
    /** Maximum number of regression samples to collect. */
    maxSamples: number;
}
|
|
36
|
+
/** Builds the list of `RegressionGroup` entries by comparing the current run results with the previous ones. */
export declare function buildRegressionGroups(args: BuildRegressionGroupsArgs): RegressionGroup[];
|
|
37
|
+
//# sourceMappingURL=confusion-pairs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"confusion-pairs.d.ts","sourceRoot":"","sources":["../../src/error-pattern-analysis/confusion-pairs.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAEnF,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IAEjB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,EAAE,CAAC;IAEpB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAE9B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AA+CD,MAAM,WAAW,uBAAuB;IACtC,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,SAAS,EAAE,cAAc,CAAC;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,uBAAuB,GAAG,aAAa,EAAE,CA2BlF;AAED,MAAM,WAAW,yBAAyB;IACxC,iBAAiB,EAAE,eAAe,EAAE,CAAC;IACrC,kBAAkB,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC;IAC7C,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,SAAS,EAAE,cAAc,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,yBAAyB,GAAG,eAAe,EAAE,CA+BxF"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.buildConfusionPairs = buildConfusionPairs;
|
|
4
|
+
exports.buildRegressionGroups = buildRegressionGroups;
|
|
5
|
+
/**
 * Projects a sample's input down to the whitelisted fields.
 *
 * @param {{ input: Record<string, unknown> }} sample - Sample whose `input` is projected.
 * @param {{ promptVariables: Iterable<string>, analysisOnlyFields?: Iterable<string> | null }} whitelist -
 *   Field names to keep; `promptVariables` and `analysisOnlyFields` are merged.
 * @returns {Record<string, unknown>} A new object holding only the whitelisted fields that are
 *   own properties of `sample.input`, or `sample.input` itself when the whitelist is empty.
 */
function projectInput(sample, whitelist) {
    const allowed = new Set([
        ...whitelist.promptVariables,
        ...(whitelist.analysisOnlyFields ?? []),
    ]);
    // No fields configured → return as-is (defensive fallback)
    if (allowed.size === 0)
        return sample.input;
    const projected = {};
    for (const field of allowed) {
        // Own-property check on purpose: `in` also matches inherited members, so a whitelisted
        // name such as "toString" or "constructor" would copy Object.prototype members.
        if (Object.prototype.hasOwnProperty.call(sample.input, field))
            projected[field] = sample.input[field];
    }
    return projected;
}
|
|
20
|
+
/**
 * Converts a raw value into a label string.
 *
 * @param {unknown} value - Value to convert.
 * @returns {string | null} The string itself, the `String(...)` form of a number or boolean,
 *   and `null` for everything else (including `null` and `undefined`).
 */
function asLabel(value) {
    switch (typeof value) {
        case 'string':
            return value;
        case 'number':
        case 'boolean':
            return String(value);
        default:
            // null, undefined, objects, functions, symbols, bigints
            return null;
    }
}
|
|
29
|
+
/**
 * Assembles the analysis view of a sample together with one of its run results.
 *
 * @param sample - Sample record; `id`, `input` and `expected` are read.
 * @param rr - Run result; `decisionOutput`, `parsedOutput` and `errorMessage` are read.
 * @param whitelist - Field whitelist forwarded to `projectInput`.
 * @returns An object with `sampleId`, `inputForAnalysis`, `expected`, `predicted` and `errorMessage`.
 */
function buildSampleView(sample, rr, whitelist) {
    const sampleId = sample.id;
    const inputForAnalysis = projectInput(sample, whitelist);
    const expected = asLabel(sample.expected);
    // Prefer the decision output; fall back to the parsed output converted to a label.
    const predicted = rr.decisionOutput ?? asLabel(rr.parsedOutput);
    const errorMessage = rr.errorMessage ?? null;
    return { sampleId, inputForAnalysis, expected, predicted, errorMessage };
}
|
|
38
|
+
/**
 * Indexes an array of records by their `id` property.
 *
 * @param {Array<{ id: unknown }>} items - Records to index.
 * @returns {Map<unknown, object>} Map from `id` to record; for duplicate ids the last record wins.
 */
function indexById(items) {
    return items.reduce((byId, item) => byId.set(item.id, item), new Map());
}
|
|
41
|
+
/**
 * Indexes run results by their `sampleId` property.
 *
 * @param {Iterable<{ sampleId: unknown }>} runResults - Run results to index.
 * @returns {Map<unknown, object>} Map from `sampleId` to run result; for duplicates the last one wins.
 */
function indexResultsBySampleId(runResults) {
    const entries = Array.from(runResults, (result) => [result.sampleId, result]);
    return new Map(entries);
}
|
|
47
|
+
/**
 * Groups incorrect run results into (expected, predicted) confusion pairs.
 *
 * A run result contributes only when its sample is known, `isCorrect === false`, and both an
 * expected and a predicted label are available.
 *
 * @param args - `runResults`, `samples`, `whitelist`, `topN` and `maxSamplesPerPair`.
 * @returns The pairs sorted by descending `count`, truncated to the first `args.topN`.
 *   Each pair lists every contributing sample id but at most `args.maxSamplesPerPair` sample views.
 */
function buildConfusionPairs(args) {
    const sampleById = indexById(args.samples);
    const pairs = new Map();
    for (const rr of args.runResults) {
        const sample = sampleById.get(rr.sampleId);
        if (!sample)
            continue;
        // Only consider judgable errors (has expected + has predicted + isCorrect===false)
        if (rr.isCorrect !== false)
            continue;
        const expected = asLabel(sample.expected);
        const predicted = rr.decisionOutput ?? asLabel(rr.parsedOutput);
        if (expected == null || predicted == null)
            continue;
        // Unambiguous composite key: joining with a delimiter such as "→" would merge distinct
        // pairs whenever a label contains the delimiter ("a→b"/"c" versus "a"/"b→c").
        const key = JSON.stringify([expected, predicted]);
        let pair = pairs.get(key);
        if (!pair) {
            pair = { expected, predicted, count: 0, sampleIds: [], samples: [] };
            pairs.set(key, pair);
        }
        pair.count++;
        pair.sampleIds.push(sample.id);
        // Every id is recorded, but only the first few views are kept.
        if (pair.samples.length < args.maxSamplesPerPair) {
            pair.samples.push(buildSampleView(sample, rr, args.whitelist));
        }
    }
    return [...pairs.values()].sort((a, b) => b.count - a.count).slice(0, args.topN);
}
|
|
75
|
+
/**
 * Collects regressions — samples whose current result has `isCorrect === false` while the
 * previous result had `isCorrect === true` — and groups them by predicted label.
 *
 * @param args - `currentRunResults`, `previousRunResults`, `samples`, `whitelist` and `maxSamples`.
 * @returns Groups sorted by descending `count`; an empty array when there are no previous
 *   results or no regressions. Collection stops once `args.maxSamples` views were gathered.
 */
function buildRegressionGroups(args) {
    const hasPrevious = Boolean(args.previousRunResults) && args.previousRunResults.length !== 0;
    if (!hasPrevious) {
        return [];
    }
    const sampleById = indexById(args.samples);
    const previousBySampleId = indexResultsBySampleId(args.previousRunResults);
    const groupsByLabel = new Map();
    let collected = 0;
    for (const current of args.currentRunResults) {
        // A regression is: wrong now, right before.
        const wrongNow = current.isCorrect === false;
        const rightBefore = wrongNow && previousBySampleId.get(current.sampleId)?.isCorrect === true;
        if (!rightBefore) {
            continue;
        }
        const sample = sampleById.get(current.sampleId);
        if (!sample) {
            continue;
        }
        const view = buildSampleView(sample, current, args.whitelist);
        // Views without a predicted label share the '__unknown__' bucket.
        const label = view.predicted ?? '__unknown__';
        let group = groupsByLabel.get(label);
        if (group === undefined) {
            group = { predicted: label, count: 0, samples: [] };
            groupsByLabel.set(label, group);
        }
        group.count += 1;
        group.samples.push(view);
        collected += 1;
        if (collected >= args.maxSamples) {
            break;
        }
    }
    return Array.from(groupsByLabel.values()).sort((left, right) => right.count - left.count);
}
|
|
109
|
+
//# sourceMappingURL=confusion-pairs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"confusion-pairs.js","sourceRoot":"","sources":["../../src/error-pattern-analysis/confusion-pairs.ts"],"names":[],"mappings":";;AAiFA,kDA2BC;AAUD,sDA+BC;AAzHD,SAAS,YAAY,CAAC,MAAoB,EAAE,SAAyB;IACnE,MAAM,OAAO,GAAG,IAAI,GAAG,CAAS;QAC9B,GAAG,SAAS,CAAC,eAAe;QAC5B,GAAG,CAAC,SAAS,CAAC,kBAAkB,IAAI,EAAE,CAAC;KACxC,CAAC,CAAC;IACH,2DAA2D;IAC3D,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC;IAC5C,MAAM,SAAS,GAA4B,EAAE,CAAC;IAC9C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,KAAK,IAAI,MAAM,CAAC,KAAK;YAAE,SAAS,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,OAAO,CAAC,KAAc;IAC7B,IAAI,KAAK,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,eAAe,CACtB,MAAoB,EACpB,EAAmB,EACnB,SAAyB;IAEzB,OAAO;QACL,QAAQ,EAAE,MAAM,CAAC,EAAE;QACnB,gBAAgB,EAAE,YAAY,CAAC,MAAM,EAAE,SAAS,CAAC;QACjD,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC;QAClC,SAAS,EAAE,EAAE,CAAC,cAAc,IAAI,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC;QACxD,YAAY,EAAE,EAAE,CAAC,YAAY,IAAI,IAAI;KACtC,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAA2B,KAAU;IACrD,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;AACvD,CAAC;AAED,SAAS,sBAAsB,CAAC,UAA6B;IAC3D,MAAM,CAAC,GAAG,IAAI,GAAG,EAA2B,CAAC;IAC7C,KAAK,MAAM,EAAE,IAAI,UAAU;QAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACpD,OAAO,CAAC,CAAC;AACX,CAAC;AAUD,SAAgB,mBAAmB,CAAC,IAA6B;IAC/D,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAyB,CAAC;IAE/C,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,mFAAmF;QACnF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,SAAS,GAAG,EAAE,CAAC,cAAc,IAAI,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC,CAAC;QAChE,IAAI,EAAE,CAAC,SAAS,KAAK,KAAK;YAAE,
SAAS;QACrC,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI;YAAE,SAAS;QAEpD,MAAM,GAAG,GAAG,GAAG,QAAQ,IAAI,SAAS,EAAE,CAAC;QACvC,IAAI,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,IAAI,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;YACrE,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QACvB,CAAC;QACD,IAAI,CAAC,KAAK,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACjD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;AACnF,CAAC;AAUD,SAAgB,qBAAqB,CAAC,IAA+B;IACnE,IAAI,CAAC,IAAI,CAAC,kBAAkB,IAAI,IAAI,CAAC,kBAAkB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAChF,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,YAAY,GAAG,sBAAsB,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAErE,MAAM,eAAe,GAAiB,EAAE,CAAC;IACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAC1C,IAAI,IAAI,CAAC,SAAS,KAAK,KAAK;YAAE,SAAS;QACvC,MAAM,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YAAE,SAAS;QAC/C,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,eAAe,CAAC,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QACpE,IAAI,eAAe,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM;IACvD,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2B,CAAC;IAClD,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,IAAI,aAAa,CAAC;QAC5C,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,EAAE,CAAC;YACP,CAAC,GAAG,EAAE,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;YAC9C,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QAC
rB,CAAC;QACD,CAAC,CAAC,KAAK,EAAE,CAAC;QACV,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAChE,CAAC"}
|