@proofhound/optimization-strategy 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE +190 -0
  2. package/dist/error-pattern-analysis/analysis-types.d.ts +45 -0
  3. package/dist/error-pattern-analysis/analysis-types.d.ts.map +1 -0
  4. package/dist/error-pattern-analysis/analysis-types.js +3 -0
  5. package/dist/error-pattern-analysis/analysis-types.js.map +1 -0
  6. package/dist/error-pattern-analysis/analyze.d.ts +81 -0
  7. package/dist/error-pattern-analysis/analyze.d.ts.map +1 -0
  8. package/dist/error-pattern-analysis/analyze.js +423 -0
  9. package/dist/error-pattern-analysis/analyze.js.map +1 -0
  10. package/dist/error-pattern-analysis/config.schema.d.ts +16 -0
  11. package/dist/error-pattern-analysis/config.schema.d.ts.map +1 -0
  12. package/dist/error-pattern-analysis/config.schema.js +26 -0
  13. package/dist/error-pattern-analysis/config.schema.js.map +1 -0
  14. package/dist/error-pattern-analysis/confusion-pairs.d.ts +37 -0
  15. package/dist/error-pattern-analysis/confusion-pairs.d.ts.map +1 -0
  16. package/dist/error-pattern-analysis/confusion-pairs.js +109 -0
  17. package/dist/error-pattern-analysis/confusion-pairs.js.map +1 -0
  18. package/dist/error-pattern-analysis/generate-initial.d.ts +36 -0
  19. package/dist/error-pattern-analysis/generate-initial.d.ts.map +1 -0
  20. package/dist/error-pattern-analysis/generate-initial.js +261 -0
  21. package/dist/error-pattern-analysis/generate-initial.js.map +1 -0
  22. package/dist/error-pattern-analysis/generate.d.ts +57 -0
  23. package/dist/error-pattern-analysis/generate.d.ts.map +1 -0
  24. package/dist/error-pattern-analysis/generate.js +369 -0
  25. package/dist/error-pattern-analysis/generate.js.map +1 -0
  26. package/dist/error-pattern-analysis/index.d.ts +8 -0
  27. package/dist/error-pattern-analysis/index.d.ts.map +1 -0
  28. package/dist/error-pattern-analysis/index.js +29 -0
  29. package/dist/error-pattern-analysis/index.js.map +1 -0
  30. package/dist/error-pattern-analysis/parse.d.ts +92 -0
  31. package/dist/error-pattern-analysis/parse.d.ts.map +1 -0
  32. package/dist/error-pattern-analysis/parse.js +456 -0
  33. package/dist/error-pattern-analysis/parse.js.map +1 -0
  34. package/dist/error-pattern-analysis/prompts/analyze-confusion.system.en-US.md +50 -0
  35. package/dist/error-pattern-analysis/prompts/analyze-confusion.system.md +61 -0
  36. package/dist/error-pattern-analysis/prompts/analyze-regression.system.en-US.md +50 -0
  37. package/dist/error-pattern-analysis/prompts/analyze-regression.system.md +61 -0
  38. package/dist/error-pattern-analysis/prompts/generate-initial.system.en-US.md +43 -0
  39. package/dist/error-pattern-analysis/prompts/generate-initial.system.md +49 -0
  40. package/dist/error-pattern-analysis/prompts/generate.system.en-US.md +53 -0
  41. package/dist/error-pattern-analysis/prompts/generate.system.md +68 -0
  42. package/dist/error-pattern-analysis/prompts/loader.d.ts +46 -0
  43. package/dist/error-pattern-analysis/prompts/loader.d.ts.map +1 -0
  44. package/dist/error-pattern-analysis/prompts/loader.js +109 -0
  45. package/dist/error-pattern-analysis/prompts/loader.js.map +1 -0
  46. package/dist/error-pattern-analysis/prompts/optimization-tips.en-US.md +25 -0
  47. package/dist/error-pattern-analysis/prompts/optimization-tips.md +38 -0
  48. package/dist/error-pattern-analysis/prompts/summarize.system.en-US.md +48 -0
  49. package/dist/error-pattern-analysis/prompts/summarize.system.md +69 -0
  50. package/dist/error-pattern-analysis/prompts.d.ts +79 -0
  51. package/dist/error-pattern-analysis/prompts.d.ts.map +1 -0
  52. package/dist/error-pattern-analysis/prompts.js +659 -0
  53. package/dist/error-pattern-analysis/prompts.js.map +1 -0
  54. package/dist/error-pattern-analysis/token-budget.d.ts +20 -0
  55. package/dist/error-pattern-analysis/token-budget.d.ts.map +1 -0
  56. package/dist/error-pattern-analysis/token-budget.js +88 -0
  57. package/dist/error-pattern-analysis/token-budget.js.map +1 -0
  58. package/dist/index.d.ts +9 -0
  59. package/dist/index.d.ts.map +1 -0
  60. package/dist/index.js +27 -0
  61. package/dist/index.js.map +1 -0
  62. package/dist/loop/best.d.ts +3 -0
  63. package/dist/loop/best.d.ts.map +1 -0
  64. package/dist/loop/best.js +43 -0
  65. package/dist/loop/best.js.map +1 -0
  66. package/dist/loop/goals.d.ts +6 -0
  67. package/dist/loop/goals.d.ts.map +1 -0
  68. package/dist/loop/goals.js +38 -0
  69. package/dist/loop/goals.js.map +1 -0
  70. package/dist/loop/round-outcome.d.ts +14 -0
  71. package/dist/loop/round-outcome.d.ts.map +1 -0
  72. package/dist/loop/round-outcome.js +18 -0
  73. package/dist/loop/round-outcome.js.map +1 -0
  74. package/dist/loop/run-iteration-loop.d.ts +5 -0
  75. package/dist/loop/run-iteration-loop.d.ts.map +1 -0
  76. package/dist/loop/run-iteration-loop.js +247 -0
  77. package/dist/loop/run-iteration-loop.js.map +1 -0
  78. package/dist/loop/types.d.ts +190 -0
  79. package/dist/loop/types.d.ts.map +1 -0
  80. package/dist/loop/types.js +13 -0
  81. package/dist/loop/types.js.map +1 -0
  82. package/dist/registry.d.ts +5 -0
  83. package/dist/registry.d.ts.map +1 -0
  84. package/dist/registry.js +19 -0
  85. package/dist/registry.js.map +1 -0
  86. package/dist/types.d.ts +10 -0
  87. package/dist/types.d.ts.map +1 -0
  88. package/dist/types.js +3 -0
  89. package/dist/types.js.map +1 -0
  90. package/package.json +52 -0
@@ -0,0 +1,423 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.analyzeFailures = analyzeFailures;
4
+ exports.buildRunResultForCall = buildRunResultForCall;
5
+ // Error-sample analysis — confusion pairs + regression + multiple LLM calls + a second LLM summary
6
+ // Token budget: before each LLM call, use estimateMessagesTokens to estimate the baseline; degrade when exceeding maxInputTokensPerBatch
7
+ // (fitSamplesToBudget → field truncation → reduce batch count)
8
+ const llm_client_1 = require("@proofhound/llm-client");
9
+ const confusion_pairs_1 = require("./confusion-pairs");
10
+ const prompts_1 = require("./prompts");
11
+ const parse_1 = require("./parse");
12
+ const token_budget_1 = require("./token-budget");
13
+ const shared_1 = require("@proofhound/shared");
14
/**
 * Analyze the failed samples of one optimization round.
 *
 * Steps: fit the cross-round history to a token cap, bucket the failures into
 * confusion pairs and regression groups, run one LLM analysis call per bucket
 * (awaited one at a time, confusion pairs first), then run a final summarize
 * call over all per-bucket results.
 *
 * @param {object} args - Round inputs (run results, samples, goals, strategyConfig, roundHistory, ...).
 * @param {object} deps - Dependencies forwarded to the per-bucket and summarize calls.
 * @returns {Promise<object>} Summary text, parsed summary, evidence bundle, batches,
 *   the buckets themselves, and truncation / budget statistics.
 */
async function analyzeFailures(args, deps) {
    const { strategyConfig } = args;
    const promptLanguage = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;
    // The history is fitted once and reused by every batch. It may take at most 40% of the
    // per-batch input budget; the remainder is left for error samples and evidence.
    const historyTokenCap = Math.floor(strategyConfig.maxInputTokensPerBatch * 0.4);
    const { fitted: fittedHistory } = (0, prompts_1.fitRoundHistoryToBudget)(args.roundHistory, historyTokenCap, args.goals, promptLanguage);
    const batchArgs = { ...args, promptLanguage, roundHistory: fittedHistory };

    const confusionPairs = (0, confusion_pairs_1.buildConfusionPairs)({
        runResults: args.currentRunResults,
        samples: args.samples,
        whitelist: args.fieldWhitelist,
        topN: strategyConfig.topConfusionPairs,
        maxSamplesPerPair: strategyConfig.maxSamplesPerConfusionPair,
    });
    const regressionGroups = (0, confusion_pairs_1.buildRegressionGroups)({
        currentRunResults: args.currentRunResults,
        previousRunResults: args.previousRunResults,
        samples: args.samples,
        whitelist: args.fieldWhitelist,
        maxSamples: strategyConfig.maxRegressionSamples,
    });

    const sumCounts = (buckets) => buckets.reduce((total, bucket) => total + bucket.count, 0);
    const totalConfusionFailures = sumCounts(confusionPairs);
    const totalRegressionSamples = sumCounts(regressionGroups);

    // One LLM call per bucket, each awaited before the next one starts.
    const batches = [];
    for (const pair of confusionPairs) {
        batches.push(await runConfusionBatch(pair, batchArgs, deps));
    }
    for (const group of regressionGroups) {
        batches.push(await runRegressionBatch(group, batchArgs, deps));
    }
    const anyBatchTruncated = batches.some((batch) => batch.llmTruncated);

    const { summary, budget: summarizeBudget } = await runSummarize(batchArgs, batches, deps, {
        totalConfusionFailures,
        totalRegressionSamples,
        truncated: anyBatchTruncated,
    });
    // A truncated summarize response also marks the whole analysis as truncated.
    const truncated = anyBatchTruncated || Boolean(summary.truncated);
    const evidenceBundle = buildAnalysisEvidenceBundle(summary, batches, {
        totalConfusionFailures,
        totalRegressionSamples,
        truncated,
    });

    return {
        errorAnalysisText: summary.summary,
        summary,
        evidenceBundle,
        batches,
        confusionPairs,
        regressionGroups,
        truncated,
        totalConfusionFailures,
        totalRegressionSamples,
        summarizeBudget,
    };
}
80
+ // Per-field character cap passed to truncateStringFields when no sample fits the budget and the first sample is force-fitted (in chars, sized at ~4 chars/token)
81
+ const PER_FIELD_TRUNCATE_CHARS = 2_000;
82
/**
 * Run a single per-bucket analysis call and package its result.
 *
 * Builds the messages via `spec.buildMessages()`, invokes the LLM, parses the
 * response with `spec.parseOutput`, normalizes the patterns / suggested changes,
 * and attaches the token-budget figures recorded in `spec.fitResult`.
 *
 * @param {object} spec - Bucket description: source, bucketKey, stepName, requestKey,
 *   fitResult, originalSampleCount, affectedCountFallback, buildMessages, parseOutput.
 * @param {object} args - Round inputs (models, strategyConfig, ids, currentVersion, ...).
 * @param {object} deps - Dependencies forwarded to invokeLLM.
 * @returns {Promise<object>} The batch record (source, title, llmTruncated, errorPatterns,
 *   suggestedChanges, rawContent, budget).
 */
async function runAnalysisBatch(spec, args, deps) {
    const prompt = spec.buildMessages();
    const messages = [
        { role: 'system', content: prompt.system },
        { role: 'user', content: prompt.user },
    ];
    const requestId = `optimization:${args.optimizationId}:r${args.roundNumber}:${spec.source}:${spec.requestKey}`;
    const llmResult = await (0, llm_client_1.invokeLLM)({
        model: args.analysisModel,
        limiterKey: args.analysisLimiterKey,
        messages,
        params: {
            temperature: args.strategyConfig.temperature,
            maxTokens: args.strategyConfig.maxAnalysisOutputTokens,
        },
        context: {
            source: 'optimization_analysis',
            stepName: spec.stepName,
            requestId,
            promptVersionId: args.currentVersion.id,
            promptLanguage: args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE,
        },
    }, deps);

    const parsed = spec.parseOutput(llmResult.content, llmResult.finishReason);
    const normalized = (0, parse_1.normalizeEvidenceBundle)({
        errorPatterns: parsed.errorPatterns,
        suggestedChanges: parsed.suggestedChanges,
    }, {
        source: spec.source,
        bucketKey: spec.bucketKey,
        affectedCountFallback: spec.affectedCountFallback,
    });

    const fit = spec.fitResult;
    return {
        source: spec.source,
        title: `${spec.source}: ${spec.bucketKey}`,
        llmTruncated: parsed.truncated,
        errorPatterns: normalized.errorPatterns,
        suggestedChanges: normalized.suggestedChanges,
        rawContent: parsed.rawContent,
        budget: {
            baselineInputTokens: fit.baseline,
            sampleBudgetTokens: fit.sampleBudget,
            estimatedSampleTokens: fit.estimatedSampleTokens,
            originalSampleCount: spec.originalSampleCount,
            fittedSampleCount: fit.fitted.length,
            droppedSampleCount: fit.dropped.length,
            fieldsTruncated: fit.fieldsTruncated,
        },
    };
}
127
/**
 * Analyze one confusion pair (expected label → predicted label).
 *
 * Fits the pair's samples to the token budget, then delegates to
 * runAnalysisBatch with the confusion prompt builder and parser.
 *
 * @param {object} pair - Confusion pair with expected, predicted, count and samples.
 * @param {object} args - Round inputs.
 * @param {object} deps - Dependencies forwarded to runAnalysisBatch.
 * @returns {Promise<object>} The batch record produced by runAnalysisBatch.
 */
async function runConfusionBatch(pair, args, deps) {
    const fitResult = fitSamplesForConfusion(pair, args);
    // The prompt only sees the samples that survived budget fitting.
    const pairForPrompt = { ...pair, samples: fitResult.fitted };
    const buildMessages = () => (0, prompts_1.buildAnalyzeConfusionMessages)({
        pair: pairForPrompt,
        currentVersion: args.currentVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const spec = {
        source: 'confusion',
        bucketKey: `${pair.expected}→${pair.predicted}`,
        fitResult,
        originalSampleCount: pair.samples.length,
        affectedCountFallback: pair.count,
        buildMessages,
        parseOutput: parse_1.parseConfusionAnalysisOutput,
        stepName: 'error_pattern_analyze_confusion',
        requestKey: `${pair.expected}_to_${pair.predicted}`,
    };
    return runAnalysisBatch(spec, args, deps);
}
150
/**
 * Decide which samples of a confusion pair go into the analysis prompt.
 *
 * 1. Build the prompt with zero samples to estimate the fixed token overhead.
 * 2. Fit the pair's samples into whatever budget remains.
 * 3. If nothing fits although samples exist, keep only the first sample after
 *    passing it through truncateStringFields with PER_FIELD_TRUNCATE_CHARS.
 *
 * @param {object} pair - Confusion pair with a `samples` array.
 * @param {object} args - Round inputs (strategyConfig, currentVersion, metrics, ...).
 * @returns {{fitted: Array, dropped: Array, baseline: number, fieldsTruncated: boolean,
 *   sampleBudget: number, estimatedSampleTokens: number}} Fit outcome and budget figures.
 */
function fitSamplesForConfusion(pair, args) {
    const { strategyConfig } = args;
    const emptyPrompt = (0, prompts_1.buildAnalyzeConfusionMessages)({
        pair: { ...pair, samples: [] },
        currentVersion: args.currentVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const { inputTokens: baseline } = (0, token_budget_1.estimateMessagesTokens)(emptyPrompt.system, emptyPrompt.user, strategyConfig.maxAnalysisOutputTokens);
    const sampleBudget = (0, token_budget_1.computeSampleBudget)(strategyConfig.maxInputTokensPerBatch, baseline);

    const firstFit = (0, token_budget_1.fitSamplesToBudget)(pair.samples, sampleBudget, 0);
    const mustForceFit = firstFit.fitted.length === 0 && pair.samples.length > 0;
    if (!mustForceFit) {
        return {
            fitted: firstFit.fitted,
            dropped: firstFit.dropped,
            baseline,
            fieldsTruncated: false,
            sampleBudget,
            estimatedSampleTokens: firstFit.estimatedTokens,
        };
    }

    // Not even one sample fits: keep the first one with its fields truncated, drop the rest.
    const clipped = (0, token_budget_1.truncateStringFields)(pair.samples[0], PER_FIELD_TRUNCATE_CHARS);
    return {
        fitted: [clipped],
        dropped: pair.samples.slice(1),
        baseline,
        fieldsTruncated: true,
        sampleBudget,
        estimatedSampleTokens: (0, token_budget_1.estimateTokens)(clipped),
    };
}
185
/**
 * Analyze one regression group (bucketed by predicted label).
 *
 * Fits the group's samples to the token budget, then delegates to
 * runAnalysisBatch with the regression prompt builder and parser.
 *
 * @param {object} group - Regression group with predicted, count and samples.
 * @param {object} args - Round inputs (including previousVersion).
 * @param {object} deps - Dependencies forwarded to runAnalysisBatch.
 * @returns {Promise<object>} The batch record produced by runAnalysisBatch.
 */
async function runRegressionBatch(group, args, deps) {
    const fitResult = fitSamplesForRegression(group, args);
    // The prompt only sees the samples that survived budget fitting.
    const groupForPrompt = { ...group, samples: fitResult.fitted };
    const buildMessages = () => (0, prompts_1.buildAnalyzeRegressionMessages)({
        group: groupForPrompt,
        currentVersion: args.currentVersion,
        previousVersion: args.previousVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const spec = {
        source: 'regression',
        bucketKey: `predicted=${group.predicted}`,
        fitResult,
        originalSampleCount: group.samples.length,
        affectedCountFallback: group.count,
        buildMessages,
        parseOutput: parse_1.parseRegressionAnalysisOutput,
        stepName: 'error_pattern_analyze_regression',
        requestKey: group.predicted,
    };
    return runAnalysisBatch(spec, args, deps);
}
209
/**
 * Decide which samples of a regression group go into the analysis prompt.
 *
 * 1. Build the prompt with zero samples to estimate the fixed token overhead.
 * 2. Fit the group's samples into whatever budget remains.
 * 3. If nothing fits although samples exist, keep only the first sample after
 *    passing it through truncateStringFields with PER_FIELD_TRUNCATE_CHARS.
 *
 * @param {object} group - Regression group with a `samples` array.
 * @param {object} args - Round inputs (strategyConfig, currentVersion, previousVersion, ...).
 * @returns {{fitted: Array, dropped: Array, baseline: number, fieldsTruncated: boolean,
 *   sampleBudget: number, estimatedSampleTokens: number}} Fit outcome and budget figures.
 */
function fitSamplesForRegression(group, args) {
    const { strategyConfig } = args;
    const emptyPrompt = (0, prompts_1.buildAnalyzeRegressionMessages)({
        group: { ...group, samples: [] },
        currentVersion: args.currentVersion,
        previousVersion: args.previousVersion,
        metrics: args.metrics,
        goals: args.goals,
        fieldWhitelist: args.fieldWhitelist,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const { inputTokens: baseline } = (0, token_budget_1.estimateMessagesTokens)(emptyPrompt.system, emptyPrompt.user, strategyConfig.maxAnalysisOutputTokens);
    const sampleBudget = (0, token_budget_1.computeSampleBudget)(strategyConfig.maxInputTokensPerBatch, baseline);

    const firstFit = (0, token_budget_1.fitSamplesToBudget)(group.samples, sampleBudget, 0);
    const mustForceFit = firstFit.fitted.length === 0 && group.samples.length > 0;
    if (!mustForceFit) {
        return {
            fitted: firstFit.fitted,
            dropped: firstFit.dropped,
            baseline,
            fieldsTruncated: false,
            sampleBudget,
            estimatedSampleTokens: firstFit.estimatedTokens,
        };
    }

    // Not even one sample fits: keep the first one with its fields truncated, drop the rest.
    const clipped = (0, token_budget_1.truncateStringFields)(group.samples[0], PER_FIELD_TRUNCATE_CHARS);
    return {
        fitted: [clipped],
        dropped: group.samples.slice(1),
        baseline,
        fieldsTruncated: true,
        sampleBudget,
        estimatedSampleTokens: (0, token_budget_1.estimateTokens)(clipped),
    };
}
242
/**
 * Assemble the evidence bundle (version 1) from a parsed summary and the round statistics.
 *
 * @param {object} summary - Parsed summarize output: summary, errorPatterns,
 *   suggestedChanges, optional conflicts, optional truncated.
 * @param {Array} batches - Per-bucket batch records; only their count is used here.
 * @param {{totalConfusionFailures: number, totalRegressionSamples: number, truncated: boolean}} stats
 *   Counters and the truncation flag accumulated so far.
 * @returns {object} Bundle with summary, errorPatterns, suggestedChanges, conflicts and sourceStats.
 */
function buildAnalysisEvidenceBundle(summary, batches, stats) {
    return {
        evidenceBundleVersion: 1,
        summary: summary.summary,
        errorPatterns: summary.errorPatterns,
        suggestedChanges: summary.suggestedChanges,
        // `conflicts` may be absent on the summary; default to an empty list.
        conflicts: summary.conflicts ?? [],
        sourceStats: {
            batchCount: batches.length,
            totalConfusionFailures: stats.totalConfusionFailures,
            totalRegressionSamples: stats.totalRegressionSamples,
            // Coerce to a strict boolean: a plain `a || b` would leak `undefined`
            // when neither side is set.
            truncated: Boolean(stats.truncated || summary.truncated),
        },
    };
}
257
+ // Character cap passed to truncateAllStringFieldsInObject for each batch (long fields such as reason / change / rationale) when the summarize input exceeds its token budget
258
+ const SUMMARIZE_FIELD_TRUNCATE_CHARS = 600;
259
/**
 * Build the placeholder summary used when a round produced no analysis batches.
 *
 * The text is English for English prompt languages and the original Chinese
 * message otherwise.
 * NOTE(review): assumes English prompt-language tags start with "en" (e.g. "en-US",
 * matching the `*.en-US.md` prompt files) — confirm against @proofhound/shared.
 *
 * @param {string} promptLanguage - Resolved prompt language tag.
 * @returns {object} An empty, non-truncated summary object.
 */
function buildNoBatchesSummary(promptLanguage) {
    const useEnglish = typeof promptLanguage === 'string' && promptLanguage.toLowerCase().startsWith('en');
    return {
        summary: useEnglish
            ? 'No failed samples are available for analysis this round (both confusion and regression are empty). Consider proceeding to the next round, or check whether the experiment actually ran.'
            : '本轮没有失败样本可供分析(confusion + regression 均为空)。建议直接继续下一轮,或检查实验是否真的执行了。',
        errorPatterns: [],
        suggestedChanges: [],
        conflicts: [],
        evidenceBundleVersion: 1,
        truncated: false,
        rawContent: '',
    };
}

/**
 * Shrink the collected batch payloads until their estimated size fits the budget.
 *
 * Phase 1: if over budget, truncate every string field in each batch.
 * Phase 2: if still over budget, keep batches greedily (confusion batches are
 * considered first) and drop the ones that do not fit. The first candidate is
 * always kept, even if it alone exceeds the budget.
 *
 * @param {Array<object>} rawCollected - Batch payloads ({ source, title, payload }).
 * @param {number} budget - Token budget available for the batch payloads.
 * @returns {{collected: Array<object>, currentTokens: number,
 *   fieldTruncationApplied: boolean, droppedBatchCount: number}}
 */
function fitSummarizeBatchesToBudget(rawCollected, budget) {
    let collected = rawCollected;
    let fieldTruncationApplied = false;
    let droppedBatchCount = 0;
    let currentTokens = (0, token_budget_1.estimateTokens)(collected);

    if (currentTokens > budget) {
        collected = rawCollected.map((entry) => (0, token_budget_1.truncateAllStringFieldsInObject)(entry, SUMMARIZE_FIELD_TRUNCATE_CHARS));
        fieldTruncationApplied = true;
        currentTokens = (0, token_budget_1.estimateTokens)(collected);
    }

    if (currentTokens > budget) {
        // Stable partition: confusion batches first, original order preserved within each group.
        const isConfusion = (entry) => entry.source === 'confusion';
        const confusionFirst = [
            ...collected.filter(isConfusion),
            ...collected.filter((entry) => !isConfusion(entry)),
        ];
        const kept = [];
        let used = 0;
        for (const entry of confusionFirst) {
            const cost = (0, token_budget_1.estimateTokens)(entry);
            if (kept.length >= 1 && used + cost > budget) {
                droppedBatchCount += 1;
                continue;
            }
            kept.push(entry);
            used += cost;
        }
        collected = kept;
        currentTokens = used;
    }

    return { collected, currentTokens, fieldTruncationApplied, droppedBatchCount };
}

/**
 * Merge all per-bucket analysis batches into one summary via a second LLM call.
 *
 * With no batches, no LLM call is made: an `analyze_skipped` log entry is written
 * and a placeholder summary is returned with `budget: undefined`. Otherwise the
 * batch payloads are fitted to the token budget, the summarize prompt is sent,
 * and the response is parsed.
 *
 * @param {object} args - Round inputs (goals, metrics, strategyConfig, roundHistory, ...).
 * @param {Array<object>} batches - Batch records produced by the per-bucket analysis.
 * @param {object} deps - Dependencies; `deps.logger.info` is used on the empty path,
 *   and `deps` is forwarded to invokeLLM otherwise.
 * @param {object} stats - Counters / truncation flag embedded in the stored evidence bundle.
 * @returns {Promise<{summary: object, budget: (object|undefined)}>}
 */
async function runSummarize(args, batches, deps, stats) {
    const promptLanguage = args.promptLanguage ?? shared_1.DEFAULT_PROMPT_LANGUAGE;

    if (batches.length === 0) {
        deps.logger.info({
            optimizationId: args.optimizationId,
            roundNumber: args.roundNumber,
            reason: 'no_batches',
            confusionPairsCount: 0,
            regressionGroupsCount: 0,
            currentFailureCount: args.currentRunResults.filter((r) => r.isCorrect === false).length,
            currentRunResultsCount: args.currentRunResults.length,
            previousRunResultsCount: args.previousRunResults?.length ?? 0,
            hasPreviousRound: args.previousRunResults != null,
            samplesWithExpectedCount: args.samples.filter((s) => s.expected != null).length,
        }, 'analyze_skipped');
        return {
            summary: buildNoBatchesSummary(promptLanguage),
            budget: undefined,
        };
    }

    // Probe with zero batches to estimate the fixed prompt overhead (history included).
    const probe = (0, prompts_1.buildSummarizeMessages)({
        goals: args.goals,
        metrics: args.metrics,
        collectedBatches: [],
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const baseline = (0, token_budget_1.estimateMessagesTokens)(probe.system, probe.user, args.strategyConfig.maxSummarizeOutputTokens);
    const budget = (0, token_budget_1.computeSampleBudget)(args.strategyConfig.maxInputTokensPerBatch, baseline.inputTokens);

    const rawCollected = batches.map((b) => ({
        source: b.source,
        title: b.title,
        payload: { errorPatterns: b.errorPatterns, suggestedChanges: b.suggestedChanges },
    }));
    const { collected, currentTokens, fieldTruncationApplied, droppedBatchCount } = fitSummarizeBatchesToBudget(rawCollected, budget);

    const { system, user } = (0, prompts_1.buildSummarizeMessages)({
        goals: args.goals,
        metrics: args.metrics,
        collectedBatches: collected,
        roundHistory: args.roundHistory,
        promptLanguage: args.promptLanguage,
    });
    const messages = [
        { role: 'system', content: system },
        { role: 'user', content: user },
    ];
    const result = await (0, llm_client_1.invokeLLM)({
        model: args.analysisModel,
        limiterKey: args.analysisLimiterKey,
        messages,
        params: {
            temperature: args.strategyConfig.temperature,
            maxTokens: args.strategyConfig.maxSummarizeOutputTokens,
        },
        context: {
            source: 'optimization_analysis',
            stepName: 'error_pattern_summarize',
            requestId: `optimization:${args.optimizationId}:r${args.roundNumber}:summarize`,
            promptVersionId: args.currentVersion.id,
            // Same context field the per-bucket analysis calls send.
            promptLanguage,
        },
        runResult: buildRunResultForCall({
            meta: args.runResultMeta,
            runResultId: args.analysisRunResultId,
            source: 'optimization_analysis',
            roundIndex: args.roundNumber,
            messages,
            inputVariables: {
                optimizationId: args.optimizationId,
                roundNumber: args.roundNumber,
                stepName: 'error_pattern_summarize',
                promptLanguage,
            },
        }),
        // run_results.parsed_output feeds the detail-page errorPatterns / suggestedChanges (SPEC 25 §11.3).
        // finishReason is not available in this callback, so null is passed; the returned
        // summary below is parsed again with the real finishReason.
        parseResponse: (content) => {
            try {
                const parsed = (0, parse_1.parseSummarizeOutput)(content, null);
                return {
                    ...parsed,
                    evidenceBundle: buildAnalysisEvidenceBundle(parsed, batches, stats),
                };
            }
            catch {
                // Best effort: an unparsable response yields no parsed output here.
                return null;
            }
        },
    }, deps);

    return {
        summary: (0, parse_1.parseSummarizeOutput)(result.content, result.finishReason),
        budget: {
            baselineInputTokens: baseline.inputTokens,
            estimatedBatchesTokens: currentTokens,
            fieldTruncationApplied,
            droppedBatchCount,
        },
    };
}
397
/**
 * Build the RunResultContext handed to invokeLLM by analyze / generate calls.
 *
 * A context is produced only when the caller supplies both `meta` and
 * `runResultId`; otherwise `undefined` is returned (invokeLLM guards on
 * `runResult && runResultWriter` before writing the table).
 *
 * `roundIndex` is required: the detail page's listOptimizationLlmRunResults
 * filters by isNotNull(round_index), so a missing value drops the whole row
 * (errorPatterns / suggestedChanges / promptDiff then disappear entirely).
 *
 * @param {object} input - meta, runResultId, source, roundIndex, messages, inputVariables.
 * @returns {object|undefined} The run-result context, or undefined when it cannot be built.
 */
function buildRunResultForCall(input) {
    const { meta, runResultId } = input;
    const canPersist = Boolean(meta) && Boolean(runResultId);
    if (!canPersist) {
        return undefined;
    }
    const renderedPrompt = { messages: input.messages };
    return {
        id: runResultId,
        projectId: meta.projectId,
        source: input.source,
        sourceId: meta.sourceId,
        promptVersionId: meta.promptVersionId,
        modelId: meta.modelId,
        sampleId: null,
        externalId: null,
        renderedPrompt,
        inputVariables: input.inputVariables,
        expectedOutput: null,
        dbosWorkflowId: meta.dbosWorkflowId ?? null,
        bullmqJobId: meta.bullmqJobId ?? null,
        attempt: meta.attempt ?? 0,
        roundIndex: input.roundIndex,
    };
}
423
+ //# sourceMappingURL=analyze.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyze.js","sourceRoot":"","sources":["../../src/error-pattern-analysis/analyze.ts"],"names":[],"mappings":";;AAmIA,0CAyEC;AAmbD,sDA0BC;AAzpBD,mGAAmG;AACnG,yIAAyI;AACzI,+DAA+D;AAC/D,uDAMgC;AAWhC,uDAM2B;AAC3B,uCAKmB;AACnB,mCASiB;AACjB,iDAOwB;AACxB,+CAAqF;AAgF9E,KAAK,UAAU,eAAe,CACnC,IAAyB,EACzB,IAA2B;IAE3B,MAAM,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,gCAAuB,CAAC;IACtE,0HAA0H;IAC1H,iGAAiG;IACjG,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,GAAG,GAAG,CAAC,CAAC;IAChF,MAAM,mBAAmB,GAAG,IAAA,iCAAuB,EAAC,IAAI,CAAC,YAAY,EAAE,UAAU,EAAE,IAAI,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;IAC/G,MAAM,qBAAqB,GAAwB;QACjD,GAAG,IAAI;QACP,cAAc;QACd,YAAY,EAAE,mBAAmB,CAAC,MAAM;KACzC,CAAC;IAEF,MAAM,cAAc,GAAG,IAAA,qCAAmB,EAAC;QACzC,UAAU,EAAE,IAAI,CAAC,iBAAiB;QAClC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,SAAS,EAAE,IAAI,CAAC,cAAc;QAC9B,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,iBAAiB;QAC3C,iBAAiB,EAAE,IAAI,CAAC,cAAc,CAAC,0BAA0B;KAClE,CAAC,CAAC;IAEH,MAAM,gBAAgB,GAAG,IAAA,uCAAqB,EAAC;QAC7C,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;QACzC,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;QAC3C,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,SAAS,EAAE,IAAI,CAAC,cAAc;QAC9B,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,oBAAoB;KACrD,CAAC,CAAC;IAEH,MAAM,sBAAsB,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACnF,MAAM,sBAAsB,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAErF,MAAM,OAAO,GAAyB,EAAE,CAAC;IACzC,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,KAAK,MAAM,IAAI,IAAI,cAAc,EAAE,CAAC;QAClC,MAAM,KAAK,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE,qBAAqB,EAAE,IAAI,CAAC,CAAC;QACzE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,YAAY;YAAE,YAAY,GAAG,IAAI,CAAC;IAC9C,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,gBAAgB,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,kBAAkB,CAAC,KAAK,EAAE,qBAAqB,EAAE,IAAI,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,YAAY;YAAE,YAAY,GAAG,IAAI,CAAC;IAC9C,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,YAAY,CAAC,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE
;QACpG,sBAAsB;QACtB,sBAAsB;QACtB,SAAS,EAAE,YAAY;KACxB,CAAC,CAAC;IACH,IAAI,OAAO,CAAC,SAAS;QAAE,YAAY,GAAG,IAAI,CAAC;IAC3C,MAAM,cAAc,GAAG,2BAA2B,CAAC,OAAO,EAAE,OAAO,EAAE;QACnE,sBAAsB;QACtB,sBAAsB;QACtB,SAAS,EAAE,YAAY;KACxB,CAAC,CAAC;IAEH,OAAO;QACL,iBAAiB,EAAE,OAAO,CAAC,OAAO;QAClC,OAAO;QACP,cAAc;QACd,OAAO;QACP,cAAc;QACd,gBAAgB;QAChB,SAAS,EAAE,YAAY;QACvB,sBAAsB;QACtB,sBAAsB;QACtB,eAAe;KAChB,CAAC;AACJ,CAAC;AAED,8HAA8H;AAC9H,MAAM,wBAAwB,GAAG,KAAK,CAAC;AAmCvC,KAAK,UAAU,gBAAgB,CAC7B,IAAuB,EACvB,IAAyB,EACzB,IAA2B;IAE3B,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,IAAA,sBAAS,EAC5B;QACE,KAAK,EAAE,IAAI,CAAC,aAAa;QACzB,UAAU,EAAE,IAAI,CAAC,kBAAkB;QACnC,QAAQ,EAAE;YACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;YACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;SAChC;QACD,MAAM,EAAE;YACN,WAAW,EAAE,IAAI,CAAC,cAAc,CAAC,WAAW;YAC5C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,uBAAuB;SACvD;QACD,OAAO,EAAE;YACP,MAAM,EAAE,uBAAuB;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,SAAS,EAAE,gBAAgB,IAAI,CAAC,cAAc,KAAK,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE;YACvG,eAAe,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE;YACvC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,gCAAuB;SAC/D;KACF,EACD,IAAI,CACL,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,YAAY,CAAC,CAAC;IACrE,MAAM,EAAE,aAAa,EAAE,gBAAgB,EAAE,GAAG,IAAA,+BAAuB,EACjE,EAAE,aAAa,EAAE,MAAM,CAAC,aAAa,EAAE,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,EAAE,EAClF;QACE,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,qBAAqB,EAAE,IAAI,CAAC,qBAAqB;KAClD,CACF,CAAC;IACF,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,KAAK,EAAE,GAAG,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,SAAS,EAAE;QAC1C,YAAY,EAAE,MAAM,CAAC,SAAS;QAC9B,aAAa;QACb,gBAAgB;QAChB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,MAAM,EAAE;YACN,mBAAmB,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ;YAC5C,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY;YAC/C,qBAAqB,EAAE,IAAI,CAAC,SAAS,CAAC,qBAAqB;YAC3D,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,iBAAiB,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM;YAC/C,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM;
YACjD,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,eAAe;SAChD;KACF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,IAAmB,EACnB,IAAyB,EACzB,IAA2B;IAE3B,MAAM,SAAS,GAAG,sBAAsB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrD,MAAM,WAAW,GAAkB,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC;IAC1E,OAAO,gBAAgB,CACrB;QACE,MAAM,EAAE,WAAW;QACnB,SAAS,EAAE,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE;QAC/C,SAAS;QACT,mBAAmB,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;QACxC,qBAAqB,EAAE,IAAI,CAAC,KAAK;QACjC,aAAa,EAAE,GAAG,EAAE,CAClB,IAAA,uCAA6B,EAAC;YAC5B,IAAI,EAAE,WAAW;YACjB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;QACJ,WAAW,EAAE,oCAA4B;QACzC,QAAQ,EAAE,iCAAiC;QAC3C,UAAU,EAAE,GAAG,IAAI,CAAC,QAAQ,OAAO,IAAI,CAAC,SAAS,EAAE;KACpD,EACD,IAAI,EACJ,IAAI,CACL,CAAC;AACJ,CAAC;AAED,SAAS,sBAAsB,CAAC,IAAmB,EAAE,IAAyB;IAC5E,8HAA8H;IAC9H,MAAM,SAAS,GAAkB,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC1D,MAAM,KAAK,GAAG,IAAA,uCAA6B,EAAC;QAC1C,IAAI,EAAE,SAAS;QACf,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAA,qCAAsB,EAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,uBAAuB,CAAC,CAAC;IAC/G,MAAM,YAAY,GAAG,IAAA,kCAAmB,EAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE3G,SAAS;IACT,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,IAAA,iCAAkB,EAAC,IAAI,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC;IAE7F,gGAAgG;IAChG,IAAI,eAAe,GAAG,KAAK,CAAC;IAC5B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAA,mCAAoB,EAAC,IAAI,EAAE,wBAAwB,CAAC,CAAC;QACvE,MAAM,GAAG,CAAC,SAAS,CAAC,CAAC;QACrB,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAChC,eAAe,GAAG,IAAA,6BAAc,EAAC,SAAS,CAAC,CAAC;QAC5C,eAAe,GAAG,IAAI,CAA
C;IACzB,CAAC;IAED,OAAO;QACL,MAAM;QACN,OAAO;QACP,QAAQ,EAAE,QAAQ,CAAC,WAAW;QAC9B,eAAe;QACf,YAAY;QACZ,qBAAqB,EAAE,eAAe;KACvC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,KAAsB,EACtB,IAAyB,EACzB,IAA2B;IAE3B,MAAM,SAAS,GAAG,uBAAuB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACvD,MAAM,YAAY,GAAoB,EAAE,GAAG,KAAK,EAAE,OAAO,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC;IAC9E,OAAO,gBAAgB,CACrB;QACE,MAAM,EAAE,YAAY;QACpB,SAAS,EAAE,aAAa,KAAK,CAAC,SAAS,EAAE;QACzC,SAAS;QACT,mBAAmB,EAAE,KAAK,CAAC,OAAO,CAAC,MAAM;QACzC,qBAAqB,EAAE,KAAK,CAAC,KAAK;QAClC,aAAa,EAAE,GAAG,EAAE,CAClB,IAAA,wCAA8B,EAAC;YAC7B,KAAK,EAAE,YAAY;YACnB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;QACJ,WAAW,EAAE,qCAA6B;QAC1C,QAAQ,EAAE,kCAAkC;QAC5C,UAAU,EAAE,KAAK,CAAC,SAAS;KAC5B,EACD,IAAI,EACJ,IAAI,CACL,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAAC,KAAsB,EAAE,IAAyB;IAChF,MAAM,UAAU,GAAoB,EAAE,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC9D,MAAM,KAAK,GAAG,IAAA,wCAA8B,EAAC;QAC3C,KAAK,EAAE,UAAU;QACjB,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,eAAe,EAAE,IAAI,CAAC,eAAe;QACrC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,cAAc,EAAE,IAAI,CAAC,cAAc;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAA,qCAAsB,EAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,uBAAuB,CAAC,CAAC;IAC/G,MAAM,YAAY,GAAG,IAAA,kCAAmB,EAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE3G,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,IAAA,iCAAkB,EAAC,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC;IAE9F,IAAI,eAAe,GAAG,KAAK,CAAC;IAC5B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAA,mCAAoB,EAAC,IAAI,EAAE,wBAAwB,CAAC,CAAC;QACvE,MAAM,GAAG,CAAC,SAAS,CAAC,CAAC;QACrB,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjC,eAAe,GAAG,IAAA,6BA
Ac,EAAC,SAAS,CAAC,CAAC;QAC5C,eAAe,GAAG,IAAI,CAAC;IACzB,CAAC;IAED,OAAO;QACL,MAAM;QACN,OAAO;QACP,QAAQ,EAAE,QAAQ,CAAC,WAAW;QAC9B,eAAe;QACf,YAAY;QACZ,qBAAqB,EAAE,eAAe;KACvC,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAClC,OAAwB,EACxB,OAA6B,EAC7B,KAIC;IAED,OAAO;QACL,qBAAqB,EAAE,CAAC;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,EAAE;QAClC,WAAW,EAAE;YACX,UAAU,EAAE,OAAO,CAAC,MAAM;YAC1B,sBAAsB,EAAE,KAAK,CAAC,sBAAsB;YACpD,sBAAsB,EAAE,KAAK,CAAC,sBAAsB;YACpD,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS;SAChD;KACF,CAAC;AACJ,CAAC;AAED,kHAAkH;AAClH,MAAM,8BAA8B,GAAG,GAAG,CAAC;AAE3C,KAAK,UAAU,YAAY,CACzB,IAAyB,EACzB,OAA6B,EAC7B,IAA2B,EAC3B,KAIC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,OAAO,GAAoB;YAC/B,OAAO,EACL,kEAAkE;YACpE,aAAa,EAAE,EAAE;YACjB,gBAAgB,EAAE,EAAE;YACpB,SAAS,EAAE,EAAE;YACb,qBAAqB,EAAE,CAAC;YACxB,SAAS,EAAE,KAAK;YAChB,UAAU,EAAE,EAAE;SACf,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,IAAI,CACd;YACE,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,MAAM,EAAE,YAAY;YACpB,mBAAmB,EAAE,CAAC;YACtB,qBAAqB,EAAE,CAAC;YACxB,mBAAmB,EAAE,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,KAAK,CAAC,CAAC,MAAM;YACvF,sBAAsB,EAAE,IAAI,CAAC,iBAAiB,CAAC,MAAM;YACrD,uBAAuB,EAAE,IAAI,CAAC,kBAAkB,EAAE,MAAM,IAAI,CAAC;YAC7D,gBAAgB,EAAE,IAAI,CAAC,kBAAkB,IAAI,IAAI;YACjD,wBAAwB,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,MAAM;SAChF,EACD,iBAAiB,CAClB,CAAC;QACF,OAAO;YACL,OAAO;YACP,MAAM,EAAE,SAAS;SAClB,CAAC;IACJ,CAAC;IAED,6FAA6F;IAC7F,MAAM,KAAK,GAAG,IAAA,gCAAsB,EAAC;QACnC,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,gBAAgB,EAAE,EAAE;QACpB,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAA,qCAAsB,EAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,wBAAwB,CAAC,CAAC;IAChH,MAAM,MAAM,GAAG,IAAA,kCAAmB,EAAC,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;IAErG,sDAAs
D;IACtD,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvC,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,OAAO,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC,aAAa,EAAE,gBAAgB,EAAE,CAAC,CAAC,gBAAgB,EAAE;KAClF,CAAC,CAAC,CAAC;IACJ,IAAI,SAAS,GAAG,YAAY,CAAC;IAC7B,IAAI,sBAAsB,GAAG,KAAK,CAAC;IACnC,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,IAAI,aAAa,GAAG,IAAA,6BAAc,EAAC,SAAS,CAAC,CAAC;IAE9C,0CAA0C;IAC1C,IAAI,aAAa,GAAG,MAAM,EAAE,CAAC;QAC3B,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAA,8CAA+B,EAAC,CAAC,EAAE,8BAA8B,CAAC,CAAC,CAAC;QACxG,sBAAsB,GAAG,IAAI,CAAC;QAC9B,aAAa,GAAG,IAAA,6BAAc,EAAC,SAAS,CAAC,CAAC;IAC5C,CAAC;IAED,oFAAoF;IACpF,IAAI,aAAa,GAAG,MAAM,EAAE,CAAC;QAC3B,MAAM,oBAAoB,GAAG,CAAC,GAAG,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACxD,CAAC,CAAC,MAAM,KAAK,WAAW,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW;YAClD,CAAC,CAAC,CAAC,CAAC;YACJ,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW;gBACpD,CAAC,CAAC,CAAC;gBACH,CAAC,CAAC,CAAC,CACR,CAAC;QACF,MAAM,IAAI,GAAqB,EAAE,CAAC;QAClC,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,oBAAoB,EAAE,CAAC;YACrC,MAAM,CAAC,GAAG,IAAA,6BAAc,EAAC,CAAC,CAAC,CAAC;YAC5B,IAAI,IAAI,GAAG,CAAC,GAAG,MAAM,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC1C,iBAAiB,EAAE,CAAC;gBACpB,SAAS;YACX,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACb,IAAI,IAAI,CAAC,CAAC;QACZ,CAAC;QACD,SAAS,GAAG,IAAI,CAAC;QACjB,aAAa,GAAG,IAAI,CAAC;IACvB,CAAC;IAED,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,IAAA,gCAAsB,EAAC;QAC9C,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,gBAAgB,EAAE,SAAS;QAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAiB;QAC7B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;QACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;KAChC,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,IAAA,sBAAS,EAC5B;QACE,KAAK,EAAE,IAAI,CAAC,aAAa;QACzB,UAAU,EAAE,IAAI,CAAC,kBAAkB;QACnC,QAAQ;QACR,MAAM,EAAE;YACN,WAAW,EAAE,IAAI,CAAC,cAAc,CAAC,WAAW;YAC5C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,wBAAwB;SACxD;QACD,OAAO,EAAE;YACP,MAAM,EAAE,uB
AAuB;YAC/B,QAAQ,EAAE,yBAAyB;YACnC,SAAS,EAAE,gBAAgB,IAAI,CAAC,cAAc,KAAK,IAAI,CAAC,WAAW,YAAY;YAC/E,eAAe,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE;SACxC;QACD,SAAS,EAAE,qBAAqB,CAAC;YAC/B,IAAI,EAAE,IAAI,CAAC,aAAa;YACxB,WAAW,EAAE,IAAI,CAAC,mBAAmB;YACrC,MAAM,EAAE,uBAAuB;YAC/B,UAAU,EAAE,IAAI,CAAC,WAAW;YAC5B,QAAQ;YACR,cAAc,EAAE;gBACd,cAAc,EAAE,IAAI,CAAC,cAAc;gBACnC,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,QAAQ,EAAE,yBAAyB;gBACnC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,gCAAuB;aAC/D;SACF,CAAC;QACF,oGAAoG;QACpG,6GAA6G;QAC7G,aAAa,EAAE,CAAC,OAAO,EAAE,EAAE;YACzB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAA,4BAAoB,EAAC,OAAO,EAAE,IAAI,CAAC,CAAC;gBACnD,OAAO;oBACL,GAAG,MAAM;oBACT,cAAc,EAAE,2BAA2B,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC;iBACpE,CAAC;YACJ,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;KACF,EACD,IAAI,CACL,CAAC;IAEF,OAAO;QACL,OAAO,EAAE,IAAA,4BAAoB,EAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,YAAY,CAAC;QAClE,MAAM,EAAE;YACN,mBAAmB,EAAE,QAAQ,CAAC,WAAW;YACzC,sBAAsB,EAAE,aAAa;YACrC,sBAAsB;YACtB,iBAAiB;SAClB;KACF,CAAC;AACJ,CAAC;AAID,yEAAyE;AACzE,0GAA0G;AAC1G,0GAA0G;AAC1G,6GAA6G;AAC7G,qHAAqH;AACrH,SAAgB,qBAAqB,CAAC,KAOrC;IACC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW;QAAE,OAAO,SAAS,CAAC;IACxD,OAAO;QACL,EAAE,EAAE,KAAK,CAAC,WAAW;QACrB,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,SAAS;QAC/B,MAAM,EAAE,KAAK,CAAC,MAAM;QACpB,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,QAAQ;QAC7B,eAAe,EAAE,KAAK,CAAC,IAAI,CAAC,eAAe;QAC3C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO;QAC3B,QAAQ,EAAE,IAAI;QACd,UAAU,EAAE,IAAI;QAChB,cAAc,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE;QAC5C,cAAc,EAAE,KAAK,CAAC,cAAc;QACpC,cAAc,EAAE,IAAI;QACpB,cAAc,EAAE,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI;QACjD,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI;QAC3C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC;QAChC,UAAU,EAAE,KAAK,CAAC,UAAU;KAC7B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,16 @@
1
+ import { z } from 'zod';
2
// Zod object schema for the error-pattern-analysis configuration.
// Every field is a number with a default, so an empty object parses to a complete config.
+ export declare const errorPatternAnalysisConfigSchema: z.ZodObject<{
3
// Maximum estimated input tokens a single batch can absorb; exceeding it triggers batch splitting.
// Positive integer, default 60000.
+ maxInputTokensPerBatch: z.ZodDefault<z.ZodNumber>;
4
// Output token cap for the analysis stage. Positive integer, default 4096.
+ maxAnalysisOutputTokens: z.ZodDefault<z.ZodNumber>;
5
// Output token cap for the summarize stage. Positive integer, default 4096.
+ maxSummarizeOutputTokens: z.ZodDefault<z.ZodNumber>;
6
// Output token cap for the generation stage. Positive integer, default 8192.
+ maxGenerationOutputTokens: z.ZodDefault<z.ZodNumber>;
7
// Number constrained to the range 0..2, default 0.3.
+ temperature: z.ZodDefault<z.ZodNumber>;
8
// How many of the top confusion pairs to take. Positive integer, default 5.
+ topConfusionPairs: z.ZodDefault<z.ZodNumber>;
9
// Maximum number of samples per confusion pair sent to the LLM. Positive integer, default 8.
+ maxSamplesPerConfusionPair: z.ZodDefault<z.ZodNumber>;
10
// Maximum number of regression samples sent to the LLM. Positive integer, default 20.
+ maxRegressionSamples: z.ZodDefault<z.ZodNumber>;
11
// from_dataset_only start: how many independent sampling rounds are combined into observations.
// Integer in 1..10, default 1.
+ initialSamplingRounds: z.ZodDefault<z.ZodNumber>;
12
// from_dataset_only start: how many records to randomly sample from the dataset per round.
// Integer in 1..200, default 20.
+ initialSamplesPerRound: z.ZodDefault<z.ZodNumber>;
13
+ }, z.core.$strip>;
14
// Parsed (output) shape of the schema above.
+ export type ErrorPatternAnalysisConfig = z.infer<typeof errorPatternAnalysisConfigSchema>;
15
// Configuration obtained by parsing an empty object, i.e. every field at its default.
+ export declare const DEFAULT_ERROR_PATTERN_ANALYSIS_CONFIG: ErrorPatternAnalysisConfig;
16
+ //# sourceMappingURL=config.schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.schema.d.ts","sourceRoot":"","sources":["../../src/error-pattern-analysis/config.schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,gCAAgC;;;;;;;;;;;iBAmB3C,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gCAAgC,CAAC,CAAC;AAE1F,eAAO,MAAM,qCAAqC,EAAE,0BACR,CAAC"}
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_ERROR_PATTERN_ANALYSIS_CONFIG = exports.errorPatternAnalysisConfigSchema = void 0;
4
+ const zod_1 = require("zod");
5
+ exports.errorPatternAnalysisConfigSchema = zod_1.z.object({
6
+ // Maximum estimated input tokens a single batch can absorb — over this triggers batch splitting
7
+ maxInputTokensPerBatch: zod_1.z.number().int().positive().default(60_000),
8
+ // Output token caps at each analyze stage
9
+ maxAnalysisOutputTokens: zod_1.z.number().int().positive().default(4096),
10
+ maxSummarizeOutputTokens: zod_1.z.number().int().positive().default(4096),
11
+ maxGenerationOutputTokens: zod_1.z.number().int().positive().default(8192),
12
+ temperature: zod_1.z.number().min(0).max(2).default(0.3),
13
+ // Take TOP N confusion pairs
14
+ topConfusionPairs: zod_1.z.number().int().positive().default(5),
15
+ // Maximum number of samples per confusion pair sent to the LLM
16
+ maxSamplesPerConfusionPair: zod_1.z.number().int().positive().default(8),
17
+ // Maximum number of regression samples sent to the LLM (truncated by latest if more)
18
+ maxRegressionSamples: zod_1.z.number().int().positive().default(20),
19
+ // from_dataset_only start: how many samples the analysis LLM uses to generate the initial prompt
20
+ // — initialSamplingRounds: how many independent sampling rounds are combined into observations
21
+ initialSamplingRounds: zod_1.z.number().int().min(1).max(10).default(1),
22
+ // — how many records to randomly sample from the dataset per round
23
+ initialSamplesPerRound: zod_1.z.number().int().min(1).max(200).default(20),
24
+ });
25
+ exports.DEFAULT_ERROR_PATTERN_ANALYSIS_CONFIG = exports.errorPatternAnalysisConfigSchema.parse({});
26
+ //# sourceMappingURL=config.schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.schema.js","sourceRoot":"","sources":["../../src/error-pattern-analysis/config.schema.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;AAEX,QAAA,gCAAgC,GAAG,OAAC,CAAC,MAAM,CAAC;IACvD,gGAAgG;IAChG,sBAAsB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC;IACnE,0CAA0C;IAC1C,uBAAuB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IAClE,wBAAwB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IACnE,yBAAyB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IACpE,WAAW,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;IAClD,6BAA6B;IAC7B,iBAAiB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACzD,+DAA+D;IAC/D,0BAA0B,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAClE,qFAAqF;IACrF,oBAAoB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;IAC7D,iGAAiG;IACjG,+FAA+F;IAC/F,qBAAqB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IACjE,mEAAmE;IACnE,sBAAsB,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACrE,CAAC,CAAC;AAIU,QAAA,qCAAqC,GAChD,wCAAgC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC"}
@@ -0,0 +1,37 @@
1
+ import type { FieldWhitelist, RunResultRecord, SampleRecord } from '../loop/types';
2
// One sample paired with one run result, reduced to what is handed to the analysis step.
+ export interface SampleView {
3
// Id of the underlying sample.
+ sampleId: string;
4
// Sample input restricted to the whitelisted fields (prompt variables plus analysis-only fields);
// the whole input is used when the whitelist names no fields.
+ inputForAnalysis: Record<string, unknown>;
5
// Expected label as a string; null when the sample's expected value is missing or not a string/number/boolean.
+ expected: string | null;
6
// The run result's decision output, falling back to its parsed output when that is a string/number/boolean.
+ predicted: string | null;
7
// Error message carried over from the run result; null when the result has none.
+ errorMessage?: string | null;
8
+ }
9
// Aggregate of all incorrect run results that share the same expected and predicted label.
+ export interface ConfusionPair {
10
// Label the samples should have received.
+ expected: string;
11
// Label that was produced instead.
+ predicted: string;
12
// Number of incorrect results that fell into this pair.
+ count: number;
13
// Ids of every sample counted in this pair (not capped).
+ sampleIds: string[];
14
// Sample views for this pair, capped at maxSamplesPerPair; may be shorter than sampleIds.
+ samples: SampleView[];
15
+ }
16
// Regressed samples (correct in the previous run, incorrect in the current one) grouped by predicted label.
+ export interface RegressionGroup {
17
// Predicted label shared by the group; '__unknown__' when a view has no predicted label.
+ predicted: string;
18
// Number of views in the group.
+ count: number;
19
// The regressed sample views belonging to this group.
+ samples: SampleView[];
20
+ }
21
// Arguments accepted by buildConfusionPairs.
+ export interface BuildConfusionPairsArgs {
22
// Run results to scan; only those with isCorrect === false contribute.
+ runResults: RunResultRecord[];
23
// Samples the results refer to; results whose sampleId matches no sample are skipped.
+ samples: SampleRecord[];
24
// Determines which input fields are copied into each sample view.
+ whitelist: FieldWhitelist;
25
// Number of pairs kept after sorting by count, largest first.
+ topN: number;
26
// Maximum number of sample views stored on each pair.
+ maxSamplesPerPair: number;
27
+ }
28
// Groups incorrect run results by (expected, predicted) label and returns the topN pairs, most frequent first.
// Results lacking an expected or a predicted label are ignored.
+ export declare function buildConfusionPairs(args: BuildConfusionPairsArgs): ConfusionPair[];
29
// Arguments accepted by buildRegressionGroups.
+ export interface BuildRegressionGroupsArgs {
30
// Results of the current run; only those with isCorrect === false are candidates.
+ currentRunResults: RunResultRecord[];
31
// Results of the previous run; null or an empty array makes the function return no groups.
+ previousRunResults: RunResultRecord[] | null;
32
// Samples the results refer to; results whose sampleId matches no sample are skipped.
+ samples: SampleRecord[];
33
// Determines which input fields are copied into each sample view.
+ whitelist: FieldWhitelist;
34
// Cap on the total number of regressed samples collected across all groups.
+ maxSamples: number;
35
+ }
36
// Collects samples that were correct in the previous run but are incorrect in the current one,
// groups them by predicted label and returns the groups, largest first.
+ export declare function buildRegressionGroups(args: BuildRegressionGroupsArgs): RegressionGroup[];
37
+ //# sourceMappingURL=confusion-pairs.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"confusion-pairs.d.ts","sourceRoot":"","sources":["../../src/error-pattern-analysis/confusion-pairs.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAEnF,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IAEjB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,EAAE,CAAC;IAEpB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAE9B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AA+CD,MAAM,WAAW,uBAAuB;IACtC,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,SAAS,EAAE,cAAc,CAAC;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,uBAAuB,GAAG,aAAa,EAAE,CA2BlF;AAED,MAAM,WAAW,yBAAyB;IACxC,iBAAiB,EAAE,eAAe,EAAE,CAAC;IACrC,kBAAkB,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC;IAC7C,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,SAAS,EAAE,cAAc,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,yBAAyB,GAAG,eAAe,EAAE,CA+BxF"}
@@ -0,0 +1,109 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildConfusionPairs = buildConfusionPairs;
4
+ exports.buildRegressionGroups = buildRegressionGroups;
5
/**
 * Reduces a sample's input to the fields the analysis step is allowed to see.
 *
 * The allowed set is the union of `whitelist.promptVariables` and the optional
 * `whitelist.analysisOnlyFields`. Only the input's own properties are copied, so a
 * whitelisted name that merely exists on the prototype chain (`toString`,
 * `constructor`, ...) never leaks an inherited member into the projection.
 *
 * @param {{ input: Record<string, unknown> }} sample - Sample whose `input` is projected.
 * @param {{ promptVariables: Iterable<string>, analysisOnlyFields?: Iterable<string> | null }} whitelist
 *   Field names that may be exposed.
 * @returns {Record<string, unknown>} A new object holding the allowed fields that are present,
 *   or `sample.input` itself (same reference) when the whitelist names no fields.
 */
function projectInput(sample, whitelist) {
    const allowed = new Set([
        ...whitelist.promptVariables,
        ...(whitelist.analysisOnlyFields ?? []),
    ]);
    // No fields configured → return the input as-is (defensive fallback).
    if (allowed.size === 0) {
        return sample.input;
    }
    const projected = {};
    for (const field of allowed) {
        // Own-property check: the `in` operator would also match inherited members.
        if (Object.prototype.hasOwnProperty.call(sample.input, field)) {
            projected[field] = sample.input[field];
        }
    }
    return projected;
}
20
/**
 * Normalises a raw value into a label string.
 *
 * @param {unknown} value - Candidate label.
 * @returns {string | null} The value itself for strings, its `String(...)` form for numbers
 *   and booleans, and `null` for everything else (including `null` and `undefined`).
 */
function asLabel(value) {
    switch (typeof value) {
        case 'string':
            return value;
        case 'number':
        case 'boolean':
            return String(value);
        default:
            // null, undefined, objects, arrays, functions, symbols and bigints carry no label.
            return null;
    }
}
29
/**
 * Combines a sample and one of its run results into the view handed to the analysis step.
 *
 * @param {object} sample - Sample record; its `id`, `input` and `expected` are read.
 * @param {object} rr - Run result; its `decisionOutput`, `parsedOutput` and `errorMessage` are read.
 * @param {object} whitelist - Field whitelist forwarded to `projectInput`.
 * @returns {{ sampleId: *, inputForAnalysis: object, expected: (string|null), predicted: *, errorMessage: * }}
 */
function buildSampleView(sample, rr, whitelist) {
    const view = { sampleId: sample.id };
    view.inputForAnalysis = projectInput(sample, whitelist);
    view.expected = asLabel(sample.expected);
    // The decision output wins; the parsed output is only consulted when it is absent.
    view.predicted = rr.decisionOutput ?? asLabel(rr.parsedOutput);
    view.errorMessage = rr.errorMessage ?? null;
    return view;
}
38
/**
 * Builds a lookup table from `id` to item.
 *
 * @param {Array<{ id: * }>} items - Items to index.
 * @returns {Map<*, object>} Map keyed by `item.id`; when ids repeat, the last item wins.
 */
function indexById(items) {
    const entries = items.map((item) => [item.id, item]);
    const lookup = new Map(entries);
    return lookup;
}
41
/**
 * Builds a lookup table from `sampleId` to run result.
 *
 * @param {Iterable<{ sampleId: * }>} runResults - Run results to index.
 * @returns {Map<*, object>} Map keyed by `sampleId`; when a sample id repeats, the last result wins.
 */
function indexResultsBySampleId(runResults) {
    return new Map([...runResults].map((result) => [result.sampleId, result]));
}
47
/**
 * Groups incorrect run results into (expected → predicted) confusion pairs.
 *
 * A run result contributes only when it is judgable: `isCorrect === false`, its sample is
 * known, and both the expected and the predicted label are present.
 *
 * @param {object} args
 * @param {Array<object>} args.runResults - Run results to scan.
 * @param {Array<object>} args.samples - Samples the results refer to (matched by `id`).
 * @param {object} args.whitelist - Field whitelist used when building sample views.
 * @param {number} args.topN - Number of pairs to keep.
 * @param {number} args.maxSamplesPerPair - Maximum number of sample views stored per pair.
 * @returns {Array<{ expected: string, predicted: string, count: number, sampleIds: Array, samples: Array }>}
 *   The `topN` pairs ordered by `count`, largest first. `sampleIds` lists every counted
 *   sample, while `samples` is capped at `maxSamplesPerPair`.
 */
function buildConfusionPairs(args) {
    const sampleById = indexById(args.samples);
    const pairs = new Map();
    for (const rr of args.runResults) {
        // Cheapest filter first: only incorrect results can form a confusion pair.
        if (rr.isCorrect !== false) {
            continue;
        }
        const sample = sampleById.get(rr.sampleId);
        if (!sample) {
            continue;
        }
        const expected = asLabel(sample.expected);
        const predicted = rr.decisionOutput ?? asLabel(rr.parsedOutput);
        if (expected == null || predicted == null) {
            continue;
        }
        // A JSON-encoded tuple is unambiguous for any label text. Joining the labels with a
        // separator character would merge distinct pairs whenever a label contains it.
        const key = JSON.stringify([String(expected), String(predicted)]);
        let pair = pairs.get(key);
        if (!pair) {
            pair = { expected, predicted, count: 0, sampleIds: [], samples: [] };
            pairs.set(key, pair);
        }
        pair.count++;
        pair.sampleIds.push(sample.id);
        if (pair.samples.length < args.maxSamplesPerPair) {
            pair.samples.push(buildSampleView(sample, rr, args.whitelist));
        }
    }
    return [...pairs.values()].sort((a, b) => b.count - a.count).slice(0, args.topN);
}
75
/**
 * Collects regressions and groups them by predicted label.
 *
 * A regression is a sample whose previous run result has `isCorrect === true` and whose
 * current run result has `isCorrect === false`.
 *
 * @param {object} args
 * @param {Array<object>} args.currentRunResults - Results of the current run.
 * @param {Array<object> | null} args.previousRunResults - Results of the previous run;
 *   `null` or an empty array yields no groups.
 * @param {Array<object>} args.samples - Samples the results refer to (matched by `id`).
 * @param {object} args.whitelist - Field whitelist used when building sample views.
 * @param {number} args.maxSamples - Cap on the total number of regressed samples collected,
 *   taken in `currentRunResults` order. A cap of zero or less collects nothing.
 * @returns {Array<{ predicted: string, count: number, samples: Array }>} Groups ordered by
 *   `count`, largest first. Views without a predicted label are grouped under `'__unknown__'`.
 */
function buildRegressionGroups(args) {
    if (!args.previousRunResults || args.previousRunResults.length === 0) {
        return [];
    }
    const sampleById = indexById(args.samples);
    const prevBySample = indexResultsBySampleId(args.previousRunResults);
    const regressionViews = [];
    for (const curr of args.currentRunResults) {
        // Test the cap before collecting so that it is honoured even when it is zero.
        if (regressionViews.length >= args.maxSamples) {
            break;
        }
        if (curr.isCorrect !== false) {
            continue;
        }
        const prev = prevBySample.get(curr.sampleId);
        if (!prev || prev.isCorrect !== true) {
            continue;
        }
        const sample = sampleById.get(curr.sampleId);
        if (!sample) {
            continue;
        }
        regressionViews.push(buildSampleView(sample, curr, args.whitelist));
    }
    if (regressionViews.length === 0) {
        return [];
    }
    const groups = new Map();
    for (const view of regressionViews) {
        const key = view.predicted ?? '__unknown__';
        let group = groups.get(key);
        if (!group) {
            group = { predicted: key, count: 0, samples: [] };
            groups.set(key, group);
        }
        group.count++;
        group.samples.push(view);
    }
    return [...groups.values()].sort((a, b) => b.count - a.count);
}
109
+ //# sourceMappingURL=confusion-pairs.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"confusion-pairs.js","sourceRoot":"","sources":["../../src/error-pattern-analysis/confusion-pairs.ts"],"names":[],"mappings":";;AAiFA,kDA2BC;AAUD,sDA+BC;AAzHD,SAAS,YAAY,CAAC,MAAoB,EAAE,SAAyB;IACnE,MAAM,OAAO,GAAG,IAAI,GAAG,CAAS;QAC9B,GAAG,SAAS,CAAC,eAAe;QAC5B,GAAG,CAAC,SAAS,CAAC,kBAAkB,IAAI,EAAE,CAAC;KACxC,CAAC,CAAC;IACH,2DAA2D;IAC3D,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC;IAC5C,MAAM,SAAS,GAA4B,EAAE,CAAC;IAC9C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,KAAK,IAAI,MAAM,CAAC,KAAK;YAAE,SAAS,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,OAAO,CAAC,KAAc;IAC7B,IAAI,KAAK,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,eAAe,CACtB,MAAoB,EACpB,EAAmB,EACnB,SAAyB;IAEzB,OAAO;QACL,QAAQ,EAAE,MAAM,CAAC,EAAE;QACnB,gBAAgB,EAAE,YAAY,CAAC,MAAM,EAAE,SAAS,CAAC;QACjD,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC;QAClC,SAAS,EAAE,EAAE,CAAC,cAAc,IAAI,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC;QACxD,YAAY,EAAE,EAAE,CAAC,YAAY,IAAI,IAAI;KACtC,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAA2B,KAAU;IACrD,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;AACvD,CAAC;AAED,SAAS,sBAAsB,CAAC,UAA6B;IAC3D,MAAM,CAAC,GAAG,IAAI,GAAG,EAA2B,CAAC;IAC7C,KAAK,MAAM,EAAE,IAAI,UAAU;QAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACpD,OAAO,CAAC,CAAC;AACX,CAAC;AAUD,SAAgB,mBAAmB,CAAC,IAA6B;IAC/D,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAyB,CAAC;IAE/C,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,mFAAmF;QACnF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,SAAS,GAAG,EAAE,CAAC,cAAc,IAAI,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC,CAAC;QAChE,IAAI,EAAE,CAAC,SAAS,KAAK,KAAK;YAA
E,SAAS;QACrC,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI;YAAE,SAAS;QAEpD,MAAM,GAAG,GAAG,GAAG,QAAQ,IAAI,SAAS,EAAE,CAAC;QACvC,IAAI,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,IAAI,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;YACrE,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QACvB,CAAC;QACD,IAAI,CAAC,KAAK,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACjD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;AACnF,CAAC;AAUD,SAAgB,qBAAqB,CAAC,IAA+B;IACnE,IAAI,CAAC,IAAI,CAAC,kBAAkB,IAAI,IAAI,CAAC,kBAAkB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAChF,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,YAAY,GAAG,sBAAsB,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAErE,MAAM,eAAe,GAAiB,EAAE,CAAC;IACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAC1C,IAAI,IAAI,CAAC,SAAS,KAAK,KAAK;YAAE,SAAS;QACvC,MAAM,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YAAE,SAAS;QAC/C,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,eAAe,CAAC,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QACpE,IAAI,eAAe,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM;IACvD,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2B,CAAC;IAClD,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,IAAI,aAAa,CAAC;QAC5C,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,EAAE,CAAC;YACP,CAAC,GAAG,EAAE,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;YAC9C,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;Q
ACrB,CAAC;QACD,CAAC,CAAC,KAAK,EAAE,CAAC;QACV,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAChE,CAAC"}