tlc-claude-code 1.4.9 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +23 -0
- package/CODING-STANDARDS.md +408 -0
- package/bin/install.js +2 -0
- package/dashboard/dist/components/QualityGatePane.d.ts +38 -0
- package/dashboard/dist/components/QualityGatePane.js +31 -0
- package/dashboard/dist/components/QualityGatePane.test.d.ts +1 -0
- package/dashboard/dist/components/QualityGatePane.test.js +147 -0
- package/dashboard/dist/components/orchestration/AgentCard.d.ts +26 -0
- package/dashboard/dist/components/orchestration/AgentCard.js +60 -0
- package/dashboard/dist/components/orchestration/AgentCard.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentCard.test.js +63 -0
- package/dashboard/dist/components/orchestration/AgentControls.d.ts +11 -0
- package/dashboard/dist/components/orchestration/AgentControls.js +20 -0
- package/dashboard/dist/components/orchestration/AgentControls.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentControls.test.js +52 -0
- package/dashboard/dist/components/orchestration/AgentDetail.d.ts +35 -0
- package/dashboard/dist/components/orchestration/AgentDetail.js +37 -0
- package/dashboard/dist/components/orchestration/AgentDetail.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentDetail.test.js +79 -0
- package/dashboard/dist/components/orchestration/AgentList.d.ts +31 -0
- package/dashboard/dist/components/orchestration/AgentList.js +47 -0
- package/dashboard/dist/components/orchestration/AgentList.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentList.test.js +64 -0
- package/dashboard/dist/components/orchestration/CostMeter.d.ts +11 -0
- package/dashboard/dist/components/orchestration/CostMeter.js +28 -0
- package/dashboard/dist/components/orchestration/CostMeter.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/CostMeter.test.js +50 -0
- package/dashboard/dist/components/orchestration/ModelSelector.d.ts +20 -0
- package/dashboard/dist/components/orchestration/ModelSelector.js +12 -0
- package/dashboard/dist/components/orchestration/ModelSelector.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/ModelSelector.test.js +56 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.d.ts +28 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.js +28 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.test.js +56 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.d.ts +11 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.js +37 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.test.js +52 -0
- package/dashboard/dist/components/orchestration/index.d.ts +8 -0
- package/dashboard/dist/components/orchestration/index.js +8 -0
- package/package.json +1 -1
- package/server/lib/access-control.js +352 -0
- package/server/lib/access-control.test.js +322 -0
- package/server/lib/agents-cancel-command.js +139 -0
- package/server/lib/agents-cancel-command.test.js +180 -0
- package/server/lib/agents-get-command.js +159 -0
- package/server/lib/agents-get-command.test.js +167 -0
- package/server/lib/agents-list-command.js +150 -0
- package/server/lib/agents-list-command.test.js +149 -0
- package/server/lib/agents-logs-command.js +126 -0
- package/server/lib/agents-logs-command.test.js +198 -0
- package/server/lib/agents-retry-command.js +117 -0
- package/server/lib/agents-retry-command.test.js +192 -0
- package/server/lib/budget-limits.js +222 -0
- package/server/lib/budget-limits.test.js +214 -0
- package/server/lib/code-generator.js +291 -0
- package/server/lib/code-generator.test.js +307 -0
- package/server/lib/cost-command.js +290 -0
- package/server/lib/cost-command.test.js +202 -0
- package/server/lib/cost-optimizer.js +404 -0
- package/server/lib/cost-optimizer.test.js +232 -0
- package/server/lib/cost-projections.js +302 -0
- package/server/lib/cost-projections.test.js +217 -0
- package/server/lib/cost-reports.js +277 -0
- package/server/lib/cost-reports.test.js +254 -0
- package/server/lib/cost-tracker.js +216 -0
- package/server/lib/cost-tracker.test.js +302 -0
- package/server/lib/crypto-patterns.js +433 -0
- package/server/lib/crypto-patterns.test.js +346 -0
- package/server/lib/design-command.js +385 -0
- package/server/lib/design-command.test.js +249 -0
- package/server/lib/design-parser.js +237 -0
- package/server/lib/design-parser.test.js +290 -0
- package/server/lib/gemini-vision.js +377 -0
- package/server/lib/gemini-vision.test.js +282 -0
- package/server/lib/input-validator.js +360 -0
- package/server/lib/input-validator.test.js +295 -0
- package/server/lib/litellm-client.js +232 -0
- package/server/lib/litellm-client.test.js +267 -0
- package/server/lib/litellm-command.js +291 -0
- package/server/lib/litellm-command.test.js +260 -0
- package/server/lib/litellm-config.js +273 -0
- package/server/lib/litellm-config.test.js +212 -0
- package/server/lib/model-pricing.js +189 -0
- package/server/lib/model-pricing.test.js +178 -0
- package/server/lib/models-command.js +223 -0
- package/server/lib/models-command.test.js +193 -0
- package/server/lib/optimize-command.js +197 -0
- package/server/lib/optimize-command.test.js +193 -0
- package/server/lib/orchestration-integration.js +206 -0
- package/server/lib/orchestration-integration.test.js +235 -0
- package/server/lib/output-encoder.js +308 -0
- package/server/lib/output-encoder.test.js +312 -0
- package/server/lib/quality-evaluator.js +396 -0
- package/server/lib/quality-evaluator.test.js +337 -0
- package/server/lib/quality-gate-command.js +340 -0
- package/server/lib/quality-gate-command.test.js +321 -0
- package/server/lib/quality-gate-scorer.js +378 -0
- package/server/lib/quality-gate-scorer.test.js +376 -0
- package/server/lib/quality-history.js +265 -0
- package/server/lib/quality-history.test.js +359 -0
- package/server/lib/quality-presets.js +288 -0
- package/server/lib/quality-presets.test.js +269 -0
- package/server/lib/quality-retry.js +323 -0
- package/server/lib/quality-retry.test.js +325 -0
- package/server/lib/quality-thresholds.js +255 -0
- package/server/lib/quality-thresholds.test.js +237 -0
- package/server/lib/secure-auth.js +333 -0
- package/server/lib/secure-auth.test.js +288 -0
- package/server/lib/secure-code-command.js +540 -0
- package/server/lib/secure-code-command.test.js +309 -0
- package/server/lib/secure-errors.js +521 -0
- package/server/lib/secure-errors.test.js +298 -0
- package/server/lib/vision-command.js +372 -0
- package/server/lib/vision-command.test.js +255 -0
- package/server/lib/visual-command.js +350 -0
- package/server/lib/visual-command.test.js +256 -0
- package/server/lib/visual-testing.js +315 -0
- package/server/lib/visual-testing.test.js +357 -0
- package/server/package-lock.json +2 -2
- package/server/package.json +1 -1
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality Retry Tests
|
|
3
|
+
*
|
|
4
|
+
* Tests for auto-retry logic with better models on quality failure
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { describe, it, beforeEach } = require('node:test');
|
|
8
|
+
const assert = require('node:assert');
|
|
9
|
+
|
|
10
|
+
const {
|
|
11
|
+
createRetryManager,
|
|
12
|
+
shouldRetry,
|
|
13
|
+
selectBetterModel,
|
|
14
|
+
buildRetryPrompt,
|
|
15
|
+
trackRetryCost,
|
|
16
|
+
getRetryHistory,
|
|
17
|
+
retryWithFeedback,
|
|
18
|
+
} = require('./quality-retry.js');
|
|
19
|
+
|
|
20
|
+
describe('Quality Retry', () => {
|
|
21
|
+
describe('createRetryManager', () => {
  // Every case builds its own manager so no state is shared between tests.

  it('creates manager with default options', () => {
    const created = createRetryManager();
    assert.ok(created);
    assert.ok(created.options);
  });

  it('accepts custom max retries', () => {
    const { options } = createRetryManager({ maxRetries: 5 });
    assert.strictEqual(options.maxRetries, 5);
  });

  it('accepts budget limit', () => {
    const { options } = createRetryManager({ budgetLimit: 1.0 });
    assert.strictEqual(options.budgetLimit, 1.0);
  });

  it('initializes empty history', () => {
    const { history } = createRetryManager();
    assert.deepStrictEqual(history, []);
  });
});
|
|
43
|
+
|
|
44
|
+
describe('shouldRetry', () => {
  // Returns a fresh failing evaluation for each case.
  const failingEvaluation = (extra = {}) => ({ pass: false, ...extra });

  it('returns true on quality failure', () => {
    assert.strictEqual(shouldRetry(failingEvaluation()), true);
  });

  it('returns false on quality pass', () => {
    assert.strictEqual(shouldRetry({ pass: true }), false);
  });

  it('returns false when max retries reached', () => {
    const limits = { maxRetries: 3, currentRetry: 3 };
    assert.strictEqual(shouldRetry(failingEvaluation(), limits), false);
  });

  it('returns false when budget exceeded', () => {
    const budget = { budgetLimit: 1.0, spentBudget: 1.5 };
    assert.strictEqual(shouldRetry(failingEvaluation(), budget), false);
  });

  it('respects specific dimension failures', () => {
    // Only 'style' failed, but retries are limited to 'correctness'.
    const outcome = shouldRetry(
      failingEvaluation({ failed: ['style'] }),
      { retryOnDimensions: ['correctness'] }
    );
    assert.strictEqual(outcome, false);
  });

  it('returns reason for not retrying', () => {
    const limits = { maxRetries: 3, currentRetry: 3 };
    const { reason } = shouldRetry(failingEvaluation(), limits, { reason: true });
    assert.ok(reason);
    assert.ok(reason.includes('max'));
  });
});
|
|
86
|
+
|
|
87
|
+
describe('selectBetterModel', () => {
  // Builds a new candidate list per test, ordered from weakest to strongest.
  const gptLadder = () => ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo'];

  it('escalates to better model', () => {
    assert.strictEqual(selectBetterModel('gpt-3.5-turbo', gptLadder()), 'gpt-4');
  });

  it('returns null when at best model', () => {
    assert.strictEqual(selectBetterModel('gpt-4-turbo', gptLadder()), null);
  });

  it('respects budget constraints', () => {
    // Every model above the current one costs more than the remaining budget.
    const costs = { 'gpt-3.5-turbo': 0.01, 'gpt-4': 0.10, 'gpt-4-turbo': 0.15 };
    const chosen = selectBetterModel('gpt-3.5-turbo', gptLadder(), {
      remainingBudget: 0.05,
      costs,
    });
    assert.strictEqual(chosen, null);
  });

  it('considers model capabilities', () => {
    const capabilities = {
      basic: ['text'],
      advanced: ['text', 'code'],
      premium: ['text', 'code', 'reasoning'],
    };
    const chosen = selectBetterModel('basic', ['basic', 'advanced', 'premium'], {
      requiredCapability: 'code',
      capabilities,
    });
    assert.strictEqual(chosen, 'advanced');
  });

  it('returns model tier info', () => {
    const info = selectBetterModel(
      'gpt-3.5-turbo',
      ['gpt-3.5-turbo', 'gpt-4'],
      { details: true }
    );
    assert.ok(info.model);
    assert.ok(info.tier !== undefined);
  });
});
|
|
132
|
+
|
|
133
|
+
describe('buildRetryPrompt', () => {
  it('includes original prompt', () => {
    const built = buildRetryPrompt('Write a function to add numbers', {});
    assert.ok(built.includes('add numbers'));
  });

  it('includes failure context', () => {
    const built = buildRetryPrompt('Write a function', { failedDimensions: ['style'] });
    assert.ok(built.includes('style'));
  });

  it('includes specific failure reasons', () => {
    const styleFailure = { score: 60, threshold: 80, reason: 'inconsistent indentation' };
    const built = buildRetryPrompt('Write a function', {
      failedDimensions: ['style'],
      failures: { style: styleFailure },
    });
    assert.ok(built.includes('indentation'));
  });

  it('includes improvement suggestions', () => {
    const built = buildRetryPrompt('Write tests', {
      suggestions: ['Add edge case tests', 'Test error handling'],
    });
    // Either suggestion appearing in the prompt is enough.
    const mentionsSuggestion = built.includes('edge case') || built.includes('error handling');
    assert.ok(mentionsSuggestion);
  });

  it('formats prompt for model comprehension', () => {
    const built = buildRetryPrompt('Write code', { failedDimensions: ['correctness'] });
    const asksForBetterOutput = built.includes('improve') || built.includes('fix');
    assert.ok(asksForBetterOutput);
  });
});
|
|
175
|
+
|
|
176
|
+
describe('trackRetryCost', () => {
  // Costs are binary floating-point values, so computed sums and differences
  // are compared within a small tolerance rather than with exact equality.
  const TOLERANCE = 0.0001;
  const assertClose = (actual, expected) => {
    assert.ok(
      Math.abs(actual - expected) < TOLERANCE,
      `expected ${actual} to be within ${TOLERANCE} of ${expected}`
    );
  };

  it('accumulates cost across retries', () => {
    const manager = createRetryManager();
    trackRetryCost(manager, 0.05);
    trackRetryCost(manager, 0.10);
    assertClose(manager.totalCost, 0.15);
  });

  it('records cost per attempt', () => {
    const manager = createRetryManager();
    trackRetryCost(manager, 0.05, { attempt: 1 });
    trackRetryCost(manager, 0.10, { attempt: 2 });
    assert.ok(manager.costPerAttempt);
    assert.strictEqual(manager.costPerAttempt[1], 0.05);
    assert.strictEqual(manager.costPerAttempt[2], 0.10);
  });

  it('returns remaining budget', () => {
    const manager = createRetryManager({ budgetLimit: 1.0 });
    trackRetryCost(manager, 0.30);
    // The remainder is a computed float, so compare with the same tolerance
    // used for the accumulated total above.
    const remaining = manager.budgetLimit - manager.totalCost;
    assertClose(remaining, 0.70);
  });

  it('tracks model used per attempt', () => {
    const manager = createRetryManager();
    trackRetryCost(manager, 0.05, { attempt: 1, model: 'gpt-3.5-turbo' });
    trackRetryCost(manager, 0.10, { attempt: 2, model: 'gpt-4' });
    assert.strictEqual(manager.modelPerAttempt[1], 'gpt-3.5-turbo');
    assert.strictEqual(manager.modelPerAttempt[2], 'gpt-4');
  });
});
|
|
209
|
+
|
|
210
|
+
describe('getRetryHistory', () => {
  // Creates a manager whose history is replaced by the given attempt records.
  const managerWithHistory = (attempts) => {
    const manager = createRetryManager();
    manager.history = attempts;
    return manager;
  };

  it('returns all retry attempts', () => {
    const manager = managerWithHistory([
      { attempt: 1, model: 'gpt-3.5', score: 60, pass: false },
      { attempt: 2, model: 'gpt-4', score: 85, pass: true },
    ]);
    assert.strictEqual(getRetryHistory(manager).length, 2);
  });

  it('includes scores per attempt', () => {
    const manager = managerWithHistory([
      { attempt: 1, scores: { style: 60, correctness: 70 } },
    ]);
    const [firstAttempt] = getRetryHistory(manager);
    assert.ok(firstAttempt.scores);
  });

  it('includes model escalation path', () => {
    const manager = managerWithHistory([
      { attempt: 1, model: 'gpt-3.5-turbo' },
      { attempt: 2, model: 'gpt-4' },
      { attempt: 3, model: 'gpt-4-turbo' },
    ]);
    const { escalationPath } = getRetryHistory(manager, { escalationPath: true });
    assert.ok(escalationPath);
    assert.deepStrictEqual(escalationPath, ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo']);
  });

  it('calculates improvement between attempts', () => {
    const manager = managerWithHistory([
      { attempt: 1, composite: 60 },
      { attempt: 2, composite: 75 },
    ]);
    const { improvements } = getRetryHistory(manager, { improvements: true });
    assert.ok(improvements);
    // Index 1 holds the gain of the second attempt over the first (75 - 60).
    assert.strictEqual(improvements[1], 15);
  });
});
|
|
253
|
+
|
|
254
|
+
describe('retryWithFeedback', () => {
  it('improves results with feedback', async () => {
    // The first execution yields bad output; every later one yields good output.
    let executions = 0;
    const execute = async (_prompt, _model) => {
      executions += 1;
      if (executions === 1) {
        return 'bad code';
      }
      return 'good code';
    };
    const evaluate = async (output) => {
      if (output === 'good code') {
        return { pass: true, composite: 90 };
      }
      return { pass: false, composite: 60, failed: ['correctness'] };
    };

    const outcome = await retryWithFeedback('Write code', {
      execute,
      evaluate,
      maxRetries: 3,
    });

    assert.strictEqual(outcome.pass, true);
    assert.ok(executions >= 2);
  });

  it('returns final result after max retries', async () => {
    // The evaluation never passes, so the run must stop at the retry limit.
    let executions = 0;
    const execute = async () => {
      executions += 1;
      return 'bad code';
    };
    const evaluate = async () => ({ pass: false, composite: 50 });

    const outcome = await retryWithFeedback('Write code', {
      execute,
      evaluate,
      maxRetries: 2,
    });

    assert.strictEqual(outcome.pass, false);
    assert.strictEqual(executions, 2);
  });

  it('escalates model on failure', async () => {
    const seenModels = [];
    const execute = async (_prompt, model) => {
      seenModels.push(model);
      return 'code';
    };
    // The first two evaluations fail; the third passes.
    let evaluations = 0;
    const evaluate = async () => {
      evaluations += 1;
      if (evaluations < 3) {
        return { pass: false, composite: 60 };
      }
      return { pass: true, composite: 90 };
    };

    await retryWithFeedback('Write code', {
      execute,
      evaluate,
      models: ['basic', 'advanced', 'premium'],
      initialModel: 'basic',
      maxRetries: 5,
    });

    const escalated = seenModels.includes('advanced') || seenModels.includes('premium');
    assert.ok(escalated);
  });

  it('includes retry history in result', async () => {
    const outcome = await retryWithFeedback('Write code', {
      execute: async () => 'code',
      evaluate: async () => ({ pass: true, composite: 90 }),
    });
    assert.ok(outcome.history);
  });
});
|
|
325
|
+
});
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality Thresholds Module
|
|
3
|
+
*
|
|
4
|
+
* Configurable quality thresholds per operation
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Names of the built-in quality presets.
 */
const PRESET_FAST = 'fast';
const PRESET_BALANCED = 'balanced';
const PRESET_THOROUGH = 'thorough';
const PRESET_CRITICAL = 'critical';

/**
 * Built-in preset configurations, keyed by preset name.
 *
 * Each preset carries its own name, a default threshold, a model tier, a
 * retry limit, and per-dimension thresholds. 'fast' additionally lists
 * dimensions to skip; 'thorough' and 'critical' additionally name a minimum
 * model.
 */
const PRESETS = {
  fast: {
    name: PRESET_FAST,
    default: 50,
    modelTier: 'basic',
    maxRetries: 0,
    skipDimensions: ['documentation'],
    dimensions: { style: 40, correctness: 60 },
  },
  balanced: {
    name: PRESET_BALANCED,
    default: 70,
    modelTier: 'standard',
    maxRetries: 2,
    dimensions: { style: 70, completeness: 70, correctness: 75, documentation: 60 },
  },
  thorough: {
    name: PRESET_THOROUGH,
    default: 85,
    modelTier: 'premium',
    minModel: 'gpt-4',
    maxRetries: 3,
    dimensions: { style: 85, completeness: 85, correctness: 90, documentation: 80 },
  },
  critical: {
    name: PRESET_CRITICAL,
    default: 95,
    modelTier: 'premium',
    minModel: 'gpt-4-turbo',
    maxRetries: 5,
    dimensions: { style: 90, completeness: 95, correctness: 98, documentation: 85 },
  },
};
|
|
69
|
+
|
|
70
|
+
/**
 * Create a thresholds configuration.
 *
 * The three core fields are always populated: `default` falls back to 70,
 * and `operations` / `dimensions` fall back to empty objects when they are
 * missing, `null`, or `undefined`. Any other keys on `options` are copied
 * through unchanged.
 *
 * @param {Object} options - Threshold options
 * @param {number} [options.default] - Default threshold (70 when nullish)
 * @param {Object} [options.operations] - Per-operation thresholds
 * @param {Object} [options.dimensions] - Per-dimension thresholds
 * @returns {Object} Thresholds config
 */
function createThresholds(options = {}) {
  const {
    default: defaultThreshold,
    operations,
    dimensions,
    ...extras
  } = options;

  // The core fields are pulled out before the remaining keys are spread, so
  // an explicit `undefined`/`null` in `options` cannot overwrite a fallback.
  return {
    default: defaultThreshold ?? 70,
    operations: operations ?? {},
    dimensions: dimensions ?? {},
    ...extras,
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Get the threshold for an operation.
 *
 * An operation entry may be a plain number or an object with a `default`
 * field. When no operation is given, or the entry is missing, `null`, or has
 * no usable value, the config-wide default is returned.
 *
 * @param {Object} thresholds - Thresholds config
 * @param {string} operation - Operation name
 * @returns {number} Threshold value
 */
function getThreshold(thresholds, operation) {
  if (!operation) {
    return thresholds.default;
  }

  const opThreshold = thresholds.operations?.[operation];
  if (typeof opThreshold === 'number') {
    return opThreshold;
  }

  // `typeof null` is 'object', so null must be excluded before reading `.default`.
  const isConfigObject = opThreshold !== null && typeof opThreshold === 'object';
  if (isConfigObject && opThreshold.default !== undefined) {
    return opThreshold.default;
  }

  return thresholds.default;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Resolve the threshold that applies to one scoring dimension.
 *
 * Lookup order: the dimension entry of the given operation's config (when
 * that config is an object), then the config-wide dimension entry, then the
 * config-wide default.
 *
 * @param {Object} thresholds - Thresholds config
 * @param {string} dimension - Dimension name
 * @param {string} operation - Operation name (optional)
 * @returns {number} Threshold value
 */
function getDimensionThreshold(thresholds, dimension, operation = null) {
  // Operation-scoped value, only when an operation is named and configured.
  const opConfig = operation ? thresholds.operations?.[operation] : undefined;
  const scoped = opConfig && typeof opConfig === 'object'
    ? opConfig.dimensions?.[dimension]
    : undefined;
  if (scoped !== undefined) {
    return scoped;
  }

  // Config-wide dimension value, otherwise the overall default.
  const shared = thresholds.dimensions?.[dimension];
  return shared !== undefined ? shared : thresholds.default;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Compare a set of scores against the configured thresholds.
 *
 * A `composite` score, when present, is checked against the config-wide
 * default; a failing composite makes `pass` false but is not listed in
 * `failed`. Every other key in `scores` is treated as a dimension and checked
 * against its dimension threshold (resolved without an operation).
 *
 * @param {Object} thresholds - Thresholds config
 * @param {Object} scores - Dimension scores, optionally with `composite`
 * @param {Object} options - Check options
 * @param {boolean} [options.margins] - Also report score-minus-threshold per entry
 * @returns {Object} `{ pass, failed }`, plus `margins` when requested;
 *   `failed` is undefined when no dimension failed
 */
function checkThreshold(thresholds, scores, options = {}) {
  const margins = {};
  const failed = [];
  let compositeOk = true;

  const { composite } = scores;
  if (composite !== undefined) {
    const required = thresholds.default;
    if (options.margins) {
      margins.composite = composite - required;
    }
    compositeOk = !(composite < required);
  }

  Object.keys(scores)
    .filter((dim) => dim !== 'composite')
    .forEach((dim) => {
      const score = scores[dim];
      const required = getDimensionThreshold(thresholds, dim);
      if (options.margins) {
        margins[dim] = score - required;
      }
      if (score < required) {
        failed.push(dim);
      }
    });

  const result = {
    pass: compositeOk && failed.length === 0,
    failed: failed.length > 0 ? failed : undefined,
  };
  if (options.margins) {
    result.margins = margins;
  }
  return result;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Apply a preset configuration.
 *
 * A string is looked up among the built-in presets; any other value is used
 * directly as the preset config. Top-level keys from `overrides` replace the
 * preset's keys, while `dimensions` is merged key by key. The result's
 * `preset` field is always the preset's own `name`.
 *
 * @param {string|Object} preset - Preset name or config
 * @param {Object} overrides - Custom overrides
 * @returns {Object} Thresholds config
 * @throws {Error} When the preset name is unknown or the config is empty
 */
function applyPreset(preset, overrides = {}) {
  let presetConfig = preset;
  if (typeof preset === 'string') {
    // Own-property lookup only: names such as 'constructor' or 'toString'
    // must not resolve to members inherited from Object.prototype.
    presetConfig = Object.prototype.hasOwnProperty.call(PRESETS, preset)
      ? PRESETS[preset]
      : undefined;
  }

  if (!presetConfig) {
    throw new Error(`Unknown preset: ${preset}`);
  }

  return {
    ...presetConfig,
    ...overrides,
    preset: presetConfig.name,
    // Built as a new object, so the preset's own dimensions are not shared.
    dimensions: {
      ...presetConfig.dimensions,
      ...overrides.dimensions,
    },
  };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Persist a thresholds config through a caller-supplied save function.
 *
 * When `options.save` is provided it is called with the thresholds and
 * awaited, so a rejection from it propagates to the caller. When it is
 * absent nothing is written and the call still reports success.
 *
 * @param {Object} thresholds - Thresholds to save
 * @param {Object} options - Save options with save function
 * @returns {Promise<Object>} Save result, `{ success: true }`
 */
async function saveThresholds(thresholds, options = {}) {
  const persist = options.save;
  if (persist) {
    await persist(thresholds);
  }
  return { success: true };
}
|
|
221
|
+
|
|
222
|
+
/**
 * Load thresholds through a caller-supplied load function.
 *
 * Falls back to a fresh default config when no `load` function is given or
 * when it yields nothing usable (a falsy value or a non-object). A loaded
 * config whose `default` is not a valid number is returned as a copy with
 * `default` set to 70; the loaded object itself is never modified.
 *
 * @param {Object} options - Load options with load function
 * @returns {Promise<Object>} Loaded thresholds
 */
async function loadThresholds(options = {}) {
  const { load } = options;
  if (!load) {
    return createThresholds();
  }

  const data = await load();
  if (!data || typeof data !== 'object') {
    return createThresholds();
  }

  // NaN has type 'number' but is unusable as a threshold: every
  // `score < NaN` comparison is false, so every check would pass.
  const hasValidDefault = typeof data.default === 'number' && !Number.isNaN(data.default);
  if (hasValidDefault) {
    return data;
  }

  // Repair on a copy so the caller's stored object is left untouched.
  return { ...data, default: 70 };
}
|
|
241
|
+
|
|
242
|
+
module.exports = {
|
|
243
|
+
createThresholds,
|
|
244
|
+
getThreshold,
|
|
245
|
+
getDimensionThreshold,
|
|
246
|
+
checkThreshold,
|
|
247
|
+
applyPreset,
|
|
248
|
+
saveThresholds,
|
|
249
|
+
loadThresholds,
|
|
250
|
+
PRESET_FAST,
|
|
251
|
+
PRESET_BALANCED,
|
|
252
|
+
PRESET_THOROUGH,
|
|
253
|
+
PRESET_CRITICAL,
|
|
254
|
+
PRESETS,
|
|
255
|
+
};
|