tlc-claude-code 1.4.9 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +23 -0
- package/CODING-STANDARDS.md +408 -0
- package/bin/install.js +2 -0
- package/dashboard/dist/components/QualityGatePane.d.ts +38 -0
- package/dashboard/dist/components/QualityGatePane.js +31 -0
- package/dashboard/dist/components/QualityGatePane.test.d.ts +1 -0
- package/dashboard/dist/components/QualityGatePane.test.js +147 -0
- package/dashboard/dist/components/orchestration/AgentCard.d.ts +26 -0
- package/dashboard/dist/components/orchestration/AgentCard.js +60 -0
- package/dashboard/dist/components/orchestration/AgentCard.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentCard.test.js +63 -0
- package/dashboard/dist/components/orchestration/AgentControls.d.ts +11 -0
- package/dashboard/dist/components/orchestration/AgentControls.js +20 -0
- package/dashboard/dist/components/orchestration/AgentControls.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentControls.test.js +52 -0
- package/dashboard/dist/components/orchestration/AgentDetail.d.ts +35 -0
- package/dashboard/dist/components/orchestration/AgentDetail.js +37 -0
- package/dashboard/dist/components/orchestration/AgentDetail.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentDetail.test.js +79 -0
- package/dashboard/dist/components/orchestration/AgentList.d.ts +31 -0
- package/dashboard/dist/components/orchestration/AgentList.js +47 -0
- package/dashboard/dist/components/orchestration/AgentList.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/AgentList.test.js +64 -0
- package/dashboard/dist/components/orchestration/CostMeter.d.ts +11 -0
- package/dashboard/dist/components/orchestration/CostMeter.js +28 -0
- package/dashboard/dist/components/orchestration/CostMeter.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/CostMeter.test.js +50 -0
- package/dashboard/dist/components/orchestration/ModelSelector.d.ts +20 -0
- package/dashboard/dist/components/orchestration/ModelSelector.js +12 -0
- package/dashboard/dist/components/orchestration/ModelSelector.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/ModelSelector.test.js +56 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.d.ts +28 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.js +28 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/OrchestrationDashboard.test.js +56 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.d.ts +11 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.js +37 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.test.d.ts +1 -0
- package/dashboard/dist/components/orchestration/QualityIndicator.test.js +52 -0
- package/dashboard/dist/components/orchestration/index.d.ts +8 -0
- package/dashboard/dist/components/orchestration/index.js +8 -0
- package/package.json +1 -1
- package/server/lib/access-control.js +352 -0
- package/server/lib/access-control.test.js +322 -0
- package/server/lib/agents-cancel-command.js +139 -0
- package/server/lib/agents-cancel-command.test.js +180 -0
- package/server/lib/agents-get-command.js +159 -0
- package/server/lib/agents-get-command.test.js +167 -0
- package/server/lib/agents-list-command.js +150 -0
- package/server/lib/agents-list-command.test.js +149 -0
- package/server/lib/agents-logs-command.js +126 -0
- package/server/lib/agents-logs-command.test.js +198 -0
- package/server/lib/agents-retry-command.js +117 -0
- package/server/lib/agents-retry-command.test.js +192 -0
- package/server/lib/budget-limits.js +222 -0
- package/server/lib/budget-limits.test.js +214 -0
- package/server/lib/code-generator.js +291 -0
- package/server/lib/code-generator.test.js +307 -0
- package/server/lib/cost-command.js +290 -0
- package/server/lib/cost-command.test.js +202 -0
- package/server/lib/cost-optimizer.js +404 -0
- package/server/lib/cost-optimizer.test.js +232 -0
- package/server/lib/cost-projections.js +302 -0
- package/server/lib/cost-projections.test.js +217 -0
- package/server/lib/cost-reports.js +277 -0
- package/server/lib/cost-reports.test.js +254 -0
- package/server/lib/cost-tracker.js +216 -0
- package/server/lib/cost-tracker.test.js +302 -0
- package/server/lib/crypto-patterns.js +433 -0
- package/server/lib/crypto-patterns.test.js +346 -0
- package/server/lib/design-command.js +385 -0
- package/server/lib/design-command.test.js +249 -0
- package/server/lib/design-parser.js +237 -0
- package/server/lib/design-parser.test.js +290 -0
- package/server/lib/gemini-vision.js +377 -0
- package/server/lib/gemini-vision.test.js +282 -0
- package/server/lib/input-validator.js +360 -0
- package/server/lib/input-validator.test.js +295 -0
- package/server/lib/litellm-client.js +232 -0
- package/server/lib/litellm-client.test.js +267 -0
- package/server/lib/litellm-command.js +291 -0
- package/server/lib/litellm-command.test.js +260 -0
- package/server/lib/litellm-config.js +273 -0
- package/server/lib/litellm-config.test.js +212 -0
- package/server/lib/model-pricing.js +189 -0
- package/server/lib/model-pricing.test.js +178 -0
- package/server/lib/models-command.js +223 -0
- package/server/lib/models-command.test.js +193 -0
- package/server/lib/optimize-command.js +197 -0
- package/server/lib/optimize-command.test.js +193 -0
- package/server/lib/orchestration-integration.js +206 -0
- package/server/lib/orchestration-integration.test.js +235 -0
- package/server/lib/output-encoder.js +308 -0
- package/server/lib/output-encoder.test.js +312 -0
- package/server/lib/quality-evaluator.js +396 -0
- package/server/lib/quality-evaluator.test.js +337 -0
- package/server/lib/quality-gate-command.js +340 -0
- package/server/lib/quality-gate-command.test.js +321 -0
- package/server/lib/quality-gate-scorer.js +378 -0
- package/server/lib/quality-gate-scorer.test.js +376 -0
- package/server/lib/quality-history.js +265 -0
- package/server/lib/quality-history.test.js +359 -0
- package/server/lib/quality-presets.js +288 -0
- package/server/lib/quality-presets.test.js +269 -0
- package/server/lib/quality-retry.js +323 -0
- package/server/lib/quality-retry.test.js +325 -0
- package/server/lib/quality-thresholds.js +255 -0
- package/server/lib/quality-thresholds.test.js +237 -0
- package/server/lib/secure-auth.js +333 -0
- package/server/lib/secure-auth.test.js +288 -0
- package/server/lib/secure-code-command.js +540 -0
- package/server/lib/secure-code-command.test.js +309 -0
- package/server/lib/secure-errors.js +521 -0
- package/server/lib/secure-errors.test.js +298 -0
- package/server/lib/vision-command.js +372 -0
- package/server/lib/vision-command.test.js +255 -0
- package/server/lib/visual-command.js +350 -0
- package/server/lib/visual-command.test.js +256 -0
- package/server/lib/visual-testing.js +315 -0
- package/server/lib/visual-testing.test.js +357 -0
- package/server/package-lock.json +2 -2
- package/server/package.json +1 -1
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality Presets Tests
|
|
3
|
+
*
|
|
4
|
+
* Tests for pre-configured quality levels for common use cases
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { describe, it, beforeEach } = require('node:test');
|
|
8
|
+
const assert = require('node:assert');
|
|
9
|
+
|
|
10
|
+
const {
|
|
11
|
+
getPreset,
|
|
12
|
+
createCustomPreset,
|
|
13
|
+
recommendPreset,
|
|
14
|
+
applyPreset,
|
|
15
|
+
listPresets,
|
|
16
|
+
PRESET_FAST,
|
|
17
|
+
PRESET_BALANCED,
|
|
18
|
+
PRESET_THOROUGH,
|
|
19
|
+
PRESET_CRITICAL,
|
|
20
|
+
} = require('./quality-presets.js');
|
|
21
|
+
|
|
22
|
+
describe('Quality Presets', () => {
|
|
23
|
+
describe('getPreset', () => {
  // A lookup by plain string name is expected to yield a config with thresholds.
  it('returns preset configuration', () => {
    const fastPreset = getPreset('fast');
    assert.ok(fastPreset);
    assert.ok(fastPreset.thresholds);
  });

  // Names that were never registered are expected to resolve to null.
  it('returns null for unknown preset', () => {
    const missing = getPreset('unknown');
    assert.strictEqual(missing, null);
  });

  // The exported PRESET_FAST constant must work as a lookup key.
  it('returns preset by constant', () => {
    const viaConstant = getPreset(PRESET_FAST);
    assert.ok(viaConstant);
    assert.strictEqual(viaConstant.name, 'fast');
  });
});
|
|
41
|
+
|
|
42
|
+
describe('preset fast', () => {
  // Each case fetches the preset again so the cases stay independent.
  const loadFast = () => getPreset('fast');

  it('has low thresholds', () => {
    const { thresholds } = loadFast();
    assert.ok(thresholds.default <= 60);
  });

  it('allows cheaper models', () => {
    const fast = loadFast();
    const mentionsCheapModel = (modelName) => modelName.includes('3.5');
    // Either the tier is 'basic' or the allow-list names a "3.5" model.
    assert.ok(
      fast.modelTier === 'basic' ||
        (fast.allowedModels && fast.allowedModels.some(mentionsCheapModel))
    );
  });

  it('has minimal retry configuration', () => {
    const { maxRetries } = loadFast();
    assert.ok(maxRetries <= 1);
  });

  it('skips some dimensions', () => {
    const { skipDimensions } = loadFast();
    assert.ok(skipDimensions && skipDimensions.length > 0);
  });
});
|
|
66
|
+
|
|
67
|
+
describe('preset balanced', () => {
  // Each case fetches the preset again so the cases stay independent.
  const loadBalanced = () => getPreset('balanced');

  it('has moderate thresholds', () => {
    const { thresholds } = loadBalanced();
    // The default threshold must sit in the inclusive 65..80 band.
    assert.ok(thresholds.default >= 65 && thresholds.default <= 80);
  });

  it('allows mid-tier models', () => {
    const { modelTier } = loadBalanced();
    // An unset tier is accepted as well as an explicit 'standard'.
    assert.ok(modelTier === 'standard' || !modelTier);
  });

  it('has reasonable retry count', () => {
    const { maxRetries } = loadBalanced();
    assert.ok(maxRetries >= 1 && maxRetries <= 3);
  });
});
|
|
83
|
+
|
|
84
|
+
describe('preset thorough', () => {
  // Each case fetches the preset again so the cases stay independent.
  const loadThorough = () => getPreset('thorough');

  it('has high thresholds', () => {
    const { thresholds } = loadThorough();
    assert.ok(thresholds.default >= 80);
  });

  it('requires better models', () => {
    const thorough = loadThorough();
    const mentionsStrongModel = (modelName) => modelName.includes('4');
    // Any one of: premium tier, a minimum model, or an allow-list naming a "4" model.
    assert.ok(
      thorough.modelTier === 'premium' ||
        thorough.minModel ||
        (thorough.allowedModels && thorough.allowedModels.some(mentionsStrongModel))
    );
  });

  it('enables all dimensions', () => {
    const { skipDimensions } = loadThorough();
    // A missing list and an empty list both mean nothing is skipped.
    assert.ok(!skipDimensions || skipDimensions.length === 0);
  });

  it('has higher retry budget', () => {
    const { maxRetries } = loadThorough();
    assert.ok(maxRetries >= 2);
  });
});
|
|
109
|
+
|
|
110
|
+
describe('preset critical', () => {
  // Each case fetches the preset again so the cases stay independent.
  const loadCritical = () => getPreset('critical');

  it('has highest thresholds', () => {
    const { thresholds } = loadCritical();
    assert.ok(thresholds.default >= 90);
  });

  it('requires premium models', () => {
    const { modelTier, minModel } = loadCritical();
    assert.ok(modelTier === 'premium' || minModel);
  });

  it('has strictest correctness threshold', () => {
    const { thresholds } = loadCritical();
    // Passes on a per-dimension correctness bar of 95+, or on a default of 90+.
    assert.ok(
      thresholds.dimensions?.correctness >= 95 ||
        thresholds.default >= 90
    );
  });

  it('enables all quality checks', () => {
    const { skipDimensions } = loadCritical();
    // A missing list and an empty list both mean nothing is skipped.
    assert.ok(!skipDimensions || skipDimensions.length === 0);
  });
});
|
|
134
|
+
|
|
135
|
+
describe('createCustomPreset', () => {
  it('creates preset with custom thresholds', () => {
    const overrides = { thresholds: { default: 75 } };
    const created = createCustomPreset('my-preset', overrides);
    assert.strictEqual(created.name, 'my-preset');
    assert.strictEqual(created.thresholds.default, 75);
  });

  it('validates threshold values', () => {
    // 150 is used as an out-of-range threshold; creation is expected to throw.
    const registerInvalid = () => {
      createCustomPreset('invalid', { thresholds: { default: 150 } });
    };
    assert.throws(registerInvalid);
  });

  it('allows extending existing preset', () => {
    const derived = createCustomPreset('my-fast', {
      extends: 'fast',
      thresholds: { default: 55 },
    });
    // skipDimensions is not supplied here, so it must come from the base preset.
    assert.ok(derived.skipDimensions);
    assert.strictEqual(derived.thresholds.default, 55);
  });

  it('saves to preset registry', () => {
    createCustomPreset('saved-preset', { thresholds: { default: 70 } });
    const lookedUp = getPreset('saved-preset');
    assert.ok(lookedUp);
    assert.strictEqual(lookedUp.thresholds.default, 70);
  });
});
|
|
166
|
+
|
|
167
|
+
describe('recommendPreset', () => {
  it('suggests preset by task type', () => {
    const suggested = recommendPreset({ task: 'quick-fix' });
    assert.strictEqual(suggested, 'fast');
  });

  it('suggests balanced for normal work', () => {
    const suggested = recommendPreset({ task: 'feature' });
    assert.strictEqual(suggested, 'balanced');
  });

  it('suggests thorough for production', () => {
    const suggested = recommendPreset({ task: 'release' });
    // Either of the two strictest presets is acceptable for a release.
    assert.ok(suggested === 'thorough' || suggested === 'critical');
  });

  it('suggests critical for security code', () => {
    const suggested = recommendPreset({ task: 'security' });
    assert.strictEqual(suggested, 'critical');
  });

  it('considers time constraints', () => {
    const rushedName = recommendPreset({ task: 'feature', timeConstrained: true });
    const relaxedName = recommendPreset({ task: 'feature', timeConstrained: false });
    const rushedThresholds = getPreset(rushedName).thresholds;
    const relaxedThresholds = getPreset(relaxedName).thresholds;
    // Being time constrained must never raise the quality bar.
    assert.ok(rushedThresholds.default <= relaxedThresholds.default);
  });

  it('considers importance level', () => {
    const lowName = recommendPreset({ importance: 'low' });
    const highName = recommendPreset({ importance: 'high' });
    const lowThresholds = getPreset(lowName).thresholds;
    const highThresholds = getPreset(highName).thresholds;
    // Higher importance must map to a strictly higher default threshold.
    assert.ok(lowThresholds.default < highThresholds.default);
  });
});
|
|
204
|
+
|
|
205
|
+
describe('applyPreset', () => {
  it('updates current config with preset values', () => {
    const baseConfig = { thresholds: { default: 50 } };
    const merged = applyPreset(baseConfig, 'balanced');
    const reference = getPreset('balanced');
    assert.strictEqual(merged.thresholds.default, reference.thresholds.default);
  });

  it('preserves non-preset config', () => {
    const baseConfig = {
      thresholds: { default: 50 },
      customField: 'preserved',
    };
    const { customField } = applyPreset(baseConfig, 'fast');
    assert.strictEqual(customField, 'preserved');
  });

  it('records applied preset name', () => {
    const baseConfig = {};
    const { appliedPreset } = applyPreset(baseConfig, 'thorough');
    assert.strictEqual(appliedPreset, 'thorough');
  });

  it('returns new config without mutating original', () => {
    const baseConfig = { thresholds: { default: 50 } };
    const merged = applyPreset(baseConfig, 'balanced');
    // The input keeps its value while the returned config carries a different one.
    assert.strictEqual(baseConfig.thresholds.default, 50);
    assert.notStrictEqual(merged.thresholds.default, 50);
  });
});
|
|
235
|
+
|
|
236
|
+
describe('listPresets', () => {
  // Collects the `name` of every listed preset.
  const namesOf = (entries) => entries.map((entry) => entry.name);

  it('returns all available presets', () => {
    const listed = listPresets();
    assert.ok(Array.isArray(listed));
    // At least the four built-ins are expected.
    assert.ok(listed.length >= 4);
  });

  it('includes built-in presets', () => {
    const listedNames = namesOf(listPresets());
    assert.ok(listedNames.includes('fast'));
    assert.ok(listedNames.includes('balanced'));
    assert.ok(listedNames.includes('thorough'));
    assert.ok(listedNames.includes('critical'));
  });

  it('includes custom presets', () => {
    createCustomPreset('my-custom', { thresholds: { default: 77 } });
    const listedNames = namesOf(listPresets());
    assert.ok(listedNames.includes('my-custom'));
  });

  it('returns preset descriptions', () => {
    const listed = listPresets();
    assert.ok(listed.every((entry) => entry.description));
  });

  it('returns preset threshold summaries', () => {
    const listed = listPresets({ summary: true });
    assert.ok(listed.every((entry) => entry.thresholdSummary !== undefined));
  });
});
|
|
269
|
+
});
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality Retry Module
|
|
3
|
+
*
|
|
4
|
+
* Auto-retry logic with better models on quality failure
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Create a retry manager.
 *
 * The manager is a plain state object holding the resolved options plus empty
 * bookkeeping fields (`history`, `totalCost`, `costPerAttempt`,
 * `modelPerAttempt`).
 *
 * @param {Object} [options] - Manager options; unknown keys are kept as-is
 * @param {number} [options.maxRetries=3] - Retry ceiling
 * @param {number} [options.budgetLimit=Infinity] - Spending ceiling
 * @returns {Object} Retry manager
 */
function createRetryManager(options = {}) {
  const maxRetries = options.maxRetries ?? 3;
  const budgetLimit = options.budgetLimit ?? Infinity;

  return {
    options: {
      // Caller options go first so an explicit `undefined`/`null` value cannot
      // overwrite the resolved defaults that follow.
      ...options,
      maxRetries,
      budgetLimit,
    },
    history: [],
    totalCost: 0,
    // Mirrors options.budgetLimit; both always hold the same resolved value.
    budgetLimit,
    costPerAttempt: {},
    modelPerAttempt: {},
  };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Decide whether another attempt is warranted.
 *
 * Checks run in a fixed order and the first one that applies wins:
 * the evaluation passed, the retry ceiling is reached, the budget is spent,
 * or none of the failed dimensions is on the `retryOnDimensions` list.
 * If none of them applies the answer is "retry".
 *
 * @param {Object} evaluation - Evaluation result (`pass`, optional `failed` list)
 * @param {Object} options - Retry options (`maxRetries`, `currentRetry`,
 *   `budgetLimit`, `spentBudget`, `retryOnDimensions`)
 * @param {Object} resultOptions - When `reason` is truthy the answer is an
 *   object `{ retry, reason }` instead of a bare boolean
 * @returns {boolean|Object} Whether to retry
 */
function shouldRetry(evaluation, options = {}, resultOptions = {}) {
  const {
    maxRetries = 3,
    currentRetry = 0,
    budgetLimit = Infinity,
    spentBudget = 0,
    retryOnDimensions = null,
  } = options;

  // Shapes the verdict the way the caller asked for it.
  const verdict = (retry, reason) => (resultOptions.reason ? { retry, reason } : retry);

  if (evaluation.pass) {
    return verdict(false, 'evaluation passed');
  }

  if (currentRetry >= maxRetries) {
    return verdict(false, 'max retries reached');
  }

  if (spentBudget >= budgetLimit) {
    return verdict(false, 'budget exceeded');
  }

  // The dimension filter only applies when both lists are present.
  if (retryOnDimensions && evaluation.failed) {
    const hasRetryableFailure = evaluation.failed.some((dimension) =>
      retryOnDimensions.includes(dimension)
    );
    if (!hasRetryableFailure) {
      return verdict(false, 'no matching dimensions to retry');
    }
  }

  return verdict(true, 'quality failure');
}
|
|
86
|
+
|
|
87
|
+
/**
 * Pick the next model to escalate to.
 *
 * Scans `models` from the position just after `currentModel` and returns the
 * first entry that fits the remaining budget and, when a capability is
 * required and the entry has a capability list, offers that capability.
 * A `currentModel` that is not in `models` makes the scan start at the first
 * entry.
 *
 * @param {string} currentModel - Current model
 * @param {string[]} models - Available models (ordered by quality)
 * @param {Object} options - Selection options (`remainingBudget`, `costs`,
 *   `requiredCapability`, `capabilities`, `details`)
 * @returns {string|null|Object} The model name, or null when none qualifies;
 *   with `details` set, an object describing the pick (or the lack of one)
 */
function selectBetterModel(currentModel, models, options = {}) {
  const {
    remainingBudget = Infinity,
    costs = {},
    requiredCapability = null,
    capabilities = {},
    details = false,
  } = options;

  // A model without a cost entry is treated as free.
  const exceedsBudget = (name) => (costs[name] || 0) > remainingBudget;

  // A model without a capability list is never rejected on capability grounds.
  const lacksCapability = (name) => {
    const supported = capabilities[name];
    return Boolean(
      requiredCapability && supported && !supported.includes(requiredCapability)
    );
  };

  const firstUpgrade = models.indexOf(currentModel) + 1;
  const tier = models.findIndex(
    (name, position) =>
      position >= firstUpgrade && !exceedsBudget(name) && !lacksCapability(name)
  );

  if (tier === -1) {
    return details
      ? { model: null, tier: null, reason: 'no better model available' }
      : null;
  }

  const model = models[tier];
  return details ? { model, tier, cost: costs[model] } : model;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Build retry prompt with failure context.
 *
 * Appends to the original prompt a bullet list of the failed dimensions (with
 * score, threshold and reason when a matching entry exists in `failures`) and
 * a bullet list of suggestions. Sections with nothing to report are omitted,
 * so with no context the original prompt is returned unchanged.
 *
 * @param {string} originalPrompt - Original prompt
 * @param {Object} [context] - Failure context; may be omitted or null
 * @param {string[]} [context.failedDimensions] - Names of failed dimensions
 * @param {Object} [context.failures] - Per-dimension `{ score, threshold, reason }`
 * @param {string[]} [context.suggestions] - Free-form improvement hints
 * @returns {string} Enhanced retry prompt
 */
function buildRetryPrompt(originalPrompt, context = {}) {
  // `?? {}` additionally covers an explicit null, which a default parameter
  // does not.
  const { failedDimensions = [], failures = {}, suggestions = [] } = context ?? {};

  let prompt = originalPrompt;

  if (failedDimensions.length > 0) {
    prompt += '\n\n---\nPrevious attempt failed quality checks. Please improve the following:';

    for (const dim of failedDimensions) {
      const failure = failures[dim];
      if (!failure) {
        // No details recorded for this dimension: list its name only.
        prompt += `\n- ${dim}`;
        continue;
      }
      prompt += `\n- ${dim}: score ${failure.score}/${failure.threshold}`;
      if (failure.reason) {
        prompt += ` (${failure.reason})`;
      }
    }
  }

  if (suggestions.length > 0) {
    prompt += '\n\nSuggestions to fix:';
    for (const suggestion of suggestions) {
      prompt += `\n- ${suggestion}`;
    }
  }

  return prompt;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Record what a retry attempt cost.
 *
 * Always adds `cost` to the manager's running total. When an attempt number
 * is supplied the cost is also stored under that attempt, and likewise the
 * model when one is given. Missing bookkeeping objects on the manager are
 * created on demand. The manager is modified in place.
 *
 * @param {Object} manager - Retry manager
 * @param {number} cost - Cost of attempt
 * @param {Object} options - Tracking options (`attempt`, `model`)
 */
function trackRetryCost(manager, cost, options = {}) {
  const { attempt, model } = options;

  manager.totalCost = (manager.totalCost || 0) + cost;

  // Per-attempt records need an attempt number to be keyed by.
  if (attempt === undefined) {
    return;
  }

  // Stores `value` under the current attempt, creating the bucket if absent.
  const recordForAttempt = (bucket, value) => {
    if (!manager[bucket]) {
      manager[bucket] = {};
    }
    manager[bucket][attempt] = value;
  };

  recordForAttempt('costPerAttempt', cost);

  if (model) {
    recordForAttempt('modelPerAttempt', model);
  }
}
|
|
200
|
+
|
|
201
|
+
/**
 * Read back the attempts recorded on a manager.
 *
 * With no flags set, the manager's own history array is returned (an empty
 * array when the manager has none). With `escalationPath` and/or
 * `improvements` set, a report object is returned instead:
 * `attempts` is the history, `escalationPath` lists the model of each attempt
 * that has one, and `improvements` maps each attempt index (from 1) to the
 * change in `composite` score relative to the previous attempt.
 *
 * @param {Object} manager - Retry manager
 * @param {Object} options - History options (`escalationPath`, `improvements`)
 * @returns {Array|Object} History records
 */
function getRetryHistory(manager, options = {}) {
  const { escalationPath = false, improvements = false } = options;

  const attempts = manager.history || [];

  if (!(escalationPath || improvements)) {
    return attempts;
  }

  const report = { attempts };

  if (escalationPath) {
    const modelsUsed = attempts.map((entry) => entry.model);
    report.escalationPath = modelsUsed.filter(Boolean);
  }

  if (improvements) {
    // A missing composite score counts as 0.
    const scoreOf = (entry) => entry.composite || 0;

    report.improvements = {};
    for (let index = 1; index < attempts.length; index += 1) {
      const before = scoreOf(attempts[index - 1]);
      const after = scoreOf(attempts[index]);
      report.improvements[index] = after - before;
    }
  }

  return report;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Execute retry loop with feedback.
 *
 * Runs `execute`, scores the output with `evaluate`, and stops as soon as an
 * evaluation passes. After a failed attempt the loop tries to escalate to a
 * better model via `selectBetterModel` (staying on the current model when
 * none qualifies) and rebuilds the prompt from the ORIGINAL prompt plus the
 * failed dimensions via `buildRetryPrompt`.
 *
 * Note that `maxRetries` is the total number of attempts, the first one
 * included; a value of 0 or less makes no attempt at all.
 *
 * Budget handling: each attempt is charged `costs[model]` (0 when the model
 * has no entry). The remaining budget is passed to `selectBetterModel`, and
 * the loop stops early when no affordable upgrade exists and repeating the
 * current model would cost more than what is left. With no `costs` supplied
 * every attempt is free and the budget never stops the loop.
 *
 * @param {string} prompt - Original prompt
 * @param {Object} options - Execution options
 * @param {Function} options.execute - `(prompt, model) => output`, may be async
 * @param {Function} options.evaluate - `(output) => evaluation`, may be async;
 *   the evaluation's `pass` and `failed` fields are read here
 * @param {number} [options.maxRetries=3] - Maximum number of attempts
 * @param {string[]} [options.models] - Models ordered from weakest to strongest
 * @param {string} [options.initialModel='basic'] - Model for the first attempt
 * @param {number} [options.budgetLimit=Infinity] - Spending ceiling
 * @param {Object} [options.costs={}] - Cost per attempt, keyed by model name
 * @returns {Promise<Object>} Final result: the last evaluation's fields plus
 *   `pass`, `output`, `history` and `attempts`
 */
async function retryWithFeedback(prompt, options = {}) {
  const {
    execute,
    evaluate,
    maxRetries = 3,
    models = ['basic', 'advanced', 'premium'],
    initialModel = 'basic',
    budgetLimit = Infinity,
    costs = {},
  } = options;

  // Same convention as selectBetterModel: an unknown model costs nothing.
  const costOf = (model) => costs[model] || 0;

  const history = [];
  let currentModel = initialModel;
  let currentPrompt = prompt;
  let totalCost = 0;
  let attempt = 0;
  let lastOutput;

  while (attempt < maxRetries) {
    attempt++;

    // Execute with current model and prompt
    const output = await execute(currentPrompt, currentModel);
    lastOutput = output;
    totalCost += costOf(currentModel);

    // Evaluate the output
    const evaluation = await evaluate(output);

    // Record in history
    history.push({
      attempt,
      model: currentModel,
      ...evaluation,
    });

    // Check if passed
    if (evaluation.pass) {
      return {
        pass: true,
        output,
        ...evaluation,
        history,
        attempts: attempt,
      };
    }

    // Out of attempts: skip the escalation work
    if (attempt >= maxRetries) {
      break;
    }

    // Try to escalate model within what is left of the budget
    const remainingBudget = budgetLimit - totalCost;
    const betterModel = selectBetterModel(currentModel, models, {
      remainingBudget,
      costs,
    });

    if (betterModel) {
      currentModel = betterModel;
    } else if (costOf(currentModel) > Math.max(remainingBudget, 0)) {
      // No affordable upgrade and repeating the current model would overspend.
      // A free retry is never blocked, which keeps cost-less callers unaffected.
      break;
    }

    // Build retry prompt with feedback
    currentPrompt = buildRetryPrompt(prompt, {
      failedDimensions: evaluation.failed || [],
    });
  }

  // Return final result (failed)
  const lastEval = history[history.length - 1];
  return {
    // Expose the last output like the passing branch does; it sits before the
    // spread so an `output` field on the evaluation wins in both branches.
    ...(attempt > 0 ? { output: lastOutput } : {}),
    ...lastEval,
    // After the spread so a falsy non-boolean `pass` cannot replace it.
    pass: false,
    history,
    attempts: attempt,
  };
}
|
|
314
|
+
|
|
315
|
+
module.exports = {
|
|
316
|
+
createRetryManager,
|
|
317
|
+
shouldRetry,
|
|
318
|
+
selectBetterModel,
|
|
319
|
+
buildRetryPrompt,
|
|
320
|
+
trackRetryCost,
|
|
321
|
+
getRetryHistory,
|
|
322
|
+
retryWithFeedback,
|
|
323
|
+
};
|