@keel_flow/runtime 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +15 -0
- package/dist/agent-runtime.d.ts +8 -0
- package/dist/agent-runtime.d.ts.map +1 -0
- package/dist/agent-runtime.js +9 -0
- package/dist/agent-runtime.js.map +1 -0
- package/dist/client.d.ts +15 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +16 -0
- package/dist/client.js.map +1 -0
- package/dist/cost.d.ts +3 -0
- package/dist/cost.d.ts.map +1 -0
- package/dist/cost.js +52 -0
- package/dist/cost.js.map +1 -0
- package/dist/dispatch-subagent.d.ts +24 -0
- package/dist/dispatch-subagent.d.ts.map +1 -0
- package/dist/dispatch-subagent.js +90 -0
- package/dist/dispatch-subagent.js.map +1 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/provider.d.ts +55 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/provider.js +18 -0
- package/dist/provider.js.map +1 -0
- package/dist/providers/anthropic.d.ts +9 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +116 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/claude-bridge.d.ts +20 -0
- package/dist/providers/claude-bridge.d.ts.map +1 -0
- package/dist/providers/claude-bridge.js +109 -0
- package/dist/providers/claude-bridge.js.map +1 -0
- package/dist/providers/openai-compatible.d.ts +9 -0
- package/dist/providers/openai-compatible.d.ts.map +1 -0
- package/dist/providers/openai-compatible.js +171 -0
- package/dist/providers/openai-compatible.js.map +1 -0
- package/dist/run-agent.d.ts +43 -0
- package/dist/run-agent.d.ts.map +1 -0
- package/dist/run-agent.js +318 -0
- package/dist/run-agent.js.map +1 -0
- package/dist/spec-checker/__fixtures__/mock-provider.d.ts +20 -0
- package/dist/spec-checker/__fixtures__/mock-provider.d.ts.map +1 -0
- package/dist/spec-checker/__fixtures__/mock-provider.js +34 -0
- package/dist/spec-checker/__fixtures__/mock-provider.js.map +1 -0
- package/dist/spec-checker/adversarial.d.ts +15 -0
- package/dist/spec-checker/adversarial.d.ts.map +1 -0
- package/dist/spec-checker/adversarial.js +77 -0
- package/dist/spec-checker/adversarial.js.map +1 -0
- package/dist/spec-checker/aggregate.d.ts +17 -0
- package/dist/spec-checker/aggregate.d.ts.map +1 -0
- package/dist/spec-checker/aggregate.js +25 -0
- package/dist/spec-checker/aggregate.js.map +1 -0
- package/dist/spec-checker/bias.d.ts +16 -0
- package/dist/spec-checker/bias.d.ts.map +1 -0
- package/dist/spec-checker/bias.js +26 -0
- package/dist/spec-checker/bias.js.map +1 -0
- package/dist/spec-checker/bidirectional.d.ts +21 -0
- package/dist/spec-checker/bidirectional.d.ts.map +1 -0
- package/dist/spec-checker/bidirectional.js +97 -0
- package/dist/spec-checker/bidirectional.js.map +1 -0
- package/dist/spec-checker/calibration.d.ts +15 -0
- package/dist/spec-checker/calibration.d.ts.map +1 -0
- package/dist/spec-checker/calibration.js +58 -0
- package/dist/spec-checker/calibration.js.map +1 -0
- package/dist/spec-checker/claims.d.ts +26 -0
- package/dist/spec-checker/claims.d.ts.map +1 -0
- package/dist/spec-checker/claims.js +104 -0
- package/dist/spec-checker/claims.js.map +1 -0
- package/dist/spec-checker/index.d.ts +40 -0
- package/dist/spec-checker/index.d.ts.map +1 -0
- package/dist/spec-checker/index.js +308 -0
- package/dist/spec-checker/index.js.map +1 -0
- package/dist/spec-checker/prompts.d.ts +11 -0
- package/dist/spec-checker/prompts.d.ts.map +1 -0
- package/dist/spec-checker/prompts.js +11 -0
- package/dist/spec-checker/prompts.js.map +1 -0
- package/dist/spec-checker/rubric.d.ts +14 -0
- package/dist/spec-checker/rubric.d.ts.map +1 -0
- package/dist/spec-checker/rubric.js +68 -0
- package/dist/spec-checker/rubric.js.map +1 -0
- package/dist/spec-checker/score.d.ts +16 -0
- package/dist/spec-checker/score.d.ts.map +1 -0
- package/dist/spec-checker/score.js +45 -0
- package/dist/spec-checker/score.js.map +1 -0
- package/dist/spec-checker/shim.d.ts +4 -0
- package/dist/spec-checker/shim.d.ts.map +1 -0
- package/dist/spec-checker/shim.js +69 -0
- package/dist/spec-checker/shim.js.map +1 -0
- package/dist/spec-checker.d.ts +4 -0
- package/dist/spec-checker.d.ts.map +1 -0
- package/dist/spec-checker.js +2 -0
- package/dist/spec-checker.js.map +1 -0
- package/dist/tools.d.ts +6 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +18 -0
- package/dist/tools.js.map +1 -0
- package/package.json +45 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { createAnthropicClient, DEFAULT_MODEL } from "../client.js";
|
|
3
|
+
import { buildAnthropicShim } from "./shim.js";
|
|
4
|
+
import { decomposeSpec } from "./rubric.js";
|
|
5
|
+
import { judgeClaims, judgeOneClaim } from "./claims.js";
|
|
6
|
+
import { bidirectionalCheck } from "./bidirectional.js";
|
|
7
|
+
import { adversarialPass } from "./adversarial.js";
|
|
8
|
+
import { runWithPositionSwap } from "./bias.js";
|
|
9
|
+
import { aggregateVerdict, CONFIDENCE_FLOOR } from "./aggregate.js";
|
|
10
|
+
/**
 * Decide which checking mode to run in.
 *
 * Precedence: an explicitly supplied `opts.mode` (any value other than
 * `undefined`, returned as-is without validation), then the
 * `KEEL_SPEC_MODE` environment variable when it is one of
 * "fast" | "balanced" | "strict", and finally the "balanced" default.
 *
 * @param {{ mode?: string } | undefined} opts - checker options; may be omitted.
 * @returns {string} the resolved mode.
 */
function resolveMode(opts) {
    const explicit = opts?.mode;
    if (explicit !== undefined) {
        return explicit;
    }
    const envMode = process.env.KEEL_SPEC_MODE;
    switch (envMode) {
        case "fast":
        case "balanced":
        case "strict":
            return envMode;
        default:
            // Unset or unrecognised environment value.
            return "balanced";
    }
}
|
|
20
|
+
/**
 * Build the placeholder result returned when there is nothing to judge.
 *
 * The verdict is "pending", no rubric or claims are attached, cost and
 * latency are zero, and the gate fields report `passed: true` with no
 * violations. A fresh object (with fresh arrays) is created on every call.
 *
 * @param {string} mode - the resolved checker mode, echoed back in the result.
 * @returns {object} the pending result record.
 */
function pendingResult(mode) {
    const nothingJudged = { rubric: null, claims: [], costUsd: 0, latencyMs: 0 };
    const openGate = { passed: true, violations: [] };
    return { mode, verdict: "pending", ...nothingJudged, ...openGate };
}
|
|
32
|
+
/**
 * Convert one claim verdict into a gate violation, or `null` when the claim
 * needs no report.
 *
 * - "pass" and "na" verdicts produce `null`.
 * - A "fail" whose confidence is at or above CONFIDENCE_FLOOR is "critical".
 * - A "fail" below CONFIDENCE_FLOOR is downgraded to "warning" and tagged
 *   with an escalation marker in the message.
 * - Every other verdict (e.g. "partial") is a "warning".
 *
 * @param {{ verdict: string, confidence: number, evidence?: string }} v - the claim verdict.
 * @param {string} claimText - human-readable claim text used in the message.
 * @returns {{ principleId: string, severity: string, message: string } | null}
 */
function claimToViolation(v, claimText) {
    const { verdict } = v;
    if (verdict === "pass" || verdict === "na") {
        return null;
    }
    const failed = verdict === "fail";
    // The confidence floor is only consulted for failing verdicts.
    const shakyFail = failed && v.confidence < CONFIDENCE_FLOOR;
    let severity = "warning";
    if (failed && !shakyFail) {
        severity = "critical";
    }
    const pieces = [`[${verdict}] ${claimText}`];
    if (v.evidence) {
        pieces.push(` (${v.evidence})`);
    }
    if (shakyFail) {
        pieces.push(" [escalate: low-confidence]");
    }
    return {
        principleId: "spec-compliance",
        severity,
        message: pieces.join(""),
    };
}
|
|
45
|
+
/**
 * Reduce a spec-check result to a gate decision plus a flat violation list.
 *
 * A "pending" result passes the gate with no violations and `pending: true`.
 * Otherwise violations are collected, in order, from:
 *   1. each claim verdict (via `claimToViolation`, labelled with the rubric
 *      text for that claim id, or the id itself when no text is known);
 *   2. a bidirectional divergence, reported only when the overall verdict is
 *      not already "fail";
 *   3. every adversarial concern.
 * The gate passes only when the overall verdict is exactly "pass".
 *
 * @param {object} r - a spec-check result (verdict, rubric, claims, and the
 *   optional bidirectional / adversarial sections).
 * @returns {{ passed: boolean, violations: object[], pending?: boolean }}
 */
export function toGateCheck(r) {
    if (r.verdict === "pending") {
        return { passed: true, violations: [], pending: true };
    }

    const violations = [];
    const warn = (message) => {
        violations.push({ principleId: "spec-compliance", severity: "warning", message });
    };

    // Rubric text lookup so violations can quote the claim instead of its id.
    const textById = new Map(
        r.rubric ? Array.from(r.rubric.claims, (claim) => [claim.id, claim.text]) : []
    );

    for (const claimVerdict of r.claims) {
        const label = textById.get(claimVerdict.claimId) ?? claimVerdict.claimId;
        const violation = claimToViolation(claimVerdict, label);
        if (violation) {
            violations.push(violation);
        }
    }

    const bd = r.bidirectional;
    if (bd?.divergent && r.verdict !== "fail") {
        warn(`bidirectional divergence: ${bd.summary} (sim=${bd.similarity.toFixed(2)})`);
    }

    if (r.adversarial && r.adversarial.concerns.length > 0) {
        for (const concern of r.adversarial.concerns) {
            warn(`adversarial concern: ${concern}`);
        }
    }

    return { passed: r.verdict === "pass", violations };
}
|
|
79
|
+
/**
 * Create a `checkSpec(args)` function bound to the given options.
 *
 * Each call to the returned function:
 *   1. resolves the mode and returns a pending result when `args.specSummary`
 *      is falsy;
 *   2. selects a provider/model (explicit provider, then a shim around an
 *      explicit client, then a shim around a freshly created client);
 *   3. decomposes the spec into a rubric and judges every claim
 *      (one at a time in "strict" mode, via `judgeClaims` otherwise);
 *   4. in "balanced" and "strict" modes runs the bidirectional and
 *      adversarial checks concurrently;
 *   5. in "strict" mode re-judges every claim with `runWithPositionSwap`
 *      and replaces the stored verdict with the swap's final verdict;
 *   6. aggregates a verdict (a "pass" is downgraded to "partial" when the
 *      adversarial pass raised a partial) and attaches the gate outcome.
 *
 * When `opts.telemetry` is set, one event is emitted per step; all child
 * events carry the rubric event id as `parentEventId`.
 *
 * @param {object} [opts] - optional settings read here: `mode`, `provider`,
 *   `client`, `model`, `telemetry`, `claimConcurrency`, `embed`,
 *   `bidirectionalThreshold`.
 * @returns {(args: { specSummary: string, diff: unknown }) => Promise<object>}
 */
export function createSpecChecker(opts) {
    // Pick the provider and model for one invocation.
    const selectBackend = () => {
        if (opts?.provider) {
            const provider = opts.provider;
            return { provider, model: opts.model ?? provider.defaultModel };
        }
        if (opts?.client) {
            const model = opts.model ?? DEFAULT_MODEL;
            return { provider: buildAnthropicShim(opts.client, model), model };
        }
        const created = createAnthropicClient(
            opts?.model !== undefined ? { model: opts.model } : undefined
        );
        return {
            provider: buildAnthropicShim(created.client, created.model),
            model: created.model,
        };
    };

    return async function checkSpec(args) {
        const mode = resolveMode(opts);
        if (!args.specSummary) {
            return pendingResult(mode);
        }
        const { specSummary, diff } = args;
        const { provider, model } = selectBackend();
        const telemetry = opts?.telemetry;
        const startedAt = Date.now();
        const rubricEventId = randomUUID();
        let costUsd = 0;

        // Wrap a payload in the common event envelope and emit it. Token and
        // cost fields are only attached when per-call stats are supplied.
        // Callers guard with `if (telemetry)` so payloads are never built
        // when telemetry is off.
        const publish = (kind, parentEventId, payload, stats) => {
            const event = {
                sessionId: "",
                parentEventId,
                workspaceId: null,
                triggeredBy: { kind: "user" },
                kind,
                payload,
                model,
            };
            if (stats) {
                event.inputTokens = stats.inputTokens;
                event.outputTokens = stats.outputTokens;
                event.costUsd = stats.costUsd;
            }
            telemetry.emit(event);
        };
        const claimPayload = (claimVerdict, swapped) => ({
            claimId: claimVerdict.claimId,
            verdict: claimVerdict.verdict,
            confidence: claimVerdict.confidence,
            swapped,
        });

        // Step 1: rubric.
        const decomposition = await decomposeSpec(provider, specSummary, model);
        const rubric = decomposition.rubric;
        const rubricStats = decomposition.stats;
        costUsd += rubricStats.costUsd;
        if (telemetry) {
            publish(
                "spec-judge.rubric",
                null,
                { claims: rubric.claims, specSummary, eventId: rubricEventId },
                rubricStats
            );
        }

        // Step 2: per-claim verdicts.
        const strict = mode === "strict";
        const claimVerdicts = [];
        if (strict) {
            for (const claim of rubric.claims) {
                const judgedOne = await judgeOneClaim(provider, model, specSummary, claim, diff);
                costUsd += judgedOne.stats.costUsd;
                claimVerdicts.push(judgedOne.verdict);
                if (telemetry) {
                    publish(
                        "spec-judge.claim",
                        rubricEventId,
                        claimPayload(judgedOne.verdict, false),
                        judgedOne.stats
                    );
                }
            }
        } else {
            const judged = await judgeClaims(provider, model, specSummary, rubric, diff, {
                concurrency: opts?.claimConcurrency ?? 4,
            });
            costUsd += judged.stats.costUsd;
            for (const claimVerdict of judged.verdicts) {
                claimVerdicts.push(claimVerdict);
                if (telemetry) {
                    // Batch judging exposes only aggregate stats, so these
                    // events carry no token/cost fields.
                    publish("spec-judge.claim", rubricEventId, claimPayload(claimVerdict, false));
                }
            }
        }

        // Step 3: bidirectional + adversarial checks (balanced and strict).
        let bidirectional;
        let adversarial;
        let positionBias;
        if (strict || mode === "balanced") {
            const bidirectionalOptions = {};
            if (opts?.embed) {
                bidirectionalOptions.embed = opts.embed;
            }
            if (opts?.bidirectionalThreshold !== undefined) {
                bidirectionalOptions.threshold = opts.bidirectionalThreshold;
            }
            const [bd, ad] = await Promise.all([
                bidirectionalCheck(provider, model, specSummary, diff, bidirectionalOptions),
                adversarialPass(provider, model, specSummary, diff, rubric),
            ]);
            costUsd += bd.stats.costUsd;
            costUsd += ad.stats.costUsd;
            bidirectional = {
                summary: bd.summary,
                similarity: bd.similarity,
                divergent: bd.divergent,
            };
            adversarial = {
                concerns: ad.concerns,
                raisedPartial: ad.raisedPartial,
            };
            if (telemetry) {
                publish(
                    "spec-judge.bidirectional",
                    rubricEventId,
                    { similarity: bd.similarity, divergent: bd.divergent },
                    bd.stats
                );
                publish(
                    "spec-judge.adversarial",
                    rubricEventId,
                    { concernsCount: ad.concerns.length, raisedPartial: ad.raisedPartial },
                    ad.stats
                );
            }
        }

        // Step 4: position-swap re-judging (strict only).
        if (strict) {
            const claimsById = new Map(rubric.claims.map((c) => [c.id, c]));
            const tally = { swapsRun: 0, disagreements: 0 };
            for (const [index, primaryVerdict] of claimVerdicts.entries()) {
                const claim = primaryVerdict ? claimsById.get(primaryVerdict.claimId) : undefined;
                if (!claim) {
                    continue;
                }
                tally.swapsRun += 1;
                const swap = await runWithPositionSwap(
                    provider,
                    model,
                    specSummary,
                    claim,
                    diff,
                    primaryVerdict
                );
                costUsd += swap.stats.costUsd;
                if (swap.disagreed) {
                    tally.disagreements += 1;
                }
                claimVerdicts[index] = swap.finalVerdict;
                if (telemetry) {
                    publish(
                        "spec-judge.claim",
                        rubricEventId,
                        claimPayload(swap.swapped, true),
                        swap.stats
                    );
                }
            }
            positionBias = tally;
        }

        // Step 5: aggregate. Optional sections are only passed when they ran.
        const aggregateInput = { rubric, claims: claimVerdicts };
        if (bidirectional) {
            aggregateInput.bidirectional = { divergent: bidirectional.divergent };
        }
        if (adversarial) {
            aggregateInput.adversarial = { raisedPartial: adversarial.raisedPartial };
        }
        if (positionBias) {
            aggregateInput.positionBias = { disagreements: positionBias.disagreements };
        }
        const aggregated = aggregateVerdict(aggregateInput);
        const verdict =
            adversarial?.raisedPartial && aggregated === "pass" ? "partial" : aggregated;

        const result = { mode, verdict, rubric, claims: claimVerdicts };
        if (bidirectional) {
            result.bidirectional = bidirectional;
        }
        if (adversarial) {
            result.adversarial = adversarial;
        }
        if (positionBias) {
            result.positionBias = positionBias;
        }
        result.costUsd = costUsd;
        result.latencyMs = Date.now() - startedAt;

        const { passed, violations } = toGateCheck(result);
        return { ...result, passed, violations };
    };
}
|
|
301
|
+
export { decomposeSpec } from "./rubric.js";
|
|
302
|
+
export { judgeClaims, judgeOneClaim, summarizeDiff } from "./claims.js";
|
|
303
|
+
export { bidirectionalCheck, jaccard, cosine } from "./bidirectional.js";
|
|
304
|
+
export { adversarialPass } from "./adversarial.js";
|
|
305
|
+
export { runWithPositionSwap } from "./bias.js";
|
|
306
|
+
export { aggregateVerdict } from "./aggregate.js";
|
|
307
|
+
export { loadCalibration } from "./calibration.js";
|
|
308
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/spec-checker/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAWzC,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAEpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,mBAAmB,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AA4BpE,SAAS,WAAW,CAAC,IAAyB;IAC5C,IAAI,IAAI,EAAE,IAAI,KAAK,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC9C,IAAI,OAAO,KAAK,MAAM,IAAI,OAAO,KAAK,UAAU,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzE,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,aAAa,CAAC,IAAmB;IACxC,OAAO;QACL,IAAI;QACJ,OAAO,EAAE,SAAS;QAClB,MAAM,EAAE,IAAI;QACZ,MAAM,EAAE,EAAE;QACV,OAAO,EAAE,CAAC;QACV,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,IAAI;QACZ,UAAU,EAAE,EAAE;KACf,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,CAAmB,EACnB,SAAiB;IAEjB,IAAI,CAAC,CAAC,OAAO,KAAK,MAAM,IAAI,CAAC,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IAC5D,MAAM,iBAAiB,GACrB,CAAC,CAAC,OAAO,KAAK,MAAM,IAAI,CAAC,CAAC,UAAU,GAAG,gBAAgB,CAAC;IAC1D,MAAM,QAAQ,GACZ,CAAC,CAAC,OAAO,KAAK,MAAM,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC;IACtE,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,MAAM,UAAU,GAAG,iBAAiB,CAAC,CAAC,CAAC,6BAA6B,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1E,OAAO;QACL,WAAW,EAAE,iBAAiB;QAC9B,QAAQ;QACR,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,KAAK,SAAS,GAAG,QAAQ,GAAG,UAAU,EAAE;KAC/D,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,CAAkB;IAK5C,IAAI,CAAC,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACzD,CAAC;IACD,MAAM,UAAU,GAAgB,EAAE,CAAC;IACnC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,MAA
M,CAAC,MAAM;YAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IAC/D,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC;QACnD,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QACtC,IAAI,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IACD,IAAI,CAAC,CAAC,aAAa,EAAE,SAAS,IAAI,CAAC,CAAC,OAAO,KAAK,MAAM,EAAE,CAAC;QACvD,UAAU,CAAC,IAAI,CAAC;YACd,WAAW,EAAE,iBAAiB;YAC9B,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,6BAA6B,CAAC,CAAC,aAAa,CAAC,OAAO,SAAS,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SAC/G,CAAC,CAAC;IACL,CAAC;IACD,IAAI,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvD,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;YACvC,UAAU,CAAC,IAAI,CAAC;gBACd,WAAW,EAAE,iBAAiB;gBAC9B,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,wBAAwB,CAAC,EAAE;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,OAAO,KAAK,MAAM,EAAE,UAAU,EAAE,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,IAAyB;IACzD,OAAO,KAAK,UAAU,SAAS,CAC7B,IAAmB;QAEnB,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;QAED,IAAI,QAAuB,CAAC;QAC5B,IAAI,KAAa,CAAC;QAClB,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;YACnB,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;YACzB,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,QAAQ,CAAC,YAAY,CAAC;QAC9C,CAAC;aAAM,IAAI,IAAI,EAAE,MAAM,EAAE,CAAC;YACxB,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,aAAa,CAAC;YACpC,QAAQ,GAAG,kBAAkB,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,GAAG,qBAAqB,CAC9B,IAAI,EAAE,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,SAAS,CAC9D,CAAC;YACF,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC;YACjB,QAAQ,GAAG,kBAAkB,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,MAAM,aAAa,GAAG,UAAU,EAAE,CAAC;QACnC,MAAM,kBAAkB,GAAG,EAAE,CAAC;QAE9B
,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,MAAM,aAAa,CACxD,QAAQ,EACR,IAAI,CAAC,WAAW,EAChB,KAAK,CACN,CAAC;QACF,OAAO,IAAI,WAAW,CAAC,OAAO,CAAC;QAE/B,IAAI,SAAS,EAAE,CAAC;YACd,SAAS,CAAC,IAAI,CAAC;gBACb,SAAS,EAAE,kBAAkB;gBAC7B,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,IAAI;gBACjB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;gBAC7B,IAAI,EAAE,mBAAmB;gBACzB,OAAO,EAAE;oBACP,MAAM,EAAE,MAAM,CAAC,MAAM;oBACrB,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,OAAO,EAAE,aAAa;iBACvB;gBACD,KAAK;gBACL,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,YAAY,EAAE,WAAW,CAAC,YAAY;gBACtC,OAAO,EAAE,WAAW,CAAC,OAAO;aAC7B,CAAC,CAAC;QACL,CAAC;QAED,MAAM,aAAa,GAAuB,EAAE,CAAC;QAE7C,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClC,MAAM,OAAO,GAAG,MAAM,aAAa,CACjC,QAAQ,EACR,KAAK,EACL,IAAI,CAAC,WAAW,EAChB,KAAK,EACL,IAAI,CAAC,IAAI,CACV,CAAC;gBACF,OAAO,IAAI,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC;gBACjC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;gBACpC,IAAI,SAAS,EAAE,CAAC;oBACd,SAAS,CAAC,IAAI,CAAC;wBACb,SAAS,EAAE,kBAAkB;wBAC7B,aAAa,EAAE,aAAa;wBAC5B,WAAW,EAAE,IAAI;wBACjB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;wBAC7B,IAAI,EAAE,kBAAkB;wBACxB,OAAO,EAAE;4BACP,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO;4BAChC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO;4BAChC,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,UAAU;4BACtC,OAAO,EAAE,KAAK;yBACf;wBACD,KAAK;wBACL,WAAW,EAAE,OAAO,CAAC,KAAK,CAAC,WAAW;wBACtC,YAAY,EAAE,OAAO,CAAC,KAAK,CAAC,YAAY;wBACxC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,OAAO;qBAC/B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,MAAM,WAAW,CAC9B,QAAQ,EACR,KAAK,EACL,IAAI,CAAC,WAAW,EAChB,MAAM,EACN,IAAI,CAAC,IAAI,EACT,EAAE,WAAW,EAAE,IAAI,EAAE,gBAAgB,IAAI,CAAC,EAAE,CAC7C,CAAC;YACF,OAAO,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC;YAChC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ;gBAAE,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACvD,IAAI,SAAS,EAAE,CAAC;gBACd,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAChC,SAAS,CAAC,IAAI,CAAC;wBACb,SAAS,EAAE,kBAAkB;wBAC7B,aAAa,EAAE,aAAa;wBAC5B,WAAW,EAAE,IAAI;wBACjB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;wBAC7B,IAAI,EAAE,kBAAkB;wBACxB,OAAO,EAAE;4BACP,
OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,UAAU,EAAE,CAAC,CAAC,UAAU;4BACxB,OAAO,EAAE,KAAK;yBACf;wBACD,KAAK;qBACN,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,aAA2D,CAAC;QAChE,IAAI,WAAuD,CAAC;QAC5D,IAAI,YAAyD,CAAC;QAE9D,IAAI,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC7C,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;gBACjC,kBAAkB,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,IAAI,EAAE;oBAC/D,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7C,GAAG,CAAC,IAAI,EAAE,sBAAsB,KAAK,SAAS;wBAC5C,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,sBAAsB,EAAE;wBAC5C,CAAC,CAAC,EAAE,CAAC;iBACR,CAAC;gBACF,eAAe,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC;aACtE,CAAC,CAAC;YACH,OAAO,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC;YAC5B,OAAO,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC;YAC5B,aAAa,GAAG;gBACd,OAAO,EAAE,EAAE,CAAC,OAAO;gBACnB,UAAU,EAAE,EAAE,CAAC,UAAU;gBACzB,SAAS,EAAE,EAAE,CAAC,SAAS;aACxB,CAAC;YACF,WAAW,GAAG;gBACZ,QAAQ,EAAE,EAAE,CAAC,QAAQ;gBACrB,aAAa,EAAE,EAAE,CAAC,aAAa;aAChC,CAAC;YAEF,IAAI,SAAS,EAAE,CAAC;gBACd,SAAS,CAAC,IAAI,CAAC;oBACb,SAAS,EAAE,kBAAkB;oBAC7B,aAAa,EAAE,aAAa;oBAC5B,WAAW,EAAE,IAAI;oBACjB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;oBAC7B,IAAI,EAAE,0BAA0B;oBAChC,OAAO,EAAE;wBACP,UAAU,EAAE,EAAE,CAAC,UAAU;wBACzB,SAAS,EAAE,EAAE,CAAC,SAAS;qBACxB;oBACD,KAAK;oBACL,WAAW,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW;oBACjC,YAAY,EAAE,EAAE,CAAC,KAAK,CAAC,YAAY;oBACnC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO;iBAC1B,CAAC,CAAC;gBACH,SAAS,CAAC,IAAI,CAAC;oBACb,SAAS,EAAE,kBAAkB;oBAC7B,aAAa,EAAE,aAAa;oBAC5B,WAAW,EAAE,IAAI;oBACjB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;oBAC7B,IAAI,EAAE,wBAAwB;oBAC9B,OAAO,EAAE;wBACP,aAAa,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;wBACjC,aAAa,EAAE,EAAE,CAAC,aAAa;qBAChC;oBACD,KAAK;oBACL,WAAW,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW;oBACjC,YAAY,EAAE,EAAE,CAAC,KAAK,CAAC,YAAY;oBACnC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO;iBAC1B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtB,IAAI,QAAQ,GAAG,CAA
C,CAAC;YACjB,IAAI,aAAa,GAAG,CAAC,CAAC;YACtB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAChE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,MAAM,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;gBAC3B,IAAI,CAAC,CAAC;oBAAE,SAAS;gBACjB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxC,IAAI,CAAC,KAAK;oBAAE,SAAS;gBACrB,QAAQ,EAAE,CAAC;gBACX,MAAM,IAAI,GAAG,MAAM,mBAAmB,CACpC,QAAQ,EACR,KAAK,EACL,IAAI,CAAC,WAAW,EAChB,KAAK,EACL,IAAI,CAAC,IAAI,EACT,CAAC,CACF,CAAC;gBACF,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;gBAC9B,IAAI,IAAI,CAAC,SAAS;oBAAE,aAAa,EAAE,CAAC;gBACpC,aAAa,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC;gBACrC,IAAI,SAAS,EAAE,CAAC;oBACd,SAAS,CAAC,IAAI,CAAC;wBACb,SAAS,EAAE,kBAAkB;wBAC7B,aAAa,EAAE,aAAa;wBAC5B,WAAW,EAAE,IAAI;wBACjB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;wBAC7B,IAAI,EAAE,kBAAkB;wBACxB,OAAO,EAAE;4BACP,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO;4BAC7B,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO;4BAC7B,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU;4BACnC,OAAO,EAAE,IAAI;yBACd;wBACD,KAAK;wBACL,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;wBACnC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY;wBACrC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO;qBAC5B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YACD,YAAY,GAAG,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;QAC7C,CAAC;QAED,IAAI,OAAO,GAAG,gBAAgB,CAAC;YAC7B,MAAM;YACN,MAAM,EAAE,aAAa;YACrB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,EAAE,SAAS,EAAE,aAAa,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnF,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,aAAa,EAAE,WAAW,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrF,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,EAAE,aAAa,EAAE,YAAY,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzF,CAAC,CAAC;QAEH,IAAI,WAAW,IAAI,WAAW,CAAC,aAAa,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;YACnE,OAAO,GAAG,SAAS,CAAC;QACtB,CAAC;QAED,MAAM,MAAM,GAAoB;YAC9B,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM,EAAE,aAAa;YACrB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC
;YAC3C,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACvC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,OAAO;YACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;SAClC,CAAC;QAEF,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QACjC,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACxE,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,mBAAmB,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** System prompt: decompose a spec summary into 3-10 atomic claims, each weighted must/should/may, returned via the emit_rubric tool. */
export declare const RUBRIC_SYSTEM = "You are a precise spec auditor. Decompose a spec summary into 3-10 atomic, independently checkable claims. Each claim is one testable assertion. Tag every claim with weight: must (load-bearing for spec), should (expected but not load-bearing), or may (nice-to-have). Use only the emit_rubric tool to return your answer.";
|
|
2
|
+
/** Builds the user message for rubric decomposition from the given spec summary. */
export declare const RUBRIC_USER: (specSummary: string) => string;
|
|
3
|
+
/** Prefix for a retry message after an emit_rubric call failed schema validation; ends with a newline so the validation error can be appended. */
export declare const RUBRIC_RETRY_PREFIX = "Your previous emit_rubric call failed schema validation. Fix the issues and call emit_rubric again. Validation error:\n";
|
|
4
|
+
/** System prompt: judge one rubric claim against a diff and answer via emit_claim_verdict with pass/fail/partial/na, a 0..1 confidence, and evidence. */
export declare const CLAIM_SYSTEM = "You are judging whether one specific claim from a spec rubric is satisfied by a code diff. Return verdict via emit_claim_verdict: pass (clearly satisfied), fail (clearly unsatisfied), partial (partially done), or na (claim is not applicable to this diff). Confidence is 0..1. Cite a path:line or short rationale as evidence.";
|
|
5
|
+
/** Builds the user message for judging a single claim; sections appear in the order spec summary, claim, diff. */
export declare const CLAIM_USER: (specSummary: string, claimText: string, claimId: string, diffSummary: string) => string;
|
|
6
|
+
/** Same inputs as CLAIM_USER, but the message places the diff first, followed by the spec summary and the claim. */
export declare const CLAIM_USER_SWAPPED: (specSummary: string, claimText: string, claimId: string, diffSummary: string) => string;
|
|
7
|
+
/** System prompt: neutrally summarize what a diff does in 1-3 bullets, without referencing the spec, via the emit_diff_summary tool. */
export declare const BIDIRECTIONAL_SYSTEM = "Summarize what a code diff DOES in 1-3 short bullets. Be neutral; do not reference the spec. Use only the emit_diff_summary tool.";
|
|
8
|
+
/** Builds the user message asking for a 1-3 bullet summary of the given diff. */
export declare const BIDIRECTIONAL_USER: (diffSummary: string) => string;
|
|
9
|
+
/** System prompt: act as a skeptical reviewer and list specific ways the diff may fail the spec (empty list if none) via the emit_concerns tool. */
export declare const ADVERSARIAL_SYSTEM = "You are a skeptical reviewer. Identify ways this diff might plausibly fail to satisfy the spec. Be specific. If no concern is real, return an empty list. Use the emit_concerns tool.";
|
|
10
|
+
/** Builds the user message for the adversarial pass; sections appear in the order spec summary, rubric, diff. */
export declare const ADVERSARIAL_USER: (specSummary: string, diffSummary: string, rubricText: string) => string;
|
|
11
|
+
//# sourceMappingURL=prompts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../src/spec-checker/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,aAAa,oUACyS,CAAC;AAEpU,eAAO,MAAM,WAAW,GAAI,aAAa,MAAM,KAAG,MAC6C,CAAC;AAEhG,eAAO,MAAM,mBAAmB,4HAC2F,CAAC;AAE5H,eAAO,MAAM,YAAY,yUAC+S,CAAC;AAEzU,eAAO,MAAM,UAAU,GACrB,aAAa,MAAM,EACnB,WAAW,MAAM,EACjB,SAAS,MAAM,EACf,aAAa,MAAM,KAClB,MACwL,CAAC;AAE5L,eAAO,MAAM,kBAAkB,GAC7B,aAAa,MAAM,EACnB,WAAW,MAAM,EACjB,SAAS,MAAM,EACf,aAAa,MAAM,KAClB,MACwL,CAAC;AAE5L,eAAO,MAAM,oBAAoB,sIACoG,CAAC;AAEtI,eAAO,MAAM,kBAAkB,GAAI,aAAa,MAAM,KAAG,MACyC,CAAC;AAEnG,eAAO,MAAM,kBAAkB,0LAC0J,CAAC;AAE1L,eAAO,MAAM,gBAAgB,GAC3B,aAAa,MAAM,EACnB,aAAa,MAAM,EACnB,YAAY,MAAM,KACjB,MAC+L,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** System prompt: decompose a spec summary into 3-10 atomic claims, each weighted must/should/may, returned via the emit_rubric tool. */
export const RUBRIC_SYSTEM = "You are a precise spec auditor. Decompose a spec summary into 3-10 atomic, independently checkable claims. Each claim is one testable assertion. Tag every claim with weight: must (load-bearing for spec), should (expected but not load-bearing), or may (nice-to-have). Use only the emit_rubric tool to return your answer.";
|
|
2
|
+
/**
 * Build the user message for rubric decomposition.
 *
 * @param {string} specSummary - the spec summary to decompose.
 * @returns {string} the spec summary section followed by the instruction.
 */
export const RUBRIC_USER = (specSummary) => {
    const specSection = `Spec summary:\n${specSummary}`;
    const instruction = "Decompose this into 3-10 claims. Use the emit_rubric tool.";
    return [specSection, instruction].join("\n\n");
};
|
|
3
|
+
/** Prefix for a retry message after an emit_rubric call failed schema validation; ends with a newline so the validation error can be appended. */
export const RUBRIC_RETRY_PREFIX = "Your previous emit_rubric call failed schema validation. Fix the issues and call emit_rubric again. Validation error:\n";
|
|
4
|
+
/** System prompt: judge one rubric claim against a diff and answer via emit_claim_verdict with pass/fail/partial/na, a 0..1 confidence, and evidence. */
export const CLAIM_SYSTEM = "You are judging whether one specific claim from a spec rubric is satisfied by a code diff. Return verdict via emit_claim_verdict: pass (clearly satisfied), fail (clearly unsatisfied), partial (partially done), or na (claim is not applicable to this diff). Confidence is 0..1. Cite a path:line or short rationale as evidence.";
|
|
5
|
+
/**
 * Build the user message for judging one claim.
 * Sections appear in the order: spec summary, claim, diff, instruction.
 *
 * @param {string} specSummary - the spec summary.
 * @param {string} claimText - text of the claim being judged.
 * @param {string} claimId - id of the claim, echoed in the message.
 * @param {string} diffSummary - the diff text to judge against.
 * @returns {string} the assembled message.
 */
export const CLAIM_USER = (specSummary, claimText, claimId, diffSummary) => {
    const sections = [
        `Spec summary:\n${specSummary}`,
        `Claim to judge (id=${claimId}):\n${claimText}`,
        `Diff:\n${diffSummary}`,
        "Judge whether the diff satisfies this single claim. Call emit_claim_verdict.",
    ];
    return sections.join("\n\n");
};
|
|
6
|
+
/**
 * Build the position-swapped user message for judging one claim.
 * Sections appear in the order: diff, spec summary, claim, instruction.
 *
 * @param {string} specSummary - the spec summary.
 * @param {string} claimText - text of the claim being judged.
 * @param {string} claimId - id of the claim, echoed in the message.
 * @param {string} diffSummary - the diff text to judge against.
 * @returns {string} the assembled message.
 */
export const CLAIM_USER_SWAPPED = (specSummary, claimText, claimId, diffSummary) => {
    const sections = [
        `Diff:\n${diffSummary}`,
        `Spec summary:\n${specSummary}`,
        `Claim to judge (id=${claimId}):\n${claimText}`,
        "Judge whether the diff satisfies this single claim. Call emit_claim_verdict.",
    ];
    return sections.join("\n\n");
};
|
|
7
|
+
/**
 * System prompt for the spec-agnostic diff summary: asks for 1-3 neutral
 * bullets returned through the emit_diff_summary tool.
 */
export const BIDIRECTIONAL_SYSTEM = [
    "Summarize what a code diff DOES in 1-3 short bullets.",
    "Be neutral; do not reference the spec.",
    "Use only the emit_diff_summary tool.",
].join(" ");
|
|
8
|
+
/**
 * Build the user message for the diff-summary call.
 *
 * @param diffSummary Diff text, interpolated verbatim.
 * @returns The prompt: the diff section followed by the summarize instruction.
 */
export const BIDIRECTIONAL_USER = (diffSummary) => {
    const diffSection = `Diff:\n${diffSummary}`;
    const instruction = "Summarize what this diff does in 1-3 bullets. Call emit_diff_summary.";
    return `${diffSection}\n\n${instruction}`;
};
|
|
9
|
+
/**
 * System prompt for the adversarial pass: asks for specific, plausible ways
 * the diff could fail the spec (or an empty list), returned via emit_concerns.
 */
export const ADVERSARIAL_SYSTEM = [
    "You are a skeptical reviewer.",
    "Identify ways this diff might plausibly fail to satisfy the spec.",
    "Be specific.",
    "If no concern is real, return an empty list.",
    "Use the emit_concerns tool.",
].join(" ");
|
|
10
|
+
/**
 * Build the user message for the adversarial pass, in spec -> rubric -> diff order.
 *
 * @param specSummary Spec summary text.
 * @param diffSummary Diff text under review.
 * @param rubricText Rubric rendered as text.
 * @returns The assembled prompt; sections are separated by blank lines.
 */
export const ADVERSARIAL_USER = (specSummary, diffSummary, rubricText) => {
    const specSection = `Spec summary:\n${specSummary}`;
    const rubricSection = `Rubric:\n${rubricText}`;
    const diffSection = `Diff:\n${diffSummary}`;
    const instruction = "List specific concerns about whether this diff satisfies the spec. Empty list if none. Call emit_concerns.";
    return [specSection, rubricSection, diffSection, instruction].join("\n\n");
};
|
|
11
|
+
//# sourceMappingURL=prompts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompts.js","sourceRoot":"","sources":["../../src/spec-checker/prompts.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,aAAa,GACxB,iUAAiU,CAAC;AAEpU,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,WAAmB,EAAU,EAAE,CACzD,kBAAkB,WAAW,gEAAgE,CAAC;AAEhG,MAAM,CAAC,MAAM,mBAAmB,GAC9B,yHAAyH,CAAC;AAE5H,MAAM,CAAC,MAAM,YAAY,GACvB,sUAAsU,CAAC;AAEzU,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,WAAmB,EACnB,SAAiB,EACjB,OAAe,EACf,WAAmB,EACX,EAAE,CACV,kBAAkB,WAAW,0BAA0B,OAAO,OAAO,SAAS,cAAc,WAAW,kFAAkF,CAAC;AAE5L,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAChC,WAAmB,EACnB,SAAiB,EACjB,OAAe,EACf,WAAmB,EACX,EAAE,CACV,UAAU,WAAW,sBAAsB,WAAW,0BAA0B,OAAO,OAAO,SAAS,kFAAkF,CAAC;AAE5L,MAAM,CAAC,MAAM,oBAAoB,GAC/B,mIAAmI,CAAC;AAEtI,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,WAAmB,EAAU,EAAE,CAChE,UAAU,WAAW,2EAA2E,CAAC;AAEnG,MAAM,CAAC,MAAM,kBAAkB,GAC7B,uLAAuL,CAAC;AAE1L,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAC9B,WAAmB,EACnB,WAAmB,EACnB,UAAkB,EACV,EAAE,CACV,kBAAkB,WAAW,gBAAgB,UAAU,cAAc,WAAW,gHAAgH,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { SpecRubric } from "@keel_flow/schema";
|
|
2
|
+
import type { ModelProvider } from "../provider.js";
|
|
3
|
+
/**
 * Usage figures returned with a rubric decomposition: input/output token
 * counts, the computed USD cost, and the model id the call was made with.
 */
export interface RubricCallStats {
|
|
4
|
+
inputTokens: number;
|
|
5
|
+
outputTokens: number;
|
|
6
|
+
costUsd: number;
|
|
7
|
+
model: string;
|
|
8
|
+
}
|
|
9
|
+
/** Result of decomposeSpec: the validated rubric plus the usage stats for producing it. */
export interface RubricResult {
|
|
10
|
+
rubric: SpecRubric;
|
|
11
|
+
stats: RubricCallStats;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Ask `provider` (using `model`) to decompose `specSummary` into a rubric via
 * the emit_rubric tool. Resolves with the schema-validated rubric and usage
 * stats; rejects with an Error when two attempts fail to yield a valid rubric.
 */
export declare function decomposeSpec(provider: ModelProvider, specSummary: string, model: string): Promise<RubricResult>;
|
|
14
|
+
//# sourceMappingURL=rubric.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rubric.d.ts","sourceRoot":"","sources":["../../src/spec-checker/rubric.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAkB,MAAM,gBAAgB,CAAC;AAIpE,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,UAAU,CAAC;IACnB,KAAK,EAAE,eAAe,CAAC;CACxB;AA2BD,wBAAsB,aAAa,CACjC,QAAQ,EAAE,aAAa,EACvB,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,YAAY,CAAC,CAuDvB"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { SpecRubricSchema } from "@keel_flow/schema";
|
|
2
|
+
import { computeCost } from "../cost.js";
|
|
3
|
+
import { RUBRIC_SYSTEM, RUBRIC_USER, RUBRIC_RETRY_PREFIX } from "./prompts.js";
|
|
4
|
+
// JSON Schema for a single rubric claim: id, text and a weight drawn from a
// fixed three-value enum; all three fields are required.
const rubricClaimSchema = {
    type: "object",
    properties: {
        id: { type: "string" },
        text: { type: "string" },
        weight: { type: "string", enum: ["must", "should", "may"] },
    },
    required: ["id", "text", "weight"],
};

// Tool definition handed to the provider so the model returns the rubric as
// structured input rather than free text.
// NOTE(review): the description advertises 3-10 claims while the schema only
// enforces minItems: 1 with no upper bound — confirm this leniency is intended.
const rubricTool = {
    name: "emit_rubric",
    description: "Emit the rubric: an array of 3-10 atomic claims with id, text, and weight.",
    inputSchema: {
        type: "object",
        properties: {
            claims: {
                type: "array",
                minItems: 1,
                items: rubricClaimSchema,
            },
        },
        required: ["claims"],
    },
};
|
|
27
|
+
/**
 * Decompose a spec summary into a validated rubric by forcing the model to
 * call the emit_rubric tool.
 *
 * Up to two attempts are made. The second attempt prefixes the prompt with
 * the reason the first one was rejected (missing tool call or schema errors).
 *
 * @param provider Model provider; must expose `kind` and `generate(request)`.
 * @param specSummary Spec summary text to decompose.
 * @param model Model id passed to the provider and echoed in the stats.
 * @returns `{ rubric, stats }` where `stats` holds token counts, cost and model.
 *   Token counts and cost are summed over every attempt made, so a retry is
 *   not silently dropped from the reported spend.
 * @throws {Error} When no attempt yields a rubric that passes SpecRubricSchema.
 */
export async function decomposeSpec(provider, specSummary, model) {
    const maxAttempts = 2;
    let lastError = "";
    // Usage is accumulated across attempts: a rejected first call still cost tokens.
    let inputTokens = 0;
    let outputTokens = 0;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        const basePrompt = RUBRIC_USER(specSummary);
        const userPrompt = attempt === 1
            ? basePrompt
            : RUBRIC_RETRY_PREFIX + lastError + "\n\n" + basePrompt;
        const response = await provider.generate({
            model,
            system: RUBRIC_SYSTEM,
            messages: [{ role: "user", content: [{ type: "text", text: userPrompt }] }],
            tools: [rubricTool],
            toolChoice: { name: "emit_rubric" },
            maxTokens: 2048,
        });
        inputTokens += response.usage.inputTokens;
        outputTokens += response.usage.outputTokens;
        const toolUse = response.content.find((block) => block.type === "tool_use");
        if (!toolUse) {
            lastError = "No tool_use block returned. You must call emit_rubric.";
            continue;
        }
        const parsed = SpecRubricSchema.safeParse(toolUse.input);
        if (!parsed.success) {
            // `issues` is the canonical ZodError field; `errors` is only a legacy alias.
            lastError = parsed.error.issues
                .map((issue) => `${issue.path.join(".")}: ${issue.message}`)
                .join("; ");
            continue;
        }
        return {
            rubric: parsed.data,
            stats: {
                inputTokens,
                outputTokens,
                costUsd: computeCost(provider.kind, model, inputTokens, outputTokens),
                model,
            },
        };
    }
    throw new Error(`decomposeSpec: rubric validation failed after ${maxAttempts} attempts: ${lastError}`);
}
|
|
68
|
+
//# sourceMappingURL=rubric.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rubric.js","sourceRoot":"","sources":["../../src/spec-checker/rubric.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAc/E,MAAM,UAAU,GAAmB;IACjC,IAAI,EAAE,aAAa;IACnB,WAAW,EACT,4EAA4E;IAC9E,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,MAAM,EAAE;gBACN,IAAI,EAAE,OAAO;gBACb,QAAQ,EAAE,CAAC;gBACX,KAAK,EAAE;oBACL,IAAI,EAAE,QAAQ;oBACd,UAAU,EAAE;wBACV,EAAE,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;wBACtB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;wBACxB,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,EAAE;qBAC5D;oBACD,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC;iBACnC;aACF;SACF;QACD,QAAQ,EAAE,CAAC,QAAQ,CAAC;KACrB;CACF,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAuB,EACvB,WAAmB,EACnB,KAAa;IAEb,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,EAAE,CAAC;IAEnB,OAAO,QAAQ,GAAG,CAAC,EAAE,CAAC;QACpB,QAAQ,EAAE,CAAC;QACX,MAAM,UAAU,GACd,QAAQ,KAAK,CAAC;YACZ,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC;YAC1B,CAAC,CAAC,mBAAmB,GAAG,SAAS,GAAG,MAAM,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;QAE1E,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC;YACvC,KAAK;YACL,MAAM,EAAE,aAAa;YACrB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;YAC3E,KAAK,EAAE,CAAC,UAAU,CAAC;YACnB,UAAU,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;YACnC,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC;QACpE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC5C,SAAS,GAAG,wDAAwD,CAAC;YACrE,SAAS;QACX,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACzD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM;iBAC5B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC;iBAC/C,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,WAAW,CACzB,QAAQ,CAAC
,IAAI,EACb,KAAK,EACL,QAAQ,CAAC,KAAK,CAAC,WAAW,EAC1B,QAAQ,CAAC,KAAK,CAAC,YAAY,CAC5B,CAAC;QAEF,OAAO;YACL,MAAM,EAAE,MAAM,CAAC,IAAI;YACnB,KAAK,EAAE;gBACL,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,WAAW;gBACvC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;gBACzC,OAAO;gBACP,KAAK;aACN;SACF,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,KAAK,CACb,6DAA6D,SAAS,EAAE,CACzE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * One regression case: the verdict that was expected and the verdict actually
 * produced. `actual` additionally admits "pending".
 */
export type RegressionResult = {
|
|
2
|
+
expected: "pass" | "fail" | "partial";
|
|
3
|
+
actual: "pass" | "fail" | "partial" | "pending";
|
|
4
|
+
};
|
|
5
|
+
/**
 * Confusion-matrix tallies where the positive class is "flag" (any verdict
 * other than "pass") and the negative class is "clean" ("pass").
 */
export interface ConfusionCounts {
|
|
6
|
+
truePositives: number;
|
|
7
|
+
falseNegatives: number;
|
|
8
|
+
trueNegatives: number;
|
|
9
|
+
falsePositives: number;
|
|
10
|
+
}
|
|
11
|
+
/** Tally expected-vs-actual outcomes into confusion counts; any verdict other than "pass" counts as flagged. */
export declare function confusion(results: RegressionResult[]): ConfusionCounts;
|
|
12
|
+
/** truePositives / (truePositives + falseNegatives); returns 1 when no case was expected to be flagged. */
export declare function sensitivity(results: RegressionResult[]): number;
|
|
13
|
+
/** trueNegatives / (trueNegatives + falsePositives); returns 1 when no case was expected to pass. */
export declare function specificity(results: RegressionResult[]): number;
|
|
14
|
+
/** Mean of sensitivity and specificity over the same results. */
export declare function balancedAccuracy(results: RegressionResult[]): number;
|
|
15
|
+
/** Youden's J statistic: sensitivity + specificity - 1. */
export declare function youdenJ(results: RegressionResult[]): number;
|
|
16
|
+
//# sourceMappingURL=score.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../../src/spec-checker/score.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,gBAAgB,GAAG;IAC7B,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;IACtC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,CAAC;CACjD,CAAC;AAUF,MAAM,WAAW,eAAe;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,wBAAgB,SAAS,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,eAAe,CAmBtE;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAI/D;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAI/D;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAEpE;AAED,wBAAgB,OAAO,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAE3D"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * Collapse a verdict into the binary class used for scoring.
 *
 * @param verdict Verdict string.
 * @returns "clean" for "pass"; "flag" for every other value.
 */
function toClass(verdict) {
    if (verdict === "pass") {
        return "clean";
    }
    return "flag";
}
|
|
4
|
+
/**
 * Tally regression results into a confusion matrix with "flag" as the
 * positive class.
 *
 * @param results Items carrying `expected` and `actual` verdicts.
 * @returns Counts of true/false positives and negatives.
 */
export function confusion(results) {
    const tally = {
        truePositives: 0,
        falseNegatives: 0,
        trueNegatives: 0,
        falsePositives: 0,
    };
    for (const { expected, actual } of results) {
        const shouldFlag = toClass(expected) === "flag";
        const didFlag = toClass(actual) === "flag";
        if (shouldFlag && didFlag) {
            tally.truePositives += 1;
        } else if (shouldFlag) {
            tally.falseNegatives += 1;
        } else if (didFlag) {
            tally.falsePositives += 1;
        } else {
            tally.trueNegatives += 1;
        }
    }
    return tally;
}
|
|
29
|
+
/**
 * Fraction of expected-flag cases that were actually flagged.
 *
 * @param results Regression results to score.
 * @returns truePositives / (truePositives + falseNegatives), or 1 when that denominator is 0.
 */
export function sensitivity(results) {
    const counts = confusion(results);
    const expectedFlags = counts.truePositives + counts.falseNegatives;
    if (expectedFlags === 0) {
        return 1;
    }
    return counts.truePositives / expectedFlags;
}
|
|
34
|
+
/**
 * Fraction of expected-clean cases that actually came out clean.
 *
 * @param results Regression results to score.
 * @returns trueNegatives / (trueNegatives + falsePositives), or 1 when that denominator is 0.
 */
export function specificity(results) {
    const counts = confusion(results);
    const expectedClean = counts.trueNegatives + counts.falsePositives;
    if (expectedClean === 0) {
        return 1;
    }
    return counts.trueNegatives / expectedClean;
}
|
|
39
|
+
/**
 * Balanced accuracy: the arithmetic mean of sensitivity and specificity.
 *
 * @param results Regression results to score.
 * @returns (sensitivity + specificity) / 2.
 */
export function balancedAccuracy(results) {
    const sens = sensitivity(results);
    const spec = specificity(results);
    return (sens + spec) / 2;
}
|
|
42
|
+
/**
 * Youden's J statistic for the results.
 *
 * @param results Regression results to score.
 * @returns sensitivity + specificity - 1.
 */
export function youdenJ(results) {
    const sens = sensitivity(results);
    const spec = specificity(results);
    return sens + spec - 1;
}
|
|
45
|
+
//# sourceMappingURL=score.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"score.js","sourceRoot":"","sources":["../../src/spec-checker/score.ts"],"names":[],"mappings":"AAOA,SAAS,OAAO,CACd,OAAkE;IAElE,OAAO,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;AAC/C,CAAC;AASD,MAAM,UAAU,SAAS,CAAC,OAA2B;IACnD,MAAM,CAAC,GAAoB;QACzB,aAAa,EAAE,CAAC;QAChB,cAAc,EAAE,CAAC;QACjB,aAAa,EAAE,CAAC;QAChB,cAAc,EAAE,CAAC;KAClB,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC9B,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;YACnB,IAAI,GAAG,KAAK,MAAM;gBAAE,CAAC,CAAC,aAAa,EAAE,CAAC;;gBACjC,CAAC,CAAC,cAAc,EAAE,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,KAAK,OAAO;gBAAE,CAAC,CAAC,aAAa,EAAE,CAAC;;gBAClC,CAAC,CAAC,cAAc,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAA2B;IACrD,MAAM,EAAE,aAAa,EAAE,cAAc,EAAE,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,aAAa,GAAG,cAAc,CAAC;IAC9C,OAAO,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,MAAM,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAA2B;IACrD,MAAM,EAAE,aAAa,EAAE,cAAc,EAAE,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;IAC7D,MAAM,OAAO,GAAG,aAAa,GAAG,cAAc,CAAC;IAC/C,OAAO,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,OAAO,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,OAA2B;IAC1D,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAA2B;IACjD,OAAO,WAAW,CAAC,OAAO,CAAC,GAAG,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;AACzD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"shim.d.ts","sourceRoot":"","sources":["../../src/spec-checker/shim.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EACV,aAAa,EAGd,MAAM,gBAAgB,CAAC;AAExB,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,SAAS,EACjB,YAAY,EAAE,MAAM,GACnB,aAAa,CAgFf"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
 * Adapt an Anthropic-SDK-style client to the runtime's provider shape.
 *
 * @param client Object exposing `messages.create(args)` that resolves to a
 *   response with `content`, `stop_reason` and `usage`.
 * @param defaultModel Model id exposed as `defaultModel` on the returned provider.
 * @returns Provider with `kind: "anthropic"`, `defaultModel` and an async
 *   `generate(req)` that returns `{ content, stopReason, usage }`.
 */
export function buildAnthropicShim(client, defaultModel) {
    // Stop reasons passed through unchanged; anything else is reported as "end_turn".
    const passThroughStopReasons = new Set(["tool_use", "max_tokens", "stop_sequence"]);

    // Translate the request's toolChoice ("any" | "auto" | { name }) to the wire format.
    const toWireToolChoice = (toolChoice) => {
        if (toolChoice === "any") {
            return { type: "any" };
        }
        if (toolChoice === "auto") {
            return { type: "auto" };
        }
        if (toolChoice && typeof toolChoice === "object") {
            return { type: "tool", name: toolChoice.name };
        }
        return undefined;
    };

    return {
        kind: "anthropic",
        defaultModel,
        async generate(req) {
            const tools = req.tools?.map((t) => ({
                name: t.name,
                description: t.description,
                input_schema: {
                    type: "object",
                    ...t.inputSchema,
                },
            }));
            const tool_choice = toWireToolChoice(req.toolChoice);
            const callArgs = {
                model: req.model,
                max_tokens: req.maxTokens ?? 4096,
                // Only text blocks are forwarded; each message is flattened to one string.
                messages: req.messages.map((m) => ({
                    role: m.role,
                    content: m.content
                        .filter((b) => b.type === "text")
                        .map((b) => b.text)
                        .join(""),
                })),
                ...(req.system ? { system: req.system } : {}),
                ...(tools ? { tools } : {}),
                ...(tool_choice ? { tool_choice } : {}),
            };
            const response = await client.messages.create(callArgs);

            // Forward only text and tool_use blocks. Other block types (e.g. thinking)
            // are skipped instead of being mislabelled as tool_use with an empty
            // id/name, which callers searching for a tool call could pick up by mistake.
            const content = [];
            for (const block of response.content) {
                if (block.type === "text") {
                    content.push({ type: "text", text: block.text ?? "" });
                } else if (block.type === "tool_use") {
                    content.push({
                        type: "tool_use",
                        id: block.id ?? "",
                        name: block.name ?? "",
                        input: block.input,
                    });
                }
            }

            const rawStopReason = response.stop_reason ?? "end_turn";
            const stopReason = passThroughStopReasons.has(rawStopReason)
                ? rawStopReason
                : "end_turn";
            return {
                content,
                stopReason,
                usage: {
                    inputTokens: response.usage.input_tokens,
                    outputTokens: response.usage.output_tokens,
                },
            };
        },
    };
}
|
|
69
|
+
//# sourceMappingURL=shim.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"shim.js","sourceRoot":"","sources":["../../src/spec-checker/shim.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,kBAAkB,CAChC,MAAiB,EACjB,YAAoB;IAEpB,OAAO;QACL,IAAI,EAAE,WAAW;QACjB,YAAY;QACZ,KAAK,CAAC,QAAQ,CAAC,GAAsB;YACnC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACnC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,YAAY,EAAE;oBACZ,IAAI,EAAE,QAAiB;oBACvB,GAAI,CAAC,CAAC,WAAuC;iBAC9C;aACF,CAAC,CAAC,CAAC;YAEJ,2GAA2G;YAC3G,IAAI,WAAgB,CAAC;YACrB,IAAI,GAAG,CAAC,UAAU,KAAK,KAAK;gBAAE,WAAW,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;iBACvD,IAAI,GAAG,CAAC,UAAU,KAAK,MAAM;gBAAE,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;iBAC9D,IAAI,GAAG,CAAC,UAAU,IAAI,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;gBAC9D,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YAC5D,CAAC;YAED,MAAM,QAAQ,GAAG;gBACf,KAAK,EAAE,GAAG,CAAC,KAAK;gBAChB,UAAU,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;gBACjC,QAAQ,EAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;oBAC/B,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO;yBACnB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;yBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;yBAC7C,IAAI,CAAC,EAAE,CAAC,CAAC;oBACZ,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAA4B,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;gBACjE,CAAC,CAAC;gBACF,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC3B,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACxC,CAAC;YAEF,6HAA6H;YAC7H,MAAM,QAAQ,GAAG,MAAO,MAAM,CAAC,QAAgB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAEjE,MAAM,OAAO,GACX,QAAQ,CAAC,OAOV,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;gBACd,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBAC1B,OAAO,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;gBAC3D,CAAC;gBACD,OAAO;oBACL,IAAI,EAAE,UAAmB;oBACzB,EAAE,EAAE,KAAK,CAAC,EAAE,IAAI,EA
AE;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE;oBACtB,KAAK,EAAE,KAAK,CAAC,KAAK;iBACnB,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,MAAM,EAAE,GAAG,CAAC,QAAQ,CAAC,WAAW,IAAI,UAAU,CAAW,CAAC;YAC1D,MAAM,UAAU,GACd,EAAE,KAAK,UAAU;gBACf,CAAC,CAAC,UAAU;gBACZ,CAAC,CAAC,EAAE,KAAK,YAAY;oBACnB,CAAC,CAAC,YAAY;oBACd,CAAC,CAAC,EAAE,KAAK,eAAe;wBACtB,CAAC,CAAC,eAAe;wBACjB,CAAC,CAAC,UAAU,CAAC;YAErB,OAAO;gBACL,OAAO;gBACP,UAAU;gBACV,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;oBACxC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,aAAa;iBAC3C;aACF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { createSpecChecker, toGateCheck, decomposeSpec, judgeClaims, judgeOneClaim, summarizeDiff, bidirectionalCheck, jaccard, cosine, adversarialPass, runWithPositionSwap, aggregateVerdict, loadCalibration, } from "./spec-checker/index.js";
|
|
2
|
+
export type { SpecCheckerOptions, SpecCheckArgs, SpecChecker, SpecCheckerCallable, CalibrationReport, LoadCalibrationOpts, } from "./spec-checker/index.js";
|
|
3
|
+
export type { SpecCheckResult } from "@keel_flow/schema";
|
|
4
|
+
//# sourceMappingURL=spec-checker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"spec-checker.d.ts","sourceRoot":"","sources":["../src/spec-checker.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,WAAW,EACX,aAAa,EACb,WAAW,EACX,aAAa,EACb,aAAa,EACb,kBAAkB,EAClB,OAAO,EACP,MAAM,EACN,eAAe,EACf,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,GAChB,MAAM,yBAAyB,CAAC;AACjC,YAAY,EACV,kBAAkB,EAClB,aAAa,EACb,WAAW,EACX,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,GACpB,MAAM,yBAAyB,CAAC;AACjC,YAAY,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export { createSpecChecker, toGateCheck, decomposeSpec, judgeClaims, judgeOneClaim, summarizeDiff, bidirectionalCheck, jaccard, cosine, adversarialPass, runWithPositionSwap, aggregateVerdict, loadCalibration, } from "./spec-checker/index.js";
|
|
2
|
+
//# sourceMappingURL=spec-checker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"spec-checker.js","sourceRoot":"","sources":["../src/spec-checker.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,WAAW,EACX,aAAa,EACb,WAAW,EACX,aAAa,EACb,aAAa,EACb,kBAAkB,EAClB,OAAO,EACP,MAAM,EACN,eAAe,EACf,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,GAChB,MAAM,yBAAyB,CAAC"}
|