@keel_flow/runtime 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +15 -0
- package/dist/agent-runtime.d.ts +8 -0
- package/dist/agent-runtime.d.ts.map +1 -0
- package/dist/agent-runtime.js +9 -0
- package/dist/agent-runtime.js.map +1 -0
- package/dist/client.d.ts +15 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +16 -0
- package/dist/client.js.map +1 -0
- package/dist/cost.d.ts +3 -0
- package/dist/cost.d.ts.map +1 -0
- package/dist/cost.js +52 -0
- package/dist/cost.js.map +1 -0
- package/dist/dispatch-subagent.d.ts +24 -0
- package/dist/dispatch-subagent.d.ts.map +1 -0
- package/dist/dispatch-subagent.js +90 -0
- package/dist/dispatch-subagent.js.map +1 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/provider.d.ts +55 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/provider.js +18 -0
- package/dist/provider.js.map +1 -0
- package/dist/providers/anthropic.d.ts +9 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +116 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/claude-bridge.d.ts +20 -0
- package/dist/providers/claude-bridge.d.ts.map +1 -0
- package/dist/providers/claude-bridge.js +109 -0
- package/dist/providers/claude-bridge.js.map +1 -0
- package/dist/providers/openai-compatible.d.ts +9 -0
- package/dist/providers/openai-compatible.d.ts.map +1 -0
- package/dist/providers/openai-compatible.js +171 -0
- package/dist/providers/openai-compatible.js.map +1 -0
- package/dist/run-agent.d.ts +43 -0
- package/dist/run-agent.d.ts.map +1 -0
- package/dist/run-agent.js +318 -0
- package/dist/run-agent.js.map +1 -0
- package/dist/spec-checker/__fixtures__/mock-provider.d.ts +20 -0
- package/dist/spec-checker/__fixtures__/mock-provider.d.ts.map +1 -0
- package/dist/spec-checker/__fixtures__/mock-provider.js +34 -0
- package/dist/spec-checker/__fixtures__/mock-provider.js.map +1 -0
- package/dist/spec-checker/adversarial.d.ts +15 -0
- package/dist/spec-checker/adversarial.d.ts.map +1 -0
- package/dist/spec-checker/adversarial.js +77 -0
- package/dist/spec-checker/adversarial.js.map +1 -0
- package/dist/spec-checker/aggregate.d.ts +17 -0
- package/dist/spec-checker/aggregate.d.ts.map +1 -0
- package/dist/spec-checker/aggregate.js +25 -0
- package/dist/spec-checker/aggregate.js.map +1 -0
- package/dist/spec-checker/bias.d.ts +16 -0
- package/dist/spec-checker/bias.d.ts.map +1 -0
- package/dist/spec-checker/bias.js +26 -0
- package/dist/spec-checker/bias.js.map +1 -0
- package/dist/spec-checker/bidirectional.d.ts +21 -0
- package/dist/spec-checker/bidirectional.d.ts.map +1 -0
- package/dist/spec-checker/bidirectional.js +97 -0
- package/dist/spec-checker/bidirectional.js.map +1 -0
- package/dist/spec-checker/calibration.d.ts +15 -0
- package/dist/spec-checker/calibration.d.ts.map +1 -0
- package/dist/spec-checker/calibration.js +58 -0
- package/dist/spec-checker/calibration.js.map +1 -0
- package/dist/spec-checker/claims.d.ts +26 -0
- package/dist/spec-checker/claims.d.ts.map +1 -0
- package/dist/spec-checker/claims.js +104 -0
- package/dist/spec-checker/claims.js.map +1 -0
- package/dist/spec-checker/index.d.ts +40 -0
- package/dist/spec-checker/index.d.ts.map +1 -0
- package/dist/spec-checker/index.js +308 -0
- package/dist/spec-checker/index.js.map +1 -0
- package/dist/spec-checker/prompts.d.ts +11 -0
- package/dist/spec-checker/prompts.d.ts.map +1 -0
- package/dist/spec-checker/prompts.js +11 -0
- package/dist/spec-checker/prompts.js.map +1 -0
- package/dist/spec-checker/rubric.d.ts +14 -0
- package/dist/spec-checker/rubric.d.ts.map +1 -0
- package/dist/spec-checker/rubric.js +68 -0
- package/dist/spec-checker/rubric.js.map +1 -0
- package/dist/spec-checker/score.d.ts +16 -0
- package/dist/spec-checker/score.d.ts.map +1 -0
- package/dist/spec-checker/score.js +45 -0
- package/dist/spec-checker/score.js.map +1 -0
- package/dist/spec-checker/shim.d.ts +4 -0
- package/dist/spec-checker/shim.d.ts.map +1 -0
- package/dist/spec-checker/shim.js +69 -0
- package/dist/spec-checker/shim.js.map +1 -0
- package/dist/spec-checker.d.ts +4 -0
- package/dist/spec-checker.d.ts.map +1 -0
- package/dist/spec-checker.js +2 -0
- package/dist/spec-checker.js.map +1 -0
- package/dist/tools.d.ts +6 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +18 -0
- package/dist/tools.js.map +1 -0
- package/package.json +45 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { computeCost } from "../cost.js";
|
|
2
|
+
import { BIDIRECTIONAL_SYSTEM, BIDIRECTIONAL_USER } from "./prompts.js";
|
|
3
|
+
import { summarizeDiff } from "./claims.js";
|
|
4
|
+
// Tool definition handed to the provider by bidirectionalCheck; its single
// required string field `summary` carries the model's description of the diff.
const summaryTool = {
    name: "emit_diff_summary",
    description: "Emit a 1-3 bullet summary of what the diff does.",
    inputSchema: {
        type: "object",
        properties: { summary: { type: "string" } },
        required: ["summary"],
    },
};
|
|
15
|
+
/**
 * Split text into a set of distinct, lower-cased word tokens.
 *
 * Every character that is not a letter, combining mark, digit or whitespace
 * is treated as a separator. Letters and digits are matched Unicode-aware, so
 * accented or non-Latin words survive intact instead of being cut at (or
 * reduced to nothing by) an ASCII-only filter. Tokens of one or two
 * characters are discarded.
 *
 * @param {string} s - Text to tokenize.
 * @returns {Set<string>} Unique tokens longer than two characters.
 */
function tokenize(s) {
    const words = s
        .toLowerCase()
        // \p{M} keeps combining marks attached to the letter they modify.
        .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
        .split(/\s+/);
    return new Set(words.filter((word) => word.length > 2));
}
|
|
22
|
+
/**
 * Jaccard similarity between the token sets of two strings.
 *
 * Both inputs are tokenized with `tokenize`; the result is
 * |intersection| / |union| of the two token sets.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} A value in [0, 1]; 1 when neither text yields any token.
 */
export function jaccard(a, b) {
    const ta = tokenize(a);
    const tb = tokenize(b);
    // Two token-less inputs are treated as identical; this also rules out a
    // zero denominator below.
    if (ta.size === 0 && tb.size === 0) {
        return 1;
    }
    let intersection = 0;
    for (const t of ta) {
        if (tb.has(t)) {
            intersection++;
        }
    }
    // At least one set is non-empty here, so the union size is always > 0.
    return intersection / (ta.size + tb.size - intersection);
}
|
|
36
|
+
/**
 * Cosine similarity of two numeric vectors.
 *
 * Missing (null/undefined) components count as 0.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector; must have the same length as `a`.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 * @throws {Error} When the vectors differ in length.
 */
export function cosine(a, b) {
    const size = a.length;
    if (size !== b.length) {
        throw new Error("cosine: vectors must have equal length");
    }
    let dotProduct = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    let idx = 0;
    while (idx < size) {
        const x = a[idx] ?? 0;
        const y = b[idx] ?? 0;
        dotProduct += x * y;
        sumSqA += x * x;
        sumSqB += y * y;
        idx += 1;
    }
    const hasZeroVector = sumSqA === 0 || sumSqB === 0;
    return hasZeroVector ? 0 : dotProduct / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}
|
|
54
|
+
/**
 * Ask the model to summarize the diff, then compare that summary with the
 * spec summary.
 *
 * The comparison uses cosine similarity of `opts.embed` vectors when an
 * embedder is supplied, and token-set Jaccard similarity otherwise.
 *
 * @param {object} provider - Model provider; its `generate` method and `kind` property are used.
 * @param {string} model - Model identifier passed to the provider and to cost computation.
 * @param {string} specSummary - Spec summary the diff summary is compared against.
 * @param {object} diff - Diff handed to `summarizeDiff`.
 * @param {object} [opts]
 * @param {number} [opts.threshold=0.55] - Similarity below this marks the result divergent.
 * @param {(text: string) => Promise<number[]>} [opts.embed] - Optional embedding function.
 * @returns {Promise<{summary: string, similarity: number, divergent: boolean,
 *   stats: {inputTokens: number, outputTokens: number, costUsd: number}}>}
 */
export async function bidirectionalCheck(provider, model, specSummary, diff, opts = {}) {
    const cutoff = opts.threshold ?? 0.55;
    const prompt = BIDIRECTIONAL_USER(summarizeDiff(diff));
    const response = await provider.generate({
        model,
        system: BIDIRECTIONAL_SYSTEM,
        messages: [{ role: "user", content: [{ type: "text", text: prompt }] }],
        tools: [summaryTool],
        toolChoice: { name: "emit_diff_summary" },
        maxTokens: 512,
    });

    // Falls back to an empty summary when no tool_use block (or no string
    // `summary` field) came back.
    const toolCall = response.content.find((part) => part.type === "tool_use");
    const summary = toolCall && typeof toolCall.input.summary === "string"
        ? toolCall.input.summary
        : "";

    let similarity;
    if (!opts.embed) {
        similarity = jaccard(specSummary, summary);
    } else {
        const vectors = await Promise.all([specSummary, summary].map((text) => opts.embed(text)));
        similarity = cosine(vectors[0], vectors[1]);
    }

    const { inputTokens, outputTokens } = response.usage;
    return {
        summary,
        similarity,
        divergent: similarity < cutoff,
        stats: {
            inputTokens,
            outputTokens,
            costUsd: computeCost(provider.kind, model, inputTokens, outputTokens),
        },
    };
}
|
|
97
|
+
//# sourceMappingURL=bidirectional.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bidirectional.js","sourceRoot":"","sources":["../../src/spec-checker/bidirectional.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AACxE,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAoB5C,MAAM,WAAW,GAAmB;IAClC,IAAI,EAAE,mBAAmB;IACzB,WAAW,EAAE,kDAAkD;IAC/D,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,OAAO,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;SAC5B;QACD,QAAQ,EAAE,CAAC,SAAS,CAAC;KACtB;CACF,CAAC;AAEF,SAAS,QAAQ,CAAC,CAAS;IACzB,OAAO,IAAI,GAAG,CACZ,CAAC;SACE,WAAW,EAAE;SACb,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;SAC5B,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAC/B,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,CAAS,EAAE,CAAS;IAC1C,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACvB,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACvB,IAAI,EAAE,CAAC,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC7C,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,CAAC,IAAI,EAAE;QAAE,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,YAAY,EAAE,CAAC;IAClD,MAAM,KAAK,GAAG,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC,IAAI,GAAG,YAAY,CAAC;IAC/C,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,YAAY,GAAG,KAAK,CAAC;AAC9B,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,CAAW,EAAE,CAAW;IAC7C,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACrB,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACrB,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC;QACf,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC;QACd,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC;IAChB,CAAC;IACD,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAuB,EACvB,KAAa,EACb,WAAmB,E
ACnB,IAAU,EACV,OAA0B,EAAE;IAE5B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC;IACzC,MAAM,WAAW,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC;QACvC,KAAK;QACL,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE;YACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,CAAC,WAAW,CAAC,EAAE,CAAC,EAAE;SACrF;QACD,KAAK,EAAE,CAAC,WAAW,CAAC;QACpB,UAAU,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;QACzC,SAAS,EAAE,GAAG;KACf,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC;IACpE,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAA8B,CAAC;QACrD,IAAI,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ;YAAE,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;IACjE,CAAC;IAED,IAAI,UAAkB,CAAC;IACvB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC1C,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC;YACvB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;SACpB,CAAC,CAAC;QACH,UAAU,GAAG,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACvC,CAAC;SAAM,CAAC;QACN,UAAU,GAAG,OAAO,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,OAAO,GAAG,WAAW,CACzB,QAAQ,CAAC,IAAI,EACb,KAAK,EACL,QAAQ,CAAC,KAAK,CAAC,WAAW,EAC1B,QAAQ,CAAC,KAAK,CAAC,YAAY,CAC5B,CAAC;IAEF,OAAO;QACL,OAAO;QACP,UAAU;QACV,SAAS,EAAE,UAAU,GAAG,SAAS;QACjC,KAAK,EAAE;YACL,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,WAAW;YACvC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;YACzC,OAAO;SACR;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { AgentEvent } from "@keel_flow/schema";
|
|
2
|
+
/** Aggregated claim statistics for one spec-type group of a calibration report. */
export interface SpecTypeStats {
    /** Number of claim events counted for the group. */
    n: number;
    /** Fraction of those claims with a "pass" or "fail" verdict at confidence >= 0.8. */
    precisionApprox: number;
}
|
|
6
|
+
/** Result of `loadCalibration`. */
export interface CalibrationReport {
    /** True when the number of grouped claim events reaches the minimum sample count. */
    calibrated: boolean;
    /** Statistics keyed by spec-type key; omitted when no rubric events were found. */
    perSpecType?: Record<string, SpecTypeStats>;
}
|
|
10
|
+
/** Options accepted by `loadCalibration`. */
export interface LoadCalibrationOpts {
    /** Supplies the events to analyse. */
    readEvents: () => Promise<Array<AgentEvent>>;
    /** Minimum number of grouped claim events for `calibrated` to be true; defaults to 50. */
    minSamples?: number;
}
|
|
14
|
+
/**
 * Build a calibration report from "spec-judge.rubric" and "spec-judge.claim" events.
 *
 * @param opts - Event source and optional minimum sample count.
 * @returns The report; `{ calibrated: false }` when there are no rubric events.
 */
export declare function loadCalibration(
    opts: LoadCalibrationOpts,
): Promise<CalibrationReport>;
|
|
15
|
+
//# sourceMappingURL=calibration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibration.d.ts","sourceRoot":"","sources":["../../src/spec-checker/calibration.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,MAAM,WAAW,aAAa;IAC5B,CAAC,EAAE,MAAM,CAAC;IACV,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,OAAO,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;CAC7C;AAED,MAAM,WAAW,mBAAmB;IAClC,UAAU,EAAE,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAYD,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,GACxB,OAAO,CAAC,iBAAiB,CAAC,CA4D5B"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
 * Derive a grouping key from a spec summary: its first three lower-cased
 * words, where anything other than a-z, 0-9 or whitespace counts as a
 * separator.
 *
 * @param {string} summary - Spec summary text.
 * @returns {string} Up to three words joined by single spaces; "" when no word remains.
 */
function specTypeKey(summary) {
    const normalized = summary.toLowerCase().replace(/[^a-z0-9\s]/g, " ");
    const words = normalized.split(/\s+/).filter((word) => word.length > 0);
    return words.slice(0, 3).join(" ");
}
|
|
10
|
+
/**
 * Build a calibration report from recorded spec-judge events.
 *
 * Rubric events ("spec-judge.rubric") supply a spec summary per event id;
 * claim events ("spec-judge.claim") are attached to a rubric through
 * `parentEventId` and grouped by the `specTypeKey` of that rubric's summary.
 * Claims without a parent id, or whose parent has no (non-empty) summary,
 * are ignored.
 *
 * @param {object} opts
 * @param {() => Promise<object[]>} opts.readEvents - Supplies the events to analyse.
 * @param {number} [opts.minSamples=50] - Grouped-claim count needed for `calibrated`.
 * @returns {Promise<{calibrated: boolean, perSpecType?: Object<string, {n: number, precisionApprox: number}>}>}
 *   `{ calibrated: false }` (no `perSpecType`) when there are no rubric events.
 */
export async function loadCalibration(opts) {
    const minSamples = opts.minSamples ?? 50;
    const events = await opts.readEvents();

    const byKind = (kind) => events.filter((event) => event.kind === kind);
    const rubricEvents = byKind("spec-judge.rubric");
    const claimEvents = byKind("spec-judge.claim");
    if (rubricEvents.length === 0) {
        return { calibrated: false };
    }

    // Rubric id -> spec summary. The payload's own eventId wins over the event id.
    const summaries = new Map();
    for (const rubric of rubricEvents) {
        const { payload } = rubric;
        if (typeof payload.specSummary !== "string") {
            continue;
        }
        const rubricId = typeof payload.eventId === "string" ? payload.eventId : rubric.id;
        summaries.set(rubricId, payload.specSummary);
    }

    // Spec-type key -> running counters.
    const groups = new Map();
    for (const claim of claimEvents) {
        const summary = claim.parentEventId ? summaries.get(claim.parentEventId) : undefined;
        if (!summary) {
            continue;
        }
        const groupKey = specTypeKey(summary);
        const { confidence, verdict } = claim.payload;
        const isHighConfidence = typeof confidence === "number" && confidence >= 0.8;
        const isDecisive = verdict === "pass" || verdict === "fail";

        let counters = groups.get(groupKey);
        if (counters === undefined) {
            counters = { n: 0, highConfHits: 0 };
            groups.set(groupKey, counters);
        }
        counters.n += 1;
        if (isHighConfidence && isDecisive) {
            counters.highConfHits += 1;
        }
    }

    const perSpecType = {};
    let totalN = 0;
    for (const [groupKey, { n, highConfHits }] of groups) {
        totalN += n;
        perSpecType[groupKey] = {
            n,
            precisionApprox: n === 0 ? 0 : highConfHits / n,
        };
    }
    return { calibrated: totalN >= minSamples, perSpecType };
}
|
|
58
|
+
//# sourceMappingURL=calibration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibration.js","sourceRoot":"","sources":["../../src/spec-checker/calibration.ts"],"names":[],"mappings":"AAiBA,SAAS,WAAW,CAAC,OAAe;IAClC,OAAO,OAAO;SACX,WAAW,EAAE;SACb,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;SAC5B,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;SACX,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,IAAyB;IAEzB,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;IAEvC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,mBAAmB,CAAC,CAAC;IAC1E,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,kBAAkB,CAAC,CAAC;IAExE,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;IAE5D,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,CAAC,CAAC,OAAuD,CAAC;QAC1E,IAAI,OAAO,OAAO,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;YAC5C,MAAM,GAAG,GACP,OAAO,OAAO,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/D,gBAAgB,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,GAAG,EAGrB,CAAC;IAEJ,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,CAAC,CAAC,aAAa,CAAC;QACjC,IAAI,CAAC,QAAQ;YAAE,SAAS;QACxB,MAAM,OAAO,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC/C,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QACjC,MAAM,OAAO,GAAG,CAAC,CAAC,OAGjB,CAAC;QACF,MAAM,UAAU,GACd,OAAO,OAAO,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,MAAM,OAAO,GAAG,OAAO,OAAO,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAE3E,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;QAC7D,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC;QACb,IAAI,UAAU,IAAI,GAAG,IAAI,CAAC,OAAO,KAAK,MAAM,IAAI,OAAO,KAAK,MAAM,CAAC,EAAE,CAAC;YACpE,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;
QAC1B,CAAC;QACD,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,WAAW,GAAkC,EAAE,CAAC;IACtD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC;QACxC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC;QACd,WAAW,CAAC,CAAC,CAAC,GAAG;YACf,CAAC,EAAE,CAAC,CAAC,CAAC;YACN,eAAe,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;SACtD,CAAC;IACJ,CAAC;IAED,OAAO;QACL,UAAU,EAAE,MAAM,IAAI,UAAU;QAChC,WAAW;KACZ,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { SpecClaimVerdict, SpecRubric } from "@keel_flow/schema";
|
|
2
|
+
import type { Diff } from "@keel_flow/core";
|
|
3
|
+
import type { ModelProvider } from "../provider.js";
|
|
4
|
+
/** Token usage and cost of one or more claim-judging provider calls. */
export interface ClaimJudgeStats {
    /** Input tokens reported by the provider. */
    inputTokens: number;
    /** Output tokens reported by the provider. */
    outputTokens: number;
    /** Cost in USD as returned by `computeCost`. */
    costUsd: number;
}
|
|
9
|
+
/** Outcome of judging a single claim. */
export interface ClaimJudgeResult {
    /** Parsed verdict; its `claimId` is set to the id of the judged claim. */
    verdict: SpecClaimVerdict;
    /** Usage and cost of the provider call that produced the verdict. */
    stats: ClaimJudgeStats;
}
|
|
13
|
+
/**
 * Render a diff as size-limited text: one header line per file, followed by
 * an excerpt of the file's added text when there is room for it.
 *
 * @param diff - Diff to render.
 * @returns The rendered text, or "(empty diff)" when the diff has no files.
 */
export declare function summarizeDiff(
    diff: Diff,
): string;
|
|
14
|
+
/** Options for `judgeOneClaim`. */
export interface JudgeClaimOpts {
    /** When truthy, the user prompt is built with the swapped prompt template. */
    swapped?: boolean;
}
|
|
17
|
+
/**
 * Judge one rubric claim against a diff with a single provider call.
 *
 * @param provider - Model provider used for the call.
 * @param model - Model identifier.
 * @param specSummary - Summary of the spec the claim belongs to.
 * @param claim - The claim to judge.
 * @param diff - Diff under review.
 * @param opts - Optional prompt variant selection.
 * @returns The verdict plus usage stats; rejects when the response holds no tool_use block.
 */
export declare function judgeOneClaim(
    provider: ModelProvider,
    model: string,
    specSummary: string,
    claim: SpecRubric["claims"][number],
    diff: Diff,
    opts?: JudgeClaimOpts,
): Promise<ClaimJudgeResult>;
|
|
18
|
+
/** Options for `judgeClaims`. */
export interface JudgeClaimsOpts {
    /** Maximum number of claims judged at the same time; defaults to 4, minimum 1. */
    concurrency?: number;
}
|
|
21
|
+
/** Outcome of judging every claim of a rubric. */
export interface JudgeClaimsResult {
    /** Verdicts stored at the index of their claim in the rubric. */
    verdicts: Array<SpecClaimVerdict>;
    /** Token usage and cost summed over all claim calls. */
    stats: ClaimJudgeStats;
}
|
|
25
|
+
/**
 * Judge all claims of a rubric against a diff, several at a time.
 *
 * @param provider - Model provider used for every claim call.
 * @param model - Model identifier.
 * @param specSummary - Summary of the spec under review.
 * @param rubric - Rubric whose `claims` are judged.
 * @param diff - Diff under review.
 * @param opts - Optional concurrency limit.
 * @param onResult - Called after each claim with its verdict, its stats and `false` for `swapped`.
 * @returns All verdicts plus aggregated usage stats.
 */
export declare function judgeClaims(
    provider: ModelProvider,
    model: string,
    specSummary: string,
    rubric: SpecRubric,
    diff: Diff,
    opts?: JudgeClaimsOpts,
    onResult?: (
        verdict: SpecClaimVerdict,
        stats: ClaimJudgeStats,
        swapped: boolean,
    ) => void,
): Promise<JudgeClaimsResult>;
|
|
26
|
+
//# sourceMappingURL=claims.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims.d.ts","sourceRoot":"","sources":["../../src/spec-checker/claims.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACtE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,KAAK,EAAE,aAAa,EAAkB,MAAM,gBAAgB,CAAC;AAIpE,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,gBAAgB,CAAC;IAC1B,KAAK,EAAE,eAAe,CAAC;CACxB;AAoBD,wBAAgB,aAAa,CAAC,IAAI,EAAE,IAAI,GAAG,MAAM,CA+BhD;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,wBAAsB,aAAa,CACjC,QAAQ,EAAE,aAAa,EACvB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,EACnC,IAAI,EAAE,IAAI,EACV,IAAI,GAAE,cAAmB,GACxB,OAAO,CAAC,gBAAgB,CAAC,CAsC3B;AAED,MAAM,WAAW,eAAe;IAC9B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,KAAK,EAAE,eAAe,CAAC;CACxB;AAED,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,aAAa,EACvB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,UAAU,EAClB,IAAI,EAAE,IAAI,EACV,IAAI,GAAE,eAAoB,EAC1B,QAAQ,CAAC,EAAE,CACT,OAAO,EAAE,gBAAgB,EACzB,KAAK,EAAE,eAAe,EACtB,OAAO,EAAE,OAAO,KACb,IAAI,GACR,OAAO,CAAC,iBAAiB,CAAC,CA+B5B"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { SpecClaimVerdictSchema } from "@keel_flow/schema";
|
|
2
|
+
import { computeCost } from "../cost.js";
|
|
3
|
+
import { CLAIM_SYSTEM, CLAIM_USER, CLAIM_USER_SWAPPED } from "./prompts.js";
|
|
4
|
+
// Tool definition handed to the provider by judgeOneClaim; the model reports
// its verdict for a single claim through this schema.
const claimTool = {
    name: "emit_claim_verdict",
    description: "Emit a single verdict for one claim.",
    inputSchema: {
        type: "object",
        properties: {
            claimId: { type: "string" },
            verdict: {
                type: "string",
                enum: ["pass", "fail", "partial", "na"],
            },
            evidence: { type: "string" },
            confidence: {
                type: "number",
                minimum: 0,
                maximum: 1,
            },
        },
        // `evidence` is the only optional field.
        required: ["claimId", "verdict", "confidence"],
    },
};
|
|
18
|
+
// Upper bound on the added-text excerpt taken from any single file.
const DIFF_FILE_MAX_CHARS = 4000;
// Character budget for all file sections together (section separators are not counted).
const DIFF_TOTAL_MAX_CHARS = 24000;

/**
 * Render a diff as size-limited text.
 *
 * Each file contributes a header line `<status> <path> (+added/-removed)`,
 * followed by an excerpt of its added text when the remaining budget allows.
 * Excerpts that had to be cut end with a "…[truncated]" line. Once a file's
 * section would exceed the total budget, only its header plus the truncation
 * marker is emitted and all later files are left out.
 *
 * @param {{files: Array<{status: string, path: string, linesAdded: number,
 *   linesRemoved: number, addedText?: string}>}} diff - Diff to render.
 * @returns {string} File sections separated by blank lines, or "(empty diff)".
 */
export function summarizeDiff(diff) {
    if (diff.files.length === 0) {
        return "(empty diff)";
    }
    const sections = [];
    let used = 0;
    for (const file of diff.files) {
        const header = `${file.status} ${file.path} (+${file.linesAdded}/-${file.linesRemoved})`;
        const added = file.addedText;
        let section = header;
        if (added && added.length > 0) {
            // Keep 20 characters of slack beyond the header for the newline and truncation marker.
            const remaining = DIFF_TOTAL_MAX_CHARS - used - header.length - 20;
            const budget = Math.min(DIFF_FILE_MAX_CHARS, remaining);
            if (budget > 0) {
                const excerpt = added.length > budget
                    ? `${added.slice(0, budget)}\n…[truncated]`
                    : added;
                section = `${header}\n${excerpt}`;
            }
        }
        if (used + section.length > DIFF_TOTAL_MAX_CHARS) {
            sections.push(`${header}\n…[truncated]`);
            break;
        }
        used += section.length;
        sections.push(section);
    }
    return sections.join("\n\n");
}
|
|
46
|
+
/**
 * Judge a single rubric claim against a diff with one provider call.
 *
 * The returned verdict always carries the id of the claim that was passed
 * in, regardless of the `claimId` the model reported.
 *
 * @param {object} provider - Model provider; its `generate` method and `kind` property are used.
 * @param {string} model - Model identifier passed to the provider and to cost computation.
 * @param {string} specSummary - Summary of the spec the claim belongs to.
 * @param {{id: string, text: string}} claim - Claim to judge.
 * @param {object} diff - Diff handed to `summarizeDiff`.
 * @param {object} [opts]
 * @param {boolean} [opts.swapped] - Build the prompt with CLAIM_USER_SWAPPED instead of CLAIM_USER.
 * @returns {Promise<{verdict: object, stats: {inputTokens: number, outputTokens: number, costUsd: number}}>}
 * @throws {Error} When the response contains no tool_use block; schema
 *   validation errors from `SpecClaimVerdictSchema.parse` also propagate.
 */
export async function judgeOneClaim(provider, model, specSummary, claim, diff, opts = {}) {
    const diffSummary = summarizeDiff(diff);
    const buildPrompt = opts.swapped ? CLAIM_USER_SWAPPED : CLAIM_USER;
    const userPrompt = buildPrompt(specSummary, claim.text, claim.id, diffSummary);

    const response = await provider.generate({
        model,
        system: CLAIM_SYSTEM,
        messages: [
            { role: "user", content: [{ type: "text", text: userPrompt }] },
        ],
        tools: [claimTool],
        toolChoice: { name: "emit_claim_verdict" },
        maxTokens: 1024,
    });

    const toolCall = response.content.find((part) => part.type === "tool_use");
    if (!toolCall) {
        throw new Error(`judgeOneClaim: no emit_claim_verdict tool_use returned for claim ${claim.id}`);
    }

    const parsed = SpecClaimVerdictSchema.parse(toolCall.input);
    const { inputTokens, outputTokens } = response.usage;
    return {
        verdict: { ...parsed, claimId: claim.id },
        stats: {
            inputTokens,
            outputTokens,
            costUsd: computeCost(provider.kind, model, inputTokens, outputTokens),
        },
    };
}
|
|
74
|
+
/**
 * Judge every claim of a rubric against a diff using a small worker pool.
 *
 * Workers pull claims from a shared cursor, so at most `concurrency` provider
 * calls are in flight at once. Each verdict is stored at the index of its
 * claim. When any claim (or the `onResult` callback) fails, the returned
 * promise rejects with that error and no further claims are started; calls
 * already in flight still finish and are reported through `onResult`.
 *
 * @param {object} provider - Model provider forwarded to `judgeOneClaim`.
 * @param {string} model - Model identifier.
 * @param {string} specSummary - Summary of the spec under review.
 * @param {{claims: Array<{id: string, text: string}>}} rubric - Rubric whose claims are judged.
 * @param {object} diff - Diff under review.
 * @param {object} [opts]
 * @param {number} [opts.concurrency=4] - Maximum parallel calls; values below 1 are
 *   raised to 1 and a non-numeric value falls back to the default.
 * @param {(verdict: object, stats: object, swapped: boolean) => void} [onResult] -
 *   Invoked after each judged claim; `swapped` is always `false` here.
 * @returns {Promise<{verdicts: object[], stats: {inputTokens: number, outputTokens: number, costUsd: number}}>}
 */
export async function judgeClaims(provider, model, specSummary, rubric, diff, opts = {}, onResult) {
    const DEFAULT_CONCURRENCY = 4;
    // A NaN concurrency would make Array.from({ length: NaN }) spawn zero
    // workers and silently judge nothing, so fall back to the default.
    const requested = Number(opts.concurrency ?? DEFAULT_CONCURRENCY);
    const concurrency = Math.max(1, Number.isNaN(requested) ? DEFAULT_CONCURRENCY : requested);
    const items = rubric.claims;
    const verdicts = new Array(items.length);
    const aggStats = {
        inputTokens: 0,
        outputTokens: 0,
        costUsd: 0,
    };
    let cursor = 0;
    // Set on the first failure so idle workers stop issuing provider calls
    // for a run whose promise has already rejected.
    let failed = false;
    async function worker() {
        while (!failed) {
            const idx = cursor++;
            if (idx >= items.length) {
                return;
            }
            const claim = items[idx];
            if (!claim) {
                return;
            }
            try {
                const r = await judgeOneClaim(provider, model, specSummary, claim, diff);
                verdicts[idx] = r.verdict;
                aggStats.inputTokens += r.stats.inputTokens;
                aggStats.outputTokens += r.stats.outputTokens;
                aggStats.costUsd += r.stats.costUsd;
                if (onResult) {
                    onResult(r.verdict, r.stats, false);
                }
            } catch (err) {
                failed = true;
                throw err;
            }
        }
    }
    const workerCount = Math.min(concurrency, items.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return { verdicts, stats: aggStats };
}
|
|
104
|
+
//# sourceMappingURL=claims.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims.js","sourceRoot":"","sources":["../../src/spec-checker/claims.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAI3D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAa5E,MAAM,SAAS,GAAmB;IAChC,IAAI,EAAE,oBAAoB;IAC1B,WAAW,EAAE,sCAAsC;IACnD,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,OAAO,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC3B,OAAO,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE;YACpE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC5B,UAAU,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE;SACvD;QACD,QAAQ,EAAE,CAAC,SAAS,EAAE,SAAS,EAAE,YAAY,CAAC;KAC/C;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAG,IAAI,CAAC;AACjC,MAAM,oBAAoB,GAAG,KAAK,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,IAAU;IACtC,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,cAAc,CAAC;IAEnD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,YAAY,GAAG,CAAC;QAC7E,IAAI,QAAQ,GAAG,MAAM,CAAC;QAEtB,IAAI,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,mBAAmB,EAAE,oBAAoB,GAAG,UAAU,GAAG,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;YACrG,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;gBACf,MAAM,OAAO,GACX,CAAC,CAAC,SAAS,CAAC,MAAM,IAAI,MAAM;oBAC1B,CAAC,CAAC,CAAC,CAAC,SAAS;oBACb,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,GAAG,gBAAgB,CAAC;gBACtD,QAAQ,GAAG,GAAG,MAAM,KAAK,OAAO,EAAE,CAAC;YACrC,CAAC;QACH,CAAC;QAED,IAAI,UAAU,GAAG,QAAQ,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;YACxD,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,gBAAgB,CAAC,CAAC;YACtC,MAAM;QACR,CAAC;QAED,UAAU,IAAI,QAAQ,CAAC,MAAM,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAMD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAuB,EACvB,KAAa,EACb,WAAmB,EACnB,KAAmC,EACnC,IAAU,EACV,OAAuB,EAAE;IAEzB,MAAM,WAAW,GAAG,aAAa
,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO;QAC7B,CAAC,CAAC,kBAAkB,CAAC,WAAW,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,WAAW,CAAC;QACpE,CAAC,CAAC,UAAU,CAAC,WAAW,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,WAAW,CAAC,CAAC;IAE/D,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC;QACvC,KAAK;QACL,MAAM,EAAE,YAAY;QACpB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;QAC3E,KAAK,EAAE,CAAC,SAAS,CAAC;QAClB,UAAU,EAAE,EAAE,IAAI,EAAE,oBAAoB,EAAE;QAC1C,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC;IACpE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CACb,oEAAoE,KAAK,CAAC,EAAE,EAAE,CAC/E,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,sBAAsB,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC3D,MAAM,OAAO,GAAG,WAAW,CACzB,QAAQ,CAAC,IAAI,EACb,KAAK,EACL,QAAQ,CAAC,KAAK,CAAC,WAAW,EAC1B,QAAQ,CAAC,KAAK,CAAC,YAAY,CAC5B,CAAC;IAEF,OAAO;QACL,OAAO,EAAE,EAAE,GAAG,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE;QACzC,KAAK,EAAE;YACL,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,WAAW;YACvC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;YACzC,OAAO;SACR;KACF,CAAC;AACJ,CAAC;AAWD,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAuB,EACvB,KAAa,EACb,WAAmB,EACnB,MAAkB,EAClB,IAAU,EACV,OAAwB,EAAE,EAC1B,QAIS;IAET,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;IACvD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5B,MAAM,QAAQ,GAAuB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAoB;QAChC,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,CAAC;QACf,OAAO,EAAE,CAAC;KACX,CAAC;IAEF,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,UAAU,MAAM;QACnB,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC;YACrB,IAAI,GAAG,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAChC,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;YACzB,IAAI,CAAC,KAAK;gBAAE,OAAO;YACnB,MAAM,CAAC,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACzE,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC;YAC1B,QAAQ,CAAC,WAAW,IAAI,CAAC,CA
AC,KAAK,CAAC,WAAW,CAAC;YAC5C,QAAQ,CAAC,YAAY,IAAI,CAAC,CAAC,KAAK,CAAC,YAAY,CAAC;YAC9C,QAAQ,CAAC,OAAO,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;YACpC,IAAI,QAAQ;gBAAE,QAAQ,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,CAC5E,CAAC;IAEF,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import type { Diff } from "@keel_flow/core";
|
|
3
|
+
import type { ArchitectureMap, SpecCheckMode, SpecCheckResult, Violation } from "@keel_flow/schema";
|
|
4
|
+
import type { TelemetryEmitter } from "@keel_flow/telemetry";
|
|
5
|
+
import type { ModelProvider } from "../provider.js";
|
|
6
|
+
/**
 * Options for `createSpecChecker`.
 *
 * NOTE(review): the implementation is not part of this declaration file;
 * the field notes below are inferred from names and types — confirm against
 * the spec-checker implementation.
 */
export interface SpecCheckerOptions {
    /** Model provider to use. */
    provider?: ModelProvider;
    /** Anthropic SDK client; presumably an alternative to `provider` — TODO confirm. */
    client?: Anthropic;
    /** Model identifier. */
    model?: string;
    /** Spec-check mode. */
    mode?: SpecCheckMode;
    /** Telemetry emitter. */
    telemetry?: TelemetryEmitter;
    /** Text embedding function; presumably used by the bidirectional check — TODO confirm. */
    embed?: (text: string) => Promise<Array<number>>;
    /** Presumably the similarity threshold of the bidirectional check — TODO confirm. */
    bidirectionalThreshold?: number;
    /** Presumably the concurrency used when judging claims — TODO confirm. */
    claimConcurrency?: number;
}
|
|
16
|
+
/** Arguments passed to a `SpecChecker` call. */
export interface SpecCheckArgs {
    /** Diff to check. */
    diff: Diff;
    /** Architecture map supplied alongside the diff. */
    architecture: ArchitectureMap;
    /** Summary of the spec the diff is checked against. */
    specSummary: string;
}
|
|
21
|
+
/** A `SpecCheckResult` extended with a pass flag and a violation list. */
export interface SpecCheckerCallable extends SpecCheckResult {
    /** Whether the check passed. */
    passed: boolean;
    /** Violations reported by the check. */
    violations: Array<Violation>;
}
|
|
25
|
+
/** Asynchronous spec-check function as returned by `createSpecChecker`. */
export type SpecChecker = (
    args: SpecCheckArgs,
) => Promise<SpecCheckerCallable>;
|
|
26
|
+
/**
 * Convert a `SpecCheckResult` into a pass flag, a violation list and an
 * optional `pending` flag.
 *
 * NOTE(review): the mapping rules live in the implementation, which is not
 * visible from this declaration — confirm there before relying on details.
 *
 * @param r - Spec-check result to convert.
 */
export declare function toGateCheck(
    r: SpecCheckResult,
): {
    passed: boolean;
    violations: Array<Violation>;
    pending?: boolean;
};
|
|
31
|
+
/**
 * Create a `SpecChecker` function.
 *
 * @param opts - Optional checker configuration.
 * @returns The spec-check function.
 */
export declare function createSpecChecker(
    opts?: SpecCheckerOptions,
): SpecChecker;
|
|
32
|
+
export { decomposeSpec } from "./rubric.js";
|
|
33
|
+
export { judgeClaims, judgeOneClaim, summarizeDiff } from "./claims.js";
|
|
34
|
+
export { bidirectionalCheck, jaccard, cosine } from "./bidirectional.js";
|
|
35
|
+
export { adversarialPass } from "./adversarial.js";
|
|
36
|
+
export { runWithPositionSwap } from "./bias.js";
|
|
37
|
+
export { aggregateVerdict } from "./aggregate.js";
|
|
38
|
+
export { loadCalibration } from "./calibration.js";
|
|
39
|
+
export type { CalibrationReport, LoadCalibrationOpts } from "./calibration.js";
|
|
40
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/spec-checker/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,SAAS,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,KAAK,EACV,eAAe,EACf,aAAa,EACb,eAAe,EAEf,SAAS,EACV,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAE7D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AASpD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,MAAM,CAAC,EAAE,SAAS,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,aAAa,CAAC;IACrB,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,KAAK,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5C,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,IAAI,CAAC;IACX,YAAY,EAAE,eAAe,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,MAAM,WAAW,GAAG,CACxB,IAAI,EAAE,aAAa,KAChB,OAAO,CAAC,mBAAmB,CAAC,CAAC;AA4ClC,wBAAgB,WAAW,CAAC,CAAC,EAAE,eAAe,GAAG;IAC/C,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CA+BA;AAED,wBAAgB,iBAAiB,CAAC,IAAI,CAAC,EAAE,kBAAkB,GAAG,WAAW,CAkQxE;AAED,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACxE,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,mBAAmB,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,YAAY,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC"}
|