tryassay 0.3.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/pricing-enforcer.d.ts +45 -0
- package/dist/api/pricing-enforcer.js +144 -0
- package/dist/api/pricing-enforcer.js.map +1 -0
- package/dist/api/server.d.ts +28 -0
- package/dist/api/server.js +265 -0
- package/dist/api/server.js.map +1 -0
- package/dist/api/team-session.d.ts +59 -0
- package/dist/api/team-session.js +240 -0
- package/dist/api/team-session.js.map +1 -0
- package/dist/cli.js +142 -2
- package/dist/cli.js.map +1 -1
- package/dist/commands/api.d.ts +4 -0
- package/dist/commands/api.js +50 -0
- package/dist/commands/api.js.map +1 -0
- package/dist/commands/runtime.d.ts +69 -0
- package/dist/commands/runtime.js +673 -0
- package/dist/commands/runtime.js.map +1 -1
- package/dist/runtime/agent-loop.d.ts +6 -0
- package/dist/runtime/agent-loop.js +87 -5
- package/dist/runtime/agent-loop.js.map +1 -1
- package/dist/runtime/agent-spawner.d.ts +56 -0
- package/dist/runtime/agent-spawner.js +217 -0
- package/dist/runtime/agent-spawner.js.map +1 -0
- package/dist/runtime/agents/code-agent.d.ts +11 -0
- package/dist/runtime/agents/code-agent.js +90 -0
- package/dist/runtime/agents/code-agent.js.map +1 -0
- package/dist/runtime/agents/coordinator-agent.d.ts +20 -0
- package/dist/runtime/agents/coordinator-agent.js +182 -0
- package/dist/runtime/agents/coordinator-agent.js.map +1 -0
- package/dist/runtime/agents/ops-agent.d.ts +11 -0
- package/dist/runtime/agents/ops-agent.js +113 -0
- package/dist/runtime/agents/ops-agent.js.map +1 -0
- package/dist/runtime/agents/research-agent.d.ts +11 -0
- package/dist/runtime/agents/research-agent.js +114 -0
- package/dist/runtime/agents/research-agent.js.map +1 -0
- package/dist/runtime/agents/review-agent.d.ts +11 -0
- package/dist/runtime/agents/review-agent.js +96 -0
- package/dist/runtime/agents/review-agent.js.map +1 -0
- package/dist/runtime/agents/test-agent.d.ts +11 -0
- package/dist/runtime/agents/test-agent.js +114 -0
- package/dist/runtime/agents/test-agent.js.map +1 -0
- package/dist/runtime/capability-registry.d.ts +62 -0
- package/dist/runtime/capability-registry.js +191 -0
- package/dist/runtime/capability-registry.js.map +1 -0
- package/dist/runtime/collusion-detector.d.ts +35 -0
- package/dist/runtime/collusion-detector.js +97 -0
- package/dist/runtime/collusion-detector.js.map +1 -0
- package/dist/runtime/composition-verifier.d.ts +22 -0
- package/dist/runtime/composition-verifier.js +265 -0
- package/dist/runtime/composition-verifier.js.map +1 -0
- package/dist/runtime/confidence-calibrator.d.ts +10 -0
- package/dist/runtime/confidence-calibrator.js +95 -0
- package/dist/runtime/confidence-calibrator.js.map +1 -0
- package/dist/runtime/domain-coverage-analyzer.d.ts +24 -0
- package/dist/runtime/domain-coverage-analyzer.js +178 -0
- package/dist/runtime/domain-coverage-analyzer.js.map +1 -0
- package/dist/runtime/enriched-prompt-builder.d.ts +25 -0
- package/dist/runtime/enriched-prompt-builder.js +173 -0
- package/dist/runtime/enriched-prompt-builder.js.map +1 -0
- package/dist/runtime/gap-detector.d.ts +6 -0
- package/dist/runtime/gap-detector.js +111 -0
- package/dist/runtime/gap-detector.js.map +1 -0
- package/dist/runtime/human-escalation.d.ts +41 -0
- package/dist/runtime/human-escalation.js +122 -0
- package/dist/runtime/human-escalation.js.map +1 -0
- package/dist/runtime/kill-switch.d.ts +51 -0
- package/dist/runtime/kill-switch.js +185 -0
- package/dist/runtime/kill-switch.js.map +1 -0
- package/dist/runtime/layer2-guardian.d.ts +81 -0
- package/dist/runtime/layer2-guardian.js +263 -0
- package/dist/runtime/layer2-guardian.js.map +1 -0
- package/dist/runtime/message-bus.d.ts +57 -0
- package/dist/runtime/message-bus.js +115 -0
- package/dist/runtime/message-bus.js.map +1 -0
- package/dist/runtime/multi-agent-loop.d.ts +37 -0
- package/dist/runtime/multi-agent-loop.js +411 -0
- package/dist/runtime/multi-agent-loop.js.map +1 -0
- package/dist/runtime/pattern-extractor.d.ts +20 -0
- package/dist/runtime/pattern-extractor.js +257 -0
- package/dist/runtime/pattern-extractor.js.map +1 -0
- package/dist/runtime/planner.d.ts +2 -2
- package/dist/runtime/planner.js +10 -7
- package/dist/runtime/planner.js.map +1 -1
- package/dist/runtime/prompt-safety-analyzer.d.ts +17 -0
- package/dist/runtime/prompt-safety-analyzer.js +230 -0
- package/dist/runtime/prompt-safety-analyzer.js.map +1 -0
- package/dist/runtime/reasoner.d.ts +2 -2
- package/dist/runtime/reasoner.js +9 -5
- package/dist/runtime/reasoner.js.map +1 -1
- package/dist/runtime/reflector.d.ts +7 -1
- package/dist/runtime/reflector.js.map +1 -1
- package/dist/runtime/rollback-manager.d.ts +50 -0
- package/dist/runtime/rollback-manager.js +157 -0
- package/dist/runtime/rollback-manager.js.map +1 -0
- package/dist/runtime/rule-canary-deployer.d.ts +69 -0
- package/dist/runtime/rule-canary-deployer.js +289 -0
- package/dist/runtime/rule-canary-deployer.js.map +1 -0
- package/dist/runtime/rule-conflict-detector.d.ts +48 -0
- package/dist/runtime/rule-conflict-detector.js +214 -0
- package/dist/runtime/rule-conflict-detector.js.map +1 -0
- package/dist/runtime/rule-meta-verifier.d.ts +18 -0
- package/dist/runtime/rule-meta-verifier.js +275 -0
- package/dist/runtime/rule-meta-verifier.js.map +1 -0
- package/dist/runtime/rule-proposal-manager.d.ts +95 -0
- package/dist/runtime/rule-proposal-manager.js +190 -0
- package/dist/runtime/rule-proposal-manager.js.map +1 -0
- package/dist/runtime/safety-enforcer.d.ts +35 -0
- package/dist/runtime/safety-enforcer.js +165 -0
- package/dist/runtime/safety-enforcer.js.map +1 -0
- package/dist/runtime/safety-status.d.ts +48 -0
- package/dist/runtime/safety-status.js +119 -0
- package/dist/runtime/safety-status.js.map +1 -0
- package/dist/runtime/shadow-runner.d.ts +14 -0
- package/dist/runtime/shadow-runner.js +190 -0
- package/dist/runtime/shadow-runner.js.map +1 -0
- package/dist/runtime/shared-memory.d.ts +47 -0
- package/dist/runtime/shared-memory.js +151 -0
- package/dist/runtime/shared-memory.js.map +1 -0
- package/dist/runtime/specialized-agent.d.ts +72 -0
- package/dist/runtime/specialized-agent.js +123 -0
- package/dist/runtime/specialized-agent.js.map +1 -0
- package/dist/runtime/stall-detector.d.ts +13 -0
- package/dist/runtime/stall-detector.js +121 -0
- package/dist/runtime/stall-detector.js.map +1 -0
- package/dist/runtime/strategy-library.d.ts +11 -0
- package/dist/runtime/strategy-library.js +142 -0
- package/dist/runtime/strategy-library.js.map +1 -0
- package/dist/runtime/supabase-experience-store.d.ts +19 -0
- package/dist/runtime/supabase-experience-store.js +215 -0
- package/dist/runtime/supabase-experience-store.js.map +1 -0
- package/dist/runtime/tool-approval.d.ts +51 -0
- package/dist/runtime/tool-approval.js +148 -0
- package/dist/runtime/tool-approval.js.map +1 -0
- package/dist/runtime/tool-sandbox.d.ts +43 -0
- package/dist/runtime/tool-sandbox.js +394 -0
- package/dist/runtime/tool-sandbox.js.map +1 -0
- package/dist/runtime/tool-verifier.d.ts +18 -0
- package/dist/runtime/tool-verifier.js +323 -0
- package/dist/runtime/tool-verifier.js.map +1 -0
- package/dist/runtime/trust-manager.d.ts +63 -0
- package/dist/runtime/trust-manager.js +212 -0
- package/dist/runtime/trust-manager.js.map +1 -0
- package/dist/runtime/two-agent-loop.d.ts +35 -0
- package/dist/runtime/two-agent-loop.js +208 -0
- package/dist/runtime/two-agent-loop.js.map +1 -0
- package/dist/runtime/types.d.ts +939 -1
- package/dist/runtime/verification-intensity.d.ts +34 -0
- package/dist/runtime/verification-intensity.js +104 -0
- package/dist/runtime/verification-intensity.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Confidence Calibrator
|
|
3
|
+
// Compares stated confidence vs actual outcomes per domain.
|
|
4
|
+
// Adjusts effective confidence to reduce overconfidence.
|
|
5
|
+
// ============================================================
|
|
6
|
+
// Confidence buckets as [low, high) ranges; the last bucket also takes 1.0.
const BUCKET_RANGES = [
    [0.0, 0.2],
    [0.2, 0.4],
    [0.4, 0.6],
    [0.6, 0.8],
    [0.8, 1.0],
];
/**
 * Tracks, per domain, how often decisions made at a given stated confidence
 * actually succeed, and derives a downward-only adjustment factor from the
 * highest-confidence bucket.
 */
export class ConfidenceCalibrator {
    /** Calibration profiles keyed by domain name. */
    profiles = new Map();
    /**
     * @param {Iterable<object>} [initial] - Optional profiles to preload;
     *   each one is stored under its `domain` property.
     */
    constructor(initial) {
        if (initial) {
            for (const p of initial) {
                this.profiles.set(p.domain, p);
            }
        }
    }
    /**
     * Returns the stated confidence scaled by the domain's adjustment factor.
     *
     * @param {string} domain - Domain whose profile should be applied.
     * @param {number} statedConfidence - Confidence reported by the agent.
     * @returns {number} `statedConfidence` unchanged when the domain has no
     *   profile or its factor is >= 1.0; otherwise a value no larger than
     *   `statedConfidence`.
     */
    calibrate(domain, statedConfidence) {
        const profile = this.profiles.get(domain);
        if (!profile || profile.adjustmentFactor >= 1.0) {
            return statedConfidence; // no adjustment needed or no data
        }
        // Only adjust downward (overconfidence correction)
        return Math.min(statedConfidence, statedConfidence * profile.adjustmentFactor);
    }
    /**
     * Records one decision outcome and recomputes the domain's bucket stats,
     * expected calibration error and adjustment factor.
     *
     * Confidence values outside [0, 1] are clamped into that range before
     * bucketing, so a negative value lands in the lowest bucket instead of
     * being counted as a 0.8-1.0 decision. A confidence that is NaN (or is
     * not convertible to a number) is ignored entirely: it cannot be placed
     * in any bucket without distorting the statistics.
     *
     * @param {object} experience - Reads `domain`, `decision.confidence` and
     *   `outcome` (compared against the string 'success').
     * @returns {Promise<void>}
     */
    async update(experience) {
        const { domain } = experience;
        const confidence = Number(experience.decision.confidence);
        if (Number.isNaN(confidence)) {
            return; // unusable confidence: do not pollute any bucket
        }
        const clamped = Math.min(1, Math.max(0, confidence));
        const succeeded = experience.outcome === 'success';
        const midpointOf = ([low, high]) => (low + high) / 2;
        let profile = this.profiles.get(domain);
        if (!profile) {
            profile = {
                domain,
                buckets: BUCKET_RANGES.map(range => ({
                    confidenceRange: range,
                    totalDecisions: 0,
                    actualSuccessRate: 0,
                    gap: 0,
                })),
                expectedCalibrationError: 0,
                adjustmentFactor: 1.0,
                lastUpdated: new Date().toISOString(),
            };
        }
        // Find the right bucket; after clamping only exactly 1.0 matches no
        // half-open range, and it belongs to the last bucket.
        // NOTE(review): assumes preloaded profiles use the same bucket layout
        // as BUCKET_RANGES - confirm against whatever persists them.
        const bucketIndex = BUCKET_RANGES.findIndex(([low, high]) => clamped >= low && clamped < high);
        const idx = bucketIndex === -1 ? BUCKET_RANGES.length - 1 : bucketIndex;
        const bucket = profile.buckets[idx];
        // Update bucket stats: rebuild the success count from the stored rate,
        // then fold in this outcome.
        const prevSuccesses = bucket.actualSuccessRate * bucket.totalDecisions;
        bucket.totalDecisions++;
        bucket.actualSuccessRate = (prevSuccesses + (succeeded ? 1 : 0)) / bucket.totalDecisions;
        // Gap = midpoint of range - actual success rate (positive = overconfident)
        bucket.gap = midpointOf(bucket.confidenceRange) - bucket.actualSuccessRate;
        // Compute ECE (Expected Calibration Error): decision-weighted mean of
        // |bucket midpoint - observed success rate| over non-empty buckets.
        const totalDecisions = profile.buckets.reduce((sum, b) => sum + b.totalDecisions, 0);
        let ece = 0;
        for (const b of profile.buckets) {
            if (b.totalDecisions === 0) {
                continue;
            }
            ece += (b.totalDecisions / totalDecisions) * Math.abs(midpointOf(b.confidenceRange) - b.actualSuccessRate);
        }
        profile.expectedCalibrationError = ece;
        // Compute adjustment factor from high-confidence bucket
        // If agent says 0.8-1.0 confidence but only succeeds 60% of the time,
        // adjustment = 0.60 / 0.90 ≈ 0.67
        const highBucket = profile.buckets[profile.buckets.length - 1];
        if (highBucket.totalDecisions >= 5) {
            const ratio = highBucket.actualSuccessRate / midpointOf(highBucket.confidenceRange);
            profile.adjustmentFactor = Math.min(1.0, ratio); // never adjust upward
        }
        profile.lastUpdated = new Date().toISOString();
        this.profiles.set(domain, profile);
    }
    /**
     * @param {string} domain
     * @returns {object|null} The stored profile, or null when none exists.
     */
    getProfile(domain) {
        return this.profiles.get(domain) ?? null;
    }
    /** @returns {object[]} A new array holding every stored profile. */
    getAllProfiles() {
        return Array.from(this.profiles.values());
    }
    /**
     * Stores each given profile under its `domain`, replacing any existing
     * profile for that domain.
     *
     * @param {Iterable<object>} profiles
     */
    load(profiles) {
        for (const p of profiles) {
            this.profiles.set(p.domain, p);
        }
    }
}
|
|
95
|
+
//# sourceMappingURL=confidence-calibrator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"confidence-calibrator.js","sourceRoot":"","sources":["../../src/runtime/confidence-calibrator.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,uDAAuD;AACvD,4DAA4D;AAC5D,yDAAyD;AACzD,+DAA+D;AAI/D,MAAM,aAAa,GAAuB;IACxC,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;CACX,CAAC;AAEF,MAAM,OAAO,oBAAoB;IACvB,QAAQ,GAAoC,IAAI,GAAG,EAAE,CAAC;IAE9D,YAAY,OAA8B;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,SAAS,CAAC,MAAc,EAAE,gBAAwB;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1C,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;YAChD,OAAO,gBAAgB,CAAC,CAAC,kCAAkC;QAC7D,CAAC;QACD,mDAAmD;QACnD,OAAO,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACjF,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,UAAsB;QACjC,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;QACjC,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,CAAC,UAAU,CAAC;QAClD,MAAM,SAAS,GAAG,UAAU,CAAC,OAAO,KAAK,SAAS,CAAC;QAEnD,IAAI,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,GAAG;gBACR,MAAM;gBACN,OAAO,EAAE,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;oBACnC,eAAe,EAAE,KAAK;oBACtB,cAAc,EAAE,CAAC;oBACjB,iBAAiB,EAAE,CAAC;oBACpB,GAAG,EAAE,CAAC;iBACP,CAAC,CAAC;gBACH,wBAAwB,EAAE,CAAC;gBAC3B,gBAAgB,EAAE,GAAG;gBACrB,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACtC,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,WAAW,GAAG,aAAa,CAAC,SAAS,CACzC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,UAAU,IAAI,GAAG,IAAI,UAAU,GAAG,IAAI,CACxD,CAAC;QACF,sCAAsC;QACtC,MAAM,GAAG,GAAG,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QACxE,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAEpC,sBAAsB;QACtB,MAAM,aAAa,GAAG,MAAM,CAAC,iBAAiB,GAAG,MAAM,CAAC,cAAc,CAAC;QACvE,MAAM,CAAC,cAAc,EAAE,CAAC;QACxB,MAAM,CAAC,iBAAiB,GAAG,CAAC,aAAa,GAAG,CAAC,
SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,cAAc,CAAC;QAEzF,2EAA2E;QAC3E,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC7E,MAAM,CAAC,GAAG,GAAG,QAAQ,GAAG,MAAM,CAAC,iBAAiB,CAAC;QAEjD,2CAA2C;QAC3C,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;QACjF,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YAChC,IAAI,CAAC,CAAC,cAAc,KAAK,CAAC;gBAAE,SAAS;YACrC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC9D,GAAG,IAAI,CAAC,CAAC,CAAC,cAAc,GAAG,cAAc,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC;QACnF,CAAC;QACD,OAAO,CAAC,wBAAwB,GAAG,GAAG,CAAC;QAEvC,wDAAwD;QACxD,sEAAsE;QACtE,kCAAkC;QAClC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/D,IAAI,UAAU,CAAC,cAAc,IAAI,CAAC,EAAE,CAAC;YACnC,MAAM,YAAY,GAAG,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACzF,MAAM,KAAK,GAAG,UAAU,CAAC,iBAAiB,GAAG,YAAY,CAAC;YAC1D,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,sBAAsB;QACzE,CAAC;QAED,OAAO,CAAC,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACrC,CAAC;IAED,UAAU,CAAC,MAAc;QACvB,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC;IAC3C,CAAC;IAED,cAAc;QACZ,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,CAAC,QAA8B;QACjC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { AgentDefinition, ToolDefinition } from './types.js';
|
|
2
|
+
/** Result produced by `DomainCoverageAnalyzer.analyze`. */
export interface CoverageAnalysis {
    /** The agent's own domain description, copied as declared. */
    readonly claimed_scope: string;
    /** Summary string listing the declared languages, frameworks and claims, plus whether a known domain pattern matched. */
    readonly verified_scope: string;
    /** Human-readable messages for things the matched domain expects but the agent does not declare. */
    readonly gaps: ReadonlyArray<string>;
    /** Human-readable messages for declarations that go beyond what the agent's domain or trust level supports. */
    readonly overreach: ReadonlyArray<string>;
    /** Classification of the tools the agent requests. */
    readonly tool_access: {
        /** Ids of requested tools considered appropriate. */
        readonly justified: ReadonlyArray<string>;
        /** Requested tools that are unknown to the registry or atypical for the domain, each with a reason suffix. */
        readonly questionable: ReadonlyArray<string>;
        /** Messages naming expected tool types the agent has no tool for. */
        readonly missing: ReadonlyArray<string>;
    };
}
|
|
13
|
+
/** Checks an agent definition against per-domain expectations. */
export declare class DomainCoverageAnalyzer {
    /**
     * Reports coverage gaps, overreach and tool-access findings for an agent.
     *
     * @param agent - The agent definition to inspect.
     * @param availableTools - The tool registry used to resolve the agent's tool ids.
     * @returns The collected findings as a `CoverageAnalysis`.
     */
    analyze(agent: AgentDefinition, availableTools: ReadonlyArray<ToolDefinition>): CoverageAnalysis;
    private matchDomain;
    private isLanguageRelated;
    private analyzeToolAccess;
    private summarizeVerifiedScope;
}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Domain Coverage Analyzer
|
|
3
|
+
// Analyzes agent definitions for domain coverage gaps and overreach.
|
|
4
|
+
// ============================================================
|
|
5
|
+
// Per-domain expectations: typical languages, frameworks, tool types and
// claim categories. All entries are lower-case.
const DOMAIN_EXPECTATIONS = {
    kubernetes: {
        languages: ['yaml', 'helm'],
        frameworks: ['kubernetes', 'docker', 'helm'],
        expected_tools: ['cli_wrapper'],
        expected_claims: ['security', 'correctness', 'completeness'],
    },
    database: {
        languages: ['sql', 'typescript', 'python'],
        frameworks: ['postgresql', 'mysql', 'mongodb', 'prisma', 'drizzle'],
        expected_tools: ['cli_wrapper', 'data_transform'],
        expected_claims: ['security', 'correctness', 'performance'],
    },
    frontend: {
        languages: ['typescript', 'javascript', 'css', 'html'],
        frameworks: ['react', 'vue', 'svelte', 'next.js', 'angular'],
        expected_tools: ['cli_wrapper'],
        expected_claims: ['correctness', 'completeness', 'type-safety'],
    },
    api: {
        languages: ['typescript', 'python', 'go'],
        frameworks: ['express', 'fastify', 'flask', 'gin'],
        expected_tools: ['api_client', 'cli_wrapper'],
        expected_claims: ['security', 'correctness', 'error-handling'],
    },
    security: {
        languages: ['typescript', 'python', 'go', 'rust'],
        frameworks: [],
        expected_tools: ['verification_helper', 'cli_wrapper'],
        expected_claims: ['security', 'correctness'],
    },
    infrastructure: {
        languages: ['yaml', 'hcl', 'terraform'],
        frameworks: ['terraform', 'cloudformation', 'pulumi', 'ansible'],
        expected_tools: ['cli_wrapper'],
        expected_claims: ['security', 'correctness', 'completeness'],
    },
};
// Languages treated as close relatives of one another. A Map is used so that
// names such as "constructor" or "toString" cannot resolve to inherited
// Object.prototype members.
const RELATED_LANGUAGES = new Map([
    ['typescript', ['javascript']],
    ['javascript', ['typescript']],
    ['yaml', ['json']],
    ['json', ['yaml']],
    ['hcl', ['terraform']],
]);
// ── Domain Coverage Analyzer ───────────────────────────────
export class DomainCoverageAnalyzer {
    /**
     * Analyze an agent definition for coverage gaps and overreach.
     * Compares the agent's declared domain against known patterns
     * and the available tool registry.
     *
     * Languages and claim categories are compared case-insensitively, in
     * line with how frameworks and the description are already matched;
     * messages quote the agent's values exactly as declared.
     *
     * @param {object} agent - Reads `domain` (description, languages,
     *   frameworks, claim_categories), `tools`, `trust_level` and
     *   `constraints.can_spawn_agents`.
     * @param {ReadonlyArray<object>} availableTools - Registry entries with
     *   `id` and `type`.
     * @returns {object} `{ claimed_scope, verified_scope, gaps, overreach,
     *   tool_access }`.
     */
    analyze(agent, availableTools) {
        const gaps = [];
        const overreach = [];
        const { domain } = agent;
        // Match against known domain expectations
        const domainKey = this.matchDomain(domain);
        const expectations = domainKey ? DOMAIN_EXPECTATIONS[domainKey] : null;
        if (expectations) {
            // Check language coverage
            const declaredLanguages = new Set(domain.languages.map(l => String(l).toLowerCase()));
            for (const lang of expectations.languages) {
                if (!declaredLanguages.has(lang)) {
                    gaps.push(`Domain "${domainKey}" typically handles "${lang}" but agent doesn't declare it`);
                }
            }
            // Check for overreach: languages not expected for this domain
            for (const lang of domain.languages) {
                const isExpected = expectations.languages.includes(String(lang).toLowerCase());
                const isRelated = this.isLanguageRelated(lang, expectations.languages);
                if (!isExpected && !isRelated) {
                    overreach.push(`Agent declares "${lang}" which is unusual for domain "${domainKey}"`);
                }
            }
            // Check claim category coverage
            const declaredClaims = new Set(domain.claim_categories.map(c => String(c).toLowerCase()));
            for (const cat of expectations.expected_claims) {
                if (!declaredClaims.has(cat)) {
                    gaps.push(`Domain "${domainKey}" typically covers "${cat}" claims but agent doesn't declare it`);
                }
            }
        }
        // Analyze tool access
        const toolAnalysis = this.analyzeToolAccess(agent, availableTools, expectations);
        // Check for a too-brief or overly broad domain description
        if (domain.description.length < 20) {
            gaps.push('Domain description is too brief to determine scope');
        }
        const description = domain.description.toLowerCase();
        const broadPhrases = ['everything', 'all code', 'any language'];
        if (broadPhrases.some(phrase => description.includes(phrase))) {
            overreach.push('Domain description is overly broad (claims to handle "everything" or "all")');
        }
        // Check constraints consistency
        if (agent.trust_level === 'sandboxed' && agent.constraints.can_spawn_agents) {
            overreach.push('Sandboxed agents cannot spawn other agents');
        }
        if (agent.trust_level === 'standard' && agent.constraints.can_spawn_agents) {
            overreach.push('Standard trust agents cannot spawn other agents');
        }
        return {
            claimed_scope: domain.description,
            verified_scope: this.summarizeVerifiedScope(agent, expectations),
            gaps,
            overreach,
            tool_access: toolAnalysis,
        };
    }
    // ── Private helpers ──────────────────────────────────────
    /**
     * Returns the first DOMAIN_EXPECTATIONS key whose name occurs in the
     * description or that lists one of the agent's frameworks, else null.
     */
    matchDomain(domain) {
        const desc = domain.description.toLowerCase();
        const frameworks = domain.frameworks.map(f => f.toLowerCase());
        // NOTE(review): keys are matched as plain substrings, so "api" also
        // matches words like "rapid" - confirm whether that is intended.
        for (const [key, expectation] of Object.entries(DOMAIN_EXPECTATIONS)) {
            if (desc.includes(key)) {
                return key;
            }
            if (frameworks.some(f => expectation.frameworks.includes(f))) {
                return key;
            }
        }
        return null;
    }
    /**
     * True when `lang` has a known relative among `expectedLangs`.
     * Unknown languages simply have no relatives.
     */
    isLanguageRelated(lang, expectedLangs) {
        const relatives = RELATED_LANGUAGES.get(String(lang).toLowerCase()) ?? [];
        return relatives.some(r => expectedLangs.includes(r));
    }
    /**
     * Sorts the agent's requested tools into justified / questionable and
     * lists expected tool types the agent has no tool for.
     */
    analyzeToolAccess(agent, availableTools, expectations) {
        const justified = [];
        const questionable = [];
        const missing = [];
        // Index the registry once; on duplicate ids the first entry wins,
        // matching a linear first-match search.
        const toolsById = new Map();
        for (const tool of availableTools) {
            if (!toolsById.has(tool.id)) {
                toolsById.set(tool.id, tool);
            }
        }
        // Check each requested tool
        for (const toolId of agent.tools) {
            const tool = toolsById.get(toolId);
            if (!tool) {
                questionable.push(`${toolId} (not found in registry)`);
                continue;
            }
            if (!expectations) {
                justified.push(toolId); // Can't determine, assume justified
            }
            else if (expectations.expected_tools.includes(tool.type)) {
                justified.push(toolId);
            }
            else {
                questionable.push(`${toolId} (type "${tool.type}" not typical for domain)`);
            }
        }
        // Check for missing tools
        if (expectations) {
            const heldTypes = new Set(agent.tools.map(id => toolsById.get(id)?.type));
            for (const expectedType of expectations.expected_tools) {
                if (!heldTypes.has(expectedType)) {
                    missing.push(`No "${expectedType}" tool — typically needed for this domain`);
                }
            }
        }
        return { justified, questionable, missing };
    }
    /**
     * Builds the one-line summary of declared languages, frameworks (when
     * any) and claims, ending with whether a known domain pattern matched.
     */
    summarizeVerifiedScope(agent, expectations) {
        const parts = [];
        parts.push(`Languages: ${agent.domain.languages.join(', ')}`);
        if (agent.domain.frameworks.length > 0) {
            parts.push(`Frameworks: ${agent.domain.frameworks.join(', ')}`);
        }
        parts.push(`Claims: ${agent.domain.claim_categories.join(', ')}`);
        parts.push(expectations
            ? `Matched domain: known pattern`
            : `Domain: no known pattern match (custom)`);
        return parts.join('. ');
    }
}
|
|
178
|
+
//# sourceMappingURL=domain-coverage-analyzer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"domain-coverage-analyzer.js","sourceRoot":"","sources":["../../src/runtime/domain-coverage-analyzer.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,0DAA0D;AAC1D,qEAAqE;AACrE,+DAA+D;AA8B/D,MAAM,mBAAmB,GAAsC;IAC7D,UAAU,EAAE;QACV,SAAS,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAC3B,UAAU,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,MAAM,CAAC;QAC5C,cAAc,EAAE,CAAC,aAAa,CAAC;QAC/B,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,cAAc,CAAC;KAC7D;IACD,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,KAAK,EAAE,YAAY,EAAE,QAAQ,CAAC;QAC1C,UAAU,EAAE,CAAC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC;QACnE,cAAc,EAAE,CAAC,aAAa,EAAE,gBAAgB,CAAC;QACjD,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC;KAC5D;IACD,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,CAAC;QACtD,UAAU,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC;QAC5D,cAAc,EAAE,CAAC,aAAa,CAAC;QAC/B,eAAe,EAAE,CAAC,aAAa,EAAE,cAAc,EAAE,aAAa,CAAC;KAChE;IACD,GAAG,EAAE;QACH,SAAS,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,IAAI,CAAC;QACzC,UAAU,EAAE,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,CAAC;QAClD,cAAc,EAAE,CAAC,YAAY,EAAE,aAAa,CAAC;QAC7C,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,gBAAgB,CAAC;KAC/D;IACD,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC;QACjD,UAAU,EAAE,EAAE;QACd,cAAc,EAAE,CAAC,qBAAqB,EAAE,aAAa,CAAC;QACtD,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,CAAC;KAC7C;IACD,cAAc,EAAE;QACd,SAAS,EAAE,CAAC,MAAM,EAAE,KAAK,EAAE,WAAW,CAAC;QACvC,UAAU,EAAE,CAAC,WAAW,EAAE,gBAAgB,EAAE,QAAQ,EAAE,SAAS,CAAC;QAChE,cAAc,EAAE,CAAC,aAAa,CAAC;QAC/B,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,cAAc,CAAC;KAC7D;CACF,CAAC;AAEF,8DAA8D;AAE9D,MAAM,OAAO,sBAAsB;IACjC;;;;OAIG;IACH,OAAO,CACL,KAAsB,EACtB,cAAyC;QAEzC,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,0CAA0C;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,SAAS,CAAC,CAAC,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEvE,0BAA0B;QAC1B,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,SAAS,EAAE,CAAC;gBAC1C,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,
CAAC;oBAC3C,IAAI,CAAC,IAAI,CAAC,WAAW,SAAS,wBAAwB,IAAI,gCAAgC,CAAC,CAAC;gBAC9F,CAAC;YACH,CAAC;YAED,8DAA8D;YAC9D,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC1C,MAAM,UAAU,GAAG,YAAY,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACzD,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;gBACvE,IAAI,CAAC,UAAU,IAAI,CAAC,SAAS,EAAE,CAAC;oBAC9B,SAAS,CAAC,IAAI,CAAC,mBAAmB,IAAI,kCAAkC,SAAS,GAAG,CAAC,CAAC;gBACxF,CAAC;YACH,CAAC;YAED,gCAAgC;YAChC,KAAK,MAAM,GAAG,IAAI,YAAY,CAAC,eAAe,EAAE,CAAC;gBAC/C,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjD,IAAI,CAAC,IAAI,CAAC,WAAW,SAAS,uBAAuB,GAAG,uCAAuC,CAAC,CAAC;gBACnG,CAAC;YACH,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,MAAM,YAAY,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,cAAc,EAAE,YAAY,CAAC,CAAC;QAEjF,4CAA4C;QAC5C,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;QAClE,CAAC;QACD,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;YAC7D,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;YAC3D,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;YACpE,SAAS,CAAC,IAAI,CAAC,6EAA6E,CAAC,CAAC;QAChG,CAAC;QAED,gCAAgC;QAChC,IAAI,KAAK,CAAC,WAAW,KAAK,WAAW,IAAI,KAAK,CAAC,WAAW,CAAC,gBAAgB,EAAE,CAAC;YAC5E,SAAS,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,KAAK,CAAC,WAAW,KAAK,UAAU,IAAI,KAAK,CAAC,WAAW,CAAC,gBAAgB,EAAE,CAAC;YAC3E,SAAS,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACpE,CAAC;QAED,OAAO;YACL,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,WAAW;YACvC,cAAc,EAAE,IAAI,CAAC,sBAAsB,CAAC,KAAK,EAAE,YAAY,CAAC;YAChE,IAAI;YACJ,SAAS;YACT,WAAW,EAAE,YAAY;SAC1B,CAAC;IACJ,CAAC;IAED,4DAA4D;IAEpD,WAAW,CAAC,MAAiC;QACnD,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;QAE/D,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,mBAAmB,CAAC,EAAE,CAAC;YAC9D,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAAE,OAAO,GAAG,CAAC;YACnC,IAAI,UAAU,CAAC,IA
AI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAAE,OAAO,GAAG,CAAC;QACpE,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,iBAAiB,CAAC,IAAY,EAAE,aAAuB;QAC7D,MAAM,OAAO,GAA6B;YACxC,UAAU,EAAE,CAAC,YAAY,CAAC;YAC1B,UAAU,EAAE,CAAC,YAAY,CAAC;YAC1B,IAAI,EAAE,CAAC,MAAM,CAAC;YACd,IAAI,EAAE,CAAC,MAAM,CAAC;YACd,GAAG,EAAE,CAAC,WAAW,CAAC;SACnB,CAAC;QACF,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAEO,iBAAiB,CACvB,KAAsB,EACtB,cAAyC,EACzC,YAAsC;QAEtC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAa,EAAE,CAAC;QAClC,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,4BAA4B;QAC5B,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YACjC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,MAAM,CAAC,CAAC;YACvD,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,YAAY,CAAC,IAAI,CAAC,GAAG,MAAM,0BAA0B,CAAC,CAAC;gBACvD,SAAS;YACX,CAAC;YAED,iDAAiD;YACjD,IAAI,YAAY,IAAI,YAAY,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpE,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzB,CAAC;iBAAM,IAAI,CAAC,YAAY,EAAE,CAAC;gBACzB,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,oCAAoC;YAC9D,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,GAAG,MAAM,WAAW,IAAI,CAAC,IAAI,2BAA2B,CAAC,CAAC;YAC9E,CAAC;QACH,CAAC;QAED,0BAA0B;QAC1B,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,MAAM,YAAY,IAAI,YAAY,CAAC,cAAc,EAAE,CAAC;gBACvD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;oBACpC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;oBACnD,OAAO,IAAI,EAAE,IAAI,KAAK,YAAY,CAAC;gBACrC,CAAC,CAAC,CAAC;gBACH,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,CAAC,IAAI,CAAC,OAAO,YAAY,2CAA2C,CAAC,CAAC;gBAC/E,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC9C,CAAC;IAEO,sBAAsB,CAC5B,KAAsB,EACtB,YAAsC;QAEtC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,cAAc,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9D,IAAI,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,eAAe,KA
AK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClE,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC9C,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;CACF"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Observation, Decision, AgentConfig, ReasoningContext, PlanningContext } from './types.js';
|
|
2
|
+
import type { ExperienceStoreInterface } from './reflector.js';
|
|
3
|
+
import type { ConfidenceCalibrator } from './confidence-calibrator.js';
|
|
4
|
+
import type { StrategyLibrary } from './strategy-library.js';
|
|
5
|
+
import type { ExtractionResult } from './pattern-extractor.js';
|
|
6
|
+
/**
 * Builds the learning context handed to the reasoning and planning stages.
 *
 * The public surface is listed first; implementation details follow and are
 * declared `private` so consumers cannot depend on them.
 */
export declare class EnrichedPromptBuilder {
    /**
     * @param opts.store           Source of past experiences.
     * @param opts.calibrator      Provider of per-domain calibration profiles.
     * @param opts.strategyLibrary Provider of ranked strategies.
     * @param opts.tokenBudget     Optional cap for the generated prompt section.
     */
    constructor(opts: {
        store: ExperienceStoreInterface;
        calibrator: ConfidenceCalibrator;
        strategyLibrary: StrategyLibrary;
        tokenBudget?: number;
    });
    /** Records the most recent pattern-extraction result for later lookups. */
    setLastExtraction(result: ExtractionResult): void;
    /** Resolves with the context assembled for a single observation. */
    forReasoning(observation: Observation, agentConfig: AgentConfig): Promise<ReasoningContext>;
    /** Resolves with the context assembled for a single decision. */
    forPlanning(decision: Decision, agentConfig: AgentConfig): Promise<PlanningContext>;
    private store;
    private calibrator;
    private strategyLibrary;
    private lastExtraction;
    private tokenBudget;
    private buildReasoningPrompt;
    private buildPlanningPrompt;
    private inferDomain;
    private inferDomainFromDecision;
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Enriched Prompt Builder
|
|
3
|
+
// Assembles structured learning context for Reasoner and Planner.
|
|
4
|
+
// Replaces raw experience injection with distilled knowledge.
|
|
5
|
+
// ============================================================
|
|
6
|
+
/**
 * Assembles structured learning context for the Reasoner and the Planner.
 *
 * Instead of injecting raw experiences, it combines similar experiences,
 * the per-domain skill profile, patterns / anti-patterns, calibration data,
 * ranked strategies and verification hotspots into a prompt section.
 */
export class EnrichedPromptBuilder {
    store;
    calibrator;
    strategyLibrary;
    lastExtraction = null;
    tokenBudget;

    /**
     * @param {object} opts
     * @param {object} opts.store - Experience store; must expose either
     *   `search(query, limit)` or `getRelevantExperiences(query, limit)`.
     * @param {object} opts.calibrator - Exposes `getProfile(domain)`.
     * @param {object} opts.strategyLibrary - Exposes `rank(domain, opType)`.
     * @param {number} [opts.tokenBudget=2000] - Budget for the reasoning
     *   prompt section, converted to characters at 4 chars per token.
     */
    constructor(opts) {
        this.store = opts.store;
        this.calibrator = opts.calibrator;
        this.strategyLibrary = opts.strategyLibrary;
        this.tokenBudget = opts.tokenBudget ?? 2000;
    }

    /**
     * Stores the latest extraction result; it is the source of skill
     * profiles, patterns, anti-patterns and verification hotspots.
     * @param {object} result
     * @returns {void}
     */
    setLastExtraction(result) {
        this.lastExtraction = result;
    }

    /**
     * Builds the reasoning context for one observation.
     * @param {object} observation - Reads `source` and `payload`.
     * @param {object} agentConfig - Accepted for interface compatibility; not read.
     * @returns {Promise<object>} `{ relevantExperiences, skillProfile, patterns,
     *   antiPatterns, calibration, promptSection }`.
     */
    async forReasoning(observation, agentConfig) {
        const payloadText = this.serializePayload(observation.payload);
        const contextString = `${observation.source} ${payloadText.slice(0, 200)}`;

        // Prefer the similarity-aware `search` API when the store offers it.
        let relevantExperiences;
        if ('search' in this.store && typeof this.store.search === 'function') {
            relevantExperiences = await this.store.search(contextString, 3);
        }
        else {
            const exps = await this.store.getRelevantExperiences(contextString, 3);
            // No real score available here: synthesize one that decays with rank.
            relevantExperiences = exps.map((e, i) => ({
                experience: e,
                similarity: 1 - i * 0.1,
            }));
        }

        const domain = this.inferDomain(observation);
        const extraction = this.lastExtraction;
        const skillProfile = extraction?.skillProfiles?.find(sp => sp.domain === domain) ?? null;
        const patterns = (extraction?.patterns ?? []).filter(p => p.domain === domain).slice(0, 3);
        const antiPatterns = (extraction?.antiPatterns ?? []).filter(ap => ap.domain === domain).slice(0, 3);
        const calibration = this.calibrator.getProfile(domain);
        const promptSection = this.buildReasoningPrompt(domain, skillProfile, patterns, antiPatterns, calibration, relevantExperiences);

        return {
            relevantExperiences,
            skillProfile,
            patterns,
            antiPatterns,
            calibration,
            promptSection,
        };
    }

    /**
     * Builds the planning context for one decision.
     * @param {object} decision - Reads `reasoning` and `proposedActions[].operationType`.
     * @param {object} agentConfig - Accepted for interface compatibility; not read.
     * @returns {Promise<object>} `{ rankedStrategies, hotspots, promptSection }`.
     */
    async forPlanning(decision, agentConfig) {
        const domain = this.inferDomainFromDecision(decision);

        // De-duplicate operation types so each one is ranked only once.
        const opTypes = new Set(decision.proposedActions.map(a => a.operationType));

        // Keep the top three strategies per operation type.
        const allStrategies = [];
        for (const opType of opTypes) {
            const ranked = await this.strategyLibrary.rank(domain, opType);
            allStrategies.push(...ranked.slice(0, 3));
        }

        // Worst five hotspots with a fail rate above 10%. `filter` returns a
        // copy, so the in-place `sort` never reorders the stored extraction.
        const hotspots = (this.lastExtraction?.verificationHotspots ?? [])
            .filter(h => h.failRate > 0.1)
            .sort((a, b) => b.failRate - a.failRate)
            .slice(0, 5);

        const promptSection = this.buildPlanningPrompt(domain, allStrategies, hotspots);
        return {
            rankedStrategies: allStrategies,
            hotspots,
            promptSection,
        };
    }

    /**
     * Renders the reasoning prompt section. Parts are emitted in priority
     * order so that truncation to the budget drops the lowest-priority text.
     * @returns {string}
     */
    buildReasoningPrompt(domain, skillProfile, patterns, antiPatterns, calibration, experiences) {
        const sections = [];

        // Calibration (highest priority, smallest)
        if (calibration) {
            const tendency = calibration.adjustmentFactor < 0.9
                ? `overconfident by ~${Math.round((1 - calibration.adjustmentFactor) * 100)}%`
                : calibration.adjustmentFactor < 1.0
                    ? `slightly overconfident`
                    : `well-calibrated`;
            sections.push(`**Confidence calibration (${domain}):** You tend to be ${tendency}. ECE: ${calibration.expectedCalibrationError.toFixed(3)}`);
        }

        // Anti-patterns (second priority)
        if (antiPatterns.length > 0) {
            sections.push(`**What fails in ${domain}:**`);
            for (const ap of antiPatterns) {
                sections.push(`- ${ap.description} (${ap.claimCategory}, seen ${ap.occurrenceCount}x)`);
            }
        }

        // Patterns (third priority)
        if (patterns.length > 0) {
            sections.push(`**What works in ${domain}:**`);
            for (const p of patterns) {
                sections.push(`- ${p.description} (verified ${p.verificationCount}x)`);
            }
        }

        // Experiences (fourth priority)
        if (experiences.length > 0) {
            sections.push(`**Similar past experiences:**`);
            for (const se of experiences) {
                const e = se.experience;
                sections.push(`- [${e.outcome}] ${e.lessons.slice(0, 2).join('; ')} (similarity: ${se.similarity.toFixed(2)})`);
            }
        }

        // Skill profile (fifth priority)
        if (skillProfile) {
            sections.push(`**Track record (${domain}):** ${(skillProfile.successRate * 100).toFixed(0)}% success across ${skillProfile.totalExperiences} experiences. First-pass rate: ${(skillProfile.firstPassRate * 100).toFixed(0)}%`);
        }

        // Trim to token budget (rough estimate: 4 chars per token)
        const joined = sections.join('\n');
        const charBudget = this.tokenBudget * 4;
        if (joined.length > charBudget) {
            return joined.slice(0, charBudget) + '\n[...truncated]';
        }
        return joined;
    }

    /**
     * Renders the planning prompt section from strategies and hotspots.
     * @returns {string} Empty string when both lists are empty.
     */
    buildPlanningPrompt(domain, strategies, hotspots) {
        const sections = [];
        if (strategies.length > 0) {
            sections.push(`## Available Strategies\n`);
            sections.push(`For ${domain}, these strategies have worked:`);
            strategies.forEach((s, i) => {
                sections.push(`${i + 1}. "${s.name}" — ${(s.successRate * 100).toFixed(0)}% success, used ${s.timesUsed}x`);
                sections.push(` ${s.description}`);
            });
        }
        if (hotspots.length > 0) {
            sections.push(`\n## Verification Hotspots (avoid these)\n`);
            for (const h of hotspots) {
                sections.push(`- ${h.claimCategory}: ${(h.failRate * 100).toFixed(0)}% fail rate. Common cause: ${h.topFailureReasons[0] ?? 'unknown'}`);
                sections.push(` → Ensure your plan addresses this explicitly.`);
            }
        }
        return sections.join('\n');
    }

    /**
     * Serializes a payload for keyword matching.
     * `JSON.stringify` returns `undefined` (not a string) for `undefined`,
     * functions and symbols; fall back to '' so callers can use string methods.
     * @param {*} payload
     * @returns {string}
     */
    serializePayload(payload) {
        return JSON.stringify(payload) ?? '';
    }

    /**
     * Maps lower-cased free text to a domain by keyword; first match wins.
     * @param {string} text - Already lower-cased text.
     * @returns {string|null} Domain name, or null when no keyword matches.
     */
    classifyDomain(text) {
        if (text.includes('test')) {
            return 'testing';
        }
        if (text.includes('security') || text.includes('auth')) {
            return 'security';
        }
        // "ci" must not be flanked by letters; a plain substring check also
        // fires on ordinary words such as "specific", "decision" or "precise".
        if (text.includes('deploy') || /(?<![a-z])ci(?![a-z])/.test(text)) {
            return 'devops';
        }
        if (text.includes('database') || text.includes('sql')) {
            return 'database';
        }
        if (text.includes('api') || text.includes('endpoint')) {
            return 'api';
        }
        return null;
    }

    /**
     * Infers the domain of an observation from its serialized payload,
     * falling back to 'filesystem' for that source and otherwise 'general'.
     * @param {object} observation
     * @returns {string}
     */
    inferDomain(observation) {
        const payload = this.serializePayload(observation.payload).toLowerCase();
        const matched = this.classifyDomain(payload);
        if (matched !== null) {
            return matched;
        }
        return observation.source === 'filesystem' ? 'filesystem' : 'general';
    }

    /**
     * Infers the domain of a decision from its reasoning text.
     * @param {object} decision - A missing `reasoning` is treated as empty text.
     * @returns {string}
     */
    inferDomainFromDecision(decision) {
        const reasoning = String(decision.reasoning ?? '').toLowerCase();
        return this.classifyDomain(reasoning) ?? 'general';
    }
}
|
|
173
|
+
//# sourceMappingURL=enriched-prompt-builder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enriched-prompt-builder.js","sourceRoot":"","sources":["../../src/runtime/enriched-prompt-builder.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,yDAAyD;AACzD,kEAAkE;AAClE,8DAA8D;AAC9D,+DAA+D;AAsB/D,MAAM,OAAO,qBAAqB;IACxB,KAAK,CAA2B;IAChC,UAAU,CAAuB;IACjC,eAAe,CAAkB;IACjC,cAAc,GAA4B,IAAI,CAAC;IAC/C,WAAW,CAAS;IAE5B,YAAY,IAKX;QACC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACxB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC;QAClC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,eAAe,CAAC;QAC5C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC;IAC9C,CAAC;IAED,iBAAiB,CAAC,MAAwB;QACxC,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,WAAwB,EACxB,WAAwB;QAExB,MAAM,aAAa,GAAG,GAAG,WAAW,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QAEnG,2BAA2B;QAC3B,IAAI,mBAAuC,CAAC;QAC5C,IAAI,QAAQ,IAAI,IAAI,CAAC,KAAK,IAAI,OAAQ,IAAI,CAAC,KAAiC,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YACnG,mBAAmB,GAAG,MAAO,IAAI,CAAC,KAAiC,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAC/F,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;YACvE,mBAAmB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;gBACxC,UAAU,EAAE,CAAC;gBACb,UAAU,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG;aACxB,CAAC,CAAC,CAAC;QACN,CAAC;QAED,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;QAE7C,oBAAoB;QACpB,MAAM,YAAY,GAAG,IAAI,CAAC,cAAc,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,MAAM,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC;QAEjG,iCAAiC;QACjC,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpG,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAE9G,kBAAkB;QAClB,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;QAEvD,uBAAuB;QACvB,MAAM,aAAa,GAAG,IAAI,CAAC,oBAAoB,CAC7C,MAAM,EACN,YAAY,EACZ,QAAQ,E
ACR,YAAY,EACZ,WAAW,EACX,mBAAmB,CACpB,CAAC;QAEF,OAAO;YACL,mBAAmB;YACnB,YAAY;YACZ,QAAQ;YACR,YAAY;YACZ,WAAW;YACX,aAAa;SACd,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,WAAW,CACf,QAAkB,EAClB,WAAwB;QAExB,MAAM,MAAM,GAAG,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;QAEtD,4CAA4C;QAC5C,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC;QAE5E,gDAAgD;QAChD,MAAM,aAAa,GAAe,EAAE,CAAC;QACrC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YAC/D,aAAa,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC5C,CAAC;QAED,eAAe;QACf,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,cAAc,EAAE,oBAAoB,IAAI,EAAE,CAAC;aAC/D,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC;aAC7B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;aACvC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEf,MAAM,aAAa,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;QAEhF,OAAO;YACL,gBAAgB,EAAE,aAAa;YAC/B,QAAQ;YACR,aAAa;SACd,CAAC;IACJ,CAAC;IAEO,oBAAoB,CAC1B,MAAc,EACd,YAAiC,EACjC,QAAwB,EACxB,YAAgC,EAChC,WAAsC,EACtC,WAA+B;QAE/B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,2CAA2C;QAC3C,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,QAAQ,GAAG,WAAW,CAAC,gBAAgB,GAAG,GAAG;gBACjD,CAAC,CAAC,qBAAqB,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,gBAAgB,CAAC,GAAG,GAAG,CAAC,GAAG;gBAC9E,CAAC,CAAC,WAAW,CAAC,gBAAgB,GAAG,GAAG;oBACpC,CAAC,CAAC,wBAAwB;oBAC1B,CAAC,CAAC,iBAAiB,CAAC;YACtB,QAAQ,CAAC,IAAI,CAAC,6BAA6B,MAAM,uBAAuB,QAAQ,UAAU,WAAW,CAAC,wBAAwB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC/I,CAAC;QAED,kCAAkC;QAClC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,QAAQ,CAAC,IAAI,CAAC,mBAAmB,MAAM,KAAK,CAAC,CAAC;YAC9C,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;gBAC9B,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,WAAW,KAAK,EAAE,CAAC,aAAa,UAAU,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC;YAC1F,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,QAAQ,CAAC,IAAI,CAAC,mBAAmB,MAAM,KAAK,CAAC,CAAC;YAC9C,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;gBA
CzB,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,cAAc,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC;YACzE,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,QAAQ,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;YAC/C,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;gBAC7B,MAAM,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC;gBACxB,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAClH,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,IAAI,YAAY,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,mBAAmB,MAAM,QAAQ,CAAC,YAAY,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,YAAY,CAAC,gBAAgB,kCAAkC,CAAC,YAAY,CAAC,aAAa,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACjO,CAAC;QAED,2DAA2D;QAC3D,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;QACxC,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC;YAC/B,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,GAAG,kBAAkB,CAAC;QAC1D,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,mBAAmB,CACzB,MAAc,EACd,UAAsB,EACtB,QAA+B;QAE/B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;YAC3C,QAAQ,CAAC,IAAI,CAAC,OAAO,MAAM,iCAAiC,CAAC,CAAC;YAC9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBACxB,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC;gBAC5G,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,QAAQ,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;YAC5D,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;gBACzB,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,aAAa,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,IAAI,SAAS,EAAE,CAAC,CAAC;gBACzI,QAAQ,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;YACnE,CAAC;QACH,CA
AC;QAED,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;IAEO,WAAW,CAAC,WAAwB;QAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QAClE,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,SAAS,CAAC;QAC/C,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,UAAU,CAAC;QAChF,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC;YAAE,OAAO,QAAQ,CAAC;QAC1E,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,UAAU,CAAC;QAC/E,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,OAAO,KAAK,CAAC;QAC1E,IAAI,WAAW,CAAC,MAAM,KAAK,YAAY;YAAE,OAAO,YAAY,CAAC;QAC7D,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,uBAAuB,CAAC,QAAkB;QAChD,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QACnD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,SAAS,CAAC;QACjD,IAAI,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,UAAU,CAAC;QACpF,IAAI,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC;YAAE,OAAO,QAAQ,CAAC;QAC9E,IAAI,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,UAAU,CAAC;QACnF,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,OAAO,KAAK,CAAC;QAC9E,OAAO,SAAS,CAAC;IACnB,CAAC;CACF"}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Gap Detector
|
|
3
|
+
// Identifies verification hotspots where formal rules could
|
|
4
|
+
// replace LLM verification, and proposes rule candidates.
|
|
5
|
+
// ============================================================
|
|
6
|
+
import { randomUUID } from 'node:crypto';
|
|
7
|
+
import { getClient, MODEL } from '../lib/anthropic.js';
|
|
8
|
+
/**
 * Identifies verification hotspots where a deterministic (formal) rule could
 * replace LLM verification, and asks the model to propose rule candidates.
 */
export class GapDetector {
    /**
     * Proposes and validates one rule candidate per eligible hotspot.
     *
     * A hotspot is eligible when it is flagged `candidateForFormalRule`, has a
     * fail rate above 20%, formal coverage below 10% and at least 10 claims.
     *
     * @param {object[]} hotspots
     * @returns {Promise<object[]>} Candidates that passed `validateTestCases`.
     */
    async analyze(hotspots) {
        const candidates = [];
        const eligibleHotspots = hotspots.filter(h => h.candidateForFormalRule &&
            h.failRate > 0.2 &&
            h.formalCoverage < 0.1 &&
            h.totalClaims >= 10);
        for (const hotspot of eligibleHotspots) {
            try {
                const candidate = await this.proposeRule(hotspot);
                if (candidate && this.validateTestCases(candidate)) {
                    candidates.push(candidate);
                }
            }
            catch {
                // Deliberately best-effort: a failed proposal for one hotspot
                // must not prevent the remaining hotspots from being analyzed.
            }
        }
        return candidates;
    }

    /**
     * Asks the model for a formal rule covering the given hotspot.
     * @param {object} hotspot
     * @returns {Promise<object|null>} Rule candidate, or null when the reply
     *   cannot be turned into one.
     */
    async proposeRule(hotspot) {
        const client = getClient();
        const response = await client.messages.create({
            model: MODEL,
            max_tokens: 2048,
            system: `You design formal verification rules for code. Rules must be regex or AST-based patterns that can be checked deterministically without an LLM.

Respond with ONLY JSON, no markdown fences.`,
            messages: [{
                    role: 'user',
                    content: `This verification hotspot has a ${(hotspot.failRate * 100).toFixed(0)}% failure rate but only ${(hotspot.formalCoverage * 100).toFixed(0)}% formal verification coverage:

Category: ${hotspot.claimCategory}
Total claims: ${hotspot.totalClaims}
Failed claims: ${hotspot.failedClaims}
Top failure reasons: ${(hotspot.topFailureReasons ?? []).join('; ')}

Propose a formal rule (regex pattern) that could catch the most common failure pattern in this category.

Respond:
{
"description": "what the rule checks",
"rationale": "why this should be formal, not LLM",
"substrate": "regex",
"pattern": "the regex pattern",
"language": "typescript",
"testCases": [
{ "input": "code that should FAIL", "language": "typescript", "expectedVerdict": "FAIL", "description": "what this tests" },
{ "input": "code that should PASS", "language": "typescript", "expectedVerdict": "PASS", "description": "what this tests" }
],
"estimatedFormalCoverage": 0.3
}`,
                }],
        });
        // Use the first text block wherever it sits; the reply may be empty
        // or may start with a non-text block.
        const blocks = Array.isArray(response?.content) ? response.content : [];
        const textBlock = blocks.find(block => block?.type === 'text');
        return this.parseProposal(textBlock?.text ?? '', hotspot);
    }

    /**
     * Converts the model's raw reply into a rule candidate.
     * @param {string} text - Raw reply; may be wrapped in markdown fences or
     *   surrounded by extra prose.
     * @param {object} hotspot - Supplies `claimCategory`.
     * @returns {object|null} Candidate with status 'proposed', or null when the
     *   reply is not a JSON object carrying a non-empty string `pattern`.
     */
    parseProposal(text, hotspot) {
        let cleaned = String(text ?? '').trim();
        if (cleaned.startsWith('```')) {
            cleaned = cleaned.replace(/^```(?:json)?\s*/, '').replace(/\s*```$/, '');
        }
        let parsed;
        try {
            parsed = JSON.parse(cleaned);
        }
        catch {
            // Fall back to the outermost {...} span when prose surrounds the JSON.
            const start = cleaned.indexOf('{');
            const end = cleaned.lastIndexOf('}');
            if (start === -1 || end <= start) {
                return null;
            }
            try {
                parsed = JSON.parse(cleaned.slice(start, end + 1));
            }
            catch {
                return null;
            }
        }
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
            return null;
        }
        // Without a pattern there is no rule. `new RegExp(undefined)` would
        // silently compile to a match-everything expression.
        if (typeof parsed.pattern !== 'string' || parsed.pattern === '') {
            return null;
        }
        return {
            id: randomUUID(),
            claimCategory: hotspot.claimCategory,
            description: parsed.description,
            rationale: parsed.rationale,
            substrate: parsed.substrate === 'ast' ? 'ast' : 'regex',
            pattern: parsed.pattern,
            language: parsed.language ?? 'typescript',
            testCases: Array.isArray(parsed.testCases) ? parsed.testCases : [],
            sourceHotspot: hotspot.claimCategory,
            supportingExperiences: [],
            estimatedFormalCoverage: parsed.estimatedFormalCoverage ?? 0,
            status: 'proposed',
            proposedAt: new Date().toISOString(),
        };
    }

    /**
     * Checks a candidate against its own test cases.
     *
     * A regex match means the rule fires (verdict FAIL); no match means PASS.
     * Regex candidates must agree with every test case and must be exercised
     * by at least one FAIL and one PASS case; otherwise a pattern that never
     * (or always) matches would be accepted without showing it discriminates.
     *
     * NOTE(review): the pattern is model-generated and runs unbounded here; a
     * pathological pattern could backtrack for a long time.
     *
     * @param {object} candidate - Reads `testCases`, `substrate`, `pattern`.
     * @returns {boolean}
     */
    validateTestCases(candidate) {
        if (!Array.isArray(candidate.testCases) || candidate.testCases.length < 2) {
            return false;
        }
        if (candidate.substrate !== 'regex') {
            return true; // can't validate AST locally
        }
        let regex;
        try {
            // No `g` flag: a non-global regex keeps no `lastIndex` state
            // between `test` calls, so nothing has to be reset.
            regex = new RegExp(candidate.pattern, 'm');
        }
        catch {
            return false; // invalid regex
        }
        let sawFail = false;
        let sawPass = false;
        for (const tc of candidate.testCases) {
            if (typeof tc?.input !== 'string') {
                return false; // `test` would coerce it to a string like "undefined"
            }
            const formalVerdict = regex.test(tc.input) ? 'FAIL' : 'PASS';
            if (formalVerdict !== tc.expectedVerdict) {
                return false; // test case doesn't match expected behavior
            }
            if (formalVerdict === 'FAIL') {
                sawFail = true;
            }
            else {
                sawPass = true;
            }
        }
        return sawFail && sawPass;
    }
}
|
|
111
|
+
//# sourceMappingURL=gap-detector.js.map
|