@vibecheckai/cli 3.2.6 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/registry.js +192 -5
- package/bin/runners/lib/agent-firewall/change-packet/builder.js +280 -6
- package/bin/runners/lib/agent-firewall/critic/index.js +151 -0
- package/bin/runners/lib/agent-firewall/critic/judge.js +432 -0
- package/bin/runners/lib/agent-firewall/critic/prompts.js +305 -0
- package/bin/runners/lib/agent-firewall/lawbook/distributor.js +465 -0
- package/bin/runners/lib/agent-firewall/lawbook/evaluator.js +604 -0
- package/bin/runners/lib/agent-firewall/lawbook/index.js +304 -0
- package/bin/runners/lib/agent-firewall/lawbook/registry.js +514 -0
- package/bin/runners/lib/agent-firewall/lawbook/schema.js +420 -0
- package/bin/runners/lib/agent-firewall/logger.js +141 -0
- package/bin/runners/lib/agent-firewall/policy/loader.js +312 -4
- package/bin/runners/lib/agent-firewall/policy/rules/ghost-env.js +113 -1
- package/bin/runners/lib/agent-firewall/policy/rules/ghost-route.js +133 -6
- package/bin/runners/lib/agent-firewall/proposal/extractor.js +394 -0
- package/bin/runners/lib/agent-firewall/proposal/index.js +212 -0
- package/bin/runners/lib/agent-firewall/proposal/schema.js +251 -0
- package/bin/runners/lib/agent-firewall/proposal/validator.js +386 -0
- package/bin/runners/lib/agent-firewall/reality/index.js +332 -0
- package/bin/runners/lib/agent-firewall/reality/state.js +625 -0
- package/bin/runners/lib/agent-firewall/reality/watcher.js +322 -0
- package/bin/runners/lib/agent-firewall/risk/index.js +173 -0
- package/bin/runners/lib/agent-firewall/risk/scorer.js +328 -0
- package/bin/runners/lib/agent-firewall/risk/thresholds.js +321 -0
- package/bin/runners/lib/agent-firewall/risk/vectors.js +421 -0
- package/bin/runners/lib/agent-firewall/simulator/diff-simulator.js +472 -0
- package/bin/runners/lib/agent-firewall/simulator/import-resolver.js +346 -0
- package/bin/runners/lib/agent-firewall/simulator/index.js +181 -0
- package/bin/runners/lib/agent-firewall/simulator/route-validator.js +380 -0
- package/bin/runners/lib/agent-firewall/time-machine/incident-correlator.js +661 -0
- package/bin/runners/lib/agent-firewall/time-machine/index.js +267 -0
- package/bin/runners/lib/agent-firewall/time-machine/replay-engine.js +436 -0
- package/bin/runners/lib/agent-firewall/time-machine/state-reconstructor.js +490 -0
- package/bin/runners/lib/agent-firewall/time-machine/timeline-builder.js +530 -0
- package/bin/runners/lib/analyzers.js +81 -18
- package/bin/runners/lib/authority-badge.js +425 -0
- package/bin/runners/lib/cli-output.js +7 -1
- package/bin/runners/lib/error-handler.js +16 -9
- package/bin/runners/lib/exit-codes.js +275 -0
- package/bin/runners/lib/global-flags.js +37 -0
- package/bin/runners/lib/help-formatter.js +413 -0
- package/bin/runners/lib/logger.js +38 -0
- package/bin/runners/lib/unified-cli-output.js +604 -0
- package/bin/runners/lib/upsell.js +148 -0
- package/bin/runners/runApprove.js +1200 -0
- package/bin/runners/runAuth.js +324 -95
- package/bin/runners/runCheckpoint.js +39 -21
- package/bin/runners/runClassify.js +859 -0
- package/bin/runners/runContext.js +136 -24
- package/bin/runners/runDoctor.js +108 -68
- package/bin/runners/runFix.js +6 -5
- package/bin/runners/runGuard.js +212 -118
- package/bin/runners/runInit.js +3 -2
- package/bin/runners/runMcp.js +130 -52
- package/bin/runners/runPolish.js +43 -20
- package/bin/runners/runProve.js +1 -2
- package/bin/runners/runReport.js +3 -2
- package/bin/runners/runScan.js +63 -44
- package/bin/runners/runShip.js +3 -4
- package/bin/runners/runValidate.js +19 -2
- package/bin/runners/runWatch.js +104 -53
- package/bin/vibecheck.js +106 -19
- package/mcp-server/HARDENING_SUMMARY.md +299 -0
- package/mcp-server/agent-firewall-interceptor.js +367 -31
- package/mcp-server/authority-tools.js +569 -0
- package/mcp-server/conductor/conflict-resolver.js +588 -0
- package/mcp-server/conductor/execution-planner.js +544 -0
- package/mcp-server/conductor/index.js +377 -0
- package/mcp-server/conductor/lock-manager.js +615 -0
- package/mcp-server/conductor/request-queue.js +550 -0
- package/mcp-server/conductor/session-manager.js +500 -0
- package/mcp-server/conductor/tools.js +510 -0
- package/mcp-server/index.js +1149 -243
- package/mcp-server/lib/{api-client.js → api-client.cjs} +40 -4
- package/mcp-server/lib/logger.cjs +30 -0
- package/mcp-server/logger.js +173 -0
- package/mcp-server/package.json +2 -2
- package/mcp-server/premium-tools.js +2 -2
- package/mcp-server/tier-auth.js +245 -35
- package/mcp-server/truth-firewall-tools.js +145 -15
- package/mcp-server/vibecheck-tools.js +2 -2
- package/package.json +2 -3
- package/mcp-server/index.old.js +0 -4137
- package/mcp-server/package-lock.json +0 -165
|
@@ -3,6 +3,12 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Builds change packets from diffs + agent intent.
|
|
5
5
|
* Each packet is a complete audit artifact of an AI code change attempt.
|
|
6
|
+
*
|
|
7
|
+
* Enhanced with:
|
|
8
|
+
* - Risk scoring
|
|
9
|
+
* - Simulation results
|
|
10
|
+
* - Critic verdict
|
|
11
|
+
* - Override tracking
|
|
6
12
|
*/
|
|
7
13
|
|
|
8
14
|
"use strict";
|
|
@@ -10,6 +16,22 @@
|
|
|
10
16
|
const crypto = require("crypto");
|
|
11
17
|
const path = require("path");
|
|
12
18
|
|
|
19
|
+
/**
|
|
20
|
+
* @typedef {Object} ProofArtifact
|
|
21
|
+
* @property {string} changeId - Unique change identifier
|
|
22
|
+
* @property {string} decision - ALLOW, BLOCK, or REQUIRE_CONFIRMATION
|
|
23
|
+
* @property {Array} rulesTriggered - Rules that were triggered
|
|
24
|
+
* @property {Array} assumptionsFailed - Failed assumptions
|
|
25
|
+
* @property {number} riskScore - Numerical risk score
|
|
26
|
+
* @property {string} riskLevel - LOW, MEDIUM, HIGH, CRITICAL
|
|
27
|
+
* @property {Object} simulationResult - Result of diff simulation
|
|
28
|
+
* @property {Object} criticVerdict - Critic LLM verdict
|
|
29
|
+
* @property {string} timestamp - ISO timestamp
|
|
30
|
+
* @property {boolean} overrideUsed - Whether override was used
|
|
31
|
+
* @property {string} overrideBy - Who overrode (if applicable)
|
|
32
|
+
* @property {string} overrideReason - Reason for override
|
|
33
|
+
*/
|
|
34
|
+
|
|
13
35
|
/**
|
|
14
36
|
* Build a change packet from diff and agent intent
|
|
15
37
|
* @param {object} params
|
|
@@ -22,6 +44,11 @@ const path = require("path");
|
|
|
22
44
|
* @param {object} params.verdict - Policy verdict
|
|
23
45
|
* @param {object} params.unblockPlan - Unblock plan (if blocked)
|
|
24
46
|
* @param {object} params.policy - Policy used for evaluation
|
|
47
|
+
* @param {object} params.riskScore - Risk scoring result
|
|
48
|
+
* @param {object} params.simulationResult - Diff simulation result
|
|
49
|
+
* @param {object} params.criticVerdict - Critic LLM verdict
|
|
50
|
+
* @param {object} params.proposal - Structured change proposal
|
|
51
|
+
* @param {object} params.override - Override information
|
|
25
52
|
* @returns {object} Change packet
|
|
26
53
|
*/
|
|
27
54
|
function buildChangePacket({
|
|
@@ -33,7 +60,12 @@ function buildChangePacket({
|
|
|
33
60
|
evidence = [],
|
|
34
61
|
verdict,
|
|
35
62
|
unblockPlan = null,
|
|
36
|
-
policy = null
|
|
63
|
+
policy = null,
|
|
64
|
+
riskScore = null,
|
|
65
|
+
simulationResult = null,
|
|
66
|
+
criticVerdict = null,
|
|
67
|
+
proposal = null,
|
|
68
|
+
override = null
|
|
37
69
|
}) {
|
|
38
70
|
const timestamp = new Date().toISOString();
|
|
39
71
|
|
|
@@ -61,27 +93,83 @@ function buildChangePacket({
|
|
|
61
93
|
domain: classifyFileDomain(filePath)
|
|
62
94
|
}];
|
|
63
95
|
|
|
64
|
-
//
|
|
96
|
+
// Extract failed assumptions from evidence
|
|
97
|
+
const assumptionsFailed = evidence
|
|
98
|
+
.filter(e => e.status === "UNPROVEN" || !e.verified)
|
|
99
|
+
.map(e => e.claim?.key || e.claim?.type || e.assumption || "unknown");
|
|
100
|
+
|
|
101
|
+
// Extract triggered rules from verdict
|
|
102
|
+
const rulesTriggered = (verdict?.violations || [])
|
|
103
|
+
.map(v => v.rule || v.type || v.id)
|
|
104
|
+
.filter(Boolean);
|
|
105
|
+
|
|
106
|
+
// Build packet with enhanced proof artifact fields
|
|
65
107
|
const packet = {
|
|
66
108
|
id,
|
|
67
109
|
timestamp,
|
|
68
110
|
agentId,
|
|
69
111
|
intent: intent || "No intent provided",
|
|
112
|
+
|
|
113
|
+
// Original fields
|
|
70
114
|
diff: diff || null,
|
|
71
115
|
files,
|
|
72
116
|
claims,
|
|
73
117
|
evidence,
|
|
118
|
+
|
|
119
|
+
// Verdict and decision
|
|
74
120
|
verdict: verdict || {
|
|
75
121
|
decision: "ALLOW",
|
|
76
122
|
violations: [],
|
|
77
123
|
message: "No verdict provided"
|
|
78
124
|
},
|
|
79
125
|
unblockPlan: unblockPlan || null,
|
|
126
|
+
|
|
127
|
+
// Enhanced proof artifact fields
|
|
128
|
+
proof: {
|
|
129
|
+
changeId: `c-${id}`,
|
|
130
|
+
decision: verdict?.decision || "ALLOW",
|
|
131
|
+
rulesTriggered,
|
|
132
|
+
assumptionsFailed,
|
|
133
|
+
riskScore: riskScore?.total ?? null,
|
|
134
|
+
riskLevel: riskScore?.level || null,
|
|
135
|
+
riskFactors: riskScore?.reasons || [],
|
|
136
|
+
simulationResult: simulationResult ? {
|
|
137
|
+
passed: simulationResult.passed,
|
|
138
|
+
errorCount: simulationResult.errors?.length || 0,
|
|
139
|
+
warningCount: simulationResult.warnings?.length || 0,
|
|
140
|
+
errors: (simulationResult.errors || []).slice(0, 5).map(e => e.message || e),
|
|
141
|
+
warnings: (simulationResult.warnings || []).slice(0, 5).map(w => w.message || w),
|
|
142
|
+
} : null,
|
|
143
|
+
criticVerdict: criticVerdict ? {
|
|
144
|
+
verdict: criticVerdict.verdict,
|
|
145
|
+
confidence: criticVerdict.confidence,
|
|
146
|
+
reasoning: criticVerdict.reasoning || [],
|
|
147
|
+
violations: criticVerdict.violations || [],
|
|
148
|
+
} : null,
|
|
149
|
+
overrideUsed: override?.used || false,
|
|
150
|
+
overrideBy: override?.by || null,
|
|
151
|
+
overrideReason: override?.reason || null,
|
|
152
|
+
overrideTimestamp: override?.timestamp || null,
|
|
153
|
+
},
|
|
154
|
+
|
|
155
|
+
// Structured proposal (if provided)
|
|
156
|
+
proposal: proposal ? {
|
|
157
|
+
intent: proposal.intent,
|
|
158
|
+
summary: proposal.summary,
|
|
159
|
+
confidence: proposal.confidence,
|
|
160
|
+
assumptions: proposal.assumptions,
|
|
161
|
+
filesTouched: proposal.filesTouched,
|
|
162
|
+
operationCount: proposal.operations?.length || 0,
|
|
163
|
+
} : null,
|
|
164
|
+
|
|
165
|
+
// Metadata
|
|
80
166
|
metadata: {
|
|
81
167
|
totalFiles: files.length,
|
|
82
168
|
totalLines: linesChanged,
|
|
83
169
|
policyVersion: policy?.version || "unknown",
|
|
84
|
-
policyProfile: policy?.profile || "unknown"
|
|
170
|
+
policyProfile: policy?.profile || "unknown",
|
|
171
|
+
policyMode: policy?.mode || "unknown",
|
|
172
|
+
domains: [...new Set(files.map(f => f.domain))],
|
|
85
173
|
}
|
|
86
174
|
};
|
|
87
175
|
|
|
@@ -98,6 +186,11 @@ function buildChangePacket({
|
|
|
98
186
|
* @param {object} params.verdict - Policy verdict
|
|
99
187
|
* @param {object} params.unblockPlan - Unblock plan (if blocked)
|
|
100
188
|
* @param {object} params.policy - Policy used for evaluation
|
|
189
|
+
* @param {object} params.riskScore - Risk scoring result
|
|
190
|
+
* @param {object} params.simulationResult - Diff simulation result
|
|
191
|
+
* @param {object} params.criticVerdict - Critic LLM verdict
|
|
192
|
+
* @param {object} params.proposal - Structured change proposal
|
|
193
|
+
* @param {object} params.override - Override information
|
|
101
194
|
* @returns {object} Change packet
|
|
102
195
|
*/
|
|
103
196
|
function buildMultiFileChangePacket({
|
|
@@ -107,7 +200,12 @@ function buildMultiFileChangePacket({
|
|
|
107
200
|
evidence = [],
|
|
108
201
|
verdict,
|
|
109
202
|
unblockPlan = null,
|
|
110
|
-
policy = null
|
|
203
|
+
policy = null,
|
|
204
|
+
riskScore = null,
|
|
205
|
+
simulationResult = null,
|
|
206
|
+
criticVerdict = null,
|
|
207
|
+
proposal = null,
|
|
208
|
+
override = null
|
|
111
209
|
}) {
|
|
112
210
|
const timestamp = new Date().toISOString();
|
|
113
211
|
|
|
@@ -148,30 +246,86 @@ function buildMultiFileChangePacket({
|
|
|
148
246
|
.filter(Boolean)
|
|
149
247
|
.join("\n\n");
|
|
150
248
|
|
|
249
|
+
// Extract failed assumptions from evidence
|
|
250
|
+
const assumptionsFailed = evidence
|
|
251
|
+
.filter(e => e.status === "UNPROVEN" || !e.verified)
|
|
252
|
+
.map(e => e.claim?.key || e.claim?.type || e.assumption || "unknown");
|
|
253
|
+
|
|
254
|
+
// Extract triggered rules from verdict
|
|
255
|
+
const rulesTriggered = (verdict?.violations || [])
|
|
256
|
+
.map(v => v.rule || v.type || v.id)
|
|
257
|
+
.filter(Boolean);
|
|
258
|
+
|
|
151
259
|
const packet = {
|
|
152
260
|
id,
|
|
153
261
|
timestamp,
|
|
154
262
|
agentId,
|
|
155
263
|
intent: intent || "No intent provided",
|
|
264
|
+
|
|
265
|
+
// Original fields
|
|
156
266
|
diff: unifiedDiff ? {
|
|
157
267
|
unified: unifiedDiff,
|
|
158
|
-
before: null,
|
|
268
|
+
before: null,
|
|
159
269
|
after: null
|
|
160
270
|
} : null,
|
|
161
271
|
files,
|
|
162
272
|
claims,
|
|
163
273
|
evidence,
|
|
274
|
+
|
|
275
|
+
// Verdict and decision
|
|
164
276
|
verdict: verdict || {
|
|
165
277
|
decision: "ALLOW",
|
|
166
278
|
violations: [],
|
|
167
279
|
message: "No verdict provided"
|
|
168
280
|
},
|
|
169
281
|
unblockPlan: unblockPlan || null,
|
|
282
|
+
|
|
283
|
+
// Enhanced proof artifact fields
|
|
284
|
+
proof: {
|
|
285
|
+
changeId: `c-${id}`,
|
|
286
|
+
decision: verdict?.decision || "ALLOW",
|
|
287
|
+
rulesTriggered,
|
|
288
|
+
assumptionsFailed,
|
|
289
|
+
riskScore: riskScore?.total ?? null,
|
|
290
|
+
riskLevel: riskScore?.level || null,
|
|
291
|
+
riskFactors: riskScore?.reasons || [],
|
|
292
|
+
simulationResult: simulationResult ? {
|
|
293
|
+
passed: simulationResult.passed,
|
|
294
|
+
errorCount: simulationResult.errors?.length || 0,
|
|
295
|
+
warningCount: simulationResult.warnings?.length || 0,
|
|
296
|
+
errors: (simulationResult.errors || []).slice(0, 5).map(e => e.message || e),
|
|
297
|
+
warnings: (simulationResult.warnings || []).slice(0, 5).map(w => w.message || w),
|
|
298
|
+
} : null,
|
|
299
|
+
criticVerdict: criticVerdict ? {
|
|
300
|
+
verdict: criticVerdict.verdict,
|
|
301
|
+
confidence: criticVerdict.confidence,
|
|
302
|
+
reasoning: criticVerdict.reasoning || [],
|
|
303
|
+
violations: criticVerdict.violations || [],
|
|
304
|
+
} : null,
|
|
305
|
+
overrideUsed: override?.used || false,
|
|
306
|
+
overrideBy: override?.by || null,
|
|
307
|
+
overrideReason: override?.reason || null,
|
|
308
|
+
overrideTimestamp: override?.timestamp || null,
|
|
309
|
+
},
|
|
310
|
+
|
|
311
|
+
// Structured proposal (if provided)
|
|
312
|
+
proposal: proposal ? {
|
|
313
|
+
intent: proposal.intent,
|
|
314
|
+
summary: proposal.summary,
|
|
315
|
+
confidence: proposal.confidence,
|
|
316
|
+
assumptions: proposal.assumptions,
|
|
317
|
+
filesTouched: proposal.filesTouched,
|
|
318
|
+
operationCount: proposal.operations?.length || 0,
|
|
319
|
+
} : null,
|
|
320
|
+
|
|
321
|
+
// Metadata
|
|
170
322
|
metadata: {
|
|
171
323
|
totalFiles: files.length,
|
|
172
324
|
totalLines: files.reduce((sum, f) => sum + f.linesChanged, 0),
|
|
173
325
|
policyVersion: policy?.version || "unknown",
|
|
174
|
-
policyProfile: policy?.profile || "unknown"
|
|
326
|
+
policyProfile: policy?.profile || "unknown",
|
|
327
|
+
policyMode: policy?.mode || "unknown",
|
|
328
|
+
domains: [...new Set(files.map(f => f.domain))],
|
|
175
329
|
}
|
|
176
330
|
};
|
|
177
331
|
|
|
@@ -206,9 +360,129 @@ function classifyFileDomain(filePath) {
|
|
|
206
360
|
return "general";
|
|
207
361
|
}
|
|
208
362
|
|
|
363
|
+
/**
 * Build a standalone proof artifact for compliance.
 *
 * Condenses the inputs into a flat record: the risk fields are read from
 * `riskScore.total` / `.level` / `.reasons`, the simulation result is reduced
 * to pass/fail, error and warning counts plus the imports named by
 * broken/unresolved-import errors, and the critic verdict is reduced to its
 * verdict, confidence and reasoning.
 *
 * @param {object} [params] - Proof parameters; every field is optional
 * @param {string} [params.changeId] - Change identifier; when falsy a random
 *   `c-<16 hex chars>` id is generated
 * @param {string} [params.decision] - Decision; falls back to "BLOCK" when falsy
 * @param {Array} [params.rulesTriggered] - Rules that were triggered
 * @param {Array} [params.assumptionsFailed] - Failed assumptions
 * @param {object} [params.riskScore] - Risk scoring result (`total`, `level`, `reasons`)
 * @param {object} [params.simulationResult] - Diff simulation result (`passed`, `errors`, `warnings`)
 * @param {object} [params.criticVerdict] - Critic verdict (`verdict`, `confidence`, `reasoning`)
 * @param {object} [params.override] - Override information (`used`, `by`, `reason`, `timestamp`)
 * @returns {ProofArtifact} Proof artifact
 */
function buildProofArtifact({
  changeId,
  decision,
  rulesTriggered = [],
  assumptionsFailed = [],
  riskScore = null,
  simulationResult = null,
  criticVerdict = null,
  override = null,
} = {}) {
  const timestamp = new Date().toISOString();

  return {
    changeId: changeId || `c-${crypto.randomBytes(8).toString("hex")}`,
    decision: decision || "BLOCK",
    // Parameter defaults only cover `undefined`; an explicit null must still
    // end up as an array so consumers can read `.length` / call `.join()`.
    rulesTriggered: rulesTriggered ?? [],
    assumptionsFailed: assumptionsFailed ?? [],
    // `??` keeps a legitimate score of 0.
    riskScore: riskScore?.total ?? null,
    riskLevel: riskScore?.level || "UNKNOWN",
    riskFactors: riskScore?.reasons || [],
    simulationResult: simulationResult ? {
      passed: simulationResult.passed,
      errorCount: simulationResult.errors?.length || 0,
      warningCount: simulationResult.warnings?.length || 0,
      // Entries may be plain strings or null; `?.` keeps those from throwing.
      brokenImports: (simulationResult.errors || [])
        .filter(e => e?.type === "broken_import" || e?.type === "unresolved_import")
        .map(e => e.import),
    } : null,
    criticVerdict: criticVerdict ? {
      verdict: criticVerdict.verdict,
      confidence: criticVerdict.confidence,
      reasoning: criticVerdict.reasoning || [],
    } : null,
    timestamp,
    overrideUsed: override?.used || false,
    overrideBy: override?.by || null,
    overrideReason: override?.reason || null,
    // Mirrors the `proof.overrideTimestamp` field stored on change packets.
    overrideTimestamp: override?.timestamp || null,
  };
}
|
|
407
|
+
|
|
408
|
+
/**
 * Extract proof artifact from a change packet.
 *
 * Packets that already carry a `proof` object return a copy of it with
 * `timestamp` taken from the packet. Packets without one (legacy format) get
 * a proof derived from `verdict` and `evidence`; risk, simulation, critic and
 * override fields are filled with empty placeholders.
 *
 * @param {object} packet - Change packet
 * @returns {ProofArtifact} Proof artifact
 * @throws {TypeError} If `packet` is null or undefined
 */
function extractProofArtifact(packet) {
  if (packet == null) {
    throw new TypeError("extractProofArtifact requires a change packet");
  }

  if (packet.proof) {
    return {
      ...packet.proof,
      timestamp: packet.timestamp,
    };
  }

  // Build from legacy packet format
  return {
    changeId: `c-${packet.id}`,
    decision: packet.verdict?.decision || "UNKNOWN",
    // Same identifier fallback chain as the packet builders; violations that
    // carry no identifier are dropped instead of surfacing as `undefined`.
    rulesTriggered: (packet.verdict?.violations || [])
      .map(v => v.rule || v.type || v.id)
      .filter(Boolean),
    // Every unproven piece of evidence gets a label, "unknown" at worst.
    assumptionsFailed: (packet.evidence || [])
      .filter(e => e.status === "UNPROVEN")
      .map(e => e.claim?.key || e.claim?.type || e.assumption || "unknown"),
    riskScore: null,
    riskLevel: "UNKNOWN",
    riskFactors: [],
    simulationResult: null,
    criticVerdict: null,
    timestamp: packet.timestamp,
    overrideUsed: false,
    overrideBy: null,
    overrideReason: null,
    overrideTimestamp: null,
  };
}
|
|
439
|
+
|
|
440
|
+
/**
 * Format proof artifact for display.
 *
 * Always emits the change id, decision and timestamp followed by a blank
 * line, then one line per section that is present: risk score, triggered
 * rules, failed assumptions, simulation result, critic verdict and override.
 * Sections whose data is missing or empty are left out.
 *
 * @param {ProofArtifact} proof - Proof artifact
 * @returns {string} Formatted string (lines joined with "\n")
 */
function formatProofArtifact(proof) {
  const lines = [
    `Change ID: ${proof.changeId}`,
    `Decision: ${proof.decision}`,
    `Timestamp: ${proof.timestamp}`,
    "",
  ];

  // `!= null` also skips an undefined score (hand-built or partial proofs),
  // while still printing a legitimate score of 0.
  if (proof.riskScore != null) {
    lines.push(`Risk Score: ${proof.riskScore} (${proof.riskLevel})`);
  }

  // Partial proofs may omit the list fields; treat a missing list as empty.
  const rulesTriggered = proof.rulesTriggered || [];
  if (rulesTriggered.length > 0) {
    lines.push(`Rules Triggered: ${rulesTriggered.join(", ")}`);
  }

  const assumptionsFailed = proof.assumptionsFailed || [];
  if (assumptionsFailed.length > 0) {
    lines.push(`Assumptions Failed: ${assumptionsFailed.join(", ")}`);
  }

  if (proof.simulationResult) {
    lines.push(`Simulation: ${proof.simulationResult.passed ? "PASSED" : "FAILED"} (${proof.simulationResult.errorCount} errors)`);
  }

  if (proof.criticVerdict) {
    const { verdict, confidence } = proof.criticVerdict;
    // A missing or non-numeric confidence would otherwise render as "NaN%".
    const ratio = confidence == null ? NaN : Number(confidence);
    const shown = Number.isFinite(ratio) ? `${(ratio * 100).toFixed(0)}%` : "unknown";
    lines.push(`Critic: ${verdict} (${shown} confidence)`);
  }

  if (proof.overrideUsed) {
    lines.push(`Override: Used by ${proof.overrideBy} - ${proof.overrideReason}`);
  }

  return lines.join("\n");
}
|
|
479
|
+
|
|
209
480
|
module.exports = {
|
|
210
481
|
buildChangePacket,
|
|
211
482
|
buildMultiFileChangePacket,
|
|
483
|
+
buildProofArtifact,
|
|
484
|
+
extractProofArtifact,
|
|
485
|
+
formatProofArtifact,
|
|
212
486
|
calculateLinesChanged,
|
|
213
487
|
classifyFileDomain
|
|
214
488
|
};
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Critic Module
|
|
3
|
+
*
|
|
4
|
+
* Entry point for the Critic LLM judge.
|
|
5
|
+
* The "savage" that evaluates proposal quality.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const { critic } = require('./critic');
|
|
9
|
+
*
|
|
10
|
+
* // Configure with LLM client
|
|
11
|
+
* critic.setClient(async (params) => {
|
|
12
|
+
* return await callOpenAI(params);
|
|
13
|
+
* });
|
|
14
|
+
*
|
|
15
|
+
* // Evaluate a proposal
|
|
16
|
+
* const verdict = await critic.evaluate({
|
|
17
|
+
* proposal,
|
|
18
|
+
* validationResults,
|
|
19
|
+
* riskScore,
|
|
20
|
+
* simulationResult,
|
|
21
|
+
* realityState,
|
|
22
|
+
* });
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
"use strict";
|
|
26
|
+
|
|
27
|
+
const {
|
|
28
|
+
CriticJudge,
|
|
29
|
+
createJudge,
|
|
30
|
+
defaultJudge,
|
|
31
|
+
} = require("./judge");
|
|
32
|
+
|
|
33
|
+
const {
|
|
34
|
+
CRITIC_SYSTEM_PROMPT,
|
|
35
|
+
EVALUATION_PROMPT_TEMPLATE,
|
|
36
|
+
VAGUENESS_CHECK_PROMPT,
|
|
37
|
+
ASSUMPTION_VERIFICATION_PROMPT,
|
|
38
|
+
buildEvaluationPrompt,
|
|
39
|
+
buildVaguenessPrompt,
|
|
40
|
+
buildVerificationPrompt,
|
|
41
|
+
parseCriticResponse,
|
|
42
|
+
} = require("./prompts");
|
|
43
|
+
|
|
44
|
+
/**
 * Critic singleton interface.
 *
 * Every member forwards to the shared `defaultJudge` instance or to one of
 * the prompt helpers imported above; the object holds no state of its own.
 */
const critic = {
  /**
   * Install the LLM client on the default judge.
   * @param {Function} client - LLM client function
   */
  setClient: (client) => {
    defaultJudge.setClient(client);
  },

  /**
   * Report whether the default judge says it is available.
   * @returns {boolean} Is available
   */
  isAvailable: () => defaultJudge.isAvailable(),

  /**
   * Evaluate a proposal with the default judge.
   * @param {Object} params - Evaluation parameters
   * @returns {Promise<Object>} Critic verdict
   */
  evaluate: async (params) => defaultJudge.evaluate(params),

  /**
   * Run the default judge's vagueness check on a proposal.
   * @param {Object} proposal - Proposal to check
   * @returns {Promise<Object>} Vagueness analysis
   */
  checkVagueness: async (proposal) => defaultJudge.checkVagueness(proposal),

  /**
   * Verify assumptions against the repository state.
   * @param {Array} assumptions - Assumptions to verify
   * @param {Object} realityState - Repository state
   * @returns {Promise<Object>} Verification results
   */
  verifyAssumptions: async (assumptions, realityState) =>
    defaultJudge.verifyAssumptions(assumptions, realityState),

  /**
   * Create a separate judge instance.
   * @param {Object} options - Configuration
   * @returns {CriticJudge} New judge instance
   */
  createJudge: (options) => createJudge(options),

  /**
   * Return the critic system prompt.
   * @returns {string} System prompt
   */
  getSystemPrompt: () => CRITIC_SYSTEM_PROMPT,

  /**
   * Fill in the evaluation prompt.
   * @param {Object} data - Prompt data
   * @returns {string} Filled prompt
   */
  buildPrompt: (data) => buildEvaluationPrompt(data),

  /**
   * Parse a raw critic response.
   * @param {string} response - LLM response
   * @returns {Object} Parsed verdict
   */
  parseResponse: (response) => parseCriticResponse(response),

  /**
   * Quick rule-based evaluation (no LLM).
   * @param {Object} params - Evaluation params
   * @returns {Object} Verdict
   */
  quickEvaluate: (params) => defaultJudge.ruleBasedEvaluation(params),
};
|
|
136
|
+
|
|
137
|
+
module.exports = {
|
|
138
|
+
critic,
|
|
139
|
+
CriticJudge,
|
|
140
|
+
createJudge,
|
|
141
|
+
defaultJudge,
|
|
142
|
+
// Prompt exports
|
|
143
|
+
CRITIC_SYSTEM_PROMPT,
|
|
144
|
+
EVALUATION_PROMPT_TEMPLATE,
|
|
145
|
+
VAGUENESS_CHECK_PROMPT,
|
|
146
|
+
ASSUMPTION_VERIFICATION_PROMPT,
|
|
147
|
+
buildEvaluationPrompt,
|
|
148
|
+
buildVaguenessPrompt,
|
|
149
|
+
buildVerificationPrompt,
|
|
150
|
+
parseCriticResponse,
|
|
151
|
+
};
|