@vibecheckai/cli 3.2.6 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/bin/registry.js +192 -5
  2. package/bin/runners/lib/agent-firewall/change-packet/builder.js +280 -6
  3. package/bin/runners/lib/agent-firewall/critic/index.js +151 -0
  4. package/bin/runners/lib/agent-firewall/critic/judge.js +432 -0
  5. package/bin/runners/lib/agent-firewall/critic/prompts.js +305 -0
  6. package/bin/runners/lib/agent-firewall/lawbook/distributor.js +465 -0
  7. package/bin/runners/lib/agent-firewall/lawbook/evaluator.js +604 -0
  8. package/bin/runners/lib/agent-firewall/lawbook/index.js +304 -0
  9. package/bin/runners/lib/agent-firewall/lawbook/registry.js +514 -0
  10. package/bin/runners/lib/agent-firewall/lawbook/schema.js +420 -0
  11. package/bin/runners/lib/agent-firewall/logger.js +141 -0
  12. package/bin/runners/lib/agent-firewall/policy/loader.js +312 -4
  13. package/bin/runners/lib/agent-firewall/policy/rules/ghost-env.js +113 -1
  14. package/bin/runners/lib/agent-firewall/policy/rules/ghost-route.js +133 -6
  15. package/bin/runners/lib/agent-firewall/proposal/extractor.js +394 -0
  16. package/bin/runners/lib/agent-firewall/proposal/index.js +212 -0
  17. package/bin/runners/lib/agent-firewall/proposal/schema.js +251 -0
  18. package/bin/runners/lib/agent-firewall/proposal/validator.js +386 -0
  19. package/bin/runners/lib/agent-firewall/reality/index.js +332 -0
  20. package/bin/runners/lib/agent-firewall/reality/state.js +625 -0
  21. package/bin/runners/lib/agent-firewall/reality/watcher.js +322 -0
  22. package/bin/runners/lib/agent-firewall/risk/index.js +173 -0
  23. package/bin/runners/lib/agent-firewall/risk/scorer.js +328 -0
  24. package/bin/runners/lib/agent-firewall/risk/thresholds.js +321 -0
  25. package/bin/runners/lib/agent-firewall/risk/vectors.js +421 -0
  26. package/bin/runners/lib/agent-firewall/simulator/diff-simulator.js +472 -0
  27. package/bin/runners/lib/agent-firewall/simulator/import-resolver.js +346 -0
  28. package/bin/runners/lib/agent-firewall/simulator/index.js +181 -0
  29. package/bin/runners/lib/agent-firewall/simulator/route-validator.js +380 -0
  30. package/bin/runners/lib/agent-firewall/time-machine/incident-correlator.js +661 -0
  31. package/bin/runners/lib/agent-firewall/time-machine/index.js +267 -0
  32. package/bin/runners/lib/agent-firewall/time-machine/replay-engine.js +436 -0
  33. package/bin/runners/lib/agent-firewall/time-machine/state-reconstructor.js +490 -0
  34. package/bin/runners/lib/agent-firewall/time-machine/timeline-builder.js +530 -0
  35. package/bin/runners/lib/analyzers.js +81 -18
  36. package/bin/runners/lib/authority-badge.js +425 -0
  37. package/bin/runners/lib/cli-output.js +7 -1
  38. package/bin/runners/lib/error-handler.js +16 -9
  39. package/bin/runners/lib/exit-codes.js +275 -0
  40. package/bin/runners/lib/global-flags.js +37 -0
  41. package/bin/runners/lib/help-formatter.js +413 -0
  42. package/bin/runners/lib/logger.js +38 -0
  43. package/bin/runners/lib/unified-cli-output.js +604 -0
  44. package/bin/runners/lib/upsell.js +148 -0
  45. package/bin/runners/runApprove.js +1200 -0
  46. package/bin/runners/runAuth.js +324 -95
  47. package/bin/runners/runCheckpoint.js +39 -21
  48. package/bin/runners/runClassify.js +859 -0
  49. package/bin/runners/runContext.js +136 -24
  50. package/bin/runners/runDoctor.js +108 -68
  51. package/bin/runners/runFix.js +6 -5
  52. package/bin/runners/runGuard.js +212 -118
  53. package/bin/runners/runInit.js +3 -2
  54. package/bin/runners/runMcp.js +130 -52
  55. package/bin/runners/runPolish.js +43 -20
  56. package/bin/runners/runProve.js +1 -2
  57. package/bin/runners/runReport.js +3 -2
  58. package/bin/runners/runScan.js +63 -44
  59. package/bin/runners/runShip.js +3 -4
  60. package/bin/runners/runValidate.js +19 -2
  61. package/bin/runners/runWatch.js +104 -53
  62. package/bin/vibecheck.js +106 -19
  63. package/mcp-server/HARDENING_SUMMARY.md +299 -0
  64. package/mcp-server/agent-firewall-interceptor.js +367 -31
  65. package/mcp-server/authority-tools.js +569 -0
  66. package/mcp-server/conductor/conflict-resolver.js +588 -0
  67. package/mcp-server/conductor/execution-planner.js +544 -0
  68. package/mcp-server/conductor/index.js +377 -0
  69. package/mcp-server/conductor/lock-manager.js +615 -0
  70. package/mcp-server/conductor/request-queue.js +550 -0
  71. package/mcp-server/conductor/session-manager.js +500 -0
  72. package/mcp-server/conductor/tools.js +510 -0
  73. package/mcp-server/index.js +1149 -243
  74. package/mcp-server/lib/{api-client.js → api-client.cjs} +40 -4
  75. package/mcp-server/lib/logger.cjs +30 -0
  76. package/mcp-server/logger.js +173 -0
  77. package/mcp-server/package.json +2 -2
  78. package/mcp-server/premium-tools.js +2 -2
  79. package/mcp-server/tier-auth.js +245 -35
  80. package/mcp-server/truth-firewall-tools.js +145 -15
  81. package/mcp-server/vibecheck-tools.js +2 -2
  82. package/package.json +2 -3
  83. package/mcp-server/index.old.js +0 -4137
  84. package/mcp-server/package-lock.json +0 -165
@@ -0,0 +1,432 @@
1
+ /**
2
+ * Critic LLM Judge
3
+ *
4
+ * The "savage" judge that evaluates proposal quality.
5
+ * Detects hand-waving, vague intent, and unverified assumptions.
6
+ *
7
+ * Philosophy: "If this change cannot be proven safe by the repository, block it."
8
+ */
9
+
10
+ "use strict";
11
+
12
+ const {
13
+ CRITIC_SYSTEM_PROMPT,
14
+ buildEvaluationPrompt,
15
+ buildVaguenessPrompt,
16
+ buildVerificationPrompt,
17
+ parseCriticResponse,
18
+ } = require("./prompts");
19
+
20
+ /**
21
+ * @typedef {Object} CriticVerdict
22
+ * @property {string} verdict - ALLOW, BLOCK, or REQUIRE_CONFIRMATION
23
+ * @property {number} confidence - Confidence in verdict (0-1)
24
+ * @property {Array} reasoning - Reasons for the verdict
25
+ * @property {Array} violations - Specific violations found
26
+ * @property {Array} recommendations - Suggestions for improvement
27
+ */
28
+
29
/**
 * Default LLM client configuration.
 *
 * Frozen so that no caller can accidentally change the defaults shared by
 * every judge; CriticJudge copies these values with object spread, so each
 * instance still gets its own mutable config.
 */
const DEFAULT_CONFIG = Object.freeze({
  model: "gpt-4-turbo-preview",
  temperature: 0.1, // Low temperature for consistent judgments
  maxTokens: 1000,
  timeout: 30000, // Milliseconds
});
38
+
39
/**
 * Membership test that works for Set/Map-like objects (anything with `has`),
 * arrays, and plain objects (own keys).
 * @param {*} collection - Collection to search
 * @param {*} value - Value or key to look for
 * @returns {boolean} True when the value is present
 */
function hasMember(collection, value) {
  if (collection == null || value == null) {
    return false;
  }
  if (typeof collection.has === "function") {
    return Boolean(collection.has(value));
  }
  if (Array.isArray(collection)) {
    return collection.includes(value);
  }
  if (typeof collection === "object") {
    return Object.prototype.hasOwnProperty.call(collection, value);
  }
  return false;
}

/**
 * Normalize an array, Set, or Map (values) into an array; anything else
 * becomes an empty array.
 * @param {*} collection - Collection to normalize
 * @returns {Array} Array of entries
 */
function toList(collection) {
  if (Array.isArray(collection)) {
    return collection;
  }
  if (collection instanceof Map) {
    return [...collection.values()];
  }
  if (collection instanceof Set) {
    return [...collection];
  }
  return [];
}

/**
 * Pick the most specific human-readable identifier of an assumption.
 * @param {Object} assumption - Assumption record
 * @returns {string|undefined} key, path, name, or type (first one set)
 */
function assumptionLabel(assumption) {
  return assumption?.key || assumption?.path || assumption?.name || assumption?.type;
}

/**
 * Critic Judge class.
 *
 * Evaluates proposals with an injected LLM client when one is configured,
 * and with deterministic rule-based checks otherwise (or when the LLM call
 * fails and the fallback mode is "conservative").
 */
class CriticJudge {
  /**
   * @param {Object} [options] - Overrides for DEFAULT_CONFIG plus:
   *   `llmClient` (function), `enabled` (default true) and
   *   `fallbackMode` (default "conservative").
   */
  constructor(options = {}) {
    this.config = { ...DEFAULT_CONFIG, ...options };
    this.llmClient = options.llmClient || null;
    this.enabled = options.enabled !== false;
    this.fallbackMode = options.fallbackMode || "conservative";
  }

  /**
   * Set the LLM client
   * @param {Function} client - LLM client function; it is called with
   *   `{ systemPrompt, userPrompt, model, temperature, maxTokens }`
   */
  setClient(client) {
    this.llmClient = client;
  }

  /**
   * Check if critic is available
   * @returns {boolean} True when the critic is enabled and a callable client is set
   */
  isAvailable() {
    // A non-function client (e.g. setClient(undefined)) can never be called,
    // so it must not count as available.
    return this.enabled && typeof this.llmClient === "function";
  }

  /**
   * Evaluate a proposal
   * @param {Object} params - Evaluation parameters (`proposal`,
   *   `validationResults`, `riskScore`, `simulationResult`, `realityState`)
   * @returns {Promise<CriticVerdict>} Critic verdict
   */
  async evaluate(params = {}) {
    const {
      proposal,
      validationResults = {},
      riskScore = {},
      simulationResult = {},
      realityState = {},
    } = params ?? {};

    // If critic is disabled or has no client, use rule-based evaluation.
    // The raw params are passed on purpose: the rule-based path treats an
    // absent simulationResult differently from an empty one.
    if (!this.isAvailable()) {
      return this.ruleBasedEvaluation(params);
    }

    try {
      const prompt = buildEvaluationPrompt({
        proposal,
        validationResults,
        riskScore,
        simulationResult,
        realityState,
      });

      const response = await this.callLLM(prompt);
      const verdict = parseCriticResponse(response);

      return this.validateVerdict(verdict);
    } catch (error) {
      console.warn(`Critic LLM evaluation failed: ${error?.message ?? error}`);

      if (this.fallbackMode === "conservative") {
        return this.ruleBasedEvaluation(params);
      }

      return {
        verdict: "ALLOW",
        confidence: 0.3,
        reasoning: ["Critic unavailable, using permissive fallback"],
        violations: [],
        recommendations: ["Consider manual review"],
      };
    }
  }

  /**
   * Check for vagueness in proposal
   * @param {Object} proposal - Proposal to check
   * @returns {Promise<Object>} Vagueness analysis
   */
  async checkVagueness(proposal) {
    if (!this.isAvailable()) {
      return this.ruleBasedVaguenessCheck(proposal);
    }

    try {
      const prompt = buildVaguenessPrompt(proposal);
      const response = await this.callLLM(prompt);
      return parseCriticResponse(response);
    } catch (error) {
      // Report the failure the same way evaluate() does instead of hiding it.
      console.warn(`Critic LLM vagueness check failed: ${error?.message ?? error}`);
      return this.ruleBasedVaguenessCheck(proposal);
    }
  }

  /**
   * Verify assumptions against reality
   * @param {Array} assumptions - Assumptions to verify
   * @param {Object} realityState - Repository state
   * @returns {Promise<Object>} Verification results
   */
  async verifyAssumptions(assumptions, realityState) {
    if (!this.isAvailable()) {
      return this.ruleBasedAssumptionVerification(assumptions, realityState);
    }

    try {
      const prompt = buildVerificationPrompt(assumptions, realityState);
      const response = await this.callLLM(prompt);
      return parseCriticResponse(response);
    } catch (error) {
      console.warn(`Critic LLM assumption verification failed: ${error?.message ?? error}`);
      return this.ruleBasedAssumptionVerification(assumptions, realityState);
    }
  }

  /**
   * Call the LLM
   *
   * Enforces `config.timeout` (milliseconds) when it is a positive finite
   * number, so a client that never settles cannot block the caller forever.
   * @param {string} prompt - User prompt
   * @returns {Promise<string>} LLM response
   * @throws {Error} When no client is configured, the client fails, or the
   *   call exceeds the configured timeout
   */
  async callLLM(prompt) {
    if (typeof this.llmClient !== "function") {
      throw new Error("LLM client not configured");
    }

    const request = {
      systemPrompt: CRITIC_SYSTEM_PROMPT,
      userPrompt: prompt,
      model: this.config.model,
      temperature: this.config.temperature,
      maxTokens: this.config.maxTokens,
    };

    const timeoutMs = this.config.timeout;
    if (!(Number.isFinite(timeoutMs) && timeoutMs > 0)) {
      return this.llmClient(request);
    }

    let timer;
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(
        () => reject(new Error(`LLM request timed out after ${timeoutMs}ms`)),
        timeoutMs,
      );
    });

    try {
      return await Promise.race([this.llmClient(request), deadline]);
    } finally {
      // Always release the timer so it cannot keep the process alive.
      clearTimeout(timer);
    }
  }

  /**
   * Validate and normalize verdict
   *
   * Unknown or missing verdict values are normalized to "BLOCK".
   * @param {Object} verdict - Raw verdict
   * @returns {CriticVerdict} Validated verdict
   */
  validateVerdict(verdict) {
    const validVerdicts = ["ALLOW", "BLOCK", "REQUIRE_CONFIRMATION"];
    const raw = verdict !== null && typeof verdict === "object" ? verdict : {};

    return {
      verdict: validVerdicts.includes(raw.verdict) ? raw.verdict : "BLOCK",
      // Number.isFinite also rejects NaN, which would survive the clamp.
      confidence: Number.isFinite(raw.confidence)
        ? Math.max(0, Math.min(1, raw.confidence))
        : 0.5,
      reasoning: Array.isArray(raw.reasoning) ? raw.reasoning : [],
      violations: Array.isArray(raw.violations) ? raw.violations : [],
      recommendations: Array.isArray(raw.recommendations) ? raw.recommendations : [],
    };
  }

  /**
   * Rule-based evaluation fallback
   * @param {Object} params - Evaluation parameters
   * @returns {CriticVerdict} Verdict
   */
  ruleBasedEvaluation(params) {
    const { proposal, validationResults, riskScore, simulationResult } = params ?? {};

    // Nothing to judge: block rather than crash or wave it through.
    if (proposal === null || typeof proposal !== "object") {
      return {
        verdict: "BLOCK",
        confidence: 0.9,
        reasoning: ["No proposal was provided to evaluate"],
        violations: ["Missing proposal"],
        recommendations: ["Submit a structured proposal before proceeding"],
      };
    }

    const violations = [];
    const reasoning = [];
    let verdict = "ALLOW";
    let confidence = 0.7;

    // Check simulation result
    if (simulationResult && !simulationResult.passed) {
      violations.push("Simulation failed");
      reasoning.push("Change would break imports or routes");
      verdict = "BLOCK";
      confidence = Math.max(confidence, 0.9);
    }

    // Check risk score
    if (riskScore?.total >= 80) {
      violations.push(`High risk score: ${riskScore.total}`);
      reasoning.push("Risk score exceeds safe threshold");
      verdict = verdict === "BLOCK" ? "BLOCK" : "REQUIRE_CONFIRMATION";
      confidence = Math.max(confidence, 0.8);
    }

    // Check unverified assumptions
    const invalidAssumptions = Array.isArray(validationResults?.invalid)
      ? validationResults.invalid
      : [];
    if (invalidAssumptions.length > 0) {
      for (const invalid of invalidAssumptions) {
        const label = assumptionLabel(invalid?.assumption) ?? "unknown";
        violations.push(`Unverified assumption: ${label}`);
      }
      reasoning.push(`${invalidAssumptions.length} assumptions could not be verified`);
      verdict = "BLOCK";
      // Never lower the confidence established by an earlier, stronger signal.
      confidence = Math.max(confidence, 0.85);
    }

    // Check vagueness
    const vaguenessCheck = this.ruleBasedVaguenessCheck(proposal);
    if (vaguenessCheck.specificityScore < 4) {
      violations.push("Proposal is too vague");
      reasoning.push(`Specificity score: ${vaguenessCheck.specificityScore}/10`);
      if (verdict === "ALLOW") {
        verdict = "REQUIRE_CONFIRMATION";
      }
    }

    // Check for sensitive domains without explicit acknowledgment
    const sensitiveDomains = ["auth", "payments", "database"];
    const operations = Array.isArray(proposal.operations) ? proposal.operations : [];
    const touchesSensitive = operations.some((op) => {
      const opPath = String(op?.path || "").toLowerCase();
      return sensitiveDomains.some((domain) => opPath.includes(domain));
    });

    if (touchesSensitive && !proposal.riskAcknowledgment) {
      violations.push("Touches sensitive domains without risk acknowledgment");
      reasoning.push("Changes to auth/payments/database require explicit acknowledgment");
      if (verdict === "ALLOW") {
        verdict = "REQUIRE_CONFIRMATION";
      }
    }

    // No violations = allow
    if (violations.length === 0) {
      reasoning.push("No violations detected");
    }

    return {
      verdict,
      confidence,
      reasoning,
      violations,
      recommendations: violations.length > 0
        ? ["Address violations before proceeding", "Add missing assumptions"]
        : [],
    };
  }

  /**
   * Rule-based vagueness check
   *
   * Starts from a specificity score of 10 and subtracts for a one-word
   * generic intent, a missing or short summary, missing assumptions, many
   * operations with a short summary, and vague words in the summary. The
   * score never drops below 1.
   * @param {Object} proposal - Proposal to check
   * @returns {Object} `{ specificityScore, vagueTerms, suggestions }`
   */
  ruleBasedVaguenessCheck(proposal) {
    const subject = proposal ?? {};
    const intent = typeof subject.intent === "string" ? subject.intent : "";
    const summary = typeof subject.summary === "string" ? subject.summary : "";
    const vagueTerms = [];
    let specificityScore = 10;

    // Check intent
    const vagueIntents = ["fix", "update", "change", "modify", "improve", "refactor", "adjust"];
    const intentWords = intent.toLowerCase().split("_");

    if (intentWords.length === 1 && vagueIntents.includes(intentWords[0])) {
      vagueTerms.push(intent);
      specificityScore -= 3;
    }

    // Check summary
    if (summary.length === 0) {
      specificityScore -= 2;
    } else if (summary.length < 20) {
      specificityScore -= 1;
    }

    // Check assumptions
    if (!Array.isArray(subject.assumptions) || subject.assumptions.length === 0) {
      specificityScore -= 2;
    }

    // Check operation count vs explanation
    const opCount = Array.isArray(subject.operations) ? subject.operations.length : 0;
    if (opCount > 3 && summary.length < 50) {
      specificityScore -= 2;
    }

    // Check for vague words in summary. Whole-word matching keeps ordinary
    // words such as "fetch" (contains "etc") or "awesome" (contains "some")
    // from being penalized.
    const vagueWords = ["some", "various", "etc", "stuff", "things", "somehow"];
    const summaryLower = summary.toLowerCase();
    for (const word of vagueWords) {
      if (new RegExp(`\\b${word}\\b`).test(summaryLower)) {
        vagueTerms.push(word);
        specificityScore -= 1;
      }
    }

    specificityScore = Math.max(1, specificityScore);

    return {
      specificityScore,
      vagueTerms,
      suggestions: vagueTerms.length > 0
        ? [`Replace vague terms: ${vagueTerms.join(", ")}`, "Add specific details"]
        : [],
    };
  }

  /**
   * Rule-based assumption verification
   *
   * Supports assumption types "env", "route", "service" and "file"; any
   * other type is reported as unverified. `envVars` and `files` may be a
   * Set/Map, an array, or a plain object keyed by name.
   * @param {Array} assumptions - Assumptions to verify
   * @param {Object} realityState - Repository state
   * @returns {Object} `{ results, overallVerificationRate }`; the rate is 1
   *   when there are no assumptions
   */
  ruleBasedAssumptionVerification(assumptions, realityState) {
    const pending = Array.isArray(assumptions) ? assumptions : [];
    const results = [];
    let verifiedCount = 0;

    for (const assumption of pending) {
      let verified = false;
      let evidence = null;
      let reason = "";

      switch (assumption?.type) {
        case "env": {
          if (hasMember(realityState?.envVars, assumption.key)) {
            verified = true;
            evidence = `Found in env vars: ${assumption.key}`;
          } else {
            reason = `Env var '${assumption.key}' not found in declared variables`;
          }
          break;
        }

        case "route": {
          // Require a concrete path: two missing paths must not compare equal.
          const routeExists = assumption.path != null &&
            toList(realityState?.routes).some((route) =>
              route?.path === assumption.path &&
              (route.method === assumption.method || !assumption.method)
            );
          if (routeExists) {
            verified = true;
            evidence = `Route ${assumption.method || "GET"} ${assumption.path} is registered`;
          } else {
            reason = `Route ${assumption.path} not found in registered routes`;
          }
          break;
        }

        case "service": {
          // Only compare against identifiers the assumption actually carries,
          // so an unnamed service cannot match an unnamed assumption.
          const wanted = [assumption.key, assumption.name].filter((id) => id != null);
          const serviceExists = toList(realityState?.services).some((service) =>
            service != null && wanted.includes(service.name)
          );
          if (serviceExists) {
            verified = true;
            evidence = `Service ${assumption.key || assumption.name} is registered`;
          } else {
            reason = `Service ${assumption.key || assumption.name} not found`;
          }
          break;
        }

        case "file": {
          // Compare with forward slashes so Windows-style paths still match.
          const normalizedPath = typeof assumption.path === "string"
            ? assumption.path.replace(/\\/g, "/")
            : assumption.path;
          if (hasMember(realityState?.files, normalizedPath)) {
            verified = true;
            evidence = `File ${assumption.path} exists`;
          } else {
            reason = `File ${assumption.path} not found`;
          }
          break;
        }

        default:
          reason = `Unknown assumption type: ${assumption?.type}`;
      }

      if (verified) {
        verifiedCount++;
      }

      results.push({
        assumption: assumptionLabel(assumption),
        verified,
        evidence,
        reason,
      });
    }

    return {
      results,
      overallVerificationRate: pending.length > 0
        ? verifiedCount / pending.length
        : 1,
    };
  }
}
413
+
414
/**
 * Factory for critic judges.
 * @param {Object} options - Configuration options forwarded to the CriticJudge constructor
 * @returns {CriticJudge} Newly constructed judge
 */
function createJudge(options = {}) {
  const judge = new CriticJudge(options);
  return judge;
}
422
+
423
/**
 * Shared judge created with no options. It starts without an LLM client,
 * so it uses the rule-based checks until setClient() is called.
 */
const defaultJudge = createJudge();

// Public API of this module.
module.exports = { CriticJudge, createJudge, defaultJudge };