@vibecheckai/cli 3.1.8 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/bin/registry.js +106 -116
  2. package/bin/runners/context/generators/mcp.js +18 -0
  3. package/bin/runners/context/index.js +72 -4
  4. package/bin/runners/context/proof-context.js +293 -1
  5. package/bin/runners/context/security-scanner.js +311 -73
  6. package/bin/runners/lib/analyzers.js +607 -20
  7. package/bin/runners/lib/detectors-v2.js +172 -15
  8. package/bin/runners/lib/entitlements-v2.js +48 -1
  9. package/bin/runners/lib/evidence-pack.js +678 -0
  10. package/bin/runners/lib/html-proof-report.js +913 -0
  11. package/bin/runners/lib/missions/plan.js +231 -41
  12. package/bin/runners/lib/missions/templates.js +125 -0
  13. package/bin/runners/lib/scan-output.js +492 -253
  14. package/bin/runners/lib/ship-output.js +901 -641
  15. package/bin/runners/runCheckpoint.js +44 -3
  16. package/bin/runners/runContext.d.ts +4 -0
  17. package/bin/runners/runDoctor.js +10 -2
  18. package/bin/runners/runFix.js +51 -341
  19. package/bin/runners/runInit.js +11 -0
  20. package/bin/runners/runPolish.d.ts +4 -0
  21. package/bin/runners/runPolish.js +608 -29
  22. package/bin/runners/runProve.js +210 -25
  23. package/bin/runners/runReality.js +846 -101
  24. package/bin/runners/runScan.js +238 -4
  25. package/bin/runners/runShip.js +19 -3
  26. package/bin/runners/runWatch.js +14 -1
  27. package/bin/vibecheck.js +32 -2
  28. package/mcp-server/consolidated-tools.js +408 -42
  29. package/mcp-server/index.js +152 -15
  30. package/mcp-server/proof-tools.js +571 -0
  31. package/mcp-server/tier-auth.js +22 -19
  32. package/mcp-server/tools-v3.js +744 -0
  33. package/mcp-server/truth-firewall-tools.js +190 -4
  34. package/package.json +3 -1
  35. package/bin/runners/runInstall.js +0 -281
  36. package/bin/runners/runLabs.js +0 -341
@@ -1,25 +1,73 @@
1
1
  /**
2
2
  * Vibecheck MCP Consolidated Tools
3
3
  *
4
- * Reduced from 50+ tools to 15 focused tools that map to CLI commands.
4
+ * Reduced from 50+ tools to a curated set for agents.
5
5
  * Each tool returns evidence-backed responses with file/line citations.
6
6
  *
7
7
  * Tool Categories:
8
8
  * 1. Core Commands (5) - ship, scan, fix, verify, ctx
9
9
  * 2. Truth Queries (5) - truthpack, routes, env, auth, billing
10
10
  * 3. Evidence (3) - validate_claim, evidence, proof_graph
11
- * 4. Utilities (2) - status, doctor
11
+ * 4. Proof Artifacts (2) - evidence_pack, allowlist
12
+ * 5. Utilities (2) - status, doctor
13
+ *
14
+ * Response Format (Standardized):
15
+ * {
16
+ * ok: boolean,
17
+ * data: any,
18
+ * evidence: Array<{ file, line, snippet, confidence }>,
19
+ * metadata: { timestamp, cached, projectFingerprint }
20
+ * }
12
21
  */
13
22
 
14
23
  import { execSync, spawn } from 'child_process';
15
24
  import fs from 'fs';
16
25
  import path from 'path';
26
+ import { handleTruthFirewallTool } from './truth-firewall-tools.js';
27
+ import { PROOF_TOOLS, handleProofTool } from './proof-tools.js';
17
28
 
18
29
  // ============================================================================
19
- // TOOL DEFINITIONS (15 Core Tools)
30
+ // STANDARDIZED RESPONSE WRAPPER
20
31
  // ============================================================================
21
32
 
22
- export const CONSOLIDATED_TOOLS = [
/**
 * Wrap a tool result in the standardized MCP response envelope.
 *
 * Success shape: `{ ok: true, data, evidence, metadata: { timestamp, cached } }`.
 * Failure shape: `{ ok: false, error, data: null, evidence: [], metadata }`,
 * plus an `output` field when the caller supplied one.
 *
 * @param {*} data - Payload returned to the caller on success.
 * @param {object} [options]
 * @param {Array<object>} [options.evidence] - Raw evidence items; each is
 *   normalized to `{ file, line, snippet, confidence, reason }`.
 * @param {boolean} [options.cached] - Reported verbatim in `metadata.cached`.
 * @param {string|Error|object|null} [options.error] - When truthy, a failure
 *   envelope is produced and `data` / `evidence` are ignored.
 * @param {*} [options.output] - Optional captured process output attached to
 *   a failure envelope.
 * @returns {object} The response envelope.
 */
function wrapResponse(data, options = {}) {
  // `options = {}` only covers `undefined`; an explicit `null` must not throw.
  const { evidence = [], cached = false, error = null, output } = options ?? {};
  const timestamp = new Date().toISOString();

  if (error) {
    const message = typeof error === 'string'
      ? error
      : error.message ?? String(error); // never return `error: undefined`

    const failure = {
      ok: false,
      error: message,
      data: null,
      evidence: [],
      metadata: {
        timestamp,
        cached: false,
      },
    };
    // Only attach `output` when it carries something, so the envelope shape
    // is unchanged for callers that never pass it.
    if (output != null && output !== '') {
      failure.output = output;
    }
    return failure;
  }

  // Tolerate `evidence: null` and malformed entries instead of throwing.
  const items = Array.isArray(evidence)
    ? evidence.filter((e) => e !== null && typeof e === 'object')
    : [];

  return {
    ok: true,
    data,
    evidence: items.map((e) => ({
      file: e.file || null,
      line: e.line || e.lines || null,
      snippet: e.snippet || e.code || null,
      // `??` keeps an explicit confidence of 0; `||` would rewrite it to 0.9.
      confidence: e.confidence ?? 0.9,
      reason: e.reason || null,
    })),
    metadata: {
      timestamp,
      cached,
    },
  };
}
65
+
66
+ // ============================================================================
67
+ // TOOL DEFINITIONS (Curated for Agents)
68
+ // ============================================================================
69
+
70
+ const ALL_TOOLS = [
23
71
  // === CORE COMMANDS (5) ===
24
72
  {
25
73
  name: "vibecheck.ship",
@@ -159,6 +207,57 @@ export const CONSOLIDATED_TOOLS = [
159
207
  required: ["claim"]
160
208
  }
161
209
  },
210
+ {
211
+ name: "vibecheck.compile_context",
212
+ description: "Get task-focused context with invariants and policy controls.",
213
+ inputSchema: {
214
+ type: "object",
215
+ properties: {
216
+ projectPath: { type: "string", description: "Path to project root", default: "." },
217
+ task: { type: "string", description: "Task description" },
218
+ policy: { type: "string", enum: ["strict", "balanced", "permissive"], default: "strict" }
219
+ },
220
+ required: ["task"]
221
+ }
222
+ },
223
+ {
224
+ name: "vibecheck.search_evidence",
225
+ description: "Search for evidence with file/line citations.",
226
+ inputSchema: {
227
+ type: "object",
228
+ properties: {
229
+ projectPath: { type: "string", description: "Path to project root", default: "." },
230
+ query: { type: "string", description: "What to search for" },
231
+ type: { type: "string", enum: ["route", "handler", "middleware", "component", "env_var", "model", "any"], default: "any" },
232
+ limit: { type: "number", default: 10 }
233
+ },
234
+ required: ["query"]
235
+ }
236
+ },
237
+ {
238
+ name: "vibecheck.find_counterexamples",
239
+ description: "Find counterexamples that falsify auth/billing claims.",
240
+ inputSchema: {
241
+ type: "object",
242
+ properties: {
243
+ projectPath: { type: "string", description: "Path to project root", default: "." },
244
+ claim: { type: "string", enum: ["auth_enforced", "billing_gate_exists", "route_guarded", "no_bypass"] },
245
+ subject: { type: "object" }
246
+ },
247
+ required: ["claim", "subject"]
248
+ }
249
+ },
250
+ {
251
+ name: "vibecheck.check_invariants",
252
+ description: "Check invariants (auth/billing/ux/api) for ship-killers.",
253
+ inputSchema: {
254
+ type: "object",
255
+ properties: {
256
+ projectPath: { type: "string", description: "Path to project root", default: "." },
257
+ category: { type: "string", enum: ["all", "auth", "billing", "security", "ux", "api"], default: "all" }
258
+ }
259
+ }
260
+ },
162
261
  {
163
262
  name: "vibecheck.get_evidence",
164
263
  description: "Get file/line evidence for a specific finding or claim.",
@@ -240,6 +339,42 @@ export const CONSOLIDATED_TOOLS = [
240
339
  }
241
340
  },
242
341
 
342
+ // === PROOF ARTIFACTS (2) ===
343
+ {
344
+ name: "vibecheck.evidence_pack",
345
+ description: "Build shareable evidence pack with videos, traces, screenshots. Returns zip path.",
346
+ inputSchema: {
347
+ type: "object",
348
+ properties: {
349
+ projectPath: { type: "string", description: "Path to project root", default: "." },
350
+ includeVideos: { type: "boolean", description: "Include recorded videos", default: true },
351
+ includeTraces: { type: "boolean", description: "Include Playwright traces", default: true },
352
+ includeScreenshots: { type: "boolean", description: "Include screenshots", default: true },
353
+ applyAllowlist: { type: "boolean", description: "Filter by allowlist", default: true }
354
+ }
355
+ }
356
+ },
357
+ {
358
+ name: "vibecheck.allowlist",
359
+ description: "Manage finding allowlist. Add entries to suppress known false positives.",
360
+ inputSchema: {
361
+ type: "object",
362
+ properties: {
363
+ projectPath: { type: "string", description: "Path to project root", default: "." },
364
+ action: {
365
+ type: "string",
366
+ enum: ["list", "add", "remove", "check"],
367
+ description: "Action to perform",
368
+ default: "list"
369
+ },
370
+ findingId: { type: "string", description: "Finding ID to add/remove (for add/remove)" },
371
+ pattern: { type: "string", description: "Pattern to match (for add)" },
372
+ reason: { type: "string", description: "Reason for allowlisting (for add)" },
373
+ scope: { type: "string", enum: ["global", "file", "line"], default: "global" }
374
+ }
375
+ }
376
+ },
377
+
243
378
  // === UTILITIES (2) ===
244
379
  {
245
380
  name: "vibecheck.status",
@@ -260,9 +395,62 @@ export const CONSOLIDATED_TOOLS = [
260
395
  projectPath: { type: "string", description: "Path to project root", default: "." }
261
396
  }
262
397
  }
398
+ },
399
+
400
+ // === RUNTIME (1) ===
401
+ {
402
+ name: "vibecheck.reality",
403
+ description: "Run runtime browser verification with optional video/trace recording.",
404
+ inputSchema: {
405
+ type: "object",
406
+ properties: {
407
+ url: { type: "string", description: "Base URL to verify (required)" },
408
+ projectPath: { type: "string", description: "Path to project root", default: "." },
409
+ auth: { type: "string", description: "Login credentials as email:password" },
410
+ recordVideo: { type: "boolean", description: "Record video of session", default: false },
411
+ recordTrace: { type: "boolean", description: "Record Playwright trace", default: false },
412
+ maxPages: { type: "number", description: "Max pages to crawl", default: 18 }
413
+ },
414
+ required: ["url"]
415
+ }
263
416
  }
264
417
  ];
265
418
 
/**
 * Names of the tools that are actually exposed to agents. Any definition in
 * ALL_TOOLS / PROOF_TOOLS whose name is not listed here is filtered out of
 * CONSOLIDATED_TOOLS.
 */
const ALLOWED_TOOL_NAMES = new Set([
  // Core
  "vibecheck.ship",
  "vibecheck.scan",
  "vibecheck.ctx",
  // Truth
  "vibecheck.get_truthpack",
  "vibecheck.validate_claim",
  "vibecheck.compile_context",
  "vibecheck.search_evidence",
  "vibecheck.find_counterexamples",
  "vibecheck.check_invariants",
  // Proof artifacts
  "vibecheck.evidence_pack",
  "vibecheck.allowlist",
  // Runtime
  "vibecheck.reality",
  // Proof tools (new)
  "vibecheck.prove",
  "vibecheck.prove_status",
  "vibecheck.get_evidence",
  "vibecheck.check_flaky",
  "vibecheck.allowlist_add",
  "vibecheck.get_proof_graph",
  // Utilities
  "vibecheck.status",
]);

// Combine base tools with proof tools
const COMBINED_TOOLS = [...ALL_TOOLS, ...PROOF_TOOLS];

/**
 * Tool definitions advertised to MCP clients: the allowlisted subset of
 * COMBINED_TOOLS, de-duplicated by name.
 *
 * ALL_TOOLS defines names (e.g. "vibecheck.get_evidence") that are also
 * allowlisted as proof tools; if PROOF_TOOLS defines the same name, a plain
 * filter would advertise it twice. Keying by name keeps a single entry per
 * tool, and the later (PROOF_TOOLS) definition wins.
 * NOTE(review): assumes the PROOF_TOOLS definition is the one the dispatcher
 * serves for overlapping names — confirm against proof-tools.js.
 */
export const CONSOLIDATED_TOOLS = [
  ...new Map(
    COMBINED_TOOLS
      .filter((tool) => ALLOWED_TOOL_NAMES.has(tool.name))
      .map((tool) => [tool.name, tool]),
  ).values(),
];
453
+
266
454
  // ============================================================================
267
455
  // TOOL HANDLERS
268
456
  // ============================================================================
@@ -273,54 +461,46 @@ export async function handleConsolidatedTool(name, args) {
273
461
  switch (name) {
274
462
  // Core Commands
275
463
  case "vibecheck.ship":
276
- return await runCliCommand("ship", projectPath, args);
464
+ return wrapResponse(await runCliCommand("ship", projectPath, args));
277
465
  case "vibecheck.scan":
278
- return await runCliCommand("scan", projectPath, args);
279
- case "vibecheck.fix":
280
- return await runCliCommand("fix", projectPath, args);
281
- case "vibecheck.verify":
282
- return await runCliCommand("verify", projectPath, args);
466
+ return wrapResponse(await runCliCommand("scan", projectPath, args));
283
467
  case "vibecheck.ctx":
284
- return await runCliCommand("ctx", projectPath, args);
285
-
286
- // Truth Queries
468
+ return wrapResponse(await runCliCommand("ctx", projectPath, args));
469
+
470
+ // Truth Firewall / Evidence
287
471
  case "vibecheck.get_truthpack":
288
- return await getTruthpack(projectPath, args.refresh);
289
- case "vibecheck.get_routes":
290
- return await getRoutes(projectPath, args.type);
291
- case "vibecheck.get_env":
292
- return await getEnv(projectPath);
293
- case "vibecheck.get_auth":
294
- return await getAuth(projectPath);
295
- case "vibecheck.get_billing":
296
- return await getBilling(projectPath);
297
-
298
- // Evidence
299
472
  case "vibecheck.validate_claim":
300
- return await validateClaim(projectPath, args.claim, args.type);
473
+ case "vibecheck.compile_context":
474
+ case "vibecheck.search_evidence":
475
+ case "vibecheck.find_counterexamples":
476
+ case "vibecheck.check_invariants":
477
+ return await handleTruthFirewallTool(name, args, projectPath);
478
+
479
+ // Proof Artifacts
480
+ case "vibecheck.evidence_pack":
481
+ return await handleEvidencePack(projectPath, args);
482
+ case "vibecheck.allowlist":
483
+ return await handleAllowlist(projectPath, args);
484
+
485
+ // Runtime
486
+ case "vibecheck.reality":
487
+ return await handleReality(projectPath, args);
488
+
489
+ // Proof Tools
490
+ case "vibecheck.prove":
491
+ case "vibecheck.prove_status":
301
492
  case "vibecheck.get_evidence":
302
- return await getEvidence(projectPath, args);
493
+ case "vibecheck.check_flaky":
494
+ case "vibecheck.allowlist_add":
303
495
  case "vibecheck.get_proof_graph":
304
- return await getProofGraph(projectPath);
496
+ return await handleProofTool(name, { ...args, projectPath });
305
497
 
306
498
  // Utilities
307
499
  case "vibecheck.status":
308
- return await getStatus(projectPath);
309
- case "vibecheck.doctor":
310
- return await runCliCommand("doctor", projectPath, args);
311
-
312
- // Spec-required tools
313
- case "vibecheck.get_contracts":
314
- return await getContracts(projectPath);
315
- case "vibecheck.validate_plan":
316
- return await validatePlan(projectPath, args.plan);
317
- case "vibecheck.share":
318
- return await buildShare(projectPath, args.missionDir);
319
- case "vibecheck.pr_comment":
320
- return await renderPRComment(projectPath, args.maxFindings);
500
+ return wrapResponse(await getStatus(projectPath));
321
501
 
322
502
  default:
323
- return { error: `Unknown tool: ${name}`, available: CONSOLIDATED_TOOLS.map(t => t.name) };
503
+ return wrapResponse(null, { error: `Unknown tool: ${name}. Available: ${CONSOLIDATED_TOOLS.map(t => t.name).join(', ')}` });
324
504
  }
325
505
  }
326
506
 
@@ -801,4 +981,190 @@ async function renderPRComment(projectPath, maxFindings = 12) {
801
981
  }
802
982
  }
803
983
 
804
- export default { CONSOLIDATED_TOOLS, handleConsolidatedTool };
984
+ // ============================================================================
985
+ // PROOF ARTIFACT HANDLERS
986
+ // ============================================================================
987
+
/**
 * Handle the `vibecheck.evidence_pack` tool: build an evidence pack for a
 * project and return its identifiers plus a short evidence preview.
 *
 * The pack is built in-process via `buildEvidencePack` from the bundled
 * `bin/runners/lib/evidence-pack.js` module. If that module cannot be
 * imported, the handler falls back to the `evidence-pack` CLI command.
 *
 * @param {string} projectPath - Project root passed to the pack builder.
 * @param {object} [args] - Tool arguments; each `include*` / `applyAllowlist`
 *   flag is treated as enabled unless it is explicitly `false`.
 * @returns {Promise<object>} Standardized response from `wrapResponse`.
 */
async function handleEvidencePack(projectPath, args = {}) {
  try {
    // Resolve relative to this module as a URL. This avoids hand-building a
    // `file://` string from `URL.pathname`, which is not percent-decoded and
    // needs platform-specific patching on Windows.
    const moduleUrl = new URL('../bin/runners/lib/evidence-pack.js', import.meta.url);

    let evidencePack;
    try {
      evidencePack = await import(moduleUrl.href);
    } catch {
      // Fall back to CLI command
      return wrapResponse(await runCliCommand("evidence-pack", projectPath, args));
    }

    const pack = await evidencePack.buildEvidencePack(projectPath, {
      includeVideos: args.includeVideos !== false,
      includeTraces: args.includeTraces !== false,
      includeScreenshots: args.includeScreenshots !== false,
      applyAllowlist: args.applyAllowlist !== false,
    });

    // A pack without a manifest or findings is still a successful build;
    // it simply has no evidence preview.
    const findings = Array.isArray(pack.manifest?.findings) ? pack.manifest.findings : [];

    return wrapResponse({
      packId: pack.id,
      manifestPath: pack.manifestPath,
      zipPath: pack.zipPath,
      summary: pack.summary,
    }, {
      // Preview only the first five findings to keep the response small.
      evidence: findings.slice(0, 5).map((f) => ({
        file: f?.where?.file,
        line: f?.where?.line,
        snippet: f?.what,
        confidence: f?.confidence,
      })),
    });
  } catch (error) {
    return wrapResponse(null, { error: error.message });
  }
}
1030
+
/**
 * Handle the `vibecheck.allowlist` tool: list, add, remove or check entries
 * in the project's finding allowlist.
 *
 * All storage operations are delegated to the bundled
 * `bin/runners/lib/evidence-pack.js` module (`loadAllowlist`,
 * `addToAllowlist`, `saveAllowlist`, `isAllowlisted`).
 *
 * @param {string} projectPath - Project root passed to the allowlist helpers.
 * @param {object} [args] - Tool arguments.
 * @param {"list"|"add"|"remove"|"check"} [args.action="list"]
 * @param {string} [args.findingId] - Required for `remove` and `check`;
 *   for `add`, either this or `pattern` is required.
 * @param {string} [args.pattern] - Pattern to allowlist (for `add`).
 * @param {string} [args.reason] - Defaults to 'Added via MCP tool'.
 * @param {string} [args.scope] - Defaults to 'global'.
 * @returns {Promise<object>} Standardized response from `wrapResponse`.
 */
async function handleAllowlist(projectPath, args = {}) {
  try {
    // Resolve relative to this module as a URL rather than patching
    // `URL.pathname` by hand (not percent-decoded, Windows drive prefix).
    const moduleUrl = new URL('../bin/runners/lib/evidence-pack.js', import.meta.url);

    let evidencePack;
    try {
      evidencePack = await import(moduleUrl.href);
    } catch {
      return wrapResponse(null, { error: 'Evidence pack module not available' });
    }

    const action = args.action || 'list';

    switch (action) {
      case 'list': {
        const allowlist = evidencePack.loadAllowlist(projectPath);
        return wrapResponse({
          entries: allowlist.entries || [],
          lastUpdated: allowlist.lastUpdated,
        });
      }

      case 'add': {
        if (!args.findingId && !args.pattern) {
          return wrapResponse(null, { error: 'Either findingId or pattern is required' });
        }

        const entry = evidencePack.addToAllowlist(projectPath, {
          findingId: args.findingId,
          pattern: args.pattern,
          reason: args.reason || 'Added via MCP tool',
          scope: args.scope || 'global',
          addedBy: 'mcp',
        });

        return wrapResponse({
          added: entry,
          message: `Added allowlist entry: ${entry.id}`,
        });
      }

      case 'remove': {
        if (!args.findingId) {
          return wrapResponse(null, { error: 'findingId is required for remove action' });
        }

        const allowlist = evidencePack.loadAllowlist(projectPath);
        // Same tolerance as `list`: an allowlist without `entries` is empty.
        const entries = Array.isArray(allowlist.entries) ? allowlist.entries : [];
        // The identifier may be either the allowlist entry id or the id of
        // the finding the entry refers to.
        const kept = entries.filter(
          (e) => e.id !== args.findingId && e.findingId !== args.findingId,
        );
        allowlist.entries = kept;
        evidencePack.saveAllowlist(projectPath, allowlist);

        return wrapResponse({
          removed: entries.length - kept.length,
          remaining: kept.length,
        });
      }

      case 'check': {
        if (!args.findingId) {
          return wrapResponse(null, { error: 'findingId is required for check action' });
        }

        const allowlist = evidencePack.loadAllowlist(projectPath);
        const result = evidencePack.isAllowlisted({ id: args.findingId }, allowlist);

        return wrapResponse({
          allowed: result.allowed,
          reason: result.reason,
          entryId: result.entry?.id,
        });
      }

      default:
        return wrapResponse(null, { error: `Unknown action: ${action}` });
    }
  } catch (error) {
    return wrapResponse(null, { error: error.message });
  }
}
1113
+
/**
 * Handle the `vibecheck.reality` tool: run the bundled CLI's `reality`
 * command against a URL and return its JSON report.
 *
 * The CLI is started without a shell, so caller-supplied values (`url`,
 * `auth`) are passed as discrete argv entries and cannot inject commands.
 *
 * @param {string} projectPath - Working directory for the CLI process.
 * @param {object} [args] - Tool arguments.
 * @param {string} args.url - Base URL to verify (required).
 * @param {string} [args.auth] - Credentials forwarded via `--auth`.
 * @param {boolean} [args.recordVideo] - Adds `--video`.
 * @param {boolean} [args.recordTrace] - Adds `--trace`.
 * @param {number} [args.maxPages] - Forwarded via `--max-pages`.
 * @returns {Promise<object>} Standardized response from `wrapResponse`. When
 *   the CLI output is not valid JSON it is returned raw as `{ output }`.
 */
async function handleReality(projectPath, args = {}) {
  try {
    if (typeof args.url !== 'string' || args.url.trim() === '') {
      return wrapResponse(null, { error: 'url is required' });
    }

    // Loaded locally so this handler does not depend on the module's
    // top-level import list.
    const [{ execFileSync }, { fileURLToPath }] = await Promise.all([
      import('child_process'),
      import('url'),
    ]);

    // fileURLToPath percent-decodes and handles Windows drive letters.
    const binPath = fileURLToPath(new URL('../bin/vibecheck.js', import.meta.url));

    // NOTE(review): `--auth` places credentials in the child's argv, which is
    // visible in process listings — consider a less exposed channel.
    const cliArgs = ['reality', '--url', args.url, '--json'];
    if (args.auth) cliArgs.push('--auth', String(args.auth));
    if (args.recordVideo) cliArgs.push('--video');
    if (args.recordTrace) cliArgs.push('--trace');
    if (args.maxPages) cliArgs.push('--max-pages', String(args.maxPages));

    // process.execPath runs the same Node binary as this server instead of
    // whichever `node` happens to be first on PATH.
    const result = execFileSync(process.execPath, [binPath, ...cliArgs], {
      cwd: projectPath,
      encoding: 'utf8',
      timeout: 300000, // 5 minutes for runtime tests
    });

    let parsed;
    try {
      parsed = JSON.parse(result);
    } catch {
      // Non-JSON output is still returned so the caller can inspect it.
      return wrapResponse({ output: result });
    }

    const findings = Array.isArray(parsed?.findings) ? parsed.findings : [];
    return wrapResponse(parsed, {
      // Preview only the first five findings to keep the response small.
      evidence: findings.slice(0, 5).map((f) => ({
        file: f?.file,
        line: f?.line,
        snippet: f?.title,
        // `??` keeps an explicit confidence of 0.
        confidence: f?.confidence ?? 0.8,
      })),
    });
  } catch (error) {
    // Forward whatever the child process printed alongside the error message.
    return wrapResponse(null, {
      error: error.message,
      output: error.stdout || error.stderr,
    });
  }
}
1169
+
1170
+ export default { CONSOLIDATED_TOOLS, handleConsolidatedTool, wrapResponse };