@vibecheckai/cli 3.1.8 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. package/bin/registry.js +106 -116
  2. package/bin/runners/context/generators/mcp.js +18 -0
  3. package/bin/runners/context/index.js +72 -4
  4. package/bin/runners/context/proof-context.js +293 -1
  5. package/bin/runners/context/security-scanner.js +311 -73
  6. package/bin/runners/lib/analyzers.js +607 -20
  7. package/bin/runners/lib/detectors-v2.js +172 -15
  8. package/bin/runners/lib/entitlements-v2.js +48 -1
  9. package/bin/runners/lib/evidence-pack.js +678 -0
  10. package/bin/runners/lib/html-proof-report.js +913 -0
  11. package/bin/runners/lib/missions/plan.js +231 -41
  12. package/bin/runners/lib/missions/templates.js +125 -0
  13. package/bin/runners/lib/scan-output.js +492 -253
  14. package/bin/runners/lib/ship-output.js +901 -641
  15. package/bin/runners/runCheckpoint.js +44 -3
  16. package/bin/runners/runContext.d.ts +4 -0
  17. package/bin/runners/runDoctor.js +10 -2
  18. package/bin/runners/runFix.js +51 -341
  19. package/bin/runners/runInit.js +11 -0
  20. package/bin/runners/runPolish.d.ts +4 -0
  21. package/bin/runners/runPolish.js +608 -29
  22. package/bin/runners/runProve.js +210 -25
  23. package/bin/runners/runReality.js +846 -101
  24. package/bin/runners/runScan.js +238 -4
  25. package/bin/runners/runShip.js +19 -3
  26. package/bin/runners/runWatch.js +14 -1
  27. package/bin/vibecheck.js +32 -2
  28. package/mcp-server/consolidated-tools.js +408 -42
  29. package/mcp-server/index.js +152 -15
  30. package/mcp-server/proof-tools.js +571 -0
  31. package/mcp-server/tier-auth.js +22 -19
  32. package/mcp-server/tools-v3.js +744 -0
  33. package/mcp-server/truth-firewall-tools.js +190 -4
  34. package/package.json +3 -1
  35. package/bin/runners/runInstall.js +0 -281
  36. package/bin/runners/runLabs.js +0 -341
@@ -1,69 +1,259 @@
1
1
  // bin/runners/lib/missions/plan.js
2
+ // ═══════════════════════════════════════════════════════════════════════════════
3
+ // MISSION PLANNING - Hardened with confidence scoring and better deduplication
4
+ // ═══════════════════════════════════════════════════════════════════════════════
5
+
6
/**
 * Score a finding for priority ordering (higher scores sort first).
 *
 * The score is the severity base (BLOCK = 100, WARN = 50, anything else = 0)
 * multiplied by the finding's confidence and rounded, plus two flat boosts:
 *   +10 when the finding, or any of its evidence entries, names a file;
 *   +20 when the category is Security, GhostAuth, AuthCoverage or Billing.
 *
 * @param {Object} f - Finding; reads `severity`, `confidence`, `evidence`,
 *   `file` and `category`.
 * @returns {number} Priority score.
 */
function scoreFinding(f) {
  let score = 0;

  // Base severity score
  if (f.severity === "BLOCK") score = 100;
  else if (f.severity === "WARN") score = 50;

  // Confidence adjustment. An explicit confidence of 0 is a real value and
  // must zero out the severity component; only a missing confidence falls
  // back to the evidence-based default (0.8 with evidence, 0.5 without).
  const confidence = f.confidence === 0
    ? 0
    : (f.confidence || (f.evidence?.length > 0 ? 0.8 : 0.5));
  score = Math.round(score * confidence);

  // Boost for findings with file evidence (more actionable).
  // Evidence that is not an array (e.g. a plain string) carries no file info
  // and must not crash the scorer.
  const evidenceNamesFile = Array.isArray(f.evidence) && f.evidence.some((e) => e?.file);
  if (f.file || evidenceNamesFile) {
    score += 10;
  }

  // Boost for security-related categories
  if (['Security', 'GhostAuth', 'AuthCoverage', 'Billing'].includes(f.category)) {
    score += 20;
  }

  return score;
}
7
33
 
8
- function missionFromFinding(f) {
9
- const typeByCategory = {
10
- Security: "REMOVE_OWNER_MODE",
11
- Billing: "FIX_STRIPE_WEBHOOKS",
12
- Entitlements: "ENFORCE_PAID_SURFACE",
13
- GhostAuth: "ADD_SERVER_AUTH",
14
- MissingRoute: "FIX_MISSING_ROUTE",
15
- EnvContract: "FIX_ENV_CONTRACT",
16
- FakeSuccess: "FIX_FAKE_SUCCESS",
17
- DeadUI: "FIX_DEAD_UI",
18
- AuthCoverage: "ADD_SERVER_AUTH"
19
- };
34
/**
 * Generate a fingerprint for deduplication.
 *
 * The fingerprint is `category|normalizedTitle[|fileBaseName]`:
 *   - category falls back to 'Unknown' when missing;
 *   - the title has hash-like runs, `:<digits>` suffixes and source file names
 *     (ts/js/tsx/jsx) replaced by placeholders, so findings that differ only
 *     in those details share a fingerprint;
 *   - the base name of `f.file` is appended when a file is present.
 *
 * @param {Object} f - Finding; reads `category`, `title` and `file`.
 * @returns {string} Fingerprint string.
 */
function generateFingerprint(f) {
  const category = f.category || 'Unknown';

  // Coerce to string first: a truthy non-string title (e.g. a number) would
  // otherwise throw on `.replace`.
  const normalizedTitle = String(f.title || '')
    .replace(/[a-f0-9]{8,}/gi, 'HASH') // Remove hash-like IDs
    .replace(/:\d+/g, ':LINE') // Normalize line numbers
    // Normalize file names after either path separator, matching the
    // separators accepted for `f.file` below; the separator itself is kept.
    .replace(/([/\\])[^/\\\s]+\.(ts|js|tsx|jsx)/gi, '$1FILE.$2')
    .trim();

  const parts = [category, normalizedTitle];

  // Secondary: file if available (for file-specific issues)
  if (f.file) {
    // Only the base name is used, so the same file reached through different
    // directory prefixes still groups together.
    const filePath = String(f.file);
    parts.push(filePath.split(/[/\\]/).pop() || filePath);
  }

  return parts.join('|');
}
61
+
62
/**
 * Lookup table from a finding's `category` to the mission type that fixes it.
 * Several categories may share one mission type (e.g. GhostAuth and
 * AuthCoverage both resolve to ADD_SERVER_AUTH).
 */
const CATEGORY_TO_MISSION_TYPE = {
  // --- Security & auth ---
  Security: "REMOVE_OWNER_MODE",
  GhostAuth: "ADD_SERVER_AUTH",
  AuthCoverage: "ADD_SERVER_AUTH",
  AuthDrift: "FIX_AUTH_DRIFT",

  // --- Billing & payments ---
  Billing: "FIX_STRIPE_WEBHOOKS",
  Entitlements: "ENFORCE_PAID_SURFACE",

  // --- Routes & APIs ---
  MissingRoute: "FIX_MISSING_ROUTE",
  RouteDrift: "FIX_ROUTE_DRIFT",

  // --- Environment & config ---
  EnvContract: "FIX_ENV_CONTRACT",

  // --- Reality / runtime issues ---
  FakeSuccess: "FIX_FAKE_SUCCESS",
  DeadUI: "FIX_DEAD_UI",
  FakeDomain: "FIX_MOCK_DOMAINS",
  FakeResponse: "FIX_PLACEHOLDER_DATA",
  MockStatus: "FIX_MOCK_DOMAINS",

  // --- Code quality ---
  EmptyCatch: "FIX_EMPTY_CATCH",
  TestKeys: "FIX_TEST_KEYS",
  HardcodedSecrets: "FIX_HARDCODED_SECRETS",
  SilentFallback: "FIX_SILENT_FALLBACK",
};
20
97
 
98
/**
 * Mission type priority (lower = higher priority).
 * Security issues come first, then billing, then everything else.
 * Mission types that are not listed here are ranked by the consumer's own
 * fallback value.
 */
const MISSION_PRIORITY = {
  // P0: Critical security (immediate fix required)
  REMOVE_OWNER_MODE: 1,
  FIX_HARDCODED_SECRETS: 2,
  FIX_AUTH_DRIFT: 3,

  // P1: Security & billing (fix before shipping)
  FIX_STRIPE_WEBHOOKS: 10,
  ENFORCE_PAID_SURFACE: 11,
  ADD_SERVER_AUTH: 12,
  FIX_TEST_KEYS: 13,
  // Has a mission template but previously had no priority, which ranked
  // simulated-billing fixes behind dead-UI work.
  FIX_SIMULATED_BILLING: 14,

  // P2: Fake data (fix before production)
  FIX_MOCK_DOMAINS: 20,
  FIX_PLACEHOLDER_DATA: 21,
  FIX_FAKE_SUCCESS: 22,

  // P3: Code quality (fix when possible)
  FIX_MISSING_ROUTE: 30,
  FIX_ROUTE_DRIFT: 31,
  FIX_ENV_CONTRACT: 32,
  FIX_EMPTY_CATCH: 33,
  FIX_SILENT_FALLBACK: 34,

  // P4: UI issues (fix before polish)
  FIX_DEAD_UI: 40,

  // P5: Generic (lowest priority)
  GENERIC_FIX: 99,
};
132
+
133
/**
 * Create a mission from a primary finding plus optional related findings.
 *
 * The mission type comes from CATEGORY_TO_MISSION_TYPE (falling back to
 * "GENERIC_FIX"), the mission confidence is the mean confidence of all
 * included findings (a finding without a confidence counts as 0.5), and the
 * mission targets the ids of the primary and every related finding.
 *
 * @param {Object} f - Primary finding; reads `id`, `title`, `severity`,
 *   `category`, `confidence`, `evidence` and `file`.
 * @param {Array<Object>} [relatedFindings=[]] - Findings resolved by the same
 *   mission; reads `id` and `confidence` of each.
 * @returns {Object} Mission descriptor.
 */
function missionFromFinding(f, relatedFindings = []) {
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const related = relatedFindings || [];

  // Own-property lookup so a category such as "constructor" cannot resolve to
  // a member inherited from Object.prototype.
  const hasMapping = Object.prototype.hasOwnProperty.call(CATEGORY_TO_MISSION_TYPE, f.category);
  const type = (hasMapping && CATEGORY_TO_MISSION_TYPE[f.category]) || "GENERIC_FIX";

  const allFindingIds = [f.id, ...related.map((r) => r.id)];

  // Calculate mission confidence based on findings. An explicit 0 is a real
  // confidence value and must lower the average rather than be read as 0.5.
  const confidenceOf = (finding) => (finding.confidence === 0 ? 0 : finding.confidence || 0.5);
  const confidences = [f, ...related].map(confidenceOf);
  const avgConfidence = confidences.reduce((a, b) => a + b, 0) / confidences.length;

  const successCriteria = [`Finding ${f.id} no longer appears in ship results`];
  if (related.length > 0) {
    successCriteria.push(`${related.length} related finding(s) also resolved`);
  }

  return {
    id: `M_${f.id}`,
    type,
    title: f.title,
    severity: f.severity,
    category: f.category,
    confidence: avgConfidence,
    successCriteria,
    targetFindingIds: allFindingIds,
    findingCount: allFindingIds.length,
    // Include evidence for the LLM context
    evidence: f.evidence || [],
    file: f.file || null,
  };
}
33
165
 
34
- function planMissions(findings, { maxMissions = 12, blocksOnlyFirst = true } = {}) {
35
- const sorted = [...findings].sort((a,b) => scoreFinding(b) - scoreFinding(a));
166
/**
 * Bucket findings that can be fixed together.
 *
 * Findings share a bucket when they have the same category and the same file,
 * where the file is `f.file`, else the file of the first evidence entry, else
 * the literal 'unknown'. Buckets and their members keep input order.
 *
 * @param {Iterable<Object>} findings - Findings to bucket.
 * @returns {Map<string, Array<Object>>} Map keyed by `category:file`.
 */
function groupRelatedFindings(findings) {
  const buckets = new Map();

  for (const finding of findings) {
    const location = finding.file || finding.evidence?.[0]?.file || 'unknown';
    const bucketKey = `${finding.category}:${location}`;

    const existing = buckets.get(bucketKey);
    if (existing === undefined) {
      buckets.set(bucketKey, [finding]);
    } else {
      existing.push(finding);
    }
  }

  return buckets;
}
186
+
187
/**
 * Plan missions from findings with deduplication, grouping and prioritization.
 *
 * Pipeline:
 *   1. rank findings by scoreFinding (highest first);
 *   2. when `blocksOnlyFirst` is set and any BLOCK exists, keep only BLOCKs;
 *   3. drop findings whose fingerprint was already seen, and WARN findings
 *      whose `category:first-50-chars-of-title` key was already seen;
 *   4. build one mission per finding, or per category/file group when
 *      `groupRelated` is set;
 *   5. order missions by MISSION_PRIORITY (unlisted types rank as 50), then by
 *      confidence (higher first), and keep the first `maxMissions`.
 *
 * @param {Array} findings - List of findings from ship/scan; null/undefined is
 *   treated as an empty list.
 * @param {Object} [options] - Planning options.
 * @param {number} [options.maxMissions=12] - Maximum number of missions returned.
 * @param {boolean} [options.blocksOnlyFirst=true] - Plan only BLOCK findings
 *   when at least one exists.
 * @param {boolean} [options.groupRelated=true] - Merge findings that share a
 *   category and file into one mission.
 * @returns {Array} Planned missions.
 */
function planMissions(findings, { maxMissions = 12, blocksOnlyFirst = true, groupRelated = true } = {}) {
  // Step 1: Sort by score (severity + confidence + evidence).
  // Each finding is scored once instead of once per comparison.
  const sorted = [...(findings ?? [])]
    .map((finding) => ({ finding, score: scoreFinding(finding) }))
    .sort((a, b) => b.score - a.score)
    .map(({ finding }) => finding);

  // Step 2: Filter to BLOCKs only if we have them (cost control)
  const hasBlocks = sorted.some((f) => f.severity === "BLOCK");
  const scoped = (blocksOnlyFirst && hasBlocks)
    ? sorted.filter((f) => f.severity === "BLOCK")
    : sorted;

  // Step 3: Deduplicate. Fingerprints and near-duplicate keys live in separate
  // sets so a key from one namespace can never suppress a finding through the
  // other.
  const seenFingerprints = new Set();
  const seenNearDupes = new Set();
  const deduplicated = [];

  for (const f of scoped) {
    // Skip exact duplicates
    const fingerprint = generateFingerprint(f);
    if (seenFingerprints.has(fingerprint)) continue;
    seenFingerprints.add(fingerprint);

    // Near-duplicates (same category + same title prefix) are only dropped
    // for WARN findings; BLOCK findings are always kept.
    const nearDupeKey = `${f.category}:${String(f.title || '').substring(0, 50)}`;
    if (f.severity === "WARN" && seenNearDupes.has(nearDupeKey)) continue;
    seenNearDupes.add(nearDupeKey);

    deduplicated.push(f);
  }

  // Step 4: Group related findings (optional - reduces noise)
  let missions;
  if (groupRelated) {
    missions = [];
    for (const groupFindings of groupRelatedFindings(deduplicated).values()) {
      // Groups preserve the score order from step 1, so the first entry is
      // the highest-scored finding of the group.
      const [primary, ...rest] = groupFindings;
      // At most 4 related findings ride along; further members of the group
      // are not attached to any mission in this planning pass.
      missions.push(missionFromFinding(primary, rest.slice(0, 4)));
    }
  } else {
    missions = deduplicated.map((f) => missionFromFinding(f));
  }

  // Step 5: Sort by priority and limit
  const priorityOf = (mission) => MISSION_PRIORITY[mission.type] || 50;
  // An explicit confidence of 0 is kept; only a missing value defaults to 0.5.
  const confidenceOf = (mission) => (mission.confidence === 0 ? 0 : mission.confidence || 0.5);

  missions.sort((a, b) => {
    const priorityDelta = priorityOf(a) - priorityOf(b);
    if (priorityDelta !== 0) return priorityDelta;

    // Secondary sort by confidence (higher first)
    return confidenceOf(b) - confidenceOf(a);
  });

  return missions.slice(0, maxMissions);
}
68
252
 
69
- module.exports = { planMissions };
253
+ module.exports = {
254
+ planMissions,
255
+ scoreFinding,
256
+ generateFingerprint,
257
+ CATEGORY_TO_MISSION_TYPE,
258
+ MISSION_PRIORITY
259
+ };
@@ -179,6 +179,131 @@ function templateForMissionType(type) {
179
179
  success: ["Auth drift findings disappear."]
180
180
  };
181
181
 
182
+ // ═══════════════════════════════════════════════════════════════════════════════
183
+ // ENHANCED MISSION TYPES - World-class detection and fixing
184
+ // ═══════════════════════════════════════════════════════════════════════════════
185
+
186
+ case "FIX_EMPTY_CATCH":
187
+ return {
188
+ intent: "Add proper error handling to empty catch blocks. Silent failures hide bugs and security issues.",
189
+ do: [
190
+ "Add error logging: console.error('Context:', err) or use structured logger.",
191
+ "Re-throw the error OR return a meaningful error response to caller.",
192
+ "If intentionally ignoring, add explicit comment explaining WHY (e.g., // Expected: optional feature).",
193
+ "Consider adding error tracking (Sentry, etc.) for production visibility."
194
+ ],
195
+ dont: [
196
+ "Do not just add a comment without actual handling.",
197
+ "Do not swallow errors in auth, payment, or data mutation paths.",
198
+ "Do not use console.log for errors (use console.error)."
199
+ ],
200
+ success: ["Empty catch findings disappear and errors become visible."]
201
+ };
202
+
203
+ case "FIX_TEST_KEYS":
204
+ return {
205
+ intent: "Replace test/demo API keys with environment variable references. Test keys in production = security breach.",
206
+ do: [
207
+ "Replace sk_test_*, pk_test_*, api_key_test with process.env.STRIPE_SECRET_KEY etc.",
208
+ "Add the env var to .env.example with a placeholder comment.",
209
+ "Ensure the code fails fast if env var is missing (no silent fallback to test key).",
210
+ "Add runtime validation: if (!process.env.STRIPE_SECRET_KEY) throw new Error('Missing STRIPE_SECRET_KEY')."
211
+ ],
212
+ dont: [
213
+ "Do not leave test keys as fallback defaults.",
214
+ "Do not commit .env files with real keys.",
215
+ "Do not use generic names like API_KEY - be specific (STRIPE_SECRET_KEY, SENDGRID_API_KEY)."
216
+ ],
217
+ success: ["Test key findings disappear and production uses real credentials."]
218
+ };
219
+
220
+ case "FIX_MOCK_DOMAINS":
221
+ return {
222
+ intent: "Replace hardcoded mock/localhost URLs with configurable endpoints. Mock domains in production = broken features.",
223
+ do: [
224
+ "Replace localhost:*, jsonplaceholder.typicode.com, mockapi.io with process.env.API_BASE_URL.",
225
+ "Add the env var to .env.example: API_BASE_URL=https://api.yourproduct.com",
226
+ "Add URL validation at startup to catch misconfiguration early.",
227
+ "For development, use .env.local with localhost values."
228
+ ],
229
+ dont: [
230
+ "Do not use localhost as a fallback default.",
231
+ "Do not hardcode staging URLs - use env vars for all environments.",
232
+ "Do not mix mock and real endpoints in the same codebase without clear separation."
233
+ ],
234
+ success: ["Mock domain findings disappear and API calls hit real backends."]
235
+ };
236
+
237
+ case "FIX_PLACEHOLDER_DATA":
238
+ return {
239
+ intent: "Replace lorem ipsum and placeholder data with real data fetching or meaningful defaults.",
240
+ do: [
241
+ "Replace 'Lorem ipsum', 'John Doe', 'user@example.com' with actual data bindings.",
242
+ "If data comes from API: ensure proper loading states and error handling.",
243
+ "If truly static: use real, contextually appropriate content.",
244
+ "For avatars/images: use real assets or proper placeholder services with fallbacks."
245
+ ],
246
+ dont: [
247
+ "Do not show placeholder data to real users.",
248
+ "Do not use obviously fake data (123-456-7890, test@test.com) in production UI.",
249
+ "Do not remove placeholder without adding real data source."
250
+ ],
251
+ success: ["Placeholder data findings disappear and UI shows real content."]
252
+ };
253
+
254
+ case "FIX_HARDCODED_SECRETS":
255
+ return {
256
+ intent: "Move hardcoded secrets to environment variables. Secrets in code = compromised on first commit.",
257
+ do: [
258
+ "Extract secret to environment variable with descriptive name.",
259
+ "Add to .env.example with CHANGEME or empty placeholder.",
260
+ "Add .env to .gitignore if not already present.",
261
+ "Add startup validation to fail fast on missing secrets.",
262
+ "Consider using a secrets manager (Vault, AWS Secrets Manager) for production."
263
+ ],
264
+ dont: [
265
+ "Do not leave secrets in code comments.",
266
+ "Do not use generic names (SECRET, PASSWORD) - be specific.",
267
+ "Do not commit the actual secret value anywhere.",
268
+ "Do not use base64 encoding as 'encryption' - it's not."
269
+ ],
270
+ success: ["Hardcoded secret findings disappear and secrets are externalized."]
271
+ };
272
+
273
+ case "FIX_SIMULATED_BILLING":
274
+ return {
275
+ intent: "Replace simulated billing responses with real payment processor integration.",
276
+ do: [
277
+ "Connect to real Stripe/payment processor in production mode.",
278
+ "Ensure webhook handlers verify signatures and process real events.",
279
+ "Add proper error handling for payment failures.",
280
+ "Implement idempotency to prevent double charges."
281
+ ],
282
+ dont: [
283
+ "Do not show 'Payment successful' without real charge.",
284
+ "Do not skip signature verification in production.",
285
+ "Do not trust client-side payment confirmations."
286
+ ],
287
+ success: ["Simulated billing findings disappear and payments are real."]
288
+ };
289
+
290
+ case "FIX_SILENT_FALLBACK":
291
+ return {
292
+ intent: "Make failures visible instead of silently returning success. Silent fallbacks hide broken features.",
293
+ do: [
294
+ "Remove catch blocks that return { success: true } or empty data.",
295
+ "Surface errors to the UI with appropriate messaging.",
296
+ "Log errors with context for debugging.",
297
+ "Consider graceful degradation that's VISIBLE (e.g., 'Feature temporarily unavailable')."
298
+ ],
299
+ dont: [
300
+ "Do not return success: true when operation failed.",
301
+ "Do not show success toast/UI when API returned error.",
302
+ "Do not hide errors from users entirely - they need to know something went wrong."
303
+ ],
304
+ success: ["Silent fallback findings disappear and failures become visible."]
305
+ };
306
+
182
307
  default:
183
308
  return {
184
309
  intent: "Fix the specific finding with smallest correct patch.",