@boshu2/vibe-check 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/.agents/bundles/ml-learning-loop-complete-plan-2025-11-28.md +908 -0
  2. package/.agents/bundles/unified-vibe-system-plan-phase1-2025-11-28.md +962 -0
  3. package/.agents/bundles/unified-vibe-system-research-2025-11-28.md +1003 -0
  4. package/.agents/bundles/vibe-check-ecosystem-plan-2025-11-29.md +635 -0
  5. package/.agents/bundles/vibe-check-gamification-complete-2025-11-29.md +132 -0
  6. package/.agents/bundles/vibe-score-scientific-framework-2025-11-28.md +602 -0
  7. package/.vibe-check/calibration.json +38 -0
  8. package/.vibe-check/latest.json +114 -0
  9. package/CHANGELOG.md +47 -0
  10. package/CLAUDE.md +178 -0
  11. package/README.md +222 -7
  12. package/action.yml +270 -0
  13. package/dashboard/app.js +494 -0
  14. package/dashboard/index.html +235 -0
  15. package/dashboard/styles.css +647 -0
  16. package/dist/calibration/ece.d.ts +26 -0
  17. package/dist/calibration/ece.d.ts.map +1 -0
  18. package/dist/calibration/ece.js +93 -0
  19. package/dist/calibration/ece.js.map +1 -0
  20. package/dist/calibration/index.d.ts +3 -0
  21. package/dist/calibration/index.d.ts.map +1 -0
  22. package/dist/calibration/index.js +15 -0
  23. package/dist/calibration/index.js.map +1 -0
  24. package/dist/calibration/storage.d.ts +34 -0
  25. package/dist/calibration/storage.d.ts.map +1 -0
  26. package/dist/calibration/storage.js +188 -0
  27. package/dist/calibration/storage.js.map +1 -0
  28. package/dist/cli.js +31 -76
  29. package/dist/cli.js.map +1 -1
  30. package/dist/commands/analyze.d.ts +16 -0
  31. package/dist/commands/analyze.d.ts.map +1 -0
  32. package/dist/commands/analyze.js +256 -0
  33. package/dist/commands/analyze.js.map +1 -0
  34. package/dist/commands/index.d.ts +5 -0
  35. package/dist/commands/index.d.ts.map +1 -0
  36. package/dist/commands/index.js +13 -0
  37. package/dist/commands/index.js.map +1 -0
  38. package/dist/commands/init-hook.d.ts +3 -0
  39. package/dist/commands/init-hook.d.ts.map +1 -0
  40. package/dist/commands/init-hook.js +161 -0
  41. package/dist/commands/init-hook.js.map +1 -0
  42. package/dist/commands/level.d.ts +3 -0
  43. package/dist/commands/level.d.ts.map +1 -0
  44. package/dist/commands/level.js +277 -0
  45. package/dist/commands/level.js.map +1 -0
  46. package/dist/commands/profile.d.ts +4 -0
  47. package/dist/commands/profile.d.ts.map +1 -0
  48. package/dist/commands/profile.js +143 -0
  49. package/dist/commands/profile.js.map +1 -0
  50. package/dist/gamification/achievements.d.ts +15 -0
  51. package/dist/gamification/achievements.d.ts.map +1 -0
  52. package/dist/gamification/achievements.js +273 -0
  53. package/dist/gamification/achievements.js.map +1 -0
  54. package/dist/gamification/index.d.ts +8 -0
  55. package/dist/gamification/index.d.ts.map +1 -0
  56. package/dist/gamification/index.js +30 -0
  57. package/dist/gamification/index.js.map +1 -0
  58. package/dist/gamification/profile.d.ts +46 -0
  59. package/dist/gamification/profile.d.ts.map +1 -0
  60. package/dist/gamification/profile.js +272 -0
  61. package/dist/gamification/profile.js.map +1 -0
  62. package/dist/gamification/streaks.d.ts +26 -0
  63. package/dist/gamification/streaks.d.ts.map +1 -0
  64. package/dist/gamification/streaks.js +132 -0
  65. package/dist/gamification/streaks.js.map +1 -0
  66. package/dist/gamification/types.d.ts +111 -0
  67. package/dist/gamification/types.d.ts.map +1 -0
  68. package/dist/gamification/types.js +26 -0
  69. package/dist/gamification/types.js.map +1 -0
  70. package/dist/gamification/xp.d.ts +37 -0
  71. package/dist/gamification/xp.d.ts.map +1 -0
  72. package/dist/gamification/xp.js +115 -0
  73. package/dist/gamification/xp.js.map +1 -0
  74. package/dist/git.d.ts +11 -0
  75. package/dist/git.d.ts.map +1 -1
  76. package/dist/git.js +52 -0
  77. package/dist/git.js.map +1 -1
  78. package/dist/metrics/code-stability.d.ts +13 -0
  79. package/dist/metrics/code-stability.d.ts.map +1 -0
  80. package/dist/metrics/code-stability.js +74 -0
  81. package/dist/metrics/code-stability.js.map +1 -0
  82. package/dist/metrics/file-churn.d.ts +8 -0
  83. package/dist/metrics/file-churn.d.ts.map +1 -0
  84. package/dist/metrics/file-churn.js +75 -0
  85. package/dist/metrics/file-churn.js.map +1 -0
  86. package/dist/metrics/time-spiral.d.ts +8 -0
  87. package/dist/metrics/time-spiral.d.ts.map +1 -0
  88. package/dist/metrics/time-spiral.js +69 -0
  89. package/dist/metrics/time-spiral.js.map +1 -0
  90. package/dist/metrics/velocity-anomaly.d.ts +13 -0
  91. package/dist/metrics/velocity-anomaly.d.ts.map +1 -0
  92. package/dist/metrics/velocity-anomaly.js +67 -0
  93. package/dist/metrics/velocity-anomaly.js.map +1 -0
  94. package/dist/output/index.d.ts +6 -3
  95. package/dist/output/index.d.ts.map +1 -1
  96. package/dist/output/index.js +4 -3
  97. package/dist/output/index.js.map +1 -1
  98. package/dist/output/json.d.ts +2 -2
  99. package/dist/output/json.d.ts.map +1 -1
  100. package/dist/output/json.js +54 -0
  101. package/dist/output/json.js.map +1 -1
  102. package/dist/output/markdown.d.ts +2 -2
  103. package/dist/output/markdown.d.ts.map +1 -1
  104. package/dist/output/markdown.js +34 -1
  105. package/dist/output/markdown.js.map +1 -1
  106. package/dist/output/terminal.d.ts +6 -2
  107. package/dist/output/terminal.d.ts.map +1 -1
  108. package/dist/output/terminal.js +131 -3
  109. package/dist/output/terminal.js.map +1 -1
  110. package/dist/recommend/index.d.ts +3 -0
  111. package/dist/recommend/index.d.ts.map +1 -0
  112. package/dist/recommend/index.js +14 -0
  113. package/dist/recommend/index.js.map +1 -0
  114. package/dist/recommend/ordered-logistic.d.ts +49 -0
  115. package/dist/recommend/ordered-logistic.d.ts.map +1 -0
  116. package/dist/recommend/ordered-logistic.js +153 -0
  117. package/dist/recommend/ordered-logistic.js.map +1 -0
  118. package/dist/recommend/questions.d.ts +19 -0
  119. package/dist/recommend/questions.d.ts.map +1 -0
  120. package/dist/recommend/questions.js +73 -0
  121. package/dist/recommend/questions.js.map +1 -0
  122. package/dist/score/index.d.ts +21 -0
  123. package/dist/score/index.d.ts.map +1 -0
  124. package/dist/score/index.js +48 -0
  125. package/dist/score/index.js.map +1 -0
  126. package/dist/score/weights.d.ts +16 -0
  127. package/dist/score/weights.d.ts.map +1 -0
  128. package/dist/score/weights.js +28 -0
  129. package/dist/score/weights.js.map +1 -0
  130. package/dist/types.d.ts +83 -0
  131. package/dist/types.d.ts.map +1 -1
  132. package/hooks/pre-push +103 -0
  133. package/package.json +10 -9
@@ -0,0 +1,908 @@
1
+ # ML Learning Loop: Complete Implementation Plan
2
+
3
+ **Type:** Plan
4
+ **Created:** 2025-11-28
5
+ **Depends On:** Gap analysis from current session
6
+ **Loop:** Middle (bridges research to implementation)
7
+ **Tags:** vibe-check, ml-learning, calibration, ordered-logistic, partial-fit
8
+
9
+ ---
10
+
11
+ ## Overview
12
+
13
+ Implement the **complete ML learning loop** for vibe-check. This plan addresses ALL 6 identified gaps to make the model actually learn from experience.
14
+
15
+ **What changes:**
16
+ 1. Add `partialFit()` to ordered-logistic.ts for incremental learning
17
+ 2. Add `retrain()` trigger to storage.ts based on ECE threshold or sample count
18
+ 3. Modify `level` command to use ML model + actual metrics (not additive formula)
19
+ 4. Wire `--calibrate` to trigger learning after adding sample
20
+ 5. Infer the "true" level from the actual vibe score (outcome-based training labels)
21
+ 6. Update model weights in calibration.json after learning
22
+
23
+ **What doesn't change:**
24
+ - Existing 4 semantic-free metrics
25
+ - CLI interface
26
+ - ECE calculation formula
27
+ - Storage file format (adds fields, backward compatible)
28
+
29
+ ---
30
+
31
+ ## The 6 Gaps Addressed
32
+
33
+ | Gap | Solution | Files Modified |
34
+ |-----|----------|----------------|
35
+ | Model Learning | Add `partialFit()` | `ordered-logistic.ts` |
36
+ | Feedback Loop | Add `retrain()` trigger | `storage.ts` |
37
+ | Level Uses ML | Replace `calculateBaseLevel` with `predict` | `level.ts` |
38
+ | Metrics Integration | Fetch recent git metrics in `level` | `level.ts` |
39
+ | Outcome-Based Updates | Infer "true" level from score | `ece.ts`, `storage.ts` |
40
+ | Calibration Triggers Learning | Call `retrain()` after `addSample` | `storage.ts` |
41
+
42
+ ---
43
+
44
+ ## PDC Strategy
45
+
46
+ ### Prevent
47
+ - [x] Read all existing code (completed above)
48
+ - [ ] Run `npm test` before starting
49
+ - [ ] Commit after each file modification
50
+
51
+ ### Detect
52
+ - [ ] `npm run build` after each TypeScript change
53
+ - [ ] `npm test` after completing each gap
54
+ - [ ] Manual test: `vibe-check level --quick` should use ML
55
+
56
+ ### Correct
57
+ - [ ] Git revert individual commits if issues found
58
+ - [ ] Each function is independent - can revert selectively
59
+
60
+ ---
61
+
62
+ ## Files to Modify
63
+
64
+ ### 1. `src/recommend/ordered-logistic.ts` (ADD `partialFit`)
65
+
66
+ **Purpose:** Enable incremental learning from calibration samples
67
+
68
+ **Current:** Lines 1-113 (prediction only, no learning)
69
+
70
+ **Add after line 112 (before closing):**
71
+
72
+ ```typescript
73
+ /**
74
+ * Single-step stochastic gradient descent update.
75
+ * Updates weights based on one sample's prediction error.
76
+ *
77
+ * For ordered logistic regression:
78
+ * - We minimize negative log-likelihood
79
+ * - Gradient for weight[j] = (p_k - y_k) * x_j summed over the levels k
80
+ *
81
+ * Learning rate decays: lr = initialLr / (1 + decay * n)
82
+ */
83
+ export function partialFit(
84
+ model: ModelState,
85
+ features: number[],
86
+ trueLevel: number,
87
+ learningRate: number = 0.01,
88
+ sampleCount: number = 1
89
+ ): ModelState {
90
+ const effectiveLr = learningRate / (1 + 0.001 * sampleCount);
91
+
92
+ // Get current predictions
93
+ const probs = predictProba(features, model);
94
+
95
+ // Create one-hot target
96
+ const target = new Array(N_LEVELS).fill(0);
97
+ target[Math.min(Math.max(0, Math.round(trueLevel)), N_LEVELS - 1)] = 1;
98
+
99
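+ // Gradient for weights: dL/dw_j = sum_k (p_k - y_k) * x_j (NOTE(review): if probs sums to 1 like the one-hot target, this sum is ~0 and the weights never move — confirm)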
100
+ const newWeights = [...model.weights];
101
+ for (let j = 0; j < features.length && j < newWeights.length; j++) {
102
+ let gradient = 0;
103
+ for (let k = 0; k < N_LEVELS; k++) {
104
+ gradient += (probs[k] - target[k]) * features[j];
105
+ }
106
+ newWeights[j] -= effectiveLr * gradient;
107
+ }
108
+
109
+ // Gradient for thresholds: dL/dt_k = cumProb_k - cumTarget_k
110
+ const newThresholds = [...model.thresholds];
111
+ let cumTarget = 0;
112
+ for (let k = 0; k < model.thresholds.length; k++) {
113
+ cumTarget += target[k];
114
+ const cumProb = probs.slice(0, k + 1).reduce((a, b) => a + b, 0);
115
+ const gradient = cumProb - cumTarget;
116
+ newThresholds[k] -= effectiveLr * gradient;
117
+ }
118
+
119
+ // Ensure thresholds remain ordered
120
+ for (let i = 1; i < newThresholds.length; i++) {
121
+ if (newThresholds[i] <= newThresholds[i - 1]) {
122
+ newThresholds[i] = newThresholds[i - 1] + 0.1;
123
+ }
124
+ }
125
+
126
+ return {
127
+ weights: newWeights,
128
+ thresholds: newThresholds,
129
+ };
130
+ }
131
+
132
+ /**
133
+ * Batch partial fit - applies partialFit to multiple samples.
134
+ * Processes samples in order, accumulating updates.
135
+ */
136
+ export function batchPartialFit(
137
+ model: ModelState,
138
+ samples: Array<{ features: number[]; trueLevel: number }>,
139
+ learningRate: number = 0.01
140
+ ): ModelState {
141
+ let current = model;
142
+ for (let i = 0; i < samples.length; i++) {
143
+ current = partialFit(
144
+ current,
145
+ samples[i].features,
146
+ samples[i].trueLevel,
147
+ learningRate,
148
+ i + 1
149
+ );
150
+ }
151
+ return current;
152
+ }
153
+ ```
154
+
155
+ **Validation:** `npm run build`
156
+
157
+ ---
158
+
159
+ ### 2. `src/calibration/ece.ts` (ADD `inferTrueLevel`)
160
+
161
+ **Purpose:** Infer the "true" level based on actual vibe score outcome
162
+
163
+ **Add after line 72:**
164
+
165
+ ```typescript
166
+ /**
167
+ * Infer the "true" vibe level from an actual vibe score.
168
+ * This is used to generate training labels for the model.
169
+ *
170
+ * Maps score ranges to levels:
171
+ * - score ≥ 0.90 → 5 (Elite flow)
172
+ * - 0.80 ≤ score < 0.90 → 4 (High flow)
173
+ * - 0.65 ≤ score < 0.80 → 3 (Balanced)
174
+ * - 0.50 ≤ score < 0.65 → 2 (AI-Augmented)
175
+ * - 0.30 ≤ score < 0.50 → 1 (Human-Led)
176
+ * - score < 0.30 → 0 (Manual)
177
+ */
178
+ export function inferTrueLevel(vibeScore: number): 0 | 1 | 2 | 3 | 4 | 5 {
179
+ if (vibeScore >= 0.90) return 5;
180
+ if (vibeScore >= 0.80) return 4;
181
+ if (vibeScore >= 0.65) return 3;
182
+ if (vibeScore >= 0.50) return 2;
183
+ if (vibeScore >= 0.30) return 1;
184
+ return 0;
185
+ }
186
+ ```
187
+
188
+ **Update exports in `src/calibration/index.ts`:**
189
+
190
+ ```typescript
191
+ export { loadCalibration, saveCalibration, addSample, getCalibrationPath } from './storage';
192
+ export { calculateECE, assessOutcome, inferTrueLevel } from './ece';
193
+ ```
194
+
195
+ **Validation:** `npm run build`
196
+
197
+ ---
198
+
199
+ ### 3. `src/calibration/storage.ts` (ADD learning loop)
200
+
201
+ **Purpose:** Trigger retraining when samples accumulate or ECE degrades
202
+
203
+ **Replace entire file (lines 1-71):**
204
+
205
+ ```typescript
206
+ import * as fs from 'fs';
207
+ import * as path from 'path';
208
+ import { CalibrationState, CalibrationSample } from '../types';
209
+ import { DEFAULT_MODEL, partialFit, batchPartialFit, ModelState } from '../recommend/ordered-logistic';
210
+ import { calculateECE, inferTrueLevel } from './ece';
211
+
212
+ const CALIBRATION_DIR = '.vibe-check';
213
+ const CALIBRATION_FILE = 'calibration.json';
214
+
215
+ // Retraining triggers
216
+ const RETRAIN_SAMPLE_INTERVAL = 10; // Retrain every N samples
217
+ const RETRAIN_ECE_THRESHOLD = 0.15; // Retrain if ECE exceeds this
218
+
219
+ /**
220
+ * Get calibration file path for a repository.
221
+ */
222
+ export function getCalibrationPath(repoPath: string): string {
223
+ return path.join(repoPath, CALIBRATION_DIR, CALIBRATION_FILE);
224
+ }
225
+
226
+ /**
227
+ * Load calibration state from disk.
228
+ */
229
+ export function loadCalibration(repoPath: string): CalibrationState {
230
+ const filePath = getCalibrationPath(repoPath);
231
+
232
+ if (fs.existsSync(filePath)) {
233
+ try {
234
+ const data = fs.readFileSync(filePath, 'utf-8');
235
+ const state = JSON.parse(data);
236
+ // Ensure dates are Date objects
237
+ state.lastUpdated = new Date(state.lastUpdated);
238
+ state.samples = state.samples.map((s: CalibrationSample) => ({
239
+ ...s,
240
+ timestamp: new Date(s.timestamp),
241
+ }));
242
+ return state;
243
+ } catch {
244
+ return defaultCalibrationState();
245
+ }
246
+ }
247
+
248
+ return defaultCalibrationState();
249
+ }
250
+
251
+ /**
252
+ * Save calibration state to disk.
253
+ */
254
+ export function saveCalibration(repoPath: string, state: CalibrationState): void {
255
+ const dirPath = path.join(repoPath, CALIBRATION_DIR);
256
+ const filePath = getCalibrationPath(repoPath);
257
+
258
+ if (!fs.existsSync(dirPath)) {
259
+ fs.mkdirSync(dirPath, { recursive: true });
260
+ }
261
+
262
+ fs.writeFileSync(filePath, JSON.stringify(state, null, 2));
263
+ }
264
+
265
+ /**
266
+ * Add a calibration sample and potentially trigger retraining.
267
+ *
268
+ * Retraining is triggered, once at least 5 samples exist, when:
269
+ * 1. Sample count is a multiple of RETRAIN_SAMPLE_INTERVAL, OR
270
+ * 2. ECE exceeds RETRAIN_ECE_THRESHOLD
271
+ *
272
+ * Returns updated state with potentially new weights.
273
+ */
274
+ export function addSample(
275
+ repoPath: string,
276
+ sample: CalibrationSample
277
+ ): CalibrationState {
278
+ const state = loadCalibration(repoPath);
279
+ state.samples.push(sample);
280
+ state.lastUpdated = new Date();
281
+
282
+ // Check if retraining is needed
283
+ const shouldRetrain =
284
+ state.samples.length % RETRAIN_SAMPLE_INTERVAL === 0 ||
285
+ state.ece > RETRAIN_ECE_THRESHOLD;
286
+
287
+ if (shouldRetrain && state.samples.length >= 5) {
288
+ const updatedState = retrain(state);
289
+ saveCalibration(repoPath, updatedState);
290
+ return updatedState;
291
+ }
292
+
293
+ // Just save without retraining
294
+ saveCalibration(repoPath, state);
295
+ return state;
296
+ }
297
+
298
+ /**
299
+ * Retrain the model using all accumulated samples.
300
+ *
301
+ * Uses batch partial fit with inferred true levels from vibe scores.
302
+ */
303
+ export function retrain(state: CalibrationState): CalibrationState {
304
+ if (state.samples.length < 5) {
305
+ return state; // Not enough data
306
+ }
307
+
308
+ // Prepare training data: use vibeScore to infer "true" level
309
+ const trainingData = state.samples.map((sample) => ({
310
+ features: sample.features,
311
+ trueLevel: inferTrueLevel(sample.vibeScore),
312
+ }));
313
+
314
+ // Start from default model (or could start from current weights)
315
+ const initialModel: ModelState = {
316
+ weights: [...DEFAULT_MODEL.weights],
317
+ thresholds: [...DEFAULT_MODEL.thresholds],
318
+ };
319
+
320
+ // Train with multiple epochs for better convergence
321
+ let model = initialModel;
322
+ const epochs = Math.min(10, Math.ceil(50 / state.samples.length));
323
+ for (let epoch = 0; epoch < epochs; epoch++) {
324
+ model = batchPartialFit(model, trainingData, 0.05);
325
+ }
326
+
327
+ // Calculate new ECE
328
+ const newEce = calculateECE(state.samples);
329
+
330
+ return {
331
+ ...state,
332
+ weights: model.weights,
333
+ thresholds: model.thresholds,
334
+ ece: newEce,
335
+ lastUpdated: new Date(),
336
+ version: '2.1.0', // Bump version to indicate ML-learned weights
337
+ };
338
+ }
339
+
340
+ /**
341
+ * Force retraining (manual trigger).
342
+ */
343
+ export function forceRetrain(repoPath: string): CalibrationState {
344
+ const state = loadCalibration(repoPath);
345
+ if (state.samples.length < 5) {
346
+ return state;
347
+ }
348
+ const updatedState = retrain(state);
349
+ saveCalibration(repoPath, updatedState);
350
+ return updatedState;
351
+ }
352
+
353
+ function defaultCalibrationState(): CalibrationState {
354
+ return {
355
+ samples: [],
356
+ weights: DEFAULT_MODEL.weights,
357
+ thresholds: DEFAULT_MODEL.thresholds,
358
+ ece: 0,
359
+ lastUpdated: new Date(),
360
+ version: '2.0.0',
361
+ };
362
+ }
363
+ ```
364
+
365
+ **Update exports in `src/calibration/index.ts`:**
366
+
367
+ ```typescript
368
+ export { loadCalibration, saveCalibration, addSample, getCalibrationPath, retrain, forceRetrain } from './storage';
369
+ export { calculateECE, assessOutcome, inferTrueLevel } from './ece';
370
+ ```
371
+
372
+ **Validation:** `npm run build`
373
+
374
+ ---
375
+
376
+ ### 4. `src/recommend/index.ts` (ADD export for partialFit)
377
+
378
+ **Purpose:** Export new learning functions
379
+
380
+ **Replace lines 1-2:**
381
+
382
+ ```typescript
383
+ export { predictProba, predict, predictWithConfidence, DEFAULT_MODEL, ModelState, partialFit, batchPartialFit } from './ordered-logistic';
384
+ export { VIBE_QUESTIONS, calculateBaseLevel, Question } from './questions';
385
+ ```
386
+
387
+ **Validation:** `npm run build`
388
+
389
+ ---
390
+
391
+ ### 5. `src/commands/level.ts` (USE ML model + real metrics)
392
+
393
+ **Purpose:** Replace additive formula with ML prediction using learned weights + actual metrics
394
+
395
+ **Replace entire file (lines 1-178):**
396
+
397
+ ```typescript
398
+ import { Command } from 'commander';
399
+ import chalk from 'chalk';
400
+ import { QuestionResponses } from '../types';
401
+ import { VIBE_QUESTIONS, calculateBaseLevel } from '../recommend/questions';
402
+ import { predictWithConfidence, ModelState } from '../recommend';
403
+ import { loadCalibration } from '../calibration';
404
+ import { getCommits, isGitRepo, getFileStats } from '../git';
405
+ import { calculateFileChurn } from '../metrics/file-churn';
406
+ import { calculateTimeSpiral } from '../metrics/time-spiral';
407
+ import { calculateVelocityAnomaly } from '../metrics/velocity-anomaly';
408
+ import { calculateCodeStability } from '../metrics/code-stability';
409
+
410
+ interface LevelResult {
411
+ level: number;
412
+ confidence: number;
413
+ responses: QuestionResponses;
414
+ reasoning: string[];
415
+ source: 'ml' | 'fallback';
416
+ ece?: number;
417
+ sampleCount?: number;
418
+ }
419
+
420
+ export function createLevelCommand(): Command {
421
+ const cmd = new Command('level')
422
+ .description('Classify vibe level for upcoming work (interactive)')
423
+ .option('--quick', 'Non-interactive mode with neutral defaults', false)
424
+ .option('--json', 'Output as JSON', false)
425
+ .option('-r, --repo <path>', 'Repository path for metrics', process.cwd())
426
+ .option('--since <date>', 'Git history start for metrics (default: 30 days ago)', '30 days ago')
427
+ .option(
428
+ '--answers <responses>',
429
+ 'Pre-filled answers as JSON (e.g., \'{"reversibility":1,"blastRadius":0}\')'
430
+ )
431
+ .action(async (options) => {
432
+ await runLevel(options);
433
+ });
434
+
435
+ return cmd;
436
+ }
437
+
438
+ async function runLevel(options: {
439
+ quick: boolean;
440
+ json: boolean;
441
+ repo: string;
442
+ since: string;
443
+ answers?: string;
444
+ }): Promise<void> {
445
+ let responses: QuestionResponses;
446
+
447
+ if (options.quick) {
448
+ // Non-interactive: use defaults or provided answers
449
+ responses = {
450
+ reversibility: 0,
451
+ blastRadius: 0,
452
+ verificationCost: 0,
453
+ domainComplexity: 0,
454
+ aiTrackRecord: 0,
455
+ };
456
+
457
+ if (options.answers) {
458
+ try {
459
+ const provided = JSON.parse(options.answers);
460
+ responses = { ...responses, ...provided };
461
+ } catch {
462
+ console.error(chalk.red('Invalid --answers JSON'));
463
+ process.exit(1);
464
+ }
465
+ }
466
+ } else {
467
+ // Interactive mode
468
+ if (!process.stdin.isTTY) {
469
+ console.error(chalk.yellow('Non-interactive terminal detected. Use --quick for non-interactive mode.'));
470
+ process.exit(1);
471
+ }
472
+ responses = await askQuestions();
473
+ }
474
+
475
+ const result = await classifyLevel(responses, options.repo, options.since);
476
+
477
+ if (options.json) {
478
+ console.log(JSON.stringify(result, null, 2));
479
+ } else {
480
+ displayResult(result);
481
+ }
482
+ }
483
+
484
+ async function askQuestions(): Promise<QuestionResponses> {
485
+ // Dynamic import for enquirer
486
+ const Enquirer = (await import('enquirer')).default;
487
+
488
+ const responses: Partial<QuestionResponses> = {};
489
+
490
+ console.log('');
491
+ console.log(chalk.bold.cyan('═'.repeat(60)));
492
+ console.log(chalk.bold.cyan(' VIBE LEVEL CLASSIFICATION'));
493
+ console.log(chalk.bold.cyan('═'.repeat(60)));
494
+ console.log('');
495
+ console.log(chalk.gray('Answer 5 questions to determine the appropriate vibe level.'));
496
+ console.log(chalk.gray('Use ↑/↓ arrows to select, Enter to confirm.'));
497
+ console.log('');
498
+
499
+ for (const question of VIBE_QUESTIONS) {
500
+ const answer = await Enquirer.prompt<{ answer: string }>({
501
+ type: 'select',
502
+ name: 'answer',
503
+ message: question.text,
504
+ choices: question.options.map((opt) => ({
505
+ name: opt.label,
506
+ message: `${opt.label} ${chalk.gray('- ' + opt.description)}`,
507
+ value: String(opt.value),
508
+ })),
509
+ });
510
+
511
+ const selected = question.options.find((o) => o.label === answer.answer);
512
+ responses[question.id] = (selected?.value ?? 0) as -2 | -1 | 0 | 1;
513
+ console.log('');
514
+ }
515
+
516
+ return responses as QuestionResponses;
517
+ }
518
+
519
+ async function classifyLevel(
520
+ responses: QuestionResponses,
521
+ repoPath: string,
522
+ since: string
523
+ ): Promise<LevelResult> {
524
+ // Try to get real metrics from git history
525
+ let metricsFeatures = [0.7, 0.7, 0.7, 0.7]; // Defaults if no git history
526
+ let source: 'ml' | 'fallback' = 'fallback';
527
+
528
+ try {
529
+ if (await isGitRepo(repoPath)) {
530
+ const commits = await getCommits(repoPath, since);
531
+
532
+ if (commits.length >= 3) {
533
+ const fileStats = await getFileStats(repoPath, since);
534
+
535
+ const fileChurn = calculateFileChurn(commits, fileStats.filesPerCommit);
536
+ const timeSpiral = calculateTimeSpiral(commits);
537
+ const velocityAnomaly = calculateVelocityAnomaly(commits);
538
+ const codeStability = calculateCodeStability(commits, fileStats.lineStats);
539
+
540
+ metricsFeatures = [
541
+ fileChurn.value / 100,
542
+ timeSpiral.value / 100,
543
+ velocityAnomaly.value / 100,
544
+ codeStability.value / 100,
545
+ ];
546
+ source = 'ml';
547
+ }
548
+ }
549
+ } catch {
550
+ // Fall back to defaults if git fails
551
+ }
552
+
553
+ // Load calibration state (contains learned weights)
554
+ const calibration = loadCalibration(repoPath);
555
+
556
+ // Build full feature vector: 5 questions + 4 metrics
557
+ const features = [
558
+ responses.reversibility,
559
+ responses.blastRadius,
560
+ responses.verificationCost,
561
+ responses.domainComplexity,
562
+ responses.aiTrackRecord,
563
+ ...metricsFeatures,
564
+ ];
565
+
566
+ // Use ML model with learned weights
567
+ const model: ModelState = {
568
+ weights: calibration.weights,
569
+ thresholds: calibration.thresholds,
570
+ };
571
+
572
+ const prediction = predictWithConfidence(features, model);
573
+
574
+ // Use ML prediction (NOT additive formula)
575
+ const level = prediction.level;
576
+ const confidence = prediction.confidence;
577
+
578
+ // Build reasoning
579
+ const reasoning: string[] = [];
580
+
581
+ if (source === 'ml') {
582
+ reasoning.push(`Based on ${since} git history + your answers`);
583
+ if (metricsFeatures[0] < 0.7) reasoning.push('File churn detected - code needed rework');
584
+ if (metricsFeatures[1] < 0.7) reasoning.push('Time spirals detected - rapid fix commits');
585
+ } else {
586
+ reasoning.push('No git history available - using question answers only');
587
+ }
588
+
589
+ if (responses.reversibility <= -1) reasoning.push('Low reversibility requires careful review');
590
+ if (responses.blastRadius <= -1) reasoning.push('Wide blast radius increases risk');
591
+ if (responses.verificationCost <= -1) reasoning.push('High verification cost needs extra attention');
592
+ if (responses.domainComplexity <= -1) reasoning.push('Domain complexity may cause AI errors');
593
+ if (responses.aiTrackRecord <= -1) reasoning.push('AI track record suggests caution');
594
+
595
+ if (reasoning.length === 0) {
596
+ reasoning.push('Standard risk profile - proceed with appropriate level');
597
+ }
598
+
599
+ return {
600
+ level,
601
+ confidence,
602
+ responses,
603
+ reasoning,
604
+ source,
605
+ ece: calibration.ece,
606
+ sampleCount: calibration.samples.length,
607
+ };
608
+ }
609
+
610
+ function displayResult(result: LevelResult): void {
611
+ const levelDescriptions: Record<number, { name: string; trust: string; verify: string }> = {
612
+ 5: { name: 'Full Automation', trust: '95%', verify: 'Final review only' },
613
+ 4: { name: 'High Trust', trust: '80%', verify: 'Spot check' },
614
+ 3: { name: 'Balanced', trust: '60%', verify: 'Review key outputs' },
615
+ 2: { name: 'AI-Augmented', trust: '40%', verify: 'Review every change' },
616
+ 1: { name: 'Human-Led', trust: '20%', verify: 'Review every line' },
617
+ 0: { name: 'Manual Only', trust: '0%', verify: 'No AI assistance' },
618
+ };
619
+
620
+ const desc = levelDescriptions[result.level];
621
+
622
+ console.log('');
623
+ console.log(chalk.bold.cyan('═'.repeat(60)));
624
+ console.log('');
625
+
626
+ // Level display with color coding
627
+ const levelColor = result.level >= 4 ? chalk.green : result.level >= 2 ? chalk.yellow : chalk.red;
628
+ console.log(` ${chalk.bold('RECOMMENDED LEVEL:')} ${levelColor.bold(`${result.level} - ${desc.name}`)}`);
629
+ console.log('');
630
+ console.log(` ${chalk.gray('Trust:')} ${desc.trust}`);
631
+ console.log(` ${chalk.gray('Verify:')} ${desc.verify}`);
632
+ console.log(` ${chalk.gray('Confidence:')} ${(result.confidence * 100).toFixed(0)}%`);
633
+ console.log('');
634
+
635
+ // Model info
636
+ if (result.source === 'ml') {
637
+ console.log(chalk.green(` ✓ Using ML model with ${result.sampleCount || 0} calibration samples`));
638
+ if (result.ece !== undefined && result.ece > 0) {
639
+ console.log(chalk.gray(` ECE: ${(result.ece * 100).toFixed(1)}%`));
640
+ }
641
+ } else {
642
+ console.log(chalk.yellow(` ⚠ Fallback mode (no git history available)`));
643
+ }
644
+
645
+ console.log('');
646
+ console.log(chalk.bold.yellow(' REASONING:'));
647
+ for (const reason of result.reasoning) {
648
+ console.log(chalk.yellow(` • ${reason}`));
649
+ }
650
+
651
+ console.log('');
652
+ console.log(chalk.bold.cyan('═'.repeat(60)));
653
+ console.log('');
654
+ console.log(chalk.gray(` After your work, run:`));
655
+ console.log(chalk.white(` vibe-check --score --calibrate ${result.level}`));
656
+ console.log('');
657
+ }
658
+ ```
659
+
660
+ **Validation:** `npm run build && npm run dev level --quick --json`
661
+
662
+ ---
663
+
664
+ ## Implementation Order
665
+
666
+ **CRITICAL: Sequence matters. Do not reorder.**
667
+
668
+ | Step | Action | Validation | Rollback |
669
+ |------|--------|------------|----------|
670
+ | 0 | Run baseline tests | `npm test` passes | N/A |
671
+ | 1 | Add `partialFit` to ordered-logistic.ts | `npm run build` | `git checkout src/recommend/ordered-logistic.ts` |
672
+ | 2 | Add `inferTrueLevel` to ece.ts | `npm run build` | `git checkout src/calibration/ece.ts` |
673
+ | 3 | Replace storage.ts with learning loop | `npm run build` | `git checkout src/calibration/storage.ts` |
674
+ | 4 | Update calibration/index.ts exports | `npm run build` | `git checkout src/calibration/index.ts` |
675
+ | 5 | Update recommend/index.ts exports | `npm run build` | `git checkout src/recommend/index.ts` |
676
+ | 6 | Replace level.ts with ML version | `npm run build` | `git checkout src/commands/level.ts` |
677
+ | 7 | Full integration test | `npm test && npm run dev level --quick` | Revert all |
678
+ | 8 | Commit | `git commit` | N/A |
679
+
680
+ ---
681
+
682
+ ## Validation Strategy
683
+
684
+ ### Syntax Validation
685
+ ```bash
686
+ npm run build
687
+ # Expected: No TypeScript errors
688
+ ```
689
+
690
+ ### Unit Test Validation
691
+ ```bash
692
+ npm test
693
+ # Expected: All existing tests pass
694
+ ```
695
+
696
+ ### Integration Validation
697
+ ```bash
698
+ # Test ML model is used
699
+ npm run dev level --quick --json
700
+ # Expected: Output includes "source": "ml" when run in a git repo with at least 3 commits in the --since window
701
+
702
+ # Test calibration triggers learning
703
+ npm run dev analyze --score --calibrate 3 --since "1 week ago"
704
+ # Check .vibe-check/calibration.json has updated weights after 10 samples
705
+
706
+ # Verify ECE is calculated
707
+ cat .vibe-check/calibration.json | grep '"ece"'
708
+ # Expected: ece value present
709
+ ```
710
+
711
+ ### Manual Validation: Learning Loop
712
+ ```bash
713
+ # Simulate 10 calibration samples
714
+ for i in {1..10}; do
715
+ npm run dev analyze --score --calibrate 3 --since "1 week ago" > /dev/null
716
+ done
717
+
718
+ # Check weights have changed from defaults
719
+ cat .vibe-check/calibration.json
720
+ # Expected: weights array differs from DEFAULT_MODEL.weights
721
+ # Expected: version is "2.1.0" (indicating ML-learned)
722
+ ```
723
+
724
+ ---
725
+
726
+ ## Rollback Procedure
727
+
728
+ **Time to rollback:** ~3 minutes
729
+
730
+ ### Full Rollback
731
+ ```bash
732
+ # Step 1: Reset all changed files
733
+ git checkout \
734
+ src/recommend/ordered-logistic.ts \
735
+ src/recommend/index.ts \
736
+ src/calibration/ece.ts \
737
+ src/calibration/storage.ts \
738
+ src/calibration/index.ts \
739
+ src/commands/level.ts
740
+
741
+ # Step 2: Rebuild
742
+ npm run build
743
+
744
+ # Step 3: Verify
745
+ npm test
746
+ ```
747
+
748
+ ### Partial Rollback (keep learning, revert level command)
749
+ ```bash
750
+ git checkout src/commands/level.ts
751
+ npm run build
752
+ ```
753
+
754
+ ---
755
+
756
+ ## Risk Assessment
757
+
758
+ ### Medium Risk: Learning Instability
759
+ - **What:** Weights could diverge with bad samples
760
+ - **Mitigation:** Restart from DEFAULT_MODEL on every retrain, train for multiple epochs, and enforce ordered thresholds
761
+ - **Detection:** Check that every weight stays within a reasonable range (-10 to 10)
762
+ - **Recovery:** Delete .vibe-check/calibration.json to reset
763
+
764
+ ### Low Risk: Git Performance in Level Command
765
+ - **What:** Reading 30 days of history could be slow
766
+ - **Mitigation:** Only fetch history if isGitRepo() returns true and the repo has commits
767
+ - **Detection:** `time npm run dev -- level --quick`
768
+ - **Recovery:** Reduce `--since` default or skip metrics
769
+
770
+ ### Low Risk: Backward Compatibility
771
+ - **What:** Old calibration.json files may lack the fields added by this plan
772
+ - **Mitigation:** All fields are optional, defaults provided
773
+ - **Detection:** Load an old calibration.json file and check that it still works
774
+ - **Recovery:** Version field allows migration if needed
775
+
776
+ ---
777
+
778
+ ## Approval Checklist
779
+
780
+ **Human must verify before /implement:**
781
+
782
+ - [ ] Every file specified precisely (full content provided)
783
+ - [ ] All code complete (no placeholders)
784
+ - [ ] Validation commands provided
785
+ - [ ] Rollback procedure complete
786
+ - [ ] Implementation order is correct
787
+ - [ ] Risks identified and mitigated
788
+ - [ ] No breaking changes to existing functionality
789
+ - [ ] All 6 gaps addressed
790
+
791
+ ---
792
+
793
+ ## Progress Files
794
+
795
+ ### `feature-list.json`
796
+
797
+ ```json
798
+ {
799
+ "project": "vibe-check",
800
+ "version": "2.1.0",
801
+ "features": [
802
+ {
803
+ "id": "ml-learning-loop",
804
+ "name": "ML Learning Loop",
805
+ "description": "Complete implementation of model learning from calibration samples",
806
+ "status": "pending",
807
+ "passes": false,
808
+ "files": [
809
+ "src/recommend/ordered-logistic.ts",
810
+ "src/recommend/index.ts",
811
+ "src/calibration/ece.ts",
812
+ "src/calibration/storage.ts",
813
+ "src/calibration/index.ts",
814
+ "src/commands/level.ts"
815
+ ],
816
+ "validation": "npm run build && npm test && npm run dev -- level --quick --json",
817
+ "gaps_addressed": [
818
+ "Model Learning (partial_fit)",
819
+ "Feedback Loop (retrain trigger)",
820
+ "Level Uses ML",
821
+ "Metrics Integration",
822
+ "Outcome-Based Updates",
823
+ "Calibration Triggers Learning"
824
+ ]
825
+ }
826
+ ]
827
+ }
828
+ ```
829
+
830
+ ### `claude-progress.json`
831
+
832
+ ```json
833
+ {
834
+ "project": "vibe-check",
835
+ "current_state": {
836
+ "phase": "planning",
837
+ "working_on": "ML Learning Loop - Complete Implementation",
838
+ "next_steps": [
839
+ "Approve implementation plan",
840
+ "Run /implement",
841
+ "Verify learning loop works with 10 samples"
842
+ ],
843
+ "blockers": []
844
+ },
845
+ "sessions": [
846
+ {
847
+ "date": "2025-11-28",
848
+ "summary": "Created complete plan addressing all 6 ML learning gaps"
849
+ }
850
+ ]
851
+ }
852
+ ```
853
+
854
+ ---
855
+
856
+ ## Summary: Before vs After
857
+
858
+ ### Before (Current State)
859
+
860
+ ```
861
+ vibe-check level --quick
862
+
863
+ calculateBaseLevel(responses) // Simple: 3 + Q1 + Q2 + Q3 + Q4 + Q5
864
+
865
+ return level (no ML, no metrics, no learning)
866
+ ```
867
+
868
+ ```
869
+ vibe-check --calibrate 3
870
+
871
+ addSample(sample) // Store passively
872
+
873
+ // No learning ever happens
874
+ ```
875
+
876
+ ### After (This Plan)
877
+
878
+ ```
879
+ vibe-check level --quick
880
+
881
+ loadCalibration(repo) // Get learned weights
882
+ getCommits + getFileStats // Get actual git metrics
883
+ features = [questions..., metrics...]
884
+ predictWithConfidence(features, model) // Use ML
885
+
886
+ return level (ML-based, uses real metrics)
887
+ ```
888
+
889
+ ```
890
+ vibe-check --calibrate 3
891
+
892
+ addSample(sample)
893
+
894
+ if (samples % 10 === 0 || ece > 0.15):
895
+ retrain():
896
+ - inferTrueLevel from vibeScore
897
+ - batchPartialFit(model, samples)
898
+ - calculateECE()
899
+ - save updated weights
900
+
901
+ // Model learns and improves
902
+ ```
903
+
904
+ ---
905
+
906
+ ## Next Step
907
+
908
+ Once approved: `/implement ml-learning-loop-complete-plan-2025-11-28.md`