@boshu2/vibe-check 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/bundles/ml-learning-loop-complete-plan-2025-11-28.md +908 -0
- package/.agents/bundles/unified-vibe-system-plan-phase1-2025-11-28.md +962 -0
- package/.agents/bundles/unified-vibe-system-research-2025-11-28.md +1003 -0
- package/.agents/bundles/vibe-check-ecosystem-plan-2025-11-29.md +635 -0
- package/.agents/bundles/vibe-check-gamification-complete-2025-11-29.md +132 -0
- package/.agents/bundles/vibe-score-scientific-framework-2025-11-28.md +602 -0
- package/.vibe-check/calibration.json +38 -0
- package/.vibe-check/latest.json +114 -0
- package/CHANGELOG.md +47 -0
- package/CLAUDE.md +178 -0
- package/README.md +222 -7
- package/action.yml +270 -0
- package/dashboard/app.js +494 -0
- package/dashboard/index.html +235 -0
- package/dashboard/styles.css +647 -0
- package/dist/calibration/ece.d.ts +26 -0
- package/dist/calibration/ece.d.ts.map +1 -0
- package/dist/calibration/ece.js +93 -0
- package/dist/calibration/ece.js.map +1 -0
- package/dist/calibration/index.d.ts +3 -0
- package/dist/calibration/index.d.ts.map +1 -0
- package/dist/calibration/index.js +15 -0
- package/dist/calibration/index.js.map +1 -0
- package/dist/calibration/storage.d.ts +34 -0
- package/dist/calibration/storage.d.ts.map +1 -0
- package/dist/calibration/storage.js +188 -0
- package/dist/calibration/storage.js.map +1 -0
- package/dist/cli.js +31 -76
- package/dist/cli.js.map +1 -1
- package/dist/commands/analyze.d.ts +16 -0
- package/dist/commands/analyze.d.ts.map +1 -0
- package/dist/commands/analyze.js +256 -0
- package/dist/commands/analyze.js.map +1 -0
- package/dist/commands/index.d.ts +5 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +13 -0
- package/dist/commands/index.js.map +1 -0
- package/dist/commands/init-hook.d.ts +3 -0
- package/dist/commands/init-hook.d.ts.map +1 -0
- package/dist/commands/init-hook.js +161 -0
- package/dist/commands/init-hook.js.map +1 -0
- package/dist/commands/level.d.ts +3 -0
- package/dist/commands/level.d.ts.map +1 -0
- package/dist/commands/level.js +277 -0
- package/dist/commands/level.js.map +1 -0
- package/dist/commands/profile.d.ts +4 -0
- package/dist/commands/profile.d.ts.map +1 -0
- package/dist/commands/profile.js +143 -0
- package/dist/commands/profile.js.map +1 -0
- package/dist/gamification/achievements.d.ts +15 -0
- package/dist/gamification/achievements.d.ts.map +1 -0
- package/dist/gamification/achievements.js +273 -0
- package/dist/gamification/achievements.js.map +1 -0
- package/dist/gamification/index.d.ts +8 -0
- package/dist/gamification/index.d.ts.map +1 -0
- package/dist/gamification/index.js +30 -0
- package/dist/gamification/index.js.map +1 -0
- package/dist/gamification/profile.d.ts +46 -0
- package/dist/gamification/profile.d.ts.map +1 -0
- package/dist/gamification/profile.js +272 -0
- package/dist/gamification/profile.js.map +1 -0
- package/dist/gamification/streaks.d.ts +26 -0
- package/dist/gamification/streaks.d.ts.map +1 -0
- package/dist/gamification/streaks.js +132 -0
- package/dist/gamification/streaks.js.map +1 -0
- package/dist/gamification/types.d.ts +111 -0
- package/dist/gamification/types.d.ts.map +1 -0
- package/dist/gamification/types.js +26 -0
- package/dist/gamification/types.js.map +1 -0
- package/dist/gamification/xp.d.ts +37 -0
- package/dist/gamification/xp.d.ts.map +1 -0
- package/dist/gamification/xp.js +115 -0
- package/dist/gamification/xp.js.map +1 -0
- package/dist/git.d.ts +11 -0
- package/dist/git.d.ts.map +1 -1
- package/dist/git.js +52 -0
- package/dist/git.js.map +1 -1
- package/dist/metrics/code-stability.d.ts +13 -0
- package/dist/metrics/code-stability.d.ts.map +1 -0
- package/dist/metrics/code-stability.js +74 -0
- package/dist/metrics/code-stability.js.map +1 -0
- package/dist/metrics/file-churn.d.ts +8 -0
- package/dist/metrics/file-churn.d.ts.map +1 -0
- package/dist/metrics/file-churn.js +75 -0
- package/dist/metrics/file-churn.js.map +1 -0
- package/dist/metrics/time-spiral.d.ts +8 -0
- package/dist/metrics/time-spiral.d.ts.map +1 -0
- package/dist/metrics/time-spiral.js +69 -0
- package/dist/metrics/time-spiral.js.map +1 -0
- package/dist/metrics/velocity-anomaly.d.ts +13 -0
- package/dist/metrics/velocity-anomaly.d.ts.map +1 -0
- package/dist/metrics/velocity-anomaly.js +67 -0
- package/dist/metrics/velocity-anomaly.js.map +1 -0
- package/dist/output/index.d.ts +6 -3
- package/dist/output/index.d.ts.map +1 -1
- package/dist/output/index.js +4 -3
- package/dist/output/index.js.map +1 -1
- package/dist/output/json.d.ts +2 -2
- package/dist/output/json.d.ts.map +1 -1
- package/dist/output/json.js +54 -0
- package/dist/output/json.js.map +1 -1
- package/dist/output/markdown.d.ts +2 -2
- package/dist/output/markdown.d.ts.map +1 -1
- package/dist/output/markdown.js +34 -1
- package/dist/output/markdown.js.map +1 -1
- package/dist/output/terminal.d.ts +6 -2
- package/dist/output/terminal.d.ts.map +1 -1
- package/dist/output/terminal.js +131 -3
- package/dist/output/terminal.js.map +1 -1
- package/dist/recommend/index.d.ts +3 -0
- package/dist/recommend/index.d.ts.map +1 -0
- package/dist/recommend/index.js +14 -0
- package/dist/recommend/index.js.map +1 -0
- package/dist/recommend/ordered-logistic.d.ts +49 -0
- package/dist/recommend/ordered-logistic.d.ts.map +1 -0
- package/dist/recommend/ordered-logistic.js +153 -0
- package/dist/recommend/ordered-logistic.js.map +1 -0
- package/dist/recommend/questions.d.ts +19 -0
- package/dist/recommend/questions.d.ts.map +1 -0
- package/dist/recommend/questions.js +73 -0
- package/dist/recommend/questions.js.map +1 -0
- package/dist/score/index.d.ts +21 -0
- package/dist/score/index.d.ts.map +1 -0
- package/dist/score/index.js +48 -0
- package/dist/score/index.js.map +1 -0
- package/dist/score/weights.d.ts +16 -0
- package/dist/score/weights.d.ts.map +1 -0
- package/dist/score/weights.js +28 -0
- package/dist/score/weights.js.map +1 -0
- package/dist/types.d.ts +83 -0
- package/dist/types.d.ts.map +1 -1
- package/hooks/pre-push +103 -0
- package/package.json +10 -9
|
@@ -0,0 +1,908 @@
|
|
|
1
|
+
# ML Learning Loop: Complete Implementation Plan
|
|
2
|
+
|
|
3
|
+
**Type:** Plan
|
|
4
|
+
**Created:** 2025-11-28
|
|
5
|
+
**Depends On:** Gap analysis from current session
|
|
6
|
+
**Loop:** Middle (bridges research to implementation)
|
|
7
|
+
**Tags:** vibe-check, ml-learning, calibration, ordered-logistic, partial-fit
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
Implement the **complete ML learning loop** for vibe-check. This plan addresses ALL 6 identified gaps to make the model actually learn from experience.
|
|
14
|
+
|
|
15
|
+
**What changes:**
|
|
16
|
+
1. Add `partialFit()` to ordered-logistic.ts for incremental learning
|
|
17
|
+
2. Add `retrain()` trigger to storage.ts based on ECE threshold or sample count
|
|
18
|
+
3. Modify `level` command to use ML model + actual metrics (not additive formula)
|
|
19
|
+
4. Wire `--calibrate` to trigger learning after adding sample
|
|
20
|
+
5. Add outcome inference from vibe score to "true" level
|
|
21
|
+
6. Update model weights in calibration.json after learning
|
|
22
|
+
|
|
23
|
+
**What doesn't change:**
|
|
24
|
+
- Existing 4 semantic-free metrics
|
|
25
|
+
- CLI interface
|
|
26
|
+
- ECE calculation formula
|
|
27
|
+
- Storage file format (adds fields, backward compatible)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## The 6 Gaps Addressed
|
|
32
|
+
|
|
33
|
+
| Gap | Solution | Files Modified |
|
|
34
|
+
|-----|----------|----------------|
|
|
35
|
+
| Model Learning | Add `partialFit()` | `ordered-logistic.ts` |
|
|
36
|
+
| Feedback Loop | Add `retrain()` trigger | `storage.ts` |
|
|
37
|
+
| Level Uses ML | Replace `calculateBaseLevel` with `predict` | `level.ts` |
|
|
38
|
+
| Metrics Integration | Fetch recent git metrics in `level` | `level.ts` |
|
|
39
|
+
| Outcome-Based Updates | Infer "true" level from score | `ece.ts`, `storage.ts` |
|
|
40
|
+
| Calibration Triggers Learning | Call `retrain()` after `addSample` | `storage.ts` |
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## PDC Strategy
|
|
45
|
+
|
|
46
|
+
### Prevent
|
|
47
|
+
- [x] Read all existing code (completed above)
|
|
48
|
+
- [ ] Run `npm test` before starting
|
|
49
|
+
- [ ] Commit after each file modification
|
|
50
|
+
|
|
51
|
+
### Detect
|
|
52
|
+
- [ ] `npm run build` after each TypeScript change
|
|
53
|
+
- [ ] `npm test` after completing each gap
|
|
54
|
+
- [ ] Manual test: `vibe-check level --quick` should use ML
|
|
55
|
+
|
|
56
|
+
### Correct
|
|
57
|
+
- [ ] Git revert individual commits if issues found
|
|
58
|
+
- [ ] Each function is independent - can revert selectively
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Files to Modify
|
|
63
|
+
|
|
64
|
+
### 1. `src/recommend/ordered-logistic.ts` (ADD `partialFit`)
|
|
65
|
+
|
|
66
|
+
**Purpose:** Enable incremental learning from calibration samples
|
|
67
|
+
|
|
68
|
+
**Current:** Lines 1-113 (prediction only, no learning)
|
|
69
|
+
|
|
70
|
+
**Add after line 112 (before closing):**
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
/**
 * Single-step stochastic gradient descent update for the ordered-logistic model.
 *
 * The update works on the cumulative residuals at each cutpoint k:
 *
 *   r_k = P(level <= k) - 1[trueLevel <= k]
 *
 * - threshold k moves by  -lr * r_k
 * - weight j moves by     +lr * x_j * sum_k r_k
 *
 * The weight step has the opposite sign of the threshold step because the
 * linear score is subtracted from the thresholds.
 * NOTE(review): this assumes predictProba uses P(level <= k) = sigmoid(t_k - w·x),
 * which is what the threshold step already implies — confirm against predictProba.
 *
 * The previous weight gradient summed (p_k - y_k) over every level. Both
 * vectors sum to 1, so that gradient was always ~0 and the weights never moved.
 *
 * Learning rate decays: lr = learningRate / (1 + 0.001 * sampleCount)
 *
 * @param model        Current weights and thresholds (not mutated).
 * @param features     Feature vector for the sample.
 * @param trueLevel    Observed level; rounded and clamped to [0, N_LEVELS - 1].
 * @param learningRate Base learning rate before decay.
 * @param sampleCount  Number of samples seen so far, used for the decay.
 * @returns A new ModelState holding the updated weights and thresholds.
 */
export function partialFit(
  model: ModelState,
  features: number[],
  trueLevel: number,
  learningRate: number = 0.01,
  sampleCount: number = 1
): ModelState {
  const effectiveLr = learningRate / (1 + 0.001 * sampleCount);

  // Current per-level probabilities under the existing model
  const probs = predictProba(features, model);

  // Index of the observed level, clamped into the valid range
  const targetLevel = Math.min(Math.max(0, Math.round(trueLevel)), N_LEVELS - 1);

  // Cumulative residual at every cutpoint, using a running sum of probabilities
  const residuals: number[] = [];
  let cumProb = 0;
  let residualSum = 0;
  for (let k = 0; k < model.thresholds.length; k++) {
    cumProb += probs[k];
    const cumTarget = k >= targetLevel ? 1 : 0;
    const residual = cumProb - cumTarget;
    residuals.push(residual);
    residualSum += residual;
  }

  // Weight step: dL/dw_j = -x_j * sum_k r_k, so descending adds lr * x_j * sum_k r_k
  const newWeights = [...model.weights];
  for (let j = 0; j < features.length && j < newWeights.length; j++) {
    newWeights[j] += effectiveLr * residualSum * features[j];
  }

  // Threshold step: dL/dt_k = r_k
  const newThresholds = model.thresholds.map(
    (threshold, k) => threshold - effectiveLr * residuals[k]
  );

  // Ensure thresholds remain strictly increasing after the update
  for (let i = 1; i < newThresholds.length; i++) {
    if (newThresholds[i] <= newThresholds[i - 1]) {
      newThresholds[i] = newThresholds[i - 1] + 0.1;
    }
  }

  return {
    weights: newWeights,
    thresholds: newThresholds,
  };
}
|
|
131
|
+
|
|
132
|
+
/**
 * Feed a list of samples through partialFit, one after another.
 *
 * Each update starts from the model returned by the previous sample. The
 * sample's 1-based position is forwarded as partialFit's sampleCount.
 * An empty list returns the input model untouched.
 *
 * @param model        Starting model.
 * @param samples      Training samples, applied in array order.
 * @param learningRate Base learning rate handed to every partialFit call.
 * @returns The model after the last sample has been applied.
 */
export function batchPartialFit(
  model: ModelState,
  samples: Array<{ features: number[]; trueLevel: number }>,
  learningRate: number = 0.01
): ModelState {
  return samples.reduce(
    (fitted, sample, position) =>
      partialFit(fitted, sample.features, sample.trueLevel, learningRate, position + 1),
    model
  );
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Validation:** `npm run build`
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
### 2. `src/calibration/ece.ts` (ADD `inferTrueLevel`)
|
|
160
|
+
|
|
161
|
+
**Purpose:** Infer the "true" level based on actual vibe score outcome
|
|
162
|
+
|
|
163
|
+
**Add after line 72:**
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
/**
 * Derive a training label (the "true" vibe level) from a measured vibe score.
 *
 * The score is compared against descending lower bounds and the first bound
 * it reaches decides the level:
 *
 *   >= 0.90 → 5 (Elite flow)
 *   >= 0.80 → 4 (High flow)
 *   >= 0.65 → 3 (Balanced)
 *   >= 0.50 → 2 (AI-Augmented)
 *   >= 0.30 → 1 (Human-Led)
 *   otherwise → 0 (Manual)
 *
 * @param vibeScore Measured vibe score.
 * @returns The inferred level, 0 through 5.
 */
export function inferTrueLevel(vibeScore: number): 0 | 1 | 2 | 3 | 4 | 5 {
  // Lower bounds, highest first; the first match wins
  const lowerBounds: ReadonlyArray<readonly [number, 1 | 2 | 3 | 4 | 5]> = [
    [0.90, 5],
    [0.80, 4],
    [0.65, 3],
    [0.50, 2],
    [0.30, 1],
  ];

  for (const [bound, level] of lowerBounds) {
    if (vibeScore >= bound) {
      return level;
    }
  }

  return 0;
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Update exports in `src/calibration/index.ts`:**
|
|
189
|
+
|
|
190
|
+
```typescript
|
|
191
|
+
export { loadCalibration, saveCalibration, addSample, getCalibrationPath } from './storage';
|
|
192
|
+
export { calculateECE, assessOutcome, inferTrueLevel } from './ece';
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
**Validation:** `npm run build`
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
### 3. `src/calibration/storage.ts` (ADD learning loop)
|
|
200
|
+
|
|
201
|
+
**Purpose:** Trigger retraining when samples accumulate or ECE degrades
|
|
202
|
+
|
|
203
|
+
**Replace entire file (lines 1-71):**
|
|
204
|
+
|
|
205
|
+
```typescript
|
|
206
|
+
import * as fs from 'fs';
|
|
207
|
+
import * as path from 'path';
|
|
208
|
+
import { CalibrationState, CalibrationSample } from '../types';
|
|
209
|
+
import { DEFAULT_MODEL, partialFit, batchPartialFit, ModelState } from '../recommend/ordered-logistic';
|
|
210
|
+
import { calculateECE, inferTrueLevel } from './ece';
|
|
211
|
+
|
|
212
|
+
const CALIBRATION_DIR = '.vibe-check';
|
|
213
|
+
const CALIBRATION_FILE = 'calibration.json';
|
|
214
|
+
|
|
215
|
+
// Retraining triggers
|
|
216
|
+
const RETRAIN_SAMPLE_INTERVAL = 10; // Retrain every N samples
|
|
217
|
+
const RETRAIN_ECE_THRESHOLD = 0.15; // Retrain if ECE exceeds this
|
|
218
|
+
|
|
219
|
+
/**
 * Build the location of the calibration file inside a repository.
 *
 * @param repoPath Root directory of the repository.
 * @returns repoPath joined with CALIBRATION_DIR and CALIBRATION_FILE.
 */
export function getCalibrationPath(repoPath: string): string {
  const segments = [repoPath, CALIBRATION_DIR, CALIBRATION_FILE];
  return path.join(...segments);
}
|
|
225
|
+
|
|
226
|
+
/**
 * Read the calibration state for a repository.
 *
 * Returns a fresh default state when the calibration file does not exist,
 * or when it cannot be read, parsed or revived. In that case the failure is
 * swallowed on purpose.
 *
 * @param repoPath Root directory of the repository.
 * @returns The stored state with `lastUpdated` and each sample's `timestamp`
 *          converted to Date objects, or the default state.
 */
export function loadCalibration(repoPath: string): CalibrationState {
  const calibrationFile = getCalibrationPath(repoPath);

  if (!fs.existsSync(calibrationFile)) {
    return defaultCalibrationState();
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(calibrationFile, 'utf-8'));

    // JSON stores dates as strings; turn them back into Date instances
    parsed.lastUpdated = new Date(parsed.lastUpdated);
    parsed.samples = parsed.samples.map((entry: CalibrationSample) => ({
      ...entry,
      timestamp: new Date(entry.timestamp),
    }));

    return parsed;
  } catch {
    return defaultCalibrationState();
  }
}
|
|
250
|
+
|
|
251
|
+
/**
 * Write the calibration state to the repository's calibration file.
 *
 * The containing directory is created first if it does not exist. The state
 * is serialized as JSON with 2-space indentation.
 *
 * @param repoPath Root directory of the repository.
 * @param state    Calibration state to persist.
 */
export function saveCalibration(repoPath: string, state: CalibrationState): void {
  const targetDir = path.join(repoPath, CALIBRATION_DIR);
  const targetFile = getCalibrationPath(repoPath);

  const dirMissing = !fs.existsSync(targetDir);
  if (dirMissing) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(targetFile, serialized);
}
|
|
264
|
+
|
|
265
|
+
/**
 * Add a calibration sample, persist the state and retrain when warranted.
 *
 * Retraining needs at least 5 samples and is triggered when either:
 *   1. the sample count is a multiple of RETRAIN_SAMPLE_INTERVAL, or
 *   2. the ECE over the stored samples exceeds RETRAIN_ECE_THRESHOLD.
 *
 * The ECE is recomputed after the new sample is appended. Previously the
 * trigger read the value left over from the last retrain, so a freshly added
 * sample could never fire the ECE condition by itself.
 *
 * @param repoPath Root directory of the repository.
 * @param sample   Sample to append.
 * @returns The saved state, carrying new weights when a retrain happened.
 */
export function addSample(
  repoPath: string,
  sample: CalibrationSample
): CalibrationState {
  const state = loadCalibration(repoPath);
  state.samples.push(sample);
  state.lastUpdated = new Date();

  const hasEnoughSamples = state.samples.length >= 5;

  // Only evaluate ECE once there is as much data as retrain itself requires
  if (hasEnoughSamples) {
    state.ece = calculateECE(state.samples);
  }

  const shouldRetrain =
    state.samples.length % RETRAIN_SAMPLE_INTERVAL === 0 ||
    state.ece > RETRAIN_ECE_THRESHOLD;

  const nextState = shouldRetrain && hasEnoughSamples ? retrain(state) : state;

  saveCalibration(repoPath, nextState);
  return nextState;
}
|
|
297
|
+
|
|
298
|
+
/**
 * Refit the model from scratch on every stored sample.
 *
 * Labels come from inferTrueLevel(sample.vibeScore). Training always starts
 * from a copy of DEFAULT_MODEL and runs min(10, ceil(50 / sampleCount))
 * passes of batchPartialFit with a base learning rate of 0.05.
 *
 * With fewer than 5 samples the input state is returned unchanged.
 *
 * NOTE(review): the returned `ece` is calculateECE over the stored samples.
 * Whether that reflects the refitted weights depends on calculateECE, which
 * is not visible here — confirm.
 *
 * @param state Current calibration state (not mutated).
 * @returns A new state with refitted weights and thresholds, recomputed ECE,
 *          a fresh `lastUpdated`, and version '2.1.0'.
 */
export function retrain(state: CalibrationState): CalibrationState {
  const sampleTotal = state.samples.length;
  if (sampleTotal < 5) {
    return state; // Not enough data
  }

  // Pair every feature vector with the level inferred from its vibe score
  const labelled = state.samples.map(({ features, vibeScore }) => ({
    features,
    trueLevel: inferTrueLevel(vibeScore),
  }));

  // Fewer samples get more passes, capped at 10
  const passes = Math.min(10, Math.ceil(50 / sampleTotal));

  let fitted: ModelState = {
    weights: [...DEFAULT_MODEL.weights],
    thresholds: [...DEFAULT_MODEL.thresholds],
  };
  for (let pass = 0; pass < passes; pass += 1) {
    fitted = batchPartialFit(fitted, labelled, 0.05);
  }

  return {
    ...state,
    weights: fitted.weights,
    thresholds: fitted.thresholds,
    ece: calculateECE(state.samples),
    lastUpdated: new Date(),
    version: '2.1.0', // Marks weights as ML-learned
  };
}
|
|
339
|
+
|
|
340
|
+
/**
 * Manually trigger a retrain for a repository.
 *
 * Loads the stored state, refits it and saves the result. With fewer than
 * 5 samples nothing is retrained or written, and the loaded state is
 * returned as-is.
 *
 * @param repoPath Root directory of the repository.
 * @returns The retrained (and saved) state, or the loaded state when there
 *          is too little data.
 */
export function forceRetrain(repoPath: string): CalibrationState {
  const loaded = loadCalibration(repoPath);
  const tooFewSamples = loaded.samples.length < 5;

  if (!tooFewSamples) {
    const refitted = retrain(loaded);
    saveCalibration(repoPath, refitted);
    return refitted;
  }

  return loaded;
}
|
|
352
|
+
|
|
353
|
+
/**
 * Build the calibration state used when nothing is stored on disk.
 *
 * The weight and threshold arrays are copied from DEFAULT_MODEL, the same way
 * retrain copies them. Handing out the shared arrays would let a caller that
 * mutates the returned state corrupt the module-wide defaults.
 *
 * @returns A state with no samples, default model parameters, ECE 0,
 *          the current time as `lastUpdated`, and version '2.0.0'.
 */
function defaultCalibrationState(): CalibrationState {
  return {
    samples: [],
    weights: [...DEFAULT_MODEL.weights],
    thresholds: [...DEFAULT_MODEL.thresholds],
    ece: 0,
    lastUpdated: new Date(),
    version: '2.0.0',
  };
}
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
**Update exports in `src/calibration/index.ts`:**
|
|
366
|
+
|
|
367
|
+
```typescript
|
|
368
|
+
export { loadCalibration, saveCalibration, addSample, getCalibrationPath, retrain, forceRetrain } from './storage';
|
|
369
|
+
export { calculateECE, assessOutcome, inferTrueLevel } from './ece';
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
**Validation:** `npm run build`
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
### 4. `src/recommend/index.ts` (ADD export for partialFit)
|
|
377
|
+
|
|
378
|
+
**Purpose:** Export new learning functions
|
|
379
|
+
|
|
380
|
+
**Replace lines 1-2:**
|
|
381
|
+
|
|
382
|
+
```typescript
|
|
383
|
+
export { predictProba, predict, predictWithConfidence, DEFAULT_MODEL, ModelState, partialFit, batchPartialFit } from './ordered-logistic';
|
|
384
|
+
export { VIBE_QUESTIONS, calculateBaseLevel, Question } from './questions';
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
**Validation:** `npm run build`
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
### 5. `src/commands/level.ts` (USE ML model + real metrics)
|
|
392
|
+
|
|
393
|
+
**Purpose:** Replace additive formula with ML prediction using learned weights + actual metrics
|
|
394
|
+
|
|
395
|
+
**Replace entire file (lines 1-178):**
|
|
396
|
+
|
|
397
|
+
```typescript
|
|
398
|
+
import { Command } from 'commander';
|
|
399
|
+
import chalk from 'chalk';
|
|
400
|
+
import { QuestionResponses } from '../types';
|
|
401
|
+
import { VIBE_QUESTIONS, calculateBaseLevel } from '../recommend/questions';
|
|
402
|
+
import { predictWithConfidence, ModelState } from '../recommend';
|
|
403
|
+
import { loadCalibration } from '../calibration';
|
|
404
|
+
import { getCommits, isGitRepo, getFileStats } from '../git';
|
|
405
|
+
import { calculateFileChurn } from '../metrics/file-churn';
|
|
406
|
+
import { calculateTimeSpiral } from '../metrics/time-spiral';
|
|
407
|
+
import { calculateVelocityAnomaly } from '../metrics/velocity-anomaly';
|
|
408
|
+
import { calculateCodeStability } from '../metrics/code-stability';
|
|
409
|
+
|
|
410
|
+
/** Outcome of a level classification, as printed or emitted as JSON. */
interface LevelResult {
  /** Predicted vibe level. */
  level: number;

  /** Confidence reported by the model for the predicted level. */
  confidence: number;

  /** Question answers the prediction was based on. */
  responses: QuestionResponses;

  /** Human-readable explanations shown under "REASONING". */
  reasoning: string[];

  /** 'ml' when git metrics fed the prediction, 'fallback' when defaults were used. */
  source: 'ml' | 'fallback';

  /** ECE stored in the calibration state, if available. */
  ece?: number;

  /** Number of calibration samples on record, if available. */
  sampleCount?: number;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Build the `level` sub-command.
 *
 * Registers the --quick, --json, --repo, --since and --answers options and
 * hands the parsed options to runLevel.
 *
 * @returns The configured commander Command.
 */
export function createLevelCommand(): Command {
  const levelCommand = new Command('level');

  levelCommand.description('Classify vibe level for upcoming work (interactive)');
  levelCommand.option('--quick', 'Non-interactive mode with neutral defaults', false);
  levelCommand.option('--json', 'Output as JSON', false);
  levelCommand.option('-r, --repo <path>', 'Repository path for metrics', process.cwd());
  levelCommand.option(
    '--since <date>',
    'Git history start for metrics (default: 30 days ago)',
    '30 days ago'
  );
  levelCommand.option(
    '--answers <responses>',
    'Pre-filled answers as JSON (e.g., \'{"reversibility":1,"blastRadius":0}\')'
  );
  levelCommand.action(async (options) => {
    await runLevel(options);
  });

  return levelCommand;
}
|
|
437
|
+
|
|
438
|
+
/**
 * Entry point of the `level` command.
 *
 * With --quick, every answer starts at the neutral value 0 and is optionally
 * overridden by --answers. Otherwise the questions are asked interactively,
 * which requires a TTY. The classification is then printed as JSON or as a
 * formatted report.
 *
 * --answers is validated before use. JSON.parse accepts any JSON value, and
 * spreading a number, string or array into the responses either does nothing
 * or adds junk keys. A non-numeric answer would flow straight into the model's
 * feature vector. Both cases now exit with an error.
 *
 * @param options Parsed command-line options.
 */
async function runLevel(options: {
  quick: boolean;
  json: boolean;
  repo: string;
  since: string;
  answers?: string;
}): Promise<void> {
  let responses: QuestionResponses;

  if (options.quick) {
    // Non-interactive: use defaults or provided answers
    responses = {
      reversibility: 0,
      blastRadius: 0,
      verificationCost: 0,
      domainComplexity: 0,
      aiTrackRecord: 0,
    };

    if (options.answers) {
      let provided: unknown;
      try {
        provided = JSON.parse(options.answers);
      } catch {
        console.error(chalk.red('Invalid --answers JSON'));
        process.exit(1);
      }

      // Valid JSON is not necessarily an object (e.g. `5`, `"x"`, `[1]`, `null`)
      if (typeof provided !== 'object' || provided === null || Array.isArray(provided)) {
        console.error(chalk.red('--answers must be a JSON object'));
        process.exit(1);
      }

      // Every known answer that was supplied must be a finite number
      const supplied = provided as Record<string, unknown>;
      for (const key of Object.keys(responses)) {
        if (!(key in supplied)) {
          continue;
        }
        const value = supplied[key];
        if (typeof value !== 'number' || !isFinite(value)) {
          console.error(chalk.red(`--answers value for "${key}" must be a number`));
          process.exit(1);
        }
      }

      responses = { ...responses, ...(supplied as Partial<QuestionResponses>) };
    }
  } else {
    // Interactive mode needs a terminal to render the prompts
    if (!process.stdin.isTTY) {
      console.error(chalk.yellow('Non-interactive terminal detected. Use --quick for non-interactive mode.'));
      process.exit(1);
    }
    responses = await askQuestions();
  }

  const result = await classifyLevel(responses, options.repo, options.since);

  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    displayResult(result);
  }
}
|
|
483
|
+
|
|
484
|
+
/**
 * Ask every entry of VIBE_QUESTIONS interactively and collect the answers.
 *
 * enquirer is loaded lazily. Each question is shown as a select prompt. The
 * chosen label is mapped back to the option's numeric value, falling back to
 * 0 when no option matches.
 *
 * @returns The answers keyed by question id.
 */
async function askQuestions(): Promise<QuestionResponses> {
  // Dynamic import for enquirer
  const { default: Enquirer } = await import('enquirer');

  const collected: Partial<QuestionResponses> = {};
  const blankLine = (): void => console.log('');
  const rule = (): void => console.log(chalk.bold.cyan('═'.repeat(60)));

  blankLine();
  rule();
  console.log(chalk.bold.cyan(' VIBE LEVEL CLASSIFICATION'));
  rule();
  blankLine();
  console.log(chalk.gray('Answer 5 questions to determine the appropriate vibe level.'));
  console.log(chalk.gray('Use ↑/↓ arrows to select, Enter to confirm.'));
  blankLine();

  for (const question of VIBE_QUESTIONS) {
    const choices = question.options.map((opt) => ({
      name: opt.label,
      message: `${opt.label} ${chalk.gray('- ' + opt.description)}`,
      value: String(opt.value),
    }));

    const reply = await Enquirer.prompt<{ answer: string }>({
      type: 'select',
      name: 'answer',
      message: question.text,
      choices,
    });

    // The prompt reports the chosen label; look its numeric value up again
    const picked = question.options.find((candidate) => candidate.label === reply.answer);
    collected[question.id] = (picked?.value ?? 0) as -2 | -1 | 0 | 1;
    blankLine();
  }

  return collected as QuestionResponses;
}
|
|
518
|
+
|
|
519
|
+
/**
 * Predict the vibe level from the question answers plus recent git metrics.
 *
 * When the repository has at least 3 commits since `since`, the four metrics
 * (file churn, time spiral, velocity anomaly, code stability) are computed,
 * scaled by 1/100 and used as features, and `source` is 'ml'. Otherwise, or
 * when any git call fails, each metric defaults to 0.7 and `source` is
 * 'fallback'. The prediction always uses the weights and thresholds stored in
 * the calibration state.
 *
 * The reasoning list starts with a line naming the data source, followed by
 * the cautions that were raised. When none was raised, the "Standard risk
 * profile" note is added. That branch used to be unreachable, because the
 * source line is always present and the list was therefore never empty.
 *
 * @param responses Answers to the five questions.
 * @param repoPath  Repository to read git history and calibration from.
 * @param since     Start of the git history window.
 * @returns The classification result.
 */
async function classifyLevel(
  responses: QuestionResponses,
  repoPath: string,
  since: string
): Promise<LevelResult> {
  // Try to get real metrics from git history
  let metricsFeatures = [0.7, 0.7, 0.7, 0.7]; // Defaults if no git history
  let source: 'ml' | 'fallback' = 'fallback';

  try {
    if (await isGitRepo(repoPath)) {
      const commits = await getCommits(repoPath, since);

      if (commits.length >= 3) {
        const fileStats = await getFileStats(repoPath, since);

        const fileChurn = calculateFileChurn(commits, fileStats.filesPerCommit);
        const timeSpiral = calculateTimeSpiral(commits);
        const velocityAnomaly = calculateVelocityAnomaly(commits);
        const codeStability = calculateCodeStability(commits, fileStats.lineStats);

        metricsFeatures = [
          fileChurn.value / 100,
          timeSpiral.value / 100,
          velocityAnomaly.value / 100,
          codeStability.value / 100,
        ];
        source = 'ml';
      }
    }
  } catch {
    // Best effort: fall back to the default metrics if git fails
  }

  // Load calibration state (contains learned weights)
  const calibration = loadCalibration(repoPath);

  // Build full feature vector: 5 questions + 4 metrics
  const features = [
    responses.reversibility,
    responses.blastRadius,
    responses.verificationCost,
    responses.domainComplexity,
    responses.aiTrackRecord,
    ...metricsFeatures,
  ];

  // Use ML model with learned weights
  const model: ModelState = {
    weights: calibration.weights,
    thresholds: calibration.thresholds,
  };

  const prediction = predictWithConfidence(features, model);

  // Use ML prediction (NOT additive formula)
  const level = prediction.level;
  const confidence = prediction.confidence;

  // Build reasoning: one line naming the data source, then the cautions
  const reasoning: string[] = [];
  const cautions: string[] = [];

  if (source === 'ml') {
    reasoning.push(`Based on ${since} git history + your answers`);
    if (metricsFeatures[0] < 0.7) cautions.push('File churn detected - code needed rework');
    if (metricsFeatures[1] < 0.7) cautions.push('Time spirals detected - rapid fix commits');
  } else {
    reasoning.push('No git history available - using question answers only');
  }

  if (responses.reversibility <= -1) cautions.push('Low reversibility requires careful review');
  if (responses.blastRadius <= -1) cautions.push('Wide blast radius increases risk');
  if (responses.verificationCost <= -1) cautions.push('High verification cost needs extra attention');
  if (responses.domainComplexity <= -1) cautions.push('Domain complexity may cause AI errors');
  if (responses.aiTrackRecord <= -1) cautions.push('AI track record suggests caution');

  // Nothing flagged: say so explicitly instead of leaving only the source line
  if (cautions.length === 0) {
    cautions.push('Standard risk profile - proceed with appropriate level');
  }

  reasoning.push(...cautions);

  return {
    level,
    confidence,
    responses,
    reasoning,
    source,
    ece: calibration.ece,
    sampleCount: calibration.samples.length,
  };
}
|
|
609
|
+
|
|
610
|
+
/**
 * Print a classification result as a formatted terminal report.
 *
 * The report shows the recommended level with its trust and verification
 * guidance, the confidence, which data source was used, the reasoning list,
 * and the follow-up calibrate command.
 *
 * NOTE(review): a level outside 0-5 has no description entry and would throw
 * when the level line is printed — confirm the model can only emit 0-5.
 *
 * @param result Classification result to render.
 */
function displayResult(result: LevelResult): void {
  const levelDescriptions: Record<number, { name: string; trust: string; verify: string }> = {
    5: { name: 'Full Automation', trust: '95%', verify: 'Final review only' },
    4: { name: 'High Trust', trust: '80%', verify: 'Spot check' },
    3: { name: 'Balanced', trust: '60%', verify: 'Review key outputs' },
    2: { name: 'AI-Augmented', trust: '40%', verify: 'Review every change' },
    1: { name: 'Human-Led', trust: '20%', verify: 'Review every line' },
    0: { name: 'Manual Only', trust: '0%', verify: 'No AI assistance' },
  };

  const desc = levelDescriptions[result.level];
  const say = (text: string = ''): void => console.log(text);
  const rule = (): void => say(chalk.bold.cyan('═'.repeat(60)));

  say();
  rule();
  say();

  // Green for levels 4-5, yellow for 2-3, red for 0-1
  let levelColor = chalk.red;
  if (result.level >= 4) {
    levelColor = chalk.green;
  } else if (result.level >= 2) {
    levelColor = chalk.yellow;
  }

  say(` ${chalk.bold('RECOMMENDED LEVEL:')} ${levelColor.bold(`${result.level} - ${desc.name}`)}`);
  say();
  say(` ${chalk.gray('Trust:')} ${desc.trust}`);
  say(` ${chalk.gray('Verify:')} ${desc.verify}`);
  say(` ${chalk.gray('Confidence:')} ${(result.confidence * 100).toFixed(0)}%`);
  say();

  // Model info
  if (result.source !== 'ml') {
    say(chalk.yellow(` ⚠ Fallback mode (no git history available)`));
  } else {
    say(chalk.green(` ✓ Using ML model with ${result.sampleCount || 0} calibration samples`));
    const hasEce = result.ece !== undefined && result.ece > 0;
    if (hasEce) {
      say(chalk.gray(` ECE: ${(result.ece! * 100).toFixed(1)}%`));
    }
  }

  say();
  say(chalk.bold.yellow(' REASONING:'));
  result.reasoning.forEach((reason) => {
    say(chalk.yellow(` • ${reason}`));
  });

  say();
  rule();
  say();
  say(chalk.gray(` After your work, run:`));
  say(chalk.white(` vibe-check --score --calibrate ${result.level}`));
  say();
}
|
|
658
|
+
```
|
|
659
|
+
|
|
660
|
+
**Validation:** `npm run build && npm run dev -- level --quick --json`
|
|
661
|
+
|
|
662
|
+
---
|
|
663
|
+
|
|
664
|
+
## Implementation Order
|
|
665
|
+
|
|
666
|
+
**CRITICAL: Sequence matters. Do not reorder.**
|
|
667
|
+
|
|
668
|
+
| Step | Action | Validation | Rollback |
|
|
669
|
+
|------|--------|------------|----------|
|
|
670
|
+
| 0 | Run baseline tests | `npm test` passes | N/A |
|
|
671
|
+
| 1 | Add `partialFit` to ordered-logistic.ts | `npm run build` | `git checkout src/recommend/ordered-logistic.ts` |
|
|
672
|
+
| 2 | Add `inferTrueLevel` to ece.ts | `npm run build` | `git checkout src/calibration/ece.ts` |
|
|
673
|
+
| 3 | Replace storage.ts with learning loop | `npm run build` | `git checkout src/calibration/storage.ts` |
|
|
674
|
+
| 4 | Update calibration/index.ts exports | `npm run build` | `git checkout src/calibration/index.ts` |
|
|
675
|
+
| 5 | Update recommend/index.ts exports | `npm run build` | `git checkout src/recommend/index.ts` |
|
|
676
|
+
| 6 | Replace level.ts with ML version | `npm run build` | `git checkout src/commands/level.ts` |
|
|
677
|
+
| 7 | Full integration test | `npm test && npm run dev -- level --quick` | Revert all (see Full Rollback) |
|
|
678
|
+
| 8 | Commit | `git commit` | N/A |
|
|
679
|
+
|
|
680
|
+
---
|
|
681
|
+
|
|
682
|
+
## Validation Strategy
|
|
683
|
+
|
|
684
|
+
### Syntax Validation
|
|
685
|
+
```bash
|
|
686
|
+
npm run build
|
|
687
|
+
# Expected: No TypeScript errors
|
|
688
|
+
```
|
|
689
|
+
|
|
690
|
+
### Unit Test Validation
|
|
691
|
+
```bash
|
|
692
|
+
npm test
|
|
693
|
+
# Expected: All existing tests pass
|
|
694
|
+
```
|
|
695
|
+
|
|
696
|
+
### Integration Validation
|
|
697
|
+
```bash
|
|
698
|
+
# Test ML model is used
|
|
699
|
+
npm run dev -- level --quick --json
|
|
700
|
+
# Expected: Output includes "source": "ml" if in git repo
|
|
701
|
+
|
|
702
|
+
# Test calibration triggers learning
|
|
703
|
+
npm run dev -- analyze --score --calibrate 3 --since "1 week ago"
|
|
704
|
+
# After 10 samples, check that .vibe-check/calibration.json contains updated weights
|
|
705
|
+
|
|
706
|
+
# Verify ECE is calculated
|
|
707
|
+
cat .vibe-check/calibration.json | grep '"ece"'
|
|
708
|
+
# Expected: ece value present
|
|
709
|
+
```
|
|
710
|
+
|
|
711
|
+
### Manual Validation: Learning Loop
|
|
712
|
+
```bash
|
|
713
|
+
# Simulate 10 calibration samples
|
|
714
|
+
for i in {1..10}; do
|
|
715
|
+
npm run dev -- analyze --score --calibrate 3 --since "1 week ago" > /dev/null
|
|
716
|
+
done
|
|
717
|
+
|
|
718
|
+
# Check weights have changed from defaults
|
|
719
|
+
cat .vibe-check/calibration.json
|
|
720
|
+
# Expected: weights array differs from DEFAULT_MODEL.weights
|
|
721
|
+
# Expected: version is "2.1.0" (indicating ML-learned)
|
|
722
|
+
```
|
|
723
|
+
|
|
724
|
+
---
|
|
725
|
+
|
|
726
|
+
## Rollback Procedure
|
|
727
|
+
|
|
728
|
+
**Time to rollback:** ~3 minutes
|
|
729
|
+
|
|
730
|
+
### Full Rollback
|
|
731
|
+
```bash
|
|
732
|
+
# Step 1: Reset all changed files
|
|
733
|
+
git checkout \
|
|
734
|
+
src/recommend/ordered-logistic.ts \
|
|
735
|
+
src/recommend/index.ts \
|
|
736
|
+
src/calibration/ece.ts \
|
|
737
|
+
src/calibration/storage.ts \
|
|
738
|
+
src/calibration/index.ts \
|
|
739
|
+
src/commands/level.ts
|
|
740
|
+
|
|
741
|
+
# Step 2: Rebuild
|
|
742
|
+
npm run build
|
|
743
|
+
|
|
744
|
+
# Step 3: Verify
|
|
745
|
+
npm test
|
|
746
|
+
```
|
|
747
|
+
|
|
748
|
+
### Partial Rollback (keep learning, revert level command)
|
|
749
|
+
```bash
|
|
750
|
+
git checkout src/commands/level.ts
|
|
751
|
+
npm run build
|
|
752
|
+
```
|
|
753
|
+
|
|
754
|
+
---
|
|
755
|
+
|
|
756
|
+
## Risk Assessment
|
|
757
|
+
|
|
758
|
+
### Medium Risk: Learning Instability
|
|
759
|
+
- **What:** Weights could diverge with bad samples
|
|
760
|
+
- **Mitigation:** Restart from DEFAULT_MODEL on each retrain, train for multiple epochs, and enforce ordered thresholds
|
|
761
|
+
- **Detection:** Check that all weights stay within a reasonable range (-10 to 10)
|
|
762
|
+
- **Recovery:** Delete .vibe-check/calibration.json to reset
|
|
763
|
+
|
|
764
|
+
### Low Risk: Git Performance in Level Command
|
|
765
|
+
- **What:** Reading 30 days of history could be slow
|
|
766
|
+
- **Mitigation:** Only fetch history when isGitRepo() is true and the repository has commits
|
|
767
|
+
- **Detection:** `time npm run dev -- level --quick`
|
|
768
|
+
- **Recovery:** Reduce `--since` default or skip metrics
|
|
769
|
+
|
|
770
|
+
### Low Risk: Backward Compatibility
|
|
771
|
+
- **What:** Old calibration.json files written by earlier versions may lack the new fields
|
|
772
|
+
- **Mitigation:** All fields are optional, defaults provided
|
|
773
|
+
- **Detection:** Load old file, check it works
|
|
774
|
+
- **Recovery:** Version field allows migration if needed
|
|
775
|
+
|
|
776
|
+
---
|
|
777
|
+
|
|
778
|
+
## Approval Checklist
|
|
779
|
+
|
|
780
|
+
**Human must verify before /implement:**
|
|
781
|
+
|
|
782
|
+
- [ ] Every file specified precisely (full content provided)
|
|
783
|
+
- [ ] All code complete (no placeholders)
|
|
784
|
+
- [ ] Validation commands provided
|
|
785
|
+
- [ ] Rollback procedure complete
|
|
786
|
+
- [ ] Implementation order is correct
|
|
787
|
+
- [ ] Risks identified and mitigated
|
|
788
|
+
- [ ] No breaking changes to existing functionality
|
|
789
|
+
- [ ] All 6 gaps addressed
|
|
790
|
+
|
|
791
|
+
---
|
|
792
|
+
|
|
793
|
+
## Progress Files
|
|
794
|
+
|
|
795
|
+
### `feature-list.json`
|
|
796
|
+
|
|
797
|
+
```json
|
|
798
|
+
{
|
|
799
|
+
"project": "vibe-check",
|
|
800
|
+
"version": "2.1.0",
|
|
801
|
+
"features": [
|
|
802
|
+
{
|
|
803
|
+
"id": "ml-learning-loop",
|
|
804
|
+
"name": "ML Learning Loop",
|
|
805
|
+
"description": "Complete implementation of model learning from calibration samples",
|
|
806
|
+
"status": "pending",
|
|
807
|
+
"passes": false,
|
|
808
|
+
"files": [
|
|
809
|
+
"src/recommend/ordered-logistic.ts",
|
|
810
|
+
"src/recommend/index.ts",
|
|
811
|
+
"src/calibration/ece.ts",
|
|
812
|
+
"src/calibration/storage.ts",
|
|
813
|
+
"src/calibration/index.ts",
|
|
814
|
+
"src/commands/level.ts"
|
|
815
|
+
],
|
|
816
|
+
"validation": "npm run build && npm test && npm run dev level --quick --json",
|
|
817
|
+
"gaps_addressed": [
|
|
818
|
+
"Model Learning (partial_fit)",
|
|
819
|
+
"Feedback Loop (retrain trigger)",
|
|
820
|
+
"Level Uses ML",
|
|
821
|
+
"Metrics Integration",
|
|
822
|
+
"Outcome-Based Updates",
|
|
823
|
+
"Calibration Triggers Learning"
|
|
824
|
+
]
|
|
825
|
+
}
|
|
826
|
+
]
|
|
827
|
+
}
|
|
828
|
+
```
|
|
829
|
+
|
|
830
|
+
### `claude-progress.json`
|
|
831
|
+
|
|
832
|
+
```json
|
|
833
|
+
{
|
|
834
|
+
"project": "vibe-check",
|
|
835
|
+
"current_state": {
|
|
836
|
+
"phase": "planning",
|
|
837
|
+
"working_on": "ML Learning Loop - Complete Implementation",
|
|
838
|
+
"next_steps": [
|
|
839
|
+
"Approve implementation plan",
|
|
840
|
+
"Run /implement",
|
|
841
|
+
"Verify learning loop works with 10 samples"
|
|
842
|
+
],
|
|
843
|
+
"blockers": []
|
|
844
|
+
},
|
|
845
|
+
"sessions": [
|
|
846
|
+
{
|
|
847
|
+
"date": "2025-11-28",
|
|
848
|
+
"summary": "Created complete plan addressing all 6 ML learning gaps"
|
|
849
|
+
}
|
|
850
|
+
]
|
|
851
|
+
}
|
|
852
|
+
```
|
|
853
|
+
|
|
854
|
+
---
|
|
855
|
+
|
|
856
|
+
## Summary: Before vs After
|
|
857
|
+
|
|
858
|
+
### Before (Current State)
|
|
859
|
+
|
|
860
|
+
```
|
|
861
|
+
vibe-check level --quick
|
|
862
|
+
↓
|
|
863
|
+
calculateBaseLevel(responses) // Simple: 3 + Q1 + Q2 + Q3 + Q4 + Q5
|
|
864
|
+
↓
|
|
865
|
+
return level (no ML, no metrics, no learning)
|
|
866
|
+
```
|
|
867
|
+
|
|
868
|
+
```
|
|
869
|
+
vibe-check --calibrate 3
|
|
870
|
+
↓
|
|
871
|
+
addSample(sample) // Store passively
|
|
872
|
+
↓
|
|
873
|
+
// No learning ever happens
|
|
874
|
+
```
|
|
875
|
+
|
|
876
|
+
### After (This Plan)
|
|
877
|
+
|
|
878
|
+
```
|
|
879
|
+
vibe-check level --quick
|
|
880
|
+
↓
|
|
881
|
+
loadCalibration(repo) // Get learned weights
|
|
882
|
+
getCommits + getFileStats // Get actual git metrics
|
|
883
|
+
features = [questions..., metrics...]
|
|
884
|
+
predictWithConfidence(features, model) // Use ML
|
|
885
|
+
↓
|
|
886
|
+
return level (ML-based, uses real metrics)
|
|
887
|
+
```
|
|
888
|
+
|
|
889
|
+
```
|
|
890
|
+
vibe-check --calibrate 3
|
|
891
|
+
↓
|
|
892
|
+
addSample(sample)
|
|
893
|
+
↓
|
|
894
|
+
if (samples % 10 === 0 || ece > 0.15):
|
|
895
|
+
retrain():
|
|
896
|
+
- inferTrueLevel from vibeScore
|
|
897
|
+
- batchPartialFit(model, samples)
|
|
898
|
+
- calculateECE()
|
|
899
|
+
- save updated weights
|
|
900
|
+
↓
|
|
901
|
+
// Model learns and improves
|
|
902
|
+
```
|
|
903
|
+
|
|
904
|
+
---
|
|
905
|
+
|
|
906
|
+
## Next Step
|
|
907
|
+
|
|
908
|
+
Once approved: `/implement ml-learning-loop-complete-plan-2025-11-28.md`
|