jfl 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/types/map.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,MAAM,YAAY,GACpB,iBAAiB,GAAG,eAAe,GACnC,cAAc,GAAG,gBAAgB,GAAG,aAAa,GACjD,iBAAiB,GAAG,mBAAmB,GACvC,eAAe,GAAG,qBAAqB,GAAG,sBAAsB,GAAG,oBAAoB,GACvF,cAAc,GACd,eAAe,GAAG,eAAe,GACjC,iBAAiB,GAAG,kBAAkB,GACtC,oBAAoB,GAAG,kBAAkB,GACzC,eAAe,GAAG,kBAAkB,GACpC,WAAW,GAAG,kBAAkB,GAChC,qBAAqB,GACrB,qBAAqB,GAAG,oBAAoB,GAC5C,gBAAgB,GAAG,gBAAgB,GACnC,eAAe,GAAG,eAAe,GAAG,aAAa,GAAG,cAAc,GAClE,uBAAuB,GAAG,0BAA0B,GAAG,gBAAgB,GACvE,gBAAgB,GAAG,aAAa,GAAG,eAAe,GAAG,YAAY,GACjE,oBAAoB,GAAG,0BAA0B,GACjD,mBAAmB,GAAG,wBAAwB,GAC9C,YAAY,GAAG,iBAAiB,GAChC,QAAQ,CAAA;AAEZ,MAAM,WAAW,WAAW;IAC1B,eAAe,EAAE,MAAM,CAAA;IACvB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACpC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAA;IACrB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACvB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAA;IACV,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,YAAY,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAC7B,GAAG,CAAC,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAA;IACV,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAA;IAClB,SAAS,EAAE,KAAK,GAAG,WAAW,GAAG,MAAM,CAAA;IACvC,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAA;AAEnD,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAA;AAE/E,MAAM,MAAM,WAAW,GAAG,gBAAgB,GAAG,UAAU,GAAG,eAAe,CAAA;AAEzE,eAAO,MAAM,mBAAmB,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,SAAS,EAAE,SAAS,CAAC,CAsBjF,CAAA;AAED,eAAO,MAAM,gBAAgB,EAAE,MAAM,CAAC,SAAS,EAAE,SAAS,CAMzD,CAAA"}
1
+ {"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/types/map.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,MAAM,YAAY,GACpB,iBAAiB,GAAG,eAAe,GACnC,cAAc,GAAG,gBAAgB,GAAG,aAAa,GACjD,iBAAiB,GAAG,mBAAmB,GACvC,eAAe,GAAG,qBAAqB,GAAG,sBAAsB,GAAG,oBAAoB,GACvF,cAAc,GACd,eAAe,GAAG,eAAe,GACjC,iBAAiB,GAAG,kBAAkB,GACtC,oBAAoB,GAAG,kBAAkB,GACzC,eAAe,GAAG,kBAAkB,GACpC,WAAW,GAAG,kBAAkB,GAChC,qBAAqB,GACrB,qBAAqB,GAAG,oBAAoB,GAC5C,gBAAgB,GAAG,gBAAgB,GACnC,eAAe,GAAG,eAAe,GAAG,aAAa,GAAG,cAAc,GAClE,uBAAuB,GAAG,0BAA0B,GAAG,gBAAgB,GACvE,gBAAgB,GAAG,aAAa,GAAG,eAAe,GAAG,YAAY,GACjE,oBAAoB,GAAG,0BAA0B,GACjD,mBAAmB,GAAG,wBAAwB,GAC9C,YAAY,GAAG,iBAAiB,GAChC,YAAY,GAAG,aAAa,GAAG,uBAAuB,GACtD,QAAQ,CAAA;AAEZ,MAAM,WAAW,WAAW;IAC1B,eAAe,EAAE,MAAM,CAAA;IACvB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACpC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAA;IACrB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACvB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAA;IACV,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,YAAY,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAC7B,GAAG,CAAC,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAA;IACV,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAA;IAClB,SAAS,EAAE,KAAK,GAAG,WAAW,GAAG,MAAM,CAAA;IACvC,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAA;AAEnD,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAA;AAE/E,MAAM,MAAM,WAAW,GAAG,gBAAgB,GAAG,UAAU,GAAG,eAAe,CAAA;AAEzE,eAAO,MAAM,mBAAmB,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,SAAS,EAAE,SAAS,CAAC,CAsBjF,CAAA;AAED,eAAO,MAAM,gBAAgB,EAAE,MAAM,CAAC,SAAS,EAAE,SAAS,CAMzD,CAAA"}
@@ -1 +1 @@
1
- {"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/types/map.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA4DH,MAAM,CAAC,MAAM,mBAAmB,GAAsD;IACpF,gBAAgB,EAAE;QAChB,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,QAAQ;QACjB,OAAO,EAAE,QAAQ;QACjB,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IACD,UAAU,EAAE;QACV,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,QAAQ;QACjB,OAAO,EAAE,QAAQ;QACjB,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IACD,eAAe,EAAE;QACf,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,MAAM;QACf,OAAO,EAAE,QAAQ;QACjB,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;CACF,CAAA;AAED,MAAM,CAAC,MAAM,gBAAgB,GAAiC;IAC5D,KAAK,EAAE,QAAQ;IACf,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,QAAQ,EAAE,MAAM;IAChB,MAAM,EAAE,QAAQ;CACjB,CAAA"}
1
+ {"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/types/map.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA6DH,MAAM,CAAC,MAAM,mBAAmB,GAAsD;IACpF,gBAAgB,EAAE;QAChB,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,QAAQ;QACjB,OAAO,EAAE,QAAQ;QACjB,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IACD,UAAU,EAAE;QACV,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,QAAQ;QACjB,OAAO,EAAE,QAAQ;QACjB,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IACD,eAAe,EAAE;QACf,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,MAAM;QACf,OAAO,EAAE,QAAQ;QACjB,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;CACF,CAAA;AAED,MAAM,CAAC,MAAM,gBAAgB,GAAiC;IAC5D,KAAK,EAAE,QAAQ;IACf,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,QAAQ,EAAE,MAAM;IAChB,MAAM,EAAE,QAAQ;CACjB,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jfl",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "Just Fucking Launch - CLI for AI-powered GTM and development",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -0,0 +1,540 @@
1
+ name: JFL Eval Suite
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize]
6
+ branches: [main]
7
+
8
+ permissions:
9
+ contents: write
10
+ pull-requests: write
11
+
12
+ jobs:
13
+ eval:
14
+ name: Run JFL Eval
15
+ runs-on: ubuntu-latest
16
+ timeout-minutes: 15
17
+ if: startsWith(github.head_ref, 'pp/') || contains(github.event.pull_request.labels.*.name, 'run-eval')
18
+
19
+ env:
20
+ JFL_HUB_URL: ${{ secrets.JFL_HUB_URL }}
21
+ JFL_HUB_TOKEN: ${{ secrets.JFL_HUB_TOKEN }}
22
+ JFL_TELEMETRY: 'off'
23
+ NODE_OPTIONS: '--experimental-vm-modules'
24
+
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ with:
28
+ fetch-depth: 0
29
+
30
+ - uses: actions/setup-node@v4
31
+ with:
32
+ node-version: '22'
33
+ cache: 'npm'
34
+
35
+ - name: Install dependencies
36
+ run: npm ci
37
+
38
+ - name: Build
39
+ run: npx tsc --build  # must emit dist/: the Stratus steps below load ./dist/lib/predictor.js, which --noEmit never produced
40
+
41
+ - name: Predict eval delta (Stratus, optional)
42
+ id: predict
43
+ if: env.STRATUS_API_KEY != ''
44
+ continue-on-error: true
45
+ env:
46
+ STRATUS_API_KEY: ${{ secrets.STRATUS_API_KEY }}
47
+ PR_TITLE: ${{ github.event.pull_request.title }}
48
+ run: |
49
+ export DIFF_SUMMARY=$(git diff origin/main...HEAD --stat | tail -5)
50
+ export CURRENT_SCORE=$(cat .jfl/eval.jsonl 2>/dev/null | tail -1 | node -e "let d='';process.stdin.on('data',c=>d+=c);process.stdin.on('end',()=>{try{console.log(JSON.parse(d).composite||0)}catch{console.log(0)}})" 2>/dev/null || echo "0")
51
+ # PR title and diff summary are untrusted and multi-line, so node reads them from the environment; splicing them into the JS source broke the string literal and allowed code injection.
52
+ RESULT=$(node -e "
53
+ const { Predictor } = require('./dist/lib/predictor.js');
54
+ const p = new Predictor();
55
+ p.predict({
56
+ proposal: { description: process.env.PR_TITLE + ' — ' + process.env.DIFF_SUMMARY, change_type: 'fix', scope: 'medium' },
57
+ current_score: Number(process.env.CURRENT_SCORE) || 0,
58
+ goal: 'test_pass_rate >= 1.0',
59
+ recent_trajectory: [],
60
+ }).then(r => {
61
+ console.log(JSON.stringify({id: r.prediction_id, delta: r.predicted_delta, confidence: r.confidence, recommendation: r.recommendation}));
62
+ }).catch(e => {
63
+ console.error('Prediction failed:', e.message);
64
+ console.log(JSON.stringify({id: '', delta: 0, confidence: 0, recommendation: 'revise'}));
65
+ });
66
+ " 2>/dev/null || echo '{"id":"","delta":0,"confidence":0,"recommendation":"revise"}')
67
+
68
+ echo "prediction_id=$(echo "$RESULT" | node -e "let d='';process.stdin.on('data',c=>d+=c);process.stdin.on('end',()=>{try{console.log(JSON.parse(d).id)}catch{console.log('')}})")" >> $GITHUB_OUTPUT
69
+ echo "predicted_delta=$(echo "$RESULT" | node -e "let d='';process.stdin.on('data',c=>d+=c);process.stdin.on('end',()=>{try{console.log(JSON.parse(d).delta)}catch{console.log(0)}})")" >> $GITHUB_OUTPUT
70
+ echo "confidence=$(echo "$RESULT" | node -e "let d='';process.stdin.on('data',c=>d+=c);process.stdin.on('end',()=>{try{console.log(JSON.parse(d).confidence)}catch{console.log(0)}})")" >> $GITHUB_OUTPUT
71
+ echo "recommendation=$(echo "$RESULT" | node -e "let d='';process.stdin.on('data',c=>d+=c);process.stdin.on('end',()=>{try{console.log(JSON.parse(d).recommendation)}catch{console.log('revise')}})")" >> $GITHUB_OUTPUT
72
+
73
+ - name: Run tests (baseline from main)
74
+ id: baseline
75
+ run: |
76
+ # Clean switch to main — remove PR-only files, restore main state
77
+ PR_SHA=$(git rev-parse HEAD)
78
+ git checkout origin/main --force
79
+ git clean -fd
80
+ npm ci --ignore-scripts 2>/dev/null || true
81
+ # jest exits non-zero when any test fails but still prints its JSON report; appending a fallback '{}' to that output made it unparseable and collapsed the score to 0. An empty capture still falls through to the 0/1 defaults below.
82
+ RESULT=$(npx jest --json --silent 2>/dev/null || true)
83
+ PASSING=$(echo "$RESULT" | node -e "
84
+ let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{
85
+ try { const j=JSON.parse(d); console.log(j.numPassedTests||0); }
86
+ catch(e) { console.log(0); }
87
+ })
88
+ ")
89
+ TOTAL=$(echo "$RESULT" | node -e "
90
+ let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{
91
+ try { const j=JSON.parse(d); console.log(j.numTotalTests||1); }
92
+ catch(e) { console.log(1); }
93
+ })
94
+ ")
95
+ SCORE=$(node -e "console.log(($TOTAL>0?$PASSING/$TOTAL:0).toFixed(4))")
96
+ echo "score=$SCORE" >> $GITHUB_OUTPUT
97
+ echo "passing=$PASSING" >> $GITHUB_OUTPUT
98
+ echo "total=$TOTAL" >> $GITHUB_OUTPUT
99
+ echo "Baseline: $SCORE ($PASSING/$TOTAL)"
100
+
101
+ # Switch back to PR branch
102
+ git checkout $PR_SHA --force
103
+ git clean -fd
104
+
105
+ - name: Run tests (PR branch)
106
+ id: pr_eval
107
+ run: |
108
+ npm ci --ignore-scripts 2>/dev/null || true
109
+ # jest exits non-zero when any test fails but still prints its JSON report; appending a fallback '{}' to that output made it unparseable and collapsed the score to 0. An empty capture still falls through to the 0/1 defaults below.
110
+ RESULT=$(npx jest --json --silent 2>/dev/null || true)
111
+ PASSING=$(echo "$RESULT" | node -e "
112
+ let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{
113
+ try { const j=JSON.parse(d); console.log(j.numPassedTests||0); }
114
+ catch(e) { console.log(0); }
115
+ })
116
+ ")
117
+ TOTAL=$(echo "$RESULT" | node -e "
118
+ let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{
119
+ try { const j=JSON.parse(d); console.log(j.numTotalTests||1); }
120
+ catch(e) { console.log(1); }
121
+ })
122
+ ")
123
+ SCORE=$(node -e "console.log(($TOTAL>0?$PASSING/$TOTAL:0).toFixed(4))")
124
+ echo "score=$SCORE" >> $GITHUB_OUTPUT
125
+ echo "passing=$PASSING" >> $GITHUB_OUTPUT
126
+ echo "total=$TOTAL" >> $GITHUB_OUTPUT
127
+ echo "PR score: $SCORE ($PASSING/$TOTAL)"
128
+
129
+ - name: Compute delta
130
+ id: delta
131
+ run: |
132
+ BASELINE=${{ steps.baseline.outputs.score }}
133
+ PR_SCORE=${{ steps.pr_eval.outputs.score }}
134
+ BASELINE_TOTAL=${{ steps.baseline.outputs.total }}
135
+ PR_TOTAL=${{ steps.pr_eval.outputs.total }}
136
+ BASELINE_PASSING=${{ steps.baseline.outputs.passing }}
137
+ PR_PASSING=${{ steps.pr_eval.outputs.passing }}
138
+
139
+ # Pass rate delta
140
+ DELTA=$(node -e "console.log(($PR_SCORE - $BASELINE).toFixed(4))")
141
+
142
+ # Test count delta
143
+ TESTS_ADDED=$(node -e "console.log($PR_TOTAL - $BASELINE_TOTAL)")
144
+ TESTS_PASSING_ADDED=$(node -e "console.log($PR_PASSING - $BASELINE_PASSING)")
145
+
146
+ # Improved = pass_rate up OR (pass_rate maintained at 1.0 AND more passing tests)
147
+ IMPROVED=$(node -e "
148
+ const prScore = $PR_SCORE;
149
+ const baseline = $BASELINE;
150
+ const testsAdded = $PR_PASSING - $BASELINE_PASSING;
151
+ const passRateUp = prScore > baseline;
152
+ const passRateMaintained = prScore >= 1.0 && baseline >= 1.0;
153
+ const moreTests = testsAdded > 0;
154
+ const noRegression = prScore >= baseline;
155
+ console.log(passRateUp || (passRateMaintained && moreTests) || (noRegression && moreTests));
156
+ ")
157
+
158
+ echo "delta=$DELTA" >> $GITHUB_OUTPUT
159
+ echo "improved=$IMPROVED" >> $GITHUB_OUTPUT
160
+ echo "tests_added=$TESTS_ADDED" >> $GITHUB_OUTPUT
161
+ echo "tests_passing_added=$TESTS_PASSING_ADDED" >> $GITHUB_OUTPUT
162
+ echo "Delta: $DELTA, tests_added=$TESTS_ADDED (improved=$IMPROVED)"
163
+
164
+ - name: Resolve prediction (Stratus, optional)
165
+ if: steps.predict.outputs.prediction_id != ''
166
+ continue-on-error: true
167
+ env:
168
+ STRATUS_API_KEY: ${{ secrets.STRATUS_API_KEY }}
169
+ run: |
170
+ PRED_ID="${{ steps.predict.outputs.prediction_id }}"
171
+ DELTA=${{ steps.delta.outputs.delta }}
172
+ PR_SCORE=${{ steps.pr_eval.outputs.score }}
173
+
174
+ node -e "
175
+ const { Predictor } = require('./dist/lib/predictor.js');
176
+ const p = new Predictor();
177
+ p.resolve('$PRED_ID', $DELTA, $PR_SCORE, '${{ github.run_id }}')
178
+ .then(() => console.log('Prediction resolved: $PRED_ID'))
179
+ .catch(e => console.error('Resolve failed:', e.message));
180
+ " 2>/dev/null || echo "Warning: could not resolve prediction"
181
+
182
+ - name: AI quality assessment
183
+ id: ai_eval
184
+ env:
185
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
186
+ OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
187
+ PR_TITLE: ${{ github.event.pull_request.title }}
188
+ run: |
189
+ # Get diff for AI assessment
190
+ git diff origin/main...HEAD -- '*.ts' '*.tsx' '*.js' '*.jsx' | head -3000 > /tmp/eval-diff.txt
191
+ DIFF_SIZE=$(wc -c < /tmp/eval-diff.txt | tr -d ' ')
192
+
193
+ if [ "$DIFF_SIZE" -lt 10 ]; then
194
+ # Emit every output that later steps interpolate into JSON; an unset dim_* renders as "key":, and corrupts the eval record.
195
+ printf '%s\n' "quality_score=0.5" "quality_reason=No code diff to assess" "dim_correctness=0.50" "dim_coverage=0.50" "dim_architecture=0.50" "dim_value=0.50" >> $GITHUB_OUTPUT
196
+ exit 0
197
+ fi
198
+
199
+ # Gather context (same as review workflow)
200
+ node -e "
201
+ const fs = require('fs');
202
+ const sections = [];
203
+ try { const c = JSON.parse(fs.readFileSync('.jfl/config.json','utf8')); sections.push('Project: ' + (c.name||'unknown') + ' - ' + (c.description||'')); } catch(e) {}
204
+ try { sections.push(fs.readFileSync('knowledge/ARCHITECTURE.md','utf8').substring(0,1500)); } catch(e) {}
205
+ try { sections.push(fs.readFileSync('knowledge/SERVICE_SPEC.md','utf8').substring(0,800)); } catch(e) {}
206
+ fs.writeFileSync('/tmp/eval-context.txt', sections.join('\n'));
207
+ "
208
+
209
+ # Build AI eval payload
210
+ node -e "
211
+ const fs = require('fs');
212
+ const diff = fs.readFileSync('/tmp/eval-diff.txt', 'utf8').substring(0, 8000);
213
+ const context = fs.readFileSync('/tmp/eval-context.txt', 'utf8').substring(0, 2000);
214
+ const title = process.env.PR_TITLE || '';
215
+
216
+ const testMetrics = {
217
+ baseline_pass_rate: ${{ steps.baseline.outputs.score }},
218
+ pr_pass_rate: ${{ steps.pr_eval.outputs.score }},
219
+ baseline_tests: ${{ steps.baseline.outputs.total }},
220
+ pr_tests: ${{ steps.pr_eval.outputs.total }},
221
+ tests_added: ${{ steps.delta.outputs.tests_added }},
222
+ };
223
+
224
+ const payload = {
225
+ model: 'gpt-4o-mini',
226
+ messages: [
227
+ {
228
+ role: 'system',
229
+ content: [
230
+ 'You are an AI eval scorer for a software project. Score this PR on overall quality improvement.',
231
+ '',
232
+ '--- PROJECT CONTEXT ---',
233
+ context,
234
+ '--- END CONTEXT ---',
235
+ '',
236
+ 'Test metrics: ' + JSON.stringify(testMetrics),
237
+ '',
238
+ 'Score the PR from 0.0 to 1.0 on these dimensions:',
239
+ '- correctness: Does the code work? Any bugs?',
240
+ '- coverage: Did it add meaningful tests?',
241
+ '- architecture: Does it follow project patterns?',
242
+ '- value: Does it add real value to the project?',
243
+ '',
244
+ 'Respond in this exact JSON format (no markdown):',
245
+ '{\"quality_score\": 0.0-1.0, \"dimensions\": {\"correctness\": 0.0-1.0, \"coverage\": 0.0-1.0, \"architecture\": 0.0-1.0, \"value\": 0.0-1.0}, \"reason\": \"1-2 sentence summary\"}',
246
+ ].join('\\n')
247
+ },
248
+ { role: 'user', content: 'PR: ' + title + '\\n\\n' + diff }
249
+ ],
250
+ max_tokens: 300,
251
+ temperature: 0.1
252
+ };
253
+
254
+ fs.writeFileSync('/tmp/eval-payload.json', JSON.stringify(payload));
255
+ "
256
+
257
+ # Try OpenAI
258
+ RESULT=""
259
+ if [ -n "$OPENAI_API_KEY" ]; then
260
+ RESPONSE=$(curl -s -X POST https://api.openai.com/v1/chat/completions \
261
+ -H "Content-Type: application/json" \
262
+ -H "Authorization: Bearer $OPENAI_API_KEY" \
263
+ -d @/tmp/eval-payload.json 2>/dev/null)
264
+ RESULT=$(echo "$RESPONSE" | node -e "
265
+ let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{
266
+ try { console.log(JSON.parse(d).choices[0].message.content); }
267
+ catch(e) { console.log(''); }
268
+ })
269
+ ")
270
+ fi
271
+
272
+ # Fallback to OpenRouter
273
+ if [ -z "$RESULT" ] && [ -n "$OPENROUTER_API_KEY" ]; then
274
+ node -e "
275
+ const fs = require('fs');
276
+ const p = JSON.parse(fs.readFileSync('/tmp/eval-payload.json', 'utf8'));
277
+ p.model = 'anthropic/claude-sonnet-4';
278
+ fs.writeFileSync('/tmp/eval-payload-or.json', JSON.stringify(p));
279
+ "
280
+ RESPONSE=$(curl -s -X POST https://openrouter.ai/api/v1/chat/completions \
281
+ -H "Content-Type: application/json" \
282
+ -H "Authorization: Bearer $OPENROUTER_API_KEY" \
283
+ -d @/tmp/eval-payload-or.json 2>/dev/null)
284
+ RESULT=$(echo "$RESPONSE" | node -e "
285
+ let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{
286
+ try { console.log(JSON.parse(d).choices[0].message.content); }
287
+ catch(e) { console.log(''); }
288
+ })
289
+ ")
290
+ fi
291
+
292
+ # Parse AI eval result. The model reply is untrusted (the PR diff can steer it), so node reads it from the environment instead of having it spliced into the JS source.
293
+ AI_RESULT="$RESULT" node -e "
294
+ const raw = process.env.AI_RESULT || '{}';
295
+ try {
296
+ const parsed = JSON.parse(raw.match(/\{[\s\S]*\}/)?.[0] || '{}');
297
+ const num = (v) => { const n = parseFloat(v); return Number.isFinite(n) ? Math.max(0, Math.min(1, n)) : 0.5; };
298
+ const reason = String(parsed.reason || 'AI eval unavailable').replace(/[\r\n]+/g, ' ');
299
+ const dims = parsed.dimensions || {};
300
+ console.log('quality_score=' + num(parsed.quality_score).toFixed(2));
301
+ console.log('quality_reason=' + reason);
302
+ console.log('dim_correctness=' + num(dims.correctness).toFixed(2));
303
+ console.log('dim_coverage=' + num(dims.coverage).toFixed(2));
304
+ console.log('dim_architecture=' + num(dims.architecture).toFixed(2));
305
+ console.log('dim_value=' + num(dims.value).toFixed(2));
306
+ } catch(e) {
307
+ console.log('quality_score=0.50');
308
+ console.log('quality_reason=AI eval parse error');
309
+ console.log('dim_correctness=0.50');
310
+ console.log('dim_coverage=0.50');
311
+ console.log('dim_architecture=0.50');
312
+ console.log('dim_value=0.50');
313
+ }
314
+ " >> $GITHUB_OUTPUT
315
+
316
+ - name: Detect agent from branch
317
+ id: agent
318
+ env:
319
+ HEAD_REF: ${{ github.head_ref }}
320
+ PR_AUTHOR: ${{ github.event.pull_request.user.login }}
321
+ run: |
322
+ # Branch prefix → agent: pp/* → peter-parker, bot/* → bot, agent/<name>/… → <name>, otherwise the PR author. The branch name is attacker-controlled, so it arrives via env rather than being expanded into the script text.
323
+ if [[ "$HEAD_REF" == pp/* ]]; then
324
+ echo "name=peter-parker" >> $GITHUB_OUTPUT
325
+ elif [[ "$HEAD_REF" == bot/* ]]; then
326
+ echo "name=bot" >> $GITHUB_OUTPUT
327
+ elif [[ "$HEAD_REF" == agent/* ]]; then
328
+ echo "name=$(echo "$HEAD_REF" | cut -d'/' -f2)" >> $GITHUB_OUTPUT
329
+ else
330
+ echo "name=$PR_AUTHOR" >> $GITHUB_OUTPUT
331
+ fi
+
333
+ - name: Commit eval entry to PR branch
334
+ env:
335
+ HEAD_REF: ${{ github.head_ref }}
336
+ AGENT: ${{ steps.agent.outputs.name }}
337
+ PRED_ID: ${{ steps.predict.outputs.prediction_id }}
338
+ run: |
339
+ git config user.name "github-actions[bot]"
340
+ git config user.email "github-actions[bot]@users.noreply.github.com"
341
+ # Checkout the actual PR branch (not detached HEAD). Branch, agent and prediction id are attacker-influenced, so they arrive via env rather than being expanded into the script text.
342
+ git checkout "$HEAD_REF"
343
+
344
+ # Write eval entry (matches readEvals path: .jfl/eval.jsonl)
345
+ mkdir -p .jfl
346
+ cat >> .jfl/eval.jsonl << EOF
347
+ {"v":1,"ts":"$(date -u +%Y-%m-%dT%H:%M:%SZ)","agent":"$AGENT","dataset":"ci-eval","metrics":{"test_pass_rate":${{ steps.pr_eval.outputs.score }},"tests_passed":${{ steps.pr_eval.outputs.passing }},"tests_total":${{ steps.pr_eval.outputs.total }}},"composite":${{ steps.pr_eval.outputs.score }},"delta":{"composite":${{ steps.delta.outputs.delta }}},"model_version":"$AGENT-run","improved":${{ steps.delta.outputs.improved }},"pr_number":${{ github.event.pull_request.number }},"branch":"$HEAD_REF","quality_score":${{ steps.ai_eval.outputs.quality_score }},"dim_correctness":${{ steps.ai_eval.outputs.dim_correctness }},"dim_coverage":${{ steps.ai_eval.outputs.dim_coverage }},"dim_architecture":${{ steps.ai_eval.outputs.dim_architecture }},"dim_value":${{ steps.ai_eval.outputs.dim_value }},"prediction_id":"$PRED_ID"}
348
+ EOF
349
+
350
+ # Write eval:scored event for hub file watcher
351
+ cat >> .jfl/service-events.jsonl << EOF
352
+ {"ts":"$(date -u +%Y-%m-%dT%H:%M:%S.000Z)","type":"eval:scored","source":"ci","data":{"agent":"$AGENT","composite":${{ steps.pr_eval.outputs.score }},"baseline":${{ steps.baseline.outputs.score }},"delta":${{ steps.delta.outputs.delta }},"improved":"${{ steps.delta.outputs.improved }}","pr_number":${{ github.event.pull_request.number }},"branch":"$HEAD_REF","run_url":"${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}","quality_score":${{ steps.ai_eval.outputs.quality_score }},"dim_correctness":${{ steps.ai_eval.outputs.dim_correctness }},"dim_coverage":${{ steps.ai_eval.outputs.dim_coverage }},"dim_architecture":${{ steps.ai_eval.outputs.dim_architecture }},"dim_value":${{ steps.ai_eval.outputs.dim_value }}}}
353
+ EOF
354
+
355
+ git add .jfl/eval.jsonl .jfl/service-events.jsonl
356
+ git add .jfl/predictions/ 2>/dev/null || true
357
+ git commit -m "eval: scored PR #${{ github.event.pull_request.number }} (composite=${{ steps.pr_eval.outputs.score }}, delta=${{ steps.delta.outputs.delta }})"
358
+ git push origin "$HEAD_REF"
359
+
360
+ - name: Post eval:scored event to hub (best-effort)
361
+ if: env.JFL_HUB_URL != ''
362
+ continue-on-error: true
363
+ run: |
364
+ curl -sf -X POST "${JFL_HUB_URL}/api/events" \
365
+ -H "Content-Type: application/json" \
366
+ -H "Authorization: Bearer ${JFL_HUB_TOKEN}" \
367
+ -d '{
368
+ "type": "eval:scored",
369
+ "source": "github-actions",
370
+ "data": {
371
+ "pr_number": ${{ github.event.pull_request.number }},
372
+ "pr_url": "${{ github.event.pull_request.html_url }}",
373
+ "branch": "${{ github.head_ref }}",
374
+ "commit_sha": "${{ github.sha }}",
375
+ "agent": "${{ steps.agent.outputs.name }}",
376
+ "metrics": {
377
+ "test_pass_rate": ${{ steps.pr_eval.outputs.score }},
378
+ "test_count": ${{ steps.pr_eval.outputs.total }},
379
+ "tests_passing": ${{ steps.pr_eval.outputs.passing }}
380
+ },
381
+ "baseline": ${{ steps.baseline.outputs.score }},
382
+ "baseline_total": ${{ steps.baseline.outputs.total }},
383
+ "baseline_passing": ${{ steps.baseline.outputs.passing }},
384
+ "composite": ${{ steps.pr_eval.outputs.score }},
385
+ "delta": ${{ steps.delta.outputs.delta }},
386
+ "tests_added": ${{ steps.delta.outputs.tests_added }},
387
+ "tests_passing_added": ${{ steps.delta.outputs.tests_passing_added }},
388
+ "improved": ${{ steps.delta.outputs.improved }},
389
+ "run_id": "${{ github.run_id }}",
390
+ "run_url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
391
+ }
392
+ }' || echo "Warning: could not post to hub (hub may not be running)"
393
+
394
+ - name: Comment on PR
395
+ uses: actions/github-script@v7
396
+ env: # free-text step outputs are read from the environment; interpolated into the script they break on an apostrophe and allow code injection
397
+ QUALITY_REASON: ${{ steps.ai_eval.outputs.quality_reason }}
398
+ PRED_REC: ${{ steps.predict.outputs.recommendation }}
399
+ PRED_ID: ${{ steps.predict.outputs.prediction_id }}
400
+ with:
401
+ script: |
402
+ const delta = parseFloat('${{ steps.delta.outputs.delta }}');
403
+ const improved = '${{ steps.delta.outputs.improved }}' === 'true';
404
+ const baseline = '${{ steps.baseline.outputs.score }}';
405
+ const prScore = '${{ steps.pr_eval.outputs.score }}';
406
+ const baselineTotal = parseInt('${{ steps.baseline.outputs.total }}');
407
+ const prTotal = parseInt('${{ steps.pr_eval.outputs.total }}');
408
+ const baselinePassing = parseInt('${{ steps.baseline.outputs.passing }}');
409
+ const prPassing = parseInt('${{ steps.pr_eval.outputs.passing }}');
410
+ const testsAdded = parseInt('${{ steps.delta.outputs.tests_added }}');
411
+ const testsPassingAdded = parseInt('${{ steps.delta.outputs.tests_passing_added }}');
412
+ const runUrl = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
413
+ const emoji = improved ? ':green_circle:' : (delta < 0 ? ':red_circle:' : ':yellow_circle:');
414
+ let verdict = 'UNCHANGED';
415
+ if (delta > 0) verdict = 'IMPROVED';
416
+ else if (delta < 0) verdict = 'REGRESSED';
417
+ else if (testsPassingAdded > 0) verdict = 'IMPROVED (more tests)';
418
+ const testsDeltaStr = testsAdded > 0 ? `+${testsAdded}` : `${testsAdded}`;
419
+ const qualityScore = parseFloat('${{ steps.ai_eval.outputs.quality_score }}') || 0.5;
420
+ const qualityReason = process.env.QUALITY_REASON || 'N/A';
421
+ const dimCorrectness = '${{ steps.ai_eval.outputs.dim_correctness }}' || '—';
422
+ const dimCoverage = '${{ steps.ai_eval.outputs.dim_coverage }}' || '—';
423
+ const dimArchitecture = '${{ steps.ai_eval.outputs.dim_architecture }}' || '—';
424
+ const dimValue = '${{ steps.ai_eval.outputs.dim_value }}' || '—';
425
+ // Prediction data (optional)
426
+ const predId = process.env.PRED_ID || '';
427
+ const predDelta = parseFloat('${{ steps.predict.outputs.predicted_delta }}') || 0;
428
+ const predConf = parseFloat('${{ steps.predict.outputs.confidence }}') || 0;
429
+ const predRec = process.env.PRED_REC || '';
430
+ const predictionSection = predId ? [
431
+ '',
432
+ `### Stratus Prediction`,
433
+ '',
434
+ `| Metric | Value |`,
435
+ `|--------|-------|`,
436
+ `| Predicted delta | ${predDelta >= 0 ? '+' : ''}${predDelta.toFixed(4)} |`,
437
+ `| Actual delta | ${delta >= 0 ? '+' : ''}${delta.toFixed(4)} |`,
438
+ `| Prediction error | ${Math.abs(predDelta - delta).toFixed(4)} |`,
439
+ `| Confidence | ${Math.round(predConf * 100)}% |`,
440
+ `| Recommendation | ${predRec} |`,
441
+ `| Direction correct | ${(predDelta >= 0) === (delta >= 0) ? ':white_check_mark:' : ':x:'} |`,
442
+ '',
443
+ ] : [];
444
+
445
+ const body = [
446
+ `## JFL Eval Results ${emoji}`,
447
+ '',
448
+ '| Metric | Baseline (main) | PR | Delta |',
449
+ '|--------|-----------------|-----|-------|',
450
+ `| test_pass_rate | ${baseline} (${baselinePassing}/${baselineTotal}) | ${prScore} (${prPassing}/${prTotal}) | ${delta > 0 ? '+' : ''}${delta.toFixed(4)} |`,
451
+ `| test_count | ${baselineTotal} | ${prTotal} | ${testsDeltaStr} |`,
452
+ `| tests_passing | ${baselinePassing} | ${prPassing} | ${testsPassingAdded > 0 ? '+' : ''}${testsPassingAdded} |`,
453
+ '',
454
+ `### AI Quality Score: ${qualityScore.toFixed(2)}`,
455
+ '',
456
+ '| Dimension | Score |',
457
+ '|-----------|-------|',
458
+ `| Correctness | ${dimCorrectness} |`,
459
+ `| Coverage | ${dimCoverage} |`,
460
+ `| Architecture | ${dimArchitecture} |`,
461
+ `| Value | ${dimValue} |`,
462
+ '',
463
+ `> ${qualityReason}`,
464
+ ...(predictionSection.length ? predictionSection : ['']), // always leave a blank line so the verdict is not absorbed into the blockquote above
465
+ `**Verdict: ${verdict}**`,
466
+ '',
467
+ improved
468
+ ? 'Auto-merge eligible via JFL flow.'
469
+ : (delta < 0
470
+ ? 'Regression detected. Manual review required.'
471
+ : 'No improvement detected. Manual review recommended.'),
472
+ '',
473
+ '---',
474
+ `[View run](${runUrl}) | *Evaluated by JFL self-driving loop*`,
475
+ ].join('\n');
476
+
477
+ const { data: comments } = await github.rest.issues.listComments({
478
+ owner: context.repo.owner,
479
+ repo: context.repo.repo,
480
+ issue_number: context.issue.number,
481
+ });
482
+
483
+ const existing = comments.find(c =>
484
+ c.user.type === 'Bot' && c.body.includes('JFL Eval Results')
485
+ );
486
+
487
+ if (existing) {
488
+ await github.rest.issues.updateComment({
489
+ owner: context.repo.owner,
490
+ repo: context.repo.repo,
491
+ comment_id: existing.id,
492
+ body,
493
+ });
494
+ } else {
495
+ await github.rest.issues.createComment({
496
+ owner: context.repo.owner,
497
+ repo: context.repo.repo,
498
+ issue_number: context.issue.number,
499
+ body,
500
+ });
501
+ }
+
503
+ - name: Auto-merge or flag regression
504
+ if: always() && steps.delta.outputs.improved != ''
505
+ env:
506
+ GH_TOKEN: ${{ github.token }}
507
+ run: |
508
+ PR_NUMBER=${{ github.event.pull_request.number }}
509
+ IMPROVED="${{ steps.delta.outputs.improved }}"
510
+ # The step-level if: skips this step when no delta was computed (an earlier step failed), so a crashed run is not reported as a regression.
511
+ if [ "$IMPROVED" = "true" ]; then
512
+ # Check if AI review requested changes
513
+ REVIEW_STATE=$(gh pr view $PR_NUMBER --json reviews --jq '[.reviews[] | select(.author.login == "github-actions")] | last | .state // "NONE"' 2>/dev/null || echo "NONE")
514
+
515
+ if [ "$REVIEW_STATE" = "CHANGES_REQUESTED" ]; then
516
+ echo "Eval passed but AI review has blockers — holding merge"
517
+ gh pr comment $PR_NUMBER --body "**JFL Eval passed** (test_pass_rate improved) but **AI review found blockers**. Address red findings, then eval will re-run and auto-merge."
518
+ else
519
+ echo "Eval passed, no review blockers — auto-merging PR #$PR_NUMBER"
520
+ gh pr merge $PR_NUMBER --merge --delete-branch \
521
+ --body "Auto-merged by JFL eval: test_pass_rate improved by ${{ steps.delta.outputs.delta }} (${{ steps.baseline.outputs.score }} → ${{ steps.pr_eval.outputs.score }})"
522
+ fi
523
+ else
524
+ echo "Eval regression — requesting changes on PR #$PR_NUMBER"
525
+ gh pr review $PR_NUMBER --request-changes \
526
+ --body "JFL eval regression: test_pass_rate dropped by ${{ steps.delta.outputs.delta }} (${{ steps.baseline.outputs.score }} → ${{ steps.pr_eval.outputs.score }}). Run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
527
+ fi
528
+
529
+ - name: Eval Summary
530
+ if: always()
531
+ run: |
532
+ echo "### JFL Eval Results" >> $GITHUB_STEP_SUMMARY
533
+ echo "" >> $GITHUB_STEP_SUMMARY
534
+ echo "| Metric | Baseline | PR | Delta |" >> $GITHUB_STEP_SUMMARY
535
+ echo "|--------|----------|-----|-------|" >> $GITHUB_STEP_SUMMARY
536
+ echo "| test_pass_rate | ${{ steps.baseline.outputs.score }} | ${{ steps.pr_eval.outputs.score }} | ${{ steps.delta.outputs.delta }} |" >> $GITHUB_STEP_SUMMARY
537
+ echo "| test_count | ${{ steps.baseline.outputs.total }} | ${{ steps.pr_eval.outputs.total }} | ${{ steps.delta.outputs.tests_added }} |" >> $GITHUB_STEP_SUMMARY
538
+ echo "| tests_passing | ${{ steps.baseline.outputs.passing }} | ${{ steps.pr_eval.outputs.passing }} | ${{ steps.delta.outputs.tests_passing_added }} |" >> $GITHUB_STEP_SUMMARY
539
+ echo "" >> $GITHUB_STEP_SUMMARY
540
+ echo "Improved: ${{ steps.delta.outputs.improved }}" >> $GITHUB_STEP_SUMMARY