promptup-plugin 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +78 -0
- package/bin/install.cjs +306 -0
- package/bin/promptup-plugin +8 -0
- package/dist/config.d.ts +40 -0
- package/dist/config.js +123 -0
- package/dist/db.d.ts +35 -0
- package/dist/db.js +327 -0
- package/dist/decision-detector.d.ts +11 -0
- package/dist/decision-detector.js +47 -0
- package/dist/evaluator.d.ts +10 -0
- package/dist/evaluator.js +844 -0
- package/dist/git-activity-extractor.d.ts +35 -0
- package/dist/git-activity-extractor.js +167 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +54 -0
- package/dist/pr-report-generator.d.ts +20 -0
- package/dist/pr-report-generator.js +421 -0
- package/dist/shared/decision-classifier.d.ts +60 -0
- package/dist/shared/decision-classifier.js +385 -0
- package/dist/shared/decision-score.d.ts +7 -0
- package/dist/shared/decision-score.js +31 -0
- package/dist/shared/dimensions.d.ts +43 -0
- package/dist/shared/dimensions.js +361 -0
- package/dist/shared/scoring.d.ts +89 -0
- package/dist/shared/scoring.js +161 -0
- package/dist/shared/types.d.ts +108 -0
- package/dist/shared/types.js +9 -0
- package/dist/tools.d.ts +30 -0
- package/dist/tools.js +456 -0
- package/dist/transcript-parser.d.ts +36 -0
- package/dist/transcript-parser.js +201 -0
- package/hooks/auto-eval.sh +44 -0
- package/hooks/check-update.sh +26 -0
- package/hooks/debug-hook.sh +3 -0
- package/hooks/hooks.json +36 -0
- package/hooks/render-eval.sh +137 -0
- package/package.json +60 -0
- package/skills/eval/SKILL.md +12 -0
- package/skills/pr-report/SKILL.md +37 -0
- package/skills/status/SKILL.md +28 -0
- package/statusline.sh +46 -0
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PR Report Generator
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates generating a Decision Quality Score (DQS) report for a pull request.
|
|
5
|
+
* Uses git + gh CLI to gather branch/PR/commit data, then matches commits to sessions
|
|
6
|
+
* via timestamp and project_path. Falls back to heuristic detection if no plugin
|
|
7
|
+
* decisions are captured.
|
|
8
|
+
*
|
|
9
|
+
* STANDALONE port — no imports from @promptup/shared or workspace packages.
|
|
10
|
+
*/
|
|
11
|
+
import { execFile as _execFile } from 'node:child_process';
|
|
12
|
+
import { promisify } from 'node:util';
|
|
13
|
+
import { ulid } from 'ulid';
|
|
14
|
+
import { computeDQS } from './shared/decision-score.js';
|
|
15
|
+
import { getDecisionsBySessions, getSessionsByTimeRange, getMessagesBySession, getLatestEvaluation, getEvaluationsBySession, insertPRReport, getPRReportByBranch, insertDecision, insertMessages, getSessionsByBranch, getSession, } from './db.js';
|
|
16
|
+
import { detectDecisions } from './decision-detector.js';
|
|
17
|
+
import { extractAndStoreGitActivity } from './git-activity-extractor.js';
|
|
18
|
+
import { evaluateSession } from './evaluator.js';
|
|
19
|
+
import { parseTranscript } from './transcript-parser.js';
|
|
20
|
+
// Promise-returning wrapper around child_process.execFile so the helpers below can `await` it.
const execFile = promisify(_execFile);
|
|
21
|
+
// ─── Shell helpers ───────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Execute `cmd` with `args` through the promisified `execFile` and resolve
 * with the command's stdout, trimmed of surrounding whitespace.
 *
 * The child runs in `cwd` with a 15 second timeout. Any rejection coming
 * from `execFile` is passed on to the caller untouched.
 */
async function run(cmd, args, cwd) {
    const execOptions = { cwd, timeout: 15_000 };
    const result = await execFile(cmd, args, execOptions);
    return result.stdout.trim();
}
|
|
26
|
+
/**
 * Best-effort variant of `run`: resolves with the trimmed stdout when the
 * command succeeds and with `null` when `run` rejects for any reason
 * (the error itself is discarded).
 */
async function runSafe(cmd, args, cwd) {
    return run(cmd, args, cwd).catch(() => null);
}
|
|
34
|
+
// ─── Git / GH helpers ────────────────────────────────────────────────────────
|
|
35
|
+
/**
 * Resolve the name of the branch checked out in `cwd` using
 * `git branch --show-current`.
 *
 * @throws {Error} when git prints an empty branch name; a failure of the
 *   git command itself also propagates, because `run` is used rather than
 *   `runSafe`.
 */
async function getCurrentBranch(cwd) {
    const currentName = await run('git', ['branch', '--show-current'], cwd);
    if (currentName) {
        return currentName;
    }
    throw new Error('Unable to determine current git branch');
}
|
|
41
|
+
/**
 * Ask the GitHub CLI for the repository's `nameWithOwner` value.
 *
 * @returns the string printed by `gh repo view`, or `''` when the lookup
 *   fails (`runSafe` yields `null` in that case).
 */
async function getRepo(cwd) {
    const ghArgs = ['repo', 'view', '--json', 'nameWithOwner', '-q', '.nameWithOwner'];
    const nameWithOwner = await runSafe('gh', ghArgs, cwd);
    if (nameWithOwner == null) {
        return '';
    }
    return nameWithOwner;
}
|
|
45
|
+
/**
 * Work out which branch the given branch should be compared against.
 *
 * Lookup order:
 *  1. the `baseRefName` of the pull request for `branch`, as reported by `gh pr view`;
 *  2. the first of `main`, `master`, `develop` that `git rev-parse --verify` accepts;
 *  3. the literal `'main'`.
 *
 * @param branch - head branch whose PR should be inspected. When empty, `gh`
 *   falls back to the PR of the branch currently checked out in `cwd`.
 * @param cwd - working directory for the git / gh invocations.
 * @returns the detected base branch name.
 */
async function detectBaseBranch(branch, cwd) {
    // Primary: get the base branch from the PR itself via gh.
    // Pass the requested branch explicitly; without it `gh pr view` looks at
    // whatever branch happens to be checked out, which may be a different PR.
    const viewArgs = ['pr', 'view'];
    if (branch) {
        viewArgs.push(branch);
    }
    viewArgs.push('--json', 'baseRefName', '-q', '.baseRefName');
    const prBase = await runSafe('gh', viewArgs, cwd);
    if (prBase) {
        return prBase;
    }
    // Fallback: try common base branch names
    for (const candidate of ['main', 'master', 'develop']) {
        const exists = await runSafe('git', ['rev-parse', '--verify', candidate], cwd);
        if (exists !== null) {
            return candidate;
        }
    }
    return 'main';
}
|
|
58
|
+
/**
 * List the commits reachable from HEAD but not from the base branch.
 *
 * Runs `git log <base>..HEAD --format=%H|%s|%aI` and parses each output line.
 *
 * @param cwd - repository working directory.
 * @param baseBranch - branch to diff against; defaults to `'main'`.
 * @returns an array of `{ hash, subject, date }`; empty when git fails or
 *   prints nothing.
 */
async function getCommits(cwd, baseBranch) {
    const base = baseBranch ?? 'main';
    const raw = await runSafe('git', ['log', `${base}..HEAD`, '--format=%H|%s|%aI'], cwd);
    if (!raw) {
        return [];
    }
    return raw
        .split('\n')
        .filter(Boolean)
        .map(line => {
            // The subject may itself contain '|'. The hash (first field) and the
            // ISO date (last field) never do, so cut on the first and the last
            // separator and keep everything in between as the subject.
            const firstSep = line.indexOf('|');
            const lastSep = line.lastIndexOf('|');
            if (firstSep === -1 || firstSep === lastSep) {
                // Fewer than three fields: keep the lenient positional parsing.
                const [hash, subject, date] = line.split('|');
                return { hash: hash ?? '', subject: subject ?? '', date: date ?? '' };
            }
            return {
                hash: line.slice(0, firstSep),
                subject: line.slice(firstSep + 1, lastSep),
                date: line.slice(lastSep + 1),
            };
        });
}
|
|
71
|
+
/**
 * Look up the pull request whose head is `branch` via `gh pr list`.
 *
 * PRs in every state are requested; the jq filter sorts OPEN ones first and
 * keeps only the first entry.
 *
 * @returns the parsed PR object (`number`, `url`, `title`, `state`), or
 *   `null` when gh fails, finds nothing, or prints unparsable JSON.
 */
async function getPR(branch, cwd) {
    const listArgs = [
        'pr', 'list',
        `--head=${branch}`,
        '--state', 'all',
        '--json', 'number,url,title,state',
        '--jq', 'sort_by(.state == "OPEN" | not) | .[0]',
    ];
    const output = await runSafe('gh', listArgs, cwd);
    const nothingFound = !output || output === 'null';
    if (nothingFound) {
        return null;
    }
    let pr;
    try {
        pr = JSON.parse(output);
    }
    catch {
        pr = null;
    }
    return pr;
}
|
|
88
|
+
/**
 * Report whether the GitHub CLI can be used: `true` when `gh auth status`
 * completes without error, `false` when `runSafe` swallowed a failure.
 */
async function checkGhAvailable() {
    const authStatus = await runSafe('gh', ['auth', 'status']);
    if (authStatus === null) {
        return false;
    }
    return true;
}
|
|
92
|
+
/**
 * Post `body` as a comment on pull request `prNumber` using `gh pr comment`.
 * Uses `run`, so a failing gh invocation rejects.
 */
async function postPRComment(prNumber, body, cwd) {
    const target = String(prNumber);
    const commentArgs = ['pr', 'comment', target, '--body', body];
    await run('gh', commentArgs, cwd);
}
|
|
95
|
+
// ─── Session matching ─────────────────────────────────────────────────────────
|
|
96
|
+
/**
 * Build a two-hour time window centred on a commit timestamp.
 *
 * @param isoDate - date string understood by `new Date(...)`.
 * @returns `{ from, to }` as ISO-8601 UTC strings, one hour before and one
 *   hour after the commit.
 */
function windowAroundCommit(isoDate) {
    const ONE_HOUR_MS = 3_600_000;
    const commitTime = new Date(isoDate).getTime();
    const from = new Date(commitTime - ONE_HOUR_MS).toISOString();
    const to = new Date(commitTime + ONE_HOUR_MS).toISOString();
    return { from, to };
}
|
|
104
|
+
/**
 * Find the session ids that belong to a branch.
 *
 * Sessions returned by `getSessionsByBranch(branch)` are used as-is when
 * there are any. Otherwise every session that `getSessionsByTimeRange`
 * returns for the one-hour window either side of a commit (scoped by
 * `projectPath`) is collected, and git activity is extracted and stored for
 * those sessions so that later calls can take the exact path.
 */
function matchSessionsToBranch(branch, commits, projectPath) {
    // Exact path: sessions already recorded against this branch.
    const recorded = getSessionsByBranch(branch);
    if (recorded.length > 0) {
        return recorded;
    }
    // Legacy path: timestamp proximity to each commit.
    const matched = new Set();
    for (const { date } of commits) {
        const range = windowAroundCommit(date);
        for (const session of getSessionsByTimeRange(range.from, range.to, projectPath)) {
            matched.add(session.id);
        }
    }
    // Backfill git activity for the sessions found by the heuristic.
    const ids = [...matched];
    for (const id of ids) {
        extractAndStoreGitActivity(getMessagesBySession(id, 10000, 0), id);
    }
    return ids;
}
|
|
131
|
+
// ─── Decision gathering ───────────────────────────────────────────────────────
|
|
132
|
+
/**
 * Collect the decisions for a set of sessions.
 *
 * Decisions already stored for the sessions win. When there are none, the
 * messages of each session are run through `detectDecisions`, and every
 * detected decision is persisted with `insertDecision` before being returned.
 */
function gatherDecisions(sessionIds) {
    const stored = getDecisionsBySessions(sessionIds);
    if (stored.length > 0) {
        return stored;
    }
    const found = [];
    for (const sessionId of sessionIds) {
        const transcript = getMessagesBySession(sessionId, 10000, 0);
        for (const decision of detectDecisions(transcript, sessionId)) {
            // Persist so later queries find the decision without re-detecting.
            insertDecision(decision);
            found.push(decision);
        }
    }
    return found;
}
|
|
150
|
+
/** Return the decision's `signal`, treating a missing (null/undefined) value as `'low'`. */
function getSignal(d) {
    const { signal } = d;
    return signal == null ? 'low' : signal;
}
|
|
153
|
+
/**
 * Render a decision as a Markdown bullet: the first 120 characters of its
 * `context`, followed by ` [depth]` when the decision has a non-empty `depth`.
 */
function formatDecisionLine(d) {
    const summary = d.context.slice(0, 120);
    if (d.depth) {
        return `- ${summary} [${d.depth}]`;
    }
    return `- ${summary}`;
}
|
|
159
|
+
/**
 * Render a score as a bar of emoji squares.
 *
 * The filled part is proportional to `score / 100`. Its colour is green for
 * scores of 70 and above, yellow for 40 and above, red otherwise. The rest
 * of the bar is padded with white squares.
 *
 * Scores outside 0..100 are clamped for the fill length, and a non-finite
 * score counts as 0. Without this the padding count would go negative and
 * `String.prototype.repeat` would throw a RangeError.
 *
 * @param score - value on a 0..100 scale.
 * @param width - total number of squares (default 20).
 * @returns the bar, always `width` squares long for a non-negative integer `width`.
 */
function buildProgressBar(score, width = 20) {
    const ratio = Number.isFinite(score) ? Math.min(1, Math.max(0, score / 100)) : 0;
    const filled = Math.round(ratio * width);
    const empty = width - filled;
    const block = score >= 70 ? '🟩' : score >= 40 ? '🟨' : '🟥';
    return block.repeat(filled) + '⬜'.repeat(empty);
}
|
|
165
|
+
/**
 * Shorten a reasoning text so that it fits in a single Markdown table cell.
 *
 * Keeps only the first sentence (text before the first ". "), collapses
 * every run of whitespace, including newlines, into one space, cuts the
 * result to `max` characters with a trailing ellipsis, and replaces `|` with
 * `/` so the cell cannot break the table.
 *
 * @param reasoning - free text; falsy values yield `''`.
 * @param max - maximum length of the returned text (default 60).
 * @returns the single-line, table-safe summary.
 */
function truncReasoning(reasoning, max = 60) {
    if (!reasoning) {
        return '';
    }
    // String(...) guards against non-string values coming out of stored JSON.
    const firstSentence = String(reasoning)
        .split(/\.\s/)[0]
        .replace(/\s+/g, ' ')
        .trim();
    const text = firstSentence.length <= max ? firstSentence : firstSentence.slice(0, max - 1) + '…';
    return text.replace(/\|/g, '/');
}
|
|
173
|
+
/**
 * Render the PromptUP report as Markdown.
 *
 * Sections, in order:
 *  - composite score with a progress bar and per-dimension table (only when
 *    `compositeScore` is not null);
 *  - a one-line stats summary (message counts, sessions, DQS, decision count);
 *  - decisions: high-signal ones in bold, medium-signal ones plain, and a
 *    count of the low-signal ones;
 *  - a collapsible list of commits and a footer.
 *
 * @param opts - `{ compositeScore, dqs, sessionCount, decisions, commits,
 *   dimensionScores, userMessages, assistantMessages, evalCount }`.
 *   Other properties are ignored.
 * @returns the Markdown document as a single string.
 */
function buildMarkdown(opts) {
    const { compositeScore, dqs, sessionCount, decisions, commits, dimensionScores, userMessages, assistantMessages, evalCount } = opts;
    const dqsDisplay = dqs !== null ? `${dqs}/100` : 'N/A';
    // Shared by both "routine decision(s)" messages so they pluralise alike.
    const pluralSuffix = (count) => (count === 1 ? '' : 's');
    // ── Hero: Composite Score ──────────────────────────────────────────────
    const lines = [
        '## PromptUP Report',
        '',
    ];
    if (compositeScore !== null) {
        const heroBar = buildProgressBar(compositeScore, 20);
        const classification = compositeScore <= 40 ? 'Junior' : compositeScore <= 70 ? 'Middle' : 'Senior';
        lines.push(`### Composite Score: ${compositeScore}/100 — **${classification}**`, '', heroBar, '');
        // 3-column dimension table: Dimension | Score + Bar | Why
        if (dimensionScores && dimensionScores.length > 0) {
            lines.push('| Dimension | Score | Why |', '|-----------|-------|-----|');
            for (const d of dimensionScores) {
                const label = d.key.replace(/_/g, ' ');
                const dimBar = buildProgressBar(d.score, 8);
                const why = truncReasoning(d.reasoning);
                lines.push(`| ${label} | ${dimBar} ${d.score} | ${why} |`);
            }
            lines.push('');
        }
        if (evalCount && evalCount > 1) {
            lines.push(`*Score averaged across ${evalCount} evaluations*`, '');
        }
    }
    // ── Stats line ─────────────────────────────────────────────────────────
    const statParts = [];
    if (userMessages !== undefined) {
        statParts.push(`Developer prompts: **${userMessages}**`);
    }
    if (assistantMessages !== undefined) {
        statParts.push(`Claude responses: **${assistantMessages}**`);
    }
    statParts.push(`Sessions: ${sessionCount}`);
    statParts.push(`DQS: ${dqsDisplay}`);
    statParts.push(`Decisions: ${decisions.length}`);
    lines.push(statParts.join(' | '), '');
    // ── Decisions ──────────────────────────────────────────────────────────
    const highSignal = decisions.filter(d => getSignal(d) === 'high');
    const mediumSignal = decisions.filter(d => getSignal(d) === 'medium');
    const lowSignal = decisions.filter(d => getSignal(d) === 'low');
    const TYPE_ICONS = {
        steer: '🔀', reject: '🚫', validate: '✅',
        modify: '✏️', scope: '📐', accept: '👍',
    };
    if (highSignal.length > 0 || mediumSignal.length > 0) {
        lines.push('### Decisions', '');
        // Show high-signal decisions prominently
        for (const d of highSignal) {
            const icon = TYPE_ICONS[d.type] ?? '•';
            lines.push(`${icon} **${d.context.slice(0, 120)}**`);
        }
        // Show medium-signal decisions normally
        for (const d of mediumSignal) {
            const icon = TYPE_ICONS[d.type] ?? '•';
            lines.push(`${icon} ${d.context.slice(0, 120)}`);
        }
        if (lowSignal.length > 0) {
            lines.push('', `*+ ${lowSignal.length} routine decision${pluralSuffix(lowSignal.length)} not shown*`);
        }
        lines.push('');
    }
    else if (decisions.length > 0) {
        // Previously always printed "decisions", giving "1 routine decisions".
        lines.push(`### Decisions`, '', `*${decisions.length} routine decision${pluralSuffix(decisions.length)} (no high-signal choices detected)*`, '');
    }
    else {
        lines.push('### Decisions', '', '*No decisions captured. Run /eval first to extract decisions from the session.*', '');
    }
    // Commits block
    const commitLines = commits
        .map(c => `- \`${c.hash.slice(0, 7)}\` ${c.subject}`)
        .join('\n');
    lines.push('', `<details><summary>Commits (${commits.length})</summary>`, '', commitLines || '_No commits found._', '', '</details>', '', '---', '*Generated by [PromptUP](https://github.com/alex-muradov/ClawWork)*');
    return lines.join('\n');
}
|
|
248
|
+
// ─── Main export ──────────────────────────────────────────────────────────────
|
|
249
|
+
/**
 * Generate the PromptUP report for a branch / pull request, or return the
 * cached one.
 *
 * Flow: resolve the branch, return a cached report for (branch, repo) if one
 * exists, otherwise gather PR info, commits, matching sessions, decisions
 * and evaluations, render the Markdown, optionally post it as a PR comment,
 * persist the report row and return it.
 *
 * @param options - `branch` (defaults to the branch checked out in
 *   `projectPath`), `projectPath` (working directory for git / gh), and
 *   `post` (default `false`; when true and a PR was found, the Markdown is
 *   posted as a PR comment).
 * @returns `{ report, isNew }`; `isNew` is `false` when the report came from
 *   the cache.
 * @throws when the current branch cannot be determined, or when posting the
 *   PR comment fails (the report row is still persisted first).
 */
export async function generatePRReport(options) {
    const { post = false, projectPath } = options;
    // 1. Resolve branch
    const branch = options.branch ?? (await getCurrentBranch(projectPath));
    // 2. Check cache. The repo name is part of the cache key, so resolve it first.
    const ghAvailable = await checkGhAvailable();
    const repo = ghAvailable ? await getRepo(projectPath) : '';
    const cached = getPRReportByBranch(branch, repo);
    if (cached) {
        // NOTE(review): a cached report is returned as-is even when `post` is
        // true, so it is never posted from here; confirm this is intended.
        return { report: cached, isNew: false };
    }
    // 3. Get PR info
    let prInfo = null;
    if (ghAvailable) {
        prInfo = await getPR(branch, projectPath);
    }
    // 4. Get commits (base branch from PR via gh, or fallback detection)
    const baseBranch = await detectBaseBranch(branch, projectPath);
    const commits = await getCommits(projectPath, baseBranch);
    // 5. Match sessions: prefer exact git activity data, fall back to timestamps
    let sessionIds = matchSessionsToBranch(branch, commits, projectPath);
    // 5b. If no sessions matched (plugin not installed yet, no hooks), find the latest transcript
    if (sessionIds.length === 0) {
        const { findLatestTranscript } = await import('./transcript-parser.js');
        const latestTranscript = findLatestTranscript();
        if (latestTranscript) {
            const msgs = parseTranscript(latestTranscript);
            if (msgs.length >= 3) {
                // Register the transcript as a synthetic, already-completed session.
                const sid = ulid();
                const now = new Date().toISOString();
                const { insertSession } = await import('./db.js');
                insertSession({
                    id: sid,
                    project_path: projectPath ?? process.cwd(),
                    transcript_path: latestTranscript,
                    status: 'completed',
                    message_count: msgs.length,
                    started_at: msgs[0].created_at,
                    ended_at: msgs[msgs.length - 1].created_at,
                    created_at: now,
                });
                for (const m of msgs) {
                    m.session_id = sid;
                }
                insertMessages(msgs);
                sessionIds = [sid];
            }
        }
    }
    // 6. Gather decisions
    const decisions = gatherDecisions(sessionIds);
    // 7. Compute DQS, using validate decisions as proxy for validation rate
    const validateCount = decisions.filter(d => d.type === 'validate').length;
    const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
    const dqs = computeDQS(decisions, validationRate);
    // 8. Build decision breakdown (count per decision type)
    const breakdown = {};
    for (const d of decisions) {
        const t = d.type;
        breakdown[t] = (breakdown[t] ?? 0) + 1;
    }
    // 9. Auto-eval sessions that haven't been evaluated yet
    // This makes /pr-report self-contained: no need to run /eval first
    for (const sid of sessionIds) {
        const existingEval = getLatestEvaluation(sid);
        if (!existingEval) {
            // Try to find and parse the transcript for this session
            const session = getSession(sid);
            if (session?.transcript_path) {
                try {
                    const msgs = parseTranscript(session.transcript_path);
                    if (msgs.length >= 3) {
                        // Store messages if not already stored
                        for (const m of msgs) {
                            m.session_id = sid;
                        }
                        insertMessages(msgs);
                        // Run eval (extracts decisions + scores in one shot)
                        console.log(`[pr-report] Auto-evaluating session ${sid.slice(0, 8)}...`);
                        await evaluateSession(sid, msgs, 'manual');
                    }
                }
                catch (err) {
                    // Best effort: a failed auto-eval must not abort the report.
                    console.log(`[pr-report] Could not auto-eval session ${sid.slice(0, 8)}: ${err}`);
                }
            }
        }
    }
    // 10. Fetch evaluations (averaged across all evals) + message counts
    let compositeScore = null;
    let dimensionScores;
    let userMessages = 0;
    let assistantMessages = 0;
    // Reasoning is free text and cannot be averaged: the last eval seen wins per dimension.
    const latestReasoning = {};
    const allEvals = [];
    for (const sid of sessionIds) {
        const evals = getEvaluationsBySession(sid);
        for (const evalRow of evals) {
            try {
                // Parse once and reuse for both the numeric scores and the reasoning.
                const fullDims = JSON.parse(evalRow.dimension_scores);
                const dims = fullDims.map((d) => ({ key: d.key, score: d.score }));
                allEvals.push({ composite: evalRow.composite_score, dims });
                for (const d of fullDims) {
                    if (d.reasoning) {
                        latestReasoning[d.key] = d.reasoning;
                    }
                }
            }
            catch { /* skip malformed */ }
        }
        // Message counts by role
        const messages = getMessagesBySession(sid, 10000, 0);
        userMessages += messages.filter(m => m.role === 'user').length;
        assistantMessages += messages.filter(m => m.role === 'assistant').length;
    }
    // Average across all evaluations to reduce variance
    if (allEvals.length > 0) {
        compositeScore = Math.round(allEvals.reduce((sum, e) => sum + e.composite, 0) / allEvals.length);
        // Average each dimension across evals
        const dimSums = {};
        for (const e of allEvals) {
            for (const d of e.dims) {
                if (!dimSums[d.key]) {
                    dimSums[d.key] = { total: 0, count: 0 };
                }
                dimSums[d.key].total += d.score;
                dimSums[d.key].count += 1;
            }
        }
        dimensionScores = Object.entries(dimSums).map(([key, v]) => ({
            key,
            reasoning: latestReasoning[key],
            score: Math.round(v.total / v.count),
        }));
    }
    // 11. Generate markdown
    const markdown = buildMarkdown({
        compositeScore,
        dqs,
        sessionCount: sessionIds.length,
        decisions,
        commits,
        breakdown,
        dimensionScores,
        userMessages: userMessages || undefined,
        assistantMessages: assistantMessages || undefined,
        evalCount: allEvals.length > 1 ? allEvals.length : undefined,
    });
    // 12. Build report row
    const now = new Date().toISOString();
    const report = {
        id: ulid(),
        branch,
        repo,
        pr_number: prInfo?.number ?? null,
        pr_url: prInfo?.url ?? null,
        commits: JSON.stringify(commits),
        session_ids: JSON.stringify(sessionIds),
        total_decisions: decisions.length,
        decision_breakdown: JSON.stringify(breakdown),
        dqs,
        markdown,
        posted_at: null,
        created_at: now,
    };
    // 13. Optionally post as PR comment. This happens before persisting so the
    // stored row carries `posted_at`. Previously the row was inserted first
    // and only the in-memory object was updated afterwards.
    let postFailed = false;
    let postError;
    if (post && prInfo && ghAvailable) {
        try {
            await postPRComment(prInfo.number, markdown, projectPath);
            report.posted_at = new Date().toISOString();
        }
        catch (err) {
            postFailed = true;
            postError = err;
        }
    }
    // 14. Persist. A failed post still leaves a cached report, as before.
    insertPRReport(report);
    if (postFailed) {
        throw postError;
    }
    return { report, isNew: true };
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heuristic decision classifier for PromptUp.
|
|
3
|
+
*
|
|
4
|
+
* Classifies a user message (in context of the previous assistant turn and
|
|
5
|
+
* tool uses) into one of six decision types using ordered pattern rules.
|
|
6
|
+
* First matching rule wins.
|
|
7
|
+
*
|
|
8
|
+
* STANDALONE copy — no imports from @promptup/shared.
|
|
9
|
+
*/
|
|
10
|
+
import type { DecisionType, DecisionDepth, DecisionOpinionation, DecisionSignal } from './types.js';
|
|
11
|
+
export type { DecisionType, DecisionSignal };
|
|
12
|
+
/** Result produced when a user message is classified as a decision. */
export interface ClassifiedDecision {
    /** Decision type assigned to the message. */
    type: DecisionType;
    /** Combined AI->Dev summary, at most 120 characters. */
    context: string;
    /** What Claude did or proposed, at most 80 characters. */
    aiAction: string;
    /** What the developer decided, at most 80 characters. */
    devReaction: string;
    /** Identifier of the rule that matched. */
    matchedRule: string;
    /** File paths extracted from Edit/Write tool uses. */
    filesAffected: string[];
    /** Depth classification of the decision. */
    depth: DecisionDepth;
    /** Opinionation classification of the decision. */
    opinionation: DecisionOpinionation;
    /** Signal level (high / medium / low), used for filtering. */
    signal: DecisionSignal;
}
|
|
29
|
+
/** A single tool invocation from an assistant turn: the tool's name and its input payload. */
type ToolUse = {
    /** Name of the tool that was used. */
    name: string;
    /** Input passed to the tool, keyed by field name. */
    input: Record<string, unknown>;
};
|
|
33
|
+
/**
 * Produce a concise description (at most 80 characters) of what the AI did,
 * derived from the previous assistant message and its tool uses.
 *
 * @param prevAssistantMessage The preceding assistant message, or null.
 * @param prevToolUses Tool uses from the preceding assistant turn, or null.
 * @returns The short description of the AI's action.
 */
export declare function extractAiAction(prevAssistantMessage: string | null, prevToolUses: ToolUse[] | null): string;
|
|
38
|
+
/**
 * Assign a signal level (high / medium / low) to a decision.
 *
 * - high: architectural depth, OR high opinionation, OR reject, OR steer+tactical+
 * - medium: tactical + medium opinionation, OR validate, OR scope
 * - low: surface depth, OR low-opinionation accept, OR trivial
 *
 * @param type Decision type.
 * @param depth Depth classification of the decision.
 * @param opinionation Opinionation classification of the decision.
 * @param devReaction Summary of what the developer decided.
 * @returns The resulting signal level.
 */
export declare function classifySignal(type: DecisionType, depth: DecisionDepth, opinionation: DecisionOpinionation, devReaction: string): DecisionSignal;
|
|
45
|
+
/**
 * Classify the depth of a message.
 * NOTE(review): only the declaration is visible here. The classifySignal
 * notes mention "architectural", "tactical" and "surface" depths; confirm
 * against DecisionDepth in ./types.js.
 */
export declare function classifyDepth(msg: string): DecisionDepth;
|
|
46
|
+
/**
 * Classify how opinionated a message is.
 * NOTE(review): only the declaration is visible here. The classifySignal
 * notes mention high, medium and low opinionation; confirm against
 * DecisionOpinionation in ./types.js.
 */
export declare function classifyOpinionation(msg: string): DecisionOpinionation;
|
|
47
|
+
/**
 * Turn a raw user message into a clean decision summary of at most 80
 * characters. The summary is a reworded description of the decision, not a
 * quote of the message.
 *
 * @param userMessage The raw user message.
 * @param type The decision type the message was classified as.
 * @returns The decision summary.
 */
export declare function summarizeContext(userMessage: string, type: DecisionType): string;
|
|
52
|
+
/**
 * Classify a user message into a decision type with heuristic rules.
 *
 * @param userMessage The incoming user message to classify.
 * @param prevAssistantMessage The preceding assistant message, or null.
 * @param prevToolUses Tool uses from the preceding assistant turn, or null.
 * @returns A ClassifiedDecision when a rule matched, otherwise null.
 */
export declare function classifyDecision(userMessage: string, prevAssistantMessage: string | null, prevToolUses: ToolUse[] | null): ClassifiedDecision | null;
|