promptup-plugin 0.1.7 → 0.1.9
This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as they appear in their respective public registries. It is provided for informational purposes only.
- package/dist/evaluator.js +14 -0
- package/dist/pr-report-generator.js +24 -25
- package/package.json +1 -1
package/dist/evaluator.js
CHANGED
|
@@ -11,6 +11,7 @@ import { ulid } from 'ulid';
|
|
|
11
11
|
import { BASE_DIMENSIONS, BASE_DIMENSION_KEYS, DOMAIN_DIMENSIONS, DOMAIN_DIMENSION_KEYS, WEIGHT_PROFILES, } from './shared/dimensions.js';
|
|
12
12
|
import { computeCompositeScore, computeDomainComposite, computeTechComposite, computeOverallComposite, computeGrandComposite, computeRiskFlagsWithHistory, } from './shared/scoring.js';
|
|
13
13
|
import { getLatestEvaluation, insertEvaluation, insertDecision, } from './db.js';
|
|
14
|
+
import { detectDecisions } from './decision-detector.js';
|
|
14
15
|
/**
|
|
15
16
|
* Combined role + skill roadmaps catalog for tech detection.
|
|
16
17
|
* Mirrors the full list from @promptup/shared/roadmaps without importing it.
|
|
@@ -362,6 +363,19 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
|
|
|
362
363
|
domainDimensionScores = heuristic.domainDimensionScores;
|
|
363
364
|
techExpertise = heuristicTechDetect(messages);
|
|
364
365
|
recommendations = heuristic.recommendations;
|
|
366
|
+
// Extract decisions via heuristic detector (Claude path does this via LLM)
|
|
367
|
+
try {
|
|
368
|
+
const heuristicDecisions = detectDecisions(messages, sessionId);
|
|
369
|
+
if (heuristicDecisions.length > 0) {
|
|
370
|
+
for (const d of heuristicDecisions) {
|
|
371
|
+
insertDecision(d);
|
|
372
|
+
}
|
|
373
|
+
console.log(`[eval] Heuristic extracted ${heuristicDecisions.length} decisions`);
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
catch (decErr) {
|
|
377
|
+
console.warn(`[eval] Heuristic decision extraction failed: ${decErr}`);
|
|
378
|
+
}
|
|
365
379
|
rawEvaluation = JSON.stringify({
|
|
366
380
|
activity_log: heuristicActivityLog(messages),
|
|
367
381
|
domain_dimensions: domainDimensionScores,
|
|
@@ -130,22 +130,23 @@ function matchSessionsToBranch(branch, commits, projectPath) {
|
|
|
130
130
|
}
|
|
131
131
|
// ─── Decision gathering ───────────────────────────────────────────────────────
|
|
132
132
|
function gatherDecisions(sessionIds) {
|
|
133
|
-
//
|
|
133
|
+
// Get existing decisions from DB
|
|
134
134
|
const existing = getDecisionsBySessions(sessionIds);
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
// Fall back to heuristic detection from messages
|
|
138
|
-
const heuristic = [];
|
|
135
|
+
// Also run heuristic detection on sessions that have no decisions yet
|
|
136
|
+
const sessionsWithDecisions = new Set(existing.map(d => d.session_id));
|
|
139
137
|
for (const sid of sessionIds) {
|
|
138
|
+
if (sessionsWithDecisions.has(sid))
|
|
139
|
+
continue; // Already has decisions from Claude or previous heuristic
|
|
140
140
|
const messages = getMessagesBySession(sid, 10000, 0);
|
|
141
|
+
if (messages.length < 3)
|
|
142
|
+
continue;
|
|
141
143
|
const detected = detectDecisions(messages, sid);
|
|
142
|
-
// Persist detected decisions so they're available in future queries
|
|
143
144
|
for (const d of detected) {
|
|
144
145
|
insertDecision(d);
|
|
145
146
|
}
|
|
146
|
-
|
|
147
|
+
existing.push(...detected);
|
|
147
148
|
}
|
|
148
|
-
return
|
|
149
|
+
return existing;
|
|
149
150
|
}
|
|
150
151
|
function getSignal(d) {
|
|
151
152
|
return d.signal ?? 'low';
|
|
@@ -254,10 +255,7 @@ export async function generatePRReport(options) {
|
|
|
254
255
|
// We need the repo for the cache key — get it first
|
|
255
256
|
const ghAvailable = await checkGhAvailable();
|
|
256
257
|
const repo = ghAvailable ? await getRepo(projectPath) : '';
|
|
257
|
-
|
|
258
|
-
if (cached) {
|
|
259
|
-
return { report: cached, isNew: false };
|
|
260
|
-
}
|
|
258
|
+
// Always regenerate — no cache. Scores evolve as more evals run.
|
|
261
259
|
// 3. Get PR info
|
|
262
260
|
let prInfo = null;
|
|
263
261
|
if (ghAvailable) {
|
|
@@ -295,19 +293,8 @@ export async function generatePRReport(options) {
|
|
|
295
293
|
}
|
|
296
294
|
}
|
|
297
295
|
}
|
|
298
|
-
// 6.
|
|
299
|
-
|
|
300
|
-
// 7. Compute DQS — use validate decisions as proxy for validation rate
|
|
301
|
-
const validateCount = decisions.filter(d => d.type === 'validate').length;
|
|
302
|
-
const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
|
|
303
|
-
const dqs = computeDQS(decisions, validationRate);
|
|
304
|
-
// 8. Build decision breakdown
|
|
305
|
-
const breakdown = {};
|
|
306
|
-
for (const d of decisions) {
|
|
307
|
-
const t = d.type;
|
|
308
|
-
breakdown[t] = (breakdown[t] ?? 0) + 1;
|
|
309
|
-
}
|
|
310
|
-
// 9. Auto-eval sessions that haven't been evaluated yet
|
|
296
|
+
// 6. Auto-eval sessions FIRST so decisions get extracted before DQS
|
|
297
|
+
// This makes /pr-report self-contained — no need to run /eval first
|
|
311
298
|
// This makes /pr-report self-contained — no need to run /eval first
|
|
312
299
|
for (const sid of sessionIds) {
|
|
313
300
|
const existingEval = getLatestEvaluation(sid);
|
|
@@ -333,6 +320,18 @@ export async function generatePRReport(options) {
|
|
|
333
320
|
}
|
|
334
321
|
}
|
|
335
322
|
}
|
|
323
|
+
// 7. Now gather decisions (AFTER auto-eval extracted them)
|
|
324
|
+
const decisions = gatherDecisions(sessionIds);
|
|
325
|
+
// 8. Compute DQS
|
|
326
|
+
const validateCount = decisions.filter(d => d.type === 'validate').length;
|
|
327
|
+
const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
|
|
328
|
+
const dqs = computeDQS(decisions, validationRate);
|
|
329
|
+
// 9. Build decision breakdown
|
|
330
|
+
const breakdown = {};
|
|
331
|
+
for (const d of decisions) {
|
|
332
|
+
const t = d.type;
|
|
333
|
+
breakdown[t] = (breakdown[t] ?? 0) + 1;
|
|
334
|
+
}
|
|
336
335
|
// 10. Fetch evaluations (averaged across all evals) + message counts
|
|
337
336
|
let compositeScore = null;
|
|
338
337
|
let dimensionScores;
|
package/package.json
CHANGED