promptup-plugin 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/evaluator.js CHANGED
@@ -180,7 +180,7 @@ ${convo}
180
180
  Return ONLY valid JSON with no markdown formatting, no code fences, no extra text:
181
181
  {"dimensions":[{"key":"task_decomposition","score":0,"reasoning":"..."},{"key":"prompt_specificity","score":0,"reasoning":"..."},{"key":"output_validation","score":0,"reasoning":"..."},{"key":"iteration_quality","score":0,"reasoning":"..."},{"key":"strategic_tool_usage","score":0,"reasoning":"..."},{"key":"context_management","score":0,"reasoning":"..."}],"domain_dimensions":[{"key":"architectural_awareness","score":0,"reasoning":"..."},{"key":"error_anticipation","score":0,"reasoning":"..."},{"key":"technical_vocabulary","score":0,"reasoning":"..."},{"key":"dependency_reasoning","score":0,"reasoning":"..."},{"key":"tradeoff_articulation","score":0,"reasoning":"..."}],"tech_expertise":[{"roadmap":"typescript","score":75,"competencies":{"type_system":80,"generics":70}}],"recommendations":[{"dimension_key":"...","priority":"high","recommendation":"Add context to prompts","suggestions":["Instead of 'no', try 'no — terminal shows nothing after response'","Instead of 'yep', try 'yes, use the Stop hook approach'"]}],"activity_log":["Did X","Did Y","Fixed Z"],"decisions":[{"type":"steer","summary":"Chose bcrypt over argon2 — simpler dependency","signal":"high"},{"type":"validate","summary":"Ran integration tests after auth implementation","signal":"medium"}]}`;
182
182
  }
183
- function runClaudeCode(prompt, timeoutMs = 120_000) {
183
+ function runClaudeCode(prompt, timeoutMs = 180_000) {
184
184
  return new Promise((resolve, reject) => {
185
185
  // Strip CLAUDECODE env var to allow spawning from within a Claude Code session
186
186
  const env = { ...process.env };
@@ -196,7 +196,7 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
196
196
  proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
197
197
  const timer = setTimeout(() => {
198
198
  proc.kill('SIGTERM');
199
- reject(new Error(`Claude Code timed out after ${timeoutMs}ms`));
199
+ reject(new Error(`[timeout] Claude Code timed out after ${timeoutMs}ms (prompt size: ${prompt.length} chars)`));
200
200
  }, timeoutMs);
201
201
  proc.on('close', (code) => {
202
202
  clearTimeout(timer);
@@ -204,16 +204,22 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
204
204
  resolve(stdout.trim());
205
205
  }
206
206
  else {
207
- reject(new Error(`Claude Code exited with code ${code}: ${stderr.slice(0, 500)}`));
207
+ reject(new Error(`[exit] Claude Code exited with code ${code}: ${stderr.slice(0, 1000)}`));
208
208
  }
209
209
  });
210
210
  proc.on('error', (err) => {
211
211
  clearTimeout(timer);
212
- reject(err);
212
+ reject(new Error(`[spawn] Could not start claude: ${err.message}`));
213
213
  });
214
- // Write prompt to stdin and close
215
- proc.stdin.write(prompt);
216
- proc.stdin.end();
214
+ // Write prompt to stdin with backpressure handling
215
+ const ok = proc.stdin.write(prompt);
216
+ if (!ok) {
217
+ // Buffer is full — wait for drain before closing
218
+ proc.stdin.once('drain', () => { proc.stdin.end(); });
219
+ }
220
+ else {
221
+ proc.stdin.end();
222
+ }
217
223
  });
218
224
  }
219
225
  function parseClaudeResponse(raw) {
@@ -250,9 +256,11 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
250
256
  let recommendations = [];
251
257
  let usedClaude = false;
252
258
  try {
253
- console.log(`[eval] Running Claude Code evaluation for session ${sessionId.slice(0, 8)}...`);
254
259
  const prompt = buildEvalPrompt(messages);
255
- const rawOutput = await runClaudeCode(prompt);
260
+ // Scale timeout: 180s base + 1s per message over 100
261
+ const timeoutMs = 180_000 + Math.max(0, messages.length - 100) * 1000;
262
+ console.log(`[eval] Running Claude evaluation for ${sessionId.slice(0, 8)} (${messages.length} msgs, ${prompt.length} chars, timeout ${Math.round(timeoutMs / 1000)}s)...`);
263
+ const rawOutput = await runClaudeCode(prompt, timeoutMs);
256
264
  const result = parseClaudeResponse(rawOutput);
257
265
  usedClaude = true;
258
266
  // Store structured data in raw_evaluation (activity log + decisions + raw text)
@@ -340,7 +348,14 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
340
348
  console.log(`[eval] Claude Code evaluation complete for ${sessionId.slice(0, 8)}`);
341
349
  }
342
350
  catch (err) {
343
- console.warn(`[eval] Claude Code unavailable, using heuristic fallback:`, err.message);
351
+ const msg = err.message || String(err);
352
+ const category = msg.startsWith('[timeout]') ? 'TIMEOUT'
353
+ : msg.startsWith('[spawn]') ? 'SPAWN_FAILED'
354
+ : msg.startsWith('[exit]') ? 'PROCESS_ERROR'
355
+ : msg.includes('No JSON object found') ? 'PARSE_FAILED'
356
+ : msg.includes('Missing dimensions') ? 'INVALID_RESPONSE'
357
+ : 'UNKNOWN';
358
+ console.warn(`[eval] Claude failed (${category}), using heuristic fallback: ${msg}`);
344
359
  // Fall back to heuristic — generate basic activity log from messages
345
360
  const heuristic = heuristicEvaluate(messages, profile);
346
361
  dimensionScores = heuristic.dimensionScores;
@@ -445,10 +460,23 @@ function heuristicEvaluate(messages, profile) {
445
460
  if (!def)
446
461
  continue;
447
462
  const next = def.ranges.find(r => r.min > dim.score);
463
+ const tipMap = {
464
+ task_decomposition: 'Break your next task into 2-3 explicit steps before starting',
465
+ prompt_specificity: 'Add one constraint or example to your next prompt',
466
+ output_validation: 'Check one output against your expectation before moving on',
467
+ iteration_quality: 'When something doesn\'t work, name what failed before retrying',
468
+ strategic_tool_usage: 'Try a different tool or approach for your next task',
469
+ context_management: 'Summarize where you are before switching topics',
470
+ architectural_awareness: 'Name one system-level concern before making a change',
471
+ error_anticipation: 'Ask "what could break?" once before implementing',
472
+ technical_vocabulary: 'Use the precise term for what you\'re describing',
473
+ dependency_reasoning: 'Trace one data flow before changing it',
474
+ tradeoff_articulation: 'Name one alternative you considered and why you didn\'t pick it',
475
+ };
448
476
  recommendations.push({
449
477
  dimension_key: dim.key,
450
478
  priority: dim.score < 35 ? 'high' : dim.score < 55 ? 'medium' : 'low',
451
- recommendation: next ? `Aim for: ${next.description}` : 'Continue current approach',
479
+ recommendation: tipMap[dim.key] || (next ? `Build toward: ${next.label}` : 'Continue current approach'),
452
480
  suggestions: def.signals.slice(0, 2),
453
481
  });
454
482
  }
@@ -254,10 +254,7 @@ export async function generatePRReport(options) {
254
254
  // We need the repo for the cache key — get it first
255
255
  const ghAvailable = await checkGhAvailable();
256
256
  const repo = ghAvailable ? await getRepo(projectPath) : '';
257
- const cached = getPRReportByBranch(branch, repo);
258
- if (cached) {
259
- return { report: cached, isNew: false };
260
- }
257
+ // Always regenerate — no cache. Scores evolve as more evals run.
261
258
  // 3. Get PR info
262
259
  let prInfo = null;
263
260
  if (ghAvailable) {
@@ -295,19 +292,8 @@ export async function generatePRReport(options) {
295
292
  }
296
293
  }
297
294
  }
298
- // 6. Gather decisions
299
- const decisions = gatherDecisions(sessionIds);
300
- // 7. Compute DQS — use validate decisions as proxy for validation rate
301
- const validateCount = decisions.filter(d => d.type === 'validate').length;
302
- const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
303
- const dqs = computeDQS(decisions, validationRate);
304
- // 8. Build decision breakdown
305
- const breakdown = {};
306
- for (const d of decisions) {
307
- const t = d.type;
308
- breakdown[t] = (breakdown[t] ?? 0) + 1;
309
- }
310
- // 9. Auto-eval sessions that haven't been evaluated yet
295
+ // 6. Auto-eval sessions FIRST so decisions get extracted before DQS
296
+ // (decisions are gathered and DQS is computed afterwards, in steps 7-8)
311
297
  // This makes /pr-report self-contained — no need to run /eval first
312
298
  for (const sid of sessionIds) {
313
299
  const existingEval = getLatestEvaluation(sid);
@@ -333,6 +319,18 @@ export async function generatePRReport(options) {
333
319
  }
334
320
  }
335
321
  }
322
+ // 7. Now gather decisions (AFTER auto-eval extracted them)
323
+ const decisions = gatherDecisions(sessionIds);
324
+ // 8. Compute DQS
325
+ const validateCount = decisions.filter(d => d.type === 'validate').length;
326
+ const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
327
+ const dqs = computeDQS(decisions, validationRate);
328
+ // 9. Build decision breakdown
329
+ const breakdown = {};
330
+ for (const d of decisions) {
331
+ const t = d.type;
332
+ breakdown[t] = (breakdown[t] ?? 0) + 1;
333
+ }
336
334
  // 10. Fetch evaluations (averaged across all evals) + message counts
337
335
  let compositeScore = null;
338
336
  let dimensionScores;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "promptup-plugin",
3
- "version": "0.1.6",
3
+ "version": "0.1.8",
4
4
  "description": "AI coding skill evaluator for Claude Code — 11-dimension scoring, decision intelligence, PR reports",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",