promptup-plugin 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/evaluator.js +39 -11
  2. package/package.json +1 -1
package/dist/evaluator.js CHANGED
@@ -180,7 +180,7 @@ ${convo}
180
180
  Return ONLY valid JSON with no markdown formatting, no code fences, no extra text:
181
181
  {"dimensions":[{"key":"task_decomposition","score":0,"reasoning":"..."},{"key":"prompt_specificity","score":0,"reasoning":"..."},{"key":"output_validation","score":0,"reasoning":"..."},{"key":"iteration_quality","score":0,"reasoning":"..."},{"key":"strategic_tool_usage","score":0,"reasoning":"..."},{"key":"context_management","score":0,"reasoning":"..."}],"domain_dimensions":[{"key":"architectural_awareness","score":0,"reasoning":"..."},{"key":"error_anticipation","score":0,"reasoning":"..."},{"key":"technical_vocabulary","score":0,"reasoning":"..."},{"key":"dependency_reasoning","score":0,"reasoning":"..."},{"key":"tradeoff_articulation","score":0,"reasoning":"..."}],"tech_expertise":[{"roadmap":"typescript","score":75,"competencies":{"type_system":80,"generics":70}}],"recommendations":[{"dimension_key":"...","priority":"high","recommendation":"Add context to prompts","suggestions":["Instead of 'no', try 'no — terminal shows nothing after response'","Instead of 'yep', try 'yes, use the Stop hook approach'"]}],"activity_log":["Did X","Did Y","Fixed Z"],"decisions":[{"type":"steer","summary":"Chose bcrypt over argon2 — simpler dependency","signal":"high"},{"type":"validate","summary":"Ran integration tests after auth implementation","signal":"medium"}]}`;
182
182
  }
183
- function runClaudeCode(prompt, timeoutMs = 120_000) {
183
+ function runClaudeCode(prompt, timeoutMs = 180_000) {
184
184
  return new Promise((resolve, reject) => {
185
185
  // Strip CLAUDECODE env var to allow spawning from within a Claude Code session
186
186
  const env = { ...process.env };
@@ -196,7 +196,7 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
196
196
  proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
197
197
  const timer = setTimeout(() => {
198
198
  proc.kill('SIGTERM');
199
- reject(new Error(`Claude Code timed out after ${timeoutMs}ms`));
199
+ reject(new Error(`[timeout] Claude Code timed out after ${timeoutMs}ms (prompt size: ${prompt.length} chars)`));
200
200
  }, timeoutMs);
201
201
  proc.on('close', (code) => {
202
202
  clearTimeout(timer);
@@ -204,16 +204,22 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
204
204
  resolve(stdout.trim());
205
205
  }
206
206
  else {
207
- reject(new Error(`Claude Code exited with code ${code}: ${stderr.slice(0, 500)}`));
207
+ reject(new Error(`[exit] Claude Code exited with code ${code}: ${stderr.slice(0, 1000)}`));
208
208
  }
209
209
  });
210
210
  proc.on('error', (err) => {
211
211
  clearTimeout(timer);
212
- reject(err);
212
+ reject(new Error(`[spawn] Could not start claude: ${err.message}`));
213
213
  });
214
- // Write prompt to stdin and close
215
- proc.stdin.write(prompt);
216
- proc.stdin.end();
214
+ // Write prompt to stdin with backpressure handling
215
+ const ok = proc.stdin.write(prompt);
216
+ if (!ok) {
217
+ // Buffer is full — wait for drain before closing
218
+ proc.stdin.once('drain', () => { proc.stdin.end(); });
219
+ }
220
+ else {
221
+ proc.stdin.end();
222
+ }
217
223
  });
218
224
  }
219
225
  function parseClaudeResponse(raw) {
@@ -250,9 +256,11 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
250
256
  let recommendations = [];
251
257
  let usedClaude = false;
252
258
  try {
253
- console.log(`[eval] Running Claude Code evaluation for session ${sessionId.slice(0, 8)}...`);
254
259
  const prompt = buildEvalPrompt(messages);
255
- const rawOutput = await runClaudeCode(prompt);
260
+ // Scale timeout: 180s base + 1s per message over 100
261
+ const timeoutMs = 180_000 + Math.max(0, messages.length - 100) * 1000;
262
+ console.log(`[eval] Running Claude evaluation for ${sessionId.slice(0, 8)} (${messages.length} msgs, ${prompt.length} chars, timeout ${Math.round(timeoutMs / 1000)}s)...`);
263
+ const rawOutput = await runClaudeCode(prompt, timeoutMs);
256
264
  const result = parseClaudeResponse(rawOutput);
257
265
  usedClaude = true;
258
266
  // Store structured data in raw_evaluation (activity log + decisions + raw text)
@@ -340,7 +348,14 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
340
348
  console.log(`[eval] Claude Code evaluation complete for ${sessionId.slice(0, 8)}`);
341
349
  }
342
350
  catch (err) {
343
- console.warn(`[eval] Claude Code unavailable, using heuristic fallback:`, err.message);
351
+ const msg = err.message || String(err);
352
+ const category = msg.startsWith('[timeout]') ? 'TIMEOUT'
353
+ : msg.startsWith('[spawn]') ? 'SPAWN_FAILED'
354
+ : msg.startsWith('[exit]') ? 'PROCESS_ERROR'
355
+ : msg.includes('No JSON object found') ? 'PARSE_FAILED'
356
+ : msg.includes('Missing dimensions') ? 'INVALID_RESPONSE'
357
+ : 'UNKNOWN';
358
+ console.warn(`[eval] Claude failed (${category}), using heuristic fallback: ${msg}`);
344
359
  // Fall back to heuristic — generate basic activity log from messages
345
360
  const heuristic = heuristicEvaluate(messages, profile);
346
361
  dimensionScores = heuristic.dimensionScores;
@@ -445,10 +460,23 @@ function heuristicEvaluate(messages, profile) {
445
460
  if (!def)
446
461
  continue;
447
462
  const next = def.ranges.find(r => r.min > dim.score);
463
+ const tipMap = {
464
+ task_decomposition: 'Break your next task into 2-3 explicit steps before starting',
465
+ prompt_specificity: 'Add one constraint or example to your next prompt',
466
+ output_validation: 'Check one output against your expectation before moving on',
467
+ iteration_quality: 'When something doesn\'t work, name what failed before retrying',
468
+ strategic_tool_usage: 'Try a different tool or approach for your next task',
469
+ context_management: 'Summarize where you are before switching topics',
470
+ architectural_awareness: 'Name one system-level concern before making a change',
471
+ error_anticipation: 'Ask "what could break?" once before implementing',
472
+ technical_vocabulary: 'Use the precise term for what you\'re describing',
473
+ dependency_reasoning: 'Trace one data flow before changing it',
474
+ tradeoff_articulation: 'Name one alternative you considered and why you didn\'t pick it',
475
+ };
448
476
  recommendations.push({
449
477
  dimension_key: dim.key,
450
478
  priority: dim.score < 35 ? 'high' : dim.score < 55 ? 'medium' : 'low',
451
- recommendation: next ? `Aim for: ${next.description}` : 'Continue current approach',
479
+ recommendation: tipMap[dim.key] || (next ? `Build toward: ${next.label}` : 'Continue current approach'),
452
480
  suggestions: def.signals.slice(0, 2),
453
481
  });
454
482
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "promptup-plugin",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "description": "AI coding skill evaluator for Claude Code — 11-dimension scoring, decision intelligence, PR reports",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",