npm - promptup-plugin - Versions diffs - 0.1.6 → 0.1.8 - Mend

promptup-plugin 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/evaluator.js +39 -11
package/dist/pr-report-generator.js +15 -17
package/package.json +1 -1

package/dist/evaluator.js CHANGED

@@ -180,7 +180,7 @@ ${convo}
 Return ONLY valid JSON with no markdown formatting, no code fences, no extra text:
 {"dimensions":[{"key":"task_decomposition","score":0,"reasoning":"..."},{"key":"prompt_specificity","score":0,"reasoning":"..."},{"key":"output_validation","score":0,"reasoning":"..."},{"key":"iteration_quality","score":0,"reasoning":"..."},{"key":"strategic_tool_usage","score":0,"reasoning":"..."},{"key":"context_management","score":0,"reasoning":"..."}],"domain_dimensions":[{"key":"architectural_awareness","score":0,"reasoning":"..."},{"key":"error_anticipation","score":0,"reasoning":"..."},{"key":"technical_vocabulary","score":0,"reasoning":"..."},{"key":"dependency_reasoning","score":0,"reasoning":"..."},{"key":"tradeoff_articulation","score":0,"reasoning":"..."}],"tech_expertise":[{"roadmap":"typescript","score":75,"competencies":{"type_system":80,"generics":70}}],"recommendations":[{"dimension_key":"...","priority":"high","recommendation":"Add context to prompts","suggestions":["Instead of 'no', try 'no — terminal shows nothing after response'","Instead of 'yep', try 'yes, use the Stop hook approach'"]}],"activity_log":["Did X","Did Y","Fixed Z"],"decisions":[{"type":"steer","summary":"Chose bcrypt over argon2 — simpler dependency","signal":"high"},{"type":"validate","summary":"Ran integration tests after auth implementation","signal":"medium"}]}`;
 }
-function runClaudeCode(prompt, timeoutMs = 120_000) {
+function runClaudeCode(prompt, timeoutMs = 180_000) {
     return new Promise((resolve, reject) => {
         // Strip CLAUDECODE env var to allow spawning from within a Claude Code session
         const env = { ...process.env };
@@ -196,7 +196,7 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
         proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
         const timer = setTimeout(() => {
             proc.kill('SIGTERM');
-            reject(new Error(`Claude Code timed out after ${timeoutMs}ms`));
+            reject(new Error(`[timeout] Claude Code timed out after ${timeoutMs}ms (prompt size: ${prompt.length} chars)`));
         }, timeoutMs);
         proc.on('close', (code) => {
             clearTimeout(timer);
@@ -204,16 +204,22 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
                 resolve(stdout.trim());
             }
             else {
-                reject(new Error(`Claude Code exited with code ${code}: ${stderr.slice(0, 500)}`));
+                reject(new Error(`[exit] Claude Code exited with code ${code}: ${stderr.slice(0, 1000)}`));
             }
         });
         proc.on('error', (err) => {
             clearTimeout(timer);
-            reject(err);
+            reject(new Error(`[spawn] Could not start claude: ${err.message}`));
         });
-        // Write prompt to stdin and close
-        proc.stdin.write(prompt);
-        proc.stdin.end();
+        // Write prompt to stdin with backpressure handling
+        const ok = proc.stdin.write(prompt);
+        if (!ok) {
+            // Buffer is full — wait for drain before closing
+            proc.stdin.once('drain', () => { proc.stdin.end(); });
+        }
+        else {
+            proc.stdin.end();
+        }
     });
 }
 function parseClaudeResponse(raw) {
@@ -250,9 +256,11 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
     let recommendations = [];
     let usedClaude = false;
     try {
-        console.log(`[eval] Running Claude Code evaluation for session ${sessionId.slice(0, 8)}...`);
         const prompt = buildEvalPrompt(messages);
-        const rawOutput = await runClaudeCode(prompt);
+        // Scale timeout: 180s base + 1s per message over 100
+        const timeoutMs = 180_000 + Math.max(0, messages.length - 100) * 1000;
+        console.log(`[eval] Running Claude evaluation for ${sessionId.slice(0, 8)} (${messages.length} msgs, ${prompt.length} chars, timeout ${Math.round(timeoutMs / 1000)}s)...`);
+        const rawOutput = await runClaudeCode(prompt, timeoutMs);
         const result = parseClaudeResponse(rawOutput);
         usedClaude = true;
         // Store structured data in raw_evaluation (activity log + decisions + raw text)
@@ -340,7 +348,14 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
         console.log(`[eval] Claude Code evaluation complete for ${sessionId.slice(0, 8)}`);
     }
     catch (err) {
-        console.warn(`[eval] Claude Code unavailable, using heuristic fallback:`, err.message);
+        const msg = err.message || String(err);
+        const category = msg.startsWith('[timeout]') ? 'TIMEOUT'
+            : msg.startsWith('[spawn]') ? 'SPAWN_FAILED'
+            : msg.startsWith('[exit]') ? 'PROCESS_ERROR'
+            : msg.includes('No JSON object found') ? 'PARSE_FAILED'
+            : msg.includes('Missing dimensions') ? 'INVALID_RESPONSE'
+            : 'UNKNOWN';
+        console.warn(`[eval] Claude failed (${category}), using heuristic fallback: ${msg}`);
         // Fall back to heuristic — generate basic activity log from messages
         const heuristic = heuristicEvaluate(messages, profile);
         dimensionScores = heuristic.dimensionScores;
@@ -445,10 +460,23 @@ function heuristicEvaluate(messages, profile) {
         if (!def)
             continue;
         const next = def.ranges.find(r => r.min > dim.score);
+        const tipMap = {
+            task_decomposition: 'Break your next task into 2-3 explicit steps before starting',
+            prompt_specificity: 'Add one constraint or example to your next prompt',
+            output_validation: 'Check one output against your expectation before moving on',
+            iteration_quality: 'When something doesn\'t work, name what failed before retrying',
+            strategic_tool_usage: 'Try a different tool or approach for your next task',
+            context_management: 'Summarize where you are before switching topics',
+            architectural_awareness: 'Name one system-level concern before making a change',
+            error_anticipation: 'Ask "what could break?" once before implementing',
+            technical_vocabulary: 'Use the precise term for what you\'re describing',
+            dependency_reasoning: 'Trace one data flow before changing it',
+            tradeoff_articulation: 'Name one alternative you considered and why you didn\'t pick it',
+        };
         recommendations.push({
             dimension_key: dim.key,
             priority: dim.score < 35 ? 'high' : dim.score < 55 ? 'medium' : 'low',
-            recommendation: next ? `Aim for: ${next.description}` : 'Continue current approach',
+            recommendation: tipMap[dim.key] || (next ? `Build toward: ${next.label}` : 'Continue current approach'),
             suggestions: def.signals.slice(0, 2),
         });
     }

package/dist/pr-report-generator.js CHANGED

@@ -254,10 +254,7 @@ export async function generatePRReport(options) {
     // We need the repo for the cache key — get it first
     const ghAvailable = await checkGhAvailable();
     const repo = ghAvailable ? await getRepo(projectPath) : '';
-    const cached = getPRReportByBranch(branch, repo);
-    if (cached) {
-        return { report: cached, isNew: false };
-    }
+    // Always regenerate — no cache. Scores evolve as more evals run.
     // 3. Get PR info
     let prInfo = null;
     if (ghAvailable) {
@@ -295,19 +292,8 @@ export async function generatePRReport(options) {
             }
         }
     }
-    // 6. Gather decisions
-    const decisions = gatherDecisions(sessionIds);
-    // 7. Compute DQS — use validate decisions as proxy for validation rate
-    const validateCount = decisions.filter(d => d.type === 'validate').length;
-    const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
-    const dqs = computeDQS(decisions, validationRate);
-    // 8. Build decision breakdown
-    const breakdown = {};
-    for (const d of decisions) {
-        const t = d.type;
-        breakdown[t] = (breakdown[t] ?? 0) + 1;
-    }
-    // 9. Auto-eval sessions that haven't been evaluated yet
+    // 6. Auto-eval sessions FIRST so decisions get extracted before DQS
+    //    This makes /pr-report self-contained — no need to run /eval first
     //    This makes /pr-report self-contained — no need to run /eval first
     for (const sid of sessionIds) {
         const existingEval = getLatestEvaluation(sid);
@@ -333,6 +319,18 @@ export async function generatePRReport(options) {
             }
         }
     }
+    // 7. Now gather decisions (AFTER auto-eval extracted them)
+    const decisions = gatherDecisions(sessionIds);
+    // 8. Compute DQS
+    const validateCount = decisions.filter(d => d.type === 'validate').length;
+    const validationRate = decisions.length > 0 ? validateCount / decisions.length : 0;
+    const dqs = computeDQS(decisions, validationRate);
+    // 9. Build decision breakdown
+    const breakdown = {};
+    for (const d of decisions) {
+        const t = d.type;
+        breakdown[t] = (breakdown[t] ?? 0) + 1;
+    }
     // 10. Fetch evaluations (averaged across all evals) + message counts
     let compositeScore = null;
     let dimensionScores;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "promptup-plugin",
-  "version": "0.1.6",
+  "version": "0.1.8",
   "description": "AI coding skill evaluator for Claude Code — 11-dimension scoring, decision intelligence, PR reports",
   "type": "module",
   "main": "./dist/index.js",