promptup-plugin 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/evaluator.js +39 -11
- package/package.json +1 -1
package/dist/evaluator.js
CHANGED
|
@@ -180,7 +180,7 @@ ${convo}
|
|
|
180
180
|
Return ONLY valid JSON with no markdown formatting, no code fences, no extra text:
|
|
181
181
|
{"dimensions":[{"key":"task_decomposition","score":0,"reasoning":"..."},{"key":"prompt_specificity","score":0,"reasoning":"..."},{"key":"output_validation","score":0,"reasoning":"..."},{"key":"iteration_quality","score":0,"reasoning":"..."},{"key":"strategic_tool_usage","score":0,"reasoning":"..."},{"key":"context_management","score":0,"reasoning":"..."}],"domain_dimensions":[{"key":"architectural_awareness","score":0,"reasoning":"..."},{"key":"error_anticipation","score":0,"reasoning":"..."},{"key":"technical_vocabulary","score":0,"reasoning":"..."},{"key":"dependency_reasoning","score":0,"reasoning":"..."},{"key":"tradeoff_articulation","score":0,"reasoning":"..."}],"tech_expertise":[{"roadmap":"typescript","score":75,"competencies":{"type_system":80,"generics":70}}],"recommendations":[{"dimension_key":"...","priority":"high","recommendation":"Add context to prompts","suggestions":["Instead of 'no', try 'no — terminal shows nothing after response'","Instead of 'yep', try 'yes, use the Stop hook approach'"]}],"activity_log":["Did X","Did Y","Fixed Z"],"decisions":[{"type":"steer","summary":"Chose bcrypt over argon2 — simpler dependency","signal":"high"},{"type":"validate","summary":"Ran integration tests after auth implementation","signal":"medium"}]}`;
|
|
182
182
|
}
|
|
183
|
-
function runClaudeCode(prompt, timeoutMs = 120_000) {
|
|
183
|
+
function runClaudeCode(prompt, timeoutMs = 180_000) {
|
|
184
184
|
return new Promise((resolve, reject) => {
|
|
185
185
|
// Strip CLAUDECODE env var to allow spawning from within a Claude Code session
|
|
186
186
|
const env = { ...process.env };
|
|
@@ -196,7 +196,7 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
|
|
|
196
196
|
proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
|
|
197
197
|
const timer = setTimeout(() => {
|
|
198
198
|
proc.kill('SIGTERM');
|
|
199
|
-
reject(new Error(`Claude Code timed out after ${timeoutMs}ms`));
|
|
199
|
+
reject(new Error(`[timeout] Claude Code timed out after ${timeoutMs}ms (prompt size: ${prompt.length} chars)`));
|
|
200
200
|
}, timeoutMs);
|
|
201
201
|
proc.on('close', (code) => {
|
|
202
202
|
clearTimeout(timer);
|
|
@@ -204,16 +204,22 @@ function runClaudeCode(prompt, timeoutMs = 120_000) {
|
|
|
204
204
|
resolve(stdout.trim());
|
|
205
205
|
}
|
|
206
206
|
else {
|
|
207
|
-
reject(new Error(`Claude Code exited with code ${code}: ${stderr.slice(0,
|
|
207
|
+
reject(new Error(`[exit] Claude Code exited with code ${code}: ${stderr.slice(0, 1000)}`));
|
|
208
208
|
}
|
|
209
209
|
});
|
|
210
210
|
proc.on('error', (err) => {
|
|
211
211
|
clearTimeout(timer);
|
|
212
|
-
reject(err);
|
|
212
|
+
reject(new Error(`[spawn] Could not start claude: ${err.message}`));
|
|
213
213
|
});
|
|
214
|
-
// Write prompt to stdin
|
|
215
|
-
proc.stdin.write(prompt);
|
|
216
|
-
|
|
214
|
+
// Write prompt to stdin with backpressure handling
|
|
215
|
+
const ok = proc.stdin.write(prompt);
|
|
216
|
+
if (!ok) {
|
|
217
|
+
// Buffer is full — wait for drain before closing
|
|
218
|
+
proc.stdin.once('drain', () => { proc.stdin.end(); });
|
|
219
|
+
}
|
|
220
|
+
else {
|
|
221
|
+
proc.stdin.end();
|
|
222
|
+
}
|
|
217
223
|
});
|
|
218
224
|
}
|
|
219
225
|
function parseClaudeResponse(raw) {
|
|
@@ -250,9 +256,11 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
|
|
|
250
256
|
let recommendations = [];
|
|
251
257
|
let usedClaude = false;
|
|
252
258
|
try {
|
|
253
|
-
console.log(`[eval] Running Claude Code evaluation for session ${sessionId.slice(0, 8)}...`);
|
|
254
259
|
const prompt = buildEvalPrompt(messages);
|
|
255
|
-
|
|
260
|
+
// Scale timeout: 180s base + 1s per message over 100
|
|
261
|
+
const timeoutMs = 180_000 + Math.max(0, messages.length - 100) * 1000;
|
|
262
|
+
console.log(`[eval] Running Claude evaluation for ${sessionId.slice(0, 8)} (${messages.length} msgs, ${prompt.length} chars, timeout ${Math.round(timeoutMs / 1000)}s)...`);
|
|
263
|
+
const rawOutput = await runClaudeCode(prompt, timeoutMs);
|
|
256
264
|
const result = parseClaudeResponse(rawOutput);
|
|
257
265
|
usedClaude = true;
|
|
258
266
|
// Store structured data in raw_evaluation (activity log + decisions + raw text)
|
|
@@ -340,7 +348,14 @@ export async function evaluateSession(sessionId, messages, triggerType, weightPr
|
|
|
340
348
|
console.log(`[eval] Claude Code evaluation complete for ${sessionId.slice(0, 8)}`);
|
|
341
349
|
}
|
|
342
350
|
catch (err) {
|
|
343
|
-
|
|
351
|
+
const msg = err.message || String(err);
|
|
352
|
+
const category = msg.startsWith('[timeout]') ? 'TIMEOUT'
|
|
353
|
+
: msg.startsWith('[spawn]') ? 'SPAWN_FAILED'
|
|
354
|
+
: msg.startsWith('[exit]') ? 'PROCESS_ERROR'
|
|
355
|
+
: msg.includes('No JSON object found') ? 'PARSE_FAILED'
|
|
356
|
+
: msg.includes('Missing dimensions') ? 'INVALID_RESPONSE'
|
|
357
|
+
: 'UNKNOWN';
|
|
358
|
+
console.warn(`[eval] Claude failed (${category}), using heuristic fallback: ${msg}`);
|
|
344
359
|
// Fall back to heuristic — generate basic activity log from messages
|
|
345
360
|
const heuristic = heuristicEvaluate(messages, profile);
|
|
346
361
|
dimensionScores = heuristic.dimensionScores;
|
|
@@ -445,10 +460,23 @@ function heuristicEvaluate(messages, profile) {
|
|
|
445
460
|
if (!def)
|
|
446
461
|
continue;
|
|
447
462
|
const next = def.ranges.find(r => r.min > dim.score);
|
|
463
|
+
const tipMap = {
|
|
464
|
+
task_decomposition: 'Break your next task into 2-3 explicit steps before starting',
|
|
465
|
+
prompt_specificity: 'Add one constraint or example to your next prompt',
|
|
466
|
+
output_validation: 'Check one output against your expectation before moving on',
|
|
467
|
+
iteration_quality: 'When something doesn\'t work, name what failed before retrying',
|
|
468
|
+
strategic_tool_usage: 'Try a different tool or approach for your next task',
|
|
469
|
+
context_management: 'Summarize where you are before switching topics',
|
|
470
|
+
architectural_awareness: 'Name one system-level concern before making a change',
|
|
471
|
+
error_anticipation: 'Ask "what could break?" once before implementing',
|
|
472
|
+
technical_vocabulary: 'Use the precise term for what you\'re describing',
|
|
473
|
+
dependency_reasoning: 'Trace one data flow before changing it',
|
|
474
|
+
tradeoff_articulation: 'Name one alternative you considered and why you didn\'t pick it',
|
|
475
|
+
};
|
|
448
476
|
recommendations.push({
|
|
449
477
|
dimension_key: dim.key,
|
|
450
478
|
priority: dim.score < 35 ? 'high' : dim.score < 55 ? 'medium' : 'low',
|
|
451
|
-
recommendation: next ? `
|
|
479
|
+
recommendation: tipMap[dim.key] || (next ? `Build toward: ${next.label}` : 'Continue current approach'),
|
|
452
480
|
suggestions: def.signals.slice(0, 2),
|
|
453
481
|
});
|
|
454
482
|
}
|
package/package.json
CHANGED