taskmonkey-cli 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/tm.js +8 -0
- package/package.json +1 -1
- package/src/commands/optimize-prompt.js +64 -0
package/bin/tm.js
CHANGED
|
@@ -10,6 +10,7 @@ import { logs } from '../src/commands/logs.js';
|
|
|
10
10
|
import { chat } from '../src/commands/chat.js';
|
|
11
11
|
import { testChat } from '../src/commands/test-chat.js';
|
|
12
12
|
import { testConversations } from '../src/commands/test-conversations.js';
|
|
13
|
+
import { optimizePrompt } from '../src/commands/optimize-prompt.js';
|
|
13
14
|
|
|
14
15
|
const program = new Command();
|
|
15
16
|
|
|
@@ -72,4 +73,11 @@ program
|
|
|
72
73
|
.option('-v, --verbose', 'Show response excerpts')
|
|
73
74
|
.action(testConversations);
|
|
74
75
|
|
|
76
|
+
// Register the `optimize-prompt` subcommand. The task slug is optional and the
// model option defaults to 'gpt-4o'; the handler is `optimizePrompt`.
const optimizePromptCommand = program.command('optimize-prompt');
optimizePromptCommand.description('Analyze test failures and suggest prompt improvements via GPT-4');
optimizePromptCommand.option('-t, --task <slug>', 'Monkey task slug');
optimizePromptCommand.option('-m, --model <model>', 'OpenAI model', 'gpt-4o');
optimizePromptCommand.action(optimizePrompt);
|
|
82
|
+
|
|
75
83
|
program.parse();
|
package/package.json
CHANGED
package/src/commands/optimize-prompt.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import ora from 'ora';
|
|
3
|
+
import { createClient } from '../lib/api.js';
|
|
4
|
+
|
|
5
|
+
/**
 * CLI action for the `optimize-prompt` command.
 *
 * Posts the task slug and model name to `/api/test/optimize-prompt`, then
 * prints, in order: the pass count and rate, the failing tests with their
 * error lines, and (when the response carries a suggestion with reasoning)
 * the reasoning, the list of changes and the proposed new prompt.
 *
 * Fields read from the response: `rate`, `passed`, `total`,
 * `failures[].test`, `failures[].errors`, `suggestion.reasoning`,
 * `suggestion.changes`, `suggestion.new_prompt`.
 *
 * @param {object} options - Parsed command options.
 * @param {string} [options.task] - Task slug; sent as `null` when not given.
 * @param {string} [options.model] - Model name; falls back to `'gpt-4o'`.
 * @returns {Promise<void>} Resolves once the report has been printed. If
 *   anything inside the try block throws, the spinner reports the error
 *   message and the process exits with code 1.
 */
export async function optimizePrompt(options) {
  const client = createClient();

  const spinner = ora('Running tests and analyzing...').start();

  try {
    const result = await client.post('/api/test/optimize-prompt', {
      task: options.task || null,
      model: options.model || 'gpt-4o',
    });

    spinner.stop();

    // Test results.
    // `||` is intentional: a missing (or zero) `rate` is recomputed from
    // passed/total, and an empty test run counts as 100%.
    const rate = result.rate || (result.total > 0 ? Math.round((result.passed / result.total) * 100) : 100);
    const rateColor = rate === 100 ? chalk.green : rate >= 70 ? chalk.yellow : chalk.red;
    console.log(rateColor(`${result.passed}/${result.total} bestanden (${rate}%)`));

    if (result.passed === result.total) {
      console.log(chalk.green('\nAlle Tests bestanden — kein Optimierungsbedarf.'));
      return;
    }

    // Show failures
    if (result.failures?.length > 0) {
      console.log(chalk.red(`\n${result.failures.length} fehlgeschlagen:`));
      for (const f of result.failures) {
        console.log(chalk.red(`  ✗ ${f.test}`));
        // A failure entry without an `errors` list must not abort the report:
        // iterating `undefined` would throw and skip the suggestion below.
        for (const err of f.errors ?? []) {
          console.log(chalk.gray(`    ${err}`));
        }
      }
    }

    // Show suggestion
    if (result.suggestion?.reasoning) {
      console.log(chalk.cyan('\n━━━ GPT-4 Optimierungsvorschlag ━━━\n'));
      console.log(chalk.white(result.suggestion.reasoning));

      if (result.suggestion.changes?.length > 0) {
        console.log(chalk.cyan('\nÄnderungen:'));
        for (const change of result.suggestion.changes) {
          console.log(chalk.white(`  • ${change}`));
        }
      }

      if (result.suggestion.new_prompt) {
        console.log(chalk.cyan('\n━━━ Neuer Prompt ━━━\n'));
        console.log(chalk.gray(result.suggestion.new_prompt));
        console.log(chalk.cyan('\n━━━━━━━━━━━━━━━━━━━━\n'));
        console.log(chalk.yellow('Tipp: Kopiere den Prompt in deine Config, dann:'));
        console.log(chalk.gray('  tm sync && tm test-conversations'));
      }
    }

  } catch (err) {
    spinner.fail(err.message);
    process.exit(1);
  }
}
|