taskmonkey-cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/tm.js CHANGED
@@ -8,6 +8,8 @@ import { pull } from '../src/commands/pull.js';
8
8
  import { watch } from '../src/commands/watch.js';
9
9
  import { logs } from '../src/commands/logs.js';
10
10
  import { chat } from '../src/commands/chat.js';
11
+ import { testChat } from '../src/commands/test-chat.js';
12
+ import { testConversations } from '../src/commands/test-conversations.js';
11
13
 
12
14
  const program = new Command();
13
15
 
@@ -57,4 +59,17 @@ program
57
59
  .option('-p, --public', 'Public chat (no auth, no tools)')
58
60
  .action(chat);
59
61
 
62
+ program
63
+ .command('test-chat <message>')
64
+ .description('Single-shot chat test (send message, see response + tool calls)')
65
+ .option('-t, --task <slug>', 'Monkey task context')
66
+ .action(testChat);
67
+
68
+ program
69
+ .command('test-conversations')
70
+ .description('Run all conversation_tests from config')
71
+ .option('-t, --task <slug>', 'Monkey task slug')
72
+ .option('-v, --verbose', 'Show response excerpts')
73
+ .action(testConversations);
74
+
60
75
  program.parse();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "taskmonkey-cli",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "TaskMonkey CLI — Remote dev tools for tenant config editing and tool testing",
5
5
  "bin": {
6
6
  "tm": "./bin/tm.js",
@@ -0,0 +1,14 @@
1
+ ---
2
+ description: Einzelne Chat-Nachricht testen und Antwort + Tool-Calls sehen
3
+ ---
4
+
5
+ Sende eine einzelne Nachricht an den Chat und zeige die Antwort mit allen Tool-Calls.
6
+ Nützlich, um Prompt-Änderungen schnell zu testen.
7
+
8
+ ```bash
9
+ tm test-chat "welche erdbeeren habt ihr?"
10
+ tm test-chat "Tomate 15" --task inventur
11
+ ```
12
+
13
+ Zeige dem User das Ergebnis. Wenn Tool-Calls gemacht wurden, liste sie auf.
14
+ Wenn das Ergebnis nicht den Erwartungen entspricht, schlage Prompt-Änderungen vor.
@@ -0,0 +1,25 @@
1
+ ---
2
+ description: Automatische Konversationstests aus der Config ausführen
3
+ ---
4
+
5
+ Führt alle conversation_tests aus der Tenant-Config aus und zeigt das Ergebnis.
6
+
7
+ ```bash
8
+ tm test-conversations
9
+ tm test-conversations --task inventur --verbose
10
+ ```
11
+
12
+ Test-Cases werden in der Config definiert:
13
+ ```php
14
+ 'conversation_tests' => [
15
+ [
16
+ 'description' => 'Erdbeere suchen',
17
+ 'user_message' => 'welche erdbeeren habt ihr?',
18
+ 'expect_tool_calls' => ['getProducts'],
19
+ 'expect_response_contains' => ['Erdbeere', 'Preis'],
20
+ ],
21
+ ]
22
+ ```
23
+
24
+ Bei fehlgeschlagenen Tests: Analysiere warum und schlage Prompt-Änderungen vor.
25
+ Nach Änderungen: `tm sync` und dann `tm test-conversations` erneut ausführen.
@@ -0,0 +1,43 @@
1
+ import chalk from 'chalk';
2
+ import ora from 'ora';
3
+ import { createClient } from '../lib/api.js';
4
+
5
/** Maximum number of characters of serialized tool arguments shown per tool call. */
const TOOL_ARGS_PREVIEW_LENGTH = 100;

/**
 * Serialize tool-call arguments into a short one-line preview.
 *
 * Truncation counts code points rather than UTF-16 code units, so a character
 * outside the BMP (e.g. an emoji) is never cut in half, and an ellipsis is
 * appended whenever the preview was shortened so it is not mistaken for the
 * complete JSON.
 *
 * @param {unknown} args - Tool-call arguments as returned by the API.
 * @param {number} [maxLength] - Maximum number of characters to keep.
 * @returns {string} The JSON text, shortened to `maxLength` characters plus `…` if needed.
 */
function previewToolArgs(args, maxLength = TOOL_ARGS_PREVIEW_LENGTH) {
  // JSON.stringify yields undefined for values it cannot serialize (e.g. functions).
  const json = JSON.stringify(args) ?? '';
  const characters = [...json];
  if (characters.length <= maxLength) {
    return json;
  }
  return `${characters.slice(0, maxLength).join('')}…`;
}

/**
 * Send a single chat message to the test endpoint and print the outcome.
 *
 * Posts `message` (and the optional task slug) to `/api/test/chat`, then prints
 * one line per entry in `tool_calls`, the `response` text, and the
 * `suggestions`, in that order. If anything throws, the error message is shown
 * through the spinner and the process exits with status 1.
 *
 * @param {string} message - The chat message to send.
 * @param {{ task?: string }} options - Parsed CLI options; `task` is sent as `null` when falsy.
 * @returns {Promise<void>}
 */
export async function testChat(message, options) {
  const client = createClient();

  const spinner = ora('Sending test message...').start();

  try {
    const result = await client.post('/api/test/chat', {
      message,
      task: options.task || null,
    });

    spinner.stop();

    // Tool calls: a call counts as successful unless `success` is explicitly false.
    if (result.tool_calls?.length > 0) {
      for (const tool of result.tool_calls) {
        const icon = tool.success !== false ? chalk.green('✓') : chalk.red('✗');
        const args = tool.args ? ` ${chalk.gray(previewToolArgs(tool.args))}` : '';
        console.log(` ${icon} ${chalk.yellow(tool.tool)}${args}`);
      }
      console.log();
    }

    // Response: collapse runs of three or more newlines into a single blank line.
    if (result.response) {
      console.log(result.response.replace(/\n{3,}/g, '\n\n').trim());
    }

    // Suggestions: rendered side by side on one line.
    if (result.suggestions?.length > 0) {
      console.log();
      console.log(result.suggestions.map((s) => chalk.bgGray.white(` ${s} `)).join(' '));
    }
  } catch (err) {
    spinner.fail(err.message);
    process.exit(1);
  }
}
@@ -0,0 +1,56 @@
1
+ import chalk from 'chalk';
2
+ import ora from 'ora';
3
+ import { createClient } from '../lib/api.js';
4
+
5
/**
 * Pick the marker shown in front of a conversation test line.
 *
 * @param {string} status - Test status as reported by the API.
 * @returns {string} A green check for `'pass'`, a red cross for `'fail'`, a gray circle otherwise.
 */
function statusIcon(status) {
  if (status === 'pass') {
    return chalk.green('✓');
  }
  if (status === 'fail') {
    return chalk.red('✗');
  }
  return chalk.gray('○');
}

/**
 * Run the configured conversation tests on the server and print a report.
 *
 * Posts the optional task slug to `/api/test/conversations`, prints one line
 * per entry in `results` (plus called tools, errors and, with `--verbose`, the
 * response excerpt), followed by a pass-rate summary. When at least one test
 * has status `'fail'`, `process.exitCode` is set to 1 so scripts and CI can
 * detect the failure. If the request or rendering throws, the error message is
 * shown through the spinner and the process exits with status 1.
 *
 * @param {{ task?: string, verbose?: boolean }} options - Parsed CLI options;
 *   `task` is sent as `null` when falsy.
 * @returns {Promise<void>}
 */
export async function testConversations(options) {
  const client = createClient();

  const spinner = ora('Running conversation tests...').start();

  try {
    const result = await client.post('/api/test/conversations', {
      task: options.task || null,
    });

    spinner.stop();

    if (result.total === 0) {
      console.log(chalk.yellow('Keine conversation_tests definiert.'));
      console.log(chalk.gray('Füge conversation_tests zur Tenant-Config hinzu.'));
      return;
    }

    // Treat a missing result list as empty so the summary below is still printed.
    const tests = result.results ?? [];

    // Results
    for (const test of tests) {
      console.log(`${statusIcon(test.status)} ${test.description}`);

      if (test.tools_called?.length > 0) {
        console.log(chalk.gray(` Tools: ${test.tools_called.join(', ')}`));
      }

      if (test.errors?.length > 0) {
        for (const err of test.errors) {
          console.log(chalk.red(` ✗ ${err}`));
        }
      }

      if (options.verbose && test.response_excerpt) {
        console.log(chalk.gray(` Response: ${test.response_excerpt}`));
      }
    }

    // Summary
    console.log();
    const rate = result.total > 0 ? Math.round((result.passed / result.total) * 100) : 0;
    const color = rate === 100 ? chalk.green : rate >= 70 ? chalk.yellow : chalk.red;
    console.log(color(`${result.passed}/${result.total} bestanden (${rate}%)`));

    // Signal failures through the exit status; exitCode (rather than exit())
    // lets pending output flush before the process ends.
    if (tests.some((test) => test.status === 'fail')) {
      process.exitCode = 1;
    }
  } catch (err) {
    spinner.fail(err.message);
    process.exit(1);
  }
}