@agentuity/cli 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +36 -5
- package/dist/cli.js.map +1 -1
- package/dist/cmd/ai/cadence/index.d.ts +3 -0
- package/dist/cmd/ai/cadence/index.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/index.js +29 -0
- package/dist/cmd/ai/cadence/index.js.map +1 -0
- package/dist/cmd/ai/cadence/list.d.ts +3 -0
- package/dist/cmd/ai/cadence/list.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/list.js +167 -0
- package/dist/cmd/ai/cadence/list.js.map +1 -0
- package/dist/cmd/ai/cadence/pause.d.ts +3 -0
- package/dist/cmd/ai/cadence/pause.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/pause.js +103 -0
- package/dist/cmd/ai/cadence/pause.js.map +1 -0
- package/dist/cmd/ai/cadence/resume.d.ts +3 -0
- package/dist/cmd/ai/cadence/resume.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/resume.js +106 -0
- package/dist/cmd/ai/cadence/resume.js.map +1 -0
- package/dist/cmd/ai/cadence/status.d.ts +3 -0
- package/dist/cmd/ai/cadence/status.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/status.js +129 -0
- package/dist/cmd/ai/cadence/status.js.map +1 -0
- package/dist/cmd/ai/cadence/stop.d.ts +3 -0
- package/dist/cmd/ai/cadence/stop.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/stop.js +107 -0
- package/dist/cmd/ai/cadence/stop.js.map +1 -0
- package/dist/cmd/ai/cadence/util.d.ts +44 -0
- package/dist/cmd/ai/cadence/util.d.ts.map +1 -0
- package/dist/cmd/ai/cadence/util.js +52 -0
- package/dist/cmd/ai/cadence/util.js.map +1 -0
- package/dist/cmd/ai/index.d.ts.map +1 -1
- package/dist/cmd/ai/index.js +2 -1
- package/dist/cmd/ai/index.js.map +1 -1
- package/dist/cmd/auth/machine/setup.js +1 -1
- package/dist/cmd/auth/machine/setup.js.map +1 -1
- package/dist/cmd/auth/ssh/add.js +1 -1
- package/dist/cmd/auth/ssh/add.js.map +1 -1
- package/dist/cmd/cloud/eval/get.d.ts +2 -0
- package/dist/cmd/cloud/eval/get.d.ts.map +1 -0
- package/dist/cmd/cloud/eval/get.js +79 -0
- package/dist/cmd/cloud/eval/get.js.map +1 -0
- package/dist/cmd/cloud/eval/index.d.ts +2 -0
- package/dist/cmd/cloud/eval/index.d.ts.map +1 -0
- package/dist/cmd/cloud/eval/index.js +15 -0
- package/dist/cmd/cloud/eval/index.js.map +1 -0
- package/dist/cmd/cloud/eval/list.d.ts +2 -0
- package/dist/cmd/cloud/eval/list.d.ts.map +1 -0
- package/dist/cmd/cloud/eval/list.js +119 -0
- package/dist/cmd/cloud/eval/list.js.map +1 -0
- package/dist/cmd/cloud/eval-run/get.d.ts +2 -0
- package/dist/cmd/cloud/eval-run/get.d.ts.map +1 -0
- package/dist/cmd/cloud/eval-run/get.js +106 -0
- package/dist/cmd/cloud/eval-run/get.js.map +1 -0
- package/dist/cmd/cloud/eval-run/index.d.ts +2 -0
- package/dist/cmd/cloud/eval-run/index.d.ts.map +1 -0
- package/dist/cmd/cloud/eval-run/index.js +15 -0
- package/dist/cmd/cloud/eval-run/index.js.map +1 -0
- package/dist/cmd/cloud/eval-run/list.d.ts +2 -0
- package/dist/cmd/cloud/eval-run/list.d.ts.map +1 -0
- package/dist/cmd/cloud/eval-run/list.js +140 -0
- package/dist/cmd/cloud/eval-run/list.js.map +1 -0
- package/dist/cmd/cloud/index.d.ts.map +1 -1
- package/dist/cmd/cloud/index.js +4 -0
- package/dist/cmd/cloud/index.js.map +1 -1
- package/dist/cmd/cloud/machine/list.d.ts.map +1 -1
- package/dist/cmd/cloud/machine/list.js +16 -0
- package/dist/cmd/cloud/machine/list.js.map +1 -1
- package/dist/cmd/cloud/queue/dlq.d.ts.map +1 -1
- package/dist/cmd/cloud/queue/dlq.js +15 -10
- package/dist/cmd/cloud/queue/dlq.js.map +1 -1
- package/dist/cmd/cloud/sandbox/snapshot/build.d.ts.map +1 -1
- package/dist/cmd/cloud/sandbox/snapshot/build.js.map +1 -1
- package/dist/cmd/cloud/sandbox/snapshot/create.d.ts.map +1 -1
- package/dist/cmd/cloud/sandbox/snapshot/create.js +5 -1
- package/dist/cmd/cloud/sandbox/snapshot/create.js.map +1 -1
- package/dist/cmd/cloud/storage/upload.js +1 -1
- package/dist/cmd/cloud/storage/upload.js.map +1 -1
- package/dist/cmd/cloud/vector/stats.d.ts.map +1 -1
- package/dist/cmd/cloud/vector/stats.js +3 -1
- package/dist/cmd/cloud/vector/stats.js.map +1 -1
- package/dist/cmd/cloud/vector/upsert.js +1 -1
- package/dist/cmd/cloud/vector/upsert.js.map +1 -1
- package/dist/cmd/project/delete.d.ts.map +1 -1
- package/dist/cmd/project/delete.js +46 -10
- package/dist/cmd/project/delete.js.map +1 -1
- package/dist/cmd/setup/index.d.ts.map +1 -1
- package/dist/cmd/setup/index.js +4 -1
- package/dist/cmd/setup/index.js.map +1 -1
- package/dist/env-util.d.ts.map +1 -1
- package/dist/env-util.js +4 -1
- package/dist/env-util.js.map +1 -1
- package/dist/schema-parser.d.ts +17 -1
- package/dist/schema-parser.d.ts.map +1 -1
- package/dist/schema-parser.js +131 -2
- package/dist/schema-parser.js.map +1 -1
- package/dist/tui.d.ts.map +1 -1
- package/dist/tui.js +4 -0
- package/dist/tui.js.map +1 -1
- package/package.json +6 -6
- package/src/cli.ts +40 -5
- package/src/cmd/ai/cadence/index.ts +30 -0
- package/src/cmd/ai/cadence/list.ts +183 -0
- package/src/cmd/ai/cadence/pause.ts +119 -0
- package/src/cmd/ai/cadence/resume.ts +124 -0
- package/src/cmd/ai/cadence/status.ts +141 -0
- package/src/cmd/ai/cadence/stop.ts +124 -0
- package/src/cmd/ai/cadence/util.ts +86 -0
- package/src/cmd/ai/index.ts +2 -1
- package/src/cmd/auth/machine/setup.ts +1 -1
- package/src/cmd/auth/ssh/add.ts +1 -1
- package/src/cmd/cloud/eval/get.ts +85 -0
- package/src/cmd/cloud/eval/index.ts +15 -0
- package/src/cmd/cloud/eval/list.ts +129 -0
- package/src/cmd/cloud/eval-run/get.ts +113 -0
- package/src/cmd/cloud/eval-run/index.ts +15 -0
- package/src/cmd/cloud/eval-run/list.ts +150 -0
- package/src/cmd/cloud/index.ts +4 -0
- package/src/cmd/cloud/machine/list.ts +16 -0
- package/src/cmd/cloud/queue/dlq.ts +15 -10
- package/src/cmd/cloud/sandbox/snapshot/build.ts +5 -1
- package/src/cmd/cloud/sandbox/snapshot/create.ts +5 -1
- package/src/cmd/cloud/storage/upload.ts +1 -1
- package/src/cmd/cloud/vector/stats.ts +3 -1
- package/src/cmd/cloud/vector/upsert.ts +1 -1
- package/src/cmd/project/delete.ts +55 -10
- package/src/cmd/setup/index.ts +4 -1
- package/src/env-util.ts +4 -1
- package/src/schema-parser.ts +150 -2
- package/src/tui.ts +5 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { createSubcommand } from '../../../types';
|
|
3
|
+
import * as tui from '../../../tui';
|
|
4
|
+
import { APIError, evalGet } from '@agentuity/server';
|
|
5
|
+
import { getCommand } from '../../../command-prefix';
|
|
6
|
+
import { ErrorCode } from '../../../errors';
|
|
7
|
+
|
|
8
|
+
/**
 * Response schema for `cloud eval get`.
 *
 * Keys are snake_case; the handler maps the camelCase fields of the
 * fetched eval onto them. `identifier` and `description` may be null.
 */
const EvalGetResponseSchema = z.object({
	// Identity
	id: z.string().describe('Eval ID'),
	name: z.string().describe('Eval name'),
	identifier: z.string().nullable().describe('Stable eval identifier'),
	agent_identifier: z.string().describe('Agent identifier'),

	// Timestamps (kept as strings, exactly as received)
	created_at: z.string().describe('Creation timestamp'),
	updated_at: z.string().describe('Last updated timestamp'),

	// Ownership
	project_id: z.string().describe('Project ID'),
	org_id: z.string().describe('Organization ID'),

	// Misc
	description: z.string().nullable().describe('Eval description'),
	devmode: z.boolean().describe('Whether this is a devmode eval'),
});
|
|
20
|
+
|
|
21
|
+
/**
 * `cloud eval get <eval_id>` — fetches one eval and returns it in the
 * snake_case shape described by `EvalGetResponseSchema`.
 *
 * Unless `options.json` is set, the eval is also rendered as a vertical
 * table. A 404 from the API is reported as RESOURCE_NOT_FOUND; any other
 * failure is reported as API_ERROR.
 */
export const getSubcommand = createSubcommand({
	name: 'get',
	description: 'Get details about a specific eval',
	tags: ['read-only', 'fast', 'requires-auth'],
	examples: [
		{
			command: getCommand('cloud eval get eval_abc123xyz'),
			description: 'Get an eval by ID',
		},
	],
	requires: { auth: true, apiClient: true },
	idempotent: true,
	schema: {
		args: z.object({
			eval_id: z.string().describe('Eval ID'),
		}),
		response: EvalGetResponseSchema,
	},
	async handler(ctx) {
		const { apiClient, args, options } = ctx;

		try {
			const record = await evalGet(apiClient, args.eval_id);

			// Structured payload returned to the caller in both output modes.
			const payload = {
				id: record.id,
				name: record.name,
				identifier: record.identifier,
				agent_identifier: record.agentIdentifier,
				created_at: record.createdAt,
				updated_at: record.updatedAt,
				project_id: record.projectId,
				org_id: record.orgId,
				description: record.description,
				devmode: record.devmode,
			};

			// Human-readable rendering is skipped entirely in JSON mode.
			if (!options.json) {
				const rows: Record<string, string> = {
					ID: record.id,
					Name: record.name,
					Identifier: record.identifier || '-',
					Agent: record.agentIdentifier,
					Project: record.projectId,
					Organization: record.orgId,
					Description: record.description || '-',
					Devmode: record.devmode ? tui.colorSuccess('✓') : tui.colorError('✗'),
					Created: new Date(record.createdAt).toLocaleString(),
					Updated: new Date(record.updatedAt).toLocaleString(),
				};

				tui.table([rows], Object.keys(rows), { layout: 'vertical', padStart: ' ' });
			}

			return payload;
		} catch (ex) {
			// A missing eval gets its own message and error code.
			const isMissing = ex instanceof APIError && ex.status === 404;
			if (isMissing) {
				tui.fatal(`Eval ${args.eval_id} not found`, ErrorCode.RESOURCE_NOT_FOUND);
			}
			tui.fatal(`Failed to get eval: ${ex}`, ErrorCode.API_ERROR);
		}
	},
});
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { createCommand } from '../../../types';
|
|
2
|
+
import { getSubcommand } from './get';
|
|
3
|
+
import { listSubcommand } from './list';
|
|
4
|
+
import { getCommand } from '../../../command-prefix';
|
|
5
|
+
|
|
6
|
+
/**
 * `cloud eval` parent command; groups the `get` and `list` subcommands.
 */
export const evalCommand = createCommand({
	name: 'eval',
	description: 'Manage evals',
	tags: ['requires-auth'],
	examples: [
		{
			command: getCommand('cloud eval list'),
			description: 'List all evals',
		},
		{
			command: getCommand('cloud eval get <id>'),
			description: 'Get eval details',
		},
	],
	subcommands: [getSubcommand, listSubcommand],
});
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { createSubcommand } from '../../../types';
|
|
3
|
+
import * as tui from '../../../tui';
|
|
4
|
+
import { getCommand } from '../../../command-prefix';
|
|
5
|
+
import { ErrorCode } from '../../../errors';
|
|
6
|
+
import { evalList } from '@agentuity/server';
|
|
7
|
+
|
|
8
|
+
/**
 * Response schema for `cloud eval list`: an array of eval summaries.
 *
 * Each entry uses snake_case keys; `identifier` and `description` may be
 * null. Unlike the single-eval response there is no `org_id` field.
 */
const EvalListResponseSchema = z.array(
	z.object({
		// Identity
		id: z.string().describe('Eval ID'),
		name: z.string().describe('Eval name'),
		identifier: z.string().nullable().describe('Stable eval identifier'),
		agent_identifier: z.string().describe('Agent identifier'),

		// Timestamps (kept as strings, exactly as received)
		created_at: z.string().describe('Creation timestamp'),
		updated_at: z.string().describe('Last updated timestamp'),

		// Ownership and misc
		project_id: z.string().describe('Project ID'),
		description: z.string().nullable().describe('Eval description'),
		devmode: z.boolean().describe('Whether this is a devmode eval'),
	})
);
|
|
21
|
+
|
|
22
|
+
/**
 * `cloud eval list` (alias `ls`) — lists evals, optionally filtered by
 * project and agent.
 *
 * Project scoping: `--all` disables it; otherwise `--project-id` is used,
 * falling back to `ctx.project?.projectId` when present.
 *
 * Returns the evals in the snake_case shape of `EvalListResponseSchema`.
 * Unless `options.json` is set, a table is printed as well (or an info
 * line when there is nothing to show). Failures are reported as API_ERROR.
 */
export const listSubcommand = createSubcommand({
	name: 'list',
	description: 'List evals',
	tags: ['read-only', 'fast', 'requires-auth'],
	examples: [
		{ command: getCommand('cloud eval list'), description: 'List 10 most recent evals' },
		{
			command: getCommand('cloud eval list --count=25'),
			description: 'List 25 most recent evals',
		},
		{
			command: getCommand('cloud eval list --project-id=proj_*'),
			description: 'Filter by project',
		},
		{
			command: getCommand('cloud eval list --agent-id=agent_*'),
			description: 'Filter by agent',
		},
		{
			command: getCommand('cloud eval list --all'),
			description: 'List all evals regardless of project context',
		},
	],
	aliases: ['ls'],
	requires: { auth: true, apiClient: true },
	optional: { project: true },
	idempotent: true,
	pagination: {
		supported: true,
		defaultLimit: 10,
		maxLimit: 100,
		parameters: {
			limit: 'count',
		},
	},
	schema: {
		options: z.object({
			count: z.coerce
				.number()
				.int()
				.min(1)
				.max(100)
				.default(10)
				.describe('Number of evals to list (1–100)'),
			projectId: z.string().optional().describe('Filter by project ID'),
			agentId: z.string().optional().describe('Filter by agent ID'),
			all: z.boolean().optional().describe('List all evals regardless of project context'),
		}),
		response: EvalListResponseSchema,
	},
	webUrl: (ctx) => {
		// Same project resolution as the handler; no project means no web URL.
		const projectId = ctx.opts?.all ? undefined : ctx.opts?.projectId || ctx.project?.projectId;
		return projectId ? `/projects/${encodeURIComponent(projectId)}/evals` : undefined;
	},
	async handler(ctx) {
		const { apiClient, project, opts, options } = ctx;

		const projectId = opts.all ? undefined : opts.projectId || project?.projectId;

		try {
			const fetched = await evalList(apiClient, {
				projectId,
				agentId: opts.agentId,
			});

			// Honor --count: the option was previously declared (and advertised in
			// the examples and pagination metadata) but never applied.
			// NOTE(review): assumes the API returns evals newest-first, so keeping
			// the leading entries yields the "most recent" ones — confirm. If
			// evalList supports a server-side limit, prefer passing it there.
			const evals = fetched.slice(0, opts.count);

			const result = evals.map((e) => ({
				id: e.id,
				name: e.name,
				identifier: e.identifier,
				agent_identifier: e.agentIdentifier,
				created_at: e.createdAt,
				updated_at: e.updatedAt,
				project_id: e.projectId,
				description: e.description,
				devmode: e.devmode,
			}));

			if (options.json) {
				return result;
			}

			if (evals.length === 0) {
				tui.info('No evals found.');
				return [];
			}

			const tableData = evals.map((e) => ({
				ID: e.id,
				// Names longer than 30 characters are cut to 27 plus an ellipsis.
				Name: e.name.length > 30 ? e.name.substring(0, 27) + '...' : e.name,
				Agent: e.agentIdentifier || '-',
				Devmode: e.devmode ? '✓' : '✗',
				Created: new Date(e.createdAt).toLocaleString(),
			}));

			tui.table(tableData, [
				{ name: 'ID', alignment: 'left' },
				{ name: 'Name', alignment: 'left' },
				{ name: 'Agent', alignment: 'left' },
				{ name: 'Devmode', alignment: 'center' },
				{ name: 'Created', alignment: 'left' },
			]);

			return result;
		} catch (ex) {
			tui.fatal(`Failed to list evals: ${ex}`, ErrorCode.API_ERROR);
		}
	},
});
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { createSubcommand } from '../../../types';
|
|
3
|
+
import * as tui from '../../../tui';
|
|
4
|
+
import { APIError, evalRunGet } from '@agentuity/server';
|
|
5
|
+
import { getCommand } from '../../../command-prefix';
|
|
6
|
+
import { ErrorCode } from '../../../errors';
|
|
7
|
+
|
|
8
|
+
/**
 * Response schema for `cloud eval-run get`.
 *
 * Keys are snake_case; the handler maps the camelCase fields of the
 * fetched eval run onto them. `result` is left untyped (`z.any()`).
 */
const EvalRunGetResponseSchema = z.object({
	// Identity and relations
	id: z.string().describe('Eval run ID'),
	eval_id: z.string().describe('Eval ID'),
	eval_name: z.string().nullable().describe('Eval name'),
	agent_identifier: z.string().nullable().describe('Agent identifier'),
	session_id: z.string().describe('Session ID'),

	// Timestamps (kept as strings, exactly as received)
	created_at: z.string().describe('Creation timestamp'),
	updated_at: z.string().describe('Last updated timestamp'),

	// Ownership
	project_id: z.string().describe('Project ID'),
	org_id: z.string().describe('Organization ID'),
	deployment_id: z.string().nullable().describe('Deployment ID'),

	// State and outcome
	devmode: z.boolean().describe('Whether this is a devmode run'),
	pending: z.boolean().describe('Whether the eval run is pending'),
	success: z.boolean().describe('Whether the eval run succeeded'),
	error: z.string().nullable().describe('Error message if failed'),
	result: z.any().nullable().describe('Eval run result'),
});
|
|
25
|
+
|
|
26
|
+
/**
 * `cloud eval-run get <eval_run_id>` — fetches one eval run and returns it
 * in the snake_case shape described by `EvalRunGetResponseSchema`.
 *
 * Unless `options.json` is set, the run is rendered as a vertical table
 * (optional rows appear only when their value is truthy), followed by the
 * pretty-printed `result` object when it has at least one key. A 404 from
 * the API is reported as RESOURCE_NOT_FOUND; anything else as API_ERROR.
 */
export const getSubcommand = createSubcommand({
	name: 'get',
	description: 'Get details about a specific eval run',
	tags: ['read-only', 'fast', 'requires-auth'],
	examples: [
		{
			command: getCommand('cloud eval-run get evalrun_abc123xyz'),
			description: 'Get an eval run by ID',
		},
	],
	requires: { auth: true, apiClient: true },
	idempotent: true,
	schema: {
		args: z.object({
			eval_run_id: z.string().describe('Eval run ID'),
		}),
		response: EvalRunGetResponseSchema,
	},
	async handler(ctx) {
		const { apiClient, args, options } = ctx;

		try {
			const run = await evalRunGet(apiClient, args.eval_run_id);

			// Structured payload returned to the caller in both output modes.
			const payload = {
				id: run.id,
				eval_id: run.evalId,
				eval_name: run.evalName,
				agent_identifier: run.agentIdentifier,
				session_id: run.sessionId,
				created_at: run.createdAt,
				updated_at: run.updatedAt,
				project_id: run.projectId,
				org_id: run.orgId,
				deployment_id: run.deploymentId,
				devmode: run.devmode,
				pending: run.pending,
				success: run.success,
				error: run.error,
				result: run.result,
			};

			if (options.json) {
				return payload;
			}

			// Key order defines row order; conditional spreads contribute a row
			// only when the underlying value is truthy.
			const rows: Record<string, string> = {
				ID: run.id,
				'Eval ID': run.evalId,
				...(run.evalName ? { 'Eval Name': run.evalName } : {}),
				...(run.agentIdentifier ? { Agent: run.agentIdentifier } : {}),
				'Session ID': run.sessionId,
				Project: run.projectId,
				Organization: run.orgId,
				Devmode: run.devmode ? '✓ Yes' : '✗ No',
				Success: run.success ? tui.colorSuccess('✓') : tui.colorError('✗'),
				Pending: run.pending ? '⏳ Yes' : '✓ No',
				...(run.result?.reason ? { Reason: run.result.reason } : {}),
				...(run.error ? { Error: tui.colorError(run.error) } : {}),
				Created: new Date(run.createdAt).toLocaleString(),
				Updated: new Date(run.updatedAt).toLocaleString(),
			};

			tui.table([rows], Object.keys(rows), { layout: 'vertical', padStart: ' ' });

			// Dump the full result object below the table when it is non-empty.
			const hasResult = run.result && Object.keys(run.result).length > 0;
			if (hasResult) {
				console.log('');
				console.log(tui.bold('Result:'));
				console.log(JSON.stringify(run.result, null, 2));
			}

			return payload;
		} catch (ex) {
			// A missing eval run gets its own message and error code.
			const isMissing = ex instanceof APIError && ex.status === 404;
			if (isMissing) {
				tui.fatal(`Eval run ${args.eval_run_id} not found`, ErrorCode.RESOURCE_NOT_FOUND);
			}
			tui.fatal(`Failed to get eval run: ${ex}`, ErrorCode.API_ERROR);
		}
	},
});
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { createCommand } from '../../../types';
|
|
2
|
+
import { getSubcommand } from './get';
|
|
3
|
+
import { listSubcommand } from './list';
|
|
4
|
+
import { getCommand } from '../../../command-prefix';
|
|
5
|
+
|
|
6
|
+
/**
 * `cloud eval-run` parent command; groups the `get` and `list` subcommands.
 */
export const evalRunCommand = createCommand({
	name: 'eval-run',
	description: 'Manage eval runs',
	tags: ['requires-auth'],
	examples: [
		{
			command: getCommand('cloud eval-run list'),
			description: 'List all eval runs',
		},
		{
			command: getCommand('cloud eval-run get <id>'),
			description: 'Get eval run details',
		},
	],
	subcommands: [getSubcommand, listSubcommand],
});
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { createSubcommand } from '../../../types';
|
|
3
|
+
import * as tui from '../../../tui';
|
|
4
|
+
import { getCommand } from '../../../command-prefix';
|
|
5
|
+
import { ErrorCode } from '../../../errors';
|
|
6
|
+
import { evalRunList } from '@agentuity/server';
|
|
7
|
+
|
|
8
|
+
/**
 * Response schema for `cloud eval-run list`: an array of eval-run summaries.
 *
 * Each entry uses snake_case keys. `reason` is a flattened field that the
 * handler fills from the run's `result.reason` (null when absent).
 */
const EvalRunListResponseSchema = z.array(
	z.object({
		// Identity and relations
		id: z.string().describe('Eval run ID'),
		eval_id: z.string().describe('Eval ID'),
		eval_name: z.string().nullable().describe('Eval name'),
		agent_identifier: z.string().nullable().describe('Agent identifier'),
		session_id: z.string().describe('Session ID'),
		created_at: z.string().describe('Creation timestamp'),

		// State and outcome
		pending: z.boolean().describe('Whether the eval run is pending'),
		success: z.boolean().describe('Whether the eval run succeeded'),
		error: z.string().nullable().describe('Error message if failed'),
		reason: z.string().nullable().describe('Reason for the result'),
	})
);
|
|
22
|
+
|
|
23
|
+
/**
 * `cloud eval-run list` (alias `ls`) — lists eval runs, optionally filtered
 * by project, eval, session and agent.
 *
 * Project scoping: `--all` disables it; otherwise `--project-id` is used,
 * falling back to `ctx.project?.projectId` when present.
 *
 * Returns the runs in the snake_case shape of `EvalRunListResponseSchema`.
 * Unless `options.json` is set, a table is printed as well (or an info
 * line when there is nothing to show). Failures are reported as API_ERROR.
 */
export const listSubcommand = createSubcommand({
	name: 'list',
	description: 'List eval runs',
	tags: ['read-only', 'fast', 'requires-auth'],
	examples: [
		{ command: getCommand('cloud eval-run list'), description: 'List 10 most recent eval runs' },
		{
			command: getCommand('cloud eval-run list --count=25'),
			description: 'List 25 most recent eval runs',
		},
		{
			command: getCommand('cloud eval-run list --eval-id=eval_*'),
			description: 'Filter by eval',
		},
		{
			command: getCommand('cloud eval-run list --session-id=sess_*'),
			description: 'Filter by session',
		},
		{
			command: getCommand('cloud eval-run list --project-id=proj_*'),
			description: 'Filter by project',
		},
		{
			command: getCommand('cloud eval-run list --agent-id=agent_*'),
			description: 'Filter by agent',
		},
		{
			command: getCommand('cloud eval-run list --all'),
			description: 'List all eval runs regardless of project context',
		},
	],
	aliases: ['ls'],
	requires: { auth: true, apiClient: true },
	optional: { project: true },
	idempotent: true,
	pagination: {
		supported: true,
		defaultLimit: 10,
		maxLimit: 100,
		parameters: {
			limit: 'count',
		},
	},
	schema: {
		options: z.object({
			count: z.coerce
				.number()
				.int()
				.min(1)
				.max(100)
				.default(10)
				.describe('Number of eval runs to list (1–100)'),
			projectId: z.string().optional().describe('Filter by project ID'),
			all: z.boolean().optional().describe('List all eval runs regardless of project context'),
			evalId: z.string().optional().describe('Filter by eval ID'),
			agentId: z.string().optional().describe('Filter by agent ID'),
			sessionId: z.string().optional().describe('Filter by session ID'),
		}),
		response: EvalRunListResponseSchema,
	},
	webUrl: (ctx) => {
		// Same project resolution as the handler; no project means no web URL.
		const projectId = ctx.opts?.all ? undefined : ctx.opts?.projectId || ctx.project?.projectId;
		return projectId ? `/projects/${encodeURIComponent(projectId)}/eval-runs` : undefined;
	},
	async handler(ctx) {
		const { apiClient, project, opts, options } = ctx;

		const projectId = opts.all ? undefined : opts.projectId || project?.projectId;

		try {
			const fetched = await evalRunList(apiClient, {
				projectId,
				evalId: opts.evalId,
				sessionId: opts.sessionId,
				agentId: opts.agentId,
			});

			// Honor --count: the option was previously declared (and advertised in
			// the examples and pagination metadata) but never applied.
			// NOTE(review): assumes the API returns runs newest-first, so keeping
			// the leading entries yields the "most recent" ones — confirm. If
			// evalRunList supports a server-side limit, prefer passing it there.
			const evalRuns = fetched.slice(0, opts.count);

			const result = evalRuns.map((r) => ({
				id: r.id,
				eval_id: r.evalId,
				eval_name: r.evalName,
				agent_identifier: r.agentIdentifier,
				session_id: r.sessionId,
				created_at: r.createdAt,
				pending: r.pending,
				success: r.success,
				error: r.error,
				// Flatten result.reason; null when there is no result or no reason.
				reason: r.result?.reason ?? null,
			}));

			if (options.json) {
				return result;
			}

			if (evalRuns.length === 0) {
				tui.info('No eval runs found.');
				return [];
			}

			const tableData = evalRuns.map((r) => {
				const reason = r.result?.reason;
				return {
					ID: r.id,
					'Eval Name': r.evalName || '-',
					Agent: r.agentIdentifier || '-',
					Success: r.success ? '✓' : '✗',
					Pending: r.pending ? '⏳' : '✓',
					// Reasons longer than 30 characters are cut to 27 plus an ellipsis.
					Reason: reason ? (reason.length > 30 ? reason.substring(0, 27) + '...' : reason) : '-',
					Created: new Date(r.createdAt).toLocaleString(),
				};
			});

			tui.table(tableData, [
				{ name: 'ID', alignment: 'left' },
				{ name: 'Eval Name', alignment: 'left' },
				{ name: 'Agent', alignment: 'left' },
				{ name: 'Success', alignment: 'center' },
				{ name: 'Pending', alignment: 'center' },
				{ name: 'Reason', alignment: 'left' },
				{ name: 'Created', alignment: 'left' },
			]);

			return result;
		} catch (ex) {
			tui.fatal(`Failed to list eval runs: ${ex}`, ErrorCode.API_ERROR);
		}
	},
});
|
package/src/cmd/cloud/index.ts
CHANGED
|
@@ -18,6 +18,8 @@ import vectorCommand from './vector';
|
|
|
18
18
|
import sandboxCommand from './sandbox';
|
|
19
19
|
import { regionSubcommand } from './region';
|
|
20
20
|
import { machineCommand } from './machine';
|
|
21
|
+
import { evalCommand } from './eval';
|
|
22
|
+
import { evalRunCommand } from './eval-run';
|
|
21
23
|
import { getCommand } from '../../command-prefix';
|
|
22
24
|
|
|
23
25
|
export const command = createCommand({
|
|
@@ -38,6 +40,8 @@ export const command = createCommand({
|
|
|
38
40
|
vectorCommand,
|
|
39
41
|
sandboxCommand,
|
|
40
42
|
envCommand,
|
|
43
|
+
evalCommand,
|
|
44
|
+
evalRunCommand,
|
|
41
45
|
deploySubcommand,
|
|
42
46
|
dbCommand,
|
|
43
47
|
redisCommand,
|
|
@@ -14,6 +14,10 @@ const MachineListResponseSchema = z.array(
|
|
|
14
14
|
region: z.string().describe('Region'),
|
|
15
15
|
orgName: z.string().nullable().optional().describe('Organization name'),
|
|
16
16
|
createdAt: z.string().describe('Creation timestamp'),
|
|
17
|
+
privateIPv4: z.string().nullable().optional().describe('Private IPv4 address'),
|
|
18
|
+
availabilityZone: z.string().nullable().optional().describe('Availability zone'),
|
|
19
|
+
instanceType: z.string().nullable().optional().describe('Instance type'),
|
|
20
|
+
instanceTags: z.array(z.string()).nullable().optional().describe('Instance tags'),
|
|
17
21
|
})
|
|
18
22
|
);
|
|
19
23
|
|
|
@@ -48,6 +52,10 @@ export const listSubcommand = createSubcommand({
|
|
|
48
52
|
region: m.region,
|
|
49
53
|
orgName: m.orgName ?? undefined,
|
|
50
54
|
createdAt: m.createdAt,
|
|
55
|
+
privateIPv4: m.privateIPv4 ?? undefined,
|
|
56
|
+
availabilityZone: m.availabilityZone ?? undefined,
|
|
57
|
+
instanceType: m.instanceType ?? undefined,
|
|
58
|
+
instanceTags: m.instanceTags ?? undefined,
|
|
51
59
|
}));
|
|
52
60
|
|
|
53
61
|
if (!options.json) {
|
|
@@ -59,6 +67,10 @@ export const listSubcommand = createSubcommand({
|
|
|
59
67
|
Status: m.status,
|
|
60
68
|
Provider: m.provider,
|
|
61
69
|
Region: m.region,
|
|
70
|
+
AZ: m.availabilityZone ?? '-',
|
|
71
|
+
Type: m.instanceType ?? '-',
|
|
72
|
+
'Private IP': m.privateIPv4 ?? '-',
|
|
73
|
+
Tags: m.instanceTags?.join(', ') || '-',
|
|
62
74
|
Created: new Date(m.createdAt).toLocaleString(),
|
|
63
75
|
}));
|
|
64
76
|
|
|
@@ -67,6 +79,10 @@ export const listSubcommand = createSubcommand({
|
|
|
67
79
|
{ name: 'Status', alignment: 'left' },
|
|
68
80
|
{ name: 'Provider', alignment: 'left' },
|
|
69
81
|
{ name: 'Region', alignment: 'left' },
|
|
82
|
+
{ name: 'AZ', alignment: 'left' },
|
|
83
|
+
{ name: 'Type', alignment: 'left' },
|
|
84
|
+
{ name: 'Private IP', alignment: 'left' },
|
|
85
|
+
{ name: 'Tags', alignment: 'left' },
|
|
70
86
|
{ name: 'Created', alignment: 'left' },
|
|
71
87
|
]);
|
|
72
88
|
}
|
|
@@ -19,7 +19,8 @@ const DlqListResponseSchema = z.object({
|
|
|
19
19
|
offset: z.number(),
|
|
20
20
|
failure_reason: z.string().nullable(),
|
|
21
21
|
delivery_attempts: z.number(),
|
|
22
|
-
moved_at: z.string(),
|
|
22
|
+
moved_at: z.string().nullable(),
|
|
23
|
+
created_at: z.string(),
|
|
23
24
|
})
|
|
24
25
|
),
|
|
25
26
|
total: z.number().optional(),
|
|
@@ -65,14 +66,17 @@ const listDlqSubcommand = createSubcommand({
|
|
|
65
66
|
if (result.messages.length === 0) {
|
|
66
67
|
tui.info('No messages in dead letter queue');
|
|
67
68
|
} else {
|
|
68
|
-
const tableData = result.messages.map((m: DeadLetterMessage) =>
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
69
|
+
const tableData = result.messages.map((m: DeadLetterMessage) => {
|
|
70
|
+
const timestamp = m.moved_at ?? m.original_published_at ?? m.published_at ?? m.created_at;
|
|
71
|
+
return {
|
|
72
|
+
ID: m.id.substring(0, 8) + '...',
|
|
73
|
+
Offset: m.offset,
|
|
74
|
+
Reason: m.failure_reason?.substring(0, 30) || 'Unknown',
|
|
75
|
+
Attempts: m.delivery_attempts,
|
|
76
|
+
'Failed At': timestamp ? new Date(timestamp).toLocaleString() : 'N/A',
|
|
77
|
+
};
|
|
78
|
+
});
|
|
79
|
+
tui.table(tableData, ['ID', 'Offset', 'Reason', 'Attempts', 'Failed At']);
|
|
76
80
|
}
|
|
77
81
|
}
|
|
78
82
|
|
|
@@ -82,7 +86,8 @@ const listDlqSubcommand = createSubcommand({
|
|
|
82
86
|
offset: m.offset,
|
|
83
87
|
failure_reason: m.failure_reason ?? null,
|
|
84
88
|
delivery_attempts: m.delivery_attempts,
|
|
85
|
-
moved_at: m.moved_at,
|
|
89
|
+
moved_at: m.moved_at ?? null,
|
|
90
|
+
created_at: m.created_at,
|
|
86
91
|
})),
|
|
87
92
|
total: result.total,
|
|
88
93
|
};
|
|
@@ -811,7 +811,11 @@ export const buildSubcommand = createCommand({
|
|
|
811
811
|
clearOnError: true,
|
|
812
812
|
callback: async (updateProgress) => {
|
|
813
813
|
const uploadFile = Bun.file(uploadPath);
|
|
814
|
-
const progressStream = createProgressStream(
|
|
814
|
+
const progressStream = createProgressStream(
|
|
815
|
+
uploadFile,
|
|
816
|
+
uploadSize,
|
|
817
|
+
updateProgress
|
|
818
|
+
);
|
|
815
819
|
await snapshotUpload(client, {
|
|
816
820
|
snapshotId: initResult.snapshotId!,
|
|
817
821
|
body: progressStream,
|
|
@@ -55,7 +55,11 @@ export const createSubcommand = createCommand({
|
|
|
55
55
|
.describe('Display name for the snapshot (letters, numbers, underscores, dashes only)'),
|
|
56
56
|
description: z.string().optional().describe('Description of the snapshot'),
|
|
57
57
|
tag: z.string().optional().describe('Tag for the snapshot (defaults to "latest")'),
|
|
58
|
-
public: z
|
|
58
|
+
public: z
|
|
59
|
+
.boolean()
|
|
60
|
+
.optional()
|
|
61
|
+
.default(false)
|
|
62
|
+
.describe('Make the snapshot publicly accessible'),
|
|
59
63
|
}),
|
|
60
64
|
response: SnapshotCreateResponseSchema,
|
|
61
65
|
},
|
|
@@ -13,7 +13,7 @@ export const uploadSubcommand = createSubcommand({
|
|
|
13
13
|
name: 'upload',
|
|
14
14
|
aliases: ['put'],
|
|
15
15
|
description: 'Upload a file to storage bucket',
|
|
16
|
-
tags: ['write', 'requires-auth'],
|
|
16
|
+
tags: ['write', 'requires-auth', 'uses-stdin'],
|
|
17
17
|
requires: { auth: true },
|
|
18
18
|
optional: { org: true },
|
|
19
19
|
idempotent: false,
|
|
@@ -22,7 +22,9 @@ const VectorNamespaceStatsSchema = z.object({
|
|
|
22
22
|
});
|
|
23
23
|
|
|
24
24
|
const VectorStatsPaginatedSchema = z.object({
|
|
25
|
-
namespaces: z
|
|
25
|
+
namespaces: z
|
|
26
|
+
.record(z.string(), VectorNamespaceStatsSchema)
|
|
27
|
+
.describe('Map of namespace names to their statistics'),
|
|
26
28
|
total: z.number().describe('Total number of namespaces across all pages'),
|
|
27
29
|
limit: z.number().describe('Number of namespaces requested per page'),
|
|
28
30
|
offset: z.number().describe('Number of namespaces skipped'),
|
|
@@ -24,7 +24,7 @@ export const upsertSubcommand = createCommand({
|
|
|
24
24
|
name: 'upsert',
|
|
25
25
|
aliases: ['put', 'add'],
|
|
26
26
|
description: 'Add or update vectors in the vector storage',
|
|
27
|
-
tags: ['mutating', 'updates-resource', 'slow', 'requires-auth'],
|
|
27
|
+
tags: ['mutating', 'updates-resource', 'slow', 'requires-auth', 'uses-stdin'],
|
|
28
28
|
idempotent: true,
|
|
29
29
|
requires: { auth: true, region: true },
|
|
30
30
|
optional: { project: true },
|