@agents-at-scale/ark 0.1.55 → 0.1.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arkServices.js +14 -0
- package/dist/commands/completion/index.js +9 -6
- package/dist/commands/export/index.js +0 -1
- package/dist/commands/generate/generators/team.js +0 -1
- package/dist/commands/install/index.js +33 -30
- package/dist/commands/marketplace/index.js +18 -3
- package/dist/commands/models/create.js +1 -0
- package/dist/commands/models/kubernetes/manifest-builder.js +22 -0
- package/dist/commands/models/providers/anthropic.d.ts +15 -0
- package/dist/commands/models/providers/anthropic.js +72 -0
- package/dist/commands/models/providers/factory.js +3 -0
- package/dist/commands/models/providers/index.d.ts +3 -4
- package/dist/commands/models/providers/index.js +1 -0
- package/dist/commands/uninstall/index.js +8 -2
- package/dist/components/ChatUI.js +4 -17
- package/dist/index.js +0 -2
- package/dist/lib/arkApiClient.d.ts +14 -4
- package/dist/lib/arkApiClient.js +51 -34
- package/dist/lib/chatClient.d.ts +4 -6
- package/dist/lib/chatClient.js +136 -102
- package/dist/lib/errors.d.ts +0 -1
- package/dist/lib/errors.js +0 -1
- package/dist/lib/marketplaceFetcher.d.ts +1 -0
- package/dist/lib/marketplaceFetcher.js +17 -0
- package/dist/lib/types.d.ts +0 -38
- package/dist/marketplaceServices.d.ts +6 -1
- package/dist/marketplaceServices.js +19 -3
- package/dist/types/arkService.d.ts +1 -0
- package/dist/types/marketplace.d.ts +1 -1
- package/package.json +5 -3
- package/templates/marketplace/marketplace.json.example +2 -2
- package/templates/tool/uv.lock +794 -95
- package/dist/commands/evaluation/index.d.ts +0 -3
- package/dist/commands/evaluation/index.js +0 -60
- package/dist/lib/executeEvaluation.d.ts +0 -16
- package/dist/lib/executeEvaluation.js +0 -155
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
import { Command } from 'commander';
|
|
2
|
-
import chalk from 'chalk';
|
|
3
|
-
import { executeDirectEvaluation, executeQueryEvaluation, } from '../../lib/executeEvaluation.js';
|
|
4
|
-
import { readStdin } from '../../lib/stdin.js';
|
|
5
|
-
/**
 * Builds the `evaluation` CLI subcommand.
 *
 * The action picks an evaluation mode in this order:
 *   1. `--input` and `--output` both supplied -> direct evaluation.
 *   2. `[query-name]` argument supplied       -> query evaluation.
 *   3. Otherwise the query name is read from stdin.
 *   4. Nothing usable -> usage help is written to stderr and the process
 *      exits with code 1.
 *
 * @param _ Not referenced by the body; kept so the factory signature stays
 *          unchanged for callers.
 * @returns The configured commander `Command` instance.
 */
export function createEvaluationCommand(_) {
    const evaluationCommand = new Command('evaluation');
    evaluationCommand
        .description('Execute evaluations against evaluators')
        .argument('<evaluator-name>', 'Name of the evaluator to use')
        .argument('[query-name]', 'Name of the query to evaluate (for query-based evaluation)')
        .option('--input <input>', 'Input text for direct evaluation')
        .option('--output <output>', 'Output text for direct evaluation')
        .option('--response-target <target>', 'Response target for query evaluation (e.g., agent:my-agent)')
        .option('--timeout <timeout>', 'Evaluation timeout (e.g., "30s", "5m")')
        .option('--watch-timeout <timeout>', 'CLI watch timeout')
        .action(async (evaluatorName, queryName, options) => {
            // Compare against undefined instead of relying on truthiness: an
            // explicitly empty string (e.g. `--output ""`) is still a value the
            // user supplied and must select direct evaluation rather than fall
            // through to the "must provide" error below.
            const hasDirectPair = options.input !== undefined && options.output !== undefined;
            if (hasDirectPair) {
                await executeDirectEvaluation({
                    evaluatorName,
                    input: options.input,
                    output: options.output,
                    timeout: options.timeout,
                    watchTimeout: options.watchTimeout,
                });
                return;
            }
            // Positional argument wins; stdin is only consulted when it is absent.
            const resolvedQueryName = queryName || (await readStdin());
            if (resolvedQueryName) {
                await executeQueryEvaluation({
                    evaluatorName,
                    queryName: resolvedQueryName,
                    responseTarget: options.responseTarget,
                    timeout: options.timeout,
                    watchTimeout: options.watchTimeout,
                });
                return;
            }
            console.error(chalk.red('Error: Must provide either:'));
            console.error(' - --input and --output for direct evaluation');
            console.error(' - <query-name> for query-based evaluation');
            console.error(' - Pipe query name from stdin');
            console.error('\nExamples:');
            console.error(' ark evaluation my-evaluator --input "test" --output "result"');
            console.error(' ark evaluation my-evaluator my-query');
            console.error(' echo "my-query" | ark evaluation my-evaluator');
            process.exit(1);
        });
    return evaluationCommand;
}
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
/**
 * Arguments accepted by `executeDirectEvaluation`: an evaluator is asked to
 * judge an explicit input/output pair.
 */
export interface DirectEvaluationOptions {
    /** Name of the evaluator to use. */
    evaluatorName: string;
    /** Input text for the direct evaluation. */
    input: string;
    /** Output text for the direct evaluation. */
    output: string;
    /** Evaluation timeout as a duration string (e.g. "30s", "5m"). */
    timeout?: string;
    /** How long the CLI itself keeps watching, as a duration string. */
    watchTimeout?: string;
}
|
|
8
|
-
/**
 * Arguments accepted by `executeQueryEvaluation`: an evaluator is asked to
 * judge an existing query, referenced by name.
 */
export interface QueryEvaluationOptions {
    /** Name of the evaluator to use. */
    evaluatorName: string;
    /** Name of the query to evaluate. */
    queryName: string;
    /** Response target in `type:name` form (e.g. "agent:my-agent"). */
    responseTarget?: string;
    /** Evaluation timeout as a duration string (e.g. "30s", "5m"). */
    timeout?: string;
    /** How long the CLI itself keeps watching, as a duration string. */
    watchTimeout?: string;
}
|
|
15
|
-
/**
 * Submits a `direct` Evaluation built from `options` and waits for it to
 * complete, printing the outcome.
 *
 * @param options Evaluator name, the input/output pair, and optional timeouts.
 * @returns A promise that resolves once the result has been displayed. The
 *          implementation reports failures by printing the error and exiting
 *          the process rather than rejecting.
 */
export declare function executeDirectEvaluation(options: DirectEvaluationOptions): Promise<void>;
|
|
16
|
-
/**
 * Submits a `query` Evaluation that references an existing query by name and
 * waits for it to complete, printing the outcome.
 *
 * @param options Evaluator name, query name, optional `type:name` response
 *                target, and optional timeouts.
 * @returns A promise that resolves once the result has been displayed. The
 *          implementation reports failures by printing the error and exiting
 *          the process rather than rejecting.
 */
export declare function executeQueryEvaluation(options: QueryEvaluationOptions): Promise<void>;
|
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
import { execa } from 'execa';
|
|
2
|
-
import ora from 'ora';
|
|
3
|
-
import chalk from 'chalk';
|
|
4
|
-
import output from './output.js';
|
|
5
|
-
import { ExitCodes } from './errors.js';
|
|
6
|
-
import { parseDuration } from './duration.js';
|
|
7
|
-
/**
 * Waits for an Evaluation resource to report the `Completed` condition and
 * then prints its outcome.
 *
 * Exit behaviour:
 *   - the wait times out        -> message on stderr, exit `ExitCodes.Timeout`;
 *   - `status.phase === 'error'` -> message on stderr, exit
 *     `ExitCodes.OperationError`;
 *   - any other phase than `done`/`error` -> a warning is emitted and the
 *     function returns normally.
 *
 * @param evaluationName      Name of the Evaluation resource to watch.
 * @param watchTimeoutMs      Watch budget in milliseconds; passed to kubectl
 *                            as whole seconds.
 * @param watchTimeoutDisplay Human-readable form of the budget, used only in
 *                            the timeout message.
 * @throws Re-throws any `kubectl wait` failure that is not a timeout, and any
 *         failure of the follow-up `kubectl get` / JSON parse.
 */
async function waitForEvaluationAndDisplayResults(evaluationName, watchTimeoutMs, watchTimeoutDisplay) {
    // Extra time granted to the child process beyond kubectl's own --timeout.
    // Without it both timers expire together and execa may kill kubectl before
    // it prints "timed out waiting", so the timeout would be misreported as a
    // generic failure.
    const killGraceMs = 5000;
    const spinner = ora('Waiting for evaluation completion...').start();
    try {
        await execa('kubectl', [
            'wait',
            '--for=condition=Completed',
            `evaluation/${evaluationName}`,
            `--timeout=${Math.floor(watchTimeoutMs / 1000)}s`,
        ], { timeout: watchTimeoutMs + killGraceMs });
    }
    catch (error) {
        spinner.stop();
        if (error instanceof Error) {
            // kubectl gave up by itself ...
            const kubectlTimedOut = error.message.includes('timed out waiting');
            // ... or execa killed the child when its `timeout` option elapsed.
            const killedOnTimeout = 'timedOut' in error && error.timedOut === true;
            if (kubectlTimedOut || killedOnTimeout) {
                console.error(chalk.red(`Evaluation did not complete within ${watchTimeoutDisplay}`));
                process.exit(ExitCodes.Timeout);
            }
        }
        throw error;
    }
    spinner.stop();
    // The wait only signals completion; fetch the resource to read its result.
    const { stdout } = await execa('kubectl', ['get', 'evaluation', evaluationName, '-o', 'json'], { stdio: 'pipe' });
    const evaluation = JSON.parse(stdout);
    const status = evaluation.status;
    if (status?.phase === 'done') {
        console.log(chalk.green('\nEvaluation completed successfully:'));
        // Explicit undefined checks: a score of 0 or `passed: false` must still print.
        if (status.score !== undefined) {
            console.log(`Score: ${status.score}`);
        }
        if (status.passed !== undefined) {
            console.log(`Result: ${status.passed ? chalk.green('PASSED') : chalk.red('FAILED')}`);
        }
        if (status.message) {
            console.log(`Message: ${status.message}`);
        }
    }
    else if (status?.phase === 'error') {
        console.error(chalk.red(status.message || 'Evaluation failed with unknown error'));
        process.exit(ExitCodes.OperationError);
    }
    else {
        output.warning(`Unexpected evaluation phase: ${status?.phase}`);
    }
}
|
|
49
|
-
/**
 * Creates a `direct` Evaluation (explicit input/output pair) through
 * `kubectl apply`, then waits for it to finish and prints the result.
 *
 * Timeouts:
 *   - `options.timeout` falls back to "5m" for computing the watch budget; it
 *     is only written to the manifest when the caller supplied it.
 *   - `options.watchTimeout` falls back to the evaluation timeout plus 60 s.
 *
 * Any failure (including one raised while parsing the durations) stops the
 * spinner, prints the error message in red and exits with
 * `ExitCodes.CliError`.
 *
 * @param options Evaluator name, input, output and optional timeouts.
 * @returns Resolves once the evaluation result has been displayed.
 */
export async function executeDirectEvaluation(options) {
    const spinner = ora('Creating evaluation...').start();
    try {
        // Parsed inside the try block so that a failure in parseDuration is
        // reported through the same path as every other error, with the
        // spinner stopped. NOTE(review): parseDuration's failure mode is not
        // visible from here - confirm it throws on malformed input.
        const queryTimeoutMs = parseDuration(options.timeout || '5m');
        const watchTimeoutMs = options.watchTimeout
            ? parseDuration(options.watchTimeout)
            : queryTimeoutMs + 60000;
        // Same truthiness rule as watchTimeoutMs above, so an empty string
        // never shows up as a blank timeout in the message.
        const watchTimeoutDisplay = options.watchTimeout || `${Math.floor(watchTimeoutMs / 1000)}s`;
        const evaluationName = `cli-eval-${Date.now()}`;
        const evaluationManifest = {
            apiVersion: 'ark.mckinsey.com/v1alpha1',
            kind: 'Evaluation',
            metadata: {
                name: evaluationName,
            },
            spec: {
                type: 'direct',
                evaluator: {
                    name: options.evaluatorName,
                },
                config: {
                    input: options.input,
                    output: options.output,
                },
                ...(options.timeout && { timeout: options.timeout }),
                ttl: '1h',
            },
        };
        spinner.text = 'Submitting evaluation...';
        // The manifest is piped on stdin, so user text never reaches a shell.
        await execa('kubectl', ['apply', '-f', '-'], {
            input: JSON.stringify(evaluationManifest),
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        spinner.stop();
        await waitForEvaluationAndDisplayResults(evaluationName, watchTimeoutMs, watchTimeoutDisplay);
    }
    catch (error) {
        spinner.stop();
        console.error(chalk.red(error instanceof Error ? error.message : 'Unknown error'));
        process.exit(ExitCodes.CliError);
    }
}
|
|
94
|
-
/**
 * Creates a `query` Evaluation that references an existing query by name
 * through `kubectl apply`, then waits for it to finish and prints the result.
 *
 * `options.responseTarget`, when given, must be `type:name` with both parts
 * non-empty; otherwise an error is printed and the process exits with
 * `ExitCodes.CliError`.
 *
 * Timeouts:
 *   - `options.timeout` falls back to "5m" for computing the watch budget; it
 *     is only written to the manifest when the caller supplied it.
 *   - `options.watchTimeout` falls back to the evaluation timeout plus 60 s.
 *
 * Any failure (including one raised while parsing the durations) stops the
 * spinner, prints the error message in red and exits with
 * `ExitCodes.CliError`.
 *
 * @param options Evaluator name, query name, optional response target and
 *                optional timeouts.
 * @returns Resolves once the evaluation result has been displayed.
 */
export async function executeQueryEvaluation(options) {
    const spinner = ora('Creating evaluation...').start();
    try {
        // Parsed inside the try block so that a failure in parseDuration is
        // reported through the same path as every other error, with the
        // spinner stopped. NOTE(review): parseDuration's failure mode is not
        // visible from here - confirm it throws on malformed input.
        const queryTimeoutMs = parseDuration(options.timeout || '5m');
        const watchTimeoutMs = options.watchTimeout
            ? parseDuration(options.watchTimeout)
            : queryTimeoutMs + 60000;
        // Same truthiness rule as watchTimeoutMs above, so an empty string
        // never shows up as a blank timeout in the message.
        const watchTimeoutDisplay = options.watchTimeout || `${Math.floor(watchTimeoutMs / 1000)}s`;
        const evaluationName = `cli-eval-${Date.now()}`;
        let responseTarget;
        if (options.responseTarget) {
            const parts = options.responseTarget.split(':');
            // Exactly two parts AND both non-empty: "agent:" and ":my-agent"
            // also split into two pieces but would yield an unusable target.
            const wellFormed = parts.length === 2 && parts[0] !== '' && parts[1] !== '';
            if (!wellFormed) {
                spinner.stop();
                console.error(chalk.red('Invalid response-target format. Use: type:name (e.g., agent:my-agent)'));
                process.exit(ExitCodes.CliError);
            }
            responseTarget = {
                type: parts[0],
                name: parts[1],
            };
        }
        const evaluationManifest = {
            apiVersion: 'ark.mckinsey.com/v1alpha1',
            kind: 'Evaluation',
            metadata: {
                name: evaluationName,
            },
            spec: {
                type: 'query',
                evaluator: {
                    name: options.evaluatorName,
                },
                config: {
                    queryRef: {
                        name: options.queryName,
                    },
                    ...(responseTarget && { responseTarget }),
                },
                ...(options.timeout && { timeout: options.timeout }),
                ttl: '1h',
            },
        };
        spinner.text = 'Submitting evaluation...';
        // The manifest is piped on stdin, so user text never reaches a shell.
        await execa('kubectl', ['apply', '-f', '-'], {
            input: JSON.stringify(evaluationManifest),
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        spinner.stop();
        await waitForEvaluationAndDisplayResults(evaluationName, watchTimeoutMs, watchTimeoutDisplay);
    }
    catch (error) {
        spinner.stop();
        console.error(chalk.red(error instanceof Error ? error.message : 'Unknown error'));
        process.exit(ExitCodes.CliError);
    }
}
|