@gnsx/genesys.agent.eval 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/src/adapters/anthropic-adapter.d.ts +24 -0
- package/dist/src/adapters/anthropic-adapter.d.ts.map +1 -0
- package/dist/src/adapters/anthropic-adapter.js +80 -0
- package/dist/src/adapters/anthropic-adapter.js.map +1 -0
- package/dist/src/adapters/gemini-adapter.d.ts +23 -0
- package/dist/src/adapters/gemini-adapter.d.ts.map +1 -0
- package/dist/src/adapters/gemini-adapter.js +79 -0
- package/dist/src/adapters/gemini-adapter.js.map +1 -0
- package/dist/src/adapters/ollama-adapter.d.ts +28 -0
- package/dist/src/adapters/ollama-adapter.d.ts.map +1 -0
- package/dist/src/adapters/ollama-adapter.js +54 -0
- package/dist/src/adapters/ollama-adapter.js.map +1 -0
- package/dist/src/adapters/openai-adapter.d.ts +24 -0
- package/dist/src/adapters/openai-adapter.d.ts.map +1 -0
- package/dist/src/adapters/openai-adapter.js +80 -0
- package/dist/src/adapters/openai-adapter.js.map +1 -0
- package/dist/src/adapters/pi-adapter.d.ts +27 -0
- package/dist/src/adapters/pi-adapter.d.ts.map +1 -0
- package/dist/src/adapters/pi-adapter.js +136 -0
- package/dist/src/adapters/pi-adapter.js.map +1 -0
- package/dist/src/agent-adapter.d.ts +130 -0
- package/dist/src/agent-adapter.d.ts.map +1 -0
- package/dist/src/agent-adapter.js +134 -0
- package/dist/src/agent-adapter.js.map +1 -0
- package/dist/src/args.d.ts +22 -0
- package/dist/src/args.d.ts.map +1 -0
- package/dist/src/args.js +224 -0
- package/dist/src/args.js.map +1 -0
- package/dist/src/cli-runner.d.ts +39 -0
- package/dist/src/cli-runner.d.ts.map +1 -0
- package/dist/src/cli-runner.js +105 -0
- package/dist/src/cli-runner.js.map +1 -0
- package/dist/src/embedding-judge.d.ts +93 -0
- package/dist/src/embedding-judge.d.ts.map +1 -0
- package/dist/src/embedding-judge.js +160 -0
- package/dist/src/embedding-judge.js.map +1 -0
- package/dist/src/index.d.ts +15 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +20 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/judge.d.ts +95 -0
- package/dist/src/judge.d.ts.map +1 -0
- package/dist/src/judge.js +189 -0
- package/dist/src/judge.js.map +1 -0
- package/dist/src/launcher.d.ts +9 -0
- package/dist/src/launcher.d.ts.map +1 -0
- package/dist/src/launcher.js +129 -0
- package/dist/src/launcher.js.map +1 -0
- package/dist/src/reporter.d.ts +86 -0
- package/dist/src/reporter.d.ts.map +1 -0
- package/dist/src/reporter.js +384 -0
- package/dist/src/reporter.js.map +1 -0
- package/dist/src/runner.d.ts +75 -0
- package/dist/src/runner.d.ts.map +1 -0
- package/dist/src/runner.js +165 -0
- package/dist/src/runner.js.map +1 -0
- package/dist/src/test-loader.d.ts +66 -0
- package/dist/src/test-loader.d.ts.map +1 -0
- package/dist/src/test-loader.js +140 -0
- package/dist/src/test-loader.js.map +1 -0
- package/dist/src/types.d.ts +161 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +7 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/utils/package.d.ts +16 -0
- package/dist/src/utils/package.d.ts.map +1 -0
- package/dist/src/utils/package.js +30 -0
- package/dist/src/utils/package.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/examples/basic-tests.yaml +22 -0
- package/package.json +41 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent adapter interface and factory for creating adapters.
|
|
3
|
+
*
|
|
4
|
+
* The adapter pattern allows the eval harness to work with different
|
|
5
|
+
* agent backends (pi framework, direct Anthropic, OpenAI, etc.) through
|
|
6
|
+
* a common interface.
|
|
7
|
+
*
|
|
8
|
+
* @module agent-adapter
|
|
9
|
+
*/
|
|
10
|
+
import type { AgentConfig, AgentResponse, TestCase } from './types.js';
|
|
11
|
+
/**
|
|
12
|
+
* Interface that all agent adapters must implement.
|
|
13
|
+
*
|
|
14
|
+
* Adapters are responsible for:
|
|
15
|
+
* - Initializing the agent with configuration
|
|
16
|
+
* - Running test cases and returning responses
|
|
17
|
+
* - Cleaning up resources when done
|
|
18
|
+
*/
|
|
19
|
+
export interface AgentAdapter {
|
|
20
|
+
/** Human-readable name of this adapter */
|
|
21
|
+
readonly name: string;
|
|
22
|
+
/** Model identifier being used */
|
|
23
|
+
readonly model: string;
|
|
24
|
+
/**
|
|
25
|
+
* Initialize the adapter with the given configuration.
|
|
26
|
+
*
|
|
27
|
+
* @param config - Agent configuration
|
|
28
|
+
* @throws Error if initialization fails
|
|
29
|
+
*/
|
|
30
|
+
initialize(config: AgentConfig): Promise<void>;
|
|
31
|
+
/**
|
|
32
|
+
* Run a test case through the agent.
|
|
33
|
+
*
|
|
34
|
+
* @param test - The test case to run
|
|
35
|
+
* @param suiteContext - Optional global context from the test suite
|
|
36
|
+
* @returns The agent's response
|
|
37
|
+
* @throws Error if execution fails
|
|
38
|
+
*/
|
|
39
|
+
run(test: TestCase, suiteContext?: string): Promise<AgentResponse>;
|
|
40
|
+
/**
|
|
41
|
+
* Clean up any resources used by the adapter.
|
|
42
|
+
*/
|
|
43
|
+
dispose(): Promise<void>;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Factory function type for creating agent adapters.
|
|
47
|
+
*/
|
|
48
|
+
export type AdapterFactory = () => AgentAdapter;
|
|
49
|
+
/**
|
|
50
|
+
* Register an adapter factory for a given agent name.
|
|
51
|
+
*
|
|
52
|
+
* @param name - The agent name/identifier
|
|
53
|
+
* @param factory - Factory function that creates the adapter
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```typescript
|
|
57
|
+
* registerAdapter('anthropic', () => new AnthropicAdapter());
|
|
58
|
+
* ```
|
|
59
|
+
*/
|
|
60
|
+
export declare function registerAdapter(name: string, factory: AdapterFactory): void;
|
|
61
|
+
/**
|
|
62
|
+
* Create an agent adapter by name.
|
|
63
|
+
*
|
|
64
|
+
* @param name - The agent name (e.g., 'pi', 'anthropic', 'openai')
|
|
65
|
+
* @returns A new adapter instance
|
|
66
|
+
* @throws Error if the adapter name is not registered
|
|
67
|
+
*
|
|
68
|
+
* @example
|
|
69
|
+
* ```typescript
|
|
70
|
+
* const adapter = createAdapter('anthropic');
|
|
71
|
+
* await adapter.initialize({ cwd: process.cwd(), model: 'claude-3-5-sonnet' });
|
|
72
|
+
* ```
|
|
73
|
+
*/
|
|
74
|
+
export declare function createAdapter(name: string): AgentAdapter;
|
|
75
|
+
/**
|
|
76
|
+
* Get a list of available adapter names.
|
|
77
|
+
*
|
|
78
|
+
* @returns Array of registered adapter names
|
|
79
|
+
*/
|
|
80
|
+
export declare function getAvailableAdapters(): string[];
|
|
81
|
+
/**
|
|
82
|
+
* Check if an adapter is registered for the given name.
|
|
83
|
+
*
|
|
84
|
+
* @param name - The adapter name to check
|
|
85
|
+
* @returns True if the adapter is available
|
|
86
|
+
*/
|
|
87
|
+
export declare function isAdapterAvailable(name: string): boolean;
|
|
88
|
+
/**
|
|
89
|
+
* Error thrown when an adapter operation fails.
|
|
90
|
+
*/
|
|
91
|
+
export declare class AdapterError extends Error {
|
|
92
|
+
readonly adapterName: string;
|
|
93
|
+
readonly cause?: unknown | undefined;
|
|
94
|
+
constructor(message: string, adapterName: string, cause?: unknown | undefined);
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Error thrown when an adapter is not properly initialized.
|
|
98
|
+
*/
|
|
99
|
+
export declare class AdapterNotInitializedError extends AdapterError {
|
|
100
|
+
constructor(adapterName: string);
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Base class for agent adapters with common functionality.
|
|
104
|
+
*
|
|
105
|
+
* Extend this class when implementing new adapters.
|
|
106
|
+
*/
|
|
107
|
+
export declare abstract class BaseAgentAdapter implements AgentAdapter {
|
|
108
|
+
protected _config?: AgentConfig;
|
|
109
|
+
protected _initialized: boolean;
|
|
110
|
+
abstract readonly name: string;
|
|
111
|
+
abstract readonly model: string;
|
|
112
|
+
/**
|
|
113
|
+
* Build the full prompt from test case and suite context.
|
|
114
|
+
*
|
|
115
|
+
* @param test - The test case
|
|
116
|
+
* @param suiteContext - Optional global context
|
|
117
|
+
* @returns The combined prompt string
|
|
118
|
+
*/
|
|
119
|
+
protected buildPrompt(test: TestCase, suiteContext?: string): string;
|
|
120
|
+
initialize(config: AgentConfig): Promise<void>;
|
|
121
|
+
abstract run(test: TestCase, suiteContext?: string): Promise<AgentResponse>;
|
|
122
|
+
dispose(): Promise<void>;
|
|
123
|
+
/**
|
|
124
|
+
* Assert that the adapter is initialized.
|
|
125
|
+
*
|
|
126
|
+
* @throws AdapterNotInitializedError if not initialized
|
|
127
|
+
*/
|
|
128
|
+
protected assertInitialized(): void;
|
|
129
|
+
}
|
|
130
|
+
//# sourceMappingURL=agent-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-adapter.d.ts","sourceRoot":"","sources":["../../src/agent-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEvE;;;;;;;GAOG;AACH,MAAM,WAAW,YAAY;IAC3B,0CAA0C;IAC1C,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,kCAAkC;IAClC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,UAAU,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAE/C;;;;;;;OAOG;IACH,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAEnE;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,MAAM,YAAY,CAAC;AAOhD;;;;;;;;;;GAUG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,GAAG,IAAI,CAE3E;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAUxD;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,EAAE,CAE/C;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAExD;AAED;;GAEG;AACH,qBAAa,YAAa,SAAQ,KAAK;aAGnB,WAAW,EAAE,MAAM;aACnB,KAAK,CAAC,EAAE,OAAO;gBAF/B,OAAO,EAAE,MAAM,EACC,WAAW,EAAE,MAAM,EACnB,KAAK,CAAC,EAAE,OAAO,YAAA;CAKlC;AAED;;GAEG;AACH,qBAAa,0BAA2B,SAAQ,YAAY;gBAC9C,WAAW,EAAE,MAAM;CAOhC;AAED;;;;GAIG;AACH,8BAAsB,gBAAiB,YAAW,YAAY;IAC5D,SAAS,CAAC,OAAO,CAAC,EAAE,WAAW,CAAC;IAChC,SAAS,CAAC,YAAY,UAAS;IAE/B,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAEhC;;;;;;OAMG;IACH,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM;IAgB9D,UAAU,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAKpD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAErE,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAK9B;;;;OAIG;IACH,SAAS,CAAC,iBAAiB,IAAI,IAAI;CAKpC"}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent adapter interface and factory for creating adapters.
|
|
3
|
+
*
|
|
4
|
+
* The adapter pattern allows the eval harness to work with different
|
|
5
|
+
* agent backends (pi framework, direct Anthropic, OpenAI, etc.) through
|
|
6
|
+
* a common interface.
|
|
7
|
+
*
|
|
8
|
+
* @module agent-adapter
|
|
9
|
+
*/
|
|
10
|
+
/**
|
|
11
|
+
* Registry of available agent adapters.
|
|
12
|
+
*/
|
|
13
|
+
const adapterRegistry = new Map();
|
|
14
|
+
/**
|
|
15
|
+
* Register an adapter factory for a given agent name.
|
|
16
|
+
*
|
|
17
|
+
* @param name - The agent name/identifier
|
|
18
|
+
* @param factory - Factory function that creates the adapter
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* registerAdapter('anthropic', () => new AnthropicAdapter());
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export function registerAdapter(name, factory) {
|
|
26
|
+
adapterRegistry.set(name, factory);
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Create an agent adapter by name.
|
|
30
|
+
*
|
|
31
|
+
* @param name - The agent name (e.g., 'pi', 'anthropic', 'openai')
|
|
32
|
+
* @returns A new adapter instance
|
|
33
|
+
* @throws Error if the adapter name is not registered
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* ```typescript
|
|
37
|
+
* const adapter = createAdapter('anthropic');
|
|
38
|
+
* await adapter.initialize({ cwd: process.cwd(), model: 'claude-3-5-sonnet' });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export function createAdapter(name) {
|
|
42
|
+
const factory = adapterRegistry.get(name);
|
|
43
|
+
if (!factory) {
|
|
44
|
+
const available = Array.from(adapterRegistry.keys()).join(', ');
|
|
45
|
+
throw new Error(`Unknown agent adapter: "${name}". ` +
|
|
46
|
+
`Available adapters: ${available || 'none registered'}`);
|
|
47
|
+
}
|
|
48
|
+
return factory();
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Get a list of available adapter names.
|
|
52
|
+
*
|
|
53
|
+
* @returns Array of registered adapter names
|
|
54
|
+
*/
|
|
55
|
+
export function getAvailableAdapters() {
|
|
56
|
+
return Array.from(adapterRegistry.keys());
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Check if an adapter is registered for the given name.
|
|
60
|
+
*
|
|
61
|
+
* @param name - The adapter name to check
|
|
62
|
+
* @returns True if the adapter is available
|
|
63
|
+
*/
|
|
64
|
+
export function isAdapterAvailable(name) {
|
|
65
|
+
return adapterRegistry.has(name);
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Error thrown when an adapter operation fails.
|
|
69
|
+
*/
|
|
70
|
+
export class AdapterError extends Error {
|
|
71
|
+
adapterName;
|
|
72
|
+
cause;
|
|
73
|
+
constructor(message, adapterName, cause) {
|
|
74
|
+
super(message);
|
|
75
|
+
this.adapterName = adapterName;
|
|
76
|
+
this.cause = cause;
|
|
77
|
+
this.name = 'AdapterError';
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
/**
 * Error raised when an adapter is used before `initialize()` has been called.
 */
export class AdapterNotInitializedError extends AdapterError {
    /**
     * @param adapterName - Name of the adapter that was used too early
     */
    constructor(adapterName) {
        const message = `Adapter "${adapterName}" is not initialized. Call initialize() before using.`;
        super(message, adapterName);
        // Override the name set by the AdapterError constructor.
        this.name = 'AdapterNotInitializedError';
    }
}
|
|
89
|
+
/**
|
|
90
|
+
* Base class for agent adapters with common functionality.
|
|
91
|
+
*
|
|
92
|
+
* Extend this class when implementing new adapters.
|
|
93
|
+
*/
|
|
94
|
+
export class BaseAgentAdapter {
|
|
95
|
+
_config;
|
|
96
|
+
_initialized = false;
|
|
97
|
+
/**
|
|
98
|
+
* Build the full prompt from test case and suite context.
|
|
99
|
+
*
|
|
100
|
+
* @param test - The test case
|
|
101
|
+
* @param suiteContext - Optional global context
|
|
102
|
+
* @returns The combined prompt string
|
|
103
|
+
*/
|
|
104
|
+
buildPrompt(test, suiteContext) {
|
|
105
|
+
const parts = [];
|
|
106
|
+
if (suiteContext) {
|
|
107
|
+
parts.push('Context:', suiteContext, '');
|
|
108
|
+
}
|
|
109
|
+
if (test.context) {
|
|
110
|
+
parts.push('Specific Context:', test.context, '');
|
|
111
|
+
}
|
|
112
|
+
parts.push('Task:', test.input);
|
|
113
|
+
return parts.join('\n');
|
|
114
|
+
}
|
|
115
|
+
async initialize(config) {
|
|
116
|
+
this._config = config;
|
|
117
|
+
this._initialized = true;
|
|
118
|
+
}
|
|
119
|
+
async dispose() {
|
|
120
|
+
this._initialized = false;
|
|
121
|
+
this._config = undefined;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Assert that the adapter is initialized.
|
|
125
|
+
*
|
|
126
|
+
* @throws AdapterNotInitializedError if not initialized
|
|
127
|
+
*/
|
|
128
|
+
assertInitialized() {
|
|
129
|
+
if (!this._initialized) {
|
|
130
|
+
throw new AdapterNotInitializedError(this.name);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
//# sourceMappingURL=agent-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-adapter.js","sourceRoot":"","sources":["../../src/agent-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAgDH;;GAEG;AACH,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAE1D;;;;;;;;;;GAUG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,OAAuB;IACnE,eAAe,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChE,MAAM,IAAI,KAAK,CACb,2BAA2B,IAAI,KAAK;YACpC,uBAAuB,SAAS,IAAI,iBAAiB,EAAE,CACxD,CAAC;IACJ,CAAC;IACD,OAAO,OAAO,EAAE,CAAC;AACnB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,OAAO,YAAa,SAAQ,KAAK;IAGnB;IACA;IAHlB,YACE,OAAe,EACC,WAAmB,EACnB,KAAe;QAE/B,KAAK,CAAC,OAAO,CAAC,CAAC;QAHC,gBAAW,GAAX,WAAW,CAAQ;QACnB,UAAK,GAAL,KAAK,CAAU;QAG/B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,0BAA2B,SAAQ,YAAY;IAC1D,YAAY,WAAmB;QAC7B,KAAK,CACH,YAAY,WAAW,uDAAuD,EAC9E,WAAW,CACZ,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,4BAA4B,CAAC;IAC3C,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,OAAgB,gBAAgB;IAC1B,OAAO,CAAe;IACtB,YAAY,GAAG,KAAK,CAAC;IAK/B;;;;;;OAMG;IACO,WAAW,CAAC,IAAc,EAAE,YAAqB;QACzD,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,EAAE,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAEhC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAAmB;QAClC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;IAC3B,CAAC;IAID,KAAK,CAAC,OAAO;QACX,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;QAC1B,IAAI,CAAC,OAAO,GAAG,SAAS,CAAC;IAC3B,CAAC;IAED;;;;OAIG
;IACO,iBAAiB;QACzB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI argument definitions and parsing for the eval harness.
|
|
3
|
+
*
|
|
4
|
+
* @module args
|
|
5
|
+
*/
|
|
6
|
+
import type { Args } from './types.js';
|
|
7
|
+
/**
|
|
8
|
+
* Parse command line arguments for the eval harness.
|
|
9
|
+
*
|
|
10
|
+
* @param argv - Process arguments (excluding node and script path)
|
|
11
|
+
* @returns Parsed arguments
|
|
12
|
+
*/
|
|
13
|
+
export declare function parseArgs(argv: string[]): Args;
|
|
14
|
+
/**
|
|
15
|
+
* Print the help message to stdout (the function itself does not exit).
|
|
16
|
+
*/
|
|
17
|
+
export declare function printHelp(): void;
|
|
18
|
+
/**
|
|
19
|
+
* Print the version to stdout (the function itself does not exit).
|
|
20
|
+
*/
|
|
21
|
+
export declare function printVersion(): void;
|
|
22
|
+
//# sourceMappingURL=args.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../src/args.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAavC;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,IAAI,CAyK9C;AAYD;;GAEG;AACH,wBAAgB,SAAS,IAAI,IAAI,CA+BhC;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,IAAI,CAEnC"}
|
package/dist/src/args.js
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI argument definitions and parsing for the eval harness.
|
|
3
|
+
*
|
|
4
|
+
* @module args
|
|
5
|
+
*/
|
|
6
|
+
const VALID_FORMATS = ['console', 'json', 'html'];
|
|
7
|
+
const VALID_JUDGE_TYPES = ['embedding', 'llm'];
|
|
8
|
+
function isValidFormat(value) {
|
|
9
|
+
return VALID_FORMATS.includes(value);
|
|
10
|
+
}
|
|
11
|
+
function isValidJudgeType(value) {
|
|
12
|
+
return VALID_JUDGE_TYPES.includes(value);
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Parse command line arguments for the eval harness.
|
|
16
|
+
*
|
|
17
|
+
* @param argv - Process arguments (excluding node and script path)
|
|
18
|
+
* @returns Parsed arguments
|
|
19
|
+
*/
|
|
20
|
+
export function parseArgs(argv) {
|
|
21
|
+
const args = {
|
|
22
|
+
tests: './eval-tests.yaml',
|
|
23
|
+
agent: 'genesys',
|
|
24
|
+
cwd: process.cwd(),
|
|
25
|
+
timeout: 120,
|
|
26
|
+
format: 'console',
|
|
27
|
+
parallel: 1,
|
|
28
|
+
judgeType: 'embedding',
|
|
29
|
+
judgeModel: 'claude-3-5-sonnet-20241022',
|
|
30
|
+
judgeProvider: 'anthropic',
|
|
31
|
+
help: false,
|
|
32
|
+
version: false,
|
|
33
|
+
};
|
|
34
|
+
let i = 0;
|
|
35
|
+
while (i < argv.length) {
|
|
36
|
+
const arg = argv[i];
|
|
37
|
+
switch (arg) {
|
|
38
|
+
case '-h':
|
|
39
|
+
case '--help': {
|
|
40
|
+
args.help = true;
|
|
41
|
+
break;
|
|
42
|
+
}
|
|
43
|
+
case '-v':
|
|
44
|
+
case '--version': {
|
|
45
|
+
args.version = true;
|
|
46
|
+
break;
|
|
47
|
+
}
|
|
48
|
+
case '--tests': {
|
|
49
|
+
const next = argv[++i];
|
|
50
|
+
if (!next) {
|
|
51
|
+
console.error('--tests requires a value');
|
|
52
|
+
process.exit(1);
|
|
53
|
+
}
|
|
54
|
+
args.tests = next;
|
|
55
|
+
break;
|
|
56
|
+
}
|
|
57
|
+
case '-a':
|
|
58
|
+
case '--agent': {
|
|
59
|
+
const next = argv[++i];
|
|
60
|
+
if (!next) {
|
|
61
|
+
console.error('--agent requires a value');
|
|
62
|
+
process.exit(1);
|
|
63
|
+
}
|
|
64
|
+
args.agent = next;
|
|
65
|
+
break;
|
|
66
|
+
}
|
|
67
|
+
case '--cwd': {
|
|
68
|
+
const next = argv[++i];
|
|
69
|
+
if (!next) {
|
|
70
|
+
console.error('--cwd requires a value');
|
|
71
|
+
process.exit(1);
|
|
72
|
+
}
|
|
73
|
+
args.cwd = next;
|
|
74
|
+
break;
|
|
75
|
+
}
|
|
76
|
+
case '-t':
|
|
77
|
+
case '--timeout': {
|
|
78
|
+
const next = argv[++i];
|
|
79
|
+
if (!next) {
|
|
80
|
+
console.error('--timeout requires a value');
|
|
81
|
+
process.exit(1);
|
|
82
|
+
}
|
|
83
|
+
const timeout = parseInt(next, 10);
|
|
84
|
+
if (isNaN(timeout) || timeout < 1) {
|
|
85
|
+
console.error('--timeout must be a positive integer (seconds)');
|
|
86
|
+
process.exit(1);
|
|
87
|
+
}
|
|
88
|
+
args.timeout = timeout;
|
|
89
|
+
break;
|
|
90
|
+
}
|
|
91
|
+
case '-o':
|
|
92
|
+
case '--output': {
|
|
93
|
+
const next = argv[++i];
|
|
94
|
+
if (!next) {
|
|
95
|
+
console.error('--output requires a value');
|
|
96
|
+
process.exit(1);
|
|
97
|
+
}
|
|
98
|
+
args.output = next;
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
case '--format': {
|
|
102
|
+
const next = argv[++i];
|
|
103
|
+
if (!next) {
|
|
104
|
+
console.error('--format requires a value');
|
|
105
|
+
process.exit(1);
|
|
106
|
+
}
|
|
107
|
+
if (!isValidFormat(next)) {
|
|
108
|
+
console.error(`--format must be one of: ${VALID_FORMATS.join(', ')}`);
|
|
109
|
+
process.exit(1);
|
|
110
|
+
}
|
|
111
|
+
args.format = next;
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
114
|
+
case '-p':
|
|
115
|
+
case '--parallel': {
|
|
116
|
+
const next = argv[++i];
|
|
117
|
+
if (!next) {
|
|
118
|
+
console.error('--parallel requires a value');
|
|
119
|
+
process.exit(1);
|
|
120
|
+
}
|
|
121
|
+
const parallel = parseInt(next, 10);
|
|
122
|
+
if (isNaN(parallel) || parallel < 1) {
|
|
123
|
+
console.error('--parallel must be a positive integer');
|
|
124
|
+
process.exit(1);
|
|
125
|
+
}
|
|
126
|
+
args.parallel = parallel;
|
|
127
|
+
break;
|
|
128
|
+
}
|
|
129
|
+
case '--judge-type': {
|
|
130
|
+
const next = argv[++i];
|
|
131
|
+
if (!next) {
|
|
132
|
+
console.error('--judge-type requires a value');
|
|
133
|
+
process.exit(1);
|
|
134
|
+
}
|
|
135
|
+
if (!isValidJudgeType(next)) {
|
|
136
|
+
console.error(`--judge-type must be one of: ${VALID_JUDGE_TYPES.join(', ')}`);
|
|
137
|
+
process.exit(1);
|
|
138
|
+
}
|
|
139
|
+
args.judgeType = next;
|
|
140
|
+
break;
|
|
141
|
+
}
|
|
142
|
+
case '--judge-model': {
|
|
143
|
+
const next = argv[++i];
|
|
144
|
+
if (!next) {
|
|
145
|
+
console.error('--judge-model requires a value');
|
|
146
|
+
process.exit(1);
|
|
147
|
+
}
|
|
148
|
+
args.judgeModel = next;
|
|
149
|
+
break;
|
|
150
|
+
}
|
|
151
|
+
case '--judge-provider': {
|
|
152
|
+
const next = argv[++i];
|
|
153
|
+
if (!next) {
|
|
154
|
+
console.error('--judge-provider requires a value');
|
|
155
|
+
process.exit(1);
|
|
156
|
+
}
|
|
157
|
+
args.judgeProvider = next;
|
|
158
|
+
break;
|
|
159
|
+
}
|
|
160
|
+
default: {
|
|
161
|
+
// Unknown flags are reported as errors
|
|
162
|
+
if (arg.startsWith('-')) {
|
|
163
|
+
console.error(`Unknown option: ${arg}`);
|
|
164
|
+
console.error('Run with --help for usage information');
|
|
165
|
+
process.exit(1);
|
|
166
|
+
}
|
|
167
|
+
break;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
i++;
|
|
171
|
+
}
|
|
172
|
+
return args;
|
|
173
|
+
}
|
|
174
|
+
/**
 * Report the version string this build of the CLI identifies itself with.
 *
 * @returns Version string
 */
function getVersion() {
    // Hard-coded literal; the original note says it is meant to be replaced
    // with the actual version at build time.
    const version = '1.0.0';
    return version;
}
|
|
183
|
+
/**
|
|
184
|
+
* Print the help message to stdout (the function itself does not exit).
|
|
185
|
+
*/
|
|
186
|
+
export function printHelp() {
|
|
187
|
+
const version = getVersion();
|
|
188
|
+
console.log(`genesys-eval v${version}
|
|
189
|
+
|
|
190
|
+
Usage:
|
|
191
|
+
genesys-eval [options]
|
|
192
|
+
|
|
193
|
+
Options:
|
|
194
|
+
--tests <path> Path to YAML test file (default: ./eval-tests.yaml)
|
|
195
|
+
-a, --agent <command> Agent CLI command: pi, genesys, or any custom command
|
|
196
|
+
(default: genesys). Supports commands with arguments.
|
|
197
|
+
--cwd <dir> Working directory for test context (default: cwd)
|
|
198
|
+
-t, --timeout <secs> Timeout per test in seconds (default: 120)
|
|
199
|
+
-o, --output <path> Output file for results (optional)
|
|
200
|
+
--format <format> Output format: console, json, html (default: console)
|
|
201
|
+
-p, --parallel <n> Run n tests in parallel (default: 1)
|
|
202
|
+
--judge-type <type> Judge type: embedding, llm (default: embedding)
|
|
203
|
+
embedding = fast cosine similarity (default)
|
|
204
|
+
llm = use LLM to evaluate (requires API key)
|
|
205
|
+
--judge-model <model> Model for LLM judge (default: claude-3-5-sonnet-20241022)
|
|
206
|
+
--judge-provider <p> Provider for LLM judge (default: anthropic)
|
|
207
|
+
-v, --version Print version and exit
|
|
208
|
+
-h, --help Print this help and exit
|
|
209
|
+
|
|
210
|
+
Examples:
|
|
211
|
+
genesys-eval # Run with genesys agent
|
|
212
|
+
genesys-eval -a pi # Run with pi agent
|
|
213
|
+
genesys-eval -a "tsx src/launcher.ts" # Run with local dev build
|
|
214
|
+
genesys-eval -a "node ./dist/src/launcher.js" # Run with compiled local build
|
|
215
|
+
genesys-eval -p 4 --output results.json # Run 4 tests in parallel
|
|
216
|
+
`);
|
|
217
|
+
}
|
|
218
|
+
/**
 * Write the version string to stdout. The function itself does not terminate
 * the process.
 */
export function printVersion() {
    const version = getVersion();
    console.log(version);
}
|
|
224
|
+
//# sourceMappingURL=args.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"args.js","sourceRoot":"","sources":["../../src/args.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,MAAM,aAAa,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAClD,MAAM,iBAAiB,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;AAE/C,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,OAAO,iBAAiB,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,IAAI,GAAS;QACjB,KAAK,EAAE,mBAAmB;QAC1B,KAAK,EAAE,SAAS;QAChB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,OAAO,EAAE,GAAG;QACZ,MAAM,EAAE,SAAS;QACjB,QAAQ,EAAE,CAAC;QACX,SAAS,EAAE,WAAW;QACtB,UAAU,EAAE,4BAA4B;QACxC,aAAa,EAAE,WAAW;QAC1B,IAAI,EAAE,KAAK;QACX,OAAO,EAAE,KAAK;KACf,CAAC;IAEF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEpB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;gBACjB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;gBACpB,MAAM;YACR,CAAC;YAED,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;oBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;gBAClB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;oBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;gBAClB,MAAM;YACR,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;oBACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC;gBAChB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CA
AC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;oBAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACnC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;oBAClC,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;oBAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;gBACvB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;oBAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;gBACnB,MAAM;YACR,CAAC;YAED,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;oBAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,OAAO,CAAC,KAAK,CAAC,4BAA4B,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBACtE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;gBACnB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,YAAY,CAAC,CAAC,CAAC;gBAClB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;oBAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACpC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;oBACpC,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;oBACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;gBACzB,MAAM;YACR,CAAC;YAED,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;oBAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC5B,OAAO,CAAC,KAAK,CAAC,gCAAgC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBAC9E,
OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;gBACtB,MAAM;YACR,CAAC;YAED,KAAK,eAAe,CAAC,CAAC,CAAC;gBACrB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;oBAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;gBACvB,MAAM;YACR,CAAC;YAED,KAAK,kBAAkB,CAAC,CAAC,CAAC;gBACxB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;oBACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;gBAC1B,MAAM;YACR,CAAC;YAED,OAAO,CAAC,CAAC,CAAC;gBACR,uCAAuC;gBACvC,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBACxB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC;oBACxC,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;oBACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;QAED,CAAC,EAAE,CAAC;IACN,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,SAAS,UAAU;IACjB,0DAA0D;IAC1D,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS;IACvB,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4BrC,CAAC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple CLI runner for executing pi, genesys, or any custom agent CLI.
|
|
3
|
+
*
|
|
4
|
+
* Prompt is passed via stdin for all agents.
|
|
5
|
+
*
|
|
6
|
+
* @module cli-runner
|
|
7
|
+
*/
|
|
8
|
+
import type { AgentResponse } from './types.js';
|
|
9
|
+
/**
|
|
10
|
+
* Options for running a CLI command.
|
|
11
|
+
*/
|
|
12
|
+
export interface RunOptions {
|
|
13
|
+
/** Working directory in which the agent command is spawned */
|
|
14
|
+
cwd: string;
|
|
15
|
+
/** Timeout in milliseconds; once it elapses the child is sent SIGTERM and the run rejects with a CLIError */
|
|
16
|
+
timeout: number;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Error thrown when CLI execution fails.
|
|
20
|
+
*/
|
|
21
|
+
export declare class CLIError extends Error {
|
|
22
|
+
readonly command: string; // command line associated with the failure (the runner passes `${agent} -p`)
|
|
23
|
+
readonly exitCode: number; // exit code for the failure; the runner passes -1 for timeouts and spawn failures
|
|
24
|
+
readonly stderr: string; // stderr text collected before the failure was raised
|
|
25
|
+
constructor(message: string, command: string, exitCode: number, stderr: string);
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Run a prompt through a CLI agent.
|
|
29
|
+
*
|
|
30
|
+
* The prompt is passed via stdin to avoid shell escaping issues
|
|
31
|
+
* with multiline strings on Windows.
|
|
32
|
+
*
|
|
33
|
+
* @param agent - The agent command to use (e.g., 'pi', 'genesys', 'tsx src/launcher.ts'); `-p` is appended to its arguments
|
|
34
|
+
* @param prompt - The prompt text written to the agent's stdin
|
|
35
|
+
* @param options - Execution options: working directory and timeout in milliseconds
|
|
36
|
+
* @returns Promise resolving to the agent response (trimmed stdout and stderr, exit code, duration in ms); rejects with a CLIError on timeout or spawn failure
|
|
37
|
+
*/
|
|
38
|
+
export declare function runAgent(agent: string, prompt: string, options: RunOptions): Promise<AgentResponse>;
|
|
39
|
+
//# sourceMappingURL=cli-runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-runner.d.ts","sourceRoot":"","sources":["../../src/cli-runner.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,wCAAwC;IACxC,GAAG,EAAE,MAAM,CAAC;IAEZ,8BAA8B;IAC9B,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,qBAAa,QAAS,SAAQ,KAAK;aAGf,OAAO,EAAE,MAAM;aACf,QAAQ,EAAE,MAAM;aAChB,MAAM,EAAE,MAAM;gBAH9B,OAAO,EAAE,MAAM,EACC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM;CAKjC;AA2BD;;;;;;;;;;GAUG;AACH,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,aAAa,CAAC,CAuExB"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple CLI runner for executing pi, genesys, or any custom agent CLI.
|
|
3
|
+
*
|
|
4
|
+
* Prompt is passed via stdin for all agents.
|
|
5
|
+
*
|
|
6
|
+
* @module cli-runner
|
|
7
|
+
*/
|
|
8
|
+
import { spawn } from 'node:child_process';
|
|
9
|
+
/**
 * Failure raised by the CLI runner.
 *
 * Besides the usual message, each instance records the command line that
 * was attempted, an exit code, and the stderr text available at the time
 * the error was created.
 */
export class CLIError extends Error {
    command;
    exitCode;
    stderr;
    constructor(message, command, exitCode, stderr) {
        super(message);
        // Copy the diagnostic details onto the instance in one step.
        Object.assign(this, { command, exitCode, stderr });
        this.name = 'CLIError';
    }
}
|
|
24
|
+
/**
 * Parse an agent command string into command and arguments.
 *
 * Simple commands like "genesys" or "pi" return [cmd, []].
 * Compound commands like "tsx src/launcher.ts" return [cmd, args].
 *
 * The string is trimmed and split on runs of any whitespace (spaces, tabs,
 * newlines). Quoting is not interpreted, so an argument cannot itself
 * contain whitespace.
 *
 * @param agentCommand - The agent command string
 * @returns Tuple of [command, arguments]; the command is '' for blank input
 */
function parseAgentCommand(agentCommand) {
    // Always split on whitespace: the earlier "contains a space" shortcut
    // disagreed with the /\s+/ splitter and treated tab-separated commands
    // as a single executable name.
    const [cmd = '', ...args] = agentCommand.trim().split(/\s+/);
    return [cmd, args];
}
|
|
45
|
+
/**
 * Run a prompt through a CLI agent.
 *
 * The agent is spawned with `-p` appended to its arguments and the prompt
 * is written to its stdin, which avoids shell escaping issues with
 * multiline strings on Windows.
 *
 * @param agent - The agent command to use (e.g., 'pi', 'genesys', 'tsx src/launcher.ts')
 * @param prompt - The prompt to send
 * @param options - Execution options (`cwd` and `timeout` in milliseconds)
 * @returns The agent response: trimmed stdout and stderr, exit code, and
 *   elapsed time in milliseconds. The exit code is -1 when the child was
 *   terminated by a signal instead of exiting normally.
 * @throws CLIError (as a rejection) when the command times out or cannot be spawned
 */
export async function runAgent(agent, prompt, options) {
    const startTime = Date.now();
    return new Promise((resolve, reject) => {
        let stdout = '';
        let stderr = '';
        const [cmd, cmdArgs] = parseAgentCommand(agent);
        // Use shell mode for compound commands or on Windows for .cmd/.ps1 support
        const isCompoundCommand = cmdArgs.length > 0;
        const useShell = isCompoundCommand || process.platform === 'win32';
        // Build spawn arguments: any original args followed by the -p flag
        const spawnArgs = [...cmdArgs, '-p'];
        const child = spawn(cmd, spawnArgs, {
            cwd: options.cwd,
            stdio: ['pipe', 'pipe', 'pipe'],
            env: { ...process.env },
            shell: useShell,
        });
        // Write prompt to stdin and close it
        if (child.stdin) {
            // If the child fails to start or exits without draining stdin, the
            // write fails (e.g. EPIPE). Without a listener that is an unhandled
            // 'error' event that would crash the host process; the outcome is
            // still reported through the child's 'error'/'close' events below.
            child.stdin.on('error', () => { });
            child.stdin.write(prompt, 'utf-8');
            child.stdin.end();
        }
        // Decode at the stream level so multi-byte UTF-8 characters that span
        // two chunks are not corrupted by per-chunk Buffer#toString().
        child.stdout?.setEncoding('utf8');
        child.stderr?.setEncoding('utf8');
        child.stdout?.on('data', (chunk) => {
            stdout += chunk;
        });
        child.stderr?.on('data', (chunk) => {
            stderr += chunk;
        });
        const timeout = setTimeout(() => {
            child.kill('SIGTERM');
            reject(new CLIError(`Command timed out after ${options.timeout}ms`, `${agent} -p`, -1, stderr));
        }, options.timeout);
        child.on('error', (error) => {
            clearTimeout(timeout);
            reject(new CLIError(`Failed to spawn ${agent}: ${error.message}. Make sure the command is installed and in PATH.`, `${agent} -p`, -1, stderr));
        });
        child.on('close', (code, signal) => {
            clearTimeout(timeout);
            const durationMs = Date.now() - startTime;
            resolve({
                output: stdout.trim(),
                // `code` is null when the child was terminated by a signal;
                // reporting 0 there would make a killed process look successful.
                exitCode: code ?? (signal ? -1 : 0),
                stderr: stderr.trim(),
                durationMs,
            });
        });
    });
}
|
|
105
|
+
//# sourceMappingURL=cli-runner.js.map
|