@gnsx/genesys.agent.eval 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +9 -0
  2. package/dist/src/adapters/anthropic-adapter.d.ts +24 -0
  3. package/dist/src/adapters/anthropic-adapter.d.ts.map +1 -0
  4. package/dist/src/adapters/anthropic-adapter.js +80 -0
  5. package/dist/src/adapters/anthropic-adapter.js.map +1 -0
  6. package/dist/src/adapters/gemini-adapter.d.ts +23 -0
  7. package/dist/src/adapters/gemini-adapter.d.ts.map +1 -0
  8. package/dist/src/adapters/gemini-adapter.js +79 -0
  9. package/dist/src/adapters/gemini-adapter.js.map +1 -0
  10. package/dist/src/adapters/ollama-adapter.d.ts +28 -0
  11. package/dist/src/adapters/ollama-adapter.d.ts.map +1 -0
  12. package/dist/src/adapters/ollama-adapter.js +54 -0
  13. package/dist/src/adapters/ollama-adapter.js.map +1 -0
  14. package/dist/src/adapters/openai-adapter.d.ts +24 -0
  15. package/dist/src/adapters/openai-adapter.d.ts.map +1 -0
  16. package/dist/src/adapters/openai-adapter.js +80 -0
  17. package/dist/src/adapters/openai-adapter.js.map +1 -0
  18. package/dist/src/adapters/pi-adapter.d.ts +27 -0
  19. package/dist/src/adapters/pi-adapter.d.ts.map +1 -0
  20. package/dist/src/adapters/pi-adapter.js +136 -0
  21. package/dist/src/adapters/pi-adapter.js.map +1 -0
  22. package/dist/src/agent-adapter.d.ts +130 -0
  23. package/dist/src/agent-adapter.d.ts.map +1 -0
  24. package/dist/src/agent-adapter.js +134 -0
  25. package/dist/src/agent-adapter.js.map +1 -0
  26. package/dist/src/args.d.ts +22 -0
  27. package/dist/src/args.d.ts.map +1 -0
  28. package/dist/src/args.js +224 -0
  29. package/dist/src/args.js.map +1 -0
  30. package/dist/src/cli-runner.d.ts +39 -0
  31. package/dist/src/cli-runner.d.ts.map +1 -0
  32. package/dist/src/cli-runner.js +105 -0
  33. package/dist/src/cli-runner.js.map +1 -0
  34. package/dist/src/embedding-judge.d.ts +93 -0
  35. package/dist/src/embedding-judge.d.ts.map +1 -0
  36. package/dist/src/embedding-judge.js +160 -0
  37. package/dist/src/embedding-judge.js.map +1 -0
  38. package/dist/src/index.d.ts +15 -0
  39. package/dist/src/index.d.ts.map +1 -0
  40. package/dist/src/index.js +20 -0
  41. package/dist/src/index.js.map +1 -0
  42. package/dist/src/judge.d.ts +95 -0
  43. package/dist/src/judge.d.ts.map +1 -0
  44. package/dist/src/judge.js +189 -0
  45. package/dist/src/judge.js.map +1 -0
  46. package/dist/src/launcher.d.ts +9 -0
  47. package/dist/src/launcher.d.ts.map +1 -0
  48. package/dist/src/launcher.js +129 -0
  49. package/dist/src/launcher.js.map +1 -0
  50. package/dist/src/reporter.d.ts +86 -0
  51. package/dist/src/reporter.d.ts.map +1 -0
  52. package/dist/src/reporter.js +384 -0
  53. package/dist/src/reporter.js.map +1 -0
  54. package/dist/src/runner.d.ts +75 -0
  55. package/dist/src/runner.d.ts.map +1 -0
  56. package/dist/src/runner.js +165 -0
  57. package/dist/src/runner.js.map +1 -0
  58. package/dist/src/test-loader.d.ts +66 -0
  59. package/dist/src/test-loader.d.ts.map +1 -0
  60. package/dist/src/test-loader.js +140 -0
  61. package/dist/src/test-loader.js.map +1 -0
  62. package/dist/src/types.d.ts +161 -0
  63. package/dist/src/types.d.ts.map +1 -0
  64. package/dist/src/types.js +7 -0
  65. package/dist/src/types.js.map +1 -0
  66. package/dist/src/utils/package.d.ts +16 -0
  67. package/dist/src/utils/package.d.ts.map +1 -0
  68. package/dist/src/utils/package.js +30 -0
  69. package/dist/src/utils/package.js.map +1 -0
  70. package/dist/tsconfig.tsbuildinfo +1 -0
  71. package/examples/basic-tests.yaml +22 -0
  72. package/package.json +41 -0
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Agent adapter interface and factory for creating adapters.
3
+ *
4
+ * The adapter pattern allows the eval harness to work with different
5
+ * agent backends (pi framework, direct Anthropic, OpenAI, etc.) through
6
+ * a common interface.
7
+ *
8
+ * @module agent-adapter
9
+ */
10
+ import type { AgentConfig, AgentResponse, TestCase } from './types.js';
11
+ /**
12
+ * Interface that all agent adapters must implement.
13
+ *
14
+ * Adapters are responsible for:
15
+ * - Initializing the agent with configuration
16
+ * - Running test cases and returning responses
17
+ * - Cleaning up resources when done
18
+ */
19
+ export interface AgentAdapter {
20
+ /** Human-readable name of this adapter */
21
+ readonly name: string;
22
+ /** Model identifier being used */
23
+ readonly model: string;
24
+ /**
25
+ * Initialize the adapter with the given configuration.
26
+ *
27
+ * @param config - Agent configuration
28
+ * @throws Error if initialization fails
29
+ */
30
+ initialize(config: AgentConfig): Promise<void>;
31
+ /**
32
+ * Run a test case through the agent.
33
+ *
34
+ * @param test - The test case to run
35
+ * @param suiteContext - Optional global context from the test suite
36
+ * @returns The agent's response
37
+ * @throws Error if execution fails
38
+ */
39
+ run(test: TestCase, suiteContext?: string): Promise<AgentResponse>;
40
+ /**
41
+ * Clean up any resources used by the adapter.
42
+ */
43
+ dispose(): Promise<void>;
44
+ }
45
+ /**
46
+ * Factory function type for creating agent adapters.
47
+ */
48
+ export type AdapterFactory = () => AgentAdapter;
49
+ /**
50
+ * Register an adapter factory for a given agent name.
51
+ *
52
+ * @param name - The agent name/identifier
53
+ * @param factory - Factory function that creates the adapter
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * registerAdapter('anthropic', () => new AnthropicAdapter());
58
+ * ```
59
+ */
60
+ export declare function registerAdapter(name: string, factory: AdapterFactory): void;
61
+ /**
62
+ * Create an agent adapter by name.
63
+ *
64
+ * @param name - The agent name (e.g., 'pi', 'anthropic', 'openai')
65
+ * @returns A new adapter instance
66
+ * @throws Error if the adapter name is not registered
67
+ *
68
+ * @example
69
+ * ```typescript
70
+ * const adapter = createAdapter('anthropic');
71
+ * await adapter.initialize({ cwd: process.cwd(), model: 'claude-3-5-sonnet' });
72
+ * ```
73
+ */
74
+ export declare function createAdapter(name: string): AgentAdapter;
75
+ /**
76
+ * Get a list of available adapter names.
77
+ *
78
+ * @returns Array of registered adapter names
79
+ */
80
+ export declare function getAvailableAdapters(): string[];
81
+ /**
82
+ * Check if an adapter is registered for the given name.
83
+ *
84
+ * @param name - The adapter name to check
85
+ * @returns True if the adapter is available
86
+ */
87
+ export declare function isAdapterAvailable(name: string): boolean;
88
+ /**
89
+ * Error thrown when an adapter operation fails.
90
+ */
91
+ export declare class AdapterError extends Error {
92
+ readonly adapterName: string;
93
+ readonly cause?: unknown | undefined;
94
+ constructor(message: string, adapterName: string, cause?: unknown | undefined);
95
+ }
96
+ /**
97
+ * Error thrown when an adapter is not properly initialized.
98
+ */
99
+ export declare class AdapterNotInitializedError extends AdapterError {
100
+ constructor(adapterName: string);
101
+ }
102
+ /**
103
+ * Base class for agent adapters with common functionality.
104
+ *
105
+ * Extend this class when implementing new adapters.
106
+ */
107
+ export declare abstract class BaseAgentAdapter implements AgentAdapter {
108
+ protected _config?: AgentConfig;
109
+ protected _initialized: boolean;
110
+ abstract readonly name: string;
111
+ abstract readonly model: string;
112
+ /**
113
+ * Build the full prompt from test case and suite context.
114
+ *
115
+ * @param test - The test case
116
+ * @param suiteContext - Optional global context
117
+ * @returns The combined prompt string
118
+ */
119
+ protected buildPrompt(test: TestCase, suiteContext?: string): string;
120
+ initialize(config: AgentConfig): Promise<void>;
121
+ abstract run(test: TestCase, suiteContext?: string): Promise<AgentResponse>;
122
+ dispose(): Promise<void>;
123
+ /**
124
+ * Assert that the adapter is initialized.
125
+ *
126
+ * @throws AdapterNotInitializedError if not initialized
127
+ */
128
+ protected assertInitialized(): void;
129
+ }
130
+ //# sourceMappingURL=agent-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-adapter.d.ts","sourceRoot":"","sources":["../../src/agent-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEvE;;;;;;;GAOG;AACH,MAAM,WAAW,YAAY;IAC3B,0CAA0C;IAC1C,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,kCAAkC;IAClC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,UAAU,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAE/C;;;;;;;OAOG;IACH,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAEnE;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,MAAM,YAAY,CAAC;AAOhD;;;;;;;;;;GAUG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,GAAG,IAAI,CAE3E;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAUxD;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,EAAE,CAE/C;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAExD;AAED;;GAEG;AACH,qBAAa,YAAa,SAAQ,KAAK;aAGnB,WAAW,EAAE,MAAM;aACnB,KAAK,CAAC,EAAE,OAAO;gBAF/B,OAAO,EAAE,MAAM,EACC,WAAW,EAAE,MAAM,EACnB,KAAK,CAAC,EAAE,OAAO,YAAA;CAKlC;AAED;;GAEG;AACH,qBAAa,0BAA2B,SAAQ,YAAY;gBAC9C,WAAW,EAAE,MAAM;CAOhC;AAED;;;;GAIG;AACH,8BAAsB,gBAAiB,YAAW,YAAY;IAC5D,SAAS,CAAC,OAAO,CAAC,EAAE,WAAW,CAAC;IAChC,SAAS,CAAC,YAAY,UAAS;IAE/B,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAEhC;;;;;;OAMG;IACH,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM;IAgB9D,UAAU,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAKpD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAErE,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAK9B;;;;OAIG;IACH,SAAS,CAAC,iBAAiB,IAAI,IAAI;CAKpC"}
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Agent adapter interface and factory for creating adapters.
3
+ *
4
+ * The adapter pattern allows the eval harness to work with different
5
+ * agent backends (pi framework, direct Anthropic, OpenAI, etc.) through
6
+ * a common interface.
7
+ *
8
+ * @module agent-adapter
9
+ */
10
/**
 * Adapter factories keyed by agent name.
 */
const adapterRegistry = new Map();
/**
 * Associate a factory with an agent name.
 *
 * The entry is stored with `Map.set`, so registering the same name twice
 * replaces the earlier factory.
 *
 * @param name - The agent name/identifier
 * @param factory - Zero-argument function that produces the adapter
 *
 * @example
 * ```typescript
 * registerAdapter('anthropic', () => new AnthropicAdapter());
 * ```
 */
export function registerAdapter(name, factory) {
    adapterRegistry.set(name, factory);
}
/**
 * Build an adapter by invoking the factory registered under `name`.
 *
 * @param name - The agent name (e.g., 'pi', 'anthropic', 'openai')
 * @returns Whatever the registered factory returns
 * @throws Error listing the registered names when `name` has no factory
 *
 * @example
 * ```typescript
 * const adapter = createAdapter('anthropic');
 * await adapter.initialize({ cwd: process.cwd(), model: 'claude-3-5-sonnet' });
 * ```
 */
export function createAdapter(name) {
    const factory = adapterRegistry.get(name);
    if (factory) {
        return factory();
    }
    // Help the caller by spelling out what *is* registered.
    const registered = [...adapterRegistry.keys()].join(', ');
    const listing = registered || 'none registered';
    throw new Error(`Unknown agent adapter: "${name}". Available adapters: ${listing}`);
}
/**
 * List the names that currently have a factory registered.
 *
 * @returns A new array of registered adapter names, in registration order
 */
export function getAvailableAdapters() {
    return [...adapterRegistry.keys()];
}
/**
 * Tell whether a factory is registered under the given name.
 *
 * @param name - The adapter name to check
 * @returns True if the name is present in the registry
 */
export function isAdapterAvailable(name) {
    return adapterRegistry.has(name);
}
67
/**
 * Error raised when an adapter operation fails.
 *
 * Carries the name of the adapter that failed and, optionally, the
 * underlying value that triggered the failure.
 */
export class AdapterError extends Error {
    adapterName;
    cause;
    /**
     * @param message - Human-readable description of the failure
     * @param adapterName - Name of the adapter that raised the error
     * @param cause - Optional underlying error or value
     */
    constructor(message, adapterName, cause) {
        super(message);
        Object.assign(this, { adapterName, cause, name: 'AdapterError' });
    }
}
80
/**
 * Error raised when an adapter is used before `initialize()` was called.
 */
export class AdapterNotInitializedError extends AdapterError {
    /**
     * @param adapterName - Name of the adapter that was used too early
     */
    constructor(adapterName) {
        const message = `Adapter "${adapterName}" is not initialized. Call initialize() before using.`;
        super(message, adapterName);
        this.name = 'AdapterNotInitializedError';
    }
}
89
+ /**
90
+ * Base class for agent adapters with common functionality.
91
+ *
92
+ * Extend this class when implementing new adapters.
93
+ */
94
+ export class BaseAgentAdapter {
95
+ _config;
96
+ _initialized = false;
97
+ /**
98
+ * Build the full prompt from test case and suite context.
99
+ *
100
+ * @param test - The test case
101
+ * @param suiteContext - Optional global context
102
+ * @returns The combined prompt string
103
+ */
104
+ buildPrompt(test, suiteContext) {
105
+ const parts = [];
106
+ if (suiteContext) {
107
+ parts.push('Context:', suiteContext, '');
108
+ }
109
+ if (test.context) {
110
+ parts.push('Specific Context:', test.context, '');
111
+ }
112
+ parts.push('Task:', test.input);
113
+ return parts.join('\n');
114
+ }
115
+ async initialize(config) {
116
+ this._config = config;
117
+ this._initialized = true;
118
+ }
119
+ async dispose() {
120
+ this._initialized = false;
121
+ this._config = undefined;
122
+ }
123
+ /**
124
+ * Assert that the adapter is initialized.
125
+ *
126
+ * @throws AdapterNotInitializedError if not initialized
127
+ */
128
+ assertInitialized() {
129
+ if (!this._initialized) {
130
+ throw new AdapterNotInitializedError(this.name);
131
+ }
132
+ }
133
+ }
134
+ //# sourceMappingURL=agent-adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-adapter.js","sourceRoot":"","sources":["../../src/agent-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAgDH;;GAEG;AACH,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAE1D;;;;;;;;;;GAUG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,OAAuB;IACnE,eAAe,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChE,MAAM,IAAI,KAAK,CACb,2BAA2B,IAAI,KAAK;YACpC,uBAAuB,SAAS,IAAI,iBAAiB,EAAE,CACxD,CAAC;IACJ,CAAC;IACD,OAAO,OAAO,EAAE,CAAC;AACnB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,OAAO,YAAa,SAAQ,KAAK;IAGnB;IACA;IAHlB,YACE,OAAe,EACC,WAAmB,EACnB,KAAe;QAE/B,KAAK,CAAC,OAAO,CAAC,CAAC;QAHC,gBAAW,GAAX,WAAW,CAAQ;QACnB,UAAK,GAAL,KAAK,CAAU;QAG/B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,0BAA2B,SAAQ,YAAY;IAC1D,YAAY,WAAmB;QAC7B,KAAK,CACH,YAAY,WAAW,uDAAuD,EAC9E,WAAW,CACZ,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,4BAA4B,CAAC;IAC3C,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,OAAgB,gBAAgB;IAC1B,OAAO,CAAe;IACtB,YAAY,GAAG,KAAK,CAAC;IAK/B;;;;;;OAMG;IACO,WAAW,CAAC,IAAc,EAAE,YAAqB;QACzD,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,EAAE,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAEhC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAAmB;QAClC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;IAC3B,CAAC;IAID,KAAK,CAAC,OAAO;QACX,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;QAC1B,IAAI,CAAC,OAAO,GAAG,SAAS,CAAC;IAC3B,CAAC;IAED;;;;OA
IG;IACO,iBAAiB;QACzB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * CLI argument definitions and parsing for the eval harness.
3
+ *
4
+ * @module args
5
+ */
6
+ import type { Args } from './types.js';
7
+ /**
8
+ * Parse command line arguments for the eval harness.
9
+ *
10
+ * @param argv - Process arguments (excluding node and script path)
11
+ * @returns Parsed arguments
12
+ */
13
+ export declare function parseArgs(argv: string[]): Args;
14
+ /**
15
+ * Print the help message to stdout (does not exit; the caller decides what happens next).
16
+ */
17
+ export declare function printHelp(): void;
18
+ /**
19
+ * Print the version to stdout (does not exit; the caller decides what happens next).
20
+ */
21
+ export declare function printVersion(): void;
22
+ //# sourceMappingURL=args.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../src/args.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAavC;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,IAAI,CAyK9C;AAYD;;GAEG;AACH,wBAAgB,SAAS,IAAI,IAAI,CA+BhC;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,IAAI,CAEnC"}
@@ -0,0 +1,224 @@
1
+ /**
2
+ * CLI argument definitions and parsing for the eval harness.
3
+ *
4
+ * @module args
5
+ */
6
+ const VALID_FORMATS = ['console', 'json', 'html'];
7
+ const VALID_JUDGE_TYPES = ['embedding', 'llm'];
8
+ function isValidFormat(value) {
9
+ return VALID_FORMATS.includes(value);
10
+ }
11
+ function isValidJudgeType(value) {
12
+ return VALID_JUDGE_TYPES.includes(value);
13
+ }
14
+ /**
15
+ * Parse command line arguments for the eval harness.
16
+ *
17
+ * @param argv - Process arguments (excluding node and script path)
18
+ * @returns Parsed arguments
19
+ */
20
+ export function parseArgs(argv) {
21
+ const args = {
22
+ tests: './eval-tests.yaml',
23
+ agent: 'genesys',
24
+ cwd: process.cwd(),
25
+ timeout: 120,
26
+ format: 'console',
27
+ parallel: 1,
28
+ judgeType: 'embedding',
29
+ judgeModel: 'claude-3-5-sonnet-20241022',
30
+ judgeProvider: 'anthropic',
31
+ help: false,
32
+ version: false,
33
+ };
34
+ let i = 0;
35
+ while (i < argv.length) {
36
+ const arg = argv[i];
37
+ switch (arg) {
38
+ case '-h':
39
+ case '--help': {
40
+ args.help = true;
41
+ break;
42
+ }
43
+ case '-v':
44
+ case '--version': {
45
+ args.version = true;
46
+ break;
47
+ }
48
+ case '--tests': {
49
+ const next = argv[++i];
50
+ if (!next) {
51
+ console.error('--tests requires a value');
52
+ process.exit(1);
53
+ }
54
+ args.tests = next;
55
+ break;
56
+ }
57
+ case '-a':
58
+ case '--agent': {
59
+ const next = argv[++i];
60
+ if (!next) {
61
+ console.error('--agent requires a value');
62
+ process.exit(1);
63
+ }
64
+ args.agent = next;
65
+ break;
66
+ }
67
+ case '--cwd': {
68
+ const next = argv[++i];
69
+ if (!next) {
70
+ console.error('--cwd requires a value');
71
+ process.exit(1);
72
+ }
73
+ args.cwd = next;
74
+ break;
75
+ }
76
+ case '-t':
77
+ case '--timeout': {
78
+ const next = argv[++i];
79
+ if (!next) {
80
+ console.error('--timeout requires a value');
81
+ process.exit(1);
82
+ }
83
+ const timeout = parseInt(next, 10);
84
+ if (isNaN(timeout) || timeout < 1) {
85
+ console.error('--timeout must be a positive integer (seconds)');
86
+ process.exit(1);
87
+ }
88
+ args.timeout = timeout;
89
+ break;
90
+ }
91
+ case '-o':
92
+ case '--output': {
93
+ const next = argv[++i];
94
+ if (!next) {
95
+ console.error('--output requires a value');
96
+ process.exit(1);
97
+ }
98
+ args.output = next;
99
+ break;
100
+ }
101
+ case '--format': {
102
+ const next = argv[++i];
103
+ if (!next) {
104
+ console.error('--format requires a value');
105
+ process.exit(1);
106
+ }
107
+ if (!isValidFormat(next)) {
108
+ console.error(`--format must be one of: ${VALID_FORMATS.join(', ')}`);
109
+ process.exit(1);
110
+ }
111
+ args.format = next;
112
+ break;
113
+ }
114
+ case '-p':
115
+ case '--parallel': {
116
+ const next = argv[++i];
117
+ if (!next) {
118
+ console.error('--parallel requires a value');
119
+ process.exit(1);
120
+ }
121
+ const parallel = parseInt(next, 10);
122
+ if (isNaN(parallel) || parallel < 1) {
123
+ console.error('--parallel must be a positive integer');
124
+ process.exit(1);
125
+ }
126
+ args.parallel = parallel;
127
+ break;
128
+ }
129
+ case '--judge-type': {
130
+ const next = argv[++i];
131
+ if (!next) {
132
+ console.error('--judge-type requires a value');
133
+ process.exit(1);
134
+ }
135
+ if (!isValidJudgeType(next)) {
136
+ console.error(`--judge-type must be one of: ${VALID_JUDGE_TYPES.join(', ')}`);
137
+ process.exit(1);
138
+ }
139
+ args.judgeType = next;
140
+ break;
141
+ }
142
+ case '--judge-model': {
143
+ const next = argv[++i];
144
+ if (!next) {
145
+ console.error('--judge-model requires a value');
146
+ process.exit(1);
147
+ }
148
+ args.judgeModel = next;
149
+ break;
150
+ }
151
+ case '--judge-provider': {
152
+ const next = argv[++i];
153
+ if (!next) {
154
+ console.error('--judge-provider requires a value');
155
+ process.exit(1);
156
+ }
157
+ args.judgeProvider = next;
158
+ break;
159
+ }
160
+ default: {
161
+ // Unknown flags are reported as errors
162
+ if (arg.startsWith('-')) {
163
+ console.error(`Unknown option: ${arg}`);
164
+ console.error('Run with --help for usage information');
165
+ process.exit(1);
166
+ }
167
+ break;
168
+ }
169
+ }
170
+ i++;
171
+ }
172
+ return args;
173
+ }
174
+ /**
175
+ * Get the package version (currently a hard-coded string; it is not read from package.json).
176
+ *
177
+ * @returns Version string
178
+ */
179
+ function getVersion() {
180
+ // This will be replaced with actual version at build time
181
+ return '1.0.0';
182
+ }
183
+ /**
184
+ * Print the help message to stdout (does not exit; the caller decides what happens next).
185
+ */
186
+ export function printHelp() {
187
+ const version = getVersion();
188
+ console.log(`genesys-eval v${version}
189
+
190
+ Usage:
191
+ genesys-eval [options]
192
+
193
+ Options:
194
+ --tests <path> Path to YAML test file (default: ./eval-tests.yaml)
195
+ -a, --agent <command> Agent CLI command: pi, genesys, or any custom command
196
+ (default: genesys). Supports commands with arguments.
197
+ --cwd <dir> Working directory for test context (default: cwd)
198
+ -t, --timeout <secs> Timeout per test in seconds (default: 120)
199
+ -o, --output <path> Output file for results (optional)
200
+ --format <format> Output format: console, json, html (default: console)
201
+ -p, --parallel <n> Run n tests in parallel (default: 1)
202
+ --judge-type <type> Judge type: embedding, llm (default: embedding)
203
+ embedding = fast cosine similarity (default)
204
+ llm = use LLM to evaluate (requires API key)
205
+ --judge-model <model> Model for LLM judge (default: claude-3-5-sonnet-20241022)
206
+ --judge-provider <p> Provider for LLM judge (default: anthropic)
207
+ -v, --version Print version and exit
208
+ -h, --help Print this help and exit
209
+
210
+ Examples:
211
+ genesys-eval # Run with genesys agent
212
+ genesys-eval -a pi # Run with pi agent
213
+ genesys-eval -a "tsx src/launcher.ts" # Run with local dev build
214
+ genesys-eval -a "node ./dist/src/launcher.js" # Run with compiled local build
215
+ genesys-eval -p 4 --output results.json # Run 4 tests in parallel
216
+ `);
217
+ }
218
+ /**
219
+ * Print the version to stdout (does not exit; the caller decides what happens next).
220
+ */
221
+ export function printVersion() {
222
+ console.log(getVersion());
223
+ }
224
+ //# sourceMappingURL=args.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"args.js","sourceRoot":"","sources":["../../src/args.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,MAAM,aAAa,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAClD,MAAM,iBAAiB,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;AAE/C,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,OAAO,iBAAiB,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,IAAI,GAAS;QACjB,KAAK,EAAE,mBAAmB;QAC1B,KAAK,EAAE,SAAS;QAChB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,OAAO,EAAE,GAAG;QACZ,MAAM,EAAE,SAAS;QACjB,QAAQ,EAAE,CAAC;QACX,SAAS,EAAE,WAAW;QACtB,UAAU,EAAE,4BAA4B;QACxC,aAAa,EAAE,WAAW;QAC1B,IAAI,EAAE,KAAK;QACX,OAAO,EAAE,KAAK;KACf,CAAC;IAEF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEpB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;gBACjB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;gBACpB,MAAM;YACR,CAAC;YAED,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;oBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;gBAClB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;oBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;gBAClB,MAAM;YACR,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;oBACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC;gBAChB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,
CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;oBAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACnC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;oBAClC,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;oBAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;gBACvB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;oBAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;gBACnB,MAAM;YACR,CAAC;YAED,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;oBAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,OAAO,CAAC,KAAK,CAAC,4BAA4B,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBACtE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;gBACnB,MAAM;YACR,CAAC;YAED,KAAK,IAAI,CAAC;YACV,KAAK,YAAY,CAAC,CAAC,CAAC;gBAClB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;oBAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACpC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;oBACpC,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;oBACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;gBACzB,MAAM;YACR,CAAC;YAED,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;oBAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC5B,OAAO,CAAC,KAAK,CAAC,gCAAgC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBAC9
E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;gBACtB,MAAM;YACR,CAAC;YAED,KAAK,eAAe,CAAC,CAAC,CAAC;gBACrB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;oBAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;gBACvB,MAAM;YACR,CAAC;YAED,KAAK,kBAAkB,CAAC,CAAC,CAAC;gBACxB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,OAAO,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;oBACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;gBAC1B,MAAM;YACR,CAAC;YAED,OAAO,CAAC,CAAC,CAAC;gBACR,uCAAuC;gBACvC,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBACxB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC;oBACxC,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;oBACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;QAED,CAAC,EAAE,CAAC;IACN,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,SAAS,UAAU;IACjB,0DAA0D;IAC1D,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS;IACvB,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4BrC,CAAC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;AAC5B,CAAC"}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Simple CLI runner for executing pi, genesys, or any custom agent CLI.
3
+ *
4
+ * Prompt is passed via stdin for all agents.
5
+ *
6
+ * @module cli-runner
7
+ */
8
+ import type { AgentResponse } from './types.js';
9
+ /**
10
+ * Options for running a CLI command.
11
+ */
12
+ export interface RunOptions {
13
+ /** Working directory for the command */
14
+ cwd: string;
15
+ /** Timeout in milliseconds */
16
+ timeout: number;
17
+ }
18
+ /**
19
+ * Error thrown when CLI execution fails.
20
+ */
21
+ export declare class CLIError extends Error {
22
+ readonly command: string;
23
+ readonly exitCode: number;
24
+ readonly stderr: string;
25
+ constructor(message: string, command: string, exitCode: number, stderr: string);
26
+ }
27
+ /**
28
+ * Run a prompt through a CLI agent.
29
+ *
30
+ * The prompt is passed via stdin to avoid shell escaping issues
31
+ * with multiline strings on Windows.
32
+ *
33
+ * @param agent - The agent command to use (e.g., 'pi', 'genesys', 'tsx src/launcher.ts')
34
+ * @param prompt - The prompt to send
35
+ * @param options - Execution options
36
+ * @returns The agent response
37
+ */
38
+ export declare function runAgent(agent: string, prompt: string, options: RunOptions): Promise<AgentResponse>;
39
+ //# sourceMappingURL=cli-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-runner.d.ts","sourceRoot":"","sources":["../../src/cli-runner.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,wCAAwC;IACxC,GAAG,EAAE,MAAM,CAAC;IAEZ,8BAA8B;IAC9B,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,qBAAa,QAAS,SAAQ,KAAK;aAGf,OAAO,EAAE,MAAM;aACf,QAAQ,EAAE,MAAM;aAChB,MAAM,EAAE,MAAM;gBAH9B,OAAO,EAAE,MAAM,EACC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM;CAKjC;AA2BD;;;;;;;;;;GAUG;AACH,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,aAAa,CAAC,CAuExB"}
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Simple CLI runner for executing pi, genesys, or any custom agent CLI.
3
+ *
4
+ * Prompt is passed via stdin for all agents.
5
+ *
6
+ * @module cli-runner
7
+ */
8
+ import { spawn } from 'node:child_process';
9
/**
 * Error raised when running an agent CLI fails.
 *
 * Records the command line that was attempted, an exit code, and whatever
 * the process had written to stderr at the time of the failure.
 */
export class CLIError extends Error {
    command;
    exitCode;
    stderr;
    /**
     * @param message - Human-readable description of the failure
     * @param command - The command line that was attempted
     * @param exitCode - Exit code associated with the failure
     * @param stderr - Captured stderr output
     */
    constructor(message, command, exitCode, stderr) {
        super(message);
        Object.assign(this, { command, exitCode, stderr, name: 'CLIError' });
    }
}
24
/**
 * Split an agent command string into the executable and its arguments.
 *
 * Simple commands like "genesys" or "pi" return [cmd, []].
 * Compound commands like "tsx src/launcher.ts" return [cmd, args].
 *
 * The string is trimmed and split on any run of whitespace (spaces, tabs,
 * newlines), so every separator is treated the same way. Quotes are NOT
 * interpreted: a quoted argument containing whitespace is split like any
 * other text.
 *
 * @param agentCommand - The agent command string
 * @returns Tuple of [command, arguments]; the command is '' for blank input
 */
function parseAgentCommand(agentCommand) {
    // split() always yields at least one element, so `cmd` is never undefined.
    const [cmd, ...args] = agentCommand.trim().split(/\s+/);
    return [cmd, args];
}
45
/**
 * Run a prompt through a CLI agent.
 *
 * The agent is spawned as `<agent> -p`, the prompt is written to its stdin,
 * and stdout/stderr are collected until the process closes.
 *
 * The prompt is passed via stdin to avoid shell escaping issues
 * with multiline strings on Windows.
 *
 * @param agent - The agent command to use (e.g., 'pi', 'genesys', 'tsx src/launcher.ts')
 * @param prompt - The prompt to send
 * @param options - Execution options (`cwd`, and `timeout` in milliseconds)
 * @returns The agent response: trimmed stdout/stderr, exit code and duration.
 *   A non-zero exit code still resolves; a child terminated by a signal is
 *   reported with exit code -1.
 * @throws CLIError (as a rejection) when the process cannot be spawned or
 *   does not finish within `options.timeout`
 */
export async function runAgent(agent, prompt, options) {
    const startTime = Date.now();
    const [cmd, cmdArgs] = parseAgentCommand(agent);
    // Use shell mode for compound commands or on Windows for .cmd/.ps1 support.
    // NOTE(review): in shell mode the arguments are handed to the shell
    // unescaped, so `agent` must come from a trusted source.
    const useShell = cmdArgs.length > 0 || process.platform === 'win32';
    const commandLabel = `${agent} -p`;
    return new Promise((resolve, reject) => {
        let stdout = '';
        let stderr = '';
        let settled = false;
        let timer;
        // Several events (timeout, 'error', 'close') can fire for one child;
        // only the first one decides the outcome.
        const settle = (finish) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            finish();
        };
        // Original arguments first, then the -p flag.
        const child = spawn(cmd, [...cmdArgs, '-p'], {
            cwd: options.cwd,
            stdio: ['pipe', 'pipe', 'pipe'],
            env: { ...process.env },
            shell: useShell,
        });
        if (child.stdin) {
            // A child that exits before reading its input makes the write fail
            // (e.g. EPIPE). Without a listener that stream error would be
            // unhandled; the outcome is still reported through 'error'/'close'.
            child.stdin.on('error', () => { });
            child.stdin.end(prompt, 'utf-8');
        }
        child.stdout?.on('data', (data) => {
            stdout += data.toString();
        });
        child.stderr?.on('data', (data) => {
            stderr += data.toString();
        });
        timer = setTimeout(() => {
            child.kill('SIGTERM');
            settle(() => reject(new CLIError(`Command timed out after ${options.timeout}ms`, commandLabel, -1, stderr)));
        }, options.timeout);
        child.on('error', (error) => {
            settle(() => reject(new CLIError(`Failed to spawn ${agent}: ${error.message}. Make sure the command is installed and in PATH.`, commandLabel, -1, stderr)));
        });
        child.on('close', (code) => {
            settle(() => resolve({
                output: stdout.trim(),
                // `code` is null when the child was terminated by a signal;
                // that is a failure, so it must not be reported as 0.
                exitCode: code ?? -1,
                stderr: stderr.trim(),
                durationMs: Date.now() - startTime,
            }));
        });
    });
}
105
+ //# sourceMappingURL=cli-runner.js.map