@orchagent/cli 0.3.85 → 0.3.87

@@ -0,0 +1,334 @@
+ "use strict";
+ /**
+  * Mock Agent Runner — executes managed_loop agents with mocked sub-agent responses.
+  *
+  * Used by `orch test` to test orchestration chains in CI without live sub-agents.
+  * The LLM still runs the full tool-use loop, but custom tool calls return
+  * deterministic mock responses instead of calling real sub-agents.
+  */
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.validateMockedFixture = validateMockedFixture;
+ exports.runMockedAgentFixtureTests = runMockedAgentFixtureTests;
+ const promises_1 = __importDefault(require("fs/promises"));
+ const path_1 = __importDefault(require("path"));
+ const os_1 = __importDefault(require("os"));
+ const child_process_1 = require("child_process");
+ const chalk_1 = __importDefault(require("chalk"));
+ const fast_deep_equal_1 = __importDefault(require("fast-deep-equal"));
+ const errors_1 = require("./errors");
+ const llm_1 = require("./llm");
+ // SDK packages needed by agent_runner.py per provider
+ const SDK_PACKAGES = {
+     anthropic: 'anthropic',
+     openai: 'openai',
+     gemini: 'google-genai',
+ };
+ // ─── Validation ──────────────────────────────────────────────────────────────
+ function validateMockedFixture(data, fixturePath, customToolNames) {
+     const fileName = path_1.default.basename(fixturePath);
+     if (typeof data !== 'object' || data === null) {
+         throw new errors_1.CliError(`Invalid fixture ${fileName}: must be a JSON object`);
+     }
+     const obj = data;
+     if (!obj.input || typeof obj.input !== 'object') {
+         throw new errors_1.CliError(`Invalid fixture ${fileName}: missing required "input" field`);
+     }
+     if (!obj.mocks || typeof obj.mocks !== 'object' || Array.isArray(obj.mocks)) {
+         throw new errors_1.CliError(`Invalid fixture ${fileName}: "mocks" must be an object mapping tool names to responses`);
+     }
+     if (!obj.expected_output && !obj.expected_contains) {
+         throw new errors_1.CliError(`Invalid fixture ${fileName}: must have "expected_output" or "expected_contains"`);
+     }
+     // Warn about mock keys that don't match any custom tool
+     const mockKeys = Object.keys(obj.mocks);
+     const unknownMocks = mockKeys.filter(k => !customToolNames.includes(k));
+     if (unknownMocks.length > 0) {
+         process.stderr.write(chalk_1.default.yellow(` Warning: ${fileName} mocks unknown tool(s): ${unknownMocks.join(', ')}\n`));
+     }
+     return data;
+ }
+ // ─── Runner ──────────────────────────────────────────────────────────────────
+ function runCommand(command, args) {
+     return new Promise((resolve) => {
+         const proc = (0, child_process_1.spawn)(command, args, {
+             stdio: ['pipe', 'pipe', 'pipe'],
+             shell: true,
+         });
+         let stdout = '';
+         let stderr = '';
+         proc.stdout?.on('data', (d) => { stdout += d.toString(); });
+         proc.stderr?.on('data', (d) => { stderr += d.toString(); });
+         proc.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
+         proc.on('error', (err) => resolve({ code: 1, stdout, stderr: err.message }));
+     });
+ }
+ async function runAgentWithMocks(tempDir, env, maxTurns, verbose) {
+     return new Promise((resolve) => {
+         const args = [
+             'agent_runner.py',
+             '--max-turns', String(maxTurns),
+             '--mock-tools', 'mock_tools.json',
+         ];
+         if (verbose)
+             args.push('--verbose');
+         const proc = (0, child_process_1.spawn)('python3', args, {
+             cwd: tempDir,
+             stdio: ['pipe', 'pipe', 'pipe'],
+             env,
+         });
+         proc.stdin.end();
+         let stdout = '';
+         let stderr = '';
+         proc.stdout?.on('data', (data) => {
+             stdout += data.toString();
+         });
+         proc.stderr?.on('data', (data) => {
+             const text = data.toString();
+             stderr += text;
+             if (verbose) {
+                 // Filter out heartbeat dots and orchagent events
+                 for (const line of text.split('\n')) {
+                     if (line.startsWith('@@ORCHAGENT_EVENT:'))
+                         continue;
+                     if (line.trim() === '.' || line.trim() === '')
+                         continue;
+                     process.stderr.write(chalk_1.default.gray(` ${line}\n`));
+                 }
+             }
+         });
+         proc.on('close', (code) => {
+             resolve({ exitCode: code ?? 1, stdout, stderr });
+         });
+         proc.on('error', (err) => {
+             resolve({ exitCode: 1, stdout, stderr: err.message });
+         });
+     });
+ }
+ // ─── Public API ──────────────────────────────────────────────────────────────
+ async function runMockedAgentFixtureTests(agentDir, fixtures, manifest, verbose, config) {
+     process.stderr.write(chalk_1.default.blue('\nRunning mocked orchestration tests...\n\n'));
+     // Read prompt.md
+     let prompt;
+     try {
+         prompt = await promises_1.default.readFile(path_1.default.join(agentDir, 'prompt.md'), 'utf-8');
+     }
+     catch {
+         throw new errors_1.CliError('prompt.md not found (required for mocked orchestration tests)');
+     }
+     // Read output schema if available
+     let outputSchema;
+     try {
+         const raw = await promises_1.default.readFile(path_1.default.join(agentDir, 'schema.json'), 'utf-8');
+         const schemas = JSON.parse(raw);
+         outputSchema = schemas.output;
+     }
+     catch {
+         // Optional
+     }
+     // Get custom tools from manifest
+     const customTools = manifest.loop?.custom_tools ||
+         manifest.custom_tools ||
+         [];
+     const customToolNames = customTools.map((t) => t.name);
+     if (customTools.length === 0) {
+         process.stderr.write(chalk_1.default.yellow(' Warning: No custom_tools defined — mocks will have no effect\n\n'));
+     }
+     // Detect LLM key
+     const supportedProviders = (manifest.supported_providers || ['any']);
+     const detected = await (0, llm_1.detectLlmKey)(supportedProviders, config);
+     if (!detected) {
+         throw new errors_1.CliError('No LLM key found for mocked orchestration tests.\n' +
+             'Set an environment variable (e.g., ANTHROPIC_API_KEY) or run `orch secrets set <PROVIDER>_API_KEY <key>`');
+     }
+     const { provider, key, model: serverModel } = detected;
+     const model = serverModel ?? (0, llm_1.getDefaultModel)(provider);
+     const apiKeyEnvVar = llm_1.PROVIDER_ENV_VARS[provider];
+     // Check Python 3 available
+     try {
+         const { code } = await runCommand('python3', ['--version']);
+         if (code !== 0)
+             throw new Error();
+     }
+     catch {
+         throw new errors_1.CliError('Python 3 is required for mocked orchestration tests.\n' +
+             'Install Python 3: https://python.org/downloads');
+     }
+     // Check LLM SDK installed
+     const sdkPackage = SDK_PACKAGES[provider] || 'anthropic';
+     const sdkImportName = provider === 'gemini' ? 'google.genai' : sdkPackage;
+     try {
+         const { code } = await runCommand('python3', ['-c', `import ${sdkImportName}`]);
+         if (code !== 0) {
+             process.stderr.write(` Installing ${sdkPackage} Python SDK...\n`);
+             const install = await runCommand('python3', ['-m', 'pip', 'install', '-q', sdkPackage]);
+             if (install.code !== 0) {
+                 throw new errors_1.CliError(`Failed to install ${sdkPackage} SDK. Install manually: pip install ${sdkPackage}`);
+             }
+         }
+     }
+     catch (err) {
+         if (err instanceof errors_1.CliError)
+             throw err;
+         throw new errors_1.CliError(`Failed to check Python SDK: ${err}`);
+     }
+     // Find agent_runner.py
+     const runnerPaths = [
+         path_1.default.join(__dirname, '..', 'resources', 'agent_runner.py'),
+         path_1.default.join(__dirname, '..', '..', 'src', 'resources', 'agent_runner.py'),
+     ];
+     let runnerContent;
+     for (const p of runnerPaths) {
+         try {
+             runnerContent = await promises_1.default.readFile(p, 'utf-8');
+             break;
+         }
+         catch {
+             continue;
+         }
+     }
+     if (!runnerContent) {
+         throw new errors_1.CliError('Agent runner script not found. Reinstall the CLI: npm install -g @orchagent/cli');
+     }
+     const maxTurns = manifest.max_turns ??
+         manifest.loop?.max_turns ?? 25;
+     process.stderr.write(` Provider: ${provider} (${model})\n`);
+     process.stderr.write(` Custom tools: ${customToolNames.join(', ') || '(none)'}\n`);
+     process.stderr.write(` Max turns: ${maxTurns}\n\n`);
+     let passed = 0;
+     let failed = 0;
+     for (const fixturePath of fixtures) {
+         const fixtureName = path_1.default.basename(fixturePath);
+         const raw = await promises_1.default.readFile(fixturePath, 'utf-8');
+         let parsed;
+         try {
+             parsed = JSON.parse(raw);
+         }
+         catch (e) {
+             process.stderr.write(chalk_1.default.red(` ${fixtureName}: ERROR\n`));
+             process.stderr.write(chalk_1.default.red(` Invalid JSON: ${e.message}\n`));
+             failed++;
+             continue;
+         }
+         const desc = parsed.description;
+         process.stderr.write(` ${fixtureName}${desc ? ` (${desc})` : ''}: `);
+         let fixture;
+         try {
+             fixture = validateMockedFixture(parsed, fixturePath, customToolNames);
+         }
+         catch (err) {
+             process.stderr.write(chalk_1.default.red('ERROR\n'));
+             process.stderr.write(chalk_1.default.red(` ${err.message}\n`));
+             failed++;
+             continue;
+         }
+         // Create temp dir for this fixture
+         const tempDir = path_1.default.join(os_1.default.tmpdir(), `orchagent-mock-test-${Date.now()}`);
+         await promises_1.default.mkdir(tempDir, { recursive: true });
+         try {
+             // Write all files the agent runner needs
+             await Promise.all([
+                 promises_1.default.writeFile(path_1.default.join(tempDir, 'agent_runner.py'), runnerContent),
+                 promises_1.default.writeFile(path_1.default.join(tempDir, 'prompt.md'), prompt),
+                 promises_1.default.writeFile(path_1.default.join(tempDir, 'input.json'), JSON.stringify(fixture.input, null, 2)),
+                 promises_1.default.writeFile(path_1.default.join(tempDir, 'mock_tools.json'), JSON.stringify(fixture.mocks)),
+                 customTools.length > 0
+                     ? promises_1.default.writeFile(path_1.default.join(tempDir, 'custom_tools.json'), JSON.stringify(customTools))
+                     : Promise.resolve(),
+                 outputSchema
+                     ? promises_1.default.writeFile(path_1.default.join(tempDir, 'output_schema.json'), JSON.stringify(outputSchema))
+                     : Promise.resolve(),
+             ]);
+             // Build env
+             const subprocessEnv = { ...process.env };
+             subprocessEnv.LOCAL_MODE = '1';
+             subprocessEnv.LLM_PROVIDER = provider;
+             subprocessEnv.LLM_MODEL = model;
+             if (apiKeyEnvVar && key) {
+                 subprocessEnv[apiKeyEnvVar] = key;
+             }
+             // Run the agent loop with mocked tools
+             const result = await runAgentWithMocks(tempDir, subprocessEnv, maxTurns, verbose);
+             if (result.exitCode !== 0 || !result.stdout.trim()) {
+                 process.stderr.write(chalk_1.default.red('ERROR\n'));
+                 if (result.stdout.trim()) {
+                     try {
+                         const errJson = JSON.parse(result.stdout.trim());
+                         if (errJson.error) {
+                             process.stderr.write(chalk_1.default.red(` ${errJson.error}\n`));
+                         }
+                     }
+                     catch {
+                         process.stderr.write(chalk_1.default.red(` Agent exited with code ${result.exitCode}\n`));
+                     }
+                 }
+                 else {
+                     process.stderr.write(chalk_1.default.red(` Agent exited with code ${result.exitCode} (no output)\n`));
+                 }
+                 failed++;
+                 continue;
+             }
+             // Parse output
+             let output;
+             try {
+                 output = JSON.parse(result.stdout.trim());
+             }
+             catch {
+                 process.stderr.write(chalk_1.default.red('ERROR\n'));
+                 process.stderr.write(chalk_1.default.red(` Agent output is not valid JSON\n`));
+                 if (verbose) {
+                     process.stderr.write(chalk_1.default.gray(` stdout: ${result.stdout.trim().slice(0, 200)}\n`));
+                 }
+                 failed++;
+                 continue;
+             }
+             // Validate against expectations
+             let testPassed = true;
+             const failures = [];
+             if (fixture.expected_output) {
+                 if (!(0, fast_deep_equal_1.default)(output, fixture.expected_output)) {
+                     testPassed = false;
+                     failures.push(`Expected: ${JSON.stringify(fixture.expected_output, null, 2)}\n` +
+                         ` Got: ${JSON.stringify(output, null, 2)}`);
+                 }
+             }
+             if (fixture.expected_contains) {
+                 const outputStr = JSON.stringify(output);
+                 for (const expected of fixture.expected_contains) {
+                     if (!outputStr.includes(expected)) {
+                         testPassed = false;
+                         failures.push(`Expected output to contain: "${expected}"`);
+                     }
+                 }
+             }
+             if (testPassed) {
+                 process.stderr.write(chalk_1.default.green('PASS\n'));
+                 passed++;
+                 if (verbose) {
+                     process.stderr.write(chalk_1.default.gray(` Input: ${JSON.stringify(fixture.input)}\n`));
+                     process.stderr.write(chalk_1.default.gray(` Output: ${JSON.stringify(output)}\n`));
+                 }
+             }
+             else {
+                 process.stderr.write(chalk_1.default.red('FAIL\n'));
+                 failed++;
+                 for (const f of failures) {
+                     process.stderr.write(chalk_1.default.red(` ${f}\n`));
+                 }
+             }
+         }
+         finally {
+             try {
+                 await promises_1.default.rm(tempDir, { recursive: true, force: true });
+             }
+             catch {
+                 // Ignore cleanup errors
+             }
+         }
+     }
+     process.stderr.write('\n');
+     process.stderr.write(`Mocked orchestration tests: ${passed} passed, ${failed} failed\n`);
+     return failed > 0 ? 1 : 0;
+ }
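
For reference, a minimal fixture that would satisfy validateMockedFixture might look like this (the field names come from the validation code above; the "research" tool name and all values are hypothetical):

    {
      "description": "summarizer uses mocked research",
      "input": { "topic": "solar panels" },
      "mocks": {
        "research": { "findings": ["mocked finding about solar panels"] }
      },
      "expected_contains": ["solar"]
    }

"expected_output" can be used instead of (or alongside) "expected_contains" when the agent's output is deterministic enough for a deep-equality check.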
@@ -141,7 +141,7 @@ function printUpdateNotification() {
         const current = package_json_1.default.version;
         if (isNewer(cachedLatest, current)) {
             process.stderr.write(`\nUpdate available: v${current} → v${cachedLatest}\n` +
-                `Run \`npm update -g @orchagent/cli\` to update\n`);
+                `Run \`npm install -g @orchagent/cli@latest\` to update\n`);
         }
     }
     catch {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@orchagent/cli",
-   "version": "0.3.85",
+   "version": "0.3.87",
    "description": "Command-line interface for orchagent — deploy and run AI agents for your team",
    "license": "MIT",
    "author": "orchagent <hello@orchagent.io>",
@@ -352,10 +352,14 @@ def execute_custom_tool(command_template, params):
      return execute_bash(command)


- def dispatch_tool(tool_name, tool_input, custom_tools_config):
+ def dispatch_tool(tool_name, tool_input, custom_tools_config, mock_tools=None):
      """
      Dispatch a tool call. Returns (result_text, is_submit).
      is_submit is True only when tool_name == "submit_result".
+
+     When mock_tools is provided, custom tools with matching names return
+     the mock response instead of executing the real command. Built-in
+     tools (bash, read_file, etc.) are never mocked.
      """
      if tool_name == "bash":
          return execute_bash(tool_input.get("command", "")), False
@@ -374,6 +378,12 @@ def dispatch_tool(tool_name, tool_input, custom_tools_config):
      elif tool_name == "submit_result":
          return json.dumps(tool_input), True
      else:
+         # Check mock_tools first — return mock response if available
+         if mock_tools and tool_name in mock_tools:
+             mock_response = mock_tools[tool_name]
+             if isinstance(mock_response, str):
+                 return mock_response, False
+             return json.dumps(mock_response), False
          for ct in custom_tools_config:
              if ct["name"] == tool_name:
                  return execute_custom_tool(ct["command"], tool_input), False
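
Given the isinstance branch above, a mock value may be either a string (returned to the model verbatim) or any other JSON value (re-serialized with json.dumps). A hypothetical mock_tools.json mixing both shapes:

    {
      "fetch_page": "<html>mocked page body</html>",
      "research": { "findings": ["mocked finding"], "confidence": 0.9 }
    }

Because the mock lookup lives in the else branch after the built-in tools, bash, read_file, and submit_result can never be shadowed by a mock, matching the docstring added above.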
@@ -685,10 +695,27 @@ def main():
      parser = argparse.ArgumentParser()
      parser.add_argument("--max-turns", type=int, default=25)
      parser.add_argument("--verbose", action="store_true", help="Log tool calls to stderr")
+     parser.add_argument("--mock-tools", type=str, default=None,
+                         help="Path to JSON file mapping tool names to mock responses")
      args = parser.parse_args()

      _VERBOSE = args.verbose

+     # Load mock tool responses if provided (for testing orchestration chains)
+     mock_tools = {}
+     if args.mock_tools:
+         try:
+             with open(args.mock_tools, "r") as f:
+                 mock_tools = json.load(f)
+             if _VERBOSE:
+                 print("[agent] Loaded %d mock tool(s): %s" % (
+                     len(mock_tools), ", ".join(mock_tools.keys())
+                 ), file=sys.stderr, flush=True)
+         except FileNotFoundError:
+             error_exit("Mock tools file not found: %s" % args.mock_tools)
+         except json.JSONDecodeError as e:
+             error_exit("Invalid JSON in mock tools file: %s" % e)
+
      with open("prompt.md", "r") as f:
          author_prompt = f.read()

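With the flag wired up, the runner can also be invoked directly (outside `orch test`) from a directory the CLI has staged, e.g. `python3 agent_runner.py --max-turns 25 --mock-tools mock_tools.json --verbose`, which mirrors the spawn call in runAgentWithMocks above. With two mocks loaded and --verbose set, the print above would emit something like (tool names hypothetical):

    [agent] Loaded 2 mock tool(s): fetch_page, research
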
@@ -761,7 +788,7 @@ def main():
          for call_id, name, input_args in provider.extract_tool_calls(response):
              verbose_log(name, input_args)
              emit_event("tool_call", turn=turn + 1, tool=name, args_brief=_brief_args(name, input_args))
-             result_text, is_submit = dispatch_tool(name, input_args, custom_tools_config)
+             result_text, is_submit = dispatch_tool(name, input_args, custom_tools_config, mock_tools)
              emit_event("tool_result", turn=turn + 1, tool=name, status="error" if result_text.startswith("[ERROR]") else "ok")

              if is_submit:
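
Taken together: for each fixture, runMockedAgentFixtureTests stages a temp directory for the Python runner (layout reconstructed from its Promise.all of writeFile calls):

    orchagent-mock-test-<timestamp>/
        agent_runner.py       # copied runner script
        prompt.md             # the agent's prompt
        input.json            # fixture "input"
        mock_tools.json       # fixture "mocks"
        custom_tools.json     # manifest custom_tools (only when non-empty)
        output_schema.json    # schema.json "output" (only when present)

It then runs `python3 agent_runner.py --max-turns <n> --mock-tools mock_tools.json` there with LOCAL_MODE=1, LLM_PROVIDER, LLM_MODEL, and the provider API key in the environment, and judges the JSON printed on stdout against "expected_output" (deep equality) and/or "expected_contains" (substring checks).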