@orchagent/cli 0.3.62 → 0.3.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,11 +12,12 @@ const yaml_1 = __importDefault(require("yaml"));
12
12
  const fast_deep_equal_1 = __importDefault(require("fast-deep-equal"));
13
13
  const chokidar_1 = __importDefault(require("chokidar"));
14
14
  const errors_1 = require("../lib/errors");
15
+ const output_1 = require("../lib/output");
16
+ const dotenv_1 = require("../lib/dotenv");
15
17
  const config_1 = require("../lib/config");
16
18
  const llm_1 = require("../lib/llm");
17
- /**
18
- * Validate a fixture and return helpful errors
19
- */
19
+ const bundle_1 = require("../lib/bundle");
20
+ // ─── Utility functions ───────────────────────────────────────────────────────
20
21
  function validateFixture(data, fixturePath) {
21
22
  const fileName = path_1.default.basename(fixturePath);
22
23
  if (typeof data !== 'object' || data === null) {
@@ -35,9 +36,6 @@ function validateFixture(data, fixturePath) {
35
36
  }
36
37
  return data;
37
38
  }
38
- /**
39
- * Parse SKILL.md frontmatter
40
- */
41
39
  async function parseSkillMd(filePath) {
42
40
  try {
43
41
  const content = await promises_1.default.readFile(filePath, 'utf-8');
@@ -55,7 +53,8 @@ async function parseSkillMd(filePath) {
55
53
  }
56
54
  }
57
55
  /**
58
- * Run a command and return the result
56
+ * Run a shell command (for test runners like pytest/vitest)
57
+ * Uses shell: true because test runner commands may need PATH resolution
59
58
  */
60
59
  function runCommand(command, args, cwd, verbose) {
61
60
  return new Promise((resolve) => {
@@ -87,9 +86,6 @@ function runCommand(command, args, cwd, verbose) {
87
86
  });
88
87
  });
89
88
  }
90
- /**
91
- * Check if a command exists
92
- */
93
89
  async function commandExists(command) {
94
90
  const isWindows = process.platform === 'win32';
95
91
  const checker = isWindows ? 'where' : 'which';
@@ -104,9 +100,6 @@ async function commandExists(command) {
104
100
  return false;
105
101
  }
106
102
  }
107
- /**
108
- * Check if a file exists
109
- */
110
103
  async function fileExists(filePath) {
111
104
  try {
112
105
  await promises_1.default.access(filePath);
@@ -116,62 +109,278 @@ async function fileExists(filePath) {
116
109
  return false;
117
110
  }
118
111
  }
119
- /**
120
- * Detect the agent type from the directory structure
121
- */
122
- async function detectAgentType(agentDir) {
123
- // Check for SKILL.md first
124
- if (await fileExists(path_1.default.join(agentDir, 'SKILL.md'))) {
125
- return 'skill';
112
+ function extractTemplateVariables(template) {
113
+ const seen = new Set();
114
+ const result = [];
115
+ const pattern = /\{\{(\w+)\}\}/g;
116
+ let match;
117
+ while ((match = pattern.exec(template)) !== null) {
118
+ if (!seen.has(match[1])) {
119
+ seen.add(match[1]);
120
+ result.push(match[1]);
121
+ }
126
122
  }
127
- // Check for prompt.md (prompt agent)
128
- if (await fileExists(path_1.default.join(agentDir, 'prompt.md'))) {
129
- return 'prompt';
123
+ return result;
124
+ }
125
+ // ─── Validation ──────────────────────────────────────────────────────────────
126
+ function inferEngine(manifest, rawType) {
127
+ const hasRuntimeCommand = Boolean(manifest.runtime?.command?.trim());
128
+ const hasLoop = Boolean(manifest.loop && Object.keys(manifest.loop).length > 0);
129
+ if (hasRuntimeCommand)
130
+ return 'code_runtime';
131
+ if (hasLoop)
132
+ return 'managed_loop';
133
+ if (rawType === 'tool' || rawType === 'code')
134
+ return 'code_runtime';
135
+ if (rawType === 'agentic')
136
+ return 'managed_loop';
137
+ if (rawType === 'agent') {
138
+ if (manifest.custom_tools?.length || manifest.max_turns)
139
+ return 'managed_loop';
140
+ return 'managed_loop';
130
141
  }
131
- // Check for orchagent.json
142
+ return 'direct_llm';
143
+ }
144
+ function engineLabel(engine) {
145
+ switch (engine) {
146
+ case 'direct_llm': return 'prompt';
147
+ case 'managed_loop': return 'agent loop';
148
+ case 'code_runtime': return 'code runtime';
149
+ }
150
+ }
151
+ async function validateAgent(agentDir) {
152
+ const msgs = [];
153
+ const err = (text) => msgs.push({ level: 'error', text });
154
+ const warn = (text) => msgs.push({ level: 'warning', text });
155
+ const info = (text) => msgs.push({ level: 'info', text });
156
+ // Check for SKILL.md (skills are a separate path)
157
+ const skillPath = path_1.default.join(agentDir, 'SKILL.md');
158
+ const skillData = await parseSkillMd(skillPath);
159
+ if (skillData) {
160
+ info(`Skill: ${skillData.frontmatter.name}`);
161
+ if (!skillData.frontmatter.description) {
162
+ err('SKILL.md frontmatter missing "description"');
163
+ }
164
+ return { messages: msgs, isSkill: true, agentName: skillData.frontmatter.name };
165
+ }
166
+ // If SKILL.md exists but is invalid
167
+ if (await fileExists(skillPath)) {
168
+ warn('SKILL.md found but has invalid frontmatter (needs name + description in YAML)');
169
+ }
170
+ // Read orchagent.json
132
171
  const manifestPath = path_1.default.join(agentDir, 'orchagent.json');
133
- if (await fileExists(manifestPath)) {
172
+ let manifest;
173
+ try {
174
+ const raw = await promises_1.default.readFile(manifestPath, 'utf-8');
175
+ manifest = JSON.parse(raw);
176
+ }
177
+ catch (e) {
178
+ if (e.code === 'ENOENT') {
179
+ err('orchagent.json not found — create one with: orch init');
180
+ }
181
+ else {
182
+ err(`orchagent.json is not valid JSON: ${e.message}`);
183
+ }
184
+ return { messages: msgs, isSkill: false };
185
+ }
186
+ // Name
187
+ if (!manifest.name) {
188
+ err("'name' field is required in orchagent.json");
189
+ }
190
+ // Type
191
+ const rawType = (manifest.type || 'agent').trim().toLowerCase();
192
+ const validTypes = ['prompt', 'tool', 'agent', 'skill', 'agentic', 'code'];
193
+ if (!validTypes.includes(rawType)) {
194
+ err(`Invalid type '${manifest.type}' — use: prompt, tool, or agent`);
195
+ }
196
+ if (rawType === 'agentic')
197
+ warn("Type 'agentic' is deprecated — use 'agent'");
198
+ if (rawType === 'code')
199
+ warn("Type 'code' is deprecated — use 'tool'");
200
+ if (rawType === 'skill')
201
+ err("Skills should use SKILL.md, not orchagent.json with type='skill'");
202
+ // Engine inference
203
+ const hasRuntimeCommand = Boolean(manifest.runtime?.command?.trim());
204
+ const hasLoop = Boolean(manifest.loop && Object.keys(manifest.loop).length > 0);
205
+ if (hasRuntimeCommand && hasLoop) {
206
+ err('runtime.command and loop cannot both be set — choose one execution model');
207
+ }
208
+ const engine = inferEngine(manifest, rawType);
209
+ // run_mode
210
+ const runMode = (manifest.run_mode || 'on_demand').trim().toLowerCase();
211
+ if (runMode !== 'on_demand' && runMode !== 'always_on') {
212
+ err("run_mode must be 'on_demand' or 'always_on'");
213
+ }
214
+ if (runMode === 'always_on' && engine === 'direct_llm') {
215
+ err('run_mode=always_on requires runtime.command or loop configuration');
216
+ }
217
+ // File structure: prompt.md
218
+ const hasPrompt = await fileExists(path_1.default.join(agentDir, 'prompt.md'));
219
+ if ((engine === 'direct_llm' || engine === 'managed_loop') && !hasPrompt) {
220
+ const label = engine === 'direct_llm' ? 'prompt agents' : 'agent-type agents';
221
+ err(`prompt.md not found (required for ${label})`);
222
+ }
223
+ // File structure: entrypoint for code_runtime
224
+ let entrypoint;
225
+ if (engine === 'code_runtime') {
226
+ entrypoint = manifest.entrypoint || await (0, bundle_1.detectEntrypoint)(agentDir) || undefined;
227
+ if (!entrypoint && !manifest.runtime?.command && !manifest.source_url) {
228
+ err('No entrypoint found — create main.py, app.py, or set entrypoint in orchagent.json');
229
+ }
230
+ else if (entrypoint && !await fileExists(path_1.default.join(agentDir, entrypoint))) {
231
+ err(`Entrypoint '${entrypoint}' declared but file not found`);
232
+ }
233
+ }
234
+ // schema.json validity
235
+ const schemaPath = path_1.default.join(agentDir, 'schema.json');
236
+ const hasSchema = await fileExists(schemaPath);
237
+ if (hasSchema) {
134
238
  try {
135
- const raw = await promises_1.default.readFile(manifestPath, 'utf-8');
136
- const manifest = JSON.parse(raw);
137
- if (manifest.type === 'prompt')
138
- return 'prompt';
139
- if (manifest.type === 'skill')
140
- return 'skill';
141
- if (manifest.type === 'tool') {
142
- // Detect language
143
- if (await fileExists(path_1.default.join(agentDir, 'requirements.txt')))
144
- return 'code-python';
145
- if (await fileExists(path_1.default.join(agentDir, 'pyproject.toml')))
146
- return 'code-python';
147
- if (await fileExists(path_1.default.join(agentDir, 'package.json')))
148
- return 'code-js';
149
- // Default to Python for tool agents
150
- return 'code-python';
239
+ const raw = await promises_1.default.readFile(schemaPath, 'utf-8');
240
+ JSON.parse(raw);
241
+ }
242
+ catch {
243
+ err('schema.json contains invalid JSON');
244
+ }
245
+ }
246
+ // Deprecated fields
247
+ if (manifest.prompt) {
248
+ warn("'prompt' field in orchagent.json is ignored — use prompt.md file");
249
+ }
250
+ if (manifest.input_schema && hasSchema) {
251
+ warn("'input_schema' in orchagent.json is ignored — schema.json takes priority");
252
+ }
253
+ if (manifest.output_schema && hasSchema) {
254
+ warn("'output_schema' in orchagent.json is ignored — schema.json takes priority");
255
+ }
256
+ // Misplaced manifest fields (common error: dependencies at top level instead of under manifest)
257
+ const orchestrationFields = ['manifest_version', 'dependencies', 'max_hops', 'timeout_ms', 'per_call_downstream_cap'];
258
+ const misplaced = orchestrationFields.filter(f => (f in manifest) && !manifest.manifest);
259
+ if (misplaced.length > 0) {
260
+ err(`Orchestration fields (${misplaced.join(', ')}) must be nested under a "manifest" key`);
261
+ }
262
+ // required_secrets
263
+ if (manifest.required_secrets !== undefined) {
264
+ if (!Array.isArray(manifest.required_secrets)) {
265
+ err('required_secrets must be an array of strings');
266
+ }
267
+ else {
268
+ if (manifest.required_secrets.includes('ORCHAGENT_SERVICE_KEY')) {
269
+ warn('ORCHAGENT_SERVICE_KEY in required_secrets is not needed — the gateway auto-injects it for orchestrator agents');
270
+ }
271
+ // Check if secrets are available in local environment
272
+ const missingSecrets = manifest.required_secrets.filter(s => !process.env[s]);
273
+ if (missingSecrets.length > 0) {
274
+ warn(`Required secrets not in local environment: ${missingSecrets.join(', ')} — fixture tests may fail`);
275
+ }
276
+ }
277
+ }
278
+ // requirements.txt: orchagent vs orchagent-sdk
279
+ const reqPath = path_1.default.join(agentDir, 'requirements.txt');
280
+ if (await fileExists(reqPath)) {
281
+ try {
282
+ const reqContent = await promises_1.default.readFile(reqPath, 'utf-8');
283
+ if (/^orchagent\b/m.test(reqContent) && !/^orchagent-sdk\b/m.test(reqContent)) {
284
+ warn("requirements.txt has 'orchagent' — did you mean 'orchagent-sdk'?");
151
285
  }
152
286
  }
153
287
  catch {
154
- // Invalid manifest, continue detection
288
+ // Can't read, skip
155
289
  }
156
290
  }
157
- // Fallback: detect by file presence
158
- if (await fileExists(path_1.default.join(agentDir, 'requirements.txt')))
159
- return 'code-python';
160
- if (await fileExists(path_1.default.join(agentDir, 'pyproject.toml')))
161
- return 'code-python';
162
- if (await fileExists(path_1.default.join(agentDir, 'package.json')))
163
- return 'code-js';
164
- return 'unknown';
291
+ // custom_tools validation
292
+ if (engine === 'managed_loop' && manifest.custom_tools) {
293
+ const reservedNames = new Set(['bash', 'read_file', 'write_file', 'list_files', 'submit_result']);
294
+ const seenNames = new Set();
295
+ for (const tool of manifest.custom_tools) {
296
+ if (!tool.name || !tool.command) {
297
+ err(`Custom tool missing 'name' or 'command': ${JSON.stringify(tool)}`);
298
+ }
299
+ if (tool.name && reservedNames.has(tool.name)) {
300
+ err(`Custom tool '${tool.name}' conflicts with built-in tool name`);
301
+ }
302
+ if (tool.name && seenNames.has(tool.name)) {
303
+ err(`Duplicate custom tool name: '${tool.name}'`);
304
+ }
305
+ if (tool.name)
306
+ seenNames.add(tool.name);
307
+ }
308
+ }
309
+ // max_turns range
310
+ if (manifest.max_turns !== undefined) {
311
+ if (typeof manifest.max_turns !== 'number' || manifest.max_turns < 1 || manifest.max_turns > 50) {
312
+ err('max_turns must be a number between 1 and 50');
313
+ }
314
+ }
315
+ // Template variable mismatch (prompt.md vars vs schema.json)
316
+ if ((engine === 'direct_llm' || engine === 'managed_loop') && hasPrompt && hasSchema) {
317
+ try {
318
+ const prompt = await promises_1.default.readFile(path_1.default.join(agentDir, 'prompt.md'), 'utf-8');
319
+ const schemaRaw = await promises_1.default.readFile(schemaPath, 'utf-8');
320
+ const schemas = JSON.parse(schemaRaw);
321
+ const templateVars = extractTemplateVariables(prompt);
322
+ if (templateVars.length > 0 && schemas.input?.properties) {
323
+ const schemaProps = Object.keys(schemas.input.properties);
324
+ const missing = templateVars.filter(v => !schemaProps.includes(v));
325
+ if (missing.length > 0) {
326
+ warn(`prompt.md uses {{${missing.join('}}, {{')}}} but schema.json doesn't define ${missing.length === 1 ? 'it' : 'them'}`);
327
+ }
328
+ }
329
+ }
330
+ catch {
331
+ // Already caught above
332
+ }
333
+ }
334
+ return {
335
+ messages: msgs,
336
+ executionEngine: engine,
337
+ entrypoint,
338
+ agentName: manifest.name || undefined,
339
+ agentType: rawType,
340
+ isSkill: false,
341
+ };
165
342
  }
166
- /**
167
- * Recursively walk a directory and return all files
168
- */
343
+ function printValidation(validation) {
344
+ const errors = validation.messages.filter(m => m.level === 'error');
345
+ const warnings = validation.messages.filter(m => m.level === 'warning');
346
+ const infos = validation.messages.filter(m => m.level === 'info');
347
+ process.stderr.write(chalk_1.default.bold('\nValidating agent...\n'));
348
+ // Agent summary line
349
+ if (validation.isSkill) {
350
+ process.stderr.write(` ${chalk_1.default.bold('Type:')} skill\n`);
351
+ }
352
+ else if (validation.agentType && validation.executionEngine) {
353
+ process.stderr.write(` ${chalk_1.default.bold('Type:')} ${validation.agentType} (${engineLabel(validation.executionEngine)})\n`);
354
+ if (validation.entrypoint) {
355
+ process.stderr.write(` ${chalk_1.default.bold('Entrypoint:')} ${validation.entrypoint}\n`);
356
+ }
357
+ }
358
+ if (validation.agentName) {
359
+ process.stderr.write(` ${chalk_1.default.bold('Name:')} ${validation.agentName}\n`);
360
+ }
361
+ process.stderr.write('\n');
362
+ // Messages
363
+ for (const msg of errors) {
364
+ process.stderr.write(chalk_1.default.red(` ✗ ${msg.text}\n`));
365
+ }
366
+ for (const msg of warnings) {
367
+ process.stderr.write(chalk_1.default.yellow(` ⚠ ${msg.text}\n`));
368
+ }
369
+ for (const msg of infos) {
370
+ process.stderr.write(chalk_1.default.gray(` ℹ ${msg.text}\n`));
371
+ }
372
+ if (errors.length === 0) {
373
+ process.stderr.write(chalk_1.default.green(' ✓ Configuration valid\n'));
374
+ }
375
+ process.stderr.write('\n');
376
+ return errors.length === 0;
377
+ }
378
+ // ─── Test discovery ──────────────────────────────────────────────────────────
169
379
  async function walkDir(dir, files = []) {
170
380
  try {
171
381
  const entries = await promises_1.default.readdir(dir, { withFileTypes: true });
172
382
  for (const entry of entries) {
173
383
  const fullPath = path_1.default.join(dir, entry.name);
174
- // Skip common non-source directories
175
384
  if (entry.isDirectory()) {
176
385
  if (['node_modules', '__pycache__', '.git', 'dist', 'build', '.venv', 'venv'].includes(entry.name)) {
177
386
  continue;
@@ -188,16 +397,12 @@ async function walkDir(dir, files = []) {
188
397
  }
189
398
  return files;
190
399
  }
191
- /**
192
- * Discover test files in the agent directory
193
- */
194
400
  async function discoverTests(agentDir) {
195
401
  const result = {
196
402
  python: [],
197
403
  javascript: [],
198
404
  fixtures: [],
199
405
  };
200
- // Get all files recursively
201
406
  const allFiles = await walkDir(agentDir);
202
407
  for (const file of allFiles) {
203
408
  const basename = path_1.default.basename(file);
@@ -213,7 +418,7 @@ async function discoverTests(agentDir) {
213
418
  basename.endsWith('.spec.ts') || basename.endsWith('.spec.js')) {
214
419
  result.javascript.push(file);
215
420
  }
216
- // Fixture patterns: tests/fixture*.json or fixture*.json in tests/ subdirs
421
+ // Fixture patterns: tests/fixture*.json
217
422
  if (basename.endsWith('.json') && basename.startsWith('fixture')) {
218
423
  if (relPath.includes('tests' + path_1.default.sep) || relPath.startsWith('tests' + path_1.default.sep)) {
219
424
  result.fixtures.push(file);
@@ -222,19 +427,15 @@ async function discoverTests(agentDir) {
222
427
  }
223
428
  return result;
224
429
  }
225
- /**
226
- * Run Python tests using pytest
227
- */
430
+ // ─── Test runners ────────────────────────────────────────────────────────────
228
431
  async function runPythonTests(agentDir, verbose) {
229
432
  process.stderr.write(chalk_1.default.blue('\nRunning Python tests...\n\n'));
230
- // Check if pytest is available directly
231
433
  const hasPytest = await commandExists('pytest');
232
434
  if (hasPytest) {
233
435
  const args = verbose ? ['-v'] : [];
234
436
  const { code } = await runCommand('pytest', args, agentDir, verbose);
235
437
  return code;
236
438
  }
237
- // Try Python commands in order of preference
238
439
  const pythonCommands = process.platform === 'win32'
239
440
  ? ['python', 'py', 'python3']
240
441
  : ['python3', 'python'];
@@ -250,12 +451,8 @@ async function runPythonTests(agentDir, verbose) {
250
451
  process.stderr.write(chalk_1.default.red('No Python interpreter found. Install Python and pytest.\n'));
251
452
  return 1;
252
453
  }
253
- /**
254
- * Run JavaScript/TypeScript tests
255
- */
256
454
  async function runJsTests(agentDir, verbose) {
257
455
  process.stderr.write(chalk_1.default.blue('\nRunning JavaScript/TypeScript tests...\n\n'));
258
- // Check for vitest first
259
456
  const hasVitest = await fileExists(path_1.default.join(agentDir, 'node_modules', '.bin', 'vitest'));
260
457
  if (hasVitest) {
261
458
  const args = ['run'];
@@ -264,7 +461,6 @@ async function runJsTests(agentDir, verbose) {
264
461
  const { code } = await runCommand('npx', ['vitest', ...args], agentDir, verbose);
265
462
  return code;
266
463
  }
267
- // Fall back to npm test
268
464
  const packageJsonPath = path_1.default.join(agentDir, 'package.json');
269
465
  if (await fileExists(packageJsonPath)) {
270
466
  try {
@@ -283,15 +479,14 @@ async function runJsTests(agentDir, verbose) {
283
479
  return 1;
284
480
  }
285
481
  /**
286
- * Run fixture-based tests for prompt agents
482
+ * Run fixture tests for prompt/skill/managed_loop agents using LLM calls
287
483
  */
288
- async function runFixtureTests(agentDir, fixtures, verbose, config) {
484
+ async function runPromptFixtureTests(agentDir, fixtures, verbose, config) {
289
485
  process.stderr.write(chalk_1.default.blue('\nRunning fixture tests...\n\n'));
290
486
  // Read prompt
291
487
  let prompt;
292
488
  const promptPath = path_1.default.join(agentDir, 'prompt.md');
293
489
  const skillPath = path_1.default.join(agentDir, 'SKILL.md');
294
- // Check if this is a skill
295
490
  const skillData = await parseSkillMd(skillPath);
296
491
  if (skillData) {
297
492
  prompt = skillData.body;
@@ -327,7 +522,8 @@ async function runFixtureTests(agentDir, fixtures, verbose, config) {
327
522
  let failed = 0;
328
523
  for (const fixturePath of fixtures) {
329
524
  const fixtureName = path_1.default.basename(fixturePath);
330
- process.stderr.write(` ${fixtureName}: `);
525
+ const description = await getFixtureDescription(fixturePath);
526
+ process.stderr.write(` ${fixtureName}${description ? ` (${description})` : ''}: `);
331
527
  try {
332
528
  const raw = await promises_1.default.readFile(fixturePath, 'utf-8');
333
529
  let parsed;
@@ -338,10 +534,8 @@ async function runFixtureTests(agentDir, fixtures, verbose, config) {
338
534
  throw new errors_1.CliError(`Invalid JSON in ${path_1.default.basename(fixturePath)}: ${e.message}`);
339
535
  }
340
536
  const fixture = validateFixture(parsed, fixturePath);
341
- // Build and call LLM
342
537
  const fullPrompt = (0, llm_1.buildPrompt)(prompt, fixture.input);
343
538
  const result = await (0, llm_1.callLlm)(provider, key, model, fullPrompt, outputSchema);
344
- // Validate result
345
539
  let testPassed = true;
346
540
  const failures = [];
347
541
  if (fixture.expected_output) {
@@ -351,7 +545,6 @@ async function runFixtureTests(agentDir, fixtures, verbose, config) {
351
545
  }
352
546
  }
353
547
  if (fixture.expected_contains) {
354
- // Check if output contains expected strings
355
548
  const resultStr = JSON.stringify(result);
356
549
  for (const expected of fixture.expected_contains) {
357
550
  if (!resultStr.includes(expected)) {
@@ -387,73 +580,230 @@ async function runFixtureTests(agentDir, fixtures, verbose, config) {
387
580
  process.stderr.write(`Fixtures: ${passed} passed, ${failed} failed\n`);
388
581
  return failed > 0 ? 1 : 0;
389
582
  }
583
+ async function getFixtureDescription(fixturePath) {
584
+ try {
585
+ const raw = await promises_1.default.readFile(fixturePath, 'utf-8');
586
+ const data = JSON.parse(raw);
587
+ return data.description || null;
588
+ }
589
+ catch {
590
+ return null;
591
+ }
592
+ }
390
593
  /**
391
- * Watch mode: re-run tests on file changes
594
+ * Run a code_runtime entrypoint with JSON input on stdin, capture JSON output.
595
+ * Uses spawn with array args (no shell) to avoid injection risks.
392
596
  */
393
- async function watchTests(agentDir, agentType, verbose, config) {
394
- process.stderr.write(chalk_1.default.cyan('\nWatching for file changes... (press Ctrl+C to exit)\n\n'));
395
- const runTests = async () => {
396
- process.stderr.write(chalk_1.default.dim(`\n[${new Date().toLocaleTimeString()}] Running tests...\n`));
397
- // Re-discover tests each time to pick up new files
398
- const testFiles = await discoverTests(agentDir);
399
- await executeTests(agentDir, agentType, testFiles, verbose, config);
400
- };
401
- // Initial run
402
- await runTests();
403
- // Set up chokidar watcher
404
- let debounceTimer = null;
405
- const onChange = (filePath) => {
406
- if (debounceTimer)
407
- clearTimeout(debounceTimer);
408
- if (verbose) {
409
- process.stderr.write(chalk_1.default.dim(` Changed: ${path_1.default.relative(agentDir, filePath)}\n`));
410
- }
411
- debounceTimer = setTimeout(runTests, 300);
412
- };
413
- const watcher = chokidar_1.default.watch(agentDir, {
414
- ignored: /(node_modules|__pycache__|\.git|dist|build|\.venv|venv)/,
415
- persistent: true,
416
- ignoreInitial: true,
417
- });
418
- watcher
419
- .on('change', onChange)
420
- .on('add', onChange)
421
- .on('unlink', onChange)
422
- .on('error', (error) => {
423
- const message = error instanceof Error ? error.message : String(error);
424
- process.stderr.write(chalk_1.default.red(`Watcher error: ${message}\n`));
597
+ function runEntrypointWithInput(agentDir, entrypoint, stdinData, verbose) {
598
+ return new Promise((resolve) => {
599
+ const isJs = entrypoint.endsWith('.js') || entrypoint.endsWith('.ts') ||
600
+ entrypoint.endsWith('.mjs') || entrypoint.endsWith('.cjs');
601
+ const cmd = isJs ? 'node' : 'python3';
602
+ const proc = (0, child_process_1.spawn)(cmd, [entrypoint], {
603
+ cwd: agentDir,
604
+ stdio: ['pipe', 'pipe', 'pipe'],
605
+ env: { ...process.env, ORCHAGENT_LOCAL_EXECUTION: 'true' },
606
+ });
607
+ let stdout = '';
608
+ let stderr = '';
609
+ proc.stdout?.on('data', (data) => {
610
+ stdout += data.toString();
611
+ });
612
+ proc.stderr?.on('data', (data) => {
613
+ const text = data.toString();
614
+ stderr += text;
615
+ if (verbose) {
616
+ process.stderr.write(chalk_1.default.gray(text));
617
+ }
618
+ });
619
+ // Write input to stdin and close
620
+ proc.stdin?.write(stdinData);
621
+ proc.stdin?.end();
622
+ proc.on('close', (code) => {
623
+ resolve({ code: code ?? 1, stdout, stderr });
624
+ });
625
+ proc.on('error', (err) => {
626
+ resolve({ code: 1, stdout, stderr: err.message });
627
+ });
425
628
  });
426
- // Keep process alive
427
- await new Promise(() => { });
428
629
  }
429
630
  /**
430
- * Execute tests based on agent type and discovered test files
631
+ * Run fixture tests for code_runtime agents by executing the entrypoint
632
+ * with fixture input as stdin and validating the JSON output.
633
+ * Same interface as E2B: python main.py < input.json
431
634
  */
432
- async function executeTests(agentDir, agentType, testFiles, verbose, config) {
635
+ async function runCodeRuntimeFixtureTests(agentDir, fixtures, entrypoint, verbose) {
636
+ process.stderr.write(chalk_1.default.blue('\nRunning fixture tests (code runtime)...\n\n'));
637
+ let passed = 0;
638
+ let failed = 0;
639
+ for (const fixturePath of fixtures) {
640
+ const fixtureName = path_1.default.basename(fixturePath);
641
+ const description = await getFixtureDescription(fixturePath);
642
+ process.stderr.write(` ${fixtureName}${description ? ` (${description})` : ''}: `);
643
+ try {
644
+ const raw = await promises_1.default.readFile(fixturePath, 'utf-8');
645
+ let parsed;
646
+ try {
647
+ parsed = JSON.parse(raw);
648
+ }
649
+ catch (e) {
650
+ throw new errors_1.CliError(`Invalid JSON in ${fixtureName}: ${e.message}`);
651
+ }
652
+ const fixture = validateFixture(parsed, fixturePath);
653
+ // Run entrypoint with fixture input as stdin (same as E2B: python main.py < input.json)
654
+ const inputJson = JSON.stringify(fixture.input);
655
+ const result = await runEntrypointWithInput(agentDir, entrypoint, inputJson, verbose);
656
+ if (result.code !== 0) {
657
+ throw new Error(`Entrypoint exited with code ${result.code}` +
658
+ (result.stderr ? `\n stderr: ${result.stderr.trim().split('\n').join('\n stderr: ')}` : ''));
659
+ }
660
+ // Parse stdout as JSON
661
+ const trimmedOutput = result.stdout.trim();
662
+ let output;
663
+ try {
664
+ output = JSON.parse(trimmedOutput);
665
+ }
666
+ catch {
667
+ throw new Error(`Entrypoint output is not valid JSON.\n` +
668
+ ` stdout: ${trimmedOutput.slice(0, 200)}${trimmedOutput.length > 200 ? '...' : ''}`);
669
+ }
670
+ // Validate result
671
+ let testPassed = true;
672
+ const failures = [];
673
+ if (fixture.expected_output) {
674
+ if (!(0, fast_deep_equal_1.default)(output, fixture.expected_output)) {
675
+ testPassed = false;
676
+ failures.push(`Expected: ${JSON.stringify(fixture.expected_output, null, 2)}\n` +
677
+ ` Got: ${JSON.stringify(output, null, 2)}`);
678
+ }
679
+ }
680
+ if (fixture.expected_contains) {
681
+ const outputStr = JSON.stringify(output);
682
+ for (const expected of fixture.expected_contains) {
683
+ if (!outputStr.includes(expected)) {
684
+ testPassed = false;
685
+ failures.push(`Expected output to contain: "${expected}"`);
686
+ }
687
+ }
688
+ }
689
+ if (testPassed) {
690
+ process.stderr.write(chalk_1.default.green('PASS\n'));
691
+ passed++;
692
+ if (verbose) {
693
+ process.stderr.write(chalk_1.default.gray(` Input: ${JSON.stringify(fixture.input)}\n`));
694
+ process.stderr.write(chalk_1.default.gray(` Output: ${JSON.stringify(output)}\n`));
695
+ }
696
+ }
697
+ else {
698
+ process.stderr.write(chalk_1.default.red('FAIL\n'));
699
+ failed++;
700
+ for (const f of failures) {
701
+ process.stderr.write(chalk_1.default.red(` ${f}\n`));
702
+ }
703
+ }
704
+ }
705
+ catch (err) {
706
+ process.stderr.write(chalk_1.default.red('ERROR\n'));
707
+ failed++;
708
+ const message = err instanceof Error ? err.message : String(err);
709
+ process.stderr.write(chalk_1.default.red(` ${message}\n`));
710
+ }
711
+ }
712
+ process.stderr.write('\n');
713
+ process.stderr.write(`Fixtures: ${passed} passed, ${failed} failed\n`);
714
+ return failed > 0 ? 1 : 0;
715
+ }
716
+ // ─── Agent type detection ────────────────────────────────────────────────────
717
+ async function detectAgentType(agentDir) {
718
+ // Check for SKILL.md first
719
+ if (await fileExists(path_1.default.join(agentDir, 'SKILL.md'))) {
720
+ return 'skill';
721
+ }
722
+ // Check for orchagent.json
723
+ const manifestPath = path_1.default.join(agentDir, 'orchagent.json');
724
+ if (await fileExists(manifestPath)) {
725
+ try {
726
+ const raw = await promises_1.default.readFile(manifestPath, 'utf-8');
727
+ const manifest = JSON.parse(raw);
728
+ if (manifest.type === 'prompt')
729
+ return 'prompt';
730
+ if (manifest.type === 'skill')
731
+ return 'skill';
732
+ if (manifest.type === 'tool' || manifest.type === 'code') {
733
+ if (await fileExists(path_1.default.join(agentDir, 'package.json')))
734
+ return 'code-js';
735
+ return 'code-python';
736
+ }
737
+ if (manifest.type === 'agent' || manifest.type === 'agentic') {
738
+ // Agent with runtime.command is code-based
739
+ if (manifest.runtime?.command) {
740
+ if (await fileExists(path_1.default.join(agentDir, 'package.json')))
741
+ return 'code-js';
742
+ return 'code-python';
743
+ }
744
+ // Managed loop agent (uses prompt.md like prompt agents)
745
+ return 'prompt';
746
+ }
747
+ }
748
+ catch {
749
+ // Invalid manifest, continue detection
750
+ }
751
+ }
752
+ // Check for prompt.md (prompt agent)
753
+ if (await fileExists(path_1.default.join(agentDir, 'prompt.md'))) {
754
+ return 'prompt';
755
+ }
756
+ // Fallback: detect by file presence
757
+ if (await fileExists(path_1.default.join(agentDir, 'requirements.txt')))
758
+ return 'code-python';
759
+ if (await fileExists(path_1.default.join(agentDir, 'pyproject.toml')))
760
+ return 'code-python';
761
+ if (await fileExists(path_1.default.join(agentDir, 'package.json')))
762
+ return 'code-js';
763
+ return 'unknown';
764
+ }
765
+ // ─── Main test execution ─────────────────────────────────────────────────────
766
+ async function executeTests(agentDir, validation, testFiles, verbose, config) {
433
767
  let exitCode = 0;
434
- // Run tests based on what's available
435
768
  const hasTests = testFiles.python.length > 0 ||
436
769
  testFiles.javascript.length > 0 ||
437
770
  testFiles.fixtures.length > 0;
438
771
  if (!hasTests) {
439
- // For prompt agents/skills, suggest creating fixtures
440
- if (agentType === 'prompt' || agentType === 'skill') {
441
- process.stderr.write(chalk_1.default.yellow('No test files found.\n\n'));
442
- process.stderr.write('For prompt agents, create fixture files in tests/:\n');
443
- process.stderr.write(chalk_1.default.gray(' tests/fixture-1.json:\n'));
772
+ // Suggest appropriate test types based on agent
773
+ process.stderr.write(chalk_1.default.yellow('No test files found.\n\n'));
774
+ if (validation.executionEngine === 'code_runtime' && validation.entrypoint) {
775
+ process.stderr.write('Create fixture tests to dry-run your code:\n');
776
+ process.stderr.write(chalk_1.default.gray(' mkdir tests\n'));
777
+ process.stderr.write(chalk_1.default.gray(` # tests/fixture-basic.json — runs: ${validation.entrypoint} < input\n`));
778
+ process.stderr.write(chalk_1.default.gray(' {\n'));
779
+ process.stderr.write(chalk_1.default.gray(' "description": "Basic test",\n'));
780
+ process.stderr.write(chalk_1.default.gray(' "input": {"key": "value"},\n'));
781
+ process.stderr.write(chalk_1.default.gray(' "expected_contains": ["result"]\n'));
782
+ process.stderr.write(chalk_1.default.gray(' }\n\n'));
783
+ process.stderr.write('Or test interactively:\n');
784
+ process.stderr.write(chalk_1.default.gray(` orch run . --local --data '{"key": "value"}'\n\n`));
785
+ }
786
+ else if (validation.isSkill || validation.executionEngine === 'direct_llm' || validation.executionEngine === 'managed_loop') {
787
+ process.stderr.write('Create fixture tests in tests/:\n');
788
+ process.stderr.write(chalk_1.default.gray(' mkdir tests\n'));
789
+ process.stderr.write(chalk_1.default.gray(' # tests/fixture-basic.json — calls LLM with your prompt + input\n'));
444
790
  process.stderr.write(chalk_1.default.gray(' {\n'));
791
+ process.stderr.write(chalk_1.default.gray(' "description": "Basic test",\n'));
445
792
  process.stderr.write(chalk_1.default.gray(' "input": {"text": "Hello world"},\n'));
446
793
  process.stderr.write(chalk_1.default.gray(' "expected_contains": ["response"]\n'));
447
794
  process.stderr.write(chalk_1.default.gray(' }\n\n'));
795
+ if (validation.executionEngine === 'managed_loop') {
796
+ process.stderr.write('Or test the full agent loop:\n');
797
+ process.stderr.write(chalk_1.default.gray(` orch run . --local --data '{"task": "..."}'\n\n`));
798
+ }
448
799
  }
449
800
  else {
450
- process.stderr.write(chalk_1.default.yellow('No test files found.\n\n'));
451
801
  process.stderr.write('Supported test file patterns:\n');
452
802
  process.stderr.write(chalk_1.default.gray(' Python: test_*.py, *_test.py, tests/test_*.py\n'));
453
803
  process.stderr.write(chalk_1.default.gray(' JS/TS: *.test.ts, *.spec.ts, tests/*.test.ts\n'));
454
804
  process.stderr.write(chalk_1.default.gray(' Fixtures: tests/fixture-*.json\n\n'));
455
805
  }
456
- return 1;
806
+ return 0; // Validation passed, no tests is OK
457
807
  }
458
808
  // Run Python tests if found
459
809
  if (testFiles.python.length > 0) {
@@ -473,42 +823,226 @@ async function executeTests(agentDir, agentType, testFiles, verbose, config) {
473
823
  if (code !== 0)
474
824
  exitCode = 1;
475
825
  }
476
- // Run fixture tests if found (for prompt agents)
826
+ // Run fixture tests route by execution engine
477
827
  if (testFiles.fixtures.length > 0) {
478
828
  if (verbose) {
479
829
  process.stderr.write(chalk_1.default.gray(`Found ${testFiles.fixtures.length} fixture file(s)\n`));
480
830
  }
481
- const code = await runFixtureTests(agentDir, testFiles.fixtures, verbose, config);
482
- if (code !== 0)
483
- exitCode = 1;
831
+ if (validation.executionEngine === 'code_runtime' && validation.entrypoint) {
832
+ const code = await runCodeRuntimeFixtureTests(agentDir, testFiles.fixtures, validation.entrypoint, verbose);
833
+ if (code !== 0)
834
+ exitCode = 1;
835
+ }
836
+ else {
837
+ // Prompt, skill, and managed_loop agents: LLM-based fixture tests
838
+ const code = await runPromptFixtureTests(agentDir, testFiles.fixtures, verbose, config);
839
+ if (code !== 0)
840
+ exitCode = 1;
841
+ }
484
842
  }
485
843
  return exitCode;
486
844
  }
845
/**
 * Run validation + all tests in sequence.
 *
 * Loads .env, validates the agent configuration, discovers test files, and
 * delegates to executeTests. Returns a process exit code (0 = all passed).
 */
async function runAllChecks(agentDir, verbose, config) {
    // Load .env from agent directory (existing env vars take precedence)
    const dotEnvVars = await (0, dotenv_1.loadDotEnv)(agentDir);
    const loadedKeys = Object.keys(dotEnvVars);
    if (loadedKeys.length > 0) {
        for (const key of loadedKeys) {
            // Only fill in variables the environment doesn't already define
            if (!(key in process.env) || process.env[key] === undefined) {
                process.env[key] = dotEnvVars[key];
            }
        }
        const dotEnvCount = loadedKeys.length;
        process.stderr.write(chalk_1.default.gray(`Loaded ${dotEnvCount} variable${dotEnvCount === 1 ? '' : 's'} from .env\n`));
    }
    // Step 1: Validate — bail out before running any tests on failure
    const validation = await validateAgent(agentDir);
    if (!printValidation(validation)) {
        process.stderr.write(chalk_1.default.red('Fix validation errors above before publishing.\n'));
        return 1;
    }
    // Step 2: Discover tests
    const testFiles = await discoverTests(agentDir);
    if (verbose) {
        const { python, javascript, fixtures } = testFiles;
        const totalTests = python.length + javascript.length + fixtures.length;
        process.stderr.write(chalk_1.default.gray(`Discovered ${totalTests} test file(s)\n`));
    }
    // Step 3: Run tests
    return executeTests(agentDir, validation, testFiles, verbose, config);
}
876
// ─── Watch mode ──────────────────────────────────────────────────────────────
/**
 * Watch the agent directory and re-run validation + tests on file changes.
 *
 * Change events are debounced (300 ms), and runs are serialized: a change
 * arriving while a check run is still in progress no longer launches a
 * second, concurrent runAllChecks (which interleaved output); instead one
 * follow-up run is queued and started when the current run finishes.
 * Never returns — the final awaited Promise keeps the process alive until
 * the user presses Ctrl+C.
 */
async function watchTests(agentDir, verbose, config) {
    process.stderr.write(chalk_1.default.cyan('\nWatching for file changes... (press Ctrl+C to exit)\n'));
    let running = false;
    let rerunQueued = false;
    const runTests = async () => {
        if (running) {
            // Coalesce: remember a run was requested mid-flight and return
            rerunQueued = true;
            return;
        }
        running = true;
        try {
            do {
                rerunQueued = false;
                process.stderr.write(chalk_1.default.dim(`\n[${new Date().toLocaleTimeString()}] Running checks...\n`));
                await runAllChecks(agentDir, verbose, config);
            } while (rerunQueued); // run once more if changes arrived during the run
        }
        finally {
            running = false;
        }
    };
    // Initial run
    await runTests();
    // Debounce rapid bursts of change events (e.g. editor save + formatter)
    let debounceTimer = null;
    const onChange = (filePath) => {
        if (debounceTimer)
            clearTimeout(debounceTimer);
        if (verbose) {
            process.stderr.write(chalk_1.default.dim(` Changed: ${path_1.default.relative(agentDir, filePath)}\n`));
        }
        // void: fire-and-forget is intentional; errors surface via runAllChecks output
        debounceTimer = setTimeout(() => { void runTests(); }, 300);
    };
    const watcher = chokidar_1.default.watch(agentDir, {
        ignored: /(node_modules|__pycache__|\.git|dist|build|\.venv|venv)/,
        persistent: true,
        ignoreInitial: true,
    });
    watcher
        .on('change', onChange)
        .on('add', onChange)
        .on('unlink', onChange)
        .on('error', (error) => {
        const message = error instanceof Error ? error.message : String(error);
        process.stderr.write(chalk_1.default.red(`Watcher error: ${message}\n`));
    });
    // Keep process alive (resolved only by process exit)
    await new Promise(() => { });
}
911
// ─── Single run mode ─────────────────────────────────────────────────────────
/**
 * Validate, then run the agent once with the given input.
 * code_runtime: executes entrypoint with data as stdin.
 * direct_llm / managed_loop: calls LLM with prompt + data.
 *
 * Returns a process exit code; throws CliError for bad --data JSON,
 * a missing prompt file, or no usable LLM key.
 */
async function runOnce(agentDir, dataJson, verbose, config) {
    // Load .env from agent directory (existing env vars take precedence)
    const envFromFile = await (0, dotenv_1.loadDotEnv)(agentDir);
    const envKeys = Object.keys(envFromFile);
    if (envKeys.length > 0) {
        for (const name of envKeys) {
            if (!(name in process.env) || process.env[name] === undefined) {
                process.env[name] = envFromFile[name];
            }
        }
        const dotEnvCount = envKeys.length;
        process.stderr.write(chalk_1.default.gray(`Loaded ${dotEnvCount} variable${dotEnvCount === 1 ? '' : 's'} from .env\n`));
    }
    // Validate before doing anything else
    const validation = await validateAgent(agentDir);
    if (!printValidation(validation)) {
        process.stderr.write(chalk_1.default.red('Fix validation errors before running.\n'));
        return 1;
    }
    // Parse the --data payload up front so both branches get valid JSON
    let inputData;
    try {
        inputData = JSON.parse(dataJson);
    }
    catch {
        throw new errors_1.CliError(`Invalid JSON in --data: ${dataJson.slice(0, 100)}`);
    }
    if (validation.executionEngine === 'code_runtime' && validation.entrypoint) {
        // Run the entrypoint with data as stdin (same as E2B sandbox)
        process.stderr.write(`\nRunning: ${validation.entrypoint}\n\n`);
        const run = await runEntrypointWithInput(agentDir, validation.entrypoint, dataJson, verbose);
        // Surface captured stderr unless verbose mode already streamed it
        if (!verbose && run.stderr.trim()) {
            process.stderr.write(chalk_1.default.gray(run.stderr));
        }
        if (run.code !== 0) {
            process.stderr.write(chalk_1.default.red(`\nExited with code ${run.code}\n`));
            return 1;
        }
        // Print stdout (the agent's JSON output); fall back to raw text
        const stdoutText = run.stdout.trim();
        if (stdoutText) {
            try {
                (0, output_1.printJson)(JSON.parse(stdoutText));
            }
            catch {
                // Not JSON — print raw
                process.stdout.write(stdoutText + '\n');
            }
        }
        return 0;
    }
    // Prompt / managed_loop: call LLM with prompt + input.
    // SKILL.md frontmatter body wins over a plain prompt.md.
    const skill = await parseSkillMd(path_1.default.join(agentDir, 'SKILL.md'));
    let prompt;
    if (skill) {
        prompt = skill.body;
    }
    else {
        try {
            prompt = await promises_1.default.readFile(path_1.default.join(agentDir, 'prompt.md'), 'utf-8');
        }
        catch {
            throw new errors_1.CliError('No prompt.md or SKILL.md found');
        }
    }
    // Read output schema if available (schema.json is optional)
    let outputSchema;
    try {
        const schemaRaw = await promises_1.default.readFile(path_1.default.join(agentDir, 'schema.json'), 'utf-8');
        outputSchema = JSON.parse(schemaRaw).output;
    }
    catch {
        // Optional
    }
    const detected = await (0, llm_1.detectLlmKey)(['any'], config);
    if (!detected) {
        throw new errors_1.CliError('No LLM key found.\n' +
            'Set an environment variable (e.g., OPENAI_API_KEY) or add one to .env');
    }
    const { provider, key, model: serverModel } = detected;
    const model = serverModel ?? (0, llm_1.getDefaultModel)(provider);
    process.stderr.write(`\nRunning with ${provider} (${model})...\n\n`);
    const answer = await (0, llm_1.callLlm)(provider, key, model, (0, llm_1.buildPrompt)(prompt, inputData), outputSchema);
    (0, output_1.printJson)(answer);
    return 0;
}
1009
+ // ─── Command registration ────────────────────────────────────────────────────
487
1010
  function registerTestCommand(program) {
488
1011
  program
489
1012
  .command('test [path]')
490
- .description('Run agent test suite locally')
1013
+ .description('Validate agent configuration and run test suite')
491
1014
  .option('-v, --verbose', 'Show detailed test output')
492
1015
  .option('-w, --watch', 'Watch for file changes and re-run tests')
1016
+ .option('-r, --run', 'Run the agent once with --data input (validate first)')
1017
+ .option('-d, --data <json>', 'JSON input for --run mode')
493
1018
  .addHelpText('after', `
494
1019
  Examples:
495
- orch test Run tests in current directory
496
- orch test ./my-agent Run tests in specified directory
1020
+ orch test Validate + run tests in current directory
1021
+ orch test ./my-agent Validate + run tests in specified directory
497
1022
  orch test --verbose Show detailed test output
498
- orch test --watch Watch mode - re-run on file changes
1023
+ orch test --watch Watch mode re-run on file changes
1024
+ orch test --run --data '{"task": "hello"}' Validate, then run once
499
1025
 
500
- Test Discovery:
501
- Python: test_*.py, *_test.py, tests/test_*.py, tests/*_test.py
502
- JS/TS: *.test.ts, *.test.js, *.spec.ts, *.spec.js, tests/*.test.*
503
- Fixtures: tests/fixture-*.json (for prompt agents)
1026
+ What it checks:
1027
+ 1. Validates orchagent.json (type, engine, required files, secrets, etc.)
1028
+ 2. Runs Python tests (pytest): test_*.py, *_test.py
1029
+ 3. Runs JS/TS tests (vitest): *.test.ts, *.spec.ts
1030
+ 4. Runs fixture tests: tests/fixture-*.json
504
1031
 
505
- Fixture Format (tests/fixture-1.json):
1032
+ Fixture Format (tests/fixture-basic.json):
506
1033
  {
1034
+ "description": "Test description",
507
1035
  "input": {"key": "value"},
508
1036
  "expected_output": {"result": "expected"},
509
- "expected_contains": ["substring"],
510
- "description": "Test description"
1037
+ "expected_contains": ["substring"]
511
1038
  }
1039
+
1040
+ For code_runtime agents, fixtures run your entrypoint with input as stdin.
1041
+ For prompt/agent types, fixtures call the LLM with your prompt + input.
1042
+
1043
+ Run mode (--run):
1044
+ Validates the agent, then executes it once with the provided --data.
1045
+ Loads .env automatically. Same interface as: orch run . --local --data '...'
512
1046
  `)
513
1047
  .action(async (agentPath, options) => {
514
1048
  const agentDir = agentPath
@@ -527,18 +1061,7 @@ Fixture Format (tests/fixture-1.json):
527
1061
  }
528
1062
  throw err;
529
1063
  }
530
- // Detect agent type
531
- const agentType = await detectAgentType(agentDir);
532
- if (options.verbose) {
533
- process.stderr.write(chalk_1.default.gray(`Detected agent type: ${agentType}\n`));
534
- }
535
- // Discover test files
536
- const testFiles = await discoverTests(agentDir);
537
- if (options.verbose) {
538
- const totalTests = testFiles.python.length + testFiles.javascript.length + testFiles.fixtures.length;
539
- process.stderr.write(chalk_1.default.gray(`Discovered ${totalTests} test file(s)\n`));
540
- }
541
- // Get config for LLM access (needed for fixture tests)
1064
+ // Get config for LLM access (needed for fixture tests and run mode)
542
1065
  let config;
543
1066
  try {
544
1067
  config = await (0, config_1.getResolvedConfig)();
@@ -546,18 +1069,27 @@ Fixture Format (tests/fixture-1.json):
546
1069
  catch {
547
1070
  // Config not available, fixture tests will use env vars only
548
1071
  }
1072
+ // Run mode: validate then execute once
1073
+ if (options.run) {
1074
+ if (!options.data) {
1075
+ throw new errors_1.CliError('Missing --data for run mode.\n\n' +
1076
+ `Usage: orch test --run --data '{"key": "value"}'`);
1077
+ }
1078
+ const exitCode = await runOnce(agentDir, options.data, !!options.verbose, config);
1079
+ process.exit(exitCode);
1080
+ }
549
1081
  // Watch mode
550
1082
  if (options.watch) {
551
- await watchTests(agentDir, agentType, !!options.verbose, config);
1083
+ await watchTests(agentDir, !!options.verbose, config);
552
1084
  return;
553
1085
  }
554
- // Run tests
555
- const exitCode = await executeTests(agentDir, agentType, testFiles, !!options.verbose, config);
1086
+ // Single run: validate + tests
1087
+ const exitCode = await runAllChecks(agentDir, !!options.verbose, config);
556
1088
  if (exitCode === 0) {
557
- process.stderr.write(chalk_1.default.green('\nAll tests passed.\n'));
1089
+ process.stderr.write(chalk_1.default.green('\nAll checks passed.\n'));
558
1090
  }
559
1091
  else {
560
- process.stderr.write(chalk_1.default.red('\nSome tests failed.\n'));
1092
+ process.stderr.write(chalk_1.default.red('\nSome checks failed.\n'));
561
1093
  }
562
1094
  process.exit(exitCode);
563
1095
  });