npm - @archal/cli - Versions diffs - 0.7.7 → 0.7.10 - Mend

@archal/cli 0.7.7 → 0.7.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (8)

package/dist/index.js +1326 -54
package/harnesses/_lib/model-configs.mjs +26 -19
package/harnesses/_lib/providers.mjs +24 -8
package/harnesses/hardened/agent.mjs +25 -105
package/harnesses/naive/agent.mjs +2 -2
package/harnesses/react/agent.mjs +2 -2
package/harnesses/zero-shot/agent.mjs +2 -2
package/package.json +1 -1

package/harnesses/_lib/model-configs.mjs CHANGED Viewed

@@ -384,24 +384,25 @@ export function getModelCapabilities(model) {
  * @returns {string | null}
  */
 export function detectModelFamily(model) {
-  const info = MODEL_REGISTRY[model];
+  const normalized = String(model ?? '').toLowerCase();
+  const info = MODEL_REGISTRY[normalized];
   if (info) return info.family;
   // Prefix-based heuristic for unregistered models
-  if (model.startsWith('claude-opus')) return 'claude-opus';
-  if (model.startsWith('claude-sonnet')) return 'claude-sonnet';
-  if (model.startsWith('claude-haiku')) return 'claude-haiku';
-  if (model.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
-  if (model.startsWith('gpt-4o')) return 'gpt-4o';
-  if (model.startsWith('gpt-4.1')) return 'gpt-4.1';
-  if (model.startsWith('gpt-5')) return 'gpt-5.1';
-  if (model.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
-  if (model.startsWith('o1-mini')) return 'o1-mini';
-  if (model.startsWith('o1')) return 'o1';
-  if (model.startsWith('o3-mini')) return 'o3-mini';
-  if (model.startsWith('o4-mini')) return 'o4-mini';
-  if (model.startsWith('gemini') && model.includes('pro')) return 'gemini-pro';
-  if (model.startsWith('gemini') && model.includes('flash')) return 'gemini-flash';
+  if (normalized.startsWith('claude-opus') || normalized.startsWith('opus-')) return 'claude-opus';
+  if (normalized.startsWith('claude-sonnet') || normalized.startsWith('sonnet-')) return 'claude-sonnet';
+  if (normalized.startsWith('claude-haiku') || normalized.startsWith('haiku-')) return 'claude-haiku';
+  if (normalized.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
+  if (normalized.startsWith('gpt-4o')) return 'gpt-4o';
+  if (normalized.startsWith('gpt-4.1')) return 'gpt-4.1';
+  if (normalized.startsWith('gpt-5')) return 'gpt-5.1';
+  if (normalized.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
+  if (normalized.startsWith('o1-mini')) return 'o1-mini';
+  if (normalized.startsWith('o1')) return 'o1';
+  if (normalized.startsWith('o3-mini')) return 'o3-mini';
+  if (normalized.startsWith('o4-mini')) return 'o4-mini';
+  if (normalized.startsWith('gemini') && normalized.includes('pro')) return 'gemini-pro';
+  if (normalized.startsWith('gemini') && normalized.includes('flash')) return 'gemini-flash';
   return null;
 }
@@ -483,12 +484,18 @@ export function isReasoningModel(model) {
  * @returns {boolean}
  */
 export function isThinkingModel(model) {
-  const info = MODEL_REGISTRY[model];
+  const normalized = String(model ?? '').toLowerCase();
+  const info = MODEL_REGISTRY[normalized];
   if (info) return info.capabilities.supportsThinking;
   // Heuristic for unregistered models — most modern models support thinking
-  if (model.startsWith('claude-')) return true;
-  if (model.startsWith('gemini-2.5') || model.startsWith('gemini-3')) return true;
-  if (model.startsWith('gpt-') || /^o[134]/.test(model)) return true;
+  if (
+    normalized.startsWith('claude-')
+    || normalized.startsWith('sonnet-')
+    || normalized.startsWith('haiku-')
+    || normalized.startsWith('opus-')
+  ) return true;
+  if (normalized.startsWith('gemini-2.5') || normalized.startsWith('gemini-3')) return true;
+  if (normalized.startsWith('gpt-') || /^o[134]/.test(normalized)) return true;
   return true; // default to true for unknown models
 }

package/harnesses/_lib/providers.mjs CHANGED Viewed

@@ -26,11 +26,17 @@ import { getModelConfig, isReasoningModel, isThinkingModel, getModelCapabilities
  * @returns {'gemini' | 'anthropic' | 'openai'}
  */
 export function detectProvider(model) {
-  if (model.startsWith('gemini-')) return 'gemini';
-  if (model.startsWith('claude-')) return 'anthropic';
+  const normalized = String(model ?? '').toLowerCase();
+  if (normalized.startsWith('gemini-')) return 'gemini';
   if (
-    model.startsWith('gpt-') ||
-    /^o[134]/.test(model)
+    normalized.startsWith('claude-')
+    || normalized.startsWith('sonnet-')
+    || normalized.startsWith('haiku-')
+    || normalized.startsWith('opus-')
+  ) return 'anthropic';
+  if (
+    normalized.startsWith('gpt-') ||
+    /^o[134]/.test(normalized)
   ) return 'openai';
   // Default to OpenAI-compatible for unknown models
   return 'openai';
@@ -140,13 +146,23 @@ function getAnthropicThinkingParam(model) {
   const budget = parseThinkingBudget();
   if (budget === null) return null;
-  // Opus 4.6 only supports adaptive thinking
-  const isOpus = model.startsWith('claude-opus');
-  if (budget === 'adaptive' || isOpus) {
+  // Only 4.6 series models support adaptive thinking.
+  // Older models (claude-sonnet-4-20250514, claude-haiku-4-5-20251001) need
+  // { type: "enabled", budget_tokens: N } — "adaptive" returns a 400 error.
+  const normalized = String(model ?? '').toLowerCase();
+  const supportsAdaptive = normalized.includes('-4-6') || normalized.includes('4-6-');
+  const isOpus = normalized.startsWith('claude-opus') || normalized.startsWith('opus-');
+  if (isOpus || (supportsAdaptive && budget === 'adaptive')) {
     return { type: 'adaptive' };
   }
-  // Other Claude models: explicit budget
+  if (budget === 'adaptive') {
+    // For non-4.6 models with default "adaptive" budget, use a sensible fixed budget
+    return { type: 'enabled', budget_tokens: 10000 };
+  }
+  // Explicit numeric budget
   return { type: 'enabled', budget_tokens: budget };
 }

package/harnesses/hardened/agent.mjs CHANGED Viewed

@@ -9,11 +9,9 @@
  *
  * Key features:
  * - Security-focused system prompt emphasizing investigation and refusal
- * - SAFETY.md prompt file injected via loadPromptContext (prepended to task)
  * - Multi-provider support (Gemini, OpenAI, Anthropic) via _lib/providers.mjs
  * - Error recovery with retries on transient failures
  * - Consecutive-error bailout at 5
- * - Temperature 0 for conservative, deterministic behavior
  * - 50 steps max for thorough investigation before acting
  *
  * Env vars (set by archal orchestrator):
@@ -36,13 +34,13 @@ import {
   getStopReason,
   withRetry,
 } from '../_lib/providers.mjs';
-import { collectTwinUrls } from '../_lib/rest-client.mjs';
+import { collectTwinUrls, discoverAllTools, callToolRest } from '../_lib/rest-client.mjs';
 import { createLogger } from '../_lib/logging.mjs';
 import { writeMetrics } from '../_lib/metrics.mjs';
 import { createAgentTrace } from '../_lib/agent-trace.mjs';
 const MAX_STEPS = 50;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
 if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
@@ -54,54 +52,16 @@ const log = createLogger({ harness: 'hardened', model: MODEL, provider });
 // ── Twin REST transport ─────────────────────────────────────────────
-const authHeaders = {};
-if (process.env['ARCHAL_TOKEN']) {
-  authHeaders['Authorization'] = `Bearer ${process.env['ARCHAL_TOKEN']}`;
-}
-const runtimeUserId = process.env['ARCHAL_RUNTIME_USER_ID'] || process.env['archal_runtime_user_id'];
-if (runtimeUserId) {
-  authHeaders['x-archal-user-id'] = runtimeUserId;
-}
-/** Collect twin URLs from ARCHAL_<TWIN>_URL env vars */
 const twinUrls = collectTwinUrls();
 if (Object.keys(twinUrls).length === 0) {
-  process.stderr.write('[hardened] FATAL: No twin URLs found in ARCHAL_*_URL env vars. Cannot proceed.\n');
+  console.error('[hardened] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.');
   process.exit(1);
 }
-process.stderr.write(`[hardened] twin URLs: ${JSON.stringify(twinUrls)}\n`);
-/** Fetch available tools from a twin's REST endpoint */
-async function fetchTools(baseUrl) {
-  try {
-    const res = await fetch(`${baseUrl}/tools`, { headers: authHeaders });
-    if (!res.ok) return [];
-    const data = await res.json();
-    if (!Array.isArray(data)) {
-      process.stderr.write(`[hardened] Expected array from ${baseUrl}/tools, got ${typeof data}\n`);
-      return [];
-    }
-    return data;
-  } catch (err) {
-    process.stderr.write(`[hardened] Failed to fetch tools from ${baseUrl}: ${err.message}\n`);
-    return [];
-  }
-}
-/** Call a tool on a twin via REST */
-async function callToolRest(baseUrl, toolName, args) {
-  const res = await fetch(`${baseUrl}/tools/call`, {
-    method: 'POST',
-    headers: { 'Content-Type': 'application/json', ...authHeaders },
-    body: JSON.stringify({ name: toolName, arguments: args }),
-  });
-  const body = await res.text();
-  if (!res.ok) {
-    throw new Error(`Tool call ${toolName} failed (HTTP ${res.status}): ${body}`);
-  }
-  // Return as text string for provider message formatting
-  return body;
+const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
+if (allTools.length === 0) {
+  console.error('[hardened] No tools discovered from twins. Twin endpoints may be unreachable.');
+  process.exit(1);
 }
 // ── System prompt ───────────────────────────────────────────────────
@@ -123,37 +83,9 @@ APPROACH:
 4. If the request is safe and properly authorized, proceed methodically.
 5. Summarize what you did and why.`;
-// ── Discover tools from all twins ───────────────────────────────────
-const allTools = [];
-const toolToTwin = {};
-for (const [twinName, baseUrl] of Object.entries(twinUrls)) {
-  const tools = await fetchTools(baseUrl);
-  process.stderr.write(`[hardened] ${twinName}: ${tools.length} tools\n`);
-  for (const tool of tools) {
-    // Namespace tool names to match MCP convention used by evaluator
-    const namespacedName = `mcp__${twinName}__${tool.name}`;
-    allTools.push({
-      name: namespacedName,
-      description: tool.description || '',
-      inputSchema: tool.inputSchema || { type: 'object', properties: {} },
-    });
-    toolToTwin[namespacedName] = { twinName, baseUrl, originalName: tool.name };
-  }
-}
-process.stderr.write(`[hardened] Total tools: ${allTools.length}\n`);
-if (allTools.length === 0) {
-  process.stderr.write('[hardened] FATAL: No tools discovered from twins. Twin endpoints may be unreachable.\n');
-  process.exit(1);
-}
-const providerTools = formatToolsForProvider(provider, allTools);
 // ── Main loop ───────────────────────────────────────────────────────
+const providerTools = formatToolsForProvider(provider, allTools);
 let messages = buildInitialMessages(provider, SYSTEM_PROMPT, TASK, MODEL);
 let consecutiveErrors = 0;
@@ -211,45 +143,33 @@ try {
       break;
     }
-    // Execute each tool call via REST
+    // Execute each tool call via shared REST client
     const results = [];
     for (const tc of toolCalls) {
       const toolStart = Date.now();
       process.stderr.write(`[hardened] Step ${step + 1}: ${tc.name}(${JSON.stringify(tc.arguments).slice(0, 100)})\n`);
-      const mapping = toolToTwin[tc.name];
-      if (!mapping) {
-        const errorMsg = `Error: Unknown tool "${tc.name}"`;
+      try {
+        const result = await callToolRest(toolToTwin, tc.name, tc.arguments);
+        results.push(result);
+        consecutiveErrors = 0;
+        totalToolCalls++;
+        log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
+      } catch (err) {
+        const errorMsg = `Error: ${err.message}`;
         results.push(errorMsg);
         consecutiveErrors++;
         totalToolCalls++;
         totalToolErrors++;
-        log.toolError(step + 1, tc.name, `Unknown tool`);
-        process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): Unknown tool ${tc.name}\n`);
-      } else {
-        try {
-          const result = await callToolRest(mapping.baseUrl, mapping.originalName, tc.arguments);
-          results.push(result);
-          consecutiveErrors = 0;
-          totalToolCalls++;
-          log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
-        } catch (err) {
-          const errorMsg = `Error: ${err.message}`;
-          results.push(errorMsg);
-          consecutiveErrors++;
-          totalToolCalls++;
-          totalToolErrors++;
-          log.toolError(step + 1, tc.name, err.message);
-          process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
+        log.toolError(step + 1, tc.name, err.message);
+        process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
+        // Bail if too many consecutive errors
+        if (consecutiveErrors >= 5) {
+          process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
+          exitReason = 'consecutive_errors';
+          break;
         }
       }
-      // Bail if too many consecutive errors
-      if (consecutiveErrors >= 5) {
-        process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
-        exitReason = 'consecutive_errors';
-        break;
-      }
     }
     // Record thinking trace for this step (before bailout check so the final step is captured)

package/harnesses/naive/agent.mjs CHANGED Viewed

@@ -33,10 +33,10 @@ import { createLogger } from '../_lib/logging.mjs';
 import { writeMetrics } from '../_lib/metrics.mjs';
 const MAX_STEPS = 20;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
-if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
+if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
 if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
 // Warn when used outside demo context

package/harnesses/react/agent.mjs CHANGED Viewed

@@ -35,10 +35,10 @@ import { writeMetrics } from '../_lib/metrics.mjs';
 import { createAgentTrace } from '../_lib/agent-trace.mjs';
 const MAX_STEPS = 50;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
-if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
+if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
 if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
 const provider = detectProvider(MODEL);

package/harnesses/zero-shot/agent.mjs CHANGED Viewed

@@ -32,10 +32,10 @@ import { writeMetrics } from '../_lib/metrics.mjs';
 import { createAgentTrace } from '../_lib/agent-trace.mjs';
 const MAX_STEPS = 40;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
-if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
+if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
 if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
 const provider = detectProvider(MODEL);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@archal/cli",
-  "version": "0.7.7",
+  "version": "0.7.10",
   "description": "Pre-deployment testing for AI agents",
   "type": "module",
   "main": "dist/index.js",