npm - la-machina-engine - Versions diffs - 0.20.0 → 0.21.0 - Mend

la-machina-engine 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -259,28 +259,59 @@ const result = await engine.run({
 }
 ```
-### Done — JSON Mode, Parse Failed
+### Failed — JSON Mode, Parse / Schema Error
-When the model doesn't return valid JSON despite instructions:
+When `outputFormat: 'json'` is requested but the final model output is
+empty, not valid JSON, or fails the supplied `outputSchema`, the run
+terminates as `status: 'failed'` with a typed code (engine ≥ 0.21.0). The
+old permissive fallback (`status: 'done'` with `data` falling back to raw
+text) is removed — a strict-JSON request that can't satisfy the contract
+fails at the engine boundary so callers never mistake an empty/invalid
+output for a successful structured result.
+Raw parse failure:
 ```json
 {
   "runId": "run_abc",
-  "status": "done",
-  "data": "Here's the pricing: Starter at $29, Pro at $99, Enterprise at $299.",
+  "status": "failed",
+  "data": null,
   "meta": {
     "nodeId": "extract",
-    "turns": 2,
-    "tokensUsed": { "input": 5000, "output": 800 },
+    "turns": 0,
+    "tokensUsed": { "input": 0, "output": 0 },
     "durationMs": 6000,
-    "output": "Here's the pricing: Starter at $29, Pro at $99, Enterprise at $299."
+    "transcript": { "path": "projects/run_abc/nodes/extract", "lastShardIndex": 0 }
   },
-  "errors": [],
+  "errors": [
+    {
+      "code": "ERR_JSON_OUTPUT_PARSE",
+      "message": "Output is not valid JSON: …"
+    }
+  ],
   "timestamp": 1712966400000
 }
 ```
-`data` falls back to raw text. Client checks `typeof response.data === 'object'` to verify structured output.
+Schema mismatch (parse succeeded, validation failed):
+```json
+{
+  "runId": "run_abc",
+  "status": "failed",
+  "data": null,
+  "errors": [
+    {
+      "code": "ERR_JSON_OUTPUT_SCHEMA",
+      "message": "Output failed schema validation: …"
+    }
+  ]
+}
+```
+In both cases the engine still appends a `json_parse_failure` inspect
+event (unchanged from Plan 026) — the failure is now visible BOTH in
+inspect AND on the public run result.
 ### Paused — Human Approval Needed
@@ -456,8 +487,10 @@ await engine.resume({
 | `ERR_STREAM_PARSE` | Malformed API response | No — provider issue |
 | `ERR_STREAM_INCOMPLETE` | Stream ended without message_stop | Yes — transient |
 | `ERR_UNEXPECTED_STOP` | Unknown stop reason from API | No — investigate |
-| `SCHEMA_VALIDATION_FAILED` | JSON output doesn't match outputSchema | No — adjust schema or task |
-| `JSON_PARSE_FAILED` | Model didn't return valid JSON | No — adjust task |
+| `ERR_JSON_OUTPUT_PARSE` | `outputFormat: 'json'` returned empty / non-JSON output (engine ≥ 0.21.0) | No — adjust task / prompt |
+| `ERR_JSON_OUTPUT_SCHEMA` | JSON output failed `outputSchema` validation (engine ≥ 0.21.0) | No — adjust schema or task |
+| `SCHEMA_VALIDATION_FAILED` | (Legacy inspect-only code; pre-0.21.0) JSON output doesn't match outputSchema | No — adjust schema or task |
+| `JSON_PARSE_FAILED` | (Legacy inspect-only code; pre-0.21.0) Model didn't return valid JSON | No — adjust task |
 ### Workflow Runner Integration

package/dist/index.cjs CHANGED Viewed

@@ -3325,6 +3325,7 @@ async function agentLoop(options) {
     path: `projects/${ctx.runId}/nodes/${ctx.nodeId}`,
     lastShardIndex: 0
   };
+  let cumulativeToolCalls = 0;
   const anthropicTools = tools.map(toAnthropicTool);
   let lastAssistantText = "";
   const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT = 3;
@@ -3373,7 +3374,8 @@ async function agentLoop(options) {
           status: "done",
           output: lastAssistantText || "[Token budget exhausted]",
           tokensUsed: ctx.getTokensUsed(),
-          turns: ctx.getTurnCount()
+          turns: ctx.getTurnCount(),
+          toolCallsCount: cumulativeToolCalls
         };
       }
     }
@@ -3718,6 +3720,7 @@ async function agentLoop(options) {
       apiRetryCount = 0;
       consecutive529 = 0;
       await ctx.endTurn();
+      cumulativeToolCalls += toolCallsToDispatch.length;
       await emitInspectTurn({
         inspect: options.inspect,
         turn: ctx.getTurnCount() - 1,
@@ -3751,7 +3754,8 @@ async function agentLoop(options) {
             status: "done",
             output: lastAssistantText || "[Stopped by stop hook]",
             tokensUsed: ctx.getTokensUsed(),
-            turns: ctx.getTurnCount()
+            turns: ctx.getTurnCount(),
+            toolCallsCount: cumulativeToolCalls
           };
         }
       }
@@ -3786,7 +3790,8 @@ async function agentLoop(options) {
         status: "done",
         output: lastAssistantText,
         tokensUsed: ctx.getTokensUsed(),
-        turns: ctx.getTurnCount()
+        turns: ctx.getTurnCount(),
+        toolCallsCount: cumulativeToolCalls
       };
     }
     if (stopReason === "max_tokens") {
@@ -9227,11 +9232,16 @@ function createDescribeServiceTool(opts) {
      */
     path: import_zod24.z.string().min(1).optional(),
     /**
-     * Plan 054 — defaults to 8, capped at 20. Only meaningful in
-     * search modes; ignored when fetching the full catalog or one
-     * exact endpoint.
+     * Plan 054 — defaults to 8, capped at MAX_SEARCH_LIMIT.
+     * Plan 054 review fix LOW — Zod no longer hard-rejects
+     * values above the cap. A model that picks `limit: 100`
+     * shouldn't waste a turn on a validation error; the
+     * runtime `Math.min` clamp below is the authority and
+     * silently bounds to MAX_SEARCH_LIMIT (20). Schema still
+     * rejects non-integer / non-positive values so we don't
+     * silently round a sloppy float or treat negatives as the default.
      */
-    limit: import_zod24.z.number().int().min(1).max(MAX_SEARCH_LIMIT).optional()
+    limit: import_zod24.z.number().int().min(1).optional()
   });
   const description = `Look up endpoints on one configured API service.
   1. Search: DescribeService({ service, query }) returns compact ranked matches with method, path, description, and a relevance score. Use this for broad services where only one endpoint slice is needed. Add { method } to filter.
@@ -12746,14 +12756,17 @@ ${inputJson}
       turnsUsed: ctx.getTurnCount(),
       ...result.status === "done" ? { output: result.output } : {},
       ...result.status === "failed" ? { error: result.error } : {},
-      ...result.status === "done" ? { toolCallCount: result.turns } : {},
+      // Engine 055 — accurate dispatched-tool-call count (was previously
+      // sourced from `result.turns`, so a no-tool one-turn run reported
+      // `toolCallCount: 1`). The loop accumulates the real dispatched
+      // total; default to `0` if a future code path forgets to set it.
+      ...result.status === "done" ? { toolCallCount: result.toolCallsCount ?? 0 } : {},
       ...logPath !== void 0 ? { transcriptPath: logPath } : {}
     };
     await dispatchHooks(this.config.hooks.postRun, event);
   }
-  async finalizeResult(loopResult, writer, _logPath, jsonOptions, inspect) {
+  async finalizeResult(loopResult, writer, logPath, jsonOptions, inspect) {
     if (loopResult.status === "done") {
-      await writer.setStatus("done");
       let data;
       if (jsonOptions?.outputFormat === "json") {
         const parsed = tryParseJSON2(loopResult.output);
@@ -12769,7 +12782,15 @@ ${inputJson}
                 validationError: validated.error,
                 ts: Date.now()
               });
-              data = void 0;
+              await writer.setStatus("failed");
+              return {
+                status: "failed",
+                error: new EngineError(
+                  "ERR_JSON_OUTPUT_SCHEMA",
+                  `Output failed schema validation: ${validated.error}`
+                ),
+                transcript: { path: logPath, lastShardIndex: 0 }
+              };
             }
           } else {
             data = parsed.value;
@@ -12781,14 +12802,25 @@ ${inputJson}
             parseError: parsed.error ?? "unknown parse error",
             ts: Date.now()
           });
+          await writer.setStatus("failed");
+          return {
+            status: "failed",
+            error: new EngineError(
+              "ERR_JSON_OUTPUT_PARSE",
+              `Output is not valid JSON: ${parsed.error ?? "unknown parse error"}`
+            ),
+            transcript: { path: logPath, lastShardIndex: 0 }
+          };
         }
       }
+      await writer.setStatus("done");
       return {
         status: "done",
         output: loopResult.output,
         ...data !== void 0 ? { data } : {},
         tokensUsed: loopResult.tokensUsed,
-        turns: loopResult.turns
+        turns: loopResult.turns,
+        toolCallsCount: loopResult.toolCallsCount
       };
     }
     if (loopResult.status === "paused") {