la-machina-engine 0.20.1 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -259,28 +259,59 @@ const result = await engine.run({
259
259
  }
260
260
  ```
261
261
 
262
- ### Done — JSON Mode, Parse Failed
262
+ ### Failed — JSON Mode, Parse / Schema Error
263
263
 
264
- When the model doesn't return valid JSON despite instructions:
264
+ When `outputFormat: 'json'` is requested but the final model output is
265
+ empty, not valid JSON, or fails the supplied `outputSchema`, the run
266
+ terminates as `status: 'failed'` with a typed code (engine ≥ 0.21.0). The
267
+ old permissive fallback (`status: 'done'` with `data` falling back to raw
268
+ text) is removed — a strict-JSON request that can't satisfy the contract
269
+ fails at the engine boundary so callers never mistake an empty/invalid
270
+ output for a successful structured result.
271
+
272
+ Raw parse failure:
265
273
 
266
274
  ```json
267
275
  {
268
276
  "runId": "run_abc",
269
- "status": "done",
270
- "data": "Here's the pricing: Starter at $29, Pro at $99, Enterprise at $299.",
277
+ "status": "failed",
278
+ "data": null,
271
279
  "meta": {
272
280
  "nodeId": "extract",
273
- "turns": 2,
274
- "tokensUsed": { "input": 5000, "output": 800 },
281
+ "turns": 0,
282
+ "tokensUsed": { "input": 0, "output": 0 },
275
283
  "durationMs": 6000,
276
- "output": "Here's the pricing: Starter at $29, Pro at $99, Enterprise at $299."
284
+ "transcript": { "path": "projects/run_abc/nodes/extract", "lastShardIndex": 0 }
277
285
  },
278
- "errors": [],
286
+ "errors": [
287
+ {
288
+ "code": "ERR_JSON_OUTPUT_PARSE",
289
+ "message": "Output is not valid JSON: …"
290
+ }
291
+ ],
279
292
  "timestamp": 1712966400000
280
293
  }
281
294
  ```
282
295
 
283
- `data` falls back to raw text. Client checks `typeof response.data === 'object'` to verify structured output.
296
+ Schema mismatch (parse succeeded, validation failed):
297
+
298
+ ```json
299
+ {
300
+ "runId": "run_abc",
301
+ "status": "failed",
302
+ "data": null,
303
+ "errors": [
304
+ {
305
+ "code": "ERR_JSON_OUTPUT_SCHEMA",
306
+ "message": "Output failed schema validation: …"
307
+ }
308
+ ]
309
+ }
310
+ ```
311
+
312
+ In both cases the engine still appends a `json_parse_failure` inspect
313
+ event (unchanged from Plan 026) — the failure is now visible BOTH in
314
+ inspect AND on the public run result.
284
315
 
285
316
  ### Paused — Human Approval Needed
286
317
 
@@ -456,8 +487,10 @@ await engine.resume({
456
487
  | `ERR_STREAM_PARSE` | Malformed API response | No — provider issue |
457
488
  | `ERR_STREAM_INCOMPLETE` | Stream ended without message_stop | Yes — transient |
458
489
  | `ERR_UNEXPECTED_STOP` | Unknown stop reason from API | No — investigate |
459
- | `SCHEMA_VALIDATION_FAILED` | JSON output doesn't match outputSchema | No — adjust schema or task |
460
- | `JSON_PARSE_FAILED` | Model didn't return valid JSON | No — adjust task |
490
+ | `ERR_JSON_OUTPUT_PARSE` | `outputFormat: 'json'` returned empty / non-JSON output (engine ≥ 0.21.0) | No — adjust task / prompt |
491
+ | `ERR_JSON_OUTPUT_SCHEMA` | JSON output failed `outputSchema` validation (engine ≥ 0.21.0) | No — adjust schema or task |
492
+ | `SCHEMA_VALIDATION_FAILED` | (Legacy inspect-only code; pre-0.21.0) JSON output doesn't match outputSchema | No — adjust schema or task |
493
+ | `JSON_PARSE_FAILED` | (Legacy inspect-only code; pre-0.21.0) Model didn't return valid JSON | No — adjust task |
461
494
 
462
495
  ### Workflow Runner Integration
463
496
 
package/dist/index.cjs CHANGED
@@ -3325,6 +3325,7 @@ async function agentLoop(options) {
3325
3325
  path: `projects/${ctx.runId}/nodes/${ctx.nodeId}`,
3326
3326
  lastShardIndex: 0
3327
3327
  };
3328
+ let cumulativeToolCalls = 0;
3328
3329
  const anthropicTools = tools.map(toAnthropicTool);
3329
3330
  let lastAssistantText = "";
3330
3331
  const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT = 3;
@@ -3373,7 +3374,8 @@ async function agentLoop(options) {
3373
3374
  status: "done",
3374
3375
  output: lastAssistantText || "[Token budget exhausted]",
3375
3376
  tokensUsed: ctx.getTokensUsed(),
3376
- turns: ctx.getTurnCount()
3377
+ turns: ctx.getTurnCount(),
3378
+ toolCallsCount: cumulativeToolCalls
3377
3379
  };
3378
3380
  }
3379
3381
  }
@@ -3718,6 +3720,7 @@ async function agentLoop(options) {
3718
3720
  apiRetryCount = 0;
3719
3721
  consecutive529 = 0;
3720
3722
  await ctx.endTurn();
3723
+ cumulativeToolCalls += toolCallsToDispatch.length;
3721
3724
  await emitInspectTurn({
3722
3725
  inspect: options.inspect,
3723
3726
  turn: ctx.getTurnCount() - 1,
@@ -3751,7 +3754,8 @@ async function agentLoop(options) {
3751
3754
  status: "done",
3752
3755
  output: lastAssistantText || "[Stopped by stop hook]",
3753
3756
  tokensUsed: ctx.getTokensUsed(),
3754
- turns: ctx.getTurnCount()
3757
+ turns: ctx.getTurnCount(),
3758
+ toolCallsCount: cumulativeToolCalls
3755
3759
  };
3756
3760
  }
3757
3761
  }
@@ -3786,7 +3790,8 @@ async function agentLoop(options) {
3786
3790
  status: "done",
3787
3791
  output: lastAssistantText,
3788
3792
  tokensUsed: ctx.getTokensUsed(),
3789
- turns: ctx.getTurnCount()
3793
+ turns: ctx.getTurnCount(),
3794
+ toolCallsCount: cumulativeToolCalls
3790
3795
  };
3791
3796
  }
3792
3797
  if (stopReason === "max_tokens") {
@@ -12751,14 +12756,17 @@ ${inputJson}
12751
12756
  turnsUsed: ctx.getTurnCount(),
12752
12757
  ...result.status === "done" ? { output: result.output } : {},
12753
12758
  ...result.status === "failed" ? { error: result.error } : {},
12754
- ...result.status === "done" ? { toolCallCount: result.turns } : {},
12759
+ // Engine 055 accurate dispatched-tool-call count (was previously
12760
+ // sourced from `result.turns`, so a no-tool one-turn run reported
12761
+ // `toolCallCount: 1`). The loop accumulates the real dispatched
12762
+ // total; default to `0` if a future code path forgets to set it.
12763
+ ...result.status === "done" ? { toolCallCount: result.toolCallsCount ?? 0 } : {},
12755
12764
  ...logPath !== void 0 ? { transcriptPath: logPath } : {}
12756
12765
  };
12757
12766
  await dispatchHooks(this.config.hooks.postRun, event);
12758
12767
  }
12759
- async finalizeResult(loopResult, writer, _logPath, jsonOptions, inspect) {
12768
+ async finalizeResult(loopResult, writer, logPath, jsonOptions, inspect) {
12760
12769
  if (loopResult.status === "done") {
12761
- await writer.setStatus("done");
12762
12770
  let data;
12763
12771
  if (jsonOptions?.outputFormat === "json") {
12764
12772
  const parsed = tryParseJSON2(loopResult.output);
@@ -12774,7 +12782,15 @@ ${inputJson}
12774
12782
  validationError: validated.error,
12775
12783
  ts: Date.now()
12776
12784
  });
12777
- data = void 0;
12785
+ await writer.setStatus("failed");
12786
+ return {
12787
+ status: "failed",
12788
+ error: new EngineError(
12789
+ "ERR_JSON_OUTPUT_SCHEMA",
12790
+ `Output failed schema validation: ${validated.error}`
12791
+ ),
12792
+ transcript: { path: logPath, lastShardIndex: 0 }
12793
+ };
12778
12794
  }
12779
12795
  } else {
12780
12796
  data = parsed.value;
@@ -12786,14 +12802,25 @@ ${inputJson}
12786
12802
  parseError: parsed.error ?? "unknown parse error",
12787
12803
  ts: Date.now()
12788
12804
  });
12805
+ await writer.setStatus("failed");
12806
+ return {
12807
+ status: "failed",
12808
+ error: new EngineError(
12809
+ "ERR_JSON_OUTPUT_PARSE",
12810
+ `Output is not valid JSON: ${parsed.error ?? "unknown parse error"}`
12811
+ ),
12812
+ transcript: { path: logPath, lastShardIndex: 0 }
12813
+ };
12789
12814
  }
12790
12815
  }
12816
+ await writer.setStatus("done");
12791
12817
  return {
12792
12818
  status: "done",
12793
12819
  output: loopResult.output,
12794
12820
  ...data !== void 0 ? { data } : {},
12795
12821
  tokensUsed: loopResult.tokensUsed,
12796
- turns: loopResult.turns
12822
+ turns: loopResult.turns,
12823
+ toolCallsCount: loopResult.toolCallsCount
12797
12824
  };
12798
12825
  }
12799
12826
  if (loopResult.status === "paused") {