la-machina-engine 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -259,28 +259,59 @@ const result = await engine.run({
259
259
  }
260
260
  ```
261
261
 
262
- ### Done — JSON Mode, Parse Failed
262
+ ### Failed — JSON Mode, Parse / Schema Error
263
263
 
264
- When the model doesn't return valid JSON despite instructions:
264
+ When `outputFormat: 'json'` is requested but the final model output is
265
+ empty, not valid JSON, or fails the supplied `outputSchema`, the run
266
+ terminates as `status: 'failed'` with a typed code (engine ≥ 0.21.0). The
267
+ old permissive fallback (`status: 'done'` with `data` falling back to raw
268
+ text) is removed — a strict-JSON request that can't satisfy the contract
269
+ fails at the engine boundary so callers never mistake an empty/invalid
270
+ output for a successful structured result.
271
+
272
+ Raw parse failure:
265
273
 
266
274
  ```json
267
275
  {
268
276
  "runId": "run_abc",
269
- "status": "done",
270
- "data": "Here's the pricing: Starter at $29, Pro at $99, Enterprise at $299.",
277
+ "status": "failed",
278
+ "data": null,
271
279
  "meta": {
272
280
  "nodeId": "extract",
273
- "turns": 2,
274
- "tokensUsed": { "input": 5000, "output": 800 },
281
+ "turns": 0,
282
+ "tokensUsed": { "input": 0, "output": 0 },
275
283
  "durationMs": 6000,
276
- "output": "Here's the pricing: Starter at $29, Pro at $99, Enterprise at $299."
284
+ "transcript": { "path": "projects/run_abc/nodes/extract", "lastShardIndex": 0 }
277
285
  },
278
- "errors": [],
286
+ "errors": [
287
+ {
288
+ "code": "ERR_JSON_OUTPUT_PARSE",
289
+ "message": "Output is not valid JSON: …"
290
+ }
291
+ ],
279
292
  "timestamp": 1712966400000
280
293
  }
281
294
  ```
282
295
 
283
- `data` falls back to raw text. Client checks `typeof response.data === 'object'` to verify structured output.
296
+ Schema mismatch (parse succeeded, validation failed):
297
+
298
+ ```json
299
+ {
300
+ "runId": "run_abc",
301
+ "status": "failed",
302
+ "data": null,
303
+ "errors": [
304
+ {
305
+ "code": "ERR_JSON_OUTPUT_SCHEMA",
306
+ "message": "Output failed schema validation: …"
307
+ }
308
+ ]
309
+ }
310
+ ```
311
+
312
+ In both cases the engine still appends a `json_parse_failure` inspect
313
+ event (unchanged from Plan 026) — the failure is now visible BOTH in
314
+ inspect AND on the public run result.
284
315
 
285
316
  ### Paused — Human Approval Needed
286
317
 
@@ -456,8 +487,10 @@ await engine.resume({
456
487
  | `ERR_STREAM_PARSE` | Malformed API response | No — provider issue |
457
488
  | `ERR_STREAM_INCOMPLETE` | Stream ended without message_stop | Yes — transient |
458
489
  | `ERR_UNEXPECTED_STOP` | Unknown stop reason from API | No — investigate |
459
- | `SCHEMA_VALIDATION_FAILED` | JSON output doesn't match outputSchema | No — adjust schema or task |
460
- | `JSON_PARSE_FAILED` | Model didn't return valid JSON | No — adjust task |
490
+ | `ERR_JSON_OUTPUT_PARSE` | `outputFormat: 'json'` returned empty / non-JSON output (engine ≥ 0.21.0) | No — adjust task / prompt |
491
+ | `ERR_JSON_OUTPUT_SCHEMA` | JSON output failed `outputSchema` validation (engine ≥ 0.21.0) | No — adjust schema or task |
492
+ | `SCHEMA_VALIDATION_FAILED` | (Legacy inspect-only code; pre-0.21.0) JSON output doesn't match outputSchema | No — adjust schema or task |
493
+ | `JSON_PARSE_FAILED` | (Legacy inspect-only code; pre-0.21.0) Model didn't return valid JSON | No — adjust task |
461
494
 
462
495
  ### Workflow Runner Integration
463
496
 
package/dist/index.cjs CHANGED
@@ -3325,6 +3325,7 @@ async function agentLoop(options) {
3325
3325
  path: `projects/${ctx.runId}/nodes/${ctx.nodeId}`,
3326
3326
  lastShardIndex: 0
3327
3327
  };
3328
+ let cumulativeToolCalls = 0;
3328
3329
  const anthropicTools = tools.map(toAnthropicTool);
3329
3330
  let lastAssistantText = "";
3330
3331
  const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT = 3;
@@ -3373,7 +3374,8 @@ async function agentLoop(options) {
3373
3374
  status: "done",
3374
3375
  output: lastAssistantText || "[Token budget exhausted]",
3375
3376
  tokensUsed: ctx.getTokensUsed(),
3376
- turns: ctx.getTurnCount()
3377
+ turns: ctx.getTurnCount(),
3378
+ toolCallsCount: cumulativeToolCalls
3377
3379
  };
3378
3380
  }
3379
3381
  }
@@ -3718,6 +3720,7 @@ async function agentLoop(options) {
3718
3720
  apiRetryCount = 0;
3719
3721
  consecutive529 = 0;
3720
3722
  await ctx.endTurn();
3723
+ cumulativeToolCalls += toolCallsToDispatch.length;
3721
3724
  await emitInspectTurn({
3722
3725
  inspect: options.inspect,
3723
3726
  turn: ctx.getTurnCount() - 1,
@@ -3751,7 +3754,8 @@ async function agentLoop(options) {
3751
3754
  status: "done",
3752
3755
  output: lastAssistantText || "[Stopped by stop hook]",
3753
3756
  tokensUsed: ctx.getTokensUsed(),
3754
- turns: ctx.getTurnCount()
3757
+ turns: ctx.getTurnCount(),
3758
+ toolCallsCount: cumulativeToolCalls
3755
3759
  };
3756
3760
  }
3757
3761
  }
@@ -3786,7 +3790,8 @@ async function agentLoop(options) {
3786
3790
  status: "done",
3787
3791
  output: lastAssistantText,
3788
3792
  tokensUsed: ctx.getTokensUsed(),
3789
- turns: ctx.getTurnCount()
3793
+ turns: ctx.getTurnCount(),
3794
+ toolCallsCount: cumulativeToolCalls
3790
3795
  };
3791
3796
  }
3792
3797
  if (stopReason === "max_tokens") {
@@ -9227,11 +9232,16 @@ function createDescribeServiceTool(opts) {
9227
9232
  */
9228
9233
  path: import_zod24.z.string().min(1).optional(),
9229
9234
  /**
9230
- * Plan 054 — defaults to 8, capped at 20. Only meaningful in
9231
- * search modes; ignored when fetching the full catalog or one
9232
- * exact endpoint.
9235
+ * Plan 054 — defaults to 8, capped at MAX_SEARCH_LIMIT.
9236
+ * Plan 054 review fix (LOW) — Zod no longer hard-rejects
9237
+ * values above the cap. A model that picks `limit: 100`
9238
+ * shouldn't waste a turn on a validation error; the
9239
+ * runtime `Math.min` clamp below is the authority and
9240
+ * silently bounds to 20. Schema still rejects non-int /
9241
+ * non-positive values so we don't silently round a sloppy
9242
+ * float or treat negatives as the default.
9233
9243
  */
9234
- limit: import_zod24.z.number().int().min(1).max(MAX_SEARCH_LIMIT).optional()
9244
+ limit: import_zod24.z.number().int().min(1).optional()
9235
9245
  });
9236
9246
  const description = `Look up endpoints on one configured API service.
9237
9247
  1. Search: DescribeService({ service, query }) returns compact ranked matches with method, path, description, and a relevance score. Use this for broad services where only one endpoint slice is needed. Add { method } to filter.
@@ -12746,14 +12756,17 @@ ${inputJson}
12746
12756
  turnsUsed: ctx.getTurnCount(),
12747
12757
  ...result.status === "done" ? { output: result.output } : {},
12748
12758
  ...result.status === "failed" ? { error: result.error } : {},
12749
- ...result.status === "done" ? { toolCallCount: result.turns } : {},
12759
+ // Engine 055 — accurate dispatched-tool-call count (was previously
12760
+ // sourced from `result.turns`, so a no-tool one-turn run reported
12761
+ // `toolCallCount: 1`). The loop accumulates the real dispatched
12762
+ // total; default to `0` if a future code path forgets to set it.
12763
+ ...result.status === "done" ? { toolCallCount: result.toolCallsCount ?? 0 } : {},
12750
12764
  ...logPath !== void 0 ? { transcriptPath: logPath } : {}
12751
12765
  };
12752
12766
  await dispatchHooks(this.config.hooks.postRun, event);
12753
12767
  }
12754
- async finalizeResult(loopResult, writer, _logPath, jsonOptions, inspect) {
12768
+ async finalizeResult(loopResult, writer, logPath, jsonOptions, inspect) {
12755
12769
  if (loopResult.status === "done") {
12756
- await writer.setStatus("done");
12757
12770
  let data;
12758
12771
  if (jsonOptions?.outputFormat === "json") {
12759
12772
  const parsed = tryParseJSON2(loopResult.output);
@@ -12769,7 +12782,15 @@ ${inputJson}
12769
12782
  validationError: validated.error,
12770
12783
  ts: Date.now()
12771
12784
  });
12772
- data = void 0;
12785
+ await writer.setStatus("failed");
12786
+ return {
12787
+ status: "failed",
12788
+ error: new EngineError(
12789
+ "ERR_JSON_OUTPUT_SCHEMA",
12790
+ `Output failed schema validation: ${validated.error}`
12791
+ ),
12792
+ transcript: { path: logPath, lastShardIndex: 0 }
12793
+ };
12773
12794
  }
12774
12795
  } else {
12775
12796
  data = parsed.value;
@@ -12781,14 +12802,25 @@ ${inputJson}
12781
12802
  parseError: parsed.error ?? "unknown parse error",
12782
12803
  ts: Date.now()
12783
12804
  });
12805
+ await writer.setStatus("failed");
12806
+ return {
12807
+ status: "failed",
12808
+ error: new EngineError(
12809
+ "ERR_JSON_OUTPUT_PARSE",
12810
+ `Output is not valid JSON: ${parsed.error ?? "unknown parse error"}`
12811
+ ),
12812
+ transcript: { path: logPath, lastShardIndex: 0 }
12813
+ };
12784
12814
  }
12785
12815
  }
12816
+ await writer.setStatus("done");
12786
12817
  return {
12787
12818
  status: "done",
12788
12819
  output: loopResult.output,
12789
12820
  ...data !== void 0 ? { data } : {},
12790
12821
  tokensUsed: loopResult.tokensUsed,
12791
- turns: loopResult.turns
12822
+ turns: loopResult.turns,
12823
+ toolCallsCount: loopResult.toolCallsCount
12792
12824
  };
12793
12825
  }
12794
12826
  if (loopResult.status === "paused") {