@smithers-orchestrator/engine 0.16.8 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smithers-orchestrator/engine",
3
- "version": "0.16.8",
3
+ "version": "0.17.0",
4
4
  "description": "Concrete Smithers workflow execution engine",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -20,27 +20,36 @@
20
20
  "src/"
21
21
  ],
22
22
  "dependencies": {
23
+ "@effect/cluster": "^0.58.0",
24
+ "@effect/experimental": "^0.60.0",
23
25
  "@effect/platform-bun": "^0.89.0",
26
+ "@effect/rpc": "^0.75.0",
27
+ "@effect/sql": "^0.51.0",
24
28
  "@effect/sql-sqlite-bun": "^0.52.0",
29
+ "@effect/workflow": "^0.18.0",
30
+ "diff": "^9.0.0",
31
+ "drizzle-orm": "^0.45.2",
25
32
  "effect": "^3.21.1",
26
- "@smithers-orchestrator/agents": "0.16.8",
27
- "@smithers-orchestrator/db": "0.16.8",
28
- "@smithers-orchestrator/components": "0.16.8",
29
- "@smithers-orchestrator/driver": "0.16.8",
30
- "@smithers-orchestrator/graph": "0.16.8",
31
- "@smithers-orchestrator/errors": "0.16.8",
32
- "@smithers-orchestrator/sandbox": "0.16.8",
33
- "@smithers-orchestrator/memory": "0.16.8",
34
- "@smithers-orchestrator/react-reconciler": "0.16.8",
35
- "@smithers-orchestrator/observability": "0.16.8",
36
- "@smithers-orchestrator/scheduler": "0.16.8",
37
- "@smithers-orchestrator/scorers": "0.16.8",
38
- "@smithers-orchestrator/time-travel": "0.16.8",
39
- "@smithers-orchestrator/vcs": "0.16.8"
33
+ "react": "^19.2.5",
34
+ "react-dom": "^19.2.5",
35
+ "zod": "^4.3.6",
36
+ "@smithers-orchestrator/agents": "0.17.0",
37
+ "@smithers-orchestrator/components": "0.17.0",
38
+ "@smithers-orchestrator/driver": "0.17.0",
39
+ "@smithers-orchestrator/db": "0.17.0",
40
+ "@smithers-orchestrator/errors": "0.17.0",
41
+ "@smithers-orchestrator/graph": "0.17.0",
42
+ "@smithers-orchestrator/memory": "0.17.0",
43
+ "@smithers-orchestrator/react-reconciler": "0.17.0",
44
+ "@smithers-orchestrator/observability": "0.17.0",
45
+ "@smithers-orchestrator/sandbox": "0.17.0",
46
+ "@smithers-orchestrator/scheduler": "0.17.0",
47
+ "@smithers-orchestrator/scorers": "0.17.0",
48
+ "@smithers-orchestrator/time-travel": "0.17.0",
49
+ "@smithers-orchestrator/vcs": "0.17.0"
40
50
  },
41
51
  "devDependencies": {
42
52
  "@types/bun": "latest",
43
- "react": "^19.2.5",
44
53
  "typescript": "~5.9.3"
45
54
  },
46
55
  "scripts": {
package/src/approvals.js CHANGED
@@ -1,6 +1,5 @@
1
1
  import { Effect, Metric } from "effect";
2
2
  import { nowMs } from "@smithers-orchestrator/scheduler/nowMs";
3
- import { SmithersDb } from "@smithers-orchestrator/db/adapter";
4
3
  import { approvalWaitDuration, trackEvent, updateAsyncExternalWaitPending, } from "@smithers-orchestrator/observability/metrics";
5
4
  import { bridgeApprovalResolve } from "./effect/durable-deferred-bridge.js";
6
5
  import { SmithersError } from "@smithers-orchestrator/errors/SmithersError";
@@ -298,7 +298,7 @@ function resolveHandleIteration(handle, ctx) {
298
298
  * @param {Record<string, unknown>} row
299
299
  */
300
300
  function stripPersistedKeys(row) {
301
- const { runId, nodeId, iteration, payload, ...rest } = row;
301
+ const { runId: _runId, nodeId: _nodeId, iteration: _iteration, payload, ...rest } = row;
302
302
  if (payload !== undefined)
303
303
  return payload;
304
304
  return rest;
@@ -754,7 +754,7 @@ function normalizeExecutionError(result) {
754
754
  /**
755
755
  * @param {{ name: string; input: AnySchema }} options
756
756
  */
757
- function createWorkflow(options) {
757
+ function _createWorkflow(options) {
758
758
  return {
759
759
  /**
760
760
  * @param {($: BuilderApi) => BuilderNode} buildGraph
@@ -804,7 +804,7 @@ function createWorkflow(options) {
804
804
  /**
805
805
  * @param {{ name: string; params?: Record<string, unknown> }} options
806
806
  */
807
- function createComponent(options) {
807
+ function _createComponent(options) {
808
808
  return {
809
809
  /**
810
810
  * @param {($: BuilderApi, params: Record<string, unknown>) => BuilderNode} buildGraph
@@ -1,9 +1,7 @@
1
1
  import { Cause, Duration, Effect, Either, Exit, Metric, Schedule } from "effect";
2
- import { z } from "zod";
3
2
  import { buildOutputRow, stripAutoColumns, validateOutput } from "@smithers-orchestrator/db/output";
4
3
  import { TaskHeartbeatTimeout } from "@smithers-orchestrator/errors/TaskHeartbeatTimeout";
5
4
  import { TaskTimeout } from "@smithers-orchestrator/errors/TaskTimeout";
6
- import { EventBus } from "../events.js";
7
5
  import { makeAbortError, wireAbortSignal } from "./bridge-utils.js";
8
6
  import { withTaskRuntime } from "@smithers-orchestrator/driver/task-runtime";
9
7
  import { logDebug, logError, logInfo, logWarning } from "@smithers-orchestrator/observability/logging";
@@ -623,6 +621,14 @@ export const executeComputeTaskBridge = async (adapter, db, runId, desc, eventBu
623
621
  if (isHeartbeatPayloadValidationError(effectiveError)) {
624
622
  attemptMeta.failureRetryable = false;
625
623
  }
624
+ // Propagate non-retryable signal from any thrown SmithersError so the
625
+ // attempt is not retried (e.g. AGENT_CONFIG_INVALID from KimiAgent's
626
+ // expired-credentials check, or auth-failure patterns classified by
627
+ // BaseCliAgent.classifyNonRetryableAgentError).
628
+ if (effectiveError?.details?.failureRetryable === false ||
629
+ effectiveError?.code === "AGENT_CONFIG_INVALID") {
630
+ attemptMeta.failureRetryable = false;
631
+ }
626
632
  if (aborted) {
627
633
  await waitForHeartbeatWriteDrain();
628
634
  await flushHeartbeat(true);
@@ -3,7 +3,6 @@ import { renderToStaticMarkup } from "react-dom/server";
3
3
  import { Effect, Exit } from "effect";
4
4
  import { buildOutputRow, describeSchemaShape, selectOutputRow, stripAutoColumns, validateExistingOutput, validateOutput, } from "@smithers-orchestrator/db/output";
5
5
  import { awaitApprovalDurableDeferred, awaitWaitForEventDurableDeferred, bridgeApprovalResolve, bridgeWaitForEventResolve, } from "./durable-deferred-bridge.js";
6
- import { EventBus } from "../events.js";
7
6
  import { buildHumanRequestId, getHumanTaskPrompt as getStoredHumanTaskPrompt, isHumanTaskMeta, } from "../human-requests.js";
8
7
  import { parseAttemptMetaJson } from "./bridge-utils.js";
9
8
  import { updateAsyncExternalWaitPending } from "@smithers-orchestrator/observability/metrics";
@@ -344,7 +344,7 @@ export async function applyDiffBundle(bundle, targetDir) {
344
344
  await runGit(targetDir, ["apply", "--binary", "--whitespace=nowarn", "--unsafe-paths", "-"], { input: fullPatch });
345
345
  return;
346
346
  }
347
- catch (error) {
347
+ catch {
348
348
  for (const patch of bundle.patches) {
349
349
  await applyPatchFallback(patch, targetDir);
350
350
  }
@@ -1,7 +1,5 @@
1
1
  import { Effect, Metric } from "effect";
2
- import { z } from "zod";
3
2
  import { buildOutputRow, stripAutoColumns, validateOutput } from "@smithers-orchestrator/db/output";
4
- import { EventBus } from "../events.js";
5
3
  import { makeAbortError, wireAbortSignal } from "./bridge-utils.js";
6
4
  import { logDebug, logError, logInfo } from "@smithers-orchestrator/observability/logging";
7
5
  import { attemptDuration, nodeDuration } from "@smithers-orchestrator/observability/metrics";
@@ -1,6 +1,4 @@
1
1
  import { Effect } from "effect";
2
- import { SmithersDb } from "@smithers-orchestrator/db/adapter";
3
- import { EventBus } from "../events.js";
4
2
  import { toSmithersError } from "@smithers-orchestrator/errors/toSmithersError";
5
3
  import { makeWorkerTask, } from "./entity-worker.js";
6
4
  import { executeTaskActivity, makeTaskBridgeKey, RetriableTaskFailure, } from "./activity-bridge.js";
package/src/engine.js CHANGED
@@ -5,7 +5,7 @@ import { SmithersCtx } from "@smithers-orchestrator/driver/SmithersCtx";
5
5
  import { loadInput, loadOutputs } from "@smithers-orchestrator/db/snapshot";
6
6
  import { ensureSmithersTables } from "@smithers-orchestrator/db/ensure";
7
7
  import { SmithersDb } from "@smithers-orchestrator/db/adapter";
8
- import { selectOutputRow, validateOutput, validateExistingOutput, getAgentOutputSchema, describeSchemaShape, buildOutputRow, stripAutoColumns, } from "@smithers-orchestrator/db/output";
8
+ import { selectOutputRow, validateOutput, validateExistingOutput, describeSchemaShape, buildOutputRow, stripAutoColumns, } from "@smithers-orchestrator/db/output";
9
9
  import { validateInput } from "@smithers-orchestrator/db/input";
10
10
  import { schemaSignature } from "@smithers-orchestrator/db/schema-signature";
11
11
  import { withSqliteWriteRetry } from "@smithers-orchestrator/db/write-retry";
@@ -23,7 +23,6 @@ import { EventBus } from "./events.js";
23
23
  import { getJjPointer, runJj, workspaceAdd } from "@smithers-orchestrator/vcs/jj";
24
24
  import { findVcsRoot } from "@smithers-orchestrator/vcs/find-root";
25
25
  import * as BunContext from "@effect/platform-bun/BunContext";
26
- import { z } from "zod";
27
26
  import { eq, getTableName } from "drizzle-orm";
28
27
  import { getTableColumns } from "drizzle-orm/utils";
29
28
  import { Chunk, Duration, Effect, Fiber, Metric, Queue, Schedule } from "effect";
@@ -448,16 +447,6 @@ function prependToolResumeWarningMessage(prompt, warningMessage) {
448
447
  }
449
448
  return `${warningMessage}\n\n${prompt}`;
450
449
  }
451
- /**
452
- * @param {HijackCompletion} completion
453
- * @returns {Error}
454
- */
455
- function buildHijackAbortError(completion) {
456
- const err = makeAbortError(`Hijack requested for ${completion.engine}`);
457
- err.code = "RUN_HIJACKED";
458
- err.hijack = completion;
459
- return err;
460
- }
461
450
  /**
462
451
  * @param {string} cwd
463
452
  * @param {string[]} args
@@ -1526,13 +1515,11 @@ function assertResumeDurabilityMetadata(existingRun, existingConfig, current, wo
1526
1515
  else if (shouldCheckWorkflowHashes) {
1527
1516
  compareNullableString(existingRun.workflowHash, current.entryWorkflowHash, "workflow entry file changed", mismatches);
1528
1517
  }
1529
- compareNullableString(existingRun.vcsType, current.vcsType, "VCS type changed", mismatches);
1530
1518
  if ((existingRun.vcsRoot && current.vcsRoot
1531
1519
  ? resolve(existingRun.vcsRoot) !== resolve(current.vcsRoot)
1532
1520
  : (existingRun.vcsRoot ?? null) !== (current.vcsRoot ?? null))) {
1533
1521
  mismatches.push("VCS root changed");
1534
1522
  }
1535
- compareNullableString(existingRun.vcsRevision, current.vcsRevision, "VCS revision changed", mismatches);
1536
1523
  if (mismatches.length > 0) {
1537
1524
  throw new SmithersError("RESUME_METADATA_MISMATCH", `Cannot resume run because durable metadata changed: ${mismatches.join(", ")}`, {
1538
1525
  existing: {
@@ -2031,8 +2018,15 @@ function isRetryableTaskFailure(attempt) {
2031
2018
  if (meta?.failureRetryable === false) {
2032
2019
  return false;
2033
2020
  }
2021
+ const errorCode = parseAttemptErrorCode(attempt?.errorJson);
2022
+ // AGENT_CONFIG_INVALID is a deterministic configuration failure (e.g.
2023
+ // "LLM not set", unknown model). Retrying is guaranteed to fail again
2024
+ // and just multiplies cost — short-circuit immediately.
2025
+ if (errorCode === "AGENT_CONFIG_INVALID") {
2026
+ return false;
2027
+ }
2034
2028
  const kind = typeof meta?.kind === "string" ? meta.kind : null;
2035
- return !(kind !== "agent" && parseAttemptErrorCode(attempt?.errorJson) === "INVALID_OUTPUT");
2029
+ return !(kind !== "agent" && errorCode === "INVALID_OUTPUT");
2036
2030
  }
2037
2031
  /**
2038
2032
  * @param {SmithersDb} adapter
@@ -2766,13 +2760,15 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
2766
2760
  effectiveAgent.model ??
2767
2761
  effectiveAgent.modelId ??
2768
2762
  null;
2769
- const currentAgentEngine = typeof effectiveAgent.cliEngine === "string"
2763
+ const hijackCapableEngine = typeof effectiveAgent.cliEngine === "string"
2770
2764
  ? effectiveAgent.cliEngine
2771
2765
  : typeof effectiveAgent.hijackEngine === "string"
2772
2766
  ? effectiveAgent.hijackEngine
2773
- : (typeof effectiveAgent.constructor?.name === "string"
2774
- ? effectiveAgent.constructor.name
2775
- : null);
2767
+ : null;
2768
+ const currentAgentEngine = hijackCapableEngine ??
2769
+ (typeof effectiveAgent.constructor?.name === "string"
2770
+ ? effectiveAgent.constructor.name
2771
+ : null);
2776
2772
  attemptMeta.agentEngine = currentAgentEngine;
2777
2773
  const heartbeatCheckpoint = previousHeartbeat &&
2778
2774
  typeof previousHeartbeat === "object" &&
@@ -2785,15 +2781,25 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
2785
2781
  const heartbeatCheckpointUsable = !currentAgentEngine ||
2786
2782
  !heartbeatCheckpointEngine ||
2787
2783
  heartbeatCheckpointEngine === currentAgentEngine;
2788
- const checkpointResumeSession = heartbeatCheckpointUsable &&
2789
- typeof heartbeatCheckpoint?.agentResume === "string"
2784
+ // If the most recent failed attempt asked us to drop the resume
2785
+ // session (e.g. kimi crashed mid-stream and reported `kimi -r
2786
+ // <uuid>`; that session is now corrupt and re-resuming it just
2787
+ // reproduces the crash), don't reuse the captured agentResume
2788
+ // from the heartbeat. Forces the agent to start a fresh
2789
+ // session on the next attempt.
2790
+ const lastFailedAttempt = attempts.find((a) => a.state === "failed");
2791
+ const lastFailedMeta = parseAttemptMetaJson(lastFailedAttempt?.metaJson);
2792
+ const discardResumeSession = lastFailedMeta?.discardResumeSession === true;
2793
+ const checkpointResumeSession = !discardResumeSession
2794
+ && heartbeatCheckpointUsable
2795
+ && typeof heartbeatCheckpoint?.agentResume === "string"
2790
2796
  ? heartbeatCheckpoint.agentResume
2791
2797
  : undefined;
2792
2798
  const checkpointResumeMessages = heartbeatCheckpointUsable
2793
2799
  ? asConversationMessages(heartbeatCheckpoint?.agentConversation)
2794
2800
  : undefined;
2795
- const priorContinuation = currentAgentEngine
2796
- ? findHijackContinuation(attempts, currentAgentEngine)
2801
+ const priorContinuation = hijackCapableEngine
2802
+ ? findHijackContinuation(attempts, hijackCapableEngine)
2797
2803
  : undefined;
2798
2804
  const resumeSession = priorContinuation?.mode === "native-cli"
2799
2805
  ? priorContinuation.resume
@@ -2803,6 +2809,37 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
2803
2809
  : (cloneJsonValue(checkpointResumeMessages) ??
2804
2810
  checkpointResumeMessages);
2805
2811
  const guidedResumeMessages = appendToolResumeWarningMessage(resumeMessages, toolResumeWarningMessage);
2812
+ if (desc.hijack) {
2813
+ if (!hijackCapableEngine) {
2814
+ attemptMeta.failureRetryable = false;
2815
+ throw new SmithersError("TASK_HIJACK_UNSUPPORTED", `Task ${desc.nodeId} sets hijack, but its agent is not hijack-capable. Hijack requires an agent with cliEngine or hijackEngine.`, {
2816
+ nodeId: desc.nodeId,
2817
+ agentId: attemptMeta.agentId ?? undefined,
2818
+ });
2819
+ }
2820
+ const shouldAutoHijack = desc.onHijackExit === "reopen" || !priorContinuation;
2821
+ if (shouldAutoHijack && !hijackState) {
2822
+ attemptMeta.failureRetryable = false;
2823
+ throw new SmithersError("TASK_HIJACK_UNSUPPORTED", `Task ${desc.nodeId} cannot auto-hijack in this execution mode.`, {
2824
+ nodeId: desc.nodeId,
2825
+ agentId: attemptMeta.agentId ?? undefined,
2826
+ });
2827
+ }
2828
+ if (shouldAutoHijack && !hijackState.request && !hijackState.completion) {
2829
+ const requestedAtMs = nowMs();
2830
+ hijackState.request = {
2831
+ requestedAtMs,
2832
+ target: hijackCapableEngine,
2833
+ };
2834
+ await Effect.runPromise(adapter.requestRunHijack(runId, requestedAtMs, hijackCapableEngine));
2835
+ await Effect.runPromise(eventBus.emitEventWithPersist({
2836
+ type: "RunHijackRequested",
2837
+ runId,
2838
+ target: hijackCapableEngine,
2839
+ timestampMs: requestedAtMs,
2840
+ }));
2841
+ }
2842
+ }
2806
2843
  if (resumeSession) {
2807
2844
  attemptMeta.resumedFromSession = resumeSession;
2808
2845
  }
@@ -2989,37 +3026,40 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
2989
3026
  // Use fallback agent on retry attempts when available
2990
3027
  let result;
2991
3028
  try {
2992
- result = await Effect.runPromise(withSmithersSpan(smithersSpanNames.agent, Effect.promise(() => {
2993
- const agentCall = guidedResumeMessages?.length
2994
- ? {
2995
- messages: guidedResumeMessages,
2996
- }
2997
- : {
2998
- prompt: effectivePrompt,
2999
- };
3000
- return effectiveAgent.generate({
3001
- options: undefined,
3002
- abortSignal: taskSignal,
3003
- ...agentCall,
3004
- resumeSession,
3005
- lastHeartbeat: previousHeartbeat,
3006
- rootDir: taskRoot,
3007
- maxOutputBytes: toolConfig.maxOutputBytes,
3008
- timeout: desc.timeoutMs
3009
- ? { totalMs: desc.timeoutMs }
3010
- : undefined,
3011
- onStdout: (text) => {
3012
- recordInternalHeartbeat();
3013
- emitOutput(text, "stdout");
3014
- },
3015
- onStderr: (text) => {
3016
- recordInternalHeartbeat();
3017
- emitOutput(text, "stderr");
3018
- },
3019
- onEvent: handleAgentEvent,
3020
- onStepFinish: handleSdkStepFinish,
3021
- outputSchema: desc.outputSchema,
3022
- });
3029
+ result = await Effect.runPromise(withSmithersSpan(smithersSpanNames.agent, Effect.tryPromise({
3030
+ try: () => {
3031
+ const agentCall = guidedResumeMessages?.length
3032
+ ? {
3033
+ messages: guidedResumeMessages,
3034
+ }
3035
+ : {
3036
+ prompt: effectivePrompt,
3037
+ };
3038
+ return effectiveAgent.generate({
3039
+ options: undefined,
3040
+ abortSignal: taskSignal,
3041
+ ...agentCall,
3042
+ resumeSession,
3043
+ lastHeartbeat: previousHeartbeat,
3044
+ rootDir: taskRoot,
3045
+ maxOutputBytes: toolConfig.maxOutputBytes,
3046
+ timeout: desc.timeoutMs
3047
+ ? { totalMs: desc.timeoutMs }
3048
+ : undefined,
3049
+ onStdout: (text) => {
3050
+ recordInternalHeartbeat();
3051
+ emitOutput(text, "stdout");
3052
+ },
3053
+ onStderr: (text) => {
3054
+ recordInternalHeartbeat();
3055
+ emitOutput(text, "stderr");
3056
+ },
3057
+ onEvent: handleAgentEvent,
3058
+ onStepFinish: handleSdkStepFinish,
3059
+ outputSchema: desc.outputSchema,
3060
+ });
3061
+ },
3062
+ catch: (error) => error,
3023
3063
  }), {
3024
3064
  ...taskSpanContext,
3025
3065
  agent: attemptMeta.agentId ??
@@ -3100,10 +3140,12 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
3100
3140
  // Fall back to parsing text/steps for JSON
3101
3141
  if (output === undefined) {
3102
3142
  const text = result.text ?? "";
3103
- // Try to parse the whole text as JSON first
3143
+ // Try to parse the whole text as JSON first. Strip a leading
3144
+ // UTF-8 BOM and accept either object or array at the root,
3145
+ // since Zod schemas occasionally validate arrays.
3104
3146
  try {
3105
- const trimmed = text.trim();
3106
- if (trimmed.startsWith("{")) {
3147
+ const trimmed = text.replace(/^\uFEFF/, "").trim();
3148
+ if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
3107
3149
  output = JSON.parse(trimmed);
3108
3150
  }
3109
3151
  }
@@ -3280,14 +3322,28 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
3280
3322
  const retryText = retryResult.text ?? "";
3281
3323
  responseText = retryText || responseText;
3282
3324
  try {
3283
- const trimmed = retryText.trim();
3284
- if (trimmed.startsWith("{")) {
3325
+ const trimmed = retryText.replace(/^\uFEFF/, "").trim();
3326
+ if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
3285
3327
  output = JSON.parse(trimmed);
3286
3328
  }
3287
3329
  }
3288
3330
  catch {
3289
3331
  // Still not valid JSON
3290
3332
  }
3333
+ if (output === undefined) {
3334
+ // Try extracting JSON from a markdown code fence
3335
+ // (```json ... ``` or just ``` ... ```).
3336
+ const fenceMatch = retryText.match(/```(?:json)?\s*([\s\S]*?)```/i);
3337
+ if (fenceMatch) {
3338
+ const inner = fenceMatch[1].trim();
3339
+ try {
3340
+ output = JSON.parse(inner);
3341
+ }
3342
+ catch {
3343
+ // Fall through to balanced extraction
3344
+ }
3345
+ }
3346
+ }
3291
3347
  if (output === undefined) {
3292
3348
  // Try extracting balanced JSON from retry text
3293
3349
  const jsonStr = extractBalancedJson(retryText);
@@ -3303,8 +3359,6 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
3303
3359
  }
3304
3360
  if (output === undefined) {
3305
3361
  // Debug: log what we have
3306
- const debugSteps = result.steps ?? [];
3307
- const stepTexts = debugSteps.map((s, i) => `Step ${i}: ${(s?.text ?? "").slice(0, 200)}`);
3308
3362
  const finishReason = result.finishReason ?? "unknown";
3309
3363
  logDebug("agent response did not contain valid JSON output", {
3310
3364
  runId,
@@ -3319,7 +3373,11 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
3319
3373
  lastStepText: debugSteps[debugSteps.length - 1]?.text?.slice(0, 500) ??
3320
3374
  "none",
3321
3375
  }, "engine:task-json");
3322
- throw new SmithersError("INVALID_OUTPUT", "No valid JSON output found in agent response");
3376
+ const tail = (text ?? "").slice(-200).replace(/\s+/g, " ").trim();
3377
+ const tailHint = tail
3378
+ ? ` Last 200 chars of response: ${JSON.stringify(tail)}`
3379
+ : " Agent returned an empty response.";
3380
+ throw new SmithersError("INVALID_OUTPUT", `No valid JSON output found in agent response (finishReason=${finishReason}, textLength=${text.length}).${tailHint}`);
3323
3381
  }
3324
3382
  }
3325
3383
  // Output should already be parsed, but handle string case
@@ -3327,7 +3385,7 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
3327
3385
  try {
3328
3386
  payload = JSON.parse(output);
3329
3387
  }
3330
- catch (e) {
3388
+ catch {
3331
3389
  throw new SmithersError("INVALID_OUTPUT", `Failed to parse agent output as JSON. Output starts with: "${output.slice(0, 100)}"`);
3332
3390
  }
3333
3391
  }
@@ -3679,6 +3737,30 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
3679
3737
  if (isHeartbeatPayloadValidationError(effectiveError)) {
3680
3738
  attemptMeta.failureRetryable = false;
3681
3739
  }
3740
+ // Allow agents (e.g. BaseCliAgent on "LLM not set") to flag a failure as
3741
+ // non-retryable via SmithersError details. Without this, the engine would
3742
+ // retry deterministic configuration errors up to desc.retries times.
3743
+ if (effectiveError &&
3744
+ typeof effectiveError === "object" &&
3745
+ // @ts-ignore — duck-type on SmithersError shape
3746
+ effectiveError.details &&
3747
+ // @ts-ignore
3748
+ effectiveError.details.failureRetryable === false) {
3749
+ attemptMeta.failureRetryable = false;
3750
+ }
3751
+ // Honour `discardResumeSession: true` from agent-side errors (e.g. kimi
3752
+ // session-loss). The next attempt's resumeSession resolution checks
3753
+ // attemptMeta.discardResumeSession on the most recent failed attempt
3754
+ // and clears the captured agentResume so the agent starts fresh
3755
+ // instead of redundantly trying to resume a corrupt session.
3756
+ if (effectiveError &&
3757
+ typeof effectiveError === "object" &&
3758
+ // @ts-ignore — duck-type on SmithersError shape
3759
+ effectiveError.details &&
3760
+ // @ts-ignore
3761
+ effectiveError.details.discardResumeSession === true) {
3762
+ attemptMeta.discardResumeSession = true;
3763
+ }
3682
3764
  if (!heartbeatTimeoutError && (taskSignal.aborted || isAbortError(err))) {
3683
3765
  await waitForHeartbeatWriteDrain();
3684
3766
  await flushHeartbeat(true);
package/src/hot/watch.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { watch } from "node:fs";
2
- import { readdir, stat } from "node:fs/promises";
3
- import { resolve, relative } from "node:path";
2
+ import { readdir } from "node:fs/promises";
3
+ import { resolve } from "node:path";
4
4
  import { Effect } from "effect";
5
5
  import { toSmithersError } from "@smithers-orchestrator/errors/toSmithersError";
6
6
  import { logDebug, logInfo } from "@smithers-orchestrator/observability/logging";
package/src/signals.js CHANGED
@@ -1,5 +1,4 @@
1
1
  import { Effect } from "effect";
2
- import { SmithersDb } from "@smithers-orchestrator/db/adapter";
3
2
  import { bridgeSignalResolve } from "./effect/durable-deferred-bridge.js";
4
3
  import { SmithersError } from "@smithers-orchestrator/errors/SmithersError";
5
4
  import { nowMs } from "@smithers-orchestrator/scheduler/nowMs";