@smithers-orchestrator/engine 0.16.8 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +25 -16
- package/src/approvals.js +0 -1
- package/src/effect/builder.js +3 -3
- package/src/effect/compute-task-bridge.js +8 -2
- package/src/effect/deferred-state-bridge.js +0 -1
- package/src/effect/diff-bundle.js +1 -1
- package/src/effect/static-task-bridge.js +0 -2
- package/src/effect/workflow-bridge.js +0 -2
- package/src/engine.js +145 -63
- package/src/hot/watch.js +2 -2
- package/src/signals.js +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smithers-orchestrator/engine",
|
|
3
|
-
"version": "0.16.8",
|
|
3
|
+
"version": "0.17.0",
|
|
4
4
|
"description": "Concrete Smithers workflow execution engine",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -20,27 +20,36 @@
|
|
|
20
20
|
"src/"
|
|
21
21
|
],
|
|
22
22
|
"dependencies": {
|
|
23
|
+
"@effect/cluster": "^0.58.0",
|
|
24
|
+
"@effect/experimental": "^0.60.0",
|
|
23
25
|
"@effect/platform-bun": "^0.89.0",
|
|
26
|
+
"@effect/rpc": "^0.75.0",
|
|
27
|
+
"@effect/sql": "^0.51.0",
|
|
24
28
|
"@effect/sql-sqlite-bun": "^0.52.0",
|
|
29
|
+
"@effect/workflow": "^0.18.0",
|
|
30
|
+
"diff": "^9.0.0",
|
|
31
|
+
"drizzle-orm": "^0.45.2",
|
|
25
32
|
"effect": "^3.21.1",
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"@smithers-orchestrator/
|
|
30
|
-
"@smithers-orchestrator/
|
|
31
|
-
"@smithers-orchestrator/
|
|
32
|
-
"@smithers-orchestrator/
|
|
33
|
-
"@smithers-orchestrator/
|
|
34
|
-
"@smithers-orchestrator/
|
|
35
|
-
"@smithers-orchestrator/
|
|
36
|
-
"@smithers-orchestrator/
|
|
37
|
-
"@smithers-orchestrator/
|
|
38
|
-
"@smithers-orchestrator/
|
|
39
|
-
"@smithers-orchestrator/
|
|
33
|
+
"react": "^19.2.5",
|
|
34
|
+
"react-dom": "^19.2.5",
|
|
35
|
+
"zod": "^4.3.6",
|
|
36
|
+
"@smithers-orchestrator/agents": "0.17.0",
|
|
37
|
+
"@smithers-orchestrator/components": "0.17.0",
|
|
38
|
+
"@smithers-orchestrator/driver": "0.17.0",
|
|
39
|
+
"@smithers-orchestrator/db": "0.17.0",
|
|
40
|
+
"@smithers-orchestrator/errors": "0.17.0",
|
|
41
|
+
"@smithers-orchestrator/graph": "0.17.0",
|
|
42
|
+
"@smithers-orchestrator/memory": "0.17.0",
|
|
43
|
+
"@smithers-orchestrator/react-reconciler": "0.17.0",
|
|
44
|
+
"@smithers-orchestrator/observability": "0.17.0",
|
|
45
|
+
"@smithers-orchestrator/sandbox": "0.17.0",
|
|
46
|
+
"@smithers-orchestrator/scheduler": "0.17.0",
|
|
47
|
+
"@smithers-orchestrator/scorers": "0.17.0",
|
|
48
|
+
"@smithers-orchestrator/time-travel": "0.17.0",
|
|
49
|
+
"@smithers-orchestrator/vcs": "0.17.0"
|
|
40
50
|
},
|
|
41
51
|
"devDependencies": {
|
|
42
52
|
"@types/bun": "latest",
|
|
43
|
-
"react": "^19.2.5",
|
|
44
53
|
"typescript": "~5.9.3"
|
|
45
54
|
},
|
|
46
55
|
"scripts": {
|
package/src/approvals.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { Effect, Metric } from "effect";
|
|
2
2
|
import { nowMs } from "@smithers-orchestrator/scheduler/nowMs";
|
|
3
|
-
import { SmithersDb } from "@smithers-orchestrator/db/adapter";
|
|
4
3
|
import { approvalWaitDuration, trackEvent, updateAsyncExternalWaitPending, } from "@smithers-orchestrator/observability/metrics";
|
|
5
4
|
import { bridgeApprovalResolve } from "./effect/durable-deferred-bridge.js";
|
|
6
5
|
import { SmithersError } from "@smithers-orchestrator/errors/SmithersError";
|
package/src/effect/builder.js
CHANGED
|
@@ -298,7 +298,7 @@ function resolveHandleIteration(handle, ctx) {
|
|
|
298
298
|
* @param {Record<string, unknown>} row
|
|
299
299
|
*/
|
|
300
300
|
function stripPersistedKeys(row) {
|
|
301
|
-
const { runId, nodeId, iteration, payload, ...rest } = row;
|
|
301
|
+
const { runId: _runId, nodeId: _nodeId, iteration: _iteration, payload, ...rest } = row;
|
|
302
302
|
if (payload !== undefined)
|
|
303
303
|
return payload;
|
|
304
304
|
return rest;
|
|
@@ -754,7 +754,7 @@ function normalizeExecutionError(result) {
|
|
|
754
754
|
/**
|
|
755
755
|
* @param {{ name: string; input: AnySchema }} options
|
|
756
756
|
*/
|
|
757
|
-
function createWorkflow(options) {
|
|
757
|
+
function _createWorkflow(options) {
|
|
758
758
|
return {
|
|
759
759
|
/**
|
|
760
760
|
* @param {($: BuilderApi) => BuilderNode} buildGraph
|
|
@@ -804,7 +804,7 @@ function createWorkflow(options) {
|
|
|
804
804
|
/**
|
|
805
805
|
* @param {{ name: string; params?: Record<string, unknown> }} options
|
|
806
806
|
*/
|
|
807
|
-
function createComponent(options) {
|
|
807
|
+
function _createComponent(options) {
|
|
808
808
|
return {
|
|
809
809
|
/**
|
|
810
810
|
* @param {($: BuilderApi, params: Record<string, unknown>) => BuilderNode} buildGraph
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
import { Cause, Duration, Effect, Either, Exit, Metric, Schedule } from "effect";
|
|
2
|
-
import { z } from "zod";
|
|
3
2
|
import { buildOutputRow, stripAutoColumns, validateOutput } from "@smithers-orchestrator/db/output";
|
|
4
3
|
import { TaskHeartbeatTimeout } from "@smithers-orchestrator/errors/TaskHeartbeatTimeout";
|
|
5
4
|
import { TaskTimeout } from "@smithers-orchestrator/errors/TaskTimeout";
|
|
6
|
-
import { EventBus } from "../events.js";
|
|
7
5
|
import { makeAbortError, wireAbortSignal } from "./bridge-utils.js";
|
|
8
6
|
import { withTaskRuntime } from "@smithers-orchestrator/driver/task-runtime";
|
|
9
7
|
import { logDebug, logError, logInfo, logWarning } from "@smithers-orchestrator/observability/logging";
|
|
@@ -623,6 +621,14 @@ export const executeComputeTaskBridge = async (adapter, db, runId, desc, eventBu
|
|
|
623
621
|
if (isHeartbeatPayloadValidationError(effectiveError)) {
|
|
624
622
|
attemptMeta.failureRetryable = false;
|
|
625
623
|
}
|
|
624
|
+
// Propagate non-retryable signal from any thrown SmithersError so the
|
|
625
|
+
// attempt is not retried (e.g. AGENT_CONFIG_INVALID from KimiAgent's
|
|
626
|
+
// expired-credentials check, or auth-failure patterns classified by
|
|
627
|
+
// BaseCliAgent.classifyNonRetryableAgentError).
|
|
628
|
+
if (effectiveError?.details?.failureRetryable === false ||
|
|
629
|
+
effectiveError?.code === "AGENT_CONFIG_INVALID") {
|
|
630
|
+
attemptMeta.failureRetryable = false;
|
|
631
|
+
}
|
|
626
632
|
if (aborted) {
|
|
627
633
|
await waitForHeartbeatWriteDrain();
|
|
628
634
|
await flushHeartbeat(true);
|
|
@@ -3,7 +3,6 @@ import { renderToStaticMarkup } from "react-dom/server";
|
|
|
3
3
|
import { Effect, Exit } from "effect";
|
|
4
4
|
import { buildOutputRow, describeSchemaShape, selectOutputRow, stripAutoColumns, validateExistingOutput, validateOutput, } from "@smithers-orchestrator/db/output";
|
|
5
5
|
import { awaitApprovalDurableDeferred, awaitWaitForEventDurableDeferred, bridgeApprovalResolve, bridgeWaitForEventResolve, } from "./durable-deferred-bridge.js";
|
|
6
|
-
import { EventBus } from "../events.js";
|
|
7
6
|
import { buildHumanRequestId, getHumanTaskPrompt as getStoredHumanTaskPrompt, isHumanTaskMeta, } from "../human-requests.js";
|
|
8
7
|
import { parseAttemptMetaJson } from "./bridge-utils.js";
|
|
9
8
|
import { updateAsyncExternalWaitPending } from "@smithers-orchestrator/observability/metrics";
|
|
@@ -344,7 +344,7 @@ export async function applyDiffBundle(bundle, targetDir) {
|
|
|
344
344
|
await runGit(targetDir, ["apply", "--binary", "--whitespace=nowarn", "--unsafe-paths", "-"], { input: fullPatch });
|
|
345
345
|
return;
|
|
346
346
|
}
|
|
347
|
-
catch
|
|
347
|
+
catch {
|
|
348
348
|
for (const patch of bundle.patches) {
|
|
349
349
|
await applyPatchFallback(patch, targetDir);
|
|
350
350
|
}
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import { Effect, Metric } from "effect";
|
|
2
|
-
import { z } from "zod";
|
|
3
2
|
import { buildOutputRow, stripAutoColumns, validateOutput } from "@smithers-orchestrator/db/output";
|
|
4
|
-
import { EventBus } from "../events.js";
|
|
5
3
|
import { makeAbortError, wireAbortSignal } from "./bridge-utils.js";
|
|
6
4
|
import { logDebug, logError, logInfo } from "@smithers-orchestrator/observability/logging";
|
|
7
5
|
import { attemptDuration, nodeDuration } from "@smithers-orchestrator/observability/metrics";
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import { Effect } from "effect";
|
|
2
|
-
import { SmithersDb } from "@smithers-orchestrator/db/adapter";
|
|
3
|
-
import { EventBus } from "../events.js";
|
|
4
2
|
import { toSmithersError } from "@smithers-orchestrator/errors/toSmithersError";
|
|
5
3
|
import { makeWorkerTask, } from "./entity-worker.js";
|
|
6
4
|
import { executeTaskActivity, makeTaskBridgeKey, RetriableTaskFailure, } from "./activity-bridge.js";
|
package/src/engine.js
CHANGED
|
@@ -5,7 +5,7 @@ import { SmithersCtx } from "@smithers-orchestrator/driver/SmithersCtx";
|
|
|
5
5
|
import { loadInput, loadOutputs } from "@smithers-orchestrator/db/snapshot";
|
|
6
6
|
import { ensureSmithersTables } from "@smithers-orchestrator/db/ensure";
|
|
7
7
|
import { SmithersDb } from "@smithers-orchestrator/db/adapter";
|
|
8
|
-
import { selectOutputRow, validateOutput, validateExistingOutput,
|
|
8
|
+
import { selectOutputRow, validateOutput, validateExistingOutput, describeSchemaShape, buildOutputRow, stripAutoColumns, } from "@smithers-orchestrator/db/output";
|
|
9
9
|
import { validateInput } from "@smithers-orchestrator/db/input";
|
|
10
10
|
import { schemaSignature } from "@smithers-orchestrator/db/schema-signature";
|
|
11
11
|
import { withSqliteWriteRetry } from "@smithers-orchestrator/db/write-retry";
|
|
@@ -23,7 +23,6 @@ import { EventBus } from "./events.js";
|
|
|
23
23
|
import { getJjPointer, runJj, workspaceAdd } from "@smithers-orchestrator/vcs/jj";
|
|
24
24
|
import { findVcsRoot } from "@smithers-orchestrator/vcs/find-root";
|
|
25
25
|
import * as BunContext from "@effect/platform-bun/BunContext";
|
|
26
|
-
import { z } from "zod";
|
|
27
26
|
import { eq, getTableName } from "drizzle-orm";
|
|
28
27
|
import { getTableColumns } from "drizzle-orm/utils";
|
|
29
28
|
import { Chunk, Duration, Effect, Fiber, Metric, Queue, Schedule } from "effect";
|
|
@@ -448,16 +447,6 @@ function prependToolResumeWarningMessage(prompt, warningMessage) {
|
|
|
448
447
|
}
|
|
449
448
|
return `${warningMessage}\n\n${prompt}`;
|
|
450
449
|
}
|
|
451
|
-
/**
|
|
452
|
-
* @param {HijackCompletion} completion
|
|
453
|
-
* @returns {Error}
|
|
454
|
-
*/
|
|
455
|
-
function buildHijackAbortError(completion) {
|
|
456
|
-
const err = makeAbortError(`Hijack requested for ${completion.engine}`);
|
|
457
|
-
err.code = "RUN_HIJACKED";
|
|
458
|
-
err.hijack = completion;
|
|
459
|
-
return err;
|
|
460
|
-
}
|
|
461
450
|
/**
|
|
462
451
|
* @param {string} cwd
|
|
463
452
|
* @param {string[]} args
|
|
@@ -1526,13 +1515,11 @@ function assertResumeDurabilityMetadata(existingRun, existingConfig, current, wo
|
|
|
1526
1515
|
else if (shouldCheckWorkflowHashes) {
|
|
1527
1516
|
compareNullableString(existingRun.workflowHash, current.entryWorkflowHash, "workflow entry file changed", mismatches);
|
|
1528
1517
|
}
|
|
1529
|
-
compareNullableString(existingRun.vcsType, current.vcsType, "VCS type changed", mismatches);
|
|
1530
1518
|
if ((existingRun.vcsRoot && current.vcsRoot
|
|
1531
1519
|
? resolve(existingRun.vcsRoot) !== resolve(current.vcsRoot)
|
|
1532
1520
|
: (existingRun.vcsRoot ?? null) !== (current.vcsRoot ?? null))) {
|
|
1533
1521
|
mismatches.push("VCS root changed");
|
|
1534
1522
|
}
|
|
1535
|
-
compareNullableString(existingRun.vcsRevision, current.vcsRevision, "VCS revision changed", mismatches);
|
|
1536
1523
|
if (mismatches.length > 0) {
|
|
1537
1524
|
throw new SmithersError("RESUME_METADATA_MISMATCH", `Cannot resume run because durable metadata changed: ${mismatches.join(", ")}`, {
|
|
1538
1525
|
existing: {
|
|
@@ -2031,8 +2018,15 @@ function isRetryableTaskFailure(attempt) {
|
|
|
2031
2018
|
if (meta?.failureRetryable === false) {
|
|
2032
2019
|
return false;
|
|
2033
2020
|
}
|
|
2021
|
+
const errorCode = parseAttemptErrorCode(attempt?.errorJson);
|
|
2022
|
+
// AGENT_CONFIG_INVALID is a deterministic configuration failure (e.g.
|
|
2023
|
+
// "LLM not set", unknown model). Retrying is guaranteed to fail again
|
|
2024
|
+
// and just multiplies cost — short-circuit immediately.
|
|
2025
|
+
if (errorCode === "AGENT_CONFIG_INVALID") {
|
|
2026
|
+
return false;
|
|
2027
|
+
}
|
|
2034
2028
|
const kind = typeof meta?.kind === "string" ? meta.kind : null;
|
|
2035
|
-
return !(kind !== "agent" &&
|
|
2029
|
+
return !(kind !== "agent" && errorCode === "INVALID_OUTPUT");
|
|
2036
2030
|
}
|
|
2037
2031
|
/**
|
|
2038
2032
|
* @param {SmithersDb} adapter
|
|
@@ -2766,13 +2760,15 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
2766
2760
|
effectiveAgent.model ??
|
|
2767
2761
|
effectiveAgent.modelId ??
|
|
2768
2762
|
null;
|
|
2769
|
-
const
|
|
2763
|
+
const hijackCapableEngine = typeof effectiveAgent.cliEngine === "string"
|
|
2770
2764
|
? effectiveAgent.cliEngine
|
|
2771
2765
|
: typeof effectiveAgent.hijackEngine === "string"
|
|
2772
2766
|
? effectiveAgent.hijackEngine
|
|
2773
|
-
:
|
|
2774
|
-
|
|
2775
|
-
|
|
2767
|
+
: null;
|
|
2768
|
+
const currentAgentEngine = hijackCapableEngine ??
|
|
2769
|
+
(typeof effectiveAgent.constructor?.name === "string"
|
|
2770
|
+
? effectiveAgent.constructor.name
|
|
2771
|
+
: null);
|
|
2776
2772
|
attemptMeta.agentEngine = currentAgentEngine;
|
|
2777
2773
|
const heartbeatCheckpoint = previousHeartbeat &&
|
|
2778
2774
|
typeof previousHeartbeat === "object" &&
|
|
@@ -2785,15 +2781,25 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
2785
2781
|
const heartbeatCheckpointUsable = !currentAgentEngine ||
|
|
2786
2782
|
!heartbeatCheckpointEngine ||
|
|
2787
2783
|
heartbeatCheckpointEngine === currentAgentEngine;
|
|
2788
|
-
|
|
2789
|
-
|
|
2784
|
+
// If the most recent failed attempt asked us to drop the resume
|
|
2785
|
+
// session (e.g. kimi crashed mid-stream and reported `kimi -r
|
|
2786
|
+
// <uuid>`; that session is now corrupt and re-resuming it just
|
|
2787
|
+
// reproduces the crash), don't reuse the captured agentResume
|
|
2788
|
+
// from the heartbeat. Forces the agent to start a fresh
|
|
2789
|
+
// session on the next attempt.
|
|
2790
|
+
const lastFailedAttempt = attempts.find((a) => a.state === "failed");
|
|
2791
|
+
const lastFailedMeta = parseAttemptMetaJson(lastFailedAttempt?.metaJson);
|
|
2792
|
+
const discardResumeSession = lastFailedMeta?.discardResumeSession === true;
|
|
2793
|
+
const checkpointResumeSession = !discardResumeSession
|
|
2794
|
+
&& heartbeatCheckpointUsable
|
|
2795
|
+
&& typeof heartbeatCheckpoint?.agentResume === "string"
|
|
2790
2796
|
? heartbeatCheckpoint.agentResume
|
|
2791
2797
|
: undefined;
|
|
2792
2798
|
const checkpointResumeMessages = heartbeatCheckpointUsable
|
|
2793
2799
|
? asConversationMessages(heartbeatCheckpoint?.agentConversation)
|
|
2794
2800
|
: undefined;
|
|
2795
|
-
const priorContinuation =
|
|
2796
|
-
? findHijackContinuation(attempts,
|
|
2801
|
+
const priorContinuation = hijackCapableEngine
|
|
2802
|
+
? findHijackContinuation(attempts, hijackCapableEngine)
|
|
2797
2803
|
: undefined;
|
|
2798
2804
|
const resumeSession = priorContinuation?.mode === "native-cli"
|
|
2799
2805
|
? priorContinuation.resume
|
|
@@ -2803,6 +2809,37 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
2803
2809
|
: (cloneJsonValue(checkpointResumeMessages) ??
|
|
2804
2810
|
checkpointResumeMessages);
|
|
2805
2811
|
const guidedResumeMessages = appendToolResumeWarningMessage(resumeMessages, toolResumeWarningMessage);
|
|
2812
|
+
if (desc.hijack) {
|
|
2813
|
+
if (!hijackCapableEngine) {
|
|
2814
|
+
attemptMeta.failureRetryable = false;
|
|
2815
|
+
throw new SmithersError("TASK_HIJACK_UNSUPPORTED", `Task ${desc.nodeId} sets hijack, but its agent is not hijack-capable. Hijack requires an agent with cliEngine or hijackEngine.`, {
|
|
2816
|
+
nodeId: desc.nodeId,
|
|
2817
|
+
agentId: attemptMeta.agentId ?? undefined,
|
|
2818
|
+
});
|
|
2819
|
+
}
|
|
2820
|
+
const shouldAutoHijack = desc.onHijackExit === "reopen" || !priorContinuation;
|
|
2821
|
+
if (shouldAutoHijack && !hijackState) {
|
|
2822
|
+
attemptMeta.failureRetryable = false;
|
|
2823
|
+
throw new SmithersError("TASK_HIJACK_UNSUPPORTED", `Task ${desc.nodeId} cannot auto-hijack in this execution mode.`, {
|
|
2824
|
+
nodeId: desc.nodeId,
|
|
2825
|
+
agentId: attemptMeta.agentId ?? undefined,
|
|
2826
|
+
});
|
|
2827
|
+
}
|
|
2828
|
+
if (shouldAutoHijack && !hijackState.request && !hijackState.completion) {
|
|
2829
|
+
const requestedAtMs = nowMs();
|
|
2830
|
+
hijackState.request = {
|
|
2831
|
+
requestedAtMs,
|
|
2832
|
+
target: hijackCapableEngine,
|
|
2833
|
+
};
|
|
2834
|
+
await Effect.runPromise(adapter.requestRunHijack(runId, requestedAtMs, hijackCapableEngine));
|
|
2835
|
+
await Effect.runPromise(eventBus.emitEventWithPersist({
|
|
2836
|
+
type: "RunHijackRequested",
|
|
2837
|
+
runId,
|
|
2838
|
+
target: hijackCapableEngine,
|
|
2839
|
+
timestampMs: requestedAtMs,
|
|
2840
|
+
}));
|
|
2841
|
+
}
|
|
2842
|
+
}
|
|
2806
2843
|
if (resumeSession) {
|
|
2807
2844
|
attemptMeta.resumedFromSession = resumeSession;
|
|
2808
2845
|
}
|
|
@@ -2989,37 +3026,40 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
2989
3026
|
// Use fallback agent on retry attempts when available
|
|
2990
3027
|
let result;
|
|
2991
3028
|
try {
|
|
2992
|
-
result = await Effect.runPromise(withSmithersSpan(smithersSpanNames.agent, Effect.
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3013
|
-
|
|
3014
|
-
|
|
3015
|
-
|
|
3016
|
-
|
|
3017
|
-
|
|
3018
|
-
|
|
3019
|
-
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3029
|
+
result = await Effect.runPromise(withSmithersSpan(smithersSpanNames.agent, Effect.tryPromise({
|
|
3030
|
+
try: () => {
|
|
3031
|
+
const agentCall = guidedResumeMessages?.length
|
|
3032
|
+
? {
|
|
3033
|
+
messages: guidedResumeMessages,
|
|
3034
|
+
}
|
|
3035
|
+
: {
|
|
3036
|
+
prompt: effectivePrompt,
|
|
3037
|
+
};
|
|
3038
|
+
return effectiveAgent.generate({
|
|
3039
|
+
options: undefined,
|
|
3040
|
+
abortSignal: taskSignal,
|
|
3041
|
+
...agentCall,
|
|
3042
|
+
resumeSession,
|
|
3043
|
+
lastHeartbeat: previousHeartbeat,
|
|
3044
|
+
rootDir: taskRoot,
|
|
3045
|
+
maxOutputBytes: toolConfig.maxOutputBytes,
|
|
3046
|
+
timeout: desc.timeoutMs
|
|
3047
|
+
? { totalMs: desc.timeoutMs }
|
|
3048
|
+
: undefined,
|
|
3049
|
+
onStdout: (text) => {
|
|
3050
|
+
recordInternalHeartbeat();
|
|
3051
|
+
emitOutput(text, "stdout");
|
|
3052
|
+
},
|
|
3053
|
+
onStderr: (text) => {
|
|
3054
|
+
recordInternalHeartbeat();
|
|
3055
|
+
emitOutput(text, "stderr");
|
|
3056
|
+
},
|
|
3057
|
+
onEvent: handleAgentEvent,
|
|
3058
|
+
onStepFinish: handleSdkStepFinish,
|
|
3059
|
+
outputSchema: desc.outputSchema,
|
|
3060
|
+
});
|
|
3061
|
+
},
|
|
3062
|
+
catch: (error) => error,
|
|
3023
3063
|
}), {
|
|
3024
3064
|
...taskSpanContext,
|
|
3025
3065
|
agent: attemptMeta.agentId ??
|
|
@@ -3100,10 +3140,12 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
3100
3140
|
// Fall back to parsing text/steps for JSON
|
|
3101
3141
|
if (output === undefined) {
|
|
3102
3142
|
const text = result.text ?? "";
|
|
3103
|
-
// Try to parse the whole text as JSON first
|
|
3143
|
+
// Try to parse the whole text as JSON first. Strip a leading
|
|
3144
|
+
// UTF-8 BOM and accept either object or array at the root,
|
|
3145
|
+
// since Zod schemas occasionally validate arrays.
|
|
3104
3146
|
try {
|
|
3105
|
-
const trimmed = text.trim();
|
|
3106
|
-
if (trimmed.startsWith("{")) {
|
|
3147
|
+
const trimmed = text.replace(/^\uFEFF/, "").trim();
|
|
3148
|
+
if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
|
|
3107
3149
|
output = JSON.parse(trimmed);
|
|
3108
3150
|
}
|
|
3109
3151
|
}
|
|
@@ -3280,14 +3322,28 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
3280
3322
|
const retryText = retryResult.text ?? "";
|
|
3281
3323
|
responseText = retryText || responseText;
|
|
3282
3324
|
try {
|
|
3283
|
-
const trimmed = retryText.trim();
|
|
3284
|
-
if (trimmed.startsWith("{")) {
|
|
3325
|
+
const trimmed = retryText.replace(/^\uFEFF/, "").trim();
|
|
3326
|
+
if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
|
|
3285
3327
|
output = JSON.parse(trimmed);
|
|
3286
3328
|
}
|
|
3287
3329
|
}
|
|
3288
3330
|
catch {
|
|
3289
3331
|
// Still not valid JSON
|
|
3290
3332
|
}
|
|
3333
|
+
if (output === undefined) {
|
|
3334
|
+
// Try extracting JSON from a markdown code fence
|
|
3335
|
+
// (```json ... ``` or just ``` ... ```).
|
|
3336
|
+
const fenceMatch = retryText.match(/```(?:json)?\s*([\s\S]*?)```/i);
|
|
3337
|
+
if (fenceMatch) {
|
|
3338
|
+
const inner = fenceMatch[1].trim();
|
|
3339
|
+
try {
|
|
3340
|
+
output = JSON.parse(inner);
|
|
3341
|
+
}
|
|
3342
|
+
catch {
|
|
3343
|
+
// Fall through to balanced extraction
|
|
3344
|
+
}
|
|
3345
|
+
}
|
|
3346
|
+
}
|
|
3291
3347
|
if (output === undefined) {
|
|
3292
3348
|
// Try extracting balanced JSON from retry text
|
|
3293
3349
|
const jsonStr = extractBalancedJson(retryText);
|
|
@@ -3303,8 +3359,6 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
3303
3359
|
}
|
|
3304
3360
|
if (output === undefined) {
|
|
3305
3361
|
// Debug: log what we have
|
|
3306
|
-
const debugSteps = result.steps ?? [];
|
|
3307
|
-
const stepTexts = debugSteps.map((s, i) => `Step ${i}: ${(s?.text ?? "").slice(0, 200)}`);
|
|
3308
3362
|
const finishReason = result.finishReason ?? "unknown";
|
|
3309
3363
|
logDebug("agent response did not contain valid JSON output", {
|
|
3310
3364
|
runId,
|
|
@@ -3319,7 +3373,11 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
3319
3373
|
lastStepText: debugSteps[debugSteps.length - 1]?.text?.slice(0, 500) ??
|
|
3320
3374
|
"none",
|
|
3321
3375
|
}, "engine:task-json");
|
|
3322
|
-
|
|
3376
|
+
const tail = (text ?? "").slice(-200).replace(/\s+/g, " ").trim();
|
|
3377
|
+
const tailHint = tail
|
|
3378
|
+
? ` Last 200 chars of response: ${JSON.stringify(tail)}`
|
|
3379
|
+
: " Agent returned an empty response.";
|
|
3380
|
+
throw new SmithersError("INVALID_OUTPUT", `No valid JSON output found in agent response (finishReason=${finishReason}, textLength=${text.length}).${tailHint}`);
|
|
3323
3381
|
}
|
|
3324
3382
|
}
|
|
3325
3383
|
// Output should already be parsed, but handle string case
|
|
@@ -3327,7 +3385,7 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
3327
3385
|
try {
|
|
3328
3386
|
payload = JSON.parse(output);
|
|
3329
3387
|
}
|
|
3330
|
-
catch
|
|
3388
|
+
catch {
|
|
3331
3389
|
throw new SmithersError("INVALID_OUTPUT", `Failed to parse agent output as JSON. Output starts with: "${output.slice(0, 100)}"`);
|
|
3332
3390
|
}
|
|
3333
3391
|
}
|
|
@@ -3679,6 +3737,30 @@ async function legacyExecuteTask(adapter, db, runId, desc, descriptorMap, inputT
|
|
|
3679
3737
|
if (isHeartbeatPayloadValidationError(effectiveError)) {
|
|
3680
3738
|
attemptMeta.failureRetryable = false;
|
|
3681
3739
|
}
|
|
3740
|
+
// Allow agents (e.g. BaseCliAgent on "LLM not set") to flag a failure as
|
|
3741
|
+
// non-retryable via SmithersError details. Without this, the engine would
|
|
3742
|
+
// retry deterministic configuration errors up to desc.retries times.
|
|
3743
|
+
if (effectiveError &&
|
|
3744
|
+
typeof effectiveError === "object" &&
|
|
3745
|
+
// @ts-ignore — duck-type on SmithersError shape
|
|
3746
|
+
effectiveError.details &&
|
|
3747
|
+
// @ts-ignore
|
|
3748
|
+
effectiveError.details.failureRetryable === false) {
|
|
3749
|
+
attemptMeta.failureRetryable = false;
|
|
3750
|
+
}
|
|
3751
|
+
// Honour `discardResumeSession: true` from agent-side errors (e.g. kimi
|
|
3752
|
+
// session-loss). The next attempt's resumeSession resolution checks
|
|
3753
|
+
// attemptMeta.discardResumeSession on the most recent failed attempt
|
|
3754
|
+
// and clears the captured agentResume so the agent starts fresh
|
|
3755
|
+
// instead of redundantly trying to resume a corrupt session.
|
|
3756
|
+
if (effectiveError &&
|
|
3757
|
+
typeof effectiveError === "object" &&
|
|
3758
|
+
// @ts-ignore — duck-type on SmithersError shape
|
|
3759
|
+
effectiveError.details &&
|
|
3760
|
+
// @ts-ignore
|
|
3761
|
+
effectiveError.details.discardResumeSession === true) {
|
|
3762
|
+
attemptMeta.discardResumeSession = true;
|
|
3763
|
+
}
|
|
3682
3764
|
if (!heartbeatTimeoutError && (taskSignal.aborted || isAbortError(err))) {
|
|
3683
3765
|
await waitForHeartbeatWriteDrain();
|
|
3684
3766
|
await flushHeartbeat(true);
|
package/src/hot/watch.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { watch } from "node:fs";
|
|
2
|
-
import { readdir
|
|
3
|
-
import { resolve
|
|
2
|
+
import { readdir } from "node:fs/promises";
|
|
3
|
+
import { resolve } from "node:path";
|
|
4
4
|
import { Effect } from "effect";
|
|
5
5
|
import { toSmithersError } from "@smithers-orchestrator/errors/toSmithersError";
|
|
6
6
|
import { logDebug, logInfo } from "@smithers-orchestrator/observability/logging";
|
package/src/signals.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { Effect } from "effect";
|
|
2
|
-
import { SmithersDb } from "@smithers-orchestrator/db/adapter";
|
|
3
2
|
import { bridgeSignalResolve } from "./effect/durable-deferred-bridge.js";
|
|
4
3
|
import { SmithersError } from "@smithers-orchestrator/errors/SmithersError";
|
|
5
4
|
import { nowMs } from "@smithers-orchestrator/scheduler/nowMs";
|