zeitlich 0.2.37 → 0.2.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{activities-Bb-nAjwQ.d.ts → activities-BKhMtKDd.d.ts} +4 -2
- package/dist/{activities-vkI4_3CC.d.cts → activities-CDcwkRZs.d.cts} +4 -2
- package/dist/adapters/sandbox/bedrock/index.cjs +3 -3
- package/dist/adapters/sandbox/bedrock/index.cjs.map +1 -1
- package/dist/adapters/sandbox/bedrock/index.d.cts +6 -6
- package/dist/adapters/sandbox/bedrock/index.d.ts +6 -6
- package/dist/adapters/sandbox/bedrock/index.js +3 -3
- package/dist/adapters/sandbox/bedrock/index.js.map +1 -1
- package/dist/adapters/sandbox/bedrock/workflow.d.cts +2 -2
- package/dist/adapters/sandbox/bedrock/workflow.d.ts +2 -2
- package/dist/adapters/sandbox/daytona/index.cjs +3 -3
- package/dist/adapters/sandbox/daytona/index.cjs.map +1 -1
- package/dist/adapters/sandbox/daytona/index.d.cts +4 -4
- package/dist/adapters/sandbox/daytona/index.d.ts +4 -4
- package/dist/adapters/sandbox/daytona/index.js +3 -3
- package/dist/adapters/sandbox/daytona/index.js.map +1 -1
- package/dist/adapters/sandbox/daytona/workflow.d.cts +1 -1
- package/dist/adapters/sandbox/daytona/workflow.d.ts +1 -1
- package/dist/adapters/sandbox/e2b/index.cjs +26 -14
- package/dist/adapters/sandbox/e2b/index.cjs.map +1 -1
- package/dist/adapters/sandbox/e2b/index.d.cts +24 -4
- package/dist/adapters/sandbox/e2b/index.d.ts +24 -4
- package/dist/adapters/sandbox/e2b/index.js +26 -14
- package/dist/adapters/sandbox/e2b/index.js.map +1 -1
- package/dist/adapters/sandbox/e2b/workflow.d.cts +1 -1
- package/dist/adapters/sandbox/e2b/workflow.d.ts +1 -1
- package/dist/adapters/sandbox/inmemory/index.cjs +3 -3
- package/dist/adapters/sandbox/inmemory/index.cjs.map +1 -1
- package/dist/adapters/sandbox/inmemory/index.d.cts +4 -4
- package/dist/adapters/sandbox/inmemory/index.d.ts +4 -4
- package/dist/adapters/sandbox/inmemory/index.js +3 -3
- package/dist/adapters/sandbox/inmemory/index.js.map +1 -1
- package/dist/adapters/sandbox/inmemory/workflow.d.cts +1 -1
- package/dist/adapters/sandbox/inmemory/workflow.d.ts +1 -1
- package/dist/adapters/thread/anthropic/index.cjs +23 -3
- package/dist/adapters/thread/anthropic/index.cjs.map +1 -1
- package/dist/adapters/thread/anthropic/index.d.cts +5 -5
- package/dist/adapters/thread/anthropic/index.d.ts +5 -5
- package/dist/adapters/thread/anthropic/index.js +23 -3
- package/dist/adapters/thread/anthropic/index.js.map +1 -1
- package/dist/adapters/thread/anthropic/workflow.cjs +2 -1
- package/dist/adapters/thread/anthropic/workflow.cjs.map +1 -1
- package/dist/adapters/thread/anthropic/workflow.d.cts +5 -5
- package/dist/adapters/thread/anthropic/workflow.d.ts +5 -5
- package/dist/adapters/thread/anthropic/workflow.js +2 -1
- package/dist/adapters/thread/anthropic/workflow.js.map +1 -1
- package/dist/adapters/thread/google-genai/index.cjs +27 -3
- package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
- package/dist/adapters/thread/google-genai/index.d.cts +5 -5
- package/dist/adapters/thread/google-genai/index.d.ts +5 -5
- package/dist/adapters/thread/google-genai/index.js +27 -3
- package/dist/adapters/thread/google-genai/index.js.map +1 -1
- package/dist/adapters/thread/google-genai/workflow.cjs +2 -1
- package/dist/adapters/thread/google-genai/workflow.cjs.map +1 -1
- package/dist/adapters/thread/google-genai/workflow.d.cts +5 -5
- package/dist/adapters/thread/google-genai/workflow.d.ts +5 -5
- package/dist/adapters/thread/google-genai/workflow.js +2 -1
- package/dist/adapters/thread/google-genai/workflow.js.map +1 -1
- package/dist/adapters/thread/langchain/index.cjs +23 -3
- package/dist/adapters/thread/langchain/index.cjs.map +1 -1
- package/dist/adapters/thread/langchain/index.d.cts +5 -5
- package/dist/adapters/thread/langchain/index.d.ts +5 -5
- package/dist/adapters/thread/langchain/index.js +23 -3
- package/dist/adapters/thread/langchain/index.js.map +1 -1
- package/dist/adapters/thread/langchain/workflow.cjs +2 -1
- package/dist/adapters/thread/langchain/workflow.cjs.map +1 -1
- package/dist/adapters/thread/langchain/workflow.d.cts +5 -5
- package/dist/adapters/thread/langchain/workflow.d.ts +5 -5
- package/dist/adapters/thread/langchain/workflow.js +2 -1
- package/dist/adapters/thread/langchain/workflow.js.map +1 -1
- package/dist/index.cjs +120 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -11
- package/dist/index.d.ts +11 -11
- package/dist/index.js +121 -31
- package/dist/index.js.map +1 -1
- package/dist/{proxy-0smGKvx8.d.ts → proxy-CUlKSvZS.d.ts} +1 -1
- package/dist/{proxy-DEtowJyd.d.cts → proxy-D_3x7RN4.d.cts} +1 -1
- package/dist/{thread-manager-C-C4pI2z.d.ts → thread-manager-CVu7o2cs.d.ts} +4 -2
- package/dist/{thread-manager-D4vgzYrh.d.cts → thread-manager-HSwyh28L.d.cts} +4 -2
- package/dist/{thread-manager-3fszQih4.d.ts → thread-manager-c1gPopAG.d.ts} +4 -2
- package/dist/{thread-manager-CzYln2OC.d.cts → thread-manager-wGi-LqIP.d.cts} +4 -2
- package/dist/{types-B37hKoWA.d.ts → types-BH_IRryz.d.ts} +10 -1
- package/dist/{types-D08CXPh8.d.cts → types-BaOw4hKI.d.cts} +10 -1
- package/dist/{types-CPKDl-y_.d.ts → types-C06FwR96.d.cts} +59 -4
- package/dist/{types-CNuWnvy9.d.ts → types-DAsQ21Rt.d.ts} +1 -1
- package/dist/{types-BO7Yju20.d.cts → types-DNr31FzL.d.ts} +59 -4
- package/dist/{types-DWEUmYAJ.d.cts → types-lm8tMNJQ.d.cts} +1 -1
- package/dist/{types-tQL9njTu.d.cts → types-yx0LzPGn.d.cts} +21 -7
- package/dist/{types-tQL9njTu.d.ts → types-yx0LzPGn.d.ts} +21 -7
- package/dist/{workflow-CjXHbZZc.d.ts → workflow-CSCkpwAL.d.ts} +2 -2
- package/dist/{workflow-Do_lzJpT.d.cts → workflow-DuvMZ8Vm.d.cts} +2 -2
- package/dist/workflow.cjs +94 -18
- package/dist/workflow.cjs.map +1 -1
- package/dist/workflow.d.cts +3 -3
- package/dist/workflow.d.ts +3 -3
- package/dist/workflow.js +95 -19
- package/dist/workflow.js.map +1 -1
- package/package.json +2 -2
- package/src/adapters/sandbox/bedrock/index.ts +12 -3
- package/src/adapters/sandbox/daytona/index.ts +12 -3
- package/src/adapters/sandbox/e2b/index.ts +36 -14
- package/src/adapters/sandbox/e2b/types.ts +16 -0
- package/src/adapters/sandbox/inmemory/index.ts +12 -3
- package/src/adapters/thread/anthropic/activities.ts +9 -0
- package/src/adapters/thread/anthropic/model-invoker.ts +3 -1
- package/src/adapters/thread/anthropic/thread-manager.ts +3 -0
- package/src/adapters/thread/google-genai/activities.ts +13 -0
- package/src/adapters/thread/google-genai/model-invoker.ts +3 -1
- package/src/adapters/thread/google-genai/thread-manager.ts +3 -0
- package/src/adapters/thread/langchain/activities.ts +9 -0
- package/src/adapters/thread/langchain/model-invoker.ts +2 -1
- package/src/adapters/thread/langchain/thread-manager.ts +3 -0
- package/src/lib/lifecycle.ts +11 -4
- package/src/lib/model/types.ts +10 -0
- package/src/lib/sandbox/manager.ts +26 -18
- package/src/lib/sandbox/types.ts +27 -7
- package/src/lib/session/session-edge-cases.integration.test.ts +265 -1
- package/src/lib/session/session.integration.test.ts +22 -1
- package/src/lib/session/session.ts +61 -7
- package/src/lib/session/types.ts +12 -0
- package/src/lib/subagent/subagent.integration.test.ts +100 -104
- package/src/lib/thread/manager.ts +18 -0
- package/src/lib/thread/proxy.ts +1 -0
- package/src/lib/thread/types.ts +9 -0
- package/src/lib/tool-router/index.ts +2 -0
- package/src/lib/tool-router/router-edge-cases.integration.test.ts +92 -0
- package/src/lib/tool-router/router.integration.test.ts +12 -0
- package/src/lib/tool-router/router.ts +89 -16
- package/src/lib/tool-router/types.ts +34 -1
- package/src/workflow.ts +2 -0
|
@@ -29,6 +29,16 @@ vi.mock("@temporalio/workflow", () => {
|
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
class MockCancellationScope {
|
|
33
|
+
cancellable: boolean;
|
|
34
|
+
constructor(opts?: { cancellable?: boolean }) {
|
|
35
|
+
this.cancellable = opts?.cancellable ?? true;
|
|
36
|
+
}
|
|
37
|
+
async run<T>(fn: () => Promise<T>): Promise<T> {
|
|
38
|
+
return fn();
|
|
39
|
+
}
|
|
40
|
+
cancel(): void {}
|
|
41
|
+
}
|
|
32
42
|
return {
|
|
33
43
|
proxyActivities: <T>() => ({}) as T,
|
|
34
44
|
condition: async (fn: () => boolean) => fn(),
|
|
@@ -42,6 +52,8 @@ vi.mock("@temporalio/workflow", () => {
|
|
|
42
52
|
uuid4: () =>
|
|
43
53
|
`00000000-0000-0000-0000-${String(++idCounter).padStart(12, "0")}`,
|
|
44
54
|
ApplicationFailure: MockApplicationFailure,
|
|
55
|
+
CancellationScope: MockCancellationScope,
|
|
56
|
+
isCancellation: (_err: unknown) => false,
|
|
45
57
|
log: {
|
|
46
58
|
trace: () => {},
|
|
47
59
|
debug: () => {},
|
|
@@ -61,6 +73,7 @@ type TurnScript = {
|
|
|
61
73
|
message: unknown;
|
|
62
74
|
toolCalls: RawToolCall[];
|
|
63
75
|
usage?: TokenUsage;
|
|
76
|
+
threadLengthAtCall?: number;
|
|
64
77
|
};
|
|
65
78
|
|
|
66
79
|
/**
|
|
@@ -102,6 +115,9 @@ function createMockThreadOps() {
|
|
|
102
115
|
forkThread: async (source, target) => {
|
|
103
116
|
log.push({ op: "forkThread", args: [source, target] });
|
|
104
117
|
},
|
|
118
|
+
truncateThread: async (threadId, length) => {
|
|
119
|
+
log.push({ op: "truncateThread", args: [threadId, length] });
|
|
120
|
+
},
|
|
105
121
|
});
|
|
106
122
|
return { ops, log };
|
|
107
123
|
}
|
|
@@ -113,12 +129,18 @@ function createScriptedRunAgent(
|
|
|
113
129
|
return async () => {
|
|
114
130
|
const turn = turns[call++];
|
|
115
131
|
if (!turn) {
|
|
116
|
-
return {
|
|
132
|
+
return {
|
|
133
|
+
message: "done",
|
|
134
|
+
rawToolCalls: [],
|
|
135
|
+
usage: undefined,
|
|
136
|
+
threadLengthAtCall: 0,
|
|
137
|
+
};
|
|
117
138
|
}
|
|
118
139
|
return {
|
|
119
140
|
message: turn.message,
|
|
120
141
|
rawToolCalls: turn.toolCalls,
|
|
121
142
|
usage: turn.usage,
|
|
143
|
+
threadLengthAtCall: turn.threadLengthAtCall ?? 0,
|
|
122
144
|
};
|
|
123
145
|
};
|
|
124
146
|
}
|
|
@@ -775,6 +797,9 @@ describe("createSession edge cases", () => {
|
|
|
775
797
|
forkThread: async (source, target) => {
|
|
776
798
|
log.push({ op: "forkThread", args: [source, target] });
|
|
777
799
|
},
|
|
800
|
+
truncateThread: async (threadId, length) => {
|
|
801
|
+
log.push({ op: "truncateThread", args: [threadId, length] });
|
|
802
|
+
},
|
|
778
803
|
});
|
|
779
804
|
|
|
780
805
|
const session = await createSession<
|
|
@@ -1659,4 +1684,243 @@ describe("createSession edge cases", () => {
|
|
|
1659
1684
|
expect(sandboxLog).not.toContain("pause:kept-sb");
|
|
1660
1685
|
expect(sandboxLog).not.toContain("destroy:kept-sb");
|
|
1661
1686
|
});
|
|
1687
|
+
|
|
1688
|
+
// --- Rewind flow: tool requests rewind and turn is retried -------------
|
|
1689
|
+
|
|
1690
|
+
it("rewinds the turn when a tool handler returns rewind:true", async () => {
|
|
1691
|
+
const { ops, log } = createMockThreadOps();
|
|
1692
|
+
|
|
1693
|
+
let rewindAttempts = 0;
|
|
1694
|
+
const rewindTool = defineTool({
|
|
1695
|
+
name: "Rewind" as const,
|
|
1696
|
+
description: "rewinds once then succeeds",
|
|
1697
|
+
schema: z.object({}),
|
|
1698
|
+
handler: async () => {
|
|
1699
|
+
rewindAttempts += 1;
|
|
1700
|
+
if (rewindAttempts === 1) {
|
|
1701
|
+
return {
|
|
1702
|
+
toolResponse: "ignored",
|
|
1703
|
+
data: null,
|
|
1704
|
+
rewind: true,
|
|
1705
|
+
};
|
|
1706
|
+
}
|
|
1707
|
+
return { toolResponse: "ok", data: null };
|
|
1708
|
+
},
|
|
1709
|
+
});
|
|
1710
|
+
|
|
1711
|
+
const session = await createSession({
|
|
1712
|
+
agentName: "TestAgent",
|
|
1713
|
+
thread: { mode: "new", threadId: "thread-1" },
|
|
1714
|
+
runAgent: createScriptedRunAgent([
|
|
1715
|
+
{
|
|
1716
|
+
message: "attempt-1",
|
|
1717
|
+
toolCalls: [{ id: "tc-1", name: "Rewind", args: {} }],
|
|
1718
|
+
},
|
|
1719
|
+
{
|
|
1720
|
+
message: "attempt-2",
|
|
1721
|
+
toolCalls: [{ id: "tc-2", name: "Rewind", args: {} }],
|
|
1722
|
+
},
|
|
1723
|
+
{ message: "done", toolCalls: [] },
|
|
1724
|
+
]),
|
|
1725
|
+
threadOps: ops,
|
|
1726
|
+
tools: { Rewind: rewindTool },
|
|
1727
|
+
buildContextMessage: () => "go",
|
|
1728
|
+
});
|
|
1729
|
+
|
|
1730
|
+
const stateManager = createAgentStateManager({
|
|
1731
|
+
initialState: { systemPrompt: "test" },
|
|
1732
|
+
});
|
|
1733
|
+
|
|
1734
|
+
const result = await session.runSession({ stateManager });
|
|
1735
|
+
|
|
1736
|
+
expect(result.exitReason).toBe("completed");
|
|
1737
|
+
expect(result.finalMessage).toBe("done");
|
|
1738
|
+
expect(rewindAttempts).toBe(2);
|
|
1739
|
+
|
|
1740
|
+
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1741
|
+
expect(truncateOps).toHaveLength(1);
|
|
1742
|
+
|
|
1743
|
+
const noRewindToolResult = log.filter((l) => {
|
|
1744
|
+
if (l.op !== "appendToolResult") return false;
|
|
1745
|
+
const config = l.args[1] as ToolResultConfig;
|
|
1746
|
+
return config.toolCallId === "tc-1";
|
|
1747
|
+
});
|
|
1748
|
+
expect(noRewindToolResult).toHaveLength(0);
|
|
1749
|
+
|
|
1750
|
+
const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
|
|
1751
|
+
expect(agentAppends).toHaveLength(3);
|
|
1752
|
+
});
|
|
1753
|
+
|
|
1754
|
+
it("truncates the thread back to the pre-assistant state so sibling tool results are dropped on rewind", async () => {
|
|
1755
|
+
const { ops, log } = createMockThreadOps();
|
|
1756
|
+
|
|
1757
|
+
let rewindFired = false;
|
|
1758
|
+
|
|
1759
|
+
const siblingTool = defineTool({
|
|
1760
|
+
name: "Sibling" as const,
|
|
1761
|
+
description: "sibling",
|
|
1762
|
+
schema: z.object({}),
|
|
1763
|
+
handler: async () => ({ toolResponse: "sibling-ok", data: null }),
|
|
1764
|
+
});
|
|
1765
|
+
|
|
1766
|
+
const rewindTool = defineTool({
|
|
1767
|
+
name: "Rewind" as const,
|
|
1768
|
+
description: "rewinds",
|
|
1769
|
+
schema: z.object({}),
|
|
1770
|
+
handler: async () => {
|
|
1771
|
+
if (!rewindFired) {
|
|
1772
|
+
rewindFired = true;
|
|
1773
|
+
return { toolResponse: "ignored", data: null, rewind: true };
|
|
1774
|
+
}
|
|
1775
|
+
return { toolResponse: "ok", data: null };
|
|
1776
|
+
},
|
|
1777
|
+
});
|
|
1778
|
+
|
|
1779
|
+
const session = await createSession({
|
|
1780
|
+
agentName: "TestAgent",
|
|
1781
|
+
thread: { mode: "new", threadId: "thread-1" },
|
|
1782
|
+
runAgent: createScriptedRunAgent([
|
|
1783
|
+
{
|
|
1784
|
+
message: "parallel",
|
|
1785
|
+
toolCalls: [
|
|
1786
|
+
{ id: "tc-sibling", name: "Sibling", args: {} },
|
|
1787
|
+
{ id: "tc-rewind", name: "Rewind", args: {} },
|
|
1788
|
+
],
|
|
1789
|
+
// Invoker reports 2 stored messages (system + human) at the
|
|
1790
|
+
// moment the LLM was called.
|
|
1791
|
+
threadLengthAtCall: 2,
|
|
1792
|
+
},
|
|
1793
|
+
{ message: "done", toolCalls: [], threadLengthAtCall: 2 },
|
|
1794
|
+
]),
|
|
1795
|
+
threadOps: ops,
|
|
1796
|
+
tools: { Rewind: rewindTool, Sibling: siblingTool },
|
|
1797
|
+
buildContextMessage: () => "go",
|
|
1798
|
+
});
|
|
1799
|
+
|
|
1800
|
+
const stateManager = createAgentStateManager({
|
|
1801
|
+
initialState: { systemPrompt: "test" },
|
|
1802
|
+
});
|
|
1803
|
+
|
|
1804
|
+
const result = await session.runSession({ stateManager });
|
|
1805
|
+
|
|
1806
|
+
expect(result.exitReason).toBe("completed");
|
|
1807
|
+
|
|
1808
|
+
// Exactly one truncate fired — back to the pre-assistant-message
|
|
1809
|
+
// length that runAgent reported.
|
|
1810
|
+
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1811
|
+
expect(truncateOps).toHaveLength(1);
|
|
1812
|
+
const truncateOp = truncateOps[0];
|
|
1813
|
+
if (!truncateOp) throw new Error("expected truncate op");
|
|
1814
|
+
expect(truncateOp.args[1]).toBe(2);
|
|
1815
|
+
|
|
1816
|
+
// Rewinding tool never appends its own result.
|
|
1817
|
+
const rewindResultAppends = log.filter((l) => {
|
|
1818
|
+
if (l.op !== "appendToolResult") return false;
|
|
1819
|
+
const config = l.args[1] as ToolResultConfig;
|
|
1820
|
+
return config.toolCallId === "tc-rewind";
|
|
1821
|
+
});
|
|
1822
|
+
expect(rewindResultAppends).toHaveLength(0);
|
|
1823
|
+
|
|
1824
|
+
// Two assistant messages expected: one from the rewound turn, one from
|
|
1825
|
+
// the successful retry.
|
|
1826
|
+
const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
|
|
1827
|
+
expect(agentAppends).toHaveLength(2);
|
|
1828
|
+
});
|
|
1829
|
+
|
|
1830
|
+
it("does not rewind when the rewinding tool is no longer present after retry", async () => {
|
|
1831
|
+
const { ops, log } = createMockThreadOps();
|
|
1832
|
+
|
|
1833
|
+
let attempts = 0;
|
|
1834
|
+
const rewindOnce = defineTool({
|
|
1835
|
+
name: "RewindOnce" as const,
|
|
1836
|
+
description: "rewinds once",
|
|
1837
|
+
schema: z.object({}),
|
|
1838
|
+
handler: async () => {
|
|
1839
|
+
attempts += 1;
|
|
1840
|
+
if (attempts === 1) {
|
|
1841
|
+
return { toolResponse: "ignored", data: null, rewind: true };
|
|
1842
|
+
}
|
|
1843
|
+
return { toolResponse: "ok", data: null };
|
|
1844
|
+
},
|
|
1845
|
+
});
|
|
1846
|
+
|
|
1847
|
+
const session = await createSession({
|
|
1848
|
+
agentName: "TestAgent",
|
|
1849
|
+
thread: { mode: "new", threadId: "thread-1" },
|
|
1850
|
+
maxTurns: 5,
|
|
1851
|
+
runAgent: createScriptedRunAgent([
|
|
1852
|
+
{
|
|
1853
|
+
message: "call-1",
|
|
1854
|
+
toolCalls: [{ id: "tc-1", name: "RewindOnce", args: {} }],
|
|
1855
|
+
},
|
|
1856
|
+
{
|
|
1857
|
+
message: "call-2",
|
|
1858
|
+
toolCalls: [{ id: "tc-2", name: "RewindOnce", args: {} }],
|
|
1859
|
+
},
|
|
1860
|
+
{ message: "done", toolCalls: [] },
|
|
1861
|
+
]),
|
|
1862
|
+
threadOps: ops,
|
|
1863
|
+
tools: { RewindOnce: rewindOnce },
|
|
1864
|
+
buildContextMessage: () => "go",
|
|
1865
|
+
});
|
|
1866
|
+
|
|
1867
|
+
const stateManager = createAgentStateManager({
|
|
1868
|
+
initialState: { systemPrompt: "test" },
|
|
1869
|
+
});
|
|
1870
|
+
|
|
1871
|
+
const result = await session.runSession({ stateManager });
|
|
1872
|
+
|
|
1873
|
+
expect(result.exitReason).toBe("completed");
|
|
1874
|
+
expect(result.finalMessage).toBe("done");
|
|
1875
|
+
// Each rewind still consumes a turn from the `maxTurns` budget:
|
|
1876
|
+
// turn 1 (rewound) + turn 2 (successful tool call) + turn 3 (done) = 3.
|
|
1877
|
+
expect(result.usage.turns).toBe(3);
|
|
1878
|
+
expect(attempts).toBe(2);
|
|
1879
|
+
|
|
1880
|
+
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1881
|
+
expect(truncateOps).toHaveLength(1);
|
|
1882
|
+
});
|
|
1883
|
+
|
|
1884
|
+
it("bails out with max_turns when a tool keeps requesting rewind", async () => {
|
|
1885
|
+
const { ops, log } = createMockThreadOps();
|
|
1886
|
+
|
|
1887
|
+
let attempts = 0;
|
|
1888
|
+
const alwaysRewind = defineTool({
|
|
1889
|
+
name: "AlwaysRewind" as const,
|
|
1890
|
+
description: "always rewinds",
|
|
1891
|
+
schema: z.object({}),
|
|
1892
|
+
handler: async () => {
|
|
1893
|
+
attempts += 1;
|
|
1894
|
+
return { toolResponse: "ignored", data: null, rewind: true };
|
|
1895
|
+
},
|
|
1896
|
+
});
|
|
1897
|
+
|
|
1898
|
+
const session = await createSession({
|
|
1899
|
+
agentName: "TestAgent",
|
|
1900
|
+
thread: { mode: "new", threadId: "thread-1" },
|
|
1901
|
+
maxTurns: 3,
|
|
1902
|
+
runAgent: createScriptedRunAgent([
|
|
1903
|
+
{ message: "t1", toolCalls: [{ id: "tc-1", name: "AlwaysRewind", args: {} }] },
|
|
1904
|
+
{ message: "t2", toolCalls: [{ id: "tc-2", name: "AlwaysRewind", args: {} }] },
|
|
1905
|
+
{ message: "t3", toolCalls: [{ id: "tc-3", name: "AlwaysRewind", args: {} }] },
|
|
1906
|
+
{ message: "t4", toolCalls: [{ id: "tc-4", name: "AlwaysRewind", args: {} }] },
|
|
1907
|
+
]),
|
|
1908
|
+
threadOps: ops,
|
|
1909
|
+
tools: { AlwaysRewind: alwaysRewind },
|
|
1910
|
+
buildContextMessage: () => "go",
|
|
1911
|
+
});
|
|
1912
|
+
|
|
1913
|
+
const stateManager = createAgentStateManager({
|
|
1914
|
+
initialState: { systemPrompt: "test" },
|
|
1915
|
+
});
|
|
1916
|
+
|
|
1917
|
+
const result = await session.runSession({ stateManager });
|
|
1918
|
+
|
|
1919
|
+
expect(result.exitReason).toBe("max_turns");
|
|
1920
|
+
expect(result.usage.turns).toBe(3);
|
|
1921
|
+
expect(attempts).toBe(3);
|
|
1922
|
+
|
|
1923
|
+
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1924
|
+
expect(truncateOps).toHaveLength(3);
|
|
1925
|
+
});
|
|
1662
1926
|
});
|
|
@@ -33,6 +33,16 @@ vi.mock("@temporalio/workflow", () => {
|
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
+
class MockCancellationScope {
|
|
37
|
+
cancellable: boolean;
|
|
38
|
+
constructor(opts?: { cancellable?: boolean }) {
|
|
39
|
+
this.cancellable = opts?.cancellable ?? true;
|
|
40
|
+
}
|
|
41
|
+
async run<T>(fn: () => Promise<T>): Promise<T> {
|
|
42
|
+
return fn();
|
|
43
|
+
}
|
|
44
|
+
cancel(): void {}
|
|
45
|
+
}
|
|
36
46
|
return {
|
|
37
47
|
proxyActivities: <T>() => ({}) as T,
|
|
38
48
|
condition: async (fn: () => boolean) => fn(),
|
|
@@ -46,6 +56,8 @@ vi.mock("@temporalio/workflow", () => {
|
|
|
46
56
|
uuid4: () =>
|
|
47
57
|
`00000000-0000-0000-0000-${String(++idCounter).padStart(12, "0")}`,
|
|
48
58
|
ApplicationFailure: MockApplicationFailure,
|
|
59
|
+
CancellationScope: MockCancellationScope,
|
|
60
|
+
isCancellation: (_err: unknown) => false,
|
|
49
61
|
log: {
|
|
50
62
|
trace: () => {},
|
|
51
63
|
debug: () => {},
|
|
@@ -105,6 +117,9 @@ function createMockThreadOps() {
|
|
|
105
117
|
forkThread: async (source, target) => {
|
|
106
118
|
log.push({ op: "forkThread", args: [source, target] });
|
|
107
119
|
},
|
|
120
|
+
truncateThread: async (threadId, length) => {
|
|
121
|
+
log.push({ op: "truncateThread", args: [threadId, length] });
|
|
122
|
+
},
|
|
108
123
|
});
|
|
109
124
|
|
|
110
125
|
return { ops, log };
|
|
@@ -123,12 +138,18 @@ function createScriptedRunAgent(
|
|
|
123
138
|
return async () => {
|
|
124
139
|
const turn = turns[call++];
|
|
125
140
|
if (!turn) {
|
|
126
|
-
return {
|
|
141
|
+
return {
|
|
142
|
+
message: "done",
|
|
143
|
+
rawToolCalls: [],
|
|
144
|
+
usage: undefined,
|
|
145
|
+
threadLengthAtCall: 0,
|
|
146
|
+
};
|
|
127
147
|
}
|
|
128
148
|
return {
|
|
129
149
|
message: turn.message,
|
|
130
150
|
rawToolCalls: turn.toolCalls,
|
|
131
151
|
usage: turn.usage,
|
|
152
|
+
threadLengthAtCall: 0,
|
|
132
153
|
};
|
|
133
154
|
};
|
|
134
155
|
}
|
|
@@ -7,7 +7,11 @@ import {
|
|
|
7
7
|
} from "@temporalio/workflow";
|
|
8
8
|
import type { SessionExitReason } from "../types";
|
|
9
9
|
import type { SessionConfig, ZeitlichSession } from "./types";
|
|
10
|
-
import type {
|
|
10
|
+
import type {
|
|
11
|
+
SandboxCreateOptions,
|
|
12
|
+
SandboxOps,
|
|
13
|
+
SandboxSnapshot,
|
|
14
|
+
} from "../sandbox/types";
|
|
11
15
|
import type {
|
|
12
16
|
AgentState,
|
|
13
17
|
AgentStateManager,
|
|
@@ -142,6 +146,7 @@ export async function createSession<
|
|
|
142
146
|
appendSystemMessage,
|
|
143
147
|
appendAgentMessage,
|
|
144
148
|
forkThread,
|
|
149
|
+
truncateThread,
|
|
145
150
|
} = threadOps;
|
|
146
151
|
|
|
147
152
|
const plugins: ToolMap[string][] = [];
|
|
@@ -253,8 +258,14 @@ export async function createSession<
|
|
|
253
258
|
nonRetryable: true,
|
|
254
259
|
});
|
|
255
260
|
}
|
|
261
|
+
const forkInit = sandboxInit as {
|
|
262
|
+
mode: "fork";
|
|
263
|
+
sandboxId: string;
|
|
264
|
+
options?: SandboxCreateOptions;
|
|
265
|
+
};
|
|
256
266
|
sandboxId = await sandboxOps.forkSandbox(
|
|
257
|
-
|
|
267
|
+
forkInit.sandboxId,
|
|
268
|
+
forkInit.options
|
|
258
269
|
);
|
|
259
270
|
sandboxOwned = true;
|
|
260
271
|
} else if (sandboxMode === "from-snapshot") {
|
|
@@ -264,10 +275,15 @@ export async function createSession<
|
|
|
264
275
|
nonRetryable: true,
|
|
265
276
|
});
|
|
266
277
|
}
|
|
267
|
-
const
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
278
|
+
const restoreInit = sandboxInit as {
|
|
279
|
+
mode: "from-snapshot";
|
|
280
|
+
snapshot: SandboxSnapshot;
|
|
281
|
+
options?: SandboxCreateOptions;
|
|
282
|
+
};
|
|
283
|
+
sandboxId = await sandboxOps.restoreSandbox(
|
|
284
|
+
restoreInit.snapshot,
|
|
285
|
+
restoreInit.options
|
|
286
|
+
);
|
|
271
287
|
sandboxOwned = true;
|
|
272
288
|
} else if (sandboxOps) {
|
|
273
289
|
const skillFiles = skills ? collectSkillFiles(skills) : undefined;
|
|
@@ -393,13 +409,24 @@ export async function createSession<
|
|
|
393
409
|
|
|
394
410
|
stateManager.setTools(toolRouter.getToolDefinitions());
|
|
395
411
|
|
|
396
|
-
const {
|
|
412
|
+
const {
|
|
413
|
+
message,
|
|
414
|
+
rawToolCalls,
|
|
415
|
+
usage,
|
|
416
|
+
threadLengthAtCall,
|
|
417
|
+
} = await runAgent({
|
|
397
418
|
threadId,
|
|
398
419
|
threadKey,
|
|
399
420
|
agentName,
|
|
400
421
|
metadata,
|
|
401
422
|
});
|
|
402
423
|
|
|
424
|
+
// The invoker loaded the thread right before calling the LLM,
|
|
425
|
+
// so it already knows how many messages were stored at that
|
|
426
|
+
// point — we use that directly as the rewind snapshot instead
|
|
427
|
+
// of a separate activity round-trip.
|
|
428
|
+
const preAssistantLength = threadLengthAtCall;
|
|
429
|
+
|
|
403
430
|
await appendAgentMessage(threadId, uuid4(), message, threadKey);
|
|
404
431
|
|
|
405
432
|
if (usage) {
|
|
@@ -452,6 +479,33 @@ export async function createSession<
|
|
|
452
479
|
}
|
|
453
480
|
}
|
|
454
481
|
|
|
482
|
+
const rewind = toolCallResults.rewind;
|
|
483
|
+
if (rewind) {
|
|
484
|
+
log.info("rewinding turn", {
|
|
485
|
+
agentName,
|
|
486
|
+
threadId,
|
|
487
|
+
turn: currentTurn,
|
|
488
|
+
toolCallId: rewind.toolCallId,
|
|
489
|
+
toolName: rewind.toolName,
|
|
490
|
+
});
|
|
491
|
+
if (preAssistantLength === undefined) {
|
|
492
|
+
throw ApplicationFailure.create({
|
|
493
|
+
message:
|
|
494
|
+
"Rewind requested but runAgent did not report " +
|
|
495
|
+
"`threadLengthAtCall`; the adapter must populate it to " +
|
|
496
|
+
"support rewinds.",
|
|
497
|
+
nonRetryable: true,
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
// Drop the assistant message + any already-saved tool results
|
|
501
|
+
// so the LLM call can be retried from the pre-assistant state.
|
|
502
|
+
// The turn counter is intentionally NOT rolled back — each
|
|
503
|
+
// rewind still consumes one of the `maxTurns` budget so a
|
|
504
|
+
// misbehaving tool cannot spin the session forever.
|
|
505
|
+
await truncateThread(threadId, preAssistantLength, threadKey);
|
|
506
|
+
continue;
|
|
507
|
+
}
|
|
508
|
+
|
|
455
509
|
if (stateManager.getStatus() === "WAITING_FOR_INPUT") {
|
|
456
510
|
const conditionMet = await condition(
|
|
457
511
|
() => stateManager.getStatus() === "RUNNING",
|
package/src/lib/session/types.ts
CHANGED
|
@@ -59,6 +59,18 @@ export interface ThreadOps<TContent = string> {
|
|
|
59
59
|
targetThreadId: string,
|
|
60
60
|
threadKey?: string
|
|
61
61
|
): Promise<void>;
|
|
62
|
+
/**
|
|
63
|
+
* Truncate the thread back to `length` messages. Used by the session's
|
|
64
|
+
* rewind flow to roll the thread back before retrying a turn. The
|
|
65
|
+
* session obtains `length` from `AgentResponse.threadLengthAtCall`,
|
|
66
|
+
* which the model invoker computes for free from the messages it
|
|
67
|
+
* loaded before invoking the LLM.
|
|
68
|
+
*/
|
|
69
|
+
truncateThread(
|
|
70
|
+
threadId: string,
|
|
71
|
+
length: number,
|
|
72
|
+
threadKey?: string
|
|
73
|
+
): Promise<void>;
|
|
62
74
|
}
|
|
63
75
|
|
|
64
76
|
/**
|