zeitlich 0.2.37 → 0.2.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/README.md +18 -0
  2. package/dist/{activities-Bb-nAjwQ.d.ts → activities-Bmu7XnaG.d.ts} +4 -4
  3. package/dist/{activities-vkI4_3CC.d.cts → activities-ByBFLvm2.d.cts} +4 -4
  4. package/dist/adapter-id-BB-mmrts.d.cts +17 -0
  5. package/dist/adapter-id-BB-mmrts.d.ts +17 -0
  6. package/dist/adapter-id-CMwVrVqv.d.cts +17 -0
  7. package/dist/adapter-id-CMwVrVqv.d.ts +17 -0
  8. package/dist/adapter-id-CbY2zeSt.d.cts +17 -0
  9. package/dist/adapter-id-CbY2zeSt.d.ts +17 -0
  10. package/dist/adapters/sandbox/bedrock/index.cjs +3 -3
  11. package/dist/adapters/sandbox/bedrock/index.cjs.map +1 -1
  12. package/dist/adapters/sandbox/bedrock/index.d.cts +6 -6
  13. package/dist/adapters/sandbox/bedrock/index.d.ts +6 -6
  14. package/dist/adapters/sandbox/bedrock/index.js +3 -3
  15. package/dist/adapters/sandbox/bedrock/index.js.map +1 -1
  16. package/dist/adapters/sandbox/bedrock/workflow.d.cts +2 -2
  17. package/dist/adapters/sandbox/bedrock/workflow.d.ts +2 -2
  18. package/dist/adapters/sandbox/daytona/index.cjs +3 -3
  19. package/dist/adapters/sandbox/daytona/index.cjs.map +1 -1
  20. package/dist/adapters/sandbox/daytona/index.d.cts +4 -4
  21. package/dist/adapters/sandbox/daytona/index.d.ts +4 -4
  22. package/dist/adapters/sandbox/daytona/index.js +3 -3
  23. package/dist/adapters/sandbox/daytona/index.js.map +1 -1
  24. package/dist/adapters/sandbox/daytona/workflow.d.cts +1 -1
  25. package/dist/adapters/sandbox/daytona/workflow.d.ts +1 -1
  26. package/dist/adapters/sandbox/e2b/index.cjs +26 -14
  27. package/dist/adapters/sandbox/e2b/index.cjs.map +1 -1
  28. package/dist/adapters/sandbox/e2b/index.d.cts +24 -4
  29. package/dist/adapters/sandbox/e2b/index.d.ts +24 -4
  30. package/dist/adapters/sandbox/e2b/index.js +26 -14
  31. package/dist/adapters/sandbox/e2b/index.js.map +1 -1
  32. package/dist/adapters/sandbox/e2b/workflow.d.cts +1 -1
  33. package/dist/adapters/sandbox/e2b/workflow.d.ts +1 -1
  34. package/dist/adapters/sandbox/inmemory/index.cjs +3 -3
  35. package/dist/adapters/sandbox/inmemory/index.cjs.map +1 -1
  36. package/dist/adapters/sandbox/inmemory/index.d.cts +4 -4
  37. package/dist/adapters/sandbox/inmemory/index.d.ts +4 -4
  38. package/dist/adapters/sandbox/inmemory/index.js +3 -3
  39. package/dist/adapters/sandbox/inmemory/index.js.map +1 -1
  40. package/dist/adapters/sandbox/inmemory/workflow.d.cts +1 -1
  41. package/dist/adapters/sandbox/inmemory/workflow.d.ts +1 -1
  42. package/dist/adapters/thread/anthropic/index.cjs +150 -13
  43. package/dist/adapters/thread/anthropic/index.cjs.map +1 -1
  44. package/dist/adapters/thread/anthropic/index.d.cts +9 -8
  45. package/dist/adapters/thread/anthropic/index.d.ts +9 -8
  46. package/dist/adapters/thread/anthropic/index.js +150 -14
  47. package/dist/adapters/thread/anthropic/index.js.map +1 -1
  48. package/dist/adapters/thread/anthropic/workflow.cjs +9 -3
  49. package/dist/adapters/thread/anthropic/workflow.cjs.map +1 -1
  50. package/dist/adapters/thread/anthropic/workflow.d.cts +6 -5
  51. package/dist/adapters/thread/anthropic/workflow.d.ts +6 -5
  52. package/dist/adapters/thread/anthropic/workflow.js +9 -4
  53. package/dist/adapters/thread/anthropic/workflow.js.map +1 -1
  54. package/dist/adapters/thread/google-genai/index.cjs +154 -13
  55. package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
  56. package/dist/adapters/thread/google-genai/index.d.cts +6 -5
  57. package/dist/adapters/thread/google-genai/index.d.ts +6 -5
  58. package/dist/adapters/thread/google-genai/index.js +154 -14
  59. package/dist/adapters/thread/google-genai/index.js.map +1 -1
  60. package/dist/adapters/thread/google-genai/workflow.cjs +9 -3
  61. package/dist/adapters/thread/google-genai/workflow.cjs.map +1 -1
  62. package/dist/adapters/thread/google-genai/workflow.d.cts +6 -5
  63. package/dist/adapters/thread/google-genai/workflow.d.ts +6 -5
  64. package/dist/adapters/thread/google-genai/workflow.js +9 -4
  65. package/dist/adapters/thread/google-genai/workflow.js.map +1 -1
  66. package/dist/adapters/thread/index.cjs +16 -0
  67. package/dist/adapters/thread/index.cjs.map +1 -0
  68. package/dist/adapters/thread/index.d.cts +34 -0
  69. package/dist/adapters/thread/index.d.ts +34 -0
  70. package/dist/adapters/thread/index.js +12 -0
  71. package/dist/adapters/thread/index.js.map +1 -0
  72. package/dist/adapters/thread/langchain/index.cjs +149 -14
  73. package/dist/adapters/thread/langchain/index.cjs.map +1 -1
  74. package/dist/adapters/thread/langchain/index.d.cts +9 -8
  75. package/dist/adapters/thread/langchain/index.d.ts +9 -8
  76. package/dist/adapters/thread/langchain/index.js +149 -15
  77. package/dist/adapters/thread/langchain/index.js.map +1 -1
  78. package/dist/adapters/thread/langchain/workflow.cjs +9 -3
  79. package/dist/adapters/thread/langchain/workflow.cjs.map +1 -1
  80. package/dist/adapters/thread/langchain/workflow.d.cts +6 -5
  81. package/dist/adapters/thread/langchain/workflow.d.ts +6 -5
  82. package/dist/adapters/thread/langchain/workflow.js +9 -4
  83. package/dist/adapters/thread/langchain/workflow.js.map +1 -1
  84. package/dist/index.cjs +367 -59
  85. package/dist/index.cjs.map +1 -1
  86. package/dist/index.d.cts +11 -11
  87. package/dist/index.d.ts +11 -11
  88. package/dist/index.js +365 -61
  89. package/dist/index.js.map +1 -1
  90. package/dist/{proxy-DEtowJyd.d.cts → proxy-BAKzNGRq.d.cts} +1 -1
  91. package/dist/{proxy-0smGKvx8.d.ts → proxy-DO_MXbY4.d.ts} +1 -1
  92. package/dist/{thread-manager-C-C4pI2z.d.ts → thread-manager-CcRXasqs.d.ts} +2 -2
  93. package/dist/{thread-manager-D4vgzYrh.d.cts → thread-manager-ClwSaUnj.d.cts} +2 -2
  94. package/dist/{thread-manager-3fszQih4.d.ts → thread-manager-D-7lp1JK.d.ts} +2 -2
  95. package/dist/{thread-manager-CzYln2OC.d.cts → thread-manager-Y8Ucf0Tf.d.cts} +2 -2
  96. package/dist/{types-CPKDl-y_.d.ts → types-Bcbiq8iv.d.cts} +195 -22
  97. package/dist/{types-CNuWnvy9.d.ts → types-DAsQ21Rt.d.ts} +1 -1
  98. package/dist/{types-B37hKoWA.d.ts → types-DpHTX-iO.d.ts} +58 -1
  99. package/dist/{types-BO7Yju20.d.cts → types-Dt8-HBBT.d.ts} +195 -22
  100. package/dist/{types-D08CXPh8.d.cts → types-hFFi-Zd9.d.cts} +58 -1
  101. package/dist/{types-DWEUmYAJ.d.cts → types-lm8tMNJQ.d.cts} +1 -1
  102. package/dist/{types-tQL9njTu.d.cts → types-yx0LzPGn.d.cts} +21 -7
  103. package/dist/{types-tQL9njTu.d.ts → types-yx0LzPGn.d.ts} +21 -7
  104. package/dist/{workflow-CjXHbZZc.d.ts → workflow-Bmf9EtDW.d.ts} +83 -3
  105. package/dist/{workflow-Do_lzJpT.d.cts → workflow-Bx9utBwb.d.cts} +83 -3
  106. package/dist/workflow.cjs +266 -39
  107. package/dist/workflow.cjs.map +1 -1
  108. package/dist/workflow.d.cts +3 -3
  109. package/dist/workflow.d.ts +3 -3
  110. package/dist/workflow.js +264 -41
  111. package/dist/workflow.js.map +1 -1
  112. package/package.json +12 -2
  113. package/src/adapters/sandbox/bedrock/index.ts +12 -3
  114. package/src/adapters/sandbox/daytona/index.ts +12 -3
  115. package/src/adapters/sandbox/e2b/index.ts +36 -14
  116. package/src/adapters/sandbox/e2b/types.ts +16 -0
  117. package/src/adapters/sandbox/inmemory/index.ts +12 -3
  118. package/src/adapters/thread/adapter-id.test.ts +42 -0
  119. package/src/adapters/thread/anthropic/activities.ts +40 -5
  120. package/src/adapters/thread/anthropic/adapter-id.ts +16 -0
  121. package/src/adapters/thread/anthropic/fork-transform.test.ts +291 -0
  122. package/src/adapters/thread/anthropic/index.ts +3 -0
  123. package/src/adapters/thread/anthropic/model-invoker.ts +7 -1
  124. package/src/adapters/thread/anthropic/proxy.ts +3 -2
  125. package/src/adapters/thread/anthropic/thread-manager.ts +27 -1
  126. package/src/adapters/thread/google-genai/activities.ts +44 -5
  127. package/src/adapters/thread/google-genai/adapter-id.ts +16 -0
  128. package/src/adapters/thread/google-genai/fork-transform.test.ts +149 -0
  129. package/src/adapters/thread/google-genai/index.ts +3 -0
  130. package/src/adapters/thread/google-genai/model-invoker.ts +8 -2
  131. package/src/adapters/thread/google-genai/proxy.ts +3 -2
  132. package/src/adapters/thread/google-genai/thread-manager.ts +27 -1
  133. package/src/adapters/thread/index.ts +39 -0
  134. package/src/adapters/thread/langchain/activities.ts +40 -5
  135. package/src/adapters/thread/langchain/adapter-id.ts +16 -0
  136. package/src/adapters/thread/langchain/fork-transform.test.ts +142 -0
  137. package/src/adapters/thread/langchain/index.ts +3 -0
  138. package/src/adapters/thread/langchain/model-invoker.ts +7 -1
  139. package/src/adapters/thread/langchain/proxy.ts +3 -2
  140. package/src/adapters/thread/langchain/thread-manager.ts +27 -1
  141. package/src/lib/lifecycle.ts +14 -5
  142. package/src/lib/model/types.ts +7 -0
  143. package/src/lib/sandbox/manager.ts +26 -18
  144. package/src/lib/sandbox/types.ts +27 -7
  145. package/src/lib/session/session-edge-cases.integration.test.ts +336 -4
  146. package/src/lib/session/session.integration.test.ts +192 -2
  147. package/src/lib/session/session.ts +102 -8
  148. package/src/lib/session/types.ts +66 -3
  149. package/src/lib/state/index.ts +1 -0
  150. package/src/lib/state/manager.integration.test.ts +109 -0
  151. package/src/lib/state/manager.ts +38 -8
  152. package/src/lib/state/types.ts +25 -0
  153. package/src/lib/subagent/handler.ts +124 -11
  154. package/src/lib/subagent/index.ts +5 -1
  155. package/src/lib/subagent/subagent.integration.test.ts +628 -104
  156. package/src/lib/subagent/types.ts +63 -14
  157. package/src/lib/subagent/workflow.ts +29 -2
  158. package/src/lib/thread/index.ts +5 -0
  159. package/src/lib/thread/keys.test.ts +101 -0
  160. package/src/lib/thread/keys.ts +94 -0
  161. package/src/lib/thread/manager.test.ts +139 -0
  162. package/src/lib/thread/manager.ts +105 -9
  163. package/src/lib/thread/proxy.ts +3 -0
  164. package/src/lib/thread/types.ts +64 -1
  165. package/src/lib/tool-router/index.ts +2 -0
  166. package/src/lib/tool-router/router-edge-cases.integration.test.ts +92 -0
  167. package/src/lib/tool-router/router.integration.test.ts +12 -0
  168. package/src/lib/tool-router/router.ts +89 -16
  169. package/src/lib/tool-router/types.ts +42 -1
  170. package/src/lib/types.ts +12 -0
  171. package/src/workflow.ts +14 -1
  172. package/tsup.config.ts +1 -0
@@ -2,6 +2,7 @@ import { describe, expect, it, vi, beforeEach } from "vitest";
2
2
  import { z } from "zod";
3
3
  import type { ToolResultConfig, TokenUsage } from "../types";
4
4
  import type { ThreadOps } from "./types";
5
+ import type { PersistedThreadState } from "../state/types";
5
6
  import type { RunAgentActivity } from "../model/types";
6
7
  import type { RawToolCall } from "../tool-router/types";
7
8
  import type { SandboxOps } from "../sandbox/types";
@@ -29,6 +30,16 @@ vi.mock("@temporalio/workflow", () => {
29
30
  }
30
31
  }
31
32
 
33
+ class MockCancellationScope {
34
+ cancellable: boolean;
35
+ constructor(opts?: { cancellable?: boolean }) {
36
+ this.cancellable = opts?.cancellable ?? true;
37
+ }
38
+ async run<T>(fn: () => Promise<T>): Promise<T> {
39
+ return fn();
40
+ }
41
+ cancel(): void {}
42
+ }
32
43
  return {
33
44
  proxyActivities: <T>() => ({}) as T,
34
45
  condition: async (fn: () => boolean) => fn(),
@@ -42,6 +53,8 @@ vi.mock("@temporalio/workflow", () => {
42
53
  uuid4: () =>
43
54
  `00000000-0000-0000-0000-${String(++idCounter).padStart(12, "0")}`,
44
55
  ApplicationFailure: MockApplicationFailure,
56
+ CancellationScope: MockCancellationScope,
57
+ isCancellation: (_err: unknown) => false,
45
58
  log: {
46
59
  trace: () => {},
47
60
  debug: () => {},
@@ -83,6 +96,7 @@ function toActivityInterface<TContent = string>(
83
96
 
84
97
  function createMockThreadOps() {
85
98
  const log: { op: string; args: unknown[] }[] = [];
99
+ const stateStore = new Map<string, PersistedThreadState>();
86
100
  const ops = toActivityInterface({
87
101
  initializeThread: async (threadId) => {
88
102
  log.push({ op: "initializeThread", args: [threadId] });
@@ -101,19 +115,38 @@ function createMockThreadOps() {
101
115
  },
102
116
  forkThread: async (source, target) => {
103
117
  log.push({ op: "forkThread", args: [source, target] });
118
+ const src = stateStore.get(source);
119
+ if (src) stateStore.set(target, src);
120
+ },
121
+ truncateThread: async (threadId, messageId) => {
122
+ log.push({ op: "truncateThread", args: [threadId, messageId] });
123
+ },
124
+ loadThreadState: async (threadId) => {
125
+ log.push({ op: "loadThreadState", args: [threadId] });
126
+ return stateStore.get(threadId) ?? null;
127
+ },
128
+ saveThreadState: async (threadId, state) => {
129
+ log.push({ op: "saveThreadState", args: [threadId, state] });
130
+ stateStore.set(threadId, state);
104
131
  },
105
132
  });
106
- return { ops, log };
133
+ return { ops, log, stateStore };
107
134
  }
108
135
 
109
136
  function createScriptedRunAgent(
110
- turns: TurnScript[]
137
+ turns: TurnScript[],
138
+ assistantIdLog?: string[]
111
139
  ): RunAgentActivity<unknown> {
112
140
  let call = 0;
113
- return async () => {
141
+ return async (config) => {
142
+ assistantIdLog?.push(config.assistantMessageId);
114
143
  const turn = turns[call++];
115
144
  if (!turn) {
116
- return { message: "done", rawToolCalls: [], usage: undefined };
145
+ return {
146
+ message: "done",
147
+ rawToolCalls: [],
148
+ usage: undefined,
149
+ };
117
150
  }
118
151
  return {
119
152
  message: turn.message,
@@ -775,6 +808,16 @@ describe("createSession edge cases", () => {
775
808
  forkThread: async (source, target) => {
776
809
  log.push({ op: "forkThread", args: [source, target] });
777
810
  },
811
+ truncateThread: async (threadId, messageId) => {
812
+ log.push({ op: "truncateThread", args: [threadId, messageId] });
813
+ },
814
+ loadThreadState: async (threadId) => {
815
+ log.push({ op: "loadThreadState", args: [threadId] });
816
+ return null;
817
+ },
818
+ saveThreadState: async (threadId, state) => {
819
+ log.push({ op: "saveThreadState", args: [threadId, state] });
820
+ },
778
821
  });
779
822
 
780
823
  const session = await createSession<
@@ -1659,4 +1702,293 @@ describe("createSession edge cases", () => {
1659
1702
  expect(sandboxLog).not.toContain("pause:kept-sb");
1660
1703
  expect(sandboxLog).not.toContain("destroy:kept-sb");
1661
1704
  });
1705
+
1706
+ // --- Rewind flow: tool requests rewind and turn is retried -------------
1707
+ //
1708
+ // The session no longer issues an explicit truncateThread on rewind.
1709
+ // Instead it reuses the pre-generated assistantMessageId for the retry,
1710
+ // and the runAgent activity itself truncates the thread from that id
1711
+ // on entry. These tests assert the observable behaviour: the rewinding
1712
+ // tool's result is not appended, turns are consumed as expected, and
1713
+ // the retry invocation receives the same assistantMessageId so the
1714
+ // invoker can wipe the prior attempt.
1715
+
1716
+ it("rewinds the turn when a tool handler returns rewind:true", async () => {
1717
+ const { ops, log } = createMockThreadOps();
1718
+
1719
+ let rewindAttempts = 0;
1720
+ const rewindTool = defineTool({
1721
+ name: "Rewind" as const,
1722
+ description: "rewinds once then succeeds",
1723
+ schema: z.object({}),
1724
+ handler: async () => {
1725
+ rewindAttempts += 1;
1726
+ if (rewindAttempts === 1) {
1727
+ return {
1728
+ toolResponse: "ignored",
1729
+ data: null,
1730
+ rewind: true,
1731
+ };
1732
+ }
1733
+ return { toolResponse: "ok", data: null };
1734
+ },
1735
+ });
1736
+
1737
+ const assistantIds: string[] = [];
1738
+ const session = await createSession({
1739
+ agentName: "TestAgent",
1740
+ thread: { mode: "new", threadId: "thread-1" },
1741
+ runAgent: createScriptedRunAgent(
1742
+ [
1743
+ {
1744
+ message: "attempt-1",
1745
+ toolCalls: [{ id: "tc-1", name: "Rewind", args: {} }],
1746
+ },
1747
+ {
1748
+ message: "attempt-2",
1749
+ toolCalls: [{ id: "tc-2", name: "Rewind", args: {} }],
1750
+ },
1751
+ { message: "done", toolCalls: [] },
1752
+ ],
1753
+ assistantIds
1754
+ ),
1755
+ threadOps: ops,
1756
+ tools: { Rewind: rewindTool },
1757
+ buildContextMessage: () => "go",
1758
+ });
1759
+
1760
+ const stateManager = createAgentStateManager({
1761
+ initialState: { systemPrompt: "test" },
1762
+ });
1763
+
1764
+ const result = await session.runSession({ stateManager });
1765
+
1766
+ expect(result.exitReason).toBe("completed");
1767
+ expect(result.finalMessage).toBe("done");
1768
+ expect(rewindAttempts).toBe(2);
1769
+
1770
+ // Session does not call truncateThread directly on rewind — the
1771
+ // invoker truncates on entry via the reused assistantMessageId.
1772
+ const truncateOps = log.filter((l) => l.op === "truncateThread");
1773
+ expect(truncateOps).toHaveLength(0);
1774
+
1775
+ // The first and second calls reuse the same assistantMessageId
1776
+ // (rewind retry), then the third uses a fresh id.
1777
+ expect(assistantIds).toHaveLength(3);
1778
+ expect(assistantIds[0]).toBe(assistantIds[1]);
1779
+ expect(assistantIds[1]).not.toBe(assistantIds[2]);
1780
+
1781
+ const noRewindToolResult = log.filter((l) => {
1782
+ if (l.op !== "appendToolResult") return false;
1783
+ const config = l.args[1] as ToolResultConfig;
1784
+ return config.toolCallId === "tc-1";
1785
+ });
1786
+ expect(noRewindToolResult).toHaveLength(0);
1787
+
1788
+ const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
1789
+ expect(agentAppends).toHaveLength(3);
1790
+ // The first two assistant appends reuse the same id — the second
1791
+ // will be a no-op in the real adapter because truncateFromId clears
1792
+ // the dedup marker for the old one before the retry invocation.
1793
+ const asstIds = agentAppends.map((l) => l.args[1]);
1794
+ expect(asstIds[0]).toBe(asstIds[1]);
1795
+ expect(asstIds[1]).not.toBe(asstIds[2]);
1796
+ });
1797
+
1798
+ it("reuses the assistantMessageId on rewind even with sibling tool calls", async () => {
1799
+ const { ops, log } = createMockThreadOps();
1800
+
1801
+ let rewindFired = false;
1802
+
1803
+ const siblingTool = defineTool({
1804
+ name: "Sibling" as const,
1805
+ description: "sibling",
1806
+ schema: z.object({}),
1807
+ handler: async () => ({ toolResponse: "sibling-ok", data: null }),
1808
+ });
1809
+
1810
+ const rewindTool = defineTool({
1811
+ name: "Rewind" as const,
1812
+ description: "rewinds",
1813
+ schema: z.object({}),
1814
+ handler: async () => {
1815
+ if (!rewindFired) {
1816
+ rewindFired = true;
1817
+ return { toolResponse: "ignored", data: null, rewind: true };
1818
+ }
1819
+ return { toolResponse: "ok", data: null };
1820
+ },
1821
+ });
1822
+
1823
+ const assistantIds: string[] = [];
1824
+ const session = await createSession({
1825
+ agentName: "TestAgent",
1826
+ thread: { mode: "new", threadId: "thread-1" },
1827
+ runAgent: createScriptedRunAgent(
1828
+ [
1829
+ {
1830
+ message: "parallel",
1831
+ toolCalls: [
1832
+ { id: "tc-sibling", name: "Sibling", args: {} },
1833
+ { id: "tc-rewind", name: "Rewind", args: {} },
1834
+ ],
1835
+ },
1836
+ { message: "done", toolCalls: [] },
1837
+ ],
1838
+ assistantIds
1839
+ ),
1840
+ threadOps: ops,
1841
+ tools: { Rewind: rewindTool, Sibling: siblingTool },
1842
+ buildContextMessage: () => "go",
1843
+ });
1844
+
1845
+ const stateManager = createAgentStateManager({
1846
+ initialState: { systemPrompt: "test" },
1847
+ });
1848
+
1849
+ const result = await session.runSession({ stateManager });
1850
+
1851
+ expect(result.exitReason).toBe("completed");
1852
+
1853
+ // No explicit truncate from the session — the invoker will do it
1854
+ // on entry using the reused assistantMessageId.
1855
+ const truncateOps = log.filter((l) => l.op === "truncateThread");
1856
+ expect(truncateOps).toHaveLength(0);
1857
+
1858
+ // The rewound turn and its retry share one assistantMessageId; the
1859
+ // scripted `done` turn is that retry, so only two invocations occur.
1860
+ expect(assistantIds).toHaveLength(2);
1861
+ expect(assistantIds[0]).toBe(assistantIds[1]);
1862
+
1863
+ // Rewinding tool never appends its own result.
1864
+ const rewindResultAppends = log.filter((l) => {
1865
+ if (l.op !== "appendToolResult") return false;
1866
+ const config = l.args[1] as ToolResultConfig;
1867
+ return config.toolCallId === "tc-rewind";
1868
+ });
1869
+ expect(rewindResultAppends).toHaveLength(0);
1870
+
1871
+ // Two assistant messages expected: one from the rewound turn, one
1872
+ // from the successful retry — sharing the same id.
1873
+ const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
1874
+ expect(agentAppends).toHaveLength(2);
1875
+ expect(agentAppends[0]?.args[1]).toBe(agentAppends[1]?.args[1]);
1876
+ });
1877
+
1878
+ it("does not rewind when the rewinding tool is no longer present after retry", async () => {
1879
+ const { ops, log } = createMockThreadOps();
1880
+
1881
+ let attempts = 0;
1882
+ const rewindOnce = defineTool({
1883
+ name: "RewindOnce" as const,
1884
+ description: "rewinds once",
1885
+ schema: z.object({}),
1886
+ handler: async () => {
1887
+ attempts += 1;
1888
+ if (attempts === 1) {
1889
+ return { toolResponse: "ignored", data: null, rewind: true };
1890
+ }
1891
+ return { toolResponse: "ok", data: null };
1892
+ },
1893
+ });
1894
+
1895
+ const assistantIds: string[] = [];
1896
+ const session = await createSession({
1897
+ agentName: "TestAgent",
1898
+ thread: { mode: "new", threadId: "thread-1" },
1899
+ maxTurns: 5,
1900
+ runAgent: createScriptedRunAgent(
1901
+ [
1902
+ {
1903
+ message: "call-1",
1904
+ toolCalls: [{ id: "tc-1", name: "RewindOnce", args: {} }],
1905
+ },
1906
+ {
1907
+ message: "call-2",
1908
+ toolCalls: [{ id: "tc-2", name: "RewindOnce", args: {} }],
1909
+ },
1910
+ { message: "done", toolCalls: [] },
1911
+ ],
1912
+ assistantIds
1913
+ ),
1914
+ threadOps: ops,
1915
+ tools: { RewindOnce: rewindOnce },
1916
+ buildContextMessage: () => "go",
1917
+ });
1918
+
1919
+ const stateManager = createAgentStateManager({
1920
+ initialState: { systemPrompt: "test" },
1921
+ });
1922
+
1923
+ const result = await session.runSession({ stateManager });
1924
+
1925
+ expect(result.exitReason).toBe("completed");
1926
+ expect(result.finalMessage).toBe("done");
1927
+ // Each rewind still consumes a turn from the `maxTurns` budget:
1928
+ // turn 1 (rewound) + turn 2 (successful tool call) + turn 3 (done) = 3.
1929
+ expect(result.usage.turns).toBe(3);
1930
+ expect(attempts).toBe(2);
1931
+
1932
+ const truncateOps = log.filter((l) => l.op === "truncateThread");
1933
+ expect(truncateOps).toHaveLength(0);
1934
+
1935
+ // Turn 1 rewound → call 1 & 2 share an id, call 3 fresh.
1936
+ expect(assistantIds).toHaveLength(3);
1937
+ expect(assistantIds[0]).toBe(assistantIds[1]);
1938
+ expect(assistantIds[1]).not.toBe(assistantIds[2]);
1939
+ });
1940
+
1941
+ it("bails out with max_turns when a tool keeps requesting rewind", async () => {
1942
+ const { ops, log } = createMockThreadOps();
1943
+
1944
+ let attempts = 0;
1945
+ const alwaysRewind = defineTool({
1946
+ name: "AlwaysRewind" as const,
1947
+ description: "always rewinds",
1948
+ schema: z.object({}),
1949
+ handler: async () => {
1950
+ attempts += 1;
1951
+ return { toolResponse: "ignored", data: null, rewind: true };
1952
+ },
1953
+ });
1954
+
1955
+ const assistantIds: string[] = [];
1956
+ const session = await createSession({
1957
+ agentName: "TestAgent",
1958
+ thread: { mode: "new", threadId: "thread-1" },
1959
+ maxTurns: 3,
1960
+ runAgent: createScriptedRunAgent(
1961
+ [
1962
+ { message: "t1", toolCalls: [{ id: "tc-1", name: "AlwaysRewind", args: {} }] },
1963
+ { message: "t2", toolCalls: [{ id: "tc-2", name: "AlwaysRewind", args: {} }] },
1964
+ { message: "t3", toolCalls: [{ id: "tc-3", name: "AlwaysRewind", args: {} }] },
1965
+ { message: "t4", toolCalls: [{ id: "tc-4", name: "AlwaysRewind", args: {} }] },
1966
+ ],
1967
+ assistantIds
1968
+ ),
1969
+ threadOps: ops,
1970
+ tools: { AlwaysRewind: alwaysRewind },
1971
+ buildContextMessage: () => "go",
1972
+ });
1973
+
1974
+ const stateManager = createAgentStateManager({
1975
+ initialState: { systemPrompt: "test" },
1976
+ });
1977
+
1978
+ const result = await session.runSession({ stateManager });
1979
+
1980
+ expect(result.exitReason).toBe("max_turns");
1981
+ expect(result.usage.turns).toBe(3);
1982
+ expect(attempts).toBe(3);
1983
+
1984
+ // Session does not issue explicit truncates; invoker-side
1985
+ // truncation isn't visible here because runAgent is mocked.
1986
+ const truncateOps = log.filter((l) => l.op === "truncateThread");
1987
+ expect(truncateOps).toHaveLength(0);
1988
+
1989
+ // Every attempt reuses the same assistantMessageId — the LLM call
1990
+ // truncates-from-id on each replay.
1991
+ expect(assistantIds).toHaveLength(3);
1992
+ expect(new Set(assistantIds).size).toBe(1);
1993
+ });
1662
1994
  });
@@ -2,6 +2,7 @@ import { describe, expect, it, vi, beforeEach } from "vitest";
2
2
  import { z } from "zod";
3
3
  import type { ToolResultConfig, TokenUsage } from "../types";
4
4
  import type { ThreadOps } from "./types";
5
+ import type { PersistedThreadState } from "../state/types";
5
6
  import type { RunAgentActivity } from "../model/types";
6
7
  import type { RawToolCall } from "../tool-router/types";
7
8
  import type { SandboxOps } from "../sandbox/types";
@@ -33,6 +34,16 @@ vi.mock("@temporalio/workflow", () => {
33
34
  }
34
35
  }
35
36
 
37
+ class MockCancellationScope {
38
+ cancellable: boolean;
39
+ constructor(opts?: { cancellable?: boolean }) {
40
+ this.cancellable = opts?.cancellable ?? true;
41
+ }
42
+ async run<T>(fn: () => Promise<T>): Promise<T> {
43
+ return fn();
44
+ }
45
+ cancel(): void {}
46
+ }
36
47
  return {
37
48
  proxyActivities: <T>() => ({}) as T,
38
49
  condition: async (fn: () => boolean) => fn(),
@@ -46,6 +57,8 @@ vi.mock("@temporalio/workflow", () => {
46
57
  uuid4: () =>
47
58
  `00000000-0000-0000-0000-${String(++idCounter).padStart(12, "0")}`,
48
59
  ApplicationFailure: MockApplicationFailure,
60
+ CancellationScope: MockCancellationScope,
61
+ isCancellation: (_err: unknown) => false,
49
62
  log: {
50
63
  trace: () => {},
51
64
  debug: () => {},
@@ -85,6 +98,7 @@ function toActivityInterface(raw: ThreadOps): ActivityInterfaceFor<ThreadOps> {
85
98
 
86
99
  function createMockThreadOps() {
87
100
  const log: { op: string; args: unknown[] }[] = [];
101
+ const stateStore = new Map<string, PersistedThreadState>();
88
102
 
89
103
  const ops = toActivityInterface({
90
104
  initializeThread: async (threadId) => {
@@ -104,10 +118,23 @@ function createMockThreadOps() {
104
118
  },
105
119
  forkThread: async (source, target) => {
106
120
  log.push({ op: "forkThread", args: [source, target] });
121
+ const src = stateStore.get(source);
122
+ if (src) stateStore.set(target, src);
123
+ },
124
+ truncateThread: async (threadId, messageId) => {
125
+ log.push({ op: "truncateThread", args: [threadId, messageId] });
126
+ },
127
+ loadThreadState: async (threadId) => {
128
+ log.push({ op: "loadThreadState", args: [threadId] });
129
+ return stateStore.get(threadId) ?? null;
130
+ },
131
+ saveThreadState: async (threadId, state) => {
132
+ log.push({ op: "saveThreadState", args: [threadId, state] });
133
+ stateStore.set(threadId, state);
107
134
  },
108
135
  });
109
136
 
110
- return { ops, log };
137
+ return { ops, log, stateStore };
111
138
  }
112
139
 
113
140
  type TurnScript = {
@@ -123,7 +150,11 @@ function createScriptedRunAgent(
123
150
  return async () => {
124
151
  const turn = turns[call++];
125
152
  if (!turn) {
126
- return { message: "done", rawToolCalls: [], usage: undefined };
153
+ return {
154
+ message: "done",
155
+ rawToolCalls: [],
156
+ usage: undefined,
157
+ };
127
158
  }
128
159
  return {
129
160
  message: turn.message,
@@ -1088,4 +1119,163 @@ describe("createSession integration", () => {
1088
1119
  ]);
1089
1120
  expect(sandboxLog).not.toContain("create");
1090
1121
  });
1122
+
1123
+ // --- Persistent thread state ---
1124
+
1125
+ it("saves tasks + custom state to the thread store on session exit", async () => {
1126
+ const { ops, log, stateStore } = createMockThreadOps();
1127
+
1128
+ const writeTasks = defineTool({
1129
+ name: "WriteTasks" as const,
1130
+ description: "create tasks via state manager",
1131
+ schema: z.object({}),
1132
+ handler: async (
1133
+ _args: Record<string, never>,
1134
+ _ctx: RouterContext
1135
+ ): Promise<ToolHandlerResponse<null>> => ({
1136
+ toolResponse: "ok",
1137
+ data: null,
1138
+ }),
1139
+ });
1140
+
1141
+ const session = await createSession({
1142
+ agentName: "TestAgent",
1143
+ thread: { mode: "new", threadId: "thread-save" },
1144
+ runAgent: createScriptedRunAgent([
1145
+ {
1146
+ message: "doing work",
1147
+ toolCalls: [{ id: "tc-1", name: "WriteTasks", args: {} }],
1148
+ },
1149
+ { message: "done", toolCalls: [] },
1150
+ ]),
1151
+ threadOps: ops,
1152
+ tools: { WriteTasks: writeTasks },
1153
+ buildContextMessage: () => "go",
1154
+ });
1155
+
1156
+ const stateManager = createAgentStateManager<{ note: string }>({
1157
+ initialState: { systemPrompt: "test", note: "hello" },
1158
+ });
1159
+
1160
+ stateManager.setTask({
1161
+ id: "task-A",
1162
+ subject: "A",
1163
+ description: "A",
1164
+ activeForm: "doing A",
1165
+ status: "in_progress",
1166
+ metadata: { priority: "high" },
1167
+ blockedBy: [],
1168
+ blocks: [],
1169
+ });
1170
+
1171
+ const result = await session.runSession({ stateManager });
1172
+ expect(result.exitReason).toBe("completed");
1173
+
1174
+ const saves = log.filter((l) => l.op === "saveThreadState");
1175
+ expect(saves).toHaveLength(1);
1176
+ const saved = stateStore.get("thread-save");
1177
+ expect(saved).toBeDefined();
1178
+ expect(saved?.tasks).toHaveLength(1);
1179
+ if (saved) {
1180
+ expect(at(saved.tasks, 0)[0]).toBe("task-A");
1181
+ }
1182
+ expect(saved?.custom).toEqual({ note: "hello" });
1183
+ });
1184
+
1185
+ it("rehydrates tasks + custom state on continue before the agent loop runs", async () => {
1186
+ const { ops, stateStore } = createMockThreadOps();
1187
+
1188
+ stateStore.set("thread-cont", {
1189
+ tasks: [
1190
+ [
1191
+ "task-restored",
1192
+ {
1193
+ id: "task-restored",
1194
+ subject: "restored",
1195
+ description: "restored",
1196
+ activeForm: "restoring",
1197
+ status: "pending",
1198
+ metadata: {},
1199
+ blockedBy: [],
1200
+ blocks: [],
1201
+ },
1202
+ ],
1203
+ ],
1204
+ custom: { label: "from-prior-run" },
1205
+ });
1206
+
1207
+ type State = { label: string };
1208
+ let observedTasksBeforeFirstTurn: string[] = [];
1209
+ let observedLabelBeforeFirstTurn: string | undefined;
1210
+
1211
+ const session = await createSession({
1212
+ agentName: "TestAgent",
1213
+ thread: { mode: "continue", threadId: "thread-cont" },
1214
+ runAgent: async () => {
1215
+ observedTasksBeforeFirstTurn = stateManager.getTasks().map((t) => t.id);
1216
+ observedLabelBeforeFirstTurn = stateManager.get("label");
1217
+ return { message: "done", rawToolCalls: [], usage: undefined };
1218
+ },
1219
+ threadOps: ops,
1220
+ buildContextMessage: () => "continue please",
1221
+ });
1222
+
1223
+ const stateManager = createAgentStateManager<State>({
1224
+ initialState: { systemPrompt: "test", label: "initial" },
1225
+ });
1226
+
1227
+ await session.runSession({ stateManager });
1228
+
1229
+ expect(observedTasksBeforeFirstTurn).toEqual(["task-restored"]);
1230
+ expect(observedLabelBeforeFirstTurn).toBe("from-prior-run");
1231
+ });
1232
+
1233
+ it("fork copies the source thread's state slice into the new thread", async () => {
1234
+ const { ops, log, stateStore } = createMockThreadOps();
1235
+
1236
+ stateStore.set("source-thread", {
1237
+ tasks: [
1238
+ [
1239
+ "task-src",
1240
+ {
1241
+ id: "task-src",
1242
+ subject: "src",
1243
+ description: "src",
1244
+ activeForm: "src",
1245
+ status: "completed",
1246
+ metadata: {},
1247
+ blockedBy: [],
1248
+ blocks: [],
1249
+ },
1250
+ ],
1251
+ ],
1252
+ custom: { counter: 3 },
1253
+ });
1254
+
1255
+ const session = await createSession({
1256
+ agentName: "TestAgent",
1257
+ thread: { mode: "fork", threadId: "source-thread" },
1258
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1259
+ threadOps: ops,
1260
+ buildContextMessage: () => "continue",
1261
+ });
1262
+
1263
+ type State = { counter: number };
1264
+ const stateManager = createAgentStateManager<State>({
1265
+ initialState: { systemPrompt: "test", counter: 0 },
1266
+ });
1267
+
1268
+ const result = await session.runSession({ stateManager });
1269
+ expect(result.exitReason).toBe("completed");
1270
+
1271
+ const forkOps = log.filter((l) => l.op === "forkThread");
1272
+ expect(forkOps).toHaveLength(1);
1273
+ expect(at(forkOps, 0).args[0]).toBe("source-thread");
1274
+
1275
+ expect(stateManager.getTask("task-src")).toBeDefined();
1276
+ expect(stateManager.get("counter")).toBe(3);
1277
+
1278
+ const newThreadSlice = stateStore.get(result.threadId);
1279
+ expect(newThreadSlice?.tasks).toHaveLength(1);
1280
+ });
1091
1281
  });