zeitlich 0.2.38 → 0.2.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/README.md +18 -0
  2. package/dist/{activities-BKhMtKDd.d.ts → activities-Bmu7XnaG.d.ts} +4 -6
  3. package/dist/{activities-CDcwkRZs.d.cts → activities-ByBFLvm2.d.cts} +4 -6
  4. package/dist/adapter-id-BB-mmrts.d.cts +17 -0
  5. package/dist/adapter-id-BB-mmrts.d.ts +17 -0
  6. package/dist/adapter-id-CMwVrVqv.d.cts +17 -0
  7. package/dist/adapter-id-CMwVrVqv.d.ts +17 -0
  8. package/dist/adapter-id-CbY2zeSt.d.cts +17 -0
  9. package/dist/adapter-id-CbY2zeSt.d.ts +17 -0
  10. package/dist/adapters/thread/anthropic/index.cjs +140 -23
  11. package/dist/adapters/thread/anthropic/index.cjs.map +1 -1
  12. package/dist/adapters/thread/anthropic/index.d.cts +8 -7
  13. package/dist/adapters/thread/anthropic/index.d.ts +8 -7
  14. package/dist/adapters/thread/anthropic/index.js +140 -24
  15. package/dist/adapters/thread/anthropic/index.js.map +1 -1
  16. package/dist/adapters/thread/anthropic/workflow.cjs +8 -3
  17. package/dist/adapters/thread/anthropic/workflow.cjs.map +1 -1
  18. package/dist/adapters/thread/anthropic/workflow.d.cts +5 -4
  19. package/dist/adapters/thread/anthropic/workflow.d.ts +5 -4
  20. package/dist/adapters/thread/anthropic/workflow.js +8 -4
  21. package/dist/adapters/thread/anthropic/workflow.js.map +1 -1
  22. package/dist/adapters/thread/google-genai/index.cjs +140 -23
  23. package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
  24. package/dist/adapters/thread/google-genai/index.d.cts +5 -4
  25. package/dist/adapters/thread/google-genai/index.d.ts +5 -4
  26. package/dist/adapters/thread/google-genai/index.js +140 -24
  27. package/dist/adapters/thread/google-genai/index.js.map +1 -1
  28. package/dist/adapters/thread/google-genai/workflow.cjs +8 -3
  29. package/dist/adapters/thread/google-genai/workflow.cjs.map +1 -1
  30. package/dist/adapters/thread/google-genai/workflow.d.cts +5 -4
  31. package/dist/adapters/thread/google-genai/workflow.d.ts +5 -4
  32. package/dist/adapters/thread/google-genai/workflow.js +8 -4
  33. package/dist/adapters/thread/google-genai/workflow.js.map +1 -1
  34. package/dist/adapters/thread/index.cjs +16 -0
  35. package/dist/adapters/thread/index.cjs.map +1 -0
  36. package/dist/adapters/thread/index.d.cts +34 -0
  37. package/dist/adapters/thread/index.d.ts +34 -0
  38. package/dist/adapters/thread/index.js +12 -0
  39. package/dist/adapters/thread/index.js.map +1 -0
  40. package/dist/adapters/thread/langchain/index.cjs +139 -24
  41. package/dist/adapters/thread/langchain/index.cjs.map +1 -1
  42. package/dist/adapters/thread/langchain/index.d.cts +8 -7
  43. package/dist/adapters/thread/langchain/index.d.ts +8 -7
  44. package/dist/adapters/thread/langchain/index.js +139 -25
  45. package/dist/adapters/thread/langchain/index.js.map +1 -1
  46. package/dist/adapters/thread/langchain/workflow.cjs +8 -3
  47. package/dist/adapters/thread/langchain/workflow.cjs.map +1 -1
  48. package/dist/adapters/thread/langchain/workflow.d.cts +5 -4
  49. package/dist/adapters/thread/langchain/workflow.d.ts +5 -4
  50. package/dist/adapters/thread/langchain/workflow.js +8 -4
  51. package/dist/adapters/thread/langchain/workflow.js.map +1 -1
  52. package/dist/index.cjs +266 -48
  53. package/dist/index.cjs.map +1 -1
  54. package/dist/index.d.cts +6 -6
  55. package/dist/index.d.ts +6 -6
  56. package/dist/index.js +263 -49
  57. package/dist/index.js.map +1 -1
  58. package/dist/{proxy-D_3x7RN4.d.cts → proxy-BAKzNGRq.d.cts} +1 -1
  59. package/dist/{proxy-CUlKSvZS.d.ts → proxy-DO_MXbY4.d.ts} +1 -1
  60. package/dist/{thread-manager-CVu7o2cs.d.ts → thread-manager-CcRXasqs.d.ts} +2 -4
  61. package/dist/{thread-manager-HSwyh28L.d.cts → thread-manager-ClwSaUnj.d.cts} +2 -4
  62. package/dist/{thread-manager-c1gPopAG.d.ts → thread-manager-D-7lp1JK.d.ts} +2 -4
  63. package/dist/{thread-manager-wGi-LqIP.d.cts → thread-manager-Y8Ucf0Tf.d.cts} +2 -4
  64. package/dist/{types-C06FwR96.d.cts → types-Bcbiq8iv.d.cts} +162 -44
  65. package/dist/{types-BH_IRryz.d.ts → types-DpHTX-iO.d.ts} +54 -6
  66. package/dist/{types-DNr31FzL.d.ts → types-Dt8-HBBT.d.ts} +162 -44
  67. package/dist/{types-BaOw4hKI.d.cts → types-hFFi-Zd9.d.cts} +54 -6
  68. package/dist/{workflow-CSCkpwAL.d.ts → workflow-Bmf9EtDW.d.ts} +82 -2
  69. package/dist/{workflow-DuvMZ8Vm.d.cts → workflow-Bx9utBwb.d.cts} +82 -2
  70. package/dist/workflow.cjs +188 -37
  71. package/dist/workflow.cjs.map +1 -1
  72. package/dist/workflow.d.cts +2 -2
  73. package/dist/workflow.d.ts +2 -2
  74. package/dist/workflow.js +185 -38
  75. package/dist/workflow.js.map +1 -1
  76. package/package.json +11 -1
  77. package/src/adapters/thread/adapter-id.test.ts +42 -0
  78. package/src/adapters/thread/anthropic/activities.ts +33 -7
  79. package/src/adapters/thread/anthropic/adapter-id.ts +16 -0
  80. package/src/adapters/thread/anthropic/fork-transform.test.ts +291 -0
  81. package/src/adapters/thread/anthropic/index.ts +3 -0
  82. package/src/adapters/thread/anthropic/model-invoker.ts +8 -4
  83. package/src/adapters/thread/anthropic/proxy.ts +3 -2
  84. package/src/adapters/thread/anthropic/thread-manager.ts +27 -4
  85. package/src/adapters/thread/google-genai/activities.ts +33 -7
  86. package/src/adapters/thread/google-genai/adapter-id.ts +16 -0
  87. package/src/adapters/thread/google-genai/fork-transform.test.ts +149 -0
  88. package/src/adapters/thread/google-genai/index.ts +3 -0
  89. package/src/adapters/thread/google-genai/model-invoker.ts +7 -3
  90. package/src/adapters/thread/google-genai/proxy.ts +3 -2
  91. package/src/adapters/thread/google-genai/thread-manager.ts +27 -4
  92. package/src/adapters/thread/index.ts +39 -0
  93. package/src/adapters/thread/langchain/activities.ts +33 -7
  94. package/src/adapters/thread/langchain/adapter-id.ts +16 -0
  95. package/src/adapters/thread/langchain/fork-transform.test.ts +142 -0
  96. package/src/adapters/thread/langchain/index.ts +3 -0
  97. package/src/adapters/thread/langchain/model-invoker.ts +8 -3
  98. package/src/adapters/thread/langchain/proxy.ts +3 -2
  99. package/src/adapters/thread/langchain/thread-manager.ts +27 -4
  100. package/src/lib/lifecycle.ts +3 -1
  101. package/src/lib/model/types.ts +7 -10
  102. package/src/lib/session/session-edge-cases.integration.test.ts +131 -63
  103. package/src/lib/session/session.integration.test.ts +174 -5
  104. package/src/lib/session/session.ts +68 -28
  105. package/src/lib/session/types.ts +60 -9
  106. package/src/lib/state/index.ts +1 -0
  107. package/src/lib/state/manager.integration.test.ts +109 -0
  108. package/src/lib/state/manager.ts +38 -8
  109. package/src/lib/state/types.ts +25 -0
  110. package/src/lib/subagent/handler.ts +124 -11
  111. package/src/lib/subagent/index.ts +5 -1
  112. package/src/lib/subagent/subagent.integration.test.ts +528 -0
  113. package/src/lib/subagent/types.ts +63 -14
  114. package/src/lib/subagent/workflow.ts +29 -2
  115. package/src/lib/thread/index.ts +5 -0
  116. package/src/lib/thread/keys.test.ts +101 -0
  117. package/src/lib/thread/keys.ts +94 -0
  118. package/src/lib/thread/manager.test.ts +139 -0
  119. package/src/lib/thread/manager.ts +92 -14
  120. package/src/lib/thread/proxy.ts +2 -0
  121. package/src/lib/thread/types.ts +60 -6
  122. package/src/lib/tool-router/types.ts +16 -8
  123. package/src/lib/types.ts +12 -0
  124. package/src/workflow.ts +12 -1
  125. package/tsup.config.ts +1 -0
@@ -2,6 +2,7 @@ import { describe, expect, it, vi, beforeEach } from "vitest";
2
2
  import { z } from "zod";
3
3
  import type { ToolResultConfig, TokenUsage } from "../types";
4
4
  import type { ThreadOps } from "./types";
5
+ import type { PersistedThreadState } from "../state/types";
5
6
  import type { RunAgentActivity } from "../model/types";
6
7
  import type { RawToolCall } from "../tool-router/types";
7
8
  import type { SandboxOps } from "../sandbox/types";
@@ -73,7 +74,6 @@ type TurnScript = {
73
74
  message: unknown;
74
75
  toolCalls: RawToolCall[];
75
76
  usage?: TokenUsage;
76
- threadLengthAtCall?: number;
77
77
  };
78
78
 
79
79
  /**
@@ -96,6 +96,7 @@ function toActivityInterface<TContent = string>(
96
96
 
97
97
  function createMockThreadOps() {
98
98
  const log: { op: string; args: unknown[] }[] = [];
99
+ const stateStore = new Map<string, PersistedThreadState>();
99
100
  const ops = toActivityInterface({
100
101
  initializeThread: async (threadId) => {
101
102
  log.push({ op: "initializeThread", args: [threadId] });
@@ -114,33 +115,43 @@ function createMockThreadOps() {
114
115
  },
115
116
  forkThread: async (source, target) => {
116
117
  log.push({ op: "forkThread", args: [source, target] });
118
+ const src = stateStore.get(source);
119
+ if (src) stateStore.set(target, src);
117
120
  },
118
- truncateThread: async (threadId, length) => {
119
- log.push({ op: "truncateThread", args: [threadId, length] });
121
+ truncateThread: async (threadId, messageId) => {
122
+ log.push({ op: "truncateThread", args: [threadId, messageId] });
123
+ },
124
+ loadThreadState: async (threadId) => {
125
+ log.push({ op: "loadThreadState", args: [threadId] });
126
+ return stateStore.get(threadId) ?? null;
127
+ },
128
+ saveThreadState: async (threadId, state) => {
129
+ log.push({ op: "saveThreadState", args: [threadId, state] });
130
+ stateStore.set(threadId, state);
120
131
  },
121
132
  });
122
- return { ops, log };
133
+ return { ops, log, stateStore };
123
134
  }
124
135
 
125
136
  function createScriptedRunAgent(
126
- turns: TurnScript[]
137
+ turns: TurnScript[],
138
+ assistantIdLog?: string[]
127
139
  ): RunAgentActivity<unknown> {
128
140
  let call = 0;
129
- return async () => {
141
+ return async (config) => {
142
+ assistantIdLog?.push(config.assistantMessageId);
130
143
  const turn = turns[call++];
131
144
  if (!turn) {
132
145
  return {
133
146
  message: "done",
134
147
  rawToolCalls: [],
135
148
  usage: undefined,
136
- threadLengthAtCall: 0,
137
149
  };
138
150
  }
139
151
  return {
140
152
  message: turn.message,
141
153
  rawToolCalls: turn.toolCalls,
142
154
  usage: turn.usage,
143
- threadLengthAtCall: turn.threadLengthAtCall ?? 0,
144
155
  };
145
156
  };
146
157
  }
@@ -797,8 +808,15 @@ describe("createSession edge cases", () => {
797
808
  forkThread: async (source, target) => {
798
809
  log.push({ op: "forkThread", args: [source, target] });
799
810
  },
800
- truncateThread: async (threadId, length) => {
801
- log.push({ op: "truncateThread", args: [threadId, length] });
811
+ truncateThread: async (threadId, messageId) => {
812
+ log.push({ op: "truncateThread", args: [threadId, messageId] });
813
+ },
814
+ loadThreadState: async (threadId) => {
815
+ log.push({ op: "loadThreadState", args: [threadId] });
816
+ return null;
817
+ },
818
+ saveThreadState: async (threadId, state) => {
819
+ log.push({ op: "saveThreadState", args: [threadId, state] });
802
820
  },
803
821
  });
804
822
 
@@ -1686,6 +1704,14 @@ describe("createSession edge cases", () => {
1686
1704
  });
1687
1705
 
1688
1706
  // --- Rewind flow: tool requests rewind and turn is retried -------------
1707
+ //
1708
+ // The session no longer issues an explicit truncateThread on rewind.
1709
+ // Instead it reuses the pre-generated assistantMessageId for the retry,
1710
+ // and the runAgent activity itself truncates the thread from that id
1711
+ // on entry. These tests assert the observable behaviour: the rewinding
1712
+ // tool's result is not appended, turns are consumed as expected, and
1713
+ // the retry invocation receives the same assistantMessageId so the
1714
+ // invoker can wipe the prior attempt.
1689
1715
 
1690
1716
  it("rewinds the turn when a tool handler returns rewind:true", async () => {
1691
1717
  const { ops, log } = createMockThreadOps();
@@ -1708,20 +1734,24 @@ describe("createSession edge cases", () => {
1708
1734
  },
1709
1735
  });
1710
1736
 
1737
+ const assistantIds: string[] = [];
1711
1738
  const session = await createSession({
1712
1739
  agentName: "TestAgent",
1713
1740
  thread: { mode: "new", threadId: "thread-1" },
1714
- runAgent: createScriptedRunAgent([
1715
- {
1716
- message: "attempt-1",
1717
- toolCalls: [{ id: "tc-1", name: "Rewind", args: {} }],
1718
- },
1719
- {
1720
- message: "attempt-2",
1721
- toolCalls: [{ id: "tc-2", name: "Rewind", args: {} }],
1722
- },
1723
- { message: "done", toolCalls: [] },
1724
- ]),
1741
+ runAgent: createScriptedRunAgent(
1742
+ [
1743
+ {
1744
+ message: "attempt-1",
1745
+ toolCalls: [{ id: "tc-1", name: "Rewind", args: {} }],
1746
+ },
1747
+ {
1748
+ message: "attempt-2",
1749
+ toolCalls: [{ id: "tc-2", name: "Rewind", args: {} }],
1750
+ },
1751
+ { message: "done", toolCalls: [] },
1752
+ ],
1753
+ assistantIds
1754
+ ),
1725
1755
  threadOps: ops,
1726
1756
  tools: { Rewind: rewindTool },
1727
1757
  buildContextMessage: () => "go",
@@ -1737,8 +1767,16 @@ describe("createSession edge cases", () => {
1737
1767
  expect(result.finalMessage).toBe("done");
1738
1768
  expect(rewindAttempts).toBe(2);
1739
1769
 
1770
+ // Session does not call truncateThread directly on rewind — the
1771
+ // invoker truncates on entry via the reused assistantMessageId.
1740
1772
  const truncateOps = log.filter((l) => l.op === "truncateThread");
1741
- expect(truncateOps).toHaveLength(1);
1773
+ expect(truncateOps).toHaveLength(0);
1774
+
1775
+ // The first and second calls reuse the same assistantMessageId
1776
+ // (rewind retry), then the third uses a fresh id.
1777
+ expect(assistantIds).toHaveLength(3);
1778
+ expect(assistantIds[0]).toBe(assistantIds[1]);
1779
+ expect(assistantIds[1]).not.toBe(assistantIds[2]);
1742
1780
 
1743
1781
  const noRewindToolResult = log.filter((l) => {
1744
1782
  if (l.op !== "appendToolResult") return false;
@@ -1749,9 +1787,15 @@ describe("createSession edge cases", () => {
1749
1787
 
1750
1788
  const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
1751
1789
  expect(agentAppends).toHaveLength(3);
1790
+ // The first two assistant appends reuse the same id — the second
1791
+ // will be a no-op in the real adapter because truncateFromId clears
1792
+ // the dedup marker for the old one before the retry invocation.
1793
+ const asstIds = agentAppends.map((l) => l.args[1]);
1794
+ expect(asstIds[0]).toBe(asstIds[1]);
1795
+ expect(asstIds[1]).not.toBe(asstIds[2]);
1752
1796
  });
1753
1797
 
1754
- it("truncates the thread back to the pre-assistant state so sibling tool results are dropped on rewind", async () => {
1798
+ it("reuses the assistantMessageId on rewind even with sibling tool calls", async () => {
1755
1799
  const { ops, log } = createMockThreadOps();
1756
1800
 
1757
1801
  let rewindFired = false;
@@ -1776,22 +1820,23 @@ describe("createSession edge cases", () => {
1776
1820
  },
1777
1821
  });
1778
1822
 
1823
+ const assistantIds: string[] = [];
1779
1824
  const session = await createSession({
1780
1825
  agentName: "TestAgent",
1781
1826
  thread: { mode: "new", threadId: "thread-1" },
1782
- runAgent: createScriptedRunAgent([
1783
- {
1784
- message: "parallel",
1785
- toolCalls: [
1786
- { id: "tc-sibling", name: "Sibling", args: {} },
1787
- { id: "tc-rewind", name: "Rewind", args: {} },
1788
- ],
1789
- // Invoker reports 2 stored messages (system + human) at the
1790
- // moment the LLM was called.
1791
- threadLengthAtCall: 2,
1792
- },
1793
- { message: "done", toolCalls: [], threadLengthAtCall: 2 },
1794
- ]),
1827
+ runAgent: createScriptedRunAgent(
1828
+ [
1829
+ {
1830
+ message: "parallel",
1831
+ toolCalls: [
1832
+ { id: "tc-sibling", name: "Sibling", args: {} },
1833
+ { id: "tc-rewind", name: "Rewind", args: {} },
1834
+ ],
1835
+ },
1836
+ { message: "done", toolCalls: [] },
1837
+ ],
1838
+ assistantIds
1839
+ ),
1795
1840
  threadOps: ops,
1796
1841
  tools: { Rewind: rewindTool, Sibling: siblingTool },
1797
1842
  buildContextMessage: () => "go",
@@ -1805,13 +1850,15 @@ describe("createSession edge cases", () => {
1805
1850
 
1806
1851
  expect(result.exitReason).toBe("completed");
1807
1852
 
1808
- // Exactly one truncate fired back to the pre-assistant-message
1809
- // length that runAgent reported.
1853
+ // No explicit truncate from the session — the invoker will do it
1854
+ // on entry using the reused assistantMessageId.
1810
1855
  const truncateOps = log.filter((l) => l.op === "truncateThread");
1811
- expect(truncateOps).toHaveLength(1);
1812
- const truncateOp = truncateOps[0];
1813
- if (!truncateOp) throw new Error("expected truncate op");
1814
- expect(truncateOp.args[1]).toBe(2);
1856
+ expect(truncateOps).toHaveLength(0);
1857
+
1858
+ // The rewound turn and its retry share one assistantMessageId; the
1859
+ // scripted `done` turn is that retry, so only two ids are recorded.
1860
+ expect(assistantIds).toHaveLength(2);
1861
+ expect(assistantIds[0]).toBe(assistantIds[1]);
1815
1862
 
1816
1863
  // Rewinding tool never appends its own result.
1817
1864
  const rewindResultAppends = log.filter((l) => {
@@ -1821,10 +1868,11 @@ describe("createSession edge cases", () => {
1821
1868
  });
1822
1869
  expect(rewindResultAppends).toHaveLength(0);
1823
1870
 
1824
- // Two assistant messages expected: one from the rewound turn, one from
1825
- // the successful retry.
1871
+ // Two assistant messages expected: one from the rewound turn, one
1872
+ // from the successful retry — sharing the same id.
1826
1873
  const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
1827
1874
  expect(agentAppends).toHaveLength(2);
1875
+ expect(agentAppends[0]?.args[1]).toBe(agentAppends[1]?.args[1]);
1828
1876
  });
1829
1877
 
1830
1878
  it("does not rewind when the rewinding tool is no longer present after retry", async () => {
@@ -1844,21 +1892,25 @@ describe("createSession edge cases", () => {
1844
1892
  },
1845
1893
  });
1846
1894
 
1895
+ const assistantIds: string[] = [];
1847
1896
  const session = await createSession({
1848
1897
  agentName: "TestAgent",
1849
1898
  thread: { mode: "new", threadId: "thread-1" },
1850
1899
  maxTurns: 5,
1851
- runAgent: createScriptedRunAgent([
1852
- {
1853
- message: "call-1",
1854
- toolCalls: [{ id: "tc-1", name: "RewindOnce", args: {} }],
1855
- },
1856
- {
1857
- message: "call-2",
1858
- toolCalls: [{ id: "tc-2", name: "RewindOnce", args: {} }],
1859
- },
1860
- { message: "done", toolCalls: [] },
1861
- ]),
1900
+ runAgent: createScriptedRunAgent(
1901
+ [
1902
+ {
1903
+ message: "call-1",
1904
+ toolCalls: [{ id: "tc-1", name: "RewindOnce", args: {} }],
1905
+ },
1906
+ {
1907
+ message: "call-2",
1908
+ toolCalls: [{ id: "tc-2", name: "RewindOnce", args: {} }],
1909
+ },
1910
+ { message: "done", toolCalls: [] },
1911
+ ],
1912
+ assistantIds
1913
+ ),
1862
1914
  threadOps: ops,
1863
1915
  tools: { RewindOnce: rewindOnce },
1864
1916
  buildContextMessage: () => "go",
@@ -1878,7 +1930,12 @@ describe("createSession edge cases", () => {
1878
1930
  expect(attempts).toBe(2);
1879
1931
 
1880
1932
  const truncateOps = log.filter((l) => l.op === "truncateThread");
1881
- expect(truncateOps).toHaveLength(1);
1933
+ expect(truncateOps).toHaveLength(0);
1934
+
1935
+ // Turn 1 rewound → call 1 & 2 share an id, call 3 fresh.
1936
+ expect(assistantIds).toHaveLength(3);
1937
+ expect(assistantIds[0]).toBe(assistantIds[1]);
1938
+ expect(assistantIds[1]).not.toBe(assistantIds[2]);
1882
1939
  });
1883
1940
 
1884
1941
  it("bails out with max_turns when a tool keeps requesting rewind", async () => {
@@ -1895,16 +1952,20 @@ describe("createSession edge cases", () => {
1895
1952
  },
1896
1953
  });
1897
1954
 
1955
+ const assistantIds: string[] = [];
1898
1956
  const session = await createSession({
1899
1957
  agentName: "TestAgent",
1900
1958
  thread: { mode: "new", threadId: "thread-1" },
1901
1959
  maxTurns: 3,
1902
- runAgent: createScriptedRunAgent([
1903
- { message: "t1", toolCalls: [{ id: "tc-1", name: "AlwaysRewind", args: {} }] },
1904
- { message: "t2", toolCalls: [{ id: "tc-2", name: "AlwaysRewind", args: {} }] },
1905
- { message: "t3", toolCalls: [{ id: "tc-3", name: "AlwaysRewind", args: {} }] },
1906
- { message: "t4", toolCalls: [{ id: "tc-4", name: "AlwaysRewind", args: {} }] },
1907
- ]),
1960
+ runAgent: createScriptedRunAgent(
1961
+ [
1962
+ { message: "t1", toolCalls: [{ id: "tc-1", name: "AlwaysRewind", args: {} }] },
1963
+ { message: "t2", toolCalls: [{ id: "tc-2", name: "AlwaysRewind", args: {} }] },
1964
+ { message: "t3", toolCalls: [{ id: "tc-3", name: "AlwaysRewind", args: {} }] },
1965
+ { message: "t4", toolCalls: [{ id: "tc-4", name: "AlwaysRewind", args: {} }] },
1966
+ ],
1967
+ assistantIds
1968
+ ),
1908
1969
  threadOps: ops,
1909
1970
  tools: { AlwaysRewind: alwaysRewind },
1910
1971
  buildContextMessage: () => "go",
@@ -1920,7 +1981,14 @@ describe("createSession edge cases", () => {
1920
1981
  expect(result.usage.turns).toBe(3);
1921
1982
  expect(attempts).toBe(3);
1922
1983
 
1984
+ // Session does not issue explicit truncates; invoker-side
1985
+ // truncation isn't visible here because runAgent is mocked.
1923
1986
  const truncateOps = log.filter((l) => l.op === "truncateThread");
1924
- expect(truncateOps).toHaveLength(3);
1987
+ expect(truncateOps).toHaveLength(0);
1988
+
1989
+ // Every attempt reuses the same assistantMessageId — the LLM call
1990
+ // truncates-from-id on each replay.
1991
+ expect(assistantIds).toHaveLength(3);
1992
+ expect(new Set(assistantIds).size).toBe(1);
1925
1993
  });
1926
1994
  });
@@ -2,6 +2,7 @@ import { describe, expect, it, vi, beforeEach } from "vitest";
2
2
  import { z } from "zod";
3
3
  import type { ToolResultConfig, TokenUsage } from "../types";
4
4
  import type { ThreadOps } from "./types";
5
+ import type { PersistedThreadState } from "../state/types";
5
6
  import type { RunAgentActivity } from "../model/types";
6
7
  import type { RawToolCall } from "../tool-router/types";
7
8
  import type { SandboxOps } from "../sandbox/types";
@@ -97,6 +98,7 @@ function toActivityInterface(raw: ThreadOps): ActivityInterfaceFor<ThreadOps> {
97
98
 
98
99
  function createMockThreadOps() {
99
100
  const log: { op: string; args: unknown[] }[] = [];
101
+ const stateStore = new Map<string, PersistedThreadState>();
100
102
 
101
103
  const ops = toActivityInterface({
102
104
  initializeThread: async (threadId) => {
@@ -116,13 +118,23 @@ function createMockThreadOps() {
116
118
  },
117
119
  forkThread: async (source, target) => {
118
120
  log.push({ op: "forkThread", args: [source, target] });
121
+ const src = stateStore.get(source);
122
+ if (src) stateStore.set(target, src);
119
123
  },
120
- truncateThread: async (threadId, length) => {
121
- log.push({ op: "truncateThread", args: [threadId, length] });
124
+ truncateThread: async (threadId, messageId) => {
125
+ log.push({ op: "truncateThread", args: [threadId, messageId] });
126
+ },
127
+ loadThreadState: async (threadId) => {
128
+ log.push({ op: "loadThreadState", args: [threadId] });
129
+ return stateStore.get(threadId) ?? null;
130
+ },
131
+ saveThreadState: async (threadId, state) => {
132
+ log.push({ op: "saveThreadState", args: [threadId, state] });
133
+ stateStore.set(threadId, state);
122
134
  },
123
135
  });
124
136
 
125
- return { ops, log };
137
+ return { ops, log, stateStore };
126
138
  }
127
139
 
128
140
  type TurnScript = {
@@ -142,14 +154,12 @@ function createScriptedRunAgent(
142
154
  message: "done",
143
155
  rawToolCalls: [],
144
156
  usage: undefined,
145
- threadLengthAtCall: 0,
146
157
  };
147
158
  }
148
159
  return {
149
160
  message: turn.message,
150
161
  rawToolCalls: turn.toolCalls,
151
162
  usage: turn.usage,
152
- threadLengthAtCall: 0,
153
163
  };
154
164
  };
155
165
  }
@@ -1109,4 +1119,163 @@ describe("createSession integration", () => {
1109
1119
  ]);
1110
1120
  expect(sandboxLog).not.toContain("create");
1111
1121
  });
1122
+
1123
+ // --- Persistent thread state ---
1124
+
1125
+ it("saves tasks + custom state to the thread store on session exit", async () => {
1126
+ const { ops, log, stateStore } = createMockThreadOps();
1127
+
1128
+ const writeTasks = defineTool({
1129
+ name: "WriteTasks" as const,
1130
+ description: "create tasks via state manager",
1131
+ schema: z.object({}),
1132
+ handler: async (
1133
+ _args: Record<string, never>,
1134
+ _ctx: RouterContext
1135
+ ): Promise<ToolHandlerResponse<null>> => ({
1136
+ toolResponse: "ok",
1137
+ data: null,
1138
+ }),
1139
+ });
1140
+
1141
+ const session = await createSession({
1142
+ agentName: "TestAgent",
1143
+ thread: { mode: "new", threadId: "thread-save" },
1144
+ runAgent: createScriptedRunAgent([
1145
+ {
1146
+ message: "doing work",
1147
+ toolCalls: [{ id: "tc-1", name: "WriteTasks", args: {} }],
1148
+ },
1149
+ { message: "done", toolCalls: [] },
1150
+ ]),
1151
+ threadOps: ops,
1152
+ tools: { WriteTasks: writeTasks },
1153
+ buildContextMessage: () => "go",
1154
+ });
1155
+
1156
+ const stateManager = createAgentStateManager<{ note: string }>({
1157
+ initialState: { systemPrompt: "test", note: "hello" },
1158
+ });
1159
+
1160
+ stateManager.setTask({
1161
+ id: "task-A",
1162
+ subject: "A",
1163
+ description: "A",
1164
+ activeForm: "doing A",
1165
+ status: "in_progress",
1166
+ metadata: { priority: "high" },
1167
+ blockedBy: [],
1168
+ blocks: [],
1169
+ });
1170
+
1171
+ const result = await session.runSession({ stateManager });
1172
+ expect(result.exitReason).toBe("completed");
1173
+
1174
+ const saves = log.filter((l) => l.op === "saveThreadState");
1175
+ expect(saves).toHaveLength(1);
1176
+ const saved = stateStore.get("thread-save");
1177
+ expect(saved).toBeDefined();
1178
+ expect(saved?.tasks).toHaveLength(1);
1179
+ if (saved) {
1180
+ expect(at(saved.tasks, 0)[0]).toBe("task-A");
1181
+ }
1182
+ expect(saved?.custom).toEqual({ note: "hello" });
1183
+ });
1184
+
1185
+ it("rehydrates tasks + custom state on continue before the agent loop runs", async () => {
1186
+ const { ops, stateStore } = createMockThreadOps();
1187
+
1188
+ stateStore.set("thread-cont", {
1189
+ tasks: [
1190
+ [
1191
+ "task-restored",
1192
+ {
1193
+ id: "task-restored",
1194
+ subject: "restored",
1195
+ description: "restored",
1196
+ activeForm: "restoring",
1197
+ status: "pending",
1198
+ metadata: {},
1199
+ blockedBy: [],
1200
+ blocks: [],
1201
+ },
1202
+ ],
1203
+ ],
1204
+ custom: { label: "from-prior-run" },
1205
+ });
1206
+
1207
+ type State = { label: string };
1208
+ let observedTasksBeforeFirstTurn: string[] = [];
1209
+ let observedLabelBeforeFirstTurn: string | undefined;
1210
+
1211
+ const session = await createSession({
1212
+ agentName: "TestAgent",
1213
+ thread: { mode: "continue", threadId: "thread-cont" },
1214
+ runAgent: async () => {
1215
+ observedTasksBeforeFirstTurn = stateManager.getTasks().map((t) => t.id);
1216
+ observedLabelBeforeFirstTurn = stateManager.get("label");
1217
+ return { message: "done", rawToolCalls: [], usage: undefined };
1218
+ },
1219
+ threadOps: ops,
1220
+ buildContextMessage: () => "continue please",
1221
+ });
1222
+
1223
+ const stateManager = createAgentStateManager<State>({
1224
+ initialState: { systemPrompt: "test", label: "initial" },
1225
+ });
1226
+
1227
+ await session.runSession({ stateManager });
1228
+
1229
+ expect(observedTasksBeforeFirstTurn).toEqual(["task-restored"]);
1230
+ expect(observedLabelBeforeFirstTurn).toBe("from-prior-run");
1231
+ });
1232
+
1233
+ it("fork copies the source thread's state slice into the new thread", async () => {
1234
+ const { ops, log, stateStore } = createMockThreadOps();
1235
+
1236
+ stateStore.set("source-thread", {
1237
+ tasks: [
1238
+ [
1239
+ "task-src",
1240
+ {
1241
+ id: "task-src",
1242
+ subject: "src",
1243
+ description: "src",
1244
+ activeForm: "src",
1245
+ status: "completed",
1246
+ metadata: {},
1247
+ blockedBy: [],
1248
+ blocks: [],
1249
+ },
1250
+ ],
1251
+ ],
1252
+ custom: { counter: 3 },
1253
+ });
1254
+
1255
+ const session = await createSession({
1256
+ agentName: "TestAgent",
1257
+ thread: { mode: "fork", threadId: "source-thread" },
1258
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1259
+ threadOps: ops,
1260
+ buildContextMessage: () => "continue",
1261
+ });
1262
+
1263
+ type State = { counter: number };
1264
+ const stateManager = createAgentStateManager<State>({
1265
+ initialState: { systemPrompt: "test", counter: 0 },
1266
+ });
1267
+
1268
+ const result = await session.runSession({ stateManager });
1269
+ expect(result.exitReason).toBe("completed");
1270
+
1271
+ const forkOps = log.filter((l) => l.op === "forkThread");
1272
+ expect(forkOps).toHaveLength(1);
1273
+ expect(at(forkOps, 0).args[0]).toBe("source-thread");
1274
+
1275
+ expect(stateManager.getTask("task-src")).toBeDefined();
1276
+ expect(stateManager.get("counter")).toBe(3);
1277
+
1278
+ const newThreadSlice = stateStore.get(result.threadId);
1279
+ expect(newThreadSlice?.tasks).toHaveLength(1);
1280
+ });
1112
1281
  });