zeitlich 0.2.38 → 0.2.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -0
- package/dist/{activities-BKhMtKDd.d.ts → activities-Bmu7XnaG.d.ts} +4 -6
- package/dist/{activities-CDcwkRZs.d.cts → activities-ByBFLvm2.d.cts} +4 -6
- package/dist/adapter-id-BB-mmrts.d.cts +17 -0
- package/dist/adapter-id-BB-mmrts.d.ts +17 -0
- package/dist/adapter-id-CMwVrVqv.d.cts +17 -0
- package/dist/adapter-id-CMwVrVqv.d.ts +17 -0
- package/dist/adapter-id-CbY2zeSt.d.cts +17 -0
- package/dist/adapter-id-CbY2zeSt.d.ts +17 -0
- package/dist/adapters/thread/anthropic/index.cjs +140 -23
- package/dist/adapters/thread/anthropic/index.cjs.map +1 -1
- package/dist/adapters/thread/anthropic/index.d.cts +8 -7
- package/dist/adapters/thread/anthropic/index.d.ts +8 -7
- package/dist/adapters/thread/anthropic/index.js +140 -24
- package/dist/adapters/thread/anthropic/index.js.map +1 -1
- package/dist/adapters/thread/anthropic/workflow.cjs +8 -3
- package/dist/adapters/thread/anthropic/workflow.cjs.map +1 -1
- package/dist/adapters/thread/anthropic/workflow.d.cts +5 -4
- package/dist/adapters/thread/anthropic/workflow.d.ts +5 -4
- package/dist/adapters/thread/anthropic/workflow.js +8 -4
- package/dist/adapters/thread/anthropic/workflow.js.map +1 -1
- package/dist/adapters/thread/google-genai/index.cjs +140 -23
- package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
- package/dist/adapters/thread/google-genai/index.d.cts +5 -4
- package/dist/adapters/thread/google-genai/index.d.ts +5 -4
- package/dist/adapters/thread/google-genai/index.js +140 -24
- package/dist/adapters/thread/google-genai/index.js.map +1 -1
- package/dist/adapters/thread/google-genai/workflow.cjs +8 -3
- package/dist/adapters/thread/google-genai/workflow.cjs.map +1 -1
- package/dist/adapters/thread/google-genai/workflow.d.cts +5 -4
- package/dist/adapters/thread/google-genai/workflow.d.ts +5 -4
- package/dist/adapters/thread/google-genai/workflow.js +8 -4
- package/dist/adapters/thread/google-genai/workflow.js.map +1 -1
- package/dist/adapters/thread/index.cjs +16 -0
- package/dist/adapters/thread/index.cjs.map +1 -0
- package/dist/adapters/thread/index.d.cts +34 -0
- package/dist/adapters/thread/index.d.ts +34 -0
- package/dist/adapters/thread/index.js +12 -0
- package/dist/adapters/thread/index.js.map +1 -0
- package/dist/adapters/thread/langchain/index.cjs +139 -24
- package/dist/adapters/thread/langchain/index.cjs.map +1 -1
- package/dist/adapters/thread/langchain/index.d.cts +8 -7
- package/dist/adapters/thread/langchain/index.d.ts +8 -7
- package/dist/adapters/thread/langchain/index.js +139 -25
- package/dist/adapters/thread/langchain/index.js.map +1 -1
- package/dist/adapters/thread/langchain/workflow.cjs +8 -3
- package/dist/adapters/thread/langchain/workflow.cjs.map +1 -1
- package/dist/adapters/thread/langchain/workflow.d.cts +5 -4
- package/dist/adapters/thread/langchain/workflow.d.ts +5 -4
- package/dist/adapters/thread/langchain/workflow.js +8 -4
- package/dist/adapters/thread/langchain/workflow.js.map +1 -1
- package/dist/index.cjs +266 -48
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -6
- package/dist/index.d.ts +6 -6
- package/dist/index.js +263 -49
- package/dist/index.js.map +1 -1
- package/dist/{proxy-D_3x7RN4.d.cts → proxy-BAKzNGRq.d.cts} +1 -1
- package/dist/{proxy-CUlKSvZS.d.ts → proxy-DO_MXbY4.d.ts} +1 -1
- package/dist/{thread-manager-CVu7o2cs.d.ts → thread-manager-CcRXasqs.d.ts} +2 -4
- package/dist/{thread-manager-HSwyh28L.d.cts → thread-manager-ClwSaUnj.d.cts} +2 -4
- package/dist/{thread-manager-c1gPopAG.d.ts → thread-manager-D-7lp1JK.d.ts} +2 -4
- package/dist/{thread-manager-wGi-LqIP.d.cts → thread-manager-Y8Ucf0Tf.d.cts} +2 -4
- package/dist/{types-C06FwR96.d.cts → types-Bcbiq8iv.d.cts} +162 -44
- package/dist/{types-BH_IRryz.d.ts → types-DpHTX-iO.d.ts} +54 -6
- package/dist/{types-DNr31FzL.d.ts → types-Dt8-HBBT.d.ts} +162 -44
- package/dist/{types-BaOw4hKI.d.cts → types-hFFi-Zd9.d.cts} +54 -6
- package/dist/{workflow-CSCkpwAL.d.ts → workflow-Bmf9EtDW.d.ts} +82 -2
- package/dist/{workflow-DuvMZ8Vm.d.cts → workflow-Bx9utBwb.d.cts} +82 -2
- package/dist/workflow.cjs +188 -37
- package/dist/workflow.cjs.map +1 -1
- package/dist/workflow.d.cts +2 -2
- package/dist/workflow.d.ts +2 -2
- package/dist/workflow.js +185 -38
- package/dist/workflow.js.map +1 -1
- package/package.json +11 -1
- package/src/adapters/thread/adapter-id.test.ts +42 -0
- package/src/adapters/thread/anthropic/activities.ts +33 -7
- package/src/adapters/thread/anthropic/adapter-id.ts +16 -0
- package/src/adapters/thread/anthropic/fork-transform.test.ts +291 -0
- package/src/adapters/thread/anthropic/index.ts +3 -0
- package/src/adapters/thread/anthropic/model-invoker.ts +8 -4
- package/src/adapters/thread/anthropic/proxy.ts +3 -2
- package/src/adapters/thread/anthropic/thread-manager.ts +27 -4
- package/src/adapters/thread/google-genai/activities.ts +33 -7
- package/src/adapters/thread/google-genai/adapter-id.ts +16 -0
- package/src/adapters/thread/google-genai/fork-transform.test.ts +149 -0
- package/src/adapters/thread/google-genai/index.ts +3 -0
- package/src/adapters/thread/google-genai/model-invoker.ts +7 -3
- package/src/adapters/thread/google-genai/proxy.ts +3 -2
- package/src/adapters/thread/google-genai/thread-manager.ts +27 -4
- package/src/adapters/thread/index.ts +39 -0
- package/src/adapters/thread/langchain/activities.ts +33 -7
- package/src/adapters/thread/langchain/adapter-id.ts +16 -0
- package/src/adapters/thread/langchain/fork-transform.test.ts +142 -0
- package/src/adapters/thread/langchain/index.ts +3 -0
- package/src/adapters/thread/langchain/model-invoker.ts +8 -3
- package/src/adapters/thread/langchain/proxy.ts +3 -2
- package/src/adapters/thread/langchain/thread-manager.ts +27 -4
- package/src/lib/lifecycle.ts +3 -1
- package/src/lib/model/types.ts +7 -10
- package/src/lib/session/session-edge-cases.integration.test.ts +131 -63
- package/src/lib/session/session.integration.test.ts +174 -5
- package/src/lib/session/session.ts +68 -28
- package/src/lib/session/types.ts +60 -9
- package/src/lib/state/index.ts +1 -0
- package/src/lib/state/manager.integration.test.ts +109 -0
- package/src/lib/state/manager.ts +38 -8
- package/src/lib/state/types.ts +25 -0
- package/src/lib/subagent/handler.ts +124 -11
- package/src/lib/subagent/index.ts +5 -1
- package/src/lib/subagent/subagent.integration.test.ts +528 -0
- package/src/lib/subagent/types.ts +63 -14
- package/src/lib/subagent/workflow.ts +29 -2
- package/src/lib/thread/index.ts +5 -0
- package/src/lib/thread/keys.test.ts +101 -0
- package/src/lib/thread/keys.ts +94 -0
- package/src/lib/thread/manager.test.ts +139 -0
- package/src/lib/thread/manager.ts +92 -14
- package/src/lib/thread/proxy.ts +2 -0
- package/src/lib/thread/types.ts +60 -6
- package/src/lib/tool-router/types.ts +16 -8
- package/src/lib/types.ts +12 -0
- package/src/workflow.ts +12 -1
- package/tsup.config.ts +1 -0
|
@@ -2,6 +2,7 @@ import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import type { ToolResultConfig, TokenUsage } from "../types";
|
|
4
4
|
import type { ThreadOps } from "./types";
|
|
5
|
+
import type { PersistedThreadState } from "../state/types";
|
|
5
6
|
import type { RunAgentActivity } from "../model/types";
|
|
6
7
|
import type { RawToolCall } from "../tool-router/types";
|
|
7
8
|
import type { SandboxOps } from "../sandbox/types";
|
|
@@ -73,7 +74,6 @@ type TurnScript = {
|
|
|
73
74
|
message: unknown;
|
|
74
75
|
toolCalls: RawToolCall[];
|
|
75
76
|
usage?: TokenUsage;
|
|
76
|
-
threadLengthAtCall?: number;
|
|
77
77
|
};
|
|
78
78
|
|
|
79
79
|
/**
|
|
@@ -96,6 +96,7 @@ function toActivityInterface<TContent = string>(
|
|
|
96
96
|
|
|
97
97
|
function createMockThreadOps() {
|
|
98
98
|
const log: { op: string; args: unknown[] }[] = [];
|
|
99
|
+
const stateStore = new Map<string, PersistedThreadState>();
|
|
99
100
|
const ops = toActivityInterface({
|
|
100
101
|
initializeThread: async (threadId) => {
|
|
101
102
|
log.push({ op: "initializeThread", args: [threadId] });
|
|
@@ -114,33 +115,43 @@ function createMockThreadOps() {
|
|
|
114
115
|
},
|
|
115
116
|
forkThread: async (source, target) => {
|
|
116
117
|
log.push({ op: "forkThread", args: [source, target] });
|
|
118
|
+
const src = stateStore.get(source);
|
|
119
|
+
if (src) stateStore.set(target, src);
|
|
117
120
|
},
|
|
118
|
-
truncateThread: async (threadId,
|
|
119
|
-
log.push({ op: "truncateThread", args: [threadId,
|
|
121
|
+
truncateThread: async (threadId, messageId) => {
|
|
122
|
+
log.push({ op: "truncateThread", args: [threadId, messageId] });
|
|
123
|
+
},
|
|
124
|
+
loadThreadState: async (threadId) => {
|
|
125
|
+
log.push({ op: "loadThreadState", args: [threadId] });
|
|
126
|
+
return stateStore.get(threadId) ?? null;
|
|
127
|
+
},
|
|
128
|
+
saveThreadState: async (threadId, state) => {
|
|
129
|
+
log.push({ op: "saveThreadState", args: [threadId, state] });
|
|
130
|
+
stateStore.set(threadId, state);
|
|
120
131
|
},
|
|
121
132
|
});
|
|
122
|
-
return { ops, log };
|
|
133
|
+
return { ops, log, stateStore };
|
|
123
134
|
}
|
|
124
135
|
|
|
125
136
|
function createScriptedRunAgent(
|
|
126
|
-
turns: TurnScript[]
|
|
137
|
+
turns: TurnScript[],
|
|
138
|
+
assistantIdLog?: string[]
|
|
127
139
|
): RunAgentActivity<unknown> {
|
|
128
140
|
let call = 0;
|
|
129
|
-
return async () => {
|
|
141
|
+
return async (config) => {
|
|
142
|
+
assistantIdLog?.push(config.assistantMessageId);
|
|
130
143
|
const turn = turns[call++];
|
|
131
144
|
if (!turn) {
|
|
132
145
|
return {
|
|
133
146
|
message: "done",
|
|
134
147
|
rawToolCalls: [],
|
|
135
148
|
usage: undefined,
|
|
136
|
-
threadLengthAtCall: 0,
|
|
137
149
|
};
|
|
138
150
|
}
|
|
139
151
|
return {
|
|
140
152
|
message: turn.message,
|
|
141
153
|
rawToolCalls: turn.toolCalls,
|
|
142
154
|
usage: turn.usage,
|
|
143
|
-
threadLengthAtCall: turn.threadLengthAtCall ?? 0,
|
|
144
155
|
};
|
|
145
156
|
};
|
|
146
157
|
}
|
|
@@ -797,8 +808,15 @@ describe("createSession edge cases", () => {
|
|
|
797
808
|
forkThread: async (source, target) => {
|
|
798
809
|
log.push({ op: "forkThread", args: [source, target] });
|
|
799
810
|
},
|
|
800
|
-
truncateThread: async (threadId,
|
|
801
|
-
log.push({ op: "truncateThread", args: [threadId,
|
|
811
|
+
truncateThread: async (threadId, messageId) => {
|
|
812
|
+
log.push({ op: "truncateThread", args: [threadId, messageId] });
|
|
813
|
+
},
|
|
814
|
+
loadThreadState: async (threadId) => {
|
|
815
|
+
log.push({ op: "loadThreadState", args: [threadId] });
|
|
816
|
+
return null;
|
|
817
|
+
},
|
|
818
|
+
saveThreadState: async (threadId, state) => {
|
|
819
|
+
log.push({ op: "saveThreadState", args: [threadId, state] });
|
|
802
820
|
},
|
|
803
821
|
});
|
|
804
822
|
|
|
@@ -1686,6 +1704,14 @@ describe("createSession edge cases", () => {
|
|
|
1686
1704
|
});
|
|
1687
1705
|
|
|
1688
1706
|
// --- Rewind flow: tool requests rewind and turn is retried -------------
|
|
1707
|
+
//
|
|
1708
|
+
// The session no longer issues an explicit truncateThread on rewind.
|
|
1709
|
+
// Instead it reuses the pre-generated assistantMessageId for the retry,
|
|
1710
|
+
// and the runAgent activity itself truncates the thread from that id
|
|
1711
|
+
// on entry. These tests assert the observable behaviour: the rewinding
|
|
1712
|
+
// tool's result is not appended, turns are consumed as expected, and
|
|
1713
|
+
// the retry invocation receives the same assistantMessageId so the
|
|
1714
|
+
// invoker can wipe the prior attempt.
|
|
1689
1715
|
|
|
1690
1716
|
it("rewinds the turn when a tool handler returns rewind:true", async () => {
|
|
1691
1717
|
const { ops, log } = createMockThreadOps();
|
|
@@ -1708,20 +1734,24 @@ describe("createSession edge cases", () => {
|
|
|
1708
1734
|
},
|
|
1709
1735
|
});
|
|
1710
1736
|
|
|
1737
|
+
const assistantIds: string[] = [];
|
|
1711
1738
|
const session = await createSession({
|
|
1712
1739
|
agentName: "TestAgent",
|
|
1713
1740
|
thread: { mode: "new", threadId: "thread-1" },
|
|
1714
|
-
runAgent: createScriptedRunAgent(
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1741
|
+
runAgent: createScriptedRunAgent(
|
|
1742
|
+
[
|
|
1743
|
+
{
|
|
1744
|
+
message: "attempt-1",
|
|
1745
|
+
toolCalls: [{ id: "tc-1", name: "Rewind", args: {} }],
|
|
1746
|
+
},
|
|
1747
|
+
{
|
|
1748
|
+
message: "attempt-2",
|
|
1749
|
+
toolCalls: [{ id: "tc-2", name: "Rewind", args: {} }],
|
|
1750
|
+
},
|
|
1751
|
+
{ message: "done", toolCalls: [] },
|
|
1752
|
+
],
|
|
1753
|
+
assistantIds
|
|
1754
|
+
),
|
|
1725
1755
|
threadOps: ops,
|
|
1726
1756
|
tools: { Rewind: rewindTool },
|
|
1727
1757
|
buildContextMessage: () => "go",
|
|
@@ -1737,8 +1767,16 @@ describe("createSession edge cases", () => {
|
|
|
1737
1767
|
expect(result.finalMessage).toBe("done");
|
|
1738
1768
|
expect(rewindAttempts).toBe(2);
|
|
1739
1769
|
|
|
1770
|
+
// Session does not call truncateThread directly on rewind — the
|
|
1771
|
+
// invoker truncates on entry via the reused assistantMessageId.
|
|
1740
1772
|
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1741
|
-
expect(truncateOps).toHaveLength(
|
|
1773
|
+
expect(truncateOps).toHaveLength(0);
|
|
1774
|
+
|
|
1775
|
+
// The first and second calls reuse the same assistantMessageId
|
|
1776
|
+
// (rewind retry), then the third uses a fresh id.
|
|
1777
|
+
expect(assistantIds).toHaveLength(3);
|
|
1778
|
+
expect(assistantIds[0]).toBe(assistantIds[1]);
|
|
1779
|
+
expect(assistantIds[1]).not.toBe(assistantIds[2]);
|
|
1742
1780
|
|
|
1743
1781
|
const noRewindToolResult = log.filter((l) => {
|
|
1744
1782
|
if (l.op !== "appendToolResult") return false;
|
|
@@ -1749,9 +1787,15 @@ describe("createSession edge cases", () => {
|
|
|
1749
1787
|
|
|
1750
1788
|
const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
|
|
1751
1789
|
expect(agentAppends).toHaveLength(3);
|
|
1790
|
+
// The first two assistant appends reuse the same id — the second
|
|
1791
|
+
// will be a no-op in the real adapter because truncateFromId clears
|
|
1792
|
+
// the dedup marker for the old one before the retry invocation.
|
|
1793
|
+
const asstIds = agentAppends.map((l) => l.args[1]);
|
|
1794
|
+
expect(asstIds[0]).toBe(asstIds[1]);
|
|
1795
|
+
expect(asstIds[1]).not.toBe(asstIds[2]);
|
|
1752
1796
|
});
|
|
1753
1797
|
|
|
1754
|
-
it("
|
|
1798
|
+
it("reuses the assistantMessageId on rewind even with sibling tool calls", async () => {
|
|
1755
1799
|
const { ops, log } = createMockThreadOps();
|
|
1756
1800
|
|
|
1757
1801
|
let rewindFired = false;
|
|
@@ -1776,22 +1820,23 @@ describe("createSession edge cases", () => {
|
|
|
1776
1820
|
},
|
|
1777
1821
|
});
|
|
1778
1822
|
|
|
1823
|
+
const assistantIds: string[] = [];
|
|
1779
1824
|
const session = await createSession({
|
|
1780
1825
|
agentName: "TestAgent",
|
|
1781
1826
|
thread: { mode: "new", threadId: "thread-1" },
|
|
1782
|
-
runAgent: createScriptedRunAgent(
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1827
|
+
runAgent: createScriptedRunAgent(
|
|
1828
|
+
[
|
|
1829
|
+
{
|
|
1830
|
+
message: "parallel",
|
|
1831
|
+
toolCalls: [
|
|
1832
|
+
{ id: "tc-sibling", name: "Sibling", args: {} },
|
|
1833
|
+
{ id: "tc-rewind", name: "Rewind", args: {} },
|
|
1834
|
+
],
|
|
1835
|
+
},
|
|
1836
|
+
{ message: "done", toolCalls: [] },
|
|
1837
|
+
],
|
|
1838
|
+
assistantIds
|
|
1839
|
+
),
|
|
1795
1840
|
threadOps: ops,
|
|
1796
1841
|
tools: { Rewind: rewindTool, Sibling: siblingTool },
|
|
1797
1842
|
buildContextMessage: () => "go",
|
|
@@ -1805,13 +1850,15 @@ describe("createSession edge cases", () => {
|
|
|
1805
1850
|
|
|
1806
1851
|
expect(result.exitReason).toBe("completed");
|
|
1807
1852
|
|
|
1808
|
-
//
|
|
1809
|
-
//
|
|
1853
|
+
// No explicit truncate from the session — the invoker will do it
|
|
1854
|
+
// on entry using the reused assistantMessageId.
|
|
1810
1855
|
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1811
|
-
expect(truncateOps).toHaveLength(
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1856
|
+
expect(truncateOps).toHaveLength(0);
|
|
1857
|
+
|
|
1858
|
+
// The rewound turn and its retry share one assistantMessageId; the
|
|
1859
|
+
// scripted `done` turn is that retry, so no fresh id is generated.
|
|
1860
|
+
expect(assistantIds).toHaveLength(2);
|
|
1861
|
+
expect(assistantIds[0]).toBe(assistantIds[1]);
|
|
1815
1862
|
|
|
1816
1863
|
// Rewinding tool never appends its own result.
|
|
1817
1864
|
const rewindResultAppends = log.filter((l) => {
|
|
@@ -1821,10 +1868,11 @@ describe("createSession edge cases", () => {
|
|
|
1821
1868
|
});
|
|
1822
1869
|
expect(rewindResultAppends).toHaveLength(0);
|
|
1823
1870
|
|
|
1824
|
-
// Two assistant messages expected: one from the rewound turn, one
|
|
1825
|
-
// the successful retry.
|
|
1871
|
+
// Two assistant messages expected: one from the rewound turn, one
|
|
1872
|
+
// from the successful retry — sharing the same id.
|
|
1826
1873
|
const agentAppends = log.filter((l) => l.op === "appendAgentMessage");
|
|
1827
1874
|
expect(agentAppends).toHaveLength(2);
|
|
1875
|
+
expect(agentAppends[0]?.args[1]).toBe(agentAppends[1]?.args[1]);
|
|
1828
1876
|
});
|
|
1829
1877
|
|
|
1830
1878
|
it("does not rewind when the rewinding tool is no longer present after retry", async () => {
|
|
@@ -1844,21 +1892,25 @@ describe("createSession edge cases", () => {
|
|
|
1844
1892
|
},
|
|
1845
1893
|
});
|
|
1846
1894
|
|
|
1895
|
+
const assistantIds: string[] = [];
|
|
1847
1896
|
const session = await createSession({
|
|
1848
1897
|
agentName: "TestAgent",
|
|
1849
1898
|
thread: { mode: "new", threadId: "thread-1" },
|
|
1850
1899
|
maxTurns: 5,
|
|
1851
|
-
runAgent: createScriptedRunAgent(
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1900
|
+
runAgent: createScriptedRunAgent(
|
|
1901
|
+
[
|
|
1902
|
+
{
|
|
1903
|
+
message: "call-1",
|
|
1904
|
+
toolCalls: [{ id: "tc-1", name: "RewindOnce", args: {} }],
|
|
1905
|
+
},
|
|
1906
|
+
{
|
|
1907
|
+
message: "call-2",
|
|
1908
|
+
toolCalls: [{ id: "tc-2", name: "RewindOnce", args: {} }],
|
|
1909
|
+
},
|
|
1910
|
+
{ message: "done", toolCalls: [] },
|
|
1911
|
+
],
|
|
1912
|
+
assistantIds
|
|
1913
|
+
),
|
|
1862
1914
|
threadOps: ops,
|
|
1863
1915
|
tools: { RewindOnce: rewindOnce },
|
|
1864
1916
|
buildContextMessage: () => "go",
|
|
@@ -1878,7 +1930,12 @@ describe("createSession edge cases", () => {
|
|
|
1878
1930
|
expect(attempts).toBe(2);
|
|
1879
1931
|
|
|
1880
1932
|
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1881
|
-
expect(truncateOps).toHaveLength(
|
|
1933
|
+
expect(truncateOps).toHaveLength(0);
|
|
1934
|
+
|
|
1935
|
+
// Turn 1 rewound → call 1 & 2 share an id, call 3 fresh.
|
|
1936
|
+
expect(assistantIds).toHaveLength(3);
|
|
1937
|
+
expect(assistantIds[0]).toBe(assistantIds[1]);
|
|
1938
|
+
expect(assistantIds[1]).not.toBe(assistantIds[2]);
|
|
1882
1939
|
});
|
|
1883
1940
|
|
|
1884
1941
|
it("bails out with max_turns when a tool keeps requesting rewind", async () => {
|
|
@@ -1895,16 +1952,20 @@ describe("createSession edge cases", () => {
|
|
|
1895
1952
|
},
|
|
1896
1953
|
});
|
|
1897
1954
|
|
|
1955
|
+
const assistantIds: string[] = [];
|
|
1898
1956
|
const session = await createSession({
|
|
1899
1957
|
agentName: "TestAgent",
|
|
1900
1958
|
thread: { mode: "new", threadId: "thread-1" },
|
|
1901
1959
|
maxTurns: 3,
|
|
1902
|
-
runAgent: createScriptedRunAgent(
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1960
|
+
runAgent: createScriptedRunAgent(
|
|
1961
|
+
[
|
|
1962
|
+
{ message: "t1", toolCalls: [{ id: "tc-1", name: "AlwaysRewind", args: {} }] },
|
|
1963
|
+
{ message: "t2", toolCalls: [{ id: "tc-2", name: "AlwaysRewind", args: {} }] },
|
|
1964
|
+
{ message: "t3", toolCalls: [{ id: "tc-3", name: "AlwaysRewind", args: {} }] },
|
|
1965
|
+
{ message: "t4", toolCalls: [{ id: "tc-4", name: "AlwaysRewind", args: {} }] },
|
|
1966
|
+
],
|
|
1967
|
+
assistantIds
|
|
1968
|
+
),
|
|
1908
1969
|
threadOps: ops,
|
|
1909
1970
|
tools: { AlwaysRewind: alwaysRewind },
|
|
1910
1971
|
buildContextMessage: () => "go",
|
|
@@ -1920,7 +1981,14 @@ describe("createSession edge cases", () => {
|
|
|
1920
1981
|
expect(result.usage.turns).toBe(3);
|
|
1921
1982
|
expect(attempts).toBe(3);
|
|
1922
1983
|
|
|
1984
|
+
// Session does not issue explicit truncates; invoker-side
|
|
1985
|
+
// truncation isn't visible here because runAgent is mocked.
|
|
1923
1986
|
const truncateOps = log.filter((l) => l.op === "truncateThread");
|
|
1924
|
-
expect(truncateOps).toHaveLength(
|
|
1987
|
+
expect(truncateOps).toHaveLength(0);
|
|
1988
|
+
|
|
1989
|
+
// Every attempt reuses the same assistantMessageId — the LLM call
|
|
1990
|
+
// truncates-from-id on each replay.
|
|
1991
|
+
expect(assistantIds).toHaveLength(3);
|
|
1992
|
+
expect(new Set(assistantIds).size).toBe(1);
|
|
1925
1993
|
});
|
|
1926
1994
|
});
|
|
@@ -2,6 +2,7 @@ import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import type { ToolResultConfig, TokenUsage } from "../types";
|
|
4
4
|
import type { ThreadOps } from "./types";
|
|
5
|
+
import type { PersistedThreadState } from "../state/types";
|
|
5
6
|
import type { RunAgentActivity } from "../model/types";
|
|
6
7
|
import type { RawToolCall } from "../tool-router/types";
|
|
7
8
|
import type { SandboxOps } from "../sandbox/types";
|
|
@@ -97,6 +98,7 @@ function toActivityInterface(raw: ThreadOps): ActivityInterfaceFor<ThreadOps> {
|
|
|
97
98
|
|
|
98
99
|
function createMockThreadOps() {
|
|
99
100
|
const log: { op: string; args: unknown[] }[] = [];
|
|
101
|
+
const stateStore = new Map<string, PersistedThreadState>();
|
|
100
102
|
|
|
101
103
|
const ops = toActivityInterface({
|
|
102
104
|
initializeThread: async (threadId) => {
|
|
@@ -116,13 +118,23 @@ function createMockThreadOps() {
|
|
|
116
118
|
},
|
|
117
119
|
forkThread: async (source, target) => {
|
|
118
120
|
log.push({ op: "forkThread", args: [source, target] });
|
|
121
|
+
const src = stateStore.get(source);
|
|
122
|
+
if (src) stateStore.set(target, src);
|
|
119
123
|
},
|
|
120
|
-
truncateThread: async (threadId,
|
|
121
|
-
log.push({ op: "truncateThread", args: [threadId,
|
|
124
|
+
truncateThread: async (threadId, messageId) => {
|
|
125
|
+
log.push({ op: "truncateThread", args: [threadId, messageId] });
|
|
126
|
+
},
|
|
127
|
+
loadThreadState: async (threadId) => {
|
|
128
|
+
log.push({ op: "loadThreadState", args: [threadId] });
|
|
129
|
+
return stateStore.get(threadId) ?? null;
|
|
130
|
+
},
|
|
131
|
+
saveThreadState: async (threadId, state) => {
|
|
132
|
+
log.push({ op: "saveThreadState", args: [threadId, state] });
|
|
133
|
+
stateStore.set(threadId, state);
|
|
122
134
|
},
|
|
123
135
|
});
|
|
124
136
|
|
|
125
|
-
return { ops, log };
|
|
137
|
+
return { ops, log, stateStore };
|
|
126
138
|
}
|
|
127
139
|
|
|
128
140
|
type TurnScript = {
|
|
@@ -142,14 +154,12 @@ function createScriptedRunAgent(
|
|
|
142
154
|
message: "done",
|
|
143
155
|
rawToolCalls: [],
|
|
144
156
|
usage: undefined,
|
|
145
|
-
threadLengthAtCall: 0,
|
|
146
157
|
};
|
|
147
158
|
}
|
|
148
159
|
return {
|
|
149
160
|
message: turn.message,
|
|
150
161
|
rawToolCalls: turn.toolCalls,
|
|
151
162
|
usage: turn.usage,
|
|
152
|
-
threadLengthAtCall: 0,
|
|
153
163
|
};
|
|
154
164
|
};
|
|
155
165
|
}
|
|
@@ -1109,4 +1119,163 @@ describe("createSession integration", () => {
|
|
|
1109
1119
|
]);
|
|
1110
1120
|
expect(sandboxLog).not.toContain("create");
|
|
1111
1121
|
});
|
|
1122
|
+
|
|
1123
|
+
// --- Persistent thread state ---
|
|
1124
|
+
|
|
1125
|
+
it("saves tasks + custom state to the thread store on session exit", async () => {
|
|
1126
|
+
const { ops, log, stateStore } = createMockThreadOps();
|
|
1127
|
+
|
|
1128
|
+
const writeTasks = defineTool({
|
|
1129
|
+
name: "WriteTasks" as const,
|
|
1130
|
+
description: "create tasks via state manager",
|
|
1131
|
+
schema: z.object({}),
|
|
1132
|
+
handler: async (
|
|
1133
|
+
_args: Record<string, never>,
|
|
1134
|
+
_ctx: RouterContext
|
|
1135
|
+
): Promise<ToolHandlerResponse<null>> => ({
|
|
1136
|
+
toolResponse: "ok",
|
|
1137
|
+
data: null,
|
|
1138
|
+
}),
|
|
1139
|
+
});
|
|
1140
|
+
|
|
1141
|
+
const session = await createSession({
|
|
1142
|
+
agentName: "TestAgent",
|
|
1143
|
+
thread: { mode: "new", threadId: "thread-save" },
|
|
1144
|
+
runAgent: createScriptedRunAgent([
|
|
1145
|
+
{
|
|
1146
|
+
message: "doing work",
|
|
1147
|
+
toolCalls: [{ id: "tc-1", name: "WriteTasks", args: {} }],
|
|
1148
|
+
},
|
|
1149
|
+
{ message: "done", toolCalls: [] },
|
|
1150
|
+
]),
|
|
1151
|
+
threadOps: ops,
|
|
1152
|
+
tools: { WriteTasks: writeTasks },
|
|
1153
|
+
buildContextMessage: () => "go",
|
|
1154
|
+
});
|
|
1155
|
+
|
|
1156
|
+
const stateManager = createAgentStateManager<{ note: string }>({
|
|
1157
|
+
initialState: { systemPrompt: "test", note: "hello" },
|
|
1158
|
+
});
|
|
1159
|
+
|
|
1160
|
+
stateManager.setTask({
|
|
1161
|
+
id: "task-A",
|
|
1162
|
+
subject: "A",
|
|
1163
|
+
description: "A",
|
|
1164
|
+
activeForm: "doing A",
|
|
1165
|
+
status: "in_progress",
|
|
1166
|
+
metadata: { priority: "high" },
|
|
1167
|
+
blockedBy: [],
|
|
1168
|
+
blocks: [],
|
|
1169
|
+
});
|
|
1170
|
+
|
|
1171
|
+
const result = await session.runSession({ stateManager });
|
|
1172
|
+
expect(result.exitReason).toBe("completed");
|
|
1173
|
+
|
|
1174
|
+
const saves = log.filter((l) => l.op === "saveThreadState");
|
|
1175
|
+
expect(saves).toHaveLength(1);
|
|
1176
|
+
const saved = stateStore.get("thread-save");
|
|
1177
|
+
expect(saved).toBeDefined();
|
|
1178
|
+
expect(saved?.tasks).toHaveLength(1);
|
|
1179
|
+
if (saved) {
|
|
1180
|
+
expect(at(saved.tasks, 0)[0]).toBe("task-A");
|
|
1181
|
+
}
|
|
1182
|
+
expect(saved?.custom).toEqual({ note: "hello" });
|
|
1183
|
+
});
|
|
1184
|
+
|
|
1185
|
+
it("rehydrates tasks + custom state on continue before the agent loop runs", async () => {
|
|
1186
|
+
const { ops, stateStore } = createMockThreadOps();
|
|
1187
|
+
|
|
1188
|
+
stateStore.set("thread-cont", {
|
|
1189
|
+
tasks: [
|
|
1190
|
+
[
|
|
1191
|
+
"task-restored",
|
|
1192
|
+
{
|
|
1193
|
+
id: "task-restored",
|
|
1194
|
+
subject: "restored",
|
|
1195
|
+
description: "restored",
|
|
1196
|
+
activeForm: "restoring",
|
|
1197
|
+
status: "pending",
|
|
1198
|
+
metadata: {},
|
|
1199
|
+
blockedBy: [],
|
|
1200
|
+
blocks: [],
|
|
1201
|
+
},
|
|
1202
|
+
],
|
|
1203
|
+
],
|
|
1204
|
+
custom: { label: "from-prior-run" },
|
|
1205
|
+
});
|
|
1206
|
+
|
|
1207
|
+
type State = { label: string };
|
|
1208
|
+
let observedTasksBeforeFirstTurn: string[] = [];
|
|
1209
|
+
let observedLabelBeforeFirstTurn: string | undefined;
|
|
1210
|
+
|
|
1211
|
+
const session = await createSession({
|
|
1212
|
+
agentName: "TestAgent",
|
|
1213
|
+
thread: { mode: "continue", threadId: "thread-cont" },
|
|
1214
|
+
runAgent: async () => {
|
|
1215
|
+
observedTasksBeforeFirstTurn = stateManager.getTasks().map((t) => t.id);
|
|
1216
|
+
observedLabelBeforeFirstTurn = stateManager.get("label");
|
|
1217
|
+
return { message: "done", rawToolCalls: [], usage: undefined };
|
|
1218
|
+
},
|
|
1219
|
+
threadOps: ops,
|
|
1220
|
+
buildContextMessage: () => "continue please",
|
|
1221
|
+
});
|
|
1222
|
+
|
|
1223
|
+
const stateManager = createAgentStateManager<State>({
|
|
1224
|
+
initialState: { systemPrompt: "test", label: "initial" },
|
|
1225
|
+
});
|
|
1226
|
+
|
|
1227
|
+
await session.runSession({ stateManager });
|
|
1228
|
+
|
|
1229
|
+
expect(observedTasksBeforeFirstTurn).toEqual(["task-restored"]);
|
|
1230
|
+
expect(observedLabelBeforeFirstTurn).toBe("from-prior-run");
|
|
1231
|
+
});
|
|
1232
|
+
|
|
1233
|
+
it("fork copies the source thread's state slice into the new thread", async () => {
|
|
1234
|
+
const { ops, log, stateStore } = createMockThreadOps();
|
|
1235
|
+
|
|
1236
|
+
stateStore.set("source-thread", {
|
|
1237
|
+
tasks: [
|
|
1238
|
+
[
|
|
1239
|
+
"task-src",
|
|
1240
|
+
{
|
|
1241
|
+
id: "task-src",
|
|
1242
|
+
subject: "src",
|
|
1243
|
+
description: "src",
|
|
1244
|
+
activeForm: "src",
|
|
1245
|
+
status: "completed",
|
|
1246
|
+
metadata: {},
|
|
1247
|
+
blockedBy: [],
|
|
1248
|
+
blocks: [],
|
|
1249
|
+
},
|
|
1250
|
+
],
|
|
1251
|
+
],
|
|
1252
|
+
custom: { counter: 3 },
|
|
1253
|
+
});
|
|
1254
|
+
|
|
1255
|
+
const session = await createSession({
|
|
1256
|
+
agentName: "TestAgent",
|
|
1257
|
+
thread: { mode: "fork", threadId: "source-thread" },
|
|
1258
|
+
runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
|
|
1259
|
+
threadOps: ops,
|
|
1260
|
+
buildContextMessage: () => "continue",
|
|
1261
|
+
});
|
|
1262
|
+
|
|
1263
|
+
type State = { counter: number };
|
|
1264
|
+
const stateManager = createAgentStateManager<State>({
|
|
1265
|
+
initialState: { systemPrompt: "test", counter: 0 },
|
|
1266
|
+
});
|
|
1267
|
+
|
|
1268
|
+
const result = await session.runSession({ stateManager });
|
|
1269
|
+
expect(result.exitReason).toBe("completed");
|
|
1270
|
+
|
|
1271
|
+
const forkOps = log.filter((l) => l.op === "forkThread");
|
|
1272
|
+
expect(forkOps).toHaveLength(1);
|
|
1273
|
+
expect(at(forkOps, 0).args[0]).toBe("source-thread");
|
|
1274
|
+
|
|
1275
|
+
expect(stateManager.getTask("task-src")).toBeDefined();
|
|
1276
|
+
expect(stateManager.get("counter")).toBe(3);
|
|
1277
|
+
|
|
1278
|
+
const newThreadSlice = stateStore.get(result.threadId);
|
|
1279
|
+
expect(newThreadSlice?.tasks).toHaveLength(1);
|
|
1280
|
+
});
|
|
1112
1281
|
});
|