zeitlich 0.2.21 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/README.md +303 -105
  2. package/dist/adapters/sandbox/daytona/index.cjs +7 -1
  3. package/dist/adapters/sandbox/daytona/index.cjs.map +1 -1
  4. package/dist/adapters/sandbox/daytona/index.d.cts +3 -1
  5. package/dist/adapters/sandbox/daytona/index.d.ts +3 -1
  6. package/dist/adapters/sandbox/daytona/index.js +7 -1
  7. package/dist/adapters/sandbox/daytona/index.js.map +1 -1
  8. package/dist/adapters/sandbox/daytona/workflow.cjs +33 -0
  9. package/dist/adapters/sandbox/daytona/workflow.cjs.map +1 -0
  10. package/dist/adapters/sandbox/daytona/workflow.d.cts +27 -0
  11. package/dist/adapters/sandbox/daytona/workflow.d.ts +27 -0
  12. package/dist/adapters/sandbox/daytona/workflow.js +31 -0
  13. package/dist/adapters/sandbox/daytona/workflow.js.map +1 -0
  14. package/dist/adapters/sandbox/inmemory/index.cjs +18 -1
  15. package/dist/adapters/sandbox/inmemory/index.cjs.map +1 -1
  16. package/dist/adapters/sandbox/inmemory/index.d.cts +4 -2
  17. package/dist/adapters/sandbox/inmemory/index.d.ts +4 -2
  18. package/dist/adapters/sandbox/inmemory/index.js +18 -1
  19. package/dist/adapters/sandbox/inmemory/index.js.map +1 -1
  20. package/dist/adapters/sandbox/inmemory/workflow.cjs +33 -0
  21. package/dist/adapters/sandbox/inmemory/workflow.cjs.map +1 -0
  22. package/dist/adapters/sandbox/inmemory/workflow.d.cts +25 -0
  23. package/dist/adapters/sandbox/inmemory/workflow.d.ts +25 -0
  24. package/dist/adapters/sandbox/inmemory/workflow.js +31 -0
  25. package/dist/adapters/sandbox/inmemory/workflow.js.map +1 -0
  26. package/dist/adapters/sandbox/virtual/index.cjs +36 -9
  27. package/dist/adapters/sandbox/virtual/index.cjs.map +1 -1
  28. package/dist/adapters/sandbox/virtual/index.d.cts +8 -5
  29. package/dist/adapters/sandbox/virtual/index.d.ts +8 -5
  30. package/dist/adapters/sandbox/virtual/index.js +36 -9
  31. package/dist/adapters/sandbox/virtual/index.js.map +1 -1
  32. package/dist/adapters/sandbox/virtual/workflow.cjs +33 -0
  33. package/dist/adapters/sandbox/virtual/workflow.cjs.map +1 -0
  34. package/dist/adapters/sandbox/virtual/workflow.d.cts +27 -0
  35. package/dist/adapters/sandbox/virtual/workflow.d.ts +27 -0
  36. package/dist/adapters/sandbox/virtual/workflow.js +31 -0
  37. package/dist/adapters/sandbox/virtual/workflow.js.map +1 -0
  38. package/dist/adapters/thread/google-genai/index.cjs +9 -1
  39. package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
  40. package/dist/adapters/thread/google-genai/index.d.cts +31 -19
  41. package/dist/adapters/thread/google-genai/index.d.ts +31 -19
  42. package/dist/adapters/thread/google-genai/index.js +9 -1
  43. package/dist/adapters/thread/google-genai/index.js.map +1 -1
  44. package/dist/adapters/thread/google-genai/workflow.cjs +33 -0
  45. package/dist/adapters/thread/google-genai/workflow.cjs.map +1 -0
  46. package/dist/adapters/thread/google-genai/workflow.d.cts +32 -0
  47. package/dist/adapters/thread/google-genai/workflow.d.ts +32 -0
  48. package/dist/adapters/thread/google-genai/workflow.js +31 -0
  49. package/dist/adapters/thread/google-genai/workflow.js.map +1 -0
  50. package/dist/adapters/thread/langchain/index.cjs +9 -1
  51. package/dist/adapters/thread/langchain/index.cjs.map +1 -1
  52. package/dist/adapters/thread/langchain/index.d.cts +27 -16
  53. package/dist/adapters/thread/langchain/index.d.ts +27 -16
  54. package/dist/adapters/thread/langchain/index.js +9 -1
  55. package/dist/adapters/thread/langchain/index.js.map +1 -1
  56. package/dist/adapters/thread/langchain/workflow.cjs +33 -0
  57. package/dist/adapters/thread/langchain/workflow.cjs.map +1 -0
  58. package/dist/adapters/thread/langchain/workflow.d.cts +32 -0
  59. package/dist/adapters/thread/langchain/workflow.d.ts +32 -0
  60. package/dist/adapters/thread/langchain/workflow.js +31 -0
  61. package/dist/adapters/thread/langchain/workflow.js.map +1 -0
  62. package/dist/index.cjs +282 -90
  63. package/dist/index.cjs.map +1 -1
  64. package/dist/index.d.cts +38 -16
  65. package/dist/index.d.ts +38 -16
  66. package/dist/index.js +281 -87
  67. package/dist/index.js.map +1 -1
  68. package/dist/queries-DModcWRy.d.cts +44 -0
  69. package/dist/queries-byD0jr1Y.d.ts +44 -0
  70. package/dist/{types-BkAYmc96.d.ts → types-B50pBPEV.d.ts} +190 -38
  71. package/dist/{types-YbL7JpEA.d.cts → types-Bll19FZJ.d.cts} +7 -0
  72. package/dist/{types-YbL7JpEA.d.ts → types-Bll19FZJ.d.ts} +7 -0
  73. package/dist/{queries-6Avfh74U.d.ts → types-BuXdFhaZ.d.cts} +7 -48
  74. package/dist/{types-BMRzfELQ.d.cts → types-ChAMwU3q.d.cts} +17 -1
  75. package/dist/{types-BMRzfELQ.d.ts → types-ChAMwU3q.d.ts} +17 -1
  76. package/dist/{types-CES_30qx.d.cts → types-DQW8l7pY.d.cts} +190 -38
  77. package/dist/{queries-CHa2iv_I.d.cts → types-GZ76HZSj.d.ts} +7 -48
  78. package/dist/workflow.cjs +244 -86
  79. package/dist/workflow.cjs.map +1 -1
  80. package/dist/workflow.d.cts +54 -65
  81. package/dist/workflow.d.ts +54 -65
  82. package/dist/workflow.js +243 -83
  83. package/dist/workflow.js.map +1 -1
  84. package/package.json +54 -2
  85. package/src/adapters/sandbox/daytona/filesystem.ts +1 -1
  86. package/src/adapters/sandbox/daytona/index.ts +8 -0
  87. package/src/adapters/sandbox/daytona/proxy.ts +56 -0
  88. package/src/adapters/sandbox/e2b/filesystem.ts +147 -0
  89. package/src/adapters/sandbox/e2b/index.ts +164 -0
  90. package/src/adapters/sandbox/e2b/types.ts +23 -0
  91. package/src/adapters/sandbox/inmemory/index.ts +27 -3
  92. package/src/adapters/sandbox/inmemory/proxy.ts +53 -0
  93. package/src/adapters/sandbox/virtual/filesystem.ts +41 -17
  94. package/src/adapters/sandbox/virtual/provider.ts +9 -1
  95. package/src/adapters/sandbox/virtual/proxy.ts +53 -0
  96. package/src/adapters/sandbox/virtual/types.ts +9 -4
  97. package/src/adapters/thread/google-genai/activities.ts +51 -17
  98. package/src/adapters/thread/google-genai/index.ts +1 -0
  99. package/src/adapters/thread/google-genai/proxy.ts +61 -0
  100. package/src/adapters/thread/langchain/activities.ts +47 -14
  101. package/src/adapters/thread/langchain/index.ts +1 -0
  102. package/src/adapters/thread/langchain/proxy.ts +61 -0
  103. package/src/lib/lifecycle.ts +57 -0
  104. package/src/lib/sandbox/manager.ts +52 -6
  105. package/src/lib/sandbox/sandbox.test.ts +12 -11
  106. package/src/lib/sandbox/types.ts +31 -4
  107. package/src/lib/session/index.ts +4 -5
  108. package/src/lib/session/session-edge-cases.integration.test.ts +491 -66
  109. package/src/lib/session/session.integration.test.ts +92 -80
  110. package/src/lib/session/session.ts +108 -96
  111. package/src/lib/session/types.ts +87 -17
  112. package/src/lib/subagent/define.ts +6 -5
  113. package/src/lib/subagent/handler.ts +148 -16
  114. package/src/lib/subagent/index.ts +4 -0
  115. package/src/lib/subagent/register.ts +10 -3
  116. package/src/lib/subagent/signals.ts +8 -0
  117. package/src/lib/subagent/subagent.integration.test.ts +893 -128
  118. package/src/lib/subagent/tool.ts +2 -2
  119. package/src/lib/subagent/types.ts +84 -21
  120. package/src/lib/subagent/workflow.ts +83 -12
  121. package/src/lib/tool-router/router-edge-cases.integration.test.ts +4 -1
  122. package/src/lib/tool-router/router.integration.test.ts +141 -5
  123. package/src/lib/tool-router/router.ts +13 -3
  124. package/src/lib/tool-router/types.ts +7 -0
  125. package/src/lib/workflow.test.ts +104 -27
  126. package/src/lib/workflow.ts +37 -19
  127. package/src/tools/bash/bash.test.ts +16 -7
  128. package/src/workflow.ts +11 -14
  129. package/tsup.config.ts +6 -0
@@ -34,7 +34,11 @@ vi.mock("@temporalio/workflow", () => {
34
34
  condition: async (fn: () => boolean) => fn(),
35
35
  defineUpdate: (name: string) => ({ __type: "update", name }),
36
36
  defineQuery: (name: string) => ({ __type: "query", name }),
37
+ defineSignal: (name: string) => ({ __type: "signal", name }),
37
38
  setHandler: (_def: unknown, _handler: unknown) => {},
39
+ startChild: async () => ({ result: () => Promise.resolve(null) }),
40
+ workflowInfo: () => ({ taskQueue: "default-queue" }),
41
+ getExternalWorkflowHandle: () => ({ signal: async () => {} }),
38
42
  uuid4: () =>
39
43
  `00000000-0000-0000-0000-${String(++idCounter).padStart(12, "0")}`,
40
44
  ApplicationFailure: MockApplicationFailure,
@@ -56,12 +60,11 @@ type TurnScript = {
56
60
  * Wraps every method on a ThreadOps object so it also has `.executeWithOptions()`,
57
61
  * matching Temporal's `ActivityInterfaceFor<ThreadOps>` shape.
58
62
  */
59
- function toActivityInterface(
60
- raw: ThreadOps,
61
- ): ActivityInterfaceFor<ThreadOps> {
63
+ function toActivityInterface(raw: ThreadOps): ActivityInterfaceFor<ThreadOps> {
62
64
  const result = {} as Record<string, unknown>;
63
65
  for (const [key, fn] of Object.entries(raw)) {
64
- const wrapped = (...args: unknown[]) => (fn as (...a: unknown[]) => unknown)(...args);
66
+ const wrapped = (...args: unknown[]) =>
67
+ (fn as (...a: unknown[]) => unknown)(...args);
65
68
  wrapped.executeWithOptions = (_opts: unknown, args: unknown[]) =>
66
69
  (fn as (...a: unknown[]) => unknown)(...args);
67
70
  result[key] = wrapped;
@@ -75,14 +78,14 @@ function createMockThreadOps() {
75
78
  initializeThread: async (threadId) => {
76
79
  log.push({ op: "initializeThread", args: [threadId] });
77
80
  },
78
- appendHumanMessage: async (threadId, content) => {
79
- log.push({ op: "appendHumanMessage", args: [threadId, content] });
81
+ appendHumanMessage: async (threadId, id, content) => {
82
+ log.push({ op: "appendHumanMessage", args: [threadId, id, content] });
80
83
  },
81
- appendToolResult: async (config) => {
82
- log.push({ op: "appendToolResult", args: [config] });
84
+ appendToolResult: async (id, config) => {
85
+ log.push({ op: "appendToolResult", args: [id, config] });
83
86
  },
84
- appendSystemMessage: async (threadId, content) => {
85
- log.push({ op: "appendSystemMessage", args: [threadId, content] });
87
+ appendSystemMessage: async (threadId, id, content) => {
88
+ log.push({ op: "appendSystemMessage", args: [threadId, id, content] });
86
89
  },
87
90
  forkThread: async (source, target) => {
88
91
  log.push({ op: "forkThread", args: [source, target] });
@@ -91,7 +94,9 @@ function createMockThreadOps() {
91
94
  return { ops, log };
92
95
  }
93
96
 
94
- function createScriptedRunAgent(turns: TurnScript[]): RunAgentActivity<unknown> {
97
+ function createScriptedRunAgent(
98
+ turns: TurnScript[]
99
+ ): RunAgentActivity<unknown> {
95
100
  let call = 0;
96
101
  return async () => {
97
102
  const turn = turns[call++];
@@ -113,7 +118,7 @@ function createEchoTool() {
113
118
  schema: z.object({ text: z.string() }),
114
119
  handler: async (
115
120
  args: { text: string },
116
- _ctx: RouterContext,
121
+ _ctx: RouterContext
117
122
  ): Promise<ToolHandlerResponse<{ echoed: string }>> => ({
118
123
  toolResponse: `Echo: ${args.text}`,
119
124
  data: { echoed: args.text },
@@ -150,7 +155,7 @@ describe("createSession edge cases", () => {
150
155
 
151
156
  const session = await createSession({
152
157
  agentName: "TestAgent",
153
- threadId: "thread-1",
158
+ thread: { mode: "new", threadId: "thread-1" },
154
159
  runAgent: createScriptedRunAgent([
155
160
  {
156
161
  message: "Need user input",
@@ -186,7 +191,7 @@ describe("createSession edge cases", () => {
186
191
 
187
192
  const session = await createSession({
188
193
  agentName: "TestAgent",
189
- threadId: "thread-1",
194
+ thread: { mode: "new", threadId: "thread-1" },
190
195
  runAgent: createScriptedRunAgent([
191
196
  {
192
197
  message: "bad calls",
@@ -216,8 +221,11 @@ describe("createSession edge cases", () => {
216
221
 
217
222
  const errorResults = log.filter((l) => {
218
223
  if (l.op !== "appendToolResult") return false;
219
- const config = l.args[0] as ToolResultConfig;
220
- return typeof config.content === "string" && config.content.includes("Invalid tool call");
224
+ const config = l.args[1] as ToolResultConfig;
225
+ return (
226
+ typeof config.content === "string" &&
227
+ config.content.includes("Invalid tool call")
228
+ );
221
229
  });
222
230
  expect(errorResults).toHaveLength(2);
223
231
  });
@@ -229,7 +237,7 @@ describe("createSession edge cases", () => {
229
237
 
230
238
  const session = await createSession({
231
239
  agentName: "TestAgent",
232
- threadId: "thread-1",
240
+ thread: { mode: "new", threadId: "thread-1" },
233
241
  runAgent: createScriptedRunAgent([
234
242
  {
235
243
  message: "no id",
@@ -260,7 +268,7 @@ describe("createSession edge cases", () => {
260
268
 
261
269
  const session = await createSession({
262
270
  agentName: "TestAgent",
263
- threadId: "thread-1",
271
+ thread: { mode: "new", threadId: "thread-1" },
264
272
  runAgent: createScriptedRunAgent([
265
273
  {
266
274
  message: "I tried calling a tool",
@@ -298,7 +306,7 @@ describe("createSession edge cases", () => {
298
306
 
299
307
  const session = await createSession({
300
308
  agentName: "TestAgent",
301
- threadId: "thread-1",
309
+ thread: { mode: "new", threadId: "thread-1" },
302
310
  runAgent: createScriptedRunAgent([
303
311
  {
304
312
  message: "calling fail",
@@ -328,7 +336,7 @@ describe("createSession edge cases", () => {
328
336
 
329
337
  // --- Tool handler throws without recovery ---
330
338
 
331
- it("session fails when tool handler throws with no failure hook", async () => {
339
+ it("session completes when tool handler throws with no failure hook (error suppressed)", async () => {
332
340
  const { ops } = createMockThreadOps();
333
341
  let endReason: string | undefined;
334
342
 
@@ -343,7 +351,7 @@ describe("createSession edge cases", () => {
343
351
 
344
352
  const session = await createSession({
345
353
  agentName: "TestAgent",
346
- threadId: "thread-1",
354
+ thread: { mode: "new", threadId: "thread-1" },
347
355
  runAgent: createScriptedRunAgent([
348
356
  {
349
357
  message: "calling fail",
@@ -364,10 +372,10 @@ describe("createSession edge cases", () => {
364
372
  initialState: { systemPrompt: "test" },
365
373
  });
366
374
 
367
- await expect(session.runSession({ stateManager })).rejects.toThrow(
368
- "unrecoverable tool",
369
- );
370
- expect(endReason).toBe("failed");
375
+ const result = await session.runSession({ stateManager });
376
+ expect(result.exitReason).toBe("completed");
377
+ expect(result.finalMessage).toBe("done");
378
+ expect(endReason).toBe("completed");
371
379
  });
372
380
 
373
381
  // --- Metadata passed through to hooks ---
@@ -379,11 +387,9 @@ describe("createSession edge cases", () => {
379
387
 
380
388
  const session = await createSession({
381
389
  agentName: "TestAgent",
382
- threadId: "thread-1",
390
+ thread: { mode: "new", threadId: "thread-1" },
383
391
  metadata: { env: "test", version: 42 },
384
- runAgent: createScriptedRunAgent([
385
- { message: "done", toolCalls: [] },
386
- ]),
392
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
387
393
  threadOps: ops,
388
394
  buildContextMessage: () => "go",
389
395
  hooks: {
@@ -422,15 +428,17 @@ describe("createSession edge cases", () => {
422
428
  data: null,
423
429
  createdAt: new Date().toISOString(),
424
430
  }),
431
+ forkSandbox: async () => "forked-sandbox-id",
432
+ pauseSandbox: async () => {},
425
433
  };
426
434
 
427
435
  const session = await createSession({
428
436
  agentName: "TestAgent",
429
- threadId: "thread-1",
437
+ thread: { mode: "new", threadId: "thread-1" },
430
438
  runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
431
439
  threadOps: ops,
432
440
  buildContextMessage: () => "go",
433
- sandbox: sandboxOps,
441
+ sandboxOps,
434
442
  });
435
443
 
436
444
  const stateManager = createAgentStateManager({
@@ -438,7 +446,7 @@ describe("createSession edge cases", () => {
438
446
  });
439
447
 
440
448
  await expect(session.runSession({ stateManager })).rejects.toThrow(
441
- "sandbox creation failed",
449
+ "sandbox creation failed"
442
450
  );
443
451
  });
444
452
 
@@ -462,17 +470,19 @@ describe("createSession edge cases", () => {
462
470
  data: null,
463
471
  createdAt: new Date().toISOString(),
464
472
  }),
473
+ forkSandbox: async () => "forked-sandbox-id",
474
+ pauseSandbox: async () => {},
465
475
  };
466
476
 
467
477
  const session = await createSession({
468
478
  agentName: "TestAgent",
469
- threadId: "thread-1",
479
+ thread: { mode: "new", threadId: "thread-1" },
470
480
  runAgent: async () => {
471
481
  throw new Error("LLM crash");
472
482
  },
473
483
  threadOps: ops,
474
484
  buildContextMessage: () => "go",
475
- sandbox: sandboxOps,
485
+ sandboxOps,
476
486
  });
477
487
 
478
488
  const stateManager = createAgentStateManager({
@@ -480,7 +490,7 @@ describe("createSession edge cases", () => {
480
490
  });
481
491
 
482
492
  await expect(session.runSession({ stateManager })).rejects.toThrow(
483
- "LLM crash",
493
+ "LLM crash"
484
494
  );
485
495
 
486
496
  expect(sandboxLog).toContain("create");
@@ -494,7 +504,7 @@ describe("createSession edge cases", () => {
494
504
 
495
505
  const session = await createSession({
496
506
  agentName: "TestAgent",
497
- threadId: "thread-1",
507
+ thread: { mode: "new", threadId: "thread-1" },
498
508
  runAgent: createScriptedRunAgent([]),
499
509
  threadOps: ops,
500
510
  buildContextMessage: () => "hi",
@@ -505,7 +515,7 @@ describe("createSession edge cases", () => {
505
515
  });
506
516
 
507
517
  await expect(session.runSession({ stateManager })).rejects.toThrow(
508
- "No system prompt in state",
518
+ "No system prompt in state"
509
519
  );
510
520
  });
511
521
 
@@ -527,7 +537,7 @@ describe("createSession edge cases", () => {
527
537
 
528
538
  const session = await createSession({
529
539
  agentName: "TestAgent",
530
- threadId: "thread-1",
540
+ thread: { mode: "new", threadId: "thread-1" },
531
541
  runAgent: createScriptedRunAgent([
532
542
  {
533
543
  message: "t1",
@@ -558,15 +568,14 @@ describe("createSession edge cases", () => {
558
568
  expect(result.usage.totalOutputTokens).toBe(50);
559
569
  });
560
570
 
561
- // --- continueThread with no source thread ---
571
+ // --- Thread fork: new threadId from source ---
562
572
 
563
- it("continueThread generates new threadId and forks when source is provided", async () => {
573
+ it("fork thread mode generates new threadId and forks when source is provided", async () => {
564
574
  const { ops, log } = createMockThreadOps();
565
575
 
566
576
  const session = await createSession({
567
577
  agentName: "TestAgent",
568
- threadId: "original-thread",
569
- continueThread: true,
578
+ thread: { mode: "fork", threadId: "original-thread" },
570
579
  runAgent: createScriptedRunAgent([
571
580
  { message: "continued", toolCalls: [] },
572
581
  ]),
@@ -597,7 +606,7 @@ describe("createSession edge cases", () => {
597
606
 
598
607
  const session = await createSession({
599
608
  agentName: "TestAgent",
600
- threadId: "thread-1",
609
+ thread: { mode: "new", threadId: "thread-1" },
601
610
  maxTurns: 1,
602
611
  runAgent: createScriptedRunAgent([
603
612
  {
@@ -642,7 +651,7 @@ describe("createSession edge cases", () => {
642
651
 
643
652
  const session = await createSession({
644
653
  agentName: "TestAgent",
645
- threadId: "thread-1",
654
+ thread: { mode: "new", threadId: "thread-1" },
646
655
  processToolsInParallel: false,
647
656
  runAgent: createScriptedRunAgent([
648
657
  {
@@ -675,7 +684,7 @@ describe("createSession edge cases", () => {
675
684
 
676
685
  const session = await createSession({
677
686
  agentName: "TestAgent",
678
- threadId: "thread-1",
687
+ thread: { mode: "new", threadId: "thread-1" },
679
688
  runAgent: createScriptedRunAgent([
680
689
  {
681
690
  message: "mixed",
@@ -700,24 +709,27 @@ describe("createSession edge cases", () => {
700
709
 
701
710
  const toolResults = log.filter((l) => l.op === "appendToolResult");
702
711
  const echoResult = toolResults.find((l) => {
703
- const config = l.args[0] as ToolResultConfig;
712
+ const config = l.args[1] as ToolResultConfig;
704
713
  return config.toolName === "Echo";
705
714
  });
706
715
  expect(echoResult).toBeDefined();
707
716
  if (echoResult) {
708
- expect((echoResult.args[0] as ToolResultConfig).content).toBe("Echo: valid");
717
+ expect((echoResult.args[1] as ToolResultConfig).content).toBe(
718
+ "Echo: valid"
719
+ );
709
720
  }
710
721
 
711
722
  const unknownResult = toolResults.find((l) => {
712
- const config = l.args[0] as ToolResultConfig;
723
+ const config = l.args[1] as ToolResultConfig;
713
724
  return config.toolName === "Unknown";
714
725
  });
715
726
  expect(unknownResult).toBeDefined();
716
727
  const unknownContent = unknownResult
717
- ? (unknownResult.args[0] as ToolResultConfig).content
728
+ ? (unknownResult.args[1] as ToolResultConfig).content
718
729
  : undefined;
719
730
  expect(
720
- typeof unknownContent === "string" && unknownContent.includes("Invalid tool call"),
731
+ typeof unknownContent === "string" &&
732
+ unknownContent.includes("Invalid tool call")
721
733
  ).toBe(true);
722
734
  });
723
735
 
@@ -728,10 +740,8 @@ describe("createSession edge cases", () => {
728
740
 
729
741
  const session = await createSession({
730
742
  agentName: "TestAgent",
731
- threadId: "thread-1",
732
- runAgent: createScriptedRunAgent([
733
- { message: "done", toolCalls: [] },
734
- ]),
743
+ thread: { mode: "new", threadId: "thread-1" },
744
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
735
745
  threadOps: ops,
736
746
  buildContextMessage: () => [
737
747
  { type: "text", text: "Hello" },
@@ -749,7 +759,7 @@ describe("createSession edge cases", () => {
749
759
  expect(humanOps).toHaveLength(1);
750
760
  const humanOp = humanOps[0];
751
761
  if (!humanOp) throw new Error("expected human op");
752
- const content = humanOp.args[1];
762
+ const content = humanOp.args[2];
753
763
  expect(Array.isArray(content)).toBe(true);
754
764
  const firstContent = (content as { type: string }[])[0];
755
765
  if (!firstContent) throw new Error("expected content item");
@@ -764,7 +774,7 @@ describe("createSession edge cases", () => {
764
774
 
765
775
  const session = await createSession({
766
776
  agentName: "TestAgent",
767
- threadId: "thread-1",
777
+ thread: { mode: "new", threadId: "thread-1" },
768
778
  runAgent: createScriptedRunAgent([
769
779
  {
770
780
  message: "t1",
@@ -813,7 +823,7 @@ describe("createSession edge cases", () => {
813
823
 
814
824
  const session = await createSession({
815
825
  agentName: "TestAgent",
816
- threadId: "thread-1",
826
+ thread: { mode: "new", threadId: "thread-1" },
817
827
  runAgent: createScriptedRunAgent([
818
828
  {
819
829
  message: "self",
@@ -834,7 +844,7 @@ describe("createSession edge cases", () => {
834
844
 
835
845
  const toolResults = log.filter((l) => {
836
846
  if (l.op !== "appendToolResult") return false;
837
- const config = l.args[0] as ToolResultConfig;
847
+ const config = l.args[1] as ToolResultConfig;
838
848
  return config.toolName === "SelfAppend";
839
849
  });
840
850
  expect(toolResults).toHaveLength(0);
@@ -847,7 +857,7 @@ describe("createSession edge cases", () => {
847
857
 
848
858
  const session = await createSession({
849
859
  agentName: "TestAgent",
850
- threadId: "thread-1",
860
+ thread: { mode: "new", threadId: "thread-1" },
851
861
  runAgent: createScriptedRunAgent([
852
862
  {
853
863
  message: "calling",
@@ -860,7 +870,10 @@ describe("createSession edge cases", () => {
860
870
  buildContextMessage: () => "go",
861
871
  hooks: {
862
872
  onPreToolUse: async ({ toolCall }) => {
863
- if (toolCall.args && (toolCall.args as { text: string }).text === "skip-me") {
873
+ if (
874
+ toolCall.args &&
875
+ (toolCall.args as { text: string }).text === "skip-me"
876
+ ) {
864
877
  return { skip: true };
865
878
  }
866
879
  return {};
@@ -878,8 +891,10 @@ describe("createSession edge cases", () => {
878
891
  expect(toolResults).toHaveLength(1);
879
892
  const toolResult = toolResults[0];
880
893
  if (!toolResult) throw new Error("expected tool result");
881
- const content = (toolResult.args[0] as ToolResultConfig).content;
882
- expect(typeof content === "string" && content.includes("Skipped")).toBe(true);
894
+ const content = (toolResult.args[1] as ToolResultConfig).content;
895
+ expect(typeof content === "string" && content.includes("Skipped")).toBe(
896
+ true
897
+ );
883
898
  });
884
899
 
885
900
  // --- Sandbox snapshot is not called on normal flow ---
@@ -897,15 +912,17 @@ describe("createSession edge cases", () => {
897
912
  createSandbox: async () => ({ sandboxId: "sb-test" }),
898
913
  destroySandbox: async () => {},
899
914
  snapshotSandbox: snapshotSpy,
915
+ forkSandbox: async () => "forked-sandbox-id",
916
+ pauseSandbox: async () => {},
900
917
  };
901
918
 
902
919
  const session = await createSession({
903
920
  agentName: "TestAgent",
904
- threadId: "thread-1",
921
+ thread: { mode: "new", threadId: "thread-1" },
905
922
  runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
906
923
  threadOps: ops,
907
924
  buildContextMessage: () => "go",
908
- sandbox: sandboxOps,
925
+ sandboxOps,
909
926
  });
910
927
 
911
928
  const stateManager = createAgentStateManager({
@@ -924,7 +941,7 @@ describe("createSession edge cases", () => {
924
941
 
925
942
  const session = await createSession({
926
943
  agentName: "TestAgent",
927
- threadId: "thread-1",
944
+ thread: { mode: "new", threadId: "thread-1" },
928
945
  runAgent: createScriptedRunAgent([
929
946
  {
930
947
  message: "t1",
@@ -959,7 +976,7 @@ describe("createSession edge cases", () => {
959
976
 
960
977
  const session = await createSession({
961
978
  agentName: "TestAgent",
962
- threadId: "thread-1",
979
+ thread: { mode: "new", threadId: "thread-1" },
963
980
  maxTurns: 0,
964
981
  runAgent: createScriptedRunAgent([]),
965
982
  threadOps: ops,
@@ -977,4 +994,412 @@ describe("createSession edge cases", () => {
977
994
  expect(result.usage.turns).toBe(0);
978
995
  expect(result.finalMessage).toBeNull();
979
996
  });
997
+
998
+ // --- sandboxId returned from runSession ---
999
+
1000
+ it("returns sandboxId from runSession when sandbox is created", async () => {
1001
+ const { ops } = createMockThreadOps();
1002
+
1003
+ const sandboxOps: SandboxOps = {
1004
+ createSandbox: async () => ({ sandboxId: "sb-created" }),
1005
+ destroySandbox: async () => {},
1006
+ pauseSandbox: async () => {},
1007
+ snapshotSandbox: async () => ({
1008
+ sandboxId: "sb-1",
1009
+ providerId: "test",
1010
+ data: null,
1011
+ createdAt: new Date().toISOString(),
1012
+ }),
1013
+ forkSandbox: async () => "forked-sb",
1014
+ };
1015
+
1016
+ const session = await createSession({
1017
+ agentName: "TestAgent",
1018
+ thread: { mode: "new", threadId: "thread-1" },
1019
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1020
+ threadOps: ops,
1021
+ buildContextMessage: () => "go",
1022
+ sandboxOps,
1023
+ });
1024
+
1025
+ const stateManager = createAgentStateManager({
1026
+ initialState: { systemPrompt: "test" },
1027
+ });
1028
+
1029
+ const result = await session.runSession({ stateManager });
1030
+ expect((result as { sandboxId?: string }).sandboxId).toBe("sb-created");
1031
+ });
1032
+
1033
+ it("returns inherited sandboxId from runSession", async () => {
1034
+ const { ops } = createMockThreadOps();
1035
+
1036
+ const sandboxOps: SandboxOps = {
1037
+ createSandbox: async () => ({ sandboxId: "sb" }),
1038
+ destroySandbox: async () => {},
1039
+ pauseSandbox: async () => {},
1040
+ snapshotSandbox: async () => ({
1041
+ sandboxId: "sb",
1042
+ providerId: "test",
1043
+ data: null,
1044
+ createdAt: new Date().toISOString(),
1045
+ }),
1046
+ forkSandbox: async () => "forked-sb",
1047
+ };
1048
+
1049
+ const session = await createSession({
1050
+ agentName: "TestAgent",
1051
+ thread: { mode: "new", threadId: "thread-1" },
1052
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1053
+ threadOps: ops,
1054
+ buildContextMessage: () => "go",
1055
+ sandbox: { mode: "inherit", sandboxId: "inherited-sb" },
1056
+ sandboxOps,
1057
+ });
1058
+
1059
+ const stateManager = createAgentStateManager({
1060
+ initialState: { systemPrompt: "test" },
1061
+ });
1062
+
1063
+ const result = await session.runSession({ stateManager });
1064
+ expect((result as { sandboxId?: string }).sandboxId).toBe("inherited-sb");
1065
+ });
1066
+
1067
+ // --- Inherited sandbox is not destroyed ---
1068
+
1069
+ it("does not destroy inherited sandbox even when sandboxOps is provided", async () => {
1070
+ const { ops } = createMockThreadOps();
1071
+ const sandboxLog: string[] = [];
1072
+
1073
+ const sandboxOps: SandboxOps = {
1074
+ createSandbox: async () => {
1075
+ sandboxLog.push("create");
1076
+ return { sandboxId: "new-sb" };
1077
+ },
1078
+ destroySandbox: async (id: string) => {
1079
+ sandboxLog.push(`destroy:${id}`);
1080
+ },
1081
+ pauseSandbox: async (id: string) => {
1082
+ sandboxLog.push(`pause:${id}`);
1083
+ },
1084
+ snapshotSandbox: async () => ({
1085
+ sandboxId: "sb-1",
1086
+ providerId: "test",
1087
+ data: null,
1088
+ createdAt: new Date().toISOString(),
1089
+ }),
1090
+ forkSandbox: async () => "forked-sb",
1091
+ };
1092
+
1093
+ const session = await createSession({
1094
+ agentName: "TestAgent",
1095
+ thread: { mode: "new", threadId: "thread-1" },
1096
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1097
+ threadOps: ops,
1098
+ buildContextMessage: () => "go",
1099
+ sandboxOps,
1100
+ sandbox: { mode: "inherit", sandboxId: "inherited-sb" },
1101
+ });
1102
+
1103
+ const stateManager = createAgentStateManager({
1104
+ initialState: { systemPrompt: "test" },
1105
+ });
1106
+
1107
+ await session.runSession({ stateManager });
1108
+
1109
+ expect(sandboxLog).toHaveLength(0);
1110
+ });
1111
+
1112
+ // --- Sandbox fork ---
1113
+
1114
+ it("forks sandbox when sandbox init mode is fork", async () => {
1115
+ const { ops } = createMockThreadOps();
1116
+ const sandboxLog: string[] = [];
1117
+
1118
+ const sandboxOps: SandboxOps = {
1119
+ createSandbox: async () => {
1120
+ sandboxLog.push("create");
1121
+ return { sandboxId: "new-sb" };
1122
+ },
1123
+ destroySandbox: async (id: string) => {
1124
+ sandboxLog.push(`destroy:${id}`);
1125
+ },
1126
+ pauseSandbox: async () => {},
1127
+ snapshotSandbox: async () => ({
1128
+ sandboxId: "sb-1",
1129
+ providerId: "test",
1130
+ data: null,
1131
+ createdAt: new Date().toISOString(),
1132
+ }),
1133
+ forkSandbox: async (id: string) => {
1134
+ sandboxLog.push(`fork:${id}`);
1135
+ return `forked-from-${id}`;
1136
+ },
1137
+ };
1138
+
1139
+ const session = await createSession({
1140
+ agentName: "TestAgent",
1141
+ thread: { mode: "new", threadId: "thread-1" },
1142
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1143
+ threadOps: ops,
1144
+ buildContextMessage: () => "go",
1145
+ sandboxOps,
1146
+ sandbox: { mode: "fork", sandboxId: "paused-sb-1" },
1147
+ });
1148
+
1149
+ const stateManager = createAgentStateManager({
1150
+ initialState: { systemPrompt: "test" },
1151
+ });
1152
+
1153
+ const result = await session.runSession({ stateManager });
1154
+
1155
+ expect(sandboxLog).toContain("fork:paused-sb-1");
1156
+ expect(sandboxLog).not.toContain("create");
1157
+ expect((result as { sandboxId?: string }).sandboxId).toBe("forked-from-paused-sb-1");
1158
+ expect(sandboxLog).toContain("destroy:forked-from-paused-sb-1");
1159
+ });
1160
+
1161
+ // --- Forked sandbox is destroyed on exit ---
1162
+
1163
+ it("destroys forked sandbox on exit (not inherited)", async () => {
1164
+ const { ops } = createMockThreadOps();
1165
+ const sandboxLog: string[] = [];
1166
+
1167
+ const sandboxOps: SandboxOps = {
1168
+ createSandbox: async () => ({ sandboxId: "new-sb" }),
1169
+ destroySandbox: async (id: string) => {
1170
+ sandboxLog.push(`destroy:${id}`);
1171
+ },
1172
+ pauseSandbox: async () => {},
1173
+ snapshotSandbox: async () => ({
1174
+ sandboxId: "sb-1",
1175
+ providerId: "test",
1176
+ data: null,
1177
+ createdAt: new Date().toISOString(),
1178
+ }),
1179
+ forkSandbox: async () => "forked-sb",
1180
+ };
1181
+
1182
+ const session = await createSession({
1183
+ agentName: "TestAgent",
1184
+ thread: { mode: "new", threadId: "thread-1" },
1185
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1186
+ threadOps: ops,
1187
+ buildContextMessage: () => "go",
1188
+ sandboxOps,
1189
+ sandbox: { mode: "fork", sandboxId: "old-sb" },
1190
+ });
1191
+
1192
+ const stateManager = createAgentStateManager({
1193
+ initialState: { systemPrompt: "test" },
1194
+ });
1195
+
1196
+ await session.runSession({ stateManager });
1197
+
1198
+ expect(sandboxLog).toContain("destroy:forked-sb");
1199
+ });
1200
+
1201
+ // --- sandboxShutdown: "pause" ---
1202
+
1203
+ it("pauses sandbox on exit when sandboxShutdown is pause", async () => {
1204
+ const { ops } = createMockThreadOps();
1205
+ const sandboxLog: string[] = [];
1206
+
1207
+ const sandboxOps: SandboxOps = {
1208
+ createSandbox: async () => ({ sandboxId: "sb-pause-test" }),
1209
+ destroySandbox: async (id: string) => {
1210
+ sandboxLog.push(`destroy:${id}`);
1211
+ },
1212
+ pauseSandbox: async (id: string) => {
1213
+ sandboxLog.push(`pause:${id}`);
1214
+ },
1215
+ snapshotSandbox: async () => ({
1216
+ sandboxId: "sb-1",
1217
+ providerId: "test",
1218
+ data: null,
1219
+ createdAt: new Date().toISOString(),
1220
+ }),
1221
+ forkSandbox: async () => "forked-sb",
1222
+ };
1223
+
1224
+ const session = await createSession({
1225
+ agentName: "TestAgent",
1226
+ thread: { mode: "new", threadId: "thread-1" },
1227
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1228
+ threadOps: ops,
1229
+ buildContextMessage: () => "go",
1230
+ sandboxOps,
1231
+ sandboxShutdown: "pause",
1232
+ });
1233
+
1234
+ const stateManager = createAgentStateManager({
1235
+ initialState: { systemPrompt: "test" },
1236
+ });
1237
+
1238
+ await session.runSession({ stateManager });
1239
+
1240
+ expect(sandboxLog).toContain("pause:sb-pause-test");
1241
+ expect(sandboxLog).not.toContain("destroy:sb-pause-test");
1242
+ });
1243
+
1244
+ // --- sandboxShutdown: "pause-until-parent-close" ---
1245
+
1246
+ it("pauses sandbox on exit when sandboxShutdown is pause-until-parent-close", async () => {
1247
+ const { ops } = createMockThreadOps();
1248
+ const sandboxLog: string[] = [];
1249
+
1250
+ const sandboxOps: SandboxOps = {
1251
+ createSandbox: async () => ({ sandboxId: "sb-parent-close" }),
1252
+ destroySandbox: async (id: string) => {
1253
+ sandboxLog.push(`destroy:${id}`);
1254
+ },
1255
+ pauseSandbox: async (id: string) => {
1256
+ sandboxLog.push(`pause:${id}`);
1257
+ },
1258
+ snapshotSandbox: async () => ({
1259
+ sandboxId: "sb-1",
1260
+ providerId: "test",
1261
+ data: null,
1262
+ createdAt: new Date().toISOString(),
1263
+ }),
1264
+ forkSandbox: async () => "forked-sb",
1265
+ };
1266
+
1267
+ const session = await createSession({
1268
+ agentName: "TestAgent",
1269
+ thread: { mode: "new", threadId: "thread-1" },
1270
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1271
+ threadOps: ops,
1272
+ buildContextMessage: () => "go",
1273
+ sandboxOps,
1274
+ sandboxShutdown: "pause-until-parent-close",
1275
+ });
1276
+
1277
+ const stateManager = createAgentStateManager({
1278
+ initialState: { systemPrompt: "test" },
1279
+ });
1280
+
1281
+ await session.runSession({ stateManager });
1282
+
1283
+ expect(sandboxLog).toContain("pause:sb-parent-close");
1284
+ expect(sandboxLog).not.toContain("destroy:sb-parent-close");
1285
+ });
1286
+
1287
+ // --- Throws when sandbox fork/continue provided without sandboxOps ---
1288
+
1289
+ it("throws when sandbox fork mode is set without sandboxOps", async () => {
1290
+ const { ops } = createMockThreadOps();
1291
+
1292
+ const session = await createSession({
1293
+ agentName: "TestAgent",
1294
+ thread: { mode: "new", threadId: "thread-1" },
1295
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1296
+ threadOps: ops,
1297
+ buildContextMessage: () => "go",
1298
+ sandbox: { mode: "fork", sandboxId: "prev-sb" },
1299
+ });
1300
+
1301
+ const stateManager = createAgentStateManager({
1302
+ initialState: { systemPrompt: "test" },
1303
+ });
1304
+
1305
+ await expect(session.runSession({ stateManager })).rejects.toThrow(
1306
+ "No sandboxOps provided — cannot fork sandbox"
1307
+ );
1308
+ });
1309
+
1310
+ // --- No sandboxId returned when no sandbox ---
1311
+
1312
+ it("does not return sandboxId when no sandbox is configured", async () => {
1313
+ const { ops } = createMockThreadOps();
1314
+
1315
+ const session = await createSession({
1316
+ agentName: "TestAgent",
1317
+ thread: { mode: "new", threadId: "thread-1" },
1318
+ runAgent: createScriptedRunAgent([{ message: "done", toolCalls: [] }]),
1319
+ threadOps: ops,
1320
+ buildContextMessage: () => "go",
1321
+ });
1322
+
1323
+ const stateManager = createAgentStateManager({
1324
+ initialState: { systemPrompt: "test" },
1325
+ });
1326
+
1327
+ const result = await session.runSession({ stateManager });
1328
+ expect((result as { sandboxId?: string }).sandboxId).toBeUndefined();
1329
+ });
1330
+
1331
+ // --- Thread: defaults to new thread when no thread field provided ---
1332
+
1333
+ it("defaults to new thread without fork when no thread field is provided", async () => {
1334
+ const { ops, log } = createMockThreadOps();
1335
+
1336
+ const session = await createSession({
1337
+ agentName: "TestAgent",
1338
+ runAgent: createScriptedRunAgent([
1339
+ { message: "done", toolCalls: [] },
1340
+ ]),
1341
+ threadOps: ops,
1342
+ buildContextMessage: () => "go",
1343
+ });
1344
+
1345
+ const stateManager = createAgentStateManager({
1346
+ initialState: { systemPrompt: "test" },
1347
+ });
1348
+
1349
+ const result = await session.runSession({ stateManager });
1350
+ expect(result.exitReason).toBe("completed");
1351
+
1352
+ const forkOps = log.filter((l) => l.op === "forkThread");
1353
+ expect(forkOps).toHaveLength(0);
1354
+
1355
+ const systemOps = log.filter((l) => l.op === "appendSystemMessage");
1356
+ expect(systemOps).toHaveLength(1);
1357
+ });
1358
+
1359
+ // --- Sandbox pause on error ---
1360
+
1361
+ it("pauses sandbox even when session fails if sandboxShutdown is pause", async () => {
1362
+ const { ops } = createMockThreadOps();
1363
+ const sandboxLog: string[] = [];
1364
+
1365
+ const sandboxOps: SandboxOps = {
1366
+ createSandbox: async () => ({ sandboxId: "sb-err" }),
1367
+ destroySandbox: async (id: string) => {
1368
+ sandboxLog.push(`destroy:${id}`);
1369
+ },
1370
+ pauseSandbox: async (id: string) => {
1371
+ sandboxLog.push(`pause:${id}`);
1372
+ },
1373
+ snapshotSandbox: async () => ({
1374
+ sandboxId: "sb-1",
1375
+ providerId: "test",
1376
+ data: null,
1377
+ createdAt: new Date().toISOString(),
1378
+ }),
1379
+ forkSandbox: async () => "forked-sb",
1380
+ };
1381
+
1382
+ const session = await createSession({
1383
+ agentName: "TestAgent",
1384
+ thread: { mode: "new", threadId: "thread-1" },
1385
+ runAgent: async () => {
1386
+ throw new Error("crash");
1387
+ },
1388
+ threadOps: ops,
1389
+ buildContextMessage: () => "go",
1390
+ sandboxOps,
1391
+ sandboxShutdown: "pause",
1392
+ });
1393
+
1394
+ const stateManager = createAgentStateManager({
1395
+ initialState: { systemPrompt: "test" },
1396
+ });
1397
+
1398
+ await expect(session.runSession({ stateManager })).rejects.toThrow(
1399
+ "crash"
1400
+ );
1401
+
1402
+ expect(sandboxLog).toContain("pause:sb-err");
1403
+ expect(sandboxLog).not.toContain("destroy:sb-err");
1404
+ });
980
1405
  });