tylor-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/.aws-setup.sh +25 -0
  2. package/.claude-plugin/plugin.json +22 -0
  3. package/.mcp.json +12 -0
  4. package/AGENTS.md +93 -0
  5. package/CLAUDE.md +99 -0
  6. package/CLAUDE_PLATFORM_AWS_SETUP.md +105 -0
  7. package/LICENSE +21 -0
  8. package/README.md +146 -0
  9. package/assets/tylor_logo.png +0 -0
  10. package/assets/tylor_threads_concept.png +0 -0
  11. package/bin/tylor.js +23 -0
  12. package/hooks/kill-thread-trigger.sh +7 -0
  13. package/hooks/post-tool-use-code-index.sh +7 -0
  14. package/hooks/session-checkpoint.sh +7 -0
  15. package/hooks/session-start.sh +7 -0
  16. package/install.py +401 -0
  17. package/install.sh +260 -0
  18. package/package.json +24 -0
  19. package/pytest.ini +2 -0
  20. package/registry.json +26 -0
  21. package/server/.env.example +24 -0
  22. package/server/__init__.py +0 -0
  23. package/server/config.py +89 -0
  24. package/server/main.py +93 -0
  25. package/server/personas/analyst.md +15 -0
  26. package/server/personas/ceo.md +14 -0
  27. package/server/personas/code_agent.md +15 -0
  28. package/server/personas/cto.md +14 -0
  29. package/server/provision.py +260 -0
  30. package/server/provision_opensearch.py +154 -0
  31. package/server/requirements.txt +26 -0
  32. package/server/storage/__init__.py +0 -0
  33. package/server/storage/dynamo.py +399 -0
  34. package/server/storage/json_store.py +359 -0
  35. package/server/storage/opensearch.py +194 -0
  36. package/server/storage/s3.py +96 -0
  37. package/server/storage/tests/__init__.py +0 -0
  38. package/server/storage/tests/test_dynamo.py +452 -0
  39. package/server/storage/tests/test_json_store.py +226 -0
  40. package/server/storage/tests/test_opensearch.py +270 -0
  41. package/server/storage/tests/test_s3.py +125 -0
  42. package/server/tests/__init__.py +0 -0
  43. package/server/tests/test_install.py +606 -0
  44. package/server/tests/test_isolation.py +90 -0
  45. package/server/tests/test_ui_server.py +385 -0
  46. package/server/tests/test_ui_shader_background.py +52 -0
  47. package/server/tests/test_ui_story_6_3.py +105 -0
  48. package/server/tools/__init__.py +0 -0
  49. package/server/tools/_mcp.py +4 -0
  50. package/server/tools/agents.py +160 -0
  51. package/server/tools/ecc/__init__.py +1 -0
  52. package/server/tools/ecc/data.py +35 -0
  53. package/server/tools/ecc/diagrams.py +23 -0
  54. package/server/tools/ecc/pipeline.py +24 -0
  55. package/server/tools/ecc/presentation.py +24 -0
  56. package/server/tools/ecc/web.py +23 -0
  57. package/server/tools/executor.py +880 -0
  58. package/server/tools/harness.py +330 -0
  59. package/server/tools/help.py +162 -0
  60. package/server/tools/hooks.py +357 -0
  61. package/server/tools/personas.py +110 -0
  62. package/server/tools/registry.py +195 -0
  63. package/server/tools/router.py +117 -0
  64. package/server/tools/skill_installer.py +230 -0
  65. package/server/tools/summarizer.py +168 -0
  66. package/server/tools/tests/__init__.py +0 -0
  67. package/server/tools/tests/test_agents.py +246 -0
  68. package/server/tools/tests/test_code_index.py +108 -0
  69. package/server/tools/tests/test_ecc_tools.py +51 -0
  70. package/server/tools/tests/test_executor.py +584 -0
  71. package/server/tools/tests/test_help_agent101.py +149 -0
  72. package/server/tools/tests/test_hooks.py +124 -0
  73. package/server/tools/tests/test_kill_thread.py +125 -0
  74. package/server/tools/tests/test_new_thread_list_threads.py +293 -0
  75. package/server/tools/tests/test_personas.py +52 -0
  76. package/server/tools/tests/test_recall_memory.py +55 -0
  77. package/server/tools/tests/test_registry_client.py +308 -0
  78. package/server/tools/tests/test_router.py +263 -0
  79. package/server/tools/tests/test_skill_installer.py +174 -0
  80. package/server/tools/tests/test_switch_thread.py +163 -0
  81. package/server/tools/tests/test_thread_command_skills.py +54 -0
  82. package/server/tools/tests/test_thread_resolver.py +165 -0
  83. package/server/tools/tests/test_tier1_schema.py +296 -0
  84. package/server/tools/thread_resolver.py +75 -0
  85. package/server/tools/tylor.py +374 -0
  86. package/server/tools/ui.py +38 -0
  87. package/server/ui_server.py +292 -0
  88. package/server/validate.py +237 -0
  89. package/skills/add-skill/SKILL.md +37 -0
  90. package/skills/afk-status/SKILL.md +20 -0
  91. package/skills/bmad/SKILL.md +14 -0
  92. package/skills/help-agent101/SKILL.md +48 -0
  93. package/skills/kill-thread/SKILL.md +35 -0
  94. package/skills/list-threads/SKILL.md +35 -0
  95. package/skills/new-thread/SKILL.md +35 -0
  96. package/skills/recall/SKILL.md +39 -0
  97. package/skills/run/SKILL.md +33 -0
  98. package/skills/set-sandbox/SKILL.md +38 -0
  99. package/skills/switch-thread/SKILL.md +38 -0
  100. package/ui/claude-logo.png +0 -0
  101. package/ui/index.html +1314 -0
@@ -0,0 +1,584 @@
1
+ """
2
+ Tests for Story 5.1: sandbox path declaration.
3
+ Run: pytest server/tools/tests/test_executor.py -v
4
+ """
5
+ from pathlib import Path
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ import pytest
9
+ from mcp.server.fastmcp.exceptions import ToolError
10
+
11
+
12
# This module lives at <plugin>/server/tools/tests/test_executor.py, so the
# plugin root (which also holds the skills/ folder) is four levels up.
_THIS_FILE = Path(__file__)
PLUGIN_DIR = _THIS_FILE.parent.parent.parent.parent
SKILLS_DIR = PLUGIN_DIR.joinpath("skills")
14
+
15
+
16
def test_set_sandbox_skill_file_exists_and_mentions_tool():
    """The set-sandbox SKILL.md exists, opens with front matter, and names its tools."""
    skill_file = SKILLS_DIR.joinpath("set-sandbox", "SKILL.md")
    assert skill_file.exists()

    body = skill_file.read_text(encoding="utf-8")
    assert body.startswith("---\n")
    # Each of these markers must appear somewhere in the skill description.
    for needle in ("name: set-sandbox", "set_sandbox", "clear", "execute_in_sandbox"):
        assert needle in body
25
+
26
+
27
def test_afk_status_skill_file_exists_and_mentions_tool():
    """The afk-status SKILL.md exists, opens with front matter, and names its tool."""
    skill_file = SKILLS_DIR.joinpath("afk-status", "SKILL.md")
    assert skill_file.exists()

    body = skill_file.read_text(encoding="utf-8")
    assert body.startswith("---\n")
    # Each of these markers must appear somewhere in the skill description.
    for needle in ("name: afk-status", "afk_status", "No AFK session running"):
        assert needle in body
35
+
36
+
37
def test_set_sandbox_validates_absolute_existing_path(tmp_path):
    """An existing absolute directory is accepted and persisted as the sandbox root."""
    from server.tools import executor as executor_mod

    meta_key = "THREAD#t1#META"
    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.get_thread_meta.return_value = {"SK": meta_key, "sandbox_roots": []}
    db.set_sandbox_roots.return_value = {"SK": meta_key, "sandbox_roots": [str(tmp_path)]}

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.set_sandbox(str(tmp_path))

    expected = {
        "status": "set",
        "thread_id": "t1",
        "sandbox_roots": [str(tmp_path)],
        "message": f"Sandbox set to {tmp_path} — executor will reject any path outside this root",
    }
    assert outcome == expected
    db.set_sandbox_roots.assert_called_once_with("t1", [str(tmp_path)])
58
+
59
+
60
def test_set_sandbox_rejects_relative_or_missing_paths(tmp_path):
    """Relative paths and non-existent absolute paths both raise ToolError."""
    from server.tools import executor as executor_mod

    expected_error = "Sandbox path must be absolute and exist"
    missing = tmp_path / "missing"

    # First a relative path, then an absolute path that was never created.
    for bad_path in ("relative/path", str(missing)):
        with pytest.raises(ToolError, match=expected_error):
            executor_mod.set_sandbox(bad_path, thread_id="t1")
69
+
70
+
71
def test_set_sandbox_appends_unique_roots(tmp_path):
    """Setting a second sandbox root appends it after the already stored one."""
    from server.tools import executor as executor_mod

    directories = [tmp_path / "one", tmp_path / "two"]
    for directory in directories:
        directory.mkdir()
    existing_root, new_root = (str(directory) for directory in directories)

    meta_key = "THREAD#t1#META"
    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.get_thread_meta.return_value = {"SK": meta_key, "sandbox_roots": [existing_root]}
    db.set_sandbox_roots.return_value = {
        "SK": meta_key,
        "sandbox_roots": [existing_root, new_root],
    }

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.set_sandbox(new_root)

    assert outcome["sandbox_roots"] == [existing_root, new_root]
    db.set_sandbox_roots.assert_called_once_with("t1", [existing_root, new_root])
95
+
96
+
97
def test_set_sandbox_clear_empties_roots():
    """Passing the literal "clear" writes an empty root list and reports it."""
    from server.tools import executor as executor_mod

    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.set_sandbox_roots.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": []}

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.set_sandbox("clear")

    expected = {
        "status": "cleared",
        "thread_id": "t1",
        "sandbox_roots": [],
        "message": "Sandbox cleared — execution tools will refuse all path operations until a new sandbox is set",
    }
    assert outcome == expected
    db.set_sandbox_roots.assert_called_once_with("t1", [])
117
+
118
+
119
def test_execute_in_sandbox_rejects_when_no_sandbox_configured():
    """With an empty sandbox_roots list, execution raises ToolError."""
    from server.tools import executor as executor_mod

    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.get_thread_meta.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": []}

    with patch.object(executor_mod, "_get_db", return_value=db), pytest.raises(
        ToolError, match="No sandbox configured"
    ):
        executor_mod.execute_in_sandbox(command="python3 tests/run.py")
129
+
130
+
131
def test_dynamo_set_sandbox_roots_updates_thread_meta():
    """set_sandbox_roots replaces the stored roots and writes the item back once."""
    from server.tools.tests.test_switch_thread import make_client

    client, table = make_client()
    stored_item = {
        "PK": "USER#testuser",
        "SK": "THREAD#t1#META",
        "CreatedAt": "2026-05-12T08:00:00Z",
        "UpdatedAt": "2026-05-12T08:00:00Z",
        "Version": 1,
        "Name": "thread-one",
        "sandbox_roots": ["/tmp/old"],
    }
    table.get_item.return_value = {"Item": stored_item}

    result = client.set_sandbox_roots("t1", ["/tmp/new"])

    assert result["sandbox_roots"] == ["/tmp/new"]
    table.put_item.assert_called_once()
    persisted = table.put_item.call_args.kwargs["Item"]
    assert persisted["sandbox_roots"] == ["/tmp/new"]
151
+
152
+
153
def test_executor_tools_registered_as_tier1():
    """Both sandbox tools appear in the MCP server's tool listing."""
    import asyncio
    import server.main  # noqa: F401
    from server.tools._mcp import mcp

    tool_names = {tool.name for tool in asyncio.run(mcp.list_tools())}
    required = {"set_sandbox", "execute_in_sandbox"}
    assert required.issubset(tool_names)
161
+
162
+
163
+ def _executor_db(thread_id: str, sandbox_roots: list[str]):
164
+ mock_db = MagicMock()
165
+ mock_db.resolve_thread_id.return_value = thread_id
166
+ mock_db.get_thread_meta.return_value = {
167
+ "SK": f"THREAD#{thread_id}#META",
168
+ "sandbox_roots": sandbox_roots,
169
+ }
170
+ return mock_db
171
+
172
+
173
def test_execute_in_sandbox_rejects_outside_absolute_path_and_logs(tmp_path):
    """An absolute path outside the sandbox raises and a violation record is written."""
    from server.tools import executor as executor_mod

    command = "rm -rf /etc/passwd"
    db = _executor_db("t1", [str(tmp_path)])

    with patch.object(executor_mod, "_get_db", return_value=db), pytest.raises(
        executor_mod.SandboxViolation, match="Path /etc/passwd is outside sandbox"
    ):
        executor_mod.execute_in_sandbox(command=command)

    # The last put_item call carries the sort key and the logged attributes.
    logged = db.put_item.call_args
    sort_key = logged.args[0]
    attrs = logged.args[1]
    assert sort_key.startswith("THREAD#t1#MSG#")
    assert attrs["Type"] == "sandbox_violation"
    assert attrs["Command"] == command
    assert attrs["Path"] == "/etc/passwd"
187
+
188
+
189
def test_execute_in_sandbox_rejects_symlink_escape(tmp_path):
    """A symlink inside the sandbox that points outside must be rejected.

    Creates ``sandbox/escape -> outside`` and runs ``cat`` on a file reached
    through the link. The call must raise ``SandboxViolation`` and the logged
    record must carry the fully resolved target path.

    Skipped when the platform or current privileges cannot create symlinks,
    so the test does not error out during setup.
    """
    from server.tools import executor as executor_mod

    outside = tmp_path / "outside"
    outside.mkdir()
    sandbox = tmp_path / "sandbox"
    sandbox.mkdir()
    target = outside / "secret.txt"
    target.write_text("secret", encoding="utf-8")
    escape = sandbox / "escape"
    try:
        escape.symlink_to(outside, target_is_directory=True)
    except (OSError, NotImplementedError) as exc:
        # e.g. Windows without the symlink privilege.
        pytest.skip(f"symlinks are not available in this environment: {exc}")

    mock_db = _executor_db("t1", [str(sandbox)])

    with patch.object(executor_mod, "_get_db", return_value=mock_db):
        with pytest.raises(executor_mod.SandboxViolation, match="outside sandbox"):
            executor_mod.execute_in_sandbox(
                command=f"cat {escape / 'secret.txt'}",
                cwd=str(sandbox),
            )

    assert mock_db.put_item.call_args.args[1]["ResolvedPath"] == str(target.resolve())
211
+
212
+
213
def test_execute_in_sandbox_does_not_expand_shell_variables_outside_sandbox(tmp_path, monkeypatch):
    """A ``$VAR`` naming a file outside the sandbox must not leak that file's contents."""
    from server.tools import executor as executor_mod

    sandbox_dir = tmp_path / "sandbox"
    outside_dir = tmp_path / "outside"
    sandbox_dir.mkdir()
    outside_dir.mkdir()

    secret_file = outside_dir / "secret.txt"
    secret_file.write_text("secret", encoding="utf-8")
    monkeypatch.setenv("ESCAPE_FILE", str(secret_file))

    db = _executor_db("t1", [str(sandbox_dir)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.execute_in_sandbox(command="cat $ESCAPE_FILE", cwd=str(sandbox_dir))

    assert outcome["exit_code"] != 0
    assert "secret" not in outcome["stdout"]
231
+
232
+
233
def test_execute_in_sandbox_runs_valid_command_and_logs_summary(tmp_path):
    """A command inside the sandbox runs, returns its output, and is logged."""
    from server.tools import executor as executor_mod

    script = tmp_path / "run.py"
    script.write_text("print('ok')\n", encoding="utf-8")
    command = f"python3 {script.name}"
    db = _executor_db("t1", [str(tmp_path)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.execute_in_sandbox(command=command, cwd=str(tmp_path))

    assert outcome["status"] == "completed"
    assert outcome["exit_code"] == 0
    assert outcome["stdout"].strip() == "ok"
    assert outcome["stderr"] == ""
    assert outcome["duration_ms"] >= 0

    logged = db.put_item.call_args.args[1]
    assert logged["Type"] == "sandbox_execution"
    assert logged["Command"] == command
    assert logged["ExitCode"] == 0
255
+
256
+
257
def test_execute_in_sandbox_timeout_kills_process_and_returns_partial_output(tmp_path):
    """A command exceeding its timeout reports "timeout" with the stdout seen so far."""
    from server.tools import executor as executor_mod

    # The script prints and flushes first, then sleeps past the 1s timeout.
    script_source = (
        "import sys, time\n"
        "print('started')\n"
        "sys.stdout.flush()\n"
        "time.sleep(5)\n"
    )
    script = tmp_path / "slow.py"
    script.write_text(script_source, encoding="utf-8")
    db = _executor_db("t1", [str(tmp_path)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.execute_in_sandbox(
            command=f"python3 {script.name}",
            cwd=str(tmp_path),
            timeout_seconds=1,
        )

    assert outcome["status"] == "timeout"
    assert outcome["exit_code"] is None
    assert "started" in outcome["stdout"]
    assert outcome["message"] == "Command timed out after 1s — partial stdout captured"
    assert db.put_item.call_args.args[1]["Type"] == "sandbox_execution"
    assert db.put_item.call_args.args[1]["Outcome"] == "timeout"
283
+
284
+
285
def test_classify_execution_failure_transient_and_logic():
    """Network-style errors classify as transient; import and assert errors as logic."""
    from server.tools import executor as executor_mod

    cases = (
        ("Connection reset by peer", "transient"),
        ("ModuleNotFoundError: No module named 'httpx'", "logic"),
        ("assert 1 == 2", "logic"),
    )
    for stderr_text, expected in cases:
        assert executor_mod.classify_execution_failure(stderr_text) == expected
295
+
296
+
297
def test_execute_with_recovery_retries_transient_failures_with_backoff():
    """Two transient failures then a success: three runs, sleeps of 5 and 15."""
    from server.tools import executor as executor_mod

    def attempt(exit_code, stdout, stderr):
        # Shape of one mocked execute_in_sandbox result.
        return {
            "status": "completed",
            "exit_code": exit_code,
            "stdout": stdout,
            "stderr": stderr,
            "duration_ms": 1,
        }

    db = _executor_db("t1", ["/tmp"])
    scripted = [
        attempt(1, "", "network timeout"),
        attempt(1, "", "temporary failure"),
        attempt(0, "ok", ""),
    ]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", side_effect=scripted) as run:
            with patch.object(executor_mod.time, "sleep") as fake_sleep:
                outcome = executor_mod.execute_with_recovery("pytest -q", cwd="/tmp")

    assert outcome["status"] == "recovered"
    assert outcome["classification"] == "transient"
    assert run.call_count == 3
    delays = [call.args[0] for call in fake_sleep.call_args_list]
    assert delays == [5, 15]
    assert db.put_item.call_args.args[1]["Type"] == "recovery_decision"
317
+
318
+
319
def test_execute_with_recovery_autofixes_module_not_found_and_reruns():
    """A ModuleNotFoundError triggers a pip install, then a successful re-run."""
    from server.tools import executor as executor_mod

    def attempt(exit_code, stdout, stderr):
        # Shape of one mocked execute_in_sandbox result.
        return {
            "status": "completed",
            "exit_code": exit_code,
            "stdout": stdout,
            "stderr": stderr,
            "duration_ms": 1,
        }

    db = _executor_db("t1", ["/tmp"])
    # Original failure, then the install step, then the re-run.
    scripted = [
        attempt(1, "", "ModuleNotFoundError: No module named 'httpx'"),
        attempt(0, "installed", ""),
        attempt(0, "ok", ""),
    ]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", side_effect=scripted) as run:
            outcome = executor_mod.execute_with_recovery("python3 app.py", cwd="/tmp")

    assert outcome["status"] == "recovered"
    assert outcome["classification"] == "logic"
    install_call = run.call_args_list[1]
    assert install_call.kwargs["command"] == "python3 -m pip install httpx"
    assert run.call_count == 3
    logged_content = db.put_item.call_args.args[1]["Content"]
    assert logged_content == "Failure: ModuleNotFoundError -> Auto-fix: pip install httpx -> Re-run: success"
348
+
349
+
350
def test_execute_with_recovery_rejects_unsafe_module_name():
    """A module name containing shell metacharacters is never auto-installed."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])
    failure = dict(
        status="completed",
        exit_code=1,
        stdout="",
        stderr="ModuleNotFoundError: No module named 'httpx;touch /tmp/pwned'",
        duration_ms=1,
    )

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=failure) as run:
            outcome = executor_mod.execute_with_recovery("python3 app.py", cwd="/tmp")

    assert outcome["status"] == "paused"
    # Only the original command ran; no install step was attempted.
    assert run.call_count == 1
    assert outcome["classification"] == "logic"
370
+
371
+
372
def test_execute_with_recovery_exhaustion_writes_decision_log():
    """A persistent logic failure pauses and records a recovery_decision entry."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])
    failure = dict(
        status="completed",
        exit_code=2,
        stdout="",
        stderr="pytest failed",
        duration_ms=1,
    )

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=failure):
            outcome = executor_mod.execute_with_recovery("pytest -q", cwd="/tmp")

    assert outcome["status"] == "paused"
    assert outcome["classification"] == "logic"

    decision = db.put_item.call_args.args[1]
    assert decision["Type"] == "recovery_decision"
    assert decision["OriginalCommand"] == "pytest -q"
    assert decision["RecommendedNextStep"] == "Developer input required before continuing AFK execution."
395
+
396
+
397
def test_execute_with_recovery_caps_total_attempts():
    """With five recovery attempts already used, the call pauses with the cap message."""
    from server.tools import executor as executor_mod

    cap_message = "Recovery cap reached — pausing autonomous execution"
    db = _executor_db("t1", ["/tmp"])
    failure = dict(
        status="completed",
        exit_code=1,
        stdout="",
        stderr="network timeout",
        duration_ms=1,
    )

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=failure):
            with patch.object(executor_mod.time, "sleep"):
                outcome = executor_mod.execute_with_recovery(
                    "pytest -q",
                    cwd="/tmp",
                    recovery_attempts_used=5,
                )

    assert outcome["status"] == "paused"
    assert outcome["message"] == cap_message
    assert db.put_item.call_args.args[1]["Content"] == cap_message
421
+
422
+
423
def test_start_afk_logs_plan_executes_steps_and_completion_summary():
    """start_afk runs each step in order and logs a plan, per-step, and completion entry."""
    from server.tools import executor as executor_mod

    steps = ["echo one", "echo two"]
    db = _executor_db("t1", ["/tmp"])
    scripted = [
        {"status": "completed", "exit_code": 0, "stdout": text, "stderr": "", "duration_ms": 1}
        for text in ("one\n", "two\n")
    ]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", side_effect=scripted) as run:
            outcome = executor_mod.start_afk(
                task="run the checks",
                steps=["echo one", "echo two"],
                cwd="/tmp",
            )

    assert outcome["status"] == "completed"
    assert outcome["message"] == "Task complete — see thread t1 for full execution log"
    assert [call.kwargs["command"] for call in run.call_args_list] == steps

    # Only put_item payloads that carry a "Type" key are log entries of interest.
    entries = []
    for call in db.put_item.call_args_list:
        payload = call.args[1]
        if "Type" in payload:
            entries.append(payload)
    kinds = [entry["Type"] for entry in entries]
    assert kinds.count("afk_plan") == 1
    assert kinds.count("afk_step") == 2
    assert kinds.count("afk_completion") == 1

    completion = next(entry for entry in entries if entry["Type"] == "afk_completion")
    assert completion["TaskDescription"] == "run the checks"
    assert completion["Tests"] == "not_run"
    assert completion["FilesModified"] == []
454
+
455
+
456
def test_start_afk_uses_sandbox_root_for_file_summary_when_cwd_omitted(tmp_path):
    """Without an explicit cwd, the file summary is computed for the sandbox root."""
    from server.tools import executor as executor_mod

    sandbox_root = str(tmp_path)
    db = _executor_db("t1", [sandbox_root])
    ok_result = {"status": "completed", "exit_code": 0, "stdout": "ok\n", "stderr": "", "duration_ms": 1}
    changed_files = [{"path": "app.py", "status": "M", "diff_summary": "1 file changed"}]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=ok_result):
            with patch.object(
                executor_mod, "_files_modified_summary", return_value=changed_files
            ) as summarize:
                outcome = executor_mod.start_afk(task="ship", steps=["pytest -q"])

    summarize.assert_called_once_with(sandbox_root)
    assert outcome["files_modified"] == changed_files
470
+
471
+
472
def test_start_afk_background_returns_without_running_steps_inline(tmp_path):
    """background=True starts a thread and returns before any step is executed."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", [str(tmp_path)])
    worker = MagicMock()

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod.threading, "Thread", return_value=worker):
            with patch.object(executor_mod, "execute_in_sandbox") as run:
                outcome = executor_mod.start_afk(
                    task="long task",
                    steps=["python3 slow.py", "pytest -q"],
                    background=True,
                )

    assert outcome["status"] == "started"
    assert outcome["current_step"] == "python3 slow.py"
    assert outcome["steps_total"] == 2
    worker.start.assert_called_once()
    # Thread is mocked, so nothing may have run inline.
    run.assert_not_called()
492
+
493
+
494
def test_start_afk_pauses_at_safe_checkpoint_when_pause_requested():
    """When thread meta flips to pause_requested, start_afk stops before the next step."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])
    active = {"SK": "THREAD#t1#META", "sandbox_roots": ["/tmp"], "afk_session": {"status": "active"}}
    pause_requested = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": ["/tmp"],
        "afk_session": {"status": "pause_requested"},
    }
    # Successive get_thread_meta reads: active twice, then the pause request.
    db.get_thread_meta.side_effect = [active, active, pause_requested]
    ok_result = {"status": "completed", "exit_code": 0, "stdout": "ok\n", "stderr": "", "duration_ms": 1}

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=ok_result):
            outcome = executor_mod.start_afk(
                task="run two steps",
                steps=["echo ok", "echo should-not-run"],
                cwd="/tmp",
            )

    expected_message = (
        "AFK paused — here's where I am: echo should-not-run. Type 'resume' to continue or give new instructions"
    )
    assert outcome["status"] == "paused"
    assert outcome["current_step"] == "echo should-not-run"
    assert outcome["message"] == expected_message
522
+
523
+
524
def test_pause_afk_marks_session_for_checkpoint_pause():
    """pause_afk flags the active session as pause_requested and persists that flag."""
    from server.tools import executor as executor_mod

    session = {
        "status": "active",
        "current_step": "pytest -q",
        "steps_completed": 1,
        "steps_total": 3,
    }
    db = _executor_db("t1", ["/tmp"])
    db.get_thread_meta.return_value = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": ["/tmp"],
        "afk_session": session,
    }

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.pause_afk()

    expected_message = (
        "AFK paused — here's where I am: pytest -q. Type 'resume' to continue or give new instructions"
    )
    assert outcome["status"] == "pause_requested"
    assert outcome["message"] == expected_message

    persisted = db.put_item.call_args.args[1]
    assert persisted["afk_session"]["status"] == "pause_requested"
    assert persisted["afk_session"]["pause_requested"] is True
550
+
551
+
552
def test_afk_status_reports_active_session_and_idle_message():
    """afk_status echoes an active session's progress, and reports idle when none exists."""
    from server.tools import executor as executor_mod

    session = {
        "status": "active",
        "task": "ship it",
        "current_step": "pytest -q",
        "steps_completed": 2,
        "steps_total": 4,
        "started_at_monotonic": "2026-05-14T10:00:00Z",  # ISO string, not float
        "last_command_output": "last line",
    }
    db = _executor_db("t1", ["/tmp"])
    db.get_thread_meta.return_value = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": ["/tmp"],
        "afk_session": session,
    }

    with patch.object(executor_mod, "_get_db", return_value=db):
        active_report = executor_mod.afk_status()

    assert active_report["status"] == "active"
    assert active_report["current_step"] == "pytest -q"
    assert active_report["steps_completed"] == 2
    assert active_report["steps_total"] == 4
    assert active_report["elapsed_seconds"] is not None  # just verify it computes without crash
    assert active_report["last_command_output"] == "last line"

    # Second scenario: thread meta without any afk_session entry.
    db.get_thread_meta.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": ["/tmp"]}
    with patch.object(executor_mod, "_get_db", return_value=db):
        idle_report = executor_mod.afk_status()

    assert idle_report == {"status": "idle", "message": "No AFK session running"}