tylor-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.aws-setup.sh +25 -0
- package/.claude-plugin/plugin.json +22 -0
- package/.mcp.json +12 -0
- package/AGENTS.md +93 -0
- package/CLAUDE.md +99 -0
- package/CLAUDE_PLATFORM_AWS_SETUP.md +105 -0
- package/LICENSE +21 -0
- package/README.md +146 -0
- package/assets/tylor_logo.png +0 -0
- package/assets/tylor_threads_concept.png +0 -0
- package/bin/tylor.js +23 -0
- package/hooks/kill-thread-trigger.sh +7 -0
- package/hooks/post-tool-use-code-index.sh +7 -0
- package/hooks/session-checkpoint.sh +7 -0
- package/hooks/session-start.sh +7 -0
- package/install.py +401 -0
- package/install.sh +260 -0
- package/package.json +24 -0
- package/pytest.ini +2 -0
- package/registry.json +26 -0
- package/server/.env.example +24 -0
- package/server/__init__.py +0 -0
- package/server/config.py +89 -0
- package/server/main.py +93 -0
- package/server/personas/analyst.md +15 -0
- package/server/personas/ceo.md +14 -0
- package/server/personas/code_agent.md +15 -0
- package/server/personas/cto.md +14 -0
- package/server/provision.py +260 -0
- package/server/provision_opensearch.py +154 -0
- package/server/requirements.txt +26 -0
- package/server/storage/__init__.py +0 -0
- package/server/storage/dynamo.py +399 -0
- package/server/storage/json_store.py +359 -0
- package/server/storage/opensearch.py +194 -0
- package/server/storage/s3.py +96 -0
- package/server/storage/tests/__init__.py +0 -0
- package/server/storage/tests/test_dynamo.py +452 -0
- package/server/storage/tests/test_json_store.py +226 -0
- package/server/storage/tests/test_opensearch.py +270 -0
- package/server/storage/tests/test_s3.py +125 -0
- package/server/tests/__init__.py +0 -0
- package/server/tests/test_install.py +606 -0
- package/server/tests/test_isolation.py +90 -0
- package/server/tests/test_ui_server.py +385 -0
- package/server/tests/test_ui_shader_background.py +52 -0
- package/server/tests/test_ui_story_6_3.py +105 -0
- package/server/tools/__init__.py +0 -0
- package/server/tools/_mcp.py +4 -0
- package/server/tools/agents.py +160 -0
- package/server/tools/ecc/__init__.py +1 -0
- package/server/tools/ecc/data.py +35 -0
- package/server/tools/ecc/diagrams.py +23 -0
- package/server/tools/ecc/pipeline.py +24 -0
- package/server/tools/ecc/presentation.py +24 -0
- package/server/tools/ecc/web.py +23 -0
- package/server/tools/executor.py +880 -0
- package/server/tools/harness.py +330 -0
- package/server/tools/help.py +162 -0
- package/server/tools/hooks.py +357 -0
- package/server/tools/personas.py +110 -0
- package/server/tools/registry.py +195 -0
- package/server/tools/router.py +117 -0
- package/server/tools/skill_installer.py +230 -0
- package/server/tools/summarizer.py +168 -0
- package/server/tools/tests/__init__.py +0 -0
- package/server/tools/tests/test_agents.py +246 -0
- package/server/tools/tests/test_code_index.py +108 -0
- package/server/tools/tests/test_ecc_tools.py +51 -0
- package/server/tools/tests/test_executor.py +584 -0
- package/server/tools/tests/test_help_agent101.py +149 -0
- package/server/tools/tests/test_hooks.py +124 -0
- package/server/tools/tests/test_kill_thread.py +125 -0
- package/server/tools/tests/test_new_thread_list_threads.py +293 -0
- package/server/tools/tests/test_personas.py +52 -0
- package/server/tools/tests/test_recall_memory.py +55 -0
- package/server/tools/tests/test_registry_client.py +308 -0
- package/server/tools/tests/test_router.py +263 -0
- package/server/tools/tests/test_skill_installer.py +174 -0
- package/server/tools/tests/test_switch_thread.py +163 -0
- package/server/tools/tests/test_thread_command_skills.py +54 -0
- package/server/tools/tests/test_thread_resolver.py +165 -0
- package/server/tools/tests/test_tier1_schema.py +296 -0
- package/server/tools/thread_resolver.py +75 -0
- package/server/tools/tylor.py +374 -0
- package/server/tools/ui.py +38 -0
- package/server/ui_server.py +292 -0
- package/server/validate.py +237 -0
- package/skills/add-skill/SKILL.md +37 -0
- package/skills/afk-status/SKILL.md +20 -0
- package/skills/bmad/SKILL.md +14 -0
- package/skills/help-agent101/SKILL.md +48 -0
- package/skills/kill-thread/SKILL.md +35 -0
- package/skills/list-threads/SKILL.md +35 -0
- package/skills/new-thread/SKILL.md +35 -0
- package/skills/recall/SKILL.md +39 -0
- package/skills/run/SKILL.md +33 -0
- package/skills/set-sandbox/SKILL.md +38 -0
- package/skills/switch-thread/SKILL.md +38 -0
- package/ui/claude-logo.png +0 -0
- package/ui/index.html +1314 -0
|
@@ -0,0 +1,584 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for the executor tools: Story 5.1 (sandbox path declaration) plus sandboxed execution, failure recovery, and AFK sessions.
|
|
3
|
+
Run: pytest server/tools/tests/test_executor.py -v
|
|
4
|
+
"""
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from mcp.server.fastmcp.exceptions import ToolError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
PLUGIN_DIR = Path(__file__).parent.parent.parent.parent
|
|
13
|
+
SKILLS_DIR = PLUGIN_DIR / "skills"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_set_sandbox_skill_file_exists_and_mentions_tool():
    """The set-sandbox SKILL.md exists, opens with ``---`` and names its tools."""
    skill_file = SKILLS_DIR / "set-sandbox" / "SKILL.md"
    assert skill_file.exists()

    body = skill_file.read_text(encoding="utf-8")
    # The file must open with a `---` line.
    assert body.startswith("---\n")
    # Each of these fragments has to appear somewhere in the skill text.
    for required in ("name: set-sandbox", "set_sandbox", "clear", "execute_in_sandbox"):
        assert required in body
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_afk_status_skill_file_exists_and_mentions_tool():
    """The afk-status SKILL.md exists, opens with ``---`` and names its tool."""
    skill_file = SKILLS_DIR / "afk-status" / "SKILL.md"
    assert skill_file.exists()

    body = skill_file.read_text(encoding="utf-8")
    # The file must open with a `---` line.
    assert body.startswith("---\n")
    # Each of these fragments has to appear somewhere in the skill text.
    for required in ("name: afk-status", "afk_status", "No AFK session running"):
        assert required in body
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_set_sandbox_validates_absolute_existing_path(tmp_path):
    """An existing absolute directory is accepted and written as the sole root."""
    from server.tools import executor as executor_mod

    root = str(tmp_path)
    meta_key = "THREAD#t1#META"

    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    # The thread starts with no roots; the write echoes back the new one.
    db.get_thread_meta.return_value = {"SK": meta_key, "sandbox_roots": []}
    db.set_sandbox_roots.return_value = {"SK": meta_key, "sandbox_roots": [root]}

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.set_sandbox(root)

    expected = {
        "status": "set",
        "thread_id": "t1",
        "sandbox_roots": [root],
        "message": f"Sandbox set to {tmp_path} — executor will reject any path outside this root",
    }
    assert outcome == expected
    db.set_sandbox_roots.assert_called_once_with("t1", [root])
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_set_sandbox_rejects_relative_or_missing_paths(tmp_path):
    """Relative and non-existent paths raise ToolError and persist nothing.

    Like the other executor tests in this module, the storage client is
    replaced with a mock so the test stays hermetic no matter when
    ``set_sandbox`` resolves the thread, and so we can verify that a rejected
    path never results in a write.
    """
    from server.tools import executor as executor_mod

    mock_db = MagicMock()
    mock_db.resolve_thread_id.return_value = "t1"
    mock_db.get_thread_meta.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": []}

    # A path under tmp_path that was never created: absolute but missing.
    missing = tmp_path / "missing"

    with patch.object(executor_mod, "_get_db", return_value=mock_db):
        for bad_path in ("relative/path", str(missing)):
            with pytest.raises(ToolError, match="Sandbox path must be absolute and exist"):
                executor_mod.set_sandbox(bad_path, thread_id="t1")

    # Rejection must happen before anything is written to the thread meta.
    mock_db.set_sandbox_roots.assert_not_called()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_set_sandbox_appends_unique_roots(tmp_path):
    """Setting a second root writes the existing root followed by the new one."""
    from server.tools import executor as executor_mod

    existing_dir = tmp_path / "one"
    added_dir = tmp_path / "two"
    for directory in (existing_dir, added_dir):
        directory.mkdir()
    existing, added = str(existing_dir), str(added_dir)

    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.get_thread_meta.return_value = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": [existing],
    }
    db.set_sandbox_roots.return_value = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": [existing, added],
    }

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.set_sandbox(added)

    assert outcome["sandbox_roots"] == [existing, added]
    db.set_sandbox_roots.assert_called_once_with("t1", [existing, added])
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_set_sandbox_clear_empties_roots():
    """Passing "clear" writes an empty root list and reports status "cleared"."""
    from server.tools import executor as executor_mod

    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.set_sandbox_roots.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": []}

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.set_sandbox("clear")

    expected = {
        "status": "cleared",
        "thread_id": "t1",
        "sandbox_roots": [],
        "message": "Sandbox cleared — execution tools will refuse all path operations until a new sandbox is set",
    }
    assert outcome == expected
    db.set_sandbox_roots.assert_called_once_with("t1", [])
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_execute_in_sandbox_rejects_when_no_sandbox_configured():
    """With an empty root list, execute_in_sandbox raises "No sandbox configured"."""
    from server.tools import executor as executor_mod

    db = MagicMock()
    db.resolve_thread_id.return_value = "t1"
    db.get_thread_meta.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": []}

    db_patch = patch.object(executor_mod, "_get_db", return_value=db)
    expect_error = pytest.raises(ToolError, match="No sandbox configured")
    with db_patch:
        with expect_error:
            executor_mod.execute_in_sandbox(command="python3 tests/run.py")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_dynamo_set_sandbox_roots_updates_thread_meta():
    """set_sandbox_roots replaces the stored roots and puts the item once."""
    from server.tools.tests.test_switch_thread import make_client

    client, table = make_client()
    # Existing thread meta item that the client reads before writing.
    stored_meta = dict(
        PK="USER#testuser",
        SK="THREAD#t1#META",
        CreatedAt="2026-05-12T08:00:00Z",
        UpdatedAt="2026-05-12T08:00:00Z",
        Version=1,
        Name="thread-one",
        sandbox_roots=["/tmp/old"],
    )
    table.get_item.return_value = {"Item": stored_meta}

    new_roots = ["/tmp/new"]
    written = client.set_sandbox_roots("t1", ["/tmp/new"])

    assert written["sandbox_roots"] == new_roots
    table.put_item.assert_called_once()
    put_kwargs = table.put_item.call_args.kwargs
    assert put_kwargs["Item"]["sandbox_roots"] == new_roots
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_executor_tools_registered_as_tier1():
    """Both sandbox tools appear in the MCP server's tool listing."""
    import asyncio

    import server.main  # noqa: F401
    from server.tools._mcp import mcp

    registered = set()
    for tool in asyncio.run(mcp.list_tools()):
        registered.add(tool.name)

    assert registered.issuperset({"set_sandbox", "execute_in_sandbox"})
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _executor_db(thread_id: str, sandbox_roots: list[str]):
|
|
164
|
+
mock_db = MagicMock()
|
|
165
|
+
mock_db.resolve_thread_id.return_value = thread_id
|
|
166
|
+
mock_db.get_thread_meta.return_value = {
|
|
167
|
+
"SK": f"THREAD#{thread_id}#META",
|
|
168
|
+
"sandbox_roots": sandbox_roots,
|
|
169
|
+
}
|
|
170
|
+
return mock_db
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_execute_in_sandbox_rejects_outside_absolute_path_and_logs(tmp_path):
    """A command naming /etc/passwd raises SandboxViolation and logs the attempt."""
    from server.tools import executor as executor_mod

    command = "rm -rf /etc/passwd"
    db = _executor_db("t1", [str(tmp_path)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        expect_violation = pytest.raises(
            executor_mod.SandboxViolation, match="Path /etc/passwd is outside sandbox"
        )
        with expect_violation:
            executor_mod.execute_in_sandbox(command=command)

    # The most recent put_item call is the violation log entry.
    positional = db.put_item.call_args.args
    sort_key = positional[0]
    attrs = positional[1]
    assert sort_key.startswith("THREAD#t1#MSG#")
    assert attrs["Type"] == "sandbox_violation"
    assert attrs["Command"] == command
    assert attrs["Path"] == "/etc/passwd"
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_execute_in_sandbox_rejects_symlink_escape(tmp_path):
    """A path inside the sandbox that resolves outside via a symlink is rejected.

    Layout: ``sandbox/escape`` is a directory symlink to ``outside``, which
    holds ``secret.txt``. The command refers to ``sandbox/escape/secret.txt``;
    the executor must raise ``SandboxViolation`` and log the fully resolved
    target path.

    Skipped when the platform or current privileges do not allow creating
    symlinks, since the scenario cannot be set up there.
    """
    from server.tools import executor as executor_mod

    outside = tmp_path / "outside"
    outside.mkdir()
    sandbox = tmp_path / "sandbox"
    sandbox.mkdir()
    target = outside / "secret.txt"
    target.write_text("secret", encoding="utf-8")
    escape = sandbox / "escape"
    try:
        escape.symlink_to(outside, target_is_directory=True)
    except (OSError, NotImplementedError) as exc:
        # e.g. Windows without the symlink privilege: skip, don't fail.
        pytest.skip(f"symlinks are not available in this environment: {exc}")

    mock_db = _executor_db("t1", [str(sandbox)])

    with patch.object(executor_mod, "_get_db", return_value=mock_db):
        with pytest.raises(executor_mod.SandboxViolation, match="outside sandbox"):
            executor_mod.execute_in_sandbox(
                command=f"cat {escape / 'secret.txt'}",
                cwd=str(sandbox),
            )

    # The log entry records where the path really points, not the symlinked spelling.
    assert mock_db.put_item.call_args.args[1]["ResolvedPath"] == str(target.resolve())
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def test_execute_in_sandbox_does_not_expand_shell_variables_outside_sandbox(tmp_path, monkeypatch):
    """``cat $ESCAPE_FILE`` must fail and must not leak the outside file's content."""
    from server.tools import executor as executor_mod

    sandbox_dir = tmp_path / "sandbox"
    outside_dir = tmp_path / "outside"
    for directory in (sandbox_dir, outside_dir):
        directory.mkdir()

    # The environment variable points at a file outside the sandbox root.
    secret_file = outside_dir / "secret.txt"
    secret_file.write_text("secret", encoding="utf-8")
    monkeypatch.setenv("ESCAPE_FILE", str(secret_file))

    db = _executor_db("t1", [str(sandbox_dir)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.execute_in_sandbox(
            command="cat $ESCAPE_FILE",
            cwd=str(sandbox_dir),
        )

    assert outcome["exit_code"] != 0
    assert "secret" not in outcome["stdout"]
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def test_execute_in_sandbox_runs_valid_command_and_logs_summary(tmp_path):
    """A script inside the sandbox runs to completion and is logged as an execution."""
    from server.tools import executor as executor_mod

    script = tmp_path / "run.py"
    script.write_text("print('ok')\n", encoding="utf-8")
    command = f"python3 {script.name}"
    db = _executor_db("t1", [str(tmp_path)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.execute_in_sandbox(command=command, cwd=str(tmp_path))

    # Result returned to the caller.
    assert outcome["status"] == "completed"
    assert outcome["exit_code"] == 0
    assert outcome["stdout"].strip() == "ok"
    assert outcome["stderr"] == ""
    assert outcome["duration_ms"] >= 0

    # Attributes of the last item written to storage.
    logged = db.put_item.call_args.args[1]
    assert logged["Type"] == "sandbox_execution"
    assert logged["Command"] == command
    assert logged["ExitCode"] == 0
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def test_execute_in_sandbox_timeout_kills_process_and_returns_partial_output(tmp_path):
    """A script outliving its 1s timeout yields status "timeout" plus flushed stdout."""
    from server.tools import executor as executor_mod

    # The script prints and flushes immediately, then sleeps past the timeout.
    script_lines = [
        "import sys, time",
        "print('started')",
        "sys.stdout.flush()",
        "time.sleep(5)",
    ]
    script = tmp_path / "slow.py"
    script.write_text("\n".join(script_lines) + "\n", encoding="utf-8")
    db = _executor_db("t1", [str(tmp_path)])

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.execute_in_sandbox(
            command=f"python3 {script.name}",
            cwd=str(tmp_path),
            timeout_seconds=1,
        )

    assert outcome["status"] == "timeout"
    assert outcome["exit_code"] is None
    assert "started" in outcome["stdout"]
    assert outcome["message"] == "Command timed out after 1s — partial stdout captured"

    logged = db.put_item.call_args.args[1]
    assert logged["Type"] == "sandbox_execution"
    assert logged["Outcome"] == "timeout"
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def test_classify_execution_failure_transient_and_logic():
    """Connection resets classify as transient; import and assertion errors as logic."""
    from server.tools import executor as executor_mod

    cases = (
        ("Connection reset by peer", "transient"),
        ("ModuleNotFoundError: No module named 'httpx'", "logic"),
        ("assert 1 == 2", "logic"),
    )
    for failure_text, expected in cases:
        assert executor_mod.classify_execution_failure(failure_text) == expected
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def test_execute_with_recovery_retries_transient_failures_with_backoff():
    """Two transient failures then a success: three runs, sleeps of 5 and 15."""
    from server.tools import executor as executor_mod

    def _run(exit_code, stdout="", stderr=""):
        # Shape of one execute_in_sandbox result.
        return {
            "status": "completed",
            "exit_code": exit_code,
            "stdout": stdout,
            "stderr": stderr,
            "duration_ms": 1,
        }

    db = _executor_db("t1", ["/tmp"])
    scripted = [
        _run(1, stderr="network timeout"),
        _run(1, stderr="temporary failure"),
        _run(0, stdout="ok"),
    ]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", side_effect=scripted) as execute:
            with patch.object(executor_mod.time, "sleep") as sleep:
                outcome = executor_mod.execute_with_recovery("pytest -q", cwd="/tmp")

    assert outcome["status"] == "recovered"
    assert outcome["classification"] == "transient"
    assert execute.call_count == 3
    delays = [invocation.args[0] for invocation in sleep.call_args_list]
    assert delays == [5, 15]
    assert db.put_item.call_args.args[1]["Type"] == "recovery_decision"
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def test_execute_with_recovery_autofixes_module_not_found_and_reruns():
    """A ModuleNotFoundError triggers a pip install run, then a successful re-run."""
    from server.tools import executor as executor_mod

    def _run(exit_code, stdout="", stderr=""):
        # Shape of one execute_in_sandbox result.
        return {
            "status": "completed",
            "exit_code": exit_code,
            "stdout": stdout,
            "stderr": stderr,
            "duration_ms": 1,
        }

    db = _executor_db("t1", ["/tmp"])
    # Order: original failure, the install command, the re-run.
    scripted = [
        _run(1, stderr="ModuleNotFoundError: No module named 'httpx'"),
        _run(0, stdout="installed"),
        _run(0, stdout="ok"),
    ]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", side_effect=scripted) as execute:
            outcome = executor_mod.execute_with_recovery("python3 app.py", cwd="/tmp")

    assert outcome["status"] == "recovered"
    assert outcome["classification"] == "logic"
    install_call = execute.call_args_list[1]
    assert install_call.kwargs["command"] == "python3 -m pip install httpx"
    assert execute.call_count == 3
    logged_content = db.put_item.call_args.args[1]["Content"]
    assert logged_content == "Failure: ModuleNotFoundError -> Auto-fix: pip install httpx -> Re-run: success"
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def test_execute_with_recovery_rejects_unsafe_module_name():
    """A module name carrying shell syntax is never installed; execution pauses."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])
    # The reported module name smuggles a second shell command after ';'.
    failure = dict(
        status="completed",
        exit_code=1,
        stdout="",
        stderr="ModuleNotFoundError: No module named 'httpx;touch /tmp/pwned'",
        duration_ms=1,
    )

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=failure) as execute:
            outcome = executor_mod.execute_with_recovery("python3 app.py", cwd="/tmp")

    assert outcome["status"] == "paused"
    # Only the original command ran; no install attempt followed.
    assert execute.call_count == 1
    assert outcome["classification"] == "logic"
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def test_execute_with_recovery_exhaustion_writes_decision_log():
    """An unrecoverable logic failure pauses and writes a recovery_decision entry."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])
    failure = dict(
        status="completed",
        exit_code=2,
        stdout="",
        stderr="pytest failed",
        duration_ms=1,
    )

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=failure):
            outcome = executor_mod.execute_with_recovery("pytest -q", cwd="/tmp")

    assert outcome["status"] == "paused"
    assert outcome["classification"] == "logic"

    decision = db.put_item.call_args.args[1]
    assert decision["Type"] == "recovery_decision"
    assert decision["OriginalCommand"] == "pytest -q"
    assert decision["RecommendedNextStep"] == "Developer input required before continuing AFK execution."
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def test_execute_with_recovery_caps_total_attempts():
    """Starting with five attempts already used pauses with the cap message."""
    from server.tools import executor as executor_mod

    cap_message = "Recovery cap reached — pausing autonomous execution"
    db = _executor_db("t1", ["/tmp"])
    failure = dict(
        status="completed",
        exit_code=1,
        stdout="",
        stderr="network timeout",
        duration_ms=1,
    )

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=failure):
            with patch.object(executor_mod.time, "sleep"):
                outcome = executor_mod.execute_with_recovery(
                    "pytest -q",
                    cwd="/tmp",
                    recovery_attempts_used=5,
                )

    assert outcome["status"] == "paused"
    assert outcome["message"] == cap_message
    assert db.put_item.call_args.args[1]["Content"] == cap_message
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def test_start_afk_logs_plan_executes_steps_and_completion_summary():
    """start_afk runs each step in order and logs one plan, two steps, one completion."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])
    scripted = [
        {"status": "completed", "exit_code": 0, "stdout": text, "stderr": "", "duration_ms": 1}
        for text in ("one\n", "two\n")
    ]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", side_effect=scripted) as execute:
            outcome = executor_mod.start_afk(
                task="run the checks",
                steps=["echo one", "echo two"],
                cwd="/tmp",
            )

    assert outcome["status"] == "completed"
    assert outcome["message"] == "Task complete — see thread t1 for full execution log"
    commands_run = [invocation.kwargs["command"] for invocation in execute.call_args_list]
    assert commands_run == ["echo one", "echo two"]

    # Only put_item payloads that carry a "Type" are log entries.
    log_items = []
    for invocation in db.put_item.call_args_list:
        payload = invocation.args[1]
        if "Type" in payload:
            log_items.append(payload)

    expected_counts = {"afk_plan": 1, "afk_step": 2, "afk_completion": 1}
    for log_type, expected in expected_counts.items():
        assert sum(1 for item in log_items if item["Type"] == log_type) == expected

    completion = next(item for item in log_items if item["Type"] == "afk_completion")
    assert completion["TaskDescription"] == "run the checks"
    assert completion["Tests"] == "not_run"
    assert completion["FilesModified"] == []
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def test_start_afk_uses_sandbox_root_for_file_summary_when_cwd_omitted(tmp_path):
    """Without a cwd, the files-modified summary is computed for the sandbox root."""
    from server.tools import executor as executor_mod

    root = str(tmp_path)
    db = _executor_db("t1", [root])
    step_result = {"status": "completed", "exit_code": 0, "stdout": "ok\n", "stderr": "", "duration_ms": 1}
    changed_files = [{"path": "app.py", "status": "M", "diff_summary": "1 file changed"}]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=step_result):
            with patch.object(
                executor_mod, "_files_modified_summary", return_value=changed_files
            ) as summary:
                outcome = executor_mod.start_afk(task="ship", steps=["pytest -q"])

    summary.assert_called_once_with(root)
    assert outcome["files_modified"] == changed_files
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def test_start_afk_background_returns_without_running_steps_inline(tmp_path):
    """background=True starts a thread and returns before any step is executed."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", [str(tmp_path)])
    # Stand-in returned by the patched threading.Thread constructor.
    worker = MagicMock()
    steps = ["python3 slow.py", "pytest -q"]

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod.threading, "Thread", return_value=worker):
            with patch.object(executor_mod, "execute_in_sandbox") as execute:
                outcome = executor_mod.start_afk(
                    task="long task",
                    steps=steps,
                    background=True,
                )

    assert outcome["status"] == "started"
    assert outcome["current_step"] == "python3 slow.py"
    assert outcome["steps_total"] == 2
    worker.start.assert_called_once()
    execute.assert_not_called()
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def test_start_afk_pauses_at_safe_checkpoint_when_pause_requested():
    """Once the thread meta reports pause_requested, start_afk stops before the next step."""
    from server.tools import executor as executor_mod

    def _meta(session_status):
        # Thread meta item carrying an AFK session in the given status.
        return {
            "SK": "THREAD#t1#META",
            "sandbox_roots": ["/tmp"],
            "afk_session": {"status": session_status},
        }

    db = _executor_db("t1", ["/tmp"])
    active = _meta("active")
    pause_requested = _meta("pause_requested")
    # Successive reads: the same active item twice, then the pause request.
    db.get_thread_meta.side_effect = [active] * 2 + [pause_requested]
    step_result = {"status": "completed", "exit_code": 0, "stdout": "ok\n", "stderr": "", "duration_ms": 1}

    with patch.object(executor_mod, "_get_db", return_value=db):
        with patch.object(executor_mod, "execute_in_sandbox", return_value=step_result):
            outcome = executor_mod.start_afk(
                task="run two steps",
                steps=["echo ok", "echo should-not-run"],
                cwd="/tmp",
            )

    expected_message = (
        "AFK paused — here's where I am: echo should-not-run. Type 'resume' to continue or give new instructions"
    )
    assert outcome["status"] == "paused"
    assert outcome["current_step"] == "echo should-not-run"
    assert outcome["message"] == expected_message
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def test_pause_afk_marks_session_for_checkpoint_pause():
    """pause_afk flags the active session as pause_requested and persists it."""
    from server.tools import executor as executor_mod

    session = {
        "status": "active",
        "current_step": "pytest -q",
        "steps_completed": 1,
        "steps_total": 3,
    }
    db = _executor_db("t1", ["/tmp"])
    db.get_thread_meta.return_value = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": ["/tmp"],
        "afk_session": session,
    }

    with patch.object(executor_mod, "_get_db", return_value=db):
        outcome = executor_mod.pause_afk()

    expected_message = (
        "AFK paused — here's where I am: pytest -q. Type 'resume' to continue or give new instructions"
    )
    assert outcome["status"] == "pause_requested"
    assert outcome["message"] == expected_message

    # The last item written back carries the updated session.
    persisted_session = db.put_item.call_args.args[1]["afk_session"]
    assert persisted_session["status"] == "pause_requested"
    assert persisted_session["pause_requested"] is True
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def test_afk_status_reports_active_session_and_idle_message():
    """afk_status echoes an active session's progress, and reports idle without one."""
    from server.tools import executor as executor_mod

    db = _executor_db("t1", ["/tmp"])

    # Phase 1: thread meta carries an active AFK session.
    session = {
        "status": "active",
        "task": "ship it",
        "current_step": "pytest -q",
        "steps_completed": 2,
        "steps_total": 4,
        "started_at_monotonic": "2026-05-14T10:00:00Z",  # ISO string, not float
        "last_command_output": "last line",
    }
    db.get_thread_meta.return_value = {
        "SK": "THREAD#t1#META",
        "sandbox_roots": ["/tmp"],
        "afk_session": session,
    }

    with patch.object(executor_mod, "_get_db", return_value=db):
        active_report = executor_mod.afk_status()

    echoed = {
        "status": "active",
        "current_step": "pytest -q",
        "steps_completed": 2,
        "steps_total": 4,
        "last_command_output": "last line",
    }
    for field, expected in echoed.items():
        assert active_report[field] == expected
    # Only checks that an elapsed time was computed without crashing.
    assert active_report["elapsed_seconds"] is not None

    # Phase 2: the same thread without any afk_session entry.
    db.get_thread_meta.return_value = {"SK": "THREAD#t1#META", "sandbox_roots": ["/tmp"]}
    with patch.object(executor_mod, "_get_db", return_value=db):
        idle_report = executor_mod.afk_status()

    assert idle_report == {"status": "idle", "message": "No AFK session running"}
|