flowent 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +31 -5
- package/backend/src/flowent/agent.py +13 -4
- package/backend/src/flowent/compact.py +35 -14
- package/backend/src/flowent/llm.py +73 -7
- package/backend/src/flowent/main.py +260 -59
- package/backend/src/flowent/static/assets/index-CRSV2xu1.css +2 -0
- package/backend/src/flowent/static/assets/index-DUYj6rgD.js +82 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/storage.py +135 -3
- package/backend/src/flowent/usage.py +315 -0
- package/backend/uv.lock +971 -3
- package/dist/frontend/assets/index-CRSV2xu1.css +2 -0
- package/dist/frontend/assets/index-DUYj6rgD.js +82 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +24 -3
- package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
- package/backend/src/flowent/static/assets/index-BlaCigkZ.js +0 -82
- package/backend/src/flowent/static/assets/index-CRvbsH4K.css +0 -2
- package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/conftest.py +0 -60
- package/backend/tests/test_agent_tools.py +0 -1124
- package/backend/tests/test_approval.py +0 -283
- package/backend/tests/test_channels.py +0 -360
- package/backend/tests/test_health.py +0 -12
- package/backend/tests/test_llm_providers.py +0 -548
- package/backend/tests/test_logging.py +0 -212
- package/backend/tests/test_mcp.py +0 -788
- package/backend/tests/test_patch.py +0 -112
- package/backend/tests/test_permissions.py +0 -588
- package/backend/tests/test_persistence.py +0 -249
- package/backend/tests/test_skills.py +0 -462
- package/backend/tests/test_startup_requirements.py +0 -144
- package/backend/tests/test_workspace_chat.py +0 -2174
- package/dist/frontend/assets/index-BlaCigkZ.js +0 -82
- package/dist/frontend/assets/index-CRvbsH4K.css +0 -2
|
@@ -1,1124 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import json
|
|
3
|
-
import logging
|
|
4
|
-
import subprocess
|
|
5
|
-
import time
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
import pytest
|
|
9
|
-
from fastapi.testclient import TestClient
|
|
10
|
-
|
|
11
|
-
from flowent.agent import FLOWENT_AGENT_SYSTEM_PROMPT, run_agent_stream
|
|
12
|
-
from flowent.llm import ProviderConnection, ProviderFormat
|
|
13
|
-
from flowent.main import create_app
|
|
14
|
-
from flowent.sandbox import SandboxCommand, SandboxRunner
|
|
15
|
-
from flowent.tools import ToolContext, ToolResult, run_tool
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def stream_events(content: str) -> list[dict[str, object]]:
|
|
19
|
-
events: list[dict[str, object]] = []
|
|
20
|
-
for raw_event in content.strip().split("\n\n"):
|
|
21
|
-
event_type = ""
|
|
22
|
-
data = ""
|
|
23
|
-
for line in raw_event.splitlines():
|
|
24
|
-
if line.startswith("event: "):
|
|
25
|
-
event_type = line.removeprefix("event: ")
|
|
26
|
-
if line.startswith("data: "):
|
|
27
|
-
data = line.removeprefix("data: ")
|
|
28
|
-
events.append({"event": event_type, "data": json.loads(data)})
|
|
29
|
-
return events
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def configure_provider(client: TestClient) -> None:
|
|
33
|
-
client.post(
|
|
34
|
-
"/api/providers",
|
|
35
|
-
json={
|
|
36
|
-
"api_key": "sk-local",
|
|
37
|
-
"base_url": "",
|
|
38
|
-
"id": "provider-openai",
|
|
39
|
-
"models": ["gpt-5.1"],
|
|
40
|
-
"name": "OpenAI",
|
|
41
|
-
"type": "openai",
|
|
42
|
-
},
|
|
43
|
-
)
|
|
44
|
-
client.put(
|
|
45
|
-
"/api/settings",
|
|
46
|
-
json={
|
|
47
|
-
"reasoning_effort": "default",
|
|
48
|
-
"selected_model": "gpt-5.1",
|
|
49
|
-
"selected_provider_id": "provider-openai",
|
|
50
|
-
},
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def tool_call_chunk(
|
|
55
|
-
name: str, arguments: dict[str, object], call_id: str = "call-1"
|
|
56
|
-
) -> dict[str, object]:
|
|
57
|
-
return {
|
|
58
|
-
"choices": [
|
|
59
|
-
{
|
|
60
|
-
"delta": {
|
|
61
|
-
"tool_calls": [
|
|
62
|
-
{
|
|
63
|
-
"index": 0,
|
|
64
|
-
"id": call_id,
|
|
65
|
-
"type": "function",
|
|
66
|
-
"function": {
|
|
67
|
-
"name": name,
|
|
68
|
-
"arguments": json.dumps(arguments),
|
|
69
|
-
},
|
|
70
|
-
}
|
|
71
|
-
]
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
]
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def text_chunk(content: str) -> dict[str, object]:
|
|
79
|
-
return {"choices": [{"delta": {"content": content}}]}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def thinking_chunk(content: str) -> dict[str, object]:
|
|
83
|
-
return {"choices": [{"delta": {"reasoning_content": content}}]}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def test_workspace_response_streams_tool_process_and_final_text(
|
|
87
|
-
tmp_path, monkeypatch
|
|
88
|
-
) -> None:
|
|
89
|
-
monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
|
|
90
|
-
workdir = tmp_path / "workdir"
|
|
91
|
-
workdir.mkdir()
|
|
92
|
-
(workdir / "notes.txt").write_text("Launch notes")
|
|
93
|
-
monkeypatch.chdir(workdir)
|
|
94
|
-
captured_requests: list[dict[str, object]] = []
|
|
95
|
-
|
|
96
|
-
async def fake_completion(**request: object) -> object:
|
|
97
|
-
captured_requests.append(request)
|
|
98
|
-
|
|
99
|
-
async def chunks() -> object:
|
|
100
|
-
if len(captured_requests) == 1:
|
|
101
|
-
yield tool_call_chunk("read_file", {"path": "notes.txt"})
|
|
102
|
-
else:
|
|
103
|
-
yield text_chunk("Read the notes.")
|
|
104
|
-
|
|
105
|
-
return chunks()
|
|
106
|
-
|
|
107
|
-
client = TestClient(
|
|
108
|
-
create_app(serve_frontend=False, chat_completion=fake_completion)
|
|
109
|
-
)
|
|
110
|
-
configure_provider(client)
|
|
111
|
-
|
|
112
|
-
response = client.post(
|
|
113
|
-
"/api/workspace/respond",
|
|
114
|
-
json={"content": "Use the notes."},
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
assert response.status_code == 200
|
|
118
|
-
events = stream_events(response.text)
|
|
119
|
-
assert [event["event"] for event in events] == [
|
|
120
|
-
"start",
|
|
121
|
-
"output_start",
|
|
122
|
-
"tool_start",
|
|
123
|
-
"tool_done",
|
|
124
|
-
"output_start",
|
|
125
|
-
"delta",
|
|
126
|
-
"done",
|
|
127
|
-
]
|
|
128
|
-
assert events[1]["data"] == {"index": 1}
|
|
129
|
-
assert events[2]["data"]["tool"]["status"] == "running"
|
|
130
|
-
assert events[3]["data"]["status"] == "success"
|
|
131
|
-
assert events[4]["data"] == {"index": 2}
|
|
132
|
-
assert events[5]["data"] == {"content": "Read the notes."}
|
|
133
|
-
assert events[6]["data"]["message"]["content"] == "Read the notes."
|
|
134
|
-
assert len(captured_requests) == 2
|
|
135
|
-
assert captured_requests[0]["messages"][0] == {
|
|
136
|
-
"role": "system",
|
|
137
|
-
"content": FLOWENT_AGENT_SYSTEM_PROMPT,
|
|
138
|
-
}
|
|
139
|
-
second_messages = captured_requests[1]["messages"]
|
|
140
|
-
assert second_messages[0] == {
|
|
141
|
-
"role": "system",
|
|
142
|
-
"content": FLOWENT_AGENT_SYSTEM_PROMPT,
|
|
143
|
-
}
|
|
144
|
-
assert second_messages[-2]["tool_calls"][0]["function"]["name"] == "read_file"
|
|
145
|
-
assert second_messages[-1] == {
|
|
146
|
-
"role": "tool",
|
|
147
|
-
"tool_call_id": "call-1",
|
|
148
|
-
"content": "Launch notes",
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def test_tools_can_read_paths_outside_workdir(tmp_path) -> None:
|
|
153
|
-
outside = tmp_path / "outside.txt"
|
|
154
|
-
outside.write_text("outside content")
|
|
155
|
-
|
|
156
|
-
result = run_tool(
|
|
157
|
-
"read_file", {"path": str(outside)}, ToolContext(cwd=tmp_path / "work")
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
assert result.ok
|
|
161
|
-
assert result.content == "outside content"
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def test_list_dir_can_list_paths_outside_workdir(tmp_path) -> None:
|
|
165
|
-
outside = tmp_path / "outside"
|
|
166
|
-
outside.mkdir()
|
|
167
|
-
(outside / "file.txt").write_text("content")
|
|
168
|
-
|
|
169
|
-
result = run_tool(
|
|
170
|
-
"list_dir", {"path": str(outside)}, ToolContext(cwd=tmp_path / "work")
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
assert result.ok
|
|
174
|
-
assert "file.txt" in result.content
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def test_grep_files_can_search_paths_outside_workdir(tmp_path) -> None:
|
|
178
|
-
outside = tmp_path / "outside"
|
|
179
|
-
outside.mkdir()
|
|
180
|
-
(outside / "file.txt").write_text("alpha beta")
|
|
181
|
-
|
|
182
|
-
result = run_tool(
|
|
183
|
-
"grep_files",
|
|
184
|
-
{"pattern": "alpha", "path": str(outside)},
|
|
185
|
-
ToolContext(cwd=tmp_path / "work"),
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
assert result.ok
|
|
189
|
-
assert "file.txt" in result.content
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
def test_shell_command_can_write_workdir_and_tmp(tmp_path) -> None:
|
|
193
|
-
result = run_tool(
|
|
194
|
-
"shell_command",
|
|
195
|
-
{"command": "echo ok > work.txt && echo tmp > /tmp/flowent-tool-test.txt"},
|
|
196
|
-
ToolContext(cwd=tmp_path),
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
assert result.ok
|
|
200
|
-
assert (tmp_path / "work.txt").read_text().strip() == "ok"
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
def test_shell_command_cannot_write_outside_workdir_and_tmp(tmp_path) -> None:
|
|
204
|
-
outside = Path("/project/flowent/backend/tests/flowent-outside-denied.txt")
|
|
205
|
-
if outside.exists():
|
|
206
|
-
outside.unlink()
|
|
207
|
-
|
|
208
|
-
result = run_tool(
|
|
209
|
-
"shell_command",
|
|
210
|
-
{"command": f"echo denied > {outside}"},
|
|
211
|
-
ToolContext(cwd=tmp_path),
|
|
212
|
-
)
|
|
213
|
-
|
|
214
|
-
assert not result.ok
|
|
215
|
-
assert not outside.exists()
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def test_shell_command_has_network_by_default(tmp_path) -> None:
|
|
219
|
-
result = run_tool(
|
|
220
|
-
"shell_command",
|
|
221
|
-
{
|
|
222
|
-
"command": "python - <<'PY'\nimport socket\ns=socket.socket()\nprint('network-ready')\nPY"
|
|
223
|
-
},
|
|
224
|
-
ToolContext(cwd=tmp_path),
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
assert result.ok
|
|
228
|
-
assert "network-ready" in result.content
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
def test_sandbox_command_keeps_proc_mount_when_preflight_succeeds(
|
|
232
|
-
tmp_path, monkeypatch
|
|
233
|
-
) -> None:
|
|
234
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
235
|
-
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: True)
|
|
236
|
-
|
|
237
|
-
command = runner.build_command(["/bin/true"])
|
|
238
|
-
|
|
239
|
-
assert command.args[command.args.index("--proc") + 1] == "/proc"
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def test_sandbox_command_omits_proc_mount_when_preflight_reports_permission_error(
|
|
243
|
-
tmp_path, monkeypatch
|
|
244
|
-
) -> None:
|
|
245
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
246
|
-
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
|
|
247
|
-
|
|
248
|
-
command = runner.build_command(["/bin/true"])
|
|
249
|
-
|
|
250
|
-
assert "--proc" not in command.args
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
def test_sandbox_command_binds_writable_socket_path(tmp_path, monkeypatch) -> None:
|
|
254
|
-
socket_path = tmp_path / "docker.sock"
|
|
255
|
-
socket_path.touch()
|
|
256
|
-
runner = SandboxRunner(cwd=tmp_path, writable_roots=[socket_path])
|
|
257
|
-
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
|
|
258
|
-
|
|
259
|
-
command = runner.build_command(["/bin/true"])
|
|
260
|
-
|
|
261
|
-
bind_index = command.args.index(str(socket_path))
|
|
262
|
-
assert command.args[bind_index - 1] == "--bind"
|
|
263
|
-
assert command.args[bind_index + 1] == str(socket_path)
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
def test_sandbox_proc_preflight_does_not_hide_non_proc_errors(
|
|
267
|
-
tmp_path, monkeypatch
|
|
268
|
-
) -> None:
|
|
269
|
-
bwrap = tmp_path / "bwrap"
|
|
270
|
-
bwrap.write_text("#!/bin/sh\necho 'bwrap: unrelated startup failure' >&2\nexit 1\n")
|
|
271
|
-
bwrap.chmod(0o700)
|
|
272
|
-
monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
|
|
273
|
-
|
|
274
|
-
assert SandboxRunner(cwd=tmp_path).build_command(["/bin/true"]).args[0:7] == [
|
|
275
|
-
str(bwrap),
|
|
276
|
-
"--ro-bind",
|
|
277
|
-
"/",
|
|
278
|
-
"/",
|
|
279
|
-
"--dev",
|
|
280
|
-
"/dev",
|
|
281
|
-
"--proc",
|
|
282
|
-
]
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
def test_shell_command_runs_without_proc_mount_after_preflight_fallback(
|
|
286
|
-
tmp_path, monkeypatch
|
|
287
|
-
) -> None:
|
|
288
|
-
bwrap = tmp_path / "bwrap"
|
|
289
|
-
bwrap.write_text(
|
|
290
|
-
"#!/bin/sh\n"
|
|
291
|
-
'for arg in "$@"; do\n'
|
|
292
|
-
' if [ "$arg" = --proc ]; then\n'
|
|
293
|
-
' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
|
|
294
|
-
" exit 1\n"
|
|
295
|
-
" fi\n"
|
|
296
|
-
"done\n"
|
|
297
|
-
'while [ "$#" -gt 0 ]; do\n'
|
|
298
|
-
' if [ "$1" = -- ]; then\n'
|
|
299
|
-
" shift\n"
|
|
300
|
-
' exec "$@"\n'
|
|
301
|
-
" fi\n"
|
|
302
|
-
" shift\n"
|
|
303
|
-
"done\n"
|
|
304
|
-
)
|
|
305
|
-
bwrap.chmod(0o700)
|
|
306
|
-
monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
|
|
307
|
-
|
|
308
|
-
result = SandboxRunner(cwd=tmp_path).run(["/bin/sh", "-c", "printf ok"])
|
|
309
|
-
|
|
310
|
-
assert result.exit_code == 0
|
|
311
|
-
assert result.stdout == "ok"
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
def test_apply_patch_runs_without_proc_mount_after_preflight_fallback(
|
|
315
|
-
tmp_path, monkeypatch
|
|
316
|
-
) -> None:
|
|
317
|
-
bwrap = tmp_path / "bwrap"
|
|
318
|
-
bwrap.write_text(
|
|
319
|
-
"#!/bin/sh\n"
|
|
320
|
-
'for arg in "$@"; do\n'
|
|
321
|
-
' if [ "$arg" = --proc ]; then\n'
|
|
322
|
-
' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
|
|
323
|
-
" exit 1\n"
|
|
324
|
-
" fi\n"
|
|
325
|
-
"done\n"
|
|
326
|
-
'while [ "$#" -gt 0 ]; do\n'
|
|
327
|
-
' if [ "$1" = -- ]; then\n'
|
|
328
|
-
" shift\n"
|
|
329
|
-
' exec "$@"\n'
|
|
330
|
-
" fi\n"
|
|
331
|
-
" shift\n"
|
|
332
|
-
"done\n"
|
|
333
|
-
)
|
|
334
|
-
bwrap.chmod(0o700)
|
|
335
|
-
monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
|
|
336
|
-
target = tmp_path / "notes.txt"
|
|
337
|
-
target.write_text("alpha\n")
|
|
338
|
-
patch = """*** Begin Patch
|
|
339
|
-
*** Update File: notes.txt
|
|
340
|
-
@@
|
|
341
|
-
-alpha
|
|
342
|
-
+beta
|
|
343
|
-
*** End Patch
|
|
344
|
-
"""
|
|
345
|
-
|
|
346
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
347
|
-
|
|
348
|
-
assert result.ok
|
|
349
|
-
assert target.read_text() == "beta\n"
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def test_shell_command_environment_omits_development_variables(
|
|
353
|
-
tmp_path, monkeypatch
|
|
354
|
-
) -> None:
|
|
355
|
-
monkeypatch.setenv("NODE_ENV", "production")
|
|
356
|
-
monkeypatch.setenv("VIRTUAL_ENV", "/tmp/flowent-venv")
|
|
357
|
-
monkeypatch.setenv("PYTHONPATH", "/tmp/flowent-pythonpath")
|
|
358
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
359
|
-
monkeypatch.setattr(
|
|
360
|
-
runner,
|
|
361
|
-
"build_command",
|
|
362
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
363
|
-
)
|
|
364
|
-
|
|
365
|
-
result = runner.run(
|
|
366
|
-
[
|
|
367
|
-
"/bin/sh",
|
|
368
|
-
"-c",
|
|
369
|
-
'printf \'%s|%s|%s\' "${NODE_ENV-unset}" "${VIRTUAL_ENV-unset}" "${PYTHONPATH-unset}"',
|
|
370
|
-
]
|
|
371
|
-
)
|
|
372
|
-
|
|
373
|
-
assert result.exit_code == 0
|
|
374
|
-
assert result.stdout == "unset|unset|unset"
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
def test_shell_command_environment_omits_sensitive_variables(
|
|
378
|
-
tmp_path, monkeypatch
|
|
379
|
-
) -> None:
|
|
380
|
-
monkeypatch.setenv("OPENAI_API_KEY", "sk-local")
|
|
381
|
-
monkeypatch.setenv("SECRET_TOKEN", "secret")
|
|
382
|
-
monkeypatch.setenv("NPM_TOKEN", "npm")
|
|
383
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
384
|
-
monkeypatch.setattr(
|
|
385
|
-
runner,
|
|
386
|
-
"build_command",
|
|
387
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
388
|
-
)
|
|
389
|
-
|
|
390
|
-
result = runner.run(
|
|
391
|
-
[
|
|
392
|
-
"/bin/sh",
|
|
393
|
-
"-c",
|
|
394
|
-
'printf \'%s|%s|%s\' "${OPENAI_API_KEY-unset}" "${SECRET_TOKEN-unset}" "${NPM_TOKEN-unset}"',
|
|
395
|
-
]
|
|
396
|
-
)
|
|
397
|
-
|
|
398
|
-
assert result.exit_code == 0
|
|
399
|
-
assert result.stdout == "unset|unset|unset"
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
def test_shell_command_environment_keeps_core_variables(tmp_path, monkeypatch) -> None:
|
|
403
|
-
monkeypatch.setenv("HOME", str(tmp_path / "home"))
|
|
404
|
-
monkeypatch.setenv("PATH", "/usr/local/bin:/usr/bin:/bin")
|
|
405
|
-
monkeypatch.setenv("SHELL", "/bin/sh")
|
|
406
|
-
monkeypatch.setenv("USER", "flowent")
|
|
407
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
408
|
-
monkeypatch.setattr(
|
|
409
|
-
runner,
|
|
410
|
-
"build_command",
|
|
411
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
412
|
-
)
|
|
413
|
-
|
|
414
|
-
result = runner.run(
|
|
415
|
-
[
|
|
416
|
-
"/bin/sh",
|
|
417
|
-
"-c",
|
|
418
|
-
'printf \'%s|%s|%s|%s\' "$HOME" "$PATH" "$SHELL" "$USER"',
|
|
419
|
-
]
|
|
420
|
-
)
|
|
421
|
-
|
|
422
|
-
assert result.exit_code == 0
|
|
423
|
-
assert (
|
|
424
|
-
result.stdout
|
|
425
|
-
== f"{tmp_path / 'home'}|/usr/local/bin:/usr/bin:/bin|/bin/sh|flowent"
|
|
426
|
-
)
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
def test_shell_command_environment_uses_default_path_when_missing(
|
|
430
|
-
tmp_path, monkeypatch
|
|
431
|
-
) -> None:
|
|
432
|
-
monkeypatch.delenv("PATH", raising=False)
|
|
433
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
434
|
-
captured_env: dict[str, str] = {}
|
|
435
|
-
|
|
436
|
-
def fake_run(*args, **kwargs):
|
|
437
|
-
captured_env.update(kwargs["env"])
|
|
438
|
-
return subprocess.CompletedProcess(
|
|
439
|
-
args=args[0], returncode=0, stdout="", stderr=""
|
|
440
|
-
)
|
|
441
|
-
|
|
442
|
-
monkeypatch.setattr(
|
|
443
|
-
runner,
|
|
444
|
-
"build_command",
|
|
445
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
446
|
-
)
|
|
447
|
-
monkeypatch.setattr("subprocess.run", fake_run)
|
|
448
|
-
|
|
449
|
-
result = runner.run(["/bin/sh", "-c", "true"])
|
|
450
|
-
|
|
451
|
-
assert result.exit_code == 0
|
|
452
|
-
assert (
|
|
453
|
-
captured_env["PATH"]
|
|
454
|
-
== "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
|
455
|
-
)
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
def test_shell_command_environment_accepts_explicit_overrides(
|
|
459
|
-
tmp_path, monkeypatch
|
|
460
|
-
) -> None:
|
|
461
|
-
monkeypatch.delenv("FLOWENT_TOOL_VAR", raising=False)
|
|
462
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
463
|
-
monkeypatch.setattr(
|
|
464
|
-
runner,
|
|
465
|
-
"build_command",
|
|
466
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
467
|
-
)
|
|
468
|
-
|
|
469
|
-
result = runner.run(
|
|
470
|
-
["/bin/sh", "-c", "printf '%s' \"$FLOWENT_TOOL_VAR\""],
|
|
471
|
-
env={"FLOWENT_TOOL_VAR": "explicit"},
|
|
472
|
-
)
|
|
473
|
-
|
|
474
|
-
assert result.exit_code == 0
|
|
475
|
-
assert result.stdout == "explicit"
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
@pytest.mark.anyio
|
|
479
|
-
async def test_async_shell_command_does_not_block_other_tasks(
|
|
480
|
-
tmp_path, monkeypatch
|
|
481
|
-
) -> None:
|
|
482
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
483
|
-
command = [
|
|
484
|
-
"/bin/sh",
|
|
485
|
-
"-c",
|
|
486
|
-
"python - <<'PY'\nimport time\ntime.sleep(0.2)\nprint('done')\nPY",
|
|
487
|
-
]
|
|
488
|
-
monkeypatch.setattr(
|
|
489
|
-
runner,
|
|
490
|
-
"build_command",
|
|
491
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
492
|
-
)
|
|
493
|
-
command_task = asyncio.create_task(runner.run_async(command, timeout_seconds=1))
|
|
494
|
-
start = time.perf_counter()
|
|
495
|
-
await asyncio.sleep(0.01)
|
|
496
|
-
elapsed = time.perf_counter() - start
|
|
497
|
-
result = await command_task
|
|
498
|
-
|
|
499
|
-
assert elapsed < 0.1
|
|
500
|
-
assert result.exit_code == 0
|
|
501
|
-
assert "done" in result.stdout
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
@pytest.mark.anyio
|
|
505
|
-
async def test_async_shell_command_timeout_returns_failed_result(
|
|
506
|
-
tmp_path, monkeypatch
|
|
507
|
-
) -> None:
|
|
508
|
-
runner = SandboxRunner(cwd=tmp_path)
|
|
509
|
-
command = [
|
|
510
|
-
"/bin/sh",
|
|
511
|
-
"-c",
|
|
512
|
-
"python - <<'PY'\nimport time\ntime.sleep(1)\nprint('late')\nPY",
|
|
513
|
-
]
|
|
514
|
-
monkeypatch.setattr(
|
|
515
|
-
runner,
|
|
516
|
-
"build_command",
|
|
517
|
-
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
518
|
-
)
|
|
519
|
-
result = await runner.run_async(
|
|
520
|
-
command,
|
|
521
|
-
timeout_seconds=0.05,
|
|
522
|
-
)
|
|
523
|
-
|
|
524
|
-
assert result.exit_code == 124
|
|
525
|
-
assert "late" not in result.stdout
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
@pytest.mark.anyio
|
|
529
|
-
async def test_agent_stream_stops_after_cancelled_tool(tmp_path) -> None:
|
|
530
|
-
cancelled = False
|
|
531
|
-
|
|
532
|
-
async def fake_completion(**request: object) -> object:
|
|
533
|
-
async def chunks() -> object:
|
|
534
|
-
yield tool_call_chunk("shell_command", {"command": "slow"})
|
|
535
|
-
|
|
536
|
-
return chunks()
|
|
537
|
-
|
|
538
|
-
async def fake_runner(
|
|
539
|
-
name: str, arguments: dict[str, object], context: ToolContext
|
|
540
|
-
):
|
|
541
|
-
nonlocal cancelled
|
|
542
|
-
try:
|
|
543
|
-
await asyncio.sleep(10)
|
|
544
|
-
except asyncio.CancelledError:
|
|
545
|
-
cancelled = True
|
|
546
|
-
raise
|
|
547
|
-
|
|
548
|
-
stream = run_agent_stream(
|
|
549
|
-
completion=fake_completion,
|
|
550
|
-
connection=ProviderConnection(
|
|
551
|
-
base_url=None,
|
|
552
|
-
model="gpt-5.1",
|
|
553
|
-
name="OpenAI",
|
|
554
|
-
provider=ProviderFormat.OPENAI,
|
|
555
|
-
secret_reference="sk-local",
|
|
556
|
-
),
|
|
557
|
-
cwd=tmp_path,
|
|
558
|
-
messages=[{"role": "user", "content": "Run it."}],
|
|
559
|
-
tool_runner=fake_runner,
|
|
560
|
-
)
|
|
561
|
-
|
|
562
|
-
await stream.__anext__()
|
|
563
|
-
await stream.__anext__()
|
|
564
|
-
await stream.__anext__()
|
|
565
|
-
next_event = asyncio.create_task(stream.__anext__())
|
|
566
|
-
await asyncio.sleep(0)
|
|
567
|
-
next_event.cancel()
|
|
568
|
-
with pytest.raises(asyncio.CancelledError):
|
|
569
|
-
await next_event
|
|
570
|
-
await stream.aclose()
|
|
571
|
-
|
|
572
|
-
assert cancelled
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
def test_shell_command_denies_ptrace_when_seccomp_is_available(tmp_path) -> None:
|
|
576
|
-
command = SandboxRunner(cwd=tmp_path).build_command(["/bin/true"])
|
|
577
|
-
if not command.seccomp_available:
|
|
578
|
-
assert command.args[0].endswith("bwrap")
|
|
579
|
-
return
|
|
580
|
-
|
|
581
|
-
result = run_tool(
|
|
582
|
-
"shell_command",
|
|
583
|
-
{
|
|
584
|
-
"command": "python - <<'PY'\nimport ctypes, os\nprint(ctypes.CDLL(None).ptrace(0, 0, None, None))\nPY"
|
|
585
|
-
},
|
|
586
|
-
ToolContext(cwd=tmp_path),
|
|
587
|
-
)
|
|
588
|
-
|
|
589
|
-
assert not result.ok or "-1" in result.content
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
def test_apply_patch_modifies_workdir_file(tmp_path) -> None:
|
|
593
|
-
target = tmp_path / "notes.txt"
|
|
594
|
-
target.write_text("alpha\nbeta\n")
|
|
595
|
-
patch = """*** Begin Patch
|
|
596
|
-
*** Update File: notes.txt
|
|
597
|
-
@@
|
|
598
|
-
-beta
|
|
599
|
-
+ready
|
|
600
|
-
*** End Patch
|
|
601
|
-
"""
|
|
602
|
-
|
|
603
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
604
|
-
|
|
605
|
-
assert result.ok
|
|
606
|
-
assert result.title == "Edited notes.txt"
|
|
607
|
-
assert target.read_text() == "alpha\nready\n"
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
def test_apply_patch_added_file_title(tmp_path) -> None:
|
|
611
|
-
patch = """*** Begin Patch
|
|
612
|
-
*** Add File: created.txt
|
|
613
|
-
+hello
|
|
614
|
-
*** End Patch
|
|
615
|
-
"""
|
|
616
|
-
|
|
617
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
618
|
-
|
|
619
|
-
assert result.ok
|
|
620
|
-
assert result.title == "Added created.txt"
|
|
621
|
-
assert (tmp_path / "created.txt").read_text() == "hello\n"
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
def test_apply_patch_deleted_file_title(tmp_path) -> None:
|
|
625
|
-
target = tmp_path / "old.txt"
|
|
626
|
-
target.write_text("remove me\n")
|
|
627
|
-
patch = """*** Begin Patch
|
|
628
|
-
*** Delete File: old.txt
|
|
629
|
-
*** End Patch
|
|
630
|
-
"""
|
|
631
|
-
|
|
632
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
633
|
-
|
|
634
|
-
assert result.ok
|
|
635
|
-
assert result.title == "Deleted old.txt"
|
|
636
|
-
assert not target.exists()
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
def test_apply_patch_multiple_files_title(tmp_path) -> None:
|
|
640
|
-
target = tmp_path / "notes.txt"
|
|
641
|
-
target.write_text("alpha\nbeta\n")
|
|
642
|
-
patch = """*** Begin Patch
|
|
643
|
-
*** Update File: notes.txt
|
|
644
|
-
@@
|
|
645
|
-
-beta
|
|
646
|
-
+ready
|
|
647
|
-
*** Add File: created.txt
|
|
648
|
-
+hello
|
|
649
|
-
*** End Patch
|
|
650
|
-
"""
|
|
651
|
-
|
|
652
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
653
|
-
|
|
654
|
-
assert result.ok
|
|
655
|
-
assert result.title == "Edited 2 files"
|
|
656
|
-
assert target.read_text() == "alpha\nready\n"
|
|
657
|
-
assert (tmp_path / "created.txt").read_text() == "hello\n"
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
def test_apply_patch_rejects_outside_workdir_file(tmp_path) -> None:
|
|
661
|
-
outside = Path(__file__).resolve().parent / "outside-patch.txt"
|
|
662
|
-
outside.write_text("alpha\n")
|
|
663
|
-
try:
|
|
664
|
-
patch = f"""*** Begin Patch
|
|
665
|
-
*** Update File: {outside}
|
|
666
|
-
@@
|
|
667
|
-
-alpha
|
|
668
|
-
+beta
|
|
669
|
-
*** End Patch
|
|
670
|
-
"""
|
|
671
|
-
|
|
672
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
673
|
-
|
|
674
|
-
assert not result.ok
|
|
675
|
-
assert result.title == "Edit failed"
|
|
676
|
-
assert outside.read_text() == "alpha\n"
|
|
677
|
-
finally:
|
|
678
|
-
outside.unlink(missing_ok=True)
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
def test_apply_patch_uses_internal_subcommand(tmp_path, monkeypatch) -> None:
|
|
682
|
-
calls: list[list[str]] = []
|
|
683
|
-
|
|
684
|
-
def fake_run(self, command, **kwargs):
|
|
685
|
-
calls.append(command)
|
|
686
|
-
from flowent.sandbox import CommandResult
|
|
687
|
-
|
|
688
|
-
return CommandResult(
|
|
689
|
-
command=" ".join(command), exit_code=0, stderr="", stdout="{}"
|
|
690
|
-
)
|
|
691
|
-
|
|
692
|
-
monkeypatch.setattr(SandboxRunner, "run", fake_run)
|
|
693
|
-
patch = """*** Begin Patch
|
|
694
|
-
*** Add File: created.txt
|
|
695
|
-
+hello
|
|
696
|
-
*** End Patch
|
|
697
|
-
"""
|
|
698
|
-
|
|
699
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
700
|
-
|
|
701
|
-
assert result.ok
|
|
702
|
-
assert result.title == "Edited files"
|
|
703
|
-
assert calls
|
|
704
|
-
assert calls[0][1:4] == ["-m", "flowent.cli", "apply-patch"]
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
def test_apply_patch_reports_patch_error_when_stderr_has_warning(
|
|
708
|
-
tmp_path, monkeypatch
|
|
709
|
-
) -> None:
|
|
710
|
-
def fake_run(self, command, **kwargs):
|
|
711
|
-
from flowent.sandbox import CommandResult
|
|
712
|
-
|
|
713
|
-
return CommandResult(
|
|
714
|
-
command=" ".join(command),
|
|
715
|
-
exit_code=1,
|
|
716
|
-
stderr="RuntimeWarning: flowent.cli was already imported\n",
|
|
717
|
-
stdout='{"error": "Patch context was not found."}\n',
|
|
718
|
-
)
|
|
719
|
-
|
|
720
|
-
monkeypatch.setattr(SandboxRunner, "run", fake_run)
|
|
721
|
-
patch = """*** Begin Patch
|
|
722
|
-
*** Update File: notes.txt
|
|
723
|
-
@@
|
|
724
|
-
-missing
|
|
725
|
-
+ready
|
|
726
|
-
*** End Patch
|
|
727
|
-
"""
|
|
728
|
-
|
|
729
|
-
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
730
|
-
|
|
731
|
-
assert not result.ok
|
|
732
|
-
assert result.title == "Edit failed"
|
|
733
|
-
assert result.content == "Patch context was not found."
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
def test_web_search_result_enters_tool_output(tmp_path) -> None:
|
|
737
|
-
def fake_search(query: str):
|
|
738
|
-
return [{"title": "Result", "url": "https://example.test", "snippet": query}]
|
|
739
|
-
|
|
740
|
-
result = run_tool(
|
|
741
|
-
"web_search",
|
|
742
|
-
{"query": "release checklist"},
|
|
743
|
-
ToolContext(cwd=tmp_path, web_searcher=fake_search),
|
|
744
|
-
)
|
|
745
|
-
|
|
746
|
-
assert result.ok
|
|
747
|
-
assert "https://example.test" in result.content
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
def test_agent_continues_until_final_text_after_multiple_tool_rounds(
    tmp_path, monkeypatch
) -> None:
    """Two tool rounds in a row are followed by a third, text-only model call.

    The fake completion picks its reply from the number of requests seen so
    far: ``list_dir`` first, ``read_file`` second, plain text afterwards.
    """
    monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
    workdir = tmp_path / "workdir"
    workdir.mkdir()
    notes_file = workdir / "notes.txt"
    notes_file.write_text("Launch notes")
    monkeypatch.chdir(workdir)
    seen_requests: list[dict[str, object]] = []

    async def fake_completion(**request: object) -> object:
        seen_requests.append(request)

        async def chunks() -> object:
            # Evaluated when the stream is consumed, as in a real provider.
            round_number = len(seen_requests)
            if round_number == 1:
                yield tool_call_chunk("list_dir", {"path": "."}, call_id="call-list")
            elif round_number == 2:
                yield tool_call_chunk(
                    "read_file", {"path": "notes.txt"}, call_id="call-read"
                )
            else:
                yield text_chunk("The notes are ready.")

        return chunks()

    app = create_app(serve_frontend=False, chat_completion=fake_completion)
    client = TestClient(app)
    configure_provider(client)

    response = client.post(
        "/api/workspace/respond",
        json={"content": "Inspect the workspace."},
    )

    assert response.status_code == 200
    events = stream_events(response.text)
    event_names = [event["event"] for event in events]
    tool_round = ["output_start", "tool_start", "tool_done"]
    assert event_names == [
        "start",
        *tool_round,
        *tool_round,
        "output_start",
        "delta",
        "done",
    ]
    assert len(seen_requests) == 3
    # The last message of the third request is the read_file tool result.
    expected_tool_message = {
        "role": "tool",
        "tool_call_id": "call-read",
        "content": "Launch notes",
    }
    assert seen_requests[2]["messages"][-1] == expected_tool_message
    assert events[-1]["data"]["message"]["content"] == "The notes are ready."
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
@pytest.mark.anyio
async def test_agent_logs_model_call_decisions_after_tool_rounds(
    tmp_path, caplog
) -> None:
    """Both rounds and both decisions appear in the ``flowent.agent`` log."""
    (tmp_path / "notes.txt").write_text("Launch notes")
    seen_requests: list[dict[str, object]] = []
    caplog.set_level(logging.INFO, logger="flowent.agent")

    async def fake_completion(**request: object) -> object:
        seen_requests.append(request)

        async def chunks() -> object:
            # First call requests a tool; every later call answers in text.
            if len(seen_requests) == 1:
                yield tool_call_chunk("read_file", {"path": "notes.txt"})
            else:
                yield text_chunk("The notes are ready.")

        return chunks()

    events = []
    async for event in run_agent_stream(
        completion=fake_completion,
        connection=ProviderConnection(
            model="gpt-5.1",
            name="Provider",
            provider=ProviderFormat.OPENAI,
            secret_reference="secret",
        ),
        cwd=tmp_path,
        messages=[{"role": "user", "content": "Inspect notes."}],
    ):
        events.append(event)
    rendered_logs = "\n".join(record.getMessage() for record in caplog.records)

    assert events[-1].data["message"]["content"] == "The notes are ready."
    expected_fragments = (
        "Agent model call started",
        "round=1",
        "round=2",
        "decision=run_tools",
        "decision=final_response",
        "Agent continuing after tools",
    )
    for fragment in expected_fragments:
        assert fragment in rendered_logs
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
@pytest.mark.anyio
async def test_agent_logs_model_call_failure_after_tool_result(
    tmp_path, caplog
) -> None:
    """A stream error on the second model call propagates and is logged.

    The log must mention the failure, ``round=2`` and ``chunk_count=0``.
    """
    (tmp_path / "notes.txt").write_text("Launch notes")
    seen_requests: list[dict[str, object]] = []
    caplog.set_level(logging.INFO, logger="flowent.agent")

    async def fake_completion(**request: object) -> object:
        seen_requests.append(request)

        async def chunks() -> object:
            # Any call other than the first fails before producing a chunk.
            if len(seen_requests) != 1:
                raise RuntimeError("stream request failed")
            yield tool_call_chunk("read_file", {"path": "notes.txt"})

        return chunks()

    with pytest.raises(RuntimeError, match="stream request failed"):
        async for _event in run_agent_stream(
            completion=fake_completion,
            connection=ProviderConnection(
                model="gpt-5.1",
                name="Provider",
                provider=ProviderFormat.OPENAI,
                secret_reference="secret",
            ),
            cwd=tmp_path,
            messages=[{"role": "user", "content": "Inspect notes."}],
        ):
            pass
    rendered_logs = "\n".join(record.getMessage() for record in caplog.records)

    assert len(seen_requests) == 2
    for fragment in ("Agent model call failed", "round=2", "chunk_count=0"):
        assert fragment in rendered_logs
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
@pytest.mark.anyio
async def test_agent_does_not_log_final_response_when_responses_stream_fails(
    tmp_path, caplog, fake_litellm_responses_transformer
) -> None:
    """A ``response.failed`` chunk after partial text is logged as a failure.

    The run must raise, and the log must not claim a final-response decision.
    """
    caplog.set_level(logging.INFO, logger="flowent.agent")

    async def fake_completion(**request: object) -> object:
        async def chunks() -> object:
            # Imported lazily, when the stream is first consumed.
            from litellm.completion_extras.litellm_responses_transformation.transformation import (
                OpenAiResponsesToChatCompletionStreamIterator,
            )

            failed_event = {
                "response": {
                    "error": {
                        "code": "upstream_error",
                        "message": "Upstream request failed",
                    },
                    "status": "failed",
                },
                "type": "response.failed",
            }

            yield text_chunk("Partial answer.")
            yield OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(
                failed_event
            )

        return chunks()

    with pytest.raises(RuntimeError, match="Upstream request failed"):
        async for _event in run_agent_stream(
            completion=fake_completion,
            connection=ProviderConnection(
                model="gpt-5.1",
                name="Provider",
                provider=ProviderFormat.OPENAI,
                secret_reference="secret",
            ),
            cwd=tmp_path,
            messages=[{"role": "user", "content": "Inspect notes."}],
        ):
            pass
    rendered_logs = "\n".join(record.getMessage() for record in caplog.records)

    assert "Agent model call failed" in rendered_logs
    assert "decision=final_response" not in rendered_logs
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
def test_agent_finishes_without_tools(tmp_path, monkeypatch) -> None:
    """A text-only model reply ends the run after exactly one model call."""
    monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
    monkeypatch.chdir(tmp_path)
    seen_requests: list[dict[str, object]] = []

    async def fake_completion(**request: object) -> object:
        seen_requests.append(request)

        async def chunks() -> object:
            yield text_chunk("Direct answer.")

        return chunks()

    app = create_app(serve_frontend=False, chat_completion=fake_completion)
    client = TestClient(app)
    configure_provider(client)

    response = client.post(
        "/api/workspace/respond",
        json={"content": "Answer directly."},
    )

    assert response.status_code == 200
    events = stream_events(response.text)
    event_names = [event["event"] for event in events]
    assert event_names == ["start", "output_start", "delta", "done"]
    assert len(seen_requests) == 1
    final_message = events[-1]["data"]["message"]
    assert final_message["content"] == "Direct answer."
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
def test_agent_streams_and_persists_thinking(tmp_path, monkeypatch) -> None:
    """Thinking chunks are streamed one by one and stored joined on the message."""
    monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
    monkeypatch.chdir(tmp_path)

    async def fake_completion(**request: object) -> object:
        async def chunks() -> object:
            yield thinking_chunk("Checking context.")
            yield thinking_chunk(" Preparing answer.")
            yield text_chunk("Direct answer.")

        return chunks()

    app = create_app(serve_frontend=False, chat_completion=fake_completion)
    client = TestClient(app)
    configure_provider(client)

    response = client.post(
        "/api/workspace/respond",
        json={"content": "Answer directly."},
    )

    assert response.status_code == 200
    events = stream_events(response.text)
    event_names = [event["event"] for event in events]
    assert event_names == [
        "start",
        "output_start",
        "thinking_delta",
        "thinking_delta",
        "delta",
        "done",
    ]
    # The first thinking delta carries only the first chunk's text.
    assert events[2]["data"] == {"content": "Checking context."}
    full_thinking = "Checking context. Preparing answer."
    assert events[-1]["data"]["message"]["thinking"] == full_thinking
    # The same joined text is returned by the state endpoint afterwards.
    state = client.get("/api/state").json()
    assert state["messages"][-1]["thinking"] == full_thinking
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
def test_tool_failure_is_reported_and_agent_continues(tmp_path, monkeypatch) -> None:
    """A failing tool call emits ``tool_error`` and the agent still answers.

    The first model call asks to read ``missing.txt``, which is never created
    in ``tmp_path``. The second call must receive the failure as a ``tool``
    message and then produce the final text.
    """
    monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
    monkeypatch.chdir(tmp_path)
    captured_requests: list[dict[str, object]] = []

    async def fake_completion(**request: object) -> object:
        captured_requests.append(request)

        async def chunks() -> object:
            if len(captured_requests) == 1:
                yield tool_call_chunk("read_file", {"path": "missing.txt"})
            else:
                yield text_chunk("I could not read it.")

        return chunks()

    client = TestClient(
        create_app(serve_frontend=False, chat_completion=fake_completion)
    )
    configure_provider(client)

    response = client.post(
        "/api/workspace/respond",
        json={"content": "Read it."},
    )

    # Fail fast on an HTTP-level error rather than on confusing event parsing.
    assert response.status_code == 200
    events = stream_events(response.text)
    assert "tool_error" in [event["event"] for event in events]
    assert len(captured_requests) == 2
    # The failure is fed back to the model as the last message of round two.
    tool_message = captured_requests[1]["messages"][-1]
    assert tool_message["role"] == "tool"
    assert tool_message["tool_call_id"] == "call-1"
    assert "missing.txt" in tool_message["content"]
    assert events[-1]["data"]["message"]["content"] == "I could not read it."
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
@pytest.mark.anyio
async def test_approval_denial_result_is_sent_to_agent(tmp_path) -> None:
    """A denied tool call is passed to the next model call as a tool message."""
    seen_requests: list[dict[str, object]] = []

    async def fake_completion(**request: object) -> object:
        seen_requests.append(request)

        async def chunks() -> object:
            # Round one asks for a destructive command; later rounds reply in text.
            if len(seen_requests) == 1:
                yield tool_call_chunk(
                    "shell_command",
                    {"command": "rm -rf /important"},
                )
            else:
                yield text_chunk("I need explicit approval for that risk.")

        return chunks()

    async def denying_tool_runner(
        name: str,
        arguments: dict[str, object],
        context: ToolContext,
    ) -> ToolResult:
        # Stand-in runner that always returns a failed, reviewer-denied result.
        denial_text = (
            "Automatic approval review denied this action as high risk: "
            "The command can delete broad data. The agent must not work around "
            "this denial."
        )
        return ToolResult(content=denial_text, ok=False, title="Denied by reviewer")

    events = []
    async for event in run_agent_stream(
        completion=fake_completion,
        connection=ProviderConnection(
            model="gpt-5.1",
            name="Provider",
            provider=ProviderFormat.OPENAI,
            secret_reference="secret",
        ),
        cwd=tmp_path,
        messages=[{"role": "user", "content": "Delete the important directory."}],
        tool_runner=denying_tool_runner,
    ):
        events.append(event)

    assert len(seen_requests) == 2
    assert seen_requests[1]["messages"][-1]["role"] == "tool"
    assert "Automatic approval review denied this action" in str(
        seen_requests[1]["messages"][-1]["content"]
    )
    assert "must not work around" in str(
        seen_requests[1]["messages"][-1]["content"]
    )
    final_text = "I need explicit approval for that risk."
    assert events[-2].data["content"] == final_text
    assert events[-1].data["message"]["content"] == final_text
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
def test_update_plan_outputs_plan_state(tmp_path) -> None:
|
|
1117
|
-
result = run_tool(
|
|
1118
|
-
"update_plan",
|
|
1119
|
-
{"items": [{"step": "Read files", "status": "completed"}]},
|
|
1120
|
-
ToolContext(cwd=tmp_path),
|
|
1121
|
-
)
|
|
1122
|
-
|
|
1123
|
-
assert result.ok
|
|
1124
|
-
assert "Read files" in result.content
|