flowent 0.1.5 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/backend/pyproject.toml +31 -5
  2. package/backend/src/flowent/agent.py +107 -37
  3. package/backend/src/flowent/compact.py +35 -14
  4. package/backend/src/flowent/llm.py +198 -12
  5. package/backend/src/flowent/main.py +260 -59
  6. package/backend/src/flowent/static/assets/index-CRSV2xu1.css +2 -0
  7. package/backend/src/flowent/static/assets/index-DUYj6rgD.js +82 -0
  8. package/backend/src/flowent/static/index.html +2 -2
  9. package/backend/src/flowent/storage.py +135 -3
  10. package/backend/src/flowent/usage.py +315 -0
  11. package/backend/uv.lock +971 -3
  12. package/dist/frontend/assets/index-CRSV2xu1.css +2 -0
  13. package/dist/frontend/assets/index-DUYj6rgD.js +82 -0
  14. package/dist/frontend/index.html +2 -2
  15. package/package.json +24 -3
  16. package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
  17. package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
  18. package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
  19. package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
  20. package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
  21. package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
  22. package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
  23. package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
  24. package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
  25. package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
  26. package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
  27. package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
  28. package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
  29. package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
  30. package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
  31. package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
  32. package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
  33. package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
  34. package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
  35. package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
  36. package/backend/src/flowent/static/assets/index-Cl20cARb.css +0 -2
  37. package/backend/src/flowent/static/assets/index-dsDDsEym.js +0 -81
  38. package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
  39. package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
  40. package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
  41. package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
  42. package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
  43. package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
  44. package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
  45. package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
  46. package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
  47. package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
  48. package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
  49. package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
  50. package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
  51. package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
  52. package/backend/tests/conftest.py +0 -21
  53. package/backend/tests/test_agent_tools.py +0 -988
  54. package/backend/tests/test_approval.py +0 -283
  55. package/backend/tests/test_channels.py +0 -360
  56. package/backend/tests/test_health.py +0 -12
  57. package/backend/tests/test_llm_providers.py +0 -387
  58. package/backend/tests/test_logging.py +0 -212
  59. package/backend/tests/test_mcp.py +0 -788
  60. package/backend/tests/test_patch.py +0 -112
  61. package/backend/tests/test_permissions.py +0 -588
  62. package/backend/tests/test_persistence.py +0 -249
  63. package/backend/tests/test_skills.py +0 -462
  64. package/backend/tests/test_startup_requirements.py +0 -144
  65. package/backend/tests/test_workspace_chat.py +0 -2122
  66. package/dist/frontend/assets/index-Cl20cARb.css +0 -2
  67. package/dist/frontend/assets/index-dsDDsEym.js +0 -81
@@ -1,988 +0,0 @@
1
- import asyncio
2
- import json
3
- import subprocess
4
- import time
5
- from pathlib import Path
6
-
7
- import pytest
8
- from fastapi.testclient import TestClient
9
-
10
- from flowent.agent import FLOWENT_AGENT_SYSTEM_PROMPT, run_agent_stream
11
- from flowent.llm import ProviderConnection, ProviderFormat
12
- from flowent.main import create_app
13
- from flowent.sandbox import SandboxCommand, SandboxRunner
14
- from flowent.tools import ToolContext, ToolResult, run_tool
15
-
16
-
17
- def stream_events(content: str) -> list[dict[str, object]]:
18
- events: list[dict[str, object]] = []
19
- for raw_event in content.strip().split("\n\n"):
20
- event_type = ""
21
- data = ""
22
- for line in raw_event.splitlines():
23
- if line.startswith("event: "):
24
- event_type = line.removeprefix("event: ")
25
- if line.startswith("data: "):
26
- data = line.removeprefix("data: ")
27
- events.append({"event": event_type, "data": json.loads(data)})
28
- return events
29
-
30
-
31
- def configure_provider(client: TestClient) -> None:
32
- client.post(
33
- "/api/providers",
34
- json={
35
- "api_key": "sk-local",
36
- "base_url": "",
37
- "id": "provider-openai",
38
- "models": ["gpt-5.1"],
39
- "name": "OpenAI",
40
- "type": "openai",
41
- },
42
- )
43
- client.put(
44
- "/api/settings",
45
- json={
46
- "reasoning_effort": "default",
47
- "selected_model": "gpt-5.1",
48
- "selected_provider_id": "provider-openai",
49
- },
50
- )
51
-
52
-
53
- def tool_call_chunk(
54
- name: str, arguments: dict[str, object], call_id: str = "call-1"
55
- ) -> dict[str, object]:
56
- return {
57
- "choices": [
58
- {
59
- "delta": {
60
- "tool_calls": [
61
- {
62
- "index": 0,
63
- "id": call_id,
64
- "type": "function",
65
- "function": {
66
- "name": name,
67
- "arguments": json.dumps(arguments),
68
- },
69
- }
70
- ]
71
- }
72
- }
73
- ]
74
- }
75
-
76
-
77
- def text_chunk(content: str) -> dict[str, object]:
78
- return {"choices": [{"delta": {"content": content}}]}
79
-
80
-
81
- def thinking_chunk(content: str) -> dict[str, object]:
82
- return {"choices": [{"delta": {"reasoning_content": content}}]}
83
-
84
-
85
- def test_workspace_response_streams_tool_process_and_final_text(
86
- tmp_path, monkeypatch
87
- ) -> None:
88
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
89
- workdir = tmp_path / "workdir"
90
- workdir.mkdir()
91
- (workdir / "notes.txt").write_text("Launch notes")
92
- monkeypatch.chdir(workdir)
93
- captured_requests: list[dict[str, object]] = []
94
-
95
- async def fake_completion(**request: object) -> object:
96
- captured_requests.append(request)
97
-
98
- async def chunks() -> object:
99
- if len(captured_requests) == 1:
100
- yield tool_call_chunk("read_file", {"path": "notes.txt"})
101
- else:
102
- yield text_chunk("Read the notes.")
103
-
104
- return chunks()
105
-
106
- client = TestClient(
107
- create_app(serve_frontend=False, chat_completion=fake_completion)
108
- )
109
- configure_provider(client)
110
-
111
- response = client.post(
112
- "/api/workspace/respond",
113
- json={"content": "Use the notes."},
114
- )
115
-
116
- assert response.status_code == 200
117
- events = stream_events(response.text)
118
- assert [event["event"] for event in events] == [
119
- "start",
120
- "output_start",
121
- "tool_start",
122
- "tool_done",
123
- "output_start",
124
- "delta",
125
- "done",
126
- ]
127
- assert events[1]["data"] == {"index": 1}
128
- assert events[2]["data"]["tool"]["status"] == "running"
129
- assert events[3]["data"]["status"] == "success"
130
- assert events[4]["data"] == {"index": 2}
131
- assert events[5]["data"] == {"content": "Read the notes."}
132
- assert events[6]["data"]["message"]["content"] == "Read the notes."
133
- assert len(captured_requests) == 2
134
- assert captured_requests[0]["messages"][0] == {
135
- "role": "system",
136
- "content": FLOWENT_AGENT_SYSTEM_PROMPT,
137
- }
138
- second_messages = captured_requests[1]["messages"]
139
- assert second_messages[0] == {
140
- "role": "system",
141
- "content": FLOWENT_AGENT_SYSTEM_PROMPT,
142
- }
143
- assert second_messages[-2]["tool_calls"][0]["function"]["name"] == "read_file"
144
- assert second_messages[-1] == {
145
- "role": "tool",
146
- "tool_call_id": "call-1",
147
- "content": "Launch notes",
148
- }
149
-
150
-
151
- def test_tools_can_read_paths_outside_workdir(tmp_path) -> None:
152
- outside = tmp_path / "outside.txt"
153
- outside.write_text("outside content")
154
-
155
- result = run_tool(
156
- "read_file", {"path": str(outside)}, ToolContext(cwd=tmp_path / "work")
157
- )
158
-
159
- assert result.ok
160
- assert result.content == "outside content"
161
-
162
-
163
- def test_list_dir_can_list_paths_outside_workdir(tmp_path) -> None:
164
- outside = tmp_path / "outside"
165
- outside.mkdir()
166
- (outside / "file.txt").write_text("content")
167
-
168
- result = run_tool(
169
- "list_dir", {"path": str(outside)}, ToolContext(cwd=tmp_path / "work")
170
- )
171
-
172
- assert result.ok
173
- assert "file.txt" in result.content
174
-
175
-
176
- def test_grep_files_can_search_paths_outside_workdir(tmp_path) -> None:
177
- outside = tmp_path / "outside"
178
- outside.mkdir()
179
- (outside / "file.txt").write_text("alpha beta")
180
-
181
- result = run_tool(
182
- "grep_files",
183
- {"pattern": "alpha", "path": str(outside)},
184
- ToolContext(cwd=tmp_path / "work"),
185
- )
186
-
187
- assert result.ok
188
- assert "file.txt" in result.content
189
-
190
-
191
- def test_shell_command_can_write_workdir_and_tmp(tmp_path) -> None:
192
- result = run_tool(
193
- "shell_command",
194
- {"command": "echo ok > work.txt && echo tmp > /tmp/flowent-tool-test.txt"},
195
- ToolContext(cwd=tmp_path),
196
- )
197
-
198
- assert result.ok
199
- assert (tmp_path / "work.txt").read_text().strip() == "ok"
200
-
201
-
202
- def test_shell_command_cannot_write_outside_workdir_and_tmp(tmp_path) -> None:
203
- outside = Path("/project/flowent/backend/tests/flowent-outside-denied.txt")
204
- if outside.exists():
205
- outside.unlink()
206
-
207
- result = run_tool(
208
- "shell_command",
209
- {"command": f"echo denied > {outside}"},
210
- ToolContext(cwd=tmp_path),
211
- )
212
-
213
- assert not result.ok
214
- assert not outside.exists()
215
-
216
-
217
- def test_shell_command_has_network_by_default(tmp_path) -> None:
218
- result = run_tool(
219
- "shell_command",
220
- {
221
- "command": "python - <<'PY'\nimport socket\ns=socket.socket()\nprint('network-ready')\nPY"
222
- },
223
- ToolContext(cwd=tmp_path),
224
- )
225
-
226
- assert result.ok
227
- assert "network-ready" in result.content
228
-
229
-
230
- def test_sandbox_command_keeps_proc_mount_when_preflight_succeeds(
231
- tmp_path, monkeypatch
232
- ) -> None:
233
- runner = SandboxRunner(cwd=tmp_path)
234
- monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: True)
235
-
236
- command = runner.build_command(["/bin/true"])
237
-
238
- assert command.args[command.args.index("--proc") + 1] == "/proc"
239
-
240
-
241
- def test_sandbox_command_omits_proc_mount_when_preflight_reports_permission_error(
242
- tmp_path, monkeypatch
243
- ) -> None:
244
- runner = SandboxRunner(cwd=tmp_path)
245
- monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
246
-
247
- command = runner.build_command(["/bin/true"])
248
-
249
- assert "--proc" not in command.args
250
-
251
-
252
- def test_sandbox_command_binds_writable_socket_path(tmp_path, monkeypatch) -> None:
253
- socket_path = tmp_path / "docker.sock"
254
- socket_path.touch()
255
- runner = SandboxRunner(cwd=tmp_path, writable_roots=[socket_path])
256
- monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
257
-
258
- command = runner.build_command(["/bin/true"])
259
-
260
- bind_index = command.args.index(str(socket_path))
261
- assert command.args[bind_index - 1] == "--bind"
262
- assert command.args[bind_index + 1] == str(socket_path)
263
-
264
-
265
- def test_sandbox_proc_preflight_does_not_hide_non_proc_errors(
266
- tmp_path, monkeypatch
267
- ) -> None:
268
- bwrap = tmp_path / "bwrap"
269
- bwrap.write_text("#!/bin/sh\necho 'bwrap: unrelated startup failure' >&2\nexit 1\n")
270
- bwrap.chmod(0o700)
271
- monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
272
-
273
- assert SandboxRunner(cwd=tmp_path).build_command(["/bin/true"]).args[0:7] == [
274
- str(bwrap),
275
- "--ro-bind",
276
- "/",
277
- "/",
278
- "--dev",
279
- "/dev",
280
- "--proc",
281
- ]
282
-
283
-
284
- def test_shell_command_runs_without_proc_mount_after_preflight_fallback(
285
- tmp_path, monkeypatch
286
- ) -> None:
287
- bwrap = tmp_path / "bwrap"
288
- bwrap.write_text(
289
- "#!/bin/sh\n"
290
- 'for arg in "$@"; do\n'
291
- ' if [ "$arg" = --proc ]; then\n'
292
- ' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
293
- " exit 1\n"
294
- " fi\n"
295
- "done\n"
296
- 'while [ "$#" -gt 0 ]; do\n'
297
- ' if [ "$1" = -- ]; then\n'
298
- " shift\n"
299
- ' exec "$@"\n'
300
- " fi\n"
301
- " shift\n"
302
- "done\n"
303
- )
304
- bwrap.chmod(0o700)
305
- monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
306
-
307
- result = SandboxRunner(cwd=tmp_path).run(["/bin/sh", "-c", "printf ok"])
308
-
309
- assert result.exit_code == 0
310
- assert result.stdout == "ok"
311
-
312
-
313
- def test_apply_patch_runs_without_proc_mount_after_preflight_fallback(
314
- tmp_path, monkeypatch
315
- ) -> None:
316
- bwrap = tmp_path / "bwrap"
317
- bwrap.write_text(
318
- "#!/bin/sh\n"
319
- 'for arg in "$@"; do\n'
320
- ' if [ "$arg" = --proc ]; then\n'
321
- ' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
322
- " exit 1\n"
323
- " fi\n"
324
- "done\n"
325
- 'while [ "$#" -gt 0 ]; do\n'
326
- ' if [ "$1" = -- ]; then\n'
327
- " shift\n"
328
- ' exec "$@"\n'
329
- " fi\n"
330
- " shift\n"
331
- "done\n"
332
- )
333
- bwrap.chmod(0o700)
334
- monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
335
- target = tmp_path / "notes.txt"
336
- target.write_text("alpha\n")
337
- patch = """*** Begin Patch
338
- *** Update File: notes.txt
339
- @@
340
- -alpha
341
- +beta
342
- *** End Patch
343
- """
344
-
345
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
346
-
347
- assert result.ok
348
- assert target.read_text() == "beta\n"
349
-
350
-
351
- def test_shell_command_environment_omits_development_variables(
352
- tmp_path, monkeypatch
353
- ) -> None:
354
- monkeypatch.setenv("NODE_ENV", "production")
355
- monkeypatch.setenv("VIRTUAL_ENV", "/tmp/flowent-venv")
356
- monkeypatch.setenv("PYTHONPATH", "/tmp/flowent-pythonpath")
357
- runner = SandboxRunner(cwd=tmp_path)
358
- monkeypatch.setattr(
359
- runner,
360
- "build_command",
361
- lambda command: SandboxCommand(command, seccomp_available=False),
362
- )
363
-
364
- result = runner.run(
365
- [
366
- "/bin/sh",
367
- "-c",
368
- 'printf \'%s|%s|%s\' "${NODE_ENV-unset}" "${VIRTUAL_ENV-unset}" "${PYTHONPATH-unset}"',
369
- ]
370
- )
371
-
372
- assert result.exit_code == 0
373
- assert result.stdout == "unset|unset|unset"
374
-
375
-
376
- def test_shell_command_environment_omits_sensitive_variables(
377
- tmp_path, monkeypatch
378
- ) -> None:
379
- monkeypatch.setenv("OPENAI_API_KEY", "sk-local")
380
- monkeypatch.setenv("SECRET_TOKEN", "secret")
381
- monkeypatch.setenv("NPM_TOKEN", "npm")
382
- runner = SandboxRunner(cwd=tmp_path)
383
- monkeypatch.setattr(
384
- runner,
385
- "build_command",
386
- lambda command: SandboxCommand(command, seccomp_available=False),
387
- )
388
-
389
- result = runner.run(
390
- [
391
- "/bin/sh",
392
- "-c",
393
- 'printf \'%s|%s|%s\' "${OPENAI_API_KEY-unset}" "${SECRET_TOKEN-unset}" "${NPM_TOKEN-unset}"',
394
- ]
395
- )
396
-
397
- assert result.exit_code == 0
398
- assert result.stdout == "unset|unset|unset"
399
-
400
-
401
- def test_shell_command_environment_keeps_core_variables(tmp_path, monkeypatch) -> None:
402
- monkeypatch.setenv("HOME", str(tmp_path / "home"))
403
- monkeypatch.setenv("PATH", "/usr/local/bin:/usr/bin:/bin")
404
- monkeypatch.setenv("SHELL", "/bin/sh")
405
- monkeypatch.setenv("USER", "flowent")
406
- runner = SandboxRunner(cwd=tmp_path)
407
- monkeypatch.setattr(
408
- runner,
409
- "build_command",
410
- lambda command: SandboxCommand(command, seccomp_available=False),
411
- )
412
-
413
- result = runner.run(
414
- [
415
- "/bin/sh",
416
- "-c",
417
- 'printf \'%s|%s|%s|%s\' "$HOME" "$PATH" "$SHELL" "$USER"',
418
- ]
419
- )
420
-
421
- assert result.exit_code == 0
422
- assert (
423
- result.stdout
424
- == f"{tmp_path / 'home'}|/usr/local/bin:/usr/bin:/bin|/bin/sh|flowent"
425
- )
426
-
427
-
428
- def test_shell_command_environment_uses_default_path_when_missing(
429
- tmp_path, monkeypatch
430
- ) -> None:
431
- monkeypatch.delenv("PATH", raising=False)
432
- runner = SandboxRunner(cwd=tmp_path)
433
- captured_env: dict[str, str] = {}
434
-
435
- def fake_run(*args, **kwargs):
436
- captured_env.update(kwargs["env"])
437
- return subprocess.CompletedProcess(
438
- args=args[0], returncode=0, stdout="", stderr=""
439
- )
440
-
441
- monkeypatch.setattr(
442
- runner,
443
- "build_command",
444
- lambda command: SandboxCommand(command, seccomp_available=False),
445
- )
446
- monkeypatch.setattr("subprocess.run", fake_run)
447
-
448
- result = runner.run(["/bin/sh", "-c", "true"])
449
-
450
- assert result.exit_code == 0
451
- assert (
452
- captured_env["PATH"]
453
- == "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
454
- )
455
-
456
-
457
- def test_shell_command_environment_accepts_explicit_overrides(
458
- tmp_path, monkeypatch
459
- ) -> None:
460
- monkeypatch.delenv("FLOWENT_TOOL_VAR", raising=False)
461
- runner = SandboxRunner(cwd=tmp_path)
462
- monkeypatch.setattr(
463
- runner,
464
- "build_command",
465
- lambda command: SandboxCommand(command, seccomp_available=False),
466
- )
467
-
468
- result = runner.run(
469
- ["/bin/sh", "-c", "printf '%s' \"$FLOWENT_TOOL_VAR\""],
470
- env={"FLOWENT_TOOL_VAR": "explicit"},
471
- )
472
-
473
- assert result.exit_code == 0
474
- assert result.stdout == "explicit"
475
-
476
-
477
- @pytest.mark.anyio
478
- async def test_async_shell_command_does_not_block_other_tasks(
479
- tmp_path, monkeypatch
480
- ) -> None:
481
- runner = SandboxRunner(cwd=tmp_path)
482
- command = [
483
- "/bin/sh",
484
- "-c",
485
- "python - <<'PY'\nimport time\ntime.sleep(0.2)\nprint('done')\nPY",
486
- ]
487
- monkeypatch.setattr(
488
- runner,
489
- "build_command",
490
- lambda command: SandboxCommand(command, seccomp_available=False),
491
- )
492
- command_task = asyncio.create_task(runner.run_async(command, timeout_seconds=1))
493
- start = time.perf_counter()
494
- await asyncio.sleep(0.01)
495
- elapsed = time.perf_counter() - start
496
- result = await command_task
497
-
498
- assert elapsed < 0.1
499
- assert result.exit_code == 0
500
- assert "done" in result.stdout
501
-
502
-
503
- @pytest.mark.anyio
504
- async def test_async_shell_command_timeout_returns_failed_result(
505
- tmp_path, monkeypatch
506
- ) -> None:
507
- runner = SandboxRunner(cwd=tmp_path)
508
- command = [
509
- "/bin/sh",
510
- "-c",
511
- "python - <<'PY'\nimport time\ntime.sleep(1)\nprint('late')\nPY",
512
- ]
513
- monkeypatch.setattr(
514
- runner,
515
- "build_command",
516
- lambda command: SandboxCommand(command, seccomp_available=False),
517
- )
518
- result = await runner.run_async(
519
- command,
520
- timeout_seconds=0.05,
521
- )
522
-
523
- assert result.exit_code == 124
524
- assert "late" not in result.stdout
525
-
526
-
527
- @pytest.mark.anyio
528
- async def test_agent_stream_stops_after_cancelled_tool(tmp_path) -> None:
529
- cancelled = False
530
-
531
- async def fake_completion(**request: object) -> object:
532
- async def chunks() -> object:
533
- yield tool_call_chunk("shell_command", {"command": "slow"})
534
-
535
- return chunks()
536
-
537
- async def fake_runner(
538
- name: str, arguments: dict[str, object], context: ToolContext
539
- ):
540
- nonlocal cancelled
541
- try:
542
- await asyncio.sleep(10)
543
- except asyncio.CancelledError:
544
- cancelled = True
545
- raise
546
-
547
- stream = run_agent_stream(
548
- completion=fake_completion,
549
- connection=ProviderConnection(
550
- base_url=None,
551
- model="gpt-5.1",
552
- name="OpenAI",
553
- provider=ProviderFormat.OPENAI,
554
- secret_reference="sk-local",
555
- ),
556
- cwd=tmp_path,
557
- messages=[{"role": "user", "content": "Run it."}],
558
- tool_runner=fake_runner,
559
- )
560
-
561
- await stream.__anext__()
562
- await stream.__anext__()
563
- await stream.__anext__()
564
- next_event = asyncio.create_task(stream.__anext__())
565
- await asyncio.sleep(0)
566
- next_event.cancel()
567
- with pytest.raises(asyncio.CancelledError):
568
- await next_event
569
- await stream.aclose()
570
-
571
- assert cancelled
572
-
573
-
574
- def test_shell_command_denies_ptrace_when_seccomp_is_available(tmp_path) -> None:
575
- command = SandboxRunner(cwd=tmp_path).build_command(["/bin/true"])
576
- if not command.seccomp_available:
577
- assert command.args[0].endswith("bwrap")
578
- return
579
-
580
- result = run_tool(
581
- "shell_command",
582
- {
583
- "command": "python - <<'PY'\nimport ctypes, os\nprint(ctypes.CDLL(None).ptrace(0, 0, None, None))\nPY"
584
- },
585
- ToolContext(cwd=tmp_path),
586
- )
587
-
588
- assert not result.ok or "-1" in result.content
589
-
590
-
591
- def test_apply_patch_modifies_workdir_file(tmp_path) -> None:
592
- target = tmp_path / "notes.txt"
593
- target.write_text("alpha\nbeta\n")
594
- patch = """*** Begin Patch
595
- *** Update File: notes.txt
596
- @@
597
- -beta
598
- +ready
599
- *** End Patch
600
- """
601
-
602
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
603
-
604
- assert result.ok
605
- assert result.title == "Edited notes.txt"
606
- assert target.read_text() == "alpha\nready\n"
607
-
608
-
609
- def test_apply_patch_added_file_title(tmp_path) -> None:
610
- patch = """*** Begin Patch
611
- *** Add File: created.txt
612
- +hello
613
- *** End Patch
614
- """
615
-
616
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
617
-
618
- assert result.ok
619
- assert result.title == "Added created.txt"
620
- assert (tmp_path / "created.txt").read_text() == "hello\n"
621
-
622
-
623
- def test_apply_patch_deleted_file_title(tmp_path) -> None:
624
- target = tmp_path / "old.txt"
625
- target.write_text("remove me\n")
626
- patch = """*** Begin Patch
627
- *** Delete File: old.txt
628
- *** End Patch
629
- """
630
-
631
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
632
-
633
- assert result.ok
634
- assert result.title == "Deleted old.txt"
635
- assert not target.exists()
636
-
637
-
638
- def test_apply_patch_multiple_files_title(tmp_path) -> None:
639
- target = tmp_path / "notes.txt"
640
- target.write_text("alpha\nbeta\n")
641
- patch = """*** Begin Patch
642
- *** Update File: notes.txt
643
- @@
644
- -beta
645
- +ready
646
- *** Add File: created.txt
647
- +hello
648
- *** End Patch
649
- """
650
-
651
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
652
-
653
- assert result.ok
654
- assert result.title == "Edited 2 files"
655
- assert target.read_text() == "alpha\nready\n"
656
- assert (tmp_path / "created.txt").read_text() == "hello\n"
657
-
658
-
659
- def test_apply_patch_rejects_outside_workdir_file(tmp_path) -> None:
660
- outside = Path(__file__).resolve().parent / "outside-patch.txt"
661
- outside.write_text("alpha\n")
662
- try:
663
- patch = f"""*** Begin Patch
664
- *** Update File: {outside}
665
- @@
666
- -alpha
667
- +beta
668
- *** End Patch
669
- """
670
-
671
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
672
-
673
- assert not result.ok
674
- assert result.title == "Edit failed"
675
- assert outside.read_text() == "alpha\n"
676
- finally:
677
- outside.unlink(missing_ok=True)
678
-
679
-
680
- def test_apply_patch_uses_internal_subcommand(tmp_path, monkeypatch) -> None:
681
- calls: list[list[str]] = []
682
-
683
- def fake_run(self, command, **kwargs):
684
- calls.append(command)
685
- from flowent.sandbox import CommandResult
686
-
687
- return CommandResult(
688
- command=" ".join(command), exit_code=0, stderr="", stdout="{}"
689
- )
690
-
691
- monkeypatch.setattr(SandboxRunner, "run", fake_run)
692
- patch = """*** Begin Patch
693
- *** Add File: created.txt
694
- +hello
695
- *** End Patch
696
- """
697
-
698
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
699
-
700
- assert result.ok
701
- assert result.title == "Edited files"
702
- assert calls
703
- assert calls[0][1:4] == ["-m", "flowent.cli", "apply-patch"]
704
-
705
-
706
- def test_apply_patch_reports_patch_error_when_stderr_has_warning(
707
- tmp_path, monkeypatch
708
- ) -> None:
709
- def fake_run(self, command, **kwargs):
710
- from flowent.sandbox import CommandResult
711
-
712
- return CommandResult(
713
- command=" ".join(command),
714
- exit_code=1,
715
- stderr="RuntimeWarning: flowent.cli was already imported\n",
716
- stdout='{"error": "Patch context was not found."}\n',
717
- )
718
-
719
- monkeypatch.setattr(SandboxRunner, "run", fake_run)
720
- patch = """*** Begin Patch
721
- *** Update File: notes.txt
722
- @@
723
- -missing
724
- +ready
725
- *** End Patch
726
- """
727
-
728
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
729
-
730
- assert not result.ok
731
- assert result.title == "Edit failed"
732
- assert result.content == "Patch context was not found."
733
-
734
-
735
- def test_web_search_result_enters_tool_output(tmp_path) -> None:
736
- def fake_search(query: str):
737
- return [{"title": "Result", "url": "https://example.test", "snippet": query}]
738
-
739
- result = run_tool(
740
- "web_search",
741
- {"query": "release checklist"},
742
- ToolContext(cwd=tmp_path, web_searcher=fake_search),
743
- )
744
-
745
- assert result.ok
746
- assert "https://example.test" in result.content
747
-
748
-
749
- def test_agent_continues_until_final_text_after_multiple_tool_rounds(
750
- tmp_path, monkeypatch
751
- ) -> None:
752
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
753
- workdir = tmp_path / "workdir"
754
- workdir.mkdir()
755
- (workdir / "notes.txt").write_text("Launch notes")
756
- monkeypatch.chdir(workdir)
757
- captured_requests: list[dict[str, object]] = []
758
-
759
- async def fake_completion(**request: object) -> object:
760
- captured_requests.append(request)
761
-
762
- async def chunks() -> object:
763
- if len(captured_requests) == 1:
764
- yield tool_call_chunk("list_dir", {"path": "."}, call_id="call-list")
765
- elif len(captured_requests) == 2:
766
- yield tool_call_chunk(
767
- "read_file", {"path": "notes.txt"}, call_id="call-read"
768
- )
769
- else:
770
- yield text_chunk("The notes are ready.")
771
-
772
- return chunks()
773
-
774
- client = TestClient(
775
- create_app(serve_frontend=False, chat_completion=fake_completion)
776
- )
777
- configure_provider(client)
778
-
779
- response = client.post(
780
- "/api/workspace/respond",
781
- json={"content": "Inspect the workspace."},
782
- )
783
-
784
- assert response.status_code == 200
785
- events = stream_events(response.text)
786
- assert [event["event"] for event in events] == [
787
- "start",
788
- "output_start",
789
- "tool_start",
790
- "tool_done",
791
- "output_start",
792
- "tool_start",
793
- "tool_done",
794
- "output_start",
795
- "delta",
796
- "done",
797
- ]
798
- assert len(captured_requests) == 3
799
- assert captured_requests[2]["messages"][-1] == {
800
- "role": "tool",
801
- "tool_call_id": "call-read",
802
- "content": "Launch notes",
803
- }
804
- assert events[-1]["data"]["message"]["content"] == "The notes are ready."
805
-
806
-
807
- def test_agent_finishes_without_tools(tmp_path, monkeypatch) -> None:
808
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
809
- monkeypatch.chdir(tmp_path)
810
- captured_requests: list[dict[str, object]] = []
811
-
812
- async def fake_completion(**request: object) -> object:
813
- captured_requests.append(request)
814
-
815
- async def chunks() -> object:
816
- yield text_chunk("Direct answer.")
817
-
818
- return chunks()
819
-
820
- client = TestClient(
821
- create_app(serve_frontend=False, chat_completion=fake_completion)
822
- )
823
- configure_provider(client)
824
-
825
- response = client.post(
826
- "/api/workspace/respond",
827
- json={"content": "Answer directly."},
828
- )
829
-
830
- assert response.status_code == 200
831
- events = stream_events(response.text)
832
- assert [event["event"] for event in events] == [
833
- "start",
834
- "output_start",
835
- "delta",
836
- "done",
837
- ]
838
- assert len(captured_requests) == 1
839
- assert events[-1]["data"]["message"]["content"] == "Direct answer."
840
-
841
-
842
- def test_agent_streams_and_persists_thinking(tmp_path, monkeypatch) -> None:
843
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
844
- monkeypatch.chdir(tmp_path)
845
-
846
- async def fake_completion(**request: object) -> object:
847
- async def chunks() -> object:
848
- yield thinking_chunk("Checking context.")
849
- yield thinking_chunk(" Preparing answer.")
850
- yield text_chunk("Direct answer.")
851
-
852
- return chunks()
853
-
854
- client = TestClient(
855
- create_app(serve_frontend=False, chat_completion=fake_completion)
856
- )
857
- configure_provider(client)
858
-
859
- response = client.post(
860
- "/api/workspace/respond",
861
- json={"content": "Answer directly."},
862
- )
863
-
864
- assert response.status_code == 200
865
- events = stream_events(response.text)
866
- assert [event["event"] for event in events] == [
867
- "start",
868
- "output_start",
869
- "thinking_delta",
870
- "thinking_delta",
871
- "delta",
872
- "done",
873
- ]
874
- assert events[2]["data"] == {"content": "Checking context."}
875
- assert events[-1]["data"]["message"]["thinking"] == (
876
- "Checking context. Preparing answer."
877
- )
878
- state = client.get("/api/state").json()
879
- assert state["messages"][-1]["thinking"] == ("Checking context. Preparing answer.")
880
-
881
-
882
- def test_tool_failure_is_reported_and_agent_continues(tmp_path, monkeypatch) -> None:
883
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
884
- monkeypatch.chdir(tmp_path)
885
- captured_requests: list[dict[str, object]] = []
886
-
887
- async def fake_completion(**request: object) -> object:
888
- captured_requests.append(request)
889
-
890
- async def chunks() -> object:
891
- if len(captured_requests) == 1:
892
- yield tool_call_chunk("read_file", {"path": "missing.txt"})
893
- else:
894
- yield text_chunk("I could not read it.")
895
-
896
- return chunks()
897
-
898
- client = TestClient(
899
- create_app(serve_frontend=False, chat_completion=fake_completion)
900
- )
901
- configure_provider(client)
902
-
903
- response = client.post(
904
- "/api/workspace/respond",
905
- json={"content": "Read it."},
906
- )
907
-
908
- events = stream_events(response.text)
909
- assert "tool_error" in [event["event"] for event in events]
910
- assert len(captured_requests) == 2
911
- assert captured_requests[1]["messages"][-1]["role"] == "tool"
912
- assert captured_requests[1]["messages"][-1]["tool_call_id"] == "call-1"
913
- assert "missing.txt" in captured_requests[1]["messages"][-1]["content"]
914
- assert events[-1]["data"]["message"]["content"] == "I could not read it."
915
-
916
-
917
- @pytest.mark.anyio
918
- async def test_approval_denial_result_is_sent_to_agent(tmp_path) -> None:
919
- captured_requests: list[dict[str, object]] = []
920
-
921
- async def fake_completion(**request: object) -> object:
922
- captured_requests.append(request)
923
-
924
- async def chunks() -> object:
925
- if len(captured_requests) == 1:
926
- yield tool_call_chunk(
927
- "shell_command",
928
- {"command": "rm -rf /important"},
929
- )
930
- else:
931
- yield text_chunk("I need explicit approval for that risk.")
932
-
933
- return chunks()
934
-
935
- async def denying_tool_runner(
936
- name: str,
937
- arguments: dict[str, object],
938
- context: ToolContext,
939
- ) -> ToolResult:
940
- return ToolResult(
941
- content=(
942
- "Automatic approval review denied this action as high risk: "
943
- "The command can delete broad data. The agent must not work around "
944
- "this denial."
945
- ),
946
- ok=False,
947
- title="Denied by reviewer",
948
- )
949
-
950
- events = [
951
- event
952
- async for event in run_agent_stream(
953
- completion=fake_completion,
954
- connection=ProviderConnection(
955
- model="gpt-5.1",
956
- name="Provider",
957
- provider=ProviderFormat.OPENAI,
958
- secret_reference="secret",
959
- ),
960
- cwd=tmp_path,
961
- messages=[{"role": "user", "content": "Delete the important directory."}],
962
- tool_runner=denying_tool_runner,
963
- )
964
- ]
965
-
966
- assert len(captured_requests) == 2
967
- assert captured_requests[1]["messages"][-1]["role"] == "tool"
968
- assert "Automatic approval review denied this action" in str(
969
- captured_requests[1]["messages"][-1]["content"]
970
- )
971
- assert "must not work around" in str(
972
- captured_requests[1]["messages"][-1]["content"]
973
- )
974
- assert events[-2].data["content"] == "I need explicit approval for that risk."
975
- assert events[-1].data["message"]["content"] == (
976
- "I need explicit approval for that risk."
977
- )
978
-
979
-
980
- def test_update_plan_outputs_plan_state(tmp_path) -> None:
981
- result = run_tool(
982
- "update_plan",
983
- {"items": [{"step": "Read files", "status": "completed"}]},
984
- ToolContext(cwd=tmp_path),
985
- )
986
-
987
- assert result.ok
988
- assert "Read files" in result.content