flowent 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/backend/pyproject.toml +31 -5
  2. package/backend/src/flowent/agent.py +13 -4
  3. package/backend/src/flowent/compact.py +35 -14
  4. package/backend/src/flowent/llm.py +73 -7
  5. package/backend/src/flowent/main.py +260 -59
  6. package/backend/src/flowent/static/assets/index-CRSV2xu1.css +2 -0
  7. package/backend/src/flowent/static/assets/index-DUYj6rgD.js +82 -0
  8. package/backend/src/flowent/static/index.html +2 -2
  9. package/backend/src/flowent/storage.py +135 -3
  10. package/backend/src/flowent/usage.py +315 -0
  11. package/backend/uv.lock +971 -3
  12. package/dist/frontend/assets/index-CRSV2xu1.css +2 -0
  13. package/dist/frontend/assets/index-DUYj6rgD.js +82 -0
  14. package/dist/frontend/index.html +2 -2
  15. package/package.json +24 -3
  16. package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
  17. package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
  18. package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
  19. package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
  20. package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
  21. package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
  22. package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
  23. package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
  24. package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
  25. package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
  26. package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
  27. package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
  28. package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
  29. package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
  30. package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
  31. package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
  32. package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
  33. package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
  34. package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
  35. package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
  36. package/backend/src/flowent/static/assets/index-BlaCigkZ.js +0 -82
  37. package/backend/src/flowent/static/assets/index-CRvbsH4K.css +0 -2
  38. package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
  39. package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
  40. package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
  41. package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
  42. package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
  43. package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
  44. package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
  45. package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
  46. package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
  47. package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
  48. package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
  49. package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
  50. package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
  51. package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
  52. package/backend/tests/conftest.py +0 -60
  53. package/backend/tests/test_agent_tools.py +0 -1124
  54. package/backend/tests/test_approval.py +0 -283
  55. package/backend/tests/test_channels.py +0 -360
  56. package/backend/tests/test_health.py +0 -12
  57. package/backend/tests/test_llm_providers.py +0 -548
  58. package/backend/tests/test_logging.py +0 -212
  59. package/backend/tests/test_mcp.py +0 -788
  60. package/backend/tests/test_patch.py +0 -112
  61. package/backend/tests/test_permissions.py +0 -588
  62. package/backend/tests/test_persistence.py +0 -249
  63. package/backend/tests/test_skills.py +0 -462
  64. package/backend/tests/test_startup_requirements.py +0 -144
  65. package/backend/tests/test_workspace_chat.py +0 -2174
  66. package/dist/frontend/assets/index-BlaCigkZ.js +0 -82
  67. package/dist/frontend/assets/index-CRvbsH4K.css +0 -2
@@ -1,1124 +0,0 @@
1
- import asyncio
2
- import json
3
- import logging
4
- import subprocess
5
- import time
6
- from pathlib import Path
7
-
8
- import pytest
9
- from fastapi.testclient import TestClient
10
-
11
- from flowent.agent import FLOWENT_AGENT_SYSTEM_PROMPT, run_agent_stream
12
- from flowent.llm import ProviderConnection, ProviderFormat
13
- from flowent.main import create_app
14
- from flowent.sandbox import SandboxCommand, SandboxRunner
15
- from flowent.tools import ToolContext, ToolResult, run_tool
16
-
17
-
18
- def stream_events(content: str) -> list[dict[str, object]]:
19
- events: list[dict[str, object]] = []
20
- for raw_event in content.strip().split("\n\n"):
21
- event_type = ""
22
- data = ""
23
- for line in raw_event.splitlines():
24
- if line.startswith("event: "):
25
- event_type = line.removeprefix("event: ")
26
- if line.startswith("data: "):
27
- data = line.removeprefix("data: ")
28
- events.append({"event": event_type, "data": json.loads(data)})
29
- return events
30
-
31
-
32
- def configure_provider(client: TestClient) -> None:
33
- client.post(
34
- "/api/providers",
35
- json={
36
- "api_key": "sk-local",
37
- "base_url": "",
38
- "id": "provider-openai",
39
- "models": ["gpt-5.1"],
40
- "name": "OpenAI",
41
- "type": "openai",
42
- },
43
- )
44
- client.put(
45
- "/api/settings",
46
- json={
47
- "reasoning_effort": "default",
48
- "selected_model": "gpt-5.1",
49
- "selected_provider_id": "provider-openai",
50
- },
51
- )
52
-
53
-
54
- def tool_call_chunk(
55
- name: str, arguments: dict[str, object], call_id: str = "call-1"
56
- ) -> dict[str, object]:
57
- return {
58
- "choices": [
59
- {
60
- "delta": {
61
- "tool_calls": [
62
- {
63
- "index": 0,
64
- "id": call_id,
65
- "type": "function",
66
- "function": {
67
- "name": name,
68
- "arguments": json.dumps(arguments),
69
- },
70
- }
71
- ]
72
- }
73
- }
74
- ]
75
- }
76
-
77
-
78
- def text_chunk(content: str) -> dict[str, object]:
79
- return {"choices": [{"delta": {"content": content}}]}
80
-
81
-
82
- def thinking_chunk(content: str) -> dict[str, object]:
83
- return {"choices": [{"delta": {"reasoning_content": content}}]}
84
-
85
-
86
- def test_workspace_response_streams_tool_process_and_final_text(
87
- tmp_path, monkeypatch
88
- ) -> None:
89
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
90
- workdir = tmp_path / "workdir"
91
- workdir.mkdir()
92
- (workdir / "notes.txt").write_text("Launch notes")
93
- monkeypatch.chdir(workdir)
94
- captured_requests: list[dict[str, object]] = []
95
-
96
- async def fake_completion(**request: object) -> object:
97
- captured_requests.append(request)
98
-
99
- async def chunks() -> object:
100
- if len(captured_requests) == 1:
101
- yield tool_call_chunk("read_file", {"path": "notes.txt"})
102
- else:
103
- yield text_chunk("Read the notes.")
104
-
105
- return chunks()
106
-
107
- client = TestClient(
108
- create_app(serve_frontend=False, chat_completion=fake_completion)
109
- )
110
- configure_provider(client)
111
-
112
- response = client.post(
113
- "/api/workspace/respond",
114
- json={"content": "Use the notes."},
115
- )
116
-
117
- assert response.status_code == 200
118
- events = stream_events(response.text)
119
- assert [event["event"] for event in events] == [
120
- "start",
121
- "output_start",
122
- "tool_start",
123
- "tool_done",
124
- "output_start",
125
- "delta",
126
- "done",
127
- ]
128
- assert events[1]["data"] == {"index": 1}
129
- assert events[2]["data"]["tool"]["status"] == "running"
130
- assert events[3]["data"]["status"] == "success"
131
- assert events[4]["data"] == {"index": 2}
132
- assert events[5]["data"] == {"content": "Read the notes."}
133
- assert events[6]["data"]["message"]["content"] == "Read the notes."
134
- assert len(captured_requests) == 2
135
- assert captured_requests[0]["messages"][0] == {
136
- "role": "system",
137
- "content": FLOWENT_AGENT_SYSTEM_PROMPT,
138
- }
139
- second_messages = captured_requests[1]["messages"]
140
- assert second_messages[0] == {
141
- "role": "system",
142
- "content": FLOWENT_AGENT_SYSTEM_PROMPT,
143
- }
144
- assert second_messages[-2]["tool_calls"][0]["function"]["name"] == "read_file"
145
- assert second_messages[-1] == {
146
- "role": "tool",
147
- "tool_call_id": "call-1",
148
- "content": "Launch notes",
149
- }
150
-
151
-
152
- def test_tools_can_read_paths_outside_workdir(tmp_path) -> None:
153
- outside = tmp_path / "outside.txt"
154
- outside.write_text("outside content")
155
-
156
- result = run_tool(
157
- "read_file", {"path": str(outside)}, ToolContext(cwd=tmp_path / "work")
158
- )
159
-
160
- assert result.ok
161
- assert result.content == "outside content"
162
-
163
-
164
- def test_list_dir_can_list_paths_outside_workdir(tmp_path) -> None:
165
- outside = tmp_path / "outside"
166
- outside.mkdir()
167
- (outside / "file.txt").write_text("content")
168
-
169
- result = run_tool(
170
- "list_dir", {"path": str(outside)}, ToolContext(cwd=tmp_path / "work")
171
- )
172
-
173
- assert result.ok
174
- assert "file.txt" in result.content
175
-
176
-
177
- def test_grep_files_can_search_paths_outside_workdir(tmp_path) -> None:
178
- outside = tmp_path / "outside"
179
- outside.mkdir()
180
- (outside / "file.txt").write_text("alpha beta")
181
-
182
- result = run_tool(
183
- "grep_files",
184
- {"pattern": "alpha", "path": str(outside)},
185
- ToolContext(cwd=tmp_path / "work"),
186
- )
187
-
188
- assert result.ok
189
- assert "file.txt" in result.content
190
-
191
-
192
- def test_shell_command_can_write_workdir_and_tmp(tmp_path) -> None:
193
- result = run_tool(
194
- "shell_command",
195
- {"command": "echo ok > work.txt && echo tmp > /tmp/flowent-tool-test.txt"},
196
- ToolContext(cwd=tmp_path),
197
- )
198
-
199
- assert result.ok
200
- assert (tmp_path / "work.txt").read_text().strip() == "ok"
201
-
202
-
203
- def test_shell_command_cannot_write_outside_workdir_and_tmp(tmp_path) -> None:
204
- outside = Path("/project/flowent/backend/tests/flowent-outside-denied.txt")
205
- if outside.exists():
206
- outside.unlink()
207
-
208
- result = run_tool(
209
- "shell_command",
210
- {"command": f"echo denied > {outside}"},
211
- ToolContext(cwd=tmp_path),
212
- )
213
-
214
- assert not result.ok
215
- assert not outside.exists()
216
-
217
-
218
- def test_shell_command_has_network_by_default(tmp_path) -> None:
219
- result = run_tool(
220
- "shell_command",
221
- {
222
- "command": "python - <<'PY'\nimport socket\ns=socket.socket()\nprint('network-ready')\nPY"
223
- },
224
- ToolContext(cwd=tmp_path),
225
- )
226
-
227
- assert result.ok
228
- assert "network-ready" in result.content
229
-
230
-
231
- def test_sandbox_command_keeps_proc_mount_when_preflight_succeeds(
232
- tmp_path, monkeypatch
233
- ) -> None:
234
- runner = SandboxRunner(cwd=tmp_path)
235
- monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: True)
236
-
237
- command = runner.build_command(["/bin/true"])
238
-
239
- assert command.args[command.args.index("--proc") + 1] == "/proc"
240
-
241
-
242
- def test_sandbox_command_omits_proc_mount_when_preflight_reports_permission_error(
243
- tmp_path, monkeypatch
244
- ) -> None:
245
- runner = SandboxRunner(cwd=tmp_path)
246
- monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
247
-
248
- command = runner.build_command(["/bin/true"])
249
-
250
- assert "--proc" not in command.args
251
-
252
-
253
- def test_sandbox_command_binds_writable_socket_path(tmp_path, monkeypatch) -> None:
254
- socket_path = tmp_path / "docker.sock"
255
- socket_path.touch()
256
- runner = SandboxRunner(cwd=tmp_path, writable_roots=[socket_path])
257
- monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
258
-
259
- command = runner.build_command(["/bin/true"])
260
-
261
- bind_index = command.args.index(str(socket_path))
262
- assert command.args[bind_index - 1] == "--bind"
263
- assert command.args[bind_index + 1] == str(socket_path)
264
-
265
-
266
- def test_sandbox_proc_preflight_does_not_hide_non_proc_errors(
267
- tmp_path, monkeypatch
268
- ) -> None:
269
- bwrap = tmp_path / "bwrap"
270
- bwrap.write_text("#!/bin/sh\necho 'bwrap: unrelated startup failure' >&2\nexit 1\n")
271
- bwrap.chmod(0o700)
272
- monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
273
-
274
- assert SandboxRunner(cwd=tmp_path).build_command(["/bin/true"]).args[0:7] == [
275
- str(bwrap),
276
- "--ro-bind",
277
- "/",
278
- "/",
279
- "--dev",
280
- "/dev",
281
- "--proc",
282
- ]
283
-
284
-
285
- def test_shell_command_runs_without_proc_mount_after_preflight_fallback(
286
- tmp_path, monkeypatch
287
- ) -> None:
288
- bwrap = tmp_path / "bwrap"
289
- bwrap.write_text(
290
- "#!/bin/sh\n"
291
- 'for arg in "$@"; do\n'
292
- ' if [ "$arg" = --proc ]; then\n'
293
- ' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
294
- " exit 1\n"
295
- " fi\n"
296
- "done\n"
297
- 'while [ "$#" -gt 0 ]; do\n'
298
- ' if [ "$1" = -- ]; then\n'
299
- " shift\n"
300
- ' exec "$@"\n'
301
- " fi\n"
302
- " shift\n"
303
- "done\n"
304
- )
305
- bwrap.chmod(0o700)
306
- monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
307
-
308
- result = SandboxRunner(cwd=tmp_path).run(["/bin/sh", "-c", "printf ok"])
309
-
310
- assert result.exit_code == 0
311
- assert result.stdout == "ok"
312
-
313
-
314
- def test_apply_patch_runs_without_proc_mount_after_preflight_fallback(
315
- tmp_path, monkeypatch
316
- ) -> None:
317
- bwrap = tmp_path / "bwrap"
318
- bwrap.write_text(
319
- "#!/bin/sh\n"
320
- 'for arg in "$@"; do\n'
321
- ' if [ "$arg" = --proc ]; then\n'
322
- ' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
323
- " exit 1\n"
324
- " fi\n"
325
- "done\n"
326
- 'while [ "$#" -gt 0 ]; do\n'
327
- ' if [ "$1" = -- ]; then\n'
328
- " shift\n"
329
- ' exec "$@"\n'
330
- " fi\n"
331
- " shift\n"
332
- "done\n"
333
- )
334
- bwrap.chmod(0o700)
335
- monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
336
- target = tmp_path / "notes.txt"
337
- target.write_text("alpha\n")
338
- patch = """*** Begin Patch
339
- *** Update File: notes.txt
340
- @@
341
- -alpha
342
- +beta
343
- *** End Patch
344
- """
345
-
346
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
347
-
348
- assert result.ok
349
- assert target.read_text() == "beta\n"
350
-
351
-
352
- def test_shell_command_environment_omits_development_variables(
353
- tmp_path, monkeypatch
354
- ) -> None:
355
- monkeypatch.setenv("NODE_ENV", "production")
356
- monkeypatch.setenv("VIRTUAL_ENV", "/tmp/flowent-venv")
357
- monkeypatch.setenv("PYTHONPATH", "/tmp/flowent-pythonpath")
358
- runner = SandboxRunner(cwd=tmp_path)
359
- monkeypatch.setattr(
360
- runner,
361
- "build_command",
362
- lambda command: SandboxCommand(command, seccomp_available=False),
363
- )
364
-
365
- result = runner.run(
366
- [
367
- "/bin/sh",
368
- "-c",
369
- 'printf \'%s|%s|%s\' "${NODE_ENV-unset}" "${VIRTUAL_ENV-unset}" "${PYTHONPATH-unset}"',
370
- ]
371
- )
372
-
373
- assert result.exit_code == 0
374
- assert result.stdout == "unset|unset|unset"
375
-
376
-
377
- def test_shell_command_environment_omits_sensitive_variables(
378
- tmp_path, monkeypatch
379
- ) -> None:
380
- monkeypatch.setenv("OPENAI_API_KEY", "sk-local")
381
- monkeypatch.setenv("SECRET_TOKEN", "secret")
382
- monkeypatch.setenv("NPM_TOKEN", "npm")
383
- runner = SandboxRunner(cwd=tmp_path)
384
- monkeypatch.setattr(
385
- runner,
386
- "build_command",
387
- lambda command: SandboxCommand(command, seccomp_available=False),
388
- )
389
-
390
- result = runner.run(
391
- [
392
- "/bin/sh",
393
- "-c",
394
- 'printf \'%s|%s|%s\' "${OPENAI_API_KEY-unset}" "${SECRET_TOKEN-unset}" "${NPM_TOKEN-unset}"',
395
- ]
396
- )
397
-
398
- assert result.exit_code == 0
399
- assert result.stdout == "unset|unset|unset"
400
-
401
-
402
- def test_shell_command_environment_keeps_core_variables(tmp_path, monkeypatch) -> None:
403
- monkeypatch.setenv("HOME", str(tmp_path / "home"))
404
- monkeypatch.setenv("PATH", "/usr/local/bin:/usr/bin:/bin")
405
- monkeypatch.setenv("SHELL", "/bin/sh")
406
- monkeypatch.setenv("USER", "flowent")
407
- runner = SandboxRunner(cwd=tmp_path)
408
- monkeypatch.setattr(
409
- runner,
410
- "build_command",
411
- lambda command: SandboxCommand(command, seccomp_available=False),
412
- )
413
-
414
- result = runner.run(
415
- [
416
- "/bin/sh",
417
- "-c",
418
- 'printf \'%s|%s|%s|%s\' "$HOME" "$PATH" "$SHELL" "$USER"',
419
- ]
420
- )
421
-
422
- assert result.exit_code == 0
423
- assert (
424
- result.stdout
425
- == f"{tmp_path / 'home'}|/usr/local/bin:/usr/bin:/bin|/bin/sh|flowent"
426
- )
427
-
428
-
429
- def test_shell_command_environment_uses_default_path_when_missing(
430
- tmp_path, monkeypatch
431
- ) -> None:
432
- monkeypatch.delenv("PATH", raising=False)
433
- runner = SandboxRunner(cwd=tmp_path)
434
- captured_env: dict[str, str] = {}
435
-
436
- def fake_run(*args, **kwargs):
437
- captured_env.update(kwargs["env"])
438
- return subprocess.CompletedProcess(
439
- args=args[0], returncode=0, stdout="", stderr=""
440
- )
441
-
442
- monkeypatch.setattr(
443
- runner,
444
- "build_command",
445
- lambda command: SandboxCommand(command, seccomp_available=False),
446
- )
447
- monkeypatch.setattr("subprocess.run", fake_run)
448
-
449
- result = runner.run(["/bin/sh", "-c", "true"])
450
-
451
- assert result.exit_code == 0
452
- assert (
453
- captured_env["PATH"]
454
- == "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
455
- )
456
-
457
-
458
- def test_shell_command_environment_accepts_explicit_overrides(
459
- tmp_path, monkeypatch
460
- ) -> None:
461
- monkeypatch.delenv("FLOWENT_TOOL_VAR", raising=False)
462
- runner = SandboxRunner(cwd=tmp_path)
463
- monkeypatch.setattr(
464
- runner,
465
- "build_command",
466
- lambda command: SandboxCommand(command, seccomp_available=False),
467
- )
468
-
469
- result = runner.run(
470
- ["/bin/sh", "-c", "printf '%s' \"$FLOWENT_TOOL_VAR\""],
471
- env={"FLOWENT_TOOL_VAR": "explicit"},
472
- )
473
-
474
- assert result.exit_code == 0
475
- assert result.stdout == "explicit"
476
-
477
-
478
- @pytest.mark.anyio
479
- async def test_async_shell_command_does_not_block_other_tasks(
480
- tmp_path, monkeypatch
481
- ) -> None:
482
- runner = SandboxRunner(cwd=tmp_path)
483
- command = [
484
- "/bin/sh",
485
- "-c",
486
- "python - <<'PY'\nimport time\ntime.sleep(0.2)\nprint('done')\nPY",
487
- ]
488
- monkeypatch.setattr(
489
- runner,
490
- "build_command",
491
- lambda command: SandboxCommand(command, seccomp_available=False),
492
- )
493
- command_task = asyncio.create_task(runner.run_async(command, timeout_seconds=1))
494
- start = time.perf_counter()
495
- await asyncio.sleep(0.01)
496
- elapsed = time.perf_counter() - start
497
- result = await command_task
498
-
499
- assert elapsed < 0.1
500
- assert result.exit_code == 0
501
- assert "done" in result.stdout
502
-
503
-
504
- @pytest.mark.anyio
505
- async def test_async_shell_command_timeout_returns_failed_result(
506
- tmp_path, monkeypatch
507
- ) -> None:
508
- runner = SandboxRunner(cwd=tmp_path)
509
- command = [
510
- "/bin/sh",
511
- "-c",
512
- "python - <<'PY'\nimport time\ntime.sleep(1)\nprint('late')\nPY",
513
- ]
514
- monkeypatch.setattr(
515
- runner,
516
- "build_command",
517
- lambda command: SandboxCommand(command, seccomp_available=False),
518
- )
519
- result = await runner.run_async(
520
- command,
521
- timeout_seconds=0.05,
522
- )
523
-
524
- assert result.exit_code == 124
525
- assert "late" not in result.stdout
526
-
527
-
528
- @pytest.mark.anyio
529
- async def test_agent_stream_stops_after_cancelled_tool(tmp_path) -> None:
530
- cancelled = False
531
-
532
- async def fake_completion(**request: object) -> object:
533
- async def chunks() -> object:
534
- yield tool_call_chunk("shell_command", {"command": "slow"})
535
-
536
- return chunks()
537
-
538
- async def fake_runner(
539
- name: str, arguments: dict[str, object], context: ToolContext
540
- ):
541
- nonlocal cancelled
542
- try:
543
- await asyncio.sleep(10)
544
- except asyncio.CancelledError:
545
- cancelled = True
546
- raise
547
-
548
- stream = run_agent_stream(
549
- completion=fake_completion,
550
- connection=ProviderConnection(
551
- base_url=None,
552
- model="gpt-5.1",
553
- name="OpenAI",
554
- provider=ProviderFormat.OPENAI,
555
- secret_reference="sk-local",
556
- ),
557
- cwd=tmp_path,
558
- messages=[{"role": "user", "content": "Run it."}],
559
- tool_runner=fake_runner,
560
- )
561
-
562
- await stream.__anext__()
563
- await stream.__anext__()
564
- await stream.__anext__()
565
- next_event = asyncio.create_task(stream.__anext__())
566
- await asyncio.sleep(0)
567
- next_event.cancel()
568
- with pytest.raises(asyncio.CancelledError):
569
- await next_event
570
- await stream.aclose()
571
-
572
- assert cancelled
573
-
574
-
575
- def test_shell_command_denies_ptrace_when_seccomp_is_available(tmp_path) -> None:
576
- command = SandboxRunner(cwd=tmp_path).build_command(["/bin/true"])
577
- if not command.seccomp_available:
578
- assert command.args[0].endswith("bwrap")
579
- return
580
-
581
- result = run_tool(
582
- "shell_command",
583
- {
584
- "command": "python - <<'PY'\nimport ctypes, os\nprint(ctypes.CDLL(None).ptrace(0, 0, None, None))\nPY"
585
- },
586
- ToolContext(cwd=tmp_path),
587
- )
588
-
589
- assert not result.ok or "-1" in result.content
590
-
591
-
592
- def test_apply_patch_modifies_workdir_file(tmp_path) -> None:
593
- target = tmp_path / "notes.txt"
594
- target.write_text("alpha\nbeta\n")
595
- patch = """*** Begin Patch
596
- *** Update File: notes.txt
597
- @@
598
- -beta
599
- +ready
600
- *** End Patch
601
- """
602
-
603
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
604
-
605
- assert result.ok
606
- assert result.title == "Edited notes.txt"
607
- assert target.read_text() == "alpha\nready\n"
608
-
609
-
610
- def test_apply_patch_added_file_title(tmp_path) -> None:
611
- patch = """*** Begin Patch
612
- *** Add File: created.txt
613
- +hello
614
- *** End Patch
615
- """
616
-
617
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
618
-
619
- assert result.ok
620
- assert result.title == "Added created.txt"
621
- assert (tmp_path / "created.txt").read_text() == "hello\n"
622
-
623
-
624
- def test_apply_patch_deleted_file_title(tmp_path) -> None:
625
- target = tmp_path / "old.txt"
626
- target.write_text("remove me\n")
627
- patch = """*** Begin Patch
628
- *** Delete File: old.txt
629
- *** End Patch
630
- """
631
-
632
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
633
-
634
- assert result.ok
635
- assert result.title == "Deleted old.txt"
636
- assert not target.exists()
637
-
638
-
639
- def test_apply_patch_multiple_files_title(tmp_path) -> None:
640
- target = tmp_path / "notes.txt"
641
- target.write_text("alpha\nbeta\n")
642
- patch = """*** Begin Patch
643
- *** Update File: notes.txt
644
- @@
645
- -beta
646
- +ready
647
- *** Add File: created.txt
648
- +hello
649
- *** End Patch
650
- """
651
-
652
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
653
-
654
- assert result.ok
655
- assert result.title == "Edited 2 files"
656
- assert target.read_text() == "alpha\nready\n"
657
- assert (tmp_path / "created.txt").read_text() == "hello\n"
658
-
659
-
660
- def test_apply_patch_rejects_outside_workdir_file(tmp_path) -> None:
661
- outside = Path(__file__).resolve().parent / "outside-patch.txt"
662
- outside.write_text("alpha\n")
663
- try:
664
- patch = f"""*** Begin Patch
665
- *** Update File: {outside}
666
- @@
667
- -alpha
668
- +beta
669
- *** End Patch
670
- """
671
-
672
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
673
-
674
- assert not result.ok
675
- assert result.title == "Edit failed"
676
- assert outside.read_text() == "alpha\n"
677
- finally:
678
- outside.unlink(missing_ok=True)
679
-
680
-
681
- def test_apply_patch_uses_internal_subcommand(tmp_path, monkeypatch) -> None:
682
- calls: list[list[str]] = []
683
-
684
- def fake_run(self, command, **kwargs):
685
- calls.append(command)
686
- from flowent.sandbox import CommandResult
687
-
688
- return CommandResult(
689
- command=" ".join(command), exit_code=0, stderr="", stdout="{}"
690
- )
691
-
692
- monkeypatch.setattr(SandboxRunner, "run", fake_run)
693
- patch = """*** Begin Patch
694
- *** Add File: created.txt
695
- +hello
696
- *** End Patch
697
- """
698
-
699
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
700
-
701
- assert result.ok
702
- assert result.title == "Edited files"
703
- assert calls
704
- assert calls[0][1:4] == ["-m", "flowent.cli", "apply-patch"]
705
-
706
-
707
- def test_apply_patch_reports_patch_error_when_stderr_has_warning(
708
- tmp_path, monkeypatch
709
- ) -> None:
710
- def fake_run(self, command, **kwargs):
711
- from flowent.sandbox import CommandResult
712
-
713
- return CommandResult(
714
- command=" ".join(command),
715
- exit_code=1,
716
- stderr="RuntimeWarning: flowent.cli was already imported\n",
717
- stdout='{"error": "Patch context was not found."}\n',
718
- )
719
-
720
- monkeypatch.setattr(SandboxRunner, "run", fake_run)
721
- patch = """*** Begin Patch
722
- *** Update File: notes.txt
723
- @@
724
- -missing
725
- +ready
726
- *** End Patch
727
- """
728
-
729
- result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
730
-
731
- assert not result.ok
732
- assert result.title == "Edit failed"
733
- assert result.content == "Patch context was not found."
734
-
735
-
736
- def test_web_search_result_enters_tool_output(tmp_path) -> None:
737
- def fake_search(query: str):
738
- return [{"title": "Result", "url": "https://example.test", "snippet": query}]
739
-
740
- result = run_tool(
741
- "web_search",
742
- {"query": "release checklist"},
743
- ToolContext(cwd=tmp_path, web_searcher=fake_search),
744
- )
745
-
746
- assert result.ok
747
- assert "https://example.test" in result.content
748
-
749
-
750
- def test_agent_continues_until_final_text_after_multiple_tool_rounds(
751
- tmp_path, monkeypatch
752
- ) -> None:
753
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
754
- workdir = tmp_path / "workdir"
755
- workdir.mkdir()
756
- (workdir / "notes.txt").write_text("Launch notes")
757
- monkeypatch.chdir(workdir)
758
- captured_requests: list[dict[str, object]] = []
759
-
760
- async def fake_completion(**request: object) -> object:
761
- captured_requests.append(request)
762
-
763
- async def chunks() -> object:
764
- if len(captured_requests) == 1:
765
- yield tool_call_chunk("list_dir", {"path": "."}, call_id="call-list")
766
- elif len(captured_requests) == 2:
767
- yield tool_call_chunk(
768
- "read_file", {"path": "notes.txt"}, call_id="call-read"
769
- )
770
- else:
771
- yield text_chunk("The notes are ready.")
772
-
773
- return chunks()
774
-
775
- client = TestClient(
776
- create_app(serve_frontend=False, chat_completion=fake_completion)
777
- )
778
- configure_provider(client)
779
-
780
- response = client.post(
781
- "/api/workspace/respond",
782
- json={"content": "Inspect the workspace."},
783
- )
784
-
785
- assert response.status_code == 200
786
- events = stream_events(response.text)
787
- assert [event["event"] for event in events] == [
788
- "start",
789
- "output_start",
790
- "tool_start",
791
- "tool_done",
792
- "output_start",
793
- "tool_start",
794
- "tool_done",
795
- "output_start",
796
- "delta",
797
- "done",
798
- ]
799
- assert len(captured_requests) == 3
800
- assert captured_requests[2]["messages"][-1] == {
801
- "role": "tool",
802
- "tool_call_id": "call-read",
803
- "content": "Launch notes",
804
- }
805
- assert events[-1]["data"]["message"]["content"] == "The notes are ready."
806
-
807
-
808
- @pytest.mark.anyio
809
- async def test_agent_logs_model_call_decisions_after_tool_rounds(
810
- tmp_path, caplog
811
- ) -> None:
812
- (tmp_path / "notes.txt").write_text("Launch notes")
813
- captured_requests: list[dict[str, object]] = []
814
- caplog.set_level(logging.INFO, logger="flowent.agent")
815
-
816
- async def fake_completion(**request: object) -> object:
817
- captured_requests.append(request)
818
-
819
- async def chunks() -> object:
820
- if len(captured_requests) == 1:
821
- yield tool_call_chunk("read_file", {"path": "notes.txt"})
822
- else:
823
- yield text_chunk("The notes are ready.")
824
-
825
- return chunks()
826
-
827
- events = [
828
- event
829
- async for event in run_agent_stream(
830
- completion=fake_completion,
831
- connection=ProviderConnection(
832
- model="gpt-5.1",
833
- name="Provider",
834
- provider=ProviderFormat.OPENAI,
835
- secret_reference="secret",
836
- ),
837
- cwd=tmp_path,
838
- messages=[{"role": "user", "content": "Inspect notes."}],
839
- )
840
- ]
841
- rendered_logs = "\n".join(record.getMessage() for record in caplog.records)
842
-
843
- assert events[-1].data["message"]["content"] == "The notes are ready."
844
- assert "Agent model call started" in rendered_logs
845
- assert "round=1" in rendered_logs
846
- assert "round=2" in rendered_logs
847
- assert "decision=run_tools" in rendered_logs
848
- assert "decision=final_response" in rendered_logs
849
- assert "Agent continuing after tools" in rendered_logs
850
-
851
-
852
- @pytest.mark.anyio
853
- async def test_agent_logs_model_call_failure_after_tool_result(
854
- tmp_path, caplog
855
- ) -> None:
856
- (tmp_path / "notes.txt").write_text("Launch notes")
857
- captured_requests: list[dict[str, object]] = []
858
- caplog.set_level(logging.INFO, logger="flowent.agent")
859
-
860
- async def fake_completion(**request: object) -> object:
861
- captured_requests.append(request)
862
-
863
- async def chunks() -> object:
864
- if len(captured_requests) == 1:
865
- yield tool_call_chunk("read_file", {"path": "notes.txt"})
866
- return
867
- raise RuntimeError("stream request failed")
868
-
869
- return chunks()
870
-
871
- with pytest.raises(RuntimeError, match="stream request failed"):
872
- [
873
- event
874
- async for event in run_agent_stream(
875
- completion=fake_completion,
876
- connection=ProviderConnection(
877
- model="gpt-5.1",
878
- name="Provider",
879
- provider=ProviderFormat.OPENAI,
880
- secret_reference="secret",
881
- ),
882
- cwd=tmp_path,
883
- messages=[{"role": "user", "content": "Inspect notes."}],
884
- )
885
- ]
886
- rendered_logs = "\n".join(record.getMessage() for record in caplog.records)
887
-
888
- assert len(captured_requests) == 2
889
- assert "Agent model call failed" in rendered_logs
890
- assert "round=2" in rendered_logs
891
- assert "chunk_count=0" in rendered_logs
892
-
893
-
894
- @pytest.mark.anyio
895
- async def test_agent_does_not_log_final_response_when_responses_stream_fails(
896
- tmp_path, caplog, fake_litellm_responses_transformer
897
- ) -> None:
898
- caplog.set_level(logging.INFO, logger="flowent.agent")
899
-
900
- async def fake_completion(**request: object) -> object:
901
- async def chunks() -> object:
902
- from litellm.completion_extras.litellm_responses_transformation.transformation import (
903
- OpenAiResponsesToChatCompletionStreamIterator,
904
- )
905
-
906
- yield text_chunk("Partial answer.")
907
- yield OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(
908
- {
909
- "response": {
910
- "error": {
911
- "code": "upstream_error",
912
- "message": "Upstream request failed",
913
- },
914
- "status": "failed",
915
- },
916
- "type": "response.failed",
917
- }
918
- )
919
-
920
- return chunks()
921
-
922
- with pytest.raises(RuntimeError, match="Upstream request failed"):
923
- [
924
- event
925
- async for event in run_agent_stream(
926
- completion=fake_completion,
927
- connection=ProviderConnection(
928
- model="gpt-5.1",
929
- name="Provider",
930
- provider=ProviderFormat.OPENAI,
931
- secret_reference="secret",
932
- ),
933
- cwd=tmp_path,
934
- messages=[{"role": "user", "content": "Inspect notes."}],
935
- )
936
- ]
937
- rendered_logs = "\n".join(record.getMessage() for record in caplog.records)
938
-
939
- assert "Agent model call failed" in rendered_logs
940
- assert "decision=final_response" not in rendered_logs
941
-
942
-
943
- def test_agent_finishes_without_tools(tmp_path, monkeypatch) -> None:
944
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
945
- monkeypatch.chdir(tmp_path)
946
- captured_requests: list[dict[str, object]] = []
947
-
948
- async def fake_completion(**request: object) -> object:
949
- captured_requests.append(request)
950
-
951
- async def chunks() -> object:
952
- yield text_chunk("Direct answer.")
953
-
954
- return chunks()
955
-
956
- client = TestClient(
957
- create_app(serve_frontend=False, chat_completion=fake_completion)
958
- )
959
- configure_provider(client)
960
-
961
- response = client.post(
962
- "/api/workspace/respond",
963
- json={"content": "Answer directly."},
964
- )
965
-
966
- assert response.status_code == 200
967
- events = stream_events(response.text)
968
- assert [event["event"] for event in events] == [
969
- "start",
970
- "output_start",
971
- "delta",
972
- "done",
973
- ]
974
- assert len(captured_requests) == 1
975
- assert events[-1]["data"]["message"]["content"] == "Direct answer."
976
-
977
-
978
- def test_agent_streams_and_persists_thinking(tmp_path, monkeypatch) -> None:
979
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
980
- monkeypatch.chdir(tmp_path)
981
-
982
- async def fake_completion(**request: object) -> object:
983
- async def chunks() -> object:
984
- yield thinking_chunk("Checking context.")
985
- yield thinking_chunk(" Preparing answer.")
986
- yield text_chunk("Direct answer.")
987
-
988
- return chunks()
989
-
990
- client = TestClient(
991
- create_app(serve_frontend=False, chat_completion=fake_completion)
992
- )
993
- configure_provider(client)
994
-
995
- response = client.post(
996
- "/api/workspace/respond",
997
- json={"content": "Answer directly."},
998
- )
999
-
1000
- assert response.status_code == 200
1001
- events = stream_events(response.text)
1002
- assert [event["event"] for event in events] == [
1003
- "start",
1004
- "output_start",
1005
- "thinking_delta",
1006
- "thinking_delta",
1007
- "delta",
1008
- "done",
1009
- ]
1010
- assert events[2]["data"] == {"content": "Checking context."}
1011
- assert events[-1]["data"]["message"]["thinking"] == (
1012
- "Checking context. Preparing answer."
1013
- )
1014
- state = client.get("/api/state").json()
1015
- assert state["messages"][-1]["thinking"] == ("Checking context. Preparing answer.")
1016
-
1017
-
1018
- def test_tool_failure_is_reported_and_agent_continues(tmp_path, monkeypatch) -> None:
1019
- monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
1020
- monkeypatch.chdir(tmp_path)
1021
- captured_requests: list[dict[str, object]] = []
1022
-
1023
- async def fake_completion(**request: object) -> object:
1024
- captured_requests.append(request)
1025
-
1026
- async def chunks() -> object:
1027
- if len(captured_requests) == 1:
1028
- yield tool_call_chunk("read_file", {"path": "missing.txt"})
1029
- else:
1030
- yield text_chunk("I could not read it.")
1031
-
1032
- return chunks()
1033
-
1034
- client = TestClient(
1035
- create_app(serve_frontend=False, chat_completion=fake_completion)
1036
- )
1037
- configure_provider(client)
1038
-
1039
- response = client.post(
1040
- "/api/workspace/respond",
1041
- json={"content": "Read it."},
1042
- )
1043
-
1044
- events = stream_events(response.text)
1045
- assert "tool_error" in [event["event"] for event in events]
1046
- assert len(captured_requests) == 2
1047
- assert captured_requests[1]["messages"][-1]["role"] == "tool"
1048
- assert captured_requests[1]["messages"][-1]["tool_call_id"] == "call-1"
1049
- assert "missing.txt" in captured_requests[1]["messages"][-1]["content"]
1050
- assert events[-1]["data"]["message"]["content"] == "I could not read it."
1051
-
1052
-
1053
- @pytest.mark.anyio
1054
- async def test_approval_denial_result_is_sent_to_agent(tmp_path) -> None:
1055
- captured_requests: list[dict[str, object]] = []
1056
-
1057
- async def fake_completion(**request: object) -> object:
1058
- captured_requests.append(request)
1059
-
1060
- async def chunks() -> object:
1061
- if len(captured_requests) == 1:
1062
- yield tool_call_chunk(
1063
- "shell_command",
1064
- {"command": "rm -rf /important"},
1065
- )
1066
- else:
1067
- yield text_chunk("I need explicit approval for that risk.")
1068
-
1069
- return chunks()
1070
-
1071
- async def denying_tool_runner(
1072
- name: str,
1073
- arguments: dict[str, object],
1074
- context: ToolContext,
1075
- ) -> ToolResult:
1076
- return ToolResult(
1077
- content=(
1078
- "Automatic approval review denied this action as high risk: "
1079
- "The command can delete broad data. The agent must not work around "
1080
- "this denial."
1081
- ),
1082
- ok=False,
1083
- title="Denied by reviewer",
1084
- )
1085
-
1086
- events = [
1087
- event
1088
- async for event in run_agent_stream(
1089
- completion=fake_completion,
1090
- connection=ProviderConnection(
1091
- model="gpt-5.1",
1092
- name="Provider",
1093
- provider=ProviderFormat.OPENAI,
1094
- secret_reference="secret",
1095
- ),
1096
- cwd=tmp_path,
1097
- messages=[{"role": "user", "content": "Delete the important directory."}],
1098
- tool_runner=denying_tool_runner,
1099
- )
1100
- ]
1101
-
1102
- assert len(captured_requests) == 2
1103
- assert captured_requests[1]["messages"][-1]["role"] == "tool"
1104
- assert "Automatic approval review denied this action" in str(
1105
- captured_requests[1]["messages"][-1]["content"]
1106
- )
1107
- assert "must not work around" in str(
1108
- captured_requests[1]["messages"][-1]["content"]
1109
- )
1110
- assert events[-2].data["content"] == "I need explicit approval for that risk."
1111
- assert events[-1].data["message"]["content"] == (
1112
- "I need explicit approval for that risk."
1113
- )
1114
-
1115
-
1116
- def test_update_plan_outputs_plan_state(tmp_path) -> None:
1117
- result = run_tool(
1118
- "update_plan",
1119
- {"items": [{"step": "Read files", "status": "completed"}]},
1120
- ToolContext(cwd=tmp_path),
1121
- )
1122
-
1123
- assert result.ok
1124
- assert "Read files" in result.content