flowent 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/backend/pyproject.toml +1 -1
  2. package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
  3. package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
  4. package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
  5. package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
  6. package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
  7. package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
  8. package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
  9. package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
  10. package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
  11. package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
  12. package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
  13. package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
  14. package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
  15. package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
  16. package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
  17. package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
  18. package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
  19. package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
  20. package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
  21. package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
  22. package/backend/src/flowent/agent.py +23 -1
  23. package/backend/src/flowent/approval.py +148 -0
  24. package/backend/src/flowent/cli.py +16 -2
  25. package/backend/src/flowent/compact.py +183 -0
  26. package/backend/src/flowent/context.py +19 -1
  27. package/backend/src/flowent/llm.py +51 -11
  28. package/backend/src/flowent/logging.py +60 -0
  29. package/backend/src/flowent/main.py +696 -192
  30. package/backend/src/flowent/mcp.py +3 -1
  31. package/backend/src/flowent/patch.py +55 -31
  32. package/backend/src/flowent/paths.py +12 -0
  33. package/backend/src/flowent/permissions.py +185 -42
  34. package/backend/src/flowent/sandbox.py +146 -13
  35. package/backend/src/flowent/static/assets/index-Cl20cARb.css +2 -0
  36. package/backend/src/flowent/static/assets/index-dsDDsEym.js +81 -0
  37. package/backend/src/flowent/static/index.html +2 -2
  38. package/backend/src/flowent/storage.py +257 -9
  39. package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
  40. package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
  41. package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
  42. package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
  43. package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
  44. package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
  45. package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
  46. package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
  47. package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
  48. package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
  49. package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
  50. package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
  51. package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
  52. package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
  53. package/backend/tests/test_agent_tools.py +312 -1
  54. package/backend/tests/test_approval.py +283 -0
  55. package/backend/tests/test_llm_providers.py +216 -0
  56. package/backend/tests/test_logging.py +30 -0
  57. package/backend/tests/test_mcp.py +76 -10
  58. package/backend/tests/test_patch.py +112 -0
  59. package/backend/tests/test_permissions.py +198 -53
  60. package/backend/tests/test_persistence.py +78 -0
  61. package/backend/tests/test_startup_requirements.py +96 -0
  62. package/backend/tests/test_workspace_chat.py +1265 -144
  63. package/backend/uv.lock +1 -1
  64. package/dist/frontend/assets/index-Cl20cARb.css +2 -0
  65. package/dist/frontend/assets/index-dsDDsEym.js +81 -0
  66. package/dist/frontend/index.html +2 -2
  67. package/package.json +2 -2
  68. package/backend/src/flowent/static/assets/index-DjF2KBwE.js +0 -81
  69. package/backend/src/flowent/static/assets/index-P-bBpJG8.css +0 -2
  70. package/dist/frontend/assets/index-DjF2KBwE.js +0 -81
  71. package/dist/frontend/assets/index-P-bBpJG8.css +0 -2
package/backend/tests/test_agent_tools.py +312 -1
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import json
3
+ import subprocess
3
4
  import time
4
5
  from pathlib import Path
5
6
 
@@ -10,7 +11,7 @@ from flowent.agent import FLOWENT_AGENT_SYSTEM_PROMPT, run_agent_stream
10
11
  from flowent.llm import ProviderConnection, ProviderFormat
11
12
  from flowent.main import create_app
12
13
  from flowent.sandbox import SandboxCommand, SandboxRunner
13
- from flowent.tools import ToolContext, run_tool
14
+ from flowent.tools import ToolContext, ToolResult, run_tool
14
15
 
15
16
 
16
17
  def stream_events(content: str) -> list[dict[str, object]]:
@@ -226,6 +227,253 @@ def test_shell_command_has_network_by_default(tmp_path) -> None:
226
227
  assert "network-ready" in result.content
227
228
 
228
229
 
230
+ def test_sandbox_command_keeps_proc_mount_when_preflight_succeeds(
231
+ tmp_path, monkeypatch
232
+ ) -> None:
233
+ runner = SandboxRunner(cwd=tmp_path)
234
+ monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: True)
235
+
236
+ command = runner.build_command(["/bin/true"])
237
+
238
+ assert command.args[command.args.index("--proc") + 1] == "/proc"
239
+
240
+
241
+ def test_sandbox_command_omits_proc_mount_when_preflight_reports_permission_error(
242
+ tmp_path, monkeypatch
243
+ ) -> None:
244
+ runner = SandboxRunner(cwd=tmp_path)
245
+ monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
246
+
247
+ command = runner.build_command(["/bin/true"])
248
+
249
+ assert "--proc" not in command.args
250
+
251
+
252
+ def test_sandbox_command_binds_writable_socket_path(tmp_path, monkeypatch) -> None:
253
+ socket_path = tmp_path / "docker.sock"
254
+ socket_path.touch()
255
+ runner = SandboxRunner(cwd=tmp_path, writable_roots=[socket_path])
256
+ monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
257
+
258
+ command = runner.build_command(["/bin/true"])
259
+
260
+ bind_index = command.args.index(str(socket_path))
261
+ assert command.args[bind_index - 1] == "--bind"
262
+ assert command.args[bind_index + 1] == str(socket_path)
263
+
264
+
265
+ def test_sandbox_proc_preflight_does_not_hide_non_proc_errors(
266
+ tmp_path, monkeypatch
267
+ ) -> None:
268
+ bwrap = tmp_path / "bwrap"
269
+ bwrap.write_text("#!/bin/sh\necho 'bwrap: unrelated startup failure' >&2\nexit 1\n")
270
+ bwrap.chmod(0o700)
271
+ monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
272
+
273
+ assert SandboxRunner(cwd=tmp_path).build_command(["/bin/true"]).args[0:7] == [
274
+ str(bwrap),
275
+ "--ro-bind",
276
+ "/",
277
+ "/",
278
+ "--dev",
279
+ "/dev",
280
+ "--proc",
281
+ ]
282
+
283
+
284
+ def test_shell_command_runs_without_proc_mount_after_preflight_fallback(
285
+ tmp_path, monkeypatch
286
+ ) -> None:
287
+ bwrap = tmp_path / "bwrap"
288
+ bwrap.write_text(
289
+ "#!/bin/sh\n"
290
+ 'for arg in "$@"; do\n'
291
+ ' if [ "$arg" = --proc ]; then\n'
292
+ ' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
293
+ " exit 1\n"
294
+ " fi\n"
295
+ "done\n"
296
+ 'while [ "$#" -gt 0 ]; do\n'
297
+ ' if [ "$1" = -- ]; then\n'
298
+ " shift\n"
299
+ ' exec "$@"\n'
300
+ " fi\n"
301
+ " shift\n"
302
+ "done\n"
303
+ )
304
+ bwrap.chmod(0o700)
305
+ monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
306
+
307
+ result = SandboxRunner(cwd=tmp_path).run(["/bin/sh", "-c", "printf ok"])
308
+
309
+ assert result.exit_code == 0
310
+ assert result.stdout == "ok"
311
+
312
+
313
+ def test_apply_patch_runs_without_proc_mount_after_preflight_fallback(
314
+ tmp_path, monkeypatch
315
+ ) -> None:
316
+ bwrap = tmp_path / "bwrap"
317
+ bwrap.write_text(
318
+ "#!/bin/sh\n"
319
+ 'for arg in "$@"; do\n'
320
+ ' if [ "$arg" = --proc ]; then\n'
321
+ ' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
322
+ " exit 1\n"
323
+ " fi\n"
324
+ "done\n"
325
+ 'while [ "$#" -gt 0 ]; do\n'
326
+ ' if [ "$1" = -- ]; then\n'
327
+ " shift\n"
328
+ ' exec "$@"\n'
329
+ " fi\n"
330
+ " shift\n"
331
+ "done\n"
332
+ )
333
+ bwrap.chmod(0o700)
334
+ monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
335
+ target = tmp_path / "notes.txt"
336
+ target.write_text("alpha\n")
337
+ patch = """*** Begin Patch
338
+ *** Update File: notes.txt
339
+ @@
340
+ -alpha
341
+ +beta
342
+ *** End Patch
343
+ """
344
+
345
+ result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
346
+
347
+ assert result.ok
348
+ assert target.read_text() == "beta\n"
349
+
350
+
351
+ def test_shell_command_environment_omits_development_variables(
352
+ tmp_path, monkeypatch
353
+ ) -> None:
354
+ monkeypatch.setenv("NODE_ENV", "production")
355
+ monkeypatch.setenv("VIRTUAL_ENV", "/tmp/flowent-venv")
356
+ monkeypatch.setenv("PYTHONPATH", "/tmp/flowent-pythonpath")
357
+ runner = SandboxRunner(cwd=tmp_path)
358
+ monkeypatch.setattr(
359
+ runner,
360
+ "build_command",
361
+ lambda command: SandboxCommand(command, seccomp_available=False),
362
+ )
363
+
364
+ result = runner.run(
365
+ [
366
+ "/bin/sh",
367
+ "-c",
368
+ 'printf \'%s|%s|%s\' "${NODE_ENV-unset}" "${VIRTUAL_ENV-unset}" "${PYTHONPATH-unset}"',
369
+ ]
370
+ )
371
+
372
+ assert result.exit_code == 0
373
+ assert result.stdout == "unset|unset|unset"
374
+
375
+
376
+ def test_shell_command_environment_omits_sensitive_variables(
377
+ tmp_path, monkeypatch
378
+ ) -> None:
379
+ monkeypatch.setenv("OPENAI_API_KEY", "sk-local")
380
+ monkeypatch.setenv("SECRET_TOKEN", "secret")
381
+ monkeypatch.setenv("NPM_TOKEN", "npm")
382
+ runner = SandboxRunner(cwd=tmp_path)
383
+ monkeypatch.setattr(
384
+ runner,
385
+ "build_command",
386
+ lambda command: SandboxCommand(command, seccomp_available=False),
387
+ )
388
+
389
+ result = runner.run(
390
+ [
391
+ "/bin/sh",
392
+ "-c",
393
+ 'printf \'%s|%s|%s\' "${OPENAI_API_KEY-unset}" "${SECRET_TOKEN-unset}" "${NPM_TOKEN-unset}"',
394
+ ]
395
+ )
396
+
397
+ assert result.exit_code == 0
398
+ assert result.stdout == "unset|unset|unset"
399
+
400
+
401
+ def test_shell_command_environment_keeps_core_variables(tmp_path, monkeypatch) -> None:
402
+ monkeypatch.setenv("HOME", str(tmp_path / "home"))
403
+ monkeypatch.setenv("PATH", "/usr/local/bin:/usr/bin:/bin")
404
+ monkeypatch.setenv("SHELL", "/bin/sh")
405
+ monkeypatch.setenv("USER", "flowent")
406
+ runner = SandboxRunner(cwd=tmp_path)
407
+ monkeypatch.setattr(
408
+ runner,
409
+ "build_command",
410
+ lambda command: SandboxCommand(command, seccomp_available=False),
411
+ )
412
+
413
+ result = runner.run(
414
+ [
415
+ "/bin/sh",
416
+ "-c",
417
+ 'printf \'%s|%s|%s|%s\' "$HOME" "$PATH" "$SHELL" "$USER"',
418
+ ]
419
+ )
420
+
421
+ assert result.exit_code == 0
422
+ assert (
423
+ result.stdout
424
+ == f"{tmp_path / 'home'}|/usr/local/bin:/usr/bin:/bin|/bin/sh|flowent"
425
+ )
426
+
427
+
428
+ def test_shell_command_environment_uses_default_path_when_missing(
429
+ tmp_path, monkeypatch
430
+ ) -> None:
431
+ monkeypatch.delenv("PATH", raising=False)
432
+ runner = SandboxRunner(cwd=tmp_path)
433
+ captured_env: dict[str, str] = {}
434
+
435
+ def fake_run(*args, **kwargs):
436
+ captured_env.update(kwargs["env"])
437
+ return subprocess.CompletedProcess(
438
+ args=args[0], returncode=0, stdout="", stderr=""
439
+ )
440
+
441
+ monkeypatch.setattr(
442
+ runner,
443
+ "build_command",
444
+ lambda command: SandboxCommand(command, seccomp_available=False),
445
+ )
446
+ monkeypatch.setattr("subprocess.run", fake_run)
447
+
448
+ result = runner.run(["/bin/sh", "-c", "true"])
449
+
450
+ assert result.exit_code == 0
451
+ assert (
452
+ captured_env["PATH"]
453
+ == "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
454
+ )
455
+
456
+
457
+ def test_shell_command_environment_accepts_explicit_overrides(
458
+ tmp_path, monkeypatch
459
+ ) -> None:
460
+ monkeypatch.delenv("FLOWENT_TOOL_VAR", raising=False)
461
+ runner = SandboxRunner(cwd=tmp_path)
462
+ monkeypatch.setattr(
463
+ runner,
464
+ "build_command",
465
+ lambda command: SandboxCommand(command, seccomp_available=False),
466
+ )
467
+
468
+ result = runner.run(
469
+ ["/bin/sh", "-c", "printf '%s' \"$FLOWENT_TOOL_VAR\""],
470
+ env={"FLOWENT_TOOL_VAR": "explicit"},
471
+ )
472
+
473
+ assert result.exit_code == 0
474
+ assert result.stdout == "explicit"
475
+
476
+
229
477
  @pytest.mark.anyio
230
478
  async def test_async_shell_command_does_not_block_other_tasks(
231
479
  tmp_path, monkeypatch
@@ -666,6 +914,69 @@ def test_tool_failure_is_reported_and_agent_continues(tmp_path, monkeypatch) ->
666
914
  assert events[-1]["data"]["message"]["content"] == "I could not read it."
667
915
 
668
916
 
917
+ @pytest.mark.anyio
918
+ async def test_approval_denial_result_is_sent_to_agent(tmp_path) -> None:
919
+ captured_requests: list[dict[str, object]] = []
920
+
921
+ async def fake_completion(**request: object) -> object:
922
+ captured_requests.append(request)
923
+
924
+ async def chunks() -> object:
925
+ if len(captured_requests) == 1:
926
+ yield tool_call_chunk(
927
+ "shell_command",
928
+ {"command": "rm -rf /important"},
929
+ )
930
+ else:
931
+ yield text_chunk("I need explicit approval for that risk.")
932
+
933
+ return chunks()
934
+
935
+ async def denying_tool_runner(
936
+ name: str,
937
+ arguments: dict[str, object],
938
+ context: ToolContext,
939
+ ) -> ToolResult:
940
+ return ToolResult(
941
+ content=(
942
+ "Automatic approval review denied this action as high risk: "
943
+ "The command can delete broad data. The agent must not work around "
944
+ "this denial."
945
+ ),
946
+ ok=False,
947
+ title="Denied by reviewer",
948
+ )
949
+
950
+ events = [
951
+ event
952
+ async for event in run_agent_stream(
953
+ completion=fake_completion,
954
+ connection=ProviderConnection(
955
+ model="gpt-5.1",
956
+ name="Provider",
957
+ provider=ProviderFormat.OPENAI,
958
+ secret_reference="secret",
959
+ ),
960
+ cwd=tmp_path,
961
+ messages=[{"role": "user", "content": "Delete the important directory."}],
962
+ tool_runner=denying_tool_runner,
963
+ )
964
+ ]
965
+
966
+ assert len(captured_requests) == 2
967
+ assert captured_requests[1]["messages"][-1]["role"] == "tool"
968
+ assert "Automatic approval review denied this action" in str(
969
+ captured_requests[1]["messages"][-1]["content"]
970
+ )
971
+ assert "must not work around" in str(
972
+ captured_requests[1]["messages"][-1]["content"]
973
+ )
974
+ assert events[-2].data["content"] == "I need explicit approval for that risk."
975
+ assert events[-1].data["message"]["content"] == (
976
+ "I need explicit approval for that risk."
977
+ )
978
+
979
+
669
980
  def test_update_plan_outputs_plan_state(tmp_path) -> None:
670
981
  result = run_tool(
671
982
  "update_plan",
package/backend/tests/test_approval.py +283 -0
@@ -0,0 +1,283 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from flowent.approval import (
6
+ ApprovalReviewRequest,
7
+ ApprovalTranscriptEntry,
8
+ review_approval_request,
9
+ )
10
+ from flowent.llm import ProviderConnection, ProviderFormat
11
+
12
+
13
+ def provider_connection() -> ProviderConnection:
14
+ return ProviderConnection(
15
+ model="model",
16
+ name="Provider",
17
+ provider=ProviderFormat.OPENAI,
18
+ secret_reference="secret",
19
+ )
20
+
21
+
22
+ @pytest.mark.anyio
23
+ async def test_review_payload_includes_current_user_request_and_transcript(
24
+ tmp_path,
25
+ ) -> None:
26
+ captured_messages: list[dict[str, object]] = []
27
+
28
+ async def fake_completion(**request: object) -> object:
29
+ captured_messages.extend(request["messages"])
30
+ return {
31
+ "choices": [
32
+ {
33
+ "message": {
34
+ "content": json.dumps(
35
+ {
36
+ "risk_level": "low",
37
+ "risk_score": 25,
38
+ "rationale": "User approved after concrete risk context.",
39
+ "evidence": [
40
+ {
41
+ "message": "Assistant explained Docker socket impact.",
42
+ "why": "Establishes informed consent.",
43
+ }
44
+ ],
45
+ }
46
+ ),
47
+ "role": "assistant",
48
+ }
49
+ },
50
+ ],
51
+ }
52
+
53
+ decision = await review_approval_request(
54
+ provider_connection(),
55
+ ApprovalReviewRequest(
56
+ action="additional_permissions",
57
+ arguments={"command": "docker compose up -d --build"},
58
+ cwd=tmp_path,
59
+ tool_name="shell_command",
60
+ user_request="确认",
61
+ transcript=[
62
+ ApprovalTranscriptEntry(
63
+ role="assistant",
64
+ content=(
65
+ "This will recreate the dev container, write to the Docker "
66
+ "socket, and briefly interrupt the local service."
67
+ ),
68
+ ),
69
+ ApprovalTranscriptEntry(role="user", content="确认"),
70
+ ],
71
+ write_paths=[tmp_path / "docker.sock"],
72
+ ),
73
+ completion=fake_completion,
74
+ )
75
+
76
+ assert decision.decision == "approved"
77
+ assert decision.risk_level == "low"
78
+ assert decision.risk_score == 25
79
+ assert "informed of the concrete risk" in str(captured_messages[0]["content"])
80
+ payload = json.loads(str(captured_messages[-1]["content"]))
81
+ assert payload["user_request"] == "确认"
82
+ assert payload["transcript"][-1] == {"role": "user", "content": "确认"}
83
+
84
+
85
+ @pytest.mark.anyio
86
+ async def test_concrete_docker_socket_confirmation_can_be_approved(tmp_path) -> None:
87
+ async def fake_completion(**request: object) -> object:
88
+ return {
89
+ "choices": [
90
+ {
91
+ "message": {
92
+ "content": json.dumps(
93
+ {
94
+ "risk_level": "medium",
95
+ "risk_score": 55,
96
+ "rationale": (
97
+ "The user approved after being told the command "
98
+ "will recreate the dev container through Docker."
99
+ ),
100
+ "evidence": [],
101
+ }
102
+ ),
103
+ "role": "assistant",
104
+ }
105
+ }
106
+ ]
107
+ }
108
+
109
+ decision = await review_approval_request(
110
+ provider_connection(),
111
+ ApprovalReviewRequest(
112
+ action="additional_permissions",
113
+ arguments={
114
+ "command": "docker compose up -d --force-recreate flowent",
115
+ },
116
+ cwd=tmp_path,
117
+ tool_name="shell_command",
118
+ user_request="确认",
119
+ transcript=[
120
+ ApprovalTranscriptEntry(
121
+ role="assistant",
122
+ content=(
123
+ "This will recreate the Flowent dev container through "
124
+ "Docker and may briefly interrupt the running service."
125
+ ),
126
+ ),
127
+ ApprovalTranscriptEntry(role="user", content="确认"),
128
+ ],
129
+ write_paths=[tmp_path / "docker.sock"],
130
+ ),
131
+ completion=fake_completion,
132
+ )
133
+
134
+ assert decision.decision == "approved"
135
+ assert decision.risk_level == "medium"
136
+ assert decision.risk_score == 55
137
+
138
+
139
+ @pytest.mark.anyio
140
+ async def test_vague_confirmation_without_concrete_risk_context_is_denied(
141
+ tmp_path,
142
+ ) -> None:
143
+ captured_payload: dict[str, object] = {}
144
+
145
+ async def fake_completion(**request: object) -> object:
146
+ captured_payload.update(json.loads(str(request["messages"][-1]["content"])))
147
+ return {
148
+ "choices": [
149
+ {
150
+ "message": {
151
+ "content": json.dumps(
152
+ {
153
+ "risk_level": "high",
154
+ "risk_score": 82,
155
+ "rationale": (
156
+ "The transcript only contains a vague confirmation "
157
+ "and no concrete Docker risk explanation."
158
+ ),
159
+ "evidence": [],
160
+ }
161
+ ),
162
+ "role": "assistant",
163
+ }
164
+ }
165
+ ]
166
+ }
167
+
168
+ decision = await review_approval_request(
169
+ provider_connection(),
170
+ ApprovalReviewRequest(
171
+ action="additional_permissions",
172
+ arguments={
173
+ "command": "docker compose up -d --force-recreate flowent",
174
+ },
175
+ cwd=tmp_path,
176
+ tool_name="shell_command",
177
+ user_request="确认",
178
+ transcript=[ApprovalTranscriptEntry(role="user", content="确认")],
179
+ write_paths=[tmp_path / "docker.sock"],
180
+ ),
181
+ completion=fake_completion,
182
+ )
183
+
184
+ assert decision.decision == "denied"
185
+ assert decision.risk_level == "high"
186
+ assert decision.risk_score == 82
187
+ assert captured_payload["transcript"] == [{"role": "user", "content": "确认"}]
188
+
189
+
190
+ @pytest.mark.anyio
191
+ async def test_broad_destructive_action_with_vague_confirmation_is_denied(
192
+ tmp_path,
193
+ ) -> None:
194
+ async def fake_completion(**request: object) -> object:
195
+ return {
196
+ "choices": [
197
+ {
198
+ "message": {
199
+ "content": json.dumps(
200
+ {
201
+ "risk_level": "high",
202
+ "risk_score": 96,
203
+ "rationale": (
204
+ "The action can delete broad data and the user "
205
+ "did not approve that concrete destructive risk."
206
+ ),
207
+ "evidence": [
208
+ {
209
+ "message": "rm -rf /var/lib/postgresql",
210
+ "why": "Broad destructive write outside the task.",
211
+ }
212
+ ],
213
+ }
214
+ ),
215
+ "role": "assistant",
216
+ }
217
+ }
218
+ ]
219
+ }
220
+
221
+ decision = await review_approval_request(
222
+ provider_connection(),
223
+ ApprovalReviewRequest(
224
+ action="sandbox_failure",
225
+ arguments={"command": "rm -rf /var/lib/postgresql"},
226
+ cwd=tmp_path,
227
+ tool_name="shell_command",
228
+ tool_result="Read-only file system",
229
+ user_request="确认",
230
+ transcript=[ApprovalTranscriptEntry(role="user", content="确认")],
231
+ ),
232
+ completion=fake_completion,
233
+ )
234
+
235
+ assert decision.decision == "denied"
236
+ assert decision.risk_level == "high"
237
+ assert decision.risk_score == 96
238
+
239
+
240
+ @pytest.mark.anyio
241
+ async def test_invalid_reviewer_json_is_denied(tmp_path) -> None:
242
+ async def fake_completion(**request: object) -> object:
243
+ return {
244
+ "choices": [
245
+ {"message": {"content": "approved", "role": "assistant"}},
246
+ ],
247
+ }
248
+
249
+ decision = await review_approval_request(
250
+ provider_connection(),
251
+ ApprovalReviewRequest(
252
+ action="sandbox_failure",
253
+ arguments={"command": "touch file.txt"},
254
+ cwd=tmp_path,
255
+ tool_name="shell_command",
256
+ tool_result="Read-only file system",
257
+ ),
258
+ completion=fake_completion,
259
+ )
260
+
261
+ assert decision.decision == "denied"
262
+ assert "valid JSON" in decision.reason
263
+
264
+
265
+ @pytest.mark.anyio
266
+ async def test_reviewer_call_failure_is_denied(tmp_path) -> None:
267
+ async def fake_completion(**request: object) -> object:
268
+ raise RuntimeError("model unavailable")
269
+
270
+ decision = await review_approval_request(
271
+ provider_connection(),
272
+ ApprovalReviewRequest(
273
+ action="edit",
274
+ arguments={"patch": "*** Begin Patch\n*** End Patch"},
275
+ cwd=tmp_path,
276
+ tool_name="apply_patch",
277
+ write_paths=[tmp_path / "outside"],
278
+ ),
279
+ completion=fake_completion,
280
+ )
281
+
282
+ assert decision.decision == "denied"
283
+ assert "model unavailable" in decision.reason