flowent 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +1 -1
- package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
- package/backend/src/flowent/agent.py +23 -1
- package/backend/src/flowent/approval.py +148 -0
- package/backend/src/flowent/cli.py +16 -2
- package/backend/src/flowent/compact.py +183 -0
- package/backend/src/flowent/context.py +19 -1
- package/backend/src/flowent/llm.py +51 -11
- package/backend/src/flowent/logging.py +60 -0
- package/backend/src/flowent/main.py +696 -192
- package/backend/src/flowent/mcp.py +3 -1
- package/backend/src/flowent/patch.py +55 -31
- package/backend/src/flowent/paths.py +12 -0
- package/backend/src/flowent/permissions.py +185 -42
- package/backend/src/flowent/sandbox.py +146 -13
- package/backend/src/flowent/static/assets/index-Cl20cARb.css +2 -0
- package/backend/src/flowent/static/assets/index-dsDDsEym.js +81 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/storage.py +257 -9
- package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/test_agent_tools.py +312 -1
- package/backend/tests/test_approval.py +283 -0
- package/backend/tests/test_llm_providers.py +216 -0
- package/backend/tests/test_logging.py +30 -0
- package/backend/tests/test_mcp.py +76 -10
- package/backend/tests/test_patch.py +112 -0
- package/backend/tests/test_permissions.py +198 -53
- package/backend/tests/test_persistence.py +78 -0
- package/backend/tests/test_startup_requirements.py +96 -0
- package/backend/tests/test_workspace_chat.py +1265 -144
- package/backend/uv.lock +1 -1
- package/dist/frontend/assets/index-Cl20cARb.css +2 -0
- package/dist/frontend/assets/index-dsDDsEym.js +81 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +2 -2
- package/backend/src/flowent/static/assets/index-DjF2KBwE.js +0 -81
- package/backend/src/flowent/static/assets/index-P-bBpJG8.css +0 -2
- package/dist/frontend/assets/index-DjF2KBwE.js +0 -81
- package/dist/frontend/assets/index-P-bBpJG8.css +0 -2
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
|
+
import subprocess
|
|
3
4
|
import time
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
@@ -10,7 +11,7 @@ from flowent.agent import FLOWENT_AGENT_SYSTEM_PROMPT, run_agent_stream
|
|
|
10
11
|
from flowent.llm import ProviderConnection, ProviderFormat
|
|
11
12
|
from flowent.main import create_app
|
|
12
13
|
from flowent.sandbox import SandboxCommand, SandboxRunner
|
|
13
|
-
from flowent.tools import ToolContext, run_tool
|
|
14
|
+
from flowent.tools import ToolContext, ToolResult, run_tool
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def stream_events(content: str) -> list[dict[str, object]]:
|
|
@@ -226,6 +227,253 @@ def test_shell_command_has_network_by_default(tmp_path) -> None:
|
|
|
226
227
|
assert "network-ready" in result.content
|
|
227
228
|
|
|
228
229
|
|
|
230
|
+
def test_sandbox_command_keeps_proc_mount_when_preflight_succeeds(
|
|
231
|
+
tmp_path, monkeypatch
|
|
232
|
+
) -> None:
|
|
233
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
234
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: True)
|
|
235
|
+
|
|
236
|
+
command = runner.build_command(["/bin/true"])
|
|
237
|
+
|
|
238
|
+
assert command.args[command.args.index("--proc") + 1] == "/proc"
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_sandbox_command_omits_proc_mount_when_preflight_reports_permission_error(
|
|
242
|
+
tmp_path, monkeypatch
|
|
243
|
+
) -> None:
|
|
244
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
245
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
|
|
246
|
+
|
|
247
|
+
command = runner.build_command(["/bin/true"])
|
|
248
|
+
|
|
249
|
+
assert "--proc" not in command.args
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def test_sandbox_command_binds_writable_socket_path(tmp_path, monkeypatch) -> None:
|
|
253
|
+
socket_path = tmp_path / "docker.sock"
|
|
254
|
+
socket_path.touch()
|
|
255
|
+
runner = SandboxRunner(cwd=tmp_path, writable_roots=[socket_path])
|
|
256
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
|
|
257
|
+
|
|
258
|
+
command = runner.build_command(["/bin/true"])
|
|
259
|
+
|
|
260
|
+
bind_index = command.args.index(str(socket_path))
|
|
261
|
+
assert command.args[bind_index - 1] == "--bind"
|
|
262
|
+
assert command.args[bind_index + 1] == str(socket_path)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def test_sandbox_proc_preflight_does_not_hide_non_proc_errors(
|
|
266
|
+
tmp_path, monkeypatch
|
|
267
|
+
) -> None:
|
|
268
|
+
bwrap = tmp_path / "bwrap"
|
|
269
|
+
bwrap.write_text("#!/bin/sh\necho 'bwrap: unrelated startup failure' >&2\nexit 1\n")
|
|
270
|
+
bwrap.chmod(0o700)
|
|
271
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
|
|
272
|
+
|
|
273
|
+
assert SandboxRunner(cwd=tmp_path).build_command(["/bin/true"]).args[0:7] == [
|
|
274
|
+
str(bwrap),
|
|
275
|
+
"--ro-bind",
|
|
276
|
+
"/",
|
|
277
|
+
"/",
|
|
278
|
+
"--dev",
|
|
279
|
+
"/dev",
|
|
280
|
+
"--proc",
|
|
281
|
+
]
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def test_shell_command_runs_without_proc_mount_after_preflight_fallback(
|
|
285
|
+
tmp_path, monkeypatch
|
|
286
|
+
) -> None:
|
|
287
|
+
bwrap = tmp_path / "bwrap"
|
|
288
|
+
bwrap.write_text(
|
|
289
|
+
"#!/bin/sh\n"
|
|
290
|
+
'for arg in "$@"; do\n'
|
|
291
|
+
' if [ "$arg" = --proc ]; then\n'
|
|
292
|
+
' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
|
|
293
|
+
" exit 1\n"
|
|
294
|
+
" fi\n"
|
|
295
|
+
"done\n"
|
|
296
|
+
'while [ "$#" -gt 0 ]; do\n'
|
|
297
|
+
' if [ "$1" = -- ]; then\n'
|
|
298
|
+
" shift\n"
|
|
299
|
+
' exec "$@"\n'
|
|
300
|
+
" fi\n"
|
|
301
|
+
" shift\n"
|
|
302
|
+
"done\n"
|
|
303
|
+
)
|
|
304
|
+
bwrap.chmod(0o700)
|
|
305
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
|
|
306
|
+
|
|
307
|
+
result = SandboxRunner(cwd=tmp_path).run(["/bin/sh", "-c", "printf ok"])
|
|
308
|
+
|
|
309
|
+
assert result.exit_code == 0
|
|
310
|
+
assert result.stdout == "ok"
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def test_apply_patch_runs_without_proc_mount_after_preflight_fallback(
|
|
314
|
+
tmp_path, monkeypatch
|
|
315
|
+
) -> None:
|
|
316
|
+
bwrap = tmp_path / "bwrap"
|
|
317
|
+
bwrap.write_text(
|
|
318
|
+
"#!/bin/sh\n"
|
|
319
|
+
'for arg in "$@"; do\n'
|
|
320
|
+
' if [ "$arg" = --proc ]; then\n'
|
|
321
|
+
' echo "bwrap: Can\'t mount proc on /newroot/proc: Operation not permitted" >&2\n'
|
|
322
|
+
" exit 1\n"
|
|
323
|
+
" fi\n"
|
|
324
|
+
"done\n"
|
|
325
|
+
'while [ "$#" -gt 0 ]; do\n'
|
|
326
|
+
' if [ "$1" = -- ]; then\n'
|
|
327
|
+
" shift\n"
|
|
328
|
+
' exec "$@"\n'
|
|
329
|
+
" fi\n"
|
|
330
|
+
" shift\n"
|
|
331
|
+
"done\n"
|
|
332
|
+
)
|
|
333
|
+
bwrap.chmod(0o700)
|
|
334
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_binary", lambda: str(bwrap))
|
|
335
|
+
target = tmp_path / "notes.txt"
|
|
336
|
+
target.write_text("alpha\n")
|
|
337
|
+
patch = """*** Begin Patch
|
|
338
|
+
*** Update File: notes.txt
|
|
339
|
+
@@
|
|
340
|
+
-alpha
|
|
341
|
+
+beta
|
|
342
|
+
*** End Patch
|
|
343
|
+
"""
|
|
344
|
+
|
|
345
|
+
result = run_tool("apply_patch", {"patch": patch}, ToolContext(cwd=tmp_path))
|
|
346
|
+
|
|
347
|
+
assert result.ok
|
|
348
|
+
assert target.read_text() == "beta\n"
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def test_shell_command_environment_omits_development_variables(
|
|
352
|
+
tmp_path, monkeypatch
|
|
353
|
+
) -> None:
|
|
354
|
+
monkeypatch.setenv("NODE_ENV", "production")
|
|
355
|
+
monkeypatch.setenv("VIRTUAL_ENV", "/tmp/flowent-venv")
|
|
356
|
+
monkeypatch.setenv("PYTHONPATH", "/tmp/flowent-pythonpath")
|
|
357
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
358
|
+
monkeypatch.setattr(
|
|
359
|
+
runner,
|
|
360
|
+
"build_command",
|
|
361
|
+
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
result = runner.run(
|
|
365
|
+
[
|
|
366
|
+
"/bin/sh",
|
|
367
|
+
"-c",
|
|
368
|
+
'printf \'%s|%s|%s\' "${NODE_ENV-unset}" "${VIRTUAL_ENV-unset}" "${PYTHONPATH-unset}"',
|
|
369
|
+
]
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
assert result.exit_code == 0
|
|
373
|
+
assert result.stdout == "unset|unset|unset"
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def test_shell_command_environment_omits_sensitive_variables(
|
|
377
|
+
tmp_path, monkeypatch
|
|
378
|
+
) -> None:
|
|
379
|
+
monkeypatch.setenv("OPENAI_API_KEY", "sk-local")
|
|
380
|
+
monkeypatch.setenv("SECRET_TOKEN", "secret")
|
|
381
|
+
monkeypatch.setenv("NPM_TOKEN", "npm")
|
|
382
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
383
|
+
monkeypatch.setattr(
|
|
384
|
+
runner,
|
|
385
|
+
"build_command",
|
|
386
|
+
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
result = runner.run(
|
|
390
|
+
[
|
|
391
|
+
"/bin/sh",
|
|
392
|
+
"-c",
|
|
393
|
+
'printf \'%s|%s|%s\' "${OPENAI_API_KEY-unset}" "${SECRET_TOKEN-unset}" "${NPM_TOKEN-unset}"',
|
|
394
|
+
]
|
|
395
|
+
)
|
|
396
|
+
|
|
397
|
+
assert result.exit_code == 0
|
|
398
|
+
assert result.stdout == "unset|unset|unset"
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def test_shell_command_environment_keeps_core_variables(tmp_path, monkeypatch) -> None:
|
|
402
|
+
monkeypatch.setenv("HOME", str(tmp_path / "home"))
|
|
403
|
+
monkeypatch.setenv("PATH", "/usr/local/bin:/usr/bin:/bin")
|
|
404
|
+
monkeypatch.setenv("SHELL", "/bin/sh")
|
|
405
|
+
monkeypatch.setenv("USER", "flowent")
|
|
406
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
407
|
+
monkeypatch.setattr(
|
|
408
|
+
runner,
|
|
409
|
+
"build_command",
|
|
410
|
+
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
result = runner.run(
|
|
414
|
+
[
|
|
415
|
+
"/bin/sh",
|
|
416
|
+
"-c",
|
|
417
|
+
'printf \'%s|%s|%s|%s\' "$HOME" "$PATH" "$SHELL" "$USER"',
|
|
418
|
+
]
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
assert result.exit_code == 0
|
|
422
|
+
assert (
|
|
423
|
+
result.stdout
|
|
424
|
+
== f"{tmp_path / 'home'}|/usr/local/bin:/usr/bin:/bin|/bin/sh|flowent"
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def test_shell_command_environment_uses_default_path_when_missing(
|
|
429
|
+
tmp_path, monkeypatch
|
|
430
|
+
) -> None:
|
|
431
|
+
monkeypatch.delenv("PATH", raising=False)
|
|
432
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
433
|
+
captured_env: dict[str, str] = {}
|
|
434
|
+
|
|
435
|
+
def fake_run(*args, **kwargs):
|
|
436
|
+
captured_env.update(kwargs["env"])
|
|
437
|
+
return subprocess.CompletedProcess(
|
|
438
|
+
args=args[0], returncode=0, stdout="", stderr=""
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
monkeypatch.setattr(
|
|
442
|
+
runner,
|
|
443
|
+
"build_command",
|
|
444
|
+
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
445
|
+
)
|
|
446
|
+
monkeypatch.setattr("subprocess.run", fake_run)
|
|
447
|
+
|
|
448
|
+
result = runner.run(["/bin/sh", "-c", "true"])
|
|
449
|
+
|
|
450
|
+
assert result.exit_code == 0
|
|
451
|
+
assert (
|
|
452
|
+
captured_env["PATH"]
|
|
453
|
+
== "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def test_shell_command_environment_accepts_explicit_overrides(
|
|
458
|
+
tmp_path, monkeypatch
|
|
459
|
+
) -> None:
|
|
460
|
+
monkeypatch.delenv("FLOWENT_TOOL_VAR", raising=False)
|
|
461
|
+
runner = SandboxRunner(cwd=tmp_path)
|
|
462
|
+
monkeypatch.setattr(
|
|
463
|
+
runner,
|
|
464
|
+
"build_command",
|
|
465
|
+
lambda command: SandboxCommand(command, seccomp_available=False),
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
result = runner.run(
|
|
469
|
+
["/bin/sh", "-c", "printf '%s' \"$FLOWENT_TOOL_VAR\""],
|
|
470
|
+
env={"FLOWENT_TOOL_VAR": "explicit"},
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
assert result.exit_code == 0
|
|
474
|
+
assert result.stdout == "explicit"
|
|
475
|
+
|
|
476
|
+
|
|
229
477
|
@pytest.mark.anyio
|
|
230
478
|
async def test_async_shell_command_does_not_block_other_tasks(
|
|
231
479
|
tmp_path, monkeypatch
|
|
@@ -666,6 +914,69 @@ def test_tool_failure_is_reported_and_agent_continues(tmp_path, monkeypatch) ->
|
|
|
666
914
|
assert events[-1]["data"]["message"]["content"] == "I could not read it."
|
|
667
915
|
|
|
668
916
|
|
|
917
|
+
@pytest.mark.anyio
|
|
918
|
+
async def test_approval_denial_result_is_sent_to_agent(tmp_path) -> None:
|
|
919
|
+
captured_requests: list[dict[str, object]] = []
|
|
920
|
+
|
|
921
|
+
async def fake_completion(**request: object) -> object:
|
|
922
|
+
captured_requests.append(request)
|
|
923
|
+
|
|
924
|
+
async def chunks() -> object:
|
|
925
|
+
if len(captured_requests) == 1:
|
|
926
|
+
yield tool_call_chunk(
|
|
927
|
+
"shell_command",
|
|
928
|
+
{"command": "rm -rf /important"},
|
|
929
|
+
)
|
|
930
|
+
else:
|
|
931
|
+
yield text_chunk("I need explicit approval for that risk.")
|
|
932
|
+
|
|
933
|
+
return chunks()
|
|
934
|
+
|
|
935
|
+
async def denying_tool_runner(
|
|
936
|
+
name: str,
|
|
937
|
+
arguments: dict[str, object],
|
|
938
|
+
context: ToolContext,
|
|
939
|
+
) -> ToolResult:
|
|
940
|
+
return ToolResult(
|
|
941
|
+
content=(
|
|
942
|
+
"Automatic approval review denied this action as high risk: "
|
|
943
|
+
"The command can delete broad data. The agent must not work around "
|
|
944
|
+
"this denial."
|
|
945
|
+
),
|
|
946
|
+
ok=False,
|
|
947
|
+
title="Denied by reviewer",
|
|
948
|
+
)
|
|
949
|
+
|
|
950
|
+
events = [
|
|
951
|
+
event
|
|
952
|
+
async for event in run_agent_stream(
|
|
953
|
+
completion=fake_completion,
|
|
954
|
+
connection=ProviderConnection(
|
|
955
|
+
model="gpt-5.1",
|
|
956
|
+
name="Provider",
|
|
957
|
+
provider=ProviderFormat.OPENAI,
|
|
958
|
+
secret_reference="secret",
|
|
959
|
+
),
|
|
960
|
+
cwd=tmp_path,
|
|
961
|
+
messages=[{"role": "user", "content": "Delete the important directory."}],
|
|
962
|
+
tool_runner=denying_tool_runner,
|
|
963
|
+
)
|
|
964
|
+
]
|
|
965
|
+
|
|
966
|
+
assert len(captured_requests) == 2
|
|
967
|
+
assert captured_requests[1]["messages"][-1]["role"] == "tool"
|
|
968
|
+
assert "Automatic approval review denied this action" in str(
|
|
969
|
+
captured_requests[1]["messages"][-1]["content"]
|
|
970
|
+
)
|
|
971
|
+
assert "must not work around" in str(
|
|
972
|
+
captured_requests[1]["messages"][-1]["content"]
|
|
973
|
+
)
|
|
974
|
+
assert events[-2].data["content"] == "I need explicit approval for that risk."
|
|
975
|
+
assert events[-1].data["message"]["content"] == (
|
|
976
|
+
"I need explicit approval for that risk."
|
|
977
|
+
)
|
|
978
|
+
|
|
979
|
+
|
|
669
980
|
def test_update_plan_outputs_plan_state(tmp_path) -> None:
|
|
670
981
|
result = run_tool(
|
|
671
982
|
"update_plan",
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from flowent.approval import (
|
|
6
|
+
ApprovalReviewRequest,
|
|
7
|
+
ApprovalTranscriptEntry,
|
|
8
|
+
review_approval_request,
|
|
9
|
+
)
|
|
10
|
+
from flowent.llm import ProviderConnection, ProviderFormat
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def provider_connection() -> ProviderConnection:
|
|
14
|
+
return ProviderConnection(
|
|
15
|
+
model="model",
|
|
16
|
+
name="Provider",
|
|
17
|
+
provider=ProviderFormat.OPENAI,
|
|
18
|
+
secret_reference="secret",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.anyio
|
|
23
|
+
async def test_review_payload_includes_current_user_request_and_transcript(
|
|
24
|
+
tmp_path,
|
|
25
|
+
) -> None:
|
|
26
|
+
captured_messages: list[dict[str, object]] = []
|
|
27
|
+
|
|
28
|
+
async def fake_completion(**request: object) -> object:
|
|
29
|
+
captured_messages.extend(request["messages"])
|
|
30
|
+
return {
|
|
31
|
+
"choices": [
|
|
32
|
+
{
|
|
33
|
+
"message": {
|
|
34
|
+
"content": json.dumps(
|
|
35
|
+
{
|
|
36
|
+
"risk_level": "low",
|
|
37
|
+
"risk_score": 25,
|
|
38
|
+
"rationale": "User approved after concrete risk context.",
|
|
39
|
+
"evidence": [
|
|
40
|
+
{
|
|
41
|
+
"message": "Assistant explained Docker socket impact.",
|
|
42
|
+
"why": "Establishes informed consent.",
|
|
43
|
+
}
|
|
44
|
+
],
|
|
45
|
+
}
|
|
46
|
+
),
|
|
47
|
+
"role": "assistant",
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
],
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
decision = await review_approval_request(
|
|
54
|
+
provider_connection(),
|
|
55
|
+
ApprovalReviewRequest(
|
|
56
|
+
action="additional_permissions",
|
|
57
|
+
arguments={"command": "docker compose up -d --build"},
|
|
58
|
+
cwd=tmp_path,
|
|
59
|
+
tool_name="shell_command",
|
|
60
|
+
user_request="确认",
|
|
61
|
+
transcript=[
|
|
62
|
+
ApprovalTranscriptEntry(
|
|
63
|
+
role="assistant",
|
|
64
|
+
content=(
|
|
65
|
+
"This will recreate the dev container, write to the Docker "
|
|
66
|
+
"socket, and briefly interrupt the local service."
|
|
67
|
+
),
|
|
68
|
+
),
|
|
69
|
+
ApprovalTranscriptEntry(role="user", content="确认"),
|
|
70
|
+
],
|
|
71
|
+
write_paths=[tmp_path / "docker.sock"],
|
|
72
|
+
),
|
|
73
|
+
completion=fake_completion,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
assert decision.decision == "approved"
|
|
77
|
+
assert decision.risk_level == "low"
|
|
78
|
+
assert decision.risk_score == 25
|
|
79
|
+
assert "informed of the concrete risk" in str(captured_messages[0]["content"])
|
|
80
|
+
payload = json.loads(str(captured_messages[-1]["content"]))
|
|
81
|
+
assert payload["user_request"] == "确认"
|
|
82
|
+
assert payload["transcript"][-1] == {"role": "user", "content": "确认"}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@pytest.mark.anyio
|
|
86
|
+
async def test_concrete_docker_socket_confirmation_can_be_approved(tmp_path) -> None:
|
|
87
|
+
async def fake_completion(**request: object) -> object:
|
|
88
|
+
return {
|
|
89
|
+
"choices": [
|
|
90
|
+
{
|
|
91
|
+
"message": {
|
|
92
|
+
"content": json.dumps(
|
|
93
|
+
{
|
|
94
|
+
"risk_level": "medium",
|
|
95
|
+
"risk_score": 55,
|
|
96
|
+
"rationale": (
|
|
97
|
+
"The user approved after being told the command "
|
|
98
|
+
"will recreate the dev container through Docker."
|
|
99
|
+
),
|
|
100
|
+
"evidence": [],
|
|
101
|
+
}
|
|
102
|
+
),
|
|
103
|
+
"role": "assistant",
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
decision = await review_approval_request(
|
|
110
|
+
provider_connection(),
|
|
111
|
+
ApprovalReviewRequest(
|
|
112
|
+
action="additional_permissions",
|
|
113
|
+
arguments={
|
|
114
|
+
"command": "docker compose up -d --force-recreate flowent",
|
|
115
|
+
},
|
|
116
|
+
cwd=tmp_path,
|
|
117
|
+
tool_name="shell_command",
|
|
118
|
+
user_request="确认",
|
|
119
|
+
transcript=[
|
|
120
|
+
ApprovalTranscriptEntry(
|
|
121
|
+
role="assistant",
|
|
122
|
+
content=(
|
|
123
|
+
"This will recreate the Flowent dev container through "
|
|
124
|
+
"Docker and may briefly interrupt the running service."
|
|
125
|
+
),
|
|
126
|
+
),
|
|
127
|
+
ApprovalTranscriptEntry(role="user", content="确认"),
|
|
128
|
+
],
|
|
129
|
+
write_paths=[tmp_path / "docker.sock"],
|
|
130
|
+
),
|
|
131
|
+
completion=fake_completion,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
assert decision.decision == "approved"
|
|
135
|
+
assert decision.risk_level == "medium"
|
|
136
|
+
assert decision.risk_score == 55
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.mark.anyio
|
|
140
|
+
async def test_vague_confirmation_without_concrete_risk_context_is_denied(
|
|
141
|
+
tmp_path,
|
|
142
|
+
) -> None:
|
|
143
|
+
captured_payload: dict[str, object] = {}
|
|
144
|
+
|
|
145
|
+
async def fake_completion(**request: object) -> object:
|
|
146
|
+
captured_payload.update(json.loads(str(request["messages"][-1]["content"])))
|
|
147
|
+
return {
|
|
148
|
+
"choices": [
|
|
149
|
+
{
|
|
150
|
+
"message": {
|
|
151
|
+
"content": json.dumps(
|
|
152
|
+
{
|
|
153
|
+
"risk_level": "high",
|
|
154
|
+
"risk_score": 82,
|
|
155
|
+
"rationale": (
|
|
156
|
+
"The transcript only contains a vague confirmation "
|
|
157
|
+
"and no concrete Docker risk explanation."
|
|
158
|
+
),
|
|
159
|
+
"evidence": [],
|
|
160
|
+
}
|
|
161
|
+
),
|
|
162
|
+
"role": "assistant",
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
]
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
decision = await review_approval_request(
|
|
169
|
+
provider_connection(),
|
|
170
|
+
ApprovalReviewRequest(
|
|
171
|
+
action="additional_permissions",
|
|
172
|
+
arguments={
|
|
173
|
+
"command": "docker compose up -d --force-recreate flowent",
|
|
174
|
+
},
|
|
175
|
+
cwd=tmp_path,
|
|
176
|
+
tool_name="shell_command",
|
|
177
|
+
user_request="确认",
|
|
178
|
+
transcript=[ApprovalTranscriptEntry(role="user", content="确认")],
|
|
179
|
+
write_paths=[tmp_path / "docker.sock"],
|
|
180
|
+
),
|
|
181
|
+
completion=fake_completion,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
assert decision.decision == "denied"
|
|
185
|
+
assert decision.risk_level == "high"
|
|
186
|
+
assert decision.risk_score == 82
|
|
187
|
+
assert captured_payload["transcript"] == [{"role": "user", "content": "确认"}]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@pytest.mark.anyio
|
|
191
|
+
async def test_broad_destructive_action_with_vague_confirmation_is_denied(
|
|
192
|
+
tmp_path,
|
|
193
|
+
) -> None:
|
|
194
|
+
async def fake_completion(**request: object) -> object:
|
|
195
|
+
return {
|
|
196
|
+
"choices": [
|
|
197
|
+
{
|
|
198
|
+
"message": {
|
|
199
|
+
"content": json.dumps(
|
|
200
|
+
{
|
|
201
|
+
"risk_level": "high",
|
|
202
|
+
"risk_score": 96,
|
|
203
|
+
"rationale": (
|
|
204
|
+
"The action can delete broad data and the user "
|
|
205
|
+
"did not approve that concrete destructive risk."
|
|
206
|
+
),
|
|
207
|
+
"evidence": [
|
|
208
|
+
{
|
|
209
|
+
"message": "rm -rf /var/lib/postgresql",
|
|
210
|
+
"why": "Broad destructive write outside the task.",
|
|
211
|
+
}
|
|
212
|
+
],
|
|
213
|
+
}
|
|
214
|
+
),
|
|
215
|
+
"role": "assistant",
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
]
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
decision = await review_approval_request(
|
|
222
|
+
provider_connection(),
|
|
223
|
+
ApprovalReviewRequest(
|
|
224
|
+
action="sandbox_failure",
|
|
225
|
+
arguments={"command": "rm -rf /var/lib/postgresql"},
|
|
226
|
+
cwd=tmp_path,
|
|
227
|
+
tool_name="shell_command",
|
|
228
|
+
tool_result="Read-only file system",
|
|
229
|
+
user_request="确认",
|
|
230
|
+
transcript=[ApprovalTranscriptEntry(role="user", content="确认")],
|
|
231
|
+
),
|
|
232
|
+
completion=fake_completion,
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
assert decision.decision == "denied"
|
|
236
|
+
assert decision.risk_level == "high"
|
|
237
|
+
assert decision.risk_score == 96
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@pytest.mark.anyio
|
|
241
|
+
async def test_invalid_reviewer_json_is_denied(tmp_path) -> None:
|
|
242
|
+
async def fake_completion(**request: object) -> object:
|
|
243
|
+
return {
|
|
244
|
+
"choices": [
|
|
245
|
+
{"message": {"content": "approved", "role": "assistant"}},
|
|
246
|
+
],
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
decision = await review_approval_request(
|
|
250
|
+
provider_connection(),
|
|
251
|
+
ApprovalReviewRequest(
|
|
252
|
+
action="sandbox_failure",
|
|
253
|
+
arguments={"command": "touch file.txt"},
|
|
254
|
+
cwd=tmp_path,
|
|
255
|
+
tool_name="shell_command",
|
|
256
|
+
tool_result="Read-only file system",
|
|
257
|
+
),
|
|
258
|
+
completion=fake_completion,
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
assert decision.decision == "denied"
|
|
262
|
+
assert "valid JSON" in decision.reason
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@pytest.mark.anyio
|
|
266
|
+
async def test_reviewer_call_failure_is_denied(tmp_path) -> None:
|
|
267
|
+
async def fake_completion(**request: object) -> object:
|
|
268
|
+
raise RuntimeError("model unavailable")
|
|
269
|
+
|
|
270
|
+
decision = await review_approval_request(
|
|
271
|
+
provider_connection(),
|
|
272
|
+
ApprovalReviewRequest(
|
|
273
|
+
action="edit",
|
|
274
|
+
arguments={"patch": "*** Begin Patch\n*** End Patch"},
|
|
275
|
+
cwd=tmp_path,
|
|
276
|
+
tool_name="apply_patch",
|
|
277
|
+
write_paths=[tmp_path / "outside"],
|
|
278
|
+
),
|
|
279
|
+
completion=fake_completion,
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
assert decision.decision == "denied"
|
|
283
|
+
assert "model unavailable" in decision.reason
|