flowent 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +1 -1
- package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
- package/backend/src/flowent/agent.py +117 -34
- package/backend/src/flowent/approval.py +148 -0
- package/backend/src/flowent/cli.py +4 -2
- package/backend/src/flowent/context.py +19 -1
- package/backend/src/flowent/llm.py +176 -16
- package/backend/src/flowent/logging.py +60 -0
- package/backend/src/flowent/main.py +639 -210
- package/backend/src/flowent/patch.py +55 -31
- package/backend/src/flowent/permissions.py +185 -42
- package/backend/src/flowent/sandbox.py +55 -1
- package/backend/src/flowent/static/assets/index-BlaCigkZ.js +82 -0
- package/backend/src/flowent/static/assets/index-CRvbsH4K.css +2 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/storage.py +113 -18
- package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/conftest.py +39 -0
- package/backend/tests/test_agent_tools.py +213 -1
- package/backend/tests/test_approval.py +283 -0
- package/backend/tests/test_llm_providers.py +377 -0
- package/backend/tests/test_logging.py +30 -0
- package/backend/tests/test_patch.py +112 -0
- package/backend/tests/test_permissions.py +198 -53
- package/backend/tests/test_persistence.py +78 -0
- package/backend/tests/test_startup_requirements.py +54 -0
- package/backend/tests/test_workspace_chat.py +902 -36
- package/backend/uv.lock +1 -1
- package/dist/frontend/assets/index-BlaCigkZ.js +82 -0
- package/dist/frontend/assets/index-CRvbsH4K.css +2 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +1 -1
- package/backend/src/flowent/static/assets/index-BREidonU.css +0 -2
- package/backend/src/flowent/static/assets/index-DSniOrhL.js +0 -81
- package/dist/frontend/assets/index-BREidonU.css +0 -2
- package/dist/frontend/assets/index-DSniOrhL.js +0 -81
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
|
+
import logging
|
|
3
4
|
import subprocess
|
|
4
5
|
import time
|
|
5
6
|
from pathlib import Path
|
|
@@ -11,7 +12,7 @@ from flowent.agent import FLOWENT_AGENT_SYSTEM_PROMPT, run_agent_stream
|
|
|
11
12
|
from flowent.llm import ProviderConnection, ProviderFormat
|
|
12
13
|
from flowent.main import create_app
|
|
13
14
|
from flowent.sandbox import SandboxCommand, SandboxRunner
|
|
14
|
-
from flowent.tools import ToolContext, run_tool
|
|
15
|
+
from flowent.tools import ToolContext, ToolResult, run_tool
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
def stream_events(content: str) -> list[dict[str, object]]:
|
|
@@ -249,6 +250,19 @@ def test_sandbox_command_omits_proc_mount_when_preflight_reports_permission_erro
|
|
|
249
250
|
assert "--proc" not in command.args
|
|
250
251
|
|
|
251
252
|
|
|
253
|
+
def test_sandbox_command_binds_writable_socket_path(tmp_path, monkeypatch) -> None:
|
|
254
|
+
socket_path = tmp_path / "docker.sock"
|
|
255
|
+
socket_path.touch()
|
|
256
|
+
runner = SandboxRunner(cwd=tmp_path, writable_roots=[socket_path])
|
|
257
|
+
monkeypatch.setattr("flowent.sandbox.sandbox_supports_proc_mount", lambda: False)
|
|
258
|
+
|
|
259
|
+
command = runner.build_command(["/bin/true"])
|
|
260
|
+
|
|
261
|
+
bind_index = command.args.index(str(socket_path))
|
|
262
|
+
assert command.args[bind_index - 1] == "--bind"
|
|
263
|
+
assert command.args[bind_index + 1] == str(socket_path)
|
|
264
|
+
|
|
265
|
+
|
|
252
266
|
def test_sandbox_proc_preflight_does_not_hide_non_proc_errors(
|
|
253
267
|
tmp_path, monkeypatch
|
|
254
268
|
) -> None:
|
|
@@ -791,6 +805,141 @@ def test_agent_continues_until_final_text_after_multiple_tool_rounds(
|
|
|
791
805
|
assert events[-1]["data"]["message"]["content"] == "The notes are ready."
|
|
792
806
|
|
|
793
807
|
|
|
808
|
+
@pytest.mark.anyio
|
|
809
|
+
async def test_agent_logs_model_call_decisions_after_tool_rounds(
|
|
810
|
+
tmp_path, caplog
|
|
811
|
+
) -> None:
|
|
812
|
+
(tmp_path / "notes.txt").write_text("Launch notes")
|
|
813
|
+
captured_requests: list[dict[str, object]] = []
|
|
814
|
+
caplog.set_level(logging.INFO, logger="flowent.agent")
|
|
815
|
+
|
|
816
|
+
async def fake_completion(**request: object) -> object:
|
|
817
|
+
captured_requests.append(request)
|
|
818
|
+
|
|
819
|
+
async def chunks() -> object:
|
|
820
|
+
if len(captured_requests) == 1:
|
|
821
|
+
yield tool_call_chunk("read_file", {"path": "notes.txt"})
|
|
822
|
+
else:
|
|
823
|
+
yield text_chunk("The notes are ready.")
|
|
824
|
+
|
|
825
|
+
return chunks()
|
|
826
|
+
|
|
827
|
+
events = [
|
|
828
|
+
event
|
|
829
|
+
async for event in run_agent_stream(
|
|
830
|
+
completion=fake_completion,
|
|
831
|
+
connection=ProviderConnection(
|
|
832
|
+
model="gpt-5.1",
|
|
833
|
+
name="Provider",
|
|
834
|
+
provider=ProviderFormat.OPENAI,
|
|
835
|
+
secret_reference="secret",
|
|
836
|
+
),
|
|
837
|
+
cwd=tmp_path,
|
|
838
|
+
messages=[{"role": "user", "content": "Inspect notes."}],
|
|
839
|
+
)
|
|
840
|
+
]
|
|
841
|
+
rendered_logs = "\n".join(record.getMessage() for record in caplog.records)
|
|
842
|
+
|
|
843
|
+
assert events[-1].data["message"]["content"] == "The notes are ready."
|
|
844
|
+
assert "Agent model call started" in rendered_logs
|
|
845
|
+
assert "round=1" in rendered_logs
|
|
846
|
+
assert "round=2" in rendered_logs
|
|
847
|
+
assert "decision=run_tools" in rendered_logs
|
|
848
|
+
assert "decision=final_response" in rendered_logs
|
|
849
|
+
assert "Agent continuing after tools" in rendered_logs
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
@pytest.mark.anyio
|
|
853
|
+
async def test_agent_logs_model_call_failure_after_tool_result(
|
|
854
|
+
tmp_path, caplog
|
|
855
|
+
) -> None:
|
|
856
|
+
(tmp_path / "notes.txt").write_text("Launch notes")
|
|
857
|
+
captured_requests: list[dict[str, object]] = []
|
|
858
|
+
caplog.set_level(logging.INFO, logger="flowent.agent")
|
|
859
|
+
|
|
860
|
+
async def fake_completion(**request: object) -> object:
|
|
861
|
+
captured_requests.append(request)
|
|
862
|
+
|
|
863
|
+
async def chunks() -> object:
|
|
864
|
+
if len(captured_requests) == 1:
|
|
865
|
+
yield tool_call_chunk("read_file", {"path": "notes.txt"})
|
|
866
|
+
return
|
|
867
|
+
raise RuntimeError("stream request failed")
|
|
868
|
+
|
|
869
|
+
return chunks()
|
|
870
|
+
|
|
871
|
+
with pytest.raises(RuntimeError, match="stream request failed"):
|
|
872
|
+
[
|
|
873
|
+
event
|
|
874
|
+
async for event in run_agent_stream(
|
|
875
|
+
completion=fake_completion,
|
|
876
|
+
connection=ProviderConnection(
|
|
877
|
+
model="gpt-5.1",
|
|
878
|
+
name="Provider",
|
|
879
|
+
provider=ProviderFormat.OPENAI,
|
|
880
|
+
secret_reference="secret",
|
|
881
|
+
),
|
|
882
|
+
cwd=tmp_path,
|
|
883
|
+
messages=[{"role": "user", "content": "Inspect notes."}],
|
|
884
|
+
)
|
|
885
|
+
]
|
|
886
|
+
rendered_logs = "\n".join(record.getMessage() for record in caplog.records)
|
|
887
|
+
|
|
888
|
+
assert len(captured_requests) == 2
|
|
889
|
+
assert "Agent model call failed" in rendered_logs
|
|
890
|
+
assert "round=2" in rendered_logs
|
|
891
|
+
assert "chunk_count=0" in rendered_logs
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
@pytest.mark.anyio
|
|
895
|
+
async def test_agent_does_not_log_final_response_when_responses_stream_fails(
|
|
896
|
+
tmp_path, caplog, fake_litellm_responses_transformer
|
|
897
|
+
) -> None:
|
|
898
|
+
caplog.set_level(logging.INFO, logger="flowent.agent")
|
|
899
|
+
|
|
900
|
+
async def fake_completion(**request: object) -> object:
|
|
901
|
+
async def chunks() -> object:
|
|
902
|
+
from litellm.completion_extras.litellm_responses_transformation.transformation import (
|
|
903
|
+
OpenAiResponsesToChatCompletionStreamIterator,
|
|
904
|
+
)
|
|
905
|
+
|
|
906
|
+
yield text_chunk("Partial answer.")
|
|
907
|
+
yield OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(
|
|
908
|
+
{
|
|
909
|
+
"response": {
|
|
910
|
+
"error": {
|
|
911
|
+
"code": "upstream_error",
|
|
912
|
+
"message": "Upstream request failed",
|
|
913
|
+
},
|
|
914
|
+
"status": "failed",
|
|
915
|
+
},
|
|
916
|
+
"type": "response.failed",
|
|
917
|
+
}
|
|
918
|
+
)
|
|
919
|
+
|
|
920
|
+
return chunks()
|
|
921
|
+
|
|
922
|
+
with pytest.raises(RuntimeError, match="Upstream request failed"):
|
|
923
|
+
[
|
|
924
|
+
event
|
|
925
|
+
async for event in run_agent_stream(
|
|
926
|
+
completion=fake_completion,
|
|
927
|
+
connection=ProviderConnection(
|
|
928
|
+
model="gpt-5.1",
|
|
929
|
+
name="Provider",
|
|
930
|
+
provider=ProviderFormat.OPENAI,
|
|
931
|
+
secret_reference="secret",
|
|
932
|
+
),
|
|
933
|
+
cwd=tmp_path,
|
|
934
|
+
messages=[{"role": "user", "content": "Inspect notes."}],
|
|
935
|
+
)
|
|
936
|
+
]
|
|
937
|
+
rendered_logs = "\n".join(record.getMessage() for record in caplog.records)
|
|
938
|
+
|
|
939
|
+
assert "Agent model call failed" in rendered_logs
|
|
940
|
+
assert "decision=final_response" not in rendered_logs
|
|
941
|
+
|
|
942
|
+
|
|
794
943
|
def test_agent_finishes_without_tools(tmp_path, monkeypatch) -> None:
|
|
795
944
|
monkeypatch.setenv("FLOWENT_DATA_DIR", str(tmp_path / "data"))
|
|
796
945
|
monkeypatch.chdir(tmp_path)
|
|
@@ -901,6 +1050,69 @@ def test_tool_failure_is_reported_and_agent_continues(tmp_path, monkeypatch) ->
|
|
|
901
1050
|
assert events[-1]["data"]["message"]["content"] == "I could not read it."
|
|
902
1051
|
|
|
903
1052
|
|
|
1053
|
+
@pytest.mark.anyio
|
|
1054
|
+
async def test_approval_denial_result_is_sent_to_agent(tmp_path) -> None:
|
|
1055
|
+
captured_requests: list[dict[str, object]] = []
|
|
1056
|
+
|
|
1057
|
+
async def fake_completion(**request: object) -> object:
|
|
1058
|
+
captured_requests.append(request)
|
|
1059
|
+
|
|
1060
|
+
async def chunks() -> object:
|
|
1061
|
+
if len(captured_requests) == 1:
|
|
1062
|
+
yield tool_call_chunk(
|
|
1063
|
+
"shell_command",
|
|
1064
|
+
{"command": "rm -rf /important"},
|
|
1065
|
+
)
|
|
1066
|
+
else:
|
|
1067
|
+
yield text_chunk("I need explicit approval for that risk.")
|
|
1068
|
+
|
|
1069
|
+
return chunks()
|
|
1070
|
+
|
|
1071
|
+
async def denying_tool_runner(
|
|
1072
|
+
name: str,
|
|
1073
|
+
arguments: dict[str, object],
|
|
1074
|
+
context: ToolContext,
|
|
1075
|
+
) -> ToolResult:
|
|
1076
|
+
return ToolResult(
|
|
1077
|
+
content=(
|
|
1078
|
+
"Automatic approval review denied this action as high risk: "
|
|
1079
|
+
"The command can delete broad data. The agent must not work around "
|
|
1080
|
+
"this denial."
|
|
1081
|
+
),
|
|
1082
|
+
ok=False,
|
|
1083
|
+
title="Denied by reviewer",
|
|
1084
|
+
)
|
|
1085
|
+
|
|
1086
|
+
events = [
|
|
1087
|
+
event
|
|
1088
|
+
async for event in run_agent_stream(
|
|
1089
|
+
completion=fake_completion,
|
|
1090
|
+
connection=ProviderConnection(
|
|
1091
|
+
model="gpt-5.1",
|
|
1092
|
+
name="Provider",
|
|
1093
|
+
provider=ProviderFormat.OPENAI,
|
|
1094
|
+
secret_reference="secret",
|
|
1095
|
+
),
|
|
1096
|
+
cwd=tmp_path,
|
|
1097
|
+
messages=[{"role": "user", "content": "Delete the important directory."}],
|
|
1098
|
+
tool_runner=denying_tool_runner,
|
|
1099
|
+
)
|
|
1100
|
+
]
|
|
1101
|
+
|
|
1102
|
+
assert len(captured_requests) == 2
|
|
1103
|
+
assert captured_requests[1]["messages"][-1]["role"] == "tool"
|
|
1104
|
+
assert "Automatic approval review denied this action" in str(
|
|
1105
|
+
captured_requests[1]["messages"][-1]["content"]
|
|
1106
|
+
)
|
|
1107
|
+
assert "must not work around" in str(
|
|
1108
|
+
captured_requests[1]["messages"][-1]["content"]
|
|
1109
|
+
)
|
|
1110
|
+
assert events[-2].data["content"] == "I need explicit approval for that risk."
|
|
1111
|
+
assert events[-1].data["message"]["content"] == (
|
|
1112
|
+
"I need explicit approval for that risk."
|
|
1113
|
+
)
|
|
1114
|
+
|
|
1115
|
+
|
|
904
1116
|
def test_update_plan_outputs_plan_state(tmp_path) -> None:
|
|
905
1117
|
result = run_tool(
|
|
906
1118
|
"update_plan",
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from flowent.approval import (
|
|
6
|
+
ApprovalReviewRequest,
|
|
7
|
+
ApprovalTranscriptEntry,
|
|
8
|
+
review_approval_request,
|
|
9
|
+
)
|
|
10
|
+
from flowent.llm import ProviderConnection, ProviderFormat
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def provider_connection() -> ProviderConnection:
|
|
14
|
+
return ProviderConnection(
|
|
15
|
+
model="model",
|
|
16
|
+
name="Provider",
|
|
17
|
+
provider=ProviderFormat.OPENAI,
|
|
18
|
+
secret_reference="secret",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.anyio
|
|
23
|
+
async def test_review_payload_includes_current_user_request_and_transcript(
|
|
24
|
+
tmp_path,
|
|
25
|
+
) -> None:
|
|
26
|
+
captured_messages: list[dict[str, object]] = []
|
|
27
|
+
|
|
28
|
+
async def fake_completion(**request: object) -> object:
|
|
29
|
+
captured_messages.extend(request["messages"])
|
|
30
|
+
return {
|
|
31
|
+
"choices": [
|
|
32
|
+
{
|
|
33
|
+
"message": {
|
|
34
|
+
"content": json.dumps(
|
|
35
|
+
{
|
|
36
|
+
"risk_level": "low",
|
|
37
|
+
"risk_score": 25,
|
|
38
|
+
"rationale": "User approved after concrete risk context.",
|
|
39
|
+
"evidence": [
|
|
40
|
+
{
|
|
41
|
+
"message": "Assistant explained Docker socket impact.",
|
|
42
|
+
"why": "Establishes informed consent.",
|
|
43
|
+
}
|
|
44
|
+
],
|
|
45
|
+
}
|
|
46
|
+
),
|
|
47
|
+
"role": "assistant",
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
],
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
decision = await review_approval_request(
|
|
54
|
+
provider_connection(),
|
|
55
|
+
ApprovalReviewRequest(
|
|
56
|
+
action="additional_permissions",
|
|
57
|
+
arguments={"command": "docker compose up -d --build"},
|
|
58
|
+
cwd=tmp_path,
|
|
59
|
+
tool_name="shell_command",
|
|
60
|
+
user_request="确认",
|
|
61
|
+
transcript=[
|
|
62
|
+
ApprovalTranscriptEntry(
|
|
63
|
+
role="assistant",
|
|
64
|
+
content=(
|
|
65
|
+
"This will recreate the dev container, write to the Docker "
|
|
66
|
+
"socket, and briefly interrupt the local service."
|
|
67
|
+
),
|
|
68
|
+
),
|
|
69
|
+
ApprovalTranscriptEntry(role="user", content="确认"),
|
|
70
|
+
],
|
|
71
|
+
write_paths=[tmp_path / "docker.sock"],
|
|
72
|
+
),
|
|
73
|
+
completion=fake_completion,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
assert decision.decision == "approved"
|
|
77
|
+
assert decision.risk_level == "low"
|
|
78
|
+
assert decision.risk_score == 25
|
|
79
|
+
assert "informed of the concrete risk" in str(captured_messages[0]["content"])
|
|
80
|
+
payload = json.loads(str(captured_messages[-1]["content"]))
|
|
81
|
+
assert payload["user_request"] == "确认"
|
|
82
|
+
assert payload["transcript"][-1] == {"role": "user", "content": "确认"}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@pytest.mark.anyio
|
|
86
|
+
async def test_concrete_docker_socket_confirmation_can_be_approved(tmp_path) -> None:
|
|
87
|
+
async def fake_completion(**request: object) -> object:
|
|
88
|
+
return {
|
|
89
|
+
"choices": [
|
|
90
|
+
{
|
|
91
|
+
"message": {
|
|
92
|
+
"content": json.dumps(
|
|
93
|
+
{
|
|
94
|
+
"risk_level": "medium",
|
|
95
|
+
"risk_score": 55,
|
|
96
|
+
"rationale": (
|
|
97
|
+
"The user approved after being told the command "
|
|
98
|
+
"will recreate the dev container through Docker."
|
|
99
|
+
),
|
|
100
|
+
"evidence": [],
|
|
101
|
+
}
|
|
102
|
+
),
|
|
103
|
+
"role": "assistant",
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
decision = await review_approval_request(
|
|
110
|
+
provider_connection(),
|
|
111
|
+
ApprovalReviewRequest(
|
|
112
|
+
action="additional_permissions",
|
|
113
|
+
arguments={
|
|
114
|
+
"command": "docker compose up -d --force-recreate flowent",
|
|
115
|
+
},
|
|
116
|
+
cwd=tmp_path,
|
|
117
|
+
tool_name="shell_command",
|
|
118
|
+
user_request="确认",
|
|
119
|
+
transcript=[
|
|
120
|
+
ApprovalTranscriptEntry(
|
|
121
|
+
role="assistant",
|
|
122
|
+
content=(
|
|
123
|
+
"This will recreate the Flowent dev container through "
|
|
124
|
+
"Docker and may briefly interrupt the running service."
|
|
125
|
+
),
|
|
126
|
+
),
|
|
127
|
+
ApprovalTranscriptEntry(role="user", content="确认"),
|
|
128
|
+
],
|
|
129
|
+
write_paths=[tmp_path / "docker.sock"],
|
|
130
|
+
),
|
|
131
|
+
completion=fake_completion,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
assert decision.decision == "approved"
|
|
135
|
+
assert decision.risk_level == "medium"
|
|
136
|
+
assert decision.risk_score == 55
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.mark.anyio
|
|
140
|
+
async def test_vague_confirmation_without_concrete_risk_context_is_denied(
|
|
141
|
+
tmp_path,
|
|
142
|
+
) -> None:
|
|
143
|
+
captured_payload: dict[str, object] = {}
|
|
144
|
+
|
|
145
|
+
async def fake_completion(**request: object) -> object:
|
|
146
|
+
captured_payload.update(json.loads(str(request["messages"][-1]["content"])))
|
|
147
|
+
return {
|
|
148
|
+
"choices": [
|
|
149
|
+
{
|
|
150
|
+
"message": {
|
|
151
|
+
"content": json.dumps(
|
|
152
|
+
{
|
|
153
|
+
"risk_level": "high",
|
|
154
|
+
"risk_score": 82,
|
|
155
|
+
"rationale": (
|
|
156
|
+
"The transcript only contains a vague confirmation "
|
|
157
|
+
"and no concrete Docker risk explanation."
|
|
158
|
+
),
|
|
159
|
+
"evidence": [],
|
|
160
|
+
}
|
|
161
|
+
),
|
|
162
|
+
"role": "assistant",
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
]
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
decision = await review_approval_request(
|
|
169
|
+
provider_connection(),
|
|
170
|
+
ApprovalReviewRequest(
|
|
171
|
+
action="additional_permissions",
|
|
172
|
+
arguments={
|
|
173
|
+
"command": "docker compose up -d --force-recreate flowent",
|
|
174
|
+
},
|
|
175
|
+
cwd=tmp_path,
|
|
176
|
+
tool_name="shell_command",
|
|
177
|
+
user_request="确认",
|
|
178
|
+
transcript=[ApprovalTranscriptEntry(role="user", content="确认")],
|
|
179
|
+
write_paths=[tmp_path / "docker.sock"],
|
|
180
|
+
),
|
|
181
|
+
completion=fake_completion,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
assert decision.decision == "denied"
|
|
185
|
+
assert decision.risk_level == "high"
|
|
186
|
+
assert decision.risk_score == 82
|
|
187
|
+
assert captured_payload["transcript"] == [{"role": "user", "content": "确认"}]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@pytest.mark.anyio
|
|
191
|
+
async def test_broad_destructive_action_with_vague_confirmation_is_denied(
|
|
192
|
+
tmp_path,
|
|
193
|
+
) -> None:
|
|
194
|
+
async def fake_completion(**request: object) -> object:
|
|
195
|
+
return {
|
|
196
|
+
"choices": [
|
|
197
|
+
{
|
|
198
|
+
"message": {
|
|
199
|
+
"content": json.dumps(
|
|
200
|
+
{
|
|
201
|
+
"risk_level": "high",
|
|
202
|
+
"risk_score": 96,
|
|
203
|
+
"rationale": (
|
|
204
|
+
"The action can delete broad data and the user "
|
|
205
|
+
"did not approve that concrete destructive risk."
|
|
206
|
+
),
|
|
207
|
+
"evidence": [
|
|
208
|
+
{
|
|
209
|
+
"message": "rm -rf /var/lib/postgresql",
|
|
210
|
+
"why": "Broad destructive write outside the task.",
|
|
211
|
+
}
|
|
212
|
+
],
|
|
213
|
+
}
|
|
214
|
+
),
|
|
215
|
+
"role": "assistant",
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
]
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
decision = await review_approval_request(
|
|
222
|
+
provider_connection(),
|
|
223
|
+
ApprovalReviewRequest(
|
|
224
|
+
action="sandbox_failure",
|
|
225
|
+
arguments={"command": "rm -rf /var/lib/postgresql"},
|
|
226
|
+
cwd=tmp_path,
|
|
227
|
+
tool_name="shell_command",
|
|
228
|
+
tool_result="Read-only file system",
|
|
229
|
+
user_request="确认",
|
|
230
|
+
transcript=[ApprovalTranscriptEntry(role="user", content="确认")],
|
|
231
|
+
),
|
|
232
|
+
completion=fake_completion,
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
assert decision.decision == "denied"
|
|
236
|
+
assert decision.risk_level == "high"
|
|
237
|
+
assert decision.risk_score == 96
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@pytest.mark.anyio
|
|
241
|
+
async def test_invalid_reviewer_json_is_denied(tmp_path) -> None:
|
|
242
|
+
async def fake_completion(**request: object) -> object:
|
|
243
|
+
return {
|
|
244
|
+
"choices": [
|
|
245
|
+
{"message": {"content": "approved", "role": "assistant"}},
|
|
246
|
+
],
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
decision = await review_approval_request(
|
|
250
|
+
provider_connection(),
|
|
251
|
+
ApprovalReviewRequest(
|
|
252
|
+
action="sandbox_failure",
|
|
253
|
+
arguments={"command": "touch file.txt"},
|
|
254
|
+
cwd=tmp_path,
|
|
255
|
+
tool_name="shell_command",
|
|
256
|
+
tool_result="Read-only file system",
|
|
257
|
+
),
|
|
258
|
+
completion=fake_completion,
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
assert decision.decision == "denied"
|
|
262
|
+
assert "valid JSON" in decision.reason
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@pytest.mark.anyio
|
|
266
|
+
async def test_reviewer_call_failure_is_denied(tmp_path) -> None:
|
|
267
|
+
async def fake_completion(**request: object) -> object:
|
|
268
|
+
raise RuntimeError("model unavailable")
|
|
269
|
+
|
|
270
|
+
decision = await review_approval_request(
|
|
271
|
+
provider_connection(),
|
|
272
|
+
ApprovalReviewRequest(
|
|
273
|
+
action="edit",
|
|
274
|
+
arguments={"patch": "*** Begin Patch\n*** End Patch"},
|
|
275
|
+
cwd=tmp_path,
|
|
276
|
+
tool_name="apply_patch",
|
|
277
|
+
write_paths=[tmp_path / "outside"],
|
|
278
|
+
),
|
|
279
|
+
completion=fake_completion,
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
assert decision.decision == "denied"
|
|
283
|
+
assert "model unavailable" in decision.reason
|