@kontourai/flow-agents 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +23 -0
- package/.github/workflows/publish-npm.yml +1 -1
- package/.github/workflows/release-please.yml +31 -0
- package/.github/workflows/runtime-compat.yml +118 -0
- package/CHANGELOG.md +38 -0
- package/CONTRIBUTING.md +4 -0
- package/README.md +58 -19
- package/build/src/cli/init.js +215 -5
- package/build/src/cli/utterance-check.js +236 -0
- package/build/src/cli.js +3 -0
- package/build/src/tools/build-universal-bundles.js +268 -0
- package/build/src/tools/filter-installed-packs.js +3 -0
- package/build/src/tools/validate-source-tree.js +6 -1
- package/context/scripts/telemetry/lib/config.sh +5 -1
- package/context/settings/flow-agents-settings.json +7 -0
- package/docs/agent-system-guidebook.md +4 -5
- package/docs/context-map.md +1 -0
- package/docs/index.md +46 -6
- package/docs/integrations/conformance.md +246 -0
- package/docs/integrations/framework-adapter.md +275 -0
- package/docs/integrations/harness-install.md +213 -0
- package/docs/integrations/index.md +54 -0
- package/docs/north-star.md +3 -3
- package/docs/repository-structure.md +1 -1
- package/docs/skills-map.md +10 -4
- package/docs/spec/runtime-hook-surface.md +472 -0
- package/docs/survey-utterance-check.md +308 -0
- package/docs/vision.md +45 -0
- package/docs/workflow-usage-guide.md +1 -1
- package/evals/acceptance/run.sh +4 -2
- package/evals/acceptance/test_opencode_harness.sh +121 -0
- package/evals/acceptance/test_pi_harness.sh +98 -0
- package/evals/integration/test_bundle_install.sh +226 -1
- package/evals/integration/test_bundle_lifecycle.sh +641 -0
- package/evals/integration/test_utterance_check.sh +518 -0
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +137 -2
- package/integrations/strands/README.md +256 -0
- package/integrations/strands/example.py +74 -0
- package/integrations/strands/flow_agents_strands/__init__.py +27 -0
- package/integrations/strands/flow_agents_strands/hooks.py +194 -0
- package/integrations/strands/flow_agents_strands/policy.py +348 -0
- package/integrations/strands/flow_agents_strands/steering.py +172 -0
- package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
- package/integrations/strands/pyproject.toml +38 -0
- package/integrations/strands/tests/__init__.py +0 -0
- package/integrations/strands/tests/test_hooks.py +304 -0
- package/integrations/strands/tests/test_policy.py +315 -0
- package/integrations/strands/tests/test_telemetry.py +184 -0
- package/integrations/strands-ts/README.md +224 -0
- package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
- package/integrations/strands-ts/package.json +53 -0
- package/integrations/strands-ts/src/hooks.ts +208 -0
- package/integrations/strands-ts/src/index.ts +22 -0
- package/integrations/strands-ts/src/policy.ts +345 -0
- package/integrations/strands-ts/src/telemetry.ts +251 -0
- package/integrations/strands-ts/test/test-policy.ts +322 -0
- package/integrations/strands-ts/test/test-telemetry.ts +226 -0
- package/integrations/strands-ts/tsconfig.json +20 -0
- package/package.json +7 -2
- package/packaging/conformance/README.md +142 -0
- package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
- package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
- package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
- package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
- package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
- package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
- package/packaging/conformance/package.json +4 -0
- package/packaging/conformance/run-conformance.js +322 -0
- package/packaging/manifest.json +59 -0
- package/schemas/flow-agents-settings.schema.json +48 -0
- package/scripts/README.md +5 -0
- package/scripts/dogfood.js +16 -0
- package/scripts/hooks/opencode-hook-adapter.js +123 -0
- package/scripts/hooks/opencode-telemetry-hook.js +101 -0
- package/scripts/hooks/pi-hook-adapter.js +123 -0
- package/scripts/hooks/pi-telemetry-hook.js +105 -0
- package/scripts/hooks/run-hook.js +8 -0
- package/scripts/hooks/utterance-check.js +327 -0
- package/scripts/telemetry/lib/config.sh +5 -1
- package/skills/idea-to-backlog/SKILL.md +1 -1
- package/src/cli/init.ts +219 -6
- package/src/cli/utterance-check.ts +324 -0
- package/src/cli.ts +3 -0
- package/src/tools/build-universal-bundles.ts +266 -0
- package/src/tools/filter-installed-packs.ts +3 -0
- package/src/tools/validate-source-tree.ts +6 -1
- package/build/src/cli/docs-preview.js +0 -39
- package/build/src/cli/export-bookmarks.js +0 -38
- package/build/src/cli/import-bookmarks.js +0 -50
- package/build/src/cli/instinct-cli.js +0 -93
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for FlowAgentsHooks — fake registry + fake event objects.
|
|
3
|
+
|
|
4
|
+
These tests exercise the full hook-wiring path without requiring
|
|
5
|
+
strands-agents to be installed. A minimal fake registry / event surface
|
|
6
|
+
mirrors the Strands API contract described in the mission brief.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
import tempfile
|
|
12
|
+
import types
|
|
13
|
+
import unittest
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Fake Strands hook infrastructure (no SDK required)
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
class FakeHookRegistry:
    """Test double standing in for strands.hooks.HookRegistry.

    Callbacks are bucketed by the ``__name__`` of the event class they were
    registered for; ``fire`` dispatches on the ``__name__`` of the event
    instance's type.
    """

    def __init__(self):
        # event-class name -> callbacks, in registration order
        self._callbacks: Dict[str, List[Callable]] = {}

    def add_callback(self, event_cls, callback: Callable) -> None:
        """Register *callback* under the class name of *event_cls*."""
        bucket = self._callbacks.setdefault(event_cls.__name__, [])
        bucket.append(callback)

    def fire(self, event) -> None:
        """Call every callback registered for the event's class name, in order."""
        handlers = self._callbacks.get(type(event).__name__, [])
        for handler in handlers:
            handler(event)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Fake event classes — named to match what register_hooks imports from strands.hooks
|
|
40
|
+
class AgentInitializedEvent:
    """Attribute-less fake event; only its class name is used for dispatch."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BeforeInvocationEvent:
    """Attribute-less fake event; only its class name is used for dispatch."""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AfterInvocationEvent:
    """Attribute-less fake event; only its class name is used for dispatch."""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class BeforeToolCallEvent:
    """Fake pre-tool-call event.

    ``tool_use`` holds the tool name and its input mapping. ``cancel_tool``
    starts out as None; the policy-gate tests check whether the hook under
    test assigns a reason string to it.
    """

    cancel_tool: Optional[str] = None

    def __init__(self, tool_name: str, tool_input: Optional[dict] = None):
        # A missing (or empty) input is normalised to a fresh empty dict.
        payload = tool_input if tool_input else {}
        self.tool_use = {"name": tool_name, "input": payload}
        self.cancel_tool = None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class AfterToolCallEvent:
    """Fake post-tool-call event carrying the tool name, its result and a retry flag."""

    def __init__(self, tool_name: str, result: Any = None):
        self.retry = False
        self.result = result
        # The fake always reports an empty input mapping for the finished call.
        self.tool_use = dict(name=tool_name, input={})
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Install fake strands module into sys.modules so FlowAgentsHooks can import
|
|
69
|
+
# from strands.hooks without the real SDK being installed.
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
def _install_fake_strands() -> None:
    """Register stub ``strands`` and ``strands.hooks`` modules in ``sys.modules``.

    The stub hooks module exposes each fake event class under its own class
    name, so ``from strands.hooks import <EventName>`` resolves to the fakes
    defined in this file.
    """
    fake_hooks = types.ModuleType("strands.hooks")
    fake_events = (
        AgentInitializedEvent,
        BeforeInvocationEvent,
        AfterInvocationEvent,
        BeforeToolCallEvent,
        AfterToolCallEvent,
    )
    for event_cls in fake_events:
        setattr(fake_hooks, event_cls.__name__, event_cls)

    fake_root = types.ModuleType("strands")
    fake_root.hooks = fake_hooks  # type: ignore[attr-defined]

    # Both the package and the submodule entry are registered.
    sys.modules.update({"strands": fake_root, "strands.hooks": fake_hooks})
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
_install_fake_strands()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Tests
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
class TestFlowAgentsHooksRegistration(unittest.TestCase):
|
|
100
|
+
"""Verify register_hooks wires callbacks without raising."""
|
|
101
|
+
|
|
102
|
+
def _make_hooks(self, tmp_dir: str):
|
|
103
|
+
from flow_agents_strands import FlowAgentsHooks
|
|
104
|
+
return FlowAgentsHooks(sink_path=tmp_dir, agent_name="test-agent")
|
|
105
|
+
|
|
106
|
+
def test_register_hooks_runs_without_error(self):
|
|
107
|
+
with tempfile.TemporaryDirectory() as d:
|
|
108
|
+
hooks = self._make_hooks(d)
|
|
109
|
+
registry = FakeHookRegistry()
|
|
110
|
+
hooks.register_hooks(registry)
|
|
111
|
+
self.assertTrue(len(registry._callbacks) > 0)
|
|
112
|
+
|
|
113
|
+
def test_all_five_event_types_registered(self):
|
|
114
|
+
with tempfile.TemporaryDirectory() as d:
|
|
115
|
+
hooks = self._make_hooks(d)
|
|
116
|
+
registry = FakeHookRegistry()
|
|
117
|
+
hooks.register_hooks(registry)
|
|
118
|
+
expected = {
|
|
119
|
+
"AgentInitializedEvent",
|
|
120
|
+
"BeforeInvocationEvent",
|
|
121
|
+
"AfterInvocationEvent",
|
|
122
|
+
"BeforeToolCallEvent",
|
|
123
|
+
"AfterToolCallEvent",
|
|
124
|
+
}
|
|
125
|
+
self.assertEqual(expected, set(registry._callbacks.keys()))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class TestFlowAgentsHooksTelemetry(unittest.TestCase):
|
|
129
|
+
"""Verify telemetry events are emitted with correct shape."""
|
|
130
|
+
|
|
131
|
+
def setUp(self):
|
|
132
|
+
self._tmp = tempfile.TemporaryDirectory()
|
|
133
|
+
self._tmp_path = Path(self._tmp.name)
|
|
134
|
+
|
|
135
|
+
def tearDown(self):
|
|
136
|
+
self._tmp.cleanup()
|
|
137
|
+
|
|
138
|
+
def _make_hooks(self):
|
|
139
|
+
from flow_agents_strands import FlowAgentsHooks
|
|
140
|
+
return FlowAgentsHooks(
|
|
141
|
+
sink_path=str(self._tmp_path),
|
|
142
|
+
agent_name="test-agent",
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
def _read_events(self):
|
|
146
|
+
log_file = self._tmp_path / "full.jsonl"
|
|
147
|
+
if not log_file.exists():
|
|
148
|
+
return []
|
|
149
|
+
return [
|
|
150
|
+
json.loads(line)
|
|
151
|
+
for line in log_file.read_text(encoding="utf-8").splitlines()
|
|
152
|
+
if line.strip()
|
|
153
|
+
]
|
|
154
|
+
|
|
155
|
+
def test_session_start_emitted_on_agent_initialized(self):
|
|
156
|
+
hooks = self._make_hooks()
|
|
157
|
+
hooks._on_agent_initialized(AgentInitializedEvent())
|
|
158
|
+
events = self._read_events()
|
|
159
|
+
self.assertEqual(1, len(events))
|
|
160
|
+
self.assertEqual("session.start", events[0]["event_type"])
|
|
161
|
+
|
|
162
|
+
def test_tool_invoke_emitted_on_before_tool_call(self):
|
|
163
|
+
hooks = self._make_hooks()
|
|
164
|
+
event = BeforeToolCallEvent("read", {"path": "README.md"})
|
|
165
|
+
hooks._on_before_tool_call(event)
|
|
166
|
+
events = self._read_events()
|
|
167
|
+
self.assertEqual(1, len(events))
|
|
168
|
+
self.assertEqual("tool.invoke", events[0]["event_type"])
|
|
169
|
+
self.assertEqual("read", events[0]["tool"]["name"])
|
|
170
|
+
|
|
171
|
+
def test_tool_result_emitted_on_after_tool_call(self):
|
|
172
|
+
hooks = self._make_hooks()
|
|
173
|
+
event = AfterToolCallEvent("read", result="file content")
|
|
174
|
+
hooks._on_after_tool_call(event)
|
|
175
|
+
events = self._read_events()
|
|
176
|
+
self.assertEqual(1, len(events))
|
|
177
|
+
self.assertEqual("tool.result", events[0]["event_type"])
|
|
178
|
+
self.assertEqual("file content", events[0]["tool"]["output"])
|
|
179
|
+
|
|
180
|
+
def test_session_end_emitted_on_after_invocation(self):
|
|
181
|
+
hooks = self._make_hooks()
|
|
182
|
+
hooks._on_agent_initialized(AgentInitializedEvent())
|
|
183
|
+
hooks._on_after_invocation(AfterInvocationEvent())
|
|
184
|
+
events = self._read_events()
|
|
185
|
+
types_ = [e["event_type"] for e in events]
|
|
186
|
+
self.assertIn("session.end", types_)
|
|
187
|
+
|
|
188
|
+
def test_full_lifecycle_produces_correct_sequence(self):
|
|
189
|
+
hooks = self._make_hooks()
|
|
190
|
+
hooks._on_agent_initialized(AgentInitializedEvent())
|
|
191
|
+
hooks._on_before_invocation(BeforeInvocationEvent())
|
|
192
|
+
hooks._on_before_tool_call(BeforeToolCallEvent("bash", {"command": "ls"}))
|
|
193
|
+
hooks._on_after_tool_call(AfterToolCallEvent("bash", result="file1.py"))
|
|
194
|
+
hooks._on_after_invocation(AfterInvocationEvent())
|
|
195
|
+
|
|
196
|
+
events = self._read_events()
|
|
197
|
+
types_ = [e["event_type"] for e in events]
|
|
198
|
+
self.assertEqual(
|
|
199
|
+
["session.start", "turn.user", "tool.invoke", "tool.result", "session.end"],
|
|
200
|
+
types_,
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class TestFlowAgentsHooksPolicyGate(unittest.TestCase):
|
|
205
|
+
"""
|
|
206
|
+
Verify tool-call cancellation on protected-config writes.
|
|
207
|
+
|
|
208
|
+
This is the key spike proof-point: a BeforeToolCallEvent targeting a
|
|
209
|
+
protected config file must result in event.cancel_tool being set.
|
|
210
|
+
"""
|
|
211
|
+
|
|
212
|
+
def setUp(self):
|
|
213
|
+
self._tmp = tempfile.TemporaryDirectory()
|
|
214
|
+
|
|
215
|
+
def tearDown(self):
|
|
216
|
+
self._tmp.cleanup()
|
|
217
|
+
|
|
218
|
+
def _make_hooks(self):
|
|
219
|
+
from flow_agents_strands import FlowAgentsHooks
|
|
220
|
+
return FlowAgentsHooks(sink_path=self._tmp.name, agent_name="test")
|
|
221
|
+
|
|
222
|
+
def test_cancel_tool_set_for_protected_write(self):
|
|
223
|
+
hooks = self._make_hooks()
|
|
224
|
+
event = BeforeToolCallEvent("write", {"path": ".eslintrc.json"})
|
|
225
|
+
hooks._on_before_tool_call(event)
|
|
226
|
+
self.assertIsNotNone(event.cancel_tool)
|
|
227
|
+
self.assertIn("BLOCKED", event.cancel_tool)
|
|
228
|
+
|
|
229
|
+
def test_cancel_tool_not_set_for_safe_write(self):
|
|
230
|
+
hooks = self._make_hooks()
|
|
231
|
+
event = BeforeToolCallEvent("write", {"path": "src/main.py"})
|
|
232
|
+
hooks._on_before_tool_call(event)
|
|
233
|
+
self.assertIsNone(event.cancel_tool)
|
|
234
|
+
|
|
235
|
+
def test_cancel_tool_not_set_for_read_on_protected_file(self):
|
|
236
|
+
hooks = self._make_hooks()
|
|
237
|
+
event = BeforeToolCallEvent("read", {"path": ".eslintrc.json"})
|
|
238
|
+
hooks._on_before_tool_call(event)
|
|
239
|
+
self.assertIsNone(event.cancel_tool)
|
|
240
|
+
|
|
241
|
+
def test_cancel_tool_covers_all_protected_files(self):
|
|
242
|
+
from flow_agents_strands.policy import PROTECTED_FILES
|
|
243
|
+
hooks = self._make_hooks()
|
|
244
|
+
for fname in PROTECTED_FILES:
|
|
245
|
+
with self.subTest(file=fname):
|
|
246
|
+
event = BeforeToolCallEvent("write", {"path": f"/repo/{fname}"})
|
|
247
|
+
hooks._on_before_tool_call(event)
|
|
248
|
+
self.assertIsNotNone(
|
|
249
|
+
event.cancel_tool,
|
|
250
|
+
f"Expected cancel_tool for {fname} but got None",
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
def test_telemetry_still_emitted_even_when_cancelled(self):
|
|
254
|
+
"""Policy block must not suppress telemetry."""
|
|
255
|
+
hooks = self._make_hooks()
|
|
256
|
+
event = BeforeToolCallEvent("write", {"path": "biome.json"})
|
|
257
|
+
hooks._on_before_tool_call(event)
|
|
258
|
+
log_file = Path(self._tmp.name) / "full.jsonl"
|
|
259
|
+
lines = log_file.read_text(encoding="utf-8").strip().splitlines()
|
|
260
|
+
self.assertEqual(1, len(lines))
|
|
261
|
+
parsed = json.loads(lines[0])
|
|
262
|
+
self.assertEqual("tool.invoke", parsed["event_type"])
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class TestFlowAgentsHooksSteeringContext(unittest.TestCase):
|
|
266
|
+
"""Verify steering context loads without error in an empty workspace."""
|
|
267
|
+
|
|
268
|
+
def test_steering_context_returns_string(self):
|
|
269
|
+
with tempfile.TemporaryDirectory() as d:
|
|
270
|
+
from flow_agents_strands import FlowAgentsHooks
|
|
271
|
+
hooks = FlowAgentsHooks(sink_path=d, workspace=d)
|
|
272
|
+
ctx = hooks.steering_context()
|
|
273
|
+
self.assertIsInstance(ctx, str)
|
|
274
|
+
|
|
275
|
+
def test_steering_context_empty_when_no_flow_agents_dir(self):
|
|
276
|
+
with tempfile.TemporaryDirectory() as d:
|
|
277
|
+
from flow_agents_strands import FlowAgentsHooks
|
|
278
|
+
hooks = FlowAgentsHooks(sink_path=d, workspace=d)
|
|
279
|
+
ctx = hooks.steering_context()
|
|
280
|
+
self.assertEqual("", ctx)
|
|
281
|
+
|
|
282
|
+
def test_steering_context_with_active_state(self):
|
|
283
|
+
"""If .flow-agents/task/state.json has active status, context is returned."""
|
|
284
|
+
with tempfile.TemporaryDirectory() as d:
|
|
285
|
+
state_dir = Path(d) / ".flow-agents" / "my-task"
|
|
286
|
+
state_dir.mkdir(parents=True)
|
|
287
|
+
state = {
|
|
288
|
+
"task_slug": "my-task",
|
|
289
|
+
"status": "in_progress",
|
|
290
|
+
"phase": "execute",
|
|
291
|
+
"next_action": {"summary": "Run tests", "target_phase": "verify"},
|
|
292
|
+
}
|
|
293
|
+
(state_dir / "state.json").write_text(
|
|
294
|
+
json.dumps(state), encoding="utf-8"
|
|
295
|
+
)
|
|
296
|
+
from flow_agents_strands import FlowAgentsHooks
|
|
297
|
+
hooks = FlowAgentsHooks(sink_path=d, workspace=d)
|
|
298
|
+
ctx = hooks.steering_context()
|
|
299
|
+
self.assertIn("my-task", ctx)
|
|
300
|
+
self.assertIn("in_progress", ctx)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
if __name__ == "__main__":
|
|
304
|
+
unittest.main()
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for policy module — config-protection gate.
|
|
3
|
+
|
|
4
|
+
Uses stdlib unittest only; no strands-agents required.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import unittest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestPolicyGateConfigProtection(unittest.TestCase):
|
|
11
|
+
|
|
12
|
+
def setUp(self):
|
|
13
|
+
from flow_agents_strands.policy import PolicyGate
|
|
14
|
+
self._gate = PolicyGate()
|
|
15
|
+
|
|
16
|
+
# --- Blocked write tools ---
|
|
17
|
+
|
|
18
|
+
def test_blocks_write_to_eslintrc(self):
|
|
19
|
+
reason = self._gate.check_tool_call("write", {"path": "/repo/.eslintrc.json"})
|
|
20
|
+
self.assertIsNotNone(reason)
|
|
21
|
+
self.assertIn("BLOCKED", reason)
|
|
22
|
+
self.assertIn(".eslintrc.json", reason)
|
|
23
|
+
|
|
24
|
+
def test_blocks_edit_to_prettier_config(self):
|
|
25
|
+
reason = self._gate.check_tool_call("edit", {"path": "prettier.config.js"})
|
|
26
|
+
self.assertIsNotNone(reason)
|
|
27
|
+
self.assertIn("BLOCKED", reason)
|
|
28
|
+
|
|
29
|
+
def test_blocks_fs_write_to_biome_json(self):
|
|
30
|
+
reason = self._gate.check_tool_call("fs_write", {"file_path": "biome.json"})
|
|
31
|
+
self.assertIsNotNone(reason)
|
|
32
|
+
|
|
33
|
+
def test_blocks_edit_to_ruff_toml(self):
|
|
34
|
+
reason = self._gate.check_tool_call("edit", {"path": "ruff.toml"})
|
|
35
|
+
self.assertIsNotNone(reason)
|
|
36
|
+
|
|
37
|
+
def test_blocks_apply_patch_to_markdownlint(self):
|
|
38
|
+
reason = self._gate.check_tool_call(
|
|
39
|
+
"apply_patch", {"path": ".markdownlint.json"}
|
|
40
|
+
)
|
|
41
|
+
self.assertIsNotNone(reason)
|
|
42
|
+
|
|
43
|
+
def test_block_message_includes_guidance(self):
|
|
44
|
+
reason = self._gate.check_tool_call("write", {"path": ".eslintrc"})
|
|
45
|
+
self.assertIn("linter/formatter rules", reason)
|
|
46
|
+
|
|
47
|
+
# --- Allowed cases ---
|
|
48
|
+
|
|
49
|
+
def test_allows_write_to_regular_python_file(self):
|
|
50
|
+
reason = self._gate.check_tool_call("write", {"path": "src/main.py"})
|
|
51
|
+
self.assertIsNone(reason)
|
|
52
|
+
|
|
53
|
+
def test_allows_read_on_protected_file(self):
|
|
54
|
+
"""Read tools must never be blocked."""
|
|
55
|
+
reason = self._gate.check_tool_call("read", {"path": ".eslintrc.json"})
|
|
56
|
+
self.assertIsNone(reason)
|
|
57
|
+
|
|
58
|
+
def test_allows_bash(self):
|
|
59
|
+
reason = self._gate.check_tool_call("bash", {"command": "ls"})
|
|
60
|
+
self.assertIsNone(reason)
|
|
61
|
+
|
|
62
|
+
def test_allows_write_without_path(self):
|
|
63
|
+
"""No path → no block."""
|
|
64
|
+
reason = self._gate.check_tool_call("write", {})
|
|
65
|
+
self.assertIsNone(reason)
|
|
66
|
+
|
|
67
|
+
def test_allows_write_to_package_json(self):
|
|
68
|
+
reason = self._gate.check_tool_call("write", {"path": "package.json"})
|
|
69
|
+
self.assertIsNone(reason)
|
|
70
|
+
|
|
71
|
+
# --- Full protected-files coverage ---
|
|
72
|
+
|
|
73
|
+
def test_all_canonical_protected_files_are_blocked(self):
|
|
74
|
+
from flow_agents_strands.policy import PROTECTED_FILES
|
|
75
|
+
for fname in PROTECTED_FILES:
|
|
76
|
+
with self.subTest(file=fname):
|
|
77
|
+
reason = self._gate.check_tool_call("write", {"path": f"/repo/{fname}"})
|
|
78
|
+
self.assertIsNotNone(
|
|
79
|
+
reason,
|
|
80
|
+
f"Expected {fname} to be blocked but got None",
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class TestPolicyGateCustomProtectedFiles(unittest.TestCase):
|
|
85
|
+
"""Verify callers can override the protected-files set."""
|
|
86
|
+
|
|
87
|
+
def test_custom_protected_set(self):
|
|
88
|
+
from flow_agents_strands.policy import PolicyGate
|
|
89
|
+
gate = PolicyGate(protected_files=frozenset(["pyproject.toml"]))
|
|
90
|
+
self.assertIsNotNone(gate.check_tool_call("write", {"path": "pyproject.toml"}))
|
|
91
|
+
# Default protected files should NOT be blocked with the custom set
|
|
92
|
+
self.assertIsNone(gate.check_tool_call("write", {"path": ".eslintrc.json"}))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# NOTE: deliberately no script entry point here. The ``unittest.main()`` guard
# at the bottom of this module runs after every TestCase below has been
# defined; invoking it at this point would run only the classes above and then
# exit, silently skipping the engine-binding and end-to-end tests when the
# file is executed directly.
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ============================================================================
|
|
100
|
+
# Contract-binding tests — verify subprocess delegation to the Node.js engine
|
|
101
|
+
# ============================================================================
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class _FakeNodeProcess:
    """Plain record mimicking a subprocess.run result for engine-binding tests.

    Holds the three fields a caller would read from a finished process:
    ``returncode``, ``stdout`` and ``stderr``.
    """

    def __init__(self, returncode: int, stdout: str = "", stderr: str = ""):
        self.returncode, self.stdout, self.stderr = returncode, stdout, stderr
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class TestPolicyGateEngineBinding(unittest.TestCase):
|
|
116
|
+
"""
|
|
117
|
+
Verify that PolicyGate delegates to the engine subprocess contract.
|
|
118
|
+
|
|
119
|
+
These tests inject a fake node path and engine path to exercise the
|
|
120
|
+
subprocess-binding code path without requiring a live Node.js process.
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
def _make_gate_with_fake_engine(self, fake_returncode, fake_stderr="", fake_stdout=""):
|
|
124
|
+
"""
|
|
125
|
+
Return a PolicyGate wired to a fake engine via monkeypatching.
|
|
126
|
+
|
|
127
|
+
We pass _node_bin='node' and _run_hook_path='/fake/run-hook.js' so
|
|
128
|
+
_engine_available is True, then patch _invoke_engine at the module level.
|
|
129
|
+
"""
|
|
130
|
+
import unittest.mock as mock
|
|
131
|
+
from flow_agents_strands import policy as policy_module
|
|
132
|
+
|
|
133
|
+
gate = policy_module.PolicyGate(
|
|
134
|
+
_node_bin="node",
|
|
135
|
+
_run_hook_path="/fake/run-hook.js",
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
fake_result = (fake_returncode, fake_stdout, fake_stderr)
|
|
139
|
+
self._patcher = mock.patch.object(
|
|
140
|
+
policy_module, "_invoke_engine", return_value=fake_result
|
|
141
|
+
)
|
|
142
|
+
self._mock_invoke = self._patcher.start()
|
|
143
|
+
return gate
|
|
144
|
+
|
|
145
|
+
def tearDown(self):
|
|
146
|
+
if hasattr(self, "_patcher"):
|
|
147
|
+
self._patcher.stop()
|
|
148
|
+
|
|
149
|
+
def test_engine_block_returns_stderr_reason(self):
|
|
150
|
+
"""When engine exits 2, the block reason is taken from stderr."""
|
|
151
|
+
gate = self._make_gate_with_fake_engine(
|
|
152
|
+
fake_returncode=2,
|
|
153
|
+
fake_stderr="BLOCKED: Modifying .eslintrc.json is not allowed. Fix the source code."
|
|
154
|
+
)
|
|
155
|
+
reason = gate.check_tool_call("write", {"path": ".eslintrc.json"})
|
|
156
|
+
self.assertIsNotNone(reason)
|
|
157
|
+
self.assertIn("BLOCKED", reason)
|
|
158
|
+
self.assertIn(".eslintrc.json", reason)
|
|
159
|
+
|
|
160
|
+
def test_engine_allow_returns_none(self):
|
|
161
|
+
"""When engine exits 0, check_tool_call returns None (allowed)."""
|
|
162
|
+
gate = self._make_gate_with_fake_engine(fake_returncode=0)
|
|
163
|
+
result = gate.check_tool_call("write", {"path": "src/main.ts"})
|
|
164
|
+
self.assertIsNone(result)
|
|
165
|
+
|
|
166
|
+
def test_engine_error_fails_open(self):
|
|
167
|
+
"""When engine exits non-0 non-2, check_tool_call fails open (returns None)."""
|
|
168
|
+
gate = self._make_gate_with_fake_engine(fake_returncode=1, fake_stderr="some error")
|
|
169
|
+
result = gate.check_tool_call("write", {"path": ".eslintrc.json"})
|
|
170
|
+
self.assertIsNone(result)
|
|
171
|
+
|
|
172
|
+
def test_engine_invoked_with_correct_payload_shape(self):
|
|
173
|
+
"""Verify the payload sent to the engine has the expected structure."""
|
|
174
|
+
import unittest.mock as mock
|
|
175
|
+
from flow_agents_strands import policy as policy_module
|
|
176
|
+
|
|
177
|
+
gate = policy_module.PolicyGate(
|
|
178
|
+
_node_bin="node",
|
|
179
|
+
_run_hook_path="/fake/run-hook.js",
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
with mock.patch.object(policy_module, "_invoke_engine", return_value=(0, "", "")) as m:
|
|
183
|
+
gate.check_tool_call("write", {"path": "src/main.ts"})
|
|
184
|
+
m.assert_called_once()
|
|
185
|
+
call_kwargs = m.call_args
|
|
186
|
+
# payload is passed as positional; check via args
|
|
187
|
+
payload = call_kwargs[1]["payload"] if "payload" in call_kwargs[1] else call_kwargs[0][2]
|
|
188
|
+
self.assertEqual("PreToolUse", payload.get("hook_event_name"))
|
|
189
|
+
self.assertEqual("write", payload.get("tool_name"))
|
|
190
|
+
self.assertEqual({"path": "src/main.ts"}, payload.get("tool_input"))
|
|
191
|
+
|
|
192
|
+
def test_read_tool_skips_engine(self):
|
|
193
|
+
"""Read tools must bypass the engine entirely (tool-name pre-filter)."""
|
|
194
|
+
import unittest.mock as mock
|
|
195
|
+
from flow_agents_strands import policy as policy_module
|
|
196
|
+
|
|
197
|
+
gate = policy_module.PolicyGate(
|
|
198
|
+
_node_bin="node",
|
|
199
|
+
_run_hook_path="/fake/run-hook.js",
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
with mock.patch.object(policy_module, "_invoke_engine", return_value=(2, "", "BLOCKED")) as m:
|
|
203
|
+
result = gate.check_tool_call("read", {"path": ".eslintrc.json"})
|
|
204
|
+
self.assertIsNone(result)
|
|
205
|
+
m.assert_not_called()
|
|
206
|
+
|
|
207
|
+
def test_custom_protected_set_bypasses_engine(self):
|
|
208
|
+
"""Custom protected_files use Python evaluation, not the engine subprocess."""
|
|
209
|
+
import unittest.mock as mock
|
|
210
|
+
from flow_agents_strands import policy as policy_module
|
|
211
|
+
|
|
212
|
+
gate = policy_module.PolicyGate(
|
|
213
|
+
protected_files=frozenset(["pyproject.toml"]),
|
|
214
|
+
_node_bin="node",
|
|
215
|
+
_run_hook_path="/fake/run-hook.js",
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
with mock.patch.object(policy_module, "_invoke_engine") as m:
|
|
219
|
+
result = gate.check_tool_call("write", {"path": "pyproject.toml"})
|
|
220
|
+
self.assertIsNotNone(result) # blocked by custom set
|
|
221
|
+
m.assert_not_called() # engine not called
|
|
222
|
+
|
|
223
|
+
def test_no_engine_path_falls_back_to_python(self):
|
|
224
|
+
"""When run-hook.js is not found, PolicyGate falls back to Python evaluation."""
|
|
225
|
+
import warnings
|
|
226
|
+
from flow_agents_strands import policy as policy_module
|
|
227
|
+
|
|
228
|
+
# Passing None explicitly overrides module-level resolution, forcing fallback
|
|
229
|
+
gate = policy_module.PolicyGate(_node_bin="node", _run_hook_path=None)
|
|
230
|
+
with warnings.catch_warnings(record=True) as caught:
|
|
231
|
+
warnings.simplefilter("always")
|
|
232
|
+
result = gate.check_tool_call("write", {"path": ".eslintrc.json"})
|
|
233
|
+
self.assertIsNotNone(result)
|
|
234
|
+
self.assertIn("BLOCKED", result)
|
|
235
|
+
# Should have emitted the fallback warning
|
|
236
|
+
runtime_warnings = [w for w in caught if issubclass(w.category, RuntimeWarning)]
|
|
237
|
+
self.assertEqual(1, len(runtime_warnings))
|
|
238
|
+
self.assertIn("Node.js", str(runtime_warnings[0].message))
|
|
239
|
+
|
|
240
|
+
def test_no_node_falls_back_to_python(self):
|
|
241
|
+
"""When node binary is not found, PolicyGate falls back to Python evaluation."""
|
|
242
|
+
import warnings
|
|
243
|
+
from flow_agents_strands import policy as policy_module
|
|
244
|
+
|
|
245
|
+
# Passing None explicitly overrides module-level resolution, forcing fallback
|
|
246
|
+
gate = policy_module.PolicyGate(_node_bin=None, _run_hook_path="/fake/run-hook.js")
|
|
247
|
+
with warnings.catch_warnings(record=True):
|
|
248
|
+
warnings.simplefilter("always")
|
|
249
|
+
result = gate.check_tool_call("write", {"path": ".eslintrc.json"})
|
|
250
|
+
self.assertIsNotNone(result)
|
|
251
|
+
self.assertIn("BLOCKED", result)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ============================================================================
|
|
255
|
+
# End-to-end test — invokes the actual Node.js engine
|
|
256
|
+
# ============================================================================
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class TestPolicyGateEndToEnd(unittest.TestCase):
|
|
260
|
+
"""
|
|
261
|
+
Real end-to-end test: invokes the actual node engine via subprocess.
|
|
262
|
+
|
|
263
|
+
Skipped gracefully if node is not available or the engine script cannot
|
|
264
|
+
be located.
|
|
265
|
+
"""
|
|
266
|
+
|
|
267
|
+
@classmethod
|
|
268
|
+
def setUpClass(cls):
|
|
269
|
+
"""Resolve engine paths once; skip the whole class if unavailable."""
|
|
270
|
+
import shutil
|
|
271
|
+
from flow_agents_strands.policy import _find_engine_paths
|
|
272
|
+
|
|
273
|
+
node, run_hook = _find_engine_paths()
|
|
274
|
+
if not node or not run_hook:
|
|
275
|
+
raise unittest.SkipTest(
|
|
276
|
+
"Node.js or the Flow Agents engine script (run-hook.js) is not available. "
|
|
277
|
+
"Skipping end-to-end policy tests."
|
|
278
|
+
)
|
|
279
|
+
cls._node_bin = node
|
|
280
|
+
cls._run_hook_path = run_hook
|
|
281
|
+
|
|
282
|
+
def _make_gate(self):
|
|
283
|
+
from flow_agents_strands.policy import PolicyGate
|
|
284
|
+
return PolicyGate(_node_bin=self._node_bin, _run_hook_path=self._run_hook_path)
|
|
285
|
+
|
|
286
|
+
def test_e2e_blocks_eslintrc_write(self):
|
|
287
|
+
"""Real engine call: blocks write to .eslintrc.json."""
|
|
288
|
+
gate = self._make_gate()
|
|
289
|
+
reason = gate.check_tool_call("write", {"path": "/repo/.eslintrc.json"})
|
|
290
|
+
self.assertIsNotNone(reason, "Expected engine to block .eslintrc.json write")
|
|
291
|
+
self.assertIn("BLOCKED", reason)
|
|
292
|
+
self.assertIn(".eslintrc.json", reason)
|
|
293
|
+
|
|
294
|
+
def test_e2e_allows_safe_file_write(self):
|
|
295
|
+
"""Real engine call: allows write to src/main.ts."""
|
|
296
|
+
gate = self._make_gate()
|
|
297
|
+
result = gate.check_tool_call("write", {"path": "src/main.ts"})
|
|
298
|
+
self.assertIsNone(result, "Expected engine to allow src/main.ts write")
|
|
299
|
+
|
|
300
|
+
def test_e2e_allows_read_on_protected_file(self):
|
|
301
|
+
"""Real engine call: read tools bypass the engine (tool-name pre-filter)."""
|
|
302
|
+
gate = self._make_gate()
|
|
303
|
+
result = gate.check_tool_call("read", {"path": ".eslintrc.json"})
|
|
304
|
+
self.assertIsNone(result, "Read on protected file must never be blocked")
|
|
305
|
+
|
|
306
|
+
def test_e2e_blocks_biome_json_via_file_path_key(self):
|
|
307
|
+
"""Real engine call: blocks edit to biome.json using file_path key."""
|
|
308
|
+
gate = self._make_gate()
|
|
309
|
+
reason = gate.check_tool_call("edit", {"file_path": "biome.json"})
|
|
310
|
+
self.assertIsNotNone(reason)
|
|
311
|
+
self.assertIn("biome.json", reason)
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
if __name__ == "__main__":
|
|
315
|
+
unittest.main()
|