@kontourai/flow-agents 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97):
  1. package/.github/dependabot.yml +23 -0
  2. package/.github/workflows/publish-npm.yml +1 -1
  3. package/.github/workflows/release-please.yml +31 -0
  4. package/.github/workflows/runtime-compat.yml +118 -0
  5. package/CHANGELOG.md +38 -0
  6. package/CONTRIBUTING.md +4 -0
  7. package/README.md +58 -19
  8. package/build/src/cli/init.js +215 -5
  9. package/build/src/cli/utterance-check.js +236 -0
  10. package/build/src/cli.js +3 -0
  11. package/build/src/tools/build-universal-bundles.js +268 -0
  12. package/build/src/tools/filter-installed-packs.js +3 -0
  13. package/build/src/tools/validate-source-tree.js +6 -1
  14. package/context/scripts/telemetry/lib/config.sh +5 -1
  15. package/context/settings/flow-agents-settings.json +7 -0
  16. package/docs/agent-system-guidebook.md +4 -5
  17. package/docs/context-map.md +1 -0
  18. package/docs/index.md +46 -6
  19. package/docs/integrations/conformance.md +246 -0
  20. package/docs/integrations/framework-adapter.md +275 -0
  21. package/docs/integrations/harness-install.md +213 -0
  22. package/docs/integrations/index.md +54 -0
  23. package/docs/north-star.md +3 -3
  24. package/docs/repository-structure.md +1 -1
  25. package/docs/skills-map.md +10 -4
  26. package/docs/spec/runtime-hook-surface.md +472 -0
  27. package/docs/survey-utterance-check.md +308 -0
  28. package/docs/vision.md +45 -0
  29. package/docs/workflow-usage-guide.md +1 -1
  30. package/evals/acceptance/run.sh +4 -2
  31. package/evals/acceptance/test_opencode_harness.sh +121 -0
  32. package/evals/acceptance/test_pi_harness.sh +98 -0
  33. package/evals/integration/test_bundle_install.sh +226 -1
  34. package/evals/integration/test_bundle_lifecycle.sh +641 -0
  35. package/evals/integration/test_utterance_check.sh +518 -0
  36. package/evals/run.sh +2 -0
  37. package/evals/static/test_universal_bundles.sh +137 -2
  38. package/integrations/strands/README.md +256 -0
  39. package/integrations/strands/example.py +74 -0
  40. package/integrations/strands/flow_agents_strands/__init__.py +27 -0
  41. package/integrations/strands/flow_agents_strands/hooks.py +194 -0
  42. package/integrations/strands/flow_agents_strands/policy.py +348 -0
  43. package/integrations/strands/flow_agents_strands/steering.py +172 -0
  44. package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
  45. package/integrations/strands/pyproject.toml +38 -0
  46. package/integrations/strands/tests/__init__.py +0 -0
  47. package/integrations/strands/tests/test_hooks.py +304 -0
  48. package/integrations/strands/tests/test_policy.py +315 -0
  49. package/integrations/strands/tests/test_telemetry.py +184 -0
  50. package/integrations/strands-ts/README.md +224 -0
  51. package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
  52. package/integrations/strands-ts/package.json +53 -0
  53. package/integrations/strands-ts/src/hooks.ts +208 -0
  54. package/integrations/strands-ts/src/index.ts +22 -0
  55. package/integrations/strands-ts/src/policy.ts +345 -0
  56. package/integrations/strands-ts/src/telemetry.ts +251 -0
  57. package/integrations/strands-ts/test/test-policy.ts +322 -0
  58. package/integrations/strands-ts/test/test-telemetry.ts +226 -0
  59. package/integrations/strands-ts/tsconfig.json +20 -0
  60. package/package.json +7 -2
  61. package/packaging/conformance/README.md +142 -0
  62. package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
  63. package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
  64. package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
  65. package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
  66. package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
  67. package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
  68. package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
  69. package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
  70. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
  71. package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
  72. package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
  73. package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
  74. package/packaging/conformance/package.json +4 -0
  75. package/packaging/conformance/run-conformance.js +322 -0
  76. package/packaging/manifest.json +59 -0
  77. package/schemas/flow-agents-settings.schema.json +48 -0
  78. package/scripts/README.md +5 -0
  79. package/scripts/dogfood.js +16 -0
  80. package/scripts/hooks/opencode-hook-adapter.js +123 -0
  81. package/scripts/hooks/opencode-telemetry-hook.js +101 -0
  82. package/scripts/hooks/pi-hook-adapter.js +123 -0
  83. package/scripts/hooks/pi-telemetry-hook.js +105 -0
  84. package/scripts/hooks/run-hook.js +8 -0
  85. package/scripts/hooks/utterance-check.js +327 -0
  86. package/scripts/telemetry/lib/config.sh +5 -1
  87. package/skills/idea-to-backlog/SKILL.md +1 -1
  88. package/src/cli/init.ts +219 -6
  89. package/src/cli/utterance-check.ts +324 -0
  90. package/src/cli.ts +3 -0
  91. package/src/tools/build-universal-bundles.ts +266 -0
  92. package/src/tools/filter-installed-packs.ts +3 -0
  93. package/src/tools/validate-source-tree.ts +6 -1
  94. package/build/src/cli/docs-preview.js +0 -39
  95. package/build/src/cli/export-bookmarks.js +0 -38
  96. package/build/src/cli/import-bookmarks.js +0 -50
  97. package/build/src/cli/instinct-cli.js +0 -93
@@ -0,0 +1,304 @@
1
+ """
2
+ Tests for FlowAgentsHooks — fake registry + fake event objects.
3
+
4
+ These tests exercise the full hook-wiring path without requiring
5
+ strands-agents to be installed. A minimal fake registry / event surface
6
+ mirrors the Strands API contract described in the mission brief.
7
+ """
8
+
9
+ import json
10
+ import sys
11
+ import tempfile
12
+ import types
13
+ import unittest
14
+ from pathlib import Path
15
+ from typing import Any, Callable, Dict, List, Optional
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Fake Strands hook infrastructure (no SDK required)
20
+ # ---------------------------------------------------------------------------
21
+
22
class FakeHookRegistry:
    """In-memory substitute for ``strands.hooks.HookRegistry``.

    Callbacks are bucketed by the ``__name__`` of the event class they were
    registered for, and ``fire`` dispatches an event instance to the bucket
    matching the name of its concrete type.
    """

    def __init__(self):
        # event class name -> callbacks, in registration order
        self._callbacks: Dict[str, List[Callable]] = {}

    def add_callback(self, event_cls, callback: Callable) -> None:
        """Register *callback* under the name of *event_cls*."""
        handlers = self._callbacks.setdefault(event_cls.__name__, [])
        handlers.append(callback)

    def fire(self, event) -> None:
        """Invoke, in registration order, every callback for *event*'s type.

        Events whose type name has no registered callbacks are ignored.
        """
        handlers = self._callbacks.get(type(event).__name__, [])
        for handler in handlers:
            handler(event)
37
+
38
+
39
+ # Fake event classes — named to match what register_hooks imports from strands.hooks
40
class AgentInitializedEvent:
    """Attribute-less fake event; only its class name is used for dispatch."""
42
+
43
+
44
class BeforeInvocationEvent:
    """Attribute-less fake event; only its class name is used for dispatch."""
46
+
47
+
48
class AfterInvocationEvent:
    """Attribute-less fake event; only its class name is used for dispatch."""
50
+
51
+
52
class BeforeToolCallEvent:
    """Fake pre-tool-call event carrying a ``tool_use`` dict and a cancel slot.

    ``tool_use`` has the keys ``"name"`` and ``"input"``. ``cancel_tool``
    starts as ``None``; the policy-gate tests in this module expect the hook
    under test to set it when a call is blocked.
    """

    # Class-level default, mirrored by the per-instance reset in __init__.
    cancel_tool: Optional[str] = None

    def __init__(self, tool_name: str, tool_input: Optional[dict] = None):
        # A missing (or empty) input is normalised to a fresh empty dict.
        payload = tool_input if tool_input else {}
        self.cancel_tool = None
        self.tool_use = {"name": tool_name, "input": payload}
58
+
59
+
60
class AfterToolCallEvent:
    """Fake post-tool-call event exposing ``tool_use``, ``result`` and ``retry``.

    The tool input is always recorded as an empty dict, and ``retry`` starts
    out ``False``.
    """

    def __init__(self, tool_name: str, result: Any = None):
        self.retry = False
        self.result = result
        self.tool_use = {"name": tool_name, "input": {}}
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Install fake strands module into sys.modules so FlowAgentsHooks can import
69
+ # from strands.hooks without the real SDK being installed.
70
+ # ---------------------------------------------------------------------------
71
+
72
def _install_fake_strands() -> None:
    """Register stand-in ``strands`` and ``strands.hooks`` modules in ``sys.modules``.

    Each fake event class defined in this file is exposed on the fake
    ``strands.hooks`` module under its own ``__name__``. Any existing
    ``sys.modules`` entries for those two names are replaced.
    """
    fake_events = (
        AgentInitializedEvent,
        BeforeInvocationEvent,
        AfterInvocationEvent,
        BeforeToolCallEvent,
        AfterToolCallEvent,
    )

    hooks_mod = types.ModuleType("strands.hooks")
    for event_cls in fake_events:
        # The attribute name is the class's own name.
        setattr(hooks_mod, event_cls.__name__, event_cls)

    strands_mod = types.ModuleType("strands")
    strands_mod.hooks = hooks_mod  # type: ignore[attr-defined]

    sys.modules.update({"strands": strands_mod, "strands.hooks": hooks_mod})
90
+
91
+
92
+ _install_fake_strands()
93
+
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # Tests
97
+ # ---------------------------------------------------------------------------
98
+
99
class TestFlowAgentsHooksRegistration(unittest.TestCase):
    """Check that ``register_hooks`` attaches callbacks and does not raise."""

    def _make_hooks(self, tmp_dir: str):
        """Build a ``FlowAgentsHooks`` whose telemetry sink is *tmp_dir*."""
        # Imported at call time, i.e. after the module-level
        # _install_fake_strands() call has already run.
        from flow_agents_strands import FlowAgentsHooks
        return FlowAgentsHooks(sink_path=tmp_dir, agent_name="test-agent")

    def test_register_hooks_runs_without_error(self):
        with tempfile.TemporaryDirectory() as sink_dir:
            hooks = self._make_hooks(sink_dir)
            registry = FakeHookRegistry()
            hooks.register_hooks(registry)
            # At least one event type must have received a callback.
            self.assertGreater(len(registry._callbacks), 0)

    def test_all_five_event_types_registered(self):
        expected = {
            "AgentInitializedEvent",
            "BeforeInvocationEvent",
            "AfterInvocationEvent",
            "BeforeToolCallEvent",
            "AfterToolCallEvent",
        }
        with tempfile.TemporaryDirectory() as sink_dir:
            hooks = self._make_hooks(sink_dir)
            registry = FakeHookRegistry()
            hooks.register_hooks(registry)
            registered = set(registry._callbacks.keys())
            self.assertEqual(expected, registered)
126
+
127
+
128
class TestFlowAgentsHooksTelemetry(unittest.TestCase):
    """Check the event records written to the sink by each hook callback."""

    def setUp(self):
        # Fresh sink directory for every test.
        self._tmp = tempfile.TemporaryDirectory()
        self._tmp_path = Path(self._tmp.name)

    def tearDown(self):
        self._tmp.cleanup()

    def _make_hooks(self):
        """Return a ``FlowAgentsHooks`` whose sink is this test's temp dir."""
        from flow_agents_strands import FlowAgentsHooks
        sink = str(self._tmp_path)
        return FlowAgentsHooks(sink_path=sink, agent_name="test-agent")

    def _read_events(self):
        """Parse ``full.jsonl`` in the sink directory into a list of objects.

        Returns an empty list when the file does not exist; blank lines are
        skipped.
        """
        log_file = self._tmp_path / "full.jsonl"
        if not log_file.exists():
            return []
        records = []
        for raw in log_file.read_text(encoding="utf-8").splitlines():
            if raw.strip():
                records.append(json.loads(raw))
        return records

    def _event_types(self):
        """Return the ``event_type`` of every logged record, in file order."""
        return [record["event_type"] for record in self._read_events()]

    def test_session_start_emitted_on_agent_initialized(self):
        hooks = self._make_hooks()
        hooks._on_agent_initialized(AgentInitializedEvent())
        records = self._read_events()
        self.assertEqual(1, len(records))
        self.assertEqual("session.start", records[0]["event_type"])

    def test_tool_invoke_emitted_on_before_tool_call(self):
        hooks = self._make_hooks()
        hooks._on_before_tool_call(BeforeToolCallEvent("read", {"path": "README.md"}))
        records = self._read_events()
        self.assertEqual(1, len(records))
        first = records[0]
        self.assertEqual("tool.invoke", first["event_type"])
        self.assertEqual("read", first["tool"]["name"])

    def test_tool_result_emitted_on_after_tool_call(self):
        hooks = self._make_hooks()
        hooks._on_after_tool_call(AfterToolCallEvent("read", result="file content"))
        records = self._read_events()
        self.assertEqual(1, len(records))
        first = records[0]
        self.assertEqual("tool.result", first["event_type"])
        self.assertEqual("file content", first["tool"]["output"])

    def test_session_end_emitted_on_after_invocation(self):
        hooks = self._make_hooks()
        hooks._on_agent_initialized(AgentInitializedEvent())
        hooks._on_after_invocation(AfterInvocationEvent())
        self.assertIn("session.end", self._event_types())

    def test_full_lifecycle_produces_correct_sequence(self):
        hooks = self._make_hooks()
        hooks._on_agent_initialized(AgentInitializedEvent())
        hooks._on_before_invocation(BeforeInvocationEvent())
        hooks._on_before_tool_call(BeforeToolCallEvent("bash", {"command": "ls"}))
        hooks._on_after_tool_call(AfterToolCallEvent("bash", result="file1.py"))
        hooks._on_after_invocation(AfterInvocationEvent())

        expected_order = [
            "session.start",
            "turn.user",
            "tool.invoke",
            "tool.result",
            "session.end",
        ]
        self.assertEqual(expected_order, self._event_types())
202
+
203
+
204
class TestFlowAgentsHooksPolicyGate(unittest.TestCase):
    """
    Check tool-call cancellation for writes to protected config files.

    A ``BeforeToolCallEvent`` that targets a protected config file with a
    write tool must come back with ``event.cancel_tool`` set; reads and writes
    to ordinary files must leave it as ``None``.
    """

    def setUp(self):
        self._tmp = tempfile.TemporaryDirectory()

    def tearDown(self):
        self._tmp.cleanup()

    def _make_hooks(self):
        """Return a ``FlowAgentsHooks`` whose sink is this test's temp dir."""
        from flow_agents_strands import FlowAgentsHooks
        return FlowAgentsHooks(sink_path=self._tmp.name, agent_name="test")

    def _dispatch(self, hooks, tool_name, path):
        """Send one before-tool-call event through *hooks* and return it."""
        event = BeforeToolCallEvent(tool_name, {"path": path})
        hooks._on_before_tool_call(event)
        return event

    def test_cancel_tool_set_for_protected_write(self):
        event = self._dispatch(self._make_hooks(), "write", ".eslintrc.json")
        self.assertIsNotNone(event.cancel_tool)
        self.assertIn("BLOCKED", event.cancel_tool)

    def test_cancel_tool_not_set_for_safe_write(self):
        event = self._dispatch(self._make_hooks(), "write", "src/main.py")
        self.assertIsNone(event.cancel_tool)

    def test_cancel_tool_not_set_for_read_on_protected_file(self):
        event = self._dispatch(self._make_hooks(), "read", ".eslintrc.json")
        self.assertIsNone(event.cancel_tool)

    def test_cancel_tool_covers_all_protected_files(self):
        from flow_agents_strands.policy import PROTECTED_FILES
        hooks = self._make_hooks()
        for fname in PROTECTED_FILES:
            with self.subTest(file=fname):
                event = self._dispatch(hooks, "write", f"/repo/{fname}")
                self.assertIsNotNone(
                    event.cancel_tool,
                    f"Expected cancel_tool for {fname} but got None",
                )

    def test_telemetry_still_emitted_even_when_cancelled(self):
        """Policy block must not suppress telemetry."""
        self._dispatch(self._make_hooks(), "write", "biome.json")
        log_file = Path(self._tmp.name) / "full.jsonl"
        lines = log_file.read_text(encoding="utf-8").strip().splitlines()
        self.assertEqual(1, len(lines))
        record = json.loads(lines[0])
        self.assertEqual("tool.invoke", record["event_type"])
263
+
264
+
265
class TestFlowAgentsHooksSteeringContext(unittest.TestCase):
    """Check ``steering_context()`` for empty and populated workspaces."""

    @staticmethod
    def _context_for(workspace_dir):
        """Return ``steering_context()`` for hooks rooted at *workspace_dir*.

        The same directory is used as both the telemetry sink and the
        workspace.
        """
        from flow_agents_strands import FlowAgentsHooks
        hooks = FlowAgentsHooks(sink_path=workspace_dir, workspace=workspace_dir)
        return hooks.steering_context()

    def test_steering_context_returns_string(self):
        with tempfile.TemporaryDirectory() as workspace_dir:
            self.assertIsInstance(self._context_for(workspace_dir), str)

    def test_steering_context_empty_when_no_flow_agents_dir(self):
        with tempfile.TemporaryDirectory() as workspace_dir:
            self.assertEqual("", self._context_for(workspace_dir))

    def test_steering_context_with_active_state(self):
        """If .flow-agents/<task>/state.json has active status, context is returned."""
        state = {
            "task_slug": "my-task",
            "status": "in_progress",
            "phase": "execute",
            "next_action": {"summary": "Run tests", "target_phase": "verify"},
        }
        with tempfile.TemporaryDirectory() as workspace_dir:
            state_dir = Path(workspace_dir) / ".flow-agents" / "my-task"
            state_dir.mkdir(parents=True)
            state_file = state_dir / "state.json"
            state_file.write_text(json.dumps(state), encoding="utf-8")

            ctx = self._context_for(workspace_dir)
            self.assertIn("my-task", ctx)
            self.assertIn("in_progress", ctx)
301
+
302
+
303
+ if __name__ == "__main__":
304
+ unittest.main()
@@ -0,0 +1,315 @@
1
+ """
2
+ Tests for policy module — config-protection gate.
3
+
4
+ Uses stdlib unittest only; no strands-agents required.
5
+ """
6
+
7
+ import unittest
8
+
9
+
10
class TestPolicyGateConfigProtection(unittest.TestCase):
    """Exercise a default-constructed ``PolicyGate`` against config-file writes."""

    def setUp(self):
        from flow_agents_strands.policy import PolicyGate
        self._gate = PolicyGate()

    def _verdict(self, tool_name, tool_input):
        """Shorthand for ``self._gate.check_tool_call(tool_name, tool_input)``."""
        return self._gate.check_tool_call(tool_name, tool_input)

    # --- Blocked write tools ---

    def test_blocks_write_to_eslintrc(self):
        reason = self._verdict("write", {"path": "/repo/.eslintrc.json"})
        self.assertIsNotNone(reason)
        for fragment in ("BLOCKED", ".eslintrc.json"):
            self.assertIn(fragment, reason)

    def test_blocks_edit_to_prettier_config(self):
        reason = self._verdict("edit", {"path": "prettier.config.js"})
        self.assertIsNotNone(reason)
        self.assertIn("BLOCKED", reason)

    def test_blocks_fs_write_to_biome_json(self):
        self.assertIsNotNone(self._verdict("fs_write", {"file_path": "biome.json"}))

    def test_blocks_edit_to_ruff_toml(self):
        self.assertIsNotNone(self._verdict("edit", {"path": "ruff.toml"}))

    def test_blocks_apply_patch_to_markdownlint(self):
        self.assertIsNotNone(
            self._verdict("apply_patch", {"path": ".markdownlint.json"})
        )

    def test_block_message_includes_guidance(self):
        reason = self._verdict("write", {"path": ".eslintrc"})
        self.assertIn("linter/formatter rules", reason)

    # --- Allowed cases ---

    def test_allows_write_to_regular_python_file(self):
        self.assertIsNone(self._verdict("write", {"path": "src/main.py"}))

    def test_allows_read_on_protected_file(self):
        """Read tools must never be blocked."""
        self.assertIsNone(self._verdict("read", {"path": ".eslintrc.json"}))

    def test_allows_bash(self):
        self.assertIsNone(self._verdict("bash", {"command": "ls"}))

    def test_allows_write_without_path(self):
        """No path → no block."""
        self.assertIsNone(self._verdict("write", {}))

    def test_allows_write_to_package_json(self):
        self.assertIsNone(self._verdict("write", {"path": "package.json"}))

    # --- Full protected-files coverage ---

    def test_all_canonical_protected_files_are_blocked(self):
        from flow_agents_strands.policy import PROTECTED_FILES
        for fname in PROTECTED_FILES:
            with self.subTest(file=fname):
                reason = self._verdict("write", {"path": f"/repo/{fname}"})
                self.assertIsNotNone(
                    reason,
                    f"Expected {fname} to be blocked but got None",
                )
82
+
83
+
84
class TestPolicyGateCustomProtectedFiles(unittest.TestCase):
    """Check that a caller-supplied ``protected_files`` set replaces the default."""

    def test_custom_protected_set(self):
        from flow_agents_strands.policy import PolicyGate
        custom = frozenset(["pyproject.toml"])
        gate = PolicyGate(protected_files=custom)

        blocked = gate.check_tool_call("write", {"path": "pyproject.toml"})
        self.assertIsNotNone(blocked)

        # A file from the default list must pass once the set is overridden.
        allowed = gate.check_tool_call("write", {"path": ".eslintrc.json"})
        self.assertIsNone(allowed)
93
+
94
+
95
# NOTE: the ``unittest.main()`` entry point lives at the very end of this
# module. Calling it at this point would load only the test classes defined
# above and then exit the interpreter, so the contract-binding and end-to-end
# suites defined below would never run when the file is executed directly.
97
+
98
+
99
+ # ============================================================================
100
+ # Contract-binding tests — verify subprocess delegation to the Node.js engine
101
+ # ============================================================================
102
+
103
+
104
class _FakeNodeProcess:
    """
    Plain record with ``returncode``, ``stdout`` and ``stderr`` attributes,
    intended as a fake ``subprocess.run`` result for the engine binding path.
    """

    def __init__(self, returncode: int, stdout: str = "", stderr: str = ""):
        self.returncode, self.stdout, self.stderr = returncode, stdout, stderr
113
+
114
+
115
class TestPolicyGateEngineBinding(unittest.TestCase):
    """
    Verify that PolicyGate delegates to the engine subprocess contract.

    These tests inject a fake node path and engine path and patch the
    module-level ``_invoke_engine`` function, so the subprocess-binding code
    path is exercised without requiring a live Node.js process.
    """

    def _make_gate_with_fake_engine(self, fake_returncode, fake_stderr="", fake_stdout=""):
        """
        Return a PolicyGate wired to a fake engine via monkeypatching.

        The gate is built with ``_node_bin='node'`` and
        ``_run_hook_path='/fake/run-hook.js'``, then ``policy._invoke_engine``
        is patched to return ``(fake_returncode, fake_stdout, fake_stderr)``.
        The started mock is kept on ``self._mock_invoke``.

        The patch is undone through ``addCleanup``, so it is reverted after
        the test even if this helper is called more than once in one test
        (a single stored patcher handle would leak the earlier patch).
        """
        import unittest.mock as mock
        from flow_agents_strands import policy as policy_module

        gate = policy_module.PolicyGate(
            _node_bin="node",
            _run_hook_path="/fake/run-hook.js",
        )

        fake_result = (fake_returncode, fake_stdout, fake_stderr)
        patcher = mock.patch.object(
            policy_module, "_invoke_engine", return_value=fake_result
        )
        self._mock_invoke = patcher.start()
        self.addCleanup(patcher.stop)
        return gate

    def test_engine_block_returns_stderr_reason(self):
        """When engine exits 2, the block reason is taken from stderr."""
        gate = self._make_gate_with_fake_engine(
            fake_returncode=2,
            fake_stderr="BLOCKED: Modifying .eslintrc.json is not allowed. Fix the source code."
        )
        reason = gate.check_tool_call("write", {"path": ".eslintrc.json"})
        self.assertIsNotNone(reason)
        self.assertIn("BLOCKED", reason)
        self.assertIn(".eslintrc.json", reason)

    def test_engine_allow_returns_none(self):
        """When engine exits 0, check_tool_call returns None (allowed)."""
        gate = self._make_gate_with_fake_engine(fake_returncode=0)
        result = gate.check_tool_call("write", {"path": "src/main.ts"})
        self.assertIsNone(result)

    def test_engine_error_fails_open(self):
        """When engine exits non-0 non-2, check_tool_call fails open (returns None)."""
        gate = self._make_gate_with_fake_engine(fake_returncode=1, fake_stderr="some error")
        result = gate.check_tool_call("write", {"path": ".eslintrc.json"})
        self.assertIsNone(result)

    def test_engine_invoked_with_correct_payload_shape(self):
        """Verify the payload sent to the engine has the expected structure."""
        import unittest.mock as mock
        from flow_agents_strands import policy as policy_module

        gate = policy_module.PolicyGate(
            _node_bin="node",
            _run_hook_path="/fake/run-hook.js",
        )

        with mock.patch.object(policy_module, "_invoke_engine", return_value=(0, "", "")) as m:
            gate.check_tool_call("write", {"path": "src/main.ts"})
            m.assert_called_once()
            args, kwargs = m.call_args
            # Accept the payload either as a keyword or as the third positional.
            payload = kwargs["payload"] if "payload" in kwargs else args[2]
            self.assertEqual("PreToolUse", payload.get("hook_event_name"))
            self.assertEqual("write", payload.get("tool_name"))
            self.assertEqual({"path": "src/main.ts"}, payload.get("tool_input"))

    def test_read_tool_skips_engine(self):
        """Read tools must bypass the engine entirely (tool-name pre-filter)."""
        import unittest.mock as mock
        from flow_agents_strands import policy as policy_module

        gate = policy_module.PolicyGate(
            _node_bin="node",
            _run_hook_path="/fake/run-hook.js",
        )

        # The fake engine would block, so a None result proves it was skipped.
        with mock.patch.object(policy_module, "_invoke_engine", return_value=(2, "", "BLOCKED")) as m:
            result = gate.check_tool_call("read", {"path": ".eslintrc.json"})
            self.assertIsNone(result)
            m.assert_not_called()

    def test_custom_protected_set_bypasses_engine(self):
        """Custom protected_files use Python evaluation, not the engine subprocess."""
        import unittest.mock as mock
        from flow_agents_strands import policy as policy_module

        gate = policy_module.PolicyGate(
            protected_files=frozenset(["pyproject.toml"]),
            _node_bin="node",
            _run_hook_path="/fake/run-hook.js",
        )

        with mock.patch.object(policy_module, "_invoke_engine") as m:
            result = gate.check_tool_call("write", {"path": "pyproject.toml"})
            self.assertIsNotNone(result)  # blocked by custom set
            m.assert_not_called()  # engine not called

    def test_no_engine_path_falls_back_to_python(self):
        """When run-hook.js is not found, PolicyGate falls back to Python evaluation."""
        import warnings
        from flow_agents_strands import policy as policy_module

        # Passing None explicitly overrides module-level resolution, forcing fallback
        gate = policy_module.PolicyGate(_node_bin="node", _run_hook_path=None)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            result = gate.check_tool_call("write", {"path": ".eslintrc.json"})
        self.assertIsNotNone(result)
        self.assertIn("BLOCKED", result)
        # Should have emitted the fallback warning
        runtime_warnings = [w for w in caught if issubclass(w.category, RuntimeWarning)]
        self.assertEqual(1, len(runtime_warnings))
        self.assertIn("Node.js", str(runtime_warnings[0].message))

    def test_no_node_falls_back_to_python(self):
        """When node binary is not found, PolicyGate falls back to Python evaluation."""
        import warnings
        from flow_agents_strands import policy as policy_module

        # Passing None explicitly overrides module-level resolution, forcing fallback
        gate = policy_module.PolicyGate(_node_bin=None, _run_hook_path="/fake/run-hook.js")
        # Warnings are captured only to keep them out of the test output.
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            result = gate.check_tool_call("write", {"path": ".eslintrc.json"})
        self.assertIsNotNone(result)
        self.assertIn("BLOCKED", result)
252
+
253
+
254
+ # ============================================================================
255
+ # End-to-end test — invokes the actual Node.js engine
256
+ # ============================================================================
257
+
258
+
259
class TestPolicyGateEndToEnd(unittest.TestCase):
    """
    Real end-to-end test: invokes the actual node engine via subprocess.

    The whole class is skipped if ``_find_engine_paths()`` does not return
    both a node binary and an engine script path.
    """

    @classmethod
    def setUpClass(cls):
        """Resolve engine paths once; skip the whole class if unavailable."""
        from flow_agents_strands.policy import _find_engine_paths

        node, run_hook = _find_engine_paths()
        if not node or not run_hook:
            raise unittest.SkipTest(
                "Node.js or the Flow Agents engine script (run-hook.js) is not available. "
                "Skipping end-to-end policy tests."
            )
        cls._node_bin = node
        cls._run_hook_path = run_hook

    def _make_gate(self):
        """Return a PolicyGate bound to the engine paths resolved in setUpClass."""
        from flow_agents_strands.policy import PolicyGate
        return PolicyGate(_node_bin=self._node_bin, _run_hook_path=self._run_hook_path)

    def test_e2e_blocks_eslintrc_write(self):
        """Real engine call: blocks write to .eslintrc.json."""
        gate = self._make_gate()
        reason = gate.check_tool_call("write", {"path": "/repo/.eslintrc.json"})
        self.assertIsNotNone(reason, "Expected engine to block .eslintrc.json write")
        self.assertIn("BLOCKED", reason)
        self.assertIn(".eslintrc.json", reason)

    def test_e2e_allows_safe_file_write(self):
        """Real engine call: allows write to src/main.ts."""
        gate = self._make_gate()
        result = gate.check_tool_call("write", {"path": "src/main.ts"})
        self.assertIsNone(result, "Expected engine to allow src/main.ts write")

    def test_e2e_allows_read_on_protected_file(self):
        """Real engine call: read tools bypass the engine (tool-name pre-filter)."""
        gate = self._make_gate()
        result = gate.check_tool_call("read", {"path": ".eslintrc.json"})
        self.assertIsNone(result, "Read on protected file must never be blocked")

    def test_e2e_blocks_biome_json_via_file_path_key(self):
        """Real engine call: blocks edit to biome.json using file_path key."""
        gate = self._make_gate()
        reason = gate.check_tool_call("edit", {"file_path": "biome.json"})
        self.assertIsNotNone(reason)
        self.assertIn("biome.json", reason)
312
+
313
+
314
+ if __name__ == "__main__":
315
+ unittest.main()