@kontourai/flow-agents 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +23 -0
- package/.github/workflows/release-please.yml +31 -0
- package/.github/workflows/runtime-compat.yml +118 -0
- package/CHANGELOG.md +23 -0
- package/CONTRIBUTING.md +4 -0
- package/README.md +53 -10
- package/build/src/cli/init.js +215 -5
- package/build/src/cli/utterance-check.js +65 -1
- package/build/src/tools/build-universal-bundles.js +268 -0
- package/build/src/tools/filter-installed-packs.js +3 -0
- package/build/src/tools/validate-source-tree.js +5 -1
- package/context/scripts/telemetry/lib/config.sh +5 -1
- package/context/settings/flow-agents-settings.json +7 -0
- package/docs/context-map.md +1 -0
- package/docs/index.md +45 -4
- package/docs/integrations/conformance.md +246 -0
- package/docs/integrations/framework-adapter.md +275 -0
- package/docs/integrations/harness-install.md +213 -0
- package/docs/integrations/index.md +54 -0
- package/docs/north-star.md +2 -2
- package/docs/spec/runtime-hook-surface.md +472 -0
- package/docs/survey-utterance-check.md +211 -94
- package/docs/vision.md +45 -0
- package/evals/acceptance/run.sh +4 -2
- package/evals/acceptance/test_opencode_harness.sh +121 -0
- package/evals/acceptance/test_pi_harness.sh +98 -0
- package/evals/integration/test_bundle_install.sh +226 -1
- package/evals/integration/test_bundle_lifecycle.sh +641 -0
- package/evals/integration/test_utterance_check.sh +291 -44
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +137 -2
- package/integrations/strands/README.md +256 -0
- package/integrations/strands/example.py +74 -0
- package/integrations/strands/flow_agents_strands/__init__.py +27 -0
- package/integrations/strands/flow_agents_strands/hooks.py +194 -0
- package/integrations/strands/flow_agents_strands/policy.py +348 -0
- package/integrations/strands/flow_agents_strands/steering.py +172 -0
- package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
- package/integrations/strands/pyproject.toml +38 -0
- package/integrations/strands/tests/__init__.py +0 -0
- package/integrations/strands/tests/test_hooks.py +304 -0
- package/integrations/strands/tests/test_policy.py +315 -0
- package/integrations/strands/tests/test_telemetry.py +184 -0
- package/integrations/strands-ts/README.md +224 -0
- package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
- package/integrations/strands-ts/package.json +53 -0
- package/integrations/strands-ts/src/hooks.ts +208 -0
- package/integrations/strands-ts/src/index.ts +22 -0
- package/integrations/strands-ts/src/policy.ts +345 -0
- package/integrations/strands-ts/src/telemetry.ts +251 -0
- package/integrations/strands-ts/test/test-policy.ts +322 -0
- package/integrations/strands-ts/test/test-telemetry.ts +226 -0
- package/integrations/strands-ts/tsconfig.json +20 -0
- package/package.json +7 -2
- package/packaging/conformance/README.md +142 -0
- package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
- package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
- package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
- package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
- package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
- package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
- package/packaging/conformance/package.json +4 -0
- package/packaging/conformance/run-conformance.js +322 -0
- package/packaging/manifest.json +59 -0
- package/schemas/flow-agents-settings.schema.json +48 -0
- package/scripts/README.md +4 -0
- package/scripts/dogfood.js +16 -0
- package/scripts/hooks/opencode-hook-adapter.js +123 -0
- package/scripts/hooks/opencode-telemetry-hook.js +101 -0
- package/scripts/hooks/pi-hook-adapter.js +123 -0
- package/scripts/hooks/pi-telemetry-hook.js +105 -0
- package/scripts/hooks/run-hook.js +8 -0
- package/scripts/hooks/utterance-check.js +124 -22
- package/scripts/telemetry/lib/config.sh +5 -1
- package/src/cli/init.ts +219 -6
- package/src/cli/utterance-check.ts +71 -1
- package/src/tools/build-universal-bundles.ts +266 -0
- package/src/tools/filter-installed-packs.ts +3 -0
- package/src/tools/validate-source-tree.ts +5 -1
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""
|
|
2
|
+
telemetry.py — Canonical Flow Agents telemetry event builder and JSONL sink.
|
|
3
|
+
|
|
4
|
+
Event taxonomy mirrors the JS telemetry hooks exactly:
|
|
5
|
+
|
|
6
|
+
claude-telemetry-hook.js → canonicalEvent() mapping:
|
|
7
|
+
SessionStart → agentSpawn
|
|
8
|
+
UserPromptSubmit → userPromptSubmit
|
|
9
|
+
PreToolUse → preToolUse
|
|
10
|
+
PostToolUse → postToolUse
|
|
11
|
+
PostToolUseFailure → postToolUse
|
|
12
|
+
Stop / SessionEnd → stop
|
|
13
|
+
SubagentStart → subagentStart
|
|
14
|
+
SubagentStop → subagentStop
|
|
15
|
+
|
|
16
|
+
telemetry.sh → schema_event_type():
|
|
17
|
+
agentSpawn / SessionStart → session.start
|
|
18
|
+
stop / Stop / SessionEnd → session.end
|
|
19
|
+
userPromptSubmit / UserPromptSubmit → turn.user
|
|
20
|
+
preToolUse / PreToolUse → tool.invoke
|
|
21
|
+
postToolUse / PostToolUse → tool.result
|
|
22
|
+
|
|
23
|
+
Strands hook events are mapped to the same canonical names so the emitted
|
|
24
|
+
JSONL records are structurally identical to those produced by the Claude Code
|
|
25
|
+
and Codex telemetry hooks.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import time
|
|
33
|
+
import uuid
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any, Dict, Optional
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Strands → canonical event-name mapping
|
|
39
|
+
# (module-level dict so it is inspectable / documented)
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Lookup table keyed by Strands hook event class name (a plain string, so the
# Strands SDK does not need to be importable to read it).  Values are the
# canonical Flow Agents event names that _CANONICAL_TO_SCHEMA understands.
# Note that the mapping is many-to-one: two Strands events map to
# "userPromptSubmit" and two map to "postToolUse".
STRANDS_TO_CANONICAL: Dict[str, str] = {
    # Strands event class name → canonical Flow Agents event name
    "AgentInitializedEvent": "agentSpawn",
    "BeforeInvocationEvent": "userPromptSubmit",
    "AfterInvocationEvent": "stop",
    "BeforeToolCallEvent": "preToolUse",
    "AfterToolCallEvent": "postToolUse",
    "AfterModelCallEvent": "postToolUse",  # closest analogue; no tool name
    "MessageAddedEvent": "userPromptSubmit",
}
|
|
52
|
+
|
|
53
|
+
# Canonical → schema event type (mirrors telemetry.sh schema_event_type())
|
|
54
|
+
_CANONICAL_TO_SCHEMA: Dict[str, str] = {
    # session lifecycle
    "agentSpawn": "session.start",
    "userPromptSubmit": "turn.user",
    # tool lifecycle
    "preToolUse": "tool.invoke",
    "permissionRequest": "tool.permission_request",
    "postToolUse": "tool.result",
    "stop": "session.end",
    # both sub-agent edges share a single schema type
    "subagentStart": "agent.delegate",
    "subagentStop": "agent.delegate",
}


def _schema_event_type(canonical: str) -> str:
    """
    Translate a canonical event name into its schema event type.

    Names that are not present in _CANONICAL_TO_SCHEMA yield "unknown".
    """
    try:
        return _CANONICAL_TO_SCHEMA[canonical]
    except KeyError:
        return "unknown"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# JSONL sink
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
class TelemetrySink:
    """
    Writes canonical Flow Agents telemetry events to a JSONL file.

    Default path: <workspace>/.flow-agents/.telemetry/full.jsonl
    This matches the local-files sink convention from config.sh:
      TELEMETRY_CHANNEL_FULL_LOG_FILE = <data_dir>/full.jsonl
    where data_dir defaults to <repo_root>/.telemetry/

    For the Strands adapter we follow the harness convention of writing
    inside .flow-agents/.telemetry/ to keep everything under one dot-dir.

    The sink is fail-open: neither construction nor emit() raises because
    of filesystem or serialization problems; the event is dropped instead.
    """

    DEFAULT_SUBDIR = Path(".flow-agents") / ".telemetry"
    DEFAULT_FILENAME = "full.jsonl"
    SCHEMA_VERSION = "0.3.0"

    def __init__(
        self,
        sink_path: Optional[str] = None,
        workspace: Optional[str] = None,
        agent_name: str = "strands-agent",
        runtime: str = "strands",
    ) -> None:
        """
        Resolve the log file location and create its parent directory.

        Args:
            sink_path: Explicit log file, or a directory that receives
                DEFAULT_FILENAME.  Takes precedence over ``workspace``.
            workspace: Root under which DEFAULT_SUBDIR is created when no
                ``sink_path`` is given.  Defaults to the current directory.
            agent_name: Value stored in ``agent.name`` of every event.
            runtime: Value stored in ``agent.runtime`` of every event.
        """
        self.agent_name = agent_name
        self.runtime = runtime
        self._session_id: Optional[str] = None

        if sink_path:
            p = Path(sink_path)
            if self._is_directory_target(p):
                self._log_file = p / self.DEFAULT_FILENAME
            else:
                self._log_file = p
        else:
            # Only consult the working directory when it is actually needed;
            # Path.cwd() raises if the process cwd has been removed.
            ws = Path(workspace) if workspace else Path.cwd()
            self._log_file = ws / self.DEFAULT_SUBDIR / self.DEFAULT_FILENAME

        try:
            self._log_file.parent.mkdir(parents=True, exist_ok=True)
        except OSError:
            # fail-open: an unwritable location must not prevent the agent
            # from starting; emit() will silently drop events in that case.
            pass

    @staticmethod
    def _is_directory_target(path: Path) -> bool:
        """Return True when ``path`` should be treated as a directory."""
        try:
            # An existing directory is a directory even if its name contains
            # a dot (e.g. "tmp.Ab3dEf"), which the suffix heuristic misreads.
            if path.is_dir():
                return True
        except OSError:
            pass
        # Path does not exist yet: fall back to "no extension means directory".
        return path.suffix == ""

    @property
    def session_id(self) -> str:
        """Session identifier, generated lazily on first access and then reused."""
        if self._session_id is None:
            self._session_id = str(uuid.uuid4())
        return self._session_id

    def _base_event(self, schema_event_type: str) -> Dict[str, Any]:
        """Build the base event envelope matching telemetry.sh build_base_event()."""
        return {
            "schema_version": self.SCHEMA_VERSION,
            # Epoch milliseconds, serialized as a string.
            "timestamp": str(int(time.time() * 1000)),
            "session_id": self.session_id,
            "event_id": str(uuid.uuid4()),
            "event_type": schema_event_type,
            "agent": {
                "name": self.agent_name,
                "runtime": self.runtime,
                "version": "unknown",
            },
        }

    def emit(
        self,
        canonical_event: str,
        extra: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Build and write a canonical telemetry event.

        Args:
            canonical_event: Canonical event name (e.g. "preToolUse").
            extra: Top-level keys merged into the event after the envelope
                and hook stub are built; same-named keys are overwritten.

        Returns the emitted dict (useful for tests / callers that need the
        event for further processing).  The dict is returned even when the
        write to disk fails.
        """
        schema_type = _schema_event_type(canonical_event)
        event = self._base_event(schema_type)

        # Attach hook context stub (mirrors add_hook_context() in telemetry.sh)
        event["hook"] = {
            "event_name": canonical_event,
            "runtime_session_id": "",
            "turn_id": "",
            "transcript_path": "",
            "model": "",
            "source": "strands",
            "stop_hook_active": None,
            "last_assistant_message": "",
            "raw_input": None,
        }

        if extra:
            event.update(extra)

        try:
            # default=str is deliberate: tool inputs/outputs are arbitrary
            # runtime objects, and a lossy string is better than no record.
            line = json.dumps(event, default=str)
            with self._log_file.open("a", encoding="utf-8") as fh:
                fh.write(line + "\n")
        except (OSError, TypeError, ValueError):
            # fail-open: telemetry must never block agent work.  TypeError
            # covers unsupported dict keys, ValueError circular references.
            pass

        return event

    def emit_session_start(self, extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Emit an "agentSpawn" event, optionally merged with ``extra``."""
        return self.emit("agentSpawn", extra)

    def emit_session_end(self, duration_s: float = 0.0) -> Dict[str, Any]:
        """Emit a "stop" event carrying the session duration in seconds."""
        return self.emit("stop", {"session": {"duration_s": duration_s}})

    def emit_tool_invoke(
        self,
        tool_name: str,
        tool_input: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Emit a "preToolUse" event describing the tool about to run."""
        return self.emit(
            "preToolUse",
            {
                "tool": {
                    "name": tool_name,
                    "normalized_name": _normalize_tool_name(tool_name),
                    "input": tool_input,
                }
            },
        )

    def emit_tool_result(
        self,
        tool_name: str,
        tool_output: Any = None,
    ) -> Dict[str, Any]:
        """Emit a "postToolUse" event carrying the tool's output."""
        return self.emit(
            "postToolUse",
            {
                "tool": {
                    "name": tool_name,
                    "normalized_name": _normalize_tool_name(tool_name),
                    "output": tool_output,
                }
            },
        )

    def emit_steering(self, steering_text: str) -> Dict[str, Any]:
        """Emit a synthetic userPromptSubmit event carrying steering context."""
        return self.emit(
            "userPromptSubmit",
            {"turn": {"prompt_text": "", "steering_context": steering_text}},
        )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Lower-cased runtime tool name → canonical Flow Agents tool name.
# Defined once at module level so it is not rebuilt on every call.
_TOOL_NAME_ALIASES = {
    "bash": "execute_bash",
    "execute_bash": "execute_bash",
    "shell": "execute_bash",
    "edit": "fs_write",
    "write": "fs_write",
    "fs_write": "fs_write",
    "apply_patch": "fs_write",
    "read": "fs_read",
    "fs_read": "fs_read",
    "task": "use_subagent",
    "agent": "use_subagent",
    "use_subagent": "use_subagent",
}


def _normalize_tool_name(name: str) -> str:
    """
    Mirror telemetry.sh normalize_tool_name() for the most common cases.

    The lookup is case-insensitive.  Names without a known alias are
    returned unchanged (original casing preserved).  A non-string value
    (e.g. None from an event with no tool name) is also returned unchanged
    instead of raising, so telemetry cannot break the calling hook.
    """
    if not isinstance(name, str):
        return name
    return _TOOL_NAME_ALIASES.get(name.lower(), name)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
requires = ["setuptools>=68"]
# setuptools exposes its PEP 517 backend as "setuptools.build_meta"; there is
# no "setuptools.backends" module, so the previous value made every
# `pip install` / `python -m build` of this package fail at backend import.
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "flow-agents-strands"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Flow Agents framework adapter for AWS Strands Agents — telemetry, policy gates, and workflow steering via the Strands hook surface."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["flow-agents", "strands", "aws", "agents", "telemetry", "hooks"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.9",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
22
|
+
]
|
|
23
|
+
# No runtime dependencies — strands-agents is optional
|
|
24
|
+
dependencies = []
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
# Install the real Strands SDK when you want to wire into a live Agent
|
|
28
|
+
strands = [
|
|
29
|
+
"strands-agents>=0.1.0",
|
|
30
|
+
]
|
|
31
|
+
# Development / test extras
|
|
32
|
+
dev = [
|
|
33
|
+
"strands-agents>=0.1.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["."]
|
|
38
|
+
include = ["flow_agents_strands*"]
|
|
File without changes
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for FlowAgentsHooks — fake registry + fake event objects.
|
|
3
|
+
|
|
4
|
+
These tests exercise the full hook-wiring path without requiring
|
|
5
|
+
strands-agents to be installed. A minimal fake registry / event surface
|
|
6
|
+
mirrors the Strands API contract described in the mission brief.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
import tempfile
|
|
12
|
+
import types
|
|
13
|
+
import unittest
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Fake Strands hook infrastructure (no SDK required)
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
class FakeHookRegistry:
    """Minimal stand-in for strands.hooks.HookRegistry."""

    def __init__(self):
        # event class name -> callbacks, in registration order
        self._callbacks: Dict[str, List[Callable]] = {}

    def add_callback(self, event_cls, callback: Callable) -> None:
        """Register ``callback`` under the name of ``event_cls``."""
        handlers = self._callbacks.setdefault(event_cls.__name__, [])
        handlers.append(callback)

    def fire(self, event) -> None:
        """Invoke every callback registered for the event's class name."""
        event_name = type(event).__name__
        if event_name not in self._callbacks:
            return
        for handler in self._callbacks[event_name]:
            handler(event)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Fake event classes — named to match what register_hooks imports from strands.hooks
|
|
40
|
+
class AgentInitializedEvent:
    """Empty fake event; only its class name is used as a dispatch key."""
    pass


class BeforeInvocationEvent:
    """Empty fake event; only its class name is used as a dispatch key."""
    pass


class AfterInvocationEvent:
    """Empty fake event; only its class name is used as a dispatch key."""
    pass
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class BeforeToolCallEvent:
    """Fake pre-tool event exposing ``tool_use`` and a ``cancel_tool`` slot."""

    # Class-level default; each instance also resets it in __init__.
    cancel_tool: Optional[str] = None

    def __init__(self, tool_name: str, tool_input: Optional[dict] = None):
        # A missing or empty input is normalized to a fresh empty dict.
        payload = tool_input if tool_input else {}
        self.cancel_tool = None
        self.tool_use = {"name": tool_name, "input": payload}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class AfterToolCallEvent:
    """Fake post-tool event exposing ``tool_use``, ``result`` and ``retry``."""

    def __init__(self, tool_name: str, result: Any = None):
        self.retry = False
        self.result = result
        # The tool input is always an empty dict on this fake.
        self.tool_use = dict(name=tool_name, input={})
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Install fake strands module into sys.modules so FlowAgentsHooks can import
|
|
69
|
+
# from strands.hooks without the real SDK being installed.
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
def _install_fake_strands() -> None:
    """
    Register fake ``strands`` and ``strands.hooks`` modules in sys.modules.

    Any existing entries under those two names are replaced.
    """
    fake_event_classes = (
        AgentInitializedEvent,
        BeforeInvocationEvent,
        AfterInvocationEvent,
        BeforeToolCallEvent,
        AfterToolCallEvent,
    )

    hooks_mod = types.ModuleType("strands.hooks")
    # Expose each fake under its own class name, i.e. the Strands name.
    for event_cls in fake_event_classes:
        setattr(hooks_mod, event_cls.__name__, event_cls)

    strands_mod = types.ModuleType("strands")
    strands_mod.hooks = hooks_mod  # type: ignore[attr-defined]

    sys.modules.update({"strands": strands_mod, "strands.hooks": hooks_mod})
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
_install_fake_strands()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Tests
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
class TestFlowAgentsHooksRegistration(unittest.TestCase):
    """Check that register_hooks attaches callbacks to a registry without raising."""

    def _make_hooks(self, tmp_dir: str):
        # Deferred import: resolved at call time rather than at module import.
        from flow_agents_strands import FlowAgentsHooks

        return FlowAgentsHooks(agent_name="test-agent", sink_path=tmp_dir)

    def test_register_hooks_runs_without_error(self):
        registry = FakeHookRegistry()
        with tempfile.TemporaryDirectory() as scratch:
            self._make_hooks(scratch).register_hooks(registry)
            self.assertTrue(len(registry._callbacks) > 0)

    def test_all_five_event_types_registered(self):
        registry = FakeHookRegistry()
        with tempfile.TemporaryDirectory() as scratch:
            self._make_hooks(scratch).register_hooks(registry)
            registered = set(registry._callbacks.keys())
            self.assertEqual(
                {
                    "AgentInitializedEvent",
                    "BeforeInvocationEvent",
                    "AfterInvocationEvent",
                    "BeforeToolCallEvent",
                    "AfterToolCallEvent",
                },
                registered,
            )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class TestFlowAgentsHooksTelemetry(unittest.TestCase):
    """Check the shape and order of telemetry records written by the hooks."""

    def setUp(self):
        self._tmp = tempfile.TemporaryDirectory()
        # Registered cleanup replaces an explicit tearDown().
        self.addCleanup(self._tmp.cleanup)
        self._tmp_path = Path(self._tmp.name)

    def _make_hooks(self):
        from flow_agents_strands import FlowAgentsHooks

        sink_dir = str(self._tmp_path)
        return FlowAgentsHooks(sink_path=sink_dir, agent_name="test-agent")

    def _read_events(self):
        """Return every JSONL record in the sink file, or [] if it is absent."""
        log_file = self._tmp_path / "full.jsonl"
        if not log_file.exists():
            return []
        records = []
        for raw in log_file.read_text(encoding="utf-8").splitlines():
            if raw.strip():
                records.append(json.loads(raw))
        return records

    def _event_types(self):
        """Return the event_type of each written record, in file order."""
        return [record["event_type"] for record in self._read_events()]

    def test_session_start_emitted_on_agent_initialized(self):
        self._make_hooks()._on_agent_initialized(AgentInitializedEvent())
        written = self._read_events()
        self.assertEqual(1, len(written))
        self.assertEqual("session.start", written[0]["event_type"])

    def test_tool_invoke_emitted_on_before_tool_call(self):
        hooks = self._make_hooks()
        hooks._on_before_tool_call(BeforeToolCallEvent("read", {"path": "README.md"}))
        written = self._read_events()
        self.assertEqual(1, len(written))
        (record,) = written
        self.assertEqual("tool.invoke", record["event_type"])
        self.assertEqual("read", record["tool"]["name"])

    def test_tool_result_emitted_on_after_tool_call(self):
        hooks = self._make_hooks()
        hooks._on_after_tool_call(AfterToolCallEvent("read", result="file content"))
        written = self._read_events()
        self.assertEqual(1, len(written))
        (record,) = written
        self.assertEqual("tool.result", record["event_type"])
        self.assertEqual("file content", record["tool"]["output"])

    def test_session_end_emitted_on_after_invocation(self):
        hooks = self._make_hooks()
        hooks._on_agent_initialized(AgentInitializedEvent())
        hooks._on_after_invocation(AfterInvocationEvent())
        self.assertIn("session.end", self._event_types())

    def test_full_lifecycle_produces_correct_sequence(self):
        hooks = self._make_hooks()
        hooks._on_agent_initialized(AgentInitializedEvent())
        hooks._on_before_invocation(BeforeInvocationEvent())
        hooks._on_before_tool_call(BeforeToolCallEvent("bash", {"command": "ls"}))
        hooks._on_after_tool_call(AfterToolCallEvent("bash", result="file1.py"))
        hooks._on_after_invocation(AfterInvocationEvent())

        self.assertEqual(
            ["session.start", "turn.user", "tool.invoke", "tool.result", "session.end"],
            self._event_types(),
        )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class TestFlowAgentsHooksPolicyGate(unittest.TestCase):
    """
    Verify tool-call cancellation on protected-config writes.

    This is the key spike proof-point: a BeforeToolCallEvent targeting a
    protected config file must result in event.cancel_tool being set.
    """

    def setUp(self):
        self._tmp = tempfile.TemporaryDirectory()

    def tearDown(self):
        self._tmp.cleanup()

    def _make_hooks(self):
        from flow_agents_strands import FlowAgentsHooks
        return FlowAgentsHooks(sink_path=self._tmp.name, agent_name="test")

    def test_cancel_tool_set_for_protected_write(self):
        hooks = self._make_hooks()
        event = BeforeToolCallEvent("write", {"path": ".eslintrc.json"})
        hooks._on_before_tool_call(event)
        self.assertIsNotNone(event.cancel_tool)
        self.assertIn("BLOCKED", event.cancel_tool)

    def test_cancel_tool_not_set_for_safe_write(self):
        hooks = self._make_hooks()
        event = BeforeToolCallEvent("write", {"path": "src/main.py"})
        hooks._on_before_tool_call(event)
        self.assertIsNone(event.cancel_tool)

    def test_cancel_tool_not_set_for_read_on_protected_file(self):
        hooks = self._make_hooks()
        event = BeforeToolCallEvent("read", {"path": ".eslintrc.json"})
        hooks._on_before_tool_call(event)
        self.assertIsNone(event.cancel_tool)

    def test_cancel_tool_covers_all_protected_files(self):
        from flow_agents_strands.policy import PROTECTED_FILES
        # Guard against a vacuous pass: with an empty collection the loop
        # below would run zero sub-tests and the test would succeed.
        self.assertTrue(PROTECTED_FILES, "PROTECTED_FILES must not be empty")
        hooks = self._make_hooks()
        for fname in PROTECTED_FILES:
            with self.subTest(file=fname):
                event = BeforeToolCallEvent("write", {"path": f"/repo/{fname}"})
                hooks._on_before_tool_call(event)
                self.assertIsNotNone(
                    event.cancel_tool,
                    f"Expected cancel_tool for {fname} but got None",
                )

    def test_telemetry_still_emitted_even_when_cancelled(self):
        """Policy block must not suppress telemetry."""
        hooks = self._make_hooks()
        event = BeforeToolCallEvent("write", {"path": "biome.json"})
        hooks._on_before_tool_call(event)
        # Establish the precondition in the test name: without this check
        # the test would also pass if the write had simply been allowed.
        self.assertIsNotNone(event.cancel_tool)
        log_file = Path(self._tmp.name) / "full.jsonl"
        lines = log_file.read_text(encoding="utf-8").strip().splitlines()
        self.assertEqual(1, len(lines))
        parsed = json.loads(lines[0])
        self.assertEqual("tool.invoke", parsed["event_type"])
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class TestFlowAgentsHooksSteeringContext(unittest.TestCase):
    """Check steering_context() for an empty workspace and for an active task."""

    @staticmethod
    def _steering_context_for(workspace: str):
        """Build hooks rooted at ``workspace`` and return their steering context."""
        from flow_agents_strands import FlowAgentsHooks

        hooks = FlowAgentsHooks(sink_path=workspace, workspace=workspace)
        return hooks.steering_context()

    def test_steering_context_returns_string(self):
        with tempfile.TemporaryDirectory() as workspace:
            self.assertIsInstance(self._steering_context_for(workspace), str)

    def test_steering_context_empty_when_no_flow_agents_dir(self):
        with tempfile.TemporaryDirectory() as workspace:
            self.assertEqual("", self._steering_context_for(workspace))

    def test_steering_context_with_active_state(self):
        """If .flow-agents/my-task/state.json has active status, context is returned."""
        active_state = {
            "task_slug": "my-task",
            "status": "in_progress",
            "phase": "execute",
            "next_action": {"summary": "Run tests", "target_phase": "verify"},
        }
        with tempfile.TemporaryDirectory() as workspace:
            task_dir = Path(workspace) / ".flow-agents" / "my-task"
            task_dir.mkdir(parents=True)
            state_file = task_dir / "state.json"
            state_file.write_text(json.dumps(active_state), encoding="utf-8")

            ctx = self._steering_context_for(workspace)
            self.assertIn("my-task", ctx)
            self.assertIn("in_progress", ctx)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
if __name__ == "__main__":
|
|
304
|
+
unittest.main()
|