tylor-mcp 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/tylor.js +0 -1
- package/package.json +5 -2
- package/pytest.ini +2 -2
- package/scripts/dev-sync.js +113 -0
- package/server/tools/agents.py +29 -12
- package/server/tools/executor.py +46 -12
- package/server/tools/harness.py +174 -72
- package/server/tools/security.py +141 -0
- package/skills/tylor-run/SKILL.md +61 -0
- package/server/server.log +0 -1
- package/server/storage/tests/__init__.py +0 -0
- package/server/storage/tests/test_dynamo.py +0 -452
- package/server/storage/tests/test_json_store.py +0 -226
- package/server/storage/tests/test_opensearch.py +0 -270
- package/server/storage/tests/test_s3.py +0 -125
- package/server/tests/__init__.py +0 -0
- package/server/tests/test_install.py +0 -620
- package/server/tests/test_isolation.py +0 -90
- package/server/tests/test_ui_server.py +0 -423
- package/server/tests/test_ui_shader_background.py +0 -49
- package/server/tests/test_ui_story_6_3.py +0 -98
- package/server/tools/tests/__init__.py +0 -0
- package/server/tools/tests/test_agents.py +0 -259
- package/server/tools/tests/test_code_index.py +0 -108
- package/server/tools/tests/test_ecc_tools.py +0 -51
- package/server/tools/tests/test_executor.py +0 -623
- package/server/tools/tests/test_help_agent101.py +0 -156
- package/server/tools/tests/test_hooks.py +0 -124
- package/server/tools/tests/test_kill_thread.py +0 -125
- package/server/tools/tests/test_new_thread_list_threads.py +0 -293
- package/server/tools/tests/test_personas.py +0 -52
- package/server/tools/tests/test_recall_memory.py +0 -55
- package/server/tools/tests/test_registry_client.py +0 -322
- package/server/tools/tests/test_router.py +0 -263
- package/server/tools/tests/test_skill_installer.py +0 -193
- package/server/tools/tests/test_spawn_agent_harness.py +0 -225
- package/server/tools/tests/test_switch_thread.py +0 -163
- package/server/tools/tests/test_thread_command_skills.py +0 -60
- package/server/tools/tests/test_thread_resolver.py +0 -165
- package/server/tools/tests/test_tier1_schema.py +0 -310
package/server/tools/security.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Bumblebee security gate and risky execution guard."""
|
|
2
2
|
from __future__ import annotations
|
|
3
|
+
import asyncio
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import re
|
|
@@ -160,3 +161,143 @@ def validate_skill_package(source_path: str | Path) -> None:
|
|
|
160
161
|
"Bumblebee security gate blocked skill installation because the scan detected risky exposure. "
|
|
161
162
|
f"stdout: {stdout} stderr: {stderr}"
|
|
162
163
|
)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ── Dependency file watcher ───────────────────────────────────────────────────
|
|
167
|
+
|
|
168
|
+
# Basenames of dependency manifests and lockfiles recognised by is_dep_file().
DEP_FILES: frozenset[str] = frozenset(
    (
        # Python
        "requirements.txt",
        "pyproject.toml",
        "poetry.lock",
        "Pipfile.lock",
        # Node.js
        "package.json",
        "package-lock.json",
        "yarn.lock",
    )
)
|
|
177
|
+
|
|
178
|
+
# Case-insensitive patterns; a command matching any of them counts as a push.
PUSH_PATTERNS: list[re.Pattern] = [
    re.compile(r"\bgit\s+push\b", flags=re.IGNORECASE),
]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def is_dep_file(path: str) -> bool:
    """Report whether *path* names a tracked dependency manifest or lockfile.

    Only the final path component is compared against ``DEP_FILES``; the
    directory part of *path* plays no role.
    """
    basename = Path(path).name
    return basename in DEP_FILES
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def should_prompt_on_push(command: str) -> bool:
    """Return True when *command* matches at least one entry of ``PUSH_PATTERNS``."""
    for pattern in PUSH_PATTERNS:
        if pattern.search(command):
            return True
    return False
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ── Async package scanners ────────────────────────────────────────────────────
|
|
192
|
+
|
|
193
|
+
async def _run_scanner_async(args: list[str], cwd: str, timeout: int = 60) -> tuple[int, str, str]:
    """Run an external scanner in a worker thread and capture its output.

    ``subprocess.run`` blocks, so it is dispatched through
    ``asyncio.to_thread`` to keep the event loop free while the scanner runs.

    Args:
        args: Command and arguments as a list (no shell is involved).
        cwd: Working directory in which the scanner is started.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams stripped. When the
        process cannot be started or times out the result is
        ``(-1, "", <error text>)``; no exception is raised for those cases.
    """
    try:
        result = await asyncio.to_thread(
            subprocess.run,
            args,
            cwd=cwd,
            capture_output=True,
            text=True,
            # Scanner output is not under our control: replace undecodable
            # bytes instead of letting UnicodeDecodeError escape this helper.
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # OSError already covers FileNotFoundError (missing binary or cwd).
        return -1, "", str(exc)
    return result.returncode, result.stdout.strip(), result.stderr.strip()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _parse_pip_audit(stdout: str) -> list[str]:
    """Turn ``pip-audit --format=json`` output into one string per vulnerability.

    Two top-level shapes are accepted: an object carrying a ``"dependencies"``
    list, or a bare list of dependency records. Each finding is rendered as
    ``"<name>==<version> [<id>]: <description>"`` with the description cut to
    120 characters.

    Args:
        stdout: Raw scanner output.

    Returns:
        The findings in report order. Unparseable JSON, or entries that do not
        have the expected shape, contribute nothing rather than raising.
    """
    try:
        data = json.loads(stdout)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []

    deps = data.get("dependencies", []) if isinstance(data, dict) else data
    if not isinstance(deps, list):
        return []

    findings: list[str] = []
    for dep in deps:
        if not isinstance(dep, dict):
            continue
        vulns = dep.get("vulns")
        if not isinstance(vulns, list):
            # Covers a missing key as well as an explicit null.
            continue
        pkg = dep.get("name", "?")
        ver = dep.get("version", "?")
        for vuln in vulns:
            if not isinstance(vuln, dict):
                continue
            vid = vuln.get("id", "?")
            # str() keeps the slice safe if the description is not a string.
            desc = str(vuln.get("description") or "")[:120]
            findings.append(f"{pkg}=={ver} [{vid}]: {desc}")
    return findings
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _parse_npm_audit(stdout: str) -> list[str]:
    """Turn ``npm audit --json`` output into at most 20 finding strings.

    Reads the top-level ``"vulnerabilities"`` object and renders every entry
    as ``"<package> (<severity>): <title>"``. The title is taken from the
    first object in the entry's ``"via"`` list that carries a non-empty
    ``"title"``; when there is none the package name is used instead.

    Args:
        stdout: Raw scanner output.

    Returns:
        Up to 20 findings in report order. Unparseable JSON or entries of an
        unexpected shape contribute nothing rather than raising.
    """
    try:
        data = json.loads(stdout)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []

    vulns = data.get("vulnerabilities") if isinstance(data, dict) else None
    if not isinstance(vulns, dict):
        return []

    findings: list[str] = []
    for name, info in vulns.items():
        if not isinstance(info, dict):
            continue
        severity = info.get("severity", "?")
        via = info.get("via")
        title = ""
        if isinstance(via, list):
            # "via" may mix plain package names with advisory objects, so
            # look past leading strings for the first titled advisory.
            title = next(
                (entry["title"] for entry in via if isinstance(entry, dict) and entry.get("title")),
                "",
            )
        findings.append(f"{name} ({severity}): {title or name}")
        if len(findings) >= 20:
            break
    return findings
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _parse_bumblebee_findings(stdout: str) -> list[str]:
    """Summarise a bumblebee scan as a short list of finding strings.

    The output is first passed through ``_parse_bumblebee_output`` and
    ``_is_risky_scan_result`` (both defined elsewhere in this module). A scan
    not judged risky yields an empty list. Otherwise the first non-empty list
    found under "findings", "issues", "alerts", "warnings" or "violations" is
    returned, limited to 10 entries of at most 200 characters each. If no
    such list exists a single generic message is returned.
    """
    report = _parse_bumblebee_output(stdout)
    if not _is_risky_scan_result(report):
        return []

    generic = ["bumblebee detected risky exposure"]
    if not isinstance(report, dict):
        return generic

    for field in ("findings", "issues", "alerts", "warnings", "violations"):
        entries = report.get(field)
        if isinstance(entries, list) and entries:
            return [str(entry)[:200] for entry in entries[:10]]
    return generic
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _parse_safety(stdout: str) -> list[str]:
    """Turn list-style ``safety check --json`` output into finding strings.

    Only a top-level JSON list is understood. Of its first 20 items, each one
    that is itself a list with at least four elements is rendered as
    ``"<item[0]>==<item[2]>: <item[3]>"`` with the last part cut to 120
    characters. Anything else, including unparseable JSON, yields no findings.
    """
    try:
        payload = json.loads(stdout)
    except (json.JSONDecodeError, ValueError):
        return []
    if not isinstance(payload, list):
        return []
    return [
        f"{row[0]}=={row[2]}: {str(row[3])[:120]}"
        for row in payload[:20]
        if isinstance(row, list) and len(row) >= 4
    ]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
async def _collect_findings(args: list[str], cwd: str, parser) -> list[str]:
    """Run one scanner and parse its stdout; any failure yields no findings.

    *parser* is a callable taking the scanner's stdout and returning a list
    of finding strings.
    """
    _, out, _ = await _run_scanner_async(args, cwd)
    if not out:
        return []
    try:
        return parser(out)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # A report with an unexpected shape must not break the caller's
        # documented never-raise contract.
        return []


async def scan_packages_async(cwd: str | None = None) -> list[str]:
    """Run the available vulnerability scanners without blocking the event loop.

    Scanners are tried in this order, each only when its precondition holds:

    1. ``pip-audit``: when it is on PATH.
    2. ``npm audit``: when *cwd* contains ``package.json`` and ``npm`` is on PATH.
    3. bumblebee: when ``_bumblebee_path()`` returns a path.
    4. ``safety``: only as a fallback when ``pip-audit`` is absent.

    Args:
        cwd: Directory to scan; defaults to the current working directory.
            A leading ``~`` is expanded.

    Returns:
        Finding strings from all scanners, de-duplicated with first-seen
        order preserved. Scanner start-up failures, timeouts and malformed
        reports are swallowed and simply contribute no findings.
    """
    cwd_str = str(Path(cwd or os.getcwd()).expanduser())
    # Looked up once so the pip-audit step and the safety fallback agree.
    has_pip_audit = shutil.which("pip-audit") is not None
    findings: list[str] = []

    if has_pip_audit:
        findings += await _collect_findings(
            ["pip-audit", "--format=json", "-q"], cwd_str, _parse_pip_audit
        )

    if (Path(cwd_str) / "package.json").exists() and shutil.which("npm"):
        findings += await _collect_findings(
            ["npm", "audit", "--json"], cwd_str, _parse_npm_audit
        )

    bb = _bumblebee_path()
    if bb:
        findings += await _collect_findings(
            [bb, "scan", "--json"], cwd_str, _parse_bumblebee_findings
        )

    if not has_pip_audit and shutil.which("safety"):
        findings += await _collect_findings(
            ["safety", "check", "--json"], cwd_str, _parse_safety
        )

    # dict keys keep insertion order, giving an order-preserving de-dup.
    return list(dict.fromkeys(findings))
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tylor-run
|
|
3
|
+
description: "Activate Tylor to handle a task. Routes the request through the full Tylor harness — intent classification, skill auto-loading, multi-agent orchestration, and Bumblebee security scanning. All responses are labelled `Tylor:` so you always know the plugin is running, not native Claude/Codex/Antigravity."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# /Tylor_run
|
|
7
|
+
|
|
8
|
+
Use when the user invokes `/Tylor_run <task>` or explicitly wants Tylor to handle a request.
|
|
9
|
+
|
|
10
|
+
## How it works
|
|
11
|
+
|
|
12
|
+
1. Resolve the active thread ID — call `list_threads()` and pick the most recently active thread, or use one the user has already switched to.
|
|
13
|
+
2. Call `run_in_thread(thread_id=<active_thread_id>, message=<user_task>, cwd=<project_dir>)`.
|
|
14
|
+
3. Stream the result back exactly as returned — do not paraphrase or summarise it. The response already starts with `Tylor:` so the user can see the plugin is running.
|
|
15
|
+
|
|
16
|
+
## What Tylor does automatically (no commands needed)
|
|
17
|
+
|
|
18
|
+
- Classifies intent and selects the right role(s): researcher, implementer, reviewer, planner, drafter
|
|
19
|
+
- Auto-loads ECC skill groups (ecc/web, ecc/data, ecc/presentation, ecc/diagrams, ecc/pipeline) based on the task
|
|
20
|
+
- Runs a Bumblebee session-start security scan on first use in a thread
|
|
21
|
+
- Watches for dependency file changes and scans them inline
|
|
22
|
+
- Prompts before git push
|
|
23
|
+
- Spawns multiple agents in parallel when the task warrants it
|
|
24
|
+
- Streams a live orchestration log so the user can follow what is happening
|
|
25
|
+
|
|
26
|
+
## Example interactions
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
/Tylor_run research the top 3 CI/CD tools and draft a comparison doc
|
|
30
|
+
→ Tylor:
|
|
31
|
+
[agent101] intent classified: researcher, drafter
|
|
32
|
+
[agent101] auto-loaded skill: ecc/web (web_scrape, web_fetch)
|
|
33
|
+
[agent101] auto-loaded skill: ecc/presentation (build_doc)
|
|
34
|
+
[bumblebee] starting session-start package scan...
|
|
35
|
+
[agent: researcher #1] starting — gather CI/CD tool comparisons
|
|
36
|
+
$ web_fetch
|
|
37
|
+
$ web_fetch
|
|
38
|
+
[agent: researcher #1] done
|
|
39
|
+
[agent: drafter #2] starting — build comparison doc from research
|
|
40
|
+
$ build_doc
|
|
41
|
+
[agent: drafter #2] done
|
|
42
|
+
[bumblebee] ✅ session-start scan — no vulnerabilities found
|
|
43
|
+
[supervisor] complete — 2 agents ran
|
|
44
|
+
|
|
45
|
+
/Tylor_run fix the failing auth tests
|
|
46
|
+
→ Tylor:
|
|
47
|
+
[agent101] intent classified: implementer
|
|
48
|
+
[agent: implementer #1] starting — fix failing auth tests
|
|
49
|
+
$ Read
|
|
50
|
+
$ Edit
|
|
51
|
+
$ Bash
|
|
52
|
+
[agent: implementer #1] done
|
|
53
|
+
[supervisor] complete — 1 agent ran
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Notes
|
|
57
|
+
|
|
58
|
+
- The `Tylor:` label at the top of every response confirms the plugin is active — not native Claude Code, Codex, or Antigravity
|
|
59
|
+
- Works identically across all supported platforms: Claude Code CLI, Claude Desktop, GitHub Copilot CLI, Antigravity
|
|
60
|
+
- If no thread is active, suggest the user run `/new-thread <name>` first
|
|
61
|
+
- Pass `cwd` as the current project directory so agents have filesystem context
|
package/server/server.log
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
zsh: command not found: python
|
|
File without changes
|
|
@@ -1,452 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Tests for Story 2.1: DynamoDB Storage Client
|
|
3
|
-
Run: pytest server/storage/tests/test_dynamo.py -v
|
|
4
|
-
"""
|
|
5
|
-
import sys
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from unittest.mock import MagicMock, patch, call
|
|
8
|
-
|
|
9
|
-
import pytest
|
|
10
|
-
|
|
11
|
-
PLUGIN_DIR = Path(__file__).parent.parent.parent.parent
|
|
12
|
-
sys.path.insert(0, str(PLUGIN_DIR))
|
|
13
|
-
|
|
14
|
-
from mcp.server.fastmcp.exceptions import ToolError
|
|
15
|
-
from server.storage.dynamo import DynamoClient, ITEM_SIZE_LIMIT
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
# ---------------------------------------------------------------------------
|
|
19
|
-
# Helpers
|
|
20
|
-
# ---------------------------------------------------------------------------
|
|
21
|
-
|
|
22
|
-
def make_client(mock_table=None):
    """Build a ``DynamoClient`` whose boto3 session and table are mocks.

    Returns ``(client, table)``. ``table`` is *mock_table* when a truthy one
    is passed in, otherwise a fresh ``MagicMock``.
    """
    with patch("boto3.Session") as session_factory:
        fake_resource = MagicMock()
        fake_session = MagicMock()
        fake_session.resource.return_value = fake_resource
        session_factory.return_value = fake_session
        table = mock_table or MagicMock()
        fake_resource.Table.return_value = table
        client = DynamoClient(table_name="agent101", user_id="testuser")
        # Pin the mock on the client so tests can assert against it.
        client.table = table
        return client, table
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# ---------------------------------------------------------------------------
|
|
37
|
-
# AC1: Initialises boto3 on import with configured profile
|
|
38
|
-
# ---------------------------------------------------------------------------
|
|
39
|
-
|
|
40
|
-
def test_init_creates_boto3_session_no_profile():
    """Without a profile the client calls ``boto3.Session()`` with no arguments."""
    with patch("boto3.Session") as session_factory:
        fake_resource = MagicMock()
        fake_resource.Table.return_value = MagicMock()
        fake_session = MagicMock()
        fake_session.resource.return_value = fake_resource
        session_factory.return_value = fake_session

        DynamoClient(table_name="agent101", user_id="u1")

        session_factory.assert_called_once_with()
        fake_session.resource.assert_called_once_with("dynamodb")
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def test_init_creates_boto3_session_with_profile():
    """A ``profile`` argument is forwarded to ``boto3.Session`` as ``profile_name``."""
    with patch("boto3.Session") as session_factory:
        fake_resource = MagicMock()
        fake_resource.Table.return_value = MagicMock()
        fake_session = MagicMock()
        fake_session.resource.return_value = fake_resource
        session_factory.return_value = fake_session

        DynamoClient(table_name="agent101", user_id="u1", profile="myprofile")

        session_factory.assert_called_once_with(profile_name="myprofile")
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# ---------------------------------------------------------------------------
|
|
68
|
-
# AC2: put_item writes mandatory base fields for ≤400KB item
|
|
69
|
-
# ---------------------------------------------------------------------------
|
|
70
|
-
|
|
71
|
-
def test_put_item_injects_mandatory_fields():
    """A first write carries PK, SK, both timestamps and an integer Version of 1."""
    client, table = make_client()
    table.get_item.return_value = {}  # nothing stored under this key yet
    table.put_item.return_value = {}
    sort_key = "THREAD#t1#META"

    stored = client.put_item(sk=sort_key, attributes={"Name": "alpha"})

    assert stored["PK"] == "USER#testuser"
    assert stored["SK"] == sort_key
    for stamp in ("CreatedAt", "UpdatedAt"):
        assert stamp in stored
    assert isinstance(stored["Version"], int)
    assert stored["Version"] == 1
    # The write must actually have reached the (mocked) table.
    table.put_item.assert_called_once()
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def test_put_item_passes_correct_item_to_dynamo():
    """The ``Item`` handed to the table holds the caller's attributes plus the PK."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    # The return value is irrelevant here; only the table call is inspected.
    client.put_item(sk="THREAD#t1#META", attributes={"Name": "beta"})

    written = table.put_item.call_args.kwargs["Item"]
    assert written["Name"] == "beta"
    assert written["PK"] == "USER#testuser"
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def test_put_item_preserves_created_at_on_update():
    """Overwriting an existing item keeps its CreatedAt and bumps Version by one."""
    client, table = make_client()
    original_stamp = "2026-01-01T00:00:00Z"
    # The record the table reports as already stored.
    stored = {
        "PK": "USER#testuser",
        "SK": "THREAD#t1#META",
        "CreatedAt": original_stamp,
        "UpdatedAt": original_stamp,
        "Version": 3,
        "Name": "old",
    }
    table.get_item.return_value = {"Item": stored}
    table.put_item.return_value = {}

    updated = client.put_item(sk="THREAD#t1#META", attributes={"Name": "new"})

    assert updated["CreatedAt"] == "2026-01-01T00:00:00Z"
    assert updated["Version"] == 4  # 3 + 1
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
# ---------------------------------------------------------------------------
|
|
123
|
-
# AC3: put_item rejects items > 400KB
|
|
124
|
-
# ---------------------------------------------------------------------------
|
|
125
|
-
|
|
126
|
-
def test_put_item_rejects_oversized_item():
    """Content beyond ``ITEM_SIZE_LIMIT`` raises ``ToolError`` and is never written."""
    client, table = make_client()
    table.get_item.return_value = {}
    oversized = {"Content": "x" * (ITEM_SIZE_LIMIT + 10_000)}

    with pytest.raises(ToolError, match="400KB"):
        client.put_item(sk="THREAD#t1#MSG#ts", attributes=oversized)

    table.put_item.assert_not_called()
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def test_put_item_accepts_item_at_size_limit():
    """A small item, well under ``ITEM_SIZE_LIMIT``, is written without error.

    NOTE(review): despite the test name this does not probe the exact
    boundary; the payload is only 100 characters. A true boundary case would
    need to account for how the client measures item size.
    """
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    small = {"Content": "x" * 100}
    result = client.put_item(sk="THREAD#t1#META", attributes=small)

    assert result["Version"] == 1
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
# ---------------------------------------------------------------------------
|
|
150
|
-
# AC4: Thread isolation enforced on get_item, query_thread, delete_item
|
|
151
|
-
# ---------------------------------------------------------------------------
|
|
152
|
-
|
|
153
|
-
def test_get_item_raises_on_isolation_violation():
    """Reading thread t2's key while scoped to thread t1 is rejected."""
    client, _table = make_client()
    foreign_key = "THREAD#t2#META"
    with pytest.raises(ToolError, match="Thread isolation violation"):
        client.get_item(thread_id="t1", sk=foreign_key)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
def test_get_item_passes_when_sk_matches_thread():
    """A key belonging to the requested thread is fetched and returned."""
    client, table = make_client()
    own_key = "THREAD#t1#META"
    table.get_item.return_value = {"Item": {"PK": "USER#testuser", "SK": own_key}}

    fetched = client.get_item(thread_id="t1", sk=own_key)

    assert fetched is not None
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def test_get_item_returns_none_when_not_found():
    """A table response without an "Item" key is reported as ``None``."""
    client, table = make_client()
    table.get_item.return_value = {}

    fetched = client.get_item(thread_id="t1", sk="THREAD#t1#META")

    assert fetched is None
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def test_query_thread_raises_on_isolation_violation():
    """Querying with another thread's key prefix is rejected."""
    client, _table = make_client()
    foreign_prefix = "THREAD#t2#MSG"
    with pytest.raises(ToolError, match="Thread isolation violation"):
        client.query_thread(thread_id="t1", sk_prefix=foreign_prefix)
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
def test_query_thread_passes_correct_prefix():
    """A matching prefix issues exactly one table query and returns its items."""
    client, table = make_client()
    table.query.return_value = {"Items": [{"SK": "THREAD#t1#MSG#001"}]}

    found = client.query_thread(thread_id="t1", sk_prefix="THREAD#t1#MSG")

    assert len(found) == 1
    table.query.assert_called_once()
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def test_delete_item_raises_on_isolation_violation():
    """Deleting another thread's key is rejected."""
    client, _table = make_client()
    foreign_key = "THREAD#t2#META"
    with pytest.raises(ToolError, match="Thread isolation violation"):
        client.delete_item(thread_id="t1", sk=foreign_key)
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
def test_delete_item_calls_dynamo_delete():
    """A permitted delete is forwarded to the table with the full primary key."""
    client, table = make_client()
    table.delete_item.return_value = {}
    sort_key = "THREAD#t1#META"

    client.delete_item(thread_id="t1", sk=sort_key)

    expected_key = {"PK": "USER#testuser", "SK": sort_key}
    table.delete_item.assert_called_once_with(Key=expected_key)
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
# ---------------------------------------------------------------------------
|
|
208
|
-
# Story 3.3: Sub-agent output and handoff persistence
|
|
209
|
-
# ---------------------------------------------------------------------------
|
|
210
|
-
|
|
211
|
-
def test_put_agent_output_writes_expected_thread_scoped_sk():
    """Agent output is stored under a thread- and agent-scoped ``#OUT#`` key."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    record = client.put_agent_output(
        thread_id="t1",
        agent_id="agent_a",
        output="Agent A completed analysis.",
        task="Analyze risk.",
    )

    assert record["SK"].startswith("THREAD#t1#AGENT#agent_a#OUT#")
    expected_fields = {
        "ThreadId": "t1",
        "AgentId": "agent_a",
        "Type": "agent_output",
        "Output": "Agent A completed analysis.",
        "Task": "Analyze risk.",
    }
    for field, value in expected_fields.items():
        assert record[field] == value
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
def test_put_agent_output_generates_unique_sk_for_rapid_writes():
    """Two back-to-back outputs from one agent get different sort keys."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    first = client.put_agent_output(thread_id="t1", agent_id="agent_a", output="First output.")
    second = client.put_agent_output(thread_id="t1", agent_id="agent_a", output="Second output.")

    assert first["SK"] != second["SK"]
    prefix = "THREAD#t1#AGENT#agent_a#OUT#"
    assert first["SK"].startswith(prefix)
    assert second["SK"].startswith(prefix)
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
def test_put_agent_handoff_writes_distinct_thread_scoped_sk():
    """A handoff is stored under a ``#HANDOFF#`` key together with its state."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}
    handoff = {"next_agent": "agent_b", "summary": "Carry this forward."}

    record = client.put_agent_handoff(
        thread_id="t1",
        agent_id="agent_a",
        handoff_state=handoff,
    )

    assert record["SK"].startswith("THREAD#t1#AGENT#agent_a#HANDOFF#")
    expected_fields = {
        "ThreadId": "t1",
        "AgentId": "agent_a",
        "Type": "agent_handoff",
        "HandoffState": handoff,
    }
    for field, value in expected_fields.items():
        assert record[field] == value
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
def test_put_agent_handoff_generates_unique_sk_for_rapid_writes():
    """Two back-to-back handoffs from one agent get different sort keys."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    first = client.put_agent_handoff(thread_id="t1", agent_id="agent_a", handoff_state={"step": 1})
    second = client.put_agent_handoff(thread_id="t1", agent_id="agent_a", handoff_state={"step": 2})

    assert first["SK"] != second["SK"]
    prefix = "THREAD#t1#AGENT#agent_a#HANDOFF#"
    assert first["SK"].startswith(prefix)
    assert second["SK"].startswith(prefix)
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
def test_put_agent_state_writes_thread_scoped_state_record():
    """Agent state lands on the fixed ``#STATE`` key with its fields flattened in."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}
    agent_state = {
        "Status": "active",
        "Persona": "analyst",
        "Task": "Analyze risk.",
        "ToolsLoaded": ["ecc/web", "ecc/data"],
    }

    record = client.put_agent_state(thread_id="t1", agent_id="agent_a", state=agent_state)

    expected_fields = {
        "SK": "THREAD#t1#AGENT#agent_a#STATE",
        "ThreadId": "t1",
        "AgentId": "agent_a",
        "Type": "agent_state",
        "Status": "active",
        "Persona": "analyst",
    }
    for field, value in expected_fields.items():
        assert record[field] == value
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
def test_query_agent_states_is_scoped_to_thread_prefix():
    """``query_agent_states`` returns the table's items after a single query."""
    client, table = make_client()
    state_row = {"SK": "THREAD#t1#AGENT#agent_a#STATE", "ThreadId": "t1"}
    table.query.return_value = {"Items": [state_row]}

    states = client.query_agent_states("t1")

    assert states == [{"SK": "THREAD#t1#AGENT#agent_a#STATE", "ThreadId": "t1"}]
    table.query.assert_called_once()
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
def test_agent_output_rejects_cross_thread_sk_injection():
    """An ``agent_id`` that embeds key segments is refused before any write."""
    client, table = make_client()
    malicious_agent_id = "THREAD#t2#AGENT#agent_a"

    with pytest.raises(ToolError, match="Invalid agent_id"):
        client.put_agent_output(thread_id="t1", agent_id=malicious_agent_id, output="bad")

    table.put_item.assert_not_called()
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
def test_switch_thread_suspends_previous_agents_and_resumes_target_agents():
    """Switching alpha → beta writes alpha's agent as suspended and beta's as active."""
    client, table = make_client()

    def raw_get_side_effect(Key):
        # Stand-in for table.get_item: the CURRENT pointer (aimed at
        # thread_alpha) plus META rows for both threads. The dict is rebuilt
        # on every call so each lookup gets fresh objects.
        rows = {
            "THREAD#CURRENT#META": {
                "PK": "USER#testuser",
                "SK": "THREAD#CURRENT#META",
                "CurrentThreadId": "thread_alpha",
                "ActiveAt": "2026-05-12T00:00:00Z",
                "CreatedAt": "2026-05-12T00:00:00Z",
                "UpdatedAt": "2026-05-12T00:00:00Z",
                "Version": 1,
            },
            "THREAD#thread_alpha#META": {
                "PK": "USER#testuser",
                "SK": "THREAD#thread_alpha#META",
                "Version": 1,
            },
            "THREAD#thread_beta#META": {
                "PK": "USER#testuser",
                "SK": "THREAD#thread_beta#META",
                "Version": 1,
            },
        }
        row = rows.get(Key["SK"])
        return {"Item": row} if row else {}

    table.get_item.side_effect = raw_get_side_effect

    alpha_agents = {
        "Items": [
            {
                "PK": "USER#testuser",
                "SK": "THREAD#thread_alpha#AGENT#agent_a#STATE",
                "ThreadId": "thread_alpha",
                "AgentId": "agent_a",
                "Status": "active",
                "Version": 1,
            }
        ]
    }
    beta_agents = {
        "Items": [
            {
                "PK": "USER#testuser",
                "SK": "THREAD#thread_beta#AGENT#agent_b#STATE",
                "ThreadId": "thread_beta",
                "AgentId": "agent_b",
                "Status": "suspended",
                "Version": 1,
            }
        ]
    }
    # Successive table.query calls return these in order.
    table.query.side_effect = [alpha_agents, beta_agents]

    outcome = client.switch_thread("thread_beta")

    assert outcome["thread_id"] == "thread_beta"
    writes = client._client.transact_write_items.call_args.kwargs["TransactItems"]
    # Collect the status written for every agent STATE row in the transaction.
    statuses = {}
    for write in writes:
        item = write["Put"]["Item"]
        sort_key = item["SK"]["S"]
        if sort_key.endswith("#STATE"):
            statuses[sort_key] = item["Status"]["S"]
    assert statuses["THREAD#thread_alpha#AGENT#agent_a#STATE"] == "suspended"
    assert statuses["THREAD#thread_beta#AGENT#agent_b#STATE"] == "active"
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
# ---------------------------------------------------------------------------
|
|
411
|
-
# AC5: Version increments on every write
|
|
412
|
-
# ---------------------------------------------------------------------------
|
|
413
|
-
|
|
414
|
-
def test_version_starts_at_1_for_new_item():
    """An item with no stored predecessor is written as Version 1."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    created = client.put_item(sk="THREAD#t1#META", attributes={})

    assert created["Version"] == 1
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
def test_version_increments_on_subsequent_writes():
    """Writing over a stored Version 7 item produces Version 8."""
    client, table = make_client()
    stamp = "2026-01-01T00:00:00Z"
    stored = {
        "PK": "USER#testuser",
        "SK": "THREAD#t1#META",
        "CreatedAt": stamp,
        "UpdatedAt": stamp,
        "Version": 7,
    }
    table.get_item.return_value = {"Item": stored}
    table.put_item.return_value = {}

    updated = client.put_item(sk="THREAD#t1#META", attributes={})

    assert updated["Version"] == 8
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
# ---------------------------------------------------------------------------
|
|
440
|
-
# ISO 8601 date format
|
|
441
|
-
# ---------------------------------------------------------------------------
|
|
442
|
-
|
|
443
|
-
def test_created_at_is_iso_8601_utc():
    """Both timestamps use the ``YYYY-MM-DDTHH:MM:SSZ`` layout."""
    import re

    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    written = client.put_item(sk="THREAD#t1#META", attributes={})

    iso_utc = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$"
    assert re.match(iso_utc, written["CreatedAt"])
    assert re.match(iso_utc, written["UpdatedAt"])
|