tylor-mcp 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/bin/tylor.js +0 -1
  2. package/package.json +5 -2
  3. package/pytest.ini +2 -2
  4. package/scripts/dev-sync.js +113 -0
  5. package/server/tools/agents.py +29 -12
  6. package/server/tools/executor.py +46 -12
  7. package/server/tools/harness.py +174 -72
  8. package/server/tools/security.py +141 -0
  9. package/skills/tylor-run/SKILL.md +61 -0
  10. package/server/server.log +0 -1
  11. package/server/storage/tests/__init__.py +0 -0
  12. package/server/storage/tests/test_dynamo.py +0 -452
  13. package/server/storage/tests/test_json_store.py +0 -226
  14. package/server/storage/tests/test_opensearch.py +0 -270
  15. package/server/storage/tests/test_s3.py +0 -125
  16. package/server/tests/__init__.py +0 -0
  17. package/server/tests/test_install.py +0 -620
  18. package/server/tests/test_isolation.py +0 -90
  19. package/server/tests/test_ui_server.py +0 -423
  20. package/server/tests/test_ui_shader_background.py +0 -49
  21. package/server/tests/test_ui_story_6_3.py +0 -98
  22. package/server/tools/tests/__init__.py +0 -0
  23. package/server/tools/tests/test_agents.py +0 -259
  24. package/server/tools/tests/test_code_index.py +0 -108
  25. package/server/tools/tests/test_ecc_tools.py +0 -51
  26. package/server/tools/tests/test_executor.py +0 -623
  27. package/server/tools/tests/test_help_agent101.py +0 -156
  28. package/server/tools/tests/test_hooks.py +0 -124
  29. package/server/tools/tests/test_kill_thread.py +0 -125
  30. package/server/tools/tests/test_new_thread_list_threads.py +0 -293
  31. package/server/tools/tests/test_personas.py +0 -52
  32. package/server/tools/tests/test_recall_memory.py +0 -55
  33. package/server/tools/tests/test_registry_client.py +0 -322
  34. package/server/tools/tests/test_router.py +0 -263
  35. package/server/tools/tests/test_skill_installer.py +0 -193
  36. package/server/tools/tests/test_spawn_agent_harness.py +0 -225
  37. package/server/tools/tests/test_switch_thread.py +0 -163
  38. package/server/tools/tests/test_thread_command_skills.py +0 -60
  39. package/server/tools/tests/test_thread_resolver.py +0 -165
  40. package/server/tools/tests/test_tier1_schema.py +0 -310
@@ -1,5 +1,6 @@
1
1
  """Bumblebee security gate and risky execution guard."""
2
2
  from __future__ import annotations
3
+ import asyncio
3
4
  import json
4
5
  import os
5
6
  import re
@@ -160,3 +161,143 @@ def validate_skill_package(source_path: str | Path) -> None:
160
161
  "Bumblebee security gate blocked skill installation because the scan detected risky exposure. "
161
162
  f"stdout: {stdout} stderr: {stderr}"
162
163
  )
164
+
165
+
166
+ # ── Dependency file watcher ───────────────────────────────────────────────────
167
+
168
# Basenames of dependency manifests / lockfiles recognised by is_dep_file().
DEP_FILES: frozenset[str] = frozenset(
    (
        "Pipfile.lock",
        "package-lock.json",
        "package.json",
        "poetry.lock",
        "pyproject.toml",
        "requirements.txt",
        "yarn.lock",
    )
)
177
+
178
# Case-insensitive patterns that identify a shell command as a `git push`.
PUSH_PATTERNS: list[re.Pattern] = [re.compile(r"\bgit\s+push\b", re.IGNORECASE)]
181
+
182
+
183
def is_dep_file(path: str) -> bool:
    """Return True when the final component of *path* is listed in DEP_FILES."""
    filename = Path(path).name
    return filename in DEP_FILES
185
+
186
+
187
def should_prompt_on_push(command: str) -> bool:
    """Return True when *command* matches at least one entry of PUSH_PATTERNS."""
    for pattern in PUSH_PATTERNS:
        if pattern.search(command):
            return True
    return False
189
+
190
+
191
+ # ── Async package scanners ────────────────────────────────────────────────────
192
+
193
async def _run_scanner_async(args: list[str], cwd: str, timeout: int = 60) -> tuple[int, str, str]:
    """Run an external scanner in a worker thread and capture its output.

    ``subprocess.run`` is executed through ``asyncio.to_thread`` so the event
    loop is not blocked while the child process runs.

    Args:
        args: Command and arguments, passed as a list (no shell is used).
        cwd: Working directory for the child process.
        timeout: Seconds to wait before the run is abandoned.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams stripped. When the
        process cannot be started or times out, ``(-1, "", <error text>)``.
    """
    try:
        result = await asyncio.to_thread(
            subprocess.run,
            args,
            cwd=cwd,
            capture_output=True,
            text=True,
            # Output is only parsed best-effort, so undecodable bytes are
            # replaced instead of raising UnicodeDecodeError out of this helper.
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # OSError already covers FileNotFoundError (missing binary or cwd).
        return -1, "", str(exc)
    return result.returncode, result.stdout.strip(), result.stderr.strip()
207
+
208
+
209
def _parse_pip_audit(stdout: str) -> list[str]:
    """Turn JSON scanner output into ``"pkg==ver [id]: description"`` strings.

    Accepts either an object with a ``"dependencies"`` list or a bare list of
    dependency objects (presumably the two layouts emitted by different
    pip-audit versions; confirm against the versions in use). Anything that
    does not have the expected shape is skipped rather than raising, because
    this parser feeds a best-effort scan.

    Args:
        stdout: Raw text captured from the scanner.

    Returns:
        One string per vulnerability, with the description cut to 120
        characters. Empty when the text is not JSON or has no usable entries.
    """
    try:
        data = json.loads(stdout)
    except (json.JSONDecodeError, ValueError):
        return []

    deps = data.get("dependencies") if isinstance(data, dict) else data
    if not isinstance(deps, list):
        return []

    findings: list[str] = []
    for dep in deps:
        if not isinstance(dep, dict):
            continue
        vulns = dep.get("vulns")
        if not isinstance(vulns, list):
            # Covers a missing key as well as an explicit null.
            continue
        pkg = dep.get("name", "?")
        ver = dep.get("version", "?")
        for vuln in vulns:
            if not isinstance(vuln, dict):
                continue
            vid = vuln.get("id", "?")
            desc = str(vuln.get("description") or "")[:120]
            findings.append(f"{pkg}=={ver} [{vid}]: {desc}")
    return findings
224
+
225
+
226
def _parse_npm_audit(stdout: str) -> list[str]:
    """Turn JSON audit output into ``"name (severity): title"`` strings.

    Reads the top-level ``"vulnerabilities"`` object. For each entry the title
    comes from the first element of ``"via"`` when that element is an object
    with a non-empty ``"title"``; otherwise the package name is used. Entries
    or containers with an unexpected type are skipped rather than raising,
    because this parser feeds a best-effort scan.

    Args:
        stdout: Raw text captured from the scanner.

    Returns:
        At most 20 finding strings; empty when the text is not JSON or does
        not contain a ``"vulnerabilities"`` object.
    """
    try:
        data = json.loads(stdout)
    except (json.JSONDecodeError, ValueError):
        return []
    if not isinstance(data, dict):
        return []
    vulns = data.get("vulnerabilities")
    if not isinstance(vulns, dict):
        return []

    findings: list[str] = []
    for name, info in vulns.items():
        if not isinstance(info, dict):
            continue
        severity = info.get("severity", "?")
        via = info.get("via")
        first = via[0] if isinstance(via, list) and via else None
        title = (first.get("title", "") if isinstance(first, dict) else "") or name
        findings.append(f"{name} ({severity}): {title}")
        if len(findings) >= 20:
            # Cap the report so one noisy project cannot flood the output.
            break
    return findings
241
+
242
+
243
def _parse_bumblebee_findings(stdout: str) -> list[str]:
    """Summarise a bumblebee scan as a short list of finding strings.

    The output is parsed with ``_parse_bumblebee_output`` and judged with
    ``_is_risky_scan_result``. A result that is not risky yields an empty list.
    For a risky dict result, the first non-empty list found under
    ``findings``/``issues``/``alerts``/``warnings``/``violations`` is returned
    (up to 10 items, each stringified and cut to 200 characters). If no such
    list exists, a single generic message is returned.
    """
    parsed = _parse_bumblebee_output(stdout)
    if _is_risky_scan_result(parsed):
        if isinstance(parsed, dict):
            for key in ("findings", "issues", "alerts", "warnings", "violations"):
                entries = parsed.get(key)
                if isinstance(entries, list) and entries:
                    return [str(entry)[:200] for entry in entries[:10]]
        return ["bumblebee detected risky exposure"]
    return []
253
+
254
+
255
def _parse_safety(stdout: str) -> list[str]:
    """Turn list-style JSON scanner output into ``"pkg==installed: description"`` strings.

    Only a top-level JSON list is understood. Of its first 20 entries, each
    one that is itself a list with at least four elements contributes one
    finding built from elements 0 (package), 2 (installed version) and
    3 (description, cut to 120 characters). Everything else is ignored.
    """
    try:
        payload = json.loads(stdout)
    except (json.JSONDecodeError, ValueError):
        return []
    if not isinstance(payload, list):
        return []
    return [
        f"{row[0]}=={row[2]}: {str(row[3])[:120]}"
        for row in payload[:20]
        if isinstance(row, list) and len(row) >= 4
    ]
266
+
267
+
268
async def scan_packages_async(cwd: str | None = None) -> list[str]:
    """
    Non-blocking vulnerability scan. Tries pip-audit → npm audit → bumblebee → safety.

    Each scanner is run only when it is available: pip-audit and safety are
    looked up on PATH (safety is used only when pip-audit is absent), npm
    audit additionally requires a ``package.json`` in *cwd*, and bumblebee is
    located through ``_bumblebee_path()``.

    Args:
        cwd: Directory to scan from; defaults to the current working directory.

    Returns:
        Finding strings in first-seen order with duplicates removed. Scanner
        and parser failures are swallowed so a broken tool never aborts the
        caller; the affected scanner simply contributes nothing.
    """
    cwd_str = str(Path(cwd or os.getcwd()).expanduser())
    findings: list[str] = []

    async def _collect(args: list[str], parser) -> None:
        # Run one scanner and append whatever its parser can extract.
        _, out, _ = await _run_scanner_async(args, cwd_str)
        if not out:
            return
        try:
            findings.extend(parser(out))
        except Exception:
            # Deliberate best-effort: unexpected scanner output must not
            # break the documented "never raises" contract.
            pass

    # Looked up once so the pip-audit/safety fallback decision stays consistent.
    has_pip_audit = bool(shutil.which("pip-audit"))

    if has_pip_audit:
        await _collect(["pip-audit", "--format=json", "-q"], _parse_pip_audit)

    if (Path(cwd_str) / "package.json").exists() and shutil.which("npm"):
        await _collect(["npm", "audit", "--json"], _parse_npm_audit)

    bb = _bumblebee_path()
    if bb:
        await _collect([bb, "scan", "--json"], _parse_bumblebee_findings)

    if not has_pip_audit and shutil.which("safety"):
        await _collect(["safety", "check", "--json"], _parse_safety)

    # dict preserves insertion order, so this de-duplicates without reordering.
    return list(dict.fromkeys(findings))
@@ -0,0 +1,61 @@
1
+ ---
2
+ name: tylor-run
3
+ description: "Activate Tylor to handle a task. Routes the request through the full Tylor harness — intent classification, skill auto-loading, multi-agent orchestration, and Bumblebee security scanning. All responses are labelled Tylor: so you always know the plugin is running, not native Claude/Codex/Antigravity."
4
+ ---
5
+
6
+ # /Tylor_run
7
+
8
+ Use when the user invokes `/Tylor_run <task>` or explicitly wants Tylor to handle a request.
9
+
10
+ ## How it works
11
+
12
+ 1. Resolve the active thread ID — call `list_threads()` and pick the most recently active thread, or use one the user has already switched to.
13
+ 2. Call `run_in_thread(thread_id=<active_thread_id>, message=<user_task>, cwd=<project_dir>)`.
14
+ 3. Stream the result back exactly as returned — do not paraphrase or summarise it. The response already starts with `Tylor:` so the user can see the plugin is running.
15
+
16
+ ## What Tylor does automatically (no commands needed)
17
+
18
+ - Classifies intent and selects the right role(s): researcher, implementer, reviewer, planner, drafter
19
+ - Auto-loads ECC skill groups (ecc/web, ecc/data, ecc/presentation, ecc/diagrams, ecc/pipeline) based on the task
20
+ - Runs a Bumblebee session-start security scan on first use in a thread
21
+ - Watches for dependency file changes and scans them inline
22
+ - Prompts before git push
23
+ - Spawns multiple agents in parallel when the task warrants it
24
+ - Streams a live orchestration log so the user can follow what is happening
25
+
26
+ ## Example interactions
27
+
28
+ ```
29
+ /Tylor_run research the top 3 CI/CD tools and draft a comparison doc
30
+ → Tylor:
31
+ [agent101] intent classified: researcher, drafter
32
+ [agent101] auto-loaded skill: ecc/web (web_scrape, web_fetch)
33
+ [agent101] auto-loaded skill: ecc/presentation (build_doc)
34
+ [bumblebee] starting session-start package scan...
35
+ [agent: researcher #1] starting — gather CI/CD tool comparisons
36
+ $ web_fetch
37
+ $ web_fetch
38
+ [agent: researcher #1] done
39
+ [agent: drafter #2] starting — build comparison doc from research
40
+ $ build_doc
41
+ [agent: drafter #2] done
42
+ [bumblebee] ✅ session-start scan — no vulnerabilities found
43
+ [supervisor] complete — 2 agents ran
44
+
45
+ /Tylor_run fix the failing auth tests
46
+ → Tylor:
47
+ [agent101] intent classified: implementer
48
+ [agent: implementer #1] starting — fix failing auth tests
49
+ $ Read
50
+ $ Edit
51
+ $ Bash
52
+ [agent: implementer #1] done
53
+ [supervisor] complete — 1 agent ran
54
+ ```
55
+
56
+ ## Notes
57
+
58
+ - The `Tylor:` label at the top of every response confirms the plugin is active — not native Claude Code, Codex, or Antigravity
59
+ - Works identically across all supported platforms: Claude Code CLI, Claude Desktop, GitHub Copilot CLI, Antigravity
60
+ - If no thread is active, suggest the user run `/new-thread <name>` first
61
+ - Pass `cwd` as the current project directory so agents have filesystem context
package/server/server.log DELETED
@@ -1 +0,0 @@
1
- zsh: command not found: python
File without changes
@@ -1,452 +0,0 @@
1
- """
2
- Tests for Story 2.1: DynamoDB Storage Client
3
- Run: pytest server/storage/tests/test_dynamo.py -v
4
- """
5
- import sys
6
- from pathlib import Path
7
- from unittest.mock import MagicMock, patch, call
8
-
9
- import pytest
10
-
11
- PLUGIN_DIR = Path(__file__).parent.parent.parent.parent
12
- sys.path.insert(0, str(PLUGIN_DIR))
13
-
14
- from mcp.server.fastmcp.exceptions import ToolError
15
- from server.storage.dynamo import DynamoClient, ITEM_SIZE_LIMIT
16
-
17
-
18
- # ---------------------------------------------------------------------------
19
- # Helpers
20
- # ---------------------------------------------------------------------------
21
-
22
def make_client(mock_table=None):
    """Build a DynamoClient whose boto3 session, resource and table are mocks.

    Returns a ``(client, table)`` pair; ``table`` is *mock_table* when given,
    otherwise a fresh ``MagicMock``.
    """
    table = mock_table or MagicMock()
    dynamo_resource = MagicMock()
    dynamo_resource.Table.return_value = table
    fake_session = MagicMock()
    fake_session.resource.return_value = dynamo_resource
    with patch("boto3.Session", return_value=fake_session):
        client = DynamoClient(table_name="agent101", user_id="testuser")
    # Keep a direct reference so tests can assert on table calls.
    client.table = table
    return client, table
34
-
35
-
36
- # ---------------------------------------------------------------------------
37
- # AC1: Initialises boto3 on import with configured profile
38
- # ---------------------------------------------------------------------------
39
-
40
def test_init_creates_boto3_session_no_profile():
    """Without a profile, boto3.Session() gets no arguments and the dynamodb resource is requested."""
    fake_session = MagicMock()
    fake_session.resource.return_value.Table.return_value = MagicMock()
    with patch("boto3.Session", return_value=fake_session) as session_cls:
        DynamoClient(table_name="agent101", user_id="u1")

        session_cls.assert_called_once_with()
        fake_session.resource.assert_called_once_with("dynamodb")
52
-
53
-
54
def test_init_creates_boto3_session_with_profile():
    """A configured profile is forwarded to boto3.Session as ``profile_name``."""
    fake_session = MagicMock()
    fake_session.resource.return_value.Table.return_value = MagicMock()
    with patch("boto3.Session", return_value=fake_session) as session_cls:
        DynamoClient(table_name="agent101", user_id="u1", profile="myprofile")

        session_cls.assert_called_once_with(profile_name="myprofile")
65
-
66
-
67
- # ---------------------------------------------------------------------------
68
- # AC2: put_item writes mandatory base fields for ≤400KB item
69
- # ---------------------------------------------------------------------------
70
-
71
def test_put_item_injects_mandatory_fields():
    """A first write returns PK, SK, both timestamps and Version 1, and hits the table once."""
    client, table = make_client()
    table.get_item.return_value = {}  # nothing stored under this key yet
    table.put_item.return_value = {}

    sort_key = "THREAD#t1#META"
    stored = client.put_item(sk=sort_key, attributes={"Name": "alpha"})

    assert stored["PK"] == "USER#testuser"
    assert stored["SK"] == sort_key
    for stamp in ("CreatedAt", "UpdatedAt"):
        assert stamp in stored
    version = stored["Version"]
    assert isinstance(version, int)
    assert version == 1
    # The write must actually reach DynamoDB.
    table.put_item.assert_called_once()
87
-
88
-
89
def test_put_item_passes_correct_item_to_dynamo():
    """The ``Item`` handed to table.put_item carries the caller's attributes and the user PK."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    client.put_item(sk="THREAD#t1#META", attributes={"Name": "beta"})

    _, call_kwargs = table.put_item.call_args
    sent_item = call_kwargs["Item"]
    assert sent_item["Name"] == "beta"
    assert sent_item["PK"] == "USER#testuser"
99
-
100
-
101
def test_put_item_preserves_created_at_on_update():
    """Rewriting an existing item keeps its CreatedAt and bumps Version by one."""
    client, table = make_client()
    original_stamp = "2026-01-01T00:00:00Z"
    # Item as it would already exist in the table.
    table.get_item.return_value = {
        "Item": {
            "PK": "USER#testuser",
            "SK": "THREAD#t1#META",
            "CreatedAt": original_stamp,
            "UpdatedAt": original_stamp,
            "Version": 3,
            "Name": "old",
        }
    }
    table.put_item.return_value = {}

    updated = client.put_item(sk="THREAD#t1#META", attributes={"Name": "new"})

    assert updated["CreatedAt"] == "2026-01-01T00:00:00Z"
    assert updated["Version"] == 4  # 3 + 1
120
-
121
-
122
- # ---------------------------------------------------------------------------
123
- # AC3: put_item rejects items > 400KB
124
- # ---------------------------------------------------------------------------
125
-
126
def test_put_item_rejects_oversized_item():
    """Content larger than ITEM_SIZE_LIMIT raises ToolError and nothing is written."""
    client, table = make_client()
    table.get_item.return_value = {}
    oversized = "x" * (ITEM_SIZE_LIMIT + 10_000)

    with pytest.raises(ToolError, match="400KB"):
        client.put_item(sk="THREAD#t1#MSG#ts", attributes={"Content": oversized})

    table.put_item.assert_not_called()
135
-
136
-
137
def test_put_item_accepts_item_at_size_limit():
    """A small item (100 characters of content) is accepted without raising.

    NOTE(review): despite the test name, the exact ITEM_SIZE_LIMIT boundary is
    not exercised here; only a payload far below the limit is written.
    """
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    payload = {"Content": "x" * 100}
    stored = client.put_item(sk="THREAD#t1#META", attributes=payload)

    assert stored["Version"] == 1
147
-
148
-
149
- # ---------------------------------------------------------------------------
150
- # AC4: Thread isolation enforced on get_item, query_thread, delete_item
151
- # ---------------------------------------------------------------------------
152
-
153
def test_get_item_raises_on_isolation_violation():
    """get_item refuses an SK that belongs to a different thread than thread_id."""
    client, _table = make_client()
    expected = pytest.raises(ToolError, match="Thread isolation violation")
    with expected:
        client.get_item(thread_id="t1", sk="THREAD#t2#META")
157
-
158
-
159
def test_get_item_passes_when_sk_matches_thread():
    """get_item returns a value when the SK belongs to the requested thread."""
    client, table = make_client()
    stored = {"PK": "USER#testuser", "SK": "THREAD#t1#META"}
    table.get_item.return_value = {"Item": stored}

    fetched = client.get_item(thread_id="t1", sk="THREAD#t1#META")

    assert fetched is not None
165
-
166
-
167
def test_get_item_returns_none_when_not_found():
    """A table response without an "Item" key is reported as None."""
    client, table = make_client()
    table.get_item.return_value = {}

    fetched = client.get_item(thread_id="t1", sk="THREAD#t1#META")

    assert fetched is None
173
-
174
-
175
def test_query_thread_raises_on_isolation_violation():
    """query_thread refuses an SK prefix that targets another thread."""
    client, _table = make_client()
    expected = pytest.raises(ToolError, match="Thread isolation violation")
    with expected:
        client.query_thread(thread_id="t1", sk_prefix="THREAD#t2#MSG")
179
-
180
-
181
def test_query_thread_passes_correct_prefix():
    """A matching prefix issues exactly one table query and returns its items."""
    client, table = make_client()
    table.query.return_value = {"Items": [{"SK": "THREAD#t1#MSG#001"}]}

    found = client.query_thread(thread_id="t1", sk_prefix="THREAD#t1#MSG")

    assert len(found) == 1
    table.query.assert_called_once()
188
-
189
-
190
def test_delete_item_raises_on_isolation_violation():
    """delete_item refuses an SK that belongs to a different thread than thread_id."""
    client, _table = make_client()
    expected = pytest.raises(ToolError, match="Thread isolation violation")
    with expected:
        client.delete_item(thread_id="t1", sk="THREAD#t2#META")
194
-
195
-
196
def test_delete_item_calls_dynamo_delete():
    """delete_item forwards the user PK and the given SK as the table delete key."""
    client, table = make_client()
    table.delete_item.return_value = {}

    client.delete_item(thread_id="t1", sk="THREAD#t1#META")

    expected_key = {"PK": "USER#testuser", "SK": "THREAD#t1#META"}
    table.delete_item.assert_called_once_with(Key=expected_key)
205
-
206
-
207
- # ---------------------------------------------------------------------------
208
- # Story 3.3: Sub-agent output and handoff persistence
209
- # ---------------------------------------------------------------------------
210
-
211
def test_put_agent_output_writes_expected_thread_scoped_sk():
    """put_agent_output returns a thread/agent-scoped OUT record carrying output and task."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    record = client.put_agent_output(
        thread_id="t1",
        agent_id="agent_a",
        output="Agent A completed analysis.",
        task="Analyze risk.",
    )

    assert record["SK"].startswith("THREAD#t1#AGENT#agent_a#OUT#")
    expected_fields = {
        "ThreadId": "t1",
        "AgentId": "agent_a",
        "Type": "agent_output",
        "Output": "Agent A completed analysis.",
        "Task": "Analyze risk.",
    }
    for field, value in expected_fields.items():
        assert record[field] == value
229
-
230
-
231
def test_put_agent_output_generates_unique_sk_for_rapid_writes():
    """Two back-to-back outputs from one agent get different, correctly prefixed SKs."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    first = client.put_agent_output(thread_id="t1", agent_id="agent_a", output="First output.")
    second = client.put_agent_output(thread_id="t1", agent_id="agent_a", output="Second output.")

    assert first["SK"] != second["SK"]
    prefix = "THREAD#t1#AGENT#agent_a#OUT#"
    assert first["SK"].startswith(prefix)
    assert second["SK"].startswith(prefix)
250
-
251
-
252
def test_put_agent_handoff_writes_distinct_thread_scoped_sk():
    """put_agent_handoff returns a thread/agent-scoped HANDOFF record holding the state."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}
    handoff_state = {"next_agent": "agent_b", "summary": "Carry this forward."}

    record = client.put_agent_handoff(
        thread_id="t1",
        agent_id="agent_a",
        handoff_state=handoff_state,
    )

    assert record["SK"].startswith("THREAD#t1#AGENT#agent_a#HANDOFF#")
    expected_fields = {
        "ThreadId": "t1",
        "AgentId": "agent_a",
        "Type": "agent_handoff",
        "HandoffState": handoff_state,
    }
    for field, value in expected_fields.items():
        assert record[field] == value
269
-
270
-
271
def test_put_agent_handoff_generates_unique_sk_for_rapid_writes():
    """Two back-to-back handoffs from one agent get different, correctly prefixed SKs."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    first = client.put_agent_handoff(thread_id="t1", agent_id="agent_a", handoff_state={"step": 1})
    second = client.put_agent_handoff(thread_id="t1", agent_id="agent_a", handoff_state={"step": 2})

    assert first["SK"] != second["SK"]
    prefix = "THREAD#t1#AGENT#agent_a#HANDOFF#"
    assert first["SK"].startswith(prefix)
    assert second["SK"].startswith(prefix)
290
-
291
-
292
def test_put_agent_state_writes_thread_scoped_state_record():
    """put_agent_state stores the state under the fixed per-agent STATE key."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}
    agent_state = {
        "Status": "active",
        "Persona": "analyst",
        "Task": "Analyze risk.",
        "ToolsLoaded": ["ecc/web", "ecc/data"],
    }

    record = client.put_agent_state(thread_id="t1", agent_id="agent_a", state=agent_state)

    expected_fields = {
        "SK": "THREAD#t1#AGENT#agent_a#STATE",
        "ThreadId": "t1",
        "AgentId": "agent_a",
        "Type": "agent_state",
        "Status": "active",
        "Persona": "analyst",
    }
    for field, value in expected_fields.items():
        assert record[field] == value
314
-
315
-
316
def test_query_agent_states_is_scoped_to_thread_prefix():
    """query_agent_states returns the queried items and issues exactly one table query."""
    client, table = make_client()
    state_row = {"SK": "THREAD#t1#AGENT#agent_a#STATE", "ThreadId": "t1"}
    table.query.return_value = {"Items": [state_row]}

    states = client.query_agent_states("t1")

    assert states == [{"SK": "THREAD#t1#AGENT#agent_a#STATE", "ThreadId": "t1"}]
    table.query.assert_called_once()
328
-
329
-
330
def test_agent_output_rejects_cross_thread_sk_injection():
    """An agent_id that embeds another thread's key segments is rejected before any write."""
    client, table = make_client()
    malicious_agent_id = "THREAD#t2#AGENT#agent_a"

    with pytest.raises(ToolError, match="Invalid agent_id"):
        client.put_agent_output(thread_id="t1", agent_id=malicious_agent_id, output="bad")

    table.put_item.assert_not_called()
339
-
340
-
341
def test_switch_thread_suspends_previous_agents_and_resumes_target_agents():
    """Switching alpha -> beta writes alpha's agent as suspended and beta's agent as active.

    The table is seeded with a current-thread pointer at ``thread_alpha`` and
    META rows for both threads. The two queries return, in order, alpha's
    active agent and beta's suspended agent. The statuses are then read back
    from the transactional write issued on ``client._client``.
    """
    client, table = make_client()

    # Rows served by get_item, keyed by sort key.
    rows_by_sk = {
        "THREAD#CURRENT#META": {
            "PK": "USER#testuser",
            "SK": "THREAD#CURRENT#META",
            "CurrentThreadId": "thread_alpha",
            "ActiveAt": "2026-05-12T00:00:00Z",
            "CreatedAt": "2026-05-12T00:00:00Z",
            "UpdatedAt": "2026-05-12T00:00:00Z",
            "Version": 1,
        },
        "THREAD#thread_alpha#META": {
            "PK": "USER#testuser",
            "SK": "THREAD#thread_alpha#META",
            "Version": 1,
        },
        "THREAD#thread_beta#META": {
            "PK": "USER#testuser",
            "SK": "THREAD#thread_beta#META",
            "Version": 1,
        },
    }

    def raw_get_side_effect(Key):
        row = rows_by_sk.get(Key["SK"])
        if row:
            return {"Item": row}
        return {}

    table.get_item.side_effect = raw_get_side_effect

    alpha_agents = {
        "Items": [
            {
                "PK": "USER#testuser",
                "SK": "THREAD#thread_alpha#AGENT#agent_a#STATE",
                "ThreadId": "thread_alpha",
                "AgentId": "agent_a",
                "Status": "active",
                "Version": 1,
            }
        ]
    }
    beta_agents = {
        "Items": [
            {
                "PK": "USER#testuser",
                "SK": "THREAD#thread_beta#AGENT#agent_b#STATE",
                "ThreadId": "thread_beta",
                "AgentId": "agent_b",
                "Status": "suspended",
                "Version": 1,
            }
        ]
    }
    # Consumed in order, one response per query call.
    table.query.side_effect = [alpha_agents, beta_agents]

    outcome = client.switch_thread("thread_beta")

    assert outcome["thread_id"] == "thread_beta"
    transact_items = client._client.transact_write_items.call_args.kwargs["TransactItems"]
    statuses = {}
    for write in transact_items:
        put_item = write["Put"]["Item"]
        # Only agent STATE rows matter; values are in DynamoDB's typed wire form.
        if put_item["SK"]["S"].endswith("#STATE"):
            statuses[put_item["SK"]["S"]] = put_item["Status"]["S"]
    assert statuses["THREAD#thread_alpha#AGENT#agent_a#STATE"] == "suspended"
    assert statuses["THREAD#thread_beta#AGENT#agent_b#STATE"] == "active"
408
-
409
-
410
- # ---------------------------------------------------------------------------
411
- # AC5: Version increments on every write
412
- # ---------------------------------------------------------------------------
413
-
414
def test_version_starts_at_1_for_new_item():
    """The first write of a key reports Version 1."""
    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    stored = client.put_item(sk="THREAD#t1#META", attributes={})

    assert stored["Version"] == 1
421
-
422
-
423
def test_version_increments_on_subsequent_writes():
    """Writing over an item at Version 7 reports Version 8."""
    client, table = make_client()
    table.get_item.return_value = {
        "Item": {
            "PK": "USER#testuser",
            "SK": "THREAD#t1#META",
            "CreatedAt": "2026-01-01T00:00:00Z",
            "UpdatedAt": "2026-01-01T00:00:00Z",
            "Version": 7,
        }
    }
    table.put_item.return_value = {}

    stored = client.put_item(sk="THREAD#t1#META", attributes={})

    assert stored["Version"] == 8
437
-
438
-
439
- # ---------------------------------------------------------------------------
440
- # ISO 8601 date format
441
- # ---------------------------------------------------------------------------
442
-
443
def test_created_at_is_iso_8601_utc():
    """CreatedAt and UpdatedAt both look like ``YYYY-MM-DDTHH:MM:SSZ``."""
    import re

    client, table = make_client()
    table.get_item.return_value = {}
    table.put_item.return_value = {}

    stored = client.put_item(sk="THREAD#t1#META", attributes={})

    iso_utc = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$")
    for field in ("CreatedAt", "UpdatedAt"):
        assert iso_utc.match(stored[field])