tylor-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/.aws-setup.sh +25 -0
  2. package/.claude-plugin/plugin.json +22 -0
  3. package/.mcp.json +12 -0
  4. package/AGENTS.md +93 -0
  5. package/CLAUDE.md +99 -0
  6. package/CLAUDE_PLATFORM_AWS_SETUP.md +105 -0
  7. package/LICENSE +21 -0
  8. package/README.md +146 -0
  9. package/assets/tylor_logo.png +0 -0
  10. package/assets/tylor_threads_concept.png +0 -0
  11. package/bin/tylor.js +23 -0
  12. package/hooks/kill-thread-trigger.sh +7 -0
  13. package/hooks/post-tool-use-code-index.sh +7 -0
  14. package/hooks/session-checkpoint.sh +7 -0
  15. package/hooks/session-start.sh +7 -0
  16. package/install.py +401 -0
  17. package/install.sh +260 -0
  18. package/package.json +24 -0
  19. package/pytest.ini +2 -0
  20. package/registry.json +26 -0
  21. package/server/.env.example +24 -0
  22. package/server/__init__.py +0 -0
  23. package/server/config.py +89 -0
  24. package/server/main.py +93 -0
  25. package/server/personas/analyst.md +15 -0
  26. package/server/personas/ceo.md +14 -0
  27. package/server/personas/code_agent.md +15 -0
  28. package/server/personas/cto.md +14 -0
  29. package/server/provision.py +260 -0
  30. package/server/provision_opensearch.py +154 -0
  31. package/server/requirements.txt +26 -0
  32. package/server/storage/__init__.py +0 -0
  33. package/server/storage/dynamo.py +399 -0
  34. package/server/storage/json_store.py +359 -0
  35. package/server/storage/opensearch.py +194 -0
  36. package/server/storage/s3.py +96 -0
  37. package/server/storage/tests/__init__.py +0 -0
  38. package/server/storage/tests/test_dynamo.py +452 -0
  39. package/server/storage/tests/test_json_store.py +226 -0
  40. package/server/storage/tests/test_opensearch.py +270 -0
  41. package/server/storage/tests/test_s3.py +125 -0
  42. package/server/tests/__init__.py +0 -0
  43. package/server/tests/test_install.py +606 -0
  44. package/server/tests/test_isolation.py +90 -0
  45. package/server/tests/test_ui_server.py +385 -0
  46. package/server/tests/test_ui_shader_background.py +52 -0
  47. package/server/tests/test_ui_story_6_3.py +105 -0
  48. package/server/tools/__init__.py +0 -0
  49. package/server/tools/_mcp.py +4 -0
  50. package/server/tools/agents.py +160 -0
  51. package/server/tools/ecc/__init__.py +1 -0
  52. package/server/tools/ecc/data.py +35 -0
  53. package/server/tools/ecc/diagrams.py +23 -0
  54. package/server/tools/ecc/pipeline.py +24 -0
  55. package/server/tools/ecc/presentation.py +24 -0
  56. package/server/tools/ecc/web.py +23 -0
  57. package/server/tools/executor.py +880 -0
  58. package/server/tools/harness.py +330 -0
  59. package/server/tools/help.py +162 -0
  60. package/server/tools/hooks.py +357 -0
  61. package/server/tools/personas.py +110 -0
  62. package/server/tools/registry.py +195 -0
  63. package/server/tools/router.py +117 -0
  64. package/server/tools/skill_installer.py +230 -0
  65. package/server/tools/summarizer.py +168 -0
  66. package/server/tools/tests/__init__.py +0 -0
  67. package/server/tools/tests/test_agents.py +246 -0
  68. package/server/tools/tests/test_code_index.py +108 -0
  69. package/server/tools/tests/test_ecc_tools.py +51 -0
  70. package/server/tools/tests/test_executor.py +584 -0
  71. package/server/tools/tests/test_help_agent101.py +149 -0
  72. package/server/tools/tests/test_hooks.py +124 -0
  73. package/server/tools/tests/test_kill_thread.py +125 -0
  74. package/server/tools/tests/test_new_thread_list_threads.py +293 -0
  75. package/server/tools/tests/test_personas.py +52 -0
  76. package/server/tools/tests/test_recall_memory.py +55 -0
  77. package/server/tools/tests/test_registry_client.py +308 -0
  78. package/server/tools/tests/test_router.py +263 -0
  79. package/server/tools/tests/test_skill_installer.py +174 -0
  80. package/server/tools/tests/test_switch_thread.py +163 -0
  81. package/server/tools/tests/test_thread_command_skills.py +54 -0
  82. package/server/tools/tests/test_thread_resolver.py +165 -0
  83. package/server/tools/tests/test_tier1_schema.py +296 -0
  84. package/server/tools/thread_resolver.py +75 -0
  85. package/server/tools/tylor.py +374 -0
  86. package/server/tools/ui.py +38 -0
  87. package/server/ui_server.py +292 -0
  88. package/server/validate.py +237 -0
  89. package/skills/add-skill/SKILL.md +37 -0
  90. package/skills/afk-status/SKILL.md +20 -0
  91. package/skills/bmad/SKILL.md +14 -0
  92. package/skills/help-agent101/SKILL.md +48 -0
  93. package/skills/kill-thread/SKILL.md +35 -0
  94. package/skills/list-threads/SKILL.md +35 -0
  95. package/skills/new-thread/SKILL.md +35 -0
  96. package/skills/recall/SKILL.md +39 -0
  97. package/skills/run/SKILL.md +33 -0
  98. package/skills/set-sandbox/SKILL.md +38 -0
  99. package/skills/switch-thread/SKILL.md +38 -0
  100. package/ui/claude-logo.png +0 -0
  101. package/ui/index.html +1314 -0
@@ -0,0 +1,330 @@
1
+ """
2
+ server/tools/harness.py — Agent SDK orchestration harness.
3
+
4
+ 5 roles. Claude brings all domain knowledge. No pre-built domain agents.
5
+ Cricket coach, legal review, architecture — all Claude, right lens, right tools.
6
+ Persistent session memory per thread. Interactive with human-in-the-loop.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from pathlib import Path
12
+ from typing import AsyncIterator
13
+
14
+ from server.tools._mcp import mcp
15
+
16
+ # ── 5 roles (lenses, not knowledge bases) ────────────────────────────────────
17
+
18
# The five working modes ("lenses") a sub-agent can be given. Each entry lists
# the tool names that role may use and the prompt text that sets its behaviour.
ROLES = {
    "researcher": {
        "tools": ["WebFetch", "WebSearch", "Read", "Glob", "AskUserQuestion"],
        "lens": (
            "You are in deep research mode. "
            "Gather information thoroughly. "
            "Surface options with clear tradeoffs. "
            "Ask one focused question at a time. "
            "Never guess — if you need to know something, ask."
        ),
    },
    "implementer": {
        "tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep", "AskUserQuestion"],
        "lens": (
            "You are in implementation mode. "
            "Always read the existing code before changing anything. "
            "Write clean, working code. "
            "When prototyping during planning: 50 lines max that proves the concept. "
            "When building for real: production quality. "
            "If you spot issues in adjacent areas "
            "(e.g. you're fixing backend but see a frontend bug), flag them proactively."
        ),
    },
    "reviewer": {
        "tools": ["Read", "Glob", "Grep", "AskUserQuestion"],
        "lens": (
            "You are in review mode. "
            "Read everything relevant before giving feedback. "
            "Be specific and actionable — no vague 'improve this'. "
            "Surface risks, gaps, and improvements the user hasn't thought of. "
            "If you spot compliance or legal issues, flag them explicitly."
        ),
    },
    "planner": {
        "tools": ["Read", "Write", "Glob", "AskUserQuestion"],
        "lens": (
            "You are in planning mode. "
            "Structure the work clearly. "
            "Ask exactly what you need to know before planning — not after. "
            "Break work into concrete steps. "
            "Identify dependencies and risks upfront."
        ),
    },
    "drafter": {
        "tools": ["Read", "Write", "AskUserQuestion"],
        "lens": (
            "You are in drafting mode. "
            "Produce polished written output: specs, docs, policies, PRDs, copy, legal documents. "
            "Ask the minimum necessary questions before drafting. "
            "Ask about: who the audience is, what jurisdiction/context applies, "
            "what decisions are already made. "
            "Then produce complete, ready-to-use output."
        ),
    },
}
65
+
66
+
67
def _get_bmad_path() -> str | None:
    """Locate a BMAD installation on disk.

    Lookup order:
      1. the ``bmad_path`` value in ``~/.tylor/config.json``, when that
         path exists;
      2. ``~/.tylor/bmad``;
      3. ``~/.claude/plugins/bmad``.

    The config lookup is best-effort: a missing, unreadable, malformed or
    non-object config file is ignored and the default locations are tried.

    Returns:
        The first existing location as a string, or ``None`` if none exists.
    """
    home = Path.home()
    config_file = home / ".tylor" / "config.json"

    # Only I/O and parse failures are expected here; anything else is a real
    # bug and should not be silently swallowed.
    try:
        cfg = json.loads(config_file.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        cfg = None

    if isinstance(cfg, dict):
        configured = cfg.get("bmad_path")
        # Guard against non-string values (numbers, lists, ...) in the config.
        if isinstance(configured, str) and configured:
            try:
                if Path(configured).exists():
                    return configured
            except (OSError, ValueError):
                pass  # unusable path string; fall back to the defaults

    for candidate in (
        home / ".tylor" / "bmad",
        home / ".claude" / "plugins" / "bmad",
    ):
        if candidate.exists():
            return str(candidate)
    return None
84
+
85
+
86
def _get_all_threads() -> list[dict]:
    """Summarise the stored threads so the supervisor prompt can list them.

    Each summary is a dict with ``name``, ``status`` and ``id`` keys. Records
    are read with either capitalised (``Name``/``Status``) or lower-case keys,
    and records without a truthy name are skipped.

    Returns:
        The list of summaries, or ``[]`` if the database cannot be imported
        or queried (any exception is treated as "no threads").
    """
    try:
        from server.tools.tylor import _get_db

        summaries: list[dict] = []
        for record in _get_db().list_threads():
            if not (record.get("Name") or record.get("name")):
                continue
            summaries.append({
                "name": record.get("Name", record.get("name", "")),
                "status": record.get("Status", record.get("status", "")),
                "id": record.get("thread_id", record.get("id", "")),
            })
        return summaries
    except Exception:
        return []
98
+
99
+
100
def build_supervisor_prompt(thread_name: str, cwd: str | None, bmad_path: str | None) -> str:
    """Assemble the system prompt for the supervising agent.

    The prompt opens with a context section (active thread, project
    directory, the other known threads, BMAD availability) followed by the
    fixed behavioural instructions.

    Args:
        thread_name: Name of the active thread; "General" is shown when empty.
        cwd: Project directory to mention, if any.
        bmad_path: Location of a BMAD installation to mention, if any.

    Returns:
        The complete prompt text.
    """
    facts: list[str] = []
    if thread_name:
        facts.append(f"Active thread: {thread_name}")
    if cwd:
        facts.append(f"Project: {cwd}")

    # List every other thread so the model can reference or suggest them.
    siblings = [
        f"{entry['name']} ({entry['status']})"
        for entry in _get_all_threads()
        if entry['name'] != thread_name
    ]
    if siblings:
        facts.append("Other open threads: " + ", ".join(siblings))

    if bmad_path:
        facts.append(
            f"BMAD methodology available at {bmad_path} — use for structured PRDs, "
            "architecture docs, epics when the task calls for formal structured output."
        )
    context_block = "\n".join(facts)

    return f"""You are a proactive supervisor in thread: "{thread_name or 'General'}".
{context_block}

You have access to 5 sub-agent roles via the Agent tool:
- researcher (WebSearch, WebFetch, Read) — gather info, surface options
- implementer (Read, Write, Edit, Bash) — build, fix, prototype
- reviewer (Read, Glob, Grep) — audit, feedback, risk
- planner (Read, Write) — structure, breakdown, roadmap
- drafter (Read, Write) — docs, specs, policies, PRDs

YOU provide all domain knowledge. Cricket, legal, medical, finance — you know it all.
Roles are just modes of working, not separate knowledge bases.

## Core behaviours

**Spawn agents when the task has distinct phases or needs focused execution.**
Don't spawn for simple answers — just answer directly.

**Be proactive.** If you notice something the user hasn't asked about:
- Architecture ambiguity → spawn reviewer to map tradeoffs before the user commits
- Implementation unclear → spawn implementer for a quick proof of concept
- Legal/compliance risk → flag it, offer to draft the required document
- Cross-thread issue → "this looks like it affects the frontend too — want me to fix it there?"

**Ask questions before acting, not after.**
Use AskUserQuestion when you need information to proceed correctly.
Ask ONE focused question at a time. Never ask 5 things at once.

**Present decisions as options:**
Format: "Option A: [pros/cons]. Option B: [pros/cons]. I recommend X because Y."
Then ask for the decision.

**Thread memory is your context.**
Reference prior decisions naturally. Never re-ask what's already been answered.

**Cross-thread awareness.**
You can see all open threads. When a conversation spawns work that belongs in a
different thread (e.g. frontend design decisions during a PRD discussion), suggest it:
"This is growing into frontend territory — you may want a Frontend thread for this later."
Never switch threads yourself — just suggest. The user decides.

**Token efficiency.**
Use the minimum context needed. Don't repeat information already in the thread.
Brief sub-agents with only what they need — not the entire conversation history.
"""
169
+
170
+
171
def build_agent_registry() -> dict:
    """Create one ``AgentDefinition`` per entry in ``ROLES``.

    Each definition uses the role's lens text as its prompt, the role's tool
    list as its tools, and "<name>: <first 100 characters of the lens>" as its
    description.

    Returns:
        A mapping of role name to ``AgentDefinition``, or an empty dict when
        ``claude_agent_sdk`` cannot be imported.
    """
    try:
        from claude_agent_sdk import AgentDefinition
    except ImportError:
        return {}

    registry: dict = {}
    for role_name, spec in ROLES.items():
        lens = spec["lens"]
        registry[role_name] = AgentDefinition(
            description=f"{role_name}: {lens[:100]}",
            prompt=lens,
            tools=spec["tools"],
        )
    return registry
185
+
186
+
187
+ # ── Session persistence ───────────────────────────────────────────────────────
188
+
189
def _sessions_file() -> Path:
    """Return the path of the thread-to-session mapping file, ``~/.tylor/sessions.json``."""
    return Path.home().joinpath(".tylor", "sessions.json")
191
+
192
+
193
def _load_session_id(thread_id: str) -> str | None:
    """Look up the session ID saved for ``thread_id``.

    Returns:
        The stored value, or ``None`` when the sessions file is absent, cannot
        be read or parsed, or has no entry for this thread.
    """
    store = _sessions_file()
    if store.exists():
        try:
            mapping = json.loads(store.read_text())
            return mapping.get(thread_id)
        except Exception:
            # Best-effort read: any failure means "no saved session".
            pass
    return None
201
+
202
+
203
def _save_session_id(thread_id: str, session_id: str) -> None:
    """Record ``session_id`` as the session for ``thread_id``.

    Entries for other threads are preserved. If the existing file is missing,
    unreadable, not valid JSON, or valid JSON that is not an object, the
    mapping starts fresh rather than raising.

    Args:
        thread_id: Thread whose session is being recorded.
        session_id: Session ID to store for that thread.
    """
    store = _sessions_file()
    store.parent.mkdir(parents=True, exist_ok=True)

    try:
        loaded = json.loads(store.read_text())
    except (OSError, ValueError):
        loaded = None
    # A JSON list, string or number would make the item assignment below
    # raise, so only a real object is accepted as the existing mapping.
    sessions: dict = loaded if isinstance(loaded, dict) else {}
    sessions[thread_id] = session_id

    # Write to a sibling file and rename it over the target, so an interrupted
    # write cannot leave a truncated sessions file behind.
    scratch = store.with_name(store.name + ".tmp")
    scratch.write_text(json.dumps(sessions, indent=2))
    scratch.replace(store)
214
+
215
+
216
+ # ── Core harness ──────────────────────────────────────────────────────────────
217
+
218
async def run_with_agents(
    message: str,
    thread_id: str,
    thread_name: str = "",
    cwd: str | None = None,
) -> AsyncIterator[str]:
    """Run ``message`` through the Agent SDK and stream the text it produces.

    The run resumes the session saved for ``thread_id`` (if any). After the
    stream ends, the last session ID seen on any message is saved for the
    thread. Errors raised while streaming are yielded as a warning line
    rather than propagated.

    Args:
        message: Prompt sent to the supervisor.
        thread_id: Key under which the session ID is loaded and saved.
        thread_name: Thread name shown in the supervisor prompt.
        cwd: Working directory passed to the SDK and shown in the prompt.

    Yields:
        Text fragments from the streamed messages, or a single warning when
        ``claude_agent_sdk`` is not installed.
    """
    try:
        from claude_agent_sdk import query, ClaudeAgentOptions
    except ImportError:
        yield "⚠️ Agent SDK not installed. Run: pip install claude-agent-sdk"
        return

    supervisor_tools = [
        "Read", "Write", "Edit", "Bash", "Glob", "Grep",
        "WebFetch", "WebSearch", "AskUserQuestion", "Agent",
    ]
    options = ClaudeAgentOptions(
        system_prompt=build_supervisor_prompt(thread_name, cwd, _get_bmad_path()),
        allowed_tools=supervisor_tools,
        agents=build_agent_registry(),
        resume=_load_session_id(thread_id),
        cwd=cwd,
        max_turns=20,
    )

    latest_session: str | None = None
    try:
        async for event in query(prompt=message, options=options):
            # Any message carrying a truthy session_id updates the value;
            # the last one seen is the one that gets saved.
            seen = getattr(event, "session_id", None)
            if seen:
                latest_session = seen

            # The payload is either a plain string or a list of blocks, of
            # which only those exposing a non-empty string `text` are emitted.
            payload = getattr(event, "content", None) or getattr(event, "text", None)
            if isinstance(payload, str):
                if payload:
                    yield payload
                continue
            if not isinstance(payload, list):
                continue
            for piece in payload:
                piece_text = getattr(piece, "text", None)
                if isinstance(piece_text, str) and piece_text:
                    yield piece_text

    except Exception as exc:
        yield f"\n⚠️ Error: {exc}"

    if latest_session:
        _save_session_id(thread_id, latest_session)
272
+
273
+
274
+ # ── MCP tools ─────────────────────────────────────────────────────────────────
275
+
276
@mcp.tool()
async def run_in_thread(thread_id: str, message: str, cwd: str | None = None) -> str:
    """
    Run a task in a thread using the agent harness.

    Claude uses its own knowledge for any domain. The harness provides
    5 roles (researcher, implementer, reviewer, planner, drafter) and
    Claude picks the right one(s) based on the task.

    Persistent memory: each thread has its own session — full conversation
    history preserved across sessions. Agents are interactive and ask
    focused questions when they need information to proceed.

    The run is limited to 240 seconds; on timeout the output gathered so
    far is returned with a warning appended.

    Args:
        thread_id: Active thread ID.
        message: What you want done.
        cwd: Project directory (optional, defaults to current).
    """
    # Imported here because this module's header does not import asyncio,
    # so the TimeoutError handler below would otherwise raise NameError.
    import asyncio

    thread_name = ""
    try:
        from server.tools.tylor import _get_db
        meta = _get_db().get_thread_meta(thread_id) or {}
        # Accept either key casing, as the thread listing helper does.
        thread_name = meta.get("Name") or meta.get("name") or ""
    except Exception:
        # Best-effort: the thread name only decorates the prompt.
        pass

    chunks: list[str] = []

    async def _collect() -> None:
        # Append into the outer list so partial output survives a timeout.
        async for chunk in run_with_agents(message, thread_id, thread_name, cwd):
            chunks.append(chunk)

    try:
        await asyncio.wait_for(_collect(), timeout=240)
    except asyncio.TimeoutError:
        chunks.append("\n⚠️ Harness timed out (240s). The agent may still be running.")
    return "".join(chunks) or "(no output)"
309
+
310
+
311
@mcp.tool()
def list_available_roles() -> dict:
    """
    Show the 5 roles the supervisor can assign to sub-agents.
    Claude picks the right role(s) — you never need to specify.
    """
    # Preview each role with the first 120 characters of its lens text.
    role_previews = {}
    for role_name, spec in ROLES.items():
        role_previews[role_name] = spec["lens"][:120]

    how_it_works = (
        "Roles are lenses. Claude brings all domain knowledge. "
        "No pre-built agents for cricket, legal, medical, etc. needed. "
        "Claude IS the expert. Roles just focus it: research vs implement vs review."
    )
    memory = (
        "Each thread has a persistent Agent SDK session. "
        "Full conversation history saved automatically. "
        "SwThread resumes the session — zero re-priming."
    )
    bmad_found = _get_bmad_path() is not None

    return {
        "roles": role_previews,
        "how_it_works": how_it_works,
        "memory": memory,
        "bmad": f"BMAD available: {bmad_found}",
    }
@@ -0,0 +1,162 @@
1
+ """server/tools/help.py — agent101 capability discovery."""
2
+ from __future__ import annotations
3
+
4
+ from ._mcp import mcp
5
+ from . import registry as registry_mod
6
+ from .personas import list_persona_summaries
7
+
8
+
9
# Slash commands listed in the help index, as (command, description) pairs
# expanded into {"command", "description"} dicts.
SLASH_COMMANDS = [
    {"command": command, "description": description}
    for command, description in (
        ("/new-thread", "Create a named thread and switch future work into it."),
        ("/switch-thread", "List and switch to an existing thread."),
        ("/kill-thread", "Close a thread and start async summarization."),
        ("/list-threads", "Show available threads with status and activity."),
        ("/recall", "Search semantic memory within a thread."),
        ("/add-skill", "Install a skill package and update registry.json."),
        ("/open-threads-ui", "Open the local thread visualizer UI when available."),
        ("/set-sandbox", "Declare or clear filesystem roots for AFK execution."),
        ("/afk-status", "Report current AFK execution progress for the active thread."),
    )
]
47
+
48
# Tier 1 tools listed in the help index, as (name, description) pairs
# expanded into {"name", "description"} dicts.
TIER1_TOOLS = [
    {"name": name, "description": description}
    for name, description in (
        ("new_thread", "Create a new named thread."),
        ("switch_thread", "Atomically switch to a thread by ID."),
        ("switch_thread_by_name", "Fuzzy-match a thread name and switch to it."),
        ("kill_thread", "Close a thread and dispatch async summarization."),
        ("recall_memory", "Search thread memory using semantic recall."),
        ("save_memory", "Persist a typed or untyped memory fact for a thread."),
        ("list_threads", "List threads sorted by recent activity."),
        ("list_personas", "List available specialist personas."),
        ("spawn_agent", "Spawn a persona-scoped sub-agent in a thread."),
        ("add_skill", "Install a skill package and update registry.json."),
        ("load_skill_tools", "Lazy-load a registered Tier 2 tool group."),
        ("list_registry", "List installed skill packages without heavy schemas."),
        ("help_agent101", "Return a current structured capability index."),
        ("set_sandbox", "Declare or clear sandbox roots for a thread."),
        ("execute_in_sandbox", "Run commands only after sandbox roots are configured."),
        ("execute_with_recovery", "Run sandboxed commands with bounded AFK recovery."),
        ("start_afk", "Plan and execute an AFK task in the active sandboxed thread."),
        ("afk_status", "Report AFK progress for the active thread."),
        ("pause_afk", "Request AFK execution to pause at the next checkpoint."),
    )
]
126
+
127
+
128
def _registered_skills() -> list[dict]:
    """Reduce each registry skill entry to its name and trigger text.

    Reads the ``"skills"`` list returned by ``registry_mod.list_registry()``.
    ``trigger`` is an empty string for entries without a
    ``trigger_description``.
    """
    summaries: list[dict] = []
    for entry in registry_mod.list_registry()["skills"]:
        summaries.append({
            "name": entry["name"],
            "trigger": entry.get("trigger_description", ""),
        })
    return summaries
136
+
137
+
138
def _ecc_categories() -> dict[str, list[str]]:
    """Map each category in ``registry_mod.ECC_GROUPS`` to its sorted tool names.

    Every group value is unpacked as a ``(module_path, tools)`` pair; only the
    tools part is used.
    """
    categories: dict[str, list[str]] = {}
    for category, group in registry_mod.ECC_GROUPS.items():
        _module_path, tools = group
        categories[category] = sorted(tools)
    return categories
143
+
144
+
145
def build_help_index() -> dict:
    """Build a current capability index for `/help-agent101`.

    The static command and tool tables are shallow-copied; skills, personas
    and ECC categories are looked up on every call.
    """
    index: dict = {}
    index["slash_commands"] = [*SLASH_COMMANDS]
    index["tier1_tools"] = [*TIER1_TOOLS]
    index["registered_skills"] = _registered_skills()
    index["personas"] = list_persona_summaries()
    index["ecc_categories"] = _ecc_categories()
    return index
154
+
155
+
156
@mcp.tool()
def help_agent101() -> dict:
    """
    Return a structured listing of agent101 slash commands, Tier 1 tools,
    registered skills, personas, and ECC categories.
    """
    # The index is rebuilt on every call, not cached.
    capability_index = build_help_index()
    return capability_index