loom-agents 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. loom_agents-0.1.0/.claude/settings.local.json +9 -0
  2. loom_agents-0.1.0/.gitignore +13 -0
  3. loom_agents-0.1.0/CLAUDE.md +102 -0
  4. loom_agents-0.1.0/IMPLEMENTATION_PLAN.md +544 -0
  5. loom_agents-0.1.0/PKG-INFO +28 -0
  6. loom_agents-0.1.0/PRD-loom-v1.1.md +903 -0
  7. loom_agents-0.1.0/README.md +11 -0
  8. loom_agents-0.1.0/loom/__init__.py +3 -0
  9. loom_agents-0.1.0/loom/bus/__init__.py +0 -0
  10. loom_agents-0.1.0/loom/bus/channels.py +65 -0
  11. loom_agents-0.1.0/loom/bus/events.py +28 -0
  12. loom_agents-0.1.0/loom/bus/publisher.py +63 -0
  13. loom_agents-0.1.0/loom/bus/queue.py +30 -0
  14. loom_agents-0.1.0/loom/bus/subscriber.py +25 -0
  15. loom_agents-0.1.0/loom/cli.py +761 -0
  16. loom_agents-0.1.0/loom/config.py +125 -0
  17. loom_agents-0.1.0/loom/db/__init__.py +0 -0
  18. loom_agents-0.1.0/loom/db/connection.py +29 -0
  19. loom_agents-0.1.0/loom/db/migrations/001_initial.sql +75 -0
  20. loom_agents-0.1.0/loom/db/migrations/002_orchestration.sql +55 -0
  21. loom_agents-0.1.0/loom/db/migrations.py +46 -0
  22. loom_agents-0.1.0/loom/exceptions.py +76 -0
  23. loom_agents-0.1.0/loom/graph/__init__.py +0 -0
  24. loom_agents-0.1.0/loom/graph/cache.py +155 -0
  25. loom_agents-0.1.0/loom/graph/deps.py +112 -0
  26. loom_agents-0.1.0/loom/graph/project.py +104 -0
  27. loom_agents-0.1.0/loom/graph/store.py +624 -0
  28. loom_agents-0.1.0/loom/graph/task.py +88 -0
  29. loom_agents-0.1.0/loom/ids.py +8 -0
  30. loom_agents-0.1.0/loom/mcp/__init__.py +0 -0
  31. loom_agents-0.1.0/loom/mcp/__main__.py +5 -0
  32. loom_agents-0.1.0/loom/mcp/server.py +60 -0
  33. loom_agents-0.1.0/loom/mcp/tools.py +325 -0
  34. loom_agents-0.1.0/loom/orchestration/__init__.py +1 -0
  35. loom_agents-0.1.0/loom/orchestration/escalation.py +161 -0
  36. loom_agents-0.1.0/loom/orchestration/loop.py +173 -0
  37. loom_agents-0.1.0/loom/orchestration/retry.py +100 -0
  38. loom_agents-0.1.0/loom/orchestration/sweeper.py +36 -0
  39. loom_agents-0.1.0/loom/skills/__init__.py +0 -0
  40. loom_agents-0.1.0/loom/skills/builtin/debug_failure.md +75 -0
  41. loom_agents-0.1.0/loom/skills/builtin/decompose_project.md +78 -0
  42. loom_agents-0.1.0/loom/skills/builtin/define_done.md +41 -0
  43. loom_agents-0.1.0/loom/skills/builtin/estimate_complexity.md +42 -0
  44. loom_agents-0.1.0/loom/skills/builtin/generate_test_plan.md +66 -0
  45. loom_agents-0.1.0/loom/skills/builtin/review_output.md +69 -0
  46. loom_agents-0.1.0/loom/skills/builtin/write_spec.md +80 -0
  47. loom_agents-0.1.0/loom/skills/builtin/write_task_context.md +51 -0
  48. loom_agents-0.1.0/loom/skills/decomposer.py +269 -0
  49. loom_agents-0.1.0/loom/skills/loader.py +85 -0
  50. loom_agents-0.1.0/loom/skills/runner.py +104 -0
  51. loom_agents-0.1.0/loom/workflows/__init__.py +1 -0
  52. loom_agents-0.1.0/loom/workflows/builtin/debug_and_fix.yaml +20 -0
  53. loom_agents-0.1.0/loom/workflows/builtin/ship_feature.yaml +29 -0
  54. loom_agents-0.1.0/loom/workflows/loader.py +82 -0
  55. loom_agents-0.1.0/loom/workflows/runner.py +426 -0
  56. loom_agents-0.1.0/plan_3.md +753 -0
  57. loom_agents-0.1.0/plan_4.md +640 -0
  58. loom_agents-0.1.0/pyproject.toml +39 -0
  59. loom_agents-0.1.0/tests/__init__.py +0 -0
  60. loom_agents-0.1.0/tests/conftest.py +71 -0
  61. loom_agents-0.1.0/tests/integration/__init__.py +0 -0
  62. loom_agents-0.1.0/tests/integration/test_claim_ttl.py +212 -0
  63. loom_agents-0.1.0/tests/integration/test_e2e.py +232 -0
  64. loom_agents-0.1.0/tests/integration/test_escalation.py +255 -0
  65. loom_agents-0.1.0/tests/integration/test_retry_dlq.py +289 -0
  66. loom_agents-0.1.0/tests/integration/test_skills.py +358 -0
  67. loom_agents-0.1.0/tests/integration/test_stream_a.py +303 -0
  68. loom_agents-0.1.0/tests/integration/test_workflows.py +410 -0
  69. loom_agents-0.1.0/uv.lock +1743 -0
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(git commit:*)",
5
+ "Bash(git pull:*)",
6
+ "Bash(uv run:*)"
7
+ ]
8
+ }
9
+ }
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+ .venv/
9
+ .env
10
+ .loom/logs/
11
+ *.log
12
+ .pytest_cache/
13
+ .mypy_cache/
@@ -0,0 +1,102 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## What Is Loom
6
+
7
+ Loom is a multi-agent project orchestration system. PostgreSQL is the source of truth, Redis is a cache and event bus, and a FastMCP server provides Claude Code integration. Distributed via `pipx install loom-agents`.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ uv sync # Install dependencies
13
+ uv run pytest tests/ -v # Run all tests (needs Docker for testcontainers)
14
+ uv run pytest tests/integration/test_e2e.py::test_full_agent_workflow -v # Single test
15
+ uv run python -m loom.mcp # Run MCP server directly (stdio transport)
16
+ uv run loom init # Scaffold a project
17
+ uv run loom up # Start Postgres + Redis, run migrations
18
+ uv run loom down # Stop containers
19
+ uv run loom status # Project overview
20
+ ```
21
+
22
+ Tests require Docker Desktop running — testcontainers spins up real Postgres 16 + Redis 7 containers automatically.
23
+
24
+ ## Architecture
25
+
26
+ ### Write Path (every task mutation)
27
+
28
+ ```
29
+ MCP tool (mcp/tools.py)
30
+ → graph/store.py writes to Postgres (ACID)
31
+ → graph/cache.py syncs to Redis hash + status sets
32
+ → bus/publisher.py publishes to Redis Stream + pub/sub
33
+ → graph/deps.py checks if blocked dependents are now unblocked
34
+ ```
35
+
36
+ Postgres is always written first. If it fails, nothing else happens. If Redis sync fails, the cache is stale but self-corrects on next read or MCP restart.
37
+
38
+ ### Read Path
39
+
40
+ ```
41
+ MCP tool calls graph/cache.py
42
+ → reads Redis sorted set / hash (fast path)
43
+ → on cache miss: reads Postgres via graph/store.py, syncs to Redis
44
+ ```
45
+
46
+ ### Module Contracts (enforced, not optional)
47
+
48
+ | Module | Rule |
49
+ |--------|------|
50
+ | `graph/store.py` | **ONLY** writer to Postgres for task data |
51
+ | `graph/cache.py` | **ONLY** reader from Redis for task data; falls back to store.py |
52
+ | `bus/channels.py` | **ALL** Redis key patterns; no string literals like `"loom:tasks:ready"` elsewhere |
53
+ | `mcp/tools.py` | Each tool **≤15 lines**; thin coordinators only, business logic in graph/ or bus/ |
54
+ | `db/migrations/` | **Never** modify existing migration files; always add new numbered files |
55
+
56
+ ### MCP Tool Pattern
57
+
58
+ Every tool follows this exact structure:
59
+ ```python
60
+ @mcp.tool()
61
+ async def loom_xxx(ctx: Context, ...args) -> dict:
62
+ app = _ctx(ctx) # Get AppContext from lifespan
63
+ result = await store.some_op(app.pool, ...) # Postgres write
64
+ await cache.sync_task(app.redis, result) # Redis sync
65
+ await publish_event(app.redis, ...) # Event publish
66
+ return result.model_dump(mode="json") # Serialized response
67
+ ```
68
+
69
+ ### MCP Server Lifespan
70
+
71
+ `mcp/server.py` manages async resources: creates asyncpg pool → runs migrations → connects Redis → rebuilds cache from Postgres → yields `AppContext(pool, redis, project_id)` → cleanup. Tools access it via `ctx.request_context.lifespan_context`.
72
+
73
+ ### Task Claiming
74
+
75
+ Uses `SELECT FOR UPDATE SKIP LOCKED` in Postgres — purpose-built for concurrent job queues. No application-level retries needed.
76
+
77
+ ### Dependency Resolution
78
+
79
+ `graph/deps.py` runs after every task completion:
80
+ - Finds dependents of the completed task
81
+ - If all their deps are now done, transitions blocked → pending and adds to ready queue
82
+ - Epic auto-completion: if all children of an epic are done, the epic is marked done
83
+
84
+ ## Key Design Decisions
85
+
86
+ - **Pydantic BaseModel** (not dataclass) for Task and all models — better serialization for MCP
87
+ - `Task.from_record(dict, depends_on)` converts asyncpg Records; `depends_on` is populated from `task_deps` table separately
88
+ - Redis hashes store strings — dict/list fields are JSON-serialized in `cache.py`
89
+ - Config loads 3 layers: `~/.loom/config.yaml` → `.loom/config.yaml` → `LOOM_*` env vars
90
+ - Task IDs: `loom-{secrets.token_hex(4)}` (8 hex chars)
91
+ - Priority scores for Redis sorted set: p0=300, p1=200, p2=100
92
+
93
+ ## Testing
94
+
95
+ Real integration tests with testcontainers (no mocks). Fixtures in `tests/conftest.py`:
96
+ - `pool` — function-scoped asyncpg pool, data cleaned between tests
97
+ - `redis_conn` — function-scoped Redis, flushed between tests
98
+ - `project` — creates a test project, returns project_id string
99
+
100
+ ## Current Phase
101
+
102
+ Phase 1 (Foundation) is implemented. Phase 2 stubs: `loom_decompose` returns not_implemented. Skills, workflows, orchestrator loop, and cloud deployment are future phases.
@@ -0,0 +1,544 @@
1
+ # Loom Phase 1 — Implementation Plan
2
+
3
+ **Project:** Loom — Multi-Agent Project Orchestration System
4
+ **Scope:** Phase 1 (Foundation)
5
+ **PRD:** `PRD-loom-v1.1.md` in this directory (read it first for full architecture context)
6
+ **Status:** Ready for implementation
7
+
8
+ ---
9
+
10
+ ## What Is Loom
11
+
12
+ Loom is a production-grade project orchestration system for large, parallel software projects executed by multiple AI agents. It provides:
13
+
14
+ - A **persistent, dependency-aware task graph** backed by PostgreSQL (source of truth) and Redis (live cache + event bus)
15
+ - A **reliable inter-agent message bus** using Redis pub/sub, streams, and queues
16
+ - An **MCP server** exposing all of the above as Claude Code-native tools
17
+ - A **CLI** for human interaction (`loom init`, `loom up`, `loom status`, etc.)
18
+
19
+ Installed globally via `pipx install loom-agents`. Runs locally via Docker Compose.
20
+
21
+ ## Phase 1 Goal
22
+
23
+ > A Claude Code agent connects to the Loom MCP server, calls `loom_ready`, claims a manually-created task, executes it, and marks it done. State persists across MCP server restarts.
24
+
25
+ ---
26
+
27
+ ## Resolved Architecture Decisions
28
+
29
+ | Decision | Choice | Notes |
30
+ |----------|--------|-------|
31
+ | Python version | **3.12** | Modern type hints (`str \| None`), best perf |
32
+ | Dev tool | **uv** | For env + dependency management during development |
33
+ | Distribution | **pipx + PyPI** as `loom-agents` | Global install, no venv friction |
34
+ | Build backend | **hatchling** | Standard, works well with uv |
35
+ | Data models | **Pydantic BaseModel** | Not dataclass — better serialization for MCP tool I/O |
36
+ | Dev setup | **Native MCP server + Docker DBs** | MCP runs as Python process; Postgres + Redis in Docker |
37
+ | MCP transport | **stdio** (local) / **SSE** (cloud) | Claude Code spawns MCP server as subprocess via stdio |
38
+ | MCP registration | **`.mcp.json`** at project root | Auto-written by `loom up`; Claude Code reads it automatically |
39
+ | Testing | **pytest + testcontainers** | Integration tests against real Postgres/Redis in Docker |
40
+ | Build order | **Bottom-up** | db → graph → bus → mcp → cli |
41
+
42
+ ## Prerequisites
43
+
44
+ - **Docker Desktop** installed and running
45
+ - **uv** installed (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
46
+ - **Python 3.12+** (uv can install this: `uv python install 3.12`)
47
+
48
+ ---
49
+
50
+ ## Python Dependencies
51
+
52
+ ```toml
53
+ # pyproject.toml [project] dependencies
54
+ dependencies = [
55
+ "asyncpg>=0.30.0",
56
+ "redis[hiredis]>=5.0.0",
57
+ "fastmcp>=2.0.0",
58
+ "click>=8.1.0",
59
+ "pydantic>=2.5.0",
60
+ "pyyaml>=6.0",
61
+ ]
62
+
63
+ [dependency-groups]
64
+ dev = [
65
+ "pytest>=8.0",
66
+ "pytest-asyncio>=0.24.0",
67
+ "testcontainers[postgres,redis]>=4.0",
68
+ ]
69
+ ```
70
+
71
+ ---
72
+
73
+ ## Package Structure
74
+
75
+ ```
76
+ loom/
77
+ ├── __init__.py
78
+ ├── cli.py # Click CLI: loom init, up, down, status
79
+ ├── config.py # 3-layer config: global → project → env vars
80
+ ├── ids.py # Task ID generation: loom-{8hex}
81
+ ├── exceptions.py # LoomError hierarchy
82
+
83
+ ├── graph/
84
+ │ ├── __init__.py
85
+ │ ├── task.py # Task/TaskStatus/Priority Pydantic models
86
+ │ ├── store.py # ALL Postgres CRUD for tasks (only writer)
87
+ │ ├── cache.py # Redis sync layer (only reader, falls back to store)
88
+ │ ├── deps.py # Dependency resolution + ready queue management
89
+ │ └── project.py # Project CRUD
90
+
91
+ ├── bus/
92
+ │ ├── __init__.py
93
+ │ ├── channels.py # ALL Redis key/channel name functions (no literals elsewhere)
94
+ │ ├── events.py # EventType enum
95
+ │ ├── publisher.py # Redis publish helpers (stream + pub/sub)
96
+ │ ├── subscriber.py # Redis subscribe helpers (minimal Phase 1)
97
+ │ └── queue.py # Escalation queue (LPUSH/BRPOP)
98
+
99
+ ├── db/
100
+ │ ├── __init__.py
101
+ │ ├── connection.py # asyncpg pool setup/teardown
102
+ │ └── migrations/
103
+ │ ├── __init__.py
104
+ │ └── 001_initial.sql # Full schema (see PRD Section 4)
105
+
106
+ └── mcp/
107
+ ├── __init__.py
108
+ ├── __main__.py # Entry point: mcp.run(transport="stdio")
109
+ ├── server.py # FastMCP server with lifespan (pool + redis)
110
+ └── tools.py # 11 MCP tool implementations (thin, ≤15 lines each)
111
+ ```
112
+
113
+ ### Module Contracts (Critical)
114
+
115
+ - **`graph/store.py`** — The ONLY module that writes to Postgres for task data. All modules needing persistent data go through store.py.
116
+ - **`graph/cache.py`** — The ONLY module that reads from Redis for task data. Falls back to store.py on cache miss, then syncs Redis. Callers never choose Redis vs Postgres.
117
+ - **`bus/channels.py`** — ALL Redis key and channel name patterns as typed functions. No string literals like `"loom:tasks:ready"` appear anywhere else in the codebase.
118
+ - **`mcp/tools.py`** — Tool functions ≤15 lines each. Business logic belongs in graph/ or bus/. Tools are thin coordinators only.
119
+
120
+ ---
121
+
122
+ ## Build Steps (Ordered)
123
+
124
+ ### Step 0: Project Scaffolding
125
+
126
+ **Files:** `pyproject.toml`, `.gitignore`, all `__init__.py` files, `loom/ids.py`, `loom/exceptions.py`, `tests/conftest.py`
127
+
128
+ **`pyproject.toml`:**
129
+ - Build backend: hatchling
130
+ - Package name: `loom-agents`
131
+ - Entry point: `[project.scripts] loom = "loom.cli:cli"`
132
+ - `requires-python = ">=3.12"`
133
+ - All dependencies listed above
134
+ - `[tool.pytest.ini_options] asyncio_mode = "auto"`
135
+
136
+ **`loom/ids.py`:**
137
+ ```python
138
+ import secrets
139
+
140
+ def task_id() -> str:
141
+ """Generate conflict-free task ID: loom-{8hex}."""
142
+ return f"loom-{secrets.token_hex(4)}"
143
+ ```
144
+
145
+ **`loom/exceptions.py`:**
146
+ ```python
147
+ class LoomError(Exception):
148
+ """Base for all Loom errors."""
149
+
150
+ class TaskNotFoundError(LoomError): ...
151
+ class TaskStateError(LoomError): ... # invalid status transition
152
+ class ProjectNotFoundError(LoomError): ...
153
+ class DependencyCycleError(LoomError): ...
154
+ class ClaimConflictError(LoomError): ... # task already claimed
155
+ ```
156
+
157
+ ---
158
+
159
+ ### Step 1: Configuration System
160
+
161
+ **File:** `loom/config.py`
162
+ **Depends on:** Step 0
163
+
164
+ Pydantic models for config sections:
165
+ ```python
166
+ class DatabaseConfig(BaseModel):
167
+ url: str = "postgresql://loom:loom_local@localhost:5432/loom"
168
+
169
+ class RedisConfig(BaseModel):
170
+ url: str = "redis://localhost:6379"
171
+
172
+ class McpConfig(BaseModel):
173
+ port: int = 8765
174
+ host: str = "0.0.0.0"
175
+
176
+ class LoomConfig(BaseModel):
177
+ project_name: str = ""
178
+ project_id: str = ""
179
+ database: DatabaseConfig = DatabaseConfig()
180
+ redis: RedisConfig = RedisConfig()
181
+ mcp: McpConfig = McpConfig()
182
+ log_level: str = "INFO"
183
+ ```
184
+
185
+ `load_config(project_dir: Path | None = None) -> LoomConfig`:
186
+ 1. Start with defaults
187
+ 2. Deep-merge global config (`~/.loom/config.yaml`)
188
+ 3. Deep-merge project config (`.loom/config.yaml` relative to `project_dir` or cwd)
189
+ 4. Apply env var overrides: `LOOM_DATABASE_URL`, `LOOM_REDIS_URL`, `LOOM_MCP_PORT`, `LOOM_LOG_LEVEL`
190
+ 5. Return `LoomConfig.model_validate(config_dict)`
191
+
192
+ ---
193
+
194
+ ### Step 2: Database Connection + Migrations
195
+
196
+ **Files:** `loom/db/connection.py`, `loom/db/migrations.py`, `loom/db/migrations/001_initial.sql`
197
+ **Depends on:** Steps 0, 1
198
+
199
+ **`connection.py`** — asyncpg pool singleton:
200
+ - `init_pool(config: LoomConfig) -> asyncpg.Pool` — creates pool (min_size=2, max_size=10)
201
+ - `get_pool() -> asyncpg.Pool` — returns initialized pool or raises RuntimeError
202
+ - `close_pool()` — closes pool, resets to None
203
+
204
+ **`migrations.py`** — simple migration runner:
205
+ - Creates `_loom_migrations` tracking table if not exists
206
+ - Reads `.sql` files from `loom/db/migrations/` sorted by filename
207
+ - Applies unapplied migrations in a transaction
208
+ - Idempotent — safe to run on every startup
209
+
210
+ **`001_initial.sql`** — full schema from PRD Section 4:
211
+ - Tables: `projects`, `tasks`, `task_deps`, `events`, `escalations`, `skill_runs`
212
+ - All indexes from PRD Section 4.2
213
+ - Task statuses: `pending|claimed|done|failed|blocked|epic`
214
+ - Priorities: `p0|p1|p2`
215
+ - Task IDs: TEXT (format `loom-{8hex}`)
216
+ - Project IDs: UUID
217
+ - Context/output fields: JSONB
218
+
219
+ ---
220
+
221
+ ### Step 3: Domain Models
222
+
223
+ **File:** `loom/graph/task.py`
224
+ **Depends on:** Step 0
225
+
226
+ ```python
227
+ class TaskStatus(StrEnum):
228
+ PENDING = "pending"
229
+ CLAIMED = "claimed"
230
+ DONE = "done"
231
+ FAILED = "failed"
232
+ BLOCKED = "blocked"
233
+ EPIC = "epic"
234
+
235
+ class Priority(StrEnum):
236
+ P0 = "p0"
237
+ P1 = "p1"
238
+ P2 = "p2"
239
+
240
+ PRIORITY_SCORES = {Priority.P0: 300, Priority.P1: 200, Priority.P2: 100}
241
+ ```
242
+
243
+ **`Task(BaseModel)`** fields:
244
+ - `id: str` (default_factory=`task_id` from `loom/ids.py`)
245
+ - `project_id: str`
246
+ - `title: str`
247
+ - `status: TaskStatus` (default pending)
248
+ - `priority: Priority` (default p1)
249
+ - `assignee: str | None`
250
+ - `parent_id: str | None`
251
+ - `context: dict` (JSONB — free-form, no required fields)
252
+ - `output: dict` (JSONB)
253
+ - `done_when: str | None`
254
+ - `created_at, updated_at, claimed_at, done_at: datetime | None`
255
+ - `depends_on: list[str]` (populated from task_deps on read, not stored in tasks table)
256
+
257
+ Class method: `Task.from_record(record)` → `cls.model_validate(dict(record))` (asyncpg Records are not dicts, so convert before validating)
258
+
259
+ **`ProjectStatus(BaseModel)`**: project_id, project_name, counts (dict[str,int]), ready_count, blocked_tasks (list[str]), open_escalations (int)
260
+
261
+ ---
262
+
263
+ ### Step 4: Postgres CRUD (Store Layer)
264
+
265
+ **Files:** `loom/graph/store.py`, `loom/graph/project.py`
266
+ **Depends on:** Steps 2, 3
267
+
268
+ **`store.py`** — all functions take `pool: asyncpg.Pool` as first param:
269
+
270
+ | Function | Description |
271
+ |----------|-------------|
272
+ | `create_task(pool, task)` | Insert task + dep edges in single transaction. Returns Task. |
273
+ | `get_task(pool, task_id)` | Fetch task by ID, joins task_deps for depends_on list. Returns Task or None. |
274
+ | `claim_task(pool, task_id, agent_id)` | **SELECT FOR UPDATE SKIP LOCKED**. Atomically claims pending task. Returns Task or None if unavailable. |
275
+ | `complete_task(pool, task_id, output)` | Sets status=done, stores output JSONB, sets done_at=NOW(). |
276
+ | `fail_task(pool, task_id, reason)` | Sets status=failed. |
277
+ | `update_task(pool, task_id, **fields)` | Updates mutable fields only (title, context, priority, done_when). |
278
+ | `update_task_status(pool, task_id, status)` | Direct status change (used by deps.py for blocked→pending). |
279
+ | `get_ready_tasks(pool, project_id, priority?, limit?)` | Tasks that are pending AND have no unresolved deps. SQL LEFT JOIN on task_deps. |
280
+ | `get_dependents(pool, task_id)` | Task IDs that depend on this task (from task_deps). |
281
+ | `get_task_dependencies(pool, task_id)` | Task IDs this task depends on (from task_deps). |
282
+ | `replace_dependencies(pool, task_id, depends_on)` | Delete old deps, insert new ones. |
283
+ | `list_active_tasks(pool, project_id)` | All tasks NOT in done/failed status. For cache rebuild. |
284
+ | `list_all_tasks(pool, project_id)` | All tasks. For graph display. |
285
+ | `create_escalation(pool, project_id, task_id, message)` | Insert into escalations table. |
286
+ | `record_event(pool, project_id, event_type, task_id?, agent_id?, payload)` | Insert into events table (audit log). |
287
+
288
+ **Claim implementation** (critical — from PRD):
289
+ ```python
290
+ async def claim_task(pool, task_id, agent_id):
291
+ async with pool.acquire() as conn:
292
+ async with conn.transaction():
293
+ row = await conn.fetchrow(
294
+ "SELECT * FROM tasks WHERE id = $1 AND status = 'pending' FOR UPDATE SKIP LOCKED",
295
+ task_id,
296
+ )
297
+ if row is None:
298
+ return None
299
+ updated = await conn.fetchrow(
300
+ """UPDATE tasks SET status='claimed', assignee=$1,
301
+ claimed_at=NOW(), updated_at=NOW() WHERE id=$2 RETURNING *""",
302
+ agent_id, task_id,
303
+ )
304
+ return Task.from_record(dict(updated))
305
+ ```
306
+
307
+ **`project.py`**: `create_project(pool, name, desc?)`, `get_project(pool, project_id)`, `get_project_status(pool, project_id)`
308
+
309
+ ---
310
+
311
+ ### Step 5: Redis Bus Layer
312
+
313
+ **Files:** `loom/bus/channels.py`, `loom/bus/events.py`, `loom/bus/publisher.py`, `loom/bus/subscriber.py`, `loom/bus/queue.py`
314
+ **Depends on:** Step 0 (can be built in parallel with Step 4)
315
+
316
+ **`channels.py`** — typed functions, no hardcoded strings:
317
+ ```python
318
+ def task_key(project_id, task_id) -> str: # "loom:{pid}:task:{tid}"
319
+ def status_set(project_id, status) -> str: # "loom:{pid}:tasks:{status}"
320
+ def ready_queue(project_id) -> str: # "loom:{pid}:tasks:ready"
321
+ def event_stream(project_id) -> str: # "loom:{pid}:events"
322
+ def updates_channel(project_id) -> str: # "loom:{pid}:tasks:updates"
323
+ def escalation_channel(project_id) -> str: # "loom:{pid}:escalations"
324
+ def agent_channel(project_id, agent_id) -> str: # "loom:{pid}:messages:{aid}"
325
+ def broadcast_channel(project_id) -> str: # "loom:{pid}:broadcast"
326
+ ```
327
+
328
+ **`events.py`** — EventType enum:
329
+ ```
330
+ task.created, task.claimed, task.done, task.failed, task.blocked,
331
+ task.updated, task.unblocked, message.sent, escalation.raised,
332
+ project.decomposed (Phase 2), project.complete, escalation.resolved
333
+ ```
334
+
335
+ **`publisher.py`**:
336
+ - `publish_event(redis, project_id, event_type, payload)` — XADD to stream + PUBLISH to pub/sub
337
+ - `publish_escalation(redis, project_id, task_id, message)` — PUBLISH to escalation channel
338
+ - `send_agent_message(redis, project_id, to, message, thread_id?)` — PUBLISH to agent channel
339
+
340
+ **`subscriber.py`** — minimal Phase 1 skeleton (async generator for event stream)
341
+
342
+ **`queue.py`** — `push_escalation(redis, project_id, payload)` / `pop_escalation(redis, project_id, timeout?)`
343
+
344
+ ---
345
+
346
+ ### Step 6: Redis Cache Layer
347
+
348
+ **File:** `loom/graph/cache.py`
349
+ **Depends on:** Steps 4, 5
350
+
351
+ | Function | Description |
352
+ |----------|-------------|
353
+ | `sync_task(redis, task)` | Write task to Redis hash + update status sets (SREM from all, SADD to current) |
354
+ | `add_to_ready_queue(redis, project_id, task_id, priority)` | ZADD to sorted set with priority score |
355
+ | `remove_from_ready_queue(redis, project_id, task_id)` | ZREM from sorted set |
356
+ | `get_task(redis, pool, project_id, task_id)` | HGETALL from Redis → on miss: read from Postgres, sync, return |
357
+ | `get_ready_tasks(redis, pool, project_id, limit?, priority?)` | ZREVRANGE from sorted set → on empty: fallback to Postgres, rebuild |
358
+ | `rebuild_cache(redis, pool, project_id)` | Full rebuild: read all active tasks from Postgres, sync each to Redis, rebuild ready queue |
359
+ | `rebuild_ready_queue(redis, pool, project_id)` | Delete + rebuild sorted set from Postgres |
360
+
361
+ **Gotcha:** Redis hashes store everything as strings. `sync_task` must `json.dumps()` dict/list fields. The reverse deserialization function must `json.loads()` them back.
362
+
363
+ ---
364
+
365
+ ### Step 7: Dependency Resolution
366
+
367
+ **File:** `loom/graph/deps.py`
368
+ **Depends on:** Steps 4, 6
369
+
370
+ | Function | Description |
371
+ |----------|-------------|
372
+ | `check_and_unblock(pool, redis, completed_task_id, project_id)` | Find dependents of completed task. For each, check if ALL their deps are done. If so: update status blocked→pending, sync cache, add to ready queue. Returns list of unblocked task IDs. |
373
+ | `detect_cycle(pool, task_id, new_deps)` | Iterative DFS from each new_dep looking for task_id. Returns True if cycle found. Run BEFORE adding deps. |
374
+ | `compute_initial_status(pool, depends_on)` | No deps → pending. All deps done → pending. Any dep not done → blocked. |
375
+
376
+ ---
377
+
378
+ ### Step 8: MCP Server + Tools
379
+
380
+ **Files:** `loom/mcp/server.py`, `loom/mcp/tools.py`, `loom/mcp/__main__.py`
381
+ **Depends on:** All previous steps
382
+
383
+ **`server.py`** — FastMCP with lifespan:
384
+ ```python
385
+ @asynccontextmanager
386
+ async def lifespan():
387
+ config = load_config(Path(os.environ.get("LOOM_PROJECT_DIR", ".")))
388
+ pool = await init_pool(config)
389
+ await run_migrations(pool)
390
+ redis = Redis.from_url(config.redis.url, decode_responses=True)
391
+ if config.project_id:
392
+ await rebuild_cache(redis, pool, config.project_id)
393
+ yield {"pool": pool, "redis": redis, "config": config}
394
+ await redis.aclose()
395
+ await close_pool()
396
+
397
+ mcp = FastMCP(name="loom", instructions="...", lifespan=lifespan)
398
+ ```
399
+
400
+ **`tools.py`** — 11 tools registered via `@mcp.tool`:
401
+
402
+ | Tool | Delegates To | Key Behavior |
403
+ |------|-------------|--------------|
404
+ | `loom_ready(priority?, limit?)` | cache.get_ready_tasks | Returns list of task dicts |
405
+ | `loom_claim(task_id, agent_id)` | store.claim_task + cache.sync + publisher | Atomic claim via SELECT FOR UPDATE SKIP LOCKED |
406
+ | `loom_done(task_id, output)` | store.complete + cache.sync + deps.check_and_unblock | Cascades: unblocks dependents |
407
+ | `loom_fail(task_id, reason)` | store.fail + cache.sync + escalation | Creates escalation record |
408
+ | `loom_escalate(task_id, message)` | store.update_status(blocked) + escalation | Flags task as blocked |
409
+ | `loom_create(title, context, depends_on?, priority?, parent_id?, done_when?)` | deps.compute_initial_status + store.create + cache.sync | Auto-determines pending vs blocked |
410
+ | `loom_status(task_id?)` | cache.get_task or project.get_project_status | Task detail or project overview |
411
+ | `loom_message(to, message, thread_id?)` | publisher.send_agent_message | Direct message to agent |
412
+ | `loom_decompose(goal, spec_path?, confirm?)` | — | **Phase 2 stub**: returns not_implemented |
413
+ | `loom_graph(format?)` | store.list_all_tasks | json, mermaid, or summary format |
414
+ | `loom_update(task_id, title?, context?, priority?, depends_on?, done_when?)` | store.update + cache.sync | Mutable fields only, cycle detection on deps |
415
+
416
+ Every tool that mutates task state follows the same order: writes Postgres first → syncs Redis → publishes event → records audit event. Read-only tools (e.g. `loom_ready`) only read.
417
+
418
+ **`__main__.py`:**
419
+ ```python
420
+ from loom.mcp.server import mcp
421
+ mcp.run(transport="stdio")
422
+ ```
423
+
424
+ **How stdio works:** Claude Code spawns `python -m loom.mcp` (which executes `loom/mcp/__main__.py`) as a subprocess. Communication is JSON-RPC over stdin/stdout. FastMCP handles the protocol. The lifespan connects to Postgres + Redis, and the process stays alive for the entire Claude Code session.
425
+
426
+ ---
427
+
428
+ ### Step 9: CLI
429
+
430
+ **File:** `loom/cli.py`
431
+ **Depends on:** Steps 1, 8
432
+
433
+ **`loom init`:**
434
+ 1. Create `.loom/` directory structure (config.yaml, skills/, workflows/, logs/)
435
+ 2. Generate project UUID
436
+ 3. Write `.loom/config.yaml` with project_name, project_id, default DB/Redis URLs
437
+ 4. Write `docker-compose.loom.yml` (Postgres 16 + Redis 7 with healthchecks, port mappings 5432/6379, persistent volumes)
438
+ 5. Write `AGENTS.md` (basic agent instructions referencing Loom tools)
439
+ 6. Write `.loom/.gitignore` for logs/
440
+
441
+ **`loom up`:**
442
+ 1. Verify `docker-compose.loom.yml` exists
443
+ 2. `docker compose -f docker-compose.loom.yml up -d postgres redis`
444
+ 3. Wait for healthchecks (retry with backoff)
445
+ 4. Run database migrations against Postgres
446
+ 5. Write/update `.mcp.json` at project root:
447
+ ```json
448
+ {
449
+ "mcpServers": {
450
+ "loom": {
451
+ "command": "uv",
452
+ "args": ["run", "--with", "loom-agents", "python", "-m", "loom.mcp"],
453
+ "env": { "LOOM_PROJECT_DIR": "<absolute path to project>" }
454
+ }
455
+ }
456
+ }
457
+ ```
458
+ 6. Print instructions to start Claude Code
459
+
460
+ **`loom down`:** `docker compose -f docker-compose.loom.yml down`
461
+
462
+ **`loom status [task-id]`:** Connects directly to Postgres (not MCP), shows project overview or task detail.
463
+
464
+ **Docker Compose (generated, local only — no MCP service):**
465
+ ```yaml
466
+ services:
467
+ postgres:
468
+ image: postgres:16-alpine
469
+ environment: { POSTGRES_DB: loom, POSTGRES_USER: loom, POSTGRES_PASSWORD: loom_local }
470
+ ports: ["5432:5432"]
471
+ volumes: [postgres-data:/var/lib/postgresql/data]
472
+ healthcheck: { test: ["CMD-SHELL", "pg_isready -U loom"], interval: 2s, timeout: 5s, retries: 10 }
473
+ redis:
474
+ image: redis:7-alpine
475
+ ports: ["6379:6379"]
476
+ volumes: [redis-data:/data]
477
+ command: redis-server --appendonly yes
478
+ healthcheck: { test: ["CMD", "redis-cli", "ping"], interval: 2s, timeout: 5s, retries: 10 }
479
+ volumes:
480
+ postgres-data:
481
+ redis-data:
482
+ ```
483
+
484
+ ---
485
+
486
+ ### Step 10: End-to-End Integration Tests
487
+
488
+ **File:** `tests/integration/test_e2e.py`
489
+ **Depends on:** All steps
490
+
491
+ **Test 1 — Full Agent Workflow (proves Phase 1 deliverable):**
492
+ 1. Create project and task in Postgres
493
+ 2. Rebuild Redis cache (simulates server startup)
494
+ 3. Call `get_ready_tasks` — verify task appears
495
+ 4. Call `claim_task` — verify status=claimed
496
+ 5. Call `complete_task` with output — verify status=done
497
+ 6. Verify ready queue is empty
498
+ 7. Flush Redis, rebuild cache (simulates restart)
499
+ 8. Verify task is still done with correct output (persistence)
500
+
501
+ **Test 2 — Dependency Cascade:**
502
+ 1. Create tasks A, B, C where C depends on A and B
503
+ 2. Verify only A and B are in ready queue
504
+ 3. Complete A — verify C still blocked
505
+ 4. Complete B — verify C now unblocked and in ready queue
506
+
507
+ **Test fixtures (`tests/conftest.py`):**
508
+ - Session-scoped: `postgres_container`, `redis_container` (testcontainers)
509
+ - Function-scoped: `pool` (asyncpg, runs migrations, cleans tables between tests), `redis` (flushdb between tests)
510
+
511
+ ---
512
+
513
+ ## Verification Checklist
514
+
515
+ - [ ] `uv run pytest` — all tests pass
516
+ - [ ] `loom init` creates correct directory structure and files
517
+ - [ ] `loom up` starts Postgres + Redis, runs migrations, writes `.mcp.json`
518
+ - [ ] Open Claude Code in project directory — Loom MCP tools appear in tool list
519
+ - [ ] Agent calls `loom_create` → `loom_ready` → `loom_claim` → `loom_done` successfully
520
+ - [ ] `loom down && loom up` — task state persists, cache rebuilds correctly
521
+ - [ ] Concurrent claims: two agents claiming same task — exactly one succeeds
522
+
523
+ ---
524
+
525
+ ## Known Gotchas
526
+
527
+ 1. **asyncpg Records are not dicts** — use `dict(record)` before passing to Pydantic
528
+ 2. **Redis hashes store strings** — must `json.dumps()` dict/list fields in `sync_task`, `json.loads()` on read
529
+ 3. **FastMCP lifespan API** — pin FastMCP version; verify `ctx.lifespan_context` returns the dict from `yield`
530
+ 4. **stdio + long-lived connections** — MCP server process stays alive for entire Claude Code session; asyncpg pool handles reconnection; Redis should use `retry_on_timeout=True`
531
+ 5. **Migration timing** — `loom up` must wait for Postgres healthcheck before running migrations; use retry with backoff
532
+ 6. **LOOM_PROJECT_DIR** — MCP server reads config from this env var (set in `.mcp.json`); `load_config` must accept directory parameter
533
+
534
+ ---
535
+
536
+ ## What Phase 1 Does NOT Include
537
+
538
+ - Skills system (Phase 2)
539
+ - Decomposition logic (Phase 2, `loom_decompose` is a stub)
540
+ - Workflows (Phase 2+)
541
+ - Orchestrator agent loop (Phase 3)
542
+ - Claim TTL / auto-release (Phase 3)
543
+ - GCP deployment (Phase 4)
544
+ - Slack/Obsidian integrations (Phase 4)