cardinal-ai 0.0.1 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -0
  3. data/README.md +50 -29
  4. data/Rakefile +6 -0
  5. data/app/assets/stylesheets/application.css +10 -0
  6. data/app/assets/stylesheets/cardinal.css +530 -0
  7. data/app/controllers/application_controller.rb +7 -0
  8. data/app/controllers/boards_controller.rb +5 -0
  9. data/app/controllers/cards_controller.rb +129 -0
  10. data/app/controllers/columns_controller.rb +130 -0
  11. data/app/controllers/messages_controller.rb +25 -0
  12. data/app/controllers/runs_controller.rb +58 -0
  13. data/app/helpers/application_helper.rb +35 -0
  14. data/app/javascript/application.js +2 -0
  15. data/app/javascript/controllers/application.js +7 -0
  16. data/app/javascript/controllers/autosave_controller.js +43 -0
  17. data/app/javascript/controllers/board_column_controller.js +96 -0
  18. data/app/javascript/controllers/clipboard_controller.js +18 -0
  19. data/app/javascript/controllers/composer_controller.js +10 -0
  20. data/app/javascript/controllers/index.js +3 -0
  21. data/app/javascript/controllers/modal_controller.js +45 -0
  22. data/app/javascript/controllers/reveal_controller.js +15 -0
  23. data/app/javascript/controllers/scroll_controller.js +44 -0
  24. data/app/javascript/controllers/tags_controller.js +49 -0
  25. data/app/javascript/controllers/theme_controller.js +43 -0
  26. data/app/javascript/controllers/tooltip_controller.js +37 -0
  27. data/app/jobs/ai_task_job.rb +26 -0
  28. data/app/jobs/application_job.rb +7 -0
  29. data/app/jobs/assistant_reply_job.rb +132 -0
  30. data/app/jobs/mark_pr_ready_job.rb +18 -0
  31. data/app/jobs/merge_pr_job.rb +27 -0
  32. data/app/jobs/resume_run_job.rb +30 -0
  33. data/app/jobs/start_run_job.rb +13 -0
  34. data/app/mailers/application_mailer.rb +4 -0
  35. data/app/models/agent_session.rb +8 -0
  36. data/app/models/application_record.rb +3 -0
  37. data/app/models/artifact.rb +8 -0
  38. data/app/models/board.rb +92 -0
  39. data/app/models/card.rb +83 -0
  40. data/app/models/column.rb +134 -0
  41. data/app/models/event.rb +44 -0
  42. data/app/models/run.rb +28 -0
  43. data/app/services/agent/runner.rb +379 -0
  44. data/app/services/agent/workspace.rb +138 -0
  45. data/app/services/card_transition.rb +97 -0
  46. data/app/services/claude_cli.rb +89 -0
  47. data/app/services/rules/compiler.rb +55 -0
  48. data/app/services/rules.rb +92 -0
  49. data/app/services/run_sweeper.rb +53 -0
  50. data/app/views/boards/show.html.erb +79 -0
  51. data/app/views/cards/_card.html.erb +48 -0
  52. data/app/views/cards/_detail.html.erb +190 -0
  53. data/app/views/cards/_tag_picker.html.erb +12 -0
  54. data/app/views/cards/new.html.erb +35 -0
  55. data/app/views/cards/show.html.erb +3 -0
  56. data/app/views/columns/_column.html.erb +25 -0
  57. data/app/views/columns/edit.html.erb +146 -0
  58. data/app/views/events/_event.html.erb +29 -0
  59. data/app/views/layouts/application.html.erb +46 -0
  60. data/app/views/layouts/mailer.html.erb +13 -0
  61. data/app/views/layouts/mailer.text.erb +1 -0
  62. data/app/views/pwa/manifest.json.erb +22 -0
  63. data/app/views/pwa/service-worker.js +26 -0
  64. data/bin/rails +4 -0
  65. data/bin/rake +4 -0
  66. data/cardinal.md +695 -0
  67. data/config/application.rb +60 -0
  68. data/config/boot.rb +13 -0
  69. data/config/bundler-audit.yml +5 -0
  70. data/config/cable.yml +13 -0
  71. data/config/ci.rb +20 -0
  72. data/config/credentials.yml.enc +1 -0
  73. data/config/database.yml +31 -0
  74. data/config/environment.rb +5 -0
  75. data/config/environments/development.rb +78 -0
  76. data/config/environments/production.rb +89 -0
  77. data/config/environments/test.rb +53 -0
  78. data/config/importmap.rb +6 -0
  79. data/config/initializers/assets.rb +7 -0
  80. data/config/initializers/cardinal_bootstrap.rb +12 -0
  81. data/config/initializers/cardinal_instance.rb +20 -0
  82. data/config/initializers/content_security_policy.rb +29 -0
  83. data/config/initializers/filter_parameter_logging.rb +8 -0
  84. data/config/initializers/inflections.rb +16 -0
  85. data/config/initializers/run_sweeper.rb +17 -0
  86. data/config/locales/en.yml +31 -0
  87. data/config/puma.rb +42 -0
  88. data/config/routes.rb +22 -0
  89. data/config/storage.yml +27 -0
  90. data/config.ru +6 -0
  91. data/db/migrate/20260703000001_create_cardinal_schema.rb +78 -0
  92. data/db/migrate/20260703000002_add_agent_runner_fields.rb +7 -0
  93. data/db/migrate/20260704000001_add_parent_to_cards.rb +5 -0
  94. data/db/migrate/20260704000002_add_assistant_session_to_cards.rb +5 -0
  95. data/db/seeds.rb +13 -0
  96. data/docker/agent/Dockerfile +16 -0
  97. data/exe/cardinal +111 -0
  98. data/lib/cardinal/version.rb +1 -1
  99. data/public/400.html +135 -0
  100. data/public/404.html +135 -0
  101. data/public/406-unsupported-browser.html +135 -0
  102. data/public/422.html +135 -0
  103. data/public/500.html +135 -0
  104. data/public/icon.png +0 -0
  105. data/public/icon.svg +3 -0
  106. data/public/robots.txt +1 -0
  107. data/vendor/javascript/sortablejs.js +3378 -0
  108. metadata +236 -9
data/cardinal.md ADDED
@@ -0,0 +1,695 @@
1
+ # Cardinal — Feature Alignment Document
2
+
3
+ **One-liner:** A Kanban board where columns change what a card *is*. Cards start as passive
4
+ ideas, become active AI workers when dragged into an execution column, and become reviewable
5
+ artifacts when the work is done. The board is not a task tracker — it is a control surface
6
+ for a team of AI agents.
7
+
8
+ **Status:** Draft for alignment. Nothing here is committed; the point is to agree on the
9
+ model before writing code.
10
+
11
+ ---
12
+
13
+ ## 1. Core insight: columns are policies, not labels
14
+
15
+ In a normal Kanban board a column is a label. In Cardinal a column is a **policy object**
16
+ that answers three questions:
17
+
18
+ 1. **Who services cards here?** (nobody / a shared planning assistant / a dedicated per-card agent)
19
+ 2. **What happens on entry?** (nothing / open a discussion / spawn an agent and start a run)
20
+ 3. **What is allowed here?** (chat only / read-only research / real actions with tool access)
21
+
22
+ This is the single most important modeling decision. It means:
23
+
24
+ - Board behavior is **data, not code**. Users can eventually add a second execution column
25
+ ("Research" with read-only tools, "Build" with write tools) without new application logic.
26
+ - Moving a card is a **transition event** with well-defined semantics: leave-policy of the old
27
+ column runs (e.g., pause/detach the agent), enter-policy of the new column runs (e.g., spawn
28
+ agent, start run).
29
+ - The UI can render affordances directly from the policy ("dragging here will start an agent
30
+ with these permissions") instead of hardcoding column names.
31
+
32
+ Suggested column archetypes (a column has exactly one):
33
+
34
+ | Archetype | Serviced by | On card entry |
35
+ |-------------|------------------------|----------------------------------------|
36
+ | `inbox` | nobody | nothing — parking lot |
37
+ | `planning` | shared board assistant | assistant joins the card's conversation |
38
+ | `execution` | dedicated card agent | spawn agent, start a Run |
39
+ | `review` | human | agent stops; card presents outputs for verdict |
40
+ | `terminal` | nobody | archive/lock the conversation |
41
+
42
+ The default board ships as: **Tasks (inbox) → Planning (planning) → In Progress (execution)
43
+ → Review (review) → QA (review; on entry the PR leaves draft) → Done (terminal)**.
44
+
45
+ **Every column gets a gear icon** that opens a settings modal — this is the entire admin
46
+ surface for the policy object: column name, archetype, instructions (a system-prompt
47
+ addendum given to any agent servicing cards in this column), model choice, WIP/concurrency
48
+ limit, tool permissions, plan-approval toggle, budgets, and entry/exit automations. Adding
49
+ a column is just creating a new policy; there is nothing special about the default columns.
50
+ (See §14 for the modal layout.)
51
+
52
+ ---
53
+
54
+ ## 2. Domain model
55
+
56
+ ```
57
+ Board (repo_url, default_branch — a board is bound to one git repo; see §13)
58
+ ├── Columns (ordered; each has an archetype + policy: agent config, tool
59
+ │ permissions, WIP limit, auto-transition rules — edited via gear modal)
60
+ └── Cards
61
+ ├── belongs_to :column (position within column for ordering)
62
+ ├── branch_name, pr_url, pr_state (each card is its own branch + PR; see §13)
63
+ ├── tags, description (freeform metadata; more fields later)
64
+ ├── Conversation (exactly one, permanent — survives all column moves)
65
+ │ └── Events (append-only timeline; see §7)
66
+ ├── AgentSessions (0..n; one per visit to an execution column)
67
+ │ ├── workspace: a cage-style throwaway Docker container (repo mounted,
68
+ │ │ card branch checked out; see §13)
69
+ │ └── Runs (1..n per session; one per "go do work" invocation)
70
+ │ ├── Events (written into the card's single timeline, tagged with run_id)
71
+ │ └── Artifacts (files, diffs, links, documents — the outputs)
72
+ └── status (cached state machine value, denormalized for board rendering)
73
+ ```
74
+
75
+ Key relationships and rules:
76
+
77
+ - **Card : Conversation is 1:1 and permanent.** The conversation is the card's memory. The
78
+ planning assistant writes into it, the execution agent reads it as briefing context and
79
+ writes into it, the human writes into it at every stage. Nothing is ever in a side channel.
80
+ - **AgentSession** is the identity of "this card's dedicated AI." It owns the agent's
81
+ working state (working directory / sandbox handle, model config, accumulated context
82
+ pointer). A card dragged back into execution after revisions gets a *new* Run under the
83
+ same session if the session is resumable, or a new session if not.
84
+ - **Run** is one bounded attempt: started → (streaming events) → finished with a result
85
+ (`succeeded | failed | cancelled | needs_input`). Runs are the unit of retry, cost
86
+ accounting, and audit. Never mutate a run's events; append.
87
+ - **Artifact** is a first-class output record (file, patch, URL, rendered document) attached
88
+ to a run. Review columns render artifacts, not raw chat logs.
89
+ - **Event** is the single append-only log entry type (see §7). Everything the user sees in a
90
+ card — human messages, agent messages, status changes, tool calls, questions, column moves —
91
+ is an event. One table, one ordering, one rendering pipeline.
92
+
93
+ ---
94
+
95
+ ## 3. Card lifecycle
96
+
97
+ The card has one state machine; column archetype constrains which states are legal.
98
+
99
+ ```
100
+ draft ──► discussing ──► queued ──► working ──┬──► needs_input ──► working
101
+ ▲ ▲ │ │
102
+ │ │ │ ├──► blocked (external dependency)
103
+ │ │ │ │
104
+ │ └──── revising ◄───┐ │ └──► failed ──► (retry ⇒ queued)
105
+ │ │ ▼
106
+ └── (any) ◄── archived changes_requested ◄── in_review ◄── work_complete
107
+
108
+ └──► approved ──► done
109
+ ```
110
+
111
+ Rules of thumb:
112
+
113
+ - **Column move is the trigger; state machine is the truth.** Dragging a card into
114
+ In Progress sets `queued`; the runner picks it up and sets `working`. Dragging out of an
115
+ execution column mid-run prompts: *cancel the run* or *let it finish in place* (card
116
+ refuses to move until the user picks — no silent kills).
117
+ - **The agent finishes, the human moves the card — by default.** When a run succeeds the
118
+ card goes to `work_complete` and visually signals "ready for review," but auto-advancing
119
+ to the Review column is a per-column policy toggle (off in MVP). Physical card motion the
120
+ user didn't cause is disorienting; do it only when explicitly enabled.
121
+ - **`needs_input` is a first-class state, not a failure.** Agents will constantly need
122
+ clarification. The run parks, the card shows a prominent "waiting on you" badge, the
123
+ question is the newest event. Answering resumes the same run.
124
+ - **Rejection is a loop, not a dead end.** In Review, "request changes" adds a human event
125
+ describing what's wrong and sets `changes_requested`; dragging back to In Progress starts
126
+ a new run whose briefing includes the rejection feedback.
127
+
128
+ ---
129
+
130
+ ## 4. What happens when a card enters an execution column
131
+
132
+ Ordered, and each step is observable in the card's timeline:
133
+
134
+ 1. **Snapshot the briefing.** Compile card title + description + the planning conversation
135
+ + any prior run summaries + rejection feedback into a structured brief. Store it on the
136
+ Run (immutable). This is what the agent actually receives — the user can inspect it.
137
+ 2. **Pre-flight gate.** If the column policy requires it (default: yes), the card enters
138
+ `queued` and shows a **plan-of-attack confirmation**: the agent's first action is to post
139
+ a short "here is what I intend to do" event and wait for a 👍. One click to approve, or
140
+ reply to redirect. (Toggleable per column for trusted/low-stakes work.)
141
+ 3. **Provision the session.** Create/resume the AgentSession: sandbox or working directory,
142
+ tool permissions from column policy, budget caps.
143
+ 4. **Start the Run.** Enqueue a job; the runner drives the agent loop. Every agent message,
144
+ tool call, and status change streams into the card as events in real time.
145
+ 5. **Terminate deliberately.** The run ends in exactly one of: `succeeded` (agent posted a
146
+ **final report event** + artifacts), `needs_input`, `failed` (error + last-known state),
147
+ or `cancelled`. There is no "the agent just stopped talking" state — the runner enforces
148
+ a final event.
149
+
150
+ ---
151
+
152
+ ## 5. Shared column agent vs. dedicated card agent
153
+
154
+ Two genuinely different constructs — don't unify them into one "agent" abstraction:
155
+
156
+ | | Planning assistant (column-level) | Worker agent (card-level) |
157
+ |------------------------|-------------------------------------------|----------------------------------------|
158
+ | Cardinality | One per board | One per card (AgentSession) |
159
+ | Lifetime | Always available, stateless between cards | Created on column entry, bounded by runs |
160
+ | Context | The one card's conversation it's invoked in | Full briefing + working state + tools |
161
+ | Tools | None (chat only) — maybe read-only board ops | Real tools per column policy |
162
+ | Invocation | Reactive: responds when the user writes | Proactive: works autonomously until done |
163
+ | Cost profile | Cheap, fast model | Expensive, capable model |
164
+
165
+ Implementation consequence: the planning assistant is a plain synchronous-ish chat completion
166
+ against the card's conversation (a small job per message). The worker agent is a long-running
167
+ agentic loop with tool use, checkpointing, and streaming. Different code paths, same Event
168
+ timeline.
169
+
170
+ The planning assistant's most valuable output is a **crisp brief**: it should actively drive
171
+ toward "acceptance criteria are clear, scope is bounded" and can offer a *"Ready for
172
+ execution"* summary event that becomes the top of the briefing when the card moves.
173
+
174
+ ---
175
+
176
+ ## 6. UI: making state legible at a glance
177
+
178
+ The board must answer "who needs me?" in one glance. Card states map to a fixed visual
179
+ vocabulary (color + icon + animation), consistent everywhere:
180
+
181
+ | State | Treatment |
182
+ |---------------------|------------------------------------------------------------------|
183
+ | `draft` | Plain, muted |
184
+ | `discussing` | Chat glyph; subtle highlight when assistant has replied unread |
185
+ | `queued` | Clock glyph, dimmed pulse |
186
+ | `working` | **Animated indicator (breathing border / spinner) + live one-line status** ("running tests…") sourced from the latest progress event |
187
+ | `needs_input` | **Loud.** Amber, question-mark badge, card floats to top of column, board-level attention counter increments |
188
+ | `blocked` | Red-amber, "blocked: <reason>" chip |
189
+ | `failed` | Red, error chip, one-click "view failure / retry" |
190
+ | `work_complete` | Green check, "ready for review" chip |
191
+ | `in_review` | Eye glyph |
192
+ | `changes_requested` | Amber-red loop glyph |
193
+ | `done` / `archived` | Muted, checkmark |
194
+
195
+ Board-level chrome:
196
+
197
+ - **Attention inbox** (header): a single ordered list of every card that is waiting on the
198
+ human (`needs_input`, `failed`, `work_complete`). This is the primary navigation surface
199
+ once >3 agents run concurrently — the board becomes the map, the inbox becomes the queue.
200
+ - **Activity ticker** per execution column: "3 running · 1 waiting on you · 2 queued".
201
+ - Cards in `working` state show their **latest progress line directly on the card face** —
202
+ the user should never have to open a card to know roughly what it's doing.
203
+
204
+ Card detail view is a two-pane layout: **timeline** (the conversation/log, §7) and a
205
+ **work panel** (current run status, plan, artifacts, controls: pause / cancel / retry /
206
+ approve plan / answer question).
207
+
208
+ ---
209
+
210
+ ## 7. The card timeline: one log, typed events, aggressive collapsing
211
+
212
+ Single append-only stream of typed events. One table, polymorphic-ish `kind` + JSON payload:
213
+
214
+ ```
215
+ Event(card_id, run_id?, kind, actor, payload, created_at)
216
+
217
+ kinds:
218
+ user_message agent_message assistant_message (planning)
219
+ status_change column_move plan_proposed plan_approved
220
+ question answer progress (one-liners)
221
+ tool_call tool_result artifact_created
222
+ run_started run_finished final_report error
223
+ ```
224
+
225
+ Rendering rules (this is what keeps the card readable):
226
+
227
+ - **Three zoom levels.** *Conversation view* (default): messages, questions, plans, final
228
+ reports, artifacts — the stuff a human should read. *Activity view*: + progress lines and
229
+ tool-call summaries, collapsed into expandable groups ("ran 14 commands ▸"). *Debug view*:
230
+ everything raw, including full tool payloads.
231
+ - **Runs are visually bracketed** — a run header/footer frames its events, so a card with
232
+ three attempts reads as three chapters, each ending in a final report or failure.
233
+ - **The final report is a first-class artifact**, not the last chat message: what was done,
234
+ what changed, what to check, open questions. Review UX is built on final reports +
235
+ artifacts; the timeline is the supporting evidence.
236
+ - Human messages are never collapsed. Agent chatter is always collapsible.
237
+
238
+ ---
239
+
240
+ ## 8. Keeping N concurrent agents from becoming chaos
241
+
242
+ Chaos control is mostly *throughput control* + *attention control*:
243
+
244
+ 1. **WIP limits are load-bearing.** Execution columns get a hard concurrent-run limit
245
+ (default 3). Cards beyond it queue in-column (`queued`, visibly ordered). This is both a
246
+ UX guardrail and the natural backpressure for the job system.
247
+ 2. **One global run queue, per-board concurrency.** The runner respects board + column
248
+ limits. Priority = column position (top of column runs first) so the user reorders the
249
+ queue by dragging — no separate priority UI.
250
+ 3. **Attention inbox** (§6) serializes human interrupts. Agents park in `needs_input`
251
+ indefinitely without burning tokens.
252
+ 4. **Budgets:** per-run token/cost cap and wall-clock timeout (column policy). Hitting a cap
253
+ → `needs_input` with "I've used my budget, here's where I am — continue?" Never silent
254
+ death, never runaway spend.
255
+ 5. **Isolation by default.** Each AgentSession gets its own sandbox/workspace. Two agents
256
+ never share mutable state in MVP. Cross-card dependencies ("blocked by card X") are a
257
+ later feature — model as an explicit edge, not shared state.
258
+ 6. **Notifications are batched and quiet** except `needs_input` and `failed`, which are
259
+ immediate.
260
+
261
+ ---
262
+
263
+ ## 9. Permissions, controls, and safety rails
264
+
265
+ Layered, all enforced server-side in the runner (never trust the agent's self-restraint):
266
+
267
+ 1. **Column tool policy** — the permission boundary the user reasons about. Each execution
268
+ column declares allowed tool classes: read-only research / file & workspace writes /
269
+ network / external side-effects (email, deploy, purchases). MVP ships read+write
270
+ workspace tools, nothing externally irreversible.
271
+ 2. **Plan approval gate** (§4) — default on. The user sees intent before action.
272
+ 3. **Action-level approval for flagged tools.** Any tool marked `requires_approval` in the
273
+ column policy pauses the run into `needs_input` with a concrete "may I run X?" event.
274
+ Approvals can be remembered per-card ("allow `git push` for this card").
275
+ 4. **Budgets and timeouts** (§8) as hard caps.
276
+ 5. **Kill switches at every level:** pause/cancel a run, a card, a column ("pause all"), or
277
+ the board. Cancel is graceful (agent gets a moment to checkpoint + post a wrap-up event)
278
+ with a hard-kill fallback.
279
+ 6. **Full audit trail for free:** the event log *is* the audit log — every tool call and
280
+ result is an event tied to a run and an actor.
281
+ 7. **Sandboxing:** every agent workspace is a **cage-style throwaway Docker container** —
282
+ the repo checked out inside, the card's branch active, host isolated, destroyed after
283
+ the session. The only thing that leaves the container is what gets pushed to the card's
284
+ branch. Secrets injected per-column policy, never stored in conversation context.
285
+
286
+ ---
287
+
288
+ ## 10. MVP scope
289
+
290
+ **In:**
291
+
292
+ - One board bound to one git repo, defaulting to the six columns from §1 (Tasks / Planning /
293
+ In Progress / Review / QA / Done). Columns are addable/editable via the gear-icon settings
294
+ modal (§14) — the policy model is user-facing from day one.
295
+ - Cards: create, edit, tag, drag between columns, manual ordering. Each card gets its own
296
+ branch and PR (§13).
297
+ - Planning assistant: chat in the card in the Planning column; produces a "ready for
298
+ execution" brief.
299
+ - Execution: dedicated agent per card in a cage-style container, real runs with streaming
300
+ events, plan-approval gate, `needs_input` round-trips, final report + artifacts. **MVP
301
+ domain is coding against the board's git repo** (decided — see §15): work is committed
302
+ to the card's branch and surfaced as a PR.
303
+ - Card timeline with conversation/activity zoom levels.
304
+ - States + full visual vocabulary; attention inbox in the header.
305
+ - Concurrency limit (global, e.g. 3), per-run token budget + timeout, cancel/retry.
306
+ - Single user, no auth beyond a login, no billing.
307
+
308
+ **Explicitly out (post-MVP):**
309
+
310
+ - Custom boards UI (column editing itself is in scope via the gear modal, §14), multiple boards, multi-user/roles, cross-card dependencies,
311
+ agent-to-agent communication, auto-advancing cards, scheduled/recurring cards, external
312
+ side-effect tools (email/deploy), mobile.
313
+
314
+ **MVP demo script (the bar for "done"):** create a card → refine it with the planning
315
+ assistant → drag to In Progress → approve the agent's plan → watch live progress on the
316
+ card face → answer one clarifying question → get a final report with a diff artifact →
317
+ drag to Review → request a change → drag back → second run fixes it → approve → Done.
318
+
319
+ ---
320
+
321
+ ## 11. Technical architecture (Rails + JS)
322
+
323
+ ### Shape
324
+
325
+ ```
326
+ Browser (Hotwire: Turbo Streams + Stimulus; board DnD via SortableJS)
327
+ │ websocket (ActionCable / SolidCable)
328
+ Rails app ── SQLite in .cardinal/ (system of record: boards, columns, cards, events, runs…)
329
+
330
+ Job backend (SolidQueue or Sidekiq) ── RunnerJob per Run
331
+
332
+ Agent runtime: Claude Agent SDK subprocess per run (or raw Anthropic API loop)
333
+ └── sandboxed workspace per AgentSession (Docker container or scoped dir)
334
+ ```
335
+
336
+ ### Frontend: Hotwire first
337
+
338
+ The UI is fundamentally "server state streamed to the client": cards changing status,
339
+ events appending to timelines. Turbo Streams over ActionCable does exactly this with almost
340
+ no client state management — `broadcast_append_to card` for events, `broadcast_replace_to
341
+ board` for card face updates. Stimulus + SortableJS covers drag-and-drop (POST the move,
342
+ server validates the transition, broadcasts the result). Reach for React only if the board
343
+ interaction gets genuinely app-like later; don't start there.
344
+
345
+ ### Backend pieces
346
+
347
+ - **Models:** `Board, Column, Card, Event, AgentSession, Run, Artifact` per §2. Card state
348
+ machine via a small hand-rolled `state` enum + transition methods (AASM optional). Column
349
+ policy as a JSON column on `columns` (schema-validated), archetypes as an enum.
350
+ - **Transitions:** a `CardTransition` service object is the only code path that moves cards
351
+ between columns — validates legality, runs leave/enter policies, emits `column_move` and
352
+ `status_change` events, enqueues runs. Controllers and (later) automations all call it.
353
+ - **Runner:** `Run` row is the source of truth; `RunnerJob` (one per run) drives the agent,
354
+ translating agent output into Events as it streams. Heartbeat column on `runs` +
355
+ a sweeper job to catch dead runners → mark run `failed` honestly. Concurrency limits
356
+ enforced at dequeue time (count running runs per board/column before starting).
357
+ - **Agent runtime (decided):** the Claude **Agent SDK** run as a supervised subprocess
358
+ *inside the card's cage container*; it gives tool-use loops, streaming, and permission
359
+ hooks out of the box. The Rails runner only *supervises* — provision container, spawn,
360
+ stream-parse output into Events, enforce budgets, kill, tear down. The container boundary
361
+ doubles as the sandbox: Rails talks to it over the Docker API / exec stream, and the
362
+ agent's only exit path for work is `git push` to the card branch.
363
+ - **Approvals/interrupts:** run parks by setting `needs_input` and *exiting the job*
364
+ (persist a resume token / session id); answering enqueues a resume job. Don't hold a job
365
+ thread open waiting on a human.
366
+ - **Planning assistant:** a plain `AssistantReplyJob` per user message in planning columns —
367
+ one Messages API call with the card conversation, append the reply event. No session, no
368
+ tools. Cheap model.
369
+ - **Streaming UX:** Events written to SQLite → Turbo Stream broadcasts on the card and
370
+ board channels. For token-by-token agent text, buffer and flush progress events every
371
+ ~1–2s rather than streaming raw tokens through the DB; per-token streaming is post-MVP
372
+ polish, not architecture.
373
+
374
+ ### Why this holds up
375
+
376
+ A single SQLite file in `.cardinal/` as the system of record (events included) keeps ops at
377
+ zero — no database server, no Redis; SolidQueue/SolidCable ride on the same engine, and the
378
+ whole instance is one directory (§16). The event table will grow; it's append-only and
379
+ easily partitioned/archived later. The runner/SDK boundary means the "AI part" is swappable
380
+ without touching the product model.
381
+
382
+ ---
383
+
384
+ ## 12. Open questions to align on
385
+
386
+ (Resolved questions move to the decision log, §15.)
387
+
388
+ _None currently — next open items will come out of implementation._
389
+
390
+ ---
391
+
392
+ ## 13. Git & workspace model: card = branch = PR
393
+
394
+ Cardinal is tightly coupled to a git repo. A board is bound to exactly one repo
395
+ (`repo_url`, `default_branch`) — "multiple boards" later maps naturally to
396
+ one-board-per-repo, Asana-style.
397
+
398
+ **Per-card git lifecycle:**
399
+
400
+ 1. Card is created → nothing happens in git. Branches are cheap but noise isn't; the
401
+ branch is created on first entry into an execution column.
402
+ 2. First entry into execution → runner provisions a **cage-style throwaway container**:
403
+ clone (or cached fetch of) the repo, create `cardinal/<card-number>-<slug>` from the
404
+ board's default branch, check it out. The container is the agent's entire world.
405
+ 3. During the run the agent commits early and often to the card branch. Pushes go to the
406
+ remote card branch; a **draft PR** is opened on first push (see the decision log, §15).
407
+ The PR description is maintained by the runner: card title, link back to the card,
408
+ latest final report.
409
+ 4. `work_complete` → final push, PR marked ready for review. The PR diff *is* the primary
410
+ artifact; the final report event links to it.
411
+ 5. Revisions (`changes_requested` → re-entry into execution) → new run, **same branch**,
412
+ new commits. The PR accumulates the whole story, just like a human's PR would.
413
+ 6. Approval → merge (recommended: as the Done column's entry policy — see the decision log, §15), branch
414
+ deleted, card `done`. Rejection/abandonment → card archived, PR closed, branch deleted.
415
+ 7. Session teardown → container destroyed. Anything not pushed is gone, by design: **the
416
+ branch is the only durable output channel**, which makes the audit story trivial.
417
+
418
+ **Why cage-style containers are the right sandbox:**
419
+
420
+ - Isolation is the default, not a policy to enforce — the agent physically cannot touch
421
+ the host, other cards' workspaces, or the repo outside its branch.
422
+ - Teardown is `docker rm`, so failed/abandoned runs leave zero residue.
423
+ - The container image bakes in the toolchain (git, language runtimes, Agent SDK), so
424
+ provisioning is seconds, not a setup script per run.
425
+ - A per-card session log inside the workspace (cage's `.cage` pattern) doubles as an
426
+ agent-facing scratch memory across runs *within* a session.
427
+
428
+ **Conflict posture (MVP):** cards are assumed independent. If a card branch falls behind
429
+ the default branch, the agent rebases at run start; a rebase conflict is a `needs_input`
430
+ event, not something the agent resolves silently. Cross-card coordination is post-MVP.
431
+
432
+ ---
433
+
434
+ ## 14. UI / UX specification
435
+
436
+ Design principle: **the board answers "who needs me?", the card answers "what happened?",
437
+ the gear answers "what are the rules here?"** Everything below serves one of those three.
438
+
439
+ ### 14.1 Board view
440
+
441
+ ```
442
+ ┌ Cardinal ▸ sidekick-app ──────────────────────────────── ⚠ 2 need you ▾ ─ + Card ┐
443
+ │ │
444
+ │ Ideas │ Planning │ In Progress ⚙ │ Review ⚙ │ Done ⚙ │
445
+ │ │ │ 2 running · 1 queued │ 1 ready │ │
446
+ │ ┌───────────┐ │ ┌──────────┐ │ ┌───────────────────┐ │ ┌───────────────┐ │ ┌─────┐ │
447
+ │ │ Dark mode │ │ │ CSV │ │ │ #14 Add rate │ │ │ #11 Fix login │ │ │ #8 ✓│ │
448
+ │ │ │ │ │ export ● │ │ │ limiting ⚡ │ │ │ redirect ✅│ │ └─────┘ │
449
+ │ └───────────┘ │ │ 2 unread │ │ │ ▸ running tests… │ │ │ PR #52 ready │ │ ┌─────┐ │
450
+ │ ┌───────────┐ │ └──────────┘ │ │ 🌿#61 ⏱14m 💰$0.87│ │ └───────────────┘ │ │ #5 ✓│ │
451
+ │ │ Onboard │ │ │ └───────────────────┘ │ │ └─────┘ │
452
+ │ │ emails │ │ │ ┌───────────────────┐ │ │ │
453
+ │ └───────────┘ │ │ │ #17 Webhook │ │ │ │
454
+ │ │ │ │ retries ❓│ │ │ │
455
+ │ │ │ │ waiting on you 8m │ │ │ │
456
+ │ │ │ └───────────────────┘ │ │ │
457
+ │ │ │ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ │ │ │
458
+ │ │ │ #19 queued (1st) │ │ │
459
+ │ │ │ └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ │ │ │
460
+ └───────────────────────────────────────────────────────────────────────────────────┘
461
+ ```
462
+
463
+ - **Card faces are status instruments.** An executing card shows: state glyph (⚡ working,
464
+ ❓ needs input, ✅ ready for review, ✖ failed), the live one-line progress event, branch/PR
465
+ chip, elapsed time and spend. Idle cards are just title + tags.
466
+ - **Queued cards render ghosted** with their queue position; dragging within the column
467
+ reorders the queue.
468
+ - **Column headers carry the activity ticker** and the gear. Execution/review archetypes
469
+ get subtle background tinting so "where behavior changes" is visible board-wide.
470
+ - **Attention dropdown** (header, `⚠ n need you`): ordered list of cards in `needs_input` /
471
+ `failed` / `work_complete`; click jumps to the card with the relevant event focused.
472
+ This is the primary work queue once several agents run at once.
473
+ - Drag affordances: while dragging, each column highlights with a one-line consequence —
474
+ *"In Progress: an agent will be assigned and start work"*, *"Done: PR will be merged"*.
475
+ The policy model makes these strings derivable, and the hint teaches the product's core idea
476
+ at exactly the right moment.
477
+
478
+ ### 14.2 Card detail (opens as a wide modal / side panel)
479
+
480
+ ```
481
+ ┌ #14 Add rate limiting ──────────────────────────────── ⚡ working · Run 2 ── ✕ ─┐
482
+ │ tags: backend, security 🌿 cardinal/14-rate-limiting → PR #61 (draft) │
483
+ ├────────────────────────────────────────────┬─────────────────────────────────────┤
484
+ │ TIMELINE [Conversation|Activity|Debug] │ WORK PANEL │
485
+ │ │ │
486
+ │ ── Run 1 ───────────────── failed ── │ Status: working (14m) · $0.87 │
487
+ │ ▸ 23 events (collapsed) │ Plan: ✓ approved 14m ago │
488
+ │ ── Run 2 ───────────────── running ── │ 1. ✓ add Rack::Attack │
489
+ │ 🤖 Plan: I'll add Rack::Attack with… │ 2. ✓ configure per-endpoint limits │
490
+ │ 👤 approved · redirect: skip /health │ 3. ▶ write request specs │
491
+ │ 🤖 progress: configured throttles │ 4. · update README │
492
+ │ ▸ ran 9 commands (collapsed) │ │
493
+ │ 🤖 progress: running request specs… │ Artifacts: │
494
+ │ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ │ · PR #61 — 6 files, +214 −12 │
495
+ │ [ Message the agent… ⏎ ] │ · rate_limits.md (report) │
496
+ │ │ │
497
+ │ │ [⏸ Pause] [✖ Cancel run] [↻ Retry] │
498
+ └────────────────────────────────────────────┴─────────────────────────────────────┘
499
+ ```
500
+
501
+ - Timeline zoom tabs per §7; typing in the message box mid-run delivers an
501
+ interrupt/steer event to the agent at its next checkpoint.
503
+ - The work panel is stage-aware: in Planning it shows the emerging brief; while working it
504
+ shows live plan progress; in Review it becomes the **review panel** — final report on
505
+ top, file-level diff summary, `[Approve] [Request changes]` buttons, deep link to the PR.
506
+ - "Request changes" focuses the message box; the feedback becomes both a timeline
507
+ event and the seed of the next run's briefing.
508
+
509
+ ### 14.3 Column settings modal (the gear)
510
+
511
+ ```
512
+ ┌ Column settings — “In Progress” ──────────────────────────────┐
513
+ │ Name [In Progress ] Archetype [execution ▾] │
514
+ │ │
515
+ │ Instructions (given to every agent working in this column) │
516
+ │ [ Follow the repo style guide. Write tests for all changes. ] │
517
+ │ │
518
+ │ Model [claude-sonnet ▾] Concurrency limit [3] │
519
+ │ Plan approval [● required] Budget/run [$2.00] │
520
+ │ Timeout/run [30 min] │
521
+ │ │
522
+ │ Tool permissions │
523
+ │ [✓] read & search workspace [✓] edit files │
524
+ │ [✓] run commands / tests [✓] git commit & push │
525
+ │ [ ] network access [ ] flagged tools (ask) │
526
+ │ │
527
+ │ Automations │
528
+ │ On entry: [start agent run ▾] │
529
+ │ On success:[stay + mark ready ▾] (or: move to → Review) │
530
+ │ │
531
+ │ [Cancel] [Save] │
532
+ └───────────────────────────────────────────────────────────────┘
533
+ ```
534
+
535
+ Fields are shown or hidden by archetype: a `planning` column shows model + instructions only; a
536
+ `terminal` column shows just automations (e.g., *On entry: merge PR, delete branch*);
537
+ `inbox` shows nothing but the name. The modal **is** the policy editor — there is no other
538
+ admin surface.
539
+
540
+ ### 14.4 The canonical workflow, end to end
541
+
542
+ 1. **Capture** — `+ Card` → "Add rate limiting to the API" lands in Ideas. Passive.
543
+ 2. **Shape** — drag to Planning. The board assistant engages in the card: asks which
544
+ endpoints, agrees limits, writes acceptance criteria, posts a *Ready for execution*
545
+ brief event.
546
+ 3. **Launch** — drag to In Progress. Consequence hint shown during the drag. Card goes
547
+ `queued` → container provisioned, branch `cardinal/14-rate-limiting` created → agent
548
+ posts its plan → user taps 👍 (or redirects).
549
+ 4. **Work** — card face shows live progress. Agent commits/pushes; draft PR opens. A
550
+ question ("skip /health from throttling?") parks the card in `needs_input`, the
551
+ attention counter increments, the user answers from the attention dropdown, the run
552
+ resumes.
553
+ 5. **Deliver** — run succeeds: final report + PR marked ready; card shows ✅ ready for
554
+ review.
555
+ 6. **Review** — drag to Review. Work panel shows report + diff summary; user checks the
556
+ PR, requests one change; card → `changes_requested`; drag back to In Progress → Run 2
557
+ on the same branch fixes it.
558
+ 7. **Ship** — drag to Done. Terminal policy merges the PR and deletes the branch. Card
559
+ archives with its full timeline as the permanent record.
560
+
561
+ ---
562
+
563
+ ## 15. Decision log
564
+
565
+ - **2026-07-03** — Columns-as-policies confirmed as the core model; per-column gear modal
566
+ is the entire policy admin surface. Card-as-agent confirmed: execution-column entry
567
+ policy provisions a dedicated agent bound to the card. Single append-only Event timeline
568
+ per card confirmed. Concurrency/WIP limits live in column policy, not global config.
569
+ **MVP work domain = coding against a git repo.** Board binds to one repo; **each card
570
+ is its own branch and PR** (`cardinal/<n>-<slug>`). Agent workspaces are **cage-style
571
+ throwaway Docker containers** (repo cloned inside, branch checked out, destroyed on
572
+ teardown; pushed commits are the only durable output). Agent runtime = Claude Agent SDK
573
+ as a supervised subprocess inside the container. Architecture confirmed: Rails +
574
+ Hotwire + Postgres (SolidQueue/SolidCable). Cards get tags/descriptions now, richer
575
+ metadata later; multi-board (one repo per board) is post-MVP. Next step agreed: nail
576
+ down UI/UX and workflow before scaffolding (§14 drafted).
577
+ - **2026-07-03 (night)** — Portable instances (§16) adopted enthusiastically: `cardinal up`
578
+ in any repo, engine in a cage-style container alongside the running app. `.cardinal/` is
579
+ **local-only** (hidden via `.git/info/exclude` at spin-up, never committed) — boards are
580
+ personal; Cardinal is a local tool, not an app you sign into. **Datastore switched from
581
+ Postgres to SQLite** living at `.cardinal/cardinal.db` — zero service dependency, no
582
+ collision with the host app's own database, one-directory portability; verified running
583
+ with Postgres stopped. UI: near-fullscreen card modal with editing, new-card modal from
584
+ full-width column button, gear icons wired to stub policy modals, Ideas→Tasks,
585
+ model/effort chips, full-height columns, + Column button.
586
+ - **2026-07-03 (later)** — Five review/git seam questions resolved per recommendation:
587
+ (1) plan-approval gate defaults ON for columns with write tools, per-column toggleable;
588
+ (2) human-drags-only is a product principle for MVP — no auto-advance;
589
+ (3) draft PR opens on first push, flipped to ready on `work_complete`;
590
+ (4) "approve" is a reversible verdict — **the merge is Done's entry policy**;
591
+ (5) review surface = in-card final report + file-level diff summary, deep link to the
592
+ GitHub PR for line-level review. Scaffolding started: Rails 8 + Ruby 3.4 (Fullstaq) +
593
+ Postgres 15 inside the cage container, repo at github.com/palamedes/cardinal.
594
+ - **2026-07-03 (de-magic pass)** — **Archetypes are templates, not magic.** Choosing an
595
+ archetype stamps concrete on-entry rules, rule text, and starter instructions into the
596
+ column's policy at creation (and re-stamps them when the archetype is switched in the
597
+ gear modal); there is no hidden runtime fallback — a blank on-entry box means nothing
598
+ happens, visibly. **Accept rails are explicit-only:** "Accepts moves from" is a
599
+ whitelist and an empty list means the column accepts from nowhere; there is no
600
+ permissive blank default. Default board: Done also accepts from Planning — dragging
601
+ Planning→Done means "closed/terminated without work" (`merge_pr` on a card with no PR
602
+ just finalizes it).
603
+
604
+ ---
605
+
606
+ ## 16. Portable instances: Cardinal as a local tool you point at any repo (adopted)
607
+
608
+ **Decided 2026-07-03:** Cardinal's repo is the *engine*; `cardinal` (or `cardinal up`) run
609
+ inside any repo boots a cage-style Docker container against that repo and serves the board
610
+ on its own local port — living happily alongside the app that repo already runs (your app
611
+ keeps its ports, its database, its everything; Cardinal touches none of it).
612
+
613
+ **Cardinal is not an app you sign into.** It is a local tool for any coder at any level.
614
+ Boards are personal: your Cardinal tasks in a repo are *yours*, not your teammates'.
615
+
616
+ ### The `.cardinal/` directory — local-only, never committed
617
+
618
+ ```
619
+ any-repo/
620
+ └── .cardinal/ # created by spin-up, NEVER committed to the host repo
621
+ ├── cardinal.db # the board: cards, columns, policies, events, runs (SQLite, adopted)
622
+ └── workspaces/… # per-card agent working state, scratch, logs
623
+ ```
624
+
625
+ - Spin-up excludes `.cardinal/` via **`.git/info/exclude`** rather than editing the repo's
626
+ `.gitignore` — the tool must not dirty the host repo, and `.gitignore` edits are
627
+ themselves a diff someone might accidentally commit. `info/exclude` is per-clone and
628
+ invisible to everyone else.
629
+ - Because boards are personal and local, the earlier committed-files/sync-layer idea is
630
+ **dropped** — there is no second representation to reconcile. The on-disk store inside
631
+ `.cardinal/` *is* the board. (Human-readable export — `cardinal export` to markdown —
632
+ can come later as a view, not a store.)
633
+ - Portability falls out for free: the whole instance is one directory. Copy it = backup,
634
+ delete it = uninstall, move it = the board moves.
635
+
636
+ ### What this changes — and what it doesn't
637
+
638
+ - **Unchanged:** the entire domain model (§2), lifecycle (§3), runner design (§11),
639
+ column-as-policy (§1). `Board.repo_url` simply becomes "the repo I'm sitting in."
640
+ - **Changed:** deployment (hosted app → per-repo local instances) and datastore
641
+ (Postgres → per-instance SQLite inside `.cardinal/`, adopted — see §12.1); "multiple
642
+ boards" resolves to one board per repo with no multi-board UI at all.
643
+ - **Card branches remain the collaboration surface.** Your board is private, but the work
644
+ it produces ships as ordinary branches and PRs — teammates see the output, not the board.
645
+
646
+
647
+ ---
648
+
649
+ ## 17. Column rules & the three tiers of AI (adopted 2026-07-03)
650
+
651
+ A column's `on_entry` policy is a **list of rule actions** fired whenever a card lands in
652
+ it (dispatched by `Rules.fire_entry`; `on_exit` later). Archetypes only supply *defaults* —
653
+ `planning` → `assistant_greeting`, `execution` → `start_agent_run`, `terminal` →
654
+ `merge_pr`. Any column can carry any rules, so behavior stays data, not code (§1).
655
+
656
+ This gives Cardinal three cleanly separated tiers of AI:
657
+
658
+ | Tier | Construct | Lifetime | Cost profile |
659
+ |---|---|---|---|
660
+ | **Planning assistant** | `AssistantReplyJob` — replies when the user writes on a planning-column card | one reply | cheap model, no tools |
661
+ | **Maintenance agents** | `ai_task` rules → `AiTaskJob` — one bounded Claude call with a prompt template (`%{title}`, `%{description}`, `%{conversation}`), output posted to the timeline | one call | cheap, no workspace, no tools |
662
+ | **Worker agent** | `start_agent_run` rule → `StartRunJob` → `Agent::Runner` — the card's dedicated agent | AgentSession + Runs | full workspace + tools |
663
+
664
+ Example maintenance rules (all just `{action: "ai_task", prompt: "..."}` in a column's
665
+ `on_entry`): auto-tag a card on capture, distill the planning conversation into a brief on
666
+ entry to execution, sanity-check acceptance criteria before an agent is assigned.
667
+
668
+ ### Runner implementation (v1, shipped)
669
+
670
+ `Agent::Runner` drives one Run: provisions an `Agent::Workspace` (today: isolated local
671
+ clone under `.cardinal/workspaces/card-N` with origin pointed at the board repo — the
672
+ cage-container strategy slots in behind the same interface once Cardinal runs where Docker
673
+ is available), spawns **`claude -p` with `--output-format stream-json`** (the Agent SDK
674
+ headless runtime) using the column's model/max_turns/timeout, translates the stream into
675
+ timeline events (`progress`, `tool_call`, `final_report`), then pushes the branch and
676
+ opens a **draft PR via `gh`**. Credentials (`GH_TOKEN` etc.) are stripped from the agent's
677
+ environment — the runner does the pushing, the agent only commits. Cancel = TERM the
678
+ recorded PID. WIP limits enforced at job start; a finishing run kicks the next queued card.
679
+
680
+ Proven end-to-end 2026-07-03: card #4 ("Document what a Cardinal worker agent is") →
681
+ queued → working → work_complete, one scoped commit, draft PR #2, $0.08 on Sonnet.
682
+
683
+ All v1 gaps closed overnight 2026-07-03→04: heartbeats + RunSweeper (dead runs reaped,
684
+ stuck cards repaired, queues re-kicked); `needs_input` round-trips via claude session
685
+ resume (QUESTION: protocol); plan-approval gate (read-only plan phase → approve/redirect →
686
+ execute, same session); review loop (approve / request-changes → revision runs on the same
687
+ branch); merge-on-Done (`gh pr ready` + squash-merge + branch delete as the terminal rule);
688
+ gear modal is the real policy editor (including on_entry rules JSON); engine test suite
689
+ (31 tests, subprocess stubbed); workspace strategy factory (Local default + experimental
690
+ cage-style Container behind CARDINAL_WORKSPACE=container, docker/agent image);
691
+ **`bin/cardinal` (`cardinal up`)** — portable per-repo instances per §16 with
692
+ .git/info/exclude hiding, per-target `.cardinal/` data dir, and first-run
693
+ `Board.bootstrap!` (credential-sanitized origin URL). Lifecycle proven live twice:
694
+ PR #2 (docs card: work → review → revision → approve → Done → squash-merged to main) and
695
+ PR #3 (motto card: plan → approve → QUESTION → answer → work_complete).