cardinal-ai 0.0.1 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -0
  3. data/README.md +50 -29
  4. data/Rakefile +6 -0
  5. data/app/assets/stylesheets/application.css +10 -0
  6. data/app/assets/stylesheets/cardinal.css +530 -0
  7. data/app/controllers/application_controller.rb +7 -0
  8. data/app/controllers/boards_controller.rb +5 -0
  9. data/app/controllers/cards_controller.rb +129 -0
  10. data/app/controllers/columns_controller.rb +130 -0
  11. data/app/controllers/messages_controller.rb +25 -0
  12. data/app/controllers/runs_controller.rb +58 -0
  13. data/app/helpers/application_helper.rb +35 -0
  14. data/app/javascript/application.js +2 -0
  15. data/app/javascript/controllers/application.js +7 -0
  16. data/app/javascript/controllers/autosave_controller.js +43 -0
  17. data/app/javascript/controllers/board_column_controller.js +96 -0
  18. data/app/javascript/controllers/clipboard_controller.js +18 -0
  19. data/app/javascript/controllers/composer_controller.js +10 -0
  20. data/app/javascript/controllers/index.js +3 -0
  21. data/app/javascript/controllers/modal_controller.js +45 -0
  22. data/app/javascript/controllers/reveal_controller.js +15 -0
  23. data/app/javascript/controllers/scroll_controller.js +44 -0
  24. data/app/javascript/controllers/tags_controller.js +49 -0
  25. data/app/javascript/controllers/theme_controller.js +43 -0
  26. data/app/javascript/controllers/tooltip_controller.js +37 -0
  27. data/app/jobs/ai_task_job.rb +26 -0
  28. data/app/jobs/application_job.rb +7 -0
  29. data/app/jobs/assistant_reply_job.rb +132 -0
  30. data/app/jobs/mark_pr_ready_job.rb +18 -0
  31. data/app/jobs/merge_pr_job.rb +27 -0
  32. data/app/jobs/resume_run_job.rb +30 -0
  33. data/app/jobs/start_run_job.rb +13 -0
  34. data/app/mailers/application_mailer.rb +4 -0
  35. data/app/models/agent_session.rb +8 -0
  36. data/app/models/application_record.rb +3 -0
  37. data/app/models/artifact.rb +8 -0
  38. data/app/models/board.rb +92 -0
  39. data/app/models/card.rb +83 -0
  40. data/app/models/column.rb +134 -0
  41. data/app/models/event.rb +44 -0
  42. data/app/models/run.rb +28 -0
  43. data/app/services/agent/runner.rb +379 -0
  44. data/app/services/agent/workspace.rb +138 -0
  45. data/app/services/card_transition.rb +97 -0
  46. data/app/services/claude_cli.rb +89 -0
  47. data/app/services/rules/compiler.rb +55 -0
  48. data/app/services/rules.rb +92 -0
  49. data/app/services/run_sweeper.rb +53 -0
  50. data/app/views/boards/show.html.erb +79 -0
  51. data/app/views/cards/_card.html.erb +48 -0
  52. data/app/views/cards/_detail.html.erb +190 -0
  53. data/app/views/cards/_tag_picker.html.erb +12 -0
  54. data/app/views/cards/new.html.erb +35 -0
  55. data/app/views/cards/show.html.erb +3 -0
  56. data/app/views/columns/_column.html.erb +25 -0
  57. data/app/views/columns/edit.html.erb +146 -0
  58. data/app/views/events/_event.html.erb +29 -0
  59. data/app/views/layouts/application.html.erb +46 -0
  60. data/app/views/layouts/mailer.html.erb +13 -0
  61. data/app/views/layouts/mailer.text.erb +1 -0
  62. data/app/views/pwa/manifest.json.erb +22 -0
  63. data/app/views/pwa/service-worker.js +26 -0
  64. data/bin/rails +4 -0
  65. data/bin/rake +4 -0
  66. data/cardinal.md +695 -0
  67. data/config/application.rb +60 -0
  68. data/config/boot.rb +13 -0
  69. data/config/bundler-audit.yml +5 -0
  70. data/config/cable.yml +13 -0
  71. data/config/ci.rb +20 -0
  72. data/config/credentials.yml.enc +1 -0
  73. data/config/database.yml +31 -0
  74. data/config/environment.rb +5 -0
  75. data/config/environments/development.rb +78 -0
  76. data/config/environments/production.rb +89 -0
  77. data/config/environments/test.rb +53 -0
  78. data/config/importmap.rb +6 -0
  79. data/config/initializers/assets.rb +7 -0
  80. data/config/initializers/cardinal_bootstrap.rb +12 -0
  81. data/config/initializers/cardinal_instance.rb +20 -0
  82. data/config/initializers/content_security_policy.rb +29 -0
  83. data/config/initializers/filter_parameter_logging.rb +8 -0
  84. data/config/initializers/inflections.rb +16 -0
  85. data/config/initializers/run_sweeper.rb +17 -0
  86. data/config/locales/en.yml +31 -0
  87. data/config/puma.rb +42 -0
  88. data/config/routes.rb +22 -0
  89. data/config/storage.yml +27 -0
  90. data/config.ru +6 -0
  91. data/db/migrate/20260703000001_create_cardinal_schema.rb +78 -0
  92. data/db/migrate/20260703000002_add_agent_runner_fields.rb +7 -0
  93. data/db/migrate/20260704000001_add_parent_to_cards.rb +5 -0
  94. data/db/migrate/20260704000002_add_assistant_session_to_cards.rb +5 -0
  95. data/db/seeds.rb +13 -0
  96. data/docker/agent/Dockerfile +16 -0
  97. data/exe/cardinal +111 -0
  98. data/lib/cardinal/version.rb +1 -1
  99. data/public/400.html +135 -0
  100. data/public/404.html +135 -0
  101. data/public/406-unsupported-browser.html +135 -0
  102. data/public/422.html +135 -0
  103. data/public/500.html +135 -0
  104. data/public/icon.png +0 -0
  105. data/public/icon.svg +3 -0
  106. data/public/robots.txt +1 -0
  107. data/vendor/javascript/sortablejs.js +3378 -0
  108. metadata +236 -9
@@ -0,0 +1,134 @@
# A lane on a board. A column's configurable behavior lives in its `policy`
# store (exposed through the store accessors below); the archetype seeds that
# store when the column is created and gates a few built-in behaviors.
class Column < ApplicationRecord
  ARCHETYPES = %w[inbox planning execution review terminal].freeze

  belongs_to :board
  has_many :cards, -> { order(:position) }, dependent: :restrict_with_error

  enum :archetype, ARCHETYPES.index_by(&:itself)

  # Archetypes are TEMPLATES, not magic: choosing one stamps concrete,
  # editable values into the policy fields. Nothing falls back to these at
  # runtime — what the gear modal shows is everything there is.
  ARCHETYPE_TEMPLATES = {
    "inbox" => {},
    "planning" => {
      "on_entry" => [{ "action" => "assistant_greeting" }],
      "on_entry_text" => "The planning assistant reads the card and opens the discussion.",
      "instructions" => "Drive toward crisp acceptance criteria. Open with the 2-3 sharpest questions."
    },
    "execution" => {
      "on_entry" => [{ "action" => "start_agent_run" }],
      "on_entry_text" => "Assign a dedicated worker agent to the card and start a run."
    },
    "review" => {},
    "terminal" => {
      "on_entry" => [{ "action" => "merge_pr" }],
      "on_entry_text" => "Merge the card's PR and ship it."
    }
  }.freeze

  before_create :seed_archetype_template

  # The template hash for this column's archetype ({} when there is none).
  def archetype_template = ARCHETYPE_TEMPLATES.fetch(archetype, {})

  # The policy blob is the column's entire behavior configuration (§1, §14.3).
  store_accessor :policy, :instructions, :model, :effort, :concurrency_limit,
                 :plan_approval, :budget_per_run_cents, :timeout_minutes,
                 :max_turns, :tools, :on_entry, :on_success, :color, :arrivals,
                 :accepts_from

  # Only ever emit a validated hex color into inline styles.
  # Returns the color string when it is exactly "#" + six hex digits, else nil.
  def safe_color
    color if color.to_s.match?(/\A#\h{6}\z/)
  end

  # Does any AI service this column? Explicit per-column switch (default ON
  # for back-compat); the inbox/Tasks intake is never AI, unconditionally.
  # When false the column is inert AI-wise: no assistant, no worker runs,
  # no ai_task rules — cards there are human work.
  #
  # NOTE(review): the switch is compared strictly against `false`, so a
  # string such as "false" or "0" written by a form would count as enabled —
  # confirm the writer casts the value to a real boolean.
  def ai?
    return false if inbox?
    policy["ai"] != false
  end

  # Which columns may move cards INTO this one (§ accept policy, card #15).
  # Stored as an array of column-id strings. EXPLICIT ONLY: an empty list
  # means this column accepts from nowhere — there is no permissive default.
  def accepts?(source_column)
    Array(accepts_from).map(&:to_s).include?(source_column.id.to_s)
  end

  # Start the next queued card when a run slot frees up. A queued card whose
  # run parked and already has its answer recorded resumes instead of
  # starting fresh.
  def kick_queue
    return unless ai?
    return if at_wip_limit?
    next_card = cards.where(status: "queued").order(:position).first
    return unless next_card

    # Latest parked run for that card, if any; "pending_resume" in its
    # briefing is what marks it as resumable here.
    parked = next_card.runs.where(status: "needs_input").order(:id).last
    if parked&.briefing&.key?("pending_resume")
      ResumeRunJob.perform_later(parked.id, "")
    else
      StartRunJob.perform_later(next_card.id)
    end
  end

  # "claude-sonnet-4-6" → "sonnet", for compact chips on card faces.
  # Falls back to the raw model value when it does not match that shape.
  def model_short
    model.to_s[/claude-([a-z]+)/, 1] || model
  end

  validates :name, presence: true
  validates :position, presence: true

  # Number of this column's cards currently in the "working" status.
  def running_count = cards.where(status: "working").count

  # Number of this column's cards currently in the "queued" status.
  def queued_count = cards.where(status: "queued").count

  # True only for execution columns that have a concurrency limit set and
  # already have at least that many working cards.
  def at_wip_limit?
    execution? && concurrency_limit.present? && running_count >= concurrency_limit.to_i
  end

  # The built-in role contract for AI servicing this archetype — shown
  # read-only in the gear modal so the Instructions field is understood as
  # ADDING to this, never replacing it. Enforced in code, not editable.
  BUILT_IN_ROLES = {
    "planning" => "Plans only, never implements: read-only tools (physically cannot change files), " \
                  "drives toward a Ready-for-execution brief, and hands off — approval means " \
                  "\"finalize the brief\", not \"do it\".",
    "execution" => "Full toolset in an isolated checkout of the card's branch. Commits as it goes but " \
                   "never pushes (the runner pushes); merges the default branch itself on conflict; " \
                   "parks with a QUESTION: when genuinely blocked; ends with a final report."
  }.freeze

  # Role text for this archetype, or nil when the archetype has none.
  def built_in_role = BUILT_IN_ROLES[archetype]

  # What "Use AI" concretely means here — the §5 tier distinction, visible.
  AI_MODES = {
    "planning" => "a shared planning assistant joins each card's conversation",
    "execution" => "a dedicated worker agent is assigned to each card",
    "review" => "allow AI on-entry rules (ai_task) in this column",
    "terminal" => "allow AI on-entry rules (ai_task) in this column"
  }.freeze

  # AI-mode text for this archetype, or nil when the archetype has none.
  def ai_mode_description = AI_MODES[archetype]

  # Stamp template values into any policy field the creator left blank.
  def seed_archetype_template
    archetype_template.each do |key, value|
      # deep_dup: ARCHETYPE_TEMPLATES is only shallowly frozen, so handing out
      # its nested arrays/hashes directly would let an in-place edit of one
      # column's policy mutate the template (and every column sharing it).
      policy[key] = value.deep_dup if policy[key].blank?
    end
  end

  # One-line consequence shown while dragging a card over this column (§14.1).
  # Returns nil for an archetype not listed below.
  def drag_hint
    case archetype
    when "inbox"     then "Parked — no agent activity"
    when "planning"  then "The board assistant will join the discussion"
    when "execution" then "An agent will be assigned and start work"
    when "review"    then "Work stops — ready for your verdict"
    when "terminal"  then "Closes it — PR merged and branch deleted, if there is one"
    end
  end
end
@@ -0,0 +1,44 @@
# One entry on a card's timeline, optionally tied to the run that produced it.
class Event < ApplicationRecord
  # Every value `kind` is allowed to take.
  KINDS = %w[
    user_message agent_message assistant_message
    status_change column_move move_rejected plan_proposed plan_approved
    question answer progress
    tool_call tool_result artifact_created
    run_started run_finished final_report error
  ].freeze

  # Kinds visible at the "conversation" timeline zoom level (§7).
  CONVERSATION_KINDS = %w[
    user_message agent_message assistant_message question answer
    plan_proposed plan_approved final_report error
    column_move move_rejected
  ].freeze

  # Kinds that deliver what the typing indicator was promising.
  RESOLVES_THINKING = %w[assistant_message final_report question plan_proposed error].freeze

  # Kinds that alter a card FACE (progress lines, thinking chip, replied
  # chip), so the board itself has to morph — not only an open modal.
  REFRESHES_BOARD = [*%w[progress run_started run_finished], *RESOLVES_THINKING].freeze

  belongs_to :card
  belongs_to :run, optional: true

  validates :kind, inclusion: { in: KINDS }
  validates :actor, presence: true

  scope :conversation, -> { where(kind: CONVERSATION_KINDS) }
  scope :activity, -> { where.not(kind: %w[tool_call tool_result]) }

  # Registered in the same order as before: append, board refresh, indicator removal.
  after_create_commit :append_to_open_modals, unless: :user_authored?
  after_create_commit :refresh_board, if: :refreshes_board?
  after_create_commit :remove_typing_indicator, if: :resolves_thinking?

  # The event's text, read from the payload.
  def text
    payload["text"]
  end

  private

  # User-authored events skip the live append — they arrive via the form's
  # own redirect re-render.
  def user_authored?
    actor == "user"
  end

  def refreshes_board?
    REFRESHES_BOARD.include?(kind)
  end

  def resolves_thinking?
    RESOLVES_THINKING.include?(kind)
  end

  # Live-append this event into any open card modal.
  def append_to_open_modals
    broadcast_append_to card, target: "card_events", partial: "events/event", locals: { event: self }
  end

  def refresh_board
    card.broadcast_refresh_to card.board
  end

  def remove_typing_indicator
    broadcast_remove_to card, target: "typing-indicator"
  end
end
data/app/models/run.rb ADDED
@@ -0,0 +1,28 @@
# One agent run belonging to an agent session (and, through it, to a card).
class Run < ApplicationRecord
  STATUSES = %w[queued running needs_input succeeded failed cancelled].freeze

  # Matches the wording of budget/timeout outcomes. A parked segment carries
  # that wording ("…turn budget mid-work…") on its last question event; a
  # failed one carries it (failure_reason) on result_summary. Either way it
  # means "try again with a fresh budget," not a bug.
  EXHAUSTION = /turn budget|max-turns budget|timed out|timeout/i

  belongs_to :agent_session
  has_one :card, through: :agent_session
  has_many :artifacts, dependent: :destroy
  has_many :events, dependent: :nullify

  enum :status, STATUSES.index_by(&:itself)

  # True once the run has reached succeeded, failed or cancelled.
  def finished?
    succeeded? || failed? || cancelled?
  end

  # Did this run stop on its budget or timeout? Looks at the latest question
  # event while parked, at result_summary otherwise.
  def exhausted?
    evidence =
      if needs_input?
        events.where(kind: "question").order(:id).last&.text
      else
        result_summary
      end
    EXHAUSTION.match?(evidence.to_s)
  end

  # A run the user can relaunch from the work panel: an execution-column run
  # that parked or failed on its budget/timeout. Restart resumes the surviving
  # session (fresh budget) or starts a clean run when no session remains.
  def restartable?
    return false unless card.column.execution?
    return false unless needs_input? || failed?

    exhausted?
  end
end
@@ -0,0 +1,379 @@
module Agent
  # Drives one Run through its phases (cardinal.md §4, §11, §17):
  #
  # start → plan phase (read-only, --permission-mode plan) when the column
  #         requires approval, else straight to execute
  # park → plan_proposed or QUESTION: → run + card go needs_input
  # resume → same claude session (--resume) with the user's answer,
  #          approval, or plan feedback
  # finish → push branch, ensure draft PR, final report, work_complete
  #
  # The subprocess is the Claude Agent runtime (`claude -p`, stream-json).
  # Heartbeats are written while streaming; RunSweeper reaps silent runs.
  class Runner
    STRIP_ENV = %w[ANTHROPIC_API_KEY CLAUDECODE CLAUDE_CODE_ENTRYPOINT GH_TOKEN GITHUB_TOKEN].freeze
    HEARTBEAT_EVERY = 10 # seconds
    PLAN_TURNS = 20
    DEFAULT_EXECUTE_TURNS = 80 # turn caps are runaway guards, not work limits
    DEFAULT_TIMEOUT_MINUTES = 30 # used when the column sets no timeout

    EXECUTE_RULES = <<~RULES.freeze
      ## Rules
      - You have the FULL toolset now: shell (bash, git), file editing, everything. Run
      commands yourself — never ask who should run them.
      - Work only inside this repository checkout (you are already on the card's branch).
      - If the branch conflicts with origin's default branch, merge it into the card
      branch yourself and resolve the conflicts as part of the work.
      - Commit your work as you go with clear messages. Do NOT push — the runner pushes for you.
      - Stay strictly within the card's scope. Prefer the smallest reasonable interpretation and note assumptions.
      - If you are blocked on a decision only the user can make, output a single line starting with
      "QUESTION:" followed by the question, then stop immediately. Do not guess on genuinely ambiguous choices.
      - Finish with a concise report: what you did, what to check, any open questions.
    RULES

    def self.start(run) = new(run).start
    def self.resume(run, message, approve: false) = new(run).resume(message, approve: approve)

    attr_reader :run, :card, :column

    # @param run [Run] the run to drive; its card and that card's column are
    #   captured once here.
    def initialize(run)
      @run = run
      @card = run.card
      @column = card.column
    end

    # Begin the run's first segment: a plan pass when the column gates on plan
    # approval, otherwise straight to execution. Any error is recorded as a run
    # failure; the column queue is kicked afterwards for execution columns.
    def start
      begin_segment!(first: true)
      if plan_gated?
        run.update!(phase: "plan")
        stream_agent(prompt: plan_prompt, mode: "plan")
      else
        stream_agent(prompt: briefing_prompt, mode: "execute")
      end
    rescue => e
      record_failure(e)
    ensure
      column.kick_queue if column.execution?
    end

    # Continue a parked run in the same agent session.
    #
    # @param message [String] the user's answer or plan feedback
    # @param approve [Boolean] when the run is in its plan phase, true switches
    #   it to execution; false sends `message` back as plan feedback
    def resume(message, approve: false)
      begin_segment!
      if run.phase == "plan" && approve
        run.update!(phase: "execute")
        stream_agent(prompt: "Your plan is approved — execute it now.\n\n#{EXECUTE_RULES}",
                     mode: "execute", resuming: true)
      elsif run.phase == "plan"
        stream_agent(prompt: "Feedback on your plan:\n\n#{message}\n\nRevise the plan accordingly, present it, and stop again for approval. Stay in read-only mode.",
                     mode: "plan", resuming: true)
      else
        stream_agent(prompt: "Answer from the user:\n\n#{message}\n\nContinue the work. The same rules apply (commit, don't push, QUESTION: if blocked again, final report when done).",
                     mode: "execute", resuming: true)
      end
    rescue => e
      record_failure(e)
    ensure
      column.kick_queue if column.execution?
    end

    private

    # The column's plan_approval setting, cast to a boolean.
    def plan_gated?
      ActiveModel::Type::Boolean.new.cast(column.plan_approval)
    end

    # Mark run and card as active and log the start/resume on the card.
    def begin_segment!(first: false)
      run.update!(status: "running", started_at: run.started_at || Time.current, heartbeat_at: Time.current)
      card.update!(status: "working")
      if first
        card.log!("run_started", run: run, text: "Run ##{run.id} started")
      else
        card.log!("progress", actor: "agent", run: run, text: "Run resumed")
      end
    end

    # Run one agent segment: spawn the CLI in the card's workspace, stream its
    # JSON events into the card log, then hand the collected result to the
    # matching conclude_* step.
    #
    # @param prompt [String] prompt passed to `claude -p`
    # @param mode [String] "plan", "plan_wrap" or "execute"
    # @param resuming [Boolean] attach to the existing workspace and session
    #   instead of provisioning a fresh one
    def stream_agent(prompt:, mode:, resuming: false)
      workspace = resuming ? Workspace.attach(card) : Workspace.provision(card)
      remember_base_sha(workspace) if mode == "execute"

      cmd = agent_command(prompt: prompt, mode: mode, resuming: resuming)

      result = {}
      @base_in, @base_out = run.input_tokens, run.output_tokens
      @seg_in = @seg_out = 0
      @timed_out = false # per-segment flag; a runner may stream more than once
      env = STRIP_ENV.index_with { nil }
      spawn_cmd, spawn_opts = workspace.agent_spawn(cmd)
      Open3.popen3(env, *spawn_cmd, **spawn_opts) do |stdin, stdout, stderr, wait|
        watchdog = drain = nil
        begin
          stdin.close
          run.agent_session.update!(status: "ready", config: run.agent_session.config.merge("pid" => wait.pid))
          timeout_min = (column.timeout_minutes.presence || DEFAULT_TIMEOUT_MINUTES).to_i
          watchdog = Thread.new do
            sleep timeout_min * 60
            @timed_out = true
            terminate(wait.pid)
          end
          # Keep only the last few stderr lines for the failure message.
          err_lines = []
          drain = Thread.new { stderr.each_line { |l| err_lines << l.strip; err_lines.shift while err_lines.size > 4 } }
          last_beat = Time.current
          stdout.each_line do |line|
            if Time.current - last_beat > HEARTBEAT_EVERY
              # Heartbeat + live token tally: tokens survive even if this
              # segment is killed before its result event.
              run.update_columns(heartbeat_at: Time.current,
                                 input_tokens: @base_in + @seg_in,
                                 output_tokens: @base_out + @seg_out)
              last_beat = Time.current
            end
            begin
              handle_stream_event(JSON.parse(line), result)
            rescue JSON::ParserError
              next
            end
          end
          drain.join(1)
          # The watchdog stays armed until the child is reaped, so a process
          # that closed stdout but never exits is still stopped at the timeout.
          result[:exit_status] = wait.value
          result[:stderr] = err_lines.join(" | ")
          result[:timed_out] = @timed_out
          result[:timeout_min] = timeout_min
        ensure
          # Always disarm the watchdog: left running after an error it would
          # later signal a pid that has been reaped and possibly reused.
          watchdog&.kill
          # Error path only (the child is still unreaped): stop it so popen3's
          # own join on exit cannot block indefinitely.
          terminate(wait.pid) if wait.alive?
          drain&.kill
        end
      end

      mode == "execute" ? conclude_execute(workspace, result) : conclude_plan(result)
    end

    # Build the `claude` argv for one segment.
    def agent_command(prompt:, mode:, resuming:)
      cmd = ["claude", "-p", prompt, "--output-format", "stream-json", "--verbose",
             "--permission-mode", "bypassPermissions"]
      case mode
      when "plan"
        # Read-only exploration for the plan phase. (--permission-mode plan
        # hangs headless: ExitPlanMode waits for an approval that never comes.)
        cmd += ["--max-turns", PLAN_TURNS.to_s, "--tools", "Read,Glob,Grep"]
      when "plan_wrap"
        # Turn-capped plan: force the plan out of the context already gathered.
        cmd += ["--max-turns", "3", "--tools", ""]
      else
        cmd += ["--max-turns", (column.max_turns.presence || DEFAULT_EXECUTE_TURNS).to_s]
      end
      cmd += ["--model", column.model] if column.model.present?
      cmd += ["--effort", column.effort] if column.effort.present?
      cmd += ["--resume", run.external_session_id] if resuming && run.external_session_id.present?
      cmd
    end

    # Best-effort TERM: a process that is already gone (or cannot be signalled)
    # is not an error here. Always returns nil.
    def terminate(pid)
      Process.kill("TERM", pid)
      nil
    rescue SystemCallError
      nil
    end

    # Fold one parsed stream-json event into the card log, the live token
    # tally and (for the final "result" event) the `result` hash.
    def handle_stream_event(json, result)
      case json["type"]
      when "system"
        if json["subtype"] == "init"
          run.update_columns(external_session_id: json["session_id"]) if json["session_id"].present?
          card.log!("progress", actor: "agent", run: run, text: "Agent session started (#{json["model"]})")
        end
      when "assistant"
        if (usage = json.dig("message", "usage"))
          @seg_in += usage["input_tokens"].to_i
          @seg_out += usage["output_tokens"].to_i
        end
        Array(json.dig("message", "content")).each do |block|
          case block["type"]
          when "text"
            card.log!("progress", actor: "agent", run: run, text: block["text"].to_s.truncate(400)) if block["text"].present?
          when "tool_use"
            card.log!("tool_call", actor: "agent", run: run,
                      text: "#{block["name"]}: #{block["input"].to_json.truncate(160)}")
          end
        end
      when "result"
        result[:success] = json["subtype"] == "success" && !json["is_error"]
        result[:subtype] = json["subtype"]
        result[:report] = json["result"].to_s
        result[:cost] = json["total_cost_usd"]
        result[:turns] = json["num_turns"]
        result[:input_tokens] = json.dig("usage", "input_tokens")
        result[:output_tokens] = json.dig("usage", "output_tokens")
      end
    end

    # Finish a plan (or plan_wrap) segment: park with the proposed plan, retry
    # once with a tool-less wrap-up pass after a turn cap, or record a failure.
    def conclude_plan(result)
      accumulate_usage(result)
      unless result[:success] && result[:report].present?
        # Turn-capped mid-exploration: one tool-less wrap-up pass to force the
        # plan out of the context it already gathered.
        if result[:subtype] == "error_max_turns" && run.external_session_id.present? && !@plan_wrap_attempted
          @plan_wrap_attempted = true
          card.log!("progress", actor: "agent", run: run, text: "Hit the exploration budget — wrapping up the plan from what was learned")
          return stream_agent(prompt: "You have hit your exploration limit. Present your best plan-of-attack now, using only what you have already learned. Do not use any tools.",
                              mode: "plan_wrap", resuming: true)
        end
        return record_failure(RuntimeError.new("plan phase failed — #{failure_reason(result)}"))
      end
      park!("plan_proposed", result[:report],
            note: "Plan proposed — approve it in the work panel, or reply to redirect.")
    end

    # Finish an execute segment: park on a turn cap or a QUESTION:, record a
    # failure, or push the work, ensure a PR and mark the card work_complete.
    def conclude_execute(workspace, result)
      accumulate_usage(result)
      unless result[:success]
        # Budget exhaustion isn't failure — park and offer to continue (§8).
        # The session survives; an answer resumes it with a fresh turn budget.
        if result[:subtype] == "error_max_turns" && run.external_session_id.present?
          commits = salvage_commits(workspace)
          return park!("question",
                       "I've used this segment's turn budget mid-work#{" — #{commits} commit(s) so far are saved to the branch" if commits.to_i.positive?}. Reply (anything) to continue with a fresh budget, or cancel the run.",
                       note: "Agent paused at the turn budget — reply on the card to continue.")
        end
        salvage_commits(workspace)
        return record_failure(RuntimeError.new(failure_reason(result)))
      end

      report = result[:report].to_s
      if report.lstrip.start_with?("QUESTION:")
        return park!("question", report.lstrip.delete_prefix("QUESTION:").strip,
                     note: "Agent is waiting on your answer — reply on the card.")
      end

      commits = workspace.commits_since(base_sha)
      if commits.any?
        workspace.push!
        ensure_pull_request(workspace)
        run.artifacts.create!(kind: "pull_request", name: "PR for #{card.branch_name}",
                              payload: { url: card.pr_url, commits: commits })
      elsif card.pr_url.blank? && workspace.ahead_of_default?
        # No new commits this run, but the branch carries earlier work (e.g. a
        # salvage commit) that still needs a PR.
        workspace.push!
        ensure_pull_request(workspace)
      end

      run.update!(status: "succeeded", finished_at: Time.current,
                  result_summary: report.presence&.truncate(2000))
      card.log!("final_report", actor: "agent", run: run,
                text: [report.presence || "Run finished with no report.",
                       commits.any? ? "\n**Commits (#{commits.size}):**\n#{commits.map { |c| "- #{c}" }.join("\n")}" : "\n_No commits were made._"].join("\n"))
      card.update!(status: "work_complete")
      card.log!("run_finished", run: run,
                text: "Run succeeded — #{result[:turns]} turns, $#{run.cost.round(2)} total")
    end

    # Put run and card into needs_input, logging `text` as an event of `kind`
    # plus a status_change note.
    def park!(kind, text, note:)
      run.update!(status: "needs_input")
      card.log!(kind, actor: "agent", run: run, text: text)
      card.update!(status: "needs_input")
      card.log!("status_change", run: run, text: note)
    end

    # Say WHY, not just that it died: timeout vs turn cap vs error vs crash.
    def failure_reason(result)
      return "timed out after #{result[:timeout_min]} minutes and was stopped — raise the column's timeout for bigger tasks, or split the card" if result[:timed_out]
      return "hit this segment's max-turns budget — raise Max turns in the column's gear settings, or split the card" if result[:subtype] == "error_max_turns"
      parts = ["agent did not finish cleanly (exit #{result[:exit_status]&.exitstatus || "?"})"]
      parts << "last output: #{result[:report].truncate(300)}" if result[:report].present?
      parts << "stderr: #{result[:stderr].truncate(300)}" if result[:stderr].present?
      parts.join(" — ")
    end

    # A failed/timed-out segment may still hold real local commits; push them
    # so the branch (and any PR) keeps the partial progress instead of the
    # next provision's reset wiping it.
    #
    # @return [Integer] number of commits pushed (0 when none, or on error)
    def salvage_commits(workspace)
      commits = workspace.commits_since(base_sha)
      return 0 if commits.empty?
      workspace.push!
      card.log!("progress", run: run, text: "Partial work preserved: #{commits.size} commit(s) pushed to #{card.branch_name}")
      commits.size
    rescue => e
      card.log!("progress", run: run, text: "Could not preserve partial work: #{e.message.truncate(120)}")
      0
    end

    # Mark run and card failed and log the error message on the card.
    def record_failure(error)
      run.update!(status: "failed", finished_at: Time.current,
                  result_summary: error.message.truncate(500))
      card.update!(status: "failed")
      card.log!("error", run: run, text: "Run failed: #{error.message.truncate(500)}")
    end

    # Cost/tokens accumulate across segments of the same run (plan + execute +
    # resumes). Tokens were live-tallied during the stream; the result event's
    # figures are authoritative when present, the live tally is the fallback
    # for killed segments (which never emit a result).
    def accumulate_usage(result)
      base_in = @base_in || run.input_tokens
      base_out = @base_out || run.output_tokens
      run.update!(cost: run.cost + (result[:cost] || 0),
                  input_tokens: base_in + (result[:input_tokens] || @seg_in || 0),
                  output_tokens: base_out + (result[:output_tokens] || @seg_out || 0))
      @base_in = @base_out = @seg_in = @seg_out = nil
    end

    # Record the workspace head as the run's base commit, once per run.
    def remember_base_sha(workspace)
      return if run.briefing["base_sha"].present?
      run.update!(briefing: run.briefing.merge("base_sha" => workspace.head))
    end

    # Raises KeyError when no base commit was remembered for this run.
    def base_sha = run.briefing.fetch("base_sha")

    # Open a draft PR for the card's branch via `gh` unless the card already
    # has one; a failure to create it is logged, not raised.
    def ensure_pull_request(workspace)
      return if card.pr_url.present?
      out, status = Open3.capture2e(
        "gh", "pr", "create", "--draft",
        "--head", card.branch_name,
        "--title", "##{card.number} #{card.title}",
        "--body", "Automated work by Cardinal card ##{card.number}'s agent.\n\n#{card.description}",
        chdir: workspace.path.to_s
      )
      if status.success? && (url = out[%r{https://github\.com/\S+/pull/\d+}])
        card.update!(pr_url: url, pr_state: "draft")
        card.log!("artifact_created", run: run, text: "Draft PR opened: #{url}")
      else
        card.log!("progress", run: run, text: "Branch pushed (PR not created: #{out.truncate(120)})")
      end
    end

    # Prompt for a run that goes straight to execution.
    def briefing_prompt
      <<~PROMPT
        You are the dedicated worker agent for card ##{card.number} of the Cardinal board: "#{card.title}".

        ## Brief
        #{card.description.presence || "(no description — infer scope from the title and conversation)"}

        #{"## Brief from planning (authoritative — refined with the user)\n#{planning_brief}\n" if planning_brief}
        ## Card conversation so far
        #{conversation_excerpt.presence || "(none)"}

        #{"## Column instructions\n#{column.instructions}\n" if column.instructions.present?}
        #{EXECUTE_RULES}
      PROMPT
    end

    # Prompt for the read-only plan pass of a plan-gated column.
    def plan_prompt
      <<~PROMPT
        You are the dedicated worker agent for card ##{card.number} of the Cardinal board: "#{card.title}".
        You are in READ-ONLY PLAN MODE. Do not modify anything yet.

        ## Brief
        #{card.description.presence || "(no description — infer scope from the title and conversation)"}

        #{"## Brief from planning (authoritative — refined with the user)\n#{planning_brief}\n" if planning_brief}
        ## Card conversation so far
        #{conversation_excerpt.presence || "(none)"}

        #{"## Column instructions\n#{column.instructions}\n" if column.instructions.present?}
        Explore the repository as needed, then present a short numbered plan-of-attack
        (files you'll touch, approach, how you'll verify) and stop. The user will approve
        or redirect before any changes are made.

        IMPORTANT: you are read-only ONLY during this planning pass. Once the plan is
        approved you will have the full toolset — shell, git, file editing — in this same
        session. Plan every step as something YOU will do (including git merges and
        running commands); never ask who should run a command or plan around not having
        a shell.
      PROMPT
    end

    # The planning assistant's distilled "Ready for execution" brief, if the
    # conversation produced one — the most load-bearing artifact of planning.
    # Memoized (including a nil result); the newest matching message wins.
    def planning_brief
      return @planning_brief if defined?(@planning_brief)
      @planning_brief = card.events.where(kind: "assistant_message")
                            .order(:id).filter_map(&:text).reverse_each
                            .find { |t| t.match?(/ready for execution/i) }
    end

    # The last 30 conversation-level events that have text, oldest first, as
    # "actor: text" lines.
    def conversation_excerpt
      # Explicit order: "last 30" is only meaningful on a chronologically
      # sorted list, and an unordered query guarantees no row order.
      card.events.conversation.order(:id)
          .filter_map { |e| "#{e.actor}: #{e.text}" if e.text }.last(30).join("\n")
    end
  end
end