cardinal-ai 0.0.1 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +50 -29
- data/Rakefile +6 -0
- data/app/assets/stylesheets/application.css +10 -0
- data/app/assets/stylesheets/cardinal.css +530 -0
- data/app/controllers/application_controller.rb +7 -0
- data/app/controllers/boards_controller.rb +5 -0
- data/app/controllers/cards_controller.rb +129 -0
- data/app/controllers/columns_controller.rb +130 -0
- data/app/controllers/messages_controller.rb +25 -0
- data/app/controllers/runs_controller.rb +58 -0
- data/app/helpers/application_helper.rb +35 -0
- data/app/javascript/application.js +2 -0
- data/app/javascript/controllers/application.js +7 -0
- data/app/javascript/controllers/autosave_controller.js +43 -0
- data/app/javascript/controllers/board_column_controller.js +96 -0
- data/app/javascript/controllers/clipboard_controller.js +18 -0
- data/app/javascript/controllers/composer_controller.js +10 -0
- data/app/javascript/controllers/index.js +3 -0
- data/app/javascript/controllers/modal_controller.js +45 -0
- data/app/javascript/controllers/reveal_controller.js +15 -0
- data/app/javascript/controllers/scroll_controller.js +44 -0
- data/app/javascript/controllers/tags_controller.js +49 -0
- data/app/javascript/controllers/theme_controller.js +43 -0
- data/app/javascript/controllers/tooltip_controller.js +37 -0
- data/app/jobs/ai_task_job.rb +26 -0
- data/app/jobs/application_job.rb +7 -0
- data/app/jobs/assistant_reply_job.rb +132 -0
- data/app/jobs/mark_pr_ready_job.rb +18 -0
- data/app/jobs/merge_pr_job.rb +27 -0
- data/app/jobs/resume_run_job.rb +30 -0
- data/app/jobs/start_run_job.rb +13 -0
- data/app/mailers/application_mailer.rb +4 -0
- data/app/models/agent_session.rb +8 -0
- data/app/models/application_record.rb +3 -0
- data/app/models/artifact.rb +8 -0
- data/app/models/board.rb +92 -0
- data/app/models/card.rb +83 -0
- data/app/models/column.rb +134 -0
- data/app/models/event.rb +44 -0
- data/app/models/run.rb +28 -0
- data/app/services/agent/runner.rb +379 -0
- data/app/services/agent/workspace.rb +138 -0
- data/app/services/card_transition.rb +97 -0
- data/app/services/claude_cli.rb +89 -0
- data/app/services/rules/compiler.rb +55 -0
- data/app/services/rules.rb +92 -0
- data/app/services/run_sweeper.rb +53 -0
- data/app/views/boards/show.html.erb +79 -0
- data/app/views/cards/_card.html.erb +48 -0
- data/app/views/cards/_detail.html.erb +190 -0
- data/app/views/cards/_tag_picker.html.erb +12 -0
- data/app/views/cards/new.html.erb +35 -0
- data/app/views/cards/show.html.erb +3 -0
- data/app/views/columns/_column.html.erb +25 -0
- data/app/views/columns/edit.html.erb +146 -0
- data/app/views/events/_event.html.erb +29 -0
- data/app/views/layouts/application.html.erb +46 -0
- data/app/views/layouts/mailer.html.erb +13 -0
- data/app/views/layouts/mailer.text.erb +1 -0
- data/app/views/pwa/manifest.json.erb +22 -0
- data/app/views/pwa/service-worker.js +26 -0
- data/bin/rails +4 -0
- data/bin/rake +4 -0
- data/cardinal.md +695 -0
- data/config/application.rb +60 -0
- data/config/boot.rb +13 -0
- data/config/bundler-audit.yml +5 -0
- data/config/cable.yml +13 -0
- data/config/ci.rb +20 -0
- data/config/credentials.yml.enc +1 -0
- data/config/database.yml +31 -0
- data/config/environment.rb +5 -0
- data/config/environments/development.rb +78 -0
- data/config/environments/production.rb +89 -0
- data/config/environments/test.rb +53 -0
- data/config/importmap.rb +6 -0
- data/config/initializers/assets.rb +7 -0
- data/config/initializers/cardinal_bootstrap.rb +12 -0
- data/config/initializers/cardinal_instance.rb +20 -0
- data/config/initializers/content_security_policy.rb +29 -0
- data/config/initializers/filter_parameter_logging.rb +8 -0
- data/config/initializers/inflections.rb +16 -0
- data/config/initializers/run_sweeper.rb +17 -0
- data/config/locales/en.yml +31 -0
- data/config/puma.rb +42 -0
- data/config/routes.rb +22 -0
- data/config/storage.yml +27 -0
- data/config.ru +6 -0
- data/db/migrate/20260703000001_create_cardinal_schema.rb +78 -0
- data/db/migrate/20260703000002_add_agent_runner_fields.rb +7 -0
- data/db/migrate/20260704000001_add_parent_to_cards.rb +5 -0
- data/db/migrate/20260704000002_add_assistant_session_to_cards.rb +5 -0
- data/db/seeds.rb +13 -0
- data/docker/agent/Dockerfile +16 -0
- data/exe/cardinal +111 -0
- data/lib/cardinal/version.rb +1 -1
- data/public/400.html +135 -0
- data/public/404.html +135 -0
- data/public/406-unsupported-browser.html +135 -0
- data/public/422.html +135 -0
- data/public/500.html +135 -0
- data/public/icon.png +0 -0
- data/public/icon.svg +3 -0
- data/public/robots.txt +1 -0
- data/vendor/javascript/sortablejs.js +3378 -0
- metadata +236 -9
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# A board column. Every column has an archetype (inbox, planning, execution,
# review or terminal) and a `policy` blob that holds its editable behavior
# configuration.
class Column < ApplicationRecord
  ARCHETYPES = %w[inbox planning execution review terminal].freeze

  belongs_to :board
  has_many :cards, -> { order(:position) }, dependent: :restrict_with_error

  enum :archetype, ARCHETYPES.index_by(&:itself)

  # Archetypes are TEMPLATES, not magic: choosing one stamps concrete,
  # editable values into the policy fields. Nothing falls back to these at
  # runtime — what the gear modal shows is everything there is.
  ARCHETYPE_TEMPLATES = {
    "inbox" => {},
    "planning" => {
      "on_entry" => [{ "action" => "assistant_greeting" }],
      "on_entry_text" => "The planning assistant reads the card and opens the discussion.",
      "instructions" => "Drive toward crisp acceptance criteria. Open with the 2-3 sharpest questions."
    },
    "execution" => {
      "on_entry" => [{ "action" => "start_agent_run" }],
      "on_entry_text" => "Assign a dedicated worker agent to the card and start a run."
    },
    "review" => {},
    "terminal" => {
      "on_entry" => [{ "action" => "merge_pr" }],
      "on_entry_text" => "Merge the card's PR and ship it."
    }
  }.freeze

  before_create :seed_archetype_template

  # The template hash for this column's archetype ({} when there is none).
  def archetype_template = ARCHETYPE_TEMPLATES.fetch(archetype, {})

  # The policy blob is the column's entire behavior configuration (§1, §14.3).
  store_accessor :policy, :instructions, :model, :effort, :concurrency_limit,
                 :plan_approval, :budget_per_run_cents, :timeout_minutes,
                 :max_turns, :tools, :on_entry, :on_success, :color, :arrivals,
                 :accepts_from

  # Only ever emit a validated hex color into inline styles.
  # Returns the color when it is exactly "#" plus six hex digits, else nil.
  def safe_color
    color if color.to_s.match?(/\A#\h{6}\z/)
  end

  # Does any AI service this column? Explicit per-column switch (default ON
  # for back-compat); the inbox/Tasks intake is never AI, unconditionally.
  # When false the column is inert AI-wise: no assistant, no worker runs,
  # no ai_task rules — cards there are human work.
  def ai?
    return false if inbox?

    # Cast first so a form-submitted "false"/"0" switches AI off just like a
    # real `false`. A missing or blank value casts to nil and therefore stays
    # ON, which is why this compares against `false` instead of using
    # plain truthiness.
    ActiveModel::Type::Boolean.new.cast(policy["ai"]) != false
  end

  # Which columns may move cards INTO this one (§ accept policy, card #15).
  # Stored as an array of column-id strings. EXPLICIT ONLY: an empty list
  # means this column accepts from nowhere — there is no permissive default.
  def accepts?(source_column)
    Array(accepts_from).map(&:to_s).include?(source_column.id.to_s)
  end

  # Start the next queued card when a run slot frees up. A queued card whose
  # run parked and already has its answer recorded resumes instead of
  # starting fresh.
  def kick_queue
    return unless ai?
    return if at_wip_limit?

    next_card = cards.where(status: "queued").order(:position).first
    return unless next_card

    parked = next_card.runs.where(status: "needs_input").order(:id).last
    if parked&.briefing&.key?("pending_resume")
      ResumeRunJob.perform_later(parked.id, "")
    else
      StartRunJob.perform_later(next_card.id)
    end
  end

  # "claude-sonnet-4-6" → "sonnet", for compact chips on card faces.
  # Falls back to the raw model value when the pattern does not match.
  def model_short
    model.to_s[/claude-([a-z]+)/, 1] || model
  end

  validates :name, presence: true
  validates :position, presence: true

  def running_count = cards.where(status: "working").count
  def queued_count = cards.where(status: "queued").count

  # True only for execution columns that have a concurrency limit set and
  # already have at least that many cards in the "working" status.
  def at_wip_limit?
    execution? && concurrency_limit.present? && running_count >= concurrency_limit.to_i
  end

  # The built-in role contract for AI servicing this archetype — shown
  # read-only in the gear modal so the Instructions field is understood as
  # ADDING to this, never replacing it. Enforced in code, not editable.
  BUILT_IN_ROLES = {
    "planning" => "Plans only, never implements: read-only tools (physically cannot change files), " \
                  "drives toward a Ready-for-execution brief, and hands off — approval means " \
                  "\"finalize the brief\", not \"do it\".",
    "execution" => "Full toolset in an isolated checkout of the card's branch. Commits as it goes but " \
                   "never pushes (the runner pushes); merges the default branch itself on conflict; " \
                   "parks with a QUESTION: when genuinely blocked; ends with a final report."
  }.freeze

  def built_in_role = BUILT_IN_ROLES[archetype]

  # What "Use AI" concretely means here — the §5 tier distinction, visible.
  AI_MODES = {
    "planning" => "a shared planning assistant joins each card's conversation",
    "execution" => "a dedicated worker agent is assigned to each card",
    "review" => "allow AI on-entry rules (ai_task) in this column",
    "terminal" => "allow AI on-entry rules (ai_task) in this column"
  }.freeze

  def ai_mode_description = AI_MODES[archetype]

  # Stamp template values into any policy field the creator left blank.
  #
  # Values are deep-copied: ARCHETYPE_TEMPLATES is only frozen at the top
  # level, so handing out its nested arrays/hashes directly would let an
  # in-place edit of one column's policy leak into the shared template (and
  # from there into every column created afterwards).
  def seed_archetype_template
    archetype_template.each do |key, value|
      policy[key] = value.deep_dup if policy[key].blank?
    end
  end

  # One-line consequence shown while dragging a card over this column (§14.1).
  def drag_hint
    case archetype
    when "inbox" then "Parked — no agent activity"
    when "planning" then "The board assistant will join the discussion"
    when "execution" then "An agent will be assigned and start work"
    when "review" then "Work stops — ready for your verdict"
    when "terminal" then "Closes it — PR merged and branch deleted, if there is one"
    end
  end
end
|
data/app/models/event.rb
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# One entry on a card's timeline: messages, status changes, tool traffic,
# run lifecycle markers and errors.
class Event < ApplicationRecord
  KINDS = %w[
    user_message agent_message assistant_message
    status_change column_move move_rejected plan_proposed plan_approved
    question answer progress
    tool_call tool_result artifact_created
    run_started run_finished final_report error
  ].freeze

  # Which timeline zoom level an event first appears at (§7).
  CONVERSATION_KINDS = %w[
    user_message agent_message assistant_message question answer
    plan_proposed plan_approved final_report error
    column_move move_rejected
  ].freeze

  # These kinds mean the AI has delivered what the typing indicator promised.
  RESOLVES_THINKING = %w[assistant_message final_report question plan_proposed error].freeze

  # Kinds that change what a card FACE shows (progress lines, thinking chip,
  # replied chip) — the board must morph on these, not just the open modal.
  REFRESHES_BOARD = [*%w[progress run_started run_finished], *RESOLVES_THINKING].freeze

  belongs_to :card
  belongs_to :run, optional: true

  validates :kind, inclusion: { in: KINDS }
  validates :actor, presence: true

  scope :conversation, -> { where(kind: CONVERSATION_KINDS) }
  scope :activity, -> { where.not(kind: %w[tool_call tool_result]) }

  # Callbacks are registered in the same order as before: live append first,
  # then the board refresh, then the typing-indicator removal.
  after_create_commit :append_to_open_card_modals, unless: :user_authored?
  after_create_commit :refresh_board_face, if: :refreshes_board?
  after_create_commit :clear_typing_indicator, if: :resolves_thinking?

  # The event's text, read from the payload's "text" key.
  def text
    payload["text"]
  end

  private

  # User-authored events are skipped by the live append — they arrive via the
  # form's own redirect re-render.
  def user_authored?
    actor == "user"
  end

  def refreshes_board?
    REFRESHES_BOARD.include?(kind)
  end

  def resolves_thinking?
    RESOLVES_THINKING.include?(kind)
  end

  # Live-append this event into any open card modal.
  def append_to_open_card_modals
    broadcast_append_to card, target: "card_events", partial: "events/event", locals: { event: self }
  end

  def refresh_board_face
    card.broadcast_refresh_to card.board
  end

  def clear_typing_indicator
    broadcast_remove_to card, target: "typing-indicator"
  end
end
|
data/app/models/run.rb
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# One agent run belonging to an agent session (and, through it, a card).
class Run < ApplicationRecord
  STATUSES = %w[queued running needs_input succeeded failed cancelled].freeze

  belongs_to :agent_session
  has_one :card, through: :agent_session
  has_many :artifacts, dependent: :destroy
  has_many :events, dependent: :nullify

  enum :status, STATUSES.index_by(&:itself)

  # A budget/timeout outcome, whether the segment parked (needs_input) or was
  # recorded as a failure. The parked message ("…turn budget mid-work…") lives
  # on the last question event; the failure message (failure_reason) lives on
  # result_summary. Either signals "try again with a fresh budget," not a bug.
  EXHAUSTION = /turn budget|max-turns budget|timed out|timeout/i

  # True once the run has reached a terminal status.
  def finished?
    succeeded? || failed? || cancelled?
  end

  # Did this run stop because it ran out of turns or time? Looks at the last
  # question event while parked, otherwise at result_summary.
  def exhausted?
    message =
      if needs_input?
        events.where(kind: "question").order(:id).last&.text
      else
        result_summary
      end
    EXHAUSTION.match?(message.to_s)
  end

  # A run the user can relaunch from the work panel: an execution-column run
  # that parked or failed on its budget/timeout. Restart resumes the surviving
  # session (fresh budget) or starts a clean run when no session remains.
  def restartable?
    return false unless card.column.execution?
    return false unless needs_input? || failed?

    exhausted?
  end
end
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
module Agent
  # Drives one Run through its phases (cardinal.md §4, §11, §17):
  #
  #   start  → plan phase (read-only, --permission-mode plan) when the column
  #            requires approval, else straight to execute
  #   park   → plan_proposed or QUESTION: → run + card go needs_input
  #   resume → same claude session (--resume) with the user's answer,
  #            approval, or plan feedback
  #   finish → push branch, ensure draft PR, final report, work_complete
  #
  # The subprocess is the Claude Agent runtime (`claude -p`, stream-json).
  # Heartbeats are written while streaming; RunSweeper reaps silent runs.
  class Runner
    STRIP_ENV = %w[ANTHROPIC_API_KEY CLAUDECODE CLAUDE_CODE_ENTRYPOINT GH_TOKEN GITHUB_TOKEN].freeze
    HEARTBEAT_EVERY = 10 # seconds
    PLAN_TURNS = 20
    DEFAULT_EXECUTE_TURNS = 80 # turn caps are runaway guards, not work limits
    DEFAULT_TIMEOUT_MINUTES = 30 # used when the column sets no timeout
    STDERR_TAIL_LINES = 4 # how many trailing stderr lines are kept for failure messages

    EXECUTE_RULES = <<~RULES.freeze
      ## Rules
      - You have the FULL toolset now: shell (bash, git), file editing, everything. Run
        commands yourself — never ask who should run them.
      - Work only inside this repository checkout (you are already on the card's branch).
      - If the branch conflicts with origin's default branch, merge it into the card
        branch yourself and resolve the conflicts as part of the work.
      - Commit your work as you go with clear messages. Do NOT push — the runner pushes for you.
      - Stay strictly within the card's scope. Prefer the smallest reasonable interpretation and note assumptions.
      - If you are blocked on a decision only the user can make, output a single line starting with
        "QUESTION:" followed by the question, then stop immediately. Do not guess on genuinely ambiguous choices.
      - Finish with a concise report: what you did, what to check, any open questions.
    RULES

    def self.start(run) = new(run).start
    def self.resume(run, message, approve: false) = new(run).resume(message, approve: approve)

    attr_reader :run, :card, :column

    def initialize(run)
      @run = run
      @card = run.card
      @column = card.column
    end

    # Begin the run: plan phase first when the column is plan-gated, otherwise
    # straight to execution. Any StandardError is recorded as a run failure,
    # and an execution column's queue is kicked afterwards either way.
    def start
      begin_segment!(first: true)
      if plan_gated?
        run.update!(phase: "plan")
        stream_agent(prompt: plan_prompt, mode: "plan")
      else
        stream_agent(prompt: briefing_prompt, mode: "execute")
      end
    rescue => e
      record_failure(e)
    ensure
      column.kick_queue if column.execution?
    end

    # Continue a parked run in the same agent session.
    #
    # message - the user's reply (plan feedback or an answer to a question).
    # approve - when true and the run is in the plan phase, switches the run
    #           to the execute phase instead of asking for a revised plan.
    def resume(message, approve: false)
      begin_segment!
      if run.phase == "plan" && approve
        run.update!(phase: "execute")
        stream_agent(prompt: "Your plan is approved — execute it now.\n\n#{EXECUTE_RULES}",
                     mode: "execute", resuming: true)
      elsif run.phase == "plan"
        stream_agent(prompt: "Feedback on your plan:\n\n#{message}\n\nRevise the plan accordingly, present it, and stop again for approval. Stay in read-only mode.",
                     mode: "plan", resuming: true)
      else
        stream_agent(prompt: "Answer from the user:\n\n#{message}\n\nContinue the work. The same rules apply (commit, don't push, QUESTION: if blocked again, final report when done).",
                     mode: "execute", resuming: true)
      end
    rescue => e
      record_failure(e)
    ensure
      column.kick_queue if column.execution?
    end

    private

    # Whether the column's plan_approval policy value casts to true.
    def plan_gated?
      ActiveModel::Type::Boolean.new.cast(column.plan_approval)
    end

    # Mark run and card as active and log the start/resume on the timeline.
    def begin_segment!(first: false)
      run.update!(status: "running", started_at: run.started_at || Time.current, heartbeat_at: Time.current)
      card.update!(status: "working")
      if first
        card.log!("run_started", run: run, text: "Run ##{run.id} started")
      else
        card.log!("progress", actor: "agent", run: run, text: "Run resumed")
      end
    end

    # Run one agent segment: spawn the CLI, stream its stream-json output into
    # timeline events, then hand the collected result to the matching
    # conclude_* step.
    #
    # prompt   - text passed to `claude -p`.
    # mode     - "plan", "plan_wrap" or "execute".
    # resuming - attach to the existing workspace/session instead of
    #            provisioning a fresh one.
    def stream_agent(prompt:, mode:, resuming: false)
      workspace = resuming ? Workspace.attach(card) : Workspace.provision(card)
      remember_base_sha(workspace) if mode == "execute"

      cmd = agent_command(prompt: prompt, mode: mode, resuming: resuming)

      result = {}
      @base_in, @base_out = run.input_tokens, run.output_tokens
      @seg_in = @seg_out = 0
      # Reset per segment: this method is re-entered for the plan wrap-up
      # pass, which must not inherit an earlier segment's timeout flag.
      @timed_out = false
      env = STRIP_ENV.index_with { nil }
      spawn_cmd, spawn_opts = workspace.agent_spawn(cmd)
      Open3.popen3(env, *spawn_cmd, **spawn_opts) do |stdin, stdout, stderr, wait|
        stdin.close
        timeout_min = (column.timeout_minutes.presence || DEFAULT_TIMEOUT_MINUTES).to_i
        err_lines = []
        watchdog = drain = nil
        begin
          run.agent_session.update!(status: "ready", config: run.agent_session.config.merge("pid" => wait.pid))
          watchdog = Thread.new do
            sleep timeout_min * 60
            @timed_out = true
            terminate_agent(wait.pid)
          end
          # Keep only the last few stderr lines; they end up in failure messages.
          drain = Thread.new do
            stderr.each_line do |l|
              err_lines << l.strip
              err_lines.shift while err_lines.size > STDERR_TAIL_LINES
            end
          end
          pump_stdout(stdout, result)
          drain.join(1)
        rescue StandardError
          # Streaming failed on our side. popen3's block form waits for the
          # child when the block exits, so stop the agent explicitly rather
          # than blocking on a process nobody is reading from any more.
          drain&.kill
          terminate_agent(wait.pid)
          raise
        ensure
          # Always stop the watchdog. Left running after an error it would
          # wake up later and signal a PID that may belong to another process.
          watchdog&.kill
        end
        result[:exit_status] = wait.value
        result[:stderr] = err_lines.join(" | ")
        result[:timed_out] = @timed_out
        result[:timeout_min] = timeout_min
      end

      mode == "execute" ? conclude_execute(workspace, result) : conclude_plan(result)
    end

    # Build the `claude` argv for one segment.
    def agent_command(prompt:, mode:, resuming:)
      cmd = ["claude", "-p", prompt, "--output-format", "stream-json", "--verbose",
             "--permission-mode", "bypassPermissions"]
      case mode
      when "plan"
        # Read-only exploration for the plan phase. (--permission-mode plan
        # hangs headless: ExitPlanMode waits for an approval that never comes.)
        cmd += ["--max-turns", PLAN_TURNS.to_s, "--tools", "Read,Glob,Grep"]
      when "plan_wrap"
        # Turn-capped plan: force the plan out of the context already gathered.
        cmd += ["--max-turns", "3", "--tools", ""]
      else
        cmd += ["--max-turns", (column.max_turns.presence || DEFAULT_EXECUTE_TURNS).to_s]
      end
      cmd += ["--model", column.model] if column.model.present?
      cmd += ["--effort", column.effort] if column.effort.present?
      cmd += ["--resume", run.external_session_id] if resuming && run.external_session_id.present?
      cmd
    end

    # Read the agent's stdout line by line, feeding each JSON line to
    # handle_stream_event and writing a heartbeat at most every
    # HEARTBEAT_EVERY seconds. Lines that are not valid JSON are skipped.
    def pump_stdout(stdout, result)
      last_beat = Time.current
      stdout.each_line do |line|
        if Time.current - last_beat > HEARTBEAT_EVERY
          # Heartbeat + live token tally: tokens survive even if this
          # segment is killed before its result event.
          run.update_columns(heartbeat_at: Time.current,
                             input_tokens: @base_in + @seg_in,
                             output_tokens: @base_out + @seg_out)
          last_beat = Time.current
        end
        begin
          handle_stream_event(JSON.parse(line), result)
        rescue JSON::ParserError
          next
        end
      end
    end

    # Best-effort TERM: the process may already be gone.
    def terminate_agent(pid)
      Process.kill("TERM", pid)
    rescue SystemCallError
      nil
    end

    # Translate one stream-json event into timeline entries, token tallies,
    # or (for the final "result" event) fields on the result hash.
    def handle_stream_event(json, result)
      case json["type"]
      when "system"
        if json["subtype"] == "init"
          run.update_columns(external_session_id: json["session_id"]) if json["session_id"].present?
          card.log!("progress", actor: "agent", run: run, text: "Agent session started (#{json["model"]})")
        end
      when "assistant"
        if (usage = json.dig("message", "usage"))
          @seg_in += usage["input_tokens"].to_i
          @seg_out += usage["output_tokens"].to_i
        end
        Array(json.dig("message", "content")).each do |block|
          case block["type"]
          when "text"
            card.log!("progress", actor: "agent", run: run, text: block["text"].to_s.truncate(400)) if block["text"].present?
          when "tool_use"
            card.log!("tool_call", actor: "agent", run: run,
                      text: "#{block["name"]}: #{block["input"].to_json.truncate(160)}")
          end
        end
      when "result"
        result[:success] = json["subtype"] == "success" && !json["is_error"]
        result[:subtype] = json["subtype"]
        result[:report] = json["result"].to_s
        result[:cost] = json["total_cost_usd"]
        result[:turns] = json["num_turns"]
        result[:input_tokens] = json.dig("usage", "input_tokens")
        result[:output_tokens] = json.dig("usage", "output_tokens")
      end
    end

    # Finish a plan (or plan_wrap) segment: park with the proposed plan, or
    # record a failure. A turn-capped first attempt gets exactly one wrap-up pass.
    def conclude_plan(result)
      accumulate_usage(result)
      unless result[:success] && result[:report].present?
        # Turn-capped mid-exploration: one tool-less wrap-up pass to force the
        # plan out of the context it already gathered.
        if result[:subtype] == "error_max_turns" && run.external_session_id.present? && !@plan_wrap_attempted
          @plan_wrap_attempted = true
          card.log!("progress", actor: "agent", run: run, text: "Hit the exploration budget — wrapping up the plan from what was learned")
          return stream_agent(prompt: "You have hit your exploration limit. Present your best plan-of-attack now, using only what you have already learned. Do not use any tools.",
                              mode: "plan_wrap", resuming: true)
        end
        return record_failure(RuntimeError.new("plan phase failed — #{failure_reason(result)}"))
      end
      park!("plan_proposed", result[:report],
            note: "Plan proposed — approve it in the work panel, or reply to redirect.")
    end

    # Finish an execute segment: park on budget exhaustion or a QUESTION:,
    # fail on any other unsuccessful result, otherwise push, ensure a PR and
    # mark the run succeeded.
    def conclude_execute(workspace, result)
      accumulate_usage(result)
      unless result[:success]
        # Budget exhaustion isn't failure — park and offer to continue (§8).
        # The session survives; an answer resumes it with a fresh turn budget.
        if result[:subtype] == "error_max_turns" && run.external_session_id.present?
          commits = salvage_commits(workspace)
          return park!("question",
                       "I've used this segment's turn budget mid-work#{" — #{commits} commit(s) so far are saved to the branch" if commits.to_i.positive?}. Reply (anything) to continue with a fresh budget, or cancel the run.",
                       note: "Agent paused at the turn budget — reply on the card to continue.")
        end
        salvage_commits(workspace)
        return record_failure(RuntimeError.new(failure_reason(result)))
      end

      report = result[:report].to_s
      if report.lstrip.start_with?("QUESTION:")
        return park!("question", report.lstrip.delete_prefix("QUESTION:").strip,
                     note: "Agent is waiting on your answer — reply on the card.")
      end

      commits = workspace.commits_since(base_sha)
      if commits.any?
        workspace.push!
        ensure_pull_request(workspace)
        run.artifacts.create!(kind: "pull_request", name: "PR for #{card.branch_name}",
                              payload: { url: card.pr_url, commits: commits })
      elsif card.pr_url.blank? && workspace.ahead_of_default?
        # No new commits this run, but the branch carries earlier work (e.g. a
        # salvage commit) that still needs a PR.
        workspace.push!
        ensure_pull_request(workspace)
      end

      run.update!(status: "succeeded", finished_at: Time.current,
                  result_summary: report.presence&.truncate(2000))
      card.log!("final_report", actor: "agent", run: run,
                text: [report.presence || "Run finished with no report.",
                       commits.any? ? "\n**Commits (#{commits.size}):**\n#{commits.map { |c| "- #{c}" }.join("\n")}" : "\n_No commits were made._"].join("\n"))
      card.update!(status: "work_complete")
      card.log!("run_finished", run: run,
                text: "Run succeeded — #{result[:turns]} turns, $#{run.cost.round(2)} total")
    end

    # Put run and card into needs_input, logging the agent's message under
    # `kind` followed by a status_change note.
    def park!(kind, text, note:)
      run.update!(status: "needs_input")
      card.log!(kind, actor: "agent", run: run, text: text)
      card.update!(status: "needs_input")
      card.log!("status_change", run: run, text: note)
    end

    # Say WHY, not just that it died: timeout vs turn cap vs error vs crash.
    def failure_reason(result)
      return "timed out after #{result[:timeout_min]} minutes and was stopped — raise the column's timeout for bigger tasks, or split the card" if result[:timed_out]
      return "hit this segment's max-turns budget — raise Max turns in the column's gear settings, or split the card" if result[:subtype] == "error_max_turns"

      parts = ["agent did not finish cleanly (exit #{result[:exit_status]&.exitstatus || "?"})"]
      parts << "last output: #{result[:report].truncate(300)}" if result[:report].present?
      parts << "stderr: #{result[:stderr].truncate(300)}" if result[:stderr].present?
      parts.join(" — ")
    end

    # A failed/timed-out segment may still hold real local commits; push them
    # so the branch (and any PR) keeps the partial progress instead of the
    # next provision's reset wiping it.
    #
    # Returns the number of commits pushed; 0 when there were none or when
    # preserving them raised (the error is logged on the card, not re-raised).
    def salvage_commits(workspace)
      commits = workspace.commits_since(base_sha)
      return 0 if commits.empty?

      workspace.push!
      card.log!("progress", run: run, text: "Partial work preserved: #{commits.size} commit(s) pushed to #{card.branch_name}")
      commits.size
    rescue => e
      card.log!("progress", run: run, text: "Could not preserve partial work: #{e.message.truncate(120)}")
      0
    end

    # Mark run and card failed and log the error on the timeline.
    def record_failure(error)
      run.update!(status: "failed", finished_at: Time.current,
                  result_summary: error.message.truncate(500))
      card.update!(status: "failed")
      card.log!("error", run: run, text: "Run failed: #{error.message.truncate(500)}")
    end

    # Cost/tokens accumulate across segments of the same run (plan + execute +
    # resumes). Tokens were live-tallied during the stream; the result event's
    # figures are authoritative when present, the live tally is the fallback
    # for killed segments (which never emit a result).
    def accumulate_usage(result)
      base_in = @base_in || run.input_tokens
      base_out = @base_out || run.output_tokens
      run.update!(cost: run.cost + (result[:cost] || 0),
                  input_tokens: base_in + (result[:input_tokens] || @seg_in || 0),
                  output_tokens: base_out + (result[:output_tokens] || @seg_out || 0))
      @base_in = @base_out = @seg_in = @seg_out = nil
    end

    # Record the workspace HEAD as the run's base commit, once per run.
    def remember_base_sha(workspace)
      return if run.briefing["base_sha"].present?

      run.update!(briefing: run.briefing.merge("base_sha" => workspace.head))
    end

    # Raises KeyError when no base commit has been remembered for this run.
    def base_sha = run.briefing.fetch("base_sha")

    # Open a draft PR for the card's branch via `gh`, unless the card already
    # has a PR URL. A failed `gh` call is logged as progress, not raised.
    def ensure_pull_request(workspace)
      return if card.pr_url.present?

      out, status = Open3.capture2e(
        "gh", "pr", "create", "--draft",
        "--head", card.branch_name,
        "--title", "##{card.number} #{card.title}",
        "--body", "Automated work by Cardinal card ##{card.number}'s agent.\n\n#{card.description}",
        chdir: workspace.path.to_s
      )
      if status.success? && (url = out[%r{https://github\.com/\S+/pull/\d+}])
        card.update!(pr_url: url, pr_state: "draft")
        card.log!("artifact_created", run: run, text: "Draft PR opened: #{url}")
      else
        card.log!("progress", run: run, text: "Branch pushed (PR not created: #{out.truncate(120)})")
      end
    end

    # Prompt for an execute segment started without a plan phase.
    def briefing_prompt
      <<~PROMPT
        You are the dedicated worker agent for card ##{card.number} of the Cardinal board: "#{card.title}".

        ## Brief
        #{card.description.presence || "(no description — infer scope from the title and conversation)"}

        #{"## Brief from planning (authoritative — refined with the user)\n#{planning_brief}\n" if planning_brief}
        ## Card conversation so far
        #{conversation_excerpt.presence || "(none)"}

        #{"## Column instructions\n#{column.instructions}\n" if column.instructions.present?}
        #{EXECUTE_RULES}
      PROMPT
    end

    # Prompt for the read-only plan phase.
    def plan_prompt
      <<~PROMPT
        You are the dedicated worker agent for card ##{card.number} of the Cardinal board: "#{card.title}".
        You are in READ-ONLY PLAN MODE. Do not modify anything yet.

        ## Brief
        #{card.description.presence || "(no description — infer scope from the title and conversation)"}

        #{"## Brief from planning (authoritative — refined with the user)\n#{planning_brief}\n" if planning_brief}
        ## Card conversation so far
        #{conversation_excerpt.presence || "(none)"}

        #{"## Column instructions\n#{column.instructions}\n" if column.instructions.present?}
        Explore the repository as needed, then present a short numbered plan-of-attack
        (files you'll touch, approach, how you'll verify) and stop. The user will approve
        or redirect before any changes are made.

        IMPORTANT: you are read-only ONLY during this planning pass. Once the plan is
        approved you will have the full toolset — shell, git, file editing — in this same
        session. Plan every step as something YOU will do (including git merges and
        running commands); never ask who should run a command or plan around not having
        a shell.
      PROMPT
    end

    # The planning assistant's distilled "Ready for execution" brief, if the
    # conversation produced one — the most load-bearing artifact of planning.
    # Picks the most recent assistant message mentioning "ready for execution".
    def planning_brief
      return @planning_brief if defined?(@planning_brief)

      @planning_brief = card.events.where(kind: "assistant_message")
                            .order(:id).filter_map(&:text).reverse
                            .find { |t| t.match?(/ready for execution/i) }
    end

    # Up to the last 30 conversation-level events as "actor: text" lines.
    # Ordered by id explicitly, matching planning_brief, so "last" does not
    # depend on whatever order the database happens to return rows in.
    def conversation_excerpt
      card.events.conversation.order(:id)
          .filter_map { |e| "#{e.actor}: #{e.text}" if e.text }
          .last(30).join("\n")
    end
  end
end
|