rubyn-code 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. checksums.yaml +4 -4
  2. data/README.md +269 -467
  3. data/db/migrations/009_create_teams.sql +6 -6
  4. data/db/migrations/011_fix_mailbox_messages_columns.rb +35 -0
  5. data/db/migrations/012_expand_mailbox_message_types.rb +37 -0
  6. data/exe/rubyn-code +1 -1
  7. data/lib/rubyn_code/agent/RUBYN.md +17 -0
  8. data/lib/rubyn_code/agent/conversation.rb +68 -19
  9. data/lib/rubyn_code/agent/loop.rb +312 -54
  10. data/lib/rubyn_code/agent/loop_detector.rb +6 -6
  11. data/lib/rubyn_code/auth/RUBYN.md +19 -0
  12. data/lib/rubyn_code/auth/oauth.rb +40 -35
  13. data/lib/rubyn_code/auth/server.rb +16 -12
  14. data/lib/rubyn_code/auth/token_store.rb +22 -22
  15. data/lib/rubyn_code/autonomous/RUBYN.md +14 -0
  16. data/lib/rubyn_code/autonomous/daemon.rb +115 -79
  17. data/lib/rubyn_code/autonomous/idle_poller.rb +4 -8
  18. data/lib/rubyn_code/autonomous/task_claimer.rb +11 -11
  19. data/lib/rubyn_code/background/RUBYN.md +13 -0
  20. data/lib/rubyn_code/background/notifier.rb +0 -2
  21. data/lib/rubyn_code/background/worker.rb +60 -15
  22. data/lib/rubyn_code/cli/RUBYN.md +30 -0
  23. data/lib/rubyn_code/cli/app.rb +85 -9
  24. data/lib/rubyn_code/cli/commands/RUBYN.md +133 -0
  25. data/lib/rubyn_code/cli/commands/base.rb +53 -0
  26. data/lib/rubyn_code/cli/commands/budget.rb +24 -0
  27. data/lib/rubyn_code/cli/commands/clear.rb +16 -0
  28. data/lib/rubyn_code/cli/commands/compact.rb +21 -0
  29. data/lib/rubyn_code/cli/commands/context.rb +44 -0
  30. data/lib/rubyn_code/cli/commands/context_info.rb +56 -0
  31. data/lib/rubyn_code/cli/commands/cost.rb +23 -0
  32. data/lib/rubyn_code/cli/commands/diff.rb +30 -0
  33. data/lib/rubyn_code/cli/commands/doctor.rb +112 -0
  34. data/lib/rubyn_code/cli/commands/help.rb +41 -0
  35. data/lib/rubyn_code/cli/commands/model.rb +37 -0
  36. data/lib/rubyn_code/cli/commands/plan.rb +22 -0
  37. data/lib/rubyn_code/cli/commands/quit.rb +17 -0
  38. data/lib/rubyn_code/cli/commands/registry.rb +64 -0
  39. data/lib/rubyn_code/cli/commands/resume.rb +51 -0
  40. data/lib/rubyn_code/cli/commands/review.rb +26 -0
  41. data/lib/rubyn_code/cli/commands/skill.rb +32 -0
  42. data/lib/rubyn_code/cli/commands/spawn.rb +24 -0
  43. data/lib/rubyn_code/cli/commands/tasks.rb +32 -0
  44. data/lib/rubyn_code/cli/commands/tokens.rb +76 -0
  45. data/lib/rubyn_code/cli/commands/undo.rb +17 -0
  46. data/lib/rubyn_code/cli/commands/version.rb +16 -0
  47. data/lib/rubyn_code/cli/daemon_runner.rb +129 -0
  48. data/lib/rubyn_code/cli/input_handler.rb +20 -23
  49. data/lib/rubyn_code/cli/renderer.rb +25 -27
  50. data/lib/rubyn_code/cli/repl.rb +161 -194
  51. data/lib/rubyn_code/cli/setup.rb +117 -0
  52. data/lib/rubyn_code/cli/spinner.rb +40 -40
  53. data/lib/rubyn_code/cli/stream_formatter.rb +29 -28
  54. data/lib/rubyn_code/cli/version_check.rb +94 -0
  55. data/lib/rubyn_code/config/RUBYN.md +14 -0
  56. data/lib/rubyn_code/config/defaults.rb +28 -19
  57. data/lib/rubyn_code/config/project_config.rb +7 -9
  58. data/lib/rubyn_code/config/settings.rb +3 -3
  59. data/lib/rubyn_code/context/RUBYN.md +20 -0
  60. data/lib/rubyn_code/context/auto_compact.rb +7 -7
  61. data/lib/rubyn_code/context/compactor.rb +2 -2
  62. data/lib/rubyn_code/context/context_collapse.rb +45 -0
  63. data/lib/rubyn_code/context/manager.rb +20 -3
  64. data/lib/rubyn_code/context/manual_compact.rb +7 -7
  65. data/lib/rubyn_code/context/micro_compact.rb +12 -12
  66. data/lib/rubyn_code/db/RUBYN.md +40 -0
  67. data/lib/rubyn_code/db/connection.rb +13 -13
  68. data/lib/rubyn_code/db/migrator.rb +67 -27
  69. data/lib/rubyn_code/db/schema.rb +6 -6
  70. data/lib/rubyn_code/debug.rb +74 -0
  71. data/lib/rubyn_code/hooks/RUBYN.md +17 -0
  72. data/lib/rubyn_code/hooks/built_in.rb +9 -9
  73. data/lib/rubyn_code/hooks/registry.rb +5 -5
  74. data/lib/rubyn_code/hooks/runner.rb +1 -1
  75. data/lib/rubyn_code/hooks/user_hooks.rb +16 -16
  76. data/lib/rubyn_code/learning/RUBYN.md +16 -0
  77. data/lib/rubyn_code/learning/extractor.rb +22 -22
  78. data/lib/rubyn_code/learning/injector.rb +17 -18
  79. data/lib/rubyn_code/learning/instinct.rb +18 -14
  80. data/lib/rubyn_code/llm/RUBYN.md +15 -0
  81. data/lib/rubyn_code/llm/client.rb +121 -55
  82. data/lib/rubyn_code/llm/message_builder.rb +19 -15
  83. data/lib/rubyn_code/llm/streaming.rb +80 -50
  84. data/lib/rubyn_code/mcp/RUBYN.md +21 -0
  85. data/lib/rubyn_code/mcp/client.rb +25 -24
  86. data/lib/rubyn_code/mcp/config.rb +7 -7
  87. data/lib/rubyn_code/mcp/sse_transport.rb +27 -26
  88. data/lib/rubyn_code/mcp/stdio_transport.rb +22 -19
  89. data/lib/rubyn_code/mcp/tool_bridge.rb +32 -32
  90. data/lib/rubyn_code/memory/RUBYN.md +17 -0
  91. data/lib/rubyn_code/memory/models.rb +3 -3
  92. data/lib/rubyn_code/memory/search.rb +17 -17
  93. data/lib/rubyn_code/memory/session_persistence.rb +49 -34
  94. data/lib/rubyn_code/memory/store.rb +17 -17
  95. data/lib/rubyn_code/observability/RUBYN.md +19 -0
  96. data/lib/rubyn_code/observability/budget_enforcer.rb +16 -15
  97. data/lib/rubyn_code/observability/cost_calculator.rb +3 -3
  98. data/lib/rubyn_code/observability/token_counter.rb +1 -1
  99. data/lib/rubyn_code/observability/usage_reporter.rb +35 -35
  100. data/lib/rubyn_code/output/RUBYN.md +11 -0
  101. data/lib/rubyn_code/output/diff_renderer.rb +6 -6
  102. data/lib/rubyn_code/output/formatter.rb +4 -4
  103. data/lib/rubyn_code/permissions/RUBYN.md +17 -0
  104. data/lib/rubyn_code/permissions/prompter.rb +8 -8
  105. data/lib/rubyn_code/protocols/RUBYN.md +14 -0
  106. data/lib/rubyn_code/protocols/interrupt_handler.rb +1 -1
  107. data/lib/rubyn_code/protocols/plan_approval.rb +9 -9
  108. data/lib/rubyn_code/protocols/shutdown_handshake.rb +9 -11
  109. data/lib/rubyn_code/skills/RUBYN.md +19 -0
  110. data/lib/rubyn_code/skills/catalog.rb +7 -7
  111. data/lib/rubyn_code/skills/document.rb +15 -15
  112. data/lib/rubyn_code/skills/loader.rb +6 -8
  113. data/lib/rubyn_code/sub_agents/RUBYN.md +12 -0
  114. data/lib/rubyn_code/sub_agents/runner.rb +15 -15
  115. data/lib/rubyn_code/sub_agents/summarizer.rb +1 -1
  116. data/lib/rubyn_code/tasks/RUBYN.md +13 -0
  117. data/lib/rubyn_code/tasks/dag.rb +12 -16
  118. data/lib/rubyn_code/tasks/manager.rb +24 -24
  119. data/lib/rubyn_code/tasks/models.rb +4 -4
  120. data/lib/rubyn_code/teams/RUBYN.md +14 -0
  121. data/lib/rubyn_code/teams/mailbox.rb +38 -18
  122. data/lib/rubyn_code/teams/manager.rb +19 -19
  123. data/lib/rubyn_code/teams/teammate.rb +3 -4
  124. data/lib/rubyn_code/tools/RUBYN.md +38 -0
  125. data/lib/rubyn_code/tools/background_run.rb +9 -11
  126. data/lib/rubyn_code/tools/base.rb +54 -3
  127. data/lib/rubyn_code/tools/bash.rb +16 -34
  128. data/lib/rubyn_code/tools/bundle_add.rb +10 -12
  129. data/lib/rubyn_code/tools/bundle_install.rb +9 -11
  130. data/lib/rubyn_code/tools/compact.rb +10 -9
  131. data/lib/rubyn_code/tools/db_migrate.rb +17 -15
  132. data/lib/rubyn_code/tools/edit_file.rb +12 -12
  133. data/lib/rubyn_code/tools/executor.rb +9 -4
  134. data/lib/rubyn_code/tools/git_commit.rb +29 -34
  135. data/lib/rubyn_code/tools/git_diff.rb +17 -18
  136. data/lib/rubyn_code/tools/git_log.rb +17 -19
  137. data/lib/rubyn_code/tools/git_status.rb +18 -20
  138. data/lib/rubyn_code/tools/glob.rb +7 -9
  139. data/lib/rubyn_code/tools/grep.rb +11 -9
  140. data/lib/rubyn_code/tools/load_skill.rb +7 -7
  141. data/lib/rubyn_code/tools/memory_search.rb +13 -12
  142. data/lib/rubyn_code/tools/memory_write.rb +14 -12
  143. data/lib/rubyn_code/tools/rails_generate.rb +16 -16
  144. data/lib/rubyn_code/tools/read_file.rb +8 -7
  145. data/lib/rubyn_code/tools/read_inbox.rb +5 -5
  146. data/lib/rubyn_code/tools/registry.rb +2 -2
  147. data/lib/rubyn_code/tools/review_pr.rb +55 -55
  148. data/lib/rubyn_code/tools/run_specs.rb +20 -19
  149. data/lib/rubyn_code/tools/schema.rb +9 -11
  150. data/lib/rubyn_code/tools/send_message.rb +10 -10
  151. data/lib/rubyn_code/tools/spawn_agent.rb +51 -23
  152. data/lib/rubyn_code/tools/spawn_teammate.rb +21 -21
  153. data/lib/rubyn_code/tools/task.rb +28 -28
  154. data/lib/rubyn_code/tools/web_fetch.rb +46 -31
  155. data/lib/rubyn_code/tools/web_search.rb +64 -66
  156. data/lib/rubyn_code/tools/write_file.rb +7 -6
  157. data/lib/rubyn_code/version.rb +1 -1
  158. data/lib/rubyn_code.rb +136 -105
  159. metadata +94 -21
@@ -47,8 +47,12 @@ module RubynCode
47
47
  @on_text = on_text
48
48
  @skill_loader = skill_loader
49
49
  @project_root = project_root
50
+ @plan_mode = false
50
51
  end
51
52
 
53
+ # @return [Boolean]
54
+ attr_accessor :plan_mode
55
+
52
56
  # Send a user message and run the agent loop until a final text response
53
57
  # is produced or the iteration limit is reached.
54
58
  #
@@ -56,23 +60,59 @@ module RubynCode
56
60
  # @return [String] the final assistant text response
57
61
  def send_message(user_input)
58
62
  check_user_feedback(user_input)
63
+
64
+ # Drain any completed background jobs BEFORE adding the user message,
65
+ # so the LLM sees the results in the right order
66
+ drain_background_notifications
67
+
59
68
  @conversation.add_user_message(user_input)
69
+ @max_tokens_override = nil
70
+ @output_recovery_count = 0
71
+ @task_budget_remaining = nil
60
72
 
61
73
  MAX_ITERATIONS.times do |iteration|
74
+ RubynCode::Debug.loop_tick("iteration=#{iteration} messages=#{@conversation.length} max_tokens_override=#{@max_tokens_override || 'default'}")
75
+
62
76
  response = call_llm
63
77
  tool_calls = extract_tool_calls(response)
78
+ stop_reason = response.respond_to?(:stop_reason) ? response.stop_reason : nil
79
+
80
+ RubynCode::Debug.llm("stop_reason=#{stop_reason} tool_calls=#{tool_calls.size} content_blocks=#{get_content(response).size}")
64
81
 
65
82
  if tool_calls.empty?
83
+ if truncated?(response)
84
+ RubynCode::Debug.recovery('Text response truncated, entering recovery')
85
+ response = recover_truncated_response(response)
86
+ end
87
+
88
+ # If background jobs are running, wait for them instead of burning LLM calls
89
+ if has_pending_background_jobs?
90
+ @conversation.add_assistant_message(response_content(response))
91
+ wait_for_background_jobs
92
+ next
93
+ end
94
+
66
95
  @conversation.add_assistant_message(response_content(response))
67
96
  return extract_response_text(response)
68
97
  end
69
98
 
99
+ # Tier 1: If a tool-use response was truncated, silently escalate and retry
100
+ if truncated?(response) && !@max_tokens_override
101
+ RubynCode::Debug.recovery("Tier 1: Escalating max_tokens from #{Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS} to #{Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS}")
102
+ @max_tokens_override = Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
103
+ next
104
+ end
105
+
70
106
  @conversation.add_assistant_message(get_content(response))
71
107
  process_tool_calls(tool_calls)
72
108
 
109
+ # Drain notifications after tool execution — jobs may have finished
110
+ drain_background_notifications
111
+
73
112
  run_maintenance(iteration)
74
113
  end
75
114
 
115
+ RubynCode::Debug.warn("Hit MAX_ITERATIONS (#{MAX_ITERATIONS})")
76
116
  max_iterations_warning
77
117
  end
78
118
 
@@ -80,25 +120,42 @@ module RubynCode
80
120
 
81
121
  # ── LLM interaction ──────────────────────────────────────────────
82
122
 
123
+ TASK_BUDGET_TOTAL = 100_000 # tokens per user message
124
+
83
125
  def call_llm
84
126
  @hook_runner.fire(:pre_llm_call, conversation: @conversation)
85
127
 
86
- drain_background_notifications
87
-
88
- response = @llm_client.chat(
128
+ opts = {
89
129
  messages: @conversation.to_api_format,
90
- tools: tool_definitions,
130
+ tools: @plan_mode ? read_only_tool_definitions : tool_definitions,
91
131
  system: build_system_prompt,
92
132
  on_text: @on_text
93
- )
133
+ }
134
+ opts[:max_tokens] = @max_tokens_override if @max_tokens_override
135
+
136
+ # Task budget: tell the model how many tokens remain for this task
137
+ opts[:task_budget] = { total: TASK_BUDGET_TOTAL, remaining: @task_budget_remaining } if @task_budget_remaining
138
+
139
+ response = @llm_client.chat(**opts)
94
140
 
141
+ @hook_runner.fire(:post_llm_call, response: response, conversation: @conversation)
142
+ track_usage(response)
143
+ update_task_budget(response)
144
+
145
+ response
146
+ rescue LLM::Client::PromptTooLongError
147
+ # 413: context too large — compact and retry once
148
+ RubynCode::Debug.recovery('413 prompt too long — running emergency compaction')
149
+ @context_manager.check_compaction!(@conversation)
150
+
151
+ response = @llm_client.chat(**opts, messages: @conversation.to_api_format)
95
152
  @hook_runner.fire(:post_llm_call, response: response, conversation: @conversation)
96
153
  track_usage(response)
97
154
 
98
155
  response
99
156
  end
100
157
 
101
- SYSTEM_PROMPT = <<~PROMPT.freeze
158
+ SYSTEM_PROMPT = <<~PROMPT
102
159
  You are Rubyn — a snarky but lovable AI coding assistant who lives and breathes Ruby.
103
160
  You're the kind of pair programmer who'll roast your colleague's `if/elsif/elsif/else` chain
104
161
  with a smirk, then immediately rewrite it as a beautiful `case/in` with pattern matching.
@@ -158,8 +215,11 @@ module RubynCode
158
215
  - Run specs after changes. If they break, fix them.
159
216
  - When you are asked to work in a NEW directory you haven't seen yet, check for RUBYN.md, CLAUDE.md, or AGENT.md there. But don't do this unprompted on startup — those files are already loaded into your context.
160
217
  - Load skills when you need deep knowledge on a topic. Don't wing it.
218
+ - You have 112 curated best-practice skill documents covering Ruby, Rails, RSpec, design patterns, and code quality. When writing new code or reviewing existing code, load the relevant skill BEFORE implementing. Don't reinvent patterns that are already documented.
219
+ - HOWEVER: always respect patterns already established in the codebase. If the project uses a specific convention (e.g. service objects, a particular test style, a custom base class), follow that convention even if it differs from the skill doc. Consistency with the codebase beats textbook best practice. Only break from established patterns if they are genuinely harmful (security issues, major performance problems, or bugs).
161
220
  - Keep responses concise. Code speaks louder than paragraphs.
162
221
  - Use spawn_agent sparingly — only for tasks that require reading many files (10+) or deep exploration. For simple reads or edits, use tools directly. Don't spawn a sub-agent when a single read_file or grep will do.
222
+ - IMPORTANT: You can call MULTIPLE tools in a single response. When you need to read several files, search multiple patterns, or perform independent operations, return all tool_use blocks at once rather than one at a time. This is dramatically faster and cheaper. For example, if you need to read 5 files, emit 5 read_file tool calls in one response — don't read them one by one across 5 turns.
163
223
 
164
224
  ## Memory
165
225
  You have persistent memory across sessions via `memory_write` and `memory_search` tools.
@@ -173,9 +233,29 @@ module RubynCode
173
233
  Categories: user_preference, project_convention, error_resolution, decision, code_pattern
174
234
  PROMPT
175
235
 
236
+ PLAN_MODE_PROMPT = <<~PLAN
237
+ ## 🧠 Plan Mode Active
238
+
239
+ You are in PLAN MODE. This means:
240
+ - Reason through the problem step by step
241
+ - You have READ-ONLY tools available — use them to explore the codebase
242
+ - Read files, grep, glob, check git status/log/diff — gather context
243
+ - Do NOT write, edit, execute, or modify anything
244
+ - Outline your plan with numbered steps
245
+ - Identify files you'd need to read or modify
246
+ - Call out risks, edge cases, and trade-offs
247
+ - Ask clarifying questions if the request is ambiguous
248
+ - When the user is satisfied with the plan, they'll toggle plan mode off with /plan
249
+
250
+ You CAN use read-only tools. You MUST NOT use any tool that writes, edits, or executes.
251
+ PLAN
252
+
253
+ PLAN_MODE_RISK_LEVELS = %i[read].freeze
254
+
176
255
  def build_system_prompt
177
256
  parts = [SYSTEM_PROMPT]
178
257
 
258
+ parts << PLAN_MODE_PROMPT if @plan_mode
179
259
  parts << "Working directory: #{@project_root}" if @project_root
180
260
 
181
261
  # Inject memories from previous sessions
@@ -199,47 +279,61 @@ module RubynCode
199
279
  end
200
280
  end
201
281
 
282
+ # List deferred tools so the LLM knows they exist
283
+ deferred = deferred_tool_names
284
+ unless deferred.empty?
285
+ parts << "\n## Additional Tools Available"
286
+ parts << 'These tools are available but not loaded yet. Just call them by name and they will work:'
287
+ parts << deferred.map { |n| "- #{n}" }.join("\n")
288
+ end
289
+
202
290
  parts.join("\n")
203
291
  end
204
292
 
293
+ def deferred_tool_names
294
+ all_names = @tool_executor.tool_definitions.map { |t| t[:name] || t['name'] }
295
+ active_names = tool_definitions.map { |t| t[:name] || t['name'] }
296
+ all_names - active_names
297
+ end
298
+
205
299
  def load_memories
206
- return "" unless @project_root
300
+ return '' unless @project_root
207
301
 
208
302
  db = DB::Connection.instance
209
303
  search = Memory::Search.new(db, project_path: @project_root)
210
304
  recent = search.recent(limit: 20)
211
305
 
212
- return "" if recent.empty?
306
+ return '' if recent.empty?
213
307
 
214
- recent.map { |m|
215
- category = m.respond_to?(:category) ? m.category : (m[:category] || m["category"])
216
- content = m.respond_to?(:content) ? m.content : (m[:content] || m["content"])
308
+ recent.map do |m|
309
+ category = m.respond_to?(:category) ? m.category : (m[:category] || m['category'])
310
+ content = m.respond_to?(:content) ? m.content : (m[:content] || m['content'])
217
311
  "[#{category}] #{content}"
218
- }.join("\n")
312
+ end.join("\n")
219
313
  rescue StandardError
220
- ""
314
+ ''
221
315
  end
222
316
 
223
317
  def load_instincts
224
- return "" unless @project_root
318
+ return '' unless @project_root
225
319
 
226
320
  db = DB::Connection.instance
227
321
  Learning::Injector.call(db: db, project_path: @project_root)
228
322
  rescue StandardError
229
- ""
323
+ ''
230
324
  end
231
325
 
232
326
  # ── Instinct reinforcement ───────────────────────────────────
233
327
 
234
- POSITIVE_PATTERNS = /\b(yes that fixed it|that worked|perfect|thanks|exactly|great|nailed it|that.s right|correct)\b/i.freeze
235
- NEGATIVE_PATTERNS = /\b(no[, ]+use|wrong|that.s not right|instead use|don.t do that|actually[, ]+use|incorrect)\b/i.freeze
328
+ POSITIVE_PATTERNS = /\b(yes that fixed it|that worked|perfect|thanks|exactly|great|nailed it|that.s right|correct)\b/i
329
+ NEGATIVE_PATTERNS = /\b(no[, ]+use|wrong|that.s not right|instead use|don.t do that|actually[, ]+use|incorrect)\b/i
236
330
 
237
331
  def check_user_feedback(user_input)
238
332
  return unless @project_root
239
333
 
240
334
  db = DB::Connection.instance
241
335
  recent_instincts = db.query(
242
- "SELECT id FROM instincts WHERE project_path = ? ORDER BY updated_at DESC LIMIT 5",
336
+ 'SELECT id FROM instincts WHERE project_path = ? ORDER BY updated_at DESC LIMIT 5',
243
337
  [@project_root]
244
338
  ).to_a
245
339
 
@@ -247,11 +341,11 @@ module RubynCode
247
341
 
248
342
  if user_input.match?(POSITIVE_PATTERNS)
249
343
  recent_instincts.first(2).each do |row|
250
- Learning::InstinctMethods.reinforce_in_db(row["id"], db, helpful: true)
344
+ Learning::InstinctMethods.reinforce_in_db(row['id'], db, helpful: true)
251
345
  end
252
346
  elsif user_input.match?(NEGATIVE_PATTERNS)
253
347
  recent_instincts.first(2).each do |row|
254
- Learning::InstinctMethods.reinforce_in_db(row["id"], db, helpful: false)
348
+ Learning::InstinctMethods.reinforce_in_db(row['id'], db, helpful: false)
255
349
  end
256
350
  end
257
351
  rescue StandardError
@@ -274,31 +368,32 @@ module RubynCode
274
368
  INSTRUCTION_FILES.each do |name|
275
369
  collect_instruction(File.join(@project_root, name), found)
276
370
  end
277
- collect_instruction(File.join(@project_root, ".rubyn-code", "RUBYN.md"), found)
371
+ collect_instruction(File.join(@project_root, '.rubyn-code', 'RUBYN.md'), found)
278
372
 
279
373
  # One level of child directories
280
374
  INSTRUCTION_FILES.each do |name|
281
- Dir.glob(File.join(@project_root, "*", name)).each do |path|
375
+ Dir.glob(File.join(@project_root, '*', name)).each do |path|
282
376
  collect_instruction(path, found)
283
377
  end
284
378
  end
285
379
  end
286
380
 
287
381
  # User global
288
- collect_instruction(File.join(Config::Defaults::HOME_DIR, "RUBYN.md"), found)
382
+ collect_instruction(File.join(Config::Defaults::HOME_DIR, 'RUBYN.md'), found)
289
383
 
290
384
  found.uniq.join("\n\n")
291
385
  end
292
386
 
293
387
  def walk_up_for_instructions(start_dir, found)
294
388
  dir = File.dirname(start_dir)
295
- home = File.expand_path("~")
389
+ home = File.expand_path('~')
296
390
 
297
391
  while dir.length >= home.length
298
392
  INSTRUCTION_FILES.each do |name|
299
393
  collect_instruction(File.join(dir, name), found)
300
394
  end
301
395
  break if dir == home
396
+
302
397
  dir = File.dirname(dir)
303
398
  end
304
399
  end
@@ -306,21 +401,69 @@ module RubynCode
306
401
  def collect_instruction(path, found)
307
402
  return unless File.exist?(path) && File.file?(path)
308
403
 
309
- content = File.read(path, encoding: "utf-8")
310
- .encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
404
+ content = File.read(path, encoding: 'utf-8')
405
+ .encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
311
406
  .strip
312
407
  return if content.empty?
313
408
 
314
409
  found << "# From #{path}\n#{content}"
315
410
  end
316
411
 
412
+ # Core tools always included. Others load on first use.
413
+ CORE_TOOLS = %w[
414
+ read_file write_file edit_file glob grep bash
415
+ spawn_agent background_run
416
+ ].freeze
417
+
317
418
  def tool_definitions
318
- @tool_executor.tool_definitions
419
+ all_tools = @tool_executor.tool_definitions
420
+ return all_tools if all_tools.size <= CORE_TOOLS.size
421
+
422
+ @discovered_tools ||= Set.new
423
+
424
+ all_tools.select do |t|
425
+ name = t[:name] || t['name']
426
+ CORE_TOOLS.include?(name) || @discovered_tools.include?(name)
427
+ end
428
+ end
429
+
430
+ def discover_tool(name)
431
+ @discovered_tools ||= Set.new
432
+ @discovered_tools.add(name)
433
+ end
434
+
435
+ def read_only_tool_definitions
436
+ Tools::Registry.all
437
+ .select { |t| PLAN_MODE_RISK_LEVELS.include?(t::RISK_LEVEL) }
438
+ .map(&:to_schema)
439
+ end
440
+
441
+ # ── Background job waiting ────────────────────────────────────────
442
+
443
+ def wait_for_background_jobs
444
+ max_wait = 300 # 5 minutes max
445
+ poll_interval = 3
446
+
447
+ RubynCode::Debug.agent("Waiting for background jobs to finish (polling every #{poll_interval}s, max #{max_wait}s)")
448
+
449
+ elapsed = 0
450
+ while elapsed < max_wait && has_pending_background_jobs?
451
+ sleep poll_interval
452
+ elapsed += poll_interval
453
+ drain_background_notifications
454
+ end
455
+
456
+ # Final drain to pick up any last results
457
+ drain_background_notifications
458
+ RubynCode::Debug.agent("Background wait done (#{elapsed}s)")
319
459
  end
320
460
 
321
461
  # ── Tool processing ──────────────────────────────────────────────
322
462
 
323
463
  def process_tool_calls(tool_calls)
464
+ aggregate_chars = 0
465
+ budget = Config::Defaults::MAX_MESSAGE_TOOL_RESULTS_CHARS
466
+
324
467
  tool_calls.each do |tool_call|
325
468
  tool_name = field(tool_call, :name)
326
469
  tool_input = field(tool_call, :input) || {}
@@ -333,18 +476,37 @@ module RubynCode
333
476
  deny_list: @deny_list
334
477
  )
335
478
 
336
- @on_tool_call&.call(tool_name, tool_input)
479
+ begin
480
+ @on_tool_call&.call(tool_name, tool_input)
481
+ rescue StandardError
482
+ nil
483
+ end
337
484
 
338
485
  result, is_error = execute_with_permission(decision, tool_name, tool_input, tool_id)
339
486
 
340
- @on_tool_result&.call(tool_name, result, is_error)
487
+ # Enforce per-message aggregate tool result budget
488
+ aggregate_chars += result.to_s.length
489
+ if aggregate_chars > budget
490
+ remaining = [budget - (aggregate_chars - result.to_s.length), 500].max
491
+ result = "#{result.to_s[0,
492
+ remaining]}\n\n[truncated — tool result budget exceeded (#{budget} chars/message)]"
493
+ RubynCode::Debug.token("Tool result budget exceeded: #{aggregate_chars}/#{budget} chars")
494
+ end
495
+
496
+ begin
497
+ @on_tool_result&.call(tool_name, result, is_error)
498
+ rescue StandardError
499
+ nil
500
+ end
341
501
 
342
502
  @stall_detector.record(tool_name, tool_input)
503
+ # CRITICAL: always add tool_result to conversation — without this the
504
+ # API will reject the next request with "tool_use without tool_result"
343
505
  @conversation.add_tool_result(tool_id, tool_name, result, is_error: is_error)
344
506
  end
345
507
  end
346
508
 
347
- def execute_with_permission(decision, tool_name, tool_input, tool_id)
509
+ def execute_with_permission(decision, tool_name, tool_input, _tool_id)
348
510
  case decision
349
511
  when :deny
350
512
  ["Tool '#{tool_name}' is blocked by the deny list.", true]
@@ -362,9 +524,12 @@ module RubynCode
362
524
  end
363
525
 
364
526
  def execute_tool(tool_name, tool_input)
527
+ # Auto-discover tools on first use so they appear in future calls
528
+ discover_tool(tool_name)
529
+
365
530
  @hook_runner.fire(:pre_tool_use, tool_name: tool_name, tool_input: tool_input)
366
531
 
367
- result = @tool_executor.execute(tool_name, **symbolize_keys(tool_input))
532
+ result = @tool_executor.execute(tool_name, symbolize_keys(tool_input))
368
533
  @hook_runner.fire(:post_tool_use, tool_name: tool_name, tool_input: tool_input, result: result)
369
534
 
370
535
  [result.to_s, false]
@@ -391,23 +556,26 @@ module RubynCode
391
556
 
392
557
  # ── Maintenance ──────────────────────────────────────────────────
393
558
 
394
- def run_maintenance(iteration)
395
- run_micro_compact
396
- check_auto_compact
559
+ def run_maintenance(_iteration)
560
+ run_compaction
397
561
  check_budget
398
562
  check_stall_detection
399
563
  end
400
564
 
401
- def run_micro_compact
402
- @context_manager.micro_compact(@conversation)
403
- rescue NoMethodError
404
- # micro_compact not yet implemented on context_manager
405
- end
565
+ def run_compaction
566
+ before = @conversation.length
567
+ est = @context_manager.estimated_tokens(@conversation.messages)
568
+ RubynCode::Debug.token("context=#{est} tokens (~#{before} messages, threshold=#{Config::Defaults::CONTEXT_THRESHOLD_TOKENS})")
569
+
570
+ @context_manager.check_compaction!(@conversation)
406
571
 
407
- def check_auto_compact
408
- @context_manager.auto_compact(@conversation)
572
+ after = @conversation.length
573
+ if after < before
574
+ new_est = @context_manager.estimated_tokens(@conversation.messages)
575
+ RubynCode::Debug.loop_tick("Compacted: #{before} -> #{after} messages (#{est} -> #{new_est} tokens)")
576
+ end
409
577
  rescue NoMethodError
410
- # auto_compact not yet implemented on context_manager
578
+ # context_manager does not implement check_compaction! yet
411
579
  end
412
580
 
413
581
  def check_budget
@@ -434,16 +602,86 @@ module RubynCode
434
602
  notifications = @background_manager.drain_notifications
435
603
  return if notifications.nil? || notifications.empty?
436
604
 
437
- summary = notifications.map(&:to_s).join("\n")
438
- @conversation.add_user_message("[Background notifications]\n#{summary}")
605
+ summary = notifications.map { |n| format_background_notification(n) }.join("\n\n")
606
+ @conversation.add_user_message("[Background job results]\n#{summary}")
439
607
  rescue NoMethodError
440
608
  # background_manager does not support drain_notifications yet
441
609
  end
442
610
 
611
+ def has_pending_background_jobs?
612
+ return false unless @background_manager
613
+
614
+ @background_manager.active_count.positive?
615
+ rescue NoMethodError
616
+ false
617
+ end
618
+
619
+ def format_background_notification(notification)
620
+ case notification
621
+ when Hash
622
+ status = notification[:status] || 'unknown'
623
+ job_id = notification[:job_id]&.[](0..7) || 'unknown'
624
+ duration = notification[:duration] ? "#{'%.1f' % notification[:duration]}s" : 'unknown'
625
+ result = notification[:result] || '(no output)'
626
+ "Job #{job_id} [#{status}] (#{duration}):\n#{result}"
627
+ else
628
+ notification.to_s
629
+ end
630
+ end
631
+
632
+ # ── Output token recovery (3-tier, matches Claude Code) ──────────
633
+ #
634
+ # Tier 1: Silent escalation (8K → 32K) — handled in send_message
635
+ # Tier 2: Multi-turn recovery — inject continuation message, retry up to 3x
636
+ # Tier 3: Surface what we have — return partial response after exhausting retries
637
+
638
+ def truncated?(response)
639
+ reason = if response.respond_to?(:stop_reason)
640
+ response.stop_reason
641
+ elsif response.is_a?(Hash)
642
+ response[:stop_reason] || response['stop_reason']
643
+ end
644
+ reason == 'max_tokens'
645
+ end
646
+
647
+ def recover_truncated_response(response)
648
+ @max_tokens_override ||= Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
649
+
650
+ @conversation.add_assistant_message(response_content(response))
651
+
652
+ max_retries = Config::Defaults::MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
653
+
654
+ max_retries.times do |attempt|
655
+ @output_recovery_count += 1
656
+ RubynCode::Debug.recovery("Tier 2: Recovery attempt #{attempt + 1}/#{max_retries}")
657
+
658
+ @conversation.add_user_message(
659
+ 'Output token limit hit. Resume directly — no apology, no recap, ' \
660
+ 'just continue exactly where you left off.'
661
+ )
662
+
663
+ response = call_llm
664
+
665
+ unless truncated?(response)
666
+ RubynCode::Debug.recovery("Recovery successful on attempt #{attempt + 1}")
667
+ break
668
+ end
669
+
670
+ RubynCode::Debug.recovery("Still truncated after attempt #{attempt + 1}")
671
+ @conversation.add_assistant_message(response_content(response))
672
+ end
673
+
674
+ if truncated?(response)
675
+ RubynCode::Debug.recovery("Tier 3: Exhausted #{max_retries} recovery attempts, returning partial response")
676
+ end
677
+
678
+ response
679
+ end
680
+
443
681
  # ── Response helpers ─────────────────────────────────────────────
444
682
 
445
683
  def extract_tool_calls(response)
446
- get_content(response).select { |block| block_type(block) == "tool_use" }
684
+ get_content(response).select { |block| block_type(block) == 'tool_use' }
447
685
  end
448
686
 
449
687
  def response_content(response)
@@ -452,8 +690,8 @@ module RubynCode
452
690
 
453
691
  def extract_response_text(response)
454
692
  blocks = get_content(response)
455
- blocks.select { |b| block_type(b) == "text" }
456
- .map { |b| b.respond_to?(:text) ? b.text : (b[:text] || b["text"]) }
693
+ blocks.select { |b| block_type(b) == 'text' }
694
+ .map { |b| b.respond_to?(:text) ? b.text : (b[:text] || b['text']) }
457
695
  .compact.join("\n")
458
696
  end
459
697
 
@@ -462,7 +700,7 @@ module RubynCode
462
700
  when ->(r) { r.respond_to?(:content) }
463
701
  Array(response.content)
464
702
  when Hash
465
- Array(response[:content] || response["content"])
703
+ Array(response[:content] || response['content'])
466
704
  else
467
705
  []
468
706
  end
@@ -472,7 +710,7 @@ module RubynCode
472
710
  if block.respond_to?(:type)
473
711
  block.type.to_s
474
712
  elsif block.is_a?(Hash)
475
- (block[:type] || block["type"]).to_s
713
+ (block[:type] || block['type']).to_s
476
714
  end
477
715
  end
478
716
 
@@ -480,21 +718,41 @@ module RubynCode
480
718
  usage = if response.respond_to?(:usage)
481
719
  response.usage
482
720
  elsif response.is_a?(Hash)
483
- response[:usage] || response["usage"]
721
+ response[:usage] || response['usage']
484
722
  end
485
723
  return unless usage
486
- return unless usage
724
+
725
+ input_tokens = usage.respond_to?(:input_tokens) ? usage.input_tokens : usage[:input_tokens]
726
+ output_tokens = usage.respond_to?(:output_tokens) ? usage.output_tokens : usage[:output_tokens]
727
+ cache_create = usage.respond_to?(:cache_creation_input_tokens) ? usage.cache_creation_input_tokens.to_i : 0
728
+ cache_read = usage.respond_to?(:cache_read_input_tokens) ? usage.cache_read_input_tokens.to_i : 0
729
+ cache_info = cache_create.positive? || cache_read.positive? ? " cache_create=#{cache_create} cache_read=#{cache_read}" : ''
730
+ RubynCode::Debug.token("in=#{input_tokens} out=#{output_tokens}#{cache_info}")
487
731
 
488
732
  @context_manager.track_usage(usage)
489
733
  rescue NoMethodError
490
734
  # context_manager does not implement track_usage yet
491
735
  end
492
736
 
737
+ def update_task_budget(response)
738
+ usage = response.respond_to?(:usage) ? response.usage : nil
739
+ return unless usage
740
+
741
+ output = usage.respond_to?(:output_tokens) ? usage.output_tokens.to_i : 0
742
+ input = usage.respond_to?(:input_tokens) ? usage.input_tokens.to_i : 0
743
+
744
+ # Initialize on first response, then decrement
745
+ @task_budget_remaining ||= TASK_BUDGET_TOTAL
746
+ @task_budget_remaining = [@task_budget_remaining - input - output, 0].max
747
+
748
+ RubynCode::Debug.token("task_budget_remaining=#{@task_budget_remaining}/#{TASK_BUDGET_TOTAL}")
749
+ end
750
+
493
751
  def max_iterations_warning
494
752
  warning = "Reached maximum iteration limit (#{MAX_ITERATIONS}). " \
495
- "The conversation may be incomplete. Please review the current state " \
496
- "and continue if needed."
497
- @conversation.add_assistant_message([{ type: "text", text: warning }])
753
+ 'The conversation may be incomplete. Please review the current state ' \
754
+ 'and continue if needed.'
755
+ @conversation.add_assistant_message([{ type: 'text', text: warning }])
498
756
  warning
499
757
  end
500
758
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "digest"
3
+ require 'digest'
4
4
 
5
5
  module RubynCode
6
6
  module Agent
@@ -49,9 +49,9 @@ module RubynCode
49
49
  #
50
50
  # @return [String]
51
51
  def nudge_message
52
- "You appear to be repeating the same tool call without making progress. " \
53
- "Please try a different approach, use a different tool, or ask the user " \
54
- "for clarification. Do not repeat the same action."
52
+ 'You appear to be repeating the same tool call without making progress. ' \
53
+ 'Please try a different approach, use a different tool, or ask the user ' \
54
+ 'for clarification. Do not repeat the same action.'
55
55
  end
56
56
 
57
57
  private
@@ -60,7 +60,7 @@ module RubynCode
60
60
  input_str = case tool_input
61
61
  when Hash then stable_hash(tool_input)
62
62
  when String then tool_input
63
- else ""
63
+ else ''
64
64
  end
65
65
 
66
66
  "#{tool_name}:#{Digest::SHA256.hexdigest(input_str)[0, 16]}"
@@ -71,7 +71,7 @@ module RubynCode
71
71
  def stable_hash(hash)
72
72
  hash.sort_by { |k, _| k.to_s }
73
73
  .map { |k, v| "#{k}=#{v}" }
74
- .join("&")
74
+ .join('&')
75
75
  end
76
76
  end
77
77
  end
@@ -0,0 +1,19 @@
1
+ # Auth Layer
2
+
3
+ OAuth PKCE flow + token storage with fallback chain.
4
+
5
+ ## Classes
6
+
7
+ - **`OAuth`** — Full OAuth PKCE flow. Generates code verifier/challenge, opens browser for
8
+ authorization, exchanges code for tokens. Custom errors: `StateMismatchError`,
9
+ `TokenExchangeError`, `RefreshError`.
10
+
11
+ - **`Server`** — Local WEBrick server on `127.0.0.1:19275` to receive the OAuth callback.
12
+ Uses mutex + condition variable to block until the redirect arrives. Times out after 120s.
13
+
14
+ - **`TokenStore`** — Token persistence with a three-level fallback chain:
15
+ 1. macOS Keychain (reads Claude Code's OAuth token from `Claude Code-credentials`)
16
+ 2. Local YAML file (`~/.rubyn-code/tokens.yml`)
17
+ 3. `ANTHROPIC_API_KEY` environment variable
18
+
19
+ Handles token refresh with a 5-minute expiry buffer.