RubyGems - rubyn-code - Versions diffs - 0.1.0 → 0.2.0 - Mend

rubyn-code 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (159)

checksums.yaml +4 -4
data/README.md +269 -467
data/db/migrations/009_create_teams.sql +6 -6
data/db/migrations/011_fix_mailbox_messages_columns.rb +35 -0
data/db/migrations/012_expand_mailbox_message_types.rb +37 -0
data/exe/rubyn-code +1 -1
data/lib/rubyn_code/agent/RUBYN.md +17 -0
data/lib/rubyn_code/agent/conversation.rb +68 -19
data/lib/rubyn_code/agent/loop.rb +312 -54
data/lib/rubyn_code/agent/loop_detector.rb +6 -6
data/lib/rubyn_code/auth/RUBYN.md +19 -0
data/lib/rubyn_code/auth/oauth.rb +40 -35
data/lib/rubyn_code/auth/server.rb +16 -12
data/lib/rubyn_code/auth/token_store.rb +22 -22
data/lib/rubyn_code/autonomous/RUBYN.md +14 -0
data/lib/rubyn_code/autonomous/daemon.rb +115 -79
data/lib/rubyn_code/autonomous/idle_poller.rb +4 -8
data/lib/rubyn_code/autonomous/task_claimer.rb +11 -11
data/lib/rubyn_code/background/RUBYN.md +13 -0
data/lib/rubyn_code/background/notifier.rb +0 -2
data/lib/rubyn_code/background/worker.rb +60 -15
data/lib/rubyn_code/cli/RUBYN.md +30 -0
data/lib/rubyn_code/cli/app.rb +85 -9
data/lib/rubyn_code/cli/commands/RUBYN.md +133 -0
data/lib/rubyn_code/cli/commands/base.rb +53 -0
data/lib/rubyn_code/cli/commands/budget.rb +24 -0
data/lib/rubyn_code/cli/commands/clear.rb +16 -0
data/lib/rubyn_code/cli/commands/compact.rb +21 -0
data/lib/rubyn_code/cli/commands/context.rb +44 -0
data/lib/rubyn_code/cli/commands/context_info.rb +56 -0
data/lib/rubyn_code/cli/commands/cost.rb +23 -0
data/lib/rubyn_code/cli/commands/diff.rb +30 -0
data/lib/rubyn_code/cli/commands/doctor.rb +112 -0
data/lib/rubyn_code/cli/commands/help.rb +41 -0
data/lib/rubyn_code/cli/commands/model.rb +37 -0
data/lib/rubyn_code/cli/commands/plan.rb +22 -0
data/lib/rubyn_code/cli/commands/quit.rb +17 -0
data/lib/rubyn_code/cli/commands/registry.rb +64 -0
data/lib/rubyn_code/cli/commands/resume.rb +51 -0
data/lib/rubyn_code/cli/commands/review.rb +26 -0
data/lib/rubyn_code/cli/commands/skill.rb +32 -0
data/lib/rubyn_code/cli/commands/spawn.rb +24 -0
data/lib/rubyn_code/cli/commands/tasks.rb +32 -0
data/lib/rubyn_code/cli/commands/tokens.rb +76 -0
data/lib/rubyn_code/cli/commands/undo.rb +17 -0
data/lib/rubyn_code/cli/commands/version.rb +16 -0
data/lib/rubyn_code/cli/daemon_runner.rb +129 -0
data/lib/rubyn_code/cli/input_handler.rb +20 -23
data/lib/rubyn_code/cli/renderer.rb +25 -27
data/lib/rubyn_code/cli/repl.rb +161 -194
data/lib/rubyn_code/cli/setup.rb +117 -0
data/lib/rubyn_code/cli/spinner.rb +40 -40
data/lib/rubyn_code/cli/stream_formatter.rb +29 -28
data/lib/rubyn_code/cli/version_check.rb +94 -0
data/lib/rubyn_code/config/RUBYN.md +14 -0
data/lib/rubyn_code/config/defaults.rb +28 -19
data/lib/rubyn_code/config/project_config.rb +7 -9
data/lib/rubyn_code/config/settings.rb +3 -3
data/lib/rubyn_code/context/RUBYN.md +20 -0
data/lib/rubyn_code/context/auto_compact.rb +7 -7
data/lib/rubyn_code/context/compactor.rb +2 -2
data/lib/rubyn_code/context/context_collapse.rb +45 -0
data/lib/rubyn_code/context/manager.rb +20 -3
data/lib/rubyn_code/context/manual_compact.rb +7 -7
data/lib/rubyn_code/context/micro_compact.rb +12 -12
data/lib/rubyn_code/db/RUBYN.md +40 -0
data/lib/rubyn_code/db/connection.rb +13 -13
data/lib/rubyn_code/db/migrator.rb +67 -27
data/lib/rubyn_code/db/schema.rb +6 -6
data/lib/rubyn_code/debug.rb +74 -0
data/lib/rubyn_code/hooks/RUBYN.md +17 -0
data/lib/rubyn_code/hooks/built_in.rb +9 -9
data/lib/rubyn_code/hooks/registry.rb +5 -5
data/lib/rubyn_code/hooks/runner.rb +1 -1
data/lib/rubyn_code/hooks/user_hooks.rb +16 -16
data/lib/rubyn_code/learning/RUBYN.md +16 -0
data/lib/rubyn_code/learning/extractor.rb +22 -22
data/lib/rubyn_code/learning/injector.rb +17 -18
data/lib/rubyn_code/learning/instinct.rb +18 -14
data/lib/rubyn_code/llm/RUBYN.md +15 -0
data/lib/rubyn_code/llm/client.rb +121 -55
data/lib/rubyn_code/llm/message_builder.rb +19 -15
data/lib/rubyn_code/llm/streaming.rb +80 -50
data/lib/rubyn_code/mcp/RUBYN.md +21 -0
data/lib/rubyn_code/mcp/client.rb +25 -24
data/lib/rubyn_code/mcp/config.rb +7 -7
data/lib/rubyn_code/mcp/sse_transport.rb +27 -26
data/lib/rubyn_code/mcp/stdio_transport.rb +22 -19
data/lib/rubyn_code/mcp/tool_bridge.rb +32 -32
data/lib/rubyn_code/memory/RUBYN.md +17 -0
data/lib/rubyn_code/memory/models.rb +3 -3
data/lib/rubyn_code/memory/search.rb +17 -17
data/lib/rubyn_code/memory/session_persistence.rb +49 -34
data/lib/rubyn_code/memory/store.rb +17 -17
data/lib/rubyn_code/observability/RUBYN.md +19 -0
data/lib/rubyn_code/observability/budget_enforcer.rb +16 -15
data/lib/rubyn_code/observability/cost_calculator.rb +3 -3
data/lib/rubyn_code/observability/token_counter.rb +1 -1
data/lib/rubyn_code/observability/usage_reporter.rb +35 -35
data/lib/rubyn_code/output/RUBYN.md +11 -0
data/lib/rubyn_code/output/diff_renderer.rb +6 -6
data/lib/rubyn_code/output/formatter.rb +4 -4
data/lib/rubyn_code/permissions/RUBYN.md +17 -0
data/lib/rubyn_code/permissions/prompter.rb +8 -8
data/lib/rubyn_code/protocols/RUBYN.md +14 -0
data/lib/rubyn_code/protocols/interrupt_handler.rb +1 -1
data/lib/rubyn_code/protocols/plan_approval.rb +9 -9
data/lib/rubyn_code/protocols/shutdown_handshake.rb +9 -11
data/lib/rubyn_code/skills/RUBYN.md +19 -0
data/lib/rubyn_code/skills/catalog.rb +7 -7
data/lib/rubyn_code/skills/document.rb +15 -15
data/lib/rubyn_code/skills/loader.rb +6 -8
data/lib/rubyn_code/sub_agents/RUBYN.md +12 -0
data/lib/rubyn_code/sub_agents/runner.rb +15 -15
data/lib/rubyn_code/sub_agents/summarizer.rb +1 -1
data/lib/rubyn_code/tasks/RUBYN.md +13 -0
data/lib/rubyn_code/tasks/dag.rb +12 -16
data/lib/rubyn_code/tasks/manager.rb +24 -24
data/lib/rubyn_code/tasks/models.rb +4 -4
data/lib/rubyn_code/teams/RUBYN.md +14 -0
data/lib/rubyn_code/teams/mailbox.rb +38 -18
data/lib/rubyn_code/teams/manager.rb +19 -19
data/lib/rubyn_code/teams/teammate.rb +3 -4
data/lib/rubyn_code/tools/RUBYN.md +38 -0
data/lib/rubyn_code/tools/background_run.rb +9 -11
data/lib/rubyn_code/tools/base.rb +54 -3
data/lib/rubyn_code/tools/bash.rb +16 -34
data/lib/rubyn_code/tools/bundle_add.rb +10 -12
data/lib/rubyn_code/tools/bundle_install.rb +9 -11
data/lib/rubyn_code/tools/compact.rb +10 -9
data/lib/rubyn_code/tools/db_migrate.rb +17 -15
data/lib/rubyn_code/tools/edit_file.rb +12 -12
data/lib/rubyn_code/tools/executor.rb +9 -4
data/lib/rubyn_code/tools/git_commit.rb +29 -34
data/lib/rubyn_code/tools/git_diff.rb +17 -18
data/lib/rubyn_code/tools/git_log.rb +17 -19
data/lib/rubyn_code/tools/git_status.rb +18 -20
data/lib/rubyn_code/tools/glob.rb +7 -9
data/lib/rubyn_code/tools/grep.rb +11 -9
data/lib/rubyn_code/tools/load_skill.rb +7 -7
data/lib/rubyn_code/tools/memory_search.rb +13 -12
data/lib/rubyn_code/tools/memory_write.rb +14 -12
data/lib/rubyn_code/tools/rails_generate.rb +16 -16
data/lib/rubyn_code/tools/read_file.rb +8 -7
data/lib/rubyn_code/tools/read_inbox.rb +5 -5
data/lib/rubyn_code/tools/registry.rb +2 -2
data/lib/rubyn_code/tools/review_pr.rb +55 -55
data/lib/rubyn_code/tools/run_specs.rb +20 -19
data/lib/rubyn_code/tools/schema.rb +9 -11
data/lib/rubyn_code/tools/send_message.rb +10 -10
data/lib/rubyn_code/tools/spawn_agent.rb +51 -23
data/lib/rubyn_code/tools/spawn_teammate.rb +21 -21
data/lib/rubyn_code/tools/task.rb +28 -28
data/lib/rubyn_code/tools/web_fetch.rb +46 -31
data/lib/rubyn_code/tools/web_search.rb +64 -66
data/lib/rubyn_code/tools/write_file.rb +7 -6
data/lib/rubyn_code/version.rb +1 -1
data/lib/rubyn_code.rb +136 -105
metadata +94 -21

data/lib/rubyn_code/agent/loop.rb CHANGED Viewed

@@ -47,8 +47,12 @@ module RubynCode
         @on_text            = on_text
         @skill_loader       = skill_loader
         @project_root       = project_root
+        @plan_mode          = false
       end
+      # @return [Boolean]
+      attr_accessor :plan_mode
       # Send a user message and run the agent loop until a final text response
       # is produced or the iteration limit is reached.
       #
@@ -56,23 +60,59 @@ module RubynCode
       # @return [String] the final assistant text response
       def send_message(user_input)
         check_user_feedback(user_input)
+        # Drain any completed background jobs BEFORE adding the user message,
+        # so the LLM sees the results in the right order
+        drain_background_notifications
         @conversation.add_user_message(user_input)
+        @max_tokens_override = nil
+        @output_recovery_count = 0
+        @task_budget_remaining = nil
         MAX_ITERATIONS.times do |iteration|
+          RubynCode::Debug.loop_tick("iteration=#{iteration} messages=#{@conversation.length} max_tokens_override=#{@max_tokens_override || 'default'}")
           response = call_llm
           tool_calls = extract_tool_calls(response)
+          stop_reason = response.respond_to?(:stop_reason) ? response.stop_reason : nil
+          RubynCode::Debug.llm("stop_reason=#{stop_reason} tool_calls=#{tool_calls.size} content_blocks=#{get_content(response).size}")
           if tool_calls.empty?
+            if truncated?(response)
+              RubynCode::Debug.recovery('Text response truncated, entering recovery')
+              response = recover_truncated_response(response)
+            end
+            # If background jobs are running, wait for them instead of burning LLM calls
+            if has_pending_background_jobs?
+              @conversation.add_assistant_message(response_content(response))
+              wait_for_background_jobs
+              next
+            end
             @conversation.add_assistant_message(response_content(response))
             return extract_response_text(response)
           end
+          # Tier 1: If a tool-use response was truncated, silently escalate and retry
+          if truncated?(response) && !@max_tokens_override
+            RubynCode::Debug.recovery("Tier 1: Escalating max_tokens from #{Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS} to #{Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS}")
+            @max_tokens_override = Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
+            next
+          end
           @conversation.add_assistant_message(get_content(response))
           process_tool_calls(tool_calls)
+          # Drain notifications after tool execution — jobs may have finished
+          drain_background_notifications
           run_maintenance(iteration)
         end
+        RubynCode::Debug.warn("Hit MAX_ITERATIONS (#{MAX_ITERATIONS})")
         max_iterations_warning
       end
@@ -80,25 +120,42 @@ module RubynCode
       # ── LLM interaction ──────────────────────────────────────────────
+      TASK_BUDGET_TOTAL = 100_000 # tokens per user message
       def call_llm
         @hook_runner.fire(:pre_llm_call, conversation: @conversation)
-        drain_background_notifications
-        response = @llm_client.chat(
+        opts = {
           messages: @conversation.to_api_format,
-          tools: tool_definitions,
+          tools: @plan_mode ? read_only_tool_definitions : tool_definitions,
           system: build_system_prompt,
           on_text: @on_text
-        )
+        }
+        opts[:max_tokens] = @max_tokens_override if @max_tokens_override
+        # Task budget: tell the model how many tokens remain for this task
+        opts[:task_budget] = { total: TASK_BUDGET_TOTAL, remaining: @task_budget_remaining } if @task_budget_remaining
+        response = @llm_client.chat(**opts)
+        @hook_runner.fire(:post_llm_call, response: response, conversation: @conversation)
+        track_usage(response)
+        update_task_budget(response)
+        response
+      rescue LLM::Client::PromptTooLongError
+        # 413: context too large — compact and retry once
+        RubynCode::Debug.recovery('413 prompt too long — running emergency compaction')
+        @context_manager.check_compaction!(@conversation)
+        response = @llm_client.chat(**opts, messages: @conversation.to_api_format)
         @hook_runner.fire(:post_llm_call, response: response, conversation: @conversation)
         track_usage(response)
         response
       end
-      SYSTEM_PROMPT = <<~PROMPT.freeze
+      SYSTEM_PROMPT = <<~PROMPT
         You are Rubyn — a snarky but lovable AI coding assistant who lives and breathes Ruby.
         You're the kind of pair programmer who'll roast your colleague's `if/elsif/elsif/else` chain
         with a smirk, then immediately rewrite it as a beautiful `case/in` with pattern matching.
@@ -158,8 +215,11 @@ module RubynCode
         - Run specs after changes. If they break, fix them.
         - When you are asked to work in a NEW directory you haven't seen yet, check for RUBYN.md, CLAUDE.md, or AGENT.md there. But don't do this unprompted on startup — those files are already loaded into your context.
         - Load skills when you need deep knowledge on a topic. Don't wing it.
+        - You have 112 curated best-practice skill documents covering Ruby, Rails, RSpec, design patterns, and code quality. When writing new code or reviewing existing code, load the relevant skill BEFORE implementing. Don't reinvent patterns that are already documented.
+        - HOWEVER: always respect patterns already established in the codebase. If the project uses a specific convention (e.g. service objects, a particular test style, a custom base class), follow that convention even if it differs from the skill doc. Consistency with the codebase beats textbook best practice. Only break from established patterns if they are genuinely harmful (security issues, major performance problems, or bugs).
         - Keep responses concise. Code speaks louder than paragraphs.
         - Use spawn_agent sparingly — only for tasks that require reading many files (10+) or deep exploration. For simple reads or edits, use tools directly. Don't spawn a sub-agent when a single read_file or grep will do.
+        - IMPORTANT: You can call MULTIPLE tools in a single response. When you need to read several files, search multiple patterns, or perform independent operations, return all tool_use blocks at once rather than one at a time. This is dramatically faster and cheaper. For example, if you need to read 5 files, emit 5 read_file tool calls in one response — don't read them one by one across 5 turns.
         ## Memory
         You have persistent memory across sessions via `memory_write` and `memory_search` tools.
@@ -173,9 +233,29 @@ module RubynCode
         Categories: user_preference, project_convention, error_resolution, decision, code_pattern
       PROMPT
+      PLAN_MODE_PROMPT = <<~PLAN
+        ## 🧠 Plan Mode Active
+        You are in PLAN MODE. This means:
+        - Reason through the problem step by step
+        - You have READ-ONLY tools available — use them to explore the codebase
+        - Read files, grep, glob, check git status/log/diff — gather context
+        - Do NOT write, edit, execute, or modify anything
+        - Outline your plan with numbered steps
+        - Identify files you'd need to read or modify
+        - Call out risks, edge cases, and trade-offs
+        - Ask clarifying questions if the request is ambiguous
+        - When the user is satisfied with the plan, they'll toggle plan mode off with /plan
+        You CAN use read-only tools. You MUST NOT use any tool that writes, edits, or executes.
+      PLAN
+      PLAN_MODE_RISK_LEVELS = %i[read].freeze
       def build_system_prompt
         parts = [SYSTEM_PROMPT]
+        parts << PLAN_MODE_PROMPT if @plan_mode
         parts << "Working directory: #{@project_root}" if @project_root
         # Inject memories from previous sessions
@@ -199,47 +279,61 @@ module RubynCode
           end
         end
+        # List deferred tools so the LLM knows they exist
+        deferred = deferred_tool_names
+        unless deferred.empty?
+          parts << "\n## Additional Tools Available"
+          parts << 'These tools are available but not loaded yet. Just call them by name and they will work:'
+          parts << deferred.map { |n| "- #{n}" }.join("\n")
+        end
         parts.join("\n")
       end
+      def deferred_tool_names
+        all_names = @tool_executor.tool_definitions.map { |t| t[:name] || t['name'] }
+        active_names = tool_definitions.map { |t| t[:name] || t['name'] }
+        all_names - active_names
+      end
       def load_memories
-        return "" unless @project_root
+        return '' unless @project_root
         db = DB::Connection.instance
         search = Memory::Search.new(db, project_path: @project_root)
         recent = search.recent(limit: 20)
-        return "" if recent.empty?
+        return '' if recent.empty?
-        recent.map { |m|
-          category = m.respond_to?(:category) ? m.category : (m[:category] || m["category"])
-          content = m.respond_to?(:content) ? m.content : (m[:content] || m["content"])
+        recent.map do |m|
+          category = m.respond_to?(:category) ? m.category : (m[:category] || m['category'])
+          content = m.respond_to?(:content) ? m.content : (m[:content] || m['content'])
           "[#{category}] #{content}"
-        }.join("\n")
+        end.join("\n")
       rescue StandardError
-        ""
+        ''
       end
       def load_instincts
-        return "" unless @project_root
+        return '' unless @project_root
         db = DB::Connection.instance
         Learning::Injector.call(db: db, project_path: @project_root)
       rescue StandardError
-        ""
+        ''
       end
       # ── Instinct reinforcement ───────────────────────────────────
-      POSITIVE_PATTERNS = /\b(yes that fixed it|that worked|perfect|thanks|exactly|great|nailed it|that.s right|correct)\b/i.freeze
-      NEGATIVE_PATTERNS = /\b(no[, ]+use|wrong|that.s not right|instead use|don.t do that|actually[, ]+use|incorrect)\b/i.freeze
+      POSITIVE_PATTERNS = /\b(yes that fixed it|that worked|perfect|thanks|exactly|great|nailed it|that.s right|correct)\b/i
+      NEGATIVE_PATTERNS = /\b(no[, ]+use|wrong|that.s not right|instead use|don.t do that|actually[, ]+use|incorrect)\b/i
       def check_user_feedback(user_input)
         return unless @project_root
         db = DB::Connection.instance
         recent_instincts = db.query(
-          "SELECT id FROM instincts WHERE project_path = ? ORDER BY updated_at DESC LIMIT 5",
+          'SELECT id FROM instincts WHERE project_path = ? ORDER BY updated_at DESC LIMIT 5',
           [@project_root]
         ).to_a
@@ -247,11 +341,11 @@ module RubynCode
         if user_input.match?(POSITIVE_PATTERNS)
           recent_instincts.first(2).each do |row|
-            Learning::InstinctMethods.reinforce_in_db(row["id"], db, helpful: true)
+            Learning::InstinctMethods.reinforce_in_db(row['id'], db, helpful: true)
           end
         elsif user_input.match?(NEGATIVE_PATTERNS)
           recent_instincts.first(2).each do |row|
-            Learning::InstinctMethods.reinforce_in_db(row["id"], db, helpful: false)
+            Learning::InstinctMethods.reinforce_in_db(row['id'], db, helpful: false)
           end
         end
       rescue StandardError
@@ -274,31 +368,32 @@ module RubynCode
           INSTRUCTION_FILES.each do |name|
             collect_instruction(File.join(@project_root, name), found)
           end
-          collect_instruction(File.join(@project_root, ".rubyn-code", "RUBYN.md"), found)
+          collect_instruction(File.join(@project_root, '.rubyn-code', 'RUBYN.md'), found)
           # One level of child directories
           INSTRUCTION_FILES.each do |name|
-            Dir.glob(File.join(@project_root, "*", name)).each do |path|
+            Dir.glob(File.join(@project_root, '*', name)).each do |path|
               collect_instruction(path, found)
             end
           end
         end
         # User global
-        collect_instruction(File.join(Config::Defaults::HOME_DIR, "RUBYN.md"), found)
+        collect_instruction(File.join(Config::Defaults::HOME_DIR, 'RUBYN.md'), found)
         found.uniq.join("\n\n")
       end
       def walk_up_for_instructions(start_dir, found)
         dir = File.dirname(start_dir)
-        home = File.expand_path("~")
+        home = File.expand_path('~')
         while dir.length >= home.length
           INSTRUCTION_FILES.each do |name|
             collect_instruction(File.join(dir, name), found)
           end
           break if dir == home
           dir = File.dirname(dir)
         end
       end
@@ -306,21 +401,69 @@ module RubynCode
       def collect_instruction(path, found)
         return unless File.exist?(path) && File.file?(path)
-        content = File.read(path, encoding: "utf-8")
-                      .encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
+        content = File.read(path, encoding: 'utf-8')
+                      .encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
                       .strip
         return if content.empty?
         found << "# From #{path}\n#{content}"
       end
+      # Core tools always included. Others load on first use.
+      CORE_TOOLS = %w[
+        read_file write_file edit_file glob grep bash
+        spawn_agent background_run
+      ].freeze
       def tool_definitions
-        @tool_executor.tool_definitions
+        all_tools = @tool_executor.tool_definitions
+        return all_tools if all_tools.size <= CORE_TOOLS.size
+        @discovered_tools ||= Set.new
+        all_tools.select do |t|
+          name = t[:name] || t['name']
+          CORE_TOOLS.include?(name) || @discovered_tools.include?(name)
+        end
+      end
+      def discover_tool(name)
+        @discovered_tools ||= Set.new
+        @discovered_tools.add(name)
+      end
+      def read_only_tool_definitions
+        Tools::Registry.all
+                       .select { |t| PLAN_MODE_RISK_LEVELS.include?(t::RISK_LEVEL) }
+                       .map(&:to_schema)
+      end
+      # ── Background job waiting ────────────────────────────────────────
+      def wait_for_background_jobs
+        max_wait = 300 # 5 minutes max
+        poll_interval = 3
+        RubynCode::Debug.agent("Waiting for background jobs to finish (polling every #{poll_interval}s, max #{max_wait}s)")
+        elapsed = 0
+        while elapsed < max_wait && has_pending_background_jobs?
+          sleep poll_interval
+          elapsed += poll_interval
+          drain_background_notifications
+        end
+        # Final drain to pick up any last results
+        drain_background_notifications
+        RubynCode::Debug.agent("Background wait done (#{elapsed}s)")
       end
       # ── Tool processing ──────────────────────────────────────────────
       def process_tool_calls(tool_calls)
+        aggregate_chars = 0
+        budget = Config::Defaults::MAX_MESSAGE_TOOL_RESULTS_CHARS
         tool_calls.each do |tool_call|
           tool_name  = field(tool_call, :name)
           tool_input = field(tool_call, :input) || {}
@@ -333,18 +476,37 @@ module RubynCode
             deny_list: @deny_list
           )
-          @on_tool_call&.call(tool_name, tool_input)
+          begin
+            @on_tool_call&.call(tool_name, tool_input)
+          rescue StandardError
+            nil
+          end
           result, is_error = execute_with_permission(decision, tool_name, tool_input, tool_id)
-          @on_tool_result&.call(tool_name, result, is_error)
+          # Enforce per-message aggregate tool result budget
+          aggregate_chars += result.to_s.length
+          if aggregate_chars > budget
+            remaining = [budget - (aggregate_chars - result.to_s.length), 500].max
+            result = "#{result.to_s[0,
+                                    remaining]}\n\n[truncated — tool result budget exceeded (#{budget} chars/message)]"
+            RubynCode::Debug.token("Tool result budget exceeded: #{aggregate_chars}/#{budget} chars")
+          end
+          begin
+            @on_tool_result&.call(tool_name, result, is_error)
+          rescue StandardError
+            nil
+          end
           @stall_detector.record(tool_name, tool_input)
+          # CRITICAL: always add tool_result to conversation — without this the
+          # API will reject the next request with "tool_use without tool_result"
           @conversation.add_tool_result(tool_id, tool_name, result, is_error: is_error)
         end
       end
-      def execute_with_permission(decision, tool_name, tool_input, tool_id)
+      def execute_with_permission(decision, tool_name, tool_input, _tool_id)
         case decision
         when :deny
           ["Tool '#{tool_name}' is blocked by the deny list.", true]
@@ -362,9 +524,12 @@ module RubynCode
       end
       def execute_tool(tool_name, tool_input)
+        # Auto-discover tools on first use so they appear in future calls
+        discover_tool(tool_name)
         @hook_runner.fire(:pre_tool_use, tool_name: tool_name, tool_input: tool_input)
-        result = @tool_executor.execute(tool_name, **symbolize_keys(tool_input))
+        result = @tool_executor.execute(tool_name, symbolize_keys(tool_input))
         @hook_runner.fire(:post_tool_use, tool_name: tool_name, tool_input: tool_input, result: result)
         [result.to_s, false]
@@ -391,23 +556,26 @@ module RubynCode
       # ── Maintenance ──────────────────────────────────────────────────
-      def run_maintenance(iteration)
-        run_micro_compact
-        check_auto_compact
+      def run_maintenance(_iteration)
+        run_compaction
         check_budget
         check_stall_detection
       end
-      def run_micro_compact
-        @context_manager.micro_compact(@conversation)
-      rescue NoMethodError
-        # micro_compact not yet implemented on context_manager
-      end
+      def run_compaction
+        before = @conversation.length
+        est = @context_manager.estimated_tokens(@conversation.messages)
+        RubynCode::Debug.token("context=#{est} tokens (~#{before} messages, threshold=#{Config::Defaults::CONTEXT_THRESHOLD_TOKENS})")
+        @context_manager.check_compaction!(@conversation)
-      def check_auto_compact
-        @context_manager.auto_compact(@conversation)
+        after = @conversation.length
+        if after < before
+          new_est = @context_manager.estimated_tokens(@conversation.messages)
+          RubynCode::Debug.loop_tick("Compacted: #{before} -> #{after} messages (#{est} -> #{new_est} tokens)")
+        end
       rescue NoMethodError
-        # auto_compact not yet implemented on context_manager
+        # context_manager does not implement check_compaction! yet
       end
       def check_budget
@@ -434,16 +602,86 @@ module RubynCode
         notifications = @background_manager.drain_notifications
         return if notifications.nil? || notifications.empty?
-        summary = notifications.map(&:to_s).join("\n")
-        @conversation.add_user_message("[Background notifications]\n#{summary}")
+        summary = notifications.map { |n| format_background_notification(n) }.join("\n\n")
+        @conversation.add_user_message("[Background job results]\n#{summary}")
       rescue NoMethodError
         # background_manager does not support drain_notifications yet
       end
+      def has_pending_background_jobs?
+        return false unless @background_manager
+        @background_manager.active_count.positive?
+      rescue NoMethodError
+        false
+      end
+      def format_background_notification(notification)
+        case notification
+        when Hash
+          status = notification[:status] || 'unknown'
+          job_id = notification[:job_id]&.[](0..7) || 'unknown'
+          duration = notification[:duration] ? "#{'%.1f' % notification[:duration]}s" : 'unknown'
+          result = notification[:result] || '(no output)'
+          "Job #{job_id} [#{status}] (#{duration}):\n#{result}"
+        else
+          notification.to_s
+        end
+      end
+      # ── Output token recovery (3-tier, matches Claude Code) ──────────
+      #
+      # Tier 1: Silent escalation (8K → 32K) — handled in send_message
+      # Tier 2: Multi-turn recovery — inject continuation message, retry up to 3x
+      # Tier 3: Surface what we have — return partial response after exhausting retries
+      def truncated?(response)
+        reason = if response.respond_to?(:stop_reason)
+                   response.stop_reason
+                 elsif response.is_a?(Hash)
+                   response[:stop_reason] || response['stop_reason']
+                 end
+        reason == 'max_tokens'
+      end
+      def recover_truncated_response(response)
+        @max_tokens_override ||= Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
+        @conversation.add_assistant_message(response_content(response))
+        max_retries = Config::Defaults::MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
+        max_retries.times do |attempt|
+          @output_recovery_count += 1
+          RubynCode::Debug.recovery("Tier 2: Recovery attempt #{attempt + 1}/#{max_retries}")
+          @conversation.add_user_message(
+            'Output token limit hit. Resume directly — no apology, no recap, ' \
+            'just continue exactly where you left off.'
+          )
+          response = call_llm
+          unless truncated?(response)
+            RubynCode::Debug.recovery("Recovery successful on attempt #{attempt + 1}")
+            break
+          end
+          RubynCode::Debug.recovery("Still truncated after attempt #{attempt + 1}")
+          @conversation.add_assistant_message(response_content(response))
+        end
+        if truncated?(response)
+          RubynCode::Debug.recovery("Tier 3: Exhausted #{max_retries} recovery attempts, returning partial response")
+        end
+        response
+      end
       # ── Response helpers ─────────────────────────────────────────────
       def extract_tool_calls(response)
-        get_content(response).select { |block| block_type(block) == "tool_use" }
+        get_content(response).select { |block| block_type(block) == 'tool_use' }
       end
       def response_content(response)
@@ -452,8 +690,8 @@ module RubynCode
       def extract_response_text(response)
         blocks = get_content(response)
-        blocks.select { |b| block_type(b) == "text" }
-              .map { |b| b.respond_to?(:text) ? b.text : (b[:text] || b["text"]) }
+        blocks.select { |b| block_type(b) == 'text' }
+              .map { |b| b.respond_to?(:text) ? b.text : (b[:text] || b['text']) }
               .compact.join("\n")
       end
@@ -462,7 +700,7 @@ module RubynCode
         when ->(r) { r.respond_to?(:content) }
           Array(response.content)
         when Hash
-          Array(response[:content] || response["content"])
+          Array(response[:content] || response['content'])
         else
           []
         end
@@ -472,7 +710,7 @@ module RubynCode
         if block.respond_to?(:type)
           block.type.to_s
         elsif block.is_a?(Hash)
-          (block[:type] || block["type"]).to_s
+          (block[:type] || block['type']).to_s
         end
       end
@@ -480,21 +718,41 @@ module RubynCode
         usage = if response.respond_to?(:usage)
                   response.usage
                 elsif response.is_a?(Hash)
-                  response[:usage] || response["usage"]
+                  response[:usage] || response['usage']
                 end
         return unless usage
-        return unless usage
+        input_tokens = usage.respond_to?(:input_tokens) ? usage.input_tokens : usage[:input_tokens]
+        output_tokens = usage.respond_to?(:output_tokens) ? usage.output_tokens : usage[:output_tokens]
+        cache_create = usage.respond_to?(:cache_creation_input_tokens) ? usage.cache_creation_input_tokens.to_i : 0
+        cache_read = usage.respond_to?(:cache_read_input_tokens) ? usage.cache_read_input_tokens.to_i : 0
+        cache_info = cache_create.positive? || cache_read.positive? ? " cache_create=#{cache_create} cache_read=#{cache_read}" : ''
+        RubynCode::Debug.token("in=#{input_tokens} out=#{output_tokens}#{cache_info}")
         @context_manager.track_usage(usage)
       rescue NoMethodError
         # context_manager does not implement track_usage yet
       end
+      def update_task_budget(response)
+        usage = response.respond_to?(:usage) ? response.usage : nil
+        return unless usage
+        output = usage.respond_to?(:output_tokens) ? usage.output_tokens.to_i : 0
+        input = usage.respond_to?(:input_tokens) ? usage.input_tokens.to_i : 0
+        # Initialize on first response, then decrement
+        @task_budget_remaining ||= TASK_BUDGET_TOTAL
+        @task_budget_remaining = [@task_budget_remaining - input - output, 0].max
+        RubynCode::Debug.token("task_budget_remaining=#{@task_budget_remaining}/#{TASK_BUDGET_TOTAL}")
+      end
       def max_iterations_warning
         warning = "Reached maximum iteration limit (#{MAX_ITERATIONS}). " \
-                  "The conversation may be incomplete. Please review the current state " \
-                  "and continue if needed."
-        @conversation.add_assistant_message([{ type: "text", text: warning }])
+                  'The conversation may be incomplete. Please review the current state ' \
+                  'and continue if needed.'
+        @conversation.add_assistant_message([{ type: 'text', text: warning }])
         warning
       end

data/lib/rubyn_code/agent/loop_detector.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-require "digest"
+require 'digest'
 module RubynCode
   module Agent
@@ -49,9 +49,9 @@ module RubynCode
       #
       # @return [String]
       def nudge_message
-        "You appear to be repeating the same tool call without making progress. " \
-          "Please try a different approach, use a different tool, or ask the user " \
-          "for clarification. Do not repeat the same action."
+        'You appear to be repeating the same tool call without making progress. ' \
+          'Please try a different approach, use a different tool, or ask the user ' \
+          'for clarification. Do not repeat the same action.'
       end
       private
@@ -60,7 +60,7 @@ module RubynCode
         input_str = case tool_input
                     when Hash   then stable_hash(tool_input)
                     when String then tool_input
-                    else ""
+                    else ''
                     end
         "#{tool_name}:#{Digest::SHA256.hexdigest(input_str)[0, 16]}"
@@ -71,7 +71,7 @@ module RubynCode
       def stable_hash(hash)
         hash.sort_by { |k, _| k.to_s }
             .map { |k, v| "#{k}=#{v}" }
-            .join("&")
+            .join('&')
       end
     end
   end

data/lib/rubyn_code/auth/RUBYN.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Auth Layer
+OAuth PKCE flow + token storage with fallback chain.
+## Classes
+- **`OAuth`** — Full OAuth PKCE flow. Generates code verifier/challenge, opens browser for
+  authorization, exchanges code for tokens. Custom errors: `StateMismatchError`,
+  `TokenExchangeError`, `RefreshError`.
+- **`Server`** — Local WEBrick server on `127.0.0.1:19275` to receive the OAuth callback.
+  Uses mutex + condition variable to block until the redirect arrives. Times out after 120s.
+- **`TokenStore`** — Token persistence with a three-level fallback chain:
+  1. macOS Keychain (reads Claude Code's OAuth token from `Claude Code-credentials`)
+  2. Local YAML file (`~/.rubyn-code/tokens.yml`)
+  3. `ANTHROPIC_API_KEY` environment variable
+  Handles token refresh with a 5-minute expiry buffer.