RubyGems - rubyn-code - Versions diffs - 0.2.2 → 0.3.0 - Mend

rubyn-code 0.2.2 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (114)

checksums.yaml +4 -4
data/README.md +91 -3
data/lib/rubyn_code/agent/background_job_handler.rb +71 -0
data/lib/rubyn_code/agent/conversation.rb +55 -56
data/lib/rubyn_code/agent/dynamic_tool_schema.rb +99 -0
data/lib/rubyn_code/agent/feedback_handler.rb +49 -0
data/lib/rubyn_code/agent/llm_caller.rb +149 -0
data/lib/rubyn_code/agent/loop.rb +175 -683
data/lib/rubyn_code/agent/loop_detector.rb +50 -11
data/lib/rubyn_code/agent/prompts.rb +109 -0
data/lib/rubyn_code/agent/response_modes.rb +111 -0
data/lib/rubyn_code/agent/response_parser.rb +111 -0
data/lib/rubyn_code/agent/system_prompt_builder.rb +205 -0
data/lib/rubyn_code/agent/tool_processor.rb +158 -0
data/lib/rubyn_code/agent/usage_tracker.rb +59 -0
data/lib/rubyn_code/auth/oauth.rb +80 -64
data/lib/rubyn_code/auth/server.rb +21 -24
data/lib/rubyn_code/auth/token_store.rb +31 -44
data/lib/rubyn_code/autonomous/daemon.rb +29 -18
data/lib/rubyn_code/autonomous/idle_poller.rb +4 -4
data/lib/rubyn_code/autonomous/task_claimer.rb +36 -40
data/lib/rubyn_code/background/worker.rb +64 -76
data/lib/rubyn_code/cli/app.rb +128 -114
data/lib/rubyn_code/cli/commands/model.rb +75 -18
data/lib/rubyn_code/cli/commands/new_session.rb +45 -0
data/lib/rubyn_code/cli/daemon_runner.rb +28 -11
data/lib/rubyn_code/cli/renderer.rb +109 -60
data/lib/rubyn_code/cli/repl.rb +42 -373
data/lib/rubyn_code/cli/repl_commands.rb +176 -0
data/lib/rubyn_code/cli/repl_lifecycle.rb +75 -0
data/lib/rubyn_code/cli/repl_setup.rb +145 -0
data/lib/rubyn_code/cli/setup.rb +6 -2
data/lib/rubyn_code/cli/stream_formatter.rb +56 -49
data/lib/rubyn_code/cli/version_check.rb +28 -11
data/lib/rubyn_code/config/defaults.rb +10 -0
data/lib/rubyn_code/config/project_profile.rb +185 -0
data/lib/rubyn_code/config/settings.rb +100 -1
data/lib/rubyn_code/context/auto_compact.rb +1 -1
data/lib/rubyn_code/context/context_budget.rb +167 -0
data/lib/rubyn_code/context/decision_compactor.rb +99 -0
data/lib/rubyn_code/context/manager.rb +7 -5
data/lib/rubyn_code/context/micro_compact.rb +29 -19
data/lib/rubyn_code/context/schema_filter.rb +64 -0
data/lib/rubyn_code/db/connection.rb +31 -26
data/lib/rubyn_code/db/migrator.rb +44 -28
data/lib/rubyn_code/hooks/built_in.rb +14 -10
data/lib/rubyn_code/index/codebase_index.rb +245 -0
data/lib/rubyn_code/learning/extractor.rb +65 -82
data/lib/rubyn_code/learning/injector.rb +22 -23
data/lib/rubyn_code/learning/instinct.rb +71 -42
data/lib/rubyn_code/learning/shortcut.rb +95 -0
data/lib/rubyn_code/llm/adapters/anthropic.rb +270 -0
data/lib/rubyn_code/llm/adapters/anthropic_streaming.rb +215 -0
data/lib/rubyn_code/llm/adapters/base.rb +35 -0
data/lib/rubyn_code/llm/adapters/json_parsing.rb +21 -0
data/lib/rubyn_code/llm/adapters/openai.rb +246 -0
data/lib/rubyn_code/llm/adapters/openai_compatible.rb +46 -0
data/lib/rubyn_code/llm/adapters/openai_message_translator.rb +90 -0
data/lib/rubyn_code/llm/adapters/openai_streaming.rb +141 -0
data/lib/rubyn_code/llm/adapters/prompt_caching.rb +60 -0
data/lib/rubyn_code/llm/client.rb +55 -252
data/lib/rubyn_code/llm/model_router.rb +237 -0
data/lib/rubyn_code/llm/streaming.rb +4 -227
data/lib/rubyn_code/mcp/client.rb +1 -1
data/lib/rubyn_code/mcp/config.rb +9 -12
data/lib/rubyn_code/mcp/sse_transport.rb +15 -13
data/lib/rubyn_code/mcp/stdio_transport.rb +16 -18
data/lib/rubyn_code/mcp/tool_bridge.rb +31 -62
data/lib/rubyn_code/memory/session_persistence.rb +59 -58
data/lib/rubyn_code/memory/store.rb +42 -55
data/lib/rubyn_code/observability/budget_enforcer.rb +46 -32
data/lib/rubyn_code/observability/cost_calculator.rb +32 -8
data/lib/rubyn_code/observability/skill_analytics.rb +116 -0
data/lib/rubyn_code/observability/token_analytics.rb +130 -0
data/lib/rubyn_code/observability/usage_reporter.rb +79 -61
data/lib/rubyn_code/output/diff_renderer.rb +102 -77
data/lib/rubyn_code/output/formatter.rb +11 -11
data/lib/rubyn_code/permissions/policy.rb +11 -13
data/lib/rubyn_code/permissions/prompter.rb +8 -9
data/lib/rubyn_code/protocols/plan_approval.rb +25 -20
data/lib/rubyn_code/skills/document.rb +33 -29
data/lib/rubyn_code/skills/ttl_manager.rb +100 -0
data/lib/rubyn_code/sub_agents/runner.rb +20 -25
data/lib/rubyn_code/tasks/dag.rb +25 -24
data/lib/rubyn_code/tools/ask_user.rb +44 -0
data/lib/rubyn_code/tools/background_run.rb +2 -1
data/lib/rubyn_code/tools/base.rb +26 -32
data/lib/rubyn_code/tools/bash.rb +2 -1
data/lib/rubyn_code/tools/edit_file.rb +74 -18
data/lib/rubyn_code/tools/executor.rb +74 -24
data/lib/rubyn_code/tools/file_cache.rb +95 -0
data/lib/rubyn_code/tools/git_commit.rb +12 -10
data/lib/rubyn_code/tools/git_log.rb +12 -10
data/lib/rubyn_code/tools/glob.rb +23 -7
data/lib/rubyn_code/tools/grep.rb +2 -1
data/lib/rubyn_code/tools/load_skill.rb +13 -6
data/lib/rubyn_code/tools/memory_search.rb +14 -13
data/lib/rubyn_code/tools/memory_write.rb +2 -1
data/lib/rubyn_code/tools/output_compressor.rb +185 -0
data/lib/rubyn_code/tools/read_file.rb +11 -6
data/lib/rubyn_code/tools/review_pr.rb +127 -80
data/lib/rubyn_code/tools/run_specs.rb +26 -15
data/lib/rubyn_code/tools/schema.rb +4 -10
data/lib/rubyn_code/tools/spawn_agent.rb +113 -82
data/lib/rubyn_code/tools/spawn_teammate.rb +107 -64
data/lib/rubyn_code/tools/spec_output_parser.rb +118 -0
data/lib/rubyn_code/tools/task.rb +17 -17
data/lib/rubyn_code/tools/web_fetch.rb +62 -47
data/lib/rubyn_code/tools/web_search.rb +66 -48
data/lib/rubyn_code/tools/write_file.rb +59 -1
data/lib/rubyn_code/version.rb +1 -1
data/lib/rubyn_code.rb +40 -1
data/skills/rubyn_self_test.md +121 -0
metadata +53 -1

data/lib/rubyn_code/agent/loop.rb CHANGED

@@ -1,115 +1,67 @@
 # frozen_string_literal: true
+require_relative 'system_prompt_builder'
+require_relative 'response_parser'
+require_relative 'tool_processor'
+require_relative 'background_job_handler'
+require_relative 'feedback_handler'
+require_relative 'llm_caller'
 module RubynCode
   module Agent
     class Loop
+      include SystemPromptBuilder
+      include ResponseParser
+      include ToolProcessor
+      include BackgroundJobHandler
+      include FeedbackHandler
+      include LlmCaller
       MAX_ITERATIONS = Config::Defaults::MAX_ITERATIONS
-      # @param llm_client [LLM::Client]
-      # @param tool_executor [Tools::Executor]
-      # @param context_manager [Context::Manager]
-      # @param hook_runner [Hooks::Runner]
-      # @param conversation [Agent::Conversation]
-      # @param permission_tier [Symbol] one of Permissions::Tier::ALL
-      # @param deny_list [Permissions::DenyList]
-      # @param budget_enforcer [Observability::BudgetEnforcer, nil]
-      # @param background_manager [Background::Worker, nil]
-      # @param stall_detector [Agent::LoopDetector]
-      def initialize(
-        llm_client:,
-        tool_executor:,
-        context_manager:,
-        hook_runner:,
-        conversation:,
-        permission_tier: Permissions::Tier::ALLOW_READ,
-        deny_list: Permissions::DenyList.new,
-        budget_enforcer: nil,
-        background_manager: nil,
-        stall_detector: LoopDetector.new,
-        on_tool_call: nil,
-        on_tool_result: nil,
-        on_text: nil,
-        skill_loader: nil,
-        project_root: nil
-      )
-        @llm_client         = llm_client
-        @tool_executor      = tool_executor
-        @context_manager    = context_manager
-        @hook_runner        = hook_runner
-        @conversation       = conversation
-        @permission_tier    = permission_tier
-        @deny_list          = deny_list
-        @budget_enforcer    = budget_enforcer
-        @background_manager = background_manager
-        @stall_detector     = stall_detector
-        @on_tool_call       = on_tool_call
-        @on_tool_result     = on_tool_result
-        @on_text            = on_text
-        @skill_loader       = skill_loader
-        @project_root       = project_root
-        @plan_mode          = false
+      # @param opts [Hash] keyword arguments for loop configuration
+      # @option opts [LLM::Client]                    :llm_client
+      # @option opts [Tools::Executor]                :tool_executor
+      # @option opts [Context::Manager]               :context_manager
+      # @option opts [Hooks::Runner]                  :hook_runner
+      # @option opts [Agent::Conversation]            :conversation
+      # @option opts [Symbol]                         :permission_tier
+      # @option opts [Permissions::DenyList]          :deny_list
+      # @option opts [Observability::BudgetEnforcer]  :budget_enforcer
+      # @option opts [Background::Worker]             :background_manager
+      # @option opts [Agent::LoopDetector]            :stall_detector
+      # @option opts [Proc]                           :on_tool_call
+      # @option opts [Proc]                           :on_tool_result
+      # @option opts [Proc]                           :on_text
+      # @option opts [Object]                         :skill_loader
+      # @option opts [String]                         :project_root
+      def initialize(**opts)
+        assign_dependencies(opts)
+        assign_callbacks(opts)
+        @plan_mode = false
       end
       # @return [Boolean]
       attr_accessor :plan_mode
-      # Send a user message and run the agent loop until a final text response
-      # is produced or the iteration limit is reached.
+      # Send a user message and run the agent loop until a final text
+      # response is produced or the iteration limit is reached.
       #
       # @param user_input [String]
       # @return [String] the final assistant text response
       def send_message(user_input)
+        initialize_session!
         check_user_feedback(user_input)
-        # Drain any completed background jobs BEFORE adding the user message,
-        # so the LLM sees the results in the right order
         drain_background_notifications
+        inject_skill_listing unless @skills_injected
+        @decision_compactor&.detect_topic_switch(user_input)
+        @skill_ttl&.tick!
         @conversation.add_user_message(user_input)
-        @max_tokens_override = nil
-        @output_recovery_count = 0
-        @task_budget_remaining = nil
+        reset_iteration_state
         MAX_ITERATIONS.times do |iteration|
-          RubynCode::Debug.loop_tick("iteration=#{iteration} messages=#{@conversation.length} max_tokens_override=#{@max_tokens_override || 'default'}")
-          response = call_llm
-          tool_calls = extract_tool_calls(response)
-          stop_reason = response.respond_to?(:stop_reason) ? response.stop_reason : nil
-          RubynCode::Debug.llm("stop_reason=#{stop_reason} tool_calls=#{tool_calls.size} content_blocks=#{get_content(response).size}")
-          if tool_calls.empty?
-            if truncated?(response)
-              RubynCode::Debug.recovery('Text response truncated, entering recovery')
-              response = recover_truncated_response(response)
-            end
-            # If background jobs are running, wait for them instead of burning LLM calls
-            if has_pending_background_jobs?
-              @conversation.add_assistant_message(response_content(response))
-              wait_for_background_jobs
-              next
-            end
-            @conversation.add_assistant_message(response_content(response))
-            return extract_response_text(response)
-          end
-          # Tier 1: If a tool-use response was truncated, silently escalate and retry
-          if truncated?(response) && !@max_tokens_override
-            RubynCode::Debug.recovery("Tier 1: Escalating max_tokens from #{Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS} to #{Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS}")
-            @max_tokens_override = Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
-            next
-          end
-          @conversation.add_assistant_message(get_content(response))
-          process_tool_calls(tool_calls)
-          # Drain notifications after tool execution — jobs may have finished
-          drain_background_notifications
-          run_maintenance(iteration)
+          result = run_iteration(iteration)
+          return result if result
         end
         RubynCode::Debug.warn("Hit MAX_ITERATIONS (#{MAX_ITERATIONS})")
@@ -118,658 +70,198 @@ module RubynCode
       private
-      # ── LLM interaction ──────────────────────────────────────────────
-      TASK_BUDGET_TOTAL = 100_000 # tokens per user message
-      def call_llm
-        @hook_runner.fire(:pre_llm_call, conversation: @conversation)
-        opts = {
-          messages: @conversation.to_api_format,
-          tools: @plan_mode ? read_only_tool_definitions : tool_definitions,
-          system: build_system_prompt,
-          on_text: @on_text
-        }
-        opts[:max_tokens] = @max_tokens_override if @max_tokens_override
-        # Task budget: tell the model how many tokens remain for this task
-        opts[:task_budget] = { total: TASK_BUDGET_TOTAL, remaining: @task_budget_remaining } if @task_budget_remaining
-        response = @llm_client.chat(**opts)
-        @hook_runner.fire(:post_llm_call, response: response, conversation: @conversation)
-        track_usage(response)
-        update_task_budget(response)
-        response
-      rescue LLM::Client::PromptTooLongError
-        # 413: context too large — compact and retry once
-        RubynCode::Debug.recovery('413 prompt too long — running emergency compaction')
-        @context_manager.check_compaction!(@conversation)
-        response = @llm_client.chat(**opts, messages: @conversation.to_api_format)
-        @hook_runner.fire(:post_llm_call, response: response, conversation: @conversation)
-        track_usage(response)
-        response
+      def assign_dependencies(opts)
+        assign_required_deps(opts)
+        assign_optional_deps(opts)
       end
-      SYSTEM_PROMPT = <<~PROMPT
-        You are Rubyn — a snarky but lovable AI coding assistant who lives and breathes Ruby.
-        You're the kind of pair programmer who'll roast your colleague's `if/elsif/elsif/else` chain
-        with a smirk, then immediately rewrite it as a beautiful `case/in` with pattern matching.
-        You're sharp, opinionated, and genuinely helpful. Think of yourself as the senior Ruby dev
-        who's seen every Rails antipattern in production and somehow still loves this language.
-        ## Personality
-        - Snarky but never mean. You tease the code, not the coder.
-        - You celebrate good Ruby — "Oh, a proper guard clause? You love to see it."
-        - You mourn bad Ruby — "A `for` loop? In MY Ruby? It's more likely than you think."
-        - Brief and punchy. No walls of text unless teaching something important.
-        - You use Ruby metaphors: "Let's refactor this like Matz intended."
-        - When something is genuinely good code, you say so. No notes.
-        ## Ruby Convictions (non-negotiable)
-        - `frozen_string_literal: true` in every file. Every. Single. One.
-        - Prefer `each`, `map`, `select`, `reduce` over manual iteration. Always.
-        - Guard clauses over nested conditionals. Return early, return often.
-        - `Data.define` for value objects (Ruby 3.2+). `Struct` only if you need mutability.
-        - `snake_case` methods, `CamelCase` classes, `SCREAMING_SNAKE` constants. No exceptions.
-        - Single quotes unless you're interpolating. Fight me.
-        - Methods under 15 lines. Classes under 100. Extract or explain why not.
-        - Explicit over clever. Metaprogramming is a spice, not the main course.
-        - `raise` over `fail`. Rescue specific exceptions, never bare `rescue`.
-        - Prefer composition over inheritance. Mixins are not inheritance.
-        - `&&` / `||` over `and` / `or`. The precedence difference has burned too many.
-        - `dig` for nested hashes. `fetch` with defaults over `[]` with `||`.
-        - `freeze` your constants. Frozen arrays, frozen hashes, frozen regexps.
-        - No `OpenStruct`. Ever. It's slow, it's a footgun, and `Data.define` exists.
-        ## Rails Convictions
-        - Skinny controllers, fat models is dead. Skinny controllers, skinny models, service objects.
-        - `has_many :through` over `has_and_belongs_to_many`. Every time.
-        - Add database indexes for every foreign key and every column you query.
-        - Migrations are generated, not handwritten. `rails generate migration`.
-        - Strong parameters in controllers. No `permit!`. Ever.
-        - Use `find_each` for batch processing. `each` on a large scope is a memory bomb.
-        - `exists?` over `present?` for checking DB existence. One is a COUNT, the other loads the record.
-        - Scopes over class methods for chainable queries.
-        - Background jobs for anything that takes more than 100ms.
-        - Don't put business logic in callbacks. That way lies madness.
-        ## Testing Convictions
-        - RSpec > Minitest (but you'll work with either without complaining... much)
-        - FactoryBot over fixtures. Factories are explicit. Fixtures are magic.
-        - One assertion per test when practical. "It does three things" is three tests.
-        - `let` over instance variables. `let!` only when you need eager evaluation.
-        - `described_class` over repeating the class name.
-        - Test behavior, not implementation. Mock the boundary, not the internals.
-        ## How You Work
-        - For greetings and casual chat, just respond naturally. No need to run tools.
-        - Only use tools when the user asks you to DO something (read, write, search, run, review).
-        - Read before you write. Always understand existing code before suggesting changes.
-        - Use tools to verify. Don't guess if a file exists — check.
-        - Show diffs when editing. The human should see what changed.
-        - Run specs after changes. If they break, fix them.
-        - When you are asked to work in a NEW directory you haven't seen yet, check for RUBYN.md, CLAUDE.md, or AGENT.md there. But don't do this unprompted on startup — those files are already loaded into your context.
-        - Load skills when you need deep knowledge on a topic. Don't wing it.
-        - You have 112 curated best-practice skill documents covering Ruby, Rails, RSpec, design patterns, and code quality. When writing new code or reviewing existing code, load the relevant skill BEFORE implementing. Don't reinvent patterns that are already documented.
-        - HOWEVER: always respect patterns already established in the codebase. If the project uses a specific convention (e.g. service objects, a particular test style, a custom base class), follow that convention even if it differs from the skill doc. Consistency with the codebase beats textbook best practice. Only break from established patterns if they are genuinely harmful (security issues, major performance problems, or bugs).
-        - Keep responses concise. Code speaks louder than paragraphs.
-        - Use spawn_agent sparingly — only for tasks that require reading many files (10+) or deep exploration. For simple reads or edits, use tools directly. Don't spawn a sub-agent when a single read_file or grep will do.
-        - IMPORTANT: You can call MULTIPLE tools in a single response. When you need to read several files, search multiple patterns, or perform independent operations, return all tool_use blocks at once rather than one at a time. This is dramatically faster and cheaper. For example, if you need to read 5 files, emit 5 read_file tool calls in one response — don't read them one by one across 5 turns.
-        ## Memory
-        You have persistent memory across sessions via `memory_write` and `memory_search` tools.
-        Use them proactively:
-        - When the user tells you a preference or convention, save it: memory_write(content: "User prefers Grape over Rails controllers for APIs", category: "user_preference")
-        - When you discover a project pattern (e.g. "this app uses service objects in app/services/"), save it: memory_write(content: "...", category: "project_convention")
-        - When you fix a tricky bug, save the resolution: memory_write(content: "...", category: "error_resolution")
-        - When you learn a key architectural decision, save it: memory_write(content: "...", category: "decision")
-        - Before starting work on a project, search memory for context: memory_search(query: "project conventions")
-        - Don't save trivial things. Save what would be useful in a future session.
-        Categories: user_preference, project_convention, error_resolution, decision, code_pattern
-      PROMPT
-      PLAN_MODE_PROMPT = <<~PLAN
-        ## 🧠 Plan Mode Active
-        You are in PLAN MODE. This means:
-        - Reason through the problem step by step
-        - You have READ-ONLY tools available — use them to explore the codebase
-        - Read files, grep, glob, check git status/log/diff — gather context
-        - Do NOT write, edit, execute, or modify anything
-        - Outline your plan with numbered steps
-        - Identify files you'd need to read or modify
-        - Call out risks, edge cases, and trade-offs
-        - Ask clarifying questions if the request is ambiguous
-        - When the user is satisfied with the plan, they'll toggle plan mode off with /plan
-        You CAN use read-only tools. You MUST NOT use any tool that writes, edits, or executes.
-      PLAN
-      PLAN_MODE_RISK_LEVELS = %i[read].freeze
-      def build_system_prompt
-        parts = [SYSTEM_PROMPT]
-        parts << PLAN_MODE_PROMPT if @plan_mode
-        parts << "Working directory: #{@project_root}" if @project_root
-        # Inject memories from previous sessions
-        memories = load_memories
-        parts << "\n## Your Memories (from previous sessions)\n#{memories}" unless memories.empty?
-        # Load RUBYN.md / CLAUDE.md / AGENT.md files
-        rubyn_instructions = load_rubyn_md
-        parts << "\n## Project Instructions\n#{rubyn_instructions}" unless rubyn_instructions.empty?
-        # Inject learned instincts from previous sessions
-        instincts = load_instincts
-        parts << "\n## Learned Instincts (from previous sessions)\n#{instincts}" unless instincts.empty?
-        # Load custom skills
-        if @skill_loader
-          descriptions = @skill_loader.descriptions_for_prompt
-          unless descriptions.empty?
-            parts << "\n## Available Skills (use load_skill tool to load full content)"
-            parts << descriptions
-          end
-        end
-        # List deferred tools so the LLM knows they exist
-        deferred = deferred_tool_names
-        unless deferred.empty?
-          parts << "\n## Additional Tools Available"
-          parts << 'These tools are available but not loaded yet. Just call them by name and they will work:'
-          parts << deferred.map { |n| "- #{n}" }.join("\n")
-        end
-        parts.join("\n")
+      def assign_required_deps(opts)
+        @llm_client      = opts.fetch(:llm_client)
+        @tool_executor   = opts.fetch(:tool_executor)
+        @context_manager = opts.fetch(:context_manager)
+        @hook_runner     = opts.fetch(:hook_runner)
+        @conversation    = opts.fetch(:conversation)
+        @permission_tier = opts.fetch(:permission_tier, Permissions::Tier::ALLOW_READ)
+        @deny_list       = opts.fetch(:deny_list, Permissions::DenyList.new)
       end
-      def deferred_tool_names
-        all_names = @tool_executor.tool_definitions.map { |t| t[:name] || t['name'] }
-        active_names = tool_definitions.map { |t| t[:name] || t['name'] }
-        all_names - active_names
+      def assign_optional_deps(opts)
+        @budget_enforcer    = opts[:budget_enforcer]
+        @background_manager = opts[:background_manager]
+        @stall_detector     = opts.fetch(:stall_detector, LoopDetector.new)
+        @skill_loader       = opts[:skill_loader]
+        @project_root       = opts[:project_root]
+        @decision_compactor = build_decision_compactor
+        @skill_ttl          = Skills::TtlManager.new
+        @session_initialized = false
       end
-      def load_memories
-        return '' unless @project_root
-        db = DB::Connection.instance
-        search = Memory::Search.new(db, project_path: @project_root)
-        recent = search.recent(limit: 20)
-        return '' if recent.empty?
-        recent.map do |m|
-          category = m.respond_to?(:category) ? m.category : (m[:category] || m['category'])
-          content = m.respond_to?(:content) ? m.content : (m[:content] || m['content'])
-          "[#{category}] #{content}"
-        end.join("\n")
+      def build_decision_compactor
+        Context::DecisionCompactor.new(context_manager: @context_manager)
       rescue StandardError
-        ''
+        nil
       end
-      def load_instincts
-        return '' unless @project_root
+      # One-time session initialization: build project profile and
+      # codebase index so the AI doesn't have to explore from scratch.
+      def initialize_session!
+        return if @session_initialized || !@project_root
-        db = DB::Connection.instance
-        Learning::Injector.call(db: db, project_path: @project_root)
-      rescue StandardError
-        ''
+        @session_initialized = true
+        build_project_profile!
+        build_codebase_index!
       end
-      # ── Instinct reinforcement ───────────────────────────────────
-      POSITIVE_PATTERNS = /\b(yes that fixed it|that worked|perfect|thanks|exactly|great|nailed it|that.s right|correct)\b/i
-      NEGATIVE_PATTERNS = /\b(no[, ]+use|wrong|that.s not right|instead use|don.t do that|actually[, ]+use|incorrect)\b/i
-      def check_user_feedback(user_input)
-        return unless @project_root
-        db = DB::Connection.instance
-        recent_instincts = db.query(
-          'SELECT id FROM instincts WHERE project_path = ? ORDER BY updated_at DESC LIMIT 5',
-          [@project_root]
-        ).to_a
-        return if recent_instincts.empty?
-        if user_input.match?(POSITIVE_PATTERNS)
-          recent_instincts.first(2).each do |row|
-            Learning::InstinctMethods.reinforce_in_db(row['id'], db, helpful: true)
-          end
-        elsif user_input.match?(NEGATIVE_PATTERNS)
-          recent_instincts.first(2).each do |row|
-            Learning::InstinctMethods.reinforce_in_db(row['id'], db, helpful: false)
-          end
-        end
-      rescue StandardError
-        # Non-critical; don't interrupt the conversation
+      def build_project_profile!
+        profile = Config::ProjectProfile.new(project_root: @project_root)
+        profile.load_or_detect!
+        RubynCode::Debug.agent("Project profile loaded (#{profile.data.size} keys)")
+      rescue StandardError => e
+        RubynCode::Debug.warn("Project profile failed: #{e.message}")
       end
-      # Load instruction files from multiple locations.
-      # Detects RUBYN.md, CLAUDE.md, and AGENT.md — so projects that already
-      # have CLAUDE.md or AGENT.md work out of the box with Rubyn Code.
-      INSTRUCTION_FILES = %w[RUBYN.md CLAUDE.md AGENT.md].freeze
-      def load_rubyn_md
-        found = []
-        if @project_root
-          # Walk UP from project root to find parent instruction files
-          walk_up_for_instructions(@project_root, found)
-          # Project root
-          INSTRUCTION_FILES.each do |name|
-            collect_instruction(File.join(@project_root, name), found)
-          end
-          collect_instruction(File.join(@project_root, '.rubyn-code', 'RUBYN.md'), found)
-          # One level of child directories
-          INSTRUCTION_FILES.each do |name|
-            Dir.glob(File.join(@project_root, '*', name)).each do |path|
-              collect_instruction(path, found)
-            end
-          end
-        end
-        # User global
-        collect_instruction(File.join(Config::Defaults::HOME_DIR, 'RUBYN.md'), found)
-        found.uniq.join("\n\n")
+      def build_codebase_index!
+        index = Index::CodebaseIndex.new(project_root: @project_root)
+        index.load_or_build!
+        RubynCode::Debug.agent("Codebase index: #{index.stats[:nodes]} nodes, #{index.stats[:files_indexed]} files")
+      rescue StandardError => e
+        RubynCode::Debug.warn("Codebase index failed: #{e.message}")
       end
-      def walk_up_for_instructions(start_dir, found)
-        dir = File.dirname(start_dir)
-        home = File.expand_path('~')
-        while dir.length >= home.length
-          INSTRUCTION_FILES.each do |name|
-            collect_instruction(File.join(dir, name), found)
-          end
-          break if dir == home
-          dir = File.dirname(dir)
-        end
+      def assign_callbacks(opts)
+        @on_tool_call   = opts[:on_tool_call]
+        @on_tool_result = opts[:on_tool_result]
+        @on_text        = opts[:on_text]
+        @skills_injected = false
       end
-      def collect_instruction(path, found)
-        return unless File.exist?(path) && File.file?(path)
-        content = File.read(path, encoding: 'utf-8')
-                      .encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
-                      .strip
-        return if content.empty?
-        found << "# From #{path}\n#{content}"
+      def reset_iteration_state
+        @max_tokens_override   = nil
+        @output_recovery_count = 0
+        @task_budget_remaining = nil
       end
-      # Core tools always included. Others load on first use.
-      CORE_TOOLS = %w[
-        read_file write_file edit_file glob grep bash
-        spawn_agent background_run
-      ].freeze
+      def run_iteration(iteration)
+        log_iteration(iteration)
+        compact_if_needed # ensure context is under threshold before LLM call
+        response   = call_llm
+        tool_calls = extract_tool_calls(response)
+        log_response(response, tool_calls)
-      def tool_definitions
-        all_tools = @tool_executor.tool_definitions
-        return all_tools if all_tools.size <= CORE_TOOLS.size
+        return handle_text_response(response) if tool_calls.empty?
-        @discovered_tools ||= Set.new
-        all_tools.select do |t|
-          name = t[:name] || t['name']
-          CORE_TOOLS.include?(name) || @discovered_tools.include?(name)
-        end
+        handle_tool_response(response, tool_calls, iteration)
       end
-      def discover_tool(name)
-        @discovered_tools ||= Set.new
-        @discovered_tools.add(name)
+      def log_iteration(iteration)
+        RubynCode::Debug.loop_tick(
+          "iteration=#{iteration} messages=#{@conversation.length} " \
+          "max_tokens_override=#{@max_tokens_override || 'default'}"
+        )
       end
-      def read_only_tool_definitions
-        Tools::Registry.all
-                       .select { |t| PLAN_MODE_RISK_LEVELS.include?(t::RISK_LEVEL) }
-                       .map(&:to_schema)
+      def log_response(response, tool_calls)
+        stop_reason = extract_stop_reason(response)
+        RubynCode::Debug.llm(
+          "stop_reason=#{stop_reason} tool_calls=#{tool_calls.size} " \
+          "content_blocks=#{get_content(response).size}"
+        )
       end
-      # ── Background job waiting ────────────────────────────────────────
-      def wait_for_background_jobs
-        max_wait = 300 # 5 minutes max
-        poll_interval = 3
-        RubynCode::Debug.agent("Waiting for background jobs to finish (polling every #{poll_interval}s, max #{max_wait}s)")
-        elapsed = 0
-        while elapsed < max_wait && has_pending_background_jobs?
-          sleep poll_interval
-          elapsed += poll_interval
-          drain_background_notifications
-        end
-        # Final drain to pick up any last results
-        drain_background_notifications
-        RubynCode::Debug.agent("Background wait done (#{elapsed}s)")
-      end
-      # ── Tool processing ──────────────────────────────────────────────
-      def process_tool_calls(tool_calls)
-        aggregate_chars = 0
-        budget = Config::Defaults::MAX_MESSAGE_TOOL_RESULTS_CHARS
-        tool_calls.each do |tool_call|
-          tool_name  = field(tool_call, :name)
-          tool_input = field(tool_call, :input) || {}
-          tool_id    = field(tool_call, :id)
-          decision = Permissions::Policy.check(
-            tool_name: tool_name,
-            tool_input: tool_input,
-            tier: @permission_tier,
-            deny_list: @deny_list
+      def handle_text_response(response)
+        if truncated?(response)
+          RubynCode::Debug.recovery(
+            'Text response truncated, entering recovery'
           )
-          begin
-            @on_tool_call&.call(tool_name, tool_input)
-          rescue StandardError
-            nil
-          end
-          result, is_error = execute_with_permission(decision, tool_name, tool_input, tool_id)
-          # Enforce per-message aggregate tool result budget
-          aggregate_chars += result.to_s.length
-          if aggregate_chars > budget
-            remaining = [budget - (aggregate_chars - result.to_s.length), 500].max
-            result = "#{result.to_s[0,
-                                    remaining]}\n\n[truncated — tool result budget exceeded (#{budget} chars/message)]"
-            RubynCode::Debug.token("Tool result budget exceeded: #{aggregate_chars}/#{budget} chars")
-          end
-          begin
-            @on_tool_result&.call(tool_name, result, is_error)
-          rescue StandardError
-            nil
-          end
-          @stall_detector.record(tool_name, tool_input)
-          # CRITICAL: always add tool_result to conversation — without this the
-          # API will reject the next request with "tool_use without tool_result"
-          @conversation.add_tool_result(tool_id, tool_name, result, is_error: is_error)
-        end
-      end
-      def execute_with_permission(decision, tool_name, tool_input, _tool_id)
-        case decision
-        when :deny
-          ["Tool '#{tool_name}' is blocked by the deny list.", true]
-        when :ask
-          if prompt_user(tool_name, tool_input)
-            execute_tool(tool_name, tool_input)
-          else
-            ["User denied permission for '#{tool_name}'.", true]
-          end
-        when :allow
-          execute_tool(tool_name, tool_input)
-        else
-          ["Unknown permission decision: #{decision}", true]
+          response = recover_truncated_response(response)
         end
-      end
-      def execute_tool(tool_name, tool_input)
-        # Auto-discover tools on first use so they appear in future calls
-        discover_tool(tool_name)
-        @hook_runner.fire(:pre_tool_use, tool_name: tool_name, tool_input: tool_input)
-        result = @tool_executor.execute(tool_name, symbolize_keys(tool_input))
-        @hook_runner.fire(:post_tool_use, tool_name: tool_name, tool_input: tool_input, result: result)
-        [result.to_s, false]
-      rescue StandardError => e
-        ["Error executing #{tool_name}: #{e.message}", true]
-      end
-      def prompt_user(tool_name, tool_input)
-        risk = resolve_tool_risk(tool_name)
-        if risk == :destructive
-          Permissions::Prompter.confirm_destructive(tool_name, tool_input)
-        else
-          Permissions::Prompter.confirm(tool_name, tool_input)
-        end
-      end
-      def resolve_tool_risk(tool_name)
-        tool_class = Tools::Registry.get(tool_name)
-        tool_class.risk_level
-      rescue ToolNotFoundError
-        :unknown
-      end
-      # ── Maintenance ──────────────────────────────────────────────────
-      def run_maintenance(_iteration)
-        run_compaction
-        check_budget
-        check_stall_detection
-      end
-      def run_compaction
-        before = @conversation.length
-        est = @context_manager.estimated_tokens(@conversation.messages)
-        RubynCode::Debug.token("context=#{est} tokens (~#{before} messages, threshold=#{Config::Defaults::CONTEXT_THRESHOLD_TOKENS})")
-        @context_manager.check_compaction!(@conversation)
-        after = @conversation.length
-        if after < before
-          new_est = @context_manager.estimated_tokens(@conversation.messages)
-          RubynCode::Debug.loop_tick("Compacted: #{before} -> #{after} messages (#{est} -> #{new_est} tokens)")
-        end
-      rescue NoMethodError
-        # context_manager does not implement check_compaction! yet
-      end
-      def check_budget
-        return unless @budget_enforcer
-        @budget_enforcer.check!
-      rescue BudgetExceededError
-        raise
-      rescue NoMethodError
-        # budget_enforcer does not implement check! yet
-      end
-      def check_stall_detection
-        return unless @stall_detector.stalled?
-        nudge = @stall_detector.nudge_message
-        @conversation.add_user_message(nudge)
-        @stall_detector.reset!
-      end
-      def drain_background_notifications
-        return unless @background_manager
-        notifications = @background_manager.drain_notifications
-        return if notifications.nil? || notifications.empty?
-        summary = notifications.map { |n| format_background_notification(n) }.join("\n\n")
-        @conversation.add_user_message("[Background job results]\n#{summary}")
-      rescue NoMethodError
-        # background_manager does not support drain_notifications yet
-      end
-      def has_pending_background_jobs?
-        return false unless @background_manager
-        @background_manager.active_count.positive?
-      rescue NoMethodError
-        false
-      end
-      def format_background_notification(notification)
-        case notification
-        when Hash
-          status = notification[:status] || 'unknown'
-          job_id = notification[:job_id]&.[](0..7) || 'unknown'
-          duration = notification[:duration] ? "#{'%.1f' % notification[:duration]}s" : 'unknown'
-          result = notification[:result] || '(no output)'
-          "Job #{job_id} [#{status}] (#{duration}):\n#{result}"
-        else
-          notification.to_s
+        # Wait for background jobs before finalizing
+        if pending_background_jobs?
+          @conversation.add_assistant_message(response_content(response))
+          wait_for_background_jobs
+          return nil # signal: keep iterating
         end
-      end
-      # ── Output token recovery (3-tier, matches Claude Code) ──────────
-      #
-      # Tier 1: Silent escalation (8K → 32K) — handled in send_message
-      # Tier 2: Multi-turn recovery — inject continuation message, retry up to 3x
-      # Tier 3: Surface what we have — return partial response after exhausting retries
-      def truncated?(response)
-        reason = if response.respond_to?(:stop_reason)
-                   response.stop_reason
-                 elsif response.is_a?(Hash)
-                   response[:stop_reason] || response['stop_reason']
-                 end
-        reason == 'max_tokens'
-      end
+        text = extract_response_text(response)
-      def recover_truncated_response(response)
-        @max_tokens_override ||= Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
+        return handle_empty_response if text.strip.empty?
         @conversation.add_assistant_message(response_content(response))
-        max_retries = Config::Defaults::MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
-        max_retries.times do |attempt|
-          @output_recovery_count += 1
-          RubynCode::Debug.recovery("Tier 2: Recovery attempt #{attempt + 1}/#{max_retries}")
-          @conversation.add_user_message(
-            'Output token limit hit. Resume directly — no apology, no recap, ' \
-            'just continue exactly where you left off.'
-          )
-          response = call_llm
+        # Decision-based compaction (topic switch, milestone)
+        @decision_compactor&.check!(@conversation)
-          unless truncated?(response)
-            RubynCode::Debug.recovery("Recovery successful on attempt #{attempt + 1}")
-            break
-          end
+        # Compact after the response if context is over threshold
+        compact_if_needed
-          RubynCode::Debug.recovery("Still truncated after attempt #{attempt + 1}")
-          @conversation.add_assistant_message(response_content(response))
-        end
-        if truncated?(response)
-          RubynCode::Debug.recovery("Tier 3: Exhausted #{max_retries} recovery attempts, returning partial response")
-        end
-        response
+        text
       end
-      # ── Response helpers ─────────────────────────────────────────────
-      def extract_tool_calls(response)
-        get_content(response).select { |block| block_type(block) == 'tool_use' }
-      end
-      def response_content(response)
-        get_content(response)
-      end
-      def extract_response_text(response)
-        blocks = get_content(response)
-        blocks.select { |b| block_type(b) == 'text' }
-              .map { |b| b.respond_to?(:text) ? b.text : (b[:text] || b['text']) }
-              .compact.join("\n")
-      end
+      # Empty LLM response (0 content blocks). Common after dispatching
+      # background_run — the LLM has nothing to say until results arrive.
+      # Wait briefly for jobs, then either continue or accept the empty response.
+      def handle_empty_response
+        RubynCode::Debug.llm('Empty response — waiting for background jobs')
+        sleep 2 # give jobs a moment to register as active
+        drain_background_notifications
-      def get_content(response)
-        case response
-        when ->(r) { r.respond_to?(:content) }
-          Array(response.content)
-        when Hash
-          Array(response[:content] || response['content'])
+        if pending_background_jobs?
+          wait_for_background_jobs
+          nil # keep iterating — job results are now in conversation
         else
-          []
+          RubynCode::Debug.llm('No background jobs — accepting empty response')
+          '' # return empty string to stop the loop
         end
       end
-      def block_type(block)
-        if block.respond_to?(:type)
-          block.type.to_s
-        elsif block.is_a?(Hash)
-          (block[:type] || block['type']).to_s
+      def handle_tool_response(response, tool_calls, iteration)
+        if truncated?(response) && !@max_tokens_override
+          escalate_max_tokens
+          return nil
         end
-      end
-      def track_usage(response)
-        usage = if response.respond_to?(:usage)
-                  response.usage
-                elsif response.is_a?(Hash)
-                  response[:usage] || response['usage']
-                end
-        return unless usage
-        input_tokens = usage.respond_to?(:input_tokens) ? usage.input_tokens : usage[:input_tokens]
-        output_tokens = usage.respond_to?(:output_tokens) ? usage.output_tokens : usage[:output_tokens]
-        cache_create = usage.respond_to?(:cache_creation_input_tokens) ? usage.cache_creation_input_tokens.to_i : 0
-        cache_read = usage.respond_to?(:cache_read_input_tokens) ? usage.cache_read_input_tokens.to_i : 0
-        cache_info = cache_create.positive? || cache_read.positive? ? " cache_create=#{cache_create} cache_read=#{cache_read}" : ''
-        RubynCode::Debug.token("in=#{input_tokens} out=#{output_tokens}#{cache_info}")
-        @context_manager.track_usage(usage)
-      rescue NoMethodError
-        # context_manager does not implement track_usage yet
+        @conversation.add_assistant_message(get_content(response))
+        process_tool_calls(tool_calls)
+        drain_background_notifications
+        run_maintenance(iteration)
+        nil
       end
-      def update_task_budget(response)
-        usage = response.respond_to?(:usage) ? response.usage : nil
-        return unless usage
+      # Check if context needs compaction. Runs before LLM calls and
+      # after text responses — mirrors Claude Code's "pause for compaction"
+      # behavior that keeps context manageable in long sessions.
+      def compact_if_needed
+        return unless @context_manager.needs_compaction?(@conversation.messages)
-        output = usage.respond_to?(:output_tokens) ? usage.output_tokens.to_i : 0
-        input = usage.respond_to?(:input_tokens) ? usage.input_tokens.to_i : 0
+        est = @context_manager.estimated_tokens(@conversation.messages)
+        RubynCode::Debug.token(
+          "Context over threshold (#{est}) — running compaction"
+        )
+        @context_manager.check_compaction!(@conversation)
-        # Initialize on first response, then decrement
-        @task_budget_remaining ||= TASK_BUDGET_TOTAL
-        @task_budget_remaining = [@task_budget_remaining - input - output, 0].max
+        after = @context_manager.estimated_tokens(@conversation.messages)
+        RubynCode::Debug.token("Compacted: #{est} → #{after} tokens")
+      rescue StandardError => e
+        RubynCode::Debug.warn("Compaction failed: #{e.message}")
+      end
-        RubynCode::Debug.token("task_budget_remaining=#{@task_budget_remaining}/#{TASK_BUDGET_TOTAL}")
+      def escalate_max_tokens
+        RubynCode::Debug.recovery(
+          'Tier 1: Escalating max_tokens from ' \
+          "#{Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS} to " \
+          "#{Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS}"
+        )
+        @max_tokens_override = Config::Defaults::ESCALATED_MAX_OUTPUT_TOKENS
       end
       def max_iterations_warning
         warning = "Reached maximum iteration limit (#{MAX_ITERATIONS}). " \
-                  'The conversation may be incomplete. Please review the current state ' \
-                  'and continue if needed.'
+                  'The conversation may be incomplete. Please review the ' \
+                  'current state and continue if needed.'
         @conversation.add_assistant_message([{ type: 'text', text: warning }])
         warning
       end
-      # Extract a field from a Data object or Hash
-      def field(obj, key)
-        if obj.respond_to?(key)
-          obj.send(key)
-        elsif obj.is_a?(Hash)
-          obj[key] || obj[key.to_s]
-        end
-      end
-      def symbolize_keys(hash)
-        return {} unless hash.is_a?(Hash)
-        hash.transform_keys(&:to_sym)
-      end
     end
   end
 end