RubyGems - rubyn-code - Versions diffs - 0.3.0 → 0.5.0 - Mend

rubyn-code 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103)

checksums.yaml +4 -4
data/README.md +263 -21
data/db/migrations/013_add_failed_status_to_tasks.rb +51 -0
data/lib/rubyn_code/agent/conversation.rb +34 -4
data/lib/rubyn_code/agent/dynamic_tool_schema.rb +57 -3
data/lib/rubyn_code/agent/llm_caller.rb +11 -1
data/lib/rubyn_code/agent/loop.rb +14 -3
data/lib/rubyn_code/agent/response_modes.rb +2 -1
data/lib/rubyn_code/agent/system_prompt_builder.rb +49 -4
data/lib/rubyn_code/agent/tool_processor.rb +25 -3
data/lib/rubyn_code/auth/key_encryption.rb +118 -0
data/lib/rubyn_code/auth/token_store.rb +50 -9
data/lib/rubyn_code/autonomous/daemon.rb +117 -14
data/lib/rubyn_code/autonomous/idle_poller.rb +0 -20
data/lib/rubyn_code/autonomous/task_claimer.rb +17 -11
data/lib/rubyn_code/cli/app.rb +116 -11
data/lib/rubyn_code/cli/commands/doctor.rb +73 -0
data/lib/rubyn_code/cli/commands/install_skills.rb +44 -0
data/lib/rubyn_code/cli/commands/list_skills.rb +149 -0
data/lib/rubyn_code/cli/commands/mcp.rb +77 -0
data/lib/rubyn_code/cli/commands/model.rb +32 -2
data/lib/rubyn_code/cli/commands/provider.rb +124 -0
data/lib/rubyn_code/cli/commands/remove_skills.rb +35 -0
data/lib/rubyn_code/cli/commands/skill.rb +54 -3
data/lib/rubyn_code/cli/commands/skills.rb +104 -0
data/lib/rubyn_code/cli/daemon_runner.rb +36 -0
data/lib/rubyn_code/cli/first_run.rb +159 -0
data/lib/rubyn_code/cli/repl.rb +15 -0
data/lib/rubyn_code/cli/repl_commands.rb +3 -1
data/lib/rubyn_code/cli/repl_lifecycle.rb +1 -0
data/lib/rubyn_code/cli/repl_setup.rb +74 -1
data/lib/rubyn_code/config/defaults.rb +3 -0
data/lib/rubyn_code/config/schema.json +49 -0
data/lib/rubyn_code/config/settings.rb +12 -6
data/lib/rubyn_code/config/validator.rb +63 -0
data/lib/rubyn_code/context/context_budget.rb +18 -2
data/lib/rubyn_code/context/context_collapse.rb +34 -4
data/lib/rubyn_code/context/manager.rb +37 -3
data/lib/rubyn_code/context/manual_compact.rb +1 -1
data/lib/rubyn_code/hooks/registry.rb +4 -0
data/lib/rubyn_code/ide/adapters/tool_output.rb +330 -0
data/lib/rubyn_code/ide/client.rb +110 -0
data/lib/rubyn_code/ide/handlers/accept_edit_handler.rb +35 -0
data/lib/rubyn_code/ide/handlers/approve_tool_use_handler.rb +34 -0
data/lib/rubyn_code/ide/handlers/cancel_handler.rb +41 -0
data/lib/rubyn_code/ide/handlers/config_get_handler.rb +63 -0
data/lib/rubyn_code/ide/handlers/config_set_handler.rb +86 -0
data/lib/rubyn_code/ide/handlers/initialize_handler.rb +79 -0
data/lib/rubyn_code/ide/handlers/models_list_handler.rb +39 -0
data/lib/rubyn_code/ide/handlers/prompt_handler.rb +218 -0
data/lib/rubyn_code/ide/handlers/review_handler.rb +127 -0
data/lib/rubyn_code/ide/handlers/session_fork_handler.rb +49 -0
data/lib/rubyn_code/ide/handlers/session_list_handler.rb +41 -0
data/lib/rubyn_code/ide/handlers/session_reset_handler.rb +31 -0
data/lib/rubyn_code/ide/handlers/session_resume_handler.rb +42 -0
data/lib/rubyn_code/ide/handlers/shutdown_handler.rb +37 -0
data/lib/rubyn_code/ide/handlers.rb +76 -0
data/lib/rubyn_code/ide/protocol.rb +112 -0
data/lib/rubyn_code/ide/server.rb +186 -0
data/lib/rubyn_code/index/codebase_index.rb +69 -2
data/lib/rubyn_code/learning/extractor.rb +4 -2
data/lib/rubyn_code/llm/adapters/anthropic.rb +6 -2
data/lib/rubyn_code/llm/adapters/anthropic_compatible.rb +60 -0
data/lib/rubyn_code/llm/adapters/openai_compatible.rb +6 -2
data/lib/rubyn_code/llm/client.rb +29 -4
data/lib/rubyn_code/llm/model_router.rb +2 -1
data/lib/rubyn_code/mcp/config.rb +2 -1
data/lib/rubyn_code/mcp/tool_bridge.rb +1 -1
data/lib/rubyn_code/memory/search.rb +1 -0
data/lib/rubyn_code/observability/usage_reporter.rb +4 -2
data/lib/rubyn_code/output/diff_renderer.rb +3 -2
data/lib/rubyn_code/self_test.rb +316 -0
data/lib/rubyn_code/skills/auto_suggest.rb +131 -0
data/lib/rubyn_code/skills/catalog.rb +76 -0
data/lib/rubyn_code/skills/document.rb +8 -2
data/lib/rubyn_code/skills/gemfile_parser.rb +40 -0
data/lib/rubyn_code/skills/loader.rb +43 -0
data/lib/rubyn_code/skills/matcher.rb +89 -0
data/lib/rubyn_code/skills/pack_context.rb +163 -0
data/lib/rubyn_code/skills/pack_installer.rb +194 -0
data/lib/rubyn_code/skills/pack_manager.rb +230 -0
data/lib/rubyn_code/skills/registry_autoload.rb +112 -0
data/lib/rubyn_code/skills/registry_client.rb +241 -0
data/lib/rubyn_code/tasks/models.rb +1 -0
data/lib/rubyn_code/tools/base.rb +13 -0
data/lib/rubyn_code/tools/bash.rb +5 -0
data/lib/rubyn_code/tools/edit_file.rb +62 -5
data/lib/rubyn_code/tools/executor.rb +65 -8
data/lib/rubyn_code/tools/glob.rb +6 -0
data/lib/rubyn_code/tools/grep.rb +7 -0
data/lib/rubyn_code/tools/ide_diagnostics.rb +53 -0
data/lib/rubyn_code/tools/ide_symbols.rb +55 -0
data/lib/rubyn_code/tools/load_skill.rb +2 -1
data/lib/rubyn_code/tools/output_compressor.rb +9 -7
data/lib/rubyn_code/tools/read_file.rb +6 -0
data/lib/rubyn_code/tools/registry.rb +11 -0
data/lib/rubyn_code/tools/review_pr.rb +15 -4
data/lib/rubyn_code/tools/web_search.rb +2 -1
data/lib/rubyn_code/tools/write_file.rb +17 -0
data/lib/rubyn_code/version.rb +1 -1
data/lib/rubyn_code.rb +34 -0
data/skills/rubyn_self_test.md +88 -1
metadata +43 -1

data/lib/rubyn_code/agent/loop.rb CHANGED Viewed

@@ -44,6 +44,9 @@ module RubynCode
       # @return [Boolean]
       attr_accessor :plan_mode
+      # @return [Index::CodebaseIndex, nil]
+      attr_reader :codebase_index
       # Send a user message and run the agent loop until a final text
       # response is produced or the iteration limit is reached.
       #
@@ -56,6 +59,7 @@ module RubynCode
         inject_skill_listing unless @skills_injected
         @decision_compactor&.detect_topic_switch(user_input)
         @skill_ttl&.tick!
+        autoload_triggered_skills(user_input)
         @conversation.add_user_message(user_input)
         reset_iteration_state
@@ -90,7 +94,10 @@ module RubynCode
         @background_manager = opts[:background_manager]
         @stall_detector     = opts.fetch(:stall_detector, LoopDetector.new)
         @skill_loader       = opts[:skill_loader]
+        @skill_matcher      = opts[:skill_matcher]
+        @web_skill_autoload = opts[:web_skill_autoload]
         @project_root       = opts[:project_root]
+        @tool_wrapper       = opts[:tool_wrapper]
         @decision_compactor = build_decision_compactor
         @skill_ttl          = Skills::TtlManager.new
         @session_initialized = false
@@ -123,15 +130,17 @@ module RubynCode
       def build_codebase_index!
         index = Index::CodebaseIndex.new(project_root: @project_root)
         index.load_or_build!
+        @codebase_index = index
         RubynCode::Debug.agent("Codebase index: #{index.stats[:nodes]} nodes, #{index.stats[:files_indexed]} files")
       rescue StandardError => e
         RubynCode::Debug.warn("Codebase index failed: #{e.message}")
       end
       def assign_callbacks(opts)
-        @on_tool_call   = opts[:on_tool_call]
-        @on_tool_result = opts[:on_tool_result]
-        @on_text        = opts[:on_text]
+        @on_tool_call          = opts[:on_tool_call]
+        @on_tool_result        = opts[:on_tool_result]
+        @on_text               = opts[:on_text]
+        @on_skills_autoloaded  = opts[:on_skills_autoloaded]
         @skills_injected = false
       end
@@ -143,6 +152,7 @@ module RubynCode
       def run_iteration(iteration)
         log_iteration(iteration)
+        @context_manager.advance_turn!
         compact_if_needed # ensure context is under threshold before LLM call
         response   = call_llm
         tool_calls = extract_tool_calls(response)
@@ -224,6 +234,7 @@ module RubynCode
         @conversation.add_assistant_message(get_content(response))
         process_tool_calls(tool_calls)
         drain_background_notifications
+        @decision_compactor&.check!(@conversation)
         run_maintenance(iteration)
         nil
       end

data/lib/rubyn_code/agent/response_modes.rb CHANGED Viewed

@@ -45,7 +45,8 @@ module RubynCode
         # @param message [String] the user's input
         # @param tool_calls [Array] recent tool calls (for context)
         # @return [Symbol] one of the MODES keys
-        def detect(message, tool_calls: []) # rubocop:disable Metrics/CyclomaticComplexity -- mode detection dispatch
+        # -- mode detection dispatch
+        def detect(message, tool_calls: [])
           return :implementing if implementation_signal?(message)
           return :debugging    if debugging_signal?(message)
           return :reviewing    if reviewing_signal?(message)

data/lib/rubyn_code/agent/system_prompt_builder.rb CHANGED Viewed

@@ -64,15 +64,21 @@ module RubynCode
       def append_codebase_index(parts)
         return unless @project_root
-        index = Index::CodebaseIndex.new(project_root: @project_root)
-        loaded = index.load
-        return unless loaded && index.nodes.any?
+        index = resolve_codebase_index
+        return unless index&.nodes&.any?
-        parts << "\n## #{index.to_prompt_summary}"
+        parts << "\n## #{index.to_structural_summary}"
       rescue StandardError
         nil
       end
+      def resolve_codebase_index
+        return @codebase_index if defined?(@codebase_index) && @codebase_index
+        idx = Index::CodebaseIndex.new(project_root: @project_root)
+        idx.load
+      end
       def append_memories(parts)
         memories = load_memories
         return if memories.empty?
@@ -117,6 +123,45 @@ module RubynCode
         @skills_injected = true
       end
+      # Match the current user message against every skill's :triggers and
+      # inject the body of any new match into the conversation so the LLM sees
+      # it on the next call. Per-session dedup lives in the Matcher.
+      #
+      # When the message matches a registry pack the user hasn't installed,
+      # @web_skill_autoload silently fetches it, installs it, refreshes the
+      # catalog, and surfaces any new skill matches. Web fallback failures
+      # are silent so the turn proceeds normally.
+      def autoload_triggered_skills(user_input)
+        return unless @skill_matcher && @skill_loader
+        matches = @skill_matcher.match(user_input)
+        matches += @web_skill_autoload.try(user_input) if @web_skill_autoload
+        return if matches.empty?
+        names = matches.map { |m| m[:name] }
+        bodies = names.filter_map do |name|
+          @skill_loader.load(name)
+        rescue StandardError => e
+          RubynCode::Debug.warn("Failed to autoload skill '#{name}': #{e.message}")
+          nil
+        end
+        return if bodies.empty?
+        inject_autoloaded_bodies(bodies)
+        @on_skills_autoloaded&.call(names)
+      end
+      def inject_autoloaded_bodies(bodies)
+        @conversation.add_user_message(
+          '[system] The following skills are auto-loaded based on the next user ' \
+          "message's triggers. Use them as context. Do not mention this message " \
+          "to the user.\n\n#{bodies.join("\n\n")}"
+        )
+        @conversation.add_assistant_message(
+          [{ type: 'text', text: 'Understood.' }]
+        )
+      end
       def append_deferred_tools(parts)
         deferred = deferred_tool_names
         return if deferred.empty?

data/lib/rubyn_code/agent/tool_processor.rb CHANGED Viewed

@@ -26,7 +26,8 @@ module RubynCode
         all_tools.select { |t| core_or_discovered?(t) }
       end
-      def detect_task_context # rubocop:disable Metrics/CyclomaticComplexity -- safe navigation chain
+      # -- safe navigation chain
+      def detect_task_context
         last_msg = @conversation&.messages&.reverse_each&.find { |m| m[:role] == 'user' } # rubocop:disable Style/SafeNavigationChainLength
         return nil unless last_msg
@@ -51,6 +52,7 @@ module RubynCode
         Tools::Registry.all.select { |t| PLAN_MODE_RISK_LEVELS.include?(t::RISK_LEVEL) }.map(&:to_schema)
       end
+      # -- tool dispatch with budget + signals
       def process_tool_calls(tool_calls)
         aggregate_chars = 0
         budget = Config::Defaults::MAX_MESSAGE_TOOL_RESULTS_CHARS
@@ -62,6 +64,7 @@ module RubynCode
           notify_tool_result(field(tool_call, :name), result, is_error)
           record_tool_result(tool_call, result, is_error)
         end
+        @decision_compactor&.signal_edit_batch_complete!
       end
       def run_single_tool(tool_call)
@@ -114,15 +117,34 @@ module RubynCode
       def execute_tool(tool_name, tool_input)
         discover_tool(tool_name)
         @hook_runner.fire(:pre_tool_use, tool_name: tool_name, tool_input: tool_input)
-        result = @tool_executor.execute(tool_name, symbolize_keys(tool_input))
+        result = dispatch_tool(tool_name, tool_input)
         @hook_runner.fire(:post_tool_use, tool_name: tool_name, tool_input: tool_input, result: result)
         signal_decision_compactor(tool_name, tool_input, result)
         [result.to_s, false]
+      rescue RubynCode::UserDeniedError => e
+        # User refused this call via the IDE. Surface as is_error so the model
+        # knows the tool did not run, not that it ran and returned text.
+        [e.message, true]
       rescue StandardError => e
         ["Error executing #{tool_name}: #{e.message}", true]
       end
-      def signal_decision_compactor(tool_name, tool_input, result) # rubocop:disable Metrics/CyclomaticComplexity -- tool dispatch
+      # Run the tool through @tool_wrapper if one is configured (IDE mode),
+      # otherwise call the executor directly. The wrapper receives the raw
+      # tool name/input so it can emit protocol notifications and gate the
+      # call; the block below is what actually performs the work.
+      def dispatch_tool(tool_name, tool_input)
+        if @tool_wrapper
+          @tool_wrapper.call(tool_name, tool_input) do
+            @tool_executor.execute(tool_name, symbolize_keys(tool_input))
+          end
+        else
+          @tool_executor.execute(tool_name, symbolize_keys(tool_input))
+        end
+      end
+      # -- tool dispatch
+      def signal_decision_compactor(tool_name, tool_input, result)
         return unless @decision_compactor
         case tool_name

data/lib/rubyn_code/auth/key_encryption.rb ADDED Viewed

@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+require 'openssl'
+require 'base64'
+require 'securerandom'
+require 'etc'
+require 'socket'
+module RubynCode
+  module Auth
+    # Encrypts and decrypts provider API keys at rest using AES-256-GCM.
+    #
+    # The encryption key is derived via PBKDF2 from machine-specific identifiers
+    # (username, hostname, home directory) combined with a random salt stored in
+    # ~/.rubyn-code/.encryption_salt. This means keys are only decryptable on the
+    # same machine by the same user.
+    #
+    # Encrypted values are prefixed with "enc:v1:" so plaintext values from older
+    # versions are transparently migrated on first read.
+    module KeyEncryption
+      CIPHER = 'aes-256-gcm'
+      PREFIX = 'enc:v1:'
+      IV_LENGTH = 12
+      TAG_LENGTH = 16
+      PBKDF2_ITERATIONS = 100_000
+      KEY_LENGTH = 32
+      SALT_LENGTH = 32
+      class << self
+        def encrypt(plaintext)
+          return nil unless plaintext
+          cipher = OpenSSL::Cipher.new(CIPHER).encrypt
+          key = derive_key
+          cipher.key = key
+          iv = cipher.random_iv
+          ciphertext = cipher.update(plaintext) + cipher.final
+          tag = cipher.auth_tag(TAG_LENGTH)
+          encoded = Base64.strict_encode64(iv + ciphertext + tag)
+          "#{PREFIX}#{encoded}"
+        end
+        def decrypt(value)
+          return nil unless value
+          return value unless encrypted?(value)
+          raw = Base64.strict_decode64(value.delete_prefix(PREFIX))
+          decrypt_raw(raw)
+        rescue OpenSSL::Cipher::CipherError, ArgumentError
+          nil
+        end
+        def encrypted?(value)
+          value.is_a?(String) && value.start_with?(PREFIX)
+        end
+        private
+        def decrypt_raw(raw)
+          iv = raw[0, IV_LENGTH]
+          tag = raw[-TAG_LENGTH, TAG_LENGTH]
+          ciphertext = raw[IV_LENGTH...-TAG_LENGTH]
+          cipher = OpenSSL::Cipher.new(CIPHER).decrypt
+          cipher.key = derive_key
+          cipher.iv = iv
+          cipher.auth_tag = tag
+          (cipher.update(ciphertext) + cipher.final).force_encoding('UTF-8')
+        end
+        def derive_key
+          OpenSSL::KDF.pbkdf2_hmac(
+            machine_identity,
+            salt: load_or_create_salt,
+            iterations: PBKDF2_ITERATIONS,
+            length: KEY_LENGTH,
+            hash: 'SHA256'
+          )
+        end
+        def machine_identity
+          # Use the real UID's login name rather than Etc.getlogin. Etc.getlogin
+          # reads the controlling tty's owner and can return "root" when the tty
+          # is root-owned (common after `sudo`, and in some VSCode integrated
+          # terminal setups) — even though the process itself is running as the
+          # real user. That mismatch derives a different AES key on decrypt vs.
+          # encrypt and the AEAD tag check fails, which surfaces as a misleading
+          # "No <provider> API key configured" error.
+          user = begin
+            Etc.getpwuid(Process.uid).name
+          rescue StandardError
+            ENV['USER'] || Etc.getlogin || 'unknown'
+          end
+          [user, Socket.gethostname, Dir.home].join(':')
+        end
+        def load_or_create_salt
+          path = salt_path
+          if File.exist?(path)
+            File.binread(path)
+          else
+            salt = SecureRandom.random_bytes(SALT_LENGTH)
+            FileUtils.mkdir_p(File.dirname(path), mode: 0o700)
+            File.binwrite(path, salt)
+            File.chmod(0o600, path)
+            salt
+          end
+        end
+        def salt_path
+          File.join(Config::Defaults::HOME_DIR, '.encryption_salt')
+        end
+      end
+    end
+  end
+end

data/lib/rubyn_code/auth/token_store.rb CHANGED Viewed

@@ -7,7 +7,7 @@ require 'time'
 module RubynCode
   module Auth
-    module TokenStore
+    module TokenStore # rubocop:disable Metrics/ModuleLength -- single-responsibility credential store
       EXPIRY_BUFFER_SECONDS = 300 # 5 minutes
       KEYCHAIN_SERVICE = 'Claude Code-credentials'
@@ -21,25 +21,44 @@ module RubynCode
         end
         # Load API key for a given provider. Anthropic uses the full fallback chain.
+        # Other providers: stored key → env var.
         def load_for_provider(provider)
           return load if provider == 'anthropic'
+          stored = load_provider_key(provider)
+          return { access_token: stored, type: :api_key, source: :stored } if stored
           env_key = resolve_env_key(provider)
           api_key = ENV.fetch(env_key, nil)
           api_key&.empty? == false ? { access_token: api_key, type: :api_key, source: :env } : nil
         end
-        def save(access_token:, refresh_token:, expires_at:)
+        # Store an API key for a provider in tokens.yml (encrypted at rest).
+        def save_provider_key(provider, key)
           ensure_directory!
+          data = load_tokens_file || {}
+          data['provider_keys'] ||= {}
+          data['provider_keys'][provider.to_s] = KeyEncryption.encrypt(key)
+          write_tokens_file(data)
+        end
-          data = {
-            'access_token' => access_token,
-            'refresh_token' => refresh_token,
-            'expires_at' => expires_at.is_a?(Time) ? expires_at.iso8601 : expires_at.to_s
-          }
+        # Retrieve a stored API key for a provider (decrypted transparently).
+        def load_provider_key(provider)
+          data = load_tokens_file
+          value = data&.dig('provider_keys', provider.to_s)
+          return nil unless value
-          File.write(tokens_path, YAML.dump(data))
-          File.chmod(0o600, tokens_path)
+          migrate_plaintext_key!(data, provider, value) unless KeyEncryption.encrypted?(value)
+          KeyEncryption.decrypt(value)
+        end
+        def save(access_token:, refresh_token:, expires_at:)
+          ensure_directory!
+          data = load_tokens_file || {}
+          data['access_token'] = access_token
+          data['refresh_token'] = refresh_token
+          data['expires_at'] = expires_at.is_a?(Time) ? expires_at.iso8601 : expires_at.to_s
+          write_tokens_file(data)
           data
         end
@@ -118,6 +137,28 @@ module RubynCode
           { access_token: api_key, refresh_token: nil, expires_at: nil, type: :api_key, source: :env }
         end
+        def write_tokens_file(data)
+          File.write(tokens_path, YAML.dump(data))
+          File.chmod(0o600, tokens_path)
+        end
+        # Auto-encrypt a plaintext key from a pre-encryption install.
+        def migrate_plaintext_key!(data, provider, plaintext)
+          data['provider_keys'][provider.to_s] = KeyEncryption.encrypt(plaintext)
+          write_tokens_file(data)
+        rescue StandardError
+          nil # don't break reads if migration fails
+        end
+        def load_tokens_file
+          return nil unless File.exist?(tokens_path)
+          data = YAML.safe_load_file(tokens_path, permitted_classes: [Time])
+          data.is_a?(Hash) ? data : nil
+        rescue Psych::SyntaxError, Errno::EACCES
+          nil
+        end
         def tokens_path = Config::Defaults::TOKENS_FILE
         def ensure_directory!

data/lib/rubyn_code/autonomous/daemon.rb CHANGED Viewed

@@ -14,8 +14,9 @@ module RubynCode
     #
     # Unlike the REPL, the daemon runs a full Agent::Loop per task — meaning
     # it can read files, write code, run specs, and use every tool available.
-    class Daemon
+    class Daemon # rubocop:disable Metrics/ClassLength -- daemon lifecycle + retry + audit + cost
       LIFECYCLE_STATES = %i[spawned working idle shutting_down stopped].freeze
+      MAX_TASK_RETRIES = 3
       attr_reader :agent_name, :role, :state, :runs_completed, :total_cost
@@ -32,14 +33,17 @@ module RubynCode
       # @param on_state_change [Proc, nil] callback invoked with (old_state, new_state)
       # @param on_task_complete [Proc, nil] callback invoked with (task, result_text)
       # @param on_task_error [Proc, nil] callback invoked with (task, error)
+      # @param session_persistence [Memory::SessionPersistence, nil] optional audit trail persistence
       def initialize( # rubocop:disable Metrics/ParameterLists
         agent_name:, role:, llm_client:, project_root:, task_manager:, mailbox:,
         max_runs: 100, max_cost: 10.0, poll_interval: 5, idle_timeout: 60,
-        on_state_change: nil, on_task_complete: nil, on_task_error: nil
+        on_state_change: nil, on_task_complete: nil, on_task_error: nil,
+        session_persistence: nil
       )
         assign_core_attrs(agent_name:, role:, llm_client:, project_root:, task_manager:, mailbox:)
         assign_limits(max_runs:, max_cost:, poll_interval:, idle_timeout:)
         assign_callbacks_and_state(on_state_change, on_task_complete, on_task_error)
+        @session_persistence = session_persistence
       end
       # Enters the work-idle-work cycle. Blocks the calling thread until
@@ -153,16 +157,18 @@ module RubynCode
         agent_loop = build_agent_loop
         result_text = agent_loop.send_message(build_work_prompt(task))
-        # Accumulate cost from the budget enforcer
-        track_cost_from_enforcer(agent_loop)
+        # Accumulate cost via CostCalculator using actual token counts
+        track_cost_from_context_manager(agent_loop)
         # Mark the task as completed with the agent's result.
         @task_manager.complete(task.id, result: result_text)
+        # Persist conversation as an audit trail
+        persist_session_audit(task, agent_loop)
         @on_task_complete&.call(task, result_text)
       rescue StandardError => e
-        # On failure, release the task so another agent (or retry) can pick it up.
-        @task_manager.update(task.id, status: 'pending', owner: nil, result: "Error: #{e.message}")
-        @on_task_error&.call(task, e)
+        handle_task_error(task, e)
       end
       # Builds a fresh Agent::Loop wired with all the real tools.
@@ -192,19 +198,116 @@ module RubynCode
         )
       end
-      # Accumulates cost tracked by the Agent::Loop's context manager.
+      # Computes USD cost from the context manager's token counts using
+      # Observability::CostCalculator. The old approach checked for a
+      # `total_cost` method that never existed on Context::Manager, so
+      # @total_cost was always 0.0 and the max_cost safety limit never fired.
       #
       # @param agent_loop [Agent::Loop]
       # @return [void]
-      def track_cost_from_enforcer(agent_loop)
-        # The context manager tracks token usage; we extract cost if available.
-        # This is best-effort — the daemon's own total_cost is an approximation.
+      def track_cost_from_context_manager(agent_loop)
         cm = agent_loop.instance_variable_get(:@context_manager)
-        return unless cm.respond_to?(:total_cost)
+        return unless cm
+        tokens = extract_token_counts(cm)
+        return if tokens.values.all?(&:zero?)
+        model = @llm_client.respond_to?(:model) ? @llm_client.model : 'claude-sonnet-4-6'
+        @total_cost += Observability::CostCalculator.calculate(model: model, **tokens)
+      rescue StandardError
+        # Non-critical — cost tracking is best-effort
+      end
+      # @param context_mgr [Context::Manager]
+      # @return [Hash] :input_tokens, :output_tokens
+      def extract_token_counts(context_mgr)
+        {
+          input_tokens: context_mgr.respond_to?(:total_input_tokens) ? context_mgr.total_input_tokens.to_i : 0,
+          output_tokens: context_mgr.respond_to?(:total_output_tokens) ? context_mgr.total_output_tokens.to_i : 0
+        }
+      end
+      # Handles a task error with retry backoff. Increments the retry count
+      # in the task's metadata. After MAX_TASK_RETRIES, marks the task as
+      # failed instead of releasing it back to pending.
+      #
+      # @param task [Tasks::Task]
+      # @param error [StandardError]
+      # @return [void]
+      def handle_task_error(task, error)
+        retry_count = extract_retry_count(task) + 1
+        metadata = build_retry_metadata(task, retry_count)
+        if retry_count >= MAX_TASK_RETRIES
+          @task_manager.update(
+            task.id,
+            status: 'failed',
+            owner: nil,
+            result: "Failed after #{retry_count} retries. Last error: #{error.message}",
+            metadata: JSON.generate(metadata)
+          )
+        else
+          @task_manager.update(
+            task.id,
+            status: 'pending',
+            owner: nil,
+            result: "Error (retry #{retry_count}/#{MAX_TASK_RETRIES}): #{error.message}",
+            metadata: JSON.generate(metadata)
+          )
+        end
+        @on_task_error&.call(task, error)
+      end
-        @total_cost += cm.total_cost.to_f
+      # @param task [Tasks::Task]
+      # @return [Integer]
+      def extract_retry_count(task)
+        meta = parse_task_metadata(task)
+        (meta[:retry_count] || meta['retry_count'] || 0).to_i
+      end
+      # @param task [Tasks::Task]
+      # @param retry_count [Integer]
+      # @return [Hash]
+      def build_retry_metadata(task, retry_count)
+        meta = parse_task_metadata(task)
+        meta.merge(retry_count: retry_count)
+      end
+      # @param task [Tasks::Task]
+      # @return [Hash]
+      def parse_task_metadata(task)
+        raw = task.metadata
+        case raw
+        when Hash then raw
+        when String then JSON.parse(raw, symbolize_names: true)
+        else {}
+        end
+      rescue JSON::ParserError
+        {}
+      end
+      # Persists the agent's conversation as a session audit trail after
+      # completing a task, so there's a record of what the daemon did.
+      #
+      # @param task [Tasks::Task]
+      # @param agent_loop [Agent::Loop]
+      # @return [void]
+      def persist_session_audit(task, agent_loop)
+        return unless @session_persistence
+        conversation = agent_loop.instance_variable_get(:@conversation)
+        return unless conversation.respond_to?(:messages)
+        session_id = "daemon-#{@agent_name}-#{task.id}"
+        @session_persistence.save_session(
+          session_id: session_id,
+          project_path: @project_root,
+          messages: conversation.messages,
+          title: "Daemon: #{task.title}",
+          metadata: { agent_name: @agent_name, task_id: task.id, task_title: task.title }
+        )
       rescue StandardError
-        # Non-critical
+        # Non-critical — audit persistence is best-effort
       end
       # ── Idle phase ───────────────────────────────────────────────

data/lib/rubyn_code/autonomous/idle_poller.rb CHANGED Viewed

@@ -53,26 +53,6 @@ module RubynCode
         @interrupted = true
       end
-      # Re-injects the agent's identity message when the conversation
-      # context has been compressed (i.e. the messages array is very short).
-      # This ensures the agent still knows who it is after compaction.
-      #
-      # @param messages [Array<Hash>] the current conversation messages
-      # @param identity [String] the identity/system prompt to re-inject
-      # @param threshold [Integer] message count below which re-injection triggers (default 3)
-      # @return [void]
-      def self.reinject_identity(messages, identity:, threshold: 3)
-        return if messages.length >= threshold
-        return if identity.nil? || identity.empty?
-        # Only re-inject if the identity is not already present as the
-        # first user message.
-        first_user = messages.find { |m| m[:role] == 'user' }
-        return if first_user && first_user[:content].to_s.include?(identity[0, 100])
-        messages.unshift({ role: 'user', content: identity })
-      end
       private
       # @return [Boolean]

data/lib/rubyn_code/autonomous/task_claimer.rb CHANGED Viewed

@@ -6,16 +6,19 @@ module RubynCode
     # Uses optimistic locking to handle race conditions when multiple
     # agents attempt to claim the same task concurrently.
     module TaskClaimer
-      # Finds the first ready (pending, unowned) task, claims it for the
-      # given agent, and returns the updated Task. Returns nil if no work
-      # is available.
+      MAX_RETRIES = 3
+      # Finds the first ready (pending, unowned) task that hasn't exceeded
+      # max retries, claims it for the given agent, and returns the updated
+      # Task. Returns nil if no work is available.
       #
       # @param task_manager [#db, #update_task, #list_tasks] task persistence layer
       # @param agent_name [String] unique identifier of the claiming agent
+      # @param max_retries [Integer] maximum retry count before skipping a task
       # @return [Tasks::Task, nil] the claimed task, or nil if none available
-      def self.call(task_manager:, agent_name:)
+      def self.call(task_manager:, agent_name:, max_retries: MAX_RETRIES)
         db = task_manager.db
-        claim_next_pending_task(db, agent_name)
+        claim_next_pending_task(db, agent_name, max_retries)
         fetch_claimed_task(db, agent_name)
       rescue StandardError => e
         RubynCode.logger.warn("TaskClaimer: failed to claim task: #{e.message}") if RubynCode.respond_to?(:logger)
@@ -25,17 +28,20 @@ module RubynCode
       class << self
         private
-        def claim_next_pending_task(db, agent_name)
-          db.execute(<<~SQL, [agent_name])
+        def claim_next_pending_task(db, agent_name, max_retries)
+          db.execute(<<~SQL, [agent_name, max_retries])
             UPDATE tasks
             SET owner = ?,
                 status = 'in_progress',
                 updated_at = datetime('now')
             WHERE id = (
-              SELECT id FROM tasks
-              WHERE status = 'pending'
-                AND (owner IS NULL OR owner = '')
-              ORDER BY priority DESC, created_at ASC
+              SELECT t.id FROM tasks t
+              WHERE t.status = 'pending'
+                AND (t.owner IS NULL OR t.owner = '')
+                AND COALESCE(
+                  json_extract(t.metadata, '$.retry_count'), 0
+                ) < ?
+              ORDER BY t.priority DESC, t.created_at ASC
               LIMIT 1
             )
             AND status = 'pending'