RubyGems - ruby_llm-agents - Versions diffs - 3.13.0 → 3.14.0 - Mend

ruby_llm-agents 3.13.0 → 3.14.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/README.md +1 -1
data/app/controllers/ruby_llm/agents/analytics_controller.rb +8 -0
data/app/controllers/ruby_llm/agents/executions_controller.rb +3 -2
data/app/controllers/ruby_llm/agents/tenants_controller.rb +8 -2
data/app/models/ruby_llm/agents/execution.rb +12 -2
data/app/models/ruby_llm/agents/tenant.rb +30 -2
data/app/views/ruby_llm/agents/tenants/_form.html.erb +16 -7
data/lib/ruby_llm/agents/base_agent.rb +189 -21
data/lib/ruby_llm/agents/core/configuration.rb +8 -0
data/lib/ruby_llm/agents/core/llm_tenant.rb +40 -0
data/lib/ruby_llm/agents/core/version.rb +1 -1
data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb +4 -2
data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +52 -1
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5fc64ee9b7db541e40c144d3b8007c0967f7ad63a7b5c404079d7c4c3398a646
-  data.tar.gz: 1c5b0d4c8c55390cfb5bee3ad394b47f4cae44dbd0452c5135c6ad7e7ac66a0b
+  metadata.gz: d974a549e2d99bbcd16c8345547d16c16aff7ae602da503974d9db1f32be8ce6
+  data.tar.gz: 9947da35f39521706e7c7bef349dcf9f6a63f5f732409ff1774ce0607bd6429e
 SHA512:
-  metadata.gz: 9c187e97882d3b4cf91dc98db12004d809db61a04bf95463aa32f1fa64b53a54d7f0800dfe105e0e6c5bf541b79bcdbe8f3d7537767b9c0d9814095f8fe6889d
-  data.tar.gz: 63b31bfcfcd27acaded706b78cfa96db5825e98ebd6f4a7fe4b2786ed0f15d048c706fdd157c4364618388816ab739a8c0dac34c773bcfaf0f67d9dc90397766
+  metadata.gz: 8aa785d541d2e00d237a71d5a652643e7e2da83750f8264d754c00fce6c2e33fc7d6d47caa75c1ac94659fd05703a9c488a229064cd1d01157b6845e4ef3970e
+  data.tar.gz: 89bf72286b0a6e70418268457beea87df58bb16514a170730518adeadae84eac6d6ff6553e9726e481b00c2358ad9d909c9f20170bcf41224eb7efdaf3d7f1ac

data/README.md CHANGED Viewed

@@ -317,7 +317,7 @@ mount RubyLLM::Agents::Engine => "/agents"
 - **Ruby** >= 3.1.0
 - **Rails** >= 7.0
-- **RubyLLM** >= 1.12.0
+- **RubyLLM** >= 1.16.0
 ## Contributing

data/app/controllers/ruby_llm/agents/analytics_controller.rb CHANGED Viewed

@@ -35,6 +35,7 @@ module RubyLLM
         @days = range_to_days(@selected_range)
         parse_custom_dates if @selected_range == "custom"
+        set_active_filters
         current_scope = apply_filters(time_scoped(tenant_scoped_executions))
         prior_scope = apply_filters(prior_period_scope(tenant_scoped_executions))
@@ -114,6 +115,13 @@ module RubyLLM
           []
         end
+        set_active_filters
+      end
+      # Resolves the active agent/model/tenant filters from request params.
+      # Shared by index (which also loads dropdown options) and chart_data
+      # (JSON only) so apply_filters behaves consistently on both endpoints.
+      def set_active_filters
         @filter_agent = params[:agent].presence
         @filter_model = params[:model].presence
         @filter_tenant = params[:filter_tenant].presence

data/app/controllers/ruby_llm/agents/executions_controller.rb CHANGED Viewed

@@ -214,8 +214,9 @@ module RubyLLM
         # Only show root executions - children are nested under parents
         scope = scope.where(parent_execution_id: nil)
-        # Eager load children for grouping
-        scope.includes(:child_executions)
+        # Eager load children for grouping and detail for error_message, which
+        # the list renders per row (otherwise an N+1 on error rows).
+        scope.includes(:child_executions, :detail)
       end
       # Checks whether turbo-rails is available in the host application

data/app/controllers/ruby_llm/agents/tenants_controller.rb CHANGED Viewed

@@ -19,7 +19,9 @@ module RubyLLM
       # @return [void]
       def index
         @sort_params = parse_tenant_sort_params
-        scope = TenantBudget.all
+        # Eager-load tenant_record so display_name's live name resolution does
+        # not issue a query per row.
+        scope = TenantBudget.all.includes(:tenant_record)
         if params[:q].present?
           @search_query = params[:q].to_s.strip
@@ -67,7 +69,11 @@ module RubyLLM
       # @return [void]
       def update
         @tenant = TenantBudget.find(params[:id])
-        if @tenant.update(tenant_params)
+        attrs = tenant_params
+        # Linked tenants derive their name live from the host record, so ignore
+        # any submitted name — it would be overwritten on the next record sync.
+        attrs = attrs.except(:name) if @tenant.linked?
+        if @tenant.update(attrs)
           redirect_to tenant_path(@tenant), notice: "Tenant updated successfully"
         else
           render :edit, status: :unprocessable_entity

data/app/models/ruby_llm/agents/execution.rb CHANGED Viewed

@@ -107,7 +107,13 @@ module RubyLLM
       validates :finish_reason, inclusion: {in: FINISH_REASONS}, allow_nil: true
       before_save :calculate_total_tokens, if: -> { input_tokens_changed? || output_tokens_changed? }
-      before_save :calculate_total_cost, if: -> { input_cost_changed? || output_cost_changed? }
+      # Derive total_cost from its components only when the caller did not set an
+      # explicit total in the same save. The pipeline records a
+      # cache/reasoning-aware total alongside input_cost/output_cost, and that
+      # richer value must not be overwritten with the text-only input+output sum.
+      # (Deriving from metadata is unsafe — metadata merges user-supplied agent
+      # data and a colliding key would corrupt the total.)
+      before_save :calculate_total_cost, if: -> { (input_cost_changed? || output_cost_changed?) && !total_cost_changed? }
       # Aggregates costs from all attempts using each attempt's model pricing
       #
@@ -474,7 +480,11 @@ module RubyLLM
         self.total_tokens = (input_tokens || 0) + (output_tokens || 0)
       end
-      # Calculates and sets total_cost from input and output costs
+      # Calculates and sets total_cost from input and output costs.
+      #
+      # Only runs when the caller did not provide an explicit total_cost (see
+      # the before_save guard), so a cache/reasoning-aware total supplied by the
+      # pipeline is preserved rather than collapsed to the text-only sum.
       #
       # @return [BigDecimal] The calculated total
       def calculate_total_cost

data/app/models/ruby_llm/agents/tenant.rb CHANGED Viewed

@@ -145,11 +145,17 @@ module RubyLLM
         alias_method :for_tenant!, :for!
       end
-      # Display name (name or tenant_id fallback)
+      # Display name.
+      #
+      # For tenants linked to a host model (Account, Organization, ...) the name
+      # is resolved live from that record, so a renamed record is reflected
+      # immediately instead of showing the snapshot taken when the tenant was
+      # created. Unlinked (string-id) tenants fall back to the stored name
+      # column, and tenant_id is the final fallback so this is never blank.
       #
       # @return [String]
       def display_name
-        name.presence || tenant_id
+        linked_record_name.presence || name.presence || tenant_id
       end
       # Check if tenant is linked to a user model
@@ -179,6 +185,28 @@ module RubyLLM
       def activate!
         update!(active: true)
       end
+      private
+      # Live display name from the linked host record (Account/Organization),
+      # or nil when this tenant is unlinked or the record is unavailable.
+      # Prefers the model's llm_tenant_name (which honours the configured name
+      # method), falling back to a plain #name. Never raises — name resolution
+      # must not break rendering.
+      #
+      # @return [String, nil]
+      def linked_record_name
+        record = tenant_record
+        return nil unless record
+        if record.respond_to?(:llm_tenant_name)
+          record.llm_tenant_name
+        elsif record.respond_to?(:name)
+          record.name
+        end
+      rescue
+        nil
+      end
     end
   end
 end

data/app/views/ruby_llm/agents/tenants/_form.html.erb CHANGED Viewed

@@ -17,13 +17,22 @@
     <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
       <div>
-        <%= f.label :name, "display name", class: "block text-xs font-mono text-gray-500 dark:text-gray-400 mb-1" %>
-        <%= f.text_field :name,
-            class: "w-full px-3 py-2 bg-transparent border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-900 dark:text-gray-100 focus:ring-1 focus:ring-gray-400 dark:focus:ring-gray-600 focus:border-gray-400 dark:focus:border-gray-600 placeholder-gray-400 dark:placeholder-gray-600",
-            placeholder: "e.g., Acme Corporation" %>
-        <p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
-          Falls back to tenant ID if not set.
-        </p>
+        <label class="block text-xs font-mono text-gray-500 dark:text-gray-400 mb-1">display name</label>
+        <% if tenant.linked? %>
+          <p class="w-full px-3 py-2 bg-gray-50 dark:bg-gray-900 border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-500 dark:text-gray-400">
+            <%= tenant.display_name %>
+          </p>
+          <p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
+            Managed by the linked <%= tenant.tenant_record_type %>. Renaming it updates this automatically.
+          </p>
+        <% else %>
+          <%= f.text_field :name,
+              class: "w-full px-3 py-2 bg-transparent border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-900 dark:text-gray-100 focus:ring-1 focus:ring-gray-400 dark:focus:ring-gray-600 focus:border-gray-400 dark:focus:border-gray-600 placeholder-gray-400 dark:placeholder-gray-600",
+              placeholder: "e.g., Acme Corporation" %>
+          <p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
+            Falls back to tenant ID if not set.
+          </p>
+        <% end %>
       </div>
       <div>

data/lib/ruby_llm/agents/base_agent.rb CHANGED Viewed

@@ -262,6 +262,24 @@ module RubyLLM
           @tools || (superclass.respond_to?(:tools) ? superclass.tools : [])
         end
+        # Sets or returns how this agent runs multiple tool calls returned in
+        # a single LLM response.
+        #
+        # Mirrors RubyLLM's tool_concurrency: +false+ runs them sequentially,
+        # +true+ or +:threads+ runs them in Ruby threads, and +:fibers+ runs
+        # them in fibers (requires the async gem). When unset, the agent
+        # inherits its superclass value and ultimately the global
+        # RubyLLM tool_concurrency configuration.
+        #
+        # @param value [Boolean, Symbol] Concurrency mode (omit to read)
+        # @return [Boolean, Symbol, nil] Configured mode, or nil when unset
+        def tool_concurrency(*value)
+          @tool_concurrency = value.first unless value.empty?
+          return @tool_concurrency if instance_variable_defined?(:@tool_concurrency)
+          superclass.respond_to?(:tool_concurrency) ? superclass.tool_concurrency : nil
+        end
         # @!endgroup
         # @!group Temperature DSL
@@ -738,6 +756,7 @@ module RubyLLM
       def execute(context)
         @context = context
         client = build_client(context)
+        @client = client
         # Make context available to Tool instances during tool execution
         previous_context = Thread.current[:ruby_llm_agents_caller_context]
@@ -788,7 +807,16 @@ module RubyLLM
         end
         client = client.with_schema(schema) if schema
-        client = client.with_tools(*resolved_tools) if resolved_tools.any?
+        if resolved_tools.any?
+          # Only pass concurrency when the agent overrides it; otherwise let
+          # RubyLLM apply its globally configured tool_concurrency default.
+          concurrency = self.class.tool_concurrency
+          client = if concurrency.nil?
+            client.with_tools(*resolved_tools)
+          else
+            client.with_tools(*resolved_tools, concurrency: concurrency)
+          end
+        end
         apply_tool_prompt_caching(client) if use_prompt_caching && resolved_tools.any?
         client = setup_tool_tracking(client) if resolved_tools.any?
         client = apply_messages(client, resolved_messages) if resolved_messages.any?
@@ -891,35 +919,80 @@ module RubyLLM
       # Captures response metadata to the context
       #
-      # @param response [RubyLLM::Message] The response
+      # When a tool returns RubyLLM::Tool::Halt, the response is a Halt
+      # instance with no token metadata. In that case we pull metadata from
+      # the last assistant message in the client's history.
+      #
+      # @param response [RubyLLM::Message, RubyLLM::Tool::Halt] The response
       # @param context [Pipeline::Context] The context
       def capture_response(response, context)
-        context.input_tokens = response.input_tokens
-        context.output_tokens = response.output_tokens
-        context.model_used = response.model_id || model
-        # finish_reason may not be available on all RubyLLM::Message versions
-        context.finish_reason = response.respond_to?(:finish_reason) ? response.finish_reason : nil
+        is_halt = response.is_a?(RubyLLM::Tool::Halt)
+        metadata = is_halt ? last_assistant_message_from_client : response
-        # Store tracked tool calls in context for instrumentation
-        context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?
+        if metadata
+          context.input_tokens = metadata.input_tokens if metadata.respond_to?(:input_tokens)
+          context.output_tokens = metadata.output_tokens if metadata.respond_to?(:output_tokens)
+          context.model_used = (metadata.respond_to?(:model_id) && metadata.model_id) || model
-        # Capture Anthropic prompt caching metrics
-        if response.respond_to?(:cached_tokens) && response.cached_tokens&.positive?
-          context[:cached_tokens] = response.cached_tokens
+          # Capture Anthropic prompt caching metrics
+          if metadata.respond_to?(:cached_tokens) && metadata.cached_tokens&.positive?
+            context[:cached_tokens] = metadata.cached_tokens
+          end
+          if metadata.respond_to?(:cache_creation_tokens) && metadata.cache_creation_tokens&.positive?
+            context[:cache_creation_tokens] = metadata.cache_creation_tokens
+          end
+        else
+          context.model_used = model
         end
-        if response.respond_to?(:cache_creation_tokens) && response.cache_creation_tokens&.positive?
-          context[:cache_creation_tokens] = response.cache_creation_tokens
+        context.finish_reason = if is_halt
+          "halt"
+        elsif response.respond_to?(:finish_reason)
+          response.finish_reason
         end
-        calculate_costs(response, context) if context.input_tokens
+        # Store tracked tool calls in context for instrumentation
+        context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?
+        calculate_costs(metadata, context) if metadata && context.input_tokens
       end
-      # Calculates costs for the response
+      # Finds the most recent assistant message with usage metadata in
+      # the active client's history. Used to recover token/model metadata
+      # when the LLM call short-circuits via Tool::Halt.
+      #
+      # @return [RubyLLM::Message, nil]
+      def last_assistant_message_from_client
+        messages = @client&.messages
+        return nil unless messages
+        messages.reverse_each.find do |m|
+          m.respond_to?(:role) && m.role == :assistant &&
+            m.respond_to?(:input_tokens) && m.input_tokens
+        end
+      end
+      # Calculates costs for the response.
+      #
+      # Providers often return dated model variants (e.g.
+      # "anthropic/claude-4.6-sonnet-20260217") that aren't in the
+      # RubyLLM::Models registry, while the agent is configured with a
+      # stable alias (e.g. "anthropic/claude-sonnet-4.6") that is. When the
+      # response's model_id misses, fall back to the agent's configured
+      # model so cost calculation still finds pricing.
+      #
+      # Text input/output are priced from the context's token counts. These
+      # reflect the final attempt's usage (a retry/fallback overwrites them per
+      # attempt); failed attempts that erred at the provider are typically not
+      # billed, so the final attempt is the charged one. On top of the text
+      # cost, cache reads/writes and reasoning tokens — which exist on the
+      # response and are billed at their own rates — are priced via RubyLLM's
+      # first-class cost helper (RubyLLM::Cost) and added in.
       #
       # @param response [RubyLLM::Message] The response
       # @param context [Pipeline::Context] The context
       def calculate_costs(response, context)
-        model_info = find_model_info(response.model_id || model)
+        model_info = find_model_info(response.model_id) || find_model_info(model)
         return unless model_info
         input_tokens = context.input_tokens || 0
@@ -929,16 +1002,111 @@ module RubyLLM
         output_price = model_info.pricing&.text_tokens&.output || 0
         context.input_cost = (input_tokens / 1_000_000.0) * input_price
-        context.output_cost = (output_tokens / 1_000_000.0) * output_price
-        context.total_cost = (context.input_cost + context.output_cost).round(6)
+        # Price cache/reasoning extras first so we know whether reasoning was
+        # actually billed at the reasoning rate. Only then exclude those tokens
+        # from the output charge — never subtract tokens that weren't charged
+        # elsewhere, or a degraded cost helper would make reasoning vanish.
+        extra = extra_token_costs(response, model_info, context)
+        billable_output = output_tokens - reasoning_tokens_charged(response, context)
+        context.output_cost = ([billable_output, 0].max / 1_000_000.0) * output_price
+        context.total_cost = (context.input_cost + context.output_cost + extra).round(6)
+      end
+      # Number of reasoning (thinking) tokens that were actually charged at the
+      # reasoning rate, recorded in the cost breakdown by +extra_token_costs+.
+      #
+      # Reasoning providers fold reasoning tokens into the reported
+      # output_tokens, so when they are billed separately they must be removed
+      # from the output-rate charge to avoid double billing. Returns 0 when no
+      # reasoning was charged (non-reasoning model, or a degraded cost helper),
+      # so reasoning tokens are never silently dropped from the output charge.
+      #
+      # @param response [Object] The response (RubyLLM::Message in production)
+      # @param context [Pipeline::Context] The context
+      # @return [Integer] Reasoning tokens to exclude from the output charge
+      def reasoning_tokens_charged(response, context)
+        breakdown = context[:cost_breakdown]
+        return 0 unless breakdown.is_a?(Hash) && breakdown.key?(:thinking)
+        return 0 unless response.respond_to?(:reasoning_tokens)
+        response.reasoning_tokens.to_i
+      end
+      # Prices the non-text token components (cache reads/writes, reasoning)
+      # that RubyLLM::Cost exposes on a response, records them in metadata for
+      # visibility, and returns their sum to add on top of text input/output.
+      #
+      # Returns 0.0 for responses that don't expose cost (plain structs/mocks)
+      # or when the registry lacks the relevant prices, so cache/reasoning
+      # accuracy is additive and never regresses text pricing.
+      #
+      # @param response [Object] The response (RubyLLM::Message in production)
+      # @param model_info [RubyLLM::Model::Info] Resolved pricing source
+      # @param context [Pipeline::Context] The context
+      # @return [Float] Combined cache + reasoning cost, or 0.0
+      def extra_token_costs(response, model_info, context)
+        cost = response_cost(response, model_info)
+        return 0.0 unless cost
+        components = {
+          cache_read: cost.cache_read,
+          cache_write: cost.cache_write,
+          thinking: cost.thinking
+        }.compact.reject { |_, value| value.zero? }
+        return 0.0 if components.empty?
+        # Round per component and sum the rounded values so the stored
+        # breakdown reconciles exactly with the amount added to total_cost.
+        breakdown = components.transform_values { |value| value.round(6) }
+        context[:cost_breakdown] = breakdown
+        breakdown.values.sum
+      rescue => e
+        # Non-standard pricing shapes can't price these components; degrade to
+        # text-only rather than failing the cost calculation.
+        log_cost_warning("extra_token_costs", e)
+        0.0
+      end
+      # Returns a RubyLLM::Cost for the response, priced against the resolved
+      # model_info (which may differ from the response's own dated model
+      # variant). Returns nil for responses that don't expose cost — e.g.
+      # simple structs/mocks in tests — so callers skip the extra components.
+      #
+      # @param response [Object] The response (RubyLLM::Message in production)
+      # @param model_info [RubyLLM::Model::Info] Resolved pricing source
+      # @return [RubyLLM::Cost, nil]
+      def response_cost(response, model_info)
+        return nil unless response.respond_to?(:cost)
+        response.cost(model: model_info)
+      rescue => e
+        log_cost_warning("response_cost", e)
+        nil
+      end
+      # Leaves a debug breadcrumb for a swallowed cost-calculation error.
+      # Cost components are best-effort, so we degrade gracefully rather than
+      # raise, but record why instead of failing silently. Logging itself must
+      # never break cost handling.
+      #
+      # @param source [String] The method that degraded
+      # @param error [Exception] The swallowed error
+      def log_cost_warning(source, error)
+        return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
+        Rails.logger.debug("[RubyLLM::Agents] #{source} skipped: #{error.class}: #{error.message}")
+      rescue
+        nil
       end
-      # Finds model pricing info
+      # Finds model pricing info.
       #
       # @param model_id [String] The model ID
       # @return [Hash, nil] Model info with pricing
       def find_model_info(model_id)
-        return nil unless defined?(RubyLLM::Models)
+        return nil unless defined?(RubyLLM::Models) && model_id
         RubyLLM::Models.find(model_id)
       rescue

data/lib/ruby_llm/agents/core/configuration.rb CHANGED Viewed

@@ -373,10 +373,18 @@ module RubyLLM
         gemini_api_base
         gpustack_api_base
         ollama_api_base
+        bedrock_api_base
+        mistral_api_base
+        perplexity_api_base
+        vertexai_api_base
         vertexai_project_id
         vertexai_location
+        xai_api_base
         request_timeout
         max_retries
+        faraday_adapter
+        deprecation_behavior
+        tool_concurrency
       ].freeze
       FORWARDED_RUBY_LLM_ATTRIBUTES.each do |attr|

data/lib/ruby_llm/agents/core/llm_tenant.rb CHANGED Viewed

@@ -113,6 +113,11 @@ module RubyLLM
           # Auto-create tenant record callback
           after_create :create_default_llm_tenant if llm_tenant_options[:budget]
+          # Keep the denormalized Tenant#name column fresh so the dashboard's
+          # SQL search/sort by name keeps working for linked tenants. Display
+          # already resolves the name live, so this only powers SQL.
+          after_update :sync_llm_tenant_name
         end
         private
@@ -143,6 +148,17 @@ module RubyLLM
         send(id_method).to_s
       end
+      # Returns this model's tenant display name, resolved live from the
+      # configured name method (`llm_tenant name: :company_name`). Resolving on
+      # read means the tenant always reflects the current value instead of the
+      # snapshot taken when its Tenant record was first created.
+      #
+      # @return [String] The current display name
+      def llm_tenant_name
+        name_method = self.class.llm_tenant_options[:name] || :to_s
+        send(name_method).to_s
+      end
       # Returns API keys resolved from the DSL configuration
       #
       # Maps provider names (e.g., :openai, :anthropic) to their resolved values
@@ -354,6 +370,30 @@ module RubyLLM
         tenant.tenant_record = self
         tenant.save!
       end
+      # Pushes the current name into the linked Tenant row when the source
+      # column changed, keeping the denormalized copy fresh for the dashboard's
+      # SQL search/sort. Display already resolves live, so this is best-effort
+      # and never raises. Only runs when the name is backed by a column we can
+      # detect a change on (method-based names are skipped — display stays
+      # correct via live resolution, only SQL search/sort may lag for those).
+      #
+      # @return [void]
+      def sync_llm_tenant_name
+        name_method = self.class.llm_tenant_options[:name]
+        return unless name_method
+        change_predicate = "saved_change_to_#{name_method}?"
+        return unless respond_to?(change_predicate) && public_send(change_predicate)
+        record = llm_tenant_record
+        return unless record&.persisted?
+        return if record.read_attribute(:name) == llm_tenant_name
+        record.update_column(:name, llm_tenant_name)
+      rescue
+        nil
+      end
     end
   end
 end

data/lib/ruby_llm/agents/core/version.rb CHANGED Viewed

@@ -4,6 +4,6 @@ module RubyLLM
   module Agents
     # Current version of the RubyLLM::Agents gem
     # @return [String] Semantic version string
-    VERSION = "3.13.0"
+    VERSION = "3.14.0"
   end
 end

data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb CHANGED Viewed

@@ -37,8 +37,10 @@ module RubyLLM
           execution.create_detail!(detail_data)
         end
-        # Calculate costs if token data is available
-        if execution.input_tokens && execution.output_tokens
+        # Calculate costs if token data is available. Skip when the pipeline
+        # already supplied an accurate total (RubyLLM::Cost, which prices cache
+        # and reasoning tokens) so we don't downgrade it to text-only pricing.
+        if execution.input_tokens && execution.output_tokens && !execution.total_cost&.positive?
           execution.calculate_costs!
           execution.save!
         end

data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb CHANGED Viewed

@@ -48,7 +48,7 @@ module RubyLLM
               raised_exception = nil
               begin
-                @app.call(context)
+                capture_llm_requests(context) { @app.call(context) }
                 context.completed_at = Time.current
                 begin
@@ -84,6 +84,55 @@ module RubyLLM
           private
+          # Fiber-local stack of in-flight request accumulators, innermost last.
+          REQUEST_CAPTURE_STACK = :ruby_llm_agents_request_capture
+          # Captures real HTTP-level provider latency for the LLM call(s) made
+          # while running the rest of the pipeline.
+          #
+          # ruby_llm 1.16 emits a "request.ruby_llm" event per HTTP request and
+          # its Railtie wires ActiveSupport::Notifications as the instrumenter
+          # in Rails, so we subscribe for the duration of the downstream call
+          # and accumulate provider time and request count (retries/fallbacks
+          # add up). This is distinct from the total pipeline duration, which
+          # also includes middleware and tool execution. The values are stored
+          # in context metadata and persisted with the execution.
+          #
+          # AS::Notifications subscriptions are process-global, so a naive
+          # subscriber would also see events from other executions running
+          # concurrently (other threads) or nested inside this one (agent-as-
+          # tool). To attribute each request to exactly one execution, we keep
+          # a fiber-local stack of accumulators and only credit the innermost
+          # one on the thread that actually emitted the event — the callback
+          # runs synchronously on the emitting thread, so its top-of-stack is
+          # the execution whose LLM call fired.
+          #
+          # @param context [Context] The execution context
+          # @return [Object] The downstream call's return value
+          def capture_llm_requests(context)
+            return yield unless defined?(ActiveSupport::Notifications)
+            accumulator = {ms: 0.0, count: 0}
+            stack = (Thread.current[REQUEST_CAPTURE_STACK] ||= [])
+            stack.push(accumulator)
+            callback = lambda do |_name, started, finished, _id, _payload|
+              top = Thread.current[REQUEST_CAPTURE_STACK]&.last
+              next unless top.equal?(accumulator)
+              accumulator[:ms] += (finished - started) * 1000.0
+              accumulator[:count] += 1
+            end
+            ActiveSupport::Notifications.subscribed(callback, "request.ruby_llm") { yield }
+          ensure
+            stack&.pop
+            if accumulator && accumulator[:count].positive?
+              context[:llm_request_ms] = accumulator[:ms].round
+              context[:llm_request_count] = accumulator[:count]
+            end
+          end
           # Creates initial execution record with 'running' status
           #
           # Creates the record synchronously so it appears on the dashboard immediately.
@@ -339,6 +388,8 @@ module RubyLLM
               cache_hit: context.cached?,
               input_tokens: context.input_tokens || 0,
               output_tokens: context.output_tokens || 0,
+              input_cost: context.input_cost,
+              output_cost: context.output_cost,
               total_cost: context.total_cost || 0,
               attempts_count: context.attempts_made,
               chosen_model_id: context.model_used,

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm-agents
 version: !ruby/object:Gem::Version
-  version: 3.13.0
+  version: 3.14.0
 platform: ruby
 authors:
 - adham90
@@ -29,14 +29,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.14.1
+        version: 1.16.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.14.1
+        version: 1.16.0
 - !ruby/object:Gem::Dependency
   name: csv
   requirement: !ruby/object:Gem::Requirement