ruby_llm-agents 3.13.0 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fc64ee9b7db541e40c144d3b8007c0967f7ad63a7b5c404079d7c4c3398a646
4
- data.tar.gz: 1c5b0d4c8c55390cfb5bee3ad394b47f4cae44dbd0452c5135c6ad7e7ac66a0b
3
+ metadata.gz: d974a549e2d99bbcd16c8345547d16c16aff7ae602da503974d9db1f32be8ce6
4
+ data.tar.gz: 9947da35f39521706e7c7bef349dcf9f6a63f5f732409ff1774ce0607bd6429e
5
5
  SHA512:
6
- metadata.gz: 9c187e97882d3b4cf91dc98db12004d809db61a04bf95463aa32f1fa64b53a54d7f0800dfe105e0e6c5bf541b79bcdbe8f3d7537767b9c0d9814095f8fe6889d
7
- data.tar.gz: 63b31bfcfcd27acaded706b78cfa96db5825e98ebd6f4a7fe4b2786ed0f15d048c706fdd157c4364618388816ab739a8c0dac34c773bcfaf0f67d9dc90397766
6
+ metadata.gz: 8aa785d541d2e00d237a71d5a652643e7e2da83750f8264d754c00fce6c2e33fc7d6d47caa75c1ac94659fd05703a9c488a229064cd1d01157b6845e4ef3970e
7
+ data.tar.gz: 89bf72286b0a6e70418268457beea87df58bb16514a170730518adeadae84eac6d6ff6553e9726e481b00c2358ad9d909c9f20170bcf41224eb7efdaf3d7f1ac
data/README.md CHANGED
@@ -317,7 +317,7 @@ mount RubyLLM::Agents::Engine => "/agents"
317
317
 
318
318
  - **Ruby** >= 3.1.0
319
319
  - **Rails** >= 7.0
320
- - **RubyLLM** >= 1.12.0
320
+ - **RubyLLM** >= 1.16.0
321
321
 
322
322
  ## Contributing
323
323
 
@@ -35,6 +35,7 @@ module RubyLLM
35
35
  @days = range_to_days(@selected_range)
36
36
  parse_custom_dates if @selected_range == "custom"
37
37
 
38
+ set_active_filters
38
39
  current_scope = apply_filters(time_scoped(tenant_scoped_executions))
39
40
  prior_scope = apply_filters(prior_period_scope(tenant_scoped_executions))
40
41
 
@@ -114,6 +115,13 @@ module RubyLLM
114
115
  []
115
116
  end
116
117
 
118
+ set_active_filters
119
+ end
120
+
121
+ # Resolves the active agent/model/tenant filters from request params.
122
+ # Shared by index (which also loads dropdown options) and chart_data
123
+ # (JSON only) so apply_filters behaves consistently on both endpoints.
124
+ def set_active_filters
117
125
  @filter_agent = params[:agent].presence
118
126
  @filter_model = params[:model].presence
119
127
  @filter_tenant = params[:filter_tenant].presence
@@ -214,8 +214,9 @@ module RubyLLM
214
214
  # Only show root executions - children are nested under parents
215
215
  scope = scope.where(parent_execution_id: nil)
216
216
 
217
- # Eager load children for grouping
218
- scope.includes(:child_executions)
217
+ # Eager load children for grouping and detail for error_message, which
218
+ # the list renders per row (otherwise an N+1 on error rows).
219
+ scope.includes(:child_executions, :detail)
219
220
  end
220
221
 
221
222
  # Checks whether turbo-rails is available in the host application
@@ -19,7 +19,9 @@ module RubyLLM
19
19
  # @return [void]
20
20
  def index
21
21
  @sort_params = parse_tenant_sort_params
22
- scope = TenantBudget.all
22
+ # Eager-load tenant_record so display_name's live name resolution does
23
+ # not issue a query per row.
24
+ scope = TenantBudget.all.includes(:tenant_record)
23
25
 
24
26
  if params[:q].present?
25
27
  @search_query = params[:q].to_s.strip
@@ -67,7 +69,11 @@ module RubyLLM
67
69
  # @return [void]
68
70
  def update
69
71
  @tenant = TenantBudget.find(params[:id])
70
- if @tenant.update(tenant_params)
72
+ attrs = tenant_params
73
+ # Linked tenants derive their name live from the host record, so ignore
74
+ # any submitted name — it would be overwritten on the next record sync.
75
+ attrs = attrs.except(:name) if @tenant.linked?
76
+ if @tenant.update(attrs)
71
77
  redirect_to tenant_path(@tenant), notice: "Tenant updated successfully"
72
78
  else
73
79
  render :edit, status: :unprocessable_entity
@@ -107,7 +107,13 @@ module RubyLLM
107
107
  validates :finish_reason, inclusion: {in: FINISH_REASONS}, allow_nil: true
108
108
 
109
109
  before_save :calculate_total_tokens, if: -> { input_tokens_changed? || output_tokens_changed? }
110
- before_save :calculate_total_cost, if: -> { input_cost_changed? || output_cost_changed? }
110
+ # Derive total_cost from its components only when the caller did not set an
111
+ # explicit total in the same save. The pipeline records a
112
+ # cache/reasoning-aware total alongside input_cost/output_cost, and that
113
+ # richer value must not be overwritten with the text-only input+output sum.
114
+ # (Deriving from metadata is unsafe — metadata merges user-supplied agent
115
+ # data and a colliding key would corrupt the total.)
116
+ before_save :calculate_total_cost, if: -> { (input_cost_changed? || output_cost_changed?) && !total_cost_changed? }
111
117
 
112
118
  # Aggregates costs from all attempts using each attempt's model pricing
113
119
  #
@@ -474,7 +480,11 @@ module RubyLLM
474
480
  self.total_tokens = (input_tokens || 0) + (output_tokens || 0)
475
481
  end
476
482
 
477
- # Calculates and sets total_cost from input and output costs
483
+ # Calculates and sets total_cost from input and output costs.
484
+ #
485
+ # Only runs when the caller did not provide an explicit total_cost (see
486
+ # the before_save guard), so a cache/reasoning-aware total supplied by the
487
+ # pipeline is preserved rather than collapsed to the text-only sum.
478
488
  #
479
489
  # @return [BigDecimal] The calculated total
480
490
  def calculate_total_cost
@@ -145,11 +145,17 @@ module RubyLLM
145
145
  alias_method :for_tenant!, :for!
146
146
  end
147
147
 
148
- # Display name (name or tenant_id fallback)
148
+ # Display name.
149
+ #
150
+ # For tenants linked to a host model (Account, Organization, ...) the name
151
+ # is resolved live from that record, so a renamed record is reflected
152
+ # immediately instead of showing the snapshot taken when the tenant was
153
+ # created. Unlinked (string-id) tenants fall back to the stored name
154
+ # column, and tenant_id is the final fallback so this is never blank.
149
155
  #
150
156
  # @return [String]
151
157
  def display_name
152
- name.presence || tenant_id
158
+ linked_record_name.presence || name.presence || tenant_id
153
159
  end
154
160
 
155
161
  # Check if tenant is linked to a user model
@@ -179,6 +185,28 @@ module RubyLLM
179
185
  def activate!
180
186
  update!(active: true)
181
187
  end
188
+
189
+ private
190
+
191
+ # Live display name from the linked host record (Account/Organization),
192
+ # or nil when this tenant is unlinked or the record is unavailable.
193
+ # Prefers the model's llm_tenant_name (which honours the configured name
194
+ # method), falling back to a plain #name. Never raises — name resolution
195
+ # must not break rendering.
196
+ #
197
+ # @return [String, nil]
198
+ def linked_record_name
199
+ record = tenant_record
200
+ return nil unless record
201
+
202
+ if record.respond_to?(:llm_tenant_name)
203
+ record.llm_tenant_name
204
+ elsif record.respond_to?(:name)
205
+ record.name
206
+ end
207
+ rescue
208
+ nil
209
+ end
182
210
  end
183
211
  end
184
212
  end
@@ -17,13 +17,22 @@
17
17
 
18
18
  <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
19
19
  <div>
20
- <%= f.label :name, "display name", class: "block text-xs font-mono text-gray-500 dark:text-gray-400 mb-1" %>
21
- <%= f.text_field :name,
22
- class: "w-full px-3 py-2 bg-transparent border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-900 dark:text-gray-100 focus:ring-1 focus:ring-gray-400 dark:focus:ring-gray-600 focus:border-gray-400 dark:focus:border-gray-600 placeholder-gray-400 dark:placeholder-gray-600",
23
- placeholder: "e.g., Acme Corporation" %>
24
- <p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
25
- Falls back to tenant ID if not set.
26
- </p>
20
+ <label class="block text-xs font-mono text-gray-500 dark:text-gray-400 mb-1">display name</label>
21
+ <% if tenant.linked? %>
22
+ <p class="w-full px-3 py-2 bg-gray-50 dark:bg-gray-900 border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-500 dark:text-gray-400">
23
+ <%= tenant.display_name %>
24
+ </p>
25
+ <p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
26
+ Managed by the linked <%= tenant.tenant_record_type %>. Renaming it updates this automatically.
27
+ </p>
28
+ <% else %>
29
+ <%= f.text_field :name,
30
+ class: "w-full px-3 py-2 bg-transparent border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-900 dark:text-gray-100 focus:ring-1 focus:ring-gray-400 dark:focus:ring-gray-600 focus:border-gray-400 dark:focus:border-gray-600 placeholder-gray-400 dark:placeholder-gray-600",
31
+ placeholder: "e.g., Acme Corporation" %>
32
+ <p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
33
+ Falls back to tenant ID if not set.
34
+ </p>
35
+ <% end %>
27
36
  </div>
28
37
 
29
38
  <div>
@@ -262,6 +262,24 @@ module RubyLLM
262
262
  @tools || (superclass.respond_to?(:tools) ? superclass.tools : [])
263
263
  end
264
264
 
265
+ # Sets or returns how this agent runs multiple tool calls returned in
266
+ # a single LLM response.
267
+ #
268
+ # Mirrors RubyLLM's tool_concurrency: +false+ runs them sequentially,
269
+ # +true+ or +:threads+ runs them in Ruby threads, and +:fibers+ runs
270
+ # them in fibers (requires the async gem). When unset, the agent
271
+ # inherits its superclass value and ultimately the global
272
+ # RubyLLM tool_concurrency configuration.
273
+ #
274
+ # @param value [Boolean, Symbol] Concurrency mode (omit to read)
275
+ # @return [Boolean, Symbol, nil] Configured mode, or nil when unset
276
+ def tool_concurrency(*value)
277
+ @tool_concurrency = value.first unless value.empty?
278
+ return @tool_concurrency if instance_variable_defined?(:@tool_concurrency)
279
+
280
+ superclass.respond_to?(:tool_concurrency) ? superclass.tool_concurrency : nil
281
+ end
282
+
265
283
  # @!endgroup
266
284
 
267
285
  # @!group Temperature DSL
@@ -738,6 +756,7 @@ module RubyLLM
738
756
  def execute(context)
739
757
  @context = context
740
758
  client = build_client(context)
759
+ @client = client
741
760
 
742
761
  # Make context available to Tool instances during tool execution
743
762
  previous_context = Thread.current[:ruby_llm_agents_caller_context]
@@ -788,7 +807,16 @@ module RubyLLM
788
807
  end
789
808
 
790
809
  client = client.with_schema(schema) if schema
791
- client = client.with_tools(*resolved_tools) if resolved_tools.any?
810
+ if resolved_tools.any?
811
+ # Only pass concurrency when the agent overrides it; otherwise let
812
+ # RubyLLM apply its globally configured tool_concurrency default.
813
+ concurrency = self.class.tool_concurrency
814
+ client = if concurrency.nil?
815
+ client.with_tools(*resolved_tools)
816
+ else
817
+ client.with_tools(*resolved_tools, concurrency: concurrency)
818
+ end
819
+ end
792
820
  apply_tool_prompt_caching(client) if use_prompt_caching && resolved_tools.any?
793
821
  client = setup_tool_tracking(client) if resolved_tools.any?
794
822
  client = apply_messages(client, resolved_messages) if resolved_messages.any?
@@ -891,35 +919,80 @@ module RubyLLM
891
919
 
892
920
  # Captures response metadata to the context
893
921
  #
894
- # @param response [RubyLLM::Message] The response
922
+ # When a tool returns RubyLLM::Tool::Halt, the response is a Halt
923
+ # instance with no token metadata. In that case we pull metadata from
924
+ # the last assistant message in the client's history.
925
+ #
926
+ # @param response [RubyLLM::Message, RubyLLM::Tool::Halt] The response
895
927
  # @param context [Pipeline::Context] The context
896
928
  def capture_response(response, context)
897
- context.input_tokens = response.input_tokens
898
- context.output_tokens = response.output_tokens
899
- context.model_used = response.model_id || model
900
- # finish_reason may not be available on all RubyLLM::Message versions
901
- context.finish_reason = response.respond_to?(:finish_reason) ? response.finish_reason : nil
929
+ is_halt = response.is_a?(RubyLLM::Tool::Halt)
930
+ metadata = is_halt ? last_assistant_message_from_client : response
902
931
 
903
- # Store tracked tool calls in context for instrumentation
904
- context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?
932
+ if metadata
933
+ context.input_tokens = metadata.input_tokens if metadata.respond_to?(:input_tokens)
934
+ context.output_tokens = metadata.output_tokens if metadata.respond_to?(:output_tokens)
935
+ context.model_used = (metadata.respond_to?(:model_id) && metadata.model_id) || model
905
936
 
906
- # Capture Anthropic prompt caching metrics
907
- if response.respond_to?(:cached_tokens) && response.cached_tokens&.positive?
908
- context[:cached_tokens] = response.cached_tokens
937
+ # Capture Anthropic prompt caching metrics
938
+ if metadata.respond_to?(:cached_tokens) && metadata.cached_tokens&.positive?
939
+ context[:cached_tokens] = metadata.cached_tokens
940
+ end
941
+ if metadata.respond_to?(:cache_creation_tokens) && metadata.cache_creation_tokens&.positive?
942
+ context[:cache_creation_tokens] = metadata.cache_creation_tokens
943
+ end
944
+ else
945
+ context.model_used = model
909
946
  end
910
- if response.respond_to?(:cache_creation_tokens) && response.cache_creation_tokens&.positive?
911
- context[:cache_creation_tokens] = response.cache_creation_tokens
947
+
948
+ context.finish_reason = if is_halt
949
+ "halt"
950
+ elsif response.respond_to?(:finish_reason)
951
+ response.finish_reason
912
952
  end
913
953
 
914
- calculate_costs(response, context) if context.input_tokens
954
+ # Store tracked tool calls in context for instrumentation
955
+ context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?
956
+
957
+ calculate_costs(metadata, context) if metadata && context.input_tokens
915
958
  end
916
959
 
917
- # Calculates costs for the response
960
+ # Finds the most recent assistant message with usage metadata in
961
+ # the active client's history. Used to recover token/model metadata
962
+ # when the LLM call short-circuits via Tool::Halt.
963
+ #
964
+ # @return [RubyLLM::Message, nil]
965
+ def last_assistant_message_from_client
966
+ messages = @client&.messages
967
+ return nil unless messages
968
+
969
+ messages.reverse_each.find do |m|
970
+ m.respond_to?(:role) && m.role == :assistant &&
971
+ m.respond_to?(:input_tokens) && m.input_tokens
972
+ end
973
+ end
974
+
975
+ # Calculates costs for the response.
976
+ #
977
+ # Providers often return dated model variants (e.g.
978
+ # "anthropic/claude-4.6-sonnet-20260217") that aren't in the
979
+ # RubyLLM::Models registry, while the agent is configured with a
980
+ # stable alias (e.g. "anthropic/claude-sonnet-4.6") that is. When the
981
+ # response's model_id misses, fall back to the agent's configured
982
+ # model so cost calculation still finds pricing.
983
+ #
984
+ # Text input/output are priced from the context's token counts. These
985
+ # reflect the final attempt's usage (a retry/fallback overwrites them per
986
+ # attempt); failed attempts that erred at the provider are typically not
987
+ # billed, so the final attempt is the charged one. On top of the text
988
+ # cost, cache reads/writes and reasoning tokens — which exist on the
989
+ # response and are billed at their own rates — are priced via RubyLLM's
990
+ # first-class cost helper (RubyLLM::Cost) and added in.
918
991
  #
919
992
  # @param response [RubyLLM::Message] The response
920
993
  # @param context [Pipeline::Context] The context
921
994
  def calculate_costs(response, context)
922
- model_info = find_model_info(response.model_id || model)
995
+ model_info = find_model_info(response.model_id) || find_model_info(model)
923
996
  return unless model_info
924
997
 
925
998
  input_tokens = context.input_tokens || 0
@@ -929,16 +1002,111 @@ module RubyLLM
929
1002
  output_price = model_info.pricing&.text_tokens&.output || 0
930
1003
 
931
1004
  context.input_cost = (input_tokens / 1_000_000.0) * input_price
932
- context.output_cost = (output_tokens / 1_000_000.0) * output_price
933
- context.total_cost = (context.input_cost + context.output_cost).round(6)
1005
+
1006
+ # Price cache/reasoning extras first so we know whether reasoning was
1007
+ # actually billed at the reasoning rate. Only then exclude those tokens
1008
+ # from the output charge — never subtract tokens that weren't charged
1009
+ # elsewhere, or a degraded cost helper would make reasoning vanish.
1010
+ extra = extra_token_costs(response, model_info, context)
1011
+ billable_output = output_tokens - reasoning_tokens_charged(response, context)
1012
+ context.output_cost = ([billable_output, 0].max / 1_000_000.0) * output_price
1013
+
1014
+ context.total_cost = (context.input_cost + context.output_cost + extra).round(6)
1015
+ end
1016
+
1017
+ # Number of reasoning (thinking) tokens that were actually charged at the
1018
+ # reasoning rate, recorded in the cost breakdown by +extra_token_costs+.
1019
+ #
1020
+ # Reasoning providers fold reasoning tokens into the reported
1021
+ # output_tokens, so when they are billed separately they must be removed
1022
+ # from the output-rate charge to avoid double billing. Returns 0 when no
1023
+ # reasoning was charged (non-reasoning model, or a degraded cost helper),
1024
+ # so reasoning tokens are never silently dropped from the output charge.
1025
+ #
1026
+ # @param response [Object] The response (RubyLLM::Message in production)
1027
+ # @param context [Pipeline::Context] The context
1028
+ # @return [Integer] Reasoning tokens to exclude from the output charge
1029
+ def reasoning_tokens_charged(response, context)
1030
+ breakdown = context[:cost_breakdown]
1031
+ return 0 unless breakdown.is_a?(Hash) && breakdown.key?(:thinking)
1032
+ return 0 unless response.respond_to?(:reasoning_tokens)
1033
+
1034
+ response.reasoning_tokens.to_i
1035
+ end
1036
+
1037
+ # Prices the non-text token components (cache reads/writes, reasoning)
1038
+ # that RubyLLM::Cost exposes on a response, records them in metadata for
1039
+ # visibility, and returns their sum to add on top of text input/output.
1040
+ #
1041
+ # Returns 0.0 for responses that don't expose cost (plain structs/mocks)
1042
+ # or when the registry lacks the relevant prices, so cache/reasoning
1043
+ # accuracy is additive and never regresses text pricing.
1044
+ #
1045
+ # @param response [Object] The response (RubyLLM::Message in production)
1046
+ # @param model_info [RubyLLM::Model::Info] Resolved pricing source
1047
+ # @param context [Pipeline::Context] The context
1048
+ # @return [Float] Combined cache + reasoning cost, or 0.0
1049
+ def extra_token_costs(response, model_info, context)
1050
+ cost = response_cost(response, model_info)
1051
+ return 0.0 unless cost
1052
+
1053
+ components = {
1054
+ cache_read: cost.cache_read,
1055
+ cache_write: cost.cache_write,
1056
+ thinking: cost.thinking
1057
+ }.compact.reject { |_, value| value.zero? }
1058
+ return 0.0 if components.empty?
1059
+
1060
+ # Round per component and sum the rounded values so the stored
1061
+ # breakdown reconciles exactly with the amount added to total_cost.
1062
+ breakdown = components.transform_values { |value| value.round(6) }
1063
+ context[:cost_breakdown] = breakdown
1064
+ breakdown.values.sum
1065
+ rescue => e
1066
+ # Non-standard pricing shapes can't price these components; degrade to
1067
+ # text-only rather than failing the cost calculation.
1068
+ log_cost_warning("extra_token_costs", e)
1069
+ 0.0
1070
+ end
1071
+
1072
+ # Returns a RubyLLM::Cost for the response, priced against the resolved
1073
+ # model_info (which may differ from the response's own dated model
1074
+ # variant). Returns nil for responses that don't expose cost — e.g.
1075
+ # simple structs/mocks in tests — so callers skip the extra components.
1076
+ #
1077
+ # @param response [Object] The response (RubyLLM::Message in production)
1078
+ # @param model_info [RubyLLM::Model::Info] Resolved pricing source
1079
+ # @return [RubyLLM::Cost, nil]
1080
+ def response_cost(response, model_info)
1081
+ return nil unless response.respond_to?(:cost)
1082
+
1083
+ response.cost(model: model_info)
1084
+ rescue => e
1085
+ log_cost_warning("response_cost", e)
1086
+ nil
1087
+ end
1088
+
1089
+ # Leaves a debug breadcrumb for a swallowed cost-calculation error.
1090
+ # Cost components are best-effort, so we degrade gracefully rather than
1091
+ # raise, but record why instead of failing silently. Logging itself must
1092
+ # never break cost handling.
1093
+ #
1094
+ # @param source [String] The method that degraded
1095
+ # @param error [Exception] The swallowed error
1096
+ def log_cost_warning(source, error)
1097
+ return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
1098
+
1099
+ Rails.logger.debug("[RubyLLM::Agents] #{source} skipped: #{error.class}: #{error.message}")
1100
+ rescue
1101
+ nil
934
1102
  end
935
1103
 
936
- # Finds model pricing info
1104
+ # Finds model pricing info.
937
1105
  #
938
1106
  # @param model_id [String] The model ID
939
1107
  # @return [Hash, nil] Model info with pricing
940
1108
  def find_model_info(model_id)
941
- return nil unless defined?(RubyLLM::Models)
1109
+ return nil unless defined?(RubyLLM::Models) && model_id
942
1110
 
943
1111
  RubyLLM::Models.find(model_id)
944
1112
  rescue
@@ -373,10 +373,18 @@ module RubyLLM
373
373
  gemini_api_base
374
374
  gpustack_api_base
375
375
  ollama_api_base
376
+ bedrock_api_base
377
+ mistral_api_base
378
+ perplexity_api_base
379
+ vertexai_api_base
376
380
  vertexai_project_id
377
381
  vertexai_location
382
+ xai_api_base
378
383
  request_timeout
379
384
  max_retries
385
+ faraday_adapter
386
+ deprecation_behavior
387
+ tool_concurrency
380
388
  ].freeze
381
389
 
382
390
  FORWARDED_RUBY_LLM_ATTRIBUTES.each do |attr|
@@ -113,6 +113,11 @@ module RubyLLM
113
113
 
114
114
  # Auto-create tenant record callback
115
115
  after_create :create_default_llm_tenant if llm_tenant_options[:budget]
116
+
117
+ # Keep the denormalized Tenant#name column fresh so the dashboard's
118
+ # SQL search/sort by name keeps working for linked tenants. Display
119
+ # already resolves the name live, so this only powers SQL.
120
+ after_update :sync_llm_tenant_name
116
121
  end
117
122
 
118
123
  private
@@ -143,6 +148,17 @@ module RubyLLM
143
148
  send(id_method).to_s
144
149
  end
145
150
 
151
+ # Returns this model's tenant display name, resolved live from the
152
+ # configured name method (`llm_tenant name: :company_name`). Resolving on
153
+ # read means the tenant always reflects the current value instead of the
154
+ # snapshot taken when its Tenant record was first created.
155
+ #
156
+ # @return [String] The current display name
157
+ def llm_tenant_name
158
+ name_method = self.class.llm_tenant_options[:name] || :to_s
159
+ send(name_method).to_s
160
+ end
161
+
146
162
  # Returns API keys resolved from the DSL configuration
147
163
  #
148
164
  # Maps provider names (e.g., :openai, :anthropic) to their resolved values
@@ -354,6 +370,30 @@ module RubyLLM
354
370
  tenant.tenant_record = self
355
371
  tenant.save!
356
372
  end
373
+
374
+ # Pushes the current name into the linked Tenant row when the source
375
+ # column changed, keeping the denormalized copy fresh for the dashboard's
376
+ # SQL search/sort. Display already resolves live, so this is best-effort
377
+ # and never raises. Only runs when the name is backed by a column we can
378
+ # detect a change on (method-based names are skipped — display stays
379
+ # correct via live resolution, only SQL search/sort may lag for those).
380
+ #
381
+ # @return [void]
382
+ def sync_llm_tenant_name
383
+ name_method = self.class.llm_tenant_options[:name]
384
+ return unless name_method
385
+
386
+ change_predicate = "saved_change_to_#{name_method}?"
387
+ return unless respond_to?(change_predicate) && public_send(change_predicate)
388
+
389
+ record = llm_tenant_record
390
+ return unless record&.persisted?
391
+ return if record.read_attribute(:name) == llm_tenant_name
392
+
393
+ record.update_column(:name, llm_tenant_name)
394
+ rescue
395
+ nil
396
+ end
357
397
  end
358
398
  end
359
399
  end
@@ -4,6 +4,6 @@ module RubyLLM
4
4
  module Agents
5
5
  # Current version of the RubyLLM::Agents gem
6
6
  # @return [String] Semantic version string
7
- VERSION = "3.13.0"
7
+ VERSION = "3.14.0"
8
8
  end
9
9
  end
@@ -37,8 +37,10 @@ module RubyLLM
37
37
  execution.create_detail!(detail_data)
38
38
  end
39
39
 
40
- # Calculate costs if token data is available
41
- if execution.input_tokens && execution.output_tokens
40
+ # Calculate costs if token data is available. Skip when the pipeline
41
+ # already supplied an accurate total (RubyLLM::Cost, which prices cache
42
+ # and reasoning tokens) so we don't downgrade it to text-only pricing.
43
+ if execution.input_tokens && execution.output_tokens && !execution.total_cost&.positive?
42
44
  execution.calculate_costs!
43
45
  execution.save!
44
46
  end
@@ -48,7 +48,7 @@ module RubyLLM
48
48
  raised_exception = nil
49
49
 
50
50
  begin
51
- @app.call(context)
51
+ capture_llm_requests(context) { @app.call(context) }
52
52
  context.completed_at = Time.current
53
53
 
54
54
  begin
@@ -84,6 +84,55 @@ module RubyLLM
84
84
 
85
85
  private
86
86
 
87
+ # Fiber-local stack of in-flight request accumulators, innermost last.
88
+ REQUEST_CAPTURE_STACK = :ruby_llm_agents_request_capture
89
+
90
+ # Captures real HTTP-level provider latency for the LLM call(s) made
91
+ # while running the rest of the pipeline.
92
+ #
93
+ # ruby_llm 1.16 emits a "request.ruby_llm" event per HTTP request and
94
+ # its Railtie wires ActiveSupport::Notifications as the instrumenter
95
+ # in Rails, so we subscribe for the duration of the downstream call
96
+ # and accumulate provider time and request count (retries/fallbacks
97
+ # add up). This is distinct from the total pipeline duration, which
98
+ # also includes middleware and tool execution. The values are stored
99
+ # in context metadata and persisted with the execution.
100
+ #
101
+ # AS::Notifications subscriptions are process-global, so a naive
102
+ # subscriber would also see events from other executions running
103
+ # concurrently (other threads) or nested inside this one (agent-as-
104
+ # tool). To attribute each request to exactly one execution, we keep
105
+ # a fiber-local stack of accumulators and only credit the innermost
106
+ # one on the thread that actually emitted the event — the callback
107
+ # runs synchronously on the emitting thread, so its top-of-stack is
108
+ # the execution whose LLM call fired.
109
+ #
110
+ # @param context [Context] The execution context
111
+ # @return [Object] The downstream call's return value
112
+ def capture_llm_requests(context)
113
+ return yield unless defined?(ActiveSupport::Notifications)
114
+
115
+ accumulator = {ms: 0.0, count: 0}
116
+ stack = (Thread.current[REQUEST_CAPTURE_STACK] ||= [])
117
+ stack.push(accumulator)
118
+
119
+ callback = lambda do |_name, started, finished, _id, _payload|
120
+ top = Thread.current[REQUEST_CAPTURE_STACK]&.last
121
+ next unless top.equal?(accumulator)
122
+
123
+ accumulator[:ms] += (finished - started) * 1000.0
124
+ accumulator[:count] += 1
125
+ end
126
+
127
+ ActiveSupport::Notifications.subscribed(callback, "request.ruby_llm") { yield }
128
+ ensure
129
+ stack&.pop
130
+ if accumulator && accumulator[:count].positive?
131
+ context[:llm_request_ms] = accumulator[:ms].round
132
+ context[:llm_request_count] = accumulator[:count]
133
+ end
134
+ end
135
+
87
136
  # Creates initial execution record with 'running' status
88
137
  #
89
138
  # Creates the record synchronously so it appears on the dashboard immediately.
@@ -339,6 +388,8 @@ module RubyLLM
339
388
  cache_hit: context.cached?,
340
389
  input_tokens: context.input_tokens || 0,
341
390
  output_tokens: context.output_tokens || 0,
391
+ input_cost: context.input_cost,
392
+ output_cost: context.output_cost,
342
393
  total_cost: context.total_cost || 0,
343
394
  attempts_count: context.attempts_made,
344
395
  chosen_model_id: context.model_used,
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_llm-agents
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.13.0
4
+ version: 3.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - adham90
@@ -29,14 +29,14 @@ dependencies:
29
29
  requirements:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 1.14.1
32
+ version: 1.16.0
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: 1.14.1
39
+ version: 1.16.0
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: csv
42
42
  requirement: !ruby/object:Gem::Requirement