ruby_llm-agents 3.13.0 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/ruby_llm/agents/analytics_controller.rb +8 -0
- data/app/controllers/ruby_llm/agents/executions_controller.rb +3 -2
- data/app/controllers/ruby_llm/agents/tenants_controller.rb +8 -2
- data/app/models/ruby_llm/agents/execution.rb +12 -2
- data/app/models/ruby_llm/agents/tenant.rb +30 -2
- data/app/views/ruby_llm/agents/tenants/_form.html.erb +16 -7
- data/lib/ruby_llm/agents/base_agent.rb +189 -21
- data/lib/ruby_llm/agents/core/configuration.rb +8 -0
- data/lib/ruby_llm/agents/core/llm_tenant.rb +40 -0
- data/lib/ruby_llm/agents/core/version.rb +1 -1
- data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb +4 -2
- data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +52 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d974a549e2d99bbcd16c8345547d16c16aff7ae602da503974d9db1f32be8ce6
|
|
4
|
+
data.tar.gz: 9947da35f39521706e7c7bef349dcf9f6a63f5f732409ff1774ce0607bd6429e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8aa785d541d2e00d237a71d5a652643e7e2da83750f8264d754c00fce6c2e33fc7d6d47caa75c1ac94659fd05703a9c488a229064cd1d01157b6845e4ef3970e
|
|
7
|
+
data.tar.gz: 89bf72286b0a6e70418268457beea87df58bb16514a170730518adeadae84eac6d6ff6553e9726e481b00c2358ad9d909c9f20170bcf41224eb7efdaf3d7f1ac
|
data/README.md
CHANGED
|
data/app/controllers/ruby_llm/agents/analytics_controller.rb
CHANGED
|
@@ -35,6 +35,7 @@ module RubyLLM
|
|
|
35
35
|
@days = range_to_days(@selected_range)
|
|
36
36
|
parse_custom_dates if @selected_range == "custom"
|
|
37
37
|
|
|
38
|
+
set_active_filters
|
|
38
39
|
current_scope = apply_filters(time_scoped(tenant_scoped_executions))
|
|
39
40
|
prior_scope = apply_filters(prior_period_scope(tenant_scoped_executions))
|
|
40
41
|
|
|
@@ -114,6 +115,13 @@ module RubyLLM
|
|
|
114
115
|
[]
|
|
115
116
|
end
|
|
116
117
|
|
|
118
|
+
set_active_filters
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Resolves the active agent/model/tenant filters from request params.
|
|
122
|
+
# Shared by index (which also loads dropdown options) and chart_data
|
|
123
|
+
# (JSON only) so apply_filters behaves consistently on both endpoints.
|
|
124
|
+
def set_active_filters
|
|
117
125
|
@filter_agent = params[:agent].presence
|
|
118
126
|
@filter_model = params[:model].presence
|
|
119
127
|
@filter_tenant = params[:filter_tenant].presence
|
|
data/app/controllers/ruby_llm/agents/executions_controller.rb
CHANGED
|
@@ -214,8 +214,9 @@ module RubyLLM
|
|
|
214
214
|
# Only show root executions - children are nested under parents
|
|
215
215
|
scope = scope.where(parent_execution_id: nil)
|
|
216
216
|
|
|
217
|
-
# Eager load children for grouping
|
|
218
|
-
|
|
217
|
+
# Eager load children for grouping and detail for error_message, which
|
|
218
|
+
# the list renders per row (otherwise an N+1 on error rows).
|
|
219
|
+
scope.includes(:child_executions, :detail)
|
|
219
220
|
end
|
|
220
221
|
|
|
221
222
|
# Checks whether turbo-rails is available in the host application
|
|
data/app/controllers/ruby_llm/agents/tenants_controller.rb
CHANGED
|
@@ -19,7 +19,9 @@ module RubyLLM
|
|
|
19
19
|
# @return [void]
|
|
20
20
|
def index
|
|
21
21
|
@sort_params = parse_tenant_sort_params
|
|
22
|
-
|
|
22
|
+
# Eager-load tenant_record so display_name's live name resolution does
|
|
23
|
+
# not issue a query per row.
|
|
24
|
+
scope = TenantBudget.all.includes(:tenant_record)
|
|
23
25
|
|
|
24
26
|
if params[:q].present?
|
|
25
27
|
@search_query = params[:q].to_s.strip
|
|
@@ -67,7 +69,11 @@ module RubyLLM
|
|
|
67
69
|
# @return [void]
|
|
68
70
|
def update
|
|
69
71
|
@tenant = TenantBudget.find(params[:id])
|
|
70
|
-
|
|
72
|
+
attrs = tenant_params
|
|
73
|
+
# Linked tenants derive their name live from the host record, so ignore
|
|
74
|
+
# any submitted name — it would be overwritten on the next record sync.
|
|
75
|
+
attrs = attrs.except(:name) if @tenant.linked?
|
|
76
|
+
if @tenant.update(attrs)
|
|
71
77
|
redirect_to tenant_path(@tenant), notice: "Tenant updated successfully"
|
|
72
78
|
else
|
|
73
79
|
render :edit, status: :unprocessable_entity
|
|
data/app/models/ruby_llm/agents/execution.rb
CHANGED
|
@@ -107,7 +107,13 @@ module RubyLLM
|
|
|
107
107
|
validates :finish_reason, inclusion: {in: FINISH_REASONS}, allow_nil: true
|
|
108
108
|
|
|
109
109
|
before_save :calculate_total_tokens, if: -> { input_tokens_changed? || output_tokens_changed? }
|
|
110
|
-
|
|
110
|
+
# Derive total_cost from its components only when the caller did not set an
|
|
111
|
+
# explicit total in the same save. The pipeline records a
|
|
112
|
+
# cache/reasoning-aware total alongside input_cost/output_cost, and that
|
|
113
|
+
# richer value must not be overwritten with the text-only input+output sum.
|
|
114
|
+
# (Deriving from metadata is unsafe — metadata merges user-supplied agent
|
|
115
|
+
# data and a colliding key would corrupt the total.)
|
|
116
|
+
before_save :calculate_total_cost, if: -> { (input_cost_changed? || output_cost_changed?) && !total_cost_changed? }
|
|
111
117
|
|
|
112
118
|
# Aggregates costs from all attempts using each attempt's model pricing
|
|
113
119
|
#
|
|
@@ -474,7 +480,11 @@ module RubyLLM
|
|
|
474
480
|
self.total_tokens = (input_tokens || 0) + (output_tokens || 0)
|
|
475
481
|
end
|
|
476
482
|
|
|
477
|
-
# Calculates and sets total_cost from input and output costs
|
|
483
|
+
# Calculates and sets total_cost from input and output costs.
|
|
484
|
+
#
|
|
485
|
+
# Only runs when the caller did not provide an explicit total_cost (see
|
|
486
|
+
# the before_save guard), so a cache/reasoning-aware total supplied by the
|
|
487
|
+
# pipeline is preserved rather than collapsed to the text-only sum.
|
|
478
488
|
#
|
|
479
489
|
# @return [BigDecimal] The calculated total
|
|
480
490
|
def calculate_total_cost
|
|
data/app/models/ruby_llm/agents/tenant.rb
CHANGED
|
@@ -145,11 +145,17 @@ module RubyLLM
|
|
|
145
145
|
alias_method :for_tenant!, :for!
|
|
146
146
|
end
|
|
147
147
|
|
|
148
|
-
# Display name
|
|
148
|
+
# Display name.
|
|
149
|
+
#
|
|
150
|
+
# For tenants linked to a host model (Account, Organization, ...) the name
|
|
151
|
+
# is resolved live from that record, so a renamed record is reflected
|
|
152
|
+
# immediately instead of showing the snapshot taken when the tenant was
|
|
153
|
+
# created. Unlinked (string-id) tenants fall back to the stored name
|
|
154
|
+
# column, and tenant_id is the final fallback so this is never blank.
|
|
149
155
|
#
|
|
150
156
|
# @return [String]
|
|
151
157
|
def display_name
|
|
152
|
-
name.presence || tenant_id
|
|
158
|
+
linked_record_name.presence || name.presence || tenant_id
|
|
153
159
|
end
|
|
154
160
|
|
|
155
161
|
# Check if tenant is linked to a user model
|
|
@@ -179,6 +185,28 @@ module RubyLLM
|
|
|
179
185
|
def activate!
|
|
180
186
|
update!(active: true)
|
|
181
187
|
end
|
|
188
|
+
|
|
189
|
+
private
|
|
190
|
+
|
|
191
|
+
# Live display name from the linked host record (Account/Organization),
|
|
192
|
+
# or nil when this tenant is unlinked or the record is unavailable.
|
|
193
|
+
# Prefers the model's llm_tenant_name (which honours the configured name
|
|
194
|
+
# method), falling back to a plain #name. Never raises — name resolution
|
|
195
|
+
# must not break rendering.
|
|
196
|
+
#
|
|
197
|
+
# @return [String, nil]
|
|
198
|
+
def linked_record_name
|
|
199
|
+
record = tenant_record
|
|
200
|
+
return nil unless record
|
|
201
|
+
|
|
202
|
+
if record.respond_to?(:llm_tenant_name)
|
|
203
|
+
record.llm_tenant_name
|
|
204
|
+
elsif record.respond_to?(:name)
|
|
205
|
+
record.name
|
|
206
|
+
end
|
|
207
|
+
rescue
|
|
208
|
+
nil
|
|
209
|
+
end
|
|
182
210
|
end
|
|
183
211
|
end
|
|
184
212
|
end
|
|
data/app/views/ruby_llm/agents/tenants/_form.html.erb
CHANGED
|
@@ -17,13 +17,22 @@
|
|
|
17
17
|
|
|
18
18
|
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
|
19
19
|
<div>
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
20
|
+
<label class="block text-xs font-mono text-gray-500 dark:text-gray-400 mb-1">display name</label>
|
|
21
|
+
<% if tenant.linked? %>
|
|
22
|
+
<p class="w-full px-3 py-2 bg-gray-50 dark:bg-gray-900 border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-500 dark:text-gray-400">
|
|
23
|
+
<%= tenant.display_name %>
|
|
24
|
+
</p>
|
|
25
|
+
<p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
|
|
26
|
+
Managed by the linked <%= tenant.tenant_record_type %>. Renaming it updates this automatically.
|
|
27
|
+
</p>
|
|
28
|
+
<% else %>
|
|
29
|
+
<%= f.text_field :name,
|
|
30
|
+
class: "w-full px-3 py-2 bg-transparent border border-gray-200 dark:border-gray-800 rounded text-sm font-mono text-gray-900 dark:text-gray-100 focus:ring-1 focus:ring-gray-400 dark:focus:ring-gray-600 focus:border-gray-400 dark:focus:border-gray-600 placeholder-gray-400 dark:placeholder-gray-600",
|
|
31
|
+
placeholder: "e.g., Acme Corporation" %>
|
|
32
|
+
<p class="mt-1 text-[10px] font-mono text-gray-400 dark:text-gray-600">
|
|
33
|
+
Falls back to tenant ID if not set.
|
|
34
|
+
</p>
|
|
35
|
+
<% end %>
|
|
27
36
|
</div>
|
|
28
37
|
|
|
29
38
|
<div>
|
|
data/lib/ruby_llm/agents/base_agent.rb
CHANGED
|
@@ -262,6 +262,24 @@ module RubyLLM
|
|
|
262
262
|
@tools || (superclass.respond_to?(:tools) ? superclass.tools : [])
|
|
263
263
|
end
|
|
264
264
|
|
|
265
|
+
# Sets or returns how this agent runs multiple tool calls returned in
|
|
266
|
+
# a single LLM response.
|
|
267
|
+
#
|
|
268
|
+
# Mirrors RubyLLM's tool_concurrency: +false+ runs them sequentially,
|
|
269
|
+
# +true+ or +:threads+ runs them in Ruby threads, and +:fibers+ runs
|
|
270
|
+
# them in fibers (requires the async gem). When unset, the agent
|
|
271
|
+
# inherits its superclass value and ultimately the global
|
|
272
|
+
# RubyLLM tool_concurrency configuration.
|
|
273
|
+
#
|
|
274
|
+
# @param value [Boolean, Symbol] Concurrency mode (omit to read)
|
|
275
|
+
# @return [Boolean, Symbol, nil] Configured mode, or nil when unset
|
|
276
|
+
def tool_concurrency(*value)
|
|
277
|
+
@tool_concurrency = value.first unless value.empty?
|
|
278
|
+
return @tool_concurrency if instance_variable_defined?(:@tool_concurrency)
|
|
279
|
+
|
|
280
|
+
superclass.respond_to?(:tool_concurrency) ? superclass.tool_concurrency : nil
|
|
281
|
+
end
|
|
282
|
+
|
|
265
283
|
# @!endgroup
|
|
266
284
|
|
|
267
285
|
# @!group Temperature DSL
|
|
@@ -738,6 +756,7 @@ module RubyLLM
|
|
|
738
756
|
def execute(context)
|
|
739
757
|
@context = context
|
|
740
758
|
client = build_client(context)
|
|
759
|
+
@client = client
|
|
741
760
|
|
|
742
761
|
# Make context available to Tool instances during tool execution
|
|
743
762
|
previous_context = Thread.current[:ruby_llm_agents_caller_context]
|
|
@@ -788,7 +807,16 @@ module RubyLLM
|
|
|
788
807
|
end
|
|
789
808
|
|
|
790
809
|
client = client.with_schema(schema) if schema
|
|
791
|
-
|
|
810
|
+
if resolved_tools.any?
|
|
811
|
+
# Only pass concurrency when the agent overrides it; otherwise let
|
|
812
|
+
# RubyLLM apply its globally configured tool_concurrency default.
|
|
813
|
+
concurrency = self.class.tool_concurrency
|
|
814
|
+
client = if concurrency.nil?
|
|
815
|
+
client.with_tools(*resolved_tools)
|
|
816
|
+
else
|
|
817
|
+
client.with_tools(*resolved_tools, concurrency: concurrency)
|
|
818
|
+
end
|
|
819
|
+
end
|
|
792
820
|
apply_tool_prompt_caching(client) if use_prompt_caching && resolved_tools.any?
|
|
793
821
|
client = setup_tool_tracking(client) if resolved_tools.any?
|
|
794
822
|
client = apply_messages(client, resolved_messages) if resolved_messages.any?
|
|
@@ -891,35 +919,80 @@ module RubyLLM
|
|
|
891
919
|
|
|
892
920
|
# Captures response metadata to the context
|
|
893
921
|
#
|
|
894
|
-
#
|
|
922
|
+
# When a tool returns RubyLLM::Tool::Halt, the response is a Halt
|
|
923
|
+
# instance with no token metadata. In that case we pull metadata from
|
|
924
|
+
# the last assistant message in the client's history.
|
|
925
|
+
#
|
|
926
|
+
# @param response [RubyLLM::Message, RubyLLM::Tool::Halt] The response
|
|
895
927
|
# @param context [Pipeline::Context] The context
|
|
896
928
|
def capture_response(response, context)
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
context.model_used = response.model_id || model
|
|
900
|
-
# finish_reason may not be available on all RubyLLM::Message versions
|
|
901
|
-
context.finish_reason = response.respond_to?(:finish_reason) ? response.finish_reason : nil
|
|
929
|
+
is_halt = response.is_a?(RubyLLM::Tool::Halt)
|
|
930
|
+
metadata = is_halt ? last_assistant_message_from_client : response
|
|
902
931
|
|
|
903
|
-
|
|
904
|
-
|
|
932
|
+
if metadata
|
|
933
|
+
context.input_tokens = metadata.input_tokens if metadata.respond_to?(:input_tokens)
|
|
934
|
+
context.output_tokens = metadata.output_tokens if metadata.respond_to?(:output_tokens)
|
|
935
|
+
context.model_used = (metadata.respond_to?(:model_id) && metadata.model_id) || model
|
|
905
936
|
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
937
|
+
# Capture Anthropic prompt caching metrics
|
|
938
|
+
if metadata.respond_to?(:cached_tokens) && metadata.cached_tokens&.positive?
|
|
939
|
+
context[:cached_tokens] = metadata.cached_tokens
|
|
940
|
+
end
|
|
941
|
+
if metadata.respond_to?(:cache_creation_tokens) && metadata.cache_creation_tokens&.positive?
|
|
942
|
+
context[:cache_creation_tokens] = metadata.cache_creation_tokens
|
|
943
|
+
end
|
|
944
|
+
else
|
|
945
|
+
context.model_used = model
|
|
909
946
|
end
|
|
910
|
-
|
|
911
|
-
|
|
947
|
+
|
|
948
|
+
context.finish_reason = if is_halt
|
|
949
|
+
"halt"
|
|
950
|
+
elsif response.respond_to?(:finish_reason)
|
|
951
|
+
response.finish_reason
|
|
912
952
|
end
|
|
913
953
|
|
|
914
|
-
|
|
954
|
+
# Store tracked tool calls in context for instrumentation
|
|
955
|
+
context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?
|
|
956
|
+
|
|
957
|
+
calculate_costs(metadata, context) if metadata && context.input_tokens
|
|
915
958
|
end
|
|
916
959
|
|
|
917
|
-
#
|
|
960
|
+
# Finds the most recent assistant message with usage metadata in
|
|
961
|
+
# the active client's history. Used to recover token/model metadata
|
|
962
|
+
# when the LLM call short-circuits via Tool::Halt.
|
|
963
|
+
#
|
|
964
|
+
# @return [RubyLLM::Message, nil]
|
|
965
|
+
def last_assistant_message_from_client
|
|
966
|
+
messages = @client&.messages
|
|
967
|
+
return nil unless messages
|
|
968
|
+
|
|
969
|
+
messages.reverse_each.find do |m|
|
|
970
|
+
m.respond_to?(:role) && m.role == :assistant &&
|
|
971
|
+
m.respond_to?(:input_tokens) && m.input_tokens
|
|
972
|
+
end
|
|
973
|
+
end
|
|
974
|
+
|
|
975
|
+
# Calculates costs for the response.
|
|
976
|
+
#
|
|
977
|
+
# Providers often return dated model variants (e.g.
|
|
978
|
+
# "anthropic/claude-4.6-sonnet-20260217") that aren't in the
|
|
979
|
+
# RubyLLM::Models registry, while the agent is configured with a
|
|
980
|
+
# stable alias (e.g. "anthropic/claude-sonnet-4.6") that is. When the
|
|
981
|
+
# response's model_id misses, fall back to the agent's configured
|
|
982
|
+
# model so cost calculation still finds pricing.
|
|
983
|
+
#
|
|
984
|
+
# Text input/output are priced from the context's token counts. These
|
|
985
|
+
# reflect the final attempt's usage (a retry/fallback overwrites them per
|
|
986
|
+
# attempt); failed attempts that erred at the provider are typically not
|
|
987
|
+
# billed, so the final attempt is the charged one. On top of the text
|
|
988
|
+
# cost, cache reads/writes and reasoning tokens — which exist on the
|
|
989
|
+
# response and are billed at their own rates — are priced via RubyLLM's
|
|
990
|
+
# first-class cost helper (RubyLLM::Cost) and added in.
|
|
918
991
|
#
|
|
919
992
|
# @param response [RubyLLM::Message] The response
|
|
920
993
|
# @param context [Pipeline::Context] The context
|
|
921
994
|
def calculate_costs(response, context)
|
|
922
|
-
model_info = find_model_info(response.model_id || model)
|
|
995
|
+
model_info = find_model_info(response.model_id) || find_model_info(model)
|
|
923
996
|
return unless model_info
|
|
924
997
|
|
|
925
998
|
input_tokens = context.input_tokens || 0
|
|
@@ -929,16 +1002,111 @@ module RubyLLM
|
|
|
929
1002
|
output_price = model_info.pricing&.text_tokens&.output || 0
|
|
930
1003
|
|
|
931
1004
|
context.input_cost = (input_tokens / 1_000_000.0) * input_price
|
|
932
|
-
|
|
933
|
-
|
|
1005
|
+
|
|
1006
|
+
# Price cache/reasoning extras first so we know whether reasoning was
|
|
1007
|
+
# actually billed at the reasoning rate. Only then exclude those tokens
|
|
1008
|
+
# from the output charge — never subtract tokens that weren't charged
|
|
1009
|
+
# elsewhere, or a degraded cost helper would make reasoning vanish.
|
|
1010
|
+
extra = extra_token_costs(response, model_info, context)
|
|
1011
|
+
billable_output = output_tokens - reasoning_tokens_charged(response, context)
|
|
1012
|
+
context.output_cost = ([billable_output, 0].max / 1_000_000.0) * output_price
|
|
1013
|
+
|
|
1014
|
+
context.total_cost = (context.input_cost + context.output_cost + extra).round(6)
|
|
1015
|
+
end
|
|
1016
|
+
|
|
1017
|
+
# Number of reasoning (thinking) tokens that were actually charged at the
|
|
1018
|
+
# reasoning rate, recorded in the cost breakdown by +extra_token_costs+.
|
|
1019
|
+
#
|
|
1020
|
+
# Reasoning providers fold reasoning tokens into the reported
|
|
1021
|
+
# output_tokens, so when they are billed separately they must be removed
|
|
1022
|
+
# from the output-rate charge to avoid double billing. Returns 0 when no
|
|
1023
|
+
# reasoning was charged (non-reasoning model, or a degraded cost helper),
|
|
1024
|
+
# so reasoning tokens are never silently dropped from the output charge.
|
|
1025
|
+
#
|
|
1026
|
+
# @param response [Object] The response (RubyLLM::Message in production)
|
|
1027
|
+
# @param context [Pipeline::Context] The context
|
|
1028
|
+
# @return [Integer] Reasoning tokens to exclude from the output charge
|
|
1029
|
+
def reasoning_tokens_charged(response, context)
|
|
1030
|
+
breakdown = context[:cost_breakdown]
|
|
1031
|
+
return 0 unless breakdown.is_a?(Hash) && breakdown.key?(:thinking)
|
|
1032
|
+
return 0 unless response.respond_to?(:reasoning_tokens)
|
|
1033
|
+
|
|
1034
|
+
response.reasoning_tokens.to_i
|
|
1035
|
+
end
|
|
1036
|
+
|
|
1037
|
+
# Prices the non-text token components (cache reads/writes, reasoning)
|
|
1038
|
+
# that RubyLLM::Cost exposes on a response, records them in metadata for
|
|
1039
|
+
# visibility, and returns their sum to add on top of text input/output.
|
|
1040
|
+
#
|
|
1041
|
+
# Returns 0.0 for responses that don't expose cost (plain structs/mocks)
|
|
1042
|
+
# or when the registry lacks the relevant prices, so cache/reasoning
|
|
1043
|
+
# accuracy is additive and never regresses text pricing.
|
|
1044
|
+
#
|
|
1045
|
+
# @param response [Object] The response (RubyLLM::Message in production)
|
|
1046
|
+
# @param model_info [RubyLLM::Model::Info] Resolved pricing source
|
|
1047
|
+
# @param context [Pipeline::Context] The context
|
|
1048
|
+
# @return [Float] Combined cache + reasoning cost, or 0.0
|
|
1049
|
+
def extra_token_costs(response, model_info, context)
|
|
1050
|
+
cost = response_cost(response, model_info)
|
|
1051
|
+
return 0.0 unless cost
|
|
1052
|
+
|
|
1053
|
+
components = {
|
|
1054
|
+
cache_read: cost.cache_read,
|
|
1055
|
+
cache_write: cost.cache_write,
|
|
1056
|
+
thinking: cost.thinking
|
|
1057
|
+
}.compact.reject { |_, value| value.zero? }
|
|
1058
|
+
return 0.0 if components.empty?
|
|
1059
|
+
|
|
1060
|
+
# Round per component and sum the rounded values so the stored
|
|
1061
|
+
# breakdown reconciles exactly with the amount added to total_cost.
|
|
1062
|
+
breakdown = components.transform_values { |value| value.round(6) }
|
|
1063
|
+
context[:cost_breakdown] = breakdown
|
|
1064
|
+
breakdown.values.sum
|
|
1065
|
+
rescue => e
|
|
1066
|
+
# Non-standard pricing shapes can't price these components; degrade to
|
|
1067
|
+
# text-only rather than failing the cost calculation.
|
|
1068
|
+
log_cost_warning("extra_token_costs", e)
|
|
1069
|
+
0.0
|
|
1070
|
+
end
|
|
1071
|
+
|
|
1072
|
+
# Returns a RubyLLM::Cost for the response, priced against the resolved
|
|
1073
|
+
# model_info (which may differ from the response's own dated model
|
|
1074
|
+
# variant). Returns nil for responses that don't expose cost — e.g.
|
|
1075
|
+
# simple structs/mocks in tests — so callers skip the extra components.
|
|
1076
|
+
#
|
|
1077
|
+
# @param response [Object] The response (RubyLLM::Message in production)
|
|
1078
|
+
# @param model_info [RubyLLM::Model::Info] Resolved pricing source
|
|
1079
|
+
# @return [RubyLLM::Cost, nil]
|
|
1080
|
+
def response_cost(response, model_info)
|
|
1081
|
+
return nil unless response.respond_to?(:cost)
|
|
1082
|
+
|
|
1083
|
+
response.cost(model: model_info)
|
|
1084
|
+
rescue => e
|
|
1085
|
+
log_cost_warning("response_cost", e)
|
|
1086
|
+
nil
|
|
1087
|
+
end
|
|
1088
|
+
|
|
1089
|
+
# Leaves a debug breadcrumb for a swallowed cost-calculation error.
|
|
1090
|
+
# Cost components are best-effort, so we degrade gracefully rather than
|
|
1091
|
+
# raise, but record why instead of failing silently. Logging itself must
|
|
1092
|
+
# never break cost handling.
|
|
1093
|
+
#
|
|
1094
|
+
# @param source [String] The method that degraded
|
|
1095
|
+
# @param error [Exception] The swallowed error
|
|
1096
|
+
def log_cost_warning(source, error)
|
|
1097
|
+
return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
1098
|
+
|
|
1099
|
+
Rails.logger.debug("[RubyLLM::Agents] #{source} skipped: #{error.class}: #{error.message}")
|
|
1100
|
+
rescue
|
|
1101
|
+
nil
|
|
934
1102
|
end
|
|
935
1103
|
|
|
936
|
-
# Finds model pricing info
|
|
1104
|
+
# Finds model pricing info.
|
|
937
1105
|
#
|
|
938
1106
|
# @param model_id [String] The model ID
|
|
939
1107
|
# @return [Hash, nil] Model info with pricing
|
|
940
1108
|
def find_model_info(model_id)
|
|
941
|
-
return nil unless defined?(RubyLLM::Models)
|
|
1109
|
+
return nil unless defined?(RubyLLM::Models) && model_id
|
|
942
1110
|
|
|
943
1111
|
RubyLLM::Models.find(model_id)
|
|
944
1112
|
rescue
|
|
data/lib/ruby_llm/agents/core/configuration.rb
CHANGED
|
@@ -373,10 +373,18 @@ module RubyLLM
|
|
|
373
373
|
gemini_api_base
|
|
374
374
|
gpustack_api_base
|
|
375
375
|
ollama_api_base
|
|
376
|
+
bedrock_api_base
|
|
377
|
+
mistral_api_base
|
|
378
|
+
perplexity_api_base
|
|
379
|
+
vertexai_api_base
|
|
376
380
|
vertexai_project_id
|
|
377
381
|
vertexai_location
|
|
382
|
+
xai_api_base
|
|
378
383
|
request_timeout
|
|
379
384
|
max_retries
|
|
385
|
+
faraday_adapter
|
|
386
|
+
deprecation_behavior
|
|
387
|
+
tool_concurrency
|
|
380
388
|
].freeze
|
|
381
389
|
|
|
382
390
|
FORWARDED_RUBY_LLM_ATTRIBUTES.each do |attr|
|
|
data/lib/ruby_llm/agents/core/llm_tenant.rb
CHANGED
|
@@ -113,6 +113,11 @@ module RubyLLM
|
|
|
113
113
|
|
|
114
114
|
# Auto-create tenant record callback
|
|
115
115
|
after_create :create_default_llm_tenant if llm_tenant_options[:budget]
|
|
116
|
+
|
|
117
|
+
# Keep the denormalized Tenant#name column fresh so the dashboard's
|
|
118
|
+
# SQL search/sort by name keeps working for linked tenants. Display
|
|
119
|
+
# already resolves the name live, so this only powers SQL.
|
|
120
|
+
after_update :sync_llm_tenant_name
|
|
116
121
|
end
|
|
117
122
|
|
|
118
123
|
private
|
|
@@ -143,6 +148,17 @@ module RubyLLM
|
|
|
143
148
|
send(id_method).to_s
|
|
144
149
|
end
|
|
145
150
|
|
|
151
|
+
# Returns this model's tenant display name, resolved live from the
|
|
152
|
+
# configured name method (`llm_tenant name: :company_name`). Resolving on
|
|
153
|
+
# read means the tenant always reflects the current value instead of the
|
|
154
|
+
# snapshot taken when its Tenant record was first created.
|
|
155
|
+
#
|
|
156
|
+
# @return [String] The current display name
|
|
157
|
+
def llm_tenant_name
|
|
158
|
+
name_method = self.class.llm_tenant_options[:name] || :to_s
|
|
159
|
+
send(name_method).to_s
|
|
160
|
+
end
|
|
161
|
+
|
|
146
162
|
# Returns API keys resolved from the DSL configuration
|
|
147
163
|
#
|
|
148
164
|
# Maps provider names (e.g., :openai, :anthropic) to their resolved values
|
|
@@ -354,6 +370,30 @@ module RubyLLM
|
|
|
354
370
|
tenant.tenant_record = self
|
|
355
371
|
tenant.save!
|
|
356
372
|
end
|
|
373
|
+
|
|
374
|
+
# Pushes the current name into the linked Tenant row when the source
|
|
375
|
+
# column changed, keeping the denormalized copy fresh for the dashboard's
|
|
376
|
+
# SQL search/sort. Display already resolves live, so this is best-effort
|
|
377
|
+
# and never raises. Only runs when the name is backed by a column we can
|
|
378
|
+
# detect a change on (method-based names are skipped — display stays
|
|
379
|
+
# correct via live resolution, only SQL search/sort may lag for those).
|
|
380
|
+
#
|
|
381
|
+
# @return [void]
|
|
382
|
+
def sync_llm_tenant_name
|
|
383
|
+
name_method = self.class.llm_tenant_options[:name]
|
|
384
|
+
return unless name_method
|
|
385
|
+
|
|
386
|
+
change_predicate = "saved_change_to_#{name_method}?"
|
|
387
|
+
return unless respond_to?(change_predicate) && public_send(change_predicate)
|
|
388
|
+
|
|
389
|
+
record = llm_tenant_record
|
|
390
|
+
return unless record&.persisted?
|
|
391
|
+
return if record.read_attribute(:name) == llm_tenant_name
|
|
392
|
+
|
|
393
|
+
record.update_column(:name, llm_tenant_name)
|
|
394
|
+
rescue
|
|
395
|
+
nil
|
|
396
|
+
end
|
|
357
397
|
end
|
|
358
398
|
end
|
|
359
399
|
end
|
|
data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb
CHANGED
|
@@ -37,8 +37,10 @@ module RubyLLM
|
|
|
37
37
|
execution.create_detail!(detail_data)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
# Calculate costs if token data is available
|
|
41
|
-
|
|
40
|
+
# Calculate costs if token data is available. Skip when the pipeline
|
|
41
|
+
# already supplied an accurate total (RubyLLM::Cost, which prices cache
|
|
42
|
+
# and reasoning tokens) so we don't downgrade it to text-only pricing.
|
|
43
|
+
if execution.input_tokens && execution.output_tokens && !execution.total_cost&.positive?
|
|
42
44
|
execution.calculate_costs!
|
|
43
45
|
execution.save!
|
|
44
46
|
end
|
|
data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb
CHANGED
|
@@ -48,7 +48,7 @@ module RubyLLM
|
|
|
48
48
|
raised_exception = nil
|
|
49
49
|
|
|
50
50
|
begin
|
|
51
|
-
@app.call(context)
|
|
51
|
+
capture_llm_requests(context) { @app.call(context) }
|
|
52
52
|
context.completed_at = Time.current
|
|
53
53
|
|
|
54
54
|
begin
|
|
@@ -84,6 +84,55 @@ module RubyLLM
|
|
|
84
84
|
|
|
85
85
|
private
|
|
86
86
|
|
|
87
|
+
# Fiber-local stack of in-flight request accumulators, innermost last.
|
|
88
|
+
REQUEST_CAPTURE_STACK = :ruby_llm_agents_request_capture
|
|
89
|
+
|
|
90
|
+
# Captures real HTTP-level provider latency for the LLM call(s) made
|
|
91
|
+
# while running the rest of the pipeline.
|
|
92
|
+
#
|
|
93
|
+
# ruby_llm 1.16 emits a "request.ruby_llm" event per HTTP request and
|
|
94
|
+
# its Railtie wires ActiveSupport::Notifications as the instrumenter
|
|
95
|
+
# in Rails, so we subscribe for the duration of the downstream call
|
|
96
|
+
# and accumulate provider time and request count (retries/fallbacks
|
|
97
|
+
# add up). This is distinct from the total pipeline duration, which
|
|
98
|
+
# also includes middleware and tool execution. The values are stored
|
|
99
|
+
# in context metadata and persisted with the execution.
|
|
100
|
+
#
|
|
101
|
+
# AS::Notifications subscriptions are process-global, so a naive
|
|
102
|
+
# subscriber would also see events from other executions running
|
|
103
|
+
# concurrently (other threads) or nested inside this one (agent-as-
|
|
104
|
+
# tool). To attribute each request to exactly one execution, we keep
|
|
105
|
+
# a fiber-local stack of accumulators and only credit the innermost
|
|
106
|
+
# one on the thread that actually emitted the event — the callback
|
|
107
|
+
# runs synchronously on the emitting thread, so its top-of-stack is
|
|
108
|
+
# the execution whose LLM call fired.
|
|
109
|
+
#
|
|
110
|
+
# @param context [Context] The execution context
|
|
111
|
+
# @return [Object] The downstream call's return value
|
|
112
|
+
def capture_llm_requests(context)
|
|
113
|
+
return yield unless defined?(ActiveSupport::Notifications)
|
|
114
|
+
|
|
115
|
+
accumulator = {ms: 0.0, count: 0}
|
|
116
|
+
stack = (Thread.current[REQUEST_CAPTURE_STACK] ||= [])
|
|
117
|
+
stack.push(accumulator)
|
|
118
|
+
|
|
119
|
+
callback = lambda do |_name, started, finished, _id, _payload|
|
|
120
|
+
top = Thread.current[REQUEST_CAPTURE_STACK]&.last
|
|
121
|
+
next unless top.equal?(accumulator)
|
|
122
|
+
|
|
123
|
+
accumulator[:ms] += (finished - started) * 1000.0
|
|
124
|
+
accumulator[:count] += 1
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
ActiveSupport::Notifications.subscribed(callback, "request.ruby_llm") { yield }
|
|
128
|
+
ensure
|
|
129
|
+
stack&.pop
|
|
130
|
+
if accumulator && accumulator[:count].positive?
|
|
131
|
+
context[:llm_request_ms] = accumulator[:ms].round
|
|
132
|
+
context[:llm_request_count] = accumulator[:count]
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
87
136
|
# Creates initial execution record with 'running' status
|
|
88
137
|
#
|
|
89
138
|
# Creates the record synchronously so it appears on the dashboard immediately.
|
|
@@ -339,6 +388,8 @@ module RubyLLM
|
|
|
339
388
|
cache_hit: context.cached?,
|
|
340
389
|
input_tokens: context.input_tokens || 0,
|
|
341
390
|
output_tokens: context.output_tokens || 0,
|
|
391
|
+
input_cost: context.input_cost,
|
|
392
|
+
output_cost: context.output_cost,
|
|
342
393
|
total_cost: context.total_cost || 0,
|
|
343
394
|
attempts_count: context.attempts_made,
|
|
344
395
|
chosen_model_id: context.model_used,
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_llm-agents
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.13.0
|
|
4
|
+
version: 3.14.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- adham90
|
|
@@ -29,14 +29,14 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: 1.
|
|
32
|
+
version: 1.16.0
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: 1.
|
|
39
|
+
version: 1.16.0
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: csv
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|