ruby_llm-agents 3.12.0 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/ruby_llm/agents/analytics_controller.rb +8 -0
- data/app/controllers/ruby_llm/agents/executions_controller.rb +8 -2
- data/app/controllers/ruby_llm/agents/tenants_controller.rb +8 -2
- data/app/models/ruby_llm/agents/execution.rb +63 -3
- data/app/models/ruby_llm/agents/tenant.rb +30 -2
- data/app/views/ruby_llm/agents/agents/_config_agent.html.erb +10 -6
- data/app/views/ruby_llm/agents/agents/show.html.erb +5 -4
- data/app/views/ruby_llm/agents/executions/_audio_player.html.erb +1 -1
- data/app/views/ruby_llm/agents/executions/_filters.html.erb +12 -8
- data/app/views/ruby_llm/agents/executions/show.html.erb +26 -12
- data/app/views/ruby_llm/agents/shared/_filter_dropdown.html.erb +46 -7
- data/app/views/ruby_llm/agents/shared/_tenant_filter.html.erb +2 -2
- data/app/views/ruby_llm/agents/system_config/show.html.erb +6 -2
- data/app/views/ruby_llm/agents/tenants/_form.html.erb +16 -7
- data/lib/generators/ruby_llm_agents/templates/initializer.rb.tt +27 -1
- data/lib/ruby_llm/agents/base_agent.rb +189 -21
- data/lib/ruby_llm/agents/core/configuration.rb +96 -6
- data/lib/ruby_llm/agents/core/llm_tenant.rb +40 -0
- data/lib/ruby_llm/agents/core/version.rb +1 -1
- data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +9 -5
- data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb +4 -2
- data/lib/ruby_llm/agents/infrastructure/retention_job.rb +118 -0
- data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +52 -1
- data/lib/ruby_llm/agents/rails/engine.rb +20 -4
- data/lib/ruby_llm/agents/routing.rb +28 -5
- data/lib/ruby_llm/agents.rb +1 -0
- data/lib/tasks/ruby_llm_agents.rake +7 -0
- metadata +4 -3
|
@@ -262,6 +262,24 @@ module RubyLLM
|
|
|
262
262
|
@tools || (superclass.respond_to?(:tools) ? superclass.tools : [])
|
|
263
263
|
end
|
|
264
264
|
|
|
265
|
+
# Class-level DSL accessor for the agent's tool-call concurrency mode,
# i.e. how several tool calls returned in one LLM response are executed.
#
# The values mirror RubyLLM's tool_concurrency option:
#   +false+             - run the tool calls sequentially
#   +true+ / +:threads+ - run them in Ruby threads
#   +:fibers+           - run them in fibers (requires the async gem)
#
# Called with an argument it stores the mode on this class; called without
# one it reads the mode. A class that never stored a value defers to its
# superclass, and nil is returned when nothing in the chain configured one
# (the global RubyLLM tool_concurrency configuration then applies).
#
# @param value [Boolean, Symbol] Concurrency mode (omit to read)
# @return [Boolean, Symbol, nil] Configured mode, or nil when unset
def tool_concurrency(*value)
  # Any argument - including false or nil - counts as an explicit setting,
  # so decide by arity rather than truthiness.
  @tool_concurrency = value[0] if value.size.positive?

  if instance_variable_defined?(:@tool_concurrency)
    @tool_concurrency
  elsif superclass.respond_to?(:tool_concurrency)
    superclass.tool_concurrency
  end
end
|
|
282
|
+
|
|
265
283
|
# @!endgroup
|
|
266
284
|
|
|
267
285
|
# @!group Temperature DSL
|
|
@@ -738,6 +756,7 @@ module RubyLLM
|
|
|
738
756
|
def execute(context)
|
|
739
757
|
@context = context
|
|
740
758
|
client = build_client(context)
|
|
759
|
+
@client = client
|
|
741
760
|
|
|
742
761
|
# Make context available to Tool instances during tool execution
|
|
743
762
|
previous_context = Thread.current[:ruby_llm_agents_caller_context]
|
|
@@ -788,7 +807,16 @@ module RubyLLM
|
|
|
788
807
|
end
|
|
789
808
|
|
|
790
809
|
client = client.with_schema(schema) if schema
|
|
791
|
-
|
|
810
|
+
if resolved_tools.any?
|
|
811
|
+
# Only pass concurrency when the agent overrides it; otherwise let
|
|
812
|
+
# RubyLLM apply its globally configured tool_concurrency default.
|
|
813
|
+
concurrency = self.class.tool_concurrency
|
|
814
|
+
client = if concurrency.nil?
|
|
815
|
+
client.with_tools(*resolved_tools)
|
|
816
|
+
else
|
|
817
|
+
client.with_tools(*resolved_tools, concurrency: concurrency)
|
|
818
|
+
end
|
|
819
|
+
end
|
|
792
820
|
apply_tool_prompt_caching(client) if use_prompt_caching && resolved_tools.any?
|
|
793
821
|
client = setup_tool_tracking(client) if resolved_tools.any?
|
|
794
822
|
client = apply_messages(client, resolved_messages) if resolved_messages.any?
|
|
@@ -891,35 +919,80 @@ module RubyLLM
|
|
|
891
919
|
|
|
892
920
|
# Captures response metadata to the context
|
|
893
921
|
#
|
|
894
|
-
#
|
|
922
|
+
# When a tool returns RubyLLM::Tool::Halt, the response is a Halt
|
|
923
|
+
# instance with no token metadata. In that case we pull metadata from
|
|
924
|
+
# the last assistant message in the client's history.
|
|
925
|
+
#
|
|
926
|
+
# @param response [RubyLLM::Message, RubyLLM::Tool::Halt] The response
|
|
895
927
|
# @param context [Pipeline::Context] The context
|
|
896
928
|
def capture_response(response, context)
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
context.model_used = response.model_id || model
|
|
900
|
-
# finish_reason may not be available on all RubyLLM::Message versions
|
|
901
|
-
context.finish_reason = response.respond_to?(:finish_reason) ? response.finish_reason : nil
|
|
929
|
+
is_halt = response.is_a?(RubyLLM::Tool::Halt)
|
|
930
|
+
metadata = is_halt ? last_assistant_message_from_client : response
|
|
902
931
|
|
|
903
|
-
|
|
904
|
-
|
|
932
|
+
if metadata
|
|
933
|
+
context.input_tokens = metadata.input_tokens if metadata.respond_to?(:input_tokens)
|
|
934
|
+
context.output_tokens = metadata.output_tokens if metadata.respond_to?(:output_tokens)
|
|
935
|
+
context.model_used = (metadata.respond_to?(:model_id) && metadata.model_id) || model
|
|
905
936
|
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
937
|
+
# Capture Anthropic prompt caching metrics
|
|
938
|
+
if metadata.respond_to?(:cached_tokens) && metadata.cached_tokens&.positive?
|
|
939
|
+
context[:cached_tokens] = metadata.cached_tokens
|
|
940
|
+
end
|
|
941
|
+
if metadata.respond_to?(:cache_creation_tokens) && metadata.cache_creation_tokens&.positive?
|
|
942
|
+
context[:cache_creation_tokens] = metadata.cache_creation_tokens
|
|
943
|
+
end
|
|
944
|
+
else
|
|
945
|
+
context.model_used = model
|
|
909
946
|
end
|
|
910
|
-
|
|
911
|
-
|
|
947
|
+
|
|
948
|
+
context.finish_reason = if is_halt
|
|
949
|
+
"halt"
|
|
950
|
+
elsif response.respond_to?(:finish_reason)
|
|
951
|
+
response.finish_reason
|
|
912
952
|
end
|
|
913
953
|
|
|
914
|
-
|
|
954
|
+
# Store tracked tool calls in context for instrumentation
|
|
955
|
+
context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?
|
|
956
|
+
|
|
957
|
+
calculate_costs(metadata, context) if metadata && context.input_tokens
|
|
915
958
|
end
|
|
916
959
|
|
|
917
|
-
#
|
|
960
|
+
# Walks the history of the client stored in @client from newest to oldest
# and returns the first message that is an assistant message carrying an
# input token count. This lets token/model metadata be recovered when the
# LLM call ends early through Tool::Halt and the response itself has none.
#
# @return [RubyLLM::Message, nil] The matching message, or nil when there
#   is no client, no history, or no assistant message with usage data
def last_assistant_message_from_client
  history = @client&.messages
  return unless history

  history.reverse_each.find do |message|
    next false unless message.respond_to?(:role) && message.role == :assistant

    # Only messages that actually report usage are useful as metadata.
    message.respond_to?(:input_tokens) && message.input_tokens
  end
end
|
|
974
|
+
|
|
975
|
+
# Calculates costs for the response.
|
|
976
|
+
#
|
|
977
|
+
# Providers often return dated model variants (e.g.
|
|
978
|
+
# "anthropic/claude-4.6-sonnet-20260217") that aren't in the
|
|
979
|
+
# RubyLLM::Models registry, while the agent is configured with a
|
|
980
|
+
# stable alias (e.g. "anthropic/claude-sonnet-4.6") that is. When the
|
|
981
|
+
# response's model_id misses, fall back to the agent's configured
|
|
982
|
+
# model so cost calculation still finds pricing.
|
|
983
|
+
#
|
|
984
|
+
# Text input/output are priced from the context's token counts. These
|
|
985
|
+
# reflect the final attempt's usage (a retry/fallback overwrites them per
|
|
986
|
+
# attempt); failed attempts that erred at the provider are typically not
|
|
987
|
+
# billed, so the final attempt is the charged one. On top of the text
|
|
988
|
+
# cost, cache reads/writes and reasoning tokens — which exist on the
|
|
989
|
+
# response and are billed at their own rates — are priced via RubyLLM's
|
|
990
|
+
# first-class cost helper (RubyLLM::Cost) and added in.
|
|
918
991
|
#
|
|
919
992
|
# @param response [RubyLLM::Message] The response
|
|
920
993
|
# @param context [Pipeline::Context] The context
|
|
921
994
|
def calculate_costs(response, context)
|
|
922
|
-
model_info = find_model_info(response.model_id || model)
|
|
995
|
+
model_info = find_model_info(response.model_id) || find_model_info(model)
|
|
923
996
|
return unless model_info
|
|
924
997
|
|
|
925
998
|
input_tokens = context.input_tokens || 0
|
|
@@ -929,16 +1002,111 @@ module RubyLLM
|
|
|
929
1002
|
output_price = model_info.pricing&.text_tokens&.output || 0
|
|
930
1003
|
|
|
931
1004
|
context.input_cost = (input_tokens / 1_000_000.0) * input_price
|
|
932
|
-
|
|
933
|
-
|
|
1005
|
+
|
|
1006
|
+
# Price cache/reasoning extras first so we know whether reasoning was
|
|
1007
|
+
# actually billed at the reasoning rate. Only then exclude those tokens
|
|
1008
|
+
# from the output charge — never subtract tokens that weren't charged
|
|
1009
|
+
# elsewhere, or a degraded cost helper would make reasoning vanish.
|
|
1010
|
+
extra = extra_token_costs(response, model_info, context)
|
|
1011
|
+
billable_output = output_tokens - reasoning_tokens_charged(response, context)
|
|
1012
|
+
context.output_cost = ([billable_output, 0].max / 1_000_000.0) * output_price
|
|
1013
|
+
|
|
1014
|
+
context.total_cost = (context.input_cost + context.output_cost + extra).round(6)
|
|
1015
|
+
end
|
|
1016
|
+
|
|
1017
|
+
# Reports how many reasoning (thinking) tokens were billed at the reasoning
# rate, judged by the presence of a :thinking entry in the cost breakdown
# that +extra_token_costs+ stores in the context.
#
# Reasoning providers include reasoning tokens in the reported
# output_tokens, so when they are billed separately they have to be taken
# out of the output-rate charge to avoid double billing. The result is 0
# when no reasoning charge was recorded (non-reasoning model, or a degraded
# cost helper), so reasoning tokens never silently vanish from the output
# charge.
#
# @param response [Object] The response (RubyLLM::Message in production)
# @param context [Pipeline::Context] The context
# @return [Integer] Reasoning tokens to exclude from the output charge
def reasoning_tokens_charged(response, context)
  charged = context[:cost_breakdown]
  thinking_billed = charged.is_a?(Hash) && charged.key?(:thinking)

  if thinking_billed && response.respond_to?(:reasoning_tokens)
    response.reasoning_tokens.to_i
  else
    0
  end
end
|
|
1036
|
+
|
|
1037
|
+
# Prices the non-text token components (cache reads, cache writes and
# reasoning) exposed by the response's cost object, stores the non-zero
# ones in the context under :cost_breakdown, and returns their sum so it
# can be added on top of the text input/output cost.
#
# The result is 0.0 - and the context is left untouched - when the response
# exposes no cost, when every component is nil or zero, or when pricing the
# components raises. Cache/reasoning accuracy is therefore purely additive
# and never regresses text pricing.
#
# @param response [Object] The response (RubyLLM::Message in production)
# @param model_info [RubyLLM::Model::Info] Resolved pricing source
# @param context [Pipeline::Context] The context
# @return [Float] Combined cache + reasoning cost, or 0.0
def extra_token_costs(response, model_info, context)
  priced = response_cost(response, model_info)
  return 0.0 unless priced

  # Each component is rounded on its own and the rounded values are summed,
  # so the stored breakdown reconciles exactly with what is added to
  # total_cost.
  breakdown = {}
  %i[cache_read cache_write thinking].each do |component|
    amount = priced.public_send(component)
    next if amount.nil? || amount.zero?

    breakdown[component] = amount.round(6)
  end
  return 0.0 if breakdown.empty?

  context[:cost_breakdown] = breakdown
  breakdown.values.sum
rescue => e
  # Non-standard pricing shapes cannot price these components; fall back to
  # text-only pricing instead of failing the whole cost calculation.
  log_cost_warning("extra_token_costs", e)
  0.0
end
|
|
1071
|
+
|
|
1072
|
+
# Asks the response for its cost, priced against the already-resolved
# model_info (which may differ from the dated model variant the response
# itself reports).
#
# Responses that do not respond to +cost+ - e.g. simple structs or mocks in
# tests - yield nil so callers skip the extra components. An error raised
# while pricing is logged through +log_cost_warning+ and also yields nil.
#
# @param response [Object] The response (RubyLLM::Message in production)
# @param model_info [RubyLLM::Model::Info] Resolved pricing source
# @return [RubyLLM::Cost, nil]
def response_cost(response, model_info)
  response.cost(model: model_info) if response.respond_to?(:cost)
rescue => e
  log_cost_warning("response_cost", e)
  nil
end
|
|
1088
|
+
|
|
1089
|
+
# Writes a debug-level line describing a cost-calculation error that was
# swallowed. Cost components are best-effort, so callers degrade instead of
# raising; this records why they degraded. Nothing is written when no
# Rails logger is available, and an error raised by the logger itself is
# swallowed so logging can never break cost handling.
#
# @param source [String] The method that degraded
# @param error [Exception] The swallowed error
def log_cost_warning(source, error)
  logger = (Rails.logger if defined?(Rails) && Rails.respond_to?(:logger))
  return unless logger

  logger.debug("[RubyLLM::Agents] #{source} skipped: #{error.class}: #{error.message}")
rescue
  nil
end
|
|
935
1103
|
|
|
936
|
-
# Finds model pricing info
|
|
1104
|
+
# Finds model pricing info.
|
|
937
1105
|
#
|
|
938
1106
|
# @param model_id [String] The model ID
|
|
939
1107
|
# @return [RubyLLM::Model::Info, nil] Model info with pricing, or nil when it cannot be resolved
|
|
940
1108
|
def find_model_info(model_id)
|
|
941
|
-
return nil unless defined?(RubyLLM::Models)
|
|
1109
|
+
return nil unless defined?(RubyLLM::Models) && model_id
|
|
942
1110
|
|
|
943
1111
|
RubyLLM::Models.find(model_id)
|
|
944
1112
|
rescue
|
|
@@ -50,9 +50,26 @@ module RubyLLM
|
|
|
50
50
|
# When false, executions are logged synchronously.
|
|
51
51
|
# @return [Boolean] Enable async logging (default: true)
|
|
52
52
|
|
|
53
|
+
# @!attribute [rw] soft_purge_after
|
|
54
|
+
# How long to keep full execution details (prompts, responses, tool calls,
|
|
55
|
+
# attempts) before the retention job destroys them. The executions row is
|
|
56
|
+
# preserved so cost, token, and latency analytics remain intact. A
|
|
57
|
+
# truncated copy of the error message is stamped into metadata for
|
|
58
|
+
# long-term error-rate trend analysis.
|
|
59
|
+
# Set to nil to disable soft purging.
|
|
60
|
+
# @return [ActiveSupport::Duration, nil] Soft-purge window (default: 30.days)
|
|
61
|
+
|
|
62
|
+
# @!attribute [rw] hard_purge_after
|
|
63
|
+
# How long to keep the executions row itself before the retention job
|
|
64
|
+
# destroys it entirely. Must be greater than soft_purge_after when both
|
|
65
|
+
# are set. Set to nil to retain executions indefinitely.
|
|
66
|
+
# @return [ActiveSupport::Duration, nil] Hard-purge window (default: 365.days)
|
|
67
|
+
|
|
53
68
|
# @!attribute [rw] retention_period
|
|
54
|
-
#
|
|
55
|
-
#
|
|
69
|
+
# Deprecated. Alias for hard_purge_after, kept for backward compatibility.
|
|
70
|
+
# Prefer configuring soft_purge_after and hard_purge_after explicitly.
|
|
71
|
+
# @return [ActiveSupport::Duration, nil] Hard-purge window
|
|
72
|
+
# @deprecated Use {#hard_purge_after} instead.
|
|
56
73
|
|
|
57
74
|
# @!attribute [rw] anomaly_cost_threshold
|
|
58
75
|
# Cost threshold in dollars that triggers anomaly logging.
|
|
@@ -356,10 +373,18 @@ module RubyLLM
|
|
|
356
373
|
gemini_api_base
|
|
357
374
|
gpustack_api_base
|
|
358
375
|
ollama_api_base
|
|
376
|
+
bedrock_api_base
|
|
377
|
+
mistral_api_base
|
|
378
|
+
perplexity_api_base
|
|
379
|
+
vertexai_api_base
|
|
359
380
|
vertexai_project_id
|
|
360
381
|
vertexai_location
|
|
382
|
+
xai_api_base
|
|
361
383
|
request_timeout
|
|
362
384
|
max_retries
|
|
385
|
+
faraday_adapter
|
|
386
|
+
deprecation_behavior
|
|
387
|
+
tool_concurrency
|
|
363
388
|
].freeze
|
|
364
389
|
|
|
365
390
|
FORWARDED_RUBY_LLM_ATTRIBUTES.each do |attr|
|
|
@@ -379,7 +404,6 @@ module RubyLLM
|
|
|
379
404
|
# Attributes without validation (simple accessors)
|
|
380
405
|
attr_accessor :default_model,
|
|
381
406
|
:async_logging,
|
|
382
|
-
:retention_period,
|
|
383
407
|
:dashboard_parent_controller,
|
|
384
408
|
:basic_auth_username,
|
|
385
409
|
:basic_auth_password,
|
|
@@ -464,7 +488,9 @@ module RubyLLM
|
|
|
464
488
|
:tenant_resolver,
|
|
465
489
|
:tenant_config_resolver,
|
|
466
490
|
:default_retries,
|
|
467
|
-
:budgets
|
|
491
|
+
:budgets,
|
|
492
|
+
:soft_purge_after,
|
|
493
|
+
:hard_purge_after
|
|
468
494
|
|
|
469
495
|
attr_writer :cache_store
|
|
470
496
|
|
|
@@ -594,6 +620,44 @@ module RubyLLM
|
|
|
594
620
|
@default_embedding_batch_size = value
|
|
595
621
|
end
|
|
596
622
|
|
|
623
|
+
# Sets soft_purge_after with validation.
#
# The value is first checked on its own, then the soft/hard ordering is
# checked with the new value in place. If the ordering check fails, the
# previous value is restored before the error propagates, so a rejected
# assignment never leaves the configuration in an invalid state.
#
# @param value [ActiveSupport::Duration, Numeric, nil] Window or nil to disable
# @raise [ArgumentError] If value is not a Duration/Numeric or nil, is
#   negative, or is not less than hard_purge_after when both are set
def soft_purge_after=(value)
  validate_purge_window!(:soft_purge_after, value)

  previous = @soft_purge_after
  @soft_purge_after = value
  begin
    validate_purge_ordering!
  rescue ArgumentError
    # Roll back so the rejected value does not stay configured.
    @soft_purge_after = previous
    raise
  end
end
|
|
632
|
+
|
|
633
|
+
# Sets hard_purge_after with validation.
#
# The value is first checked on its own, then the soft/hard ordering is
# checked with the new value in place. If the ordering check fails, the
# previous value is restored before the error propagates, so a rejected
# assignment never leaves the configuration in an invalid state.
#
# @param value [ActiveSupport::Duration, Numeric, nil] Window or nil to disable
# @raise [ArgumentError] If value is not a Duration/Numeric or nil, is
#   negative, or is not greater than soft_purge_after when both are set
def hard_purge_after=(value)
  validate_purge_window!(:hard_purge_after, value)

  previous = @hard_purge_after
  @hard_purge_after = value
  begin
    validate_purge_ordering!
  rescue ArgumentError
    # Roll back so the rejected value does not stay configured.
    @hard_purge_after = previous
    raise
  end
end
|
|
642
|
+
|
|
643
|
+
# Legacy reader kept for backward compatibility: the old retention_period
# setting is now the hard-purge window, so this simply returns
# hard_purge_after.
#
# @return [ActiveSupport::Duration, nil]
# @deprecated Use {#hard_purge_after} instead.
def retention_period
  # Pure delegation - there is no separate retention_period state.
  hard_purge_after
end
|
|
650
|
+
|
|
651
|
+
# Legacy writer kept for backward compatibility. Emits a deprecation
# warning and then assigns the value through hard_purge_after=, so the
# value goes through that setter's handling.
#
# @param value [ActiveSupport::Duration, Numeric, nil]
# @deprecated Use {#hard_purge_after=} instead.
def retention_period=(value)
  notice = "[DEPRECATION] RubyLLM::Agents config.retention_period is deprecated. " \
    "Use config.hard_purge_after instead (and set config.soft_purge_after for two-tier retention)."
  warn notice

  self.hard_purge_after = value
end
|
|
660
|
+
|
|
597
661
|
# Sets default_embedding_dimensions with validation
|
|
598
662
|
#
|
|
599
663
|
# @param value [Integer, nil] Dimensions (must be nil or > 0)
|
|
@@ -616,7 +680,8 @@ module RubyLLM
|
|
|
616
680
|
@default_timeout = 60
|
|
617
681
|
@cache_store = nil
|
|
618
682
|
@async_logging = true
|
|
619
|
-
@
|
|
683
|
+
@soft_purge_after = 30.days
|
|
684
|
+
@hard_purge_after = 365.days
|
|
620
685
|
@anomaly_cost_threshold = 5.00
|
|
621
686
|
@anomaly_duration_threshold = 10_000
|
|
622
687
|
@dashboard_auth = ->(_controller) { true }
|
|
@@ -960,7 +1025,8 @@ module RubyLLM
|
|
|
960
1025
|
},
|
|
961
1026
|
logging: {
|
|
962
1027
|
async_logging: async_logging,
|
|
963
|
-
|
|
1028
|
+
soft_purge_after: soft_purge_after,
|
|
1029
|
+
hard_purge_after: hard_purge_after,
|
|
964
1030
|
job_retry_attempts: job_retry_attempts,
|
|
965
1031
|
track_executions: track_executions,
|
|
966
1032
|
track_cache_hits: track_cache_hits,
|
|
@@ -1161,6 +1227,30 @@ module RubyLLM
|
|
|
1161
1227
|
raise ArgumentError, "budgets[:enforcement] must be :none, :soft, or :hard"
|
|
1162
1228
|
end
|
|
1163
1229
|
end
|
|
1230
|
+
|
|
1231
|
+
# Checks that a purge window is usable: nil (tier disabled), an
# ActiveSupport::Duration whose whole-second value is not negative, or a
# Numeric that is not negative. Anything else is rejected.
#
# @param attr [Symbol] Attribute name for error messages
# @param value [ActiveSupport::Duration, Numeric, nil] Value to validate
# @raise [ArgumentError] If value is neither nil nor a non-negative duration/number
def validate_purge_window!(attr, value)
  acceptable = value.nil? ||
    (value.is_a?(ActiveSupport::Duration) && value.to_i >= 0) ||
    (value.is_a?(Numeric) && value >= 0)

  raise ArgumentError, "#{attr} must be an ActiveSupport::Duration, non-negative Numeric, or nil" unless acceptable
end
|
|
1243
|
+
|
|
1244
|
+
# Ensures soft_purge_after is strictly less than hard_purge_after when both
# are set; the check is skipped when either tier is disabled (nil).
#
# The windows are compared as floating-point seconds rather than truncated
# integers, so fractional values are ordered correctly and an infinite
# window (e.g. Float::INFINITY) is compared instead of raising
# FloatDomainError during integer conversion.
#
# @raise [ArgumentError] If ordering is violated
def validate_purge_ordering!
  return if @soft_purge_after.nil? || @hard_purge_after.nil?
  return if @soft_purge_after.to_f < @hard_purge_after.to_f

  raise ArgumentError, "soft_purge_after (#{@soft_purge_after.inspect}) must be less than " \
    "hard_purge_after (#{@hard_purge_after.inspect})"
end
|
|
1164
1254
|
end
|
|
1165
1255
|
end
|
|
1166
1256
|
end
|
|
@@ -113,6 +113,11 @@ module RubyLLM
|
|
|
113
113
|
|
|
114
114
|
# Auto-create tenant record callback
|
|
115
115
|
after_create :create_default_llm_tenant if llm_tenant_options[:budget]
|
|
116
|
+
|
|
117
|
+
# Keep the denormalized Tenant#name column fresh so the dashboard's
|
|
118
|
+
# SQL search/sort by name keeps working for linked tenants. Display
|
|
119
|
+
# already resolves the name live, so this only powers SQL.
|
|
120
|
+
after_update :sync_llm_tenant_name
|
|
116
121
|
end
|
|
117
122
|
|
|
118
123
|
private
|
|
@@ -143,6 +148,17 @@ module RubyLLM
|
|
|
143
148
|
send(id_method).to_s
|
|
144
149
|
end
|
|
145
150
|
|
|
151
|
+
# Resolves this model's tenant display name at call time by invoking the
# method configured through the `name:` option of `llm_tenant`
# (e.g. `llm_tenant name: :company_name`), falling back to +to_s+ when no
# name method is configured. Because the value is computed on every read,
# it reflects the model's current state rather than a stored snapshot.
#
# @return [String] The current display name
def llm_tenant_name
  configured = self.class.llm_tenant_options[:name]
  display = configured ? send(configured) : send(:to_s)
  display.to_s
end
|
|
161
|
+
|
|
146
162
|
# Returns API keys resolved from the DSL configuration
|
|
147
163
|
#
|
|
148
164
|
# Maps provider names (e.g., :openai, :anthropic) to their resolved values
|
|
@@ -354,6 +370,30 @@ module RubyLLM
|
|
|
354
370
|
tenant.tenant_record = self
|
|
355
371
|
tenant.save!
|
|
356
372
|
end
|
|
373
|
+
|
|
374
|
+
# Copies the current tenant name into the linked Tenant row when the source
# column changed in the last save, keeping the denormalized copy fresh for
# SQL search/sort.
#
# The sync only runs when the configured name is backed by something that
# exposes a `saved_change_to_<name>?` predicate; method-based names are
# skipped. It is best-effort and never raises: any error is swallowed, but
# a debug line is written to the Rails logger (when one is available) so
# the failure is not completely silent.
#
# @return [void]
def sync_llm_tenant_name
  name_method = self.class.llm_tenant_options[:name]
  return unless name_method

  change_predicate = "saved_change_to_#{name_method}?"
  return unless respond_to?(change_predicate) && public_send(change_predicate)

  record = llm_tenant_record
  return unless record&.persisted?

  # Resolve the name once so the comparison and the write use the same value.
  current_name = llm_tenant_name
  return if record.read_attribute(:name) == current_name

  # update_column writes the single column directly.
  record.update_column(:name, current_name)
rescue => e
  begin
    if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
      Rails.logger.debug("[RubyLLM::Agents] sync_llm_tenant_name skipped: #{e.class}: #{e.message}")
    end
  rescue
    # Logging is itself best-effort and must not turn the sync into a failure.
    nil
  end
  nil
end
|
|
357
397
|
end
|
|
358
398
|
end
|
|
359
399
|
end
|
|
@@ -95,12 +95,13 @@ module RubyLLM
|
|
|
95
95
|
def record_failed_execution(error, started_at)
|
|
96
96
|
return unless defined?(RubyLLM::Agents::Execution)
|
|
97
97
|
|
|
98
|
-
execution_data = build_failed_execution_data(error, started_at)
|
|
98
|
+
execution_data, detail_data = build_failed_execution_data(error, started_at)
|
|
99
99
|
|
|
100
100
|
if config.async_logging && defined?(ExecutionLoggerJob)
|
|
101
|
-
ExecutionLoggerJob.perform_later(execution_data)
|
|
101
|
+
ExecutionLoggerJob.perform_later(execution_data.merge(_detail_data: detail_data))
|
|
102
102
|
else
|
|
103
|
-
RubyLLM::Agents::Execution.create!(execution_data)
|
|
103
|
+
execution = RubyLLM::Agents::Execution.create!(execution_data)
|
|
104
|
+
execution.create_detail!(detail_data) if detail_data.present?
|
|
104
105
|
end
|
|
105
106
|
rescue => e
|
|
106
107
|
Rails.logger.error("[RubyLLM::Agents] Failed to record failed #{execution_type} execution: #{e.message}") if defined?(Rails)
|
|
@@ -124,7 +125,7 @@ module RubyLLM
|
|
|
124
125
|
end
|
|
125
126
|
|
|
126
127
|
def build_failed_execution_data(error, started_at)
|
|
127
|
-
{
|
|
128
|
+
execution_data = {
|
|
128
129
|
agent_type: self.class.name,
|
|
129
130
|
tenant_id: @tenant_id,
|
|
130
131
|
execution_type: execution_type,
|
|
@@ -137,9 +138,12 @@ module RubyLLM
|
|
|
137
138
|
started_at: started_at,
|
|
138
139
|
completed_at: Time.current,
|
|
139
140
|
error_class: error.class.name,
|
|
140
|
-
error_message: error.message.truncate(1000),
|
|
141
141
|
metadata: {}
|
|
142
142
|
}
|
|
143
|
+
|
|
144
|
+
detail_data = {error_message: error.message.to_s.truncate(1000)}
|
|
145
|
+
|
|
146
|
+
[execution_data, detail_data]
|
|
143
147
|
end
|
|
144
148
|
|
|
145
149
|
def build_metadata(result)
|
|
@@ -37,8 +37,10 @@ module RubyLLM
|
|
|
37
37
|
execution.create_detail!(detail_data)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
# Calculate costs if token data is available
|
|
41
|
-
|
|
40
|
+
# Calculate costs if token data is available. Skip when the pipeline
|
|
41
|
+
# already supplied an accurate total (RubyLLM::Cost, which prices cache
|
|
42
|
+
# and reasoning tokens) so we don't downgrade it to text-only pricing.
|
|
43
|
+
if execution.input_tokens && execution.output_tokens && !execution.total_cost&.positive?
|
|
42
44
|
execution.calculate_costs!
|
|
43
45
|
execution.save!
|
|
44
46
|
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
  module Agents
    # Background job that enforces two-tier data retention on execution records.
    #
    # Soft pass: for executions created before the
    # {Configuration#soft_purge_after} cutoff, destroys the associated detail
    # record and tool_executions rows, preserves a truncated copy of the
    # detail's error_message in the execution's metadata, and stamps
    # metadata["soft_purged_at"]. Executions that already report
    # +soft_purged?+ are skipped, which keeps the pass idempotent.
    #
    # Hard pass: for executions created before the
    # {Configuration#hard_purge_after} cutoff, destroys the executions row
    # itself. NOTE(review): removal of any remaining detail/tool_execution
    # rows relies on cascade behavior declared outside this file - confirm.
    #
    # Either tier may be set to nil in configuration to skip that pass. A
    # window may be an ActiveSupport::Duration or a plain number of seconds.
    #
    # @example Enqueue manually
    #   RubyLLM::Agents::RetentionJob.perform_later
    #
    # @example Schedule daily (whenever gem)
    #   every 1.day, at: "3:00 am" do
    #     runner "RubyLLM::Agents::RetentionJob.perform_later"
    #   end
    #
    # @api public
    class RetentionJob < ActiveJob::Base
      queue_as :default

      # Maximum length of the error message copied into metadata.
      ERROR_MESSAGE_MAX_LENGTH = 500
      # Number of executions loaded/destroyed per batch.
      BATCH_SIZE = 500

      # Runs the soft and hard retention passes based on current configuration.
      # The soft pass runs first, then the hard pass.
      #
      # @return [Hash] counts of rows affected in each pass
      def perform
        {
          soft_purged: soft_purge,
          hard_purged: hard_purge
        }
      end

      private

      # Destroys detail + tool_execution rows for executions older than the
      # soft-purge window that have not already been soft-purged. Stamps
      # metadata with the purge timestamp and preserves a truncated
      # error_message for long-term error-rate analytics.
      #
      # The "already purged" filter runs in Ruby rather than SQL because
      # JSON key-exists operators differ across SQLite/Postgres/MySQL; this
      # keeps the job adapter-agnostic. We batch to bound memory.
      #
      # @return [Integer] number of executions soft-purged in this run
      def soft_purge
        cutoff = cutoff_for(RubyLLM::Agents.configuration.soft_purge_after)
        return 0 if cutoff.nil?

        count = 0

        Execution
          .where("created_at < ?", cutoff)
          .includes(:detail)
          .find_in_batches(batch_size: BATCH_SIZE) do |batch|
            batch.each do |execution|
              next if execution.soft_purged?

              purge_one(execution)
              count += 1
            end
          end

        count
      end

      # Destroys executions (and everything cascaded from them) older than
      # the hard-purge window.
      #
      # @return [Integer] number of executions destroyed in this run
      def hard_purge
        cutoff = cutoff_for(RubyLLM::Agents.configuration.hard_purge_after)
        return 0 if cutoff.nil?

        total = 0

        Execution.where("created_at < ?", cutoff).in_batches(of: BATCH_SIZE) do |batch|
          total += batch.destroy_all.size
        end

        total
      end

      # Converts a retention window into the timestamp before which records
      # are eligible for purging.
      #
      # Configuration accepts both ActiveSupport::Duration and plain Numeric
      # seconds. Subtracting from the current time handles both, whereas
      # calling +ago+ on the window only works for durations.
      #
      # @param window [ActiveSupport::Duration, Numeric, nil] Retention window
      # @return [Time, ActiveSupport::TimeWithZone, nil] Cutoff, or nil when
      #   the tier is disabled
      def cutoff_for(window)
        return nil if window.nil?

        Time.current - window
      end

      # Performs the soft purge for a single execution. The error message is
      # read before the detail is destroyed, and the destroys plus the
      # metadata stamp run in one transaction.
      #
      # @param execution [Execution] The execution to soft-purge
      def purge_one(execution)
        preserved_error = preserved_error_message(execution)

        Execution.transaction do
          execution.detail&.destroy
          execution.tool_executions.destroy_all

          new_metadata = (execution.metadata || {}).merge(
            "soft_purged_at" => Time.current.iso8601
          )
          new_metadata["error_message"] = preserved_error if preserved_error

          # update_columns writes the metadata column directly.
          execution.update_columns(metadata: new_metadata)
        end
      end

      # Returns a truncated copy of the detail's error_message, or nil when
      # there is no detail or the message is blank.
      #
      # @param execution [Execution] The execution being purged
      # @return [String, nil]
      def preserved_error_message(execution)
        raw = execution.detail&.error_message
        return nil if raw.blank?

        raw.to_s.truncate(ERROR_MESSAGE_MAX_LENGTH)
      end
    end
  end
end
|