ruby_llm-agents 3.12.0 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/ruby_llm/agents/analytics_controller.rb +8 -0
  4. data/app/controllers/ruby_llm/agents/executions_controller.rb +8 -2
  5. data/app/controllers/ruby_llm/agents/tenants_controller.rb +8 -2
  6. data/app/models/ruby_llm/agents/execution.rb +63 -3
  7. data/app/models/ruby_llm/agents/tenant.rb +30 -2
  8. data/app/views/ruby_llm/agents/agents/_config_agent.html.erb +10 -6
  9. data/app/views/ruby_llm/agents/agents/show.html.erb +5 -4
  10. data/app/views/ruby_llm/agents/executions/_audio_player.html.erb +1 -1
  11. data/app/views/ruby_llm/agents/executions/_filters.html.erb +12 -8
  12. data/app/views/ruby_llm/agents/executions/show.html.erb +26 -12
  13. data/app/views/ruby_llm/agents/shared/_filter_dropdown.html.erb +46 -7
  14. data/app/views/ruby_llm/agents/shared/_tenant_filter.html.erb +2 -2
  15. data/app/views/ruby_llm/agents/system_config/show.html.erb +6 -2
  16. data/app/views/ruby_llm/agents/tenants/_form.html.erb +16 -7
  17. data/lib/generators/ruby_llm_agents/templates/initializer.rb.tt +27 -1
  18. data/lib/ruby_llm/agents/base_agent.rb +189 -21
  19. data/lib/ruby_llm/agents/core/configuration.rb +96 -6
  20. data/lib/ruby_llm/agents/core/llm_tenant.rb +40 -0
  21. data/lib/ruby_llm/agents/core/version.rb +1 -1
  22. data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +9 -5
  23. data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb +4 -2
  24. data/lib/ruby_llm/agents/infrastructure/retention_job.rb +118 -0
  25. data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +52 -1
  26. data/lib/ruby_llm/agents/rails/engine.rb +20 -4
  27. data/lib/ruby_llm/agents/routing.rb +28 -5
  28. data/lib/ruby_llm/agents.rb +1 -0
  29. data/lib/tasks/ruby_llm_agents.rake +7 -0
  30. metadata +4 -3
@@ -262,6 +262,24 @@ module RubyLLM
262
262
  @tools || (superclass.respond_to?(:tools) ? superclass.tools : [])
263
263
  end
264
264
 
265
# Class-level DSL accessor for the tool-call concurrency mode.
#
# With an argument the mode is stored on this class; without one the
# method only reads. The mode mirrors RubyLLM's tool_concurrency:
# +false+ runs tool calls sequentially, +true+ or +:threads+ runs them in
# Ruby threads, and +:fibers+ runs them in fibers (requires the async gem).
#
# Read order: a value stored on this class (including an explicit nil),
# then the superclass chain, then nil. A nil result means the caller
# should leave the decision to RubyLLM's global tool_concurrency setting.
#
# @param value [Boolean, Symbol] Concurrency mode (omit to read)
# @return [Boolean, Symbol, nil] Configured mode, or nil when unset
def tool_concurrency(*value)
  mode_given = !value.empty?
  @tool_concurrency = value.first if mode_given

  if instance_variable_defined?(:@tool_concurrency)
    @tool_concurrency
  elsif superclass.respond_to?(:tool_concurrency)
    superclass.tool_concurrency
  end
end
282
+
265
283
  # @!endgroup
266
284
 
267
285
  # @!group Temperature DSL
@@ -738,6 +756,7 @@ module RubyLLM
738
756
  def execute(context)
739
757
  @context = context
740
758
  client = build_client(context)
759
+ @client = client
741
760
 
742
761
  # Make context available to Tool instances during tool execution
743
762
  previous_context = Thread.current[:ruby_llm_agents_caller_context]
@@ -788,7 +807,16 @@ module RubyLLM
788
807
  end
789
808
 
790
809
  client = client.with_schema(schema) if schema
791
- client = client.with_tools(*resolved_tools) if resolved_tools.any?
810
+ if resolved_tools.any?
811
+ # Only pass concurrency when the agent overrides it; otherwise let
812
+ # RubyLLM apply its globally configured tool_concurrency default.
813
+ concurrency = self.class.tool_concurrency
814
+ client = if concurrency.nil?
815
+ client.with_tools(*resolved_tools)
816
+ else
817
+ client.with_tools(*resolved_tools, concurrency: concurrency)
818
+ end
819
+ end
792
820
  apply_tool_prompt_caching(client) if use_prompt_caching && resolved_tools.any?
793
821
  client = setup_tool_tracking(client) if resolved_tools.any?
794
822
  client = apply_messages(client, resolved_messages) if resolved_messages.any?
@@ -891,35 +919,80 @@ module RubyLLM
891
919
 
892
920
# Copies usage metadata from an LLM response onto the pipeline context.
#
# A tool can short-circuit the call by returning RubyLLM::Tool::Halt. That
# object carries no token metadata, so in the halt case the metadata is
# taken from the most recent assistant message in the client's history.
#
# @param response [RubyLLM::Message, RubyLLM::Tool::Halt] The response
# @param context [Pipeline::Context] The context
def capture_response(response, context)
  halted = response.is_a?(RubyLLM::Tool::Halt)
  usage_source = halted ? last_assistant_message_from_client : response

  if usage_source
    context.input_tokens = usage_source.input_tokens if usage_source.respond_to?(:input_tokens)
    context.output_tokens = usage_source.output_tokens if usage_source.respond_to?(:output_tokens)

    # Prefer the model the provider reported; fall back to the configured one.
    reported_model = usage_source.respond_to?(:model_id) && usage_source.model_id
    context.model_used = reported_model || model

    # Capture Anthropic prompt caching metrics, but only positive counts.
    %i[cached_tokens cache_creation_tokens].each do |metric|
      next unless usage_source.respond_to?(metric)

      amount = usage_source.public_send(metric)
      context[metric] = amount if amount&.positive?
    end
  else
    # Halted with no usable assistant message: only the model is known.
    context.model_used = model
  end

  # finish_reason may not be available on every response type; it is nil then.
  context.finish_reason =
    if halted
      "halt"
    elsif response.respond_to?(:finish_reason)
      response.finish_reason
    end

  # Store tracked tool calls in context for instrumentation
  context[:tool_calls] = @tracked_tool_calls if @tracked_tool_calls.any?

  calculate_costs(usage_source, context) if usage_source && context.input_tokens
end
916
959
 
917
- # Calculates costs for the response
960
# Walks the active client's message history from newest to oldest and
# returns the first assistant message that carries an input token count.
# Used to recover token/model metadata when the LLM call short-circuits
# via Tool::Halt.
#
# @return [RubyLLM::Message, nil] nil when there is no client, no history,
#   or no assistant message with usage
def last_assistant_message_from_client
  history = @client&.messages
  return nil unless history

  history.reverse_each.find do |message|
    next false unless message.respond_to?(:role) && message.role == :assistant

    message.respond_to?(:input_tokens) && message.input_tokens
  end
end
974
+
975
+ # Calculates costs for the response.
976
+ #
977
+ # Providers often return dated model variants (e.g.
978
+ # "anthropic/claude-4.6-sonnet-20260217") that aren't in the
979
+ # RubyLLM::Models registry, while the agent is configured with a
980
+ # stable alias (e.g. "anthropic/claude-sonnet-4.6") that is. When the
981
+ # response's model_id misses, fall back to the agent's configured
982
+ # model so cost calculation still finds pricing.
983
+ #
984
+ # Text input/output are priced from the context's token counts. These
985
+ # reflect the final attempt's usage (a retry/fallback overwrites them per
986
+ # attempt); failed attempts that erred at the provider are typically not
987
+ # billed, so the final attempt is the charged one. On top of the text
988
+ # cost, cache reads/writes and reasoning tokens — which exist on the
989
+ # response and are billed at their own rates — are priced via RubyLLM's
990
+ # first-class cost helper (RubyLLM::Cost) and added in.
918
991
  #
919
992
  # @param response [RubyLLM::Message] The response
920
993
  # @param context [Pipeline::Context] The context
921
994
  def calculate_costs(response, context)
922
- model_info = find_model_info(response.model_id || model)
995
+ model_info = find_model_info(response.model_id) || find_model_info(model)
923
996
  return unless model_info
924
997
 
925
998
  input_tokens = context.input_tokens || 0
@@ -929,16 +1002,111 @@ module RubyLLM
929
1002
  output_price = model_info.pricing&.text_tokens&.output || 0
930
1003
 
931
1004
  context.input_cost = (input_tokens / 1_000_000.0) * input_price
932
- context.output_cost = (output_tokens / 1_000_000.0) * output_price
933
- context.total_cost = (context.input_cost + context.output_cost).round(6)
1005
+
1006
+ # Price cache/reasoning extras first so we know whether reasoning was
1007
+ # actually billed at the reasoning rate. Only then exclude those tokens
1008
+ # from the output charge — never subtract tokens that weren't charged
1009
+ # elsewhere, or a degraded cost helper would make reasoning vanish.
1010
+ extra = extra_token_costs(response, model_info, context)
1011
+ billable_output = output_tokens - reasoning_tokens_charged(response, context)
1012
+ context.output_cost = ([billable_output, 0].max / 1_000_000.0) * output_price
1013
+
1014
+ context.total_cost = (context.input_cost + context.output_cost + extra).round(6)
1015
+ end
1016
+
1017
# Reports how many reasoning (thinking) tokens should be excluded from the
# output-rate charge.
#
# Tokens are only excluded when the context's :cost_breakdown hash has a
# :thinking entry, i.e. reasoning was priced separately. In every other
# case (no breakdown, no :thinking entry, or a response without
# reasoning_tokens) the result is 0, so reasoning tokens are never dropped
# from the output charge without being billed elsewhere.
#
# @param response [Object] The response (RubyLLM::Message in production)
# @param context [Pipeline::Context] The context
# @return [Integer] Reasoning tokens to exclude from the output charge
def reasoning_tokens_charged(response, context)
  charged = context[:cost_breakdown]
  thinking_priced = charged.is_a?(Hash) && charged.key?(:thinking)

  if thinking_priced && response.respond_to?(:reasoning_tokens)
    response.reasoning_tokens.to_i
  else
    0
  end
end
1036
+
1037
# Prices the non-text token components (cache reads, cache writes,
# reasoning) exposed by the response's cost object, stores the non-zero
# ones under context[:cost_breakdown], and returns their sum.
#
# Returns 0.0, leaving the context untouched, when no cost object is
# available, when every component is nil or zero, or when pricing raises.
# The extras are therefore purely additive on top of text pricing.
#
# @param response [Object] The response (RubyLLM::Message in production)
# @param model_info [RubyLLM::Model::Info] Resolved pricing source
# @param context [Pipeline::Context] The context
# @return [Float] Combined cache + reasoning cost, or 0.0
def extra_token_costs(response, model_info, context)
  cost = response_cost(response, model_info)
  return 0.0 unless cost

  # Each component is rounded on its own so the stored breakdown adds up
  # exactly to the amount returned here.
  rounded = %i[cache_read cache_write thinking].each_with_object({}) do |component, acc|
    amount = cost.public_send(component)
    acc[component] = amount.round(6) unless amount.nil? || amount.zero?
  end
  return 0.0 if rounded.empty?

  context[:cost_breakdown] = rounded
  rounded.values.sum
rescue => e
  # Non-standard pricing shapes can't price these components; fall back to
  # text-only pricing instead of failing the whole cost calculation.
  log_cost_warning("extra_token_costs", e)
  0.0
end
1071
+
1072
# Asks the response for its cost object, priced against the given
# model_info rather than the response's own (possibly dated) model id.
#
# Returns nil when the response has no +cost+ method (for example plain
# structs or mocks) or when the call raises; a raised error is passed to
# log_cost_warning. Callers treat nil as "skip the extra components".
#
# @param response [Object] The response (RubyLLM::Message in production)
# @param model_info [RubyLLM::Model::Info] Resolved pricing source
# @return [RubyLLM::Cost, nil]
def response_cost(response, model_info)
  response.cost(model: model_info) if response.respond_to?(:cost)
rescue => e
  log_cost_warning("response_cost", e)
  nil
end
1088
+
1089
# Writes a debug-level note about a cost-calculation error that was
# swallowed. Cost components are best-effort, so the error is recorded
# rather than raised. Does nothing when Rails or its logger is missing,
# and any failure while logging is itself swallowed.
#
# @param source [String] The method that degraded
# @param error [Exception] The swallowed error
def log_cost_warning(source, error)
  logger = (Rails.logger if defined?(Rails) && Rails.respond_to?(:logger))
  return unless logger

  logger.debug("[RubyLLM::Agents] #{source} skipped: #{error.class}: #{error.message}")
rescue
  nil
end
935
1103
 
936
- # Finds model pricing info
1104
+ # Finds model pricing info.
937
1105
  #
938
1106
  # @param model_id [String] The model ID
939
1107
  # @return [Hash, nil] Model info with pricing
940
1108
  def find_model_info(model_id)
941
- return nil unless defined?(RubyLLM::Models)
1109
+ return nil unless defined?(RubyLLM::Models) && model_id
942
1110
 
943
1111
  RubyLLM::Models.find(model_id)
944
1112
  rescue
@@ -50,9 +50,26 @@ module RubyLLM
50
50
  # When false, executions are logged synchronously.
51
51
  # @return [Boolean] Enable async logging (default: true)
52
52
 
53
+ # @!attribute [rw] soft_purge_after
54
+ # How long to keep full execution details (prompts, responses, tool calls,
55
+ # attempts) before the retention job destroys them. The executions row is
56
+ # preserved so cost, token, and latency analytics remain intact. A
57
+ # truncated copy of the error message is stamped into metadata for
58
+ # long-term error-rate trend analysis.
59
+ # Set to nil to disable soft purging.
60
+ # @return [ActiveSupport::Duration, nil] Soft-purge window (default: 30.days)
61
+
62
+ # @!attribute [rw] hard_purge_after
63
+ # How long to keep the executions row itself before the retention job
64
+ # destroys it entirely. Must be greater than soft_purge_after when both
65
+ # are set. Set to nil to retain executions indefinitely.
66
+ # @return [ActiveSupport::Duration, nil] Hard-purge window (default: 365.days)
67
+
53
68
  # @!attribute [rw] retention_period
54
- # How long to retain execution records before cleanup.
55
- # @return [ActiveSupport::Duration] Retention period (default: 30.days)
69
+ # Deprecated. Alias for hard_purge_after, kept for backward compatibility.
70
+ # Prefer configuring soft_purge_after and hard_purge_after explicitly.
71
+ # @return [ActiveSupport::Duration, nil] Hard-purge window
72
+ # @deprecated Use {#hard_purge_after} instead.
56
73
 
57
74
  # @!attribute [rw] anomaly_cost_threshold
58
75
  # Cost threshold in dollars that triggers anomaly logging.
@@ -356,10 +373,18 @@ module RubyLLM
356
373
  gemini_api_base
357
374
  gpustack_api_base
358
375
  ollama_api_base
376
+ bedrock_api_base
377
+ mistral_api_base
378
+ perplexity_api_base
379
+ vertexai_api_base
359
380
  vertexai_project_id
360
381
  vertexai_location
382
+ xai_api_base
361
383
  request_timeout
362
384
  max_retries
385
+ faraday_adapter
386
+ deprecation_behavior
387
+ tool_concurrency
363
388
  ].freeze
364
389
 
365
390
  FORWARDED_RUBY_LLM_ATTRIBUTES.each do |attr|
@@ -379,7 +404,6 @@ module RubyLLM
379
404
  # Attributes without validation (simple accessors)
380
405
  attr_accessor :default_model,
381
406
  :async_logging,
382
- :retention_period,
383
407
  :dashboard_parent_controller,
384
408
  :basic_auth_username,
385
409
  :basic_auth_password,
@@ -464,7 +488,9 @@ module RubyLLM
464
488
  :tenant_resolver,
465
489
  :tenant_config_resolver,
466
490
  :default_retries,
467
- :budgets
491
+ :budgets,
492
+ :soft_purge_after,
493
+ :hard_purge_after
468
494
 
469
495
  attr_writer :cache_store
470
496
 
@@ -594,6 +620,44 @@ module RubyLLM
594
620
  @default_embedding_batch_size = value
595
621
  end
596
622
 
623
# Sets soft_purge_after with validation.
#
# The value is type-checked first, then assigned, then checked against
# hard_purge_after. If the ordering check fails the previous value is
# restored before the error propagates, so a rejected assignment never
# leaves the configuration in an invalid state.
#
# @param value [ActiveSupport::Duration, Numeric, nil] Window or nil to disable
# @raise [ArgumentError] If value is not a Duration/Numeric or nil, is
#   negative, or is not less than hard_purge_after
def soft_purge_after=(value)
  validate_purge_window!(:soft_purge_after, value)

  previous = @soft_purge_after
  @soft_purge_after = value
  begin
    validate_purge_ordering!
  rescue ArgumentError
    @soft_purge_after = previous
    raise
  end
end
632
+
633
# Sets hard_purge_after with validation.
#
# The value is type-checked first, then assigned, then checked against
# soft_purge_after. If the ordering check fails the previous value is
# restored before the error propagates, so a rejected assignment never
# leaves the configuration in an invalid state.
#
# @param value [ActiveSupport::Duration, Numeric, nil] Window or nil to disable
# @raise [ArgumentError] If value is not a Duration/Numeric or nil, is
#   negative, or is not greater than soft_purge_after
def hard_purge_after=(value)
  validate_purge_window!(:hard_purge_after, value)

  previous = @hard_purge_after
  @hard_purge_after = value
  begin
    validate_purge_ordering!
  rescue ArgumentError
    @hard_purge_after = previous
    raise
  end
end
642
+
643
# Deprecated alias for hard_purge_after.
#
# @return [ActiveSupport::Duration, nil]
# @deprecated Use {#hard_purge_after} instead.
def retention_period
  hard_purge_after
end

# Deprecated setter for retention_period (maps to hard_purge_after).
#
# Legacy configurations only had one retention tier. A legacy value that
# is not greater than the current soft_purge_after (for example the old
# 30-day default against the new 30-day soft default) would fail the
# soft < hard ordering check and raise. To keep such configurations
# working, the soft tier is disabled in that case, which matches the old
# single-tier meaning of retention_period, and a second warning says so.
#
# @param value [ActiveSupport::Duration, Numeric, nil]
# @raise [ArgumentError] If value is not a Duration/Numeric or nil, or is negative
# @deprecated Use {#hard_purge_after=} instead.
def retention_period=(value)
  warn "[DEPRECATION] RubyLLM::Agents config.retention_period is deprecated. " \
    "Use config.hard_purge_after instead (and set config.soft_purge_after for two-tier retention)."

  # Reject invalid values before touching any state.
  validate_purge_window!(:hard_purge_after, value)

  if !value.nil? && !@soft_purge_after.nil? && @soft_purge_after.to_i >= value.to_i
    warn "[DEPRECATION] RubyLLM::Agents config.retention_period (#{value.inspect}) is not greater than " \
      "soft_purge_after (#{@soft_purge_after.inspect}); disabling soft_purge_after to keep single-tier retention."
    @soft_purge_after = nil
  end

  self.hard_purge_after = value
end
660
+
597
661
  # Sets default_embedding_dimensions with validation
598
662
  #
599
663
  # @param value [Integer, nil] Dimensions (must be nil or > 0)
@@ -616,7 +680,8 @@ module RubyLLM
616
680
  @default_timeout = 60
617
681
  @cache_store = nil
618
682
  @async_logging = true
619
- @retention_period = 30.days
683
+ @soft_purge_after = 30.days
684
+ @hard_purge_after = 365.days
620
685
  @anomaly_cost_threshold = 5.00
621
686
  @anomaly_duration_threshold = 10_000
622
687
  @dashboard_auth = ->(_controller) { true }
@@ -960,7 +1025,8 @@ module RubyLLM
960
1025
  },
961
1026
  logging: {
962
1027
  async_logging: async_logging,
963
- retention_period: retention_period,
1028
+ soft_purge_after: soft_purge_after,
1029
+ hard_purge_after: hard_purge_after,
964
1030
  job_retry_attempts: job_retry_attempts,
965
1031
  track_executions: track_executions,
966
1032
  track_cache_hits: track_cache_hits,
@@ -1161,6 +1227,30 @@ module RubyLLM
1161
1227
  raise ArgumentError, "budgets[:enforcement] must be :none, :soft, or :hard"
1162
1228
  end
1163
1229
  end
1230
+
1231
# Validates a purge-window value (Duration, Numeric seconds, or nil).
#
# Accepts nil, an ActiveSupport::Duration whose integer value is not
# negative, or a real, finite, non-negative Numeric. Non-real numerics
# (Complex) and non-finite floats (NaN, Infinity) are rejected here with
# ArgumentError, so they cannot surface later as NoMethodError or
# FloatDomainError when the windows are compared.
#
# @param attr [Symbol] Attribute name for error messages
# @param value [ActiveSupport::Duration, Numeric, nil] Value to validate
# @raise [ArgumentError] If value is neither nil nor a non-negative duration/number
def validate_purge_window!(attr, value)
  return if value.nil?
  return if value.is_a?(ActiveSupport::Duration) && value.to_i >= 0
  # real? and finite? are checked before >= because Complex has no >=.
  return if value.is_a?(Numeric) && value.real? && value.finite? && value >= 0

  raise ArgumentError, "#{attr} must be an ActiveSupport::Duration, non-negative Numeric, or nil"
end
1243
+
1244
# Checks that the soft-purge window is strictly shorter than the
# hard-purge window. The check is skipped when either window is nil.
# Windows are compared by their integer (to_i) values.
#
# @raise [ArgumentError] If ordering is violated
def validate_purge_ordering!
  soft = @soft_purge_after
  hard = @hard_purge_after
  return if soft.nil? || hard.nil? || soft.to_i < hard.to_i

  raise ArgumentError, "soft_purge_after (#{soft.inspect}) must be less than " \
    "hard_purge_after (#{hard.inspect})"
end
1164
1254
  end
1165
1255
  end
1166
1256
  end
@@ -113,6 +113,11 @@ module RubyLLM
113
113
 
114
114
  # Auto-create tenant record callback
115
115
  after_create :create_default_llm_tenant if llm_tenant_options[:budget]
116
+
117
+ # Keep the denormalized Tenant#name column fresh so the dashboard's
118
+ # SQL search/sort by name keeps working for linked tenants. Display
119
+ # already resolves the name live, so this only powers SQL.
120
+ after_update :sync_llm_tenant_name
116
121
  end
117
122
 
118
123
  private
@@ -143,6 +148,17 @@ module RubyLLM
143
148
  send(id_method).to_s
144
149
  end
145
150
 
151
# Resolves this model's tenant display name at call time.
#
# Calls the method named by the :name entry of the class's
# llm_tenant_options (e.g. `llm_tenant name: :company_name`), or +to_s+
# when none is set, and converts the result to a String. Because it is
# resolved on each call, the value reflects the model's current state.
#
# @return [String] The current display name
def llm_tenant_name
  configured = self.class.llm_tenant_options[:name]
  send(configured || :to_s).to_s
end
161
+
146
162
  # Returns API keys resolved from the DSL configuration
147
163
  #
148
164
  # Maps provider names (e.g., :openai, :anthropic) to their resolved values
@@ -354,6 +370,30 @@ module RubyLLM
354
370
  tenant.tenant_record = self
355
371
  tenant.save!
356
372
  end
373
+
374
# Copies the model's current tenant name into the linked Tenant row's
# name column so SQL search/sort on that column stays current.
#
# Runs only when a :name option is configured, the model responds to
# `saved_change_to_<name>?` and it returns true, the linked record is
# persisted, and the stored name differs. Method-based names without such
# a predicate are skipped. Best-effort: any error is swallowed.
#
# @return [void]
def sync_llm_tenant_name
  source = self.class.llm_tenant_options[:name]
  return unless source

  changed = "saved_change_to_#{source}?"
  return unless respond_to?(changed) && public_send(changed)

  tenant_row = llm_tenant_record
  return unless tenant_row&.persisted?

  fresh_name = llm_tenant_name
  # update_column writes straight to the column, bypassing validations and callbacks.
  tenant_row.update_column(:name, fresh_name) unless tenant_row.read_attribute(:name) == fresh_name
rescue
  nil
end
357
397
  end
358
398
  end
359
399
  end
@@ -4,6 +4,6 @@ module RubyLLM
4
4
  module Agents
5
5
  # Current version of the RubyLLM::Agents gem
6
6
  # @return [String] Semantic version string
7
- VERSION = "3.12.0"
7
+ VERSION = "3.14.0"
8
8
  end
9
9
  end
@@ -95,12 +95,13 @@ module RubyLLM
95
95
  def record_failed_execution(error, started_at)
96
96
  return unless defined?(RubyLLM::Agents::Execution)
97
97
 
98
- execution_data = build_failed_execution_data(error, started_at)
98
+ execution_data, detail_data = build_failed_execution_data(error, started_at)
99
99
 
100
100
  if config.async_logging && defined?(ExecutionLoggerJob)
101
- ExecutionLoggerJob.perform_later(execution_data)
101
+ ExecutionLoggerJob.perform_later(execution_data.merge(_detail_data: detail_data))
102
102
  else
103
- RubyLLM::Agents::Execution.create!(execution_data)
103
+ execution = RubyLLM::Agents::Execution.create!(execution_data)
104
+ execution.create_detail!(detail_data) if detail_data.present?
104
105
  end
105
106
  rescue => e
106
107
  Rails.logger.error("[RubyLLM::Agents] Failed to record failed #{execution_type} execution: #{e.message}") if defined?(Rails)
@@ -124,7 +125,7 @@ module RubyLLM
124
125
  end
125
126
 
126
127
  def build_failed_execution_data(error, started_at)
127
- {
128
+ execution_data = {
128
129
  agent_type: self.class.name,
129
130
  tenant_id: @tenant_id,
130
131
  execution_type: execution_type,
@@ -137,9 +138,12 @@ module RubyLLM
137
138
  started_at: started_at,
138
139
  completed_at: Time.current,
139
140
  error_class: error.class.name,
140
- error_message: error.message.truncate(1000),
141
141
  metadata: {}
142
142
  }
143
+
144
+ detail_data = {error_message: error.message.to_s.truncate(1000)}
145
+
146
+ [execution_data, detail_data]
143
147
  end
144
148
 
145
149
  def build_metadata(result)
@@ -37,8 +37,10 @@ module RubyLLM
37
37
  execution.create_detail!(detail_data)
38
38
  end
39
39
 
40
- # Calculate costs if token data is available
41
- if execution.input_tokens && execution.output_tokens
40
+ # Calculate costs if token data is available. Skip when the pipeline
41
+ # already supplied an accurate total (RubyLLM::Cost, which prices cache
42
+ # and reasoning tokens) so we don't downgrade it to text-only pricing.
43
+ if execution.input_tokens && execution.output_tokens && !execution.total_cost&.positive?
42
44
  execution.calculate_costs!
43
45
  execution.save!
44
46
  end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Agents
    # Background job that enforces two-tier data retention on execution records.
    #
    # Soft pass: for executions older than {Configuration#soft_purge_after},
    # destroys the associated detail and tool_executions rows, preserves a
    # truncated copy of the detail's error_message in metadata, and stamps
    # metadata["soft_purged_at"] so the dashboard can surface the state and
    # the pass stays idempotent.
    #
    # Hard pass: for executions older than {Configuration#hard_purge_after},
    # destroys the executions row itself. Removal of any remaining dependent
    # rows is left to the model/database cascade, which is not defined here.
    #
    # Either tier may be set to nil in configuration to skip that pass.
    # Windows may be an ActiveSupport::Duration or a plain Numeric number of
    # seconds; both forms are accepted by the configuration validators.
    #
    # @example Enqueue manually
    #   RubyLLM::Agents::RetentionJob.perform_later
    #
    # @example Schedule daily (whenever gem)
    #   every 1.day, at: "3:00 am" do
    #     runner "RubyLLM::Agents::RetentionJob.perform_later"
    #   end
    #
    # @api public
    class RetentionJob < ActiveJob::Base
      queue_as :default

      # Maximum length of the error message copied into metadata.
      ERROR_MESSAGE_MAX_LENGTH = 500
      # Number of executions loaded or destroyed per batch.
      BATCH_SIZE = 500

      # Runs the soft and hard retention passes based on current configuration.
      #
      # @return [Hash] counts of rows affected in each pass
      def perform
        {
          soft_purged: soft_purge,
          hard_purged: hard_purge
        }
      end

      private

      # Destroys detail + tool_execution rows for executions older than the
      # soft-purge window that have not already been soft-purged. Stamps
      # metadata with the purge timestamp and preserves a truncated
      # error_message for long-term error-rate analytics.
      #
      # The "already purged" filter runs in Ruby rather than SQL because
      # JSON key-exists operators differ across SQLite/Postgres/MySQL; this
      # keeps the job adapter-agnostic. We batch to bound memory.
      #
      # @return [Integer] number of executions soft-purged in this run
      def soft_purge
        cutoff = cutoff_for(RubyLLM::Agents.configuration.soft_purge_after)
        return 0 if cutoff.nil?

        count = 0

        Execution
          .where("created_at < ?", cutoff)
          .includes(:detail)
          .find_in_batches(batch_size: BATCH_SIZE) do |batch|
            batch.each do |execution|
              next if execution.soft_purged?

              purge_one(execution)
              count += 1
            end
          end

        count
      end

      # Destroys executions older than the hard-purge window, in batches.
      #
      # @return [Integer] number of executions destroyed in this run
      def hard_purge
        cutoff = cutoff_for(RubyLLM::Agents.configuration.hard_purge_after)
        return 0 if cutoff.nil?

        total = 0

        Execution.where("created_at < ?", cutoff).in_batches(of: BATCH_SIZE) do |batch|
          total += batch.destroy_all.size
        end

        total
      end

      # Converts a retention window into the timestamp before which rows
      # qualify for purging.
      #
      # Uses time subtraction rather than `window.ago` because the
      # configuration also accepts plain Numeric seconds, and `ago` is only
      # defined on ActiveSupport::Duration.
      #
      # @param window [ActiveSupport::Duration, Numeric, nil] retention window
      # @return [Time, nil] cutoff timestamp, or nil when the tier is disabled
      def cutoff_for(window)
        return nil if window.nil?

        Time.current - window
      end

      # Performs the soft purge for a single execution inside a transaction:
      # removes its detail and tool executions, then rewrites metadata.
      #
      # @param execution [Execution] the execution to soft-purge
      def purge_one(execution)
        # Read the error message before the detail row is destroyed.
        preserved_error = preserved_error_message(execution)

        Execution.transaction do
          execution.detail&.destroy
          execution.tool_executions.destroy_all

          new_metadata = (execution.metadata || {}).merge(
            "soft_purged_at" => Time.current.iso8601
          )
          new_metadata["error_message"] = preserved_error if preserved_error

          # update_columns writes directly, skipping validations and callbacks.
          execution.update_columns(metadata: new_metadata)
        end
      end

      # Returns a truncated copy of the detail's error_message, or nil when
      # there is no detail or the message is blank.
      #
      # @param execution [Execution] the execution being purged
      # @return [String, nil]
      def preserved_error_message(execution)
        raw = execution.detail&.error_message
        return nil if raw.blank?

        raw.to_s.truncate(ERROR_MESSAGE_MAX_LENGTH)
      end
    end
  end
end