ruby-skill-bench 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +166 -35
  3. data/docs/architecture.md +3 -1
  4. data/docs/first-eval-guide.md +7 -7
  5. data/docs/testing-guide.md +1 -1
  6. data/lib/skill_bench/agent/react_agent/loop_runner.rb +44 -9
  7. data/lib/skill_bench/agent/react_agent/step.rb +7 -1
  8. data/lib/skill_bench/cli/batch_result_printer.rb +45 -0
  9. data/lib/skill_bench/cli/eval/eval_options.rb +4 -0
  10. data/lib/skill_bench/cli/help_printer.rb +10 -2
  11. data/lib/skill_bench/cli/init_command.rb +2 -1
  12. data/lib/skill_bench/cli/result_printer.rb +1 -1
  13. data/lib/skill_bench/cli/run_command.rb +47 -9
  14. data/lib/skill_bench/cli/validate_command.rb +242 -0
  15. data/lib/skill_bench/cli.rb +3 -0
  16. data/lib/skill_bench/client.rb +43 -1
  17. data/lib/skill_bench/clients/all.rb +2 -0
  18. data/lib/skill_bench/clients/base_client.rb +12 -1
  19. data/lib/skill_bench/clients/base_url_validator.rb +105 -0
  20. data/lib/skill_bench/clients/provider_config.rb +34 -1
  21. data/lib/skill_bench/clients/provider_schemas.rb +4 -0
  22. data/lib/skill_bench/clients/providers/mistral.rb +47 -0
  23. data/lib/skill_bench/commands/init.rb +5 -0
  24. data/lib/skill_bench/commands/skill_new.rb +3 -1
  25. data/lib/skill_bench/config/applier.rb +2 -0
  26. data/lib/skill_bench/config/defaults.rb +2 -0
  27. data/lib/skill_bench/config/facade_readers.rb +7 -0
  28. data/lib/skill_bench/config/facade_writers.rb +17 -0
  29. data/lib/skill_bench/config/json_loader.rb +1 -1
  30. data/lib/skill_bench/config/store.rb +29 -0
  31. data/lib/skill_bench/config.rb +18 -0
  32. data/lib/skill_bench/evaluation/runner.rb +20 -3
  33. data/lib/skill_bench/execution/context_hydrator.rb +52 -11
  34. data/lib/skill_bench/execution/sandbox.rb +58 -11
  35. data/lib/skill_bench/judge/judge.rb +4 -0
  36. data/lib/skill_bench/judge/prompt.rb +42 -6
  37. data/lib/skill_bench/models/config.rb +32 -0
  38. data/lib/skill_bench/output_formatter.rb +60 -1
  39. data/lib/skill_bench/package_verifier.rb +1 -1
  40. data/lib/skill_bench/rails/skill_templates.rb +19 -5
  41. data/lib/skill_bench/services/agent_spawner_service.rb +7 -3
  42. data/lib/skill_bench/services/batch_runner_service.rb +111 -0
  43. data/lib/skill_bench/services/compare_option_parser.rb +1 -0
  44. data/lib/skill_bench/services/cost_calculator.rb +91 -0
  45. data/lib/skill_bench/services/html_formatter.rb +289 -0
  46. data/lib/skill_bench/services/json_formatter.rb +19 -1
  47. data/lib/skill_bench/services/junit_formatter.rb +74 -24
  48. data/lib/skill_bench/services/provider_resolver.rb +5 -2
  49. data/lib/skill_bench/services/response_cache.rb +130 -0
  50. data/lib/skill_bench/services/runner_service.rb +88 -4
  51. data/lib/skill_bench/services/summary_formatter.rb +90 -0
  52. data/lib/skill_bench/services/template_registry.rb +43 -9
  53. data/lib/skill_bench/services/trend_recorder_service.rb +29 -2
  54. data/lib/skill_bench/tools/registry.rb +29 -3
  55. data/lib/skill_bench/tools/run_command.rb +171 -19
  56. data/lib/skill_bench/trend_tracker/persistence.rb +27 -10
  57. data/lib/skill_bench/trend_tracker.rb +5 -5
  58. data/lib/skill_bench/version.rb +1 -1
  59. data/lib/skill_bench.rb +2 -3
  60. metadata +17 -36
@@ -24,6 +24,30 @@ module SkillBench
24
24
  new(raw_data)
25
25
  end
26
26
 
27
# Fetches the config for +path+, reusing an already-parsed instance while
# the file on disk is unchanged.
#
# Parsed configs live in a class-level cache keyed by the absolute path.
# Each entry remembers the file's mtime at parse time; a different mtime on
# a later call triggers a fresh parse, so a rewritten file (for example
# between tests) is picked up. Assign nil to the @loaded ivar to drop every
# cached entry.
#
# @param path [String] Path to config file (default: skill-bench.json)
# @return [SkillBench::Models::Config] Cached or freshly parsed config
# @raise [Errno::ENOENT] if config file not found
def self.loaded(path = 'skill-bench.json')
  absolute = File.expand_path(path)
  current_mtime = File.mtime(absolute)
  store = (@loaded ||= {})
  cached = store[absolute]

  unless cached && cached[:mtime] == current_mtime
    fresh = load(path)
    cached = { mtime: current_mtime, config: fresh }
    store[absolute] = cached
  end

  cached[:config]
end
50
+
27
51
  # Returns the configured provider name
28
52
  # @return [String, nil] Provider name
29
53
  def provider_name
@@ -36,6 +60,14 @@ module SkillBench
36
60
  @data[:config] || {}
37
61
  end
38
62
 
63
# Tells whether the built-in mock provider was explicitly chosen.
#
# Only the literal provider name 'mock' counts; a config without any
# provider does not.
#
# @return [Boolean] true when the configured provider is 'mock'
def mock?
  selected = provider_name
  selected == 'mock'
end
70
+
39
71
  # Returns max execution time
40
72
  # @return [Integer] Max execution time in seconds
41
73
  def max_execution_time
@@ -5,6 +5,7 @@ require_relative 'services/delta_table_formatter'
5
5
  require_relative 'services/feedback_generator'
6
6
  require_relative 'services/json_formatter'
7
7
  require_relative 'services/junit_formatter'
8
+ require_relative 'services/html_formatter'
8
9
 
9
10
  module SkillBench
10
11
  # Handles formatting output for different use cases (human, CI, etc.).
@@ -14,7 +15,7 @@ module SkillBench
14
15
  # Format the eval result for output.
15
16
  #
16
17
  # @param result [Hash] Eval result with keys like :eval_name, :pass, :score, etc.
17
- # @param format [Symbol] Output format (:human, :json, :junit)
18
+ # @param format [Symbol] Output format (:human, :json, :junit, :html)
18
19
  # @return [String] Formatted output string
19
20
  def self.format(result, format: :human)
20
21
  case format
@@ -22,6 +23,8 @@ module SkillBench
22
23
  Services::JsonFormatter.format(result)
23
24
  when :junit
24
25
  Services::JUnitFormatter.format(result)
26
+ when :html
27
+ Services::HtmlFormatter.format(result)
25
28
  else
26
29
  format_human(result)
27
30
  end
@@ -39,6 +42,48 @@ module SkillBench
39
42
  report&.verdict ? 0 : 1
40
43
  end
41
44
 
45
# Renders an aggregate batch result as human-readable text.
#
# The output is one PASS/FAIL line per eval, an empty separator line, and a
# closing summary line, joined with newlines.
#
# @param aggregate [Hash] Aggregate envelope with :results and :summary.
# @return [String] Human-readable batch summary.
def self.format_batch(aggregate)
  verdicts = aggregate[:results].map { |entry| batch_result_line(entry) }
  footer = batch_summary_line(aggregate[:summary])
  [*verdicts, '', footer].join("\n")
end
57
+
58
# Maps an aggregate batch result onto a process exit code.
#
# The failure count is read from the :summary hash; a missing summary or
# count is treated as zero failures.
#
# @param aggregate [Hash] Aggregate envelope with a :summary.
# @return [Integer] 0 when every eval passed, 1 when any failed.
def self.batch_exit_code(aggregate)
  failures = aggregate.dig(:summary, :failed).to_i
  return 1 if failures.positive?

  0
end
65
+
66
# Produces the one-line verdict for a single eval in a batch.
#
# The line starts with PASS or FAIL (derived from the single-eval exit
# code), followed by the eval name. When the result carries an error
# message under response -> error -> message, it is appended after a dash.
#
# @param result [Hash] A single-eval result envelope.
# @return [String] A formatted verdict line.
def self.batch_result_line(result)
  verdict = exit_code(result).zero? ? 'PASS' : 'FAIL'
  headline = "#{verdict} #{result[:eval_name]}"
  message = result.dig(:response, :error, :message)
  return headline unless message

  "#{headline} — #{message}"
end
76
+ private_class_method :batch_result_line
77
+
78
# Produces the closing summary line of a batch run.
#
# @param summary [Hash] Summary with :passed, :failed and :total counts.
# @return [String] A formatted summary line.
def self.batch_summary_line(summary)
  passed = summary[:passed]
  failed = summary[:failed]
  total = summary[:total]
  "Summary: #{passed} passed / #{failed} failed (#{total} total)"
end
85
+ private_class_method :batch_summary_line
86
+
42
87
  # Format result as human-readable text.
43
88
  #
44
89
  # @param result [Hash] Eval result in old or new format.
@@ -93,6 +138,7 @@ module SkillBench
93
138
  " Eval: #{result[:eval_name] || ''}",
94
139
  " Skill: #{result[:skill_name] || ''}",
95
140
  " Provider: #{result[:provider_name] || ''}",
141
+ build_usage_line(result),
96
142
  ('═' * 55),
97
143
  ''
98
144
  ]
@@ -110,6 +156,19 @@ module SkillBench
110
156
  end
111
157
  private_class_method :format_delta_report
112
158
 
159
# Produces the token/cost line shown in the report header.
#
# The token total is read from result[:tokens] under either a symbol or a
# string 'total_tokens' key and falls back to 0. The cost is printed with
# four decimals, or as a dash when result[:cost] is absent.
#
# @param result [Hash] Eval result envelope; reads :tokens and :cost.
# @return [String] A formatted "Tokens / Est. Cost" line.
def self.build_usage_line(result)
  usage = result[:tokens] || {}
  token_total = usage[:total_tokens] || usage['total_tokens'] || 0
  estimate = result[:cost]
  cost_text =
    if estimate
      # Kernel.format is spelled out explicitly, as in the original line.
      Kernel.format('$%.4f', estimate)
    else
      '—'
    end
  " Tokens: #{token_total} | Est. Cost: #{cost_text}"
end
170
+ private_class_method :build_usage_line
171
+
113
172
  # Builds iteration timeline lines from the result response.
114
173
  #
115
174
  # @param result [Hash] Eval result envelope.
@@ -25,7 +25,7 @@ module SkillBench
25
25
  lib/skill_bench/config/json_loader.rb
26
26
  lib/skill_bench/config/store.rb
27
27
  lib/skill_bench/package_verifier.rb
28
- lib/skill_bench/source_path_resolver.rb
28
+ lib/skill_bench/execution/source_path_resolver.rb
29
29
  lib/skill_bench/runner.rb
30
30
  ].freeze
31
31
 
@@ -1,16 +1,30 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/inflector'
4
-
5
3
  module SkillBench
6
4
  module Rails
7
5
  # Generates Rails-specific skill templates
8
6
  class SkillTemplates
7
# Convert a snake_case or kebab-case name to CamelCase.
#
# Replaces ActiveSupport's +String#camelize+ for the scaffold inputs used
# here: it splits on +_+ and +-+ separators, upcases the first letter of
# each segment, and preserves any segment that is already CamelCase.
# Empty segments (doubled, leading or trailing separators) are dropped.
# The name is coerced with +to_s+, so Symbol names are accepted as well.
#
# @example
#   SkillTemplates.camelize('user_creator')  # => "UserCreator"
#   SkillTemplates.camelize('order-service') # => "OrderService"
#   SkillTemplates.camelize('UserCreator')   # => "UserCreator"
#   SkillTemplates.camelize(:user_creator)   # => "UserCreator"
# @param name [String, Symbol] snake_case, kebab-case, or already-CamelCase name
# @return [String] CamelCase name
def self.camelize(name)
  name.to_s
      .split(/[-_]/)
      .reject(&:empty?)
      .map { |segment| segment[0].upcase + segment[1..] }
      .join
end
22
+
9
23
  # Generate a service object template
10
24
  # @param name [String] Service name (e.g., 'my_service' or 'my-service')
11
25
  # @return [String] Service object Ruby class
12
26
  def self.service_object(name)
13
- class_name = name.split(/[-_]/).map(&:capitalize).join
27
+ class_name = camelize(name)
14
28
  <<~RUBY
15
29
  # frozen_string_literal: true
16
30
 
@@ -43,7 +57,7 @@ module SkillBench
43
57
  # @param name [String] Concern name (e.g., 'my_concern')
44
58
  # @return [String] Concern module
45
59
  def self.concern(name)
46
- module_name = name.camelize
60
+ module_name = camelize(name)
47
61
  <<~RUBY
48
62
  # frozen_string_literal: true
49
63
 
@@ -67,7 +81,7 @@ module SkillBench
67
81
  # @param name [String] Model name (e.g., 'my_model')
68
82
  # @return [String] ActiveRecord model class
69
83
  def self.active_record_model(name)
70
- class_name = name.camelize
84
+ class_name = camelize(name)
71
85
  <<~RUBY
72
86
  # frozen_string_literal: true
73
87
 
@@ -7,6 +7,9 @@ module SkillBench
7
7
  module Services
8
8
  # Spawns and executes LLM agents for evaluation.
9
9
  class AgentSpawnerService
10
+ # Zeroed token usage used when a run produces no usage data (e.g. mock, rescue).
11
+ EMPTY_USAGE = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }.freeze
12
+
10
13
  # Spawns the LLM agent with the given system prompt.
11
14
  #
12
15
  # @param evaluation [SkillBench::Models::Eval] The eval being run
@@ -33,7 +36,7 @@ module SkillBench
33
36
  #
34
37
  # @return [Hash] Agent response with result, status, runtime, usage, raw_response, iterations
35
38
  def call
36
- return { result: 'mock result', status: :success, iterations: [] } if @provider.name == 'mock'
39
+ return { result: 'mock result', status: :success, iterations: [], usage: EMPTY_USAGE } if @provider.name == 'mock'
37
40
 
38
41
  client_params = build_client_params
39
42
  max_iterations = @config&.[](:max_iterations) || @config&.[]('max_iterations') || 25
@@ -63,6 +66,7 @@ module SkillBench
63
66
  final_answer = agent_result.dig(:response, :content) || ''
64
67
  diff = Execution::Sandbox.capture_diff(sandbox.path)
65
68
  iterations = agent_result.dig(:response, :iterations) || []
69
+ usage = agent_result.dig(:response, :usage) || EMPTY_USAGE
66
70
 
67
71
  output = [final_answer, diff].reject(&:empty?).join("\n\n")
68
72
 
@@ -70,7 +74,7 @@ module SkillBench
70
74
  result: output,
71
75
  status: status,
72
76
  runtime: @provider.runtime,
73
- usage: {},
77
+ usage: usage,
74
78
  raw_response: agent_result,
75
79
  iterations: iterations
76
80
  }
@@ -80,7 +84,7 @@ module SkillBench
80
84
  result: "Error: #{e.message}",
81
85
  status: :error,
82
86
  runtime: @provider.runtime,
83
- usage: {},
87
+ usage: EMPTY_USAGE,
84
88
  raw_response: { error: e.message, backtrace: e.backtrace },
85
89
  iterations: []
86
90
  }
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+ require 'parallel'
5
+ require_relative 'runner_service'
6
+ require_relative '../output_formatter'
7
+ require_relative '../runner'
8
+
9
module SkillBench
  module Services
    # Orchestrates running many evals in a single batch.
    #
    # Discovers every eval under a target directory and runs
    # {RunnerService} over each, returning an aggregate envelope with
    # per-eval results and a pass/fail summary.
    #
    # Discovery reuses {SkillBench::Runner.discover_task_dirs} but never
    # routes through the deprecated {SkillBench::Task::Evaluator}: each eval
    # is executed by the supported {RunnerService}.
    class BatchRunnerService
      # Default directory scanned for evals when none is supplied.
      DEFAULT_EVALS_DIR = 'evals'

      # Default batch-level thread count.
      #
      # Each {RunnerService.call} already runs its baseline and context
      # agents concurrently (#26), so this is kept modest to bound nested
      # thread usage (batch threads x per-eval threads).
      DEFAULT_THREADS = 2

      # Runs every eval discovered under +evals_dir+.
      #
      # @param skill_names [Array<String>] Names of the skills to apply to every eval
      # @param evals_dir [String] Directory to scan for evals
      # @param pack [String, nil] Optional pack name for registry-based skill resolution
      # @param registry_manifest [String, nil] Optional path to registry.json manifest
      # @param threads [Integer, nil] Batch-level thread count; nil selects {DEFAULT_THREADS}
      # @return [Hash] Aggregate envelope with :results and :summary
      # @raise [ArgumentError] when no evals are found under +evals_dir+
      def self.call(skill_names:, evals_dir: DEFAULT_EVALS_DIR, pack: nil, registry_manifest: nil, threads: DEFAULT_THREADS)
        new(
          skill_names: skill_names,
          evals_dir: evals_dir,
          pack: pack,
          registry_manifest: registry_manifest,
          threads: threads
        ).call
      end

      # @param skill_names [Array<String>] Names of the skills
      # @param evals_dir [String] Directory to scan for evals
      # @param pack [String, nil] Optional pack name
      # @param registry_manifest [String, nil] Optional registry.json path
      # @param threads [Integer, nil] Batch-level thread count; nil selects {DEFAULT_THREADS}
      def initialize(skill_names:, evals_dir:, pack:, registry_manifest:, threads:)
        @skill_names = skill_names
        @evals_dir = evals_dir
        @pack = pack
        @registry_manifest = registry_manifest
        # A nil count (e.g. an unset option forwarded by a caller) would reach
        # Parallel.map as `in_threads: nil`, which Parallel reads as "option
        # not given" and answers with its process-based default instead of a
        # thread pool. Fall back to the documented default instead.
        @threads = threads || DEFAULT_THREADS
      end

      # Discovers the target evals and runs each through {RunnerService}.
      #
      # @return [Hash] Aggregate envelope with :results and :summary
      # @raise [ArgumentError] when no evals are found under the directory
      def call
        eval_dirs = discover_eval_dirs
        raise ArgumentError, "No evals found under #{evals_dir}" if eval_dirs.empty?

        results = run_all(eval_dirs)
        { results: results, summary: summarize(results) }
      end

      private

      attr_reader :skill_names, :evals_dir, :pack, :registry_manifest, :threads

      # Finds every eval directory under the configured root.
      #
      # @return [Array<Pathname>] Directories that contain a task.md
      def discover_eval_dirs
        SkillBench::Runner.discover_task_dirs(Pathname.new(evals_dir))
      end

      # Runs every eval directory through {RunnerService} concurrently.
      #
      # Each directory path is passed to {RunnerService} as the eval name.
      #
      # @param eval_dirs [Array<Pathname>] Discovered eval directories
      # @return [Array<Hash>] Per-eval RunnerService results
      def run_all(eval_dirs)
        Parallel.map(eval_dirs, in_threads: threads) do |eval_dir|
          RunnerService.call(
            eval_name: eval_dir.to_s,
            skill_names: skill_names,
            pack: pack,
            registry_manifest: registry_manifest
          )
        end
      end

      # Tallies pass/fail counts, reusing the single-eval exit-code logic.
      #
      # @param results [Array<Hash>] Per-eval results
      # @return [Hash] Summary with :total, :passed and :failed counts
      def summarize(results)
        passed = results.count { |result| SkillBench::OutputFormatter.exit_code(result).zero? }
        { total: results.size, passed: passed, failed: results.size - passed }
      end
    end
  end
end
@@ -44,6 +44,7 @@ module SkillBench
44
44
  opts.on('--variant-b SPEC', 'Second variant (e.g., "pack:hanami" or "/path/to/skill")') { |v| options[:variant_b] = v }
45
45
  opts.on('--eval PATH', 'Path to the eval directory') { |v| options[:eval] = v }
46
46
  opts.on('--format FORMAT', 'Output format (human, json)') { |v| options[:format] = v.to_sym }
47
+ opts.on('--cache', 'Enable content-addressed response caching') { ENV['SKILL_BENCH_CACHE'] = '1' }
47
48
  opts.on('-h', '--help', 'Prints this help') do
48
49
  puts opts
49
50
  raise SkillBench::HelpRequested
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
module SkillBench
  module Services
    # Estimates the USD cost of an LLM run from token usage and a model name.
    #
    # Prices are approximate, drawn from public OpenAI/Anthropic pricing pages,
    # and expressed in USD per 1,000 tokens. Provider pricing changes over time,
    # so treat the result as a rough estimate and extend {PRICES} as needed.
    class CostCalculator
      # Approximate per-model prices in USD per 1,000 tokens.
      # Keyed by a canonical model prefix; longer prefixes win on lookup so that
      # dated variants (e.g. "claude-sonnet-4-20250514") resolve correctly.
      # Source: public OpenAI and Anthropic pricing pages (approximate).
      PRICES = {
        'gpt-4o-mini' => { input: 0.00015, output: 0.0006 },
        'gpt-4o' => { input: 0.005, output: 0.015 },
        'gpt-4-turbo' => { input: 0.01, output: 0.03 },
        'gpt-4' => { input: 0.03, output: 0.06 },
        'gpt-3.5-turbo' => { input: 0.0005, output: 0.0015 },
        'claude-opus-4' => { input: 0.015, output: 0.075 },
        'claude-sonnet-4' => { input: 0.003, output: 0.015 },
        'claude-3-5-sonnet' => { input: 0.003, output: 0.015 },
        'claude-3-5-haiku' => { input: 0.0008, output: 0.004 },
        'claude-3-opus' => { input: 0.015, output: 0.075 },
        'claude-3-sonnet' => { input: 0.003, output: 0.015 },
        'claude-3-haiku' => { input: 0.00025, output: 0.00125 }
      }.freeze

      # Token count that one priced unit of {PRICES} covers.
      TOKENS_PER_UNIT = 1000.0

      # Characters that may follow a {PRICES} key in a variant of that model
      # (e.g. "gpt-4o-2024-08-06", "claude-3-5-sonnet@20240620"). Anything
      # else after the prefix (such as the "." in "gpt-4.1") denotes a
      # different model family that must not inherit the prefix's price.
      VARIANT_SEPARATORS = %w[- : @].freeze

      # Estimates the USD cost for a run.
      #
      # @param usage [Hash, nil] Token usage with :prompt_tokens and :completion_tokens.
      # @param model [String, nil] The model name (e.g. "gpt-4o").
      # @return [Float, nil] Estimated cost in USD, or nil when the model is unknown.
      def self.call(usage:, model:)
        new(usage, model).call
      end

      # @param usage [Hash, nil] Token usage hash; nil is treated as empty.
      # @param model [String, nil] The model name.
      def initialize(usage, model)
        @usage = usage || {}
        @model = model
      end

      # Estimates the USD cost for the configured usage and model.
      #
      # @return [Float, nil] Estimated cost in USD (rounded to 6 decimals),
      #   or nil when the model is unknown.
      def call
        price = price_for(@model)
        return nil unless price

        input_cost = units(:prompt_tokens) * price[:input]
        output_cost = units(:completion_tokens) * price[:output]
        (input_cost + output_cost).round(6)
      end

      private

      # Finds the price entry for a model.
      #
      # An exact (case-insensitive) name match wins. Otherwise the longest
      # {PRICES} key that the name extends as a variant is used.
      #
      # @param model [String, nil] The model name.
      # @return [Hash, nil] Price entry with :input and :output, or nil when unknown.
      def price_for(model)
        key = model.to_s.downcase
        return PRICES[key] if PRICES.key?(key)

        PRICES.select { |name, _| variant_of?(key, name) }.max_by { |name, _| name.length }&.last
      end

      # Tells whether +key+ names a variant of the priced model +name+.
      #
      # A bare prefix match is not enough: "gpt-4.1" starts with "gpt-4" but
      # is a different model, so the prefix must be followed by one of
      # {VARIANT_SEPARATORS}.
      #
      # @param key [String] Downcased model name being looked up.
      # @param name [String] A {PRICES} key.
      # @return [Boolean] true when +key+ is +name+ plus a separated suffix.
      def variant_of?(key, name)
        key.start_with?(name) && VARIANT_SEPARATORS.include?(key[name.length])
      end

      # Converts a usage token count into priced 1K-token units.
      #
      # @param key [Symbol] The usage key to read.
      # @return [Float] The number of priced units.
      def units(key)
        token_count(key) / TOKENS_PER_UNIT
      end

      # Reads a token count from the usage hash, tolerating string keys.
      #
      # @param key [Symbol] The usage key (e.g. :prompt_tokens).
      # @return [Integer] The token count, or zero when absent.
      def token_count(key)
        (@usage[key] || @usage[key.to_s] || 0).to_i
      end
    end
  end
end