llms 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +160 -0
  4. data/bin/llms-chat +6 -0
  5. data/bin/llms-test-model-access +4 -0
  6. data/bin/llms-test-model-image-support +4 -0
  7. data/bin/llms-test-model-prompt-caching +4 -0
  8. data/bin/llms-test-model-tool-use +5 -0
  9. data/lib/llms/adapters/anthropic_message_adapter.rb +73 -0
  10. data/lib/llms/adapters/anthropic_tool_call_adapter.rb +20 -0
  11. data/lib/llms/adapters/base_message_adapter.rb +60 -0
  12. data/lib/llms/adapters/google_gemini_message_adapter.rb +72 -0
  13. data/lib/llms/adapters/google_gemini_tool_call_adapter.rb +20 -0
  14. data/lib/llms/adapters/open_ai_compatible_message_adapter.rb +88 -0
  15. data/lib/llms/adapters/open_ai_compatible_tool_call_adapter.rb +67 -0
  16. data/lib/llms/adapters.rb +12 -0
  17. data/lib/llms/apis/google_gemini_api.rb +45 -0
  18. data/lib/llms/apis/open_ai_compatible_api.rb +54 -0
  19. data/lib/llms/cli/base.rb +186 -0
  20. data/lib/llms/cli/chat.rb +92 -0
  21. data/lib/llms/cli/test_access.rb +79 -0
  22. data/lib/llms/cli/test_image_support.rb +92 -0
  23. data/lib/llms/cli/test_prompt_caching.rb +275 -0
  24. data/lib/llms/cli/test_tool_use.rb +108 -0
  25. data/lib/llms/cli.rb +12 -0
  26. data/lib/llms/conversation.rb +100 -0
  27. data/lib/llms/conversation_message.rb +60 -0
  28. data/lib/llms/conversation_tool_call.rb +14 -0
  29. data/lib/llms/conversation_tool_result.rb +15 -0
  30. data/lib/llms/exceptions.rb +33 -0
  31. data/lib/llms/executors/anthropic_executor.rb +247 -0
  32. data/lib/llms/executors/base_executor.rb +144 -0
  33. data/lib/llms/executors/google_gemini_executor.rb +212 -0
  34. data/lib/llms/executors/hugging_face_executor.rb +17 -0
  35. data/lib/llms/executors/open_ai_compatible_executor.rb +209 -0
  36. data/lib/llms/executors.rb +52 -0
  37. data/lib/llms/models/model.rb +86 -0
  38. data/lib/llms/models/provider.rb +48 -0
  39. data/lib/llms/models.rb +187 -0
  40. data/lib/llms/parsers/anthropic_chat_response_stream_parser.rb +184 -0
  41. data/lib/llms/parsers/google_gemini_chat_response_stream_parser.rb +128 -0
  42. data/lib/llms/parsers/open_ai_compatible_chat_response_stream_parser.rb +170 -0
  43. data/lib/llms/parsers/partial_json_parser.rb +77 -0
  44. data/lib/llms/parsers/sse_chat_response_stream_parser.rb +72 -0
  45. data/lib/llms/public_models.json +607 -0
  46. data/lib/llms/stream/event_emitter.rb +48 -0
  47. data/lib/llms/stream/events.rb +104 -0
  48. data/lib/llms/usage/cost_calculator.rb +75 -0
  49. data/lib/llms/usage/usage_data.rb +46 -0
  50. data/lib/llms.rb +16 -0
  51. metadata +243 -0
data/lib/llms/conversation_tool_result.rb
@@ -0,0 +1,15 @@
+ module LLMs
+   class ConversationToolResult
+     attr_reader :index, :tool_call_id, :tool_call_type, :name, :results, :is_error
+
+     def initialize(index, tool_call_id, tool_call_type, name, results, is_error)
+       raise "index is nil" if index.nil?
+       @index = index
+       @tool_call_id = tool_call_id
+       @tool_call_type = tool_call_type
+       @name = name
+       @results = results
+       @is_error = is_error
+     end
+   end
+ end
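
For orientation, a hypothetical instantiation (all values invented) showing the positional argument order the initializer above expects:

    result = LLMs::ConversationToolResult.new(
      0,              # index (required, may not be nil)
      'toolu_123',    # tool_call_id
      'function',     # tool_call_type
      'get_weather',  # name of the tool that was called
      { temp_c: 21 }, # results payload
      false           # is_error
    )
    result.is_error  # => false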
data/lib/llms/exceptions.rb
@@ -0,0 +1,33 @@
+ module LLMs
+   # Base exception class for all LLMs errors
+   class Error < StandardError; end
+
+   # Configuration errors
+   class ConfigurationError < Error; end
+   class MissingAPIKeyError < ConfigurationError; end
+   class InvalidModelError < ConfigurationError; end
+   class UnsupportedFeatureError < ConfigurationError; end
+   class ModelNotFoundError < ConfigurationError; end
+   class ProviderNotFoundError < ConfigurationError; end
+
+   # API communication errors
+   class APIError < Error; end
+   class RateLimitError < APIError; end
+   class TimeoutError < APIError; end
+   class NetworkError < APIError; end
+   class AuthenticationError < APIError; end
+
+   # Usage and cost calculation errors
+   class UsageError < Error; end
+   class CostCalculationError < UsageError; end
+
+   # Tool-related errors
+   class ToolError < Error; end
+   class ToolExecutionError < ToolError; end
+   class ToolValidationError < ToolError; end
+
+   # Conversation and message errors
+   class ConversationError < Error; end
+   class MessageError < ConversationError; end
+   class InvalidMessageRoleError < MessageError; end
+ end
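
Because every class above descends from LLMs::Error, callers can rescue as narrowly or broadly as they like. A small hypothetical sketch (the call being wrapped is elided):

    begin
      # ... some call into the gem ...
    rescue LLMs::MissingAPIKeyError => e
      warn "Set the provider API key: #{e.message}"
    rescue LLMs::RateLimitError
      sleep 5
      retry
    rescue LLMs::Error => e
      # catch-all for anything else raised by the library
      warn "LLM call failed: #{e.message}"
    end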
data/lib/llms/executors/anthropic_executor.rb
@@ -0,0 +1,247 @@
+ require_relative './base_executor'
+ require_relative '../adapters/anthropic_message_adapter'
+ require_relative '../parsers/anthropic_chat_response_stream_parser'
+ require_relative '../stream/event_emitter'
+ require 'anthropic'
+
+ module LLMs
+   module Executors
+     class AnthropicExecutor < BaseExecutor
+
+       def execute_conversation(conversation, &block)
+         if block_given?
+           stream_conversation(conversation) do |handler|
+             handler.on(:text_delta) do |event|
+               yield event.text
+             end
+             ## TODO configure whether to yield thinking deltas
+             handler.on(:thinking_delta) do |event|
+               yield event.thinking
+             end
+           end
+         else
+           send_conversation(conversation)
+         end
+       end
+
+       def stream_conversation(conversation)
+         init_new_request(conversation)
+
+         emitter = Stream::EventEmitter.new
+         yield emitter if block_given?
+
+         start_time = Time.now
+         begin
+           http_response, stream_parsed_response = stream_client_request(emitter)
+         rescue Faraday::BadRequestError => e
+           @last_error = e.response[:body]
+           return nil
+         end
+         execution_time = Time.now - start_time
+
+         api_response = stream_parsed_response || http_response
+
+         if api_response['error']
+           @last_error = api_response['error']
+           return nil
+         end
+
+         @last_received_message_id = LLMs::Adapters::AnthropicMessageAdapter.find_message_id(api_response)
+         @last_received_message = LLMs::Adapters::AnthropicMessageAdapter.message_from_api_format(api_response)
+         @last_usage_data = calculate_usage(api_response, execution_time)
+
+         @last_received_message
+       end
+
+       def send_conversation(conversation)
+         init_new_request(conversation)
+
+         start_time = Time.now
+         begin
+           http_response = client_request
+         rescue Faraday::BadRequestError => e
+           @last_error = e.response[:body]
+           return nil
+         end
+         execution_time = Time.now - start_time
+
+         if http_response['error']
+           @last_error = http_response['error']
+           return nil
+         end
+
+         @last_received_message_id = LLMs::Adapters::AnthropicMessageAdapter.find_message_id(http_response)
+         @last_received_message = LLMs::Adapters::AnthropicMessageAdapter.message_from_api_format(http_response)
+         @last_usage_data = calculate_usage(http_response, execution_time)
+
+         @last_received_message
+       end
+
+       private
+
+       def init_new_request(conversation)
+         @last_sent_message = conversation.last_message
+         @last_received_message_id = nil
+         @last_received_message = nil
+         @last_usage_data = nil
+         @last_error = nil
+
+         @system_prompt = conversation.system_message
+         @available_tools = conversation.available_tools
+         @formatted_messages = conversation.messages.map.with_index do |message, index|
+           is_last_message = index == conversation.messages.size - 1
+           LLMs::Adapters::AnthropicMessageAdapter.to_api_format(message, caching_enabled? && is_last_message)
+         end
+
+         # Figure out where to put the cache control param if no messages are provided
+         # In reality there should always be a message, but we'll check
+         if caching_enabled? && @formatted_messages.empty?
+           if @available_tools && @available_tools.any?
+             @available_tools.last[:cache_control] = {type: "ephemeral"}
+           elsif @system_prompt && (@system_prompt.is_a?(String) || !@system_prompt[:cache_control])
+             @system_prompt = {type: "text", text: @system_prompt, cache_control: {type: "ephemeral"}}
+           end
+         end
+       end
+
+       def request_params
+         {
+           messages: @formatted_messages,
+           model: @model_name,
+           temperature: @temperature,
+         }.tap do |params|
+           if @system_prompt
+             params[:system] = system_param
+           end
+           if @max_tokens
+             params[:max_tokens] = @max_tokens
+           end
+           ## Will override max_tokens if both are provided
+           if @max_completion_tokens
+             params[:max_tokens] = @max_completion_tokens
+           end
+           if @available_tools && @available_tools.any?
+             params[:tools] = tool_schemas
+           end
+           if @thinking_mode
+             params[:thinking] = { type: 'enabled' }.tap do |thinking_params|
+               if @max_thinking_tokens
+                 thinking_params[:budget_tokens] = @max_thinking_tokens
+               else
+                 # This is the minimum budget for thinking, and is required if thinking is enabled
+                 thinking_params[:budget_tokens] = 1024
+               end
+             end
+           end
+         end
+       end
+
+       def client_request
+         @client.messages(parameters: request_params)
+       end
+
+       def stream_client_request(emitter)
+         parser = Parsers::AnthropicChatResponseStreamParser.new(emitter)
+
+         params = request_params.merge({
+           stream: Proc.new do |data|
+             parser.handle_json(data)
+           end
+         })
+
+         [@client.messages(parameters: params), parser.full_response]
+       end
+
+       def initialize_client
+         @client = Anthropic::Client.new(access_token: fetch_api_key)
+       end
+
+       ## TODO move to adapter
+       def tool_schemas
+         @available_tools.map do |tool|
+           {
+             name: tool.tool_schema[:name],
+             description: tool.tool_schema[:description],
+             input_schema: {
+               type: 'object',
+               properties: tool.tool_schema[:parameters][:properties],
+               required: tool.tool_schema[:parameters][:required]
+             }
+           }
+         end
+       end
+
+       def system_param
+         @system_prompt
+       end
+
+       def calculate_usage(api_response, execution_time)
+         input_tokens = nil
+         output_tokens = nil
+         token_counts = {}
+         cache_was_written = nil
+         cache_was_read = nil
+
+         if usage = api_response['usage']
+           input_tokens = 0
+           output_tokens = 0
+           cache_was_written = false
+           cache_was_read = false
+
+           if it = usage['input_tokens']
+             input_tokens += it
+             token_counts[:input] = it
+           end
+
+           if ccit = usage['cache_creation_input_tokens']
+             input_tokens += ccit
+             if ccit > 0
+               cache_was_written = true
+             end
+           end
+
+           if crit = usage['cache_read_input_tokens']
+             input_tokens += crit
+             token_counts[:cache_read] = crit
+             if crit > 0
+               cache_was_read = true
+             end
+           end
+
+           if cache_details = usage['cache_creation']
+             if it1h = cache_details['ephemeral_1h_input_tokens']
+               token_counts[:cache_write_1hr] = it1h
+               if it1h > 0
+                 cache_was_written = true
+               end
+             end
+             if it5min = cache_details['ephemeral_5min_input_tokens']
+               token_counts[:cache_write_5min] = it5min
+               if it5min > 0
+                 cache_was_written = true
+               end
+             end
+           elsif ccit = usage['cache_creation_input_tokens']
+             # if no details, all caching is 5min
+             token_counts[:cache_write_5min] = ccit
+           end
+
+           if ot = usage['output_tokens']
+             output_tokens += ot
+             token_counts[:output] = ot
+           end
+         end
+
+         {
+           input_tokens: input_tokens,
+           output_tokens: output_tokens,
+           cache_was_written: cache_was_written,
+           cache_was_read: cache_was_read,
+           token_details: token_counts,
+           execution_time: execution_time,
+           estimated_cost: calculate_cost(token_counts)
+         }
+       end
+     end
+   end
+ end
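
To make the control flow above concrete, a minimal usage sketch (not from the gem's docs): the model id is a placeholder, ANTHROPIC_API_KEY is assumed to be set, and pricing/tools are omitted, so estimated_cost will come back nil.

    executor = LLMs::Executors::AnthropicExecutor.new(
      model_name: 'claude-sonnet-4-0',        # placeholder model id
      api_key_env_var: 'ANTHROPIC_API_KEY',
      max_tokens: 1024
    )

    conversation = LLMs::Conversation.new
    conversation.set_system_message('You are terse.')
    conversation.add_user_message('Name three Ruby web servers.')

    # With a block, execute_conversation streams text (and thinking) deltas;
    # without one it falls through to send_conversation and returns the message.
    executor.execute_conversation(conversation) { |delta| print delta }
    puts
    p executor.last_usage_data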
data/lib/llms/executors/base_executor.rb
@@ -0,0 +1,144 @@
+ require_relative '../conversation'
+ require_relative '../models'
+ require_relative '../exceptions'
+
+ module LLMs
+   module Executors
+     class BaseExecutor
+
+       DEFAULT_TEMPERATURE = 0.0
+
+       attr_reader :client, :model_name, :base_url,
+                   :system_prompt, :available_tools, :temperature,
+                   :max_tokens, :max_completion_tokens, :max_thinking_tokens,
+                   :thinking_mode, :thinking_effort,
+                   :last_sent_message, :last_received_message_id, :last_received_message, :last_usage_data, :last_error
+
+       def initialize(**params)
+         raise LLMs::ConfigurationError, "model_name is required" if params[:model_name].nil?
+
+         @model_name = params[:model_name]
+
+         # Connection Info
+         @base_url = params[:base_url]
+         @api_key = params[:api_key] ## Will take precedence over an env var if present
+         @api_key_env_var = params[:api_key_env_var]
+         @pricing = params[:pricing]
+         @exclude_params = params[:exclude_params]
+
+         # Execution Info
+         @system_prompt = params[:system_prompt]
+         @temperature = validate_temperature(params[:temperature] || DEFAULT_TEMPERATURE)
+         @available_tools = params[:tools]
+
+         @cache_prompt = params[:cache_prompt] ## TODO caching is automatic for most models now
+
+         @max_tokens = validate_positive_integer_or_nil(params[:max_tokens], "max_tokens")
+         @max_completion_tokens = validate_positive_integer_or_nil(params[:max_completion_tokens], "max_completion_tokens")
+         @max_thinking_tokens = validate_positive_integer_or_nil(params[:max_thinking_tokens], "max_thinking_tokens")
+
+         @thinking_mode = params.key?(:thinking) ? params[:thinking] : false
+         @thinking_effort = validate_thinking_effort(params[:thinking_effort])
+
+         ##TODO warn if max_tokens is used instead of max_completion_tokens and model is a thinking model (or thinking is enabled)
+
+         @last_sent_message = nil
+         @last_received_message = nil
+         @last_usage_data = nil
+         @last_error = nil
+
+         initialize_client
+       end
+
+       def execute_prompt(prompt, system_prompt: nil, &block)
+         conversation = LLMs::Conversation.new
+         if sp = system_prompt || @system_prompt
+           conversation.set_system_message(sp)
+         end
+         conversation.add_user_message(prompt)
+         response_message = self.execute_conversation(conversation, &block)
+         response_message&.text
+       end
+
+       def execute_conversation(conversation, &block)
+         raise NotImplementedError, "Subclasses must implement execute_conversation"
+       end
+
+       private
+
+       def fetch_api_key
+         if @api_key
+           @api_key
+         elsif @api_key_env_var
+           ENV[@api_key_env_var] || raise("#{@api_key_env_var} not set")
+         else
+           raise LLMs::ConfigurationError, "No API key provided"
+         end
+       end
+
+       def initialize_client
+         raise NotImplementedError, "Subclasses must implement initialize_client"
+       end
+
+       def tool_schemas
+         raise NotImplementedError, "Subclasses must implement tool_schemas"
+       end
+
+       def caching_enabled?
+         @cache_prompt == true ##TODO caching is automatic by default now for non-Anthropic models that support it
+       end
+
+       def calculate_usage(response)
+         raise NotImplementedError, "Subclasses must implement calculate_usage"
+       end
+
+       def validate_thinking_effort(effort)
+         return if effort.nil?
+
+         if effort.to_s.in?(%w[low medium high])
+           effort.to_s
+         else
+           raise LLMs::ConfigurationError, "Thinking effort must be a string 'low', 'medium', or 'high', got: #{effort}"
+         end
+       end
+
+       def validate_temperature(temp)
+         unless temp.is_a?(Numeric) && temp >= 0.0 && temp <= 2.0
+           raise LLMs::ConfigurationError, "Temperature must be a number between 0.0 and 2.0, got: #{temp}"
+         end
+         temp
+       end
+
+       def validate_positive_integer_or_nil(tokens, name)
+         unless tokens.nil? || (tokens.is_a?(Integer) && tokens > 0)
+           raise LLMs::ConfigurationError, "#{name} must be a positive integer, got: #{tokens}"
+         end
+         tokens
+       end
+
+       def calculate_cost(token_counts)
+         return nil unless @pricing
+         return nil if token_counts.nil? || token_counts.empty?
+
+         token_keys = token_counts.keys.map(&:to_s)
+         pricing_keys = @pricing.keys.map(&:to_s)
+
+         missing_keys = token_keys - pricing_keys
+         ## TODO remove this special case. Is it safe to skip all missing keys if the token count is zero for them?
+         unless missing_keys.empty? || (missing_keys.include?('cached_input') && token_counts[:cached_input] == 0)
+           raise LLMs::CostCalculationError, "Pricing missing key: #{missing_keys.join(', ')}"
+         end
+
+         token_keys.reduce(0.0) do |sum, k|
+           key = k.to_sym
+           if token_counts[key] && token_counts[key] > 0 && @pricing[key]
+             sum + (token_counts[key].to_f / 1_000_000.0) * @pricing[key]
+           else
+             sum
+           end
+         end
+       end
+
+     end
+   end
+ end
data/lib/llms/executors/google_gemini_executor.rb
@@ -0,0 +1,212 @@
+ require_relative 'base_executor'
+ require_relative '../apis/google_gemini_api'
+ require_relative '../parsers/google_gemini_chat_response_stream_parser'
+ require_relative '../adapters/google_gemini_message_adapter'
+
+ module LLMs
+   module Executors
+     class GoogleGeminiExecutor < BaseExecutor
+
+       def execute_conversation(conversation, &block)
+         if block_given?
+           stream_conversation(conversation) do |handler|
+             handler.on(:text_delta) do |event|
+               yield event.text
+             end
+           end
+         else
+           send_conversation(conversation)
+         end
+       end
+
+       def stream_conversation(conversation)
+         init_new_request(conversation)
+
+         emitter = Stream::EventEmitter.new
+         yield emitter if block_given?
+
+         start_time = Time.now
+         begin
+           http_response, stream_parsed_response = stream_client_request(emitter)
+         rescue StandardError => e
+           @last_error = {'error' => e.message, 'backtrace' => e.backtrace}
+           return nil
+         end
+         execution_time = Time.now - start_time
+
+         if http_response && http_response['error']
+           @last_error = http_response
+           return nil
+         end
+
+         response_data = stream_parsed_response || http_response
+
+         @last_received_message = LLMs::Adapters::GoogleGeminiMessageAdapter.message_from_api_format(response_data)
+         @last_usage_data = calculate_usage(response_data, execution_time)
+
+         @last_received_message
+       end
+
+       def send_conversation(conversation)
+         init_new_request(conversation)
+
+         start_time = Time.now
+         begin
+           http_response = client_request
+         rescue StandardError => e
+           @last_error = {'error' => e.message, 'backtrace' => e.backtrace}
+           return nil
+         end
+         execution_time = Time.now - start_time
+
+         if http_response && http_response['error']
+           @last_error = http_response
+           return nil
+         end
+
+         @last_received_message = LLMs::Adapters::GoogleGeminiMessageAdapter.message_from_api_format(http_response)
+         if @last_received_message.nil?
+           @last_error = {'error' => 'No message found in the response. Can happen with thinking models if max_tokens is too low.'}
+           return nil
+         end
+
+         @last_received_message_id = "gemini-#{Time.now.to_i}" ## no message id in the response
+         @last_usage_data = calculate_usage(http_response, execution_time)
+
+
+         @last_received_message
+       end
+
+       private
+
+       def init_new_request(conversation)
+         @last_sent_message = conversation.last_message
+         @last_received_message = nil
+         @last_usage_data = nil
+         @last_error = nil
+
+         @formatted_messages = conversation.messages.map do |message|
+           LLMs::Adapters::GoogleGeminiMessageAdapter.to_api_format(message)
+         end
+
+         @available_tools = conversation.available_tools
+       end
+
+       ## TODO we are not inserting fake message_ids nor fake tool_call_ids for the response data, only in streamed events
+
+       def client_request
+         @client.generate_content(@model_name, @formatted_messages, request_params)
+       end
+
+       def stream_client_request(emitter)
+         parser = Parsers::GoogleGeminiChatResponseStreamParser.new(emitter)
+
+         params = request_params.merge(stream: Proc.new { |chunk| parser.add_data(chunk) })
+         http_response = @client.generate_content(@model_name, @formatted_messages, params)
+
+         @last_received_message_id = parser.current_message_id ## no message id in the response
+
+         [http_response, parser.full_response]
+       end
+
+       def request_params
+         generation_config = { temperature: @temperature }.tap do |config|
+           if @max_tokens
+             config[:maxOutputTokens] = @max_tokens
+           end
+           # Will override max_tokens if both are provided
+           if @max_completion_tokens
+             config[:maxOutputTokens] = @max_completion_tokens
+           end
+           if @thinking_mode
+             config[:thinkingConfig] = { includeThoughts: true }.tap do |thinking_config|
+               if @max_thinking_tokens
+                 thinking_config[:thinkingBudget] = @max_thinking_tokens
+               end
+             end
+           end
+         end
+
+         { generationConfig: generation_config }.tap do |params|
+           if @system_prompt
+             params[:system_instruction] = { parts: [{text: @system_prompt}] }
+           end
+           if @available_tools && @available_tools.any?
+             params[:tools] = tool_schemas
+           end
+         end
+       end
+
+       def tool_schemas
+         [
+           {
+             function_declarations: @available_tools.map do |tool|
+               {
+                 name: tool.tool_schema[:name],
+                 description: tool.tool_schema[:description],
+                 parameters: tool.tool_schema[:parameters]
+               }
+             end
+           }
+         ]
+       end
+
+       def calculate_usage(response, execution_time)
+         input_tokens = nil
+         output_tokens = nil
+         cache_was_written = nil
+         cache_was_read = nil
+         token_counts = {}
+
+         ## TODO cache write is never reported in usageMetadata so we can't calculate its cost
+         ## Maybe there is no cost for implicit caching?
+         ## TODO support explicit caching
+
+         if usage_metadata = response['usageMetadata']
+           input_tokens = 0
+           output_tokens = 0
+           cache_was_read = false
+
+           if ptc = usage_metadata['promptTokenCount']
+             input_tokens += ptc
+             token_counts[:input] = ptc
+           end
+
+           if cctc = usage_metadata['cachedContentTokenCount']
+             cache_was_read = true
+             token_counts[:cache_read] = cctc
+             if token_counts[:input]
+               token_counts[:input] -= cctc
+             end
+           end
+
+           if otc = usage_metadata['thoughtsTokenCount']
+             output_tokens += otc
+             token_counts[:output] = otc
+           end
+
+           if ctc = usage_metadata['candidatesTokenCount']
+             output_tokens += ctc
+             token_counts[:output] ||= 0
+             token_counts[:output] += ctc
+           end
+         end
+
+         {
+           input_tokens: input_tokens,
+           output_tokens: output_tokens,
+           cache_was_written: cache_was_written,
+           cache_was_read: cache_was_read,
+           token_details: token_counts,
+           execution_time: execution_time,
+           estimated_cost: calculate_cost(token_counts)
+         }
+       end
+
+       def initialize_client
+         @client = LLMs::APIs::GoogleGeminiAPI.new(fetch_api_key)
+       end
+
+     end
+   end
+ end
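
A minimal hypothetical sketch of the Gemini executor with thinking enabled; the model id and env var name are placeholders:

    executor = LLMs::Executors::GoogleGeminiExecutor.new(
      model_name: 'gemini-2.5-flash',       # placeholder model id
      api_key_env_var: 'GEMINI_API_KEY',
      thinking: true,                       # becomes generationConfig.thinkingConfig
      max_thinking_tokens: 2048,            # becomes thinkingBudget
      max_completion_tokens: 4096           # becomes maxOutputTokens
    )

    puts executor.execute_prompt('Summarize unified diffs in one sentence.')
    p executor.last_error if executor.last_error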
data/lib/llms/executors/hugging_face_executor.rb
@@ -0,0 +1,17 @@
+ require_relative './open_ai_compatible_executor'
+
+ module LLMs
+   module Executors
+     class HuggingFaceExecutor < OpenAICompatibleExecutor
+
+       private
+
+       ## TODO remove need for this class (by supporting e.g. a base_url_template param?)
+       def initialize_client
+         @base_url = "https://api-inference.huggingface.co/models/#{@model_name}/v1"
+         super
+       end
+
+     end
+   end
+ end
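
A rough sketch of the subclass in use; the model id and env var name are invented, and everything beyond the base_url interpolation is inherited from OpenAICompatibleExecutor (not shown in this hunk):

    executor = LLMs::Executors::HuggingFaceExecutor.new(
      model_name: 'meta-llama/Llama-3.1-8B-Instruct',   # placeholder model id
      api_key_env_var: 'HUGGING_FACE_API_KEY'
    )
    executor.base_url
    # => "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct/v1"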