durable-llm 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,301 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file implements the Google provider for accessing Google's Gemini language models through their API, providing completion capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Google's Generative Language API endpoint, processes generateContent requests with text content, handles various API error responses, and includes comprehensive response classes to format Google's API responses into a consistent interface.
4
+
5
+ require 'faraday'
6
+ require 'json'
7
+ require 'durable/llm/errors'
8
+ require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
10
+
11
+ module Durable
12
+ module Llm
13
+ module Providers
14
+ class Google < Durable::Llm::Providers::Base
15
+ BASE_URL = 'https://generativelanguage.googleapis.com'
16
+
17
+ def default_api_key
18
+ begin
19
+ Durable::Llm.configuration.google&.api_key
20
+ rescue NoMethodError
21
+ nil
22
+ end || ENV['GOOGLE_API_KEY']
23
+ end
24
+
25
+ attr_accessor :api_key
26
+
27
+ def initialize(api_key: nil)
28
+ @api_key = api_key || default_api_key
29
+ @conn = Faraday.new(url: BASE_URL) do |faraday|
30
+ faraday.request :json
31
+ faraday.response :json
32
+ faraday.adapter Faraday.default_adapter
33
+ end
34
+ end
35
+
36
+ def completion(options)
37
+ model = options[:model]
38
+ url = "/v1beta/models/#{model}:generateContent?key=#{@api_key}"
39
+
40
+ # Transform options to Google's format
41
+ request_body = transform_options(options)
42
+
43
+ response = @conn.post(url) do |req|
44
+ req.body = request_body
45
+ end
46
+
47
+ handle_response(response)
48
+ end
49
+
50
+ def embedding(model:, input:, **_options)
51
+ url = "/v1beta/models/#{model}:embedContent?key=#{@api_key}"
52
+
53
+ request_body = {
54
+ content: {
55
+ parts: [{ text: input }]
56
+ }
57
+ }
58
+
59
+ response = @conn.post(url) do |req|
60
+ req.body = request_body
61
+ end
62
+
63
+ handle_response(response, GoogleEmbeddingResponse)
64
+ end
65
+
66
+ def models
67
+ # Google doesn't provide a public models API, so return hardcoded list
68
+ [
69
+ 'gemini-1.5-flash',
70
+ 'gemini-1.5-flash-001',
71
+ 'gemini-1.5-flash-002',
72
+ 'gemini-1.5-flash-8b',
73
+ 'gemini-1.5-flash-8b-001',
74
+ 'gemini-1.5-flash-8b-latest',
75
+ 'gemini-1.5-flash-latest',
76
+ 'gemini-1.5-pro',
77
+ 'gemini-1.5-pro-001',
78
+ 'gemini-1.5-pro-002',
79
+ 'gemini-1.5-pro-latest',
80
+ 'gemini-2.0-flash',
81
+ 'gemini-2.0-flash-001',
82
+ 'gemini-2.0-flash-exp',
83
+ 'gemini-2.0-flash-lite',
84
+ 'gemini-2.0-flash-lite-001',
85
+ 'gemini-2.0-flash-live-001',
86
+ 'gemini-2.0-flash-preview-image-generation',
87
+ 'gemini-2.5-flash',
88
+ 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
89
+ 'gemini-2.5-flash-lite',
90
+ 'gemini-2.5-flash-lite-06-17',
91
+ 'gemini-2.5-flash-preview-05-20',
92
+ 'gemini-2.5-flash-preview-native-audio-dialog',
93
+ 'gemini-2.5-flash-preview-tts',
94
+ 'gemini-2.5-pro',
95
+ 'gemini-2.5-pro-preview-tts',
96
+ 'gemini-live-2.5-flash-preview',
97
+ 'text-embedding-004',
98
+ 'text-multilingual-embedding-002'
99
+ ]
100
+ end
101
+
102
        # @return [Boolean] true — this provider supports SSE streaming.
        def self.stream?
          true
        end
105
+
106
+ def stream(options)
107
+ model = options[:model]
108
+ url = "/v1beta/models/#{model}:streamGenerateContent?key=#{@api_key}&alt=sse"
109
+
110
+ request_body = transform_options(options)
111
+
112
+ response = @conn.post(url) do |req|
113
+ req.headers['Accept'] = 'text/event-stream'
114
+ req.body = request_body
115
+
116
+ user_proc = proc do |chunk, _size, _total|
117
+ yield GoogleStreamResponse.new(chunk)
118
+ end
119
+
120
+ req.options.on_data = to_json_stream(user_proc: user_proc)
121
+ end
122
+
123
+ handle_response(response)
124
+ end
125
+
126
+ private
127
+
128
+ def transform_options(options)
129
+ messages = options[:messages] || []
130
+ system_messages = messages.select { |m| m[:role] == 'system' }
131
+ conversation_messages = messages.reject { |m| m[:role] == 'system' }
132
+
133
+ body = {
134
+ contents: conversation_messages.map do |msg|
135
+ {
136
+ role: msg[:role] == 'assistant' ? 'model' : 'user',
137
+ parts: [{ text: msg[:content] }]
138
+ }
139
+ end
140
+ }
141
+
142
+ if system_messages.any?
143
+ body[:systemInstruction] = {
144
+ parts: [{ text: system_messages.map { |m| m[:content] }.join("\n") }]
145
+ }
146
+ end
147
+
148
+ generation_config = {}
149
+ generation_config[:temperature] = options[:temperature] if options[:temperature]
150
+ generation_config[:maxOutputTokens] = options[:max_tokens] if options[:max_tokens]
151
+ generation_config[:topP] = options[:top_p] if options[:top_p]
152
+ generation_config[:topK] = options[:top_k] if options[:top_k]
153
+
154
+ body[:generationConfig] = generation_config unless generation_config.empty?
155
+
156
+ body
157
+ end
158
+
159
        # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
        # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
        #
        # Wraps user_proc in a callback suitable for Faraday's on_data hook:
        # on a non-200 status the chunk is routed through Faraday's RaiseError
        # middleware (so HTTP errors surface as exceptions), otherwise each
        # SSE data payload — except the '[DONE]' sentinel — is parsed as JSON
        # and handed to user_proc.
        #
        # @param user_proc [Proc] receives each parsed JSON event
        # @return [Proc] an on_data callback (chunk, bytes, env)
        def to_json_stream(user_proc:)
          parser = EventStreamParser::Parser.new

          proc do |chunk, _bytes, env|
            if env && env.status != 200
              raise_error = Faraday::Response::RaiseError.new
              raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
            end

            parser.feed(chunk) do |_type, data|
              user_proc.call(JSON.parse(data)) unless data == '[DONE]'
            end
          end
        end
175
+
176
+ def try_parse_json(maybe_json)
177
+ JSON.parse(maybe_json)
178
+ rescue JSON::ParserError
179
+ maybe_json
180
+ end
181
+
182
        # Dispatches on the HTTP status: wraps 2xx bodies in response_class,
        # raises a mapped library error for everything else.
        #
        # @param response [Faraday::Response]
        # @param response_class [Class] wrapper used for successful bodies
        # @return [Object] an instance of response_class
        # @raise [Durable::Llm::AuthenticationError] on 401
        # @raise [Durable::Llm::RateLimitError] on 429
        # @raise [Durable::Llm::InvalidRequestError] on other 4xx
        # @raise [Durable::Llm::ServerError] on 5xx
        # @raise [Durable::Llm::APIError] on anything outside 2xx-5xx
        def handle_response(response, response_class = GoogleResponse)
          case response.status
          when 200..299
            response_class.new(response.body)
          when 401
            raise Durable::Llm::AuthenticationError, parse_error_message(response)
          when 429
            raise Durable::Llm::RateLimitError, parse_error_message(response)
          when 400..499
            raise Durable::Llm::InvalidRequestError, parse_error_message(response)
          when 500..599
            raise Durable::Llm::ServerError, parse_error_message(response)
          else
            raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
          end
        end
198
+
199
+ def parse_error_message(response)
200
+ body = begin
201
+ JSON.parse(response.body)
202
+ rescue StandardError
203
+ nil
204
+ end
205
+ message = body&.dig('error', 'message') || response.body
206
+ "#{response.status} Error: #{message}"
207
+ end
208
+
209
+ class GoogleResponse
210
+ attr_reader :raw_response
211
+
212
+ def initialize(response)
213
+ @raw_response = response
214
+ end
215
+
216
+ def choices
217
+ [GoogleChoice.new(@raw_response['candidates']&.first)]
218
+ end
219
+
220
+ def to_s
221
+ choices.map(&:to_s).join(' ')
222
+ end
223
+ end
224
+
225
+ class GoogleChoice
226
+ attr_reader :message
227
+
228
+ def initialize(candidate)
229
+ @message = GoogleMessage.new(candidate&.dig('content', 'parts')&.first)
230
+ end
231
+
232
+ def to_s
233
+ @message.to_s
234
+ end
235
+ end
236
+
237
+ class GoogleMessage
238
+ attr_reader :content
239
+
240
+ def initialize(part)
241
+ @content = part&.dig('text') || ''
242
+ end
243
+
244
+ def to_s
245
+ @content
246
+ end
247
+ end
248
+
249
+ class GoogleStreamResponse
250
+ attr_reader :choices
251
+
252
+ def initialize(parsed)
253
+ @choices = [GoogleStreamChoice.new(parsed)]
254
+ end
255
+
256
+ def to_s
257
+ @choices.map(&:to_s).join
258
+ end
259
+ end
260
+
261
+ class GoogleStreamChoice
262
+ attr_reader :delta
263
+
264
+ def initialize(parsed)
265
+ @delta = GoogleStreamDelta.new(parsed.dig('candidates', 0, 'content', 'parts', 0))
266
+ end
267
+
268
+ def to_s
269
+ @delta.to_s
270
+ end
271
+ end
272
+
273
+ class GoogleStreamDelta
274
+ attr_reader :content
275
+
276
+ def initialize(part)
277
+ @content = part&.dig('text') || ''
278
+ end
279
+
280
+ def to_s
281
+ @content
282
+ end
283
+ end
284
+
285
+ class GoogleEmbeddingResponse
286
+ attr_reader :embedding
287
+
288
+ def initialize(data)
289
+ @embedding = data.dig('embedding', 'values')
290
+ end
291
+
292
+ def to_a
293
+ @embedding
294
+ end
295
+ end
296
+ end
297
+ end
298
+ end
299
+ end
300
+
301
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
@@ -1,7 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Groq provider for OpenAI-compatible API access.
4
+
1
5
  require 'faraday'
2
6
  require 'json'
3
7
  require 'durable/llm/errors'
4
8
  require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
5
10
 
6
11
  module Durable
7
12
  module Llm
@@ -15,21 +20,16 @@ module Durable
15
20
 
16
21
  attr_accessor :api_key
17
22
 
18
- def self.conn
19
- Faraday.new(url: BASE_URL) do |faraday|
23
+ def initialize(api_key: nil)
24
+ super
25
+ @conn = Faraday.new(url: BASE_URL) do |faraday|
20
26
  faraday.request :json
21
27
  faraday.response :json
22
28
  faraday.adapter Faraday.default_adapter
23
29
  end
24
30
  end
25
31
 
26
- def conn
27
- self.class.conn
28
- end
29
-
30
- def initialize(api_key: nil)
31
- @api_key = api_key || default_api_key
32
- end
32
+ attr_reader :conn
33
33
 
34
34
  def completion(options)
35
35
  response = conn.post('chat/completions') do |req|
@@ -46,7 +46,7 @@ module Durable
46
46
  req.body = { model: model, input: input, **options }
47
47
  end
48
48
 
49
- handle_response(response)
49
+ handle_response(response, GroqEmbeddingResponse)
50
50
  end
51
51
 
52
52
  def models
@@ -60,23 +60,85 @@ module Durable
60
60
  end
61
61
 
62
62
  def self.stream?
63
- false
63
+ true
64
+ end
65
+
66
+ def stream(options)
67
+ options[:stream] = true
68
+
69
+ response = conn.post('chat/completions') do |req|
70
+ req.headers['Authorization'] = "Bearer #{@api_key}"
71
+ req.headers['Accept'] = 'text/event-stream'
72
+
73
+ options['temperature'] = options['temperature'].to_f if options['temperature']
74
+
75
+ req.body = options
76
+
77
+ user_proc = proc do |chunk, _size, _total|
78
+ yield GroqStreamResponse.new(chunk)
79
+ end
80
+
81
+ req.options.on_data = to_json_stream(user_proc: user_proc)
82
+ end
83
+
84
+ handle_response(response)
64
85
  end
65
86
 
66
87
  private
67
88
 
68
- def handle_response(response)
89
+ # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
90
+ # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
91
+ # Given a proc, returns an outer proc that can be used to iterate over a JSON stream of chunks.
92
+ # For each chunk, the inner user_proc is called giving it the JSON object. The JSON object could
93
+ # be a data object or an error object as described in the OpenAI API documentation.
94
+ #
95
+ # @param user_proc [Proc] The inner proc to call for each JSON object in the chunk.
96
+ # @return [Proc] An outer proc that iterates over a raw stream, converting it to JSON.
97
+ def to_json_stream(user_proc:)
98
+ parser = EventStreamParser::Parser.new
99
+
100
+ proc do |chunk, _bytes, env|
101
+ if env && env.status != 200
102
+ raise_error = Faraday::Response::RaiseError.new
103
+ raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
104
+ end
105
+
106
+ parser.feed(chunk) do |_type, data|
107
+ user_proc.call(JSON.parse(data)) unless data == '[DONE]'
108
+ end
109
+ end
110
+ end
111
+
112
+ def try_parse_json(maybe_json)
113
+ JSON.parse(maybe_json)
114
+ rescue JSON::ParserError
115
+ maybe_json
116
+ end
117
+
118
+ def parse_error_message(response)
119
+ body = begin
120
+ JSON.parse(response.body)
121
+ rescue StandardError
122
+ nil
123
+ end
124
+ message = body&.dig('error', 'message') || response.body
125
+ "#{response.status} Error: #{message}"
126
+ end
127
+
128
+ # END-CODE-FROM
129
+
130
+ def handle_response(response, response_class = GroqResponse)
69
131
  case response.status
70
132
  when 200..299
71
- GroqResponse.new(response.body)
133
+ response_class.new(response.body)
72
134
  when 401
73
- raise Durable::Llm::AuthenticationError, response.body['error']['message']
135
+ raise Durable::Llm::AuthenticationError, parse_error_message(response)
74
136
  when 429
75
- raise Durable::Llm::RateLimitError, response.body['error']['message']
137
+ raise Durable::Llm::RateLimitError, parse_error_message(response)
76
138
  when 400..499
77
- raise Durable::Llm::InvalidRequestError, response.body['error']['message']
139
+ raise Durable::Llm::InvalidRequestError, parse_error_message(response)
78
140
  when 500..599
79
- raise Durable::Llm::ServerError, response.body['error']['message']
141
+ raise Durable::Llm::ServerError, parse_error_message(response)
80
142
  else
81
143
  raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
82
144
  end
@@ -93,6 +155,14 @@ module Durable
93
155
  @raw_response['choices'].map { |choice| GroqChoice.new(choice) }
94
156
  end
95
157
 
158
+ def data
159
+ @raw_response['data']
160
+ end
161
+
162
+ def embedding
163
+ @raw_response.dig('data', 0, 'embedding')
164
+ end
165
+
96
166
  def to_s
97
167
  choices.map(&:to_s).join(' ')
98
168
  end
@@ -131,15 +201,12 @@ module Durable
131
201
  class GroqStreamResponse
132
202
  attr_reader :choices
133
203
 
134
- def initialize(fragment)
135
- json_frag = fragment.split('data: ').last.strip
136
- puts json_frag
137
- parsed = JSON.parse(json_frag)
138
- @choices = parsed['choices'].map { |choice| GroqStreamChoice.new(choice) }
204
+ def initialize(parsed)
205
+ @choices = GroqStreamChoice.new(parsed['choices'])
139
206
  end
140
207
 
141
208
  def to_s
142
- @choices.map(&:to_s).join(' ')
209
+ @choices.to_s
143
210
  end
144
211
  end
145
212
 
@@ -147,8 +214,9 @@ module Durable
147
214
  attr_reader :delta, :finish_reason
148
215
 
149
216
  def initialize(choice)
150
- @delta = GroqStreamDelta.new(choice['delta'])
151
- @finish_reason = choice['finish_reason']
217
+ @choice = [choice].flatten.first
218
+ @delta = GroqStreamDelta.new(@choice['delta'])
219
+ @finish_reason = @choice['finish_reason']
152
220
  end
153
221
 
154
222
  def to_s
@@ -168,7 +236,21 @@ module Durable
168
236
  @content || ''
169
237
  end
170
238
  end
239
+
240
+ class GroqEmbeddingResponse
241
+ attr_reader :embedding
242
+
243
+ def initialize(data)
244
+ @embedding = data.dig('data', 0, 'embedding')
245
+ end
246
+
247
+ def to_a
248
+ @embedding
249
+ end
250
+ end
171
251
  end
172
252
  end
173
253
  end
174
254
  end
255
+
256
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
@@ -1,13 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file implements the Hugging Face provider for accessing Hugging Face's inference API models.
4
+
1
5
  require 'faraday'
2
6
  require 'json'
3
7
  require 'durable/llm/errors'
4
8
  require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
5
10
 
6
11
  module Durable
7
12
  module Llm
8
13
  module Providers
14
+ # Hugging Face provider for accessing Hugging Face's inference API models.
15
+ #
16
+ # Provides completion, embedding, and streaming capabilities with authentication
17
+ # handling, error management, and response normalization.
9
18
  class Huggingface < Durable::Llm::Providers::Base
10
- BASE_URL = 'https://api-inference.huggingface.co/models'
19
+ BASE_URL = 'https://api-inference.huggingface.co'
11
20
 
12
21
  def default_api_key
13
22
  Durable::Llm.configuration.huggingface&.api_key || ENV['HUGGINGFACE_API_KEY']
@@ -22,11 +31,12 @@ module Durable
22
31
  faraday.response :json
23
32
  faraday.adapter Faraday.default_adapter
24
33
  end
34
+ super()
25
35
  end
26
36
 
27
37
  def completion(options)
28
38
  model = options.delete(:model) || 'gpt2'
29
- response = @conn.post("/#{model}") do |req|
39
+ response = @conn.post("models/#{model}") do |req|
30
40
  req.headers['Authorization'] = "Bearer #{@api_key}"
31
41
  req.body = options
32
42
  end
@@ -34,33 +44,84 @@ module Durable
34
44
  handle_response(response)
35
45
  end
36
46
 
47
+ def embedding(model:, input:, **options)
48
+ response = @conn.post("models/#{model}") do |req|
49
+ req.headers['Authorization'] = "Bearer #{@api_key}"
50
+ req.body = { inputs: input, **options }
51
+ end
52
+
53
+ handle_response(response, HuggingfaceEmbeddingResponse)
54
+ end
55
+
37
56
  def models
38
57
  self.class.models
39
58
  end
40
59
 
60
+ def self.stream?
61
+ true
62
+ end
63
+
64
+ def stream(options)
65
+ model = options.delete(:model) || 'gpt2'
66
+ options[:stream] = true
67
+
68
+ @conn.post("models/#{model}") do |req|
69
+ req.headers['Authorization'] = "Bearer #{@api_key}"
70
+ req.headers['Accept'] = 'text/event-stream'
71
+ req.body = options
72
+ req.options.on_data = to_json_stream(user_proc: proc { |chunk|
73
+ yield HuggingfaceStreamResponse.new(chunk)
74
+ })
75
+ end
76
+ end
77
+
41
78
  def self.models
42
79
  %w[gpt2 bert-base-uncased distilbert-base-uncased] # could use expansion
43
80
  end
44
81
 
45
82
  private
46
83
 
47
- def handle_response(response)
48
- case response.status
49
- when 200..299
50
- HuggingfaceResponse.new(response.body)
51
- when 401
52
- raise Durable::Llm::AuthenticationError, response.body['error']
53
- when 429
54
- raise Durable::Llm::RateLimitError, response.body['error']
55
- when 400..499
56
- raise Durable::Llm::InvalidRequestError, response.body['error']
57
- when 500..599
58
- raise Durable::Llm::ServerError, response.body['error']
59
- else
60
- raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
84
+ # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
85
+ # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
86
+ def to_json_stream(user_proc:)
87
+ parser = EventStreamParser::Parser.new
88
+
89
+ proc do |chunk, _bytes, env|
90
+ if env && env.status != 200
91
+ raise_error = Faraday::Response::RaiseError.new
92
+ raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
93
+ end
94
+
95
+ parser.feed(chunk) do |_type, data|
96
+ user_proc.call(JSON.parse(data)) unless data == '[DONE]'
97
+ end
98
+ end
99
+ end
100
+
101
+ def try_parse_json(maybe_json)
102
+ JSON.parse(maybe_json)
103
+ rescue JSON::ParserError
104
+ maybe_json
105
+ end
106
+
107
+ def handle_response(response, response_class = HuggingfaceResponse)
108
+ return response_class.new(response.body) if (200..299).cover?(response.status)
109
+
110
+ error_class = error_class_for_status(response.status)
111
+ raise error_class, response.body['error'] || "HTTP #{response.status}"
112
+ end
113
+
114
+ def error_class_for_status(status)
115
+ case status
116
+ when 401 then Durable::Llm::AuthenticationError
117
+ when 429 then Durable::Llm::RateLimitError
118
+ when 400..499 then Durable::Llm::InvalidRequestError
119
+ when 500..599 then Durable::Llm::ServerError
120
+ else Durable::Llm::APIError
61
121
  end
62
122
  end
63
123
 
124
+ # Response wrapper for Hugging Face completion API responses.
64
125
  class HuggingfaceResponse
65
126
  attr_reader :raw_response
66
127
 
@@ -69,7 +130,7 @@ module Durable
69
130
  end
70
131
 
71
132
  def choices
72
- [@raw_response.first].map { |choice| HuggingfaceChoice.new(choice) }
133
+ [HuggingfaceChoice.new(@raw_response)]
73
134
  end
74
135
 
75
136
  def to_s
@@ -77,6 +138,7 @@ module Durable
77
138
  end
78
139
  end
79
140
 
141
+ # Individual choice from Hugging Face completion response.
80
142
  class HuggingfaceChoice
81
143
  attr_reader :text
82
144
 
@@ -88,7 +150,48 @@ module Durable
88
150
  @text
89
151
  end
90
152
  end
153
+
154
+ # Response wrapper for Hugging Face embedding API responses.
155
+ class HuggingfaceEmbeddingResponse
156
+ attr_reader :embedding
157
+
158
+ def initialize(data)
159
+ @embedding = data
160
+ end
161
+
162
+ def to_a
163
+ @embedding
164
+ end
165
+ end
166
+
167
        # Response wrapper for Hugging Face streaming API responses.
        # Wraps one parsed SSE event in a HuggingfaceStreamToken.
        class HuggingfaceStreamResponse
          attr_reader :token

          # @param parsed [Hash] parsed JSON of a single stream event
          def initialize(parsed)
            @token = HuggingfaceStreamToken.new(parsed)
          end

          # @return [String] the token text for this chunk
          def to_s
            @token.to_s
          end
        end
179
+
180
+ # Individual token from Hugging Face streaming response.
181
+ class HuggingfaceStreamToken
182
+ attr_reader :text
183
+
184
+ def initialize(token)
185
+ @text = token['token']['text']
186
+ end
187
+
188
+ def to_s
189
+ @text || ''
190
+ end
191
+ end
91
192
  end
92
193
  end
93
194
  end
94
195
  end
196
+
197
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.