durable-llm 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.envrc +7 -0
  3. data/CHANGELOG.md +5 -0
  4. data/CLI.md +0 -2
  5. data/Gemfile +7 -9
  6. data/README.md +564 -30
  7. data/Rakefile +16 -6
  8. data/devenv.lock +171 -0
  9. data/devenv.nix +12 -0
  10. data/devenv.yaml +8 -0
  11. data/durable-llm.gemspec +52 -0
  12. data/examples/openai_quick_complete.rb +4 -2
  13. data/lib/durable/llm/cli.rb +218 -22
  14. data/lib/durable/llm/client.rb +228 -8
  15. data/lib/durable/llm/configuration.rb +163 -10
  16. data/lib/durable/llm/convenience.rb +102 -0
  17. data/lib/durable/llm/errors.rb +185 -0
  18. data/lib/durable/llm/provider_utilities.rb +201 -0
  19. data/lib/durable/llm/providers/anthropic.rb +232 -24
  20. data/lib/durable/llm/providers/azure_openai.rb +347 -0
  21. data/lib/durable/llm/providers/base.rb +220 -11
  22. data/lib/durable/llm/providers/cohere.rb +157 -11
  23. data/lib/durable/llm/providers/deepseek.rb +233 -0
  24. data/lib/durable/llm/providers/fireworks.rb +304 -0
  25. data/lib/durable/llm/providers/google.rb +327 -0
  26. data/lib/durable/llm/providers/groq.rb +133 -25
  27. data/lib/durable/llm/providers/huggingface.rb +120 -17
  28. data/lib/durable/llm/providers/mistral.rb +431 -0
  29. data/lib/durable/llm/providers/openai.rb +150 -4
  30. data/lib/durable/llm/providers/opencode.rb +253 -0
  31. data/lib/durable/llm/providers/openrouter.rb +256 -0
  32. data/lib/durable/llm/providers/perplexity.rb +273 -0
  33. data/lib/durable/llm/providers/together.rb +346 -0
  34. data/lib/durable/llm/providers/xai.rb +355 -0
  35. data/lib/durable/llm/providers.rb +113 -13
  36. data/lib/durable/llm/response_helpers.rb +185 -0
  37. data/lib/durable/llm/version.rb +5 -1
  38. data/lib/durable/llm.rb +214 -1
  39. data/lib/durable.rb +29 -4
  40. data/sig/durable/llm.rbs +303 -1
  41. metadata +106 -28
  42. data/Gemfile.lock +0 -103
@@ -1,11 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Groq provider for OpenAI-compatible API access.
4
+
1
5
  require 'faraday'
2
6
  require 'json'
3
7
  require 'durable/llm/errors'
4
8
  require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
5
10
 
6
11
  module Durable
7
12
  module Llm
8
13
  module Providers
14
+ # Groq provider for accessing language models via OpenAI-compatible API.
15
+ #
16
+ # Provides completion, embedding, and streaming capabilities with proper
17
+ # error handling and response normalization.
9
18
  class Groq < Durable::Llm::Providers::Base
10
19
  BASE_URL = 'https://api.groq.com/openai/v1'
11
20
 
@@ -15,21 +24,16 @@ module Durable
15
24
 
16
25
  attr_accessor :api_key
17
26
 
18
- def self.conn
19
- Faraday.new(url: BASE_URL) do |faraday|
27
+ def initialize(api_key: nil)
28
+ super
29
+ @conn = Faraday.new(url: BASE_URL) do |faraday|
20
30
  faraday.request :json
21
31
  faraday.response :json
22
32
  faraday.adapter Faraday.default_adapter
23
33
  end
24
34
  end
25
35
 
26
- def conn
27
- self.class.conn
28
- end
29
-
30
- def initialize(api_key: nil)
31
- @api_key = api_key || default_api_key
32
- end
36
+ attr_reader :conn
33
37
 
34
38
  def completion(options)
35
39
  response = conn.post('chat/completions') do |req|
@@ -46,7 +50,7 @@ module Durable
46
50
  req.body = { model: model, input: input, **options }
47
51
  end
48
52
 
49
- handle_response(response)
53
+ handle_response(response, GroqEmbeddingResponse)
50
54
  end
51
55
 
52
56
  def models
@@ -60,28 +64,94 @@ module Durable
60
64
  end
61
65
 
62
66
  def self.stream?
63
- false
67
+ true
68
+ end
69
+
70
+ def stream(options)
71
+ options[:stream] = true
72
+
73
+ response = conn.post('chat/completions') do |req|
74
+ req.headers['Authorization'] = "Bearer #{@api_key}"
75
+ req.headers['Accept'] = 'text/event-stream'
76
+
77
+ options['temperature'] = options['temperature'].to_f if options['temperature']
78
+
79
+ req.body = options
80
+
81
+ user_proc = proc do |chunk, _size, _total|
82
+ yield GroqStreamResponse.new(chunk)
83
+ end
84
+
85
+ req.options.on_data = to_json_stream(user_proc: user_proc)
86
+ end
87
+
88
+ handle_response(response)
64
89
  end
65
90
 
66
91
  private
67
92
 
68
- def handle_response(response)
93
+ # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
94
+ # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
95
+ # Given a proc, returns an outer proc that can be used to iterate over a JSON stream of chunks.
96
+ # For each chunk, the inner user_proc is called giving it the JSON object. The JSON object could
97
+ # be a data object or an error object as described in the OpenAI API documentation.
98
+ #
99
+ # @param user_proc [Proc] The inner proc to call for each JSON object in the chunk.
100
+ # @return [Proc] An outer proc that iterates over a raw stream, converting it to JSON.
101
+ def to_json_stream(user_proc:)
102
+ parser = EventStreamParser::Parser.new
103
+
104
+ proc do |chunk, _bytes, env|
105
+ if env && env.status != 200
106
+ raise_error = Faraday::Response::RaiseError.new
107
+ raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
108
+ end
109
+
110
+ parser.feed(chunk) do |_type, data|
111
+ user_proc.call(JSON.parse(data)) unless data == '[DONE]'
112
+ end
113
+ end
114
+ end
115
+
116
+ def try_parse_json(maybe_json)
117
+ JSON.parse(maybe_json)
118
+ rescue JSON::ParserError
119
+ maybe_json
120
+ end
121
+
122
+ def parse_error_message(response)
123
+ body = begin
124
+ JSON.parse(response.body)
125
+ rescue StandardError
126
+ nil
127
+ end
128
+ message = body&.dig('error', 'message') || response.body
129
+ "#{response.status} Error: #{message}"
130
+ end
131
+
132
+ # END-CODE-FROM
133
+
134
+ def handle_response(response, response_class = GroqResponse)
69
135
  case response.status
70
136
  when 200..299
71
- GroqResponse.new(response.body)
137
+ response_class.new(response.body)
72
138
  when 401
73
- raise Durable::Llm::AuthenticationError, response.body['error']['message']
139
+ raise Durable::Llm::AuthenticationError, parse_error_message(response)
74
140
  when 429
75
- raise Durable::Llm::RateLimitError, response.body['error']['message']
141
+ raise Durable::Llm::RateLimitError, parse_error_message(response)
76
142
  when 400..499
77
- raise Durable::Llm::InvalidRequestError, response.body['error']['message']
143
+ raise Durable::Llm::InvalidRequestError, parse_error_message(response)
78
144
  when 500..599
79
- raise Durable::Llm::ServerError, response.body['error']['message']
145
+ raise Durable::Llm::ServerError, parse_error_message(response)
80
146
  else
81
147
  raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
82
148
  end
83
149
  end
84
150
 
151
+ # Response object for Groq chat API responses.
152
+ #
153
+ # Wraps the raw response and provides a consistent interface for accessing
154
+ # message content, embeddings, and metadata.
85
155
  class GroqResponse
86
156
  attr_reader :raw_response
87
157
 
@@ -93,6 +163,14 @@ module Durable
93
163
  @raw_response['choices'].map { |choice| GroqChoice.new(choice) }
94
164
  end
95
165
 
166
+ def data
167
+ @raw_response['data']
168
+ end
169
+
170
+ def embedding
171
+ @raw_response.dig('data', 0, 'embedding')
172
+ end
173
+
96
174
  def to_s
97
175
  choices.map(&:to_s).join(' ')
98
176
  end
@@ -102,6 +180,9 @@ module Durable
102
180
  end
103
181
  end
104
182
 
183
+ # Represents a single choice in a Groq response.
184
+ #
185
+ # Contains the message and finish reason for the choice.
105
186
  class GroqChoice
106
187
  attr_reader :message, :finish_reason
107
188
 
@@ -115,6 +196,9 @@ module Durable
115
196
  end
116
197
  end
117
198
 
199
+ # Represents a message in a Groq conversation.
200
+ #
201
+ # Messages have a role (user, assistant, system) and text content.
118
202
  class GroqMessage
119
203
  attr_reader :role, :content
120
204
 
@@ -128,27 +212,31 @@ module Durable
128
212
  end
129
213
  end
130
214
 
215
+ # Response object for streaming Groq chat chunks.
216
+ #
217
+ # Wraps individual chunks from the Server-Sent Events stream.
131
218
  class GroqStreamResponse
132
219
  attr_reader :choices
133
220
 
134
- def initialize(fragment)
135
- json_frag = fragment.split('data: ').last.strip
136
- puts json_frag
137
- parsed = JSON.parse(json_frag)
138
- @choices = parsed['choices'].map { |choice| GroqStreamChoice.new(choice) }
221
+ def initialize(parsed)
222
+ @choices = GroqStreamChoice.new(parsed['choices'])
139
223
  end
140
224
 
141
225
  def to_s
142
- @choices.map(&:to_s).join(' ')
226
+ @choices.to_s
143
227
  end
144
228
  end
145
229
 
230
+ # Represents a single choice in a streaming Groq response chunk.
231
+ #
232
+ # Contains the delta (incremental content) and finish reason for the choice.
146
233
  class GroqStreamChoice
147
234
  attr_reader :delta, :finish_reason
148
235
 
149
236
  def initialize(choice)
150
- @delta = GroqStreamDelta.new(choice['delta'])
151
- @finish_reason = choice['finish_reason']
237
+ @choice = [choice].flatten.first
238
+ @delta = GroqStreamDelta.new(@choice['delta'])
239
+ @finish_reason = @choice['finish_reason']
152
240
  end
153
241
 
154
242
  def to_s
@@ -156,6 +244,9 @@ module Durable
156
244
  end
157
245
  end
158
246
 
247
+ # Represents the incremental content delta in a streaming response.
248
+ #
249
+ # Contains the role and text content of the delta.
159
250
  class GroqStreamDelta
160
251
  attr_reader :role, :content
161
252
 
@@ -168,7 +259,24 @@ module Durable
168
259
  @content || ''
169
260
  end
170
261
  end
262
+
263
+ # Response object for Groq embedding API responses.
264
+ #
265
+ # Wraps embedding data and provides array access to the vector representation.
266
+ class GroqEmbeddingResponse
267
+ attr_reader :embedding
268
+
269
+ def initialize(data)
270
+ @embedding = data.dig('data', 0, 'embedding')
271
+ end
272
+
273
+ def to_a
274
+ @embedding
275
+ end
276
+ end
171
277
  end
172
278
  end
173
279
  end
174
280
  end
281
+
282
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
@@ -1,13 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file implements the Hugging Face provider for accessing Hugging Face's inference API models.
4
+
1
5
  require 'faraday'
2
6
  require 'json'
3
7
  require 'durable/llm/errors'
4
8
  require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
5
10
 
6
11
  module Durable
7
12
  module Llm
8
13
  module Providers
14
+ # Hugging Face provider for accessing Hugging Face's inference API models.
15
+ #
16
+ # Provides completion, embedding, and streaming capabilities with authentication
17
+ # handling, error management, and response normalization.
9
18
  class Huggingface < Durable::Llm::Providers::Base
10
- BASE_URL = 'https://api-inference.huggingface.co/models'
19
+ BASE_URL = 'https://api-inference.huggingface.co'
11
20
 
12
21
  def default_api_key
13
22
  Durable::Llm.configuration.huggingface&.api_key || ENV['HUGGINGFACE_API_KEY']
@@ -22,11 +31,12 @@ module Durable
22
31
  faraday.response :json
23
32
  faraday.adapter Faraday.default_adapter
24
33
  end
34
+ super()
25
35
  end
26
36
 
27
37
  def completion(options)
28
38
  model = options.delete(:model) || 'gpt2'
29
- response = @conn.post("/#{model}") do |req|
39
+ response = @conn.post("models/#{model}") do |req|
30
40
  req.headers['Authorization'] = "Bearer #{@api_key}"
31
41
  req.body = options
32
42
  end
@@ -34,33 +44,84 @@ module Durable
34
44
  handle_response(response)
35
45
  end
36
46
 
47
+ def embedding(model:, input:, **options)
48
+ response = @conn.post("models/#{model}") do |req|
49
+ req.headers['Authorization'] = "Bearer #{@api_key}"
50
+ req.body = { inputs: input, **options }
51
+ end
52
+
53
+ handle_response(response, HuggingfaceEmbeddingResponse)
54
+ end
55
+
37
56
  def models
38
57
  self.class.models
39
58
  end
40
59
 
60
+ def self.stream?
61
+ true
62
+ end
63
+
64
+ def stream(options)
65
+ model = options.delete(:model) || 'gpt2'
66
+ options[:stream] = true
67
+
68
+ @conn.post("models/#{model}") do |req|
69
+ req.headers['Authorization'] = "Bearer #{@api_key}"
70
+ req.headers['Accept'] = 'text/event-stream'
71
+ req.body = options
72
+ req.options.on_data = to_json_stream(user_proc: proc { |chunk|
73
+ yield HuggingfaceStreamResponse.new(chunk)
74
+ })
75
+ end
76
+ end
77
+
41
78
  def self.models
42
79
  %w[gpt2 bert-base-uncased distilbert-base-uncased] # could use expansion
43
80
  end
44
81
 
45
82
  private
46
83
 
47
- def handle_response(response)
48
- case response.status
49
- when 200..299
50
- HuggingfaceResponse.new(response.body)
51
- when 401
52
- raise Durable::Llm::AuthenticationError, response.body['error']
53
- when 429
54
- raise Durable::Llm::RateLimitError, response.body['error']
55
- when 400..499
56
- raise Durable::Llm::InvalidRequestError, response.body['error']
57
- when 500..599
58
- raise Durable::Llm::ServerError, response.body['error']
59
- else
60
- raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
84
+ # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
85
+ # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
86
+ def to_json_stream(user_proc:)
87
+ parser = EventStreamParser::Parser.new
88
+
89
+ proc do |chunk, _bytes, env|
90
+ if env && env.status != 200
91
+ raise_error = Faraday::Response::RaiseError.new
92
+ raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
93
+ end
94
+
95
+ parser.feed(chunk) do |_type, data|
96
+ user_proc.call(JSON.parse(data)) unless data == '[DONE]'
97
+ end
98
+ end
99
+ end
100
+
101
+ def try_parse_json(maybe_json)
102
+ JSON.parse(maybe_json)
103
+ rescue JSON::ParserError
104
+ maybe_json
105
+ end
106
+
107
+ def handle_response(response, response_class = HuggingfaceResponse)
108
+ return response_class.new(response.body) if (200..299).cover?(response.status)
109
+
110
+ error_class = error_class_for_status(response.status)
111
+ raise error_class, response.body['error'] || "HTTP #{response.status}"
112
+ end
113
+
114
+ def error_class_for_status(status)
115
+ case status
116
+ when 401 then Durable::Llm::AuthenticationError
117
+ when 429 then Durable::Llm::RateLimitError
118
+ when 400..499 then Durable::Llm::InvalidRequestError
119
+ when 500..599 then Durable::Llm::ServerError
120
+ else Durable::Llm::APIError
61
121
  end
62
122
  end
63
123
 
124
+ # Response wrapper for Hugging Face completion API responses.
64
125
  class HuggingfaceResponse
65
126
  attr_reader :raw_response
66
127
 
@@ -69,7 +130,7 @@ module Durable
69
130
  end
70
131
 
71
132
  def choices
72
- [@raw_response.first].map { |choice| HuggingfaceChoice.new(choice) }
133
+ [HuggingfaceChoice.new(@raw_response)]
73
134
  end
74
135
 
75
136
  def to_s
@@ -77,6 +138,7 @@ module Durable
77
138
  end
78
139
  end
79
140
 
141
+ # Individual choice from Hugging Face completion response.
80
142
  class HuggingfaceChoice
81
143
  attr_reader :text
82
144
 
@@ -88,7 +150,48 @@ module Durable
88
150
  @text
89
151
  end
90
152
  end
153
+
154
+ # Response wrapper for Hugging Face embedding API responses.
155
+ class HuggingfaceEmbeddingResponse
156
+ attr_reader :embedding
157
+
158
+ def initialize(data)
159
+ @embedding = data
160
+ end
161
+
162
+ def to_a
163
+ @embedding
164
+ end
165
+ end
166
+
167
+ # Response wrapper for Hugging Face streaming API responses.
168
+ class HuggingfaceStreamResponse
169
+ attr_reader :token
170
+
171
+ def initialize(parsed)
172
+ @token = HuggingfaceStreamToken.new(parsed)
173
+ end
174
+
175
+ def to_s
176
+ @token.to_s
177
+ end
178
+ end
179
+
180
+ # Individual token from Hugging Face streaming response.
181
+ class HuggingfaceStreamToken
182
+ attr_reader :text
183
+
184
+ def initialize(token)
185
+ @text = token['token']['text']
186
+ end
187
+
188
+ def to_s
189
+ @text || ''
190
+ end
191
+ end
91
192
  end
92
193
  end
93
194
  end
94
195
  end
196
+
197
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.