durable-llm 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/.envrc +7 -0
  3. data/CHANGELOG.md +5 -0
  4. data/CONFIGURE.md +132 -0
  5. data/Gemfile +7 -9
  6. data/Gemfile.lock +3 -3
  7. data/README.md +1 -0
  8. data/Rakefile +6 -6
  9. data/devenv.lock +103 -0
  10. data/devenv.nix +9 -0
  11. data/devenv.yaml +15 -0
  12. data/durable-llm.gemspec +44 -0
  13. data/examples/openai_quick_complete.rb +3 -1
  14. data/lib/durable/llm/cli.rb +247 -60
  15. data/lib/durable/llm/client.rb +92 -11
  16. data/lib/durable/llm/configuration.rb +174 -23
  17. data/lib/durable/llm/errors.rb +185 -0
  18. data/lib/durable/llm/providers/anthropic.rb +246 -36
  19. data/lib/durable/llm/providers/azure_openai.rb +347 -0
  20. data/lib/durable/llm/providers/base.rb +106 -9
  21. data/lib/durable/llm/providers/cohere.rb +227 -0
  22. data/lib/durable/llm/providers/deepseek.rb +233 -0
  23. data/lib/durable/llm/providers/fireworks.rb +278 -0
  24. data/lib/durable/llm/providers/google.rb +301 -0
  25. data/lib/durable/llm/providers/groq.rb +108 -29
  26. data/lib/durable/llm/providers/huggingface.rb +122 -18
  27. data/lib/durable/llm/providers/mistral.rb +431 -0
  28. data/lib/durable/llm/providers/openai.rb +162 -25
  29. data/lib/durable/llm/providers/opencode.rb +253 -0
  30. data/lib/durable/llm/providers/openrouter.rb +256 -0
  31. data/lib/durable/llm/providers/perplexity.rb +273 -0
  32. data/lib/durable/llm/providers/together.rb +346 -0
  33. data/lib/durable/llm/providers/xai.rb +355 -0
  34. data/lib/durable/llm/providers.rb +103 -15
  35. data/lib/durable/llm/version.rb +5 -1
  36. data/lib/durable/llm.rb +143 -3
  37. data/lib/durable.rb +29 -4
  38. data/sig/durable/llm.rbs +302 -1
  39. metadata +50 -36
@@ -1,7 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Groq provider for OpenAI-compatible API access.
4
+
1
5
  require 'faraday'
2
6
  require 'json'
3
7
  require 'durable/llm/errors'
4
8
  require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
5
10
 
6
11
  module Durable
7
12
  module Llm
@@ -15,20 +20,16 @@ module Durable
15
20
 
16
21
  attr_accessor :api_key
17
22
 
18
- def self.conn
19
- Faraday.new(url: BASE_URL) do |faraday|
23
+ def initialize(api_key: nil)
24
+ super
25
+ @conn = Faraday.new(url: BASE_URL) do |faraday|
20
26
  faraday.request :json
21
27
  faraday.response :json
22
28
  faraday.adapter Faraday.default_adapter
23
29
  end
24
30
  end
25
- def conn
26
- self.class.conn
27
- end
28
31
 
29
- def initialize(api_key: nil)
30
- @api_key = api_key || default_api_key
31
- end
32
+ attr_reader :conn
32
33
 
33
34
  def completion(options)
34
35
  response = conn.post('chat/completions') do |req|
@@ -45,12 +46,10 @@ module Durable
45
46
  req.body = { model: model, input: input, **options }
46
47
  end
47
48
 
48
- handle_response(response)
49
+ handle_response(response, GroqEmbeddingResponse)
49
50
  end
50
51
 
51
52
  def models
52
-
53
-
54
53
  response = conn.get('models') do |req|
55
54
  req.headers['Authorization'] = "Bearer #{@api_key}"
56
55
  end
@@ -59,28 +58,87 @@ module Durable
59
58
 
60
59
  resp['data'].map { |model| model['id'] }
61
60
  end
62
- def self.models
63
- Groq.new.models
64
- end
65
61
 
66
62
  def self.stream?
67
- false
63
+ true
64
+ end
65
+
66
+ def stream(options)
67
+ options[:stream] = true
68
+
69
+ response = conn.post('chat/completions') do |req|
70
+ req.headers['Authorization'] = "Bearer #{@api_key}"
71
+ req.headers['Accept'] = 'text/event-stream'
72
+
73
+ options['temperature'] = options['temperature'].to_f if options['temperature']
74
+
75
+ req.body = options
76
+
77
+ user_proc = proc do |chunk, _size, _total|
78
+ yield GroqStreamResponse.new(chunk)
79
+ end
80
+
81
+ req.options.on_data = to_json_stream(user_proc: user_proc)
82
+ end
83
+
84
+ handle_response(response)
68
85
  end
69
86
 
70
87
  private
71
88
 
72
- def handle_response(response)
89
+ # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
90
+ # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
91
+ # Given a proc, returns an outer proc that can be used to iterate over a JSON stream of chunks.
92
+ # For each chunk, the inner user_proc is called giving it the JSON object. The JSON object could
93
+ # be a data object or an error object as described in the OpenAI API documentation.
94
+ #
95
+ # @param user_proc [Proc] The inner proc to call for each JSON object in the chunk.
96
+ # @return [Proc] An outer proc that iterates over a raw stream, converting it to JSON.
97
+ def to_json_stream(user_proc:)
98
+ parser = EventStreamParser::Parser.new
99
+
100
+ proc do |chunk, _bytes, env|
101
+ if env && env.status != 200
102
+ raise_error = Faraday::Response::RaiseError.new
103
+ raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
104
+ end
105
+
106
+ parser.feed(chunk) do |_type, data|
107
+ user_proc.call(JSON.parse(data)) unless data == '[DONE]'
108
+ end
109
+ end
110
+ end
111
+
112
+ def try_parse_json(maybe_json)
113
+ JSON.parse(maybe_json)
114
+ rescue JSON::ParserError
115
+ maybe_json
116
+ end
117
+
118
+ def parse_error_message(response)
119
+ body = begin
120
+ JSON.parse(response.body)
121
+ rescue StandardError
122
+ nil
123
+ end
124
+ message = body&.dig('error', 'message') || response.body
125
+ "#{response.status} Error: #{message}"
126
+ end
127
+
128
+ # END-CODE-FROM
129
+
130
+ def handle_response(response, response_class = GroqResponse)
73
131
  case response.status
74
132
  when 200..299
75
- GroqResponse.new(response.body)
133
+ response_class.new(response.body)
76
134
  when 401
77
- raise Durable::Llm::AuthenticationError, response.body['error']['message']
135
+ raise Durable::Llm::AuthenticationError, parse_error_message(response)
78
136
  when 429
79
- raise Durable::Llm::RateLimitError, response.body['error']['message']
137
+ raise Durable::Llm::RateLimitError, parse_error_message(response)
80
138
  when 400..499
81
- raise Durable::Llm::InvalidRequestError, response.body['error']['message']
139
+ raise Durable::Llm::InvalidRequestError, parse_error_message(response)
82
140
  when 500..599
83
- raise Durable::Llm::ServerError, response.body['error']['message']
141
+ raise Durable::Llm::ServerError, parse_error_message(response)
84
142
  else
85
143
  raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
86
144
  end
@@ -97,9 +155,18 @@ module Durable
97
155
  @raw_response['choices'].map { |choice| GroqChoice.new(choice) }
98
156
  end
99
157
 
158
+ def data
159
+ @raw_response['data']
160
+ end
161
+
162
+ def embedding
163
+ @raw_response.dig('data', 0, 'embedding')
164
+ end
165
+
100
166
  def to_s
101
167
  choices.map(&:to_s).join(' ')
102
168
  end
169
+
103
170
  def to_h
104
171
  @raw_response.dup
105
172
  end
@@ -134,15 +201,12 @@ module Durable
134
201
  class GroqStreamResponse
135
202
  attr_reader :choices
136
203
 
137
- def initialize(fragment)
138
- json_frag = fragment.split("data: ").last.strip
139
- puts json_frag
140
- parsed = JSON.parse(json_frag)
141
- @choices = parsed['choices'].map { |choice| GroqStreamChoice.new(choice) }
204
+ def initialize(parsed)
205
+ @choices = GroqStreamChoice.new(parsed['choices'])
142
206
  end
143
207
 
144
208
  def to_s
145
- @choices.map(&:to_s).join(' ')
209
+ @choices.to_s
146
210
  end
147
211
  end
148
212
 
@@ -150,8 +214,9 @@ module Durable
150
214
  attr_reader :delta, :finish_reason
151
215
 
152
216
  def initialize(choice)
153
- @delta = GroqStreamDelta.new(choice['delta'])
154
- @finish_reason = choice['finish_reason']
217
+ @choice = [choice].flatten.first
218
+ @delta = GroqStreamDelta.new(@choice['delta'])
219
+ @finish_reason = @choice['finish_reason']
155
220
  end
156
221
 
157
222
  def to_s
@@ -171,7 +236,21 @@ module Durable
171
236
  @content || ''
172
237
  end
173
238
  end
239
+
240
+ class GroqEmbeddingResponse
241
+ attr_reader :embedding
242
+
243
+ def initialize(data)
244
+ @embedding = data.dig('data', 0, 'embedding')
245
+ end
246
+
247
+ def to_a
248
+ @embedding
249
+ end
250
+ end
174
251
  end
175
252
  end
176
253
  end
177
254
  end
255
+
256
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
@@ -1,13 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file implements the Hugging Face provider for accessing Hugging Face's inference API models.
4
+
1
5
  require 'faraday'
2
6
  require 'json'
3
7
  require 'durable/llm/errors'
4
8
  require 'durable/llm/providers/base'
9
+ require 'event_stream_parser'
5
10
 
6
11
  module Durable
7
12
  module Llm
8
13
  module Providers
14
+ # Hugging Face provider for accessing Hugging Face's inference API models.
15
+ #
16
+ # Provides completion, embedding, and streaming capabilities with authentication
17
+ # handling, error management, and response normalization.
9
18
  class Huggingface < Durable::Llm::Providers::Base
10
- BASE_URL = 'https://api-inference.huggingface.co/models'
19
+ BASE_URL = 'https://api-inference.huggingface.co'
11
20
 
12
21
  def default_api_key
13
22
  Durable::Llm.configuration.huggingface&.api_key || ENV['HUGGINGFACE_API_KEY']
@@ -22,11 +31,12 @@ module Durable
22
31
  faraday.response :json
23
32
  faraday.adapter Faraday.default_adapter
24
33
  end
34
+ super()
25
35
  end
26
36
 
27
37
  def completion(options)
28
38
  model = options.delete(:model) || 'gpt2'
29
- response = @conn.post("/#{model}") do |req|
39
+ response = @conn.post("models/#{model}") do |req|
30
40
  req.headers['Authorization'] = "Bearer #{@api_key}"
31
41
  req.body = options
32
42
  end
@@ -34,32 +44,84 @@ module Durable
34
44
  handle_response(response)
35
45
  end
36
46
 
47
+ def embedding(model:, input:, **options)
48
+ response = @conn.post("models/#{model}") do |req|
49
+ req.headers['Authorization'] = "Bearer #{@api_key}"
50
+ req.body = { inputs: input, **options }
51
+ end
52
+
53
+ handle_response(response, HuggingfaceEmbeddingResponse)
54
+ end
55
+
37
56
  def models
38
57
  self.class.models
39
58
  end
59
+
60
+ def self.stream?
61
+ true
62
+ end
63
+
64
+ def stream(options)
65
+ model = options.delete(:model) || 'gpt2'
66
+ options[:stream] = true
67
+
68
+ @conn.post("models/#{model}") do |req|
69
+ req.headers['Authorization'] = "Bearer #{@api_key}"
70
+ req.headers['Accept'] = 'text/event-stream'
71
+ req.body = options
72
+ req.options.on_data = to_json_stream(user_proc: proc { |chunk|
73
+ yield HuggingfaceStreamResponse.new(chunk)
74
+ })
75
+ end
76
+ end
77
+
40
78
  def self.models
41
- ['gpt2', 'bert-base-uncased', 'distilbert-base-uncased'] # could use expansion
79
+ %w[gpt2 bert-base-uncased distilbert-base-uncased] # could use expansion
42
80
  end
43
81
 
44
82
  private
45
83
 
46
- def handle_response(response)
47
- case response.status
48
- when 200..299
49
- HuggingfaceResponse.new(response.body)
50
- when 401
51
- raise Durable::Llm::AuthenticationError, response.body['error']
52
- when 429
53
- raise Durable::Llm::RateLimitError, response.body['error']
54
- when 400..499
55
- raise Durable::Llm::InvalidRequestError, response.body['error']
56
- when 500..599
57
- raise Durable::Llm::ServerError, response.body['error']
58
- else
59
- raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
84
+ # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
85
+ # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
86
+ def to_json_stream(user_proc:)
87
+ parser = EventStreamParser::Parser.new
88
+
89
+ proc do |chunk, _bytes, env|
90
+ if env && env.status != 200
91
+ raise_error = Faraday::Response::RaiseError.new
92
+ raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
93
+ end
94
+
95
+ parser.feed(chunk) do |_type, data|
96
+ user_proc.call(JSON.parse(data)) unless data == '[DONE]'
97
+ end
60
98
  end
61
99
  end
62
100
 
101
+ def try_parse_json(maybe_json)
102
+ JSON.parse(maybe_json)
103
+ rescue JSON::ParserError
104
+ maybe_json
105
+ end
106
+
107
+ def handle_response(response, response_class = HuggingfaceResponse)
108
+ return response_class.new(response.body) if (200..299).cover?(response.status)
109
+
110
+ error_class = error_class_for_status(response.status)
111
+ raise error_class, response.body['error'] || "HTTP #{response.status}"
112
+ end
113
+
114
+ def error_class_for_status(status)
115
+ case status
116
+ when 401 then Durable::Llm::AuthenticationError
117
+ when 429 then Durable::Llm::RateLimitError
118
+ when 400..499 then Durable::Llm::InvalidRequestError
119
+ when 500..599 then Durable::Llm::ServerError
120
+ else Durable::Llm::APIError
121
+ end
122
+ end
123
+
124
+ # Response wrapper for Hugging Face completion API responses.
63
125
  class HuggingfaceResponse
64
126
  attr_reader :raw_response
65
127
 
@@ -68,7 +130,7 @@ module Durable
68
130
  end
69
131
 
70
132
  def choices
71
- [@raw_response.first].map { |choice| HuggingfaceChoice.new(choice) }
133
+ [HuggingfaceChoice.new(@raw_response)]
72
134
  end
73
135
 
74
136
  def to_s
@@ -76,6 +138,7 @@ module Durable
76
138
  end
77
139
  end
78
140
 
141
+ # Individual choice from Hugging Face completion response.
79
142
  class HuggingfaceChoice
80
143
  attr_reader :text
81
144
 
@@ -87,7 +150,48 @@ module Durable
87
150
  @text
88
151
  end
89
152
  end
153
+
154
+ # Response wrapper for Hugging Face embedding API responses.
155
+ class HuggingfaceEmbeddingResponse
156
+ attr_reader :embedding
157
+
158
+ def initialize(data)
159
+ @embedding = data
160
+ end
161
+
162
+ def to_a
163
+ @embedding
164
+ end
165
+ end
166
+
167
+ # Response wrapper for Hugging Face streaming API responses.
168
+ class HuggingfaceStreamResponse
169
+ attr_reader :token
170
+
171
+ def initialize(parsed)
172
+ @token = HuggingfaceStreamToken.new(parsed)
173
+ end
174
+
175
+ def to_s
176
+ @token.to_s
177
+ end
178
+ end
179
+
180
+ # Individual token from Hugging Face streaming response.
181
+ class HuggingfaceStreamToken
182
+ attr_reader :text
183
+
184
+ def initialize(token)
185
+ @text = token['token']['text']
186
+ end
187
+
188
+ def to_s
189
+ @text || ''
190
+ end
191
+ end
90
192
  end
91
193
  end
92
194
  end
93
195
  end
196
+
197
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.