durable-llm 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/.envrc +7 -0
  3. data/CHANGELOG.md +5 -0
  4. data/CONFIGURE.md +132 -0
  5. data/Gemfile +7 -9
  6. data/Gemfile.lock +3 -3
  7. data/README.md +1 -0
  8. data/Rakefile +6 -6
  9. data/devenv.lock +103 -0
  10. data/devenv.nix +9 -0
  11. data/devenv.yaml +15 -0
  12. data/durable-llm.gemspec +44 -0
  13. data/examples/openai_quick_complete.rb +3 -1
  14. data/lib/durable/llm/cli.rb +247 -60
  15. data/lib/durable/llm/client.rb +92 -11
  16. data/lib/durable/llm/configuration.rb +174 -23
  17. data/lib/durable/llm/errors.rb +185 -0
  18. data/lib/durable/llm/providers/anthropic.rb +246 -36
  19. data/lib/durable/llm/providers/azure_openai.rb +347 -0
  20. data/lib/durable/llm/providers/base.rb +106 -9
  21. data/lib/durable/llm/providers/cohere.rb +227 -0
  22. data/lib/durable/llm/providers/deepseek.rb +233 -0
  23. data/lib/durable/llm/providers/fireworks.rb +278 -0
  24. data/lib/durable/llm/providers/google.rb +301 -0
  25. data/lib/durable/llm/providers/groq.rb +108 -29
  26. data/lib/durable/llm/providers/huggingface.rb +122 -18
  27. data/lib/durable/llm/providers/mistral.rb +431 -0
  28. data/lib/durable/llm/providers/openai.rb +162 -25
  29. data/lib/durable/llm/providers/opencode.rb +253 -0
  30. data/lib/durable/llm/providers/openrouter.rb +256 -0
  31. data/lib/durable/llm/providers/perplexity.rb +273 -0
  32. data/lib/durable/llm/providers/together.rb +346 -0
  33. data/lib/durable/llm/providers/xai.rb +355 -0
  34. data/lib/durable/llm/providers.rb +103 -15
  35. data/lib/durable/llm/version.rb +5 -1
  36. data/lib/durable/llm.rb +143 -3
  37. data/lib/durable.rb +29 -4
  38. data/sig/durable/llm.rbs +302 -1
  39. metadata +50 -36
data/lib/durable/llm/providers/fireworks.rb
@@ -0,0 +1,278 @@
+ # frozen_string_literal: true
+
+ # This file implements the Fireworks AI provider for accessing Fireworks AI's language models through their API, providing completion, embedding, and streaming capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Fireworks AI's API endpoint, processes chat completions and embeddings, handles various API error responses, and includes comprehensive response classes to format Fireworks AI's API responses into a consistent interface.
+
+ require 'faraday'
+ require 'json'
+ require 'event_stream_parser'
+ require 'durable/llm/errors'
+ require 'durable/llm/providers/base'
+
+ module Durable
+   module Llm
+     module Providers
+       class Fireworks < Durable::Llm::Providers::Base
+         BASE_URL = 'https://api.fireworks.ai/inference/v1'
+
+         def default_api_key
+           Durable::Llm.configuration.fireworks&.api_key || ENV['FIREWORKS_API_KEY']
+         end
+
+         attr_accessor :api_key
+
+         # Initializes a new Fireworks provider instance.
+         #
+         # @param api_key [String, nil] The API key for Fireworks AI. If not provided, uses the default from configuration or environment.
+         # @return [Fireworks] A new instance of the Fireworks provider.
+         def initialize(api_key: nil)
+           super()
+           @api_key = api_key || default_api_key
+           @conn = Faraday.new(url: BASE_URL) do |faraday|
+             faraday.request :json
+             faraday.response :json
+             faraday.adapter Faraday.default_adapter
+           end
+         end
+
+         # Performs a chat completion request to Fireworks AI.
+         #
+         # @param options [Hash] The completion options including model, messages, temperature, etc.
+         # @return [FireworksResponse] The response object containing the completion results.
+         # @raise [Durable::Llm::AuthenticationError] If authentication fails.
+         # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
+         # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
+         # @raise [Durable::Llm::ServerError] If there's a server error.
+         def completion(options)
+           response = @conn.post('chat/completions') do |req|
+             req.headers['Authorization'] = "Bearer #{@api_key}"
+             req.body = options
+           end
+
+           handle_response(response)
+         end
+
+         # Generates embeddings for the given input using Fireworks AI.
+         #
+         # @param model [String] The model to use for generating embeddings.
+         # @param input [String, Array<String>] The text input(s) to embed.
+         # @param options [Hash] Additional options for the embedding request.
+         # @return [FireworksEmbeddingResponse] The response object containing the embeddings.
+         # @raise [Durable::Llm::AuthenticationError] If authentication fails.
+         # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
+         # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
+         # @raise [Durable::Llm::ServerError] If there's a server error.
+         def embedding(model:, input:, **options)
+           response = @conn.post('embeddings') do |req|
+             req.headers['Authorization'] = "Bearer #{@api_key}"
+             req.body = { model: model, input: input, **options }
+           end
+
+           handle_response(response, FireworksEmbeddingResponse)
+         end
+
+         # Retrieves the list of available models from Fireworks AI.
+         #
+         # @return [Array<String>] An array of model IDs available for use.
+         # @raise [Durable::Llm::AuthenticationError] If authentication fails.
+         # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
+         # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
+         # @raise [Durable::Llm::ServerError] If there's a server error.
+         def models
+           response = @conn.get('models') do |req|
+             req.headers['Authorization'] = "Bearer #{@api_key}"
+           end
+
+           handle_response(response).data.map { |model| model['id'] }
+         end
+
+         def self.stream?
+           true
+         end
+
+         # Performs a streaming chat completion request to Fireworks AI.
+         #
+         # @param options [Hash] The completion options including model, messages, temperature, etc.
+         # @yield [FireworksStreamResponse] Yields each chunk of the streaming response.
+         # @return [nil] Returns nil after streaming is complete.
+         # @raise [Durable::Llm::AuthenticationError] If authentication fails.
+         # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
+         # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
+         # @raise [Durable::Llm::ServerError] If there's a server error.
+         def stream(options)
+           options[:stream] = true
+
+           @conn.post('chat/completions') do |req|
+             req.headers['Authorization'] = "Bearer #{@api_key}"
+             req.headers['Accept'] = 'text/event-stream'
+
+             options['temperature'] = options['temperature'].to_f if options['temperature']
+
+             req.body = options
+
+             user_proc = proc do |chunk, _size, _total|
+               yield FireworksStreamResponse.new(chunk)
+             end
+
+             req.options.on_data = to_json_stream(user_proc: user_proc)
+           end
+
+           # For streaming, errors are handled in to_json_stream, no need for handle_response
+           nil
+         end
+
+         private
+
+         # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
+         # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
+         def to_json_stream(user_proc:)
+           parser = EventStreamParser::Parser.new
+
+           proc do |chunk, _bytes, env|
+             if env && env.status != 200
+               raise_error = Faraday::Response::RaiseError.new
+               raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
+             end
+
+             parser.feed(chunk) do |_type, data|
+               user_proc.call(JSON.parse(data)) unless data == '[DONE]'
+             end
+           end
+         end
+
+         def try_parse_json(maybe_json)
+           JSON.parse(maybe_json)
+         rescue JSON::ParserError
+           maybe_json
+         end
+
+         # END-CODE-FROM
+
+         def handle_response(response, response_class = FireworksResponse)
+           case response.status
+           when 200..299
+             response_class.new(response.body)
+           when 401
+             raise Durable::Llm::AuthenticationError, parse_error_message(response)
+           when 429
+             raise Durable::Llm::RateLimitError, parse_error_message(response)
+           when 400..499
+             raise Durable::Llm::InvalidRequestError, parse_error_message(response)
+           when 500..599
+             raise Durable::Llm::ServerError, parse_error_message(response)
+           else
+             raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
+           end
+         end
+
+         def parse_error_message(response)
+           body = begin
+             JSON.parse(response.body)
+           rescue StandardError
+             nil
+           end
+           message = body&.dig('error', 'message') || response.body
+           "#{response.status} Error: #{message}"
+         end
+
+         class FireworksResponse
+           attr_reader :raw_response
+
+           def initialize(response)
+             @raw_response = response
+           end
+
+           def choices
+             @raw_response['choices'].map { |choice| FireworksChoice.new(choice) }
+           end
+
+           def data
+             @raw_response['data']
+           end
+
+           def to_s
+             choices.map(&:to_s).join(' ')
+           end
+         end
+
+         class FireworksChoice
+           attr_reader :message, :finish_reason
+
+           def initialize(choice)
+             @message = FireworksMessage.new(choice['message'])
+             @finish_reason = choice['finish_reason']
+           end
+
+           def to_s
+             @message.to_s
+           end
+         end
+
+         class FireworksMessage
+           attr_reader :role, :content
+
+           def initialize(message)
+             @role = message['role']
+             @content = message['content']
+           end
+
+           def to_s
+             @content
+           end
+         end
+
+         class FireworksStreamResponse
+           attr_reader :choices
+
+           def initialize(parsed)
+             @choices = FireworksStreamChoice.new(parsed['choices'])
+           end
+
+           def to_s
+             @choices.to_s
+           end
+         end
+
+         class FireworksEmbeddingResponse
+           attr_reader :embedding
+
+           def initialize(data)
+             @embedding = data.dig('data', 0, 'embedding')
+           end
+
+           def to_a
+             @embedding
+           end
+         end
+
+         class FireworksStreamChoice
+           attr_reader :delta, :finish_reason
+
+           def initialize(choice)
+             @choice = [choice].flatten.first
+             @delta = FireworksStreamDelta.new(@choice['delta'])
+             @finish_reason = @choice['finish_reason']
+           end
+
+           def to_s
+             @delta.to_s
+           end
+         end
+
+         class FireworksStreamDelta
+           attr_reader :role, :content
+
+           def initialize(delta)
+             @role = delta['role']
+             @content = delta['content']
+           end
+
+           def to_s
+             @content || ''
+           end
+         end
+       end
+     end
+   end
+ end
+
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
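
For orientation, a minimal usage sketch of the Fireworks provider added above, written against the interface shown in this diff. The model name and prompts are illustrative placeholders (they are not taken from this release), and FIREWORKS_API_KEY is assumed to be set in the environment, matching default_api_key.

    require 'durable/llm/providers/fireworks'

    # Picks up FIREWORKS_API_KEY when no key is passed explicitly.
    provider = Durable::Llm::Providers::Fireworks.new

    # Blocking chat completion; the model name below is a placeholder.
    response = provider.completion(
      model: 'accounts/fireworks/models/llama-v3p1-8b-instruct',
      messages: [{ role: 'user', content: 'Say hello in one sentence.' }]
    )
    puts response.to_s

    # Streaming variant; each chunk arrives as a FireworksStreamResponse.
    provider.stream(
      model: 'accounts/fireworks/models/llama-v3p1-8b-instruct',
      messages: [{ role: 'user', content: 'Count to three.' }]
    ) { |chunk| print chunk.to_s }
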
data/lib/durable/llm/providers/google.rb
@@ -0,0 +1,301 @@
+ # frozen_string_literal: true
+
+ # This file implements the Google provider for accessing Google's Gemini language models through their API, providing completion capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Google's Generative Language API endpoint, processes generateContent requests with text content, handles various API error responses, and includes comprehensive response classes to format Google's API responses into a consistent interface.
+
+ require 'faraday'
+ require 'json'
+ require 'durable/llm/errors'
+ require 'durable/llm/providers/base'
+ require 'event_stream_parser'
+
+ module Durable
+   module Llm
+     module Providers
+       class Google < Durable::Llm::Providers::Base
+         BASE_URL = 'https://generativelanguage.googleapis.com'
+
+         def default_api_key
+           begin
+             Durable::Llm.configuration.google&.api_key
+           rescue NoMethodError
+             nil
+           end || ENV['GOOGLE_API_KEY']
+         end
+
+         attr_accessor :api_key
+
+         def initialize(api_key: nil)
+           @api_key = api_key || default_api_key
+           @conn = Faraday.new(url: BASE_URL) do |faraday|
+             faraday.request :json
+             faraday.response :json
+             faraday.adapter Faraday.default_adapter
+           end
+         end
+
+         def completion(options)
+           model = options[:model]
+           url = "/v1beta/models/#{model}:generateContent?key=#{@api_key}"
+
+           # Transform options to Google's format
+           request_body = transform_options(options)
+
+           response = @conn.post(url) do |req|
+             req.body = request_body
+           end
+
+           handle_response(response)
+         end
+
+         def embedding(model:, input:, **_options)
+           url = "/v1beta/models/#{model}:embedContent?key=#{@api_key}"
+
+           request_body = {
+             content: {
+               parts: [{ text: input }]
+             }
+           }
+
+           response = @conn.post(url) do |req|
+             req.body = request_body
+           end
+
+           handle_response(response, GoogleEmbeddingResponse)
+         end
+
+         def models
+           # Google doesn't provide a public models API, so return hardcoded list
+           [
+             'gemini-1.5-flash',
+             'gemini-1.5-flash-001',
+             'gemini-1.5-flash-002',
+             'gemini-1.5-flash-8b',
+             'gemini-1.5-flash-8b-001',
+             'gemini-1.5-flash-8b-latest',
+             'gemini-1.5-flash-latest',
+             'gemini-1.5-pro',
+             'gemini-1.5-pro-001',
+             'gemini-1.5-pro-002',
+             'gemini-1.5-pro-latest',
+             'gemini-2.0-flash',
+             'gemini-2.0-flash-001',
+             'gemini-2.0-flash-exp',
+             'gemini-2.0-flash-lite',
+             'gemini-2.0-flash-lite-001',
+             'gemini-2.0-flash-live-001',
+             'gemini-2.0-flash-preview-image-generation',
+             'gemini-2.5-flash',
+             'gemini-2.5-flash-exp-native-audio-thinking-dialog',
+             'gemini-2.5-flash-lite',
+             'gemini-2.5-flash-lite-06-17',
+             'gemini-2.5-flash-preview-05-20',
+             'gemini-2.5-flash-preview-native-audio-dialog',
+             'gemini-2.5-flash-preview-tts',
+             'gemini-2.5-pro',
+             'gemini-2.5-pro-preview-tts',
+             'gemini-live-2.5-flash-preview',
+             'text-embedding-004',
+             'text-multilingual-embedding-002'
+           ]
+         end
+
+         def self.stream?
+           true
+         end
+
+         def stream(options)
+           model = options[:model]
+           url = "/v1beta/models/#{model}:streamGenerateContent?key=#{@api_key}&alt=sse"
+
+           request_body = transform_options(options)
+
+           response = @conn.post(url) do |req|
+             req.headers['Accept'] = 'text/event-stream'
+             req.body = request_body
+
+             user_proc = proc do |chunk, _size, _total|
+               yield GoogleStreamResponse.new(chunk)
+             end
+
+             req.options.on_data = to_json_stream(user_proc: user_proc)
+           end
+
+           handle_response(response)
+         end
+
+         private
+
+         def transform_options(options)
+           messages = options[:messages] || []
+           system_messages = messages.select { |m| m[:role] == 'system' }
+           conversation_messages = messages.reject { |m| m[:role] == 'system' }
+
+           body = {
+             contents: conversation_messages.map do |msg|
+               {
+                 role: msg[:role] == 'assistant' ? 'model' : 'user',
+                 parts: [{ text: msg[:content] }]
+               }
+             end
+           }
+
+           if system_messages.any?
+             body[:systemInstruction] = {
+               parts: [{ text: system_messages.map { |m| m[:content] }.join("\n") }]
+             }
+           end
+
+           generation_config = {}
+           generation_config[:temperature] = options[:temperature] if options[:temperature]
+           generation_config[:maxOutputTokens] = options[:max_tokens] if options[:max_tokens]
+           generation_config[:topP] = options[:top_p] if options[:top_p]
+           generation_config[:topK] = options[:top_k] if options[:top_k]
+
+           body[:generationConfig] = generation_config unless generation_config.empty?
+
+           body
+         end
+
+         # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
+         # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
+         def to_json_stream(user_proc:)
+           parser = EventStreamParser::Parser.new
+
+           proc do |chunk, _bytes, env|
+             if env && env.status != 200
+               raise_error = Faraday::Response::RaiseError.new
+               raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
+             end
+
+             parser.feed(chunk) do |_type, data|
+               user_proc.call(JSON.parse(data)) unless data == '[DONE]'
+             end
+           end
+         end
+
+         def try_parse_json(maybe_json)
+           JSON.parse(maybe_json)
+         rescue JSON::ParserError
+           maybe_json
+         end
+
+         def handle_response(response, response_class = GoogleResponse)
+           case response.status
+           when 200..299
+             response_class.new(response.body)
+           when 401
+             raise Durable::Llm::AuthenticationError, parse_error_message(response)
+           when 429
+             raise Durable::Llm::RateLimitError, parse_error_message(response)
+           when 400..499
+             raise Durable::Llm::InvalidRequestError, parse_error_message(response)
+           when 500..599
+             raise Durable::Llm::ServerError, parse_error_message(response)
+           else
+             raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
+           end
+         end
+
+         def parse_error_message(response)
+           body = begin
+             JSON.parse(response.body)
+           rescue StandardError
+             nil
+           end
+           message = body&.dig('error', 'message') || response.body
+           "#{response.status} Error: #{message}"
+         end
+
+         class GoogleResponse
+           attr_reader :raw_response
+
+           def initialize(response)
+             @raw_response = response
+           end
+
+           def choices
+             [GoogleChoice.new(@raw_response['candidates']&.first)]
+           end
+
+           def to_s
+             choices.map(&:to_s).join(' ')
+           end
+         end
+
+         class GoogleChoice
+           attr_reader :message
+
+           def initialize(candidate)
+             @message = GoogleMessage.new(candidate&.dig('content', 'parts')&.first)
+           end
+
+           def to_s
+             @message.to_s
+           end
+         end
+
+         class GoogleMessage
+           attr_reader :content
+
+           def initialize(part)
+             @content = part&.dig('text') || ''
+           end
+
+           def to_s
+             @content
+           end
+         end
+
+         class GoogleStreamResponse
+           attr_reader :choices
+
+           def initialize(parsed)
+             @choices = [GoogleStreamChoice.new(parsed)]
+           end
+
+           def to_s
+             @choices.map(&:to_s).join
+           end
+         end
+
+         class GoogleStreamChoice
+           attr_reader :delta
+
+           def initialize(parsed)
+             @delta = GoogleStreamDelta.new(parsed.dig('candidates', 0, 'content', 'parts', 0))
+           end
+
+           def to_s
+             @delta.to_s
+           end
+         end
+
+         class GoogleStreamDelta
+           attr_reader :content
+
+           def initialize(part)
+             @content = part&.dig('text') || ''
+           end
+
+           def to_s
+             @content
+           end
+         end
+
+         class GoogleEmbeddingResponse
+           attr_reader :embedding
+
+           def initialize(data)
+             @embedding = data.dig('embedding', 'values')
+           end
+
+           def to_a
+             @embedding
+           end
+         end
+       end
+     end
+   end
+ end
+
+ # Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
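
Likewise, a minimal usage sketch of the Google provider against the interface above. The model names come from the hardcoded models list in this diff, the prompts are illustrative, and GOOGLE_API_KEY is assumed to be set in the environment.

    require 'durable/llm/providers/google'

    # Reads GOOGLE_API_KEY when no key is passed explicitly.
    provider = Durable::Llm::Providers::Google.new

    # OpenAI-style messages are converted by transform_options into Google's
    # generateContent format; system messages become systemInstruction.
    response = provider.completion(
      model: 'gemini-1.5-flash',
      messages: [
        { role: 'system', content: 'Answer in one word.' },
        { role: 'user', content: 'What color is the sky on a clear day?' }
      ],
      temperature: 0.2
    )
    puts response.to_s

    # Embedding request; GoogleEmbeddingResponse#to_a returns the vector
    # taken from the response's embedding.values field.
    vector = provider.embedding(model: 'text-embedding-004', input: 'hello world').to_a
    puts vector&.length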