durable-llm 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +7 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +7 -9
- data/Gemfile.lock +3 -3
- data/README.md +1 -0
- data/Rakefile +6 -6
- data/devenv.lock +103 -0
- data/devenv.nix +9 -0
- data/devenv.yaml +15 -0
- data/durable-llm.gemspec +44 -0
- data/examples/openai_quick_complete.rb +3 -1
- data/lib/durable/llm/cli.rb +215 -22
- data/lib/durable/llm/client.rb +85 -6
- data/lib/durable/llm/configuration.rb +163 -10
- data/lib/durable/llm/errors.rb +185 -0
- data/lib/durable/llm/providers/anthropic.rb +232 -24
- data/lib/durable/llm/providers/azure_openai.rb +347 -0
- data/lib/durable/llm/providers/base.rb +83 -1
- data/lib/durable/llm/providers/cohere.rb +138 -11
- data/lib/durable/llm/providers/deepseek.rb +233 -0
- data/lib/durable/llm/providers/fireworks.rb +278 -0
- data/lib/durable/llm/providers/google.rb +301 -0
- data/lib/durable/llm/providers/groq.rb +107 -25
- data/lib/durable/llm/providers/huggingface.rb +120 -17
- data/lib/durable/llm/providers/mistral.rb +431 -0
- data/lib/durable/llm/providers/openai.rb +150 -4
- data/lib/durable/llm/providers/opencode.rb +253 -0
- data/lib/durable/llm/providers/openrouter.rb +256 -0
- data/lib/durable/llm/providers/perplexity.rb +273 -0
- data/lib/durable/llm/providers/together.rb +346 -0
- data/lib/durable/llm/providers/xai.rb +355 -0
- data/lib/durable/llm/providers.rb +103 -13
- data/lib/durable/llm/version.rb +5 -1
- data/lib/durable/llm.rb +141 -1
- data/lib/durable.rb +29 -4
- data/sig/durable/llm.rbs +302 -1
- metadata +48 -36
@@ -0,0 +1,301 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file implements the Google provider for accessing Google's Gemini language models through their API, providing completion capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Google's Generative Language API endpoint, processes generateContent requests with text content, handles various API error responses, and includes comprehensive response classes to format Google's API responses into a consistent interface.
|
4
|
+
|
5
|
+
require 'faraday'
|
6
|
+
require 'json'
|
7
|
+
require 'durable/llm/errors'
|
8
|
+
require 'durable/llm/providers/base'
|
9
|
+
require 'event_stream_parser'
|
10
|
+
|
11
|
+
module Durable
|
12
|
+
module Llm
|
13
|
+
module Providers
|
14
|
+
class Google < Durable::Llm::Providers::Base
|
15
|
+
BASE_URL = 'https://generativelanguage.googleapis.com'
|
16
|
+
|
17
|
+
# Resolves the Google API key: the configured value wins, with the
# GOOGLE_API_KEY environment variable as a fallback.
#
# @return [String, nil] the API key, or nil when none is configured
def default_api_key
  configured = begin
    Durable::Llm.configuration.google&.api_key
  rescue NoMethodError
    # Configuration object may not expose a `google` section yet.
    nil
  end
  configured || ENV['GOOGLE_API_KEY']
end
|
24
|
+
|
25
|
+
# The API key used on every request; may be reassigned after construction.
attr_accessor :api_key

# Builds a provider backed by a JSON-encoding/decoding Faraday connection
# to the Generative Language API base URL.
#
# @param api_key [String, nil] explicit key; falls back to default_api_key
def initialize(api_key: nil)
  @api_key = api_key || default_api_key
  @conn = Faraday.new(url: BASE_URL) do |f|
    f.request :json
    f.response :json
    f.adapter Faraday.default_adapter
  end
end
|
35
|
+
|
36
|
+
# Performs a non-streaming generateContent request for the given model.
#
# @param options [Hash] expects :model plus chat-style :messages and
#   optional tuning keys (see #transform_options)
# @return [GoogleResponse] normalized completion response
def completion(options)
  endpoint = "/v1beta/models/#{options[:model]}:generateContent?key=#{@api_key}"
  payload = transform_options(options)

  response = @conn.post(endpoint) { |req| req.body = payload }

  handle_response(response)
end
|
49
|
+
|
50
|
+
# Requests an embedding vector for the given input text.
#
# @param model [String] embedding model name, e.g. 'text-embedding-004'
# @param input [String] text to embed
# @return [GoogleEmbeddingResponse] wrapper exposing the vector via #to_a
def embedding(model:, input:, **_options)
  endpoint = "/v1beta/models/#{model}:embedContent?key=#{@api_key}"
  payload = { content: { parts: [{ text: input }] } }

  response = @conn.post(endpoint) { |req| req.body = payload }

  handle_response(response, GoogleEmbeddingResponse)
end
|
65
|
+
|
66
|
+
# Known Gemini and embedding model identifiers. Google does not expose a
# public model-listing endpoint, so this list is maintained by hand.
#
# @return [Array<String>] supported model names
def models
  %w[
    gemini-1.5-flash
    gemini-1.5-flash-001
    gemini-1.5-flash-002
    gemini-1.5-flash-8b
    gemini-1.5-flash-8b-001
    gemini-1.5-flash-8b-latest
    gemini-1.5-flash-latest
    gemini-1.5-pro
    gemini-1.5-pro-001
    gemini-1.5-pro-002
    gemini-1.5-pro-latest
    gemini-2.0-flash
    gemini-2.0-flash-001
    gemini-2.0-flash-exp
    gemini-2.0-flash-lite
    gemini-2.0-flash-lite-001
    gemini-2.0-flash-live-001
    gemini-2.0-flash-preview-image-generation
    gemini-2.5-flash
    gemini-2.5-flash-exp-native-audio-thinking-dialog
    gemini-2.5-flash-lite
    gemini-2.5-flash-lite-06-17
    gemini-2.5-flash-preview-05-20
    gemini-2.5-flash-preview-native-audio-dialog
    gemini-2.5-flash-preview-tts
    gemini-2.5-pro
    gemini-2.5-pro-preview-tts
    gemini-live-2.5-flash-preview
    text-embedding-004
    text-multilingual-embedding-002
  ]
end
|
101
|
+
|
102
|
+
# This provider supports server-sent-event streaming.
#
# @return [true]
def self.stream?
  true
end
|
105
|
+
|
106
|
+
# Streams a generateContent request over SSE, yielding a
# GoogleStreamResponse for every parsed event chunk.
#
# @param options [Hash] same shape as #completion
# @yield [GoogleStreamResponse] one wrapper per streamed chunk
def stream(options)
  endpoint = "/v1beta/models/#{options[:model]}:streamGenerateContent?key=#{@api_key}&alt=sse"
  payload = transform_options(options)

  # Forward each parsed SSE event to the caller's block.
  on_chunk = proc do |chunk, _size, _total|
    yield GoogleStreamResponse.new(chunk)
  end

  response = @conn.post(endpoint) do |req|
    req.headers['Accept'] = 'text/event-stream'
    req.body = payload
    req.options.on_data = to_json_stream(user_proc: on_chunk)
  end

  handle_response(response)
end
|
125
|
+
|
126
|
+
private
|
127
|
+
|
128
|
+
# Converts OpenAI-style chat options into Google's generateContent request
# shape: chat turns become `contents` (assistant → 'model', everything
# else → 'user'), system messages are joined with newlines into
# `systemInstruction`, and tuning keys move under camelCased
# `generationConfig` entries.
#
# @param options [Hash] :messages plus optional :temperature, :max_tokens,
#   :top_p, :top_k
# @return [Hash] request body for the Generative Language API
def transform_options(options)
  all_messages = options[:messages] || []
  system_msgs, chat_msgs = all_messages.partition { |m| m[:role] == 'system' }

  body = {
    contents: chat_msgs.map do |msg|
      role = msg[:role] == 'assistant' ? 'model' : 'user'
      { role: role, parts: [{ text: msg[:content] }] }
    end
  }

  unless system_msgs.empty?
    merged = system_msgs.map { |m| m[:content] }.join("\n")
    body[:systemInstruction] = { parts: [{ text: merged }] }
  end

  config = {}
  config[:temperature] = options[:temperature] if options[:temperature]
  config[:maxOutputTokens] = options[:max_tokens] if options[:max_tokens]
  config[:topP] = options[:top_p] if options[:top_p]
  config[:topK] = options[:top_k] if options[:top_k]
  body[:generationConfig] = config unless config.empty?

  body
end
|
158
|
+
|
159
|
+
# CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
# MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
#
# Wraps user_proc in a proc suitable for Faraday's on_data hook: raw SSE
# chunks are fed through an event-stream parser and each decoded JSON
# event is handed to user_proc. Non-200 responses are converted into
# Faraday errors via RaiseError.
#
# @param user_proc [Proc] called with each parsed JSON event
# @return [Proc] on_data callback for Faraday request options
def to_json_stream(user_proc:)
  event_parser = EventStreamParser::Parser.new

  proc do |chunk, _bytes, env|
    if env && env.status != 200
      raise_error = Faraday::Response::RaiseError.new
      raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
    end

    event_parser.feed(chunk) do |_type, data|
      # '[DONE]' is the stream terminator sentinel, not a JSON payload.
      user_proc.call(JSON.parse(data)) unless data == '[DONE]'
    end
  end
end
|
175
|
+
|
176
|
+
# Attempts to parse maybe_json as JSON, returning the original string
# unchanged when it is not valid JSON.
def try_parse_json(maybe_json)
  begin
    JSON.parse(maybe_json)
  rescue JSON::ParserError
    maybe_json
  end
end
|
181
|
+
|
182
|
+
# Maps an HTTP response onto a wrapped response object for 2xx statuses,
# or raises the Durable::Llm error class matching the status code.
#
# @param response [Faraday::Response] the raw HTTP response
# @param response_class [Class] wrapper applied to successful bodies
# @return [Object] instance of response_class
# @raise [Durable::Llm::AuthenticationError] on 401
# @raise [Durable::Llm::RateLimitError] on 429
# @raise [Durable::Llm::InvalidRequestError] on other 4xx
# @raise [Durable::Llm::ServerError] on 5xx
# @raise [Durable::Llm::APIError] on anything else
def handle_response(response, response_class = GoogleResponse)
  status = response.status
  return response_class.new(response.body) if (200..299).cover?(status)

  case status
  when 401
    raise Durable::Llm::AuthenticationError, parse_error_message(response)
  when 429
    raise Durable::Llm::RateLimitError, parse_error_message(response)
  when 400..499
    raise Durable::Llm::InvalidRequestError, parse_error_message(response)
  when 500..599
    raise Durable::Llm::ServerError, parse_error_message(response)
  else
    raise Durable::Llm::APIError, "Unexpected response code: #{status}"
  end
end
|
198
|
+
|
199
|
+
# Builds a human-readable error string from a failed API response,
# preferring the structured `error.message` field when the body is JSON
# and falling back to the raw body otherwise.
#
# @param response [#status, #body] the failed HTTP response
# @return [String] e.g. "404 Error: model not found"
def parse_error_message(response)
  parsed =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  detail = parsed&.dig('error', 'message') || response.body
  "#{response.status} Error: #{detail}"
end
|
208
|
+
|
209
|
+
# Wraps a full generateContent response body.
class GoogleResponse
  attr_reader :raw_response

  def initialize(response)
    @raw_response = response
  end

  # Only the first candidate is surfaced, matching the other providers.
  def choices
    [GoogleChoice.new(@raw_response['candidates']&.first)]
  end

  def to_s
    choices.map(&:to_s).join(' ')
  end
end

# A single candidate from a completion response.
class GoogleChoice
  attr_reader :message

  def initialize(candidate)
    @message = GoogleMessage.new(candidate&.dig('content', 'parts')&.first)
  end

  def to_s
    message.to_s
  end
end

# The text content of one candidate part; empty string when absent.
class GoogleMessage
  attr_reader :content

  def initialize(part)
    @content = part&.dig('text') || ''
  end

  def to_s
    content
  end
end

# One SSE chunk of a streaming generateContent response.
class GoogleStreamResponse
  attr_reader :choices

  def initialize(parsed)
    @choices = [GoogleStreamChoice.new(parsed)]
  end

  def to_s
    choices.map(&:to_s).join
  end
end

# A streamed candidate delta extracted from a chunk.
class GoogleStreamChoice
  attr_reader :delta

  def initialize(parsed)
    @delta = GoogleStreamDelta.new(parsed.dig('candidates', 0, 'content', 'parts', 0))
  end

  def to_s
    delta.to_s
  end
end

# Incremental text carried by a stream chunk; empty string when absent.
class GoogleStreamDelta
  attr_reader :content

  def initialize(part)
    @content = part&.dig('text') || ''
  end

  def to_s
    content
  end
end

# Wraps an embedContent response, exposing the raw vector.
class GoogleEmbeddingResponse
  attr_reader :embedding

  def initialize(data)
    @embedding = data.dig('embedding', 'values')
  end

  def to_a
    embedding
  end
end
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|
@@ -1,7 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Groq provider for OpenAI-compatible API access.
|
4
|
+
|
1
5
|
require 'faraday'
|
2
6
|
require 'json'
|
3
7
|
require 'durable/llm/errors'
|
4
8
|
require 'durable/llm/providers/base'
|
9
|
+
require 'event_stream_parser'
|
5
10
|
|
6
11
|
module Durable
|
7
12
|
module Llm
|
@@ -15,21 +20,16 @@ module Durable
|
|
15
20
|
|
16
21
|
attr_accessor :api_key
|
17
22
|
|
18
|
-
def
|
19
|
-
|
23
|
+
def initialize(api_key: nil)
|
24
|
+
super
|
25
|
+
@conn = Faraday.new(url: BASE_URL) do |faraday|
|
20
26
|
faraday.request :json
|
21
27
|
faraday.response :json
|
22
28
|
faraday.adapter Faraday.default_adapter
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
26
|
-
|
27
|
-
self.class.conn
|
28
|
-
end
|
29
|
-
|
30
|
-
def initialize(api_key: nil)
|
31
|
-
@api_key = api_key || default_api_key
|
32
|
-
end
|
32
|
+
attr_reader :conn
|
33
33
|
|
34
34
|
def completion(options)
|
35
35
|
response = conn.post('chat/completions') do |req|
|
@@ -46,7 +46,7 @@ module Durable
|
|
46
46
|
req.body = { model: model, input: input, **options }
|
47
47
|
end
|
48
48
|
|
49
|
-
handle_response(response)
|
49
|
+
handle_response(response, GroqEmbeddingResponse)
|
50
50
|
end
|
51
51
|
|
52
52
|
def models
|
@@ -60,23 +60,85 @@ module Durable
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def self.stream?
|
63
|
-
|
63
|
+
true
|
64
|
+
end
|
65
|
+
|
66
|
+
def stream(options)
|
67
|
+
options[:stream] = true
|
68
|
+
|
69
|
+
response = conn.post('chat/completions') do |req|
|
70
|
+
req.headers['Authorization'] = "Bearer #{@api_key}"
|
71
|
+
req.headers['Accept'] = 'text/event-stream'
|
72
|
+
|
73
|
+
options['temperature'] = options['temperature'].to_f if options['temperature']
|
74
|
+
|
75
|
+
req.body = options
|
76
|
+
|
77
|
+
user_proc = proc do |chunk, _size, _total|
|
78
|
+
yield GroqStreamResponse.new(chunk)
|
79
|
+
end
|
80
|
+
|
81
|
+
req.options.on_data = to_json_stream(user_proc: user_proc)
|
82
|
+
end
|
83
|
+
|
84
|
+
handle_response(response)
|
64
85
|
end
|
65
86
|
|
66
87
|
private
|
67
88
|
|
68
|
-
|
89
|
+
# CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
|
90
|
+
# MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
|
91
|
+
# Given a proc, returns an outer proc that can be used to iterate over a JSON stream of chunks.
|
92
|
+
# For each chunk, the inner user_proc is called giving it the JSON object. The JSON object could
|
93
|
+
# be a data object or an error object as described in the OpenAI API documentation.
|
94
|
+
#
|
95
|
+
# @param user_proc [Proc] The inner proc to call for each JSON object in the chunk.
|
96
|
+
# @return [Proc] An outer proc that iterates over a raw stream, converting it to JSON.
|
97
|
+
def to_json_stream(user_proc:)
|
98
|
+
parser = EventStreamParser::Parser.new
|
99
|
+
|
100
|
+
proc do |chunk, _bytes, env|
|
101
|
+
if env && env.status != 200
|
102
|
+
raise_error = Faraday::Response::RaiseError.new
|
103
|
+
raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
|
104
|
+
end
|
105
|
+
|
106
|
+
parser.feed(chunk) do |_type, data|
|
107
|
+
user_proc.call(JSON.parse(data)) unless data == '[DONE]'
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def try_parse_json(maybe_json)
|
113
|
+
JSON.parse(maybe_json)
|
114
|
+
rescue JSON::ParserError
|
115
|
+
maybe_json
|
116
|
+
end
|
117
|
+
|
118
|
+
def parse_error_message(response)
|
119
|
+
body = begin
|
120
|
+
JSON.parse(response.body)
|
121
|
+
rescue StandardError
|
122
|
+
nil
|
123
|
+
end
|
124
|
+
message = body&.dig('error', 'message') || response.body
|
125
|
+
"#{response.status} Error: #{message}"
|
126
|
+
end
|
127
|
+
|
128
|
+
# END-CODE-FROM
|
129
|
+
|
130
|
+
def handle_response(response, response_class = GroqResponse)
|
69
131
|
case response.status
|
70
132
|
when 200..299
|
71
|
-
|
133
|
+
response_class.new(response.body)
|
72
134
|
when 401
|
73
|
-
raise Durable::Llm::AuthenticationError, response
|
135
|
+
raise Durable::Llm::AuthenticationError, parse_error_message(response)
|
74
136
|
when 429
|
75
|
-
raise Durable::Llm::RateLimitError, response
|
137
|
+
raise Durable::Llm::RateLimitError, parse_error_message(response)
|
76
138
|
when 400..499
|
77
|
-
raise Durable::Llm::InvalidRequestError, response
|
139
|
+
raise Durable::Llm::InvalidRequestError, parse_error_message(response)
|
78
140
|
when 500..599
|
79
|
-
raise Durable::Llm::ServerError, response
|
141
|
+
raise Durable::Llm::ServerError, parse_error_message(response)
|
80
142
|
else
|
81
143
|
raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
|
82
144
|
end
|
@@ -93,6 +155,14 @@ module Durable
|
|
93
155
|
@raw_response['choices'].map { |choice| GroqChoice.new(choice) }
|
94
156
|
end
|
95
157
|
|
158
|
+
def data
|
159
|
+
@raw_response['data']
|
160
|
+
end
|
161
|
+
|
162
|
+
def embedding
|
163
|
+
@raw_response.dig('data', 0, 'embedding')
|
164
|
+
end
|
165
|
+
|
96
166
|
def to_s
|
97
167
|
choices.map(&:to_s).join(' ')
|
98
168
|
end
|
@@ -131,15 +201,12 @@ module Durable
|
|
131
201
|
class GroqStreamResponse
|
132
202
|
attr_reader :choices
|
133
203
|
|
134
|
-
def initialize(
|
135
|
-
|
136
|
-
puts json_frag
|
137
|
-
parsed = JSON.parse(json_frag)
|
138
|
-
@choices = parsed['choices'].map { |choice| GroqStreamChoice.new(choice) }
|
204
|
+
def initialize(parsed)
|
205
|
+
@choices = GroqStreamChoice.new(parsed['choices'])
|
139
206
|
end
|
140
207
|
|
141
208
|
def to_s
|
142
|
-
@choices.
|
209
|
+
@choices.to_s
|
143
210
|
end
|
144
211
|
end
|
145
212
|
|
@@ -147,8 +214,9 @@ module Durable
|
|
147
214
|
attr_reader :delta, :finish_reason
|
148
215
|
|
149
216
|
def initialize(choice)
|
150
|
-
@
|
151
|
-
@
|
217
|
+
@choice = [choice].flatten.first
|
218
|
+
@delta = GroqStreamDelta.new(@choice['delta'])
|
219
|
+
@finish_reason = @choice['finish_reason']
|
152
220
|
end
|
153
221
|
|
154
222
|
def to_s
|
@@ -168,7 +236,21 @@ module Durable
|
|
168
236
|
@content || ''
|
169
237
|
end
|
170
238
|
end
|
239
|
+
|
240
|
+
# Wraps a Groq embeddings API response, exposing the first vector.
class GroqEmbeddingResponse
  attr_reader :embedding

  def initialize(data)
    @embedding = data.dig('data', 0, 'embedding')
  end

  def to_a
    embedding
  end
end
|
171
251
|
end
|
172
252
|
end
|
173
253
|
end
|
174
254
|
end
|
255
|
+
|
256
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|
@@ -1,13 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file implements the Hugging Face provider for accessing Hugging Face's inference API models.
|
4
|
+
|
1
5
|
require 'faraday'
|
2
6
|
require 'json'
|
3
7
|
require 'durable/llm/errors'
|
4
8
|
require 'durable/llm/providers/base'
|
9
|
+
require 'event_stream_parser'
|
5
10
|
|
6
11
|
module Durable
|
7
12
|
module Llm
|
8
13
|
module Providers
|
14
|
+
# Hugging Face provider for accessing Hugging Face's inference API models.
|
15
|
+
#
|
16
|
+
# Provides completion, embedding, and streaming capabilities with authentication
|
17
|
+
# handling, error management, and response normalization.
|
9
18
|
class Huggingface < Durable::Llm::Providers::Base
|
10
|
-
BASE_URL = 'https://api-inference.huggingface.co
|
19
|
+
BASE_URL = 'https://api-inference.huggingface.co'
|
11
20
|
|
12
21
|
def default_api_key
|
13
22
|
Durable::Llm.configuration.huggingface&.api_key || ENV['HUGGINGFACE_API_KEY']
|
@@ -22,11 +31,12 @@ module Durable
|
|
22
31
|
faraday.response :json
|
23
32
|
faraday.adapter Faraday.default_adapter
|
24
33
|
end
|
34
|
+
super()
|
25
35
|
end
|
26
36
|
|
27
37
|
def completion(options)
|
28
38
|
model = options.delete(:model) || 'gpt2'
|
29
|
-
response = @conn.post("/#{model}") do |req|
|
39
|
+
response = @conn.post("models/#{model}") do |req|
|
30
40
|
req.headers['Authorization'] = "Bearer #{@api_key}"
|
31
41
|
req.body = options
|
32
42
|
end
|
@@ -34,33 +44,84 @@ module Durable
|
|
34
44
|
handle_response(response)
|
35
45
|
end
|
36
46
|
|
47
|
+
def embedding(model:, input:, **options)
|
48
|
+
response = @conn.post("models/#{model}") do |req|
|
49
|
+
req.headers['Authorization'] = "Bearer #{@api_key}"
|
50
|
+
req.body = { inputs: input, **options }
|
51
|
+
end
|
52
|
+
|
53
|
+
handle_response(response, HuggingfaceEmbeddingResponse)
|
54
|
+
end
|
55
|
+
|
37
56
|
def models
|
38
57
|
self.class.models
|
39
58
|
end
|
40
59
|
|
60
|
+
def self.stream?
|
61
|
+
true
|
62
|
+
end
|
63
|
+
|
64
|
+
def stream(options)
|
65
|
+
model = options.delete(:model) || 'gpt2'
|
66
|
+
options[:stream] = true
|
67
|
+
|
68
|
+
@conn.post("models/#{model}") do |req|
|
69
|
+
req.headers['Authorization'] = "Bearer #{@api_key}"
|
70
|
+
req.headers['Accept'] = 'text/event-stream'
|
71
|
+
req.body = options
|
72
|
+
req.options.on_data = to_json_stream(user_proc: proc { |chunk|
|
73
|
+
yield HuggingfaceStreamResponse.new(chunk)
|
74
|
+
})
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
41
78
|
def self.models
|
42
79
|
%w[gpt2 bert-base-uncased distilbert-base-uncased] # could use expansion
|
43
80
|
end
|
44
81
|
|
45
82
|
private
|
46
83
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
84
|
+
# CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
|
85
|
+
# MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
|
86
|
+
def to_json_stream(user_proc:)
|
87
|
+
parser = EventStreamParser::Parser.new
|
88
|
+
|
89
|
+
proc do |chunk, _bytes, env|
|
90
|
+
if env && env.status != 200
|
91
|
+
raise_error = Faraday::Response::RaiseError.new
|
92
|
+
raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
|
93
|
+
end
|
94
|
+
|
95
|
+
parser.feed(chunk) do |_type, data|
|
96
|
+
user_proc.call(JSON.parse(data)) unless data == '[DONE]'
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def try_parse_json(maybe_json)
|
102
|
+
JSON.parse(maybe_json)
|
103
|
+
rescue JSON::ParserError
|
104
|
+
maybe_json
|
105
|
+
end
|
106
|
+
|
107
|
+
def handle_response(response, response_class = HuggingfaceResponse)
|
108
|
+
return response_class.new(response.body) if (200..299).cover?(response.status)
|
109
|
+
|
110
|
+
error_class = error_class_for_status(response.status)
|
111
|
+
raise error_class, response.body['error'] || "HTTP #{response.status}"
|
112
|
+
end
|
113
|
+
|
114
|
+
def error_class_for_status(status)
|
115
|
+
case status
|
116
|
+
when 401 then Durable::Llm::AuthenticationError
|
117
|
+
when 429 then Durable::Llm::RateLimitError
|
118
|
+
when 400..499 then Durable::Llm::InvalidRequestError
|
119
|
+
when 500..599 then Durable::Llm::ServerError
|
120
|
+
else Durable::Llm::APIError
|
61
121
|
end
|
62
122
|
end
|
63
123
|
|
124
|
+
# Response wrapper for Hugging Face completion API responses.
|
64
125
|
class HuggingfaceResponse
|
65
126
|
attr_reader :raw_response
|
66
127
|
|
@@ -69,7 +130,7 @@ module Durable
|
|
69
130
|
end
|
70
131
|
|
71
132
|
def choices
|
72
|
-
[
|
133
|
+
[HuggingfaceChoice.new(@raw_response)]
|
73
134
|
end
|
74
135
|
|
75
136
|
def to_s
|
@@ -77,6 +138,7 @@ module Durable
|
|
77
138
|
end
|
78
139
|
end
|
79
140
|
|
141
|
+
# Individual choice from Hugging Face completion response.
|
80
142
|
class HuggingfaceChoice
|
81
143
|
attr_reader :text
|
82
144
|
|
@@ -88,7 +150,48 @@ module Durable
|
|
88
150
|
@text
|
89
151
|
end
|
90
152
|
end
|
153
|
+
|
154
|
+
# Response wrapper for Hugging Face embedding API responses.
# The inference API returns the vector data directly as the body,
# so the payload is stored as-is.
class HuggingfaceEmbeddingResponse
  attr_reader :embedding

  def initialize(data)
    @embedding = data
  end

  def to_a
    embedding
  end
end
|
166
|
+
|
167
|
+
# Response wrapper for Hugging Face streaming API responses; delegates
# text extraction to HuggingfaceStreamToken.
class HuggingfaceStreamResponse
  attr_reader :token

  def initialize(parsed)
    @token = HuggingfaceStreamToken.new(parsed)
  end

  def to_s
    token.to_s
  end
end

# Individual token from a Hugging Face streaming response; reads the
# text at parsed['token']['text'].
class HuggingfaceStreamToken
  attr_reader :text

  def initialize(token)
    @text = token['token']['text']
  end

  def to_s
    text || ''
  end
end
|
91
192
|
end
|
92
193
|
end
|
93
194
|
end
|
94
195
|
end
|
196
|
+
|
197
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|