simple_inference 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +48 -28
- data/lib/simple_inference/client.rb +157 -66
- data/lib/simple_inference/errors.rb +11 -5
- data/lib/simple_inference/openai.rb +178 -0
- data/lib/simple_inference/response.rb +28 -0
- data/lib/simple_inference/version.rb +1 -1
- data/lib/simple_inference.rb +2 -0
- data/sig/simple_inference.rbs +68 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ad988c1bb0af4938ea72fd303943a6dc27b90f26a8128abd737e0fca6429e081
+  data.tar.gz: 6be00487c1533201ffc48afb14a64c385b434698cf1bf3ab1c5c4ab10834d06a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 066dbeee456edae89770a5ed6541d77dda53d6ebcac59a2f277e28e00dde8b12b373cdec67bb0e79f84df781397034f1ff75694560bd6f612dca608ce6252630
+  data.tar.gz: 8008d5a95c38e45465e48a3f45fe8b7fd1cffec49e16cfd54419cbed08a11d7d613715314c91c742f63430860caac1fe332e10270cd0741401e98540a0582d65
data/README.md
CHANGED
@@ -38,12 +38,13 @@ client = SimpleInference::Client.new(
   api_key: ENV["OPENAI_API_KEY"]
 )
 
-response = client.chat_completions(
+result = client.chat(
   model: "gpt-4o-mini",
   messages: [{ "role" => "user", "content" => "Hello!" }]
 )
 
-puts
+puts result.content
+p result.usage
 ```
 
 ## Configuration
@@ -84,13 +85,15 @@ client = SimpleInference::Client.new(
   api_prefix: "" # Important: Volcano Engine does not use the /v1 prefix
 )
 
-response = client.chat_completions(
+result = client.chat(
   model: "deepseek-v3-250324",
   messages: [
     { "role" => "system", "content" => "You are an AI assistant" },
     { "role" => "user", "content" => "Hello" }
   ]
 )
+
+puts result.content
 ```
 
 #### DeepSeek
@@ -150,10 +153,10 @@ client = SimpleInference::Client.new(
 
 ## API Methods
 
-### Chat
+### Chat
 
 ```ruby
-response = client.chat_completions(
+result = client.chat(
   model: "gpt-4o-mini",
   messages: [
     { "role" => "system", "content" => "You are a helpful assistant." },
@@ -163,23 +166,27 @@ response = client.chat_completions(
   max_tokens: 1000
 )
 
-puts
+puts result.content
+p result.usage
 ```
 
-### Streaming Chat
+### Streaming Chat
 
 ```ruby
-client.
+result = client.chat(
   model: "gpt-4o-mini",
-  messages: [{ "role" => "user", "content" => "Tell me a story" }]
-
-
-
+  messages: [{ "role" => "user", "content" => "Tell me a story" }],
+  stream: true,
+  include_usage: true
+) do |delta|
+  print delta
 end
 puts
+
+p result.usage
 ```
 
-
+Low-level streaming (events) is also available, and can be used as an Enumerator:
 
 ```ruby
 stream = client.chat_completions_stream(
@@ -192,6 +199,20 @@ stream.each do |event|
 end
 ```
 
+Or as an Enumerable of delta strings:
+
+```ruby
+stream = client.chat_stream(
+  model: "gpt-4o-mini",
+  messages: [{ "role" => "user", "content" => "Hello" }],
+  include_usage: true
+)
+
+stream.each { |delta| print delta }
+puts
+p stream.result&.usage
+```
+
 ### Embeddings
 
 ```ruby
@@ -200,7 +221,7 @@ response = client.embeddings(
   input: "Hello, world!"
 )
 
-vector = response
+vector = response.body["data"][0]["embedding"]
 ```
 
 ### Rerank
@@ -225,7 +246,7 @@ response = client.audio_transcriptions(
   file: File.open("audio.mp3", "rb")
 )
 
-puts response
+puts response.body["text"]
 ```
 
 ### Audio Translation
@@ -240,8 +261,7 @@ response = client.audio_translations(
 ### List Models
 
 ```ruby
-response = client.list_models
-models = response[:body]["data"]
+model_ids = client.models
 ```
 
 ### Health Check
@@ -258,14 +278,13 @@ end
 
 ## Response Format
 
-All methods return a
+All HTTP methods return a `SimpleInference::Response` with:
 
 ```ruby
-
-
-
-
-}
+response.status   # Integer HTTP status code
+response.headers  # Hash with downcased String keys
+response.body     # Parsed JSON (Hash/Array), raw String, or nil (SSE success)
+response.success? # true for 2xx
 ```
 
 ## Error Handling
@@ -277,7 +296,8 @@ begin
   client.chat_completions(model: "invalid", messages: [])
 rescue SimpleInference::Errors::HTTPError => e
   puts "HTTP #{e.status}: #{e.message}"
-
+  p e.body        # parsed body (Hash/Array/String)
+  puts e.raw_body # raw response body string (if available)
 end
 ```
 
@@ -299,10 +319,10 @@ client = SimpleInference::Client.new(
 
 response = client.chat_completions(model: "gpt-4o-mini", messages: [...])
 
-if response
+if response.success?
   # success
 else
-  puts "Error: #{response
+  puts "Error: #{response.status} - #{response.body}"
 end
 ```
 
@@ -370,7 +390,7 @@ class ChatsController < ApplicationController
       messages: [{ "role" => "user", "content" => params[:prompt] }]
    )
 
-    render json: response
+    render json: response.body
   end
 end
 ```
@@ -385,7 +405,7 @@ class EmbedJob < ApplicationJob
      input: text
    )
 
-    vector = response
+    vector = response.body["data"][0]["embedding"]
    # Store vector...
  end
end
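The README hunks above never show the logprobs options that the new `Client#chat` documents in client.rb below. A minimal sketch of how they would be passed; the model name is a placeholder and this is not an example from the package:

```ruby
require "simple_inference"

client = SimpleInference::Client.new(api_key: ENV["OPENAI_API_KEY"])

result = client.chat(
  model: "gpt-4o-mini",                                 # placeholder model
  messages: [{ "role" => "user", "content" => "Hi" }],
  request_logprobs: true, # adds logprobs: true to the request body
  top_logprobs: 5         # the documented default when request_logprobs is true
)

p result.finish_reason
p result.logprobs&.first # per-token entries, when the provider returns them
```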
data/lib/simple_inference/client.rb
CHANGED

@@ -22,21 +22,121 @@ module SimpleInference
 
     # POST /v1/chat/completions
     # params: { model: "model-name", messages: [...], ... }
-    def chat_completions(params)
+    def chat_completions(**params)
       post_json(api_path("/chat/completions"), params)
     end
 
+    # High-level helper for OpenAI-compatible chat.
+    #
+    # - Non-streaming: returns an OpenAI::ChatResult with `content` + `usage`.
+    # - Streaming: yields delta strings to the block (if given), accumulates, and returns OpenAI::ChatResult.
+    #
+    # @param model [String]
+    # @param messages [Array<Hash>]
+    # @param stream [Boolean] force streaming when true (default: block_given?)
+    # @param include_usage [Boolean, nil] when true (and streaming), requests usage in the final chunk
+    # @param request_logprobs [Boolean] when true, requests logprobs (and collects them in streaming mode)
+    # @param top_logprobs [Integer, nil] default: 5 (when request_logprobs is true)
+    # @param params [Hash] additional OpenAI parameters (max_tokens, temperature, etc.)
+    # @yield [String] delta content chunks (streaming only)
+    # @return [SimpleInference::OpenAI::ChatResult]
+    def chat(model:, messages:, stream: nil, include_usage: nil, request_logprobs: false, top_logprobs: 5, **params, &block)
+      raise ArgumentError, "model is required" if model.nil? || model.to_s.strip.empty?
+      raise ArgumentError, "messages must be an Array" unless messages.is_a?(Array)
+
+      use_stream = stream.nil? ? block_given? : stream
+
+      request = { model: model, messages: messages }.merge(params)
+      request.delete(:stream)
+      request.delete("stream")
+
+      if request_logprobs
+        request[:logprobs] = true unless request.key?(:logprobs) || request.key?("logprobs")
+        if top_logprobs && !(request.key?(:top_logprobs) || request.key?("top_logprobs"))
+          request[:top_logprobs] = top_logprobs
+        end
+      end
+
+      if use_stream && include_usage
+        stream_options = request[:stream_options] || request["stream_options"]
+        stream_options ||= {}
+
+        if stream_options.is_a?(Hash)
+          stream_options[:include_usage] = true unless stream_options.key?(:include_usage) || stream_options.key?("include_usage")
+        end
+
+        request[:stream_options] = stream_options
+      end
+
+      if use_stream
+        full = +""
+        finish_reason = nil
+        last_usage = nil
+        collected_logprobs = []
+
+        response =
+          chat_completions_stream(**request) do |event|
+            delta = OpenAI.chat_completion_chunk_delta(event)
+            if delta
+              full << delta
+              block.call(delta) if block
+            end
+
+            fr = event.is_a?(Hash) ? event.dig("choices", 0, "finish_reason") : nil
+            finish_reason = fr if fr
+
+            if request_logprobs
+              chunk_logprobs = event.is_a?(Hash) ? event.dig("choices", 0, "logprobs", "content") : nil
+              if chunk_logprobs.is_a?(Array)
+                collected_logprobs.concat(chunk_logprobs)
+              end
+            end
+
+            usage = OpenAI.chat_completion_usage(event)
+            last_usage = usage if usage
+          end
+
+        OpenAI::ChatResult.new(
+          content: full,
+          usage: last_usage || OpenAI.chat_completion_usage(response),
+          finish_reason: finish_reason || OpenAI.chat_completion_finish_reason(response),
+          logprobs: collected_logprobs.empty? ? OpenAI.chat_completion_logprobs(response) : collected_logprobs,
+          response: response
+        )
+      else
+        response = chat_completions(**request)
+        OpenAI::ChatResult.new(
+          content: OpenAI.chat_completion_content(response),
+          usage: OpenAI.chat_completion_usage(response),
+          finish_reason: OpenAI.chat_completion_finish_reason(response),
+          logprobs: OpenAI.chat_completion_logprobs(response),
+          response: response
+        )
+      end
+    end
+
+    # Streaming chat as an Enumerable.
+    #
+    # @return [SimpleInference::OpenAI::ChatStream]
+    def chat_stream(model:, messages:, include_usage: nil, request_logprobs: false, top_logprobs: 5, **params)
+      OpenAI::ChatStream.new(
+        client: self,
+        model: model,
+        messages: messages,
+        include_usage: include_usage,
+        request_logprobs: request_logprobs,
+        top_logprobs: top_logprobs,
+        params: params
+      )
+    end
+
     # POST /v1/chat/completions (streaming)
     #
     # Yields parsed JSON events from an OpenAI-style SSE stream (`text/event-stream`).
     #
     # If no block is given, returns an Enumerator.
-    def chat_completions_stream(params)
-      return enum_for(:chat_completions_stream, params) unless block_given?
-
-      unless params.is_a?(Hash)
-        raise Errors::ConfigurationError, "params must be a Hash"
-      end
+    def chat_completions_stream(**params)
+      return enum_for(:chat_completions_stream, **params) unless block_given?
 
       body = params.dup
       body.delete(:stream)
@@ -47,29 +147,29 @@ module SimpleInference
         yield event
       end
 
-      content_type = response.
+      content_type = response.headers["content-type"].to_s
 
       # Streaming case: we already yielded events from the SSE stream.
-      if response
+      if response.status >= 200 && response.status < 300 && content_type.include?("text/event-stream")
         return response
       end
 
       # Fallback when upstream does not support streaming (this repo's server).
-      if streaming_unsupported_error?(response
+      if streaming_unsupported_error?(response.status, response.body)
         fallback_body = params.dup
         fallback_body.delete(:stream)
         fallback_body.delete("stream")
 
         fallback_response = post_json(api_path("/chat/completions"), fallback_body)
-        chunk = synthesize_chat_completion_chunk(fallback_response
+        chunk = synthesize_chat_completion_chunk(fallback_response.body)
         yield chunk if chunk
         return fallback_response
       end
 
       # If we got a non-streaming success response (JSON), convert it into a single
       # chunk so streaming consumers can share the same code path.
-      if response
-        chunk = synthesize_chat_completion_chunk(response
+      if response.status >= 200 && response.status < 300
+        chunk = synthesize_chat_completion_chunk(response.body)
         yield chunk if chunk
       end
 
@@ -77,12 +177,12 @@ module SimpleInference
     end
 
     # POST /v1/embeddings
-    def embeddings(params)
+    def embeddings(**params)
       post_json(api_path("/embeddings"), params)
     end
 
     # POST /v1/rerank
-    def rerank(params)
+    def rerank(**params)
       post_json(api_path("/rerank"), params)
     end
 
@@ -91,6 +191,15 @@ module SimpleInference
       get_json(api_path("/models"))
     end
 
+    # Convenience wrapper for list_models.
+    #
+    # @return [Array<String>] model IDs
+    def models
+      response = list_models
+      data = response.body.is_a?(Hash) ? response.body["data"] : nil
+      Array(data).filter_map { |m| m.is_a?(Hash) ? m["id"] : nil }
+    end
+
     # GET /health
     def health
       get_json("/health")
@@ -99,8 +208,8 @@ module SimpleInference
     # Returns true when service is healthy, false otherwise.
     def healthy?
       response = get_json("/health", raise_on_http_error: false)
-      status_ok = response
-      body_status_ok = response.
+      status_ok = response.status == 200
+      body_status_ok = response.body.is_a?(Hash) && response.body["status"] == "ok"
       status_ok && body_status_ok
     rescue Errors::Error
       false
@@ -108,12 +217,12 @@ module SimpleInference
 
     # POST /v1/audio/transcriptions
     # params: { file: io_or_hash, model: "model-name", **audio_options }
-    def audio_transcriptions(params)
+    def audio_transcriptions(**params)
       post_multipart(api_path("/audio/transcriptions"), params)
     end
 
     # POST /v1/audio/translations
-    def audio_translations(params)
+    def audio_translations(**params)
       post_multipart(api_path("/audio/translations"), params)
     end
 
@@ -203,31 +312,26 @@ module SimpleInference
           consume_sse_buffer!(buffer, &on_event)
         end
 
-        return {
-          status: status,
-          headers: headers,
-          body: nil,
-        }
+        return Response.new(status: status, headers: headers, body: nil)
       end
 
       # Non-streaming response path (adapter doesn't support streaming or server returned JSON).
       should_parse_json = content_type.include?("json")
-      parsed_body =
-
-
-
-
-
-
-
-
-
+      parsed_body =
+        if should_parse_json
+          begin
+            parse_json(body_str)
+          rescue Errors::DecodeError
+            # Prefer HTTPError over DecodeError for non-2xx responses.
+            status >= 200 && status < 300 ? raise : body_str
+          end
+        else
+          body_str
+        end
 
-      {
-        status: status,
-        headers: headers,
-        body: parsed_body,
-      }
+      response = Response.new(status: status, headers: headers, body: parsed_body, raw_body: body_str)
+      maybe_raise_http_error(response: response, raise_on_http_error: raise_on_http_error, ignore_streaming_unsupported: true)
+      response
     rescue Timeout::Error => e
       raise Errors::TimeoutError, e.message
     rescue SocketError, SystemCallError => e
@@ -579,13 +683,6 @@ module SimpleInference
       headers = (response[:headers] || {}).transform_keys { |k| k.to_s.downcase }
       body = response[:body].to_s
 
-      maybe_raise_http_error(
-        status: status,
-        headers: headers,
-        body_str: body,
-        raise_on_http_error: raise_on_http_error
-      )
-
       should_parse_json =
         if expect_json.nil?
           content_type = headers["content-type"]
@@ -596,16 +693,19 @@ module SimpleInference
 
       parsed_body =
         if should_parse_json
-          parse_json(body)
+          begin
+            parse_json(body)
+          rescue Errors::DecodeError
+            # Prefer HTTPError over DecodeError for non-2xx responses.
+            status >= 200 && status < 300 ? raise : body
+          end
         else
          body
        end
 
-      {
-        status: status,
-        headers: headers,
-        body: parsed_body,
-      }
+      response = Response.new(status: status, headers: headers, body: parsed_body, raw_body: body)
+      maybe_raise_http_error(response: response, raise_on_http_error: raise_on_http_error)
+      response
    rescue Timeout::Error => e
      raise Errors::TimeoutError, e.message
    rescue SocketError, SystemCallError => e
@@ -648,26 +748,17 @@ module SimpleInference
       end
     end
 
-    def maybe_raise_http_error(
-      status:,
-      headers:,
-      body_str:,
-      raise_on_http_error:,
-      ignore_streaming_unsupported: false,
-      parsed_body: nil
-    )
+    def maybe_raise_http_error(response:, raise_on_http_error:, ignore_streaming_unsupported: false)
       return unless raise_on_http_error?(raise_on_http_error)
-      return
+      return if response.success?
 
       # Do not raise for the known "streaming unsupported" case; the caller will
       # perform a non-streaming retry fallback.
-      return if ignore_streaming_unsupported && streaming_unsupported_error?(status,
+      return if ignore_streaming_unsupported && streaming_unsupported_error?(response.status, response.body)
 
       raise Errors::HTTPError.new(
-        http_error_message(status,
-        status: status,
-        headers: headers,
-        body: body_str
+        http_error_message(response.status, response.raw_body.to_s, parsed_body: response.body),
+        response: response
       )
     end
   end
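Note the signature change running through this file: `chat_completions(params)` and the other endpoint methods become `chat_completions(**params)`, so 0.1.4 callers that passed a positional Hash must now splat it. A minimal migration sketch (placeholder model name):

```ruby
require "simple_inference"

client = SimpleInference::Client.new(api_key: ENV["OPENAI_API_KEY"])

params = {
  model: "gpt-4o-mini", # placeholder model
  messages: [{ "role" => "user", "content" => "Hi" }]
}

# 0.1.4 accepted a positional Hash:
#   client.chat_completions(params)
# 0.1.5 declares **params, so splat an existing Hash instead:
response = client.chat_completions(**params)

puts response.body["choices"][0]["message"]["content"]
```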
data/lib/simple_inference/errors.rb
CHANGED

@@ -7,14 +7,20 @@ module SimpleInference
     class ConfigurationError < Error; end
 
     class HTTPError < Error
-      attr_reader :status, :headers, :body
+      attr_reader :response
 
-      def initialize(message, status:, headers:, body:)
+      def initialize(message, response:)
         super(message)
-        @status = status
-        @headers = headers
-        @body = body
+        @response = response
       end
+
+      def status = @response.status
+
+      def headers = @response.headers
+
+      def body = @response.body
+
+      def raw_body = @response.raw_body
     end
 
     class TimeoutError < Error; end
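Because the old `status`/`headers`/`body` readers are re-created as delegators onto the wrapped `Response`, 0.1.4-era rescue blocks keep working unchanged. A minimal sketch:

```ruby
require "simple_inference"

client = SimpleInference::Client.new(api_key: ENV["OPENAI_API_KEY"])

begin
  client.chat_completions(model: "invalid", messages: [])
rescue SimpleInference::Errors::HTTPError => e
  puts "HTTP #{e.status}: #{e.message}" # delegated readers, same call sites as 0.1.4
  p e.response.success?                 # => false; the full Response is now exposed
  p e.response.headers["content-type"]
end
```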
data/lib/simple_inference/openai.rb
ADDED

@@ -0,0 +1,178 @@
+# frozen_string_literal: true
+
+module SimpleInference
+  # Helpers for extracting common fields from OpenAI-compatible `chat/completions` payloads.
+  #
+  # These helpers accept either:
+  # - A `SimpleInference::Response`, or
+  # - A parsed `body` / `chunk` hash (typically from JSON.parse, with String keys)
+  #
+  # Providers are "OpenAI-compatible", but many differ in subtle ways:
+  # - Some return `choices[0].text` instead of `choices[0].message.content`
+  # - Some represent `content` as an array or structured hash
+  #
+  # This module normalizes those shapes so application code can stay small and predictable.
+  module OpenAI
+    module_function
+
+    ChatResult =
+      Struct.new(
+        :content,
+        :usage,
+        :finish_reason,
+        :logprobs,
+        :response,
+        keyword_init: true
+      )
+
+    # Enumerable wrapper for streaming chat responses.
+    #
+    # @example
+    #   stream = client.chat_stream(model: "...", messages: [...], include_usage: true)
+    #   stream.each { |delta| print delta }
+    #   p stream.result.usage
+    class ChatStream
+      include Enumerable
+
+      attr_reader :result
+
+      def initialize(client:, model:, messages:, include_usage:, request_logprobs:, top_logprobs:, params:)
+        @client = client
+        @model = model
+        @messages = messages
+        @include_usage = include_usage
+        @request_logprobs = request_logprobs
+        @top_logprobs = top_logprobs
+        @params = params
+        @started = false
+        @result = nil
+      end
+
+      def each
+        return enum_for(:each) unless block_given?
+        raise Errors::ConfigurationError, "ChatStream can only be consumed once" if @started
+
+        @started = true
+        @result =
+          @client.chat(
+            model: @model,
+            messages: @messages,
+            stream: true,
+            include_usage: @include_usage,
+            request_logprobs: @request_logprobs,
+            top_logprobs: @top_logprobs,
+            **(@params || {})
+          ) { |delta| yield delta }
+      end
+    end
+
+    # Extract assistant content from a non-streaming chat completion.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash or parsed body hash
+    # @return [String, nil]
+    def chat_completion_content(response_or_body)
+      body = unwrap_body(response_or_body)
+      choice = first_choice(body)
+      return nil unless choice
+
+      raw =
+        choice.dig("message", "content") ||
+        choice["text"]
+
+      normalize_content(raw)
+    end
+
+    # Extract finish_reason from a non-streaming chat completion.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash or parsed body hash
+    # @return [String, nil]
+    def chat_completion_finish_reason(response_or_body)
+      body = unwrap_body(response_or_body)
+      first_choice(body)&.[]("finish_reason")
+    end
+
+    # Extract usage from a chat completion response or a final streaming chunk.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash, body hash, or chunk hash
+    # @return [Hash, nil] symbol-keyed usage hash
+    def chat_completion_usage(response_or_body)
+      body = unwrap_body(response_or_body)
+      usage = body.is_a?(Hash) ? body["usage"] : nil
+      return nil unless usage.is_a?(Hash)
+
+      {
+        prompt_tokens: usage["prompt_tokens"],
+        completion_tokens: usage["completion_tokens"],
+        total_tokens: usage["total_tokens"],
+      }.compact
+    end
+
+    # Extract logprobs (if present) from a non-streaming chat completion.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash or parsed body hash
+    # @return [Array<Hash>, nil]
+    def chat_completion_logprobs(response_or_body)
+      body = unwrap_body(response_or_body)
+      first_choice(body)&.dig("logprobs", "content")
+    end
+
+    # Extract delta content from a streaming `chat.completion.chunk`.
+    #
+    # @param chunk [Hash] parsed streaming event hash
+    # @return [String, nil]
+    def chat_completion_chunk_delta(chunk)
+      chunk = unwrap_body(chunk)
+      return nil unless chunk.is_a?(Hash)
+
+      raw = chunk.dig("choices", 0, "delta", "content")
+      normalize_content(raw)
+    end
+
+    # Normalize `content` shapes into a simple String.
+    #
+    # Supports strings, arrays of parts, and part hashes.
+    #
+    # @param value [Object]
+    # @return [String, nil]
+    def normalize_content(value)
+      case value
+      when String
+        value
+      when Array
+        value.map { |part| normalize_content(part) }.join
+      when Hash
+        value["text"] ||
+          value["content"] ||
+          value.to_s
+      when nil
+        nil
+      else
+        value.to_s
+      end
+    end
+
+    # Unwrap a full SimpleInference response into its `:body`, otherwise return the object.
+    #
+    # @param obj [Object]
+    # @return [Object]
+    def unwrap_body(obj)
+      return {} unless obj
+      return obj.body || {} if obj.respond_to?(:body)
+
+      obj
+    end
+
+    def first_choice(body)
+      return nil unless body.is_a?(Hash)
+
+      choices = body["choices"]
+      return nil unless choices.is_a?(Array) && !choices.empty?
+
+      choice0 = choices[0]
+      return nil unless choice0.is_a?(Hash)
+
+      choice0
+    end
+    private_class_method :first_choice
+  end
+end
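Since `unwrap_body` passes plain hashes straight through, these helpers can be exercised without a `Response` object. A minimal sketch with a hand-built chunk hash (the values are illustrative, not captured provider output):

```ruby
require "simple_inference"

# Shaped like a parsed `chat.completion.chunk` event with array-of-parts content.
chunk = {
  "choices" => [
    { "delta" => { "content" => [{ "type" => "text", "text" => "Hel" }, "lo"] } }
  ]
}

# Array-of-parts content is flattened into a single String:
p SimpleInference::OpenAI.chat_completion_chunk_delta(chunk) # => "Hello"
p SimpleInference::OpenAI.normalize_content(nil)             # => nil
```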
data/lib/simple_inference/response.rb
ADDED

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module SimpleInference
+  # A lightweight wrapper for HTTP responses returned by SimpleInference.
+  #
+  # - `status` is an Integer HTTP status code
+  # - `headers` is a Hash with downcased String keys
+  # - `body` is a parsed JSON Hash/Array, a String, or nil (e.g. SSE streaming success)
+  # - `raw_body` is the raw response body String (when available)
+  class Response
+    attr_reader :status, :headers, :body, :raw_body
+
+    def initialize(status:, headers:, body:, raw_body: nil)
+      @status = status.to_i
+      @headers = (headers || {}).transform_keys { |k| k.to_s.downcase }
+      @body = body
+      @raw_body = raw_body
+    end
+
+    def success?
+      status >= 200 && status < 300
+    end
+
+    def to_h
+      { status: status, headers: headers, body: body, raw_body: raw_body }
+    end
+  end
+end
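A minimal sketch of the wrapper's behavior with illustrative values; note that header keys are downcased on construction:

```ruby
require "simple_inference"

response = SimpleInference::Response.new(
  status: 200,
  headers: { "Content-Type" => "application/json" },
  body: { "status" => "ok" },
  raw_body: '{"status":"ok"}'
)

p response.success?  # => true
p response.headers   # => {"content-type"=>"application/json"}
p response.to_h.keys # => [:status, :headers, :body, :raw_body]
```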
data/lib/simple_inference.rb
CHANGED
@@ -4,6 +4,8 @@ require_relative "simple_inference/version"
 require_relative "simple_inference/config"
 require_relative "simple_inference/errors"
 require_relative "simple_inference/http_adapter"
+require_relative "simple_inference/response"
+require_relative "simple_inference/openai"
 require_relative "simple_inference/client"
 
 module SimpleInference
data/sig/simple_inference.rbs
CHANGED
@@ -1,3 +1,71 @@
 module SimpleInference
   VERSION: String
+
+  class Response
+    attr_reader status: Integer
+    attr_reader headers: Hash[String, untyped]
+    attr_reader body: untyped
+    attr_reader raw_body: String?
+
+    def initialize: (status: Integer, headers: Hash[untyped, untyped], body: untyped, ?raw_body: String?) -> void
+    def success?: () -> bool
+    def to_h: () -> Hash[Symbol, untyped]
+  end
+
+  module OpenAI
+    class ChatResult
+      attr_reader content: String?
+      attr_reader usage: Hash[Symbol, untyped]?
+      attr_reader finish_reason: String?
+      attr_reader logprobs: Array[Hash[untyped, untyped]]?
+      attr_reader response: Response
+    end
+
+    class ChatStream
+      include Enumerable[String]
+      attr_reader result: ChatResult?
+    end
+
+    def self.chat_completion_content: (untyped) -> String?
+    def self.chat_completion_finish_reason: (untyped) -> String?
+    def self.chat_completion_usage: (untyped) -> Hash[Symbol, untyped]?
+    def self.chat_completion_logprobs: (untyped) -> Array[Hash[untyped, untyped]]?
+    def self.chat_completion_chunk_delta: (untyped) -> String?
+    def self.normalize_content: (untyped) -> String?
+  end
+
+  class Client
+    def initialize: (?Hash[untyped, untyped]) -> void
+
+    def chat: (
+      model: String,
+      messages: Array[Hash[untyped, untyped]],
+      ?stream: bool?,
+      ?include_usage: bool?,
+      ?request_logprobs: bool,
+      ?top_logprobs: Integer?,
+      **untyped
+    ) { (String) -> void } -> OpenAI::ChatResult
+
+    def chat_stream: (
+      model: String,
+      messages: Array[Hash[untyped, untyped]],
+      ?include_usage: bool?,
+      ?request_logprobs: bool,
+      ?top_logprobs: Integer?,
+      **untyped
+    ) -> OpenAI::ChatStream
+
+    def chat_completions: (**untyped) -> Response
+    def chat_completions_stream: (**untyped) { (Hash[untyped, untyped]) -> void } -> Response
+
+    def embeddings: (**untyped) -> Response
+    def rerank: (**untyped) -> Response
+    def list_models: () -> Response
+    def models: () -> Array[String]
+    def health: () -> Response
+    def healthy?: () -> bool
+    def audio_transcriptions: (**untyped) -> Response
+    def audio_translations: (**untyped) -> Response
+  end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: simple_inference
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - jasl
@@ -27,6 +27,8 @@ files:
 - lib/simple_inference/http_adapter.rb
 - lib/simple_inference/http_adapters/default.rb
 - lib/simple_inference/http_adapters/httpx.rb
+- lib/simple_inference/openai.rb
+- lib/simple_inference/response.rb
 - lib/simple_inference/version.rb
 - sig/simple_inference.rbs
 homepage: https://github.com/jasl/simple_inference.rb