simple_inference 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8d8b01060969cbab2df30a38e16b7952a877188e89bd720209c15b57f9f79687
-  data.tar.gz: e278f52f76cf6f7bd3f74e567731bbdec016769b2b720161e9907348fd9b54c3
+  metadata.gz: ad988c1bb0af4938ea72fd303943a6dc27b90f26a8128abd737e0fca6429e081
+  data.tar.gz: 6be00487c1533201ffc48afb14a64c385b434698cf1bf3ab1c5c4ab10834d06a
 SHA512:
-  metadata.gz: cc6724a0fbe640d7af0d6bb35bfee81e6b95d501b23734f2874dfddbb2f71dcb7ae59557b742427bb9322804fbca632cbe95abe68f9ea26709303fea86550605
-  data.tar.gz: 871b06d6e585bac84cf38ac3abef77b3940dd41f4868c76e08b19c317c2b35c93f81adde9a0ec73e9c20a689062cade65c0115d6e82afab86444d253f9964688
+  metadata.gz: 066dbeee456edae89770a5ed6541d77dda53d6ebcac59a2f277e28e00dde8b12b373cdec67bb0e79f84df781397034f1ff75694560bd6f612dca608ce6252630
+  data.tar.gz: 8008d5a95c38e45465e48a3f45fe8b7fd1cffec49e16cfd54419cbed08a11d7d613715314c91c742f63430860caac1fe332e10270cd0741401e98540a0582d65
data/README.md CHANGED
@@ -38,12 +38,13 @@ client = SimpleInference::Client.new(
   api_key: ENV["OPENAI_API_KEY"]
 )
 
-response = client.chat_completions(
+result = client.chat(
   model: "gpt-4o-mini",
   messages: [{ "role" => "user", "content" => "Hello!" }]
 )
 
-puts response[:body]["choices"][0]["message"]["content"]
+puts result.content
+p result.usage
 ```
 
 ## Configuration
@@ -84,13 +85,15 @@ client = SimpleInference::Client.new(
   api_prefix: "" # Important: Volcengine does not use the /v1 prefix
 )
 
-response = client.chat_completions(
+result = client.chat(
   model: "deepseek-v3-250324",
   messages: [
     { "role" => "system", "content" => "You are an AI assistant" },
     { "role" => "user", "content" => "Hello" }
   ]
 )
+
+puts result.content
 ```
 
 #### DeepSeek
@@ -150,10 +153,10 @@ client = SimpleInference::Client.new(
 
 ## API Methods
 
-### Chat Completions
+### Chat
 
 ```ruby
-response = client.chat_completions(
+result = client.chat(
   model: "gpt-4o-mini",
   messages: [
     { "role" => "system", "content" => "You are a helpful assistant." },
@@ -163,23 +166,27 @@ response = client.chat_completions(
   max_tokens: 1000
 )
 
-puts response[:body]["choices"][0]["message"]["content"]
+puts result.content
+p result.usage
 ```
 
-### Streaming Chat Completions
+### Streaming Chat
 
 ```ruby
-client.chat_completions_stream(
+result = client.chat(
   model: "gpt-4o-mini",
-  messages: [{ "role" => "user", "content" => "Tell me a story" }]
-) do |event|
-  delta = event.dig("choices", 0, "delta", "content")
-  print delta if delta
+  messages: [{ "role" => "user", "content" => "Tell me a story" }],
+  stream: true,
+  include_usage: true
+) do |delta|
+  print delta
 end
 puts
+
+p result.usage
 ```
 
-Or use as an Enumerator:
+Low-level streaming (events) is also available and can be used as an Enumerator:
 
 ```ruby
 stream = client.chat_completions_stream(
@@ -192,6 +199,20 @@ stream.each do |event|
 end
 ```
 
+Or as an Enumerable of delta strings:
+
+```ruby
+stream = client.chat_stream(
+  model: "gpt-4o-mini",
+  messages: [{ "role" => "user", "content" => "Hello" }],
+  include_usage: true
+)
+
+stream.each { |delta| print delta }
+puts
+p stream.result&.usage
+```
+
 ### Embeddings
 
 ```ruby
@@ -200,7 +221,7 @@ response = client.embeddings(
   input: "Hello, world!"
 )
 
-vector = response[:body]["data"][0]["embedding"]
+vector = response.body["data"][0]["embedding"]
 ```
 
 ### Rerank
@@ -225,7 +246,7 @@ response = client.audio_transcriptions(
   file: File.open("audio.mp3", "rb")
 )
 
-puts response[:body]["text"]
+puts response.body["text"]
 ```
 
 ### Audio Translation
@@ -240,8 +261,7 @@ response = client.audio_translations(
 ### List Models
 
 ```ruby
-response = client.list_models
-models = response[:body]["data"]
+model_ids = client.models
 ```
 
 ### Health Check
@@ -258,14 +278,13 @@ end
 
 ## Response Format
 
-All methods return a Hash with:
+All HTTP methods return a `SimpleInference::Response` with:
 
 ```ruby
-{
-  status: 200, # HTTP status code
-  headers: { "content-type" => "application/json", ... }, # Response headers (lowercase keys)
-  body: { ... } # Parsed JSON body (Hash) or raw String
-}
+response.status   # Integer HTTP status code
+response.headers  # Hash with downcased String keys
+response.body     # Parsed JSON (Hash/Array), raw String, or nil (SSE success)
+response.success? # true for 2xx
 ```
 
 ## Error Handling
@@ -277,7 +296,8 @@ begin
   client.chat_completions(model: "invalid", messages: [])
 rescue SimpleInference::Errors::HTTPError => e
   puts "HTTP #{e.status}: #{e.message}"
-  puts e.body # raw response body
+  p e.body        # parsed body (Hash/Array/String)
+  puts e.raw_body # raw response body string (if available)
 end
 ```
 
@@ -299,10 +319,10 @@ client = SimpleInference::Client.new(
 
 response = client.chat_completions(model: "gpt-4o-mini", messages: [...])
 
-if response[:status] == 200
+if response.success?
   # success
 else
-  puts "Error: #{response[:status]} - #{response[:body]}"
+  puts "Error: #{response.status} - #{response.body}"
 end
 ```
 
@@ -370,7 +390,7 @@ class ChatsController < ApplicationController
       messages: [{ "role" => "user", "content" => params[:prompt] }]
     )
 
-    render json: response[:body]
+    render json: response.body
   end
 end
 ```
@@ -385,7 +405,7 @@ class EmbedJob < ApplicationJob
       input: text
     )
 
-    vector = response[:body]["data"][0]["embedding"]
+    vector = response.body["data"][0]["embedding"]
     # Store vector...
   end
 end
data/lib/simple_inference/client.rb CHANGED
@@ -22,21 +22,121 @@ module SimpleInference
 
     # POST /v1/chat/completions
    # params: { model: "model-name", messages: [...], ... }
-    def chat_completions(params)
+    def chat_completions(**params)
       post_json(api_path("/chat/completions"), params)
     end
 
+    # High-level helper for OpenAI-compatible chat.
+    #
+    # - Non-streaming: returns an OpenAI::ChatResult with `content` + `usage`.
+    # - Streaming: yields delta strings to the block (if given), accumulates them, and returns an OpenAI::ChatResult.
+    #
+    # @param model [String]
+    # @param messages [Array<Hash>]
+    # @param stream [Boolean] force streaming when true (default: block_given?)
+    # @param include_usage [Boolean, nil] when true (and streaming), requests usage in the final chunk
+    # @param request_logprobs [Boolean] when true, requests logprobs (and collects them in streaming mode)
+    # @param top_logprobs [Integer, nil] default: 5 (when request_logprobs is true)
+    # @param params [Hash] additional OpenAI parameters (max_tokens, temperature, etc.)
+    # @yield [String] delta content chunks (streaming only)
+    # @return [SimpleInference::OpenAI::ChatResult]
+    def chat(model:, messages:, stream: nil, include_usage: nil, request_logprobs: false, top_logprobs: 5, **params, &block)
+      raise ArgumentError, "model is required" if model.nil? || model.to_s.strip.empty?
+      raise ArgumentError, "messages must be an Array" unless messages.is_a?(Array)
+
+      use_stream = stream.nil? ? block_given? : stream
+
+      request = { model: model, messages: messages }.merge(params)
+      request.delete(:stream)
+      request.delete("stream")
+
+      if request_logprobs
+        request[:logprobs] = true unless request.key?(:logprobs) || request.key?("logprobs")
+        if top_logprobs && !(request.key?(:top_logprobs) || request.key?("top_logprobs"))
+          request[:top_logprobs] = top_logprobs
+        end
+      end
+
+      if use_stream && include_usage
+        stream_options = request[:stream_options] || request["stream_options"]
+        stream_options ||= {}
+
+        if stream_options.is_a?(Hash)
+          stream_options[:include_usage] = true unless stream_options.key?(:include_usage) || stream_options.key?("include_usage")
+        end
+
+        request[:stream_options] = stream_options
+      end
+
+      if use_stream
+        full = +""
+        finish_reason = nil
+        last_usage = nil
+        collected_logprobs = []
+
+        response =
+          chat_completions_stream(**request) do |event|
+            delta = OpenAI.chat_completion_chunk_delta(event)
+            if delta
+              full << delta
+              block.call(delta) if block
+            end
+
+            fr = event.is_a?(Hash) ? event.dig("choices", 0, "finish_reason") : nil
+            finish_reason = fr if fr
+
+            if request_logprobs
+              chunk_logprobs = event.is_a?(Hash) ? event.dig("choices", 0, "logprobs", "content") : nil
+              if chunk_logprobs.is_a?(Array)
+                collected_logprobs.concat(chunk_logprobs)
+              end
+            end
+
+            usage = OpenAI.chat_completion_usage(event)
+            last_usage = usage if usage
+          end
+
+        OpenAI::ChatResult.new(
+          content: full,
+          usage: last_usage || OpenAI.chat_completion_usage(response),
+          finish_reason: finish_reason || OpenAI.chat_completion_finish_reason(response),
+          logprobs: collected_logprobs.empty? ? OpenAI.chat_completion_logprobs(response) : collected_logprobs,
+          response: response
+        )
+      else
+        response = chat_completions(**request)
+        OpenAI::ChatResult.new(
+          content: OpenAI.chat_completion_content(response),
+          usage: OpenAI.chat_completion_usage(response),
+          finish_reason: OpenAI.chat_completion_finish_reason(response),
+          logprobs: OpenAI.chat_completion_logprobs(response),
+          response: response
+        )
+      end
+    end
+
+    # Streaming chat as an Enumerable.
+    #
+    # @return [SimpleInference::OpenAI::ChatStream]
+    def chat_stream(model:, messages:, include_usage: nil, request_logprobs: false, top_logprobs: 5, **params)
+      OpenAI::ChatStream.new(
+        client: self,
+        model: model,
+        messages: messages,
+        include_usage: include_usage,
+        request_logprobs: request_logprobs,
+        top_logprobs: top_logprobs,
+        params: params
+      )
+    end
+
     # POST /v1/chat/completions (streaming)
     #
     # Yields parsed JSON events from an OpenAI-style SSE stream (`text/event-stream`).
     #
     # If no block is given, returns an Enumerator.
-    def chat_completions_stream(params)
-      return enum_for(:chat_completions_stream, params) unless block_given?
-
-      unless params.is_a?(Hash)
-        raise Errors::ConfigurationError, "params must be a Hash"
-      end
+    def chat_completions_stream(**params)
+      return enum_for(:chat_completions_stream, **params) unless block_given?
 
       body = params.dup
       body.delete(:stream)
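The `request_logprobs` path added above ships without a README example. A minimal sketch, assuming an upstream endpoint that honors OpenAI-style `logprobs`/`top_logprobs` parameters (the model name and token-entry keys follow the OpenAI response schema and are illustrative):

```ruby
# Sketch: token logprobs via the high-level helper. `chat` injects
# logprobs: true and top_logprobs into the request; whether entries come
# back depends entirely on the upstream model/server.
result = client.chat(
  model: "gpt-4o-mini", # illustrative model name
  messages: [{ "role" => "user", "content" => "Hi" }],
  request_logprobs: true,
  top_logprobs: 3
)

puts result.content
# Entries mirror OpenAI's choices[0].logprobs.content items.
Array(result.logprobs).each do |entry|
  printf("%-12s %.4f\n", entry["token"].inspect, entry["logprob"])
end
```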
@@ -47,29 +147,29 @@ module SimpleInference
         yield event
       end
 
-      content_type = response.dig(:headers, "content-type").to_s
+      content_type = response.headers["content-type"].to_s
 
       # Streaming case: we already yielded events from the SSE stream.
-      if response[:status].to_i >= 200 && response[:status].to_i < 300 && content_type.include?("text/event-stream")
+      if response.status >= 200 && response.status < 300 && content_type.include?("text/event-stream")
        return response
       end
 
       # Fallback when upstream does not support streaming (this repo's server).
-      if streaming_unsupported_error?(response[:status], response[:body])
+      if streaming_unsupported_error?(response.status, response.body)
         fallback_body = params.dup
         fallback_body.delete(:stream)
         fallback_body.delete("stream")
 
         fallback_response = post_json(api_path("/chat/completions"), fallback_body)
-        chunk = synthesize_chat_completion_chunk(fallback_response[:body])
+        chunk = synthesize_chat_completion_chunk(fallback_response.body)
         yield chunk if chunk
         return fallback_response
       end
 
       # If we got a non-streaming success response (JSON), convert it into a single
       # chunk so streaming consumers can share the same code path.
-      if response[:status].to_i >= 200 && response[:status].to_i < 300
-        chunk = synthesize_chat_completion_chunk(response[:body])
+      if response.status >= 200 && response.status < 300
+        chunk = synthesize_chat_completion_chunk(response.body)
         yield chunk if chunk
       end
 
@@ -77,12 +177,12 @@ module SimpleInference
     end
 
     # POST /v1/embeddings
-    def embeddings(params)
+    def embeddings(**params)
       post_json(api_path("/embeddings"), params)
     end
 
     # POST /v1/rerank
-    def rerank(params)
+    def rerank(**params)
       post_json(api_path("/rerank"), params)
     end
 
@@ -91,6 +191,15 @@ module SimpleInference
       get_json(api_path("/models"))
     end
 
+    # Convenience wrapper for list_models.
+    #
+    # @return [Array<String>] model IDs
+    def models
+      response = list_models
+      data = response.body.is_a?(Hash) ? response.body["data"] : nil
+      Array(data).filter_map { |m| m.is_a?(Hash) ? m["id"] : nil }
+    end
+
     # GET /health
     def health
       get_json("/health")
@@ -99,8 +208,8 @@ module SimpleInference
     # Returns true when service is healthy, false otherwise.
     def healthy?
       response = get_json("/health", raise_on_http_error: false)
-      status_ok = response[:status] == 200
-      body_status_ok = response.dig(:body, "status") == "ok"
+      status_ok = response.status == 200
+      body_status_ok = response.body.is_a?(Hash) && response.body["status"] == "ok"
       status_ok && body_status_ok
     rescue Errors::Error
       false
@@ -108,12 +217,12 @@ module SimpleInference
 
     # POST /v1/audio/transcriptions
     # params: { file: io_or_hash, model: "model-name", **audio_options }
-    def audio_transcriptions(params)
+    def audio_transcriptions(**params)
       post_multipart(api_path("/audio/transcriptions"), params)
     end
 
     # POST /v1/audio/translations
-    def audio_translations(params)
+    def audio_translations(**params)
       post_multipart(api_path("/audio/translations"), params)
     end
 
@@ -203,31 +312,26 @@ module SimpleInference
           consume_sse_buffer!(buffer, &on_event)
         end
 
-        return {
-          status: status,
-          headers: headers,
-          body: nil,
-        }
+        return Response.new(status: status, headers: headers, body: nil)
       end
 
       # Non-streaming response path (adapter doesn't support streaming or server returned JSON).
       should_parse_json = content_type.include?("json")
-      parsed_body = should_parse_json ? parse_json(body_str) : body_str
-
-      maybe_raise_http_error(
-        status: status,
-        headers: headers,
-        body_str: body_str,
-        raise_on_http_error: raise_on_http_error,
-        ignore_streaming_unsupported: true,
-        parsed_body: parsed_body
-      )
+      parsed_body =
+        if should_parse_json
+          begin
+            parse_json(body_str)
+          rescue Errors::DecodeError
+            # Prefer HTTPError over DecodeError for non-2xx responses.
+            status >= 200 && status < 300 ? raise : body_str
+          end
+        else
+          body_str
+        end
 
-      {
-        status: status,
-        headers: headers,
-        body: parsed_body,
-      }
+      response = Response.new(status: status, headers: headers, body: parsed_body, raw_body: body_str)
+      maybe_raise_http_error(response: response, raise_on_http_error: raise_on_http_error, ignore_streaming_unsupported: true)
+      response
     rescue Timeout::Error => e
       raise Errors::TimeoutError, e.message
     rescue SocketError, SystemCallError => e
@@ -579,13 +683,6 @@ module SimpleInference
       headers = (response[:headers] || {}).transform_keys { |k| k.to_s.downcase }
       body = response[:body].to_s
 
-      maybe_raise_http_error(
-        status: status,
-        headers: headers,
-        body_str: body,
-        raise_on_http_error: raise_on_http_error
-      )
-
       should_parse_json =
         if expect_json.nil?
           content_type = headers["content-type"]
@@ -596,16 +693,19 @@ module SimpleInference
 
       parsed_body =
         if should_parse_json
-          parse_json(body)
+          begin
+            parse_json(body)
+          rescue Errors::DecodeError
+            # Prefer HTTPError over DecodeError for non-2xx responses.
+            status >= 200 && status < 300 ? raise : body
+          end
         else
           body
         end
 
-      {
-        status: status,
-        headers: headers,
-        body: parsed_body,
-      }
+      response = Response.new(status: status, headers: headers, body: parsed_body, raw_body: body)
+      maybe_raise_http_error(response: response, raise_on_http_error: raise_on_http_error)
+      response
     rescue Timeout::Error => e
       raise Errors::TimeoutError, e.message
     rescue SocketError, SystemCallError => e
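A sketch of the error precedence the `rescue Errors::DecodeError` blocks above establish, assuming a hypothetical upstream that returns an unparseable body under a JSON content-type:

```ruby
# Intended outcomes after this change:
#   non-2xx + bad JSON => Errors::HTTPError (body falls back to the raw String)
#   2xx     + bad JSON => Errors::DecodeError is re-raised
begin
  client.list_models
rescue SimpleInference::Errors::HTTPError => e
  e.body     # String (unparsed) when JSON decoding failed
  e.raw_body # always the raw body String
rescue SimpleInference::Errors::DecodeError
  # 2xx response whose body was not valid JSON
end
```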
@@ -648,26 +748,17 @@ module SimpleInference
       end
     end
 
-    def maybe_raise_http_error(
-      status:,
-      headers:,
-      body_str:,
-      raise_on_http_error:,
-      ignore_streaming_unsupported: false,
-      parsed_body: nil
-    )
+    def maybe_raise_http_error(response:, raise_on_http_error:, ignore_streaming_unsupported: false)
       return unless raise_on_http_error?(raise_on_http_error)
-      return unless status < 200 || status >= 300
+      return if response.success?
 
       # Do not raise for the known "streaming unsupported" case; the caller will
       # perform a non-streaming retry fallback.
-      return if ignore_streaming_unsupported && streaming_unsupported_error?(status, parsed_body)
+      return if ignore_streaming_unsupported && streaming_unsupported_error?(response.status, response.body)
 
       raise Errors::HTTPError.new(
-        http_error_message(status, body_str, parsed_body: parsed_body),
-        status: status,
-        headers: headers,
-        body: body_str
+        http_error_message(response.status, response.raw_body.to_s, parsed_body: response.body),
+        response: response
       )
     end
   end
data/lib/simple_inference/errors.rb CHANGED
@@ -7,14 +7,20 @@ module SimpleInference
     class ConfigurationError < Error; end
 
     class HTTPError < Error
-      attr_reader :status, :headers, :body
+      attr_reader :response
 
-      def initialize(message, status:, headers:, body:)
+      def initialize(message, response:)
         super(message)
-        @status = status
-        @headers = headers
-        @body = body
+        @response = response
       end
+
+      def status = @response.status
+
+      def headers = @response.headers
+
+      def body = @response.body
+
+      def raw_body = @response.raw_body
     end
 
     class TimeoutError < Error; end
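A minimal sketch of how the delegating readers keep existing rescue code working while exposing the full response; the `Retry-After` header is illustrative and depends entirely on the upstream server:

```ruby
# Handlers keep using e.status / e.body as before, and can now also reach
# the whole wrapped Response.
begin
  client.chat_completions(model: "gpt-4o-mini", messages: [])
rescue SimpleInference::Errors::HTTPError => e
  warn "HTTP #{e.status}"
  warn "retry-after: #{e.headers["retry-after"].inspect}" # downcased keys
  p e.response.to_h # full Response snapshot, including raw_body
end
```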
data/lib/simple_inference/openai.rb ADDED
@@ -0,0 +1,178 @@
+# frozen_string_literal: true
+
+module SimpleInference
+  # Helpers for extracting common fields from OpenAI-compatible `chat/completions` payloads.
+  #
+  # These helpers accept either:
+  # - A `SimpleInference::Response`, or
+  # - A parsed `body` / `chunk` hash (typically from JSON.parse, with String keys)
+  #
+  # Providers are "OpenAI-compatible", but many differ in subtle ways:
+  # - Some return `choices[0].text` instead of `choices[0].message.content`
+  # - Some represent `content` as an array or structured hash
+  #
+  # This module normalizes those shapes so application code can stay small and predictable.
+  module OpenAI
+    module_function
+
+    ChatResult =
+      Struct.new(
+        :content,
+        :usage,
+        :finish_reason,
+        :logprobs,
+        :response,
+        keyword_init: true
+      )
+
+    # Enumerable wrapper for streaming chat responses.
+    #
+    # @example
+    #   stream = client.chat_stream(model: "...", messages: [...], include_usage: true)
+    #   stream.each { |delta| print delta }
+    #   p stream.result.usage
+    class ChatStream
+      include Enumerable
+
+      attr_reader :result
+
+      def initialize(client:, model:, messages:, include_usage:, request_logprobs:, top_logprobs:, params:)
+        @client = client
+        @model = model
+        @messages = messages
+        @include_usage = include_usage
+        @request_logprobs = request_logprobs
+        @top_logprobs = top_logprobs
+        @params = params
+        @started = false
+        @result = nil
+      end
+
+      def each
+        return enum_for(:each) unless block_given?
+        raise Errors::ConfigurationError, "ChatStream can only be consumed once" if @started
+
+        @started = true
+        @result =
+          @client.chat(
+            model: @model,
+            messages: @messages,
+            stream: true,
+            include_usage: @include_usage,
+            request_logprobs: @request_logprobs,
+            top_logprobs: @top_logprobs,
+            **(@params || {})
+          ) { |delta| yield delta }
+      end
+    end
+
+    # Extract assistant content from a non-streaming chat completion.
+    #
+    # @param response_or_body [SimpleInference::Response, Hash] response or parsed body hash
+    # @return [String, nil]
+    def chat_completion_content(response_or_body)
+      body = unwrap_body(response_or_body)
+      choice = first_choice(body)
+      return nil unless choice
+
+      raw =
+        choice.dig("message", "content") ||
+        choice["text"]
+
+      normalize_content(raw)
+    end
+
+    # Extract finish_reason from a non-streaming chat completion.
+    #
+    # @param response_or_body [SimpleInference::Response, Hash] response or parsed body hash
+    # @return [String, nil]
+    def chat_completion_finish_reason(response_or_body)
+      body = unwrap_body(response_or_body)
+      first_choice(body)&.[]("finish_reason")
+    end
+
+    # Extract usage from a chat completion response or a final streaming chunk.
+    #
+    # @param response_or_body [SimpleInference::Response, Hash] response, body hash, or chunk hash
+    # @return [Hash, nil] symbol-keyed usage hash
+    def chat_completion_usage(response_or_body)
+      body = unwrap_body(response_or_body)
+      usage = body.is_a?(Hash) ? body["usage"] : nil
+      return nil unless usage.is_a?(Hash)
+
+      {
+        prompt_tokens: usage["prompt_tokens"],
+        completion_tokens: usage["completion_tokens"],
+        total_tokens: usage["total_tokens"],
+      }.compact
+    end
+
+    # Extract logprobs (if present) from a non-streaming chat completion.
+    #
+    # @param response_or_body [SimpleInference::Response, Hash] response or parsed body hash
+    # @return [Array<Hash>, nil]
+    def chat_completion_logprobs(response_or_body)
+      body = unwrap_body(response_or_body)
+      first_choice(body)&.dig("logprobs", "content")
+    end
+
+    # Extract delta content from a streaming `chat.completion.chunk`.
+    #
+    # @param chunk [Hash] parsed streaming event hash
+    # @return [String, nil]
+    def chat_completion_chunk_delta(chunk)
+      chunk = unwrap_body(chunk)
+      return nil unless chunk.is_a?(Hash)
+
+      raw = chunk.dig("choices", 0, "delta", "content")
+      normalize_content(raw)
+    end
+
+    # Normalize `content` shapes into a simple String.
+    #
+    # Supports strings, arrays of parts, and part hashes.
+    #
+    # @param value [Object]
+    # @return [String, nil]
+    def normalize_content(value)
+      case value
+      when String
+        value
+      when Array
+        value.map { |part| normalize_content(part) }.join
+      when Hash
+        value["text"] ||
+          value["content"] ||
+          value.to_s
+      when nil
+        nil
+      else
+        value.to_s
+      end
+    end
+
+    # Unwrap a full SimpleInference response into its `body`, otherwise return the object.
+    #
+    # @param obj [Object]
+    # @return [Object]
+    def unwrap_body(obj)
+      return {} unless obj
+      return obj.body || {} if obj.respond_to?(:body)
+
+      obj
+    end
+
+    def first_choice(body)
+      return nil unless body.is_a?(Hash)
+
+      choices = body["choices"]
+      return nil unless choices.is_a?(Array) && !choices.empty?
+
+      choice0 = choices[0]
+      return nil unless choice0.is_a?(Hash)
+
+      choice0
+    end
+    private_class_method :first_choice
+  end
+end
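A quick sketch of the shapes `normalize_content` flattens, derived directly from its case expression above:

```ruby
SimpleInference::OpenAI.normalize_content("hi")                     # => "hi"
SimpleInference::OpenAI.normalize_content([{ "text" => "a" }, "b"]) # => "ab"
SimpleInference::OpenAI.normalize_content({ "content" => "c" })     # => "c"
SimpleInference::OpenAI.normalize_content(nil)                      # => nil
```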
data/lib/simple_inference/response.rb ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module SimpleInference
+  # A lightweight wrapper for HTTP responses returned by SimpleInference.
+  #
+  # - `status` is an Integer HTTP status code
+  # - `headers` is a Hash with downcased String keys
+  # - `body` is a parsed JSON Hash/Array, a String, or nil (e.g. SSE streaming success)
+  # - `raw_body` is the raw response body String (when available)
+  class Response
+    attr_reader :status, :headers, :body, :raw_body
+
+    def initialize(status:, headers:, body:, raw_body: nil)
+      @status = status.to_i
+      @headers = (headers || {}).transform_keys { |k| k.to_s.downcase }
+      @body = body
+      @raw_body = raw_body
+    end
+
+    def success?
+      status >= 200 && status < 300
+    end
+
+    def to_h
+      { status: status, headers: headers, body: body, raw_body: raw_body }
+    end
+  end
+end
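A minimal usage sketch for the new wrapper, e.g. when constructing one by hand as a test double (all values illustrative):

```ruby
response = SimpleInference::Response.new(
  status: 200,
  headers: { "Content-Type" => "application/json" }, # keys get downcased
  body: { "ok" => true }
)

response.success?        # => true
response.headers.keys    # => ["content-type"]
response.to_h[:raw_body] # => nil (raw_body is optional)
```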
data/lib/simple_inference/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module SimpleInference
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end
data/lib/simple_inference.rb CHANGED
@@ -4,6 +4,8 @@ require_relative "simple_inference/version"
 require_relative "simple_inference/config"
 require_relative "simple_inference/errors"
 require_relative "simple_inference/http_adapter"
+require_relative "simple_inference/response"
+require_relative "simple_inference/openai"
 require_relative "simple_inference/client"
 
 module SimpleInference
data/sig/simple_inference.rbs CHANGED
@@ -1,3 +1,71 @@
 module SimpleInference
   VERSION: String
+
+  class Response
+    attr_reader status: Integer
+    attr_reader headers: Hash[String, untyped]
+    attr_reader body: untyped
+    attr_reader raw_body: String?
+
+    def initialize: (status: Integer, headers: Hash[untyped, untyped], body: untyped, ?raw_body: String?) -> void
+    def success?: () -> bool
+    def to_h: () -> Hash[Symbol, untyped]
+  end
+
+  module OpenAI
+    class ChatResult
+      attr_reader content: String?
+      attr_reader usage: Hash[Symbol, untyped]?
+      attr_reader finish_reason: String?
+      attr_reader logprobs: Array[Hash[untyped, untyped]]?
+      attr_reader response: Response
+    end
+
+    class ChatStream
+      include Enumerable[String]
+      attr_reader result: ChatResult?
+    end
+
+    def self.chat_completion_content: (untyped) -> String?
+    def self.chat_completion_finish_reason: (untyped) -> String?
+    def self.chat_completion_usage: (untyped) -> Hash[Symbol, untyped]?
+    def self.chat_completion_logprobs: (untyped) -> Array[Hash[untyped, untyped]]?
+    def self.chat_completion_chunk_delta: (untyped) -> String?
+    def self.normalize_content: (untyped) -> String?
+  end
+
+  class Client
+    def initialize: (?Hash[untyped, untyped]) -> void
+
+    def chat: (
+      model: String,
+      messages: Array[Hash[untyped, untyped]],
+      ?stream: bool?,
+      ?include_usage: bool?,
+      ?request_logprobs: bool,
+      ?top_logprobs: Integer?,
+      **untyped
+    ) { (String) -> void } -> OpenAI::ChatResult
+
+    def chat_stream: (
+      model: String,
+      messages: Array[Hash[untyped, untyped]],
+      ?include_usage: bool?,
+      ?request_logprobs: bool,
+      ?top_logprobs: Integer?,
+      **untyped
+    ) -> OpenAI::ChatStream
+
+    def chat_completions: (**untyped) -> Response
+    def chat_completions_stream: (**untyped) { (Hash[untyped, untyped]) -> void } -> Response
+
+    def embeddings: (**untyped) -> Response
+    def rerank: (**untyped) -> Response
+    def list_models: () -> Response
+    def models: () -> Array[String]
+    def health: () -> Response
+    def healthy?: () -> bool
+    def audio_transcriptions: (**untyped) -> Response
+    def audio_translations: (**untyped) -> Response
+  end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: simple_inference
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - jasl
@@ -27,6 +27,8 @@ files:
 - lib/simple_inference/http_adapter.rb
 - lib/simple_inference/http_adapters/default.rb
 - lib/simple_inference/http_adapters/httpx.rb
+- lib/simple_inference/openai.rb
+- lib/simple_inference/response.rb
 - lib/simple_inference/version.rb
 - sig/simple_inference.rbs
 homepage: https://github.com/jasl/simple_inference.rb