simple_inference 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +48 -28
- data/lib/simple_inference/client.rb +157 -66
- data/lib/simple_inference/errors.rb +11 -5
- data/lib/simple_inference/openai.rb +178 -0
- data/lib/simple_inference/response.rb +28 -0
- data/lib/simple_inference/version.rb +1 -1
- data/lib/simple_inference.rb +2 -0
- data/sig/simple_inference.rbs +68 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ad988c1bb0af4938ea72fd303943a6dc27b90f26a8128abd737e0fca6429e081
+  data.tar.gz: 6be00487c1533201ffc48afb14a64c385b434698cf1bf3ab1c5c4ab10834d06a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 066dbeee456edae89770a5ed6541d77dda53d6ebcac59a2f277e28e00dde8b12b373cdec67bb0e79f84df781397034f1ff75694560bd6f612dca608ce6252630
+  data.tar.gz: 8008d5a95c38e45465e48a3f45fe8b7fd1cffec49e16cfd54419cbed08a11d7d613715314c91c742f63430860caac1fe332e10270cd0741401e98540a0582d65
data/README.md
CHANGED
@@ -38,12 +38,13 @@ client = SimpleInference::Client.new(
   api_key: ENV["OPENAI_API_KEY"]
 )
 
-response = client.chat_completions(
+result = client.chat(
   model: "gpt-4o-mini",
   messages: [{ "role" => "user", "content" => "Hello!" }]
 )
 
-puts
+puts result.content
+p result.usage
 ```
 
 ## Configuration
@@ -84,13 +85,15 @@ client = SimpleInference::Client.new(
   api_prefix: "" # Important: Volcano Engine does not use the /v1 prefix
 )
 
-response = client.chat_completions(
+result = client.chat(
   model: "deepseek-v3-250324",
   messages: [
     { "role" => "system", "content" => "You are an AI assistant" },
     { "role" => "user", "content" => "Hello" }
   ]
 )
+
+puts result.content
 ```
 
 #### DeepSeek
@@ -150,10 +153,10 @@ client = SimpleInference::Client.new(
 
 ## API Methods
 
-### Chat
+### Chat
 
 ```ruby
-response = client.chat_completions(
+result = client.chat(
   model: "gpt-4o-mini",
   messages: [
     { "role" => "system", "content" => "You are a helpful assistant." },
@@ -163,23 +166,27 @@ response = client.chat_completions(
   max_tokens: 1000
 )
 
-puts
+puts result.content
+p result.usage
 ```
 
-### Streaming Chat
+### Streaming Chat
 
 ```ruby
-client.
+result = client.chat(
   model: "gpt-4o-mini",
-  messages: [{ "role" => "user", "content" => "Tell me a story" }]
-
-
-
+  messages: [{ "role" => "user", "content" => "Tell me a story" }],
+  stream: true,
+  include_usage: true
+) do |delta|
+  print delta
 end
 puts
+
+p result.usage
 ```
 
-
+Low-level streaming (events) is also available, and can be used as an Enumerator:
 
 ```ruby
 stream = client.chat_completions_stream(
@@ -192,6 +199,20 @@ stream.each do |event|
 end
 ```
 
+Or as an Enumerable of delta strings:
+
+```ruby
+stream = client.chat_stream(
+  model: "gpt-4o-mini",
+  messages: [{ "role" => "user", "content" => "Hello" }],
+  include_usage: true
+)
+
+stream.each { |delta| print delta }
+puts
+p stream.result&.usage
+```
+
 ### Embeddings
 
 ```ruby
@@ -200,7 +221,7 @@ response = client.embeddings(
   input: "Hello, world!"
 )
 
-vector = response
+vector = response.body["data"][0]["embedding"]
 ```
 
 ### Rerank
@@ -225,7 +246,7 @@ response = client.audio_transcriptions(
   file: File.open("audio.mp3", "rb")
 )
 
-puts response
+puts response.body["text"]
 ```
 
 ### Audio Translation
@@ -240,8 +261,7 @@ response = client.audio_translations(
 ### List Models
 
 ```ruby
-response = client.list_models
-models = response[:body]["data"]
+model_ids = client.models
 ```
 
 ### Health Check
@@ -258,14 +278,13 @@ end
 
 ## Response Format
 
-All methods return a
+All HTTP methods return a `SimpleInference::Response` with:
 
 ```ruby
-
-
-
-
-}
+response.status   # Integer HTTP status code
+response.headers  # Hash with downcased String keys
+response.body     # Parsed JSON (Hash/Array), raw String, or nil (SSE success)
+response.success? # true for 2xx
 ```
 
 ## Error Handling
@@ -277,7 +296,8 @@ begin
   client.chat_completions(model: "invalid", messages: [])
 rescue SimpleInference::Errors::HTTPError => e
   puts "HTTP #{e.status}: #{e.message}"
-
+  p e.body        # parsed body (Hash/Array/String)
+  puts e.raw_body # raw response body string (if available)
 end
 ```
 
@@ -299,10 +319,10 @@ client = SimpleInference::Client.new(
 
 response = client.chat_completions(model: "gpt-4o-mini", messages: [...])
 
-if response
+if response.success?
   # success
 else
-  puts "Error: #{response
+  puts "Error: #{response.status} - #{response.body}"
 end
 ```
 
@@ -370,7 +390,7 @@ class ChatsController < ApplicationController
       messages: [{ "role" => "user", "content" => params[:prompt] }]
    )
 
-    render json: response
+    render json: response.body
   end
 end
 ```
@@ -385,7 +405,7 @@ class EmbedJob < ApplicationJob
      input: text
    )
 
-    vector = response
+    vector = response.body["data"][0]["embedding"]
    # Store vector...
  end
end
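The README hunks above never show the logprobs options that the new `Client#chat` documents in client.rb below. A minimal sketch of how they would be passed; the model name is a placeholder and this is not an example from the package:

```ruby
require "simple_inference"

client = SimpleInference::Client.new(api_key: ENV["OPENAI_API_KEY"])

result = client.chat(
  model: "gpt-4o-mini",                                 # placeholder model
  messages: [{ "role" => "user", "content" => "Hi" }],
  request_logprobs: true, # adds logprobs: true to the request body
  top_logprobs: 5         # the documented default when request_logprobs is true
)

p result.finish_reason
p result.logprobs&.first # per-token entries, when the provider returns them
```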
data/lib/simple_inference/client.rb
CHANGED

@@ -22,21 +22,121 @@ module SimpleInference
 
     # POST /v1/chat/completions
     # params: { model: "model-name", messages: [...], ... }
-    def chat_completions(params)
+    def chat_completions(**params)
       post_json(api_path("/chat/completions"), params)
     end
 
+    # High-level helper for OpenAI-compatible chat.
+    #
+    # - Non-streaming: returns an OpenAI::ChatResult with `content` + `usage`.
+    # - Streaming: yields delta strings to the block (if given), accumulates, and returns OpenAI::ChatResult.
+    #
+    # @param model [String]
+    # @param messages [Array<Hash>]
+    # @param stream [Boolean] force streaming when true (default: block_given?)
+    # @param include_usage [Boolean, nil] when true (and streaming), requests usage in the final chunk
+    # @param request_logprobs [Boolean] when true, requests logprobs (and collects them in streaming mode)
+    # @param top_logprobs [Integer, nil] default: 5 (when request_logprobs is true)
+    # @param params [Hash] additional OpenAI parameters (max_tokens, temperature, etc.)
+    # @yield [String] delta content chunks (streaming only)
+    # @return [SimpleInference::OpenAI::ChatResult]
+    def chat(model:, messages:, stream: nil, include_usage: nil, request_logprobs: false, top_logprobs: 5, **params, &block)
+      raise ArgumentError, "model is required" if model.nil? || model.to_s.strip.empty?
+      raise ArgumentError, "messages must be an Array" unless messages.is_a?(Array)
+
+      use_stream = stream.nil? ? block_given? : stream
+
+      request = { model: model, messages: messages }.merge(params)
+      request.delete(:stream)
+      request.delete("stream")
+
+      if request_logprobs
+        request[:logprobs] = true unless request.key?(:logprobs) || request.key?("logprobs")
+        if top_logprobs && !(request.key?(:top_logprobs) || request.key?("top_logprobs"))
+          request[:top_logprobs] = top_logprobs
+        end
+      end
+
+      if use_stream && include_usage
+        stream_options = request[:stream_options] || request["stream_options"]
+        stream_options ||= {}
+
+        if stream_options.is_a?(Hash)
+          stream_options[:include_usage] = true unless stream_options.key?(:include_usage) || stream_options.key?("include_usage")
+        end
+
+        request[:stream_options] = stream_options
+      end
+
+      if use_stream
+        full = +""
+        finish_reason = nil
+        last_usage = nil
+        collected_logprobs = []
+
+        response =
+          chat_completions_stream(**request) do |event|
+            delta = OpenAI.chat_completion_chunk_delta(event)
+            if delta
+              full << delta
+              block.call(delta) if block
+            end
+
+            fr = event.is_a?(Hash) ? event.dig("choices", 0, "finish_reason") : nil
+            finish_reason = fr if fr
+
+            if request_logprobs
+              chunk_logprobs = event.is_a?(Hash) ? event.dig("choices", 0, "logprobs", "content") : nil
+              if chunk_logprobs.is_a?(Array)
+                collected_logprobs.concat(chunk_logprobs)
+              end
+            end
+
+            usage = OpenAI.chat_completion_usage(event)
+            last_usage = usage if usage
+          end
+
+        OpenAI::ChatResult.new(
+          content: full,
+          usage: last_usage || OpenAI.chat_completion_usage(response),
+          finish_reason: finish_reason || OpenAI.chat_completion_finish_reason(response),
+          logprobs: collected_logprobs.empty? ? OpenAI.chat_completion_logprobs(response) : collected_logprobs,
+          response: response
+        )
+      else
+        response = chat_completions(**request)
+        OpenAI::ChatResult.new(
+          content: OpenAI.chat_completion_content(response),
+          usage: OpenAI.chat_completion_usage(response),
+          finish_reason: OpenAI.chat_completion_finish_reason(response),
+          logprobs: OpenAI.chat_completion_logprobs(response),
+          response: response
+        )
+      end
+    end
+
+    # Streaming chat as an Enumerable.
+    #
+    # @return [SimpleInference::OpenAI::ChatStream]
+    def chat_stream(model:, messages:, include_usage: nil, request_logprobs: false, top_logprobs: 5, **params)
+      OpenAI::ChatStream.new(
+        client: self,
+        model: model,
+        messages: messages,
+        include_usage: include_usage,
+        request_logprobs: request_logprobs,
+        top_logprobs: top_logprobs,
+        params: params
+      )
+    end
+
     # POST /v1/chat/completions (streaming)
     #
     # Yields parsed JSON events from an OpenAI-style SSE stream (`text/event-stream`).
     #
     # If no block is given, returns an Enumerator.
-    def chat_completions_stream(params)
-      return enum_for(:chat_completions_stream, params) unless block_given?
-
-      unless params.is_a?(Hash)
-        raise Errors::ConfigurationError, "params must be a Hash"
-      end
+    def chat_completions_stream(**params)
+      return enum_for(:chat_completions_stream, **params) unless block_given?
 
       body = params.dup
       body.delete(:stream)
@@ -47,29 +147,29 @@ module SimpleInference
         yield event
       end
 
-      content_type = response.
+      content_type = response.headers["content-type"].to_s
 
       # Streaming case: we already yielded events from the SSE stream.
-      if response
+      if response.status >= 200 && response.status < 300 && content_type.include?("text/event-stream")
         return response
       end
 
       # Fallback when upstream does not support streaming (this repo's server).
-      if streaming_unsupported_error?(response
+      if streaming_unsupported_error?(response.status, response.body)
         fallback_body = params.dup
         fallback_body.delete(:stream)
         fallback_body.delete("stream")
 
         fallback_response = post_json(api_path("/chat/completions"), fallback_body)
-        chunk = synthesize_chat_completion_chunk(fallback_response
+        chunk = synthesize_chat_completion_chunk(fallback_response.body)
         yield chunk if chunk
         return fallback_response
       end
 
       # If we got a non-streaming success response (JSON), convert it into a single
       # chunk so streaming consumers can share the same code path.
-      if response
-        chunk = synthesize_chat_completion_chunk(response
+      if response.status >= 200 && response.status < 300
+        chunk = synthesize_chat_completion_chunk(response.body)
         yield chunk if chunk
       end
 
@@ -77,12 +177,12 @@ module SimpleInference
     end
 
     # POST /v1/embeddings
-    def embeddings(params)
+    def embeddings(**params)
       post_json(api_path("/embeddings"), params)
     end
 
     # POST /v1/rerank
-    def rerank(params)
+    def rerank(**params)
       post_json(api_path("/rerank"), params)
     end
 
@@ -91,6 +191,15 @@ module SimpleInference
       get_json(api_path("/models"))
     end
 
+    # Convenience wrapper for list_models.
+    #
+    # @return [Array<String>] model IDs
+    def models
+      response = list_models
+      data = response.body.is_a?(Hash) ? response.body["data"] : nil
+      Array(data).filter_map { |m| m.is_a?(Hash) ? m["id"] : nil }
+    end
+
     # GET /health
     def health
       get_json("/health")
@@ -99,8 +208,8 @@ module SimpleInference
     # Returns true when service is healthy, false otherwise.
     def healthy?
       response = get_json("/health", raise_on_http_error: false)
-      status_ok = response
-      body_status_ok = response.
+      status_ok = response.status == 200
+      body_status_ok = response.body.is_a?(Hash) && response.body["status"] == "ok"
       status_ok && body_status_ok
     rescue Errors::Error
       false
@@ -108,12 +217,12 @@ module SimpleInference
 
     # POST /v1/audio/transcriptions
     # params: { file: io_or_hash, model: "model-name", **audio_options }
-    def audio_transcriptions(params)
+    def audio_transcriptions(**params)
       post_multipart(api_path("/audio/transcriptions"), params)
     end
 
     # POST /v1/audio/translations
-    def audio_translations(params)
+    def audio_translations(**params)
       post_multipart(api_path("/audio/translations"), params)
     end
 
@@ -203,31 +312,26 @@ module SimpleInference
           consume_sse_buffer!(buffer, &on_event)
         end
 
-        return {
-          status: status,
-          headers: headers,
-          body: nil,
-        }
+        return Response.new(status: status, headers: headers, body: nil)
       end
 
       # Non-streaming response path (adapter doesn't support streaming or server returned JSON).
       should_parse_json = content_type.include?("json")
-      parsed_body =
-
-
-
-
-
-
-
-
-
+      parsed_body =
+        if should_parse_json
+          begin
+            parse_json(body_str)
+          rescue Errors::DecodeError
+            # Prefer HTTPError over DecodeError for non-2xx responses.
+            status >= 200 && status < 300 ? raise : body_str
+          end
+        else
+          body_str
+        end
 
-      {
-        status: status,
-        headers: headers,
-        body: parsed_body,
-      }
+      response = Response.new(status: status, headers: headers, body: parsed_body, raw_body: body_str)
+      maybe_raise_http_error(response: response, raise_on_http_error: raise_on_http_error, ignore_streaming_unsupported: true)
+      response
     rescue Timeout::Error => e
       raise Errors::TimeoutError, e.message
     rescue SocketError, SystemCallError => e
@@ -579,13 +683,6 @@ module SimpleInference
       headers = (response[:headers] || {}).transform_keys { |k| k.to_s.downcase }
       body = response[:body].to_s
 
-      maybe_raise_http_error(
-        status: status,
-        headers: headers,
-        body_str: body,
-        raise_on_http_error: raise_on_http_error
-      )
-
       should_parse_json =
         if expect_json.nil?
           content_type = headers["content-type"]
@@ -596,16 +693,19 @@ module SimpleInference
 
       parsed_body =
         if should_parse_json
-          parse_json(body)
+          begin
+            parse_json(body)
+          rescue Errors::DecodeError
+            # Prefer HTTPError over DecodeError for non-2xx responses.
+            status >= 200 && status < 300 ? raise : body
+          end
         else
          body
        end
 
-      {
-        status: status,
-        headers: headers,
-        body: parsed_body,
-      }
+      response = Response.new(status: status, headers: headers, body: parsed_body, raw_body: body)
+      maybe_raise_http_error(response: response, raise_on_http_error: raise_on_http_error)
+      response
    rescue Timeout::Error => e
      raise Errors::TimeoutError, e.message
    rescue SocketError, SystemCallError => e
@@ -648,26 +748,17 @@ module SimpleInference
       end
     end
 
-    def maybe_raise_http_error(
-      status:,
-      headers:,
-      body_str:,
-      raise_on_http_error:,
-      ignore_streaming_unsupported: false,
-      parsed_body: nil
-    )
+    def maybe_raise_http_error(response:, raise_on_http_error:, ignore_streaming_unsupported: false)
       return unless raise_on_http_error?(raise_on_http_error)
-      return
+      return if response.success?
 
       # Do not raise for the known "streaming unsupported" case; the caller will
       # perform a non-streaming retry fallback.
-      return if ignore_streaming_unsupported && streaming_unsupported_error?(status,
+      return if ignore_streaming_unsupported && streaming_unsupported_error?(response.status, response.body)
 
       raise Errors::HTTPError.new(
-        http_error_message(status,
-        status: status,
-        headers: headers,
-        body: body_str
+        http_error_message(response.status, response.raw_body.to_s, parsed_body: response.body),
+        response: response
       )
     end
   end
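Note the signature change running through this file: `chat_completions(params)` and the other endpoint methods become `chat_completions(**params)`, so 0.1.4 callers that passed a positional Hash must now splat it. A minimal migration sketch (placeholder model name):

```ruby
require "simple_inference"

client = SimpleInference::Client.new(api_key: ENV["OPENAI_API_KEY"])

params = {
  model: "gpt-4o-mini", # placeholder model
  messages: [{ "role" => "user", "content" => "Hi" }]
}

# 0.1.4 accepted a positional Hash:
#   client.chat_completions(params)
# 0.1.5 declares **params, so splat an existing Hash instead:
response = client.chat_completions(**params)

puts response.body["choices"][0]["message"]["content"]
```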
data/lib/simple_inference/errors.rb
CHANGED

@@ -7,14 +7,20 @@ module SimpleInference
     class ConfigurationError < Error; end
 
     class HTTPError < Error
-      attr_reader :status, :headers, :body
+      attr_reader :response
 
-      def initialize(message, status:, headers:, body:)
+      def initialize(message, response:)
         super(message)
-        @status = status
-        @headers = headers
-        @body = body
+        @response = response
       end
+
+      def status = @response.status
+
+      def headers = @response.headers
+
+      def body = @response.body
+
+      def raw_body = @response.raw_body
     end
 
     class TimeoutError < Error; end
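Because the old `status`/`headers`/`body` readers are re-created as delegators onto the wrapped `Response`, 0.1.4-era rescue blocks keep working unchanged. A minimal sketch:

```ruby
require "simple_inference"

client = SimpleInference::Client.new(api_key: ENV["OPENAI_API_KEY"])

begin
  client.chat_completions(model: "invalid", messages: [])
rescue SimpleInference::Errors::HTTPError => e
  puts "HTTP #{e.status}: #{e.message}" # delegated readers, same call sites as 0.1.4
  p e.response.success?                 # => false; the full Response is now exposed
  p e.response.headers["content-type"]
end
```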
data/lib/simple_inference/openai.rb
ADDED

@@ -0,0 +1,178 @@
+# frozen_string_literal: true
+
+module SimpleInference
+  # Helpers for extracting common fields from OpenAI-compatible `chat/completions` payloads.
+  #
+  # These helpers accept either:
+  # - A `SimpleInference::Response`, or
+  # - A parsed `body` / `chunk` hash (typically from JSON.parse, with String keys)
+  #
+  # Providers are "OpenAI-compatible", but many differ in subtle ways:
+  # - Some return `choices[0].text` instead of `choices[0].message.content`
+  # - Some represent `content` as an array or structured hash
+  #
+  # This module normalizes those shapes so application code can stay small and predictable.
+  module OpenAI
+    module_function
+
+    ChatResult =
+      Struct.new(
+        :content,
+        :usage,
+        :finish_reason,
+        :logprobs,
+        :response,
+        keyword_init: true
+      )
+
+    # Enumerable wrapper for streaming chat responses.
+    #
+    # @example
+    #   stream = client.chat_stream(model: "...", messages: [...], include_usage: true)
+    #   stream.each { |delta| print delta }
+    #   p stream.result.usage
+    class ChatStream
+      include Enumerable
+
+      attr_reader :result
+
+      def initialize(client:, model:, messages:, include_usage:, request_logprobs:, top_logprobs:, params:)
+        @client = client
+        @model = model
+        @messages = messages
+        @include_usage = include_usage
+        @request_logprobs = request_logprobs
+        @top_logprobs = top_logprobs
+        @params = params
+        @started = false
+        @result = nil
+      end
+
+      def each
+        return enum_for(:each) unless block_given?
+        raise Errors::ConfigurationError, "ChatStream can only be consumed once" if @started
+
+        @started = true
+        @result =
+          @client.chat(
+            model: @model,
+            messages: @messages,
+            stream: true,
+            include_usage: @include_usage,
+            request_logprobs: @request_logprobs,
+            top_logprobs: @top_logprobs,
+            **(@params || {})
+          ) { |delta| yield delta }
+      end
+    end
+
+    # Extract assistant content from a non-streaming chat completion.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash or parsed body hash
+    # @return [String, nil]
+    def chat_completion_content(response_or_body)
+      body = unwrap_body(response_or_body)
+      choice = first_choice(body)
+      return nil unless choice
+
+      raw =
+        choice.dig("message", "content") ||
+        choice["text"]
+
+      normalize_content(raw)
+    end
+
+    # Extract finish_reason from a non-streaming chat completion.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash or parsed body hash
+    # @return [String, nil]
+    def chat_completion_finish_reason(response_or_body)
+      body = unwrap_body(response_or_body)
+      first_choice(body)&.[]("finish_reason")
+    end
+
+    # Extract usage from a chat completion response or a final streaming chunk.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash, body hash, or chunk hash
+    # @return [Hash, nil] symbol-keyed usage hash
+    def chat_completion_usage(response_or_body)
+      body = unwrap_body(response_or_body)
+      usage = body.is_a?(Hash) ? body["usage"] : nil
+      return nil unless usage.is_a?(Hash)
+
+      {
+        prompt_tokens: usage["prompt_tokens"],
+        completion_tokens: usage["completion_tokens"],
+        total_tokens: usage["total_tokens"],
+      }.compact
+    end
+
+    # Extract logprobs (if present) from a non-streaming chat completion.
+    #
+    # @param response_or_body [Hash] SimpleInference response hash or parsed body hash
+    # @return [Array<Hash>, nil]
+    def chat_completion_logprobs(response_or_body)
+      body = unwrap_body(response_or_body)
+      first_choice(body)&.dig("logprobs", "content")
+    end
+
+    # Extract delta content from a streaming `chat.completion.chunk`.
+    #
+    # @param chunk [Hash] parsed streaming event hash
+    # @return [String, nil]
+    def chat_completion_chunk_delta(chunk)
+      chunk = unwrap_body(chunk)
+      return nil unless chunk.is_a?(Hash)
+
+      raw = chunk.dig("choices", 0, "delta", "content")
+      normalize_content(raw)
+    end
+
+    # Normalize `content` shapes into a simple String.
+    #
+    # Supports strings, arrays of parts, and part hashes.
+    #
+    # @param value [Object]
+    # @return [String, nil]
+    def normalize_content(value)
+      case value
+      when String
+        value
+      when Array
+        value.map { |part| normalize_content(part) }.join
+      when Hash
+        value["text"] ||
+          value["content"] ||
+          value.to_s
+      when nil
+        nil
+      else
+        value.to_s
+      end
+    end
+
+    # Unwrap a full SimpleInference response into its `:body`, otherwise return the object.
+    #
+    # @param obj [Object]
+    # @return [Object]
+    def unwrap_body(obj)
+      return {} unless obj
+      return obj.body || {} if obj.respond_to?(:body)
+
+      obj
+    end
+
+    def first_choice(body)
+      return nil unless body.is_a?(Hash)
+
+      choices = body["choices"]
+      return nil unless choices.is_a?(Array) && !choices.empty?
+
+      choice0 = choices[0]
+      return nil unless choice0.is_a?(Hash)
+
+      choice0
+    end
+    private_class_method :first_choice
+  end
+end
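Since `unwrap_body` passes plain hashes straight through, these helpers can be exercised without a `Response` object. A minimal sketch with a hand-built chunk hash (the values are illustrative, not captured provider output):

```ruby
require "simple_inference"

# Shaped like a parsed `chat.completion.chunk` event with array-of-parts content.
chunk = {
  "choices" => [
    { "delta" => { "content" => [{ "type" => "text", "text" => "Hel" }, "lo"] } }
  ]
}

# Array-of-parts content is flattened into a single String:
p SimpleInference::OpenAI.chat_completion_chunk_delta(chunk) # => "Hello"
p SimpleInference::OpenAI.normalize_content(nil)             # => nil
```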
data/lib/simple_inference/response.rb
ADDED

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module SimpleInference
+  # A lightweight wrapper for HTTP responses returned by SimpleInference.
+  #
+  # - `status` is an Integer HTTP status code
+  # - `headers` is a Hash with downcased String keys
+  # - `body` is a parsed JSON Hash/Array, a String, or nil (e.g. SSE streaming success)
+  # - `raw_body` is the raw response body String (when available)
+  class Response
+    attr_reader :status, :headers, :body, :raw_body
+
+    def initialize(status:, headers:, body:, raw_body: nil)
+      @status = status.to_i
+      @headers = (headers || {}).transform_keys { |k| k.to_s.downcase }
+      @body = body
+      @raw_body = raw_body
+    end
+
+    def success?
+      status >= 200 && status < 300
+    end
+
+    def to_h
+      { status: status, headers: headers, body: body, raw_body: raw_body }
+    end
+  end
+end
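A minimal sketch of the wrapper's behavior with illustrative values; note that header keys are downcased on construction:

```ruby
require "simple_inference"

response = SimpleInference::Response.new(
  status: 200,
  headers: { "Content-Type" => "application/json" },
  body: { "status" => "ok" },
  raw_body: '{"status":"ok"}'
)

p response.success?  # => true
p response.headers   # => {"content-type"=>"application/json"}
p response.to_h.keys # => [:status, :headers, :body, :raw_body]
```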
data/lib/simple_inference.rb
CHANGED
@@ -4,6 +4,8 @@ require_relative "simple_inference/version"
 require_relative "simple_inference/config"
 require_relative "simple_inference/errors"
 require_relative "simple_inference/http_adapter"
+require_relative "simple_inference/response"
+require_relative "simple_inference/openai"
 require_relative "simple_inference/client"
 
 module SimpleInference
data/sig/simple_inference.rbs
CHANGED
@@ -1,3 +1,71 @@
 module SimpleInference
   VERSION: String
+
+  class Response
+    attr_reader status: Integer
+    attr_reader headers: Hash[String, untyped]
+    attr_reader body: untyped
+    attr_reader raw_body: String?
+
+    def initialize: (status: Integer, headers: Hash[untyped, untyped], body: untyped, ?raw_body: String?) -> void
+    def success?: () -> bool
+    def to_h: () -> Hash[Symbol, untyped]
+  end
+
+  module OpenAI
+    class ChatResult
+      attr_reader content: String?
+      attr_reader usage: Hash[Symbol, untyped]?
+      attr_reader finish_reason: String?
+      attr_reader logprobs: Array[Hash[untyped, untyped]]?
+      attr_reader response: Response
+    end
+
+    class ChatStream
+      include Enumerable[String]
+      attr_reader result: ChatResult?
+    end
+
+    def self.chat_completion_content: (untyped) -> String?
+    def self.chat_completion_finish_reason: (untyped) -> String?
+    def self.chat_completion_usage: (untyped) -> Hash[Symbol, untyped]?
+    def self.chat_completion_logprobs: (untyped) -> Array[Hash[untyped, untyped]]?
+    def self.chat_completion_chunk_delta: (untyped) -> String?
+    def self.normalize_content: (untyped) -> String?
+  end
+
+  class Client
+    def initialize: (?Hash[untyped, untyped]) -> void
+
+    def chat: (
+      model: String,
+      messages: Array[Hash[untyped, untyped]],
+      ?stream: bool?,
+      ?include_usage: bool?,
+      ?request_logprobs: bool,
+      ?top_logprobs: Integer?,
+      **untyped
+    ) { (String) -> void } -> OpenAI::ChatResult
+
+    def chat_stream: (
+      model: String,
+      messages: Array[Hash[untyped, untyped]],
+      ?include_usage: bool?,
+      ?request_logprobs: bool,
+      ?top_logprobs: Integer?,
+      **untyped
+    ) -> OpenAI::ChatStream
+
+    def chat_completions: (**untyped) -> Response
+    def chat_completions_stream: (**untyped) { (Hash[untyped, untyped]) -> void } -> Response
+
+    def embeddings: (**untyped) -> Response
+    def rerank: (**untyped) -> Response
+    def list_models: () -> Response
+    def models: () -> Array[String]
+    def health: () -> Response
+    def healthy?: () -> bool
+    def audio_transcriptions: (**untyped) -> Response
+    def audio_translations: (**untyped) -> Response
+  end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: simple_inference
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - jasl
@@ -27,6 +27,8 @@ files:
 - lib/simple_inference/http_adapter.rb
 - lib/simple_inference/http_adapters/default.rb
 - lib/simple_inference/http_adapters/httpx.rb
+- lib/simple_inference/openai.rb
+- lib/simple_inference/response.rb
 - lib/simple_inference/version.rb
 - sig/simple_inference.rbs
 homepage: https://github.com/jasl/simple_inference.rb