RubyGems - ask-llm-providers - Versions diffs - 0.1.0 - Mend

ask-llm-providers 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.md +70 -0
data/lib/ask/llm/config.rb +33 -0
data/lib/ask/llm/http.rb +47 -0
data/lib/ask/llm/version.rb +7 -0
data/lib/ask/provider/anthropic.rb +230 -0
data/lib/ask/provider/bedrock.rb +180 -0
data/lib/ask/provider/cloudflare.rb +123 -0
data/lib/ask/provider/google.rb +216 -0
data/lib/ask/provider/mistral.rb +70 -0
data/lib/ask/provider/ollama.rb +107 -0
data/lib/ask/provider/openai.rb +155 -0
data/lib/ask-llm-providers.rb +30 -0
metadata +195 -0

data/lib/ask/provider/cloudflare.rb ADDED Viewed

@@ -0,0 +1,123 @@
+# frozen_string_literal: true
+module Ask
+  module Providers
+    # Cloudflare Workers AI provider. Supports both direct Workers AI and AI Gateway.
+    class Cloudflare < Ask::Provider
+      def initialize(config = {})
+        config = normalize_config(config)
+        super(config)
+        @http = build_http
+      end
+      def api_base
+        if @config.gateway_id
+          "https://gateway.ai.cloudflare.com/v1/#{@config.account_id}/#{@config.gateway_id}"
+        else
+          "https://api.cloudflare.com/client/v4/accounts/#{@config.account_id}/ai/v1"
+        end
+      end
+      def headers
+        { "Content-Type" => "application/json", "Authorization" => "Bearer #{@config.api_key}" }.compact
+      end
+      def chat(messages, model:, tools: nil, temperature: nil, stream: nil, schema: nil, **params, &block)
+        msgs = messages.is_a?(Ask::Conversation) ? messages.to_a : messages
+        endpoint = @config.gateway_id ? "chat/completions" : "run/#{model}"
+        payload = if @config.gateway_id
+                    { model: model, messages: msgs.map { |m| { role: (m[:role] || m["role"]).to_s, content: m[:content] || m["content"] } }, stream: stream || false }
+                  else
+                    { messages: msgs.map { |m| { role: (m[:role] || m["role"]).to_s, content: m[:content] || m["content"] } } }
+                  end
+        payload[:temperature] = temperature if temperature
+        payload.merge(params)
+        if stream && @config.gateway_id
+          chat_stream_gateway(endpoint, payload, model, &block)
+        else
+          chat_nonstream(endpoint, payload, model)
+        end
+      end
+      def list_models
+        # Workers AI lists models differently — rely on model catalog
+        []
+      end
+      def parse_error(response)
+        body = response.body rescue nil
+        body&.dig("errors", 0, "message") || body&.dig("error", "message")
+      end
+      class << self
+        def capabilities
+          { chat: true, streaming: true, vision: true }
+        end
+        def configuration_options; %i[api_key account_id gateway_id]; end
+        def configuration_requirements; %i[api_key account_id]; end
+        def slug; "cloudflare"; end
+      end
+      private
+      def normalize_config(config)
+        return config unless config.is_a?(Hash)
+        Ask::LLM::Config.new(
+          api_key: config[:api_key] || config["api_key"] || config[:cloudflare_api_key],
+          account_id: config[:account_id] || config["account_id"] || config[:cf_account_id],
+          gateway_id: config[:gateway_id] || config["gateway_id"] || config[:cf_gateway_id]
+        )
+      end
+      def build_http
+        LLM::HTTP.connection(api_base, headers: headers, request: { open_timeout: 30, timeout: 120 })
+      end
+      def chat_nonstream(endpoint, payload, model)
+        response = @http.post(endpoint) { |r| r.body = payload }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Cloudflare") unless response.success?
+        body = response.body
+        if @config.gateway_id
+          parse_openai_response(body, model)
+        else
+          result = body["result"] || {}
+          Ask::Message.new(role: :assistant, content: result["response"], metadata: { model: model, raw: body })
+        end
+      end
+      def parse_openai_response(body, model)
+        choice = body.dig("choices", 0)
+        return Ask::Message.new(role: :assistant, content: nil) unless choice
+        msg = choice["message"]
+        Ask::Message.new(role: :assistant, content: msg["content"], metadata: { model: model, finish_reason: choice["finish_reason"], raw: body })
+      end
+      def chat_stream_gateway(endpoint, payload, model, &block)
+        stream = Ask::Stream.new
+        response = @http.post(endpoint) do |req|
+          req.body = payload.merge(stream: true)
+          req.options.on_data = proc { |data, _bytes, _env| process_stream_chunk(data, stream, model, &block) }
+        end
+        raise LLM::HTTP.map_error(response.status, JSON.parse(response.body), provider: "Cloudflare") unless response.success?
+        stream.finish!
+        stream
+      end
+      def process_stream_chunk(raw, stream, model)
+        raw.each_line do |line|
+          line = line.strip
+          next unless line.start_with?("data: ")
+          data = line[6..]
+          next if data == "[DONE]"
+          parsed = JSON.parse(data) rescue next
+          delta = parsed.dig("choices", 0, "delta") || {}
+          chunk = Ask::Chunk.new(content: delta["content"])
+          stream.add(chunk)
+          yield chunk if block_given?
+        end
+      end
+    end
+  end
+end

data/lib/ask/provider/google.rb ADDED Viewed

@@ -0,0 +1,216 @@
+# frozen_string_literal: true
+module Ask
+  module Providers
+    # Google Gemini API provider. Also supports Vertex AI via GCP service account auth.
+    class Google < Ask::Provider
+      def initialize(config = {})
+        config = normalize_config(config)
+        super(config)
+        @http = build_http
+        @project_id = config.project_id
+      end
+      def api_base
+        @config.api_base || "https://generativelanguage.googleapis.com/v1beta"
+      end
+      def headers
+        h = { "Content-Type" => "application/json" }
+        if @config.api_key
+          # Gemini uses query param auth by default
+        elsif @config.access_token
+          h["Authorization"] = "Bearer #{@config.access_token}"
+        elsif @config.vertex_token
+          h["Authorization"] = "Bearer #{@config.vertex_token}"
+        end
+        h
+      end
+      def chat(messages, model:, tools: nil, temperature: nil, stream: nil, schema: nil, **params, &block)
+        msgs = messages.is_a?(Ask::Conversation) ? messages.to_a : messages
+        payload = build_chat_payload(msgs, model, tools, temperature, stream, schema, **params)
+        path = chat_path(model)
+        if stream
+          chat_stream(path, payload, model, &block)
+        else
+          chat_nonstream(path, payload, model)
+        end
+      end
+      def embed(texts, model:)
+        texts = Array(texts)
+        response = @http.post("models/#{model}:batchEmbedContents") { |r| r.body = { requests: texts.map { |t| { model: "models/#{model}", content: { parts: [{ text: t }] } } } } }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Google") unless response.success?
+        embeddings = response.body.dig("embeddings") || []
+        Ask::Result.success(embeddings.map { |e| e["values"] })
+      end
+      def list_models
+        response = @http.get("models") { |r| r.params["key"] = @config.api_key if @config.api_key }
+        return [] unless response.success?
+        (response.body["models"] || []).map { |m| Ask::ModelInfo.new(id: m["name"].sub("models/", ""), provider: slug) }
+      end
+      def parse_error(response)
+        body = response.body rescue nil
+        body&.dig("error", "message")
+      end
+      class << self
+        def capabilities
+          { chat: true, streaming: true, tool_calls: true, vision: true, structured_output: true, embed: true, file_upload: true }
+        end
+        def configuration_options; %i[api_key access_token vertex_token project_id api_base]; end
+        def configuration_requirements; %i[api_key]; end
+        def slug; "gemini"; end
+      end
+      private
+      def normalize_config(config)
+        return config unless config.is_a?(Hash)
+        key = config[:api_key] || config["api_key"] || config[:gemini_api_key]
+        Ask::LLM::Config.new(
+          api_key: key,
+          access_token: config[:access_token] || config["access_token"],
+          vertex_token: config[:vertex_token] || config["vertex_token"],
+          project_id: config[:project_id] || config["project_id"],
+          api_base: config[:api_base] || config["api_base"]
+        )
+      end
+      def build_http
+        LLM::HTTP.connection(api_base, headers: headers, request: { open_timeout: 30, timeout: 120 })
+      end
+      def chat_path(model)
+        model_id = model.respond_to?(:id) ? model.id : model.to_s
+        "models/#{model_id}:generateContent"
+      end
+      def build_chat_payload(messages, model, tools, temperature, stream, schema, **params)
+        contents = format_contents(messages)
+        payload = { contents: contents, systemInstruction: format_system(messages) }
+        if tools&.any?
+          payload[:tools] = [{ functionDeclarations: tools.map { |t| format_tool(t) } }]
+        end
+        if schema
+          payload[:generationConfig] ||= {}
+          payload[:generationConfig][:response_mime_type] = "application/json"
+          payload[:generationConfig][:response_schema] = schema
+        end
+        payload[:generationConfig] ||= {}
+        payload[:generationConfig][:temperature] = temperature if temperature
+        payload.merge(params)
+      end
+      def format_contents(messages)
+        messages.reject { |m| (m[:role] || m["role"]).to_s == "system" }.map { |m| format_content(m) }
+      end
+      def format_system(messages)
+        sys = messages.select { |m| (m[:role] || m["role"]).to_s == "system" }
+        return nil if sys.empty?
+        texts = sys.map { |m| m[:content] || m["content"] }.compact
+        return nil if texts.empty?
+        { parts: texts.map { |t| { text: t } } }
+      end
+      def format_content(msg)
+        role = (msg[:role] || msg["role"]).to_s
+        content = msg[:content] || msg["content"]
+        google_role = role == "assistant" ? "model" : role
+        parts = []
+        parts << { text: content } if content
+        # Handle tool calls
+        if msg[:tool_calls] || msg["tool_calls"]
+          (msg[:tool_calls] || msg["tool_calls"]).each do |tc|
+            parts << {
+              functionCall: {
+                name: tc.dig(:function, :name) || tc.dig("function", "name") || tc[:name],
+                args: parse_json(tc.dig(:function, :arguments) || tc.dig("function", "arguments") || tc[:arguments] || "{}")
+              }
+            }
+          end
+        end
+        # Handle tool results
+        if msg[:tool_call_id] || msg["tool_call_id"]
+          parts << {
+            functionResponse: {
+              name: msg[:name] || msg["name"] || "function",
+              response: { content: content || "" }
+            }
+          }
+        end
+        { role: google_role, parts: parts }
+      end
+      def format_tool(t)
+        { name: t.respond_to?(:name) ? t.name : t[:name], description: t.respond_to?(:description) ? t.description : t[:description], parameters: t.respond_to?(:parameters) ? t.parameters : (t[:parameters] || {}) }
+      end
+      def parse_json(str)
+        JSON.parse(str)
+      rescue JSON::ParserError
+        {}
+      end
+      def chat_nonstream(path, payload, model)
+        response = @http.post(path) do |req|
+          req.body = payload
+          req.params["key"] = @config.api_key if @config.api_key
+        end
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Google") unless response.success?
+        parse_response(response.body, model)
+      end
+      def parse_response(body, model)
+        candidate = body.dig("candidates", 0)
+        return Ask::Message.new(role: :assistant, content: nil) unless candidate
+        content = candidate.dig("content", "parts")&.map { |p| p["text"] }&.compact&.join
+        fc = candidate.dig("content", "parts")&.select { |p| p["functionCall"] } || []
+        tool_calls = fc.map do |p|
+          f = p["functionCall"]
+          { id: SecureRandom.hex(8), type: "function", name: f["name"], arguments: JSON.generate(f["args"] || {}) }
+        end
+        usage = body["usageMetadata"] || {}
+        Ask::Message.new(role: :assistant, content: content, tool_calls: tool_calls.empty? ? nil : tool_calls, metadata: { model: model, finish_reason: candidate["finishReason"], input_tokens: usage["promptTokenCount"], output_tokens: usage["candidatesTokenCount"], raw: body })
+      end
+      def chat_stream(path, payload, model, &block)
+        stream = Ask::Stream.new
+        response = @http.post(path) do |req|
+          req.body = payload
+          req.params["key"] = @config.api_key if @config.api_key
+          req.options.on_data = proc { |data, _bytes, _env| process_google_chunk(data, stream, model, &block) }
+        end
+        raise LLM::HTTP.map_error(response.status, JSON.parse(response.body), provider: "Google") unless response.success?
+        stream.finish!
+        stream
+      end
+      def process_google_chunk(raw, stream, model)
+        raw.each_line do |line|
+          next unless line.start_with?("data: ")
+          data = line[6..]
+          next if data.strip == "[DONE]"
+          parsed = JSON.parse(data) rescue next
+          candidate = parsed.dig("candidates", 0) or next
+          part = candidate.dig("content", "parts", 0)
+          next unless part
+          chunk = Ask::Chunk.new(content: part["text"])
+          stream.add(chunk)
+          yield chunk if block_given?
+        end
+      end
+    end
+  end
+end

data/lib/ask/provider/mistral.rb ADDED Viewed

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+module Ask
+  module Providers
+    # Mistral AI provider. Uses OpenAI-compatible wire format.
+    class Mistral < Ask::Provider
+      def initialize(config = {})
+        config = normalize_config(config)
+        super(config)
+        @http = build_http
+      end
+      def api_base
+        @config.api_base || "https://api.mistral.ai/v1"
+      end
+      def headers
+        { "Content-Type" => "application/json", "Authorization" => "Bearer #{@config.api_key}" }.compact
+      end
+      def chat(messages, model:, tools: nil, temperature: nil, stream: nil, schema: nil, **params, &block)
+        # Reuse OpenAI provider's logic since Mistral is OpenAI-compatible
+        openai = Providers::OpenAI.new(api_key: @config.api_key, base_url: api_base)
+        openai.chat(messages, model: model, tools: tools, temperature: temperature, stream: stream, schema: schema, **params, &block)
+      end
+      def embed(texts, model:)
+        texts = Array(texts)
+        response = @http.post("embeddings") { |r| r.body = { model: model, input: texts } }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Mistral") unless response.success?
+        embeddings = response.body["data"].map { |d| d["embedding"] }
+        Ask::Result.success(embeddings.one? ? embeddings.first : embeddings)
+      end
+      def list_models
+        response = @http.get("models")
+        return [] unless response.success?
+        response.body["data"].map { |m| Ask::ModelInfo.new(id: m["id"], provider: slug) }
+      end
+      def parse_error(response)
+        body = response.body rescue nil
+        body&.dig("error", "message") || body&.dig("error", "type")
+      end
+      class << self
+        def capabilities
+          { chat: true, streaming: true, tool_calls: true, structured_output: true, embed: true }
+        end
+        def configuration_options; %i[api_key api_base]; end
+        def configuration_requirements; %i[api_key]; end
+        def slug; "mistral"; end
+      end
+      private
+      def normalize_config(config)
+        return config unless config.is_a?(Hash)
+        Ask::LLM::Config.new(
+          api_key: config[:api_key] || config["api_key"] || config[:mistral_api_key],
+          api_base: config[:api_base] || config["api_base"]
+        )
+      end
+      def build_http
+        LLM::HTTP.connection(api_base, headers: headers, request: { open_timeout: 30, timeout: 120 })
+      end
+    end
+  end
+end

data/lib/ask/provider/ollama.rb ADDED Viewed

@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+module Ask
+  module Providers
+    # Ollama provider for local LLM inference.
+    # Connects to a local Ollama server (default: http://localhost:11434).
+    class Ollama < Ask::Provider
+      def initialize(config = {})
+        config = normalize_config(config)
+        super(config)
+        @http = build_http
+      end
+      def api_base
+        @config.api_base || "http://localhost:11434"
+      end
+      def headers
+        { "Content-Type" => "application/json" }
+      end
+      def chat(messages, model:, tools: nil, temperature: nil, stream: nil, schema: nil, **params, &block)
+        msgs = messages.is_a?(Ask::Conversation) ? messages.to_a : messages
+        payload = { model: model, messages: msgs.map { |m| { role: (m[:role] || m["role"]).to_s, content: m[:content] || m["content"] } }, stream: stream || false, options: {} }
+        payload[:options][:temperature] = temperature if temperature
+        if tools&.any?
+          payload[:tools] = tools.map { |t| { type: "function", function: { name: t.respond_to?(:name) ? t.name : t[:name], description: t.respond_to?(:description) ? t.description : t[:description], parameters: t.respond_to?(:parameters) ? t.parameters : (t[:parameters] || {}) } } }
+        end
+        payload.merge(params)
+        if stream
+          chat_stream(payload, model, &block)
+        else
+          chat_nonstream(payload, model)
+        end
+      end
+      def embed(texts, model:)
+        texts = Array(texts)
+        response = @http.post("api/embeddings") { |r| r.body = { model: model, prompt: texts.first } }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Ollama") unless response.success?
+        Ask::Result.success(response.body["embedding"])
+      end
+      def list_models
+        response = @http.get("api/tags")
+        return [] unless response.success?
+        response.body["models"].map { |m| Ask::ModelInfo.new(id: m["name"], provider: slug) }
+      end
+      class << self
+        def capabilities
+          { chat: true, streaming: true, tool_calls: true, embed: true, local: true }
+        end
+        def configuration_options; %i[api_base]; end
+        def configuration_requirements; %i[]; end
+        def slug; "ollama"; end
+        def local?; true; end
+        def assume_models_exist?; true; end
+      end
+      private
+      def normalize_config(config)
+        return config unless config.is_a?(Hash)
+        Ask::LLM::Config.new(api_base: config[:api_base] || config["api_base"])
+      end
+      def build_http
+        LLM::HTTP.connection(api_base, headers: headers, request: { open_timeout: 5, timeout: 600 })
+      end
+      def chat_nonstream(payload, model)
+        response = @http.post("api/chat") { |r| r.body = payload }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Ollama") unless response.success?
+        msg = response.body["message"] || {}
+        Ask::Message.new(role: :assistant, content: msg["content"], metadata: { model: response.body["model"] || model, done: response.body["done"], total_duration: response.body["total_duration"], raw: response.body })
+      end
+      def chat_stream(payload, model, &block)
+        stream = Ask::Stream.new
+        response = @http.post("api/chat") do |req|
+          req.body = payload.merge(stream: true)
+          req.options.on_data = proc { |data, _bytes, _env| process_ollama_chunk(data, stream, model, &block) }
+        end
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "Ollama") unless response.success?
+        stream.finish!
+        stream
+      end
+      def process_ollama_chunk(raw, stream, model)
+        raw.each_line do |line|
+          parsed = JSON.parse(line) rescue next
+          msg = parsed["message"] || {}
+          chunk = Ask::Chunk.new(content: msg["content"])
+          stream.add(chunk)
+          yield chunk if block_given?
+          if parsed["done"]
+            chunk = Ask::Chunk.new(finish_reason: "stop", usage: { total_duration: parsed["total_duration"] })
+            stream.add(chunk)
+            yield chunk if block_given?
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ask/provider/openai.rb ADDED Viewed

@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+module Ask
+  module Providers
+    # OpenAI API provider. Also handles all OpenAI-compatible providers
+    # (OpenRouter, DeepSeek, Azure, XAI, Perplexity, GPUStack, etc.) via
+    # +base_url+ override.
+    class OpenAI < Ask::Provider
+      def initialize(config = {})
+        config = normalize_config(config)
+        super(config)
+        @http = build_http
+      end
+      def api_base
+        @config.base_url || "https://api.openai.com/v1"
+      end
+      def headers
+        key = @config.api_key || @config.openai_api_key
+        h = { "Content-Type" => "application/json" }
+        h["Authorization"] = "Bearer #{key}" if key
+        h["OpenAI-Organization"] = @config.organization_id if @config.organization_id
+        h["OpenAI-Project"] = @config.project_id if @config.project_id
+        h
+      end
+      def chat(messages, model:, tools: nil, temperature: nil, stream: nil, schema: nil, **params, &block)
+        msgs = messages.is_a?(Ask::Conversation) ? messages.to_a : messages
+        payload = build_chat_payload(msgs, model, tools, temperature, stream, schema, **params)
+        stream ? chat_stream(payload, model, &block) : chat_nonstream(payload, model)
+      end
+      def embed(texts, model:)
+        texts = Array(texts)
+        response = @http.post("embeddings") { |r| r.body = { model: model, input: texts } }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "OpenAI") unless response.success?
+        embeddings = response.body["data"].map { |d| d["embedding"] }
+        Ask::Result.success(embeddings.one? ? embeddings.first : embeddings)
+      end
+      def list_models
+        response = @http.get("models")
+        return [] unless response.success?
+        response.body["data"].map { |m| Ask::ModelInfo.new(id: m["id"], provider: slug, metadata: { owned_by: m["owned_by"] }) }
+      end
+      def parse_error(response)
+        body = response.body rescue nil
+        body&.dig("error", "message") || body&.dig("error", "code")
+      end
+      class << self
+def slug; "openai"; end
+                def capabilities
+          { chat: true, streaming: true, tool_calls: true, vision: true, thinking: true, structured_output: true, embed: true, transcribe: true, paint: true, moderate: true }
+        end
+        def configuration_options; %i[api_key base_url organization_id project_id]; end
+        def configuration_requirements; %i[api_key]; end
+                def configured?(config)
+          (config.respond_to?(:api_key) && !config.api_key.to_s.empty?) ||
+            (config.respond_to?(:openai_api_key) && !config.openai_api_key.to_s.empty?)
+        end
+      end
+      private
+      def normalize_config(config)
+        return config if !config.is_a?(Hash)
+        Ask::LLM::Config.new(
+          api_key: config[:api_key] || config["api_key"] || config[:openai_api_key],
+          base_url: config[:base_url] || config["base_url"],
+          organization_id: config[:organization_id] || config["organization_id"],
+          project_id: config[:project_id] || config["project_id"]
+        )
+      end
+      def build_http
+        LLM::HTTP.connection(api_base, headers: headers, request: { open_timeout: 30, timeout: 120 })
+      end
+      def build_chat_payload(messages, model, tools, temperature, stream, schema, **params)
+        payload = { model: model, messages: format_messages(messages), stream: stream || false }
+        payload[:temperature] = temperature if temperature
+        payload[:tools] = format_tools(tools) if tools&.any?
+        payload[:response_format] = { type: "json_schema", json_schema: { name: "response", schema: schema, strict: true } } if schema
+        payload.merge(params)
+      end
+      def format_messages(messages)
+        messages.map do |msg|
+          role = msg[:role] || msg["role"] || :user
+          { role: role.to_s, content: msg[:content] || msg["content"] }.tap do |fm|
+            if (tc = msg[:tool_calls] || msg["tool_calls"])
+              fm[:tool_calls] = tc.map { |t| { id: t[:id] || t["id"], type: "function", function: { name: t.dig(:function, :name) || t.dig("function", "name") || t[:name], arguments: t.dig(:function, :arguments) || t.dig("function", "arguments") || t[:arguments] } } }
+            end
+            fm[:tool_call_id] = msg[:tool_call_id] || msg["tool_call_id"] if msg[:tool_call_id] || msg["tool_call_id"]
+          end.compact
+        end
+      end
+      def format_tools(tools)
+        tools.map { |t| { type: "function", function: { name: t.respond_to?(:name) ? t.name : t[:name], description: t.respond_to?(:description) ? t.description : t[:description], parameters: t.respond_to?(:parameters) ? t.parameters : t[:parameters] } } }
+      end
+      def chat_nonstream(payload, model)
+        response = @http.post("chat/completions") { |r| r.body = payload }
+        raise LLM::HTTP.map_error(response.status, response.body, provider: "OpenAI") unless response.success?
+        parse_response(response.body, model)
+      end
+      def parse_response(body, model)
+        choice = body.dig("choices", 0)
+        return Ask::Message.new(role: :assistant, content: nil) unless choice
+        msg = choice["message"]
+        usage = body["usage"] || {}
+        Ask::Message.new(role: :assistant, content: msg["content"], tool_calls: parse_tool_calls(msg["tool_calls"]), metadata: { model: body["model"] || model, finish_reason: choice["finish_reason"], input_tokens: usage["prompt_tokens"], output_tokens: usage["completion_tokens"], raw: body })
+      end
+      def parse_tool_calls(calls)
+        return nil unless calls&.any?
+        calls.map { |tc| { id: tc["id"], type: "function", name: tc.dig("function", "name"), arguments: tc.dig("function", "arguments") } }
+      end
+      def chat_stream(payload, model, &block)
+        stream = Ask::Stream.new
+        @http.post("chat/completions") do |req|
+          req.body = payload.merge(stream: true)
+          req.options.on_data = proc { |data, _bytes, _env| process_chunk(data, stream, model, &block) }
+        end.tap { |resp| raise LLM::HTTP.map_error(resp.status, JSON.parse(resp.body), provider: "OpenAI") unless resp.success? }
+        stream.finish!
+        stream
+      end
+      def process_chunk(raw, stream, model)
+        raw.each_line do |line|
+          line = line.strip
+          next if line.empty? || line.start_with?(":") || !line.start_with?("data: ")
+          data = line[6..]; next if data == "[DONE]"
+          parsed = JSON.parse(data) rescue next
+          choice = parsed.dig("choices", 0) or next
+          delta = choice["delta"] || {}
+          chunk = Ask::Chunk.new(content: delta["content"], tool_calls: parse_stream_tool_calls(delta["tool_calls"]), finish_reason: choice["finish_reason"], usage: parsed["usage"])
+          stream.add(chunk)
+          yield chunk if block_given?
+        end
+      end
+      def parse_stream_tool_calls(calls)
+        return nil unless calls&.any?
+        calls.map { |tc| { id: tc["id"], name: tc.dig("function", "name"), arguments: tc.dig("function", "arguments"), index: tc["index"] } }
+      end
+    end
+  end
+end