RubyGems - ollama-client - Versions diffs - 0.2.0 - Mend

ollama-client 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +12 -0
data/CODE_OF_CONDUCT.md +10 -0
data/CONTRIBUTING.md +36 -0
data/LICENSE.txt +21 -0
data/PRODUCTION_FIXES.md +172 -0
data/README.md +690 -0
data/Rakefile +12 -0
data/TESTING.md +286 -0
data/examples/advanced_complex_schemas.rb +363 -0
data/examples/advanced_edge_cases.rb +241 -0
data/examples/advanced_error_handling.rb +200 -0
data/examples/advanced_multi_step_agent.rb +258 -0
data/examples/advanced_performance_testing.rb +186 -0
data/examples/complete_workflow.rb +235 -0
data/examples/dhanhq_agent.rb +752 -0
data/examples/dhanhq_tools.rb +563 -0
data/examples/structured_outputs_chat.rb +72 -0
data/examples/tool_calling_pattern.rb +266 -0
data/exe/ollama-client +4 -0
data/lib/ollama/agent/executor.rb +157 -0
data/lib/ollama/agent/messages.rb +31 -0
data/lib/ollama/agent/planner.rb +47 -0
data/lib/ollama/client.rb +775 -0
data/lib/ollama/config.rb +29 -0
data/lib/ollama/errors.rb +54 -0
data/lib/ollama/schema_validator.rb +79 -0
data/lib/ollama/schemas/base.json +5 -0
data/lib/ollama/streaming_observer.rb +22 -0
data/lib/ollama/version.rb +5 -0
data/lib/ollama_client.rb +46 -0
data/sig/ollama/client.rbs +6 -0
metadata +108 -0

data/lib/ollama/client.rb ADDED Viewed

@@ -0,0 +1,775 @@
+# frozen_string_literal: true
+require "net/http"
+require "uri"
+require "json"
+require_relative "errors"
+require_relative "schema_validator"
+require_relative "config"
module Ollama
  # Client for the Ollama HTTP API (+/api/generate+, +/api/chat+, +/api/tags+).
  #
  # Structured calls parse the model output as JSON, validate it against the
  # caller's JSON Schema and retry retryable failures; a call makes at most
  # +config.retries + 1+ attempts before raising RetryExhaustedError.
  #
  # The config object is expected to respond to: base_url, model, retries,
  # timeout, temperature, top_p, num_ctx and on_response.
  # rubocop:disable Metrics/ClassLength
  class Client
    GENERATE_ENDPOINT = "/api/generate"
    CHAT_ENDPOINT = "/api/chat"
    PING_ENDPOINT = "/api/ping"
    TAGS_ENDPOINT = "/api/tags"

    # @param config [#base_url, nil] configuration; defaults to a private copy
    #   of the global OllamaClient config (or a fresh Config)
    def initialize(config: nil)
      @config = config || default_config
      # Tolerate a trailing slash on base_url so paths never become "//api/...".
      base = @config.base_url.to_s.chomp("/")
      @uri = URI("#{base}#{GENERATE_ENDPOINT}")
      @chat_uri = URI("#{base}#{CHAT_ENDPOINT}")
      @base_uri = URI(@config.base_url)
    end

    # Chat API method matching JavaScript ollama.chat() interface.
    # Supports structured outputs via the format parameter.
    #
    # WARNING: chat() is NOT recommended for agent planning or tool routing.
    # Use generate() instead for stateless, explicit state injection.
    #
    # @param messages [Array<Hash>] message hashes with :role and :content
    # @param model [String, nil] model name (overrides config.model)
    # @param format [Hash, nil] JSON Schema for structured outputs
    # @param options [Hash, nil] model options (temperature, top_p, num_ctx, ...);
    #   merged over the config defaults, string or symbol keys accepted
    # @param strict [Boolean] if true, JSON/schema violations are raised
    #   immediately instead of being retried (also opts in to chat)
    # @param allow_chat [Boolean] explicit opt-in required to use chat at all
    # @param return_meta [Boolean] if true, returns {"data" => ..., "meta" => ...}
    # @return [Object, Hash] parsed JSON from the assistant message, or the
    #   data/meta wrapper when return_meta is true
    # @raise [Error] when neither allow_chat nor strict is set
    # @raise [RetryExhaustedError] when every attempt failed
    # rubocop:disable Metrics/ParameterLists
    def chat(messages:, model: nil, format: nil, options: {}, strict: false, allow_chat: false, return_meta: false)
      ensure_chat_allowed!("chat", allow_chat: allow_chat, strict: strict)
      @current_schema = format # Store for validation
      started_at = monotonic_time

      with_retries(strict: strict) do |attempt|
        attempt_started_at = monotonic_time
        raw = call_chat_api(model: model, messages: messages, format: format, tools: nil, options: options)
        attempt_latency_ms = elapsed_ms(attempt_started_at)
        emit_response_hook(
          raw,
          { endpoint: CHAT_ENDPOINT, model: model || @config.model, attempt: attempt,
            attempt_latency_ms: attempt_latency_ms }
        )
        parsed = parse_json_response(raw)
        # CRITICAL: If format is provided, free-text output is forbidden
        if format
          ensure_present!(parsed, "format schema")
          SchemaValidator.validate!(parsed, format)
        end
        if return_meta
          with_meta(parsed, endpoint: CHAT_ENDPOINT, model: model || @config.model,
                            attempts: attempt, started_at: started_at)
        else
          parsed
        end
      end
    end

    # Raw Chat API method that returns the full parsed response body.
    #
    # Intended for advanced use cases such as tool-calling loops where callers
    # need access to fields like `message.tool_calls`.
    #
    # @param messages [Array<Hash>] message hashes with :role and :content
    # @param model [String, nil] model name (overrides config.model)
    # @param format [Hash, nil] JSON Schema; when given, message.content must
    #   be non-empty JSON that validates against it
    # @param tools [Array<Hash>, nil] tool definitions sent to Ollama
    # @param options [Hash, nil] model options merged over the config defaults
    # @param strict [Boolean] raise JSON/schema violations without retrying
    # @param allow_chat [Boolean] explicit opt-in required to use chat at all
    # @param return_meta [Boolean] if true, returns {"data" => ..., "meta" => ...}
    # @param stream [Boolean] if true, uses the streaming endpoint mode
    # @yield [Hash] each decoded stream frame (only when stream is true).
    #   Note that a retried attempt replays frames to the block.
    # @return [Hash] full parsed JSON response body (or the data/meta wrapper)
    def chat_raw(messages:, model: nil, format: nil, tools: nil, options: {}, strict: false, allow_chat: false,
                 return_meta: false, stream: false, &on_chunk)
      ensure_chat_allowed!("chat_raw", allow_chat: allow_chat, strict: strict)
      @current_schema = format # Store for validation
      started_at = monotonic_time
      request = { model: model, messages: messages, format: format, tools: tools, options: options }

      with_retries(strict: strict) do |attempt|
        attempt_started_at = monotonic_time
        # `raw_body` is either a JSON string (non-stream) or a Hash (stream).
        raw_body = stream ? call_chat_api_raw_stream(**request, &on_chunk) : call_chat_api_raw(**request)
        attempt_latency_ms = elapsed_ms(attempt_started_at)
        emit_response_hook(
          raw_body.is_a?(Hash) ? raw_body.to_json : raw_body,
          { endpoint: CHAT_ENDPOINT, model: model || @config.model, attempt: attempt,
            attempt_latency_ms: attempt_latency_ms }
        )
        parsed_body = raw_body.is_a?(Hash) ? raw_body : JSON.parse(raw_body)
        validate_chat_content!(parsed_body, format) if format
        if return_meta
          with_meta(parsed_body, endpoint: CHAT_ENDPOINT, model: model || @config.model,
                                 attempts: attempt, started_at: started_at)
        else
          parsed_body
        end
      end
    end
    # rubocop:enable Metrics/ParameterLists

    # Stateless structured generation through /api/generate.
    #
    # @param prompt [String] prompt text (a JSON instruction is appended when
    #   the prompt does not already mention JSON)
    # @param schema [Hash] JSON Schema the response must satisfy
    # @param strict [Boolean] raise JSON/schema violations without retrying
    # @param return_meta [Boolean] if true, returns {"data" => ..., "meta" => ...}
    # @return [Object, Hash] parsed, validated JSON (or the data/meta wrapper)
    # @raise [RetryExhaustedError] when every attempt failed
    def generate(prompt:, schema:, strict: false, return_meta: false)
      @current_schema = schema # Read by call_api / enhance_prompt_for_json
      started_at = monotonic_time

      with_retries(strict: strict) do |attempt|
        attempt_started_at = monotonic_time
        raw = call_api(prompt)
        attempt_latency_ms = elapsed_ms(attempt_started_at)
        emit_response_hook(
          raw,
          { endpoint: GENERATE_ENDPOINT, model: @config.model, attempt: attempt,
            attempt_latency_ms: attempt_latency_ms }
        )
        parsed = parse_json_response(raw)
        # CRITICAL: If schema is provided, free-text output is forbidden
        ensure_present!(parsed, "schema")
        SchemaValidator.validate!(parsed, schema)
        if return_meta
          with_meta(parsed, endpoint: GENERATE_ENDPOINT, model: @config.model,
                            attempts: attempt, started_at: started_at)
        else
          parsed
        end
      end
    end

    # Same as #generate with strict: true (no retries on JSON/schema violations).
    def generate_strict!(prompt:, schema:, return_meta: false)
      generate(prompt: prompt, schema: schema, strict: true, return_meta: return_meta)
    end

    # Lightweight server health check.
    # Returns true/false by default; pass return_meta: true for details.
    #
    # NOTE(review): this probes /api/ping; confirm the target Ollama server
    # actually exposes that route, otherwise the check always reports false.
    #
    # @param return_meta [Boolean]
    # @return [Boolean, Hash]
    def health(return_meta: false)
      started_at = monotonic_time
      base = @base_uri.to_s
      ping_uri = URI.join(base.end_with?("/") ? base : "#{base}/", "api/ping")
      res = http_start(ping_uri) { |http| http.request(Net::HTTP::Get.new(ping_uri)) }
      health_result(res.is_a?(Net::HTTPSuccess), return_meta, started_at, "status_code" => res.code.to_i)
    rescue Net::ReadTimeout, Net::OpenTimeout
      health_result(false, return_meta, started_at, "error" => "timeout")
    rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
      health_result(false, return_meta, started_at, "error" => e.message)
    end

    # Lists the names of the models available on the server (/api/tags).
    #
    # @return [Array<String>]
    # @raise [Error] on connection failure or a non-success HTTP status
    # @raise [InvalidJSONError] when the response body is not JSON
    # @raise [TimeoutError] when the request times out
    def list_models
      base = @config.base_url.to_s.chomp("/")
      tags_uri = URI("#{base}#{TAGS_ENDPOINT}")
      res = with_network_errors do
        http_start(tags_uri) { |http| http.request(Net::HTTP::Get.new(tags_uri)) }
      end
      raise Error, "Failed to fetch models: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

      body = JSON.parse(res.body)
      body["models"]&.map { |m| m["name"] } || []
    rescue JSON::ParserError => e
      raise InvalidJSONError, "Failed to parse models response: #{e.message}"
    end

    private

    # --- request gating / retry plumbing -----------------------------------

    # Raises unless the caller explicitly opted in to the chat endpoints.
    def ensure_chat_allowed!(method_name, allow_chat:, strict:)
      return if allow_chat || strict

      raise Error,
            "#{method_name}() is intentionally gated because it is easy to misuse inside agents. " \
            "Prefer generate(). If you really want #{method_name}(), pass allow_chat: true (or strict: true)."
    end

    # Runs the block, retrying retryable failures. Yields the 1-based attempt
    # number and returns the block's value.
    #
    # * 404s are never retried; the error is enriched with model suggestions.
    # * HTTP errors are retried only when the error reports retryable?.
    # * JSON/schema violations are retried unless +strict+.
    # * Timeouts and other client errors are always retried.
    # rubocop:disable Metrics/MethodLength
    def with_retries(strict:)
      attempts = 0
      begin
        attempts += 1
        yield attempts
      rescue NotFoundError => e
        raise enhance_not_found_error(e)
      rescue HTTPError => e
        raise e unless e.retryable?

        raise_if_exhausted!(attempts, e)
        retry
      rescue JSON::ParserError => e
        raise InvalidJSONError, "Failed to parse API response: #{e.message}" if strict

        raise_if_exhausted!(attempts, e)
        retry
      rescue InvalidJSONError, SchemaViolationError => e
        raise e if strict

        raise_if_exhausted!(attempts, e)
        retry
      rescue TimeoutError, Error => e
        raise_if_exhausted!(attempts, e)
        retry
      end
    end
    # rubocop:enable Metrics/MethodLength

    # Raises RetryExhaustedError once the retry budget has been used up.
    def raise_if_exhausted!(attempts, error)
      return unless attempts > @config.retries

      raise RetryExhaustedError, "Failed after #{attempts} attempts: #{error.message}"
    end

    # Wraps a payload in the {"data", "meta"} envelope used by return_meta.
    def with_meta(data, endpoint:, model:, attempts:, started_at:)
      {
        "data" => data,
        "meta" => {
          "endpoint" => endpoint,
          "model" => model,
          "attempts" => attempts,
          "latency_ms" => elapsed_ms(started_at)
        }
      }
    end

    # Builds the return value of #health (plain boolean or detail hash).
    def health_result(success, return_meta, started_at, details)
      return success unless return_meta

      meta = { "endpoint" => PING_ENDPOINT }.merge(details)
      meta["latency_ms"] = elapsed_ms(started_at)
      { "ok" => success, "meta" => meta }
    end

    # Raises SchemaViolationError for nil or empty payloads. JSON primitives
    # (numbers, booleans) do not respond to empty? and count as present.
    def ensure_present!(value, requirement)
      return unless value.nil? || (value.respond_to?(:empty?) && value.empty?)

      raise SchemaViolationError, "Empty or nil response when #{requirement} is required"
    end

    # Validates the assistant message content of a raw chat body against +format+.
    def validate_chat_content!(parsed_body, format)
      content = message_content(parsed_body)
      ensure_present!(content, "format schema")
      parsed_content = parse_json_response(content)
      ensure_present!(parsed_content, "format schema")
      SchemaValidator.validate!(parsed_content, format)
    end

    # Safely reads body["message"]["content"]; nil when the shape is unexpected.
    def message_content(body)
      message = body.is_a?(Hash) ? body["message"] : nil
      message.is_a?(Hash) ? message["content"] : nil
    end

    # --- errors / config ------------------------------------------------------

    # Translates a non-success HTTP response into NotFoundError (404) or HTTPError.
    def handle_http_error(res, requested_model: nil)
      status_code = res.code.to_i
      requested_model ||= @config.model
      raise NotFoundError.new(res.message, requested_model: requested_model) if status_code == 404

      # Whether the error is retried is decided by HTTPError#retryable?.
      raise HTTPError.new("HTTP #{res.code}: #{res.message}", status_code)
    end

    def default_config
      if defined?(OllamaClient)
        # Avoid sharing a mutable global config object across clients/threads.
        # The OllamaClient.config instance remains global for convenience,
        # but each Client gets its own copy by default.
        OllamaClient.config.dup
      else
        Config.new
      end
    end

    # Returns a NotFoundError carrying similar model names, or the original
    # error when the model list cannot be fetched.
    def enhance_not_found_error(error)
      return error if error.requested_model.nil?

      begin
        available_models = list_models
        suggestions = find_similar_models(error.requested_model, available_models)
        NotFoundError.new(error.message, requested_model: error.requested_model, suggestions: suggestions)
      rescue Error
        # If we can't fetch models, return original error
        error
      end
    end

    # --- prompt / schema helpers ---------------------------------------------

    # Appends a JSON-only instruction unless the prompt already mentions JSON.
    def enhance_prompt_for_json(prompt)
      return prompt unless @current_schema
      return prompt if prompt.match?(/json/i)

      schema_summary = summarize_schema(@current_schema)
      json_instruction = "CRITICAL: Respond with ONLY valid JSON (no markdown code blocks, no explanations). " \
                         "The JSON must include these exact required fields: #{schema_summary}"
      "#{prompt}\n\n#{json_instruction}"
    end

    # Describes the required fields of a (string-keyed) JSON Schema together
    # with a placeholder example object.
    def summarize_schema(schema)
      return "object" unless schema.is_a?(Hash)

      required = schema["required"] || []
      properties = schema["properties"] || {}
      return "object" if required.empty? && properties.empty?

      example = required.each_with_object({}) do |key, acc|
        acc[key] = example_value_for((properties[key] || {})["type"])
      end
      required_list = required.map { |k| "\"#{k}\"" }.join(", ")
      "Required fields: [#{required_list}]. Example structure:\n#{JSON.pretty_generate(example)}"
    end

    # Placeholder value for a JSON Schema primitive type name.
    def example_value_for(type)
      case type
      when "string" then "string_value"
      when "number", "integer" then 0
      when "boolean" then true
      when "array" then []
      else {}
      end
    end

    # --- JSON extraction --------------------------------------------------------

    def parse_json_response(raw)
      json_text = extract_json_fragment(raw)
      JSON.parse(json_text)
    rescue JSON::ParserError => e
      raise InvalidJSONError, "Failed to parse extracted JSON: #{e.message}. Extracted: #{json_text&.slice(0, 200)}..."
    end

    # Returns the first balanced JSON object/array found in +text+ (or the
    # whole trimmed text when it already is valid JSON).
    #
    # The scan works on bytes end to end: the structural characters are ASCII
    # and can never occur inside a UTF-8 multibyte sequence, so byte offsets
    # stay correct even when the response contains non-ASCII text.
    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
    def extract_json_fragment(text)
      raise InvalidJSONError, "Empty response body" if text.nil? || text.empty?

      stripped = text.lstrip
      # Fast path: the whole (trimmed) body is valid JSON (including primitives).
      if stripped.start_with?("{", "[", "\"", "-", "t", "f", "n") || stripped.match?(/\A\d/)
        begin
          JSON.parse(stripped)
          return stripped
        rescue JSON::ParserError
          # Fall back to extraction below (common with prefix/suffix noise).
        end
      end

      start_idx = text.index(/[{\[]/)
      raise InvalidJSONError, "No JSON found in response. Response: #{text[0..200]}..." unless start_idx

      # Convert the character index into a byte offset before scanning bytes.
      start_byte = text[0, start_idx].bytesize
      closers = []
      in_string = false
      escaped = false
      (start_byte...text.bytesize).each do |pos|
        byte = text.getbyte(pos)
        if in_string
          if escaped
            escaped = false
          elsif byte == 92 # backslash
            escaped = true
          elsif byte == 34 # double-quote
            in_string = false
          end
          next
        end

        case byte
        when 34 # double-quote
          in_string = true
        when 123 # {
          closers << 125 # }
        when 91 # [
          closers << 93 # ]
        when 125, 93 # }, ]
          if closers.pop != byte
            raise InvalidJSONError, "Malformed JSON in response. Response: #{text[start_idx, 200]}..."
          end
          return text.byteslice(start_byte, pos - start_byte + 1) if closers.empty?
        end
      end
      raise InvalidJSONError, "Incomplete JSON in response. Response: #{text[start_idx, 200]}..."
    end
    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

    # --- observability -------------------------------------------------------

    def emit_response_hook(raw, meta)
      hook = @config.on_response
      return unless hook.respond_to?(:call)

      hook.call(raw, meta)
    rescue StandardError
      # Observability hooks must never break the main flow.
      nil
    end

    def monotonic_time
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    def elapsed_ms(started_at)
      ((monotonic_time - started_at) * 1000.0).round(1)
    end

    # --- model suggestions ---------------------------------------------------

    # Returns up to +limit+ available model names that resemble +requested+.
    def find_similar_models(requested, available, limit: 5)
      return [] if available.empty?

      # Simple similarity: models containing the requested name or vice versa
      requested_lower = requested.downcase
      matches = available.select do |model|
        model_lower = model.downcase
        model_lower.include?(requested_lower) || requested_lower.include?(model_lower)
      end

      # Fallback: compare name segments (split on ":", ".", "_", "-").
      if matches.empty?
        requested_parts = model_name_parts(requested_lower)
        matches = available.select do |model|
          model_parts = model_name_parts(model.downcase)
          requested_parts.any? { |part| model_parts.any? { |mp| mp.include?(part) || part.include?(mp) } }
        end
      end
      matches.first(limit)
    end

    # Splits a model name into segments, dropping empty ones: an empty segment
    # would match everything because include?("") is always true.
    def model_name_parts(name)
      name.split(/[:._-]/).reject(&:empty?)
    end

    # --- HTTP plumbing -------------------------------------------------------

    # Opens a connection to +uri+, enabling TLS for https URLs, and yields it.
    def http_start(uri, &block)
      Net::HTTP.start(
        uri.hostname,
        uri.port,
        use_ssl: uri.scheme == "https",
        read_timeout: @config.timeout,
        open_timeout: @config.timeout,
        &block
      )
    end

    # Maps low-level network failures onto the client's own error classes.
    def with_network_errors
      yield
    rescue Net::ReadTimeout, Net::OpenTimeout
      raise TimeoutError, "Request timed out after #{@config.timeout}s"
    rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
      raise Error, "Connection failed: #{e.message}"
    end

    # Builds a JSON POST request for +uri+.
    def build_post(uri, body)
      req = Net::HTTP::Post.new(uri)
      req["Content-Type"] = "application/json"
      req.body = body.to_json
      req
    end

    # Config sampling defaults overlaid with caller options. nil values and a
    # nil options argument fall back to the defaults; any extra option is
    # forwarded to the server unchanged.
    def model_options(options = nil)
      defaults = {
        temperature: @config.temperature,
        top_p: @config.top_p,
        num_ctx: @config.num_ctx
      }
      overrides = (options || {}).to_h
                                 .transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
                                 .reject { |_, value| value.nil? }
      defaults.merge(overrides)
    end

    # Request body shared by every /api/chat call.
    # rubocop:disable Metrics/ParameterLists
    def chat_request_body(model:, messages:, format:, tools:, options:, stream:)
      body = {
        model: model || @config.model,
        messages: messages,
        stream: stream,
        options: model_options(options)
      }
      # Use Ollama's native format parameter for structured outputs
      body[:format] = format if format
      body[:tools] = tools if tools
      body
    end
    # rubocop:enable Metrics/ParameterLists

    # Non-streaming chat call returning only the assistant message content.
    def call_chat_api(model:, messages:, format:, tools:, options:)
      raw = call_chat_api_raw(model: model, messages: messages, format: format, tools: tools, options: options)
      # Chat API returns message.content, not response
      message_content(JSON.parse(raw))
    rescue JSON::ParserError => e
      raise InvalidJSONError, "Failed to parse API response: #{e.message}"
    end

    # Calls /api/generate and returns the "response" field of the reply.
    def call_api(prompt)
      body = {
        model: @config.model,
        prompt: prompt,
        stream: false,
        # Sampling parameters belong under "options" in the request body.
        options: model_options
      }
      # Use Ollama's native format parameter for structured outputs
      if @current_schema
        body[:format] = @current_schema
        # Also enhance prompt as fallback (some models work better with both)
        body[:prompt] = enhance_prompt_for_json(prompt)
      end

      res = with_network_errors do
        http_start(@uri) { |http| http.request(build_post(@uri, body)) }
      end
      handle_http_error(res) unless res.is_a?(Net::HTTPSuccess)
      JSON.parse(res.body)["response"]
    rescue JSON::ParserError => e
      raise InvalidJSONError, "Failed to parse API response: #{e.message}"
    end

    # Non-streaming chat call returning the raw response body string.
    def call_chat_api_raw(model:, messages:, format:, tools:, options:)
      body = chat_request_body(model: model, messages: messages, format: format, tools: tools,
                               options: options, stream: false)
      with_network_errors do
        res = http_start(@chat_uri) { |http| http.request(build_post(@chat_uri, body)) }
        handle_http_error(res, requested_model: model || @config.model) unless res.is_a?(Net::HTTPSuccess)
        res.body
      end
    end

    # Streaming chat call. Yields every decoded frame to the block and returns
    # a Hash: the final `done: true` frame with the aggregated message merged
    # in, or just the aggregated message when no final frame was seen.
    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
    def call_chat_api_raw_stream(model:, messages:, format:, tools:, options:, &on_chunk)
      req = build_post(
        @chat_uri,
        chat_request_body(model: model, messages: messages, format: format, tools: tools,
                          options: options, stream: true)
      )
      aggregated = { "message" => { "role" => "assistant", "content" => +"" } }
      final_obj = nil
      buffer = +""

      consume = lambda do |line|
        obj = parse_stream_line(line)
        next if obj.nil?

        # Expose the raw chunk to callers (presentation only).
        on_chunk&.call(obj)
        next unless obj.is_a?(Hash)

        merge_stream_message!(aggregated["message"], obj["message"])
        # Many Ollama stream payloads include `done: true` on the last line.
        final_obj = obj if obj["done"] == true
      end

      with_network_errors do
        http_start(@chat_uri) do |http|
          http.request(req) do |res|
            handle_http_error(res, requested_model: model || @config.model) unless res.is_a?(Net::HTTPSuccess)
            res.read_body do |chunk|
              buffer << chunk
              while (newline_idx = buffer.index("\n"))
                consume.call(buffer.slice!(0, newline_idx + 1))
              end
            end
          end
        end
        # A final frame without a trailing newline must not be dropped.
        consume.call(buffer) unless buffer.strip.empty?
      end

      combine_stream_result(final_obj, aggregated)
    rescue JSON::ParserError => e
      raise InvalidJSONError, "Failed to parse streaming response: #{e.message}"
    end
    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

    # Decodes one stream line; returns nil for blank, comment and control lines.
    def parse_stream_line(line)
      line = line.strip
      return nil if line.empty?

      # Tolerate SSE framing (e.g. "data: {...}") and ignore non-data lines.
      if line.start_with?("data:")
        line = line.sub(/\Adata:\s*/, "").strip
      elsif line.start_with?("event:", ":")
        return nil
      end
      return nil if line.empty? || line == "[DONE]"

      JSON.parse(line)
    end

    # Folds one frame's message delta into the aggregated message.
    def merge_stream_message!(target, msg)
      return unless msg.is_a?(Hash)

      delta_content = msg["content"]
      target["content"] << delta_content.to_s if delta_content
      target["tool_calls"] = msg["tool_calls"] if msg["tool_calls"]
      target["role"] = msg["role"] if msg["role"]
    end

    # Prefer returning the final "done: true" frame (it typically contains
    # useful metadata like durations), but always use the aggregated message
    # content/tool_calls since streaming payloads often send deltas.
    def combine_stream_result(final_obj, aggregated)
      return aggregated unless final_obj.is_a?(Hash)

      combined = final_obj.dup
      combined_message = combined["message"].is_a?(Hash) ? combined["message"].dup : {}
      agg_message = aggregated["message"] || {}
      agg_content = agg_message["content"].to_s
      combined_message["content"] = agg_content unless agg_content.empty?
      combined_message["tool_calls"] = agg_message["tool_calls"] if agg_message.key?("tool_calls")
      combined_message["role"] ||= agg_message["role"] if agg_message["role"]
      combined["message"] = combined_message unless combined_message.empty?
      combined
    end
  end
  # rubocop:enable Metrics/ClassLength
end