durable-llm 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +7 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +7 -9
- data/Gemfile.lock +3 -3
- data/README.md +1 -0
- data/Rakefile +6 -6
- data/devenv.lock +103 -0
- data/devenv.nix +9 -0
- data/devenv.yaml +15 -0
- data/durable-llm.gemspec +44 -0
- data/examples/openai_quick_complete.rb +3 -1
- data/lib/durable/llm/cli.rb +215 -22
- data/lib/durable/llm/client.rb +85 -6
- data/lib/durable/llm/configuration.rb +163 -10
- data/lib/durable/llm/errors.rb +185 -0
- data/lib/durable/llm/providers/anthropic.rb +232 -24
- data/lib/durable/llm/providers/azure_openai.rb +347 -0
- data/lib/durable/llm/providers/base.rb +83 -1
- data/lib/durable/llm/providers/cohere.rb +138 -11
- data/lib/durable/llm/providers/deepseek.rb +233 -0
- data/lib/durable/llm/providers/fireworks.rb +278 -0
- data/lib/durable/llm/providers/google.rb +301 -0
- data/lib/durable/llm/providers/groq.rb +107 -25
- data/lib/durable/llm/providers/huggingface.rb +120 -17
- data/lib/durable/llm/providers/mistral.rb +431 -0
- data/lib/durable/llm/providers/openai.rb +150 -4
- data/lib/durable/llm/providers/opencode.rb +253 -0
- data/lib/durable/llm/providers/openrouter.rb +256 -0
- data/lib/durable/llm/providers/perplexity.rb +273 -0
- data/lib/durable/llm/providers/together.rb +346 -0
- data/lib/durable/llm/providers/xai.rb +355 -0
- data/lib/durable/llm/providers.rb +103 -13
- data/lib/durable/llm/version.rb +5 -1
- data/lib/durable/llm.rb +141 -1
- data/lib/durable.rb +29 -4
- data/sig/durable/llm.rbs +302 -1
- metadata +48 -36
@@ -0,0 +1,301 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file implements the Google provider for accessing Google's Gemini language models through their API, providing completion capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Google's Generative Language API endpoint, processes generateContent requests with text content, handles various API error responses, and includes comprehensive response classes to format Google's API responses into a consistent interface.
|
4
|
+
|
5
|
+
require 'faraday'
|
6
|
+
require 'json'
|
7
|
+
require 'durable/llm/errors'
|
8
|
+
require 'durable/llm/providers/base'
|
9
|
+
require 'event_stream_parser'
|
10
|
+
|
11
|
+
module Durable
|
12
|
+
module Llm
|
13
|
+
module Providers
|
14
|
+
class Google < Durable::Llm::Providers::Base
|
15
|
+
BASE_URL = 'https://generativelanguage.googleapis.com'
|
16
|
+
|
17
|
+
# Resolves the Google API key: the configured value wins, with the
# GOOGLE_API_KEY environment variable as a fallback.
#
# @return [String, nil] the API key, or nil when none is configured
def default_api_key
  configured = begin
    Durable::Llm.configuration.google&.api_key
  rescue NoMethodError
    # Configuration object may not expose a `google` section yet.
    nil
  end
  configured || ENV['GOOGLE_API_KEY']
end
|
24
|
+
|
25
|
+
# The API key used on every request; may be reassigned after construction.
attr_accessor :api_key

# Builds a provider backed by a JSON-encoding/decoding Faraday connection
# to the Generative Language API base URL.
#
# @param api_key [String, nil] explicit key; falls back to default_api_key
def initialize(api_key: nil)
  @api_key = api_key || default_api_key
  @conn = Faraday.new(url: BASE_URL) do |f|
    f.request :json
    f.response :json
    f.adapter Faraday.default_adapter
  end
end
|
35
|
+
|
36
|
+
# Performs a non-streaming generateContent request for the given model.
#
# @param options [Hash] expects :model plus chat-style :messages and
#   optional tuning keys (see #transform_options)
# @return [GoogleResponse] normalized completion response
def completion(options)
  endpoint = "/v1beta/models/#{options[:model]}:generateContent?key=#{@api_key}"
  payload = transform_options(options)

  response = @conn.post(endpoint) { |req| req.body = payload }

  handle_response(response)
end
|
49
|
+
|
50
|
+
# Requests an embedding vector for the given input text.
#
# @param model [String] embedding model name, e.g. 'text-embedding-004'
# @param input [String] text to embed
# @return [GoogleEmbeddingResponse] wrapper exposing the vector via #to_a
def embedding(model:, input:, **_options)
  endpoint = "/v1beta/models/#{model}:embedContent?key=#{@api_key}"
  payload = { content: { parts: [{ text: input }] } }

  response = @conn.post(endpoint) { |req| req.body = payload }

  handle_response(response, GoogleEmbeddingResponse)
end
|
65
|
+
|
66
|
+
# Known Gemini and embedding model identifiers. Google does not expose a
# public model-listing endpoint, so this list is maintained by hand.
#
# @return [Array<String>] supported model names
def models
  %w[
    gemini-1.5-flash
    gemini-1.5-flash-001
    gemini-1.5-flash-002
    gemini-1.5-flash-8b
    gemini-1.5-flash-8b-001
    gemini-1.5-flash-8b-latest
    gemini-1.5-flash-latest
    gemini-1.5-pro
    gemini-1.5-pro-001
    gemini-1.5-pro-002
    gemini-1.5-pro-latest
    gemini-2.0-flash
    gemini-2.0-flash-001
    gemini-2.0-flash-exp
    gemini-2.0-flash-lite
    gemini-2.0-flash-lite-001
    gemini-2.0-flash-live-001
    gemini-2.0-flash-preview-image-generation
    gemini-2.5-flash
    gemini-2.5-flash-exp-native-audio-thinking-dialog
    gemini-2.5-flash-lite
    gemini-2.5-flash-lite-06-17
    gemini-2.5-flash-preview-05-20
    gemini-2.5-flash-preview-native-audio-dialog
    gemini-2.5-flash-preview-tts
    gemini-2.5-pro
    gemini-2.5-pro-preview-tts
    gemini-live-2.5-flash-preview
    text-embedding-004
    text-multilingual-embedding-002
  ]
end
|
101
|
+
|
102
|
+
# This provider supports server-sent-event streaming.
#
# @return [true]
def self.stream?
  true
end
|
105
|
+
|
106
|
+
# Streams a generateContent request over SSE, yielding a
# GoogleStreamResponse for every parsed event chunk.
#
# @param options [Hash] same shape as #completion
# @yield [GoogleStreamResponse] one wrapper per streamed chunk
def stream(options)
  endpoint = "/v1beta/models/#{options[:model]}:streamGenerateContent?key=#{@api_key}&alt=sse"
  payload = transform_options(options)

  # Forward each parsed SSE event to the caller's block.
  on_chunk = proc do |chunk, _size, _total|
    yield GoogleStreamResponse.new(chunk)
  end

  response = @conn.post(endpoint) do |req|
    req.headers['Accept'] = 'text/event-stream'
    req.body = payload
    req.options.on_data = to_json_stream(user_proc: on_chunk)
  end

  handle_response(response)
end
|
125
|
+
|
126
|
+
private
|
127
|
+
|
128
|
+
# Converts OpenAI-style chat options into Google's generateContent request
# shape: chat turns become `contents` (assistant → 'model', everything
# else → 'user'), system messages are joined with newlines into
# `systemInstruction`, and tuning keys move under camelCased
# `generationConfig` entries.
#
# @param options [Hash] :messages plus optional :temperature, :max_tokens,
#   :top_p, :top_k
# @return [Hash] request body for the Generative Language API
def transform_options(options)
  all_messages = options[:messages] || []
  system_msgs, chat_msgs = all_messages.partition { |m| m[:role] == 'system' }

  body = {
    contents: chat_msgs.map do |msg|
      role = msg[:role] == 'assistant' ? 'model' : 'user'
      { role: role, parts: [{ text: msg[:content] }] }
    end
  }

  unless system_msgs.empty?
    merged = system_msgs.map { |m| m[:content] }.join("\n")
    body[:systemInstruction] = { parts: [{ text: merged }] }
  end

  config = {}
  config[:temperature] = options[:temperature] if options[:temperature]
  config[:maxOutputTokens] = options[:max_tokens] if options[:max_tokens]
  config[:topP] = options[:top_p] if options[:top_p]
  config[:topK] = options[:top_k] if options[:top_k]
  body[:generationConfig] = config unless config.empty?

  body
end
|
158
|
+
|
159
|
+
# CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
# MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
#
# Wraps user_proc in a proc suitable for Faraday's on_data hook: raw SSE
# chunks are fed through an event-stream parser and each decoded JSON
# event is handed to user_proc. Non-200 responses are converted into
# Faraday errors via RaiseError.
#
# @param user_proc [Proc] called with each parsed JSON event
# @return [Proc] on_data callback for Faraday request options
def to_json_stream(user_proc:)
  event_parser = EventStreamParser::Parser.new

  proc do |chunk, _bytes, env|
    if env && env.status != 200
      raise_error = Faraday::Response::RaiseError.new
      raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
    end

    event_parser.feed(chunk) do |_type, data|
      # '[DONE]' is the stream terminator sentinel, not a JSON payload.
      user_proc.call(JSON.parse(data)) unless data == '[DONE]'
    end
  end
end
|
175
|
+
|
176
|
+
# Attempts to parse maybe_json as JSON, returning the original string
# unchanged when it is not valid JSON.
def try_parse_json(maybe_json)
  begin
    JSON.parse(maybe_json)
  rescue JSON::ParserError
    maybe_json
  end
end
|
181
|
+
|
182
|
+
# Maps an HTTP response onto a wrapped response object for 2xx statuses,
# or raises the Durable::Llm error class matching the status code.
#
# @param response [Faraday::Response] the raw HTTP response
# @param response_class [Class] wrapper applied to successful bodies
# @return [Object] instance of response_class
# @raise [Durable::Llm::AuthenticationError] on 401
# @raise [Durable::Llm::RateLimitError] on 429
# @raise [Durable::Llm::InvalidRequestError] on other 4xx
# @raise [Durable::Llm::ServerError] on 5xx
# @raise [Durable::Llm::APIError] on anything else
def handle_response(response, response_class = GoogleResponse)
  status = response.status
  return response_class.new(response.body) if (200..299).cover?(status)

  case status
  when 401
    raise Durable::Llm::AuthenticationError, parse_error_message(response)
  when 429
    raise Durable::Llm::RateLimitError, parse_error_message(response)
  when 400..499
    raise Durable::Llm::InvalidRequestError, parse_error_message(response)
  when 500..599
    raise Durable::Llm::ServerError, parse_error_message(response)
  else
    raise Durable::Llm::APIError, "Unexpected response code: #{status}"
  end
end
|
198
|
+
|
199
|
+
# Builds a human-readable error string from a failed API response,
# preferring the structured `error.message` field when the body is JSON
# and falling back to the raw body otherwise.
#
# @param response [#status, #body] the failed HTTP response
# @return [String] e.g. "404 Error: model not found"
def parse_error_message(response)
  parsed =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  detail = parsed&.dig('error', 'message') || response.body
  "#{response.status} Error: #{detail}"
end
|
208
|
+
|
209
|
+
# Wraps a full generateContent response body.
class GoogleResponse
  attr_reader :raw_response

  def initialize(response)
    @raw_response = response
  end

  # Only the first candidate is surfaced, matching the other providers.
  def choices
    [GoogleChoice.new(@raw_response['candidates']&.first)]
  end

  def to_s
    choices.map(&:to_s).join(' ')
  end
end

# A single candidate from a completion response.
class GoogleChoice
  attr_reader :message

  def initialize(candidate)
    @message = GoogleMessage.new(candidate&.dig('content', 'parts')&.first)
  end

  def to_s
    message.to_s
  end
end

# The text content of one candidate part; empty string when absent.
class GoogleMessage
  attr_reader :content

  def initialize(part)
    @content = part&.dig('text') || ''
  end

  def to_s
    content
  end
end

# One SSE chunk of a streaming generateContent response.
class GoogleStreamResponse
  attr_reader :choices

  def initialize(parsed)
    @choices = [GoogleStreamChoice.new(parsed)]
  end

  def to_s
    choices.map(&:to_s).join
  end
end

# A streamed candidate delta extracted from a chunk.
class GoogleStreamChoice
  attr_reader :delta

  def initialize(parsed)
    @delta = GoogleStreamDelta.new(parsed.dig('candidates', 0, 'content', 'parts', 0))
  end

  def to_s
    delta.to_s
  end
end

# Incremental text carried by a stream chunk; empty string when absent.
class GoogleStreamDelta
  attr_reader :content

  def initialize(part)
    @content = part&.dig('text') || ''
  end

  def to_s
    content
  end
end

# Wraps an embedContent response, exposing the raw vector.
class GoogleEmbeddingResponse
  attr_reader :embedding

  def initialize(data)
    @embedding = data.dig('embedding', 'values')
  end

  def to_a
    embedding
  end
end
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|
@@ -1,7 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Groq provider for OpenAI-compatible API access.
|
4
|
+
|
1
5
|
require 'faraday'
|
2
6
|
require 'json'
|
3
7
|
require 'durable/llm/errors'
|
4
8
|
require 'durable/llm/providers/base'
|
9
|
+
require 'event_stream_parser'
|
5
10
|
|
6
11
|
module Durable
|
7
12
|
module Llm
|
@@ -15,21 +20,16 @@ module Durable
|
|
15
20
|
|
16
21
|
attr_accessor :api_key
|
17
22
|
|
18
|
-
def
|
19
|
-
|
23
|
+
def initialize(api_key: nil)
|
24
|
+
super
|
25
|
+
@conn = Faraday.new(url: BASE_URL) do |faraday|
|
20
26
|
faraday.request :json
|
21
27
|
faraday.response :json
|
22
28
|
faraday.adapter Faraday.default_adapter
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
26
|
-
|
27
|
-
self.class.conn
|
28
|
-
end
|
29
|
-
|
30
|
-
def initialize(api_key: nil)
|
31
|
-
@api_key = api_key || default_api_key
|
32
|
-
end
|
32
|
+
attr_reader :conn
|
33
33
|
|
34
34
|
def completion(options)
|
35
35
|
response = conn.post('chat/completions') do |req|
|
@@ -46,7 +46,7 @@ module Durable
|
|
46
46
|
req.body = { model: model, input: input, **options }
|
47
47
|
end
|
48
48
|
|
49
|
-
handle_response(response)
|
49
|
+
handle_response(response, GroqEmbeddingResponse)
|
50
50
|
end
|
51
51
|
|
52
52
|
def models
|
@@ -60,23 +60,85 @@ module Durable
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def self.stream?
|
63
|
-
|
63
|
+
true
|
64
|
+
end
|
65
|
+
|
66
|
+
def stream(options)
|
67
|
+
options[:stream] = true
|
68
|
+
|
69
|
+
response = conn.post('chat/completions') do |req|
|
70
|
+
req.headers['Authorization'] = "Bearer #{@api_key}"
|
71
|
+
req.headers['Accept'] = 'text/event-stream'
|
72
|
+
|
73
|
+
options['temperature'] = options['temperature'].to_f if options['temperature']
|
74
|
+
|
75
|
+
req.body = options
|
76
|
+
|
77
|
+
user_proc = proc do |chunk, _size, _total|
|
78
|
+
yield GroqStreamResponse.new(chunk)
|
79
|
+
end
|
80
|
+
|
81
|
+
req.options.on_data = to_json_stream(user_proc: user_proc)
|
82
|
+
end
|
83
|
+
|
84
|
+
handle_response(response)
|
64
85
|
end
|
65
86
|
|
66
87
|
private
|
67
88
|
|
68
|
-
|
89
|
+
# CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
|
90
|
+
# MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
|
91
|
+
# Given a proc, returns an outer proc that can be used to iterate over a JSON stream of chunks.
|
92
|
+
# For each chunk, the inner user_proc is called giving it the JSON object. The JSON object could
|
93
|
+
# be a data object or an error object as described in the OpenAI API documentation.
|
94
|
+
#
|
95
|
+
# @param user_proc [Proc] The inner proc to call for each JSON object in the chunk.
|
96
|
+
# @return [Proc] An outer proc that iterates over a raw stream, converting it to JSON.
|
97
|
+
def to_json_stream(user_proc:)
|
98
|
+
parser = EventStreamParser::Parser.new
|
99
|
+
|
100
|
+
proc do |chunk, _bytes, env|
|
101
|
+
if env && env.status != 200
|
102
|
+
raise_error = Faraday::Response::RaiseError.new
|
103
|
+
raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
|
104
|
+
end
|
105
|
+
|
106
|
+
parser.feed(chunk) do |_type, data|
|
107
|
+
user_proc.call(JSON.parse(data)) unless data == '[DONE]'
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def try_parse_json(maybe_json)
|
113
|
+
JSON.parse(maybe_json)
|
114
|
+
rescue JSON::ParserError
|
115
|
+
maybe_json
|
116
|
+
end
|
117
|
+
|
118
|
+
def parse_error_message(response)
|
119
|
+
body = begin
|
120
|
+
JSON.parse(response.body)
|
121
|
+
rescue StandardError
|
122
|
+
nil
|
123
|
+
end
|
124
|
+
message = body&.dig('error', 'message') || response.body
|
125
|
+
"#{response.status} Error: #{message}"
|
126
|
+
end
|
127
|
+
|
128
|
+
# END-CODE-FROM
|
129
|
+
|
130
|
+
def handle_response(response, response_class = GroqResponse)
|
69
131
|
case response.status
|
70
132
|
when 200..299
|
71
|
-
|
133
|
+
response_class.new(response.body)
|
72
134
|
when 401
|
73
|
-
raise Durable::Llm::AuthenticationError, response
|
135
|
+
raise Durable::Llm::AuthenticationError, parse_error_message(response)
|
74
136
|
when 429
|
75
|
-
raise Durable::Llm::RateLimitError, response
|
137
|
+
raise Durable::Llm::RateLimitError, parse_error_message(response)
|
76
138
|
when 400..499
|
77
|
-
raise Durable::Llm::InvalidRequestError, response
|
139
|
+
raise Durable::Llm::InvalidRequestError, parse_error_message(response)
|
78
140
|
when 500..599
|
79
|
-
raise Durable::Llm::ServerError, response
|
141
|
+
raise Durable::Llm::ServerError, parse_error_message(response)
|
80
142
|
else
|
81
143
|
raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
|
82
144
|
end
|
@@ -93,6 +155,14 @@ module Durable
|
|
93
155
|
@raw_response['choices'].map { |choice| GroqChoice.new(choice) }
|
94
156
|
end
|
95
157
|
|
158
|
+
def data
|
159
|
+
@raw_response['data']
|
160
|
+
end
|
161
|
+
|
162
|
+
def embedding
|
163
|
+
@raw_response.dig('data', 0, 'embedding')
|
164
|
+
end
|
165
|
+
|
96
166
|
def to_s
|
97
167
|
choices.map(&:to_s).join(' ')
|
98
168
|
end
|
@@ -131,15 +201,12 @@ module Durable
|
|
131
201
|
class GroqStreamResponse
|
132
202
|
attr_reader :choices
|
133
203
|
|
134
|
-
def initialize(
|
135
|
-
|
136
|
-
puts json_frag
|
137
|
-
parsed = JSON.parse(json_frag)
|
138
|
-
@choices = parsed['choices'].map { |choice| GroqStreamChoice.new(choice) }
|
204
|
+
def initialize(parsed)
|
205
|
+
@choices = GroqStreamChoice.new(parsed['choices'])
|
139
206
|
end
|
140
207
|
|
141
208
|
def to_s
|
142
|
-
@choices.
|
209
|
+
@choices.to_s
|
143
210
|
end
|
144
211
|
end
|
145
212
|
|
@@ -147,8 +214,9 @@ module Durable
|
|
147
214
|
attr_reader :delta, :finish_reason
|
148
215
|
|
149
216
|
def initialize(choice)
|
150
|
-
@
|
151
|
-
@
|
217
|
+
@choice = [choice].flatten.first
|
218
|
+
@delta = GroqStreamDelta.new(@choice['delta'])
|
219
|
+
@finish_reason = @choice['finish_reason']
|
152
220
|
end
|
153
221
|
|
154
222
|
def to_s
|
@@ -168,7 +236,21 @@ module Durable
|
|
168
236
|
@content || ''
|
169
237
|
end
|
170
238
|
end
|
239
|
+
|
240
|
+
# Wraps a Groq embeddings API response, exposing the first vector.
class GroqEmbeddingResponse
  attr_reader :embedding

  def initialize(data)
    @embedding = data.dig('data', 0, 'embedding')
  end

  def to_a
    embedding
  end
end
|
171
251
|
end
|
172
252
|
end
|
173
253
|
end
|
174
254
|
end
|
255
|
+
|
256
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|
@@ -1,13 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file implements the Hugging Face provider for accessing Hugging Face's inference API models.
|
4
|
+
|
1
5
|
require 'faraday'
|
2
6
|
require 'json'
|
3
7
|
require 'durable/llm/errors'
|
4
8
|
require 'durable/llm/providers/base'
|
9
|
+
require 'event_stream_parser'
|
5
10
|
|
6
11
|
module Durable
|
7
12
|
module Llm
|
8
13
|
module Providers
|
14
|
+
# Hugging Face provider for accessing Hugging Face's inference API models.
|
15
|
+
#
|
16
|
+
# Provides completion, embedding, and streaming capabilities with authentication
|
17
|
+
# handling, error management, and response normalization.
|
9
18
|
class Huggingface < Durable::Llm::Providers::Base
|
10
|
-
BASE_URL = 'https://api-inference.huggingface.co
|
19
|
+
BASE_URL = 'https://api-inference.huggingface.co'
|
11
20
|
|
12
21
|
def default_api_key
|
13
22
|
Durable::Llm.configuration.huggingface&.api_key || ENV['HUGGINGFACE_API_KEY']
|
@@ -22,11 +31,12 @@ module Durable
|
|
22
31
|
faraday.response :json
|
23
32
|
faraday.adapter Faraday.default_adapter
|
24
33
|
end
|
34
|
+
super()
|
25
35
|
end
|
26
36
|
|
27
37
|
def completion(options)
|
28
38
|
model = options.delete(:model) || 'gpt2'
|
29
|
-
response = @conn.post("/#{model}") do |req|
|
39
|
+
response = @conn.post("models/#{model}") do |req|
|
30
40
|
req.headers['Authorization'] = "Bearer #{@api_key}"
|
31
41
|
req.body = options
|
32
42
|
end
|
@@ -34,33 +44,84 @@ module Durable
|
|
34
44
|
handle_response(response)
|
35
45
|
end
|
36
46
|
|
47
|
+
def embedding(model:, input:, **options)
|
48
|
+
response = @conn.post("models/#{model}") do |req|
|
49
|
+
req.headers['Authorization'] = "Bearer #{@api_key}"
|
50
|
+
req.body = { inputs: input, **options }
|
51
|
+
end
|
52
|
+
|
53
|
+
handle_response(response, HuggingfaceEmbeddingResponse)
|
54
|
+
end
|
55
|
+
|
37
56
|
def models
|
38
57
|
self.class.models
|
39
58
|
end
|
40
59
|
|
60
|
+
def self.stream?
|
61
|
+
true
|
62
|
+
end
|
63
|
+
|
64
|
+
def stream(options)
|
65
|
+
model = options.delete(:model) || 'gpt2'
|
66
|
+
options[:stream] = true
|
67
|
+
|
68
|
+
@conn.post("models/#{model}") do |req|
|
69
|
+
req.headers['Authorization'] = "Bearer #{@api_key}"
|
70
|
+
req.headers['Accept'] = 'text/event-stream'
|
71
|
+
req.body = options
|
72
|
+
req.options.on_data = to_json_stream(user_proc: proc { |chunk|
|
73
|
+
yield HuggingfaceStreamResponse.new(chunk)
|
74
|
+
})
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
41
78
|
def self.models
|
42
79
|
%w[gpt2 bert-base-uncased distilbert-base-uncased] # could use expansion
|
43
80
|
end
|
44
81
|
|
45
82
|
private
|
46
83
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
84
|
+
# CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
|
85
|
+
# MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
|
86
|
+
def to_json_stream(user_proc:)
|
87
|
+
parser = EventStreamParser::Parser.new
|
88
|
+
|
89
|
+
proc do |chunk, _bytes, env|
|
90
|
+
if env && env.status != 200
|
91
|
+
raise_error = Faraday::Response::RaiseError.new
|
92
|
+
raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
|
93
|
+
end
|
94
|
+
|
95
|
+
parser.feed(chunk) do |_type, data|
|
96
|
+
user_proc.call(JSON.parse(data)) unless data == '[DONE]'
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def try_parse_json(maybe_json)
|
102
|
+
JSON.parse(maybe_json)
|
103
|
+
rescue JSON::ParserError
|
104
|
+
maybe_json
|
105
|
+
end
|
106
|
+
|
107
|
+
def handle_response(response, response_class = HuggingfaceResponse)
|
108
|
+
return response_class.new(response.body) if (200..299).cover?(response.status)
|
109
|
+
|
110
|
+
error_class = error_class_for_status(response.status)
|
111
|
+
raise error_class, response.body['error'] || "HTTP #{response.status}"
|
112
|
+
end
|
113
|
+
|
114
|
+
def error_class_for_status(status)
|
115
|
+
case status
|
116
|
+
when 401 then Durable::Llm::AuthenticationError
|
117
|
+
when 429 then Durable::Llm::RateLimitError
|
118
|
+
when 400..499 then Durable::Llm::InvalidRequestError
|
119
|
+
when 500..599 then Durable::Llm::ServerError
|
120
|
+
else Durable::Llm::APIError
|
61
121
|
end
|
62
122
|
end
|
63
123
|
|
124
|
+
# Response wrapper for Hugging Face completion API responses.
|
64
125
|
class HuggingfaceResponse
|
65
126
|
attr_reader :raw_response
|
66
127
|
|
@@ -69,7 +130,7 @@ module Durable
|
|
69
130
|
end
|
70
131
|
|
71
132
|
def choices
|
72
|
-
[
|
133
|
+
[HuggingfaceChoice.new(@raw_response)]
|
73
134
|
end
|
74
135
|
|
75
136
|
def to_s
|
@@ -77,6 +138,7 @@ module Durable
|
|
77
138
|
end
|
78
139
|
end
|
79
140
|
|
141
|
+
# Individual choice from Hugging Face completion response.
|
80
142
|
class HuggingfaceChoice
|
81
143
|
attr_reader :text
|
82
144
|
|
@@ -88,7 +150,48 @@ module Durable
|
|
88
150
|
@text
|
89
151
|
end
|
90
152
|
end
|
153
|
+
|
154
|
+
# Response wrapper for Hugging Face embedding API responses.
# The inference API returns the vector data directly as the body,
# so the payload is stored as-is.
class HuggingfaceEmbeddingResponse
  attr_reader :embedding

  def initialize(data)
    @embedding = data
  end

  def to_a
    embedding
  end
end
|
166
|
+
|
167
|
+
# Response wrapper for Hugging Face streaming API responses; delegates
# text extraction to HuggingfaceStreamToken.
class HuggingfaceStreamResponse
  attr_reader :token

  def initialize(parsed)
    @token = HuggingfaceStreamToken.new(parsed)
  end

  def to_s
    token.to_s
  end
end

# Individual token from a Hugging Face streaming response; reads the
# text at parsed['token']['text'].
class HuggingfaceStreamToken
  attr_reader :text

  def initialize(token)
    @text = token['token']['text']
  end

  def to_s
    text || ''
  end
end
|
91
192
|
end
|
92
193
|
end
|
93
194
|
end
|
94
195
|
end
|
196
|
+
|
197
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|