durable-llm 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +7 -0
- data/CHANGELOG.md +5 -0
- data/CLI.md +0 -2
- data/Gemfile +7 -9
- data/README.md +564 -30
- data/Rakefile +16 -6
- data/devenv.lock +171 -0
- data/devenv.nix +12 -0
- data/devenv.yaml +8 -0
- data/durable-llm.gemspec +52 -0
- data/examples/openai_quick_complete.rb +4 -2
- data/lib/durable/llm/cli.rb +218 -22
- data/lib/durable/llm/client.rb +228 -8
- data/lib/durable/llm/configuration.rb +163 -10
- data/lib/durable/llm/convenience.rb +102 -0
- data/lib/durable/llm/errors.rb +185 -0
- data/lib/durable/llm/provider_utilities.rb +201 -0
- data/lib/durable/llm/providers/anthropic.rb +232 -24
- data/lib/durable/llm/providers/azure_openai.rb +347 -0
- data/lib/durable/llm/providers/base.rb +220 -11
- data/lib/durable/llm/providers/cohere.rb +157 -11
- data/lib/durable/llm/providers/deepseek.rb +233 -0
- data/lib/durable/llm/providers/fireworks.rb +304 -0
- data/lib/durable/llm/providers/google.rb +327 -0
- data/lib/durable/llm/providers/groq.rb +133 -25
- data/lib/durable/llm/providers/huggingface.rb +120 -17
- data/lib/durable/llm/providers/mistral.rb +431 -0
- data/lib/durable/llm/providers/openai.rb +150 -4
- data/lib/durable/llm/providers/opencode.rb +253 -0
- data/lib/durable/llm/providers/openrouter.rb +256 -0
- data/lib/durable/llm/providers/perplexity.rb +273 -0
- data/lib/durable/llm/providers/together.rb +346 -0
- data/lib/durable/llm/providers/xai.rb +355 -0
- data/lib/durable/llm/providers.rb +113 -13
- data/lib/durable/llm/response_helpers.rb +185 -0
- data/lib/durable/llm/version.rb +5 -1
- data/lib/durable/llm.rb +214 -1
- data/lib/durable.rb +29 -4
- data/sig/durable/llm.rbs +303 -1
- metadata +106 -28
- data/Gemfile.lock +0 -103
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This file implements the Fireworks AI provider for accessing Fireworks AI's language models through their API, providing completion, embedding, and streaming capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Fireworks AI's API endpoint, processes chat completions and embeddings, handles various API error responses, and includes comprehensive response classes to format Fireworks AI's API responses into a consistent interface.
|
|
4
|
+
|
|
5
|
+
require 'faraday'
|
|
6
|
+
require 'json'
|
|
7
|
+
require 'event_stream_parser'
|
|
8
|
+
require 'durable/llm/errors'
|
|
9
|
+
require 'durable/llm/providers/base'
|
|
10
|
+
|
|
11
|
+
module Durable
|
|
12
|
+
module Llm
|
|
13
|
+
module Providers
|
|
14
|
+
# Fireworks AI provider for accessing Fireworks AI's language models.
#
# Provides completion, embedding, and streaming capabilities with proper
# error handling and response normalization.
class Fireworks < Durable::Llm::Providers::Base
  BASE_URL = 'https://api.fireworks.ai/inference/v1'

  # Resolves the default API key: configured value first, then the
  # FIREWORKS_API_KEY environment variable.
  #
  # @return [String, nil] the API key, or nil when none is configured.
  def default_api_key
    Durable::Llm.configuration.fireworks&.api_key || ENV['FIREWORKS_API_KEY']
  end

  attr_accessor :api_key

  # Initializes a new Fireworks provider instance.
  #
  # @param api_key [String, nil] The API key for Fireworks AI. If not provided,
  #   uses the default from configuration or environment.
  # @return [Fireworks] A new instance of the Fireworks provider.
  def initialize(api_key: nil)
    super()
    @api_key = api_key || default_api_key
    @conn = Faraday.new(url: BASE_URL) do |faraday|
      faraday.request :json
      faraday.response :json
      faraday.adapter Faraday.default_adapter
    end
  end

  # Performs a chat completion request to Fireworks AI.
  #
  # @param options [Hash] The completion options including model, messages, temperature, etc.
  # @return [FireworksResponse] The response object containing the completion results.
  # @raise [Durable::Llm::AuthenticationError] If authentication fails.
  # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
  # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
  # @raise [Durable::Llm::ServerError] If there's a server error.
  def completion(options)
    response = @conn.post('chat/completions') do |req|
      req.headers['Authorization'] = "Bearer #{@api_key}"
      req.body = options
    end

    handle_response(response)
  end

  # Generates embeddings for the given input using Fireworks AI.
  #
  # @param model [String] The model to use for generating embeddings.
  # @param input [String, Array<String>] The text input(s) to embed.
  # @param options [Hash] Additional options forwarded in the request body.
  # @return [FireworksEmbeddingResponse] The response object containing the embeddings.
  # @raise [Durable::Llm::AuthenticationError] If authentication fails.
  # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
  # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
  # @raise [Durable::Llm::ServerError] If there's a server error.
  def embedding(model:, input:, **options)
    response = @conn.post('embeddings') do |req|
      req.headers['Authorization'] = "Bearer #{@api_key}"
      req.body = { model: model, input: input, **options }
    end

    handle_response(response, FireworksEmbeddingResponse)
  end

  # Retrieves the list of available models from Fireworks AI.
  #
  # @return [Array<String>] An array of model IDs available for use.
  # @raise [Durable::Llm::AuthenticationError] If authentication fails.
  # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
  # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
  # @raise [Durable::Llm::ServerError] If there's a server error.
  def models
    response = @conn.get('models') do |req|
      req.headers['Authorization'] = "Bearer #{@api_key}"
    end

    handle_response(response).data.map { |model| model['id'] }
  end

  # @return [Boolean] true — this provider supports streaming.
  def self.stream?
    true
  end

  # Performs a streaming chat completion request to Fireworks AI.
  #
  # BUGFIX: the previous implementation mutated the caller's options hash
  # (setting :stream and rewriting 'temperature' in place) and only coerced
  # temperature when supplied under a String key. We now operate on a shallow
  # copy and coerce temperature under either key form.
  #
  # @param options [Hash] The completion options including model, messages, temperature, etc.
  # @yield [FireworksStreamResponse] Yields each chunk of the streaming response.
  # @return [nil] Returns nil after streaming is complete.
  # @raise [Durable::Llm::AuthenticationError] If authentication fails.
  # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
  # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
  # @raise [Durable::Llm::ServerError] If there's a server error.
  def stream(options)
    payload = options.dup
    payload[:stream] = true

    payload['temperature'] = payload['temperature'].to_f if payload['temperature']
    payload[:temperature] = payload[:temperature].to_f if payload[:temperature]

    @conn.post('chat/completions') do |req|
      req.headers['Authorization'] = "Bearer #{@api_key}"
      req.headers['Accept'] = 'text/event-stream'
      req.body = payload

      user_proc = proc do |chunk, _size, _total|
        yield FireworksStreamResponse.new(chunk)
      end

      req.options.on_data = to_json_stream(user_proc: user_proc)
    end

    # For streaming, errors are handled in to_json_stream, no need for handle_response
    nil
  end

  private

  # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
  # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
  #
  # Builds an on_data callback that parses Server-Sent Events and invokes
  # user_proc with each decoded JSON payload (skipping the '[DONE]' sentinel).
  # Non-200 responses are raised through Faraday's RaiseError middleware.
  def to_json_stream(user_proc:)
    parser = EventStreamParser::Parser.new

    proc do |chunk, _bytes, env|
      if env && env.status != 200
        raise_error = Faraday::Response::RaiseError.new
        raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
      end

      parser.feed(chunk) do |_type, data|
        user_proc.call(JSON.parse(data)) unless data == '[DONE]'
      end
    end
  end

  # Attempts to parse a string as JSON; returns the original string on failure.
  def try_parse_json(maybe_json)
    JSON.parse(maybe_json)
  rescue JSON::ParserError
    maybe_json
  end

  # END-CODE-FROM

  # Maps an HTTP response onto a response object or a typed error.
  def handle_response(response, response_class = FireworksResponse)
    case response.status
    when 200..299
      response_class.new(response.body)
    when 401
      raise Durable::Llm::AuthenticationError, parse_error_message(response)
    when 429
      raise Durable::Llm::RateLimitError, parse_error_message(response)
    when 400..499
      raise Durable::Llm::InvalidRequestError, parse_error_message(response)
    when 500..599
      raise Durable::Llm::ServerError, parse_error_message(response)
    else
      raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
    end
  end

  # Extracts a human-readable error message from an error response body,
  # falling back to the raw body when it is not parseable JSON.
  def parse_error_message(response)
    body = begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
    message = body&.dig('error', 'message') || response.body
    "#{response.status} Error: #{message}"
  end

  # Response object for Fireworks chat API responses.
  #
  # Wraps the raw response and provides a consistent interface for accessing
  # message content and metadata.
  class FireworksResponse
    attr_reader :raw_response

    def initialize(response)
      @raw_response = response
    end

    # @return [Array<FireworksChoice>] wrapped choices from the response.
    def choices
      @raw_response['choices'].map { |choice| FireworksChoice.new(choice) }
    end

    # @return [Object] the raw 'data' payload (used by the models endpoint).
    def data
      @raw_response['data']
    end

    def to_s
      choices.map(&:to_s).join(' ')
    end
  end

  # Represents a single choice in a Fireworks response.
  #
  # Contains the message and finish reason for the choice.
  class FireworksChoice
    attr_reader :message, :finish_reason

    def initialize(choice)
      @message = FireworksMessage.new(choice['message'])
      @finish_reason = choice['finish_reason']
    end

    def to_s
      @message.to_s
    end
  end

  # Represents a message in a Fireworks conversation.
  #
  # Messages have a role (user, assistant) and text content.
  class FireworksMessage
    attr_reader :role, :content

    def initialize(message)
      @role = message['role']
      @content = message['content']
    end

    def to_s
      @content
    end
  end

  # Response object for streaming Fireworks chat chunks.
  #
  # Wraps individual chunks from the Server-Sent Events stream.
  # NOTE(review): despite the plural name, #choices holds a single
  # FireworksStreamChoice (not an Array) — kept as-is for interface
  # compatibility with existing callers.
  class FireworksStreamResponse
    attr_reader :choices

    def initialize(parsed)
      @choices = FireworksStreamChoice.new(parsed['choices'])
    end

    def to_s
      @choices.to_s
    end
  end

  # Response object for Fireworks embedding API responses.
  #
  # Wraps embedding data and provides array access to the vector representation.
  class FireworksEmbeddingResponse
    attr_reader :embedding

    def initialize(data)
      # Only the first embedding in the response is retained.
      @embedding = data.dig('data', 0, 'embedding')
    end

    def to_a
      @embedding
    end
  end

  # Represents a single choice in a streaming Fireworks response chunk.
  #
  # Contains the delta (incremental content) and finish reason for the choice.
  class FireworksStreamChoice
    attr_reader :delta, :finish_reason

    def initialize(choice)
      # Accepts either a bare choice hash or an array of choices; uses the first.
      @choice = [choice].flatten.first
      @delta = FireworksStreamDelta.new(@choice['delta'])
      @finish_reason = @choice['finish_reason']
    end

    def to_s
      @delta.to_s
    end
  end

  # Represents the incremental content delta in a streaming response.
  #
  # Contains the role and text content of the delta.
  class FireworksStreamDelta
    attr_reader :role, :content

    def initialize(delta)
      @role = delta['role']
      @content = delta['content']
    end

    def to_s
      @content || ''
    end
  end
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This file implements the Google provider for accessing Google's Gemini language models through their API, providing completion capabilities with authentication handling, error management, and response normalization. It establishes HTTP connections to Google's Generative Language API endpoint, processes generateContent requests with text content, handles various API error responses, and includes comprehensive response classes to format Google's API responses into a consistent interface.
|
|
4
|
+
|
|
5
|
+
require 'faraday'
|
|
6
|
+
require 'json'
|
|
7
|
+
require 'durable/llm/errors'
|
|
8
|
+
require 'durable/llm/providers/base'
|
|
9
|
+
require 'event_stream_parser'
|
|
10
|
+
|
|
11
|
+
module Durable
|
|
12
|
+
module Llm
|
|
13
|
+
module Providers
|
|
14
|
+
# Google Generative AI provider for accessing Gemini language models.
#
# Provides completion, embedding, and streaming capabilities with proper
# error handling and response normalization for Google's Generative Language API.
class Google < Durable::Llm::Providers::Base
  BASE_URL = 'https://generativelanguage.googleapis.com'

  # Resolves the default API key: configured value first, then the
  # GOOGLE_API_KEY environment variable. The rescue guards against a
  # configuration object that does not expose #google.
  #
  # @return [String, nil] the API key, or nil when none is configured.
  def default_api_key
    begin
      Durable::Llm.configuration.google&.api_key
    rescue NoMethodError
      nil
    end || ENV['GOOGLE_API_KEY']
  end

  attr_accessor :api_key

  # Initializes a new Google provider instance.
  #
  # BUGFIX: now calls super() like the other providers (e.g. Fireworks) so
  # the Base initializer runs.
  #
  # @param api_key [String, nil] The API key for Google. If not provided,
  #   uses the default from configuration or environment.
  # @return [Google] A new instance of the Google provider.
  def initialize(api_key: nil)
    super()
    @api_key = api_key || default_api_key
    @conn = Faraday.new(url: BASE_URL) do |faraday|
      faraday.request :json
      faraday.response :json
      faraday.adapter Faraday.default_adapter
    end
  end

  # Performs a generateContent request against the Gemini API.
  #
  # @param options [Hash] OpenAI-style options (:model, :messages, :temperature,
  #   :max_tokens, :top_p, :top_k); translated to Google's request format.
  # @return [GoogleResponse] The response object containing the completion results.
  # @raise [Durable::Llm::AuthenticationError] If authentication fails.
  # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
  # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
  # @raise [Durable::Llm::ServerError] If there's a server error.
  def completion(options)
    model = options[:model]
    # NOTE(review): the API key travels in the query string, as Google's
    # key-based auth expects.
    url = "/v1beta/models/#{model}:generateContent?key=#{@api_key}"

    # Transform options to Google's format
    request_body = transform_options(options)

    response = @conn.post(url) do |req|
      req.body = request_body
    end

    handle_response(response)
  end

  # Generates an embedding for the given input via embedContent.
  #
  # @param model [String] The embedding model to use.
  # @param input [String] The text to embed.
  # @param _options [Hash] Extra options — currently ignored by this endpoint.
  # @return [GoogleEmbeddingResponse] The response object containing the embedding.
  def embedding(model:, input:, **_options)
    url = "/v1beta/models/#{model}:embedContent?key=#{@api_key}"

    request_body = {
      content: {
        parts: [{ text: input }]
      }
    }

    response = @conn.post(url) do |req|
      req.body = request_body
    end

    handle_response(response, GoogleEmbeddingResponse)
  end

  # Returns a curated, hardcoded list of Gemini model IDs (no API call is made).
  #
  # @return [Array<String>] known model IDs.
  def models
    [
      'gemini-1.5-flash',
      'gemini-1.5-flash-001',
      'gemini-1.5-flash-002',
      'gemini-1.5-flash-8b',
      'gemini-1.5-flash-8b-001',
      'gemini-1.5-flash-8b-latest',
      'gemini-1.5-flash-latest',
      'gemini-1.5-pro',
      'gemini-1.5-pro-001',
      'gemini-1.5-pro-002',
      'gemini-1.5-pro-latest',
      'gemini-2.0-flash',
      'gemini-2.0-flash-001',
      'gemini-2.0-flash-exp',
      'gemini-2.0-flash-lite',
      'gemini-2.0-flash-lite-001',
      'gemini-2.0-flash-live-001',
      'gemini-2.0-flash-preview-image-generation',
      'gemini-2.5-flash',
      'gemini-2.5-flash-exp-native-audio-thinking-dialog',
      'gemini-2.5-flash-lite',
      'gemini-2.5-flash-lite-06-17',
      'gemini-2.5-flash-preview-05-20',
      'gemini-2.5-flash-preview-native-audio-dialog',
      'gemini-2.5-flash-preview-tts',
      'gemini-2.5-pro',
      'gemini-2.5-pro-preview-tts',
      'gemini-live-2.5-flash-preview',
      'text-embedding-004',
      'text-multilingual-embedding-002'
    ]
  end

  # @return [Boolean] true — this provider supports streaming.
  def self.stream?
    true
  end

  # Performs a streaming generateContent request (SSE via alt=sse).
  #
  # @param options [Hash] Same options as #completion.
  # @yield [GoogleStreamResponse] Yields each chunk of the streaming response.
  # @raise [Durable::Llm::AuthenticationError] If authentication fails.
  # @raise [Durable::Llm::RateLimitError] If rate limit is exceeded.
  # @raise [Durable::Llm::InvalidRequestError] If the request is invalid.
  # @raise [Durable::Llm::ServerError] If there's a server error.
  def stream(options)
    model = options[:model]
    url = "/v1beta/models/#{model}:streamGenerateContent?key=#{@api_key}&alt=sse"

    request_body = transform_options(options)

    response = @conn.post(url) do |req|
      req.headers['Accept'] = 'text/event-stream'
      req.body = request_body

      user_proc = proc do |chunk, _size, _total|
        yield GoogleStreamResponse.new(chunk)
      end

      req.options.on_data = to_json_stream(user_proc: user_proc)
    end

    # NOTE(review): with on_data streaming the response body has already been
    # consumed, so the wrapped return value here is of limited use; kept for
    # its status-code error mapping. Callers should consume chunks via the block.
    handle_response(response)
  end

  private

  # Translates OpenAI-style options into Google's generateContent body:
  # system messages become systemInstruction, assistant messages map to the
  # 'model' role, and sampling options go under generationConfig.
  def transform_options(options)
    messages = options[:messages] || []
    system_messages = messages.select { |m| m[:role] == 'system' }
    conversation_messages = messages.reject { |m| m[:role] == 'system' }

    body = {
      contents: conversation_messages.map do |msg|
        {
          # Google only knows 'user' and 'model' roles.
          role: msg[:role] == 'assistant' ? 'model' : 'user',
          parts: [{ text: msg[:content] }]
        }
      end
    }

    if system_messages.any?
      body[:systemInstruction] = {
        parts: [{ text: system_messages.map { |m| m[:content] }.join("\n") }]
      }
    end

    generation_config = {}
    generation_config[:temperature] = options[:temperature] if options[:temperature]
    generation_config[:maxOutputTokens] = options[:max_tokens] if options[:max_tokens]
    generation_config[:topP] = options[:top_p] if options[:top_p]
    generation_config[:topK] = options[:top_k] if options[:top_k]

    body[:generationConfig] = generation_config unless generation_config.empty?

    body
  end

  # CODE-FROM: ruby-openai @ https://github.com/alexrudall/ruby-openai/blob/main/lib/openai/http.rb
  # MIT License: https://github.com/alexrudall/ruby-openai/blob/main/LICENSE.md
  #
  # Builds an on_data callback that parses Server-Sent Events and invokes
  # user_proc with each decoded JSON payload (skipping the '[DONE]' sentinel).
  def to_json_stream(user_proc:)
    parser = EventStreamParser::Parser.new

    proc do |chunk, _bytes, env|
      if env && env.status != 200
        raise_error = Faraday::Response::RaiseError.new
        raise_error.on_complete(env.merge(body: try_parse_json(chunk)))
      end

      parser.feed(chunk) do |_type, data|
        user_proc.call(JSON.parse(data)) unless data == '[DONE]'
      end
    end
  end

  # Attempts to parse a string as JSON; returns the original string on failure.
  def try_parse_json(maybe_json)
    JSON.parse(maybe_json)
  rescue JSON::ParserError
    maybe_json
  end

  # Maps an HTTP response onto a response object or a typed error.
  def handle_response(response, response_class = GoogleResponse)
    case response.status
    when 200..299
      response_class.new(response.body)
    when 401
      raise Durable::Llm::AuthenticationError, parse_error_message(response)
    when 429
      raise Durable::Llm::RateLimitError, parse_error_message(response)
    when 400..499
      raise Durable::Llm::InvalidRequestError, parse_error_message(response)
    when 500..599
      raise Durable::Llm::ServerError, parse_error_message(response)
    else
      raise Durable::Llm::APIError, "Unexpected response code: #{response.status}"
    end
  end

  # Extracts a human-readable error message from an error response body,
  # falling back to the raw body when it is not parseable JSON.
  def parse_error_message(response)
    body = begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
    message = body&.dig('error', 'message') || response.body
    "#{response.status} Error: #{message}"
  end

  # Response object for Google Generative AI API responses.
  #
  # Wraps the raw response and provides a consistent interface for accessing
  # candidate content and metadata.
  class GoogleResponse
    attr_reader :raw_response

    def initialize(response)
      @raw_response = response
    end

    # @return [Array<GoogleChoice>] a one-element array wrapping the first
    #   candidate (nil-safe: yields an empty-content choice when absent).
    def choices
      [GoogleChoice.new(@raw_response['candidates']&.first)]
    end

    def to_s
      choices.map(&:to_s).join(' ')
    end
  end

  # Represents a single candidate choice in a Google response.
  #
  # Contains the message content from the candidate.
  class GoogleChoice
    attr_reader :message

    def initialize(candidate)
      @message = GoogleMessage.new(candidate&.dig('content', 'parts')&.first)
    end

    def to_s
      @message.to_s
    end
  end

  # Represents a message in a Google conversation.
  #
  # Messages contain text content extracted from parts.
  class GoogleMessage
    attr_reader :content

    def initialize(part)
      @content = part&.dig('text') || ''
    end

    def to_s
      @content
    end
  end

  # Response object for streaming Google Generative AI chunks.
  #
  # Wraps individual chunks from the streaming response.
  class GoogleStreamResponse
    attr_reader :choices

    def initialize(parsed)
      @choices = [GoogleStreamChoice.new(parsed)]
    end

    def to_s
      @choices.map(&:to_s).join
    end
  end

  # Represents a single choice in a streaming Google response chunk.
  #
  # Contains the delta (incremental content) for the choice.
  class GoogleStreamChoice
    attr_reader :delta

    def initialize(parsed)
      @delta = GoogleStreamDelta.new(parsed.dig('candidates', 0, 'content', 'parts', 0))
    end

    def to_s
      @delta.to_s
    end
  end

  # Represents the incremental content delta in a streaming response.
  #
  # Contains the text content of the delta.
  class GoogleStreamDelta
    attr_reader :content

    def initialize(part)
      @content = part&.dig('text') || ''
    end

    def to_s
      @content
    end
  end

  # Response object for Google embedding API responses.
  #
  # Wraps embedding data and provides array access to the vector representation.
  class GoogleEmbeddingResponse
    attr_reader :embedding

    def initialize(data)
      @embedding = data.dig('embedding', 'values')
    end

    def to_a
      @embedding
    end
  end
end
|
|
323
|
+
end
|
|
324
|
+
end
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
# Copyright (c) 2025 Durable Programming, LLC. All rights reserved.
|