llm.rb 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/README.md +264 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/file.rb +26 -40
  6. data/lib/llm/http_client.rb +10 -5
  7. data/lib/llm/message.rb +14 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +98 -0
  10. data/lib/llm/provider.rb +96 -19
  11. data/lib/llm/providers/anthropic/error_handler.rb +2 -0
  12. data/lib/llm/providers/anthropic/format.rb +2 -0
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +2 -0
  17. data/lib/llm/providers/gemini/files.rb +160 -0
  18. data/lib/llm/providers/gemini/format.rb +12 -6
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +2 -0
  23. data/lib/llm/providers/ollama/format.rb +13 -5
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +2 -0
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +21 -8
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +78 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai.rb +2 -2
  35. data/lib/llm/response/audio.rb +13 -0
  36. data/lib/llm/response/audio_transcription.rb +14 -0
  37. data/lib/llm/response/audio_translation.rb +14 -0
  38. data/lib/llm/response/download_file.rb +15 -0
  39. data/lib/llm/response/file.rb +42 -0
  40. data/lib/llm/response/filelist.rb +18 -0
  41. data/lib/llm/response/image.rb +29 -0
  42. data/lib/llm/response/output.rb +56 -0
  43. data/lib/llm/response.rb +18 -6
  44. data/lib/llm/utils.rb +19 -0
  45. data/lib/llm/version.rb +1 -1
  46. data/lib/llm.rb +5 -2
  47. data/llm.gemspec +1 -6
  48. data/spec/anthropic/completion_spec.rb +1 -1
  49. data/spec/gemini/completion_spec.rb +1 -1
  50. data/spec/gemini/conversation_spec.rb +31 -0
  51. data/spec/gemini/files_spec.rb +124 -0
  52. data/spec/gemini/images_spec.rb +47 -0
  53. data/spec/llm/conversation_spec.rb +101 -61
  54. data/spec/ollama/completion_spec.rb +1 -1
  55. data/spec/ollama/conversation_spec.rb +31 -0
  56. data/spec/openai/audio_spec.rb +55 -0
  57. data/spec/openai/completion_spec.rb +1 -1
  58. data/spec/openai/files_spec.rb +150 -0
  59. data/spec/openai/images_spec.rb +95 -0
  60. data/spec/openai/responses_spec.rb +51 -0
  61. data/spec/setup.rb +8 -0
  62. metadata +31 -49
  63. data/LICENSE.txt +0 -21
  64. data/lib/llm/conversation.rb +0 -90
  65. data/lib/llm/message_queue.rb +0 -54
data/lib/llm/file.rb CHANGED
@@ -1,45 +1,10 @@
 # frozen_string_literal: true
 
+##
+# The {LLM::File LLM::File} class represents a local file. It can
+# be used as a prompt with certain providers (eg: Ollama, Gemini),
+# and as an input with certain methods
 class LLM::File
-  ##
-  # @return [Hash]
-  #  Returns a hash of common file extensions and their
-  #  corresponding MIME types
-  def self.mime_types
-    @mime_types ||= {
-      # Images
-      ".png" => "image/png",
-      ".jpg" => "image/jpeg",
-      ".jpeg" => "image/jpeg",
-      ".webp" => "image/webp",
-
-      # Videos
-      ".flv" => "video/x-flv",
-      ".mov" => "video/quicktime",
-      ".mpeg" => "video/mpeg",
-      ".mpg" => "video/mpeg",
-      ".mp4" => "video/mp4",
-      ".webm" => "video/webm",
-      ".wmv" => "video/x-ms-wmv",
-      ".3gp" => "video/3gpp",
-
-      # Audio
-      ".aac" => "audio/aac",
-      ".flac" => "audio/flac",
-      ".mp3" => "audio/mpeg",
-      ".m4a" => "audio/mp4",
-      ".mpga" => "audio/mpeg",
-      ".opus" => "audio/opus",
-      ".pcm" => "audio/L16",
-      ".wav" => "audio/wav",
-      ".weba" => "audio/webm",
-
-      # Documents
-      ".pdf" => "application/pdf",
-      ".txt" => "text/plain"
-    }.freeze
-  end
-
   ##
   # @return [String]
   #  Returns the path to a file
@@ -53,7 +18,28 @@ class LLM::File
   # @return [String]
   #  Returns the MIME type of the file
   def mime_type
-    self.class.mime_types[File.extname(path)]
+    LLM::Mime[File.extname(path)]
+  end
+
+  ##
+  # @return [Boolean]
+  #  Returns true if the file is an image
+  def image?
+    mime_type.start_with?("image/")
+  end
+
+  ##
+  # @return [Integer]
+  #  Returns the size of the file in bytes
+  def bytesize
+    File.size(path)
+  end
+
+  ##
+  # @return [String]
+  #  Returns the file contents in base64
+  def to_b64
+    [File.binread(path)].pack("m0")
   end
 end
 
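A minimal sketch of the slimmed-down LLM::File API after this change, using the LLM::File() helper that appears in examples elsewhere in this diff (the path below is hypothetical):

  require "llm"

  file = LLM::File("/images/rocket.png")  # hypothetical local file
  file.mime_type # => "image/png" (now resolved through LLM::Mime)
  file.image?    # => true
  file.bytesize  # => File.size(file.path), in bytes
  file.to_b64    # => base64-encoded file contents, without newlines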
data/lib/llm/http_client.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 module LLM
+  ##
+  # @private
   module HTTPClient
     require "net/http"
     ##
@@ -9,6 +11,8 @@ module LLM
     #  The HTTP object to use for the request
     # @param [Net::HTTPRequest] req
     #  The request to send
+    # @param [Proc] b
+    #  A block to yield the response to (optional)
     # @return [Net::HTTPResponse]
     #  The response from the server
     # @raise [LLM::Error::Unauthorized]
@@ -19,11 +23,12 @@ module LLM
     #  When any other unsuccessful status code is returned
     # @raise [SystemCallError]
     #  When there is a network error at the operating system level
-    def request(http, req)
-      res = http.request(req)
-      res.tap(&:value)
-    rescue Net::HTTPClientException
-      error_handler.new(res).raise_error!
+    def request(http, req, &b)
+      res = http.request(req, &b)
+      case res
+      when Net::HTTPOK then res
+      else error_handler.new(res).raise_error!
+      end
     end
   end
 end
data/lib/llm/message.rb CHANGED
@@ -3,18 +3,22 @@
 module LLM
   class Message
     ##
+    # Returns the role of the message
     # @return [Symbol]
     attr_reader :role
 
     ##
+    # Returns the content of the message
     # @return [String]
     attr_reader :content
 
     ##
+    # Returns extra context associated with the message
     # @return [Hash]
     attr_reader :extra
 
     ##
+    # Returns a new message
     # @param [Symbol] role
     # @param [String] content
     # @param [Hash] extra
@@ -26,23 +30,17 @@ module LLM
     end
 
     ##
-    # @return [OpenStruct]
-    def logprobs
-      return nil unless extra.key?(:logprobs)
-      OpenStruct.from_hash(extra[:logprobs])
-    end
-
-    ##
+    # Returns a hash representation of the message
     # @return [Hash]
     def to_h
       {role:, content:}
     end
 
     ##
+    # Returns true when two objects have the same role and content
     # @param [Object] other
     #  The other object to compare
     # @return [Boolean]
-    #  Returns true when the "other" object has the same role and content
     def ==(other)
       if other.respond_to?(:to_h)
         to_h == other.to_h
@@ -51,5 +49,13 @@ module LLM
       end
     end
     alias_method :eql?, :==
+
+    ##
+    # Returns a string representation of the message
+    # @return [String]
+    def inspect
+      "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
+      "role=#{role.inspect} content=#{content.inspect}>"
+    end
   end
 end
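Because #== compares #to_h representations and accepts any object that responds to #to_h, a message can be compared against a plain hash; a short sketch:

  require "llm"

  msg = LLM::Message.new(:user, "Hello")
  msg.to_h                               # => {role: :user, content: "Hello"}
  msg == {role: :user, content: "Hello"} # => true (Hash responds to #to_h)
  msg.inspect                            # => "#<LLM::Message:0x... role=:user content=\"Hello\">"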
data/lib/llm/mime.rb ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+##
+# @private
+class LLM::Mime
+  ##
+  # Lookup a mime type
+  # @return [String, nil]
+  def self.[](key)
+    if key.respond_to?(:path)
+      types[File.extname(key.path)]
+    else
+      types[key]
+    end
+  end
+
+  ##
+  # Returns a Hash of mime types
+  # @return [Hash]
+  def self.types
+    @types ||= {
+      # Images
+      ".png" => "image/png",
+      ".jpg" => "image/jpeg",
+      ".jpeg" => "image/jpeg",
+      ".webp" => "image/webp",
+
+      # Videos
+      ".flv" => "video/x-flv",
+      ".mov" => "video/quicktime",
+      ".mpeg" => "video/mpeg",
+      ".mpg" => "video/mpeg",
+      ".mp4" => "video/mp4",
+      ".webm" => "video/webm",
+      ".wmv" => "video/x-ms-wmv",
+      ".3gp" => "video/3gpp",
+
+      # Audio
+      ".aac" => "audio/aac",
+      ".flac" => "audio/flac",
+      ".mp3" => "audio/mpeg",
+      ".m4a" => "audio/mp4",
+      ".mpga" => "audio/mpeg",
+      ".opus" => "audio/opus",
+      ".pcm" => "audio/L16",
+      ".wav" => "audio/wav",
+      ".weba" => "audio/webm",
+
+      # Documents
+      ".pdf" => "application/pdf",
+      ".txt" => "text/plain"
+    }
+  end
+end
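A short sketch of the lookup: LLM::Mime.[] accepts either an extension string or any object that responds to #path:

  require "llm"

  LLM::Mime[".png"]                   # => "image/png"
  LLM::Mime[LLM::File("/rocket.mp3")] # => "audio/mpeg" (extension taken via File.extname)
  LLM::Mime[".xyz"]                   # => nil for unknown extensions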
data/lib/llm/multipart.rb ADDED
@@ -0,0 +1,98 @@
+# encoding: ascii-8bit
+# frozen_string_literal: true
+
+##
+# @private
+class LLM::Multipart
+  require "llm"
+  require "securerandom"
+
+  ##
+  # @return [String]
+  attr_reader :boundary
+
+  ##
+  # @param [Hash] params
+  #  Request parameters
+  # @return [LLM::Multipart]
+  def initialize(params)
+    @boundary = "BOUNDARY__#{SecureRandom.hex(16)}"
+    @params = params
+  end
+
+  ##
+  # Returns the multipart content type
+  # @return [String]
+  def content_type
+    "multipart/form-data; boundary=#{@boundary}"
+  end
+
+  ##
+  # Returns the multipart request body parts
+  # @return [Array<String>]
+  def parts
+    params.map do |key, value|
+      locals = {key: key.to_s.b, boundary: boundary.to_s.b}
+      if value.respond_to?(:path)
+        file_part(key, value, locals)
+      else
+        data_part(key, value, locals)
+      end
+    end
+  end
+
+  ##
+  # Returns the multipart request body
+  # @return [String]
+  def body
+    [*parts, "--#{@boundary}--\r\n"].inject(&:<<)
+  end
+
+  private
+
+  attr_reader :params
+
+  def attributes(file)
+    {
+      filename: File.basename(file.path).b,
+      content_type: LLM::Mime[file].b
+    }
+  end
+
+  def multipart_header(type:, locals:)
+    if type == :file
+      str = "".b
+      str << "--#{locals[:boundary]}" \
+             "\r\n" \
+             "Content-Disposition: form-data; name=\"#{locals[:key]}\";" \
+             "filename=\"#{locals[:filename]}\"" \
+             "\r\n" \
+             "Content-Type: #{locals[:content_type]}" \
+             "\r\n\r\n"
+    elsif type == :data
+      str = "".b
+      str << "--#{locals[:boundary]}" \
+             "\r\n" \
+             "Content-Disposition: form-data; name=\"#{locals[:key]}\"" \
+             "\r\n\r\n"
+    else
+      raise "unknown type: #{type}"
+    end
+  end
+
+  def file_part(key, file, locals)
+    locals = locals.merge(attributes(file))
+    multipart_header(type: :file, locals:).tap do
+      _1 << File.binread(file.path)
+      _1 << "\r\n"
+    end
+  end
+
+  def data_part(key, value, locals)
+    locals = locals.merge(value:)
+    multipart_header(type: :data, locals:).tap do
+      _1 << value.to_s
+      _1 << "\r\n"
+    end
+  end
+end
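A hedged sketch of how this class can plug into Net::HTTP (the endpoint and field names below are hypothetical; values that respond to #path become file parts, everything else becomes plain form data):

  require "llm"
  require "net/http"

  multipart = LLM::Multipart.new(purpose: "assistants", file: LLM::File("/report.pdf"))
  req = Net::HTTP::Post.new("/v1/files")
  req["content-type"] = multipart.content_type # multipart/form-data; boundary=BOUNDARY__...
  req.body = multipart.body                    # all parts joined, plus the closing boundary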
data/lib/llm/provider.rb CHANGED
@@ -44,62 +44,139 @@ class LLM::Provider
   end
 
   ##
+  # Provides an embedding
   # @param [String, Array<String>] input
   #  The input to embed
+  # @param [String] model
+  #  The embedding model to use
+  # @param [Hash] params
+  #  Other embedding parameters
   # @raise [NotImplementedError]
   #  When the method is not implemented by a subclass
   # @return [LLM::Response::Embedding]
-  def embed(input, **params)
+  def embed(input, model:, **params)
     raise NotImplementedError
   end
 
   ##
-  # Completes a given prompt using the LLM
+  # Provides an interface to the chat completions API
   # @example
   #   llm = LLM.openai(ENV["KEY"])
-  #   context = [
-  #     {role: "system", content: "Answer all of my questions"},
-  #     {role: "system", content: "Your name is Pablo, you are 25 years old and you are my amigo"},
+  #   messages = [
+  #     {role: "system", content: "Your task is to answer all of my questions"},
+  #     {role: "system", content: "Your answers should be short and concise"},
   #   ]
-  #   res = llm.complete "What is your name and what age are you?", :user, messages: context
+  #   res = llm.complete("Hello. What is the answer to 5 + 2 ?", :user, messages:)
   #   print "[#{res.choices[0].role}]", res.choices[0].content, "\n"
   # @param [String] prompt
   #  The input prompt to be completed
   # @param [Symbol] role
   #  The role of the prompt (e.g. :user, :system)
-  # @param [Array<Hash, LLM::Message>] messages
-  #  The messages to include in the completion
+  # @param [String] model
+  #  The model to use for the completion
+  # @param [Hash] params
+  #  Other completion parameters
   # @raise [NotImplementedError]
   #  When the method is not implemented by a subclass
   # @return [LLM::Response::Completion]
-  def complete(prompt, role = :user, **params)
+  def complete(prompt, role = :user, model:, **params)
     raise NotImplementedError
   end
 
   ##
-  # Starts a new lazy conversation
+  # Starts a new lazy chat powered by the chat completions API
+  # @note
+  #  This method creates a lazy version of a
+  #  {LLM::Chat LLM::Chat} object.
+  # @param prompt (see LLM::Provider#complete)
+  # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
+  # @raise (see LLM::Provider#complete)
+  # @return [LLM::Chat]
+  def chat(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).lazy.chat(prompt, role)
+  end
+
+  ##
+  # Starts a new chat powered by the chat completions API
+  # @note
+  #  This method creates a non-lazy version of a
+  #  {LLM::Chat LLM::Chat} object.
+  # @param prompt (see LLM::Provider#complete)
+  # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
+  # @raise (see LLM::Provider#complete)
+  # @return [LLM::Chat]
+  def chat!(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).chat(prompt, role)
+  end
+
+  ##
+  # Starts a new lazy chat powered by the responses API
   # @note
   #  This method creates a lazy variant of a
-  #  {LLM::Conversation LLM::Conversation} object.
+  #  {LLM::Chat LLM::Chat} object.
   # @param prompt (see LLM::Provider#complete)
   # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
   # @raise (see LLM::Provider#complete)
-  # @return [LLM::LazyConversation]
-  def chat(prompt, role = :user, **params)
-    LLM::Conversation.new(self, params).lazy.chat(prompt, role)
+  # @return [LLM::Chat]
+  def respond(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).lazy.respond(prompt, role)
   end
 
   ##
-  # Starts a new conversation
+  # Starts a new chat powered by the responses API
   # @note
   #  This method creates a non-lazy variant of a
-  #  {LLM::Conversation LLM::Conversation} object.
+  #  {LLM::Chat LLM::Chat} object.
   # @param prompt (see LLM::Provider#complete)
   # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
   # @raise (see LLM::Provider#complete)
-  # @return [LLM::Conversation]
-  def chat!(prompt, role = :user, **params)
-    LLM::Conversation.new(self, params).chat(prompt, role)
+  # @return [LLM::Chat]
+  def respond!(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).respond(prompt, role)
+  end
+
+  ##
+  # @note
+  #  Compared to the chat completions API, the responses API
+  #  can require less bandwidth on each turn, maintain state
+  #  server-side, and produce faster responses.
+  # @return [LLM::OpenAI::Responses]
+  #  Returns an interface to the responses API
+  def responses
+    raise NotImplementedError
+  end
+
+  ##
+  # @return [LLM::OpenAI::Images, LLM::Gemini::Images]
+  #  Returns an interface to the images API
+  def images
+    raise NotImplementedError
+  end
+
+  ##
+  # @return [LLM::OpenAI::Audio]
+  #  Returns an interface to the audio API
+  def audio
+    raise NotImplementedError
+  end
+
+  ##
+  # @return [LLM::OpenAI::Files]
+  #  Returns an interface to the files API
+  def files
+    raise NotImplementedError
   end
 
   ##
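Taken together, the new entry points pair a lazy and an eager variant for each API; a sketch using the signatures above (key and prompts are placeholders):

  require "llm"

  llm = LLM.openai(ENV["KEY"])

  # Chat completions API: chat is lazy, chat! is eager
  bot = llm.chat("Your answers should be short and concise", :system)
  bot.chat("What is the answer to 5 + 2 ?")

  # Responses API: respond is lazy, respond! is eager
  bot = llm.respond("Your answers should be short and concise", :system)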
data/lib/llm/providers/anthropic/error_handler.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]
data/lib/llm/providers/anthropic/format.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
data/lib/llm/providers/anthropic/response_parser.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   module ResponseParser
     def parse_embedding(body)
       {
@@ -19,7 +21,7 @@ class LLM::Anthropic
       model: body["model"],
       choices: body["content"].map do
         # TODO: don't hardcode role
-        LLM::Message.new("assistant", _1["text"], {completion: self})
+        LLM::Message.new("assistant", _1["text"], {response: self})
       end,
       prompt_tokens: body.dig("usage", "input_tokens"),
      completion_tokens: body.dig("usage", "output_tokens")
data/lib/llm/providers/anthropic.rb CHANGED
@@ -24,21 +24,30 @@ module LLM
     # @param input (see LLM::Provider#embed)
     # @param [String] token
     #  Valid token for the VoyageAI API
+    # @param [String] model
+    #  The embedding model to use
     # @param [Hash] params
-    #  Additional parameters to pass to the API
+    #  Other embedding parameters
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, token:, **params)
+    def embed(input, token:, model: "voyage-2", **params)
       llm = LLM.voyageai(token)
-      llm.embed(input, **params)
+      llm.embed(input, **params.merge(model:))
     end
 
     ##
+    # Provides an interface to the chat completions API
     # @see https://docs.anthropic.com/en/api/messages Anthropic docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param max_tokens The maximum number of tokens to generate
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-      params = {max_tokens: 1024, model: "claude-3-5-sonnet-20240620"}.merge!(params)
+    def complete(prompt, role = :user, model: "claude-3-5-sonnet-20240620", max_tokens: 1024, **params)
+      params = {max_tokens:, model:}.merge!(params)
       req = Net::HTTP::Post.new("/v1/messages", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
       req.body = JSON.dump({messages: format(messages)}.merge!(params))
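A sketch of the reworked signature, assuming an LLM.anthropic constructor that mirrors the LLM.openai and LLM.gemini constructors seen elsewhere in this diff; model: and max_tokens: now default to the values that were previously merged in as a hash:

  require "llm"

  llm = LLM.anthropic(ENV["KEY"])
  res = llm.complete("Hello. What is the answer to 5 + 2 ?", :user,
                     model: "claude-3-5-sonnet-20240620", # default
                     max_tokens: 1024)                    # default
  print "[#{res.choices[0].role}]", res.choices[0].content, "\n"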
data/lib/llm/providers/gemini/audio.rb ADDED
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Audio LLM::Gemini::Audio} class provides an audio
+  # object for interacting with [Gemini's audio API](https://ai.google.dev/gemini-api/docs/audio).
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+  #   res.text # => "A dog on a rocket to the moon"
+  class Audio
+    ##
+    # Returns a new Audio object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Audio]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_speech
+      raise NotImplementedError
+    end
+
+    ##
+    # Create an audio transcription
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+    #   res.text # => "A dog on a rocket to the moon"
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::AudioTranscription]
+    def create_transcription(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to transcribe the contents of an audio file",
+        "Your response should include the transcription, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranscription
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+
+    ##
+    # Create an audio translation (in English)
+    # @example
+    #   # Arabic => English
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+    #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::AudioTranslation]
+    def create_translation(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to translate the contents of an audio file into English",
+        "Your response should include the translation, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranslation
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+  end
+end
data/lib/llm/providers/gemini/error_handler.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]