llm.rb 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +89 -20
  3. data/lib/llm/chat.rb +5 -3
  4. data/lib/llm/core_ext/ostruct.rb +1 -1
  5. data/lib/llm/error.rb +6 -1
  6. data/lib/llm/file.rb +15 -1
  7. data/lib/llm/model.rb +27 -2
  8. data/lib/llm/provider.rb +28 -32
  9. data/lib/llm/providers/anthropic/format.rb +19 -6
  10. data/lib/llm/providers/anthropic/models.rb +62 -0
  11. data/lib/llm/providers/anthropic.rb +23 -8
  12. data/lib/llm/providers/gemini/files.rb +2 -2
  13. data/lib/llm/providers/gemini/format.rb +6 -1
  14. data/lib/llm/providers/gemini/images.rb +5 -5
  15. data/lib/llm/providers/gemini/models.rb +69 -0
  16. data/lib/llm/providers/gemini/response_parser.rb +1 -5
  17. data/lib/llm/providers/gemini.rb +24 -8
  18. data/lib/llm/providers/ollama/format.rb +11 -3
  19. data/lib/llm/providers/ollama/models.rb +66 -0
  20. data/lib/llm/providers/ollama.rb +23 -8
  21. data/lib/llm/providers/openai/audio.rb +3 -5
  22. data/lib/llm/providers/openai/files.rb +2 -2
  23. data/lib/llm/providers/openai/format.rb +47 -11
  24. data/lib/llm/providers/openai/images.rb +4 -4
  25. data/lib/llm/providers/openai/models.rb +62 -0
  26. data/lib/llm/providers/openai/response_parser.rb +1 -5
  27. data/lib/llm/providers/openai/responses.rb +24 -6
  28. data/lib/llm/providers/openai.rb +24 -7
  29. data/lib/llm/response/modellist.rb +18 -0
  30. data/lib/llm/response.rb +1 -0
  31. data/lib/llm/version.rb +1 -1
  32. data/lib/llm.rb +2 -1
  33. data/spec/anthropic/completion_spec.rb +36 -0
  34. data/spec/anthropic/models_spec.rb +21 -0
  35. data/spec/gemini/images_spec.rb +4 -12
  36. data/spec/gemini/models_spec.rb +21 -0
  37. data/spec/llm/conversation_spec.rb +5 -3
  38. data/spec/ollama/models_spec.rb +20 -0
  39. data/spec/openai/completion_spec.rb +21 -2
  40. data/spec/openai/files_spec.rb +3 -3
  41. data/spec/openai/images_spec.rb +2 -6
  42. data/spec/openai/models_spec.rb +21 -0
  43. metadata +11 -6
  44. data/share/llm/models/anthropic.yml +0 -35
  45. data/share/llm/models/gemini.yml +0 -35
  46. data/share/llm/models/ollama.yml +0 -155
  47. data/share/llm/models/openai.yml +0 -46
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 3939075c064b4abfd8853c3f67b6db7df6111d340d658d4d8ad0c4d1bccc96bc
- data.tar.gz: 0ca274d3e4b032c25730aef896df903681c28033ebb0907c965339a33aff56d1
+ metadata.gz: c8ce8caa7c769da9197528a864c153071f3c4aca15718efc985e543911c04ce2
+ data.tar.gz: 389ff41e9e2b35782b1484048b7597f5573bf2a86cf9eaff8cfd7c4cb2b19be3
  SHA512:
- metadata.gz: feaf87457b8fa5b4f756a5fe8cc1f670c8b0286a730fe00273bc99678092fe7f704d58f01ba0a0baf4072a0dcee063bc87cf88bc7cdf53125334476adbce41f6
- data.tar.gz: 3be8b460d9b483c0e172d9159b2394ea39da7a1475aee3ab47b224303e2a251f3b04f0543402494485040998225f84342be986db8c7b8ea80df92f561d4d6d92
+ metadata.gz: 39c8f71eae878b5787ca839138de07ce06cba4fdee0bafb1bd75a71f3b3e59ee08fa05f5d9f280522ec751722c9a8a15430d1b999b05e14052d85c745bf9781c
+ data.tar.gz: fdb5268b0095f09b41481e6c7071a2dae66cf9a3fd21755834b76040e13a236bc18122dfe95230dd49a827e774487a1add1aa35c8976f284a03afad881321b46
data/README.md CHANGED
@@ -26,6 +26,7 @@ llm = LLM.openai("yourapikey")
  llm = LLM.gemini("yourapikey")
  llm = LLM.anthropic("yourapikey")
  llm = LLM.ollama(nil)
+ llm = LLM.voyageai("yourapikey")
  ```

  ### Conversations
@@ -37,7 +38,9 @@ The following example enables lazy mode for a
  object by entering into a "lazy" conversation where messages are buffered and
  sent to the provider only when necessary. Both lazy and non-lazy conversations
  maintain a message thread that can be reused as context throughout a conversation.
- The example uses the stateless chat completions API that all LLM providers support:
+ The example captures the spirit of llm.rb by demonstrating how objects cooperate
+ together through composition, and it uses the stateless chat completions API that
+ all LLM providers support:

  ```ruby
  #!/usr/bin/env ruby
@@ -122,13 +125,10 @@ for more information on how to use the audio generation API:
  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "open-uri"
- require "fileutils"

  llm = LLM.openai(ENV["KEY"])
  res = llm.audio.create_speech(input: "Hello world")
- File.binwrite File.join(Dir.home, "hello.mp3"),
- res.audio.string
+ IO.copy_stream res.audio, File.join(Dir.home, "hello.mp3")
  ```

  #### Transcribe
@@ -151,8 +151,6 @@ examples and documentation
  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "open-uri"
- require "fileutils"

  llm = LLM.openai(ENV["KEY"])
  res = llm.audio.create_transcription(
@@ -180,9 +178,8 @@ examples and documentation


  ```ruby
+ #!/usr/bin/env ruby
  require "llm"
- require "open-uri"
- require "fileutils"

  llm = LLM.openai(ENV["KEY"])
  res = llm.audio.create_translation(
@@ -320,6 +317,48 @@ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n"
  # contains information about the features, installation, and usage of OpenBSD.
  ```

+ ### Prompts
+
+ #### Multimodal
+
+ Generally all providers accept text prompts but some providers can
+ also understand URLs, and various file types (eg images, audio, video,
+ etc). The llm.rb approach to multimodal prompts is to let you pass `URI`
+ objects to describe links, `LLM::File` / `LLM::Response::File` objects
+ to describe files, `String` objects to describe text blobs, or an array
+ of the forementioned objects to describe multiple objects in a single
+ prompt. Each object is a first class citizen that can be passed directly
+ to a prompt.
+
+ For more depth and examples on how to use the multimodal API, please see
+ the [provider-specific documentation](https://0x1eef.github.io/x/llm.rb/)
+ for more provider-specific examples – there can be subtle differences
+ between providers and even between APIs from the same provider that are
+ not covered in the README:
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+
+ llm = LLM.openai(ENV["KEY"])
+ bot = LLM::Chat.new(llm).lazy
+
+ bot.chat URI("https://example.com/path/to/image.png")
+ bot.chat "Describe the above image"
+ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
+
+ file = bot.files.create(file: LLM::File("/documents/openbsd_is_awesome.pdf"))
+ bot.chat file
+ bot.chat "What is this file about?"
+ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
+
+ bot.chat [LLM::File("/images/puffy.png"), "What is this image about?"]
+ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
+
+ bot.chat [LLM::File("/images/beastie.png"), "What is this image about?"]
+ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
+ ```
+
  ### Embeddings

  #### Text
@@ -350,6 +389,38 @@ print res.embeddings[0].size, "\n"
  # 1536
  ```

+ ### Models
+
+ #### List
+
+ Almost all LLM providers provide a models endpoint that allows a client to
+ query the list of models that are available to use. The list is dynamic,
+ maintained by LLM providers, and it is independent of a specific llm.rb release.
+ True to the llm.rb spirit of small, composable objects that cooperate with
+ each other, a
+ [LLM::Model](https://0x1eef.github.io/x/llm.rb/LLM/Model.html)
+ object can be used instead of a string that describes a model name (although
+ either works). Let's take a look at an example:
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+
+ ##
+ # List all models
+ llm = LLM.openai(ENV["KEY"])
+ llm.models.all.each do |model|
+ print "model: ", model.id, "\n"
+ end
+
+ ##
+ # Select a model
+ model = llm.models.all.find { |m| m.id == "gpt-3.5-turbo" }
+ bot = LLM::Chat.new(llm, model:)
+ bot.chat "Hello #{model.id} :)"
+ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
+ ```
+
  ### Memory

  #### Child process
@@ -372,7 +443,7 @@ llm = LLM.gemini(ENV["KEY"])
  fork do
  %w[dog cat sheep goat capybara].each do |animal|
  res = llm.images.create(prompt: "a #{animal} on a rocket to the moon")
- File.binwrite "#{animal}.png", res.images[0].binary
+ IO.copy_stream res.images[0], "#{animal}.png"
  end
  end
  Process.wait
@@ -394,19 +465,17 @@ llm.rb can be installed via rubygems.org:

  ## Philosophy

- llm.rb was built for developers who believe that simplicity is strength.
- It provides a clean, dependency-free interface to Large Language Models,
- treating Ruby itself as the primary platform – not Rails or any other
- specific framework or library. There is no hidden magic or extreme
- metaprogramming.
+ llm.rb was built for developers who believe that simplicity can be challenging
+ but it is always worth it. It provides a clean, dependency-free interface to
+ Large Language Models, treating Ruby itself as the primary platform –
+ not Rails or any other specific framework or library. There is no hidden
+ magic or complex metaprogramming.

  Every part of llm.rb is designed to be explicit, composable, memory-safe,
  and production-ready without compromise. No unnecessary abstractions,
- no global configuration, and no dependencies that aren't part of standard
- Ruby. It has been inspired in part by other languages such as Python, but
- it is not a port of any other library.
-
- Good software doesn’t need marketing. It just needs to work. :)
+ no global configuration, no global state, and no dependencies that aren't
+ part of standard Ruby. It has been inspired in part by other languages such
+ as Python, but it is not a port of any other library.

  ## License

data/lib/llm/chat.rb CHANGED
@@ -27,11 +27,13 @@ module LLM
  ##
  # @param [LLM::Provider] provider
  # A provider
+ # @param [String] model
+ # The model to maintain throughout the conversation
  # @param [Hash] params
- # The parameters to maintain throughout the conversation
- def initialize(provider, params = {})
+ # Other parameters to maintain throughout the conversation
+ def initialize(provider, model: provider.default_model, **params)
  @provider = provider
- @params = params
+ @params = params.merge!(model:)
  @lazy = false
  @messages = []
  end
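A minimal sketch of the new constructor in use, assuming an OpenAI key in ENV["KEY"]; the model name mirrors the README example, and omitting model: falls back to provider.default_model:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(ENV["KEY"])
# model: is now a keyword argument rather than an entry in the params hash
bot = LLM::Chat.new(llm, model: "gpt-3.5-turbo").lazy
bot.chat "Hello there"
bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
```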
data/lib/llm/core_ext/ostruct.rb CHANGED
@@ -18,7 +18,7 @@ class OpenStruct
  hash_obj.each do |key, value|
  visited_object[key] = walk(value)
  end
- OpenStruct.new(visited_object)
+ new(visited_object)
  end

  private
data/lib/llm/error.rb CHANGED
@@ -4,8 +4,9 @@ module LLM
  ##
  # The superclass of all LLM errors
  class Error < RuntimeError
- def initialize
+ def initialize(...)
  block_given? ? yield(self) : nil
+ super
  end

  ##
@@ -17,6 +18,10 @@ module LLM
  attr_accessor :response
  end

+ ##
+ # When a prompt is given an object that's not understood
+ PromptError = Class.new(Error)
+
  ##
  # HTTPUnauthorized
  Unauthorized = Class.new(ResponseError)
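A rough sketch of how the new LLM::Error::PromptError surfaces to callers, based on the format changes later in this diff; the file path is illustrative and reuses a README example:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.anthropic(ENV["KEY"])
begin
  # Anthropic's formatter accepts images but not PDFs (see
  # anthropic/format.rb below), so this raises LLM::Error::PromptError
  llm.complete LLM::File("/documents/openbsd_is_awesome.pdf")
rescue LLM::Error::PromptError => ex
  warn ex.message
end
```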
data/lib/llm/file.rb CHANGED
@@ -7,13 +7,20 @@
  class LLM::File
  ##
  # @return [String]
- # Returns the path to a file
+ # Returns the path to the file
  attr_reader :path

  def initialize(path)
  @path = path
  end

+ ##
+ # @return [String]
+ # Returns basename of the file
+ def basename
+ File.basename(path)
+ end
+
  ##
  # @return [String]
  # Returns the MIME type of the file
@@ -42,6 +49,13 @@ class LLM::File
  [File.binread(path)].pack("m0")
  end

+ ##
+ # @return [String]
+ # Returns the file contents in base64 URL format
+ def to_data_uri
+ "data:#{mime_type};base64,#{to_b64}"
+ end
+
  ##
  # @return [File]
  # Yields an IO object suitable to be streamed
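A small sketch of the two helpers added above, basename and to_data_uri; the path reuses a README example and the values shown in comments are indicative only:

```ruby
#!/usr/bin/env ruby
require "llm"

file = LLM::File("/images/puffy.png")
file.basename    # => "puffy.png"
file.to_data_uri # => "data:image/png;base64,..." (MIME type plus base64-encoded contents)
```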
data/lib/llm/model.rb CHANGED
@@ -1,7 +1,32 @@
  # frozen_string_literal: true

- class LLM::Model < Struct.new(:name, :parameters, :description, :to_param, keyword_init: true)
+ ##
+ # The {LLM::Model LLM::Model} class represents an LLM model that
+ # is available to use. Its properties are delegated to the underlying
+ # response body, and vary by provider.
+ class LLM::Model < OpenStruct
+ ##
+ # Returns a subclass of {LLM::Provider LLM::Provider}
+ # @return [LLM::Provider]
+ attr_accessor :provider
+
+ ##
+ # Returns the model ID
+ # @return [String]
+ def id
+ case @provider.class.to_s
+ when "LLM::Ollama"
+ self["name"]
+ when "LLM::Gemini"
+ self["name"].sub(%r|\Amodels/|, "")
+ else
+ self["id"]
+ end
+ end
+
+ ##
+ # @return [String]
  def to_json(*)
- to_param.to_json(*)
+ id.to_json(*)
  end
  end
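An indicative sketch of the new LLM::Model#id accessor, which normalizes the identifier per provider; the model name in the comment is hypothetical:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])
model = llm.models.all.first
# Gemini responses use "name" with a "models/" prefix, which #id strips
# (e.g. "models/gemini-pro" => "gemini-pro"); Ollama reads "name" as-is,
# and other providers read "id"
print "model: ", model.id, "\n"
```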
data/lib/llm/provider.rb CHANGED
@@ -4,16 +4,7 @@
  # The Provider class represents an abstract class for
  # LLM (Language Model) providers.
  #
- # @note
- # This class is not meant to be instantiated directly.
- # Instead, use one of the subclasses that implement
- # the methods defined here.
- #
  # @abstract
- # @see LLM::Provider::OpenAI
- # @see LLM::Provider::Anthropic
- # @see LLM::Provider::Gemini
- # @see LLM::Provider::Ollama
  class LLM::Provider
  require "net/http"

@@ -53,7 +44,7 @@ class LLM::Provider
  # @raise [NotImplementedError]
  # When the method is not implemented by a subclass
  # @return [LLM::Response::Embedding]
- def embed(input, model:, **params)
+ def embed(input, model: nil, **params)
  raise NotImplementedError
  end

@@ -78,7 +69,7 @@ class LLM::Provider
  # @raise [NotImplementedError]
  # When the method is not implemented by a subclass
  # @return [LLM::Response::Completion]
- def complete(prompt, role = :user, model:, **params)
+ def complete(prompt, role = :user, model: default_model, **params)
  raise NotImplementedError
  end

@@ -94,8 +85,8 @@ class LLM::Provider
  # Other completion parameters to maintain throughout a chat
  # @raise (see LLM::Provider#complete)
  # @return [LLM::Chat]
- def chat(prompt, role = :user, model: nil, **params)
- LLM::Chat.new(self, params).lazy.chat(prompt, role)
+ def chat(prompt, role = :user, model: default_model, **params)
+ LLM::Chat.new(self, **params.merge(model:)).lazy.chat(prompt, role)
  end

  ##
@@ -110,8 +101,8 @@ class LLM::Provider
  # Other completion parameters to maintain throughout a chat
  # @raise (see LLM::Provider#complete)
  # @return [LLM::Chat]
- def chat!(prompt, role = :user, model: nil, **params)
- LLM::Chat.new(self, params).chat(prompt, role)
+ def chat!(prompt, role = :user, model: default_model, **params)
+ LLM::Chat.new(self, **params.merge(model:)).chat(prompt, role)
  end

  ##
@@ -126,8 +117,8 @@ class LLM::Provider
  # Other completion parameters to maintain throughout a chat
  # @raise (see LLM::Provider#complete)
  # @return [LLM::Chat]
- def respond(prompt, role = :user, model: nil, **params)
- LLM::Chat.new(self, params).lazy.respond(prompt, role)
+ def respond(prompt, role = :user, model: default_model, **params)
+ LLM::Chat.new(self, **params.merge(model:)).lazy.respond(prompt, role)
  end

  ##
@@ -142,8 +133,8 @@ class LLM::Provider
  # Other completion parameters to maintain throughout a chat
  # @raise (see LLM::Provider#complete)
  # @return [LLM::Chat]
- def respond!(prompt, role = :user, model: nil, **params)
- LLM::Chat.new(self, params).respond(prompt, role)
+ def respond!(prompt, role = :user, model: default_model, **params)
+ LLM::Chat.new(self, **params.merge(model:)).respond(prompt, role)
  end

  ##
@@ -178,6 +169,13 @@ class LLM::Provider
  raise NotImplementedError
  end

+ ##
+ # @return [LLM::OpenAI::Models]
+ # Returns an interface to the models API
+ def models
+ raise NotImplementedError
+ end
+
  ##
  # @return [String]
  # Returns the role of the assistant in the conversation.
@@ -187,9 +185,9 @@ class LLM::Provider
  end

  ##
- # @return [Hash<String, LLM::Model>]
- # Returns a hash of available models
- def models
+ # @return [String]
+ # Returns the default model for chat completions
+ def default_model
  raise NotImplementedError
  end

@@ -248,15 +246,13 @@ class LLM::Provider
  end

  ##
- # @param [String] provider
- # The name of the provider
- # @return [Hash<String, Hash>]
- def load_models!(provider)
- require "yaml" unless defined?(YAML)
- rootdir = File.realpath File.join(__dir__, "..", "..")
- sharedir = File.join(rootdir, "share", "llm")
- provider = provider.gsub(/[^a-z0-9]/i, "")
- yaml = File.join(sharedir, "models", "#{provider}.yml")
- YAML.safe_load_file(yaml).transform_values { LLM::Model.new(_1) }
+ # @param [Net::HTTPRequest] req
+ # The request to set the body stream for
+ # @param [IO] io
+ # The IO object to set as the body stream
+ # @return [void]
+ def set_body_stream(req, io)
+ req.body_stream = io
+ req["transfer-encoding"] = "chunked" unless req["content-length"]
  end
  end
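The YAML-backed load_models! helper is gone; set_body_stream takes its place in this diff. A condensed sketch of how the provider subclasses below use it when posting a request body (the surrounding names are taken from the anthropic.rb hunk further down):

```ruby
# Sketch of code inside a provider method, not a standalone script
req  = Net::HTTP::Post.new("/v1/messages", headers)
body = JSON.dump({messages: format(messages)}.merge!(params))
# Streams the body instead of assigning req.body, and falls back to
# chunked transfer-encoding when no content-length header is present
set_body_stream(req, StringIO.new(body))
res = request(@http, req)
```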
data/lib/llm/providers/anthropic/format.rb CHANGED
@@ -26,13 +26,26 @@ class LLM::Anthropic
  # @return [String, Hash]
  # The formatted content
  def format_content(content)
- if URI === content
- [{
- type: :image,
- source: {type: :base64, media_type: LLM::File(content.to_s).mime_type, data: [content.to_s].pack("m0")}
- }]
+ case content
+ when Array
+ content.flat_map { format_content(_1) }
+ when URI
+ [{type: :image, source: {type: "url", url: content.to_s}}]
+ when LLM::File
+ if content.image?
+ [{type: :image, source: {type: "base64", media_type: content.mime_type, data: content.to_b64}}]
+ else
+ raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+ "is not an image, and therefore not supported by the " \
+ "Anthropic API"
+ end
+ when String
+ [{type: :text, text: content}]
+ when LLM::Message
+ format_content(content.content)
  else
- content
+ raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+ "is not supported by the Anthropic API"
  end
  end
  end
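With the case-based formatter above, URL prompts are now forwarded to Anthropic as url image sources instead of being downloaded and base64-encoded client-side. A brief sketch reusing the README's multimodal example:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.anthropic(ENV["KEY"])
bot = LLM::Chat.new(llm).lazy
bot.chat URI("https://example.com/path/to/image.png")
bot.chat "Describe the above image"
bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
```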
data/lib/llm/providers/anthropic/models.rb ADDED
@@ -0,0 +1,62 @@
+ # frozen_string_literal: true
+
+ class LLM::Anthropic
+ ##
+ # The {LLM::Anthropic::Models LLM::Anthropic::Models} class provides a model
+ # object for interacting with [Anthropic's models API](https://platform.anthropic.com/docs/api-reference/models/list).
+ # The models API allows a client to query Anthropic for a list of models
+ # that are available for use with the Anthropic API.
+ #
+ # @example
+ # #!/usr/bin/env ruby
+ # require "llm"
+ #
+ # llm = LLM.anthropic(ENV["KEY"])
+ # res = llm.models.all
+ # res.each do |model|
+ # print "id: ", model.id, "\n"
+ # end
+ class Models
+ ##
+ # Returns a new Models object
+ # @param provider [LLM::Provider]
+ # @return [LLM::Anthropic::Files]
+ def initialize(provider)
+ @provider = provider
+ end
+
+ ##
+ # List all models
+ # @example
+ # llm = LLM.anthropic(ENV["KEY"])
+ # res = llm.models.all
+ # res.each do |model|
+ # print "id: ", model.id, "\n"
+ # end
+ # @see https://docs.anthropic.com/en/api/models-list Anthropic docs
+ # @param [Hash] params Other parameters (see Anthropic docs)
+ # @raise (see LLM::Provider#request)
+ # @return [LLM::Response::FileList]
+ def all(**params)
+ query = URI.encode_www_form(params)
+ req = Net::HTTP::Get.new("/v1/models?#{query}", headers)
+ res = request(http, req)
+ LLM::Response::ModelList.new(res).tap { |modellist|
+ models = modellist.body["data"].map do |model|
+ LLM::Model.from_hash(model).tap { _1.provider = @provider }
+ end
+ modellist.models = models
+ }
+ end
+
+ private
+
+ def http
+ @provider.instance_variable_get(:@http)
+ end
+
+ [:headers, :request].each do |m|
+ define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+ end
+ end
+ end
data/lib/llm/providers/anthropic.rb CHANGED
@@ -8,6 +8,7 @@ module LLM
  require_relative "anthropic/error_handler"
  require_relative "anthropic/response_parser"
  require_relative "anthropic/format"
+ require_relative "anthropic/models"
  include Format

  HOST = "api.anthropic.com"
@@ -45,16 +46,28 @@ module LLM
  # @param params (see LLM::Provider#complete)
  # @example (see LLM::Provider#complete)
  # @raise (see LLM::Provider#request)
+ # @raise [LLM::Error::PromptError]
+ # When given an object a provider does not understand
  # @return (see LLM::Provider#complete)
- def complete(prompt, role = :user, model: "claude-3-5-sonnet-20240620", max_tokens: 1024, **params)
- params = {max_tokens:, model:}.merge!(params)
- req = Net::HTTP::Post.new("/v1/messages", headers)
+ def complete(prompt, role = :user, model: default_model, max_tokens: 1024, **params)
+ params = {max_tokens:, model:}.merge!(params)
+ req = Net::HTTP::Post.new("/v1/messages", headers)
  messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
- req.body = JSON.dump({messages: format(messages)}.merge!(params))
- res = request(@http, req)
+ body = JSON.dump({messages: format(messages)}.merge!(params))
+ set_body_stream(req, StringIO.new(body))
+
+ res = request(@http, req)
  Response::Completion.new(res).extend(response_parser)
  end

+ ##
+ # Provides an interface to Anthropic's models API
+ # @see https://docs.anthropic.com/en/api/models-list
+ # @return [LLM::Anthropic::Models]
+ def models
+ LLM::Anthropic::Models.new(self)
+ end
+
  ##
  # @return (see LLM::Provider#assistant_role)
  def assistant_role
@@ -62,9 +75,11 @@ module LLM
  end

  ##
- # @return (see LLM::Provider#models)
- def models
- @models ||= load_models!("anthropic")
+ # Returns the default model for chat completions
+ # @see https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table claude-3-5-sonnet-20240620
+ # @return [String]
+ def default_model
+ "claude-3-5-sonnet-20240620"
  end

  private
data/lib/llm/providers/gemini/files.rb CHANGED
@@ -83,7 +83,7 @@ class LLM::Gemini
  req["X-Goog-Upload-Offset"] = 0
  req["X-Goog-Upload-Command"] = "upload, finalize"
  file.with_io do |io|
- req.body_stream = io
+ set_body_stream(req, io)
  res = request(http, req)
  LLM::Response::File.new(res)
  end
@@ -155,7 +155,7 @@ class LLM::Gemini
  @provider.instance_variable_get(:@secret)
  end

- [:headers, :request].each do |m|
+ [:headers, :request, :set_body_stream].each do |m|
  define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
  end
  end
data/lib/llm/providers/gemini/format.rb CHANGED
@@ -35,8 +35,13 @@ class LLM::Gemini
  when LLM::File
  file = content
  {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
- else
+ when String
  {text: content}
+ when LLM::Message
+ format_content(content.content)
+ else
+ raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+ "is not supported by the Gemini API"
  end
  end
  end
data/lib/llm/providers/gemini/images.rb CHANGED
@@ -13,7 +13,7 @@ class LLM::Gemini
  #
  # llm = LLM.gemini(ENV["KEY"])
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
- # File.binwrite "rocket.png", res.images[0].binary
+ # IO.copy_stream res.images[0], "rocket.png"
  class Images
  include Format

@@ -30,7 +30,7 @@ class LLM::Gemini
  # @example
  # llm = LLM.gemini(ENV["KEY"])
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
- # File.binwrite "rocket.png", res.images[0].binary
+ # IO.copy_stream res.images[0], "rocket.png"
  # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
  # @param [String] prompt The prompt
  # @param [Hash] params Other parameters (see Gemini docs)
@@ -56,7 +56,7 @@ class LLM::Gemini
  # @example
  # llm = LLM.gemini(ENV["KEY"])
  # res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
- # File.binwrite "hatoncat.png", res.images[0].binary
+ # IO.copy_stream res.images[0], "hatoncat.png"
  # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
  # @param [LLM::File] image The image to edit
  # @param [String] prompt The prompt
@@ -70,7 +70,7 @@ class LLM::Gemini
  contents: [{parts: [{text: prompt}, format_content(image)]}],
  generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
  }.merge!(params)).b
- req.body_stream = StringIO.new(body)
+ set_body_stream(req, StringIO.new(body))
  res = request(http, req)
  LLM::Response::Image.new(res).extend(response_parser)
  end
@@ -92,7 +92,7 @@ class LLM::Gemini
  @provider.instance_variable_get(:@http)
  end

- [:response_parser, :headers, :request].each do |m|
+ [:response_parser, :headers, :request, :set_body_stream].each do |m|
  define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
  end
  end
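The documentation now copies generated images with IO.copy_stream rather than File.binwrite; a short sketch based on the updated example above:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])
res = llm.images.create prompt: "A dog on a rocket to the moon"
# The first image in the response is streamed straight to disk
IO.copy_stream res.images[0], "rocket.png"
```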