llm.rb 3.1.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa682f0c6793298daeaac88092cb52f03652cbbbf28adfd6b62f94b8a263f3f3
4
- data.tar.gz: 1fb08983372becef70d866bdc4ee79ee8d8bba55ace5d4be4637a69e91341747
3
+ metadata.gz: 70c5f60cafc446edf8d1be15367ca77eb89e467785c77fbc7f758c29e761e8db
4
+ data.tar.gz: db9ec411a0c441e471a19a98624e0973815867c28ab89f8bcebc108b4dc11b3b
5
5
  SHA512:
6
- metadata.gz: 720e09be8b25a9fde7d92887636d572edcdbd39a1b3a23ae1f44baaddb9f881c95927f63f16248f3a3d22da1704973f69f51309c487e1c97195175b772499b0d
7
- data.tar.gz: 8cf35f7829b4e66ef002652643779658cf9c8cf8726f8b563eb5ca59ebcfc3a71eeb9b4cc473dfc4556324448855b6733fe3d48a73fb6e70fb91102544eb7061
6
+ metadata.gz: f06f9c0367ad3d3428ce7c5046aebd37e4bfea9eac483b5c08a448bac58e9c205d3566a246d3022e9bd6d1669a9f9b5244a475262b24303d845464f7ec3ce4de
7
+ data.tar.gz: 45e86e63614eb9f5c96111f6ba11d9ec3aae89572dd3959987ca4d0a190cd2d6f5c8ab8ad516cdf68512ec1c1285117616493b3670b4538564c097ec1aaa0ede
data/lib/llm/error.rb CHANGED
@@ -35,10 +35,6 @@ module LLM
35
35
  # HTTPServerError
36
36
  ServerError = Class.new(Error)
37
37
 
38
- ##
39
- # When no images are found in a response
40
- NoImageError = Class.new(Error)
41
-
42
38
  ##
43
39
- # When given an input object that is not understood
44
40
  FormatError = Class.new(Error)
data/lib/llm/provider.rb CHANGED
@@ -45,7 +45,7 @@ class LLM::Provider
45
45
  # @return [String]
46
46
  # @note The secret key is redacted in inspect for security reasons
47
47
  def inspect
48
- "#<#{self.class.name}:0x#{object_id.to_s(16)} @key=[REDACTED] @http=#{@http.inspect}>"
48
+ "#<#{self.class.name}:0x#{object_id.to_s(16)} @key=[REDACTED] @client=#{@client.inspect}>"
49
49
  end
50
50
 
51
51
  ##
@@ -3,14 +3,12 @@
3
3
  class LLM::Gemini
4
4
  ##
5
5
  # The {LLM::Gemini::Images LLM::Gemini::Images} class provides an images
6
- # object for interacting with [Gemini's images API](https://ai.google.dev/gemini-api/docs/image-generation).
7
- # Please note that unlike OpenAI, which can return either URLs or base64-encoded strings,
8
- # Gemini's images API will always return an image as a base64 encoded string that
9
- # can be decoded into binary.
6
+ # object for interacting with Google's Imagen text-to-image models via the
7
+ # Imagen API: https://ai.google.dev/gemini-api/docs/imagen
8
+ #
10
9
  # @example
11
10
  # #!/usr/bin/env ruby
12
11
  # require "llm"
13
- #
14
12
  # llm = LLM.gemini(key: ENV["KEY"])
15
13
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
16
14
  # IO.copy_stream res.images[0], "rocket.png"
@@ -31,21 +29,30 @@ class LLM::Gemini
31
29
  # llm = LLM.gemini(key: ENV["KEY"])
32
30
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
33
31
  # IO.copy_stream res.images[0], "rocket.png"
34
- # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
32
+ # @see https://ai.google.dev/gemini-api/docs/imagen Imagen docs
35
33
  # @param [String] prompt The prompt
36
- # @param [Hash] params Other parameters (see Gemini docs)
34
+ # @param [Integer] n The number of images to generate
35
+ # @param [String] image_size The size of the image ("1K", "2K", etc.)
36
+ # @param [String] aspect_ratio The aspect ratio of the image ("1:1", "16:9", etc.)
37
+ # @param [String] person_generation Allow the model to generate images of people ("dont_allow", "allow_adult", "allow_all")
38
+ # @param [String] model The model to use
39
+ # @param [Hash] params Other parameters (see Imagen docs)
37
40
  # @raise (see LLM::Provider#request)
38
- # @raise [LLM::NoImageError] when no images are returned
39
41
  # @return [LLM::Response]
40
- def create(prompt:, model: "gemini-2.5-flash-image", **params)
41
- req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
42
+ def create(prompt:, n: 1, image_size: nil, aspect_ratio: nil, person_generation: nil, model: "imagen-4.0-generate-001", **params)
43
+ req = Net::HTTP::Post.new("/v1beta/models/#{model}:predict?key=#{key}", headers)
42
44
  body = LLM.json.dump({
43
- contents: [{parts: [{text: create_prompt}, {text: prompt}]}],
44
- generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
45
- }.merge!(params))
45
+ parameters: {
46
+ sampleCount: n,
47
+ imageSize: image_size,
48
+ aspectRatio: aspect_ratio,
49
+ personGeneration: person_generation
50
+ }.compact.merge!(params),
51
+ instances: [{prompt:}]
52
+ })
46
53
  req.body = body
47
54
  res = execute(request: req)
48
- validate ResponseAdapter.adapt(res, type: :image)
55
+ ResponseAdapter.adapt(res, type: :image)
49
56
  end
50
57
 
51
58
  ##
@@ -59,19 +66,10 @@ class LLM::Gemini
59
66
  # @param [String] prompt The prompt
60
67
  # @param [Hash] params Other parameters (see Gemini docs)
61
68
  # @raise (see LLM::Provider#request)
62
- # @raise [LLM::NoImageError] when no images are returned
63
69
  # @note (see LLM::Gemini::Images#create)
64
70
  # @return [LLM::Response]
65
71
  def edit(image:, prompt:, model: "gemini-2.5-flash-image", **params)
66
- req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
67
- image = LLM::Object.from(value: LLM.File(image), kind: :local_file)
68
- body = LLM.json.dump({
69
- contents: [{parts: [{text: edit_prompt}, {text: prompt}, adapter.adapt_content(image)]}],
70
- generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
71
- }.merge!(params)).b
72
- set_body_stream(req, StringIO.new(body))
73
- res = execute(request: req)
74
- validate ResponseAdapter.adapt(res, type: :image)
72
+ raise NotImplementedError, "image editing is not yet supported by Gemini"
75
73
  end
76
74
 
77
75
  ##
@@ -91,36 +89,6 @@ class LLM::Gemini
91
89
  @provider.instance_variable_get(:@key)
92
90
  end
93
91
 
94
- def create_prompt
95
- <<~PROMPT
96
- ## Context
97
- Your task is to generate one or more image(s) based on the user's instructions.
98
- The user will provide you with text only.
99
-
100
- ## Instructions
101
- 1. The model *MUST* generate image(s) based on the user text alone.
102
- 2. The model *MUST NOT* generate anything else.
103
- PROMPT
104
- end
105
-
106
- def edit_prompt
107
- <<~PROMPT
108
- ## Context
109
- Your task is to edit the provided image based on the user's instructions.
110
- The user will provide you with both text and an image.
111
-
112
- ## Instructions
113
- 1. The model *MUST* edit the provided image based on the user's instructions
114
- 2. The model *MUST NOT* generate a new image.
115
- 3. The model *MUST NOT* generate anything else.
116
- PROMPT
117
- end
118
-
119
- def validate(res)
120
- return res unless res.images.empty?
121
- raise LLM::NoImageError.new { _1.response = res.res }, "no images found in response"
122
- end
123
-
124
92
  [:headers, :execute, :set_body_stream].each do |m|
125
93
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
126
94
  end
@@ -5,13 +5,9 @@ module LLM::Gemini::ResponseAdapter
5
5
  ##
6
6
  # @return [Array<StringIO>]
7
7
  def images
8
- candidates.flat_map do |candidate|
9
- parts = candidate&.dig("content", "parts") || []
10
- parts.filter_map do
11
- data = _1.dig("inlineData", "data")
12
- next unless data
13
- StringIO.new(data.unpack1("m0"))
14
- end
8
+ (body.predictions || []).map do
9
+ b64 = _1["bytesBase64Encoded"]
10
+ StringIO.new(b64.unpack1("m0"))
15
11
  end
16
12
  end
17
13
 
@@ -22,10 +18,5 @@ module LLM::Gemini::ResponseAdapter
22
18
  # will always return an empty array.
23
19
  # @return [Array<String>]
24
20
  def urls = []
25
-
26
- ##
27
- # Returns one or more candidates, or an empty array
28
- # @return [Array<Hash>]
29
- def candidates = body.candidates || []
30
21
  end
31
22
  end
@@ -43,7 +43,7 @@ class LLM::Gemini
43
43
 
44
44
  def merge_candidates!(deltas)
45
45
  deltas.each do |delta|
46
- index = delta["index"]
46
+ index = delta["index"].to_i
47
47
  @body["candidates"][index] ||= {"content" => {"parts" => []}}
48
48
  candidate = @body["candidates"][index]
49
49
  delta.each do |key, value|
@@ -81,6 +81,8 @@ class LLM::Gemini
81
81
  parts << delta
82
82
  elsif delta["fileData"]
83
83
  parts << delta
84
+ else
85
+ parts << delta
84
86
  end
85
87
  end
86
88
  end
@@ -41,7 +41,7 @@ class LLM::OpenAI
41
41
  index = choice["index"]
42
42
  if @body["choices"][index]
43
43
  target_message = @body["choices"][index]["message"]
44
- delta = choice["delta"]
44
+ delta = choice["delta"] || {}
45
45
  delta.each do |key, value|
46
46
  if key == "content"
47
47
  target_message[key] ||= +""
@@ -56,7 +56,7 @@ class LLM::OpenAI
56
56
  else
57
57
  message_hash = {"role" => "assistant"}
58
58
  @body["choices"][index] = {"message" => message_hash}
59
- choice["delta"].each do |key, value|
59
+ (choice["delta"] || {}).each do |key, value|
60
60
  if key == "content"
61
61
  @io << value if @io.respond_to?(:<<)
62
62
  message_hash[key] = value
data/lib/llm/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LLM
4
- VERSION = "3.1.0"
4
+ VERSION = "4.0.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llm.rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Antar Azri