llm.rb 3.1.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa682f0c6793298daeaac88092cb52f03652cbbbf28adfd6b62f94b8a263f3f3
4
- data.tar.gz: 1fb08983372becef70d866bdc4ee79ee8d8bba55ace5d4be4637a69e91341747
3
+ metadata.gz: 70c5f60cafc446edf8d1be15367ca77eb89e467785c77fbc7f758c29e761e8db
4
+ data.tar.gz: db9ec411a0c441e471a19a98624e0973815867c28ab89f8bcebc108b4dc11b3b
5
5
  SHA512:
6
- metadata.gz: 720e09be8b25a9fde7d92887636d572edcdbd39a1b3a23ae1f44baaddb9f881c95927f63f16248f3a3d22da1704973f69f51309c487e1c97195175b772499b0d
7
- data.tar.gz: 8cf35f7829b4e66ef002652643779658cf9c8cf8726f8b563eb5ca59ebcfc3a71eeb9b4cc473dfc4556324448855b6733fe3d48a73fb6e70fb91102544eb7061
6
+ metadata.gz: f06f9c0367ad3d3428ce7c5046aebd37e4bfea9eac483b5c08a448bac58e9c205d3566a246d3022e9bd6d1669a9f9b5244a475262b24303d845464f7ec3ce4de
7
+ data.tar.gz: 45e86e63614eb9f5c96111f6ba11d9ec3aae89572dd3959987ca4d0a190cd2d6f5c8ab8ad516cdf68512ec1c1285117616493b3670b4538564c097ec1aaa0ede
data/lib/llm/error.rb CHANGED
@@ -35,10 +35,6 @@ module LLM
35
35
  # HTTPServerError
36
36
  ServerError = Class.new(Error)
37
37
 
38
- ##
39
- # When no images are found in a response
40
- NoImageError = Class.new(Error)
41
-
42
38
  ##
43
39
- # When given an input object that is not understood
44
40
  FormatError = Class.new(Error)
data/lib/llm/provider.rb CHANGED
@@ -45,7 +45,7 @@ class LLM::Provider
45
45
  # @return [String]
46
46
  # @note The secret key is redacted in inspect for security reasons
47
47
  def inspect
48
- "#<#{self.class.name}:0x#{object_id.to_s(16)} @key=[REDACTED] @http=#{@http.inspect}>"
48
+ "#<#{self.class.name}:0x#{object_id.to_s(16)} @key=[REDACTED] @client=#{@client.inspect}>"
49
49
  end
50
50
 
51
51
  ##
@@ -3,14 +3,12 @@
3
3
  class LLM::Gemini
4
4
  ##
5
5
  # The {LLM::Gemini::Images LLM::Gemini::Images} class provides an images
6
- # object for interacting with [Gemini's images API](https://ai.google.dev/gemini-api/docs/image-generation).
7
- # Please note that unlike OpenAI, which can return either URLs or base64-encoded strings,
8
- # Gemini's images API will always return an image as a base64 encoded string that
9
- # can be decoded into binary.
6
+ # object for interacting with Google's Imagen text-to-image models via the
7
+ # Imagen API: https://ai.google.dev/gemini-api/docs/imagen
8
+ #
10
9
  # @example
11
10
  # #!/usr/bin/env ruby
12
11
  # require "llm"
13
- #
14
12
  # llm = LLM.gemini(key: ENV["KEY"])
15
13
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
16
14
  # IO.copy_stream res.images[0], "rocket.png"
@@ -31,21 +29,30 @@ class LLM::Gemini
31
29
  # llm = LLM.gemini(key: ENV["KEY"])
32
30
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
33
31
  # IO.copy_stream res.images[0], "rocket.png"
34
- # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
32
+ # @see https://ai.google.dev/gemini-api/docs/imagen Imagen docs
35
33
  # @param [String] prompt The prompt
36
- # @param [Hash] params Other parameters (see Gemini docs)
34
+ # @param [Integer] n The number of images to generate
35
+ # @param [String] image_size The size of the image ("1K", "2K", etc.)
36
+ # @param [String] aspect_ratio The aspect ratio of the image ("1:1", "16:9", etc.)
37
+ # @param [String] person_generation Allow the model to generate images of people ("dont_allow", "allow_adult", "allow_all")
38
+ # @param [String] model The model to use
39
+ # @param [Hash] params Other parameters (see Imagen docs)
37
40
  # @raise (see LLM::Provider#request)
38
- # @raise [LLM::NoImageError] when no images are returned
39
41
  # @return [LLM::Response]
40
- def create(prompt:, model: "gemini-2.5-flash-image", **params)
41
- req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
42
+ def create(prompt:, n: 1, image_size: nil, aspect_ratio: nil, person_generation: nil, model: "imagen-4.0-generate-001", **params)
43
+ req = Net::HTTP::Post.new("/v1beta/models/#{model}:predict?key=#{key}", headers)
42
44
  body = LLM.json.dump({
43
- contents: [{parts: [{text: create_prompt}, {text: prompt}]}],
44
- generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
45
- }.merge!(params))
45
+ parameters: {
46
+ sampleCount: n,
47
+ imageSize: image_size,
48
+ aspectRatio: aspect_ratio,
49
+ personGeneration: person_generation
50
+ }.compact.merge!(params),
51
+ instances: [{prompt:}]
52
+ })
46
53
  req.body = body
47
54
  res = execute(request: req)
48
- validate ResponseAdapter.adapt(res, type: :image)
55
+ ResponseAdapter.adapt(res, type: :image)
49
56
  end
50
57
 
51
58
  ##
@@ -59,19 +66,10 @@ class LLM::Gemini
59
66
  # @param [String] prompt The prompt
60
67
  # @param [Hash] params Other parameters (see Gemini docs)
61
68
  # @raise (see LLM::Provider#request)
62
- # @raise [LLM::NoImageError] when no images are returned
63
69
  # @note (see LLM::Gemini::Images#create)
64
70
  # @return [LLM::Response]
65
71
  def edit(image:, prompt:, model: "gemini-2.5-flash-image", **params)
66
- req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
67
- image = LLM::Object.from(value: LLM.File(image), kind: :local_file)
68
- body = LLM.json.dump({
69
- contents: [{parts: [{text: edit_prompt}, {text: prompt}, adapter.adapt_content(image)]}],
70
- generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
71
- }.merge!(params)).b
72
- set_body_stream(req, StringIO.new(body))
73
- res = execute(request: req)
74
- validate ResponseAdapter.adapt(res, type: :image)
72
+ raise NotImplementedError, "image editing is not yet supported by Gemini"
75
73
  end
76
74
 
77
75
  ##
@@ -91,36 +89,6 @@ class LLM::Gemini
91
89
  @provider.instance_variable_get(:@key)
92
90
  end
93
91
 
94
- def create_prompt
95
- <<~PROMPT
96
- ## Context
97
- Your task is to generate one or more image(s) based on the user's instructions.
98
- The user will provide you with text only.
99
-
100
- ## Instructions
101
- 1. The model *MUST* generate image(s) based on the user text alone.
102
- 2. The model *MUST NOT* generate anything else.
103
- PROMPT
104
- end
105
-
106
- def edit_prompt
107
- <<~PROMPT
108
- ## Context
109
- Your task is to edit the provided image based on the user's instructions.
110
- The user will provide you with both text and an image.
111
-
112
- ## Instructions
113
- 1. The model *MUST* edit the provided image based on the user's instructions
114
- 2. The model *MUST NOT* generate a new image.
115
- 3. The model *MUST NOT* generate anything else.
116
- PROMPT
117
- end
118
-
119
- def validate(res)
120
- return res unless res.images.empty?
121
- raise LLM::NoImageError.new { _1.response = res.res }, "no images found in response"
122
- end
123
-
124
92
  [:headers, :execute, :set_body_stream].each do |m|
125
93
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
126
94
  end
@@ -5,13 +5,9 @@ module LLM::Gemini::ResponseAdapter
5
5
  ##
6
6
  # @return [Array<StringIO>]
7
7
  def images
8
- candidates.flat_map do |candidate|
9
- parts = candidate&.dig("content", "parts") || []
10
- parts.filter_map do
11
- data = _1.dig("inlineData", "data")
12
- next unless data
13
- StringIO.new(data.unpack1("m0"))
14
- end
8
+ (body.predictions || []).map do
9
+ b64 = _1["bytesBase64Encoded"]
10
+ StringIO.new(b64.unpack1("m0"))
15
11
  end
16
12
  end
17
13
 
@@ -22,10 +18,5 @@ module LLM::Gemini::ResponseAdapter
22
18
  # will always return an empty array.
23
19
  # @return [Array<String>]
24
20
  def urls = []
25
-
26
- ##
27
- # Returns one or more candidates, or an empty array
28
- # @return [Array<Hash>]
29
- def candidates = body.candidates || []
30
21
  end
31
22
  end
@@ -43,7 +43,7 @@ class LLM::Gemini
43
43
 
44
44
  def merge_candidates!(deltas)
45
45
  deltas.each do |delta|
46
- index = delta["index"]
46
+ index = delta["index"].to_i
47
47
  @body["candidates"][index] ||= {"content" => {"parts" => []}}
48
48
  candidate = @body["candidates"][index]
49
49
  delta.each do |key, value|
@@ -81,6 +81,8 @@ class LLM::Gemini
81
81
  parts << delta
82
82
  elsif delta["fileData"]
83
83
  parts << delta
84
+ else
85
+ parts << delta
84
86
  end
85
87
  end
86
88
  end
@@ -41,7 +41,7 @@ class LLM::OpenAI
41
41
  index = choice["index"]
42
42
  if @body["choices"][index]
43
43
  target_message = @body["choices"][index]["message"]
44
- delta = choice["delta"]
44
+ delta = choice["delta"] || {}
45
45
  delta.each do |key, value|
46
46
  if key == "content"
47
47
  target_message[key] ||= +""
@@ -56,7 +56,7 @@ class LLM::OpenAI
56
56
  else
57
57
  message_hash = {"role" => "assistant"}
58
58
  @body["choices"][index] = {"message" => message_hash}
59
- choice["delta"].each do |key, value|
59
+ (choice["delta"] || {}).each do |key, value|
60
60
  if key == "content"
61
61
  @io << value if @io.respond_to?(:<<)
62
62
  message_hash[key] = value
data/lib/llm/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LLM
4
- VERSION = "3.1.0"
4
+ VERSION = "4.0.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llm.rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Antar Azri