llm.rb 0.4.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/README.md +173 -115
  3. data/lib/json/schema/array.rb +5 -0
  4. data/lib/json/schema/boolean.rb +4 -0
  5. data/lib/json/schema/integer.rb +23 -1
  6. data/lib/json/schema/leaf.rb +11 -0
  7. data/lib/json/schema/null.rb +4 -0
  8. data/lib/json/schema/number.rb +23 -1
  9. data/lib/json/schema/object.rb +6 -2
  10. data/lib/json/schema/string.rb +26 -1
  11. data/lib/json/schema/version.rb +2 -0
  12. data/lib/json/schema.rb +10 -10
  13. data/lib/llm/buffer.rb +31 -12
  14. data/lib/llm/chat.rb +56 -29
  15. data/lib/llm/core_ext/ostruct.rb +14 -8
  16. data/lib/llm/file.rb +6 -1
  17. data/lib/llm/function.rb +86 -0
  18. data/lib/llm/message.rb +54 -2
  19. data/lib/llm/provider.rb +32 -46
  20. data/lib/llm/providers/anthropic/format/completion_format.rb +73 -0
  21. data/lib/llm/providers/anthropic/format.rb +8 -33
  22. data/lib/llm/providers/anthropic/response_parser/completion_parser.rb +51 -0
  23. data/lib/llm/providers/anthropic/response_parser.rb +1 -9
  24. data/lib/llm/providers/anthropic.rb +14 -14
  25. data/lib/llm/providers/gemini/audio.rb +9 -9
  26. data/lib/llm/providers/gemini/files.rb +11 -10
  27. data/lib/llm/providers/gemini/format/completion_format.rb +54 -0
  28. data/lib/llm/providers/gemini/format.rb +20 -27
  29. data/lib/llm/providers/gemini/images.rb +12 -7
  30. data/lib/llm/providers/gemini/models.rb +3 -3
  31. data/lib/llm/providers/gemini/response_parser/completion_parser.rb +46 -0
  32. data/lib/llm/providers/gemini/response_parser.rb +13 -20
  33. data/lib/llm/providers/gemini.rb +10 -20
  34. data/lib/llm/providers/ollama/format/completion_format.rb +72 -0
  35. data/lib/llm/providers/ollama/format.rb +11 -30
  36. data/lib/llm/providers/ollama/response_parser/completion_parser.rb +42 -0
  37. data/lib/llm/providers/ollama/response_parser.rb +8 -11
  38. data/lib/llm/providers/ollama.rb +9 -17
  39. data/lib/llm/providers/openai/audio.rb +6 -6
  40. data/lib/llm/providers/openai/files.rb +3 -3
  41. data/lib/llm/providers/openai/format/completion_format.rb +83 -0
  42. data/lib/llm/providers/openai/format/respond_format.rb +69 -0
  43. data/lib/llm/providers/openai/format.rb +27 -58
  44. data/lib/llm/providers/openai/images.rb +4 -2
  45. data/lib/llm/providers/openai/response_parser/completion_parser.rb +55 -0
  46. data/lib/llm/providers/openai/response_parser/respond_parser.rb +56 -0
  47. data/lib/llm/providers/openai/response_parser.rb +8 -44
  48. data/lib/llm/providers/openai/responses.rb +13 -14
  49. data/lib/llm/providers/openai.rb +11 -23
  50. data/lib/llm/providers/voyageai.rb +4 -4
  51. data/lib/llm/response/{output.rb → respond.rb} +2 -2
  52. data/lib/llm/response.rb +1 -1
  53. data/lib/llm/version.rb +1 -1
  54. data/lib/llm.rb +38 -10
  55. data/llm.gemspec +1 -0
  56. metadata +28 -3
data/lib/llm/providers/anthropic/format/completion_format.rb (new file)
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module LLM::Anthropic::Format
+  ##
+  # @private
+  class CompletionFormat
+    ##
+    # @param [LLM::Message, Hash] message
+    #  The message to format
+    def initialize(message)
+      @message = message
+    end
+
+    ##
+    # Formats the message for the Anthropic chat completions API
+    # @return [Hash]
+    def format
+      catch(:abort) do
+        if Hash === message
+          {role: message[:role], content: format_content(message[:content])}
+        else
+          format_message
+        end
+      end
+    end
+
+    private
+
+    def format_message
+      if message.tool_call?
+        {role: message.role, content: message.extra[:original_tool_calls]}
+      else
+        {role: message.role, content: format_content(content)}
+      end
+    end
+
+    ##
+    # @param [String, URI] content
+    #  The content to format
+    # @return [String, Hash]
+    #  The formatted content
+    def format_content(content)
+      case content
+      when Hash
+        content.empty? ? throw(:abort, nil) : [content]
+      when Array
+        content.empty? ? throw(:abort, nil) : content.flat_map { format_content(_1) }
+      when URI
+        [{type: :image, source: {type: "url", url: content.to_s}}]
+      when LLM::File
+        if content.image?
+          [{type: :image, source: {type: "base64", media_type: content.mime_type, data: content.to_b64}}]
+        else
+          raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+                                         "is not an image, and therefore not supported by the " \
+                                         "Anthropic API"
+        end
+      when String
+        [{type: :text, text: content}]
+      when LLM::Message
+        format_content(content.content)
+      when LLM::Function::Return
+        [{type: "tool_result", tool_use_id: content.id, content: [{type: :text, text: JSON.dump(content.value)}]}]
+      else
+        raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+                                       "is not supported by the Anthropic API"
+      end
+    end
+
+    def message = @message
+    def content = message.content
+  end
+end
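The formatter's behavior can be read directly off the case expression above: a String becomes a single text part, while empty Hash or Array content throws :abort, which makes format return nil. A minimal sketch of the assumed usage (input values illustrative):

  fmt = LLM::Anthropic::Format::CompletionFormat.new({role: :user, content: "Hello"})
  fmt.format # => {role: :user, content: [{type: :text, text: "Hello"}]}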
data/lib/llm/providers/anthropic/format.rb
@@ -4,49 +4,24 @@ class LLM::Anthropic
   ##
   # @private
   module Format
+    require_relative "format/completion_format"
+
     ##
     # @param [Array<LLM::Message>] messages
     #  The messages to format
     # @return [Array<Hash>]
     def format(messages)
-      messages.map do
-        if Hash === _1
-          {role: _1[:role], content: format_content(_1[:content])}
-        else
-          {role: _1.role, content: format_content(_1.content)}
-        end
+      messages.filter_map do
+        CompletionFormat.new(_1).format
       end
     end

     private

-    ##
-    # @param [String, URI] content
-    #  The content to format
-    # @return [String, Hash]
-    #  The formatted content
-    def format_content(content)
-      case content
-      when Array
-        content.flat_map { format_content(_1) }
-      when URI
-        [{type: :image, source: {type: "url", url: content.to_s}}]
-      when LLM::File
-        if content.image?
-          [{type: :image, source: {type: "base64", media_type: content.mime_type, data: content.to_b64}}]
-        else
-          raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                         "is not an image, and therefore not supported by the " \
-                                         "Anthropic API"
-        end
-      when String
-        [{type: :text, text: content}]
-      when LLM::Message
-        format_content(content.content)
-      else
-        raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                       "is not supported by the Anthropic API"
-      end
+    def format_tools(params)
+      return {} unless params and params[:tools]&.any?
+      tools = params[:tools]
+      {tools: tools.map { _1.format(self) }}
     end
   end
 end
data/lib/llm/providers/anthropic/response_parser/completion_parser.rb (new file)
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module LLM::Anthropic::ResponseParser
+  ##
+  # @private
+  class CompletionParser
+    def initialize(body)
+      @body = OpenStruct.from_hash(body)
+    end
+
+    def format(response)
+      {
+        model:,
+        prompt_tokens:,
+        completion_tokens:,
+        total_tokens:,
+        choices: format_choices(response)
+      }
+    end
+
+    private
+
+    def format_choices(response)
+      texts.map.with_index do |choice, index|
+        extra = {index:, response:, tool_calls: format_tool_calls(tools), original_tool_calls: tools}
+        LLM::Message.new(role, choice.text, extra)
+      end
+    end
+
+    def format_tool_calls(tools)
+      (tools || []).filter_map do |tool|
+        tool = {
+          id: tool.id,
+          name: tool.name,
+          arguments: tool.input
+        }
+        OpenStruct.new(tool)
+      end
+    end
+
+    def body = @body
+    def role = body.role
+    def model = body.model
+    def prompt_tokens = body.usage.input_tokens
+    def completion_tokens = body.usage.output_tokens
+    def total_tokens = body.usage.total_tokens
+    def parts = body.content
+    def texts = parts.select { _1["type"] == "text" }
+    def tools = parts.select { _1["type"] == "tool_use" }
+  end
+end
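The parser works against an OpenStruct view of the raw response body. A hand-written body in the shape the accessors above expect (field names taken from the code; values illustrative) shows what format produces:

  body = {
    "model" => "claude-3-5-sonnet-latest",
    "role" => "assistant",
    "content" => [{"type" => "text", "text" => "Hello!"}],
    "usage" => {"input_tokens" => 9, "output_tokens" => 3, "total_tokens" => 12}
  }
  parser = LLM::Anthropic::ResponseParser::CompletionParser.new(body)
  # parser.format(response) would yield model/token counts plus one LLM::Message
  # with role "assistant" and content "Hello!"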
data/lib/llm/providers/anthropic/response_parser.rb
@@ -17,15 +17,7 @@ class LLM::Anthropic
     #  The response body from the LLM provider
     # @return [Hash]
     def parse_completion(body)
-      {
-        model: body["model"],
-        choices: body["content"].map do
-          # TODO: don't hardcode role
-          LLM::Message.new("assistant", _1["text"], {response: self})
-        end,
-        prompt_tokens: body.dig("usage", "input_tokens"),
-        completion_tokens: body.dig("usage", "output_tokens")
-      }
+      CompletionParser.new(body).format(self)
     end
   end
 end
data/lib/llm/providers/anthropic.rb
@@ -7,6 +7,7 @@ module LLM
   class Anthropic < Provider
     require_relative "anthropic/error_handler"
     require_relative "anthropic/response_parser"
+    require_relative "anthropic/response_parser/completion_parser"
     require_relative "anthropic/format"
     require_relative "anthropic/models"
     include Format
@@ -14,25 +15,25 @@ module LLM
     HOST = "api.anthropic.com"

     ##
-    # @param secret (see LLM::Provider#initialize)
-    def initialize(secret, **)
-      super(secret, host: HOST, **)
+    # @param key (see LLM::Provider#initialize)
+    def initialize(**)
+      super(host: HOST, **)
     end

     ##
     # Provides an embedding via VoyageAI per
     # [Anthropic's recommendation](https://docs.anthropic.com/en/docs/build-with-claude/embeddings)
     # @param input (see LLM::Provider#embed)
-    # @param [String] token
-    #  Valid token for the VoyageAI API
+    # @param [String] key
+    #  Valid key for the VoyageAI API
     # @param [String] model
     #  The embedding model to use
     # @param [Hash] params
     #  Other embedding parameters
     # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, token:, model: "voyage-2", **params)
-      llm = LLM.voyageai(token)
+    def embed(input, key:, model: "voyage-2", **params)
+      llm = LLM.voyageai(key:)
       llm.embed(input, **params.merge(model:))
     end

@@ -40,20 +41,19 @@ module LLM
     # Provides an interface to the chat completions API
     # @see https://docs.anthropic.com/en/api/messages Anthropic docs
     # @param prompt (see LLM::Provider#complete)
-    # @param role (see LLM::Provider#complete)
-    # @param model (see LLM::Provider#complete)
-    # @param max_tokens The maximum number of tokens to generate
     # @param params (see LLM::Provider#complete)
     # @example (see LLM::Provider#complete)
     # @raise (see LLM::Provider#request)
     # @raise [LLM::Error::PromptError]
     #  When given an object a provider does not understand
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, model: default_model, max_tokens: 1024, **params)
-      params = {max_tokens:, model:}.merge!(params)
+    def complete(prompt, params = {})
+      params = {role: :user, model: default_model, max_tokens: 1024}.merge!(params)
+      params = [params, format_tools(params)].inject({}, &:merge!).compact
+      role = params.delete(:role)
       req = Net::HTTP::Post.new("/v1/messages", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
-      body = JSON.dump({messages: format(messages)}.merge!(params))
+      body = JSON.dump({messages: [format(messages)].flatten}.merge!(params))
       set_body_stream(req, StringIO.new(body))
       res = request(@http, req)
       Response::Completion.new(res).extend(response_parser)
@@ -86,7 +86,7 @@ module LLM
     def headers
       {
         "Content-Type" => "application/json",
-        "x-api-key" => @secret,
+        "x-api-key" => @key,
         "anthropic-version" => "2023-06-01"
       }
     end
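Taken together, these hunks change the public calling convention: the API key moves from a positional argument to a keyword, and complete collapses role, model, and max_tokens into a single options hash. A sketch of the assumed 0.6.0 style, assuming LLM.anthropic mirrors the LLM.voyageai(key:) constructor shown above (model and option values illustrative):

  llm = LLM.anthropic(key: ENV["ANTHROPIC_API_KEY"])
  res = llm.complete("Hello, world", role: :user, max_tokens: 512)
  res.choices.each { print "[#{_1.role}] ", _1.content, "\n" }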
data/lib/llm/providers/gemini/audio.rb
@@ -9,7 +9,7 @@ class LLM::Gemini
   #  require "llm"
   #
   #  llm = LLM.gemini(ENV["KEY"])
-  #  res = llm.audio.create_transcription(input: LLM::File("/rocket.mp3"))
+  #  res = llm.audio.create_transcription(input: "/audio/rocket.mp3")
   #  res.text # => "A dog on a rocket to the moon"
   class Audio
     ##
@@ -31,10 +31,10 @@ class LLM::Gemini
     # Create an audio transcription
     # @example
     #   llm = LLM.gemini(ENV["KEY"])
-    #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+    #   res = llm.audio.create_transcription(file: "/audio/rocket.mp3")
     #   res.text # => "A dog on a rocket to the moon"
     # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
-    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String, LLM::File, LLM::Response::File] file The input audio
     # @param [String] model The model to use
     # @param [Hash] params Other parameters (see Gemini docs)
     # @raise (see LLM::Provider#request)
@@ -43,8 +43,8 @@ class LLM::Gemini
       res = @provider.complete [
         "Your task is to transcribe the contents of an audio file",
         "Your response should include the transcription, and nothing else",
-        file
-      ], :user, model:, **params
+        LLM.File(file)
+      ], params.merge(role: :user, model:)
       LLM::Response::AudioTranscription
         .new(res)
         .tap { _1.text = res.choices[0].content }
@@ -55,10 +55,10 @@ class LLM::Gemini
     # @example
     #   # Arabic => English
     #   llm = LLM.gemini(ENV["KEY"])
-    #   res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+    #   res = llm.audio.create_translation(file: "/audio/bismillah.mp3")
     #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
     # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
-    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String, LLM::File, LLM::Response::File] file The input audio
     # @param [String] model The model to use
     # @param [Hash] params Other parameters (see Gemini docs)
     # @raise (see LLM::Provider#request)
@@ -67,8 +67,8 @@ class LLM::Gemini
       res = @provider.complete [
         "Your task is to translate the contents of an audio file into English",
         "Your response should include the translation, and nothing else",
-        file
-      ], :user, model:, **params
+        LLM.File(file)
+      ], params.merge(role: :user, model:)
       LLM::Response::AudioTranslation
         .new(res)
         .tap { _1.text = res.choices[0].content }
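With these changes the audio helpers accept a bare path, coerce it through LLM.File, and forward per-call options as a single params hash, matching the new Provider#complete(prompt, params = {}) signature seen earlier in this diff. A sketch (the model name is illustrative):

  llm = LLM.gemini(ENV["KEY"])
  res = llm.audio.create_transcription(file: "/audio/rocket.mp3", model: "gemini-1.5-flash")
  res.text # => "A dog on a rocket to the moon"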
data/lib/llm/providers/gemini/files.rb
@@ -19,7 +19,7 @@ class LLM::Gemini
   #
   #  llm = LLM.gemini(ENV["KEY"])
   #  bot = LLM::Chat.new(llm).lazy
-  #  file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #  file = llm.files.create file: "/audio/haiku.mp3"
   #  bot.chat(file)
   #  bot.chat("Describe the audio file I sent to you")
   #  bot.chat("The audio file is the first message I sent to you.")
@@ -30,7 +30,7 @@ class LLM::Gemini
   #
   #  llm = LLM.gemini(ENV["KEY"])
   #  bot = LLM::Chat.new(llm).lazy
-  #  file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #  file = llm.files.create file: "/audio/haiku.mp3"
   #  bot.chat(["Describe the audio file I sent to you", file])
   #  bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
   class Files
@@ -55,7 +55,7 @@ class LLM::Gemini
     # @raise (see LLM::Provider#request)
     # @return [LLM::Response::FileList]
     def all(**params)
-      query = URI.encode_www_form(params.merge!(key: secret))
+      query = URI.encode_www_form(params.merge!(key: key))
       req = Net::HTTP::Get.new("/v1beta/files?#{query}", headers)
       res = request(http, req)
       LLM::Response::FileList.new(res).tap { |filelist|
@@ -71,13 +71,14 @@ class LLM::Gemini
     # Create a file
     # @example
     #   llm = LLM.gemini(ENV["KEY"])
-    #   res = llm.files.create file: LLM::File("/audio/haiku.mp3"),
+    #   res = llm.files.create file: "/audio/haiku.mp3"
     # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
-    # @param [File] file The file
+    # @param [String, LLM::File] file The file
     # @param [Hash] params Other parameters (see Gemini docs)
     # @raise (see LLM::Provider#request)
     # @return [LLM::Response::File]
     def create(file:, **params)
+      file = LLM.File(file)
       req = Net::HTTP::Post.new(request_upload_url(file:), {})
       req["content-length"] = file.bytesize
       req["X-Goog-Upload-Offset"] = 0
@@ -102,7 +103,7 @@ class LLM::Gemini
     # @return [LLM::Response::File]
     def get(file:, **params)
       file_id = file.respond_to?(:name) ? file.name : file.to_s
-      query = URI.encode_www_form(params.merge!(key: secret))
+      query = URI.encode_www_form(params.merge!(key: key))
       req = Net::HTTP::Get.new("/v1beta/#{file_id}?#{query}", headers)
       res = request(http, req)
       LLM::Response::File.new(res)
@@ -120,7 +121,7 @@ class LLM::Gemini
     # @return [LLM::Response::File]
     def delete(file:, **params)
       file_id = file.respond_to?(:name) ? file.name : file.to_s
-      query = URI.encode_www_form(params.merge!(key: secret))
+      query = URI.encode_www_form(params.merge!(key: key))
       req = Net::HTTP::Delete.new("/v1beta/#{file_id}?#{query}", headers)
       request(http, req)
     end
@@ -137,7 +138,7 @@ class LLM::Gemini
     include LLM::Utils

     def request_upload_url(file:)
-      req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{secret}", headers)
+      req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{key}", headers)
       req["X-Goog-Upload-Protocol"] = "resumable"
       req["X-Goog-Upload-Command"] = "start"
       req["X-Goog-Upload-Header-Content-Length"] = file.bytesize
@@ -151,8 +152,8 @@ class LLM::Gemini
       @provider.instance_variable_get(:@http)
     end

-    def secret
-      @provider.instance_variable_get(:@secret)
+    def key
+      @provider.instance_variable_get(:@key)
     end

     [:headers, :request, :set_body_stream].each do |m|
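Because create now calls LLM.File(file) before building the upload request, both of the following forms should be equivalent (path illustrative; this assumes the LLM.File coercion added in data/lib/llm.rb in this release passes LLM::File instances through unchanged):

  llm.files.create file: "/audio/haiku.mp3"
  llm.files.create file: LLM.File("/audio/haiku.mp3")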
data/lib/llm/providers/gemini/format/completion_format.rb (new file)
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module LLM::Gemini::Format
+  ##
+  # @private
+  class CompletionFormat
+    ##
+    # @param [LLM::Message, Hash] message
+    #  The message to format
+    def initialize(message)
+      @message = message
+    end
+
+    ##
+    # Formats the message for the Gemini chat completions API
+    # @return [Hash]
+    def format
+      catch(:abort) do
+        if Hash === message
+          {role: message[:role], parts: format_content(message[:content])}
+        elsif message.tool_call?
+          {role: message.role, parts: message.extra[:original_tool_calls].map { {"functionCall" => _1} }}
+        else
+          {role: message.role, parts: format_content(message.content)}
+        end
+      end
+    end
+
+    def format_content(content)
+      case content
+      when Array
+        content.empty? ? throw(:abort, nil) : content.flat_map { format_content(_1) }
+      when LLM::Response::File
+        file = content
+        [{file_data: {mime_type: file.mime_type, file_uri: file.uri}}]
+      when LLM::File
+        file = content
+        [{inline_data: {mime_type: file.mime_type, data: file.to_b64}}]
+      when String
+        [{text: content}]
+      when LLM::Message
+        format_content(content.content)
+      when LLM::Function::Return
+        [{text: JSON.dump(content.value)}]
+      else
+        raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+                                       "is not supported by the Gemini API"
+      end
+    end
+
+    def message = @message
+    def content = message.content
+  end
+end
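As with the Anthropic formatter, the behavior is readable off the case expression: strings become text parts, uploaded files become file_data parts, and local files become inline_data parts. A minimal sketch (input values illustrative):

  fmt = LLM::Gemini::Format::CompletionFormat.new({role: :user, content: "Hello"})
  fmt.format # => {role: :user, parts: [{text: "Hello"}]}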
data/lib/llm/providers/gemini/format.rb
@@ -4,45 +4,38 @@ class LLM::Gemini
   ##
   # @private
   module Format
+    require_relative "format/completion_format"
+
     ##
     # @param [Array<LLM::Message>] messages
     #  The messages to format
     # @return [Array<Hash>]
     def format(messages)
-      messages.map do
-        if Hash === _1
-          {role: _1[:role], parts: [format_content(_1[:content])]}
-        else
-          {role: _1.role, parts: [format_content(_1.content)]}
-        end
+      messages.filter_map do |message|
+        CompletionFormat.new(message).format
       end
     end

     private

     ##
-    # @param [String, Array, LLM::Response::File, LLM::File] content
-    #  The content to format
+    # @param [JSON::Schema] schema
+    #  The schema to format
     # @return [Hash]
-    #  The formatted content
-    def format_content(content)
-      case content
-      when Array
-        content.map { format_content(_1) }
-      when LLM::Response::File
-        file = content
-        {file_data: {mime_type: file.mime_type, file_uri: file.uri}}
-      when LLM::File
-        file = content
-        {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
-      when String
-        {text: content}
-      when LLM::Message
-        format_content(content.content)
-      else
-        raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                       "is not supported by the Gemini API"
-      end
+    def format_schema(params)
+      return {} unless params and params[:schema]
+      schema = params.delete(:schema)
+      {generationConfig: {response_mime_type: "application/json", response_schema: schema}}
+    end
+
+    ##
+    # @param [Array<LLM::Function>] tools
+    #  The tools to format
+    # @return [Hash]
+    def format_tools(params)
+      return {} unless params and params[:tools]&.any?
+      functions = params.delete(:tools).grep(LLM::Function)
+      {tools: {functionDeclarations: functions.map { _1.format(self) }}}
    end
  end
end
data/lib/llm/providers/gemini/images.rb
@@ -41,7 +41,7 @@ class LLM::Gemini
     # Gemini implements image generation under the hood.
     # @return [LLM::Response::Image]
     def create(prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
-      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
       body = JSON.dump({
         contents: [{parts: {text: prompt}}],
         generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
@@ -58,16 +58,17 @@ class LLM::Gemini
     #   res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
     #   IO.copy_stream res.images[0], "hatoncat.png"
     # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
-    # @param [LLM::File] image The image to edit
+    # @param [String, LLM::File] image The image to edit
     # @param [String] prompt The prompt
     # @param [Hash] params Other parameters (see Gemini docs)
     # @raise (see LLM::Provider#request)
     # @note (see LLM::Gemini::Images#create)
     # @return [LLM::Response::Image]
     def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
-      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
-      body = JSON.dump({
-        contents: [{parts: [{text: prompt}, format_content(image)]}],
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
+      image = LLM.File(image)
+      body = JSON.dump({
+        contents: [{parts: [{text: prompt}, format.format_content(image)]}],
         generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
       }.merge!(params)).b
       set_body_stream(req, StringIO.new(body))
@@ -84,8 +85,12 @@ class LLM::Gemini

     private

-    def secret
-      @provider.instance_variable_get(:@secret)
+    def format
+      @format ||= CompletionFormat.new(nil)
+    end
+
+    def key
+      @provider.instance_variable_get(:@key)
     end

     def http
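Note that edit now coerces its argument via LLM.File and borrows CompletionFormat#format_content through the memoized format helper; CompletionFormat.new(nil) works here because only the content-formatting method is used, never the wrapped message. A sketch of the assumed call (file name illustrative):

  llm.images.edit image: "cat.png", prompt: "Add a hat to the cat"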
data/lib/llm/providers/gemini/models.rb
@@ -40,7 +40,7 @@ class LLM::Gemini
     # @raise (see LLM::Provider#request)
     # @return [LLM::Response::ModelList]
     def all(**params)
-      query = URI.encode_www_form(params.merge!(key: secret))
+      query = URI.encode_www_form(params.merge!(key: key))
       req = Net::HTTP::Get.new("/v1beta/models?#{query}", headers)
       res = request(http, req)
       LLM::Response::ModelList.new(res).tap { |modellist|
@@ -58,8 +58,8 @@ class LLM::Gemini
       @provider.instance_variable_get(:@http)
     end

-    def secret
-      @provider.instance_variable_get(:@secret)
+    def key
+      @provider.instance_variable_get(:@key)
     end

     [:headers, :request].each do |m|
data/lib/llm/providers/gemini/response_parser/completion_parser.rb (new file)
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module LLM::Gemini::ResponseParser
+  class CompletionParser
+    def initialize(body)
+      @body = OpenStruct.from_hash(body)
+    end
+
+    def format(response)
+      {
+        model:,
+        prompt_tokens:,
+        completion_tokens:,
+        total_tokens:,
+        choices: format_choices(response)
+      }
+    end
+
+    private
+
+    def format_choices(response)
+      candidates.map.with_index do |choice, index|
+        content = choice.content
+        role, parts = content.role, content.parts
+        text = parts.filter_map { _1["text"] }.join
+        tools = parts.filter_map { _1["functionCall"] }
+        extra = {index:, response:, tool_calls: format_tool_calls(tools), original_tool_calls: tools}
+        LLM::Message.new(role, text, extra)
+      end
+    end
+
+    def format_tool_calls(tools)
+      (tools || []).map do |tool|
+        function = {name: tool.name, arguments: tool.args}
+        OpenStruct.new(function)
+      end
+    end
+
+    def body = @body
+    def model = body.modelVersion
+    def prompt_tokens = body.usageMetadata.promptTokenCount
+    def completion_tokens = body.usageMetadata.candidatesTokenCount
+    def total_tokens = body.usageMetadata.totalTokenCount
+    def candidates = body.candidates
+  end
+end
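The accessor names here follow Gemini's camelCase response JSON (modelVersion, usageMetadata, functionCall). A sketch of how a function-calling candidate is parsed (part shape per the code above; values illustrative):

  part = {"functionCall" => {"name" => "system", "args" => {"command" => "ls"}}}
  # format_choices maps a candidate containing this part to an LLM::Message whose
  # extra[:tool_calls] holds OpenStruct(name: "system", arguments: {"command" => "ls"})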