llm.rb 0.2.1 → 0.3.1

Files changed (67)
  1. checksums.yaml +4 -4
  2. data/README.md +318 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/error.rb +3 -3
  6. data/lib/llm/file.rb +36 -40
  7. data/lib/llm/message.rb +21 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +100 -0
  10. data/lib/llm/provider.rb +123 -21
  11. data/lib/llm/providers/anthropic/error_handler.rb +3 -1
  12. data/lib/llm/providers/anthropic/format.rb +2 -0
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +4 -2
  17. data/lib/llm/providers/gemini/files.rb +162 -0
  18. data/lib/llm/providers/gemini/format.rb +12 -6
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +3 -1
  23. data/lib/llm/providers/ollama/format.rb +13 -5
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +3 -1
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +22 -8
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +85 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai/error_handler.rb +1 -1
  35. data/lib/llm/providers/voyageai.rb +2 -2
  36. data/lib/llm/response/audio.rb +13 -0
  37. data/lib/llm/response/audio_transcription.rb +14 -0
  38. data/lib/llm/response/audio_translation.rb +14 -0
  39. data/lib/llm/response/download_file.rb +15 -0
  40. data/lib/llm/response/file.rb +42 -0
  41. data/lib/llm/response/filelist.rb +18 -0
  42. data/lib/llm/response/image.rb +29 -0
  43. data/lib/llm/response/output.rb +56 -0
  44. data/lib/llm/response.rb +18 -6
  45. data/lib/llm/utils.rb +19 -0
  46. data/lib/llm/version.rb +1 -1
  47. data/lib/llm.rb +5 -2
  48. data/llm.gemspec +1 -6
  49. data/spec/anthropic/completion_spec.rb +1 -1
  50. data/spec/gemini/completion_spec.rb +1 -1
  51. data/spec/gemini/conversation_spec.rb +31 -0
  52. data/spec/gemini/files_spec.rb +124 -0
  53. data/spec/gemini/images_spec.rb +47 -0
  54. data/spec/llm/conversation_spec.rb +107 -62
  55. data/spec/ollama/completion_spec.rb +1 -1
  56. data/spec/ollama/conversation_spec.rb +31 -0
  57. data/spec/openai/audio_spec.rb +55 -0
  58. data/spec/openai/completion_spec.rb +5 -4
  59. data/spec/openai/files_spec.rb +204 -0
  60. data/spec/openai/images_spec.rb +95 -0
  61. data/spec/openai/responses_spec.rb +51 -0
  62. data/spec/setup.rb +8 -0
  63. metadata +31 -50
  64. data/LICENSE.txt +0 -21
  65. data/lib/llm/conversation.rb +0 -90
  66. data/lib/llm/http_client.rb +0 -29
  67. data/lib/llm/message_queue.rb +0 -54
--- a/data/lib/llm/providers/anthropic/response_parser.rb
+++ b/data/lib/llm/providers/anthropic/response_parser.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true

 class LLM::Anthropic
+  ##
+  # @private
   module ResponseParser
     def parse_embedding(body)
       {
@@ -19,7 +21,7 @@ class LLM::Anthropic
         model: body["model"],
         choices: body["content"].map do
           # TODO: don't hardcode role
-          LLM::Message.new("assistant", _1["text"], {completion: self})
+          LLM::Message.new("assistant", _1["text"], {response: self})
         end,
         prompt_tokens: body.dig("usage", "input_tokens"),
         completion_tokens: body.dig("usage", "output_tokens")
--- a/data/lib/llm/providers/anthropic.rb
+++ b/data/lib/llm/providers/anthropic.rb
@@ -24,21 +24,30 @@ module LLM
     # @param input (see LLM::Provider#embed)
     # @param [String] token
     #  Valid token for the VoyageAI API
+    # @param [String] model
+    #  The embedding model to use
     # @param [Hash] params
-    #  Additional parameters to pass to the API
+    #  Other embedding parameters
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, token:, **params)
+    def embed(input, token:, model: "voyage-2", **params)
       llm = LLM.voyageai(token)
-      llm.embed(input, **params)
+      llm.embed(input, **params.merge(model:))
     end

     ##
+    # Provides an interface to the chat completions API
     # @see https://docs.anthropic.com/en/api/messages Anthropic docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param max_tokens The maximum number of tokens to generate
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-      params = {max_tokens: 1024, model: "claude-3-5-sonnet-20240620"}.merge!(params)
+    def complete(prompt, role = :user, model: "claude-3-5-sonnet-20240620", max_tokens: 1024, **params)
+      params = {max_tokens:, model:}.merge!(params)
       req = Net::HTTP::Post.new("/v1/messages", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
       req.body = JSON.dump({messages: format(messages)}.merge!(params))
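
The signature change above promotes model and max_tokens from entries buried in **params to named keyword arguments with the same defaults as before. A minimal sketch of the new call style, assuming an LLM.anthropic(token) constructor symmetrical to the LLM.gemini and LLM.voyageai constructors that appear elsewhere in this diff:

#!/usr/bin/env ruby
require "llm"

# Hypothetical constructor; mirrors LLM.gemini(ENV["KEY"]) used in this diff.
llm = LLM.anthropic(ENV["ANTHROPIC_KEY"])

# model: and max_tokens: are now first-class keywords (defaulting to
# "claude-3-5-sonnet-20240620" and 1024), not hash entries in **params.
res = llm.complete("Hello, Claude", :user, max_tokens: 512)
print res.choices[0].content, "\n"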
--- /dev/null
+++ b/data/lib/llm/providers/gemini/audio.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Audio LLM::Gemini::Audio} class provides an audio
+  # object for interacting with [Gemini's audio API](https://ai.google.dev/gemini-api/docs/audio).
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+  #   res.text # => "A dog on a rocket to the moon"
+  class Audio
+    ##
+    # Returns a new Audio object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Audio]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_speech
+      raise NotImplementedError
+    end
+
+    ##
+    # Create an audio transcription
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+    #   res.text # => "A dog on a rocket to the moon"
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::AudioTranscription]
+    def create_transcription(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to transcribe the contents of an audio file",
+        "Your response should include the transcription, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranscription
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+
+    ##
+    # Create an audio translation (in English)
+    # @example
+    #   # Arabic => English
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+    #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::AudioTranslation]
+    def create_translation(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to translate the contents of an audio file into English",
+        "Your response should include the translation, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranslation
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+  end
+end
--- a/data/lib/llm/providers/gemini/error_handler.rb
+++ b/data/lib/llm/providers/gemini/error_handler.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true

 class LLM::Gemini
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]
@@ -25,12 +27,12 @@ class LLM::Gemini
         if reason == "API_KEY_INVALID"
           raise LLM::Error::Unauthorized.new { _1.response = res }, "Authentication error"
         else
-          raise LLM::Error::BadResponse.new { _1.response = res }, "Unexpected response"
+          raise LLM::Error::ResponseError.new { _1.response = res }, "Unexpected response"
         end
       when Net::HTTPTooManyRequests
         raise LLM::Error::RateLimit.new { _1.response = res }, "Too many requests"
       else
-        raise LLM::Error::BadResponse.new { _1.response = res }, "Unexpected response"
+        raise LLM::Error::ResponseError.new { _1.response = res }, "Unexpected response"
       end
     end

--- /dev/null
+++ b/data/lib/llm/providers/gemini/files.rb
@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Files LLM::Gemini::Files} class provides a files
+  # object for interacting with [Gemini's Files API](https://ai.google.dev/gemini-api/docs/files).
+  # The files API allows a client to upload media files that can then
+  # be referenced in prompts by their URL.
+  #
+  # The files API is intended to preserve bandwidth and latency,
+  # especially for large files, but it can be helpful for smaller files
+  # as well because it does not require the client to include a file
+  # in the prompt over and over again (which could be the case in a
+  # multi-turn conversation).
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #   bot.chat(file)
+  #   bot.chat("Describe the audio file I sent to you")
+  #   bot.chat("The audio file is the first message I sent to you.")
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #   bot.chat(["Describe the audio file I sent to you", file])
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  class Files
+    ##
+    # Returns a new Files object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Files]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # List all files
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.all
+    #   res.each do |file|
+    #     print "name: ", file.name, "\n"
+    #   end
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::FileList]
+    def all(**params)
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Get.new("/v1beta/files?#{query}", headers)
+      res = request(http, req)
+      LLM::Response::FileList.new(res).tap { |filelist|
+        files = filelist.body["files"]&.map do |file|
+          file = file.transform_keys { snakecase(_1) }
+          OpenStruct.from_hash(file)
+        end || []
+        filelist.files = files
+      }
+    end
+
+    ##
+    # Create a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.create file: LLM::File("/audio/haiku.mp3")
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [File] file The file
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::File]
+    def create(file:, **params)
+      req = Net::HTTP::Post.new(request_upload_url(file:), {})
+      req["content-length"] = file.bytesize
+      req["X-Goog-Upload-Offset"] = 0
+      req["X-Goog-Upload-Command"] = "upload, finalize"
+      file.with_io do |io|
+        req.body_stream = io
+        res = request(http, req)
+        LLM::Response::File.new(res)
+      end
+    end
+
+    ##
+    # Get a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.get(file: "files/1234567890")
+    #   print "name: ", res.name, "\n"
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [#name, String] file The file to get
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::File]
+    def get(file:, **params)
+      file_id = file.respond_to?(:name) ? file.name : file.to_s
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Get.new("/v1beta/#{file_id}?#{query}", headers)
+      res = request(http, req)
+      LLM::Response::File.new(res)
+    end
+
+    ##
+    # Delete a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.delete(file: "files/1234567890")
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [#name, String] file The file to delete
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::File]
+    def delete(file:, **params)
+      file_id = file.respond_to?(:name) ? file.name : file.to_s
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Delete.new("/v1beta/#{file_id}?#{query}", headers)
+      request(http, req)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def download
+      raise NotImplementedError
+    end
+
+    private
+
+    include LLM::Utils
+
+    def request_upload_url(file:)
+      req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{secret}", headers)
+      req["X-Goog-Upload-Protocol"] = "resumable"
+      req["X-Goog-Upload-Command"] = "start"
+      req["X-Goog-Upload-Header-Content-Length"] = file.bytesize
+      req["X-Goog-Upload-Header-Content-Type"] = file.mime_type
+      req.body = JSON.dump(file: {display_name: File.basename(file.path)})
+      res = request(http, req)
+      res["x-goog-upload-url"]
+    end
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    def secret
+      @provider.instance_variable_get(:@secret)
+    end
+
+    [:headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
--- a/data/lib/llm/providers/gemini/format.rb
+++ b/data/lib/llm/providers/gemini/format.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true

 class LLM::Gemini
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
@@ -19,16 +21,20 @@ class LLM::Gemini
     private

     ##
-    # @param [String, LLM::File] content
+    # @param [String, Array, LLM::Response::File, LLM::File] content
     #  The content to format
-    # @return [String, Hash]
+    # @return [Hash]
     #  The formatted content
     def format_content(content)
-      if LLM::File === content
+      case content
+      when Array
+        content.map { format_content(_1) }
+      when LLM::Response::File
         file = content
-        {
-          inline_data: {mime_type: file.mime_type, data: [File.binread(file.path)].pack("m0")}
-        }
+        {file_data: {mime_type: file.mime_type, file_uri: file.uri}}
+      when LLM::File
+        file = content
+        {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
       else
         {text: content}
       end
--- /dev/null
+++ b/data/lib/llm/providers/gemini/images.rb
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Images LLM::Gemini::Images} class provides an images
+  # object for interacting with [Gemini's images API](https://ai.google.dev/gemini-api/docs/image-generation).
+  # Please note that unlike OpenAI, which can return either URLs or base64-encoded strings,
+  # Gemini's images API will always return an image as a base64-encoded string that
+  # can be decoded into binary.
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.images.create prompt: "A dog on a rocket to the moon"
+  #   File.binwrite "rocket.png", res.images[0].binary
+  class Images
+    include Format
+
+    ##
+    # Returns a new Images object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Images]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # Create an image
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.images.create prompt: "A dog on a rocket to the moon"
+    #   File.binwrite "rocket.png", res.images[0].binary
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @param [String] prompt The prompt
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @note
+    #  The prompt should make it clear you want to generate an image, or you
+    #  might unexpectedly receive a purely textual response. This is due to how
+    #  Gemini implements image generation under the hood.
+    # @return [LLM::Response::Image]
+    def create(prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      body = JSON.dump({
+        contents: [{parts: {text: prompt}}],
+        generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
+      }.merge!(params))
+      req.body = body
+      res = request(http, req)
+      LLM::Response::Image.new(res).extend(response_parser)
+    end
+
+    ##
+    # Edit an image
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
+    #   File.binwrite "hatoncat.png", res.images[0].binary
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @param [LLM::File] image The image to edit
+    # @param [String] prompt The prompt
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @note (see LLM::Gemini::Images#create)
+    # @return [LLM::Response::Image]
+    def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      body = JSON.dump({
+        contents: [{parts: [{text: prompt}, format_content(image)]}],
+        generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
+      }.merge!(params)).b
+      req.body_stream = StringIO.new(body)
+      res = request(http, req)
+      LLM::Response::Image.new(res).extend(response_parser)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_variation
+      raise NotImplementedError
+    end
+
+    private
+
+    def secret
+      @provider.instance_variable_get(:@secret)
+    end
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    [:response_parser, :headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
--- a/data/lib/llm/providers/gemini/response_parser.rb
+++ b/data/lib/llm/providers/gemini/response_parser.rb
@@ -1,7 +1,13 @@
 # frozen_string_literal: true

 class LLM::Gemini
+  ##
+  # @private
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
     def parse_embedding(body)
       {
         model: "text-embedding-004",
@@ -20,12 +26,32 @@ class LLM::Gemini
           LLM::Message.new(
             _1.dig("content", "role"),
             _1.dig("content", "parts", 0, "text"),
-            {completion: self}
+            {response: self}
           )
         end,
         prompt_tokens: body.dig("usageMetadata", "promptTokenCount"),
         completion_tokens: body.dig("usageMetadata", "candidatesTokenCount")
       }
     end
+
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
+    def parse_image(body)
+      {
+        urls: [],
+        images: body["candidates"].flat_map do |candidate|
+          candidate["content"]["parts"].filter_map do
+            next unless _1.dig("inlineData", "data")
+            OpenStruct.from_hash(
+              mime_type: _1["inlineData"]["mimeType"],
+              encoded: _1["inlineData"]["data"],
+              binary: _1["inlineData"]["data"].unpack1("m0")
+            )
+          end
+        end
+      }
+    end
   end
 end
--- a/data/lib/llm/providers/gemini.rb
+++ b/data/lib/llm/providers/gemini.rb
@@ -3,11 +3,37 @@
 module LLM
   ##
   # The Gemini class implements a provider for
-  # [Gemini](https://ai.google.dev/)
+  # [Gemini](https://ai.google.dev/).
+  #
+  # The Gemini provider can accept multiple inputs (text, images,
+  # audio, and video). The inputs can be provided inline via the
+  # prompt for files under 20MB, or via the Gemini Files API for
+  # files that are over 20MB.
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat LLM::File("/images/capybara.png")
+  #   bot.chat "Describe the image"
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat ["Describe the image", LLM::File("/images/capybara.png")]
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
   class Gemini < Provider
     require_relative "gemini/error_handler"
     require_relative "gemini/response_parser"
     require_relative "gemini/format"
+    require_relative "gemini/images"
+    require_relative "gemini/files"
+    require_relative "gemini/audio"
     include Format

     HOST = "generativelanguage.googleapis.com"
@@ -19,10 +45,14 @@ module LLM
     end

     ##
+    # Provides an embedding
     # @param input (see LLM::Provider#embed)
+    # @param model (see LLM::Provider#embed)
+    # @param params (see LLM::Provider#embed)
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, **params)
-      path = ["/v1beta/models/text-embedding-004", "embedContent?key=#{@secret}"].join(":")
+    def embed(input, model: "text-embedding-004", **params)
+      path = ["/v1beta/models/#{model}", "embedContent?key=#{@secret}"].join(":")
       req = Net::HTTP::Post.new(path, headers)
       req.body = JSON.dump({content: {parts: [{text: input}]}})
       res = request(@http, req)
@@ -30,13 +60,17 @@ module LLM
     end

     ##
+    # Provides an interface to the chat completions API
     # @see https://ai.google.dev/api/generate-content#v1beta.models.generateContent Gemini docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-      params = {model: "gemini-1.5-flash"}.merge!(params)
-      path = ["/v1beta/models/#{params.delete(:model)}", "generateContent?key=#{@secret}"].join(":")
+    def complete(prompt, role = :user, model: "gemini-1.5-flash", **params)
+      path = ["/v1beta/models/#{model}", "generateContent?key=#{@secret}"].join(":")
       req = Net::HTTP::Post.new(path, headers)
       messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
       req.body = JSON.dump({contents: format(messages)})
@@ -44,6 +78,28 @@ module LLM
       Response::Completion.new(res).extend(response_parser)
     end

+    ##
+    # Provides an interface to Gemini's audio API
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    def audio
+      LLM::Gemini::Audio.new(self)
+    end
+
+    ##
+    # Provides an interface to Gemini's image generation API
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @return [LLM::Gemini::Images]
+    def images
+      LLM::Gemini::Images.new(self)
+    end
+
+    ##
+    # Provides an interface to Gemini's file management API
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    def files
+      LLM::Gemini::Files.new(self)
+    end
+
     ##
     # @return (see LLM::Provider#assistant_role)
     def assistant_role
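
The three accessors above are the entry points to the subsystems this release adds to the Gemini provider. A short sketch of how they compose, built only from methods and examples that appear in this diff:

#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])
llm.audio   # => LLM::Gemini::Audio
llm.images  # => LLM::Gemini::Images
llm.files   # => LLM::Gemini::Files

# Upload once through the Files API, then reference the file in chat
# instead of re-sending its bytes on every turn.
file = llm.files.create(file: LLM::File("/audio/haiku.mp3"))
bot = LLM::Chat.new(llm).lazy
bot.chat(["Describe the audio file I sent to you", file])
bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }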
--- a/data/lib/llm/providers/ollama/error_handler.rb
+++ b/data/lib/llm/providers/ollama/error_handler.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true

 class LLM::Ollama
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]
@@ -25,7 +27,7 @@ class LLM::Ollama
       when Net::HTTPTooManyRequests
         raise LLM::Error::RateLimit.new { _1.response = res }, "Too many requests"
       else
-        raise LLM::Error::BadResponse.new { _1.response = res }, "Unexpected response"
+        raise LLM::Error::ResponseError.new { _1.response = res }, "Unexpected response"
       end
     end
   end
--- a/data/lib/llm/providers/ollama/format.rb
+++ b/data/lib/llm/providers/ollama/format.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true

 class LLM::Ollama
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
@@ -9,9 +11,11 @@ class LLM::Ollama
     def format(messages)
       messages.map do
         if Hash === _1
-          {role: _1[:role], content: format_content(_1[:content])}
+          {role: _1[:role]}
+            .merge!(_1)
+            .merge!(format_content(_1[:content]))
         else
-          {role: _1.role, content: format_content(_1.content)}
+          {role: _1.role}.merge! format_content(_1.content)
         end
       end
     end
@@ -24,10 +28,14 @@ class LLM::Ollama
     # @return [String, Hash]
     #  The formatted content
     def format_content(content)
-      if URI === content
-        [{type: :image_url, image_url: {url: content.to_s}}]
+      if LLM::File === content
+        if content.image?
+          {content: "This message has an image associated with it", images: [content.to_b64]}
+        else
+          raise TypeError, "'#{content.path}' was not recognized as an image file."
+        end
       else
-        content
+        {content:}
      end
    end
  end
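
With this change, format_content returns a hash fragment that is merged into the message hash, which is how an image ends up in Ollama's images: field next to a placeholder content: string. A sketch of the resulting shapes, derived from the hunk above (illustrative only, since the module is marked @private and is normally used internally by the provider):

require "llm"
include LLM::Ollama::Format

# Text content becomes a :content fragment.
format([LLM::Message.new(:user, "Hi")])
# => [{role: :user, content: "Hi"}]

# An image file becomes a placeholder plus base64 data.
format([LLM::Message.new(:user, LLM::File("/images/capybara.png"))])
# => [{role: :user,
#      content: "This message has an image associated with it",
#      images: ["iVBORw0KGgoAAAANSUhEUg..."]}]

# A file that is not an image raises TypeError.
format([LLM::Message.new(:user, LLM::File("/docs/notes.pdf"))])
# raises TypeError: '/docs/notes.pdf' was not recognized as an image file.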
--- a/data/lib/llm/providers/ollama/response_parser.rb
+++ b/data/lib/llm/providers/ollama/response_parser.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true

 class LLM::Ollama
+  ##
+  # @private
   module ResponseParser
     ##
     # @param [Hash] body
@@ -22,7 +24,7 @@ class LLM::Ollama
     def parse_completion(body)
       {
         model: body["model"],
-        choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {completion: self})],
+        choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {response: self})],
         prompt_tokens: body.dig("prompt_eval_count"),
         completion_tokens: body.dig("eval_count")
       }