llm.rb 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +318 -110
- data/lib/llm/buffer.rb +83 -0
- data/lib/llm/chat.rb +131 -0
- data/lib/llm/error.rb +3 -3
- data/lib/llm/file.rb +36 -40
- data/lib/llm/message.rb +21 -8
- data/lib/llm/mime.rb +54 -0
- data/lib/llm/multipart.rb +100 -0
- data/lib/llm/provider.rb +123 -21
- data/lib/llm/providers/anthropic/error_handler.rb +3 -1
- data/lib/llm/providers/anthropic/format.rb +2 -0
- data/lib/llm/providers/anthropic/response_parser.rb +3 -1
- data/lib/llm/providers/anthropic.rb +14 -5
- data/lib/llm/providers/gemini/audio.rb +77 -0
- data/lib/llm/providers/gemini/error_handler.rb +4 -2
- data/lib/llm/providers/gemini/files.rb +162 -0
- data/lib/llm/providers/gemini/format.rb +12 -6
- data/lib/llm/providers/gemini/images.rb +99 -0
- data/lib/llm/providers/gemini/response_parser.rb +27 -1
- data/lib/llm/providers/gemini.rb +62 -6
- data/lib/llm/providers/ollama/error_handler.rb +3 -1
- data/lib/llm/providers/ollama/format.rb +13 -5
- data/lib/llm/providers/ollama/response_parser.rb +3 -1
- data/lib/llm/providers/ollama.rb +30 -7
- data/lib/llm/providers/openai/audio.rb +97 -0
- data/lib/llm/providers/openai/error_handler.rb +3 -1
- data/lib/llm/providers/openai/files.rb +148 -0
- data/lib/llm/providers/openai/format.rb +22 -8
- data/lib/llm/providers/openai/images.rb +109 -0
- data/lib/llm/providers/openai/response_parser.rb +58 -5
- data/lib/llm/providers/openai/responses.rb +85 -0
- data/lib/llm/providers/openai.rb +52 -6
- data/lib/llm/providers/voyageai/error_handler.rb +1 -1
- data/lib/llm/providers/voyageai.rb +2 -2
- data/lib/llm/response/audio.rb +13 -0
- data/lib/llm/response/audio_transcription.rb +14 -0
- data/lib/llm/response/audio_translation.rb +14 -0
- data/lib/llm/response/download_file.rb +15 -0
- data/lib/llm/response/file.rb +42 -0
- data/lib/llm/response/filelist.rb +18 -0
- data/lib/llm/response/image.rb +29 -0
- data/lib/llm/response/output.rb +56 -0
- data/lib/llm/response.rb +18 -6
- data/lib/llm/utils.rb +19 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +5 -2
- data/llm.gemspec +1 -6
- data/spec/anthropic/completion_spec.rb +1 -1
- data/spec/gemini/completion_spec.rb +1 -1
- data/spec/gemini/conversation_spec.rb +31 -0
- data/spec/gemini/files_spec.rb +124 -0
- data/spec/gemini/images_spec.rb +47 -0
- data/spec/llm/conversation_spec.rb +107 -62
- data/spec/ollama/completion_spec.rb +1 -1
- data/spec/ollama/conversation_spec.rb +31 -0
- data/spec/openai/audio_spec.rb +55 -0
- data/spec/openai/completion_spec.rb +5 -4
- data/spec/openai/files_spec.rb +204 -0
- data/spec/openai/images_spec.rb +95 -0
- data/spec/openai/responses_spec.rb +51 -0
- data/spec/setup.rb +8 -0
- metadata +31 -50
- data/LICENSE.txt +0 -21
- data/lib/llm/conversation.rb +0 -90
- data/lib/llm/http_client.rb +0 -29
- data/lib/llm/message_queue.rb +0 -54
data/lib/llm/providers/anthropic/response_parser.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   module ResponseParser
     def parse_embedding(body)
       {
@@ -19,7 +21,7 @@ class LLM::Anthropic
         model: body["model"],
         choices: body["content"].map do
           # TODO: don't hardcode role
-          LLM::Message.new("assistant", _1["text"], {
+          LLM::Message.new("assistant", _1["text"], {response: self})
         end,
         prompt_tokens: body.dig("usage", "input_tokens"),
         completion_tokens: body.dig("usage", "output_tokens")
data/lib/llm/providers/anthropic.rb
CHANGED
@@ -24,21 +24,30 @@ module LLM
     # @param input (see LLM::Provider#embed)
     # @param [String] token
     #  Valid token for the VoyageAI API
+    # @param [String] model
+    #  The embedding model to use
     # @param [Hash] params
-    #
+    #  Other embedding parameters
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, token:, **params)
+    def embed(input, token:, model: "voyage-2", **params)
       llm = LLM.voyageai(token)
-      llm.embed(input, **params)
+      llm.embed(input, **params.merge(model:))
     end
 
     ##
+    # Provides an interface to the chat completions API
     # @see https://docs.anthropic.com/en/api/messages Anthropic docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param max_tokens The maximum number of tokens to generate
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-      params = {max_tokens
+    def complete(prompt, role = :user, model: "claude-3-5-sonnet-20240620", max_tokens: 1024, **params)
+      params = {max_tokens:, model:}.merge!(params)
       req = Net::HTTP::Post.new("/v1/messages", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
       req.body = JSON.dump({messages: format(messages)}.merge!(params))
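In practice the Anthropic provider now supplies defaults for the model and token budget, and embeddings are delegated to VoyageAI with a default `voyage-2` model. A minimal usage sketch based on the signatures above, assuming the same `LLM.anthropic` constructor pattern used by the other providers in this diff (the environment variable names are placeholders):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.anthropic(ENV["ANTHROPIC_KEY"])

# Uses the new defaults: model "claude-3-5-sonnet-20240620", max_tokens 1024
res = llm.complete("Write a haiku about rockets", :user)
print res.choices[0].content, "\n"

# Embeddings are delegated to VoyageAI; "voyage-2" is the default model
llm.embed("A dog on a rocket to the moon", token: ENV["VOYAGEAI_KEY"])
```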
data/lib/llm/providers/gemini/audio.rb
ADDED
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Audio LLM::Gemini::Audio} class provides an audio
+  # object for interacting with [Gemini's audio API](https://ai.google.dev/gemini-api/docs/audio).
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.audio.create_transcription(input: LLM::File("/rocket.mp3"))
+  #   res.text # => "A dog on a rocket to the moon"
+  class Audio
+    ##
+    # Returns a new Audio object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Responses]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_speech
+      raise NotImplementedError
+    end
+
+    ##
+    # Create an audio transcription
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+    #   res.text # => "A dog on a rocket to the moon"
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::AudioTranscription]
+    def create_transcription(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to transcribe the contents of an audio file",
+        "Your response should include the transcription, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranscription
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+
+    ##
+    # Create an audio translation (in English)
+    # @example
+    #   # Arabic => English
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+    #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::AudioTranslation]
+    def create_translation(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to translate the contents of an audio file into English",
+        "Your response should include the translation, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranslation
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+  end
+end
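The audio object rides on top of the chat completions API and prompts the model to return only the transcription or translation. A short usage sketch drawn from the examples embedded above (file paths and the key name are placeholders):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])

# Transcription (defaults to the "gemini-1.5-flash" model)
transcription = llm.audio.create_transcription(file: LLM::File("/audio/rocket.mp3"))
print transcription.text, "\n"

# Translation into English
translation = llm.audio.create_translation(file: LLM::File("/audio/bismillah.mp3"))
print translation.text, "\n"
```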
data/lib/llm/providers/gemini/error_handler.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]
@@ -25,12 +27,12 @@ class LLM::Gemini
         if reason == "API_KEY_INVALID"
           raise LLM::Error::Unauthorized.new { _1.response = res }, "Authentication error"
         else
-          raise LLM::Error::
+          raise LLM::Error::ResponseError.new { _1.response = res }, "Unexpected response"
         end
       when Net::HTTPTooManyRequests
         raise LLM::Error::RateLimit.new { _1.response = res }, "Too many requests"
       else
-        raise LLM::Error::
+        raise LLM::Error::ResponseError.new { _1.response = res }, "Unexpected response"
       end
     end
 
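The error classes on the removed lines (truncated in this rendering) are replaced with `LLM::Error::ResponseError`, which callers can rescue alongside the existing `Unauthorized` and `RateLimit` errors. A hedged sketch of what that might look like in application code; the error object exposes the response set by `_1.response = res` above:

```ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])
begin
  res = llm.complete("Hello, Gemini")
  print res.choices[0].content, "\n"
rescue LLM::Error::Unauthorized
  warn "the API key was rejected"
rescue LLM::Error::RateLimit
  warn "too many requests, try again later"
rescue LLM::Error::ResponseError => e
  warn "unexpected response: #{e.response.code}"
end
```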
data/lib/llm/providers/gemini/files.rb
ADDED
@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Files LLM::Gemini::Files} class provides a files
+  # object for interacting with [Gemini's Files API](https://ai.google.dev/gemini-api/docs/files).
+  # The files API allows a client to reference media files in prompts
+  # where they can be referenced by their URL.
+  #
+  # The files API is intended to preserve bandwidth and latency,
+  # especially for large files but it can be helpful for smaller files
+  # as well because it does not require the client to include a file
+  # in the prompt over and over again (which could be the case in a
+  # multi-turn conversation).
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #   bot.chat(file)
+  #   bot.chat("Describe the audio file I sent to you")
+  #   bot.chat("The audio file is the first message I sent to you.")
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #   bot.chat(["Describe the audio file I sent to you", file])
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  class Files
+    ##
+    # Returns a new Files object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Files]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # List all files
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.all
+    #   res.each do |file|
+    #     print "name: ", file.name, "\n"
+    #   end
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::FileList]
+    def all(**params)
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Get.new("/v1beta/files?#{query}", headers)
+      res = request(http, req)
+      LLM::Response::FileList.new(res).tap { |filelist|
+        files = filelist.body["files"]&.map do |file|
+          file = file.transform_keys { snakecase(_1) }
+          OpenStruct.from_hash(file)
+        end || []
+        filelist.files = files
+      }
+    end
+
+    ##
+    # Create a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.create file: LLM::File("/audio/haiku.mp3"),
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [File] file The file
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::File]
+    def create(file:, **params)
+      req = Net::HTTP::Post.new(request_upload_url(file:), {})
+      req["content-length"] = file.bytesize
+      req["X-Goog-Upload-Offset"] = 0
+      req["X-Goog-Upload-Command"] = "upload, finalize"
+      file.with_io do |io|
+        req.body_stream = io
+        res = request(http, req)
+        LLM::Response::File.new(res)
+      end
+    end
+
+    ##
+    # Get a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.get(file: "files/1234567890")
+    #   print "name: ", res.name, "\n"
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [#name, String] file The file to get
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::File]
+    def get(file:, **params)
+      file_id = file.respond_to?(:name) ? file.name : file.to_s
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Get.new("/v1beta/#{file_id}?#{query}", headers)
+      res = request(http, req)
+      LLM::Response::File.new(res)
+    end
+
+    ##
+    # Delete a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.delete(file: "files/1234567890")
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [#name, String] file The file to delete
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @return [LLM::Response::File]
+    def delete(file:, **params)
+      file_id = file.respond_to?(:name) ? file.name : file.to_s
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Delete.new("/v1beta/#{file_id}?#{query}", headers)
+      request(http, req)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def download
+      raise NotImplementedError
+    end
+
+    private
+
+    include LLM::Utils
+
+    def request_upload_url(file:)
+      req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{secret}", headers)
+      req["X-Goog-Upload-Protocol"] = "resumable"
+      req["X-Goog-Upload-Command"] = "start"
+      req["X-Goog-Upload-Header-Content-Length"] = file.bytesize
+      req["X-Goog-Upload-Header-Content-Type"] = file.mime_type
+      req.body = JSON.dump(file: {display_name: File.basename(file.path)})
+      res = request(http, req)
+      res["x-goog-upload-url"]
+    end
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    def secret
+      @provider.instance_variable_get(:@secret)
+    end
+
+    [:headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
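Pulling the pieces together, a file can be uploaded once and then referenced in later prompts instead of being re-inlined on every turn. A usage sketch composed from the examples above (paths and the key name are placeholders):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])
bot = LLM::Chat.new(llm).lazy

# Upload once, reference by URL in the conversation
file = llm.files.create file: LLM::File("/audio/haiku.mp3")
bot.chat(["Describe the audio file I sent to you", file])
bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }

# Housekeeping: list and delete uploaded files
llm.files.all.each { print "name: ", _1.name, "\n" }
llm.files.delete(file:)
```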
data/lib/llm/providers/gemini/format.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
@@ -19,16 +21,20 @@ class LLM::Gemini
     private
 
     ##
-    # @param [String, LLM::File] content
+    # @param [String, Array, LLM::Response::File, LLM::File] content
     #  The content to format
-    # @return [
+    # @return [Hash]
     #  The formatted content
     def format_content(content)
-
+      case content
+      when Array
+        content.map { format_content(_1) }
+      when LLM::Response::File
         file = content
-      {
-
-
+        {file_data: {mime_type: file.mime_type, file_uri: file.uri}}
+      when LLM::File
+        file = content
+        {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
       else
         {text: content}
       end
data/lib/llm/providers/gemini/images.rb
ADDED
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Images LLM::Gemini::Images} class provides an images
+  # object for interacting with [Gemini's images API](https://ai.google.dev/gemini-api/docs/image-generation).
+  # Please note that unlike OpenAI, which can return either URLs or base64-encoded strings,
+  # Gemini's images API will always return an image as a base64 encoded string that
+  # can be decoded into binary.
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.images.create prompt: "A dog on a rocket to the moon"
+  #   File.binwrite "rocket.png", res.images[0].binary
+  class Images
+    include Format
+
+    ##
+    # Returns a new Images object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Responses]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # Create an image
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.images.create prompt: "A dog on a rocket to the moon"
+    #   File.binwrite "rocket.png", res.images[0].binary
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @param [String] prompt The prompt
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @note
+    #  The prompt should make it clear you want to generate an image, or you
+    #  might unexpectedly receive a purely textual response. This is due to how
+    #  Gemini implements image generation under the hood.
+    # @return [LLM::Response::Image]
+    def create(prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      body = JSON.dump({
+        contents: [{parts: {text: prompt}}],
+        generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
+      }.merge!(params))
+      req.body = body
+      res = request(http, req)
+      LLM::Response::Image.new(res).extend(response_parser)
+    end
+
+    ##
+    # Edit an image
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
+    #   File.binwrite "hatoncat.png", res.images[0].binary
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @param [LLM::File] image The image to edit
+    # @param [String] prompt The prompt
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::Provider#request)
+    # @note (see LLM::Gemini::Images#create)
+    # @return [LLM::Response::Image]
+    def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      body = JSON.dump({
+        contents: [{parts: [{text: prompt}, format_content(image)]}],
+        generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
+      }.merge!(params)).b
+      req.body_stream = StringIO.new(body)
+      res = request(http, req)
+      LLM::Response::Image.new(res).extend(response_parser)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_variation
+      raise NotImplementedError
+    end
+
+    private
+
+    def secret
+      @provider.instance_variable_get(:@secret)
+    end
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    [:response_parser, :headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
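A usage sketch based on the examples above. The prompt should state explicitly that an image is wanted, and the result is always base64 data rather than a URL (paths and the key name are placeholders):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])

# Generation: be explicit that an image is the desired output
res = llm.images.create prompt: "Generate an image of a dog on a rocket to the moon"
File.binwrite "rocket.png", res.images[0].binary

# Editing: pass the source image alongside the prompt
res = llm.images.edit image: LLM::File("rocket.png"), prompt: "Add a hat to the dog"
File.binwrite "rocket-hat.png", res.images[0].binary
```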
data/lib/llm/providers/gemini/response_parser.rb
CHANGED
@@ -1,7 +1,13 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
     def parse_embedding(body)
       {
         model: "text-embedding-004",
@@ -20,12 +26,32 @@ class LLM::Gemini
         LLM::Message.new(
           _1.dig("content", "role"),
           _1.dig("content", "parts", 0, "text"),
-          {
+          {response: self}
         )
       end,
       prompt_tokens: body.dig("usageMetadata", "promptTokenCount"),
       completion_tokens: body.dig("usageMetadata", "candidatesTokenCount")
       }
     end
+
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
+    def parse_image(body)
+      {
+        urls: [],
+        images: body["candidates"].flat_map do |candidate|
+          candidate["content"]["parts"].filter_map do
+            next unless _1.dig("inlineData", "data")
+            OpenStruct.from_hash(
+              mime_type: _1["inlineData"]["mimeType"],
+              encoded: _1["inlineData"]["data"],
+              binary: _1["inlineData"]["data"].unpack1("m0")
+            )
+          end
+        end
+      }
+    end
   end
 end
data/lib/llm/providers/gemini.rb
CHANGED
@@ -3,11 +3,37 @@
 module LLM
   ##
   # The Gemini class implements a provider for
-  # [Gemini](https://ai.google.dev/)
+  # [Gemini](https://ai.google.dev/).
+  #
+  # The Gemini provider can accept multiple inputs (text, images,
+  # audio, and video). The inputs can be provided inline via the
+  # prompt for files under 20MB or via the Gemini Files API for
+  # files that are over 20MB
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat LLM::File("/images/capybara.png")
+  #   bot.chat "Describe the image"
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat ["Describe the image", LLM::File("/images/capybara.png")]
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
   class Gemini < Provider
     require_relative "gemini/error_handler"
     require_relative "gemini/response_parser"
     require_relative "gemini/format"
+    require_relative "gemini/images"
+    require_relative "gemini/files"
+    require_relative "gemini/audio"
     include Format
 
     HOST = "generativelanguage.googleapis.com"
@@ -19,10 +45,14 @@ module LLM
     end
 
     ##
+    # Provides an embedding
     # @param input (see LLM::Provider#embed)
+    # @param model (see LLM::Provider#embed)
+    # @param params (see LLM::Provider#embed)
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, **params)
-      path = ["/v1beta/models
+    def embed(input, model: "text-embedding-004", **params)
+      path = ["/v1beta/models/#{model}", "embedContent?key=#{@secret}"].join(":")
       req = Net::HTTP::Post.new(path, headers)
       req.body = JSON.dump({content: {parts: [{text: input}]}})
       res = request(@http, req)
@@ -30,13 +60,17 @@ module LLM
     end
 
     ##
+    # Provides an interface to the chat completions API
     # @see https://ai.google.dev/api/generate-content#v1beta.models.generateContent Gemini docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::Provider#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-
-      path = ["/v1beta/models/#{params.delete(:model)}", "generateContent?key=#{@secret}"].join(":")
+    def complete(prompt, role = :user, model: "gemini-1.5-flash", **params)
+      path = ["/v1beta/models/#{model}", "generateContent?key=#{@secret}"].join(":")
       req = Net::HTTP::Post.new(path, headers)
       messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
       req.body = JSON.dump({contents: format(messages)})
@@ -44,6 +78,28 @@ module LLM
       Response::Completion.new(res).extend(response_parser)
     end
 
+    ##
+    # Provides an interface to Gemini's audio API
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    def audio
+      LLM::Gemini::Audio.new(self)
+    end
+
+    ##
+    # Provides an interface to Gemini's image generation API
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @return [see LLM::Gemini::Images]
+    def images
+      LLM::Gemini::Images.new(self)
+    end
+
+    ##
+    # Provides an interface to Gemini's file management API
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    def files
+      LLM::Gemini::Files.new(self)
+    end
+
     ##
     # @return (see LLM::Provider#assistant_role)
     def assistant_role
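The provider now exposes `audio`, `images`, and `files` objects, and the class documentation notes that media under 20MB can be sent inline while larger media should go through the Files API. A combined sketch based on the examples above (paths and the key name are placeholders; the video file is hypothetical):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.gemini(ENV["KEY"])
bot = LLM::Chat.new(llm).lazy

# Small media (< 20MB) can be sent inline with the prompt ...
bot.chat ["Describe the image", LLM::File("/images/capybara.png")]

# ... while larger media is uploaded once through the Files API
file = llm.files.create file: LLM::File("/videos/capybara.mp4")
bot.chat ["Describe the video", file]

bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
```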
data/lib/llm/providers/ollama/error_handler.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Ollama
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]
@@ -25,7 +27,7 @@ class LLM::Ollama
       when Net::HTTPTooManyRequests
         raise LLM::Error::RateLimit.new { _1.response = res }, "Too many requests"
       else
-        raise LLM::Error::
+        raise LLM::Error::ResponseError.new { _1.response = res }, "Unexpected response"
       end
     end
   end
data/lib/llm/providers/ollama/format.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Ollama
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
@@ -9,9 +11,11 @@ class LLM::Ollama
     def format(messages)
       messages.map do
         if Hash === _1
-          {role: _1[:role]
+          {role: _1[:role]}
+            .merge!(_1)
+            .merge!(format_content(_1[:content]))
         else
-          {role: _1.role
+          {role: _1.role}.merge! format_content(_1.content)
         end
       end
     end
@@ -24,10 +28,14 @@ class LLM::Ollama
     # @return [String, Hash]
     #  The formatted content
    def format_content(content)
-      if
-
+      if LLM::File === content
+        if content.image?
+          {content: "This message has an image associated with it", images: [content.to_b64]}
+        else
+          raise TypeError, "'#{content.path}' was not recognized as an image file."
+        end
       else
-        content
+        {content:}
      end
    end
  end
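With this formatter, an image attached to an Ollama message is base64-encoded into the `images` field, and non-image files raise a `TypeError`. A hedged sketch, assuming the Ollama provider is constructed like the other providers in this diff (`LLM.ollama`, no API key since Ollama runs locally) and that a vision-capable model is available:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.ollama(nil)   # assumption: nil token, since Ollama is a local server
bot = LLM::Chat.new(llm).lazy
bot.chat LLM::File("/images/capybara.png")   # formatted as {content: "...", images: ["<base64>"]}
bot.chat "Describe the image"
bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
```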
data/lib/llm/providers/ollama/response_parser.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Ollama
+  ##
+  # @private
   module ResponseParser
     ##
     # @param [Hash] body
@@ -22,7 +24,7 @@ class LLM::Ollama
     def parse_completion(body)
       {
         model: body["model"],
-        choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {
+        choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {response: self})],
         prompt_tokens: body.dig("prompt_eval_count"),
         completion_tokens: body.dig("eval_count")
       }