llm.rb 0.2.1 → 0.3.0
- checksums.yaml +4 -4
- data/README.md +264 -110
- data/lib/llm/buffer.rb +83 -0
- data/lib/llm/chat.rb +131 -0
- data/lib/llm/file.rb +26 -40
- data/lib/llm/http_client.rb +10 -5
- data/lib/llm/message.rb +14 -8
- data/lib/llm/mime.rb +54 -0
- data/lib/llm/multipart.rb +98 -0
- data/lib/llm/provider.rb +96 -19
- data/lib/llm/providers/anthropic/error_handler.rb +2 -0
- data/lib/llm/providers/anthropic/format.rb +2 -0
- data/lib/llm/providers/anthropic/response_parser.rb +3 -1
- data/lib/llm/providers/anthropic.rb +14 -5
- data/lib/llm/providers/gemini/audio.rb +77 -0
- data/lib/llm/providers/gemini/error_handler.rb +2 -0
- data/lib/llm/providers/gemini/files.rb +160 -0
- data/lib/llm/providers/gemini/format.rb +12 -6
- data/lib/llm/providers/gemini/images.rb +99 -0
- data/lib/llm/providers/gemini/response_parser.rb +27 -1
- data/lib/llm/providers/gemini.rb +62 -6
- data/lib/llm/providers/ollama/error_handler.rb +2 -0
- data/lib/llm/providers/ollama/format.rb +13 -5
- data/lib/llm/providers/ollama/response_parser.rb +3 -1
- data/lib/llm/providers/ollama.rb +30 -7
- data/lib/llm/providers/openai/audio.rb +97 -0
- data/lib/llm/providers/openai/error_handler.rb +2 -0
- data/lib/llm/providers/openai/files.rb +148 -0
- data/lib/llm/providers/openai/format.rb +21 -8
- data/lib/llm/providers/openai/images.rb +109 -0
- data/lib/llm/providers/openai/response_parser.rb +58 -5
- data/lib/llm/providers/openai/responses.rb +78 -0
- data/lib/llm/providers/openai.rb +52 -6
- data/lib/llm/providers/voyageai.rb +2 -2
- data/lib/llm/response/audio.rb +13 -0
- data/lib/llm/response/audio_transcription.rb +14 -0
- data/lib/llm/response/audio_translation.rb +14 -0
- data/lib/llm/response/download_file.rb +15 -0
- data/lib/llm/response/file.rb +42 -0
- data/lib/llm/response/filelist.rb +18 -0
- data/lib/llm/response/image.rb +29 -0
- data/lib/llm/response/output.rb +56 -0
- data/lib/llm/response.rb +18 -6
- data/lib/llm/utils.rb +19 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +5 -2
- data/llm.gemspec +1 -6
- data/spec/anthropic/completion_spec.rb +1 -1
- data/spec/gemini/completion_spec.rb +1 -1
- data/spec/gemini/conversation_spec.rb +31 -0
- data/spec/gemini/files_spec.rb +124 -0
- data/spec/gemini/images_spec.rb +47 -0
- data/spec/llm/conversation_spec.rb +101 -61
- data/spec/ollama/completion_spec.rb +1 -1
- data/spec/ollama/conversation_spec.rb +31 -0
- data/spec/openai/audio_spec.rb +55 -0
- data/spec/openai/completion_spec.rb +1 -1
- data/spec/openai/files_spec.rb +150 -0
- data/spec/openai/images_spec.rb +95 -0
- data/spec/openai/responses_spec.rb +51 -0
- data/spec/setup.rb +8 -0
- metadata +31 -49
- data/LICENSE.txt +0 -21
- data/lib/llm/conversation.rb +0 -90
- data/lib/llm/message_queue.rb +0 -54

data/lib/llm/providers/gemini/files.rb
ADDED
@@ -0,0 +1,160 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Files LLM::Gemini::Files} class provides a files
+  # object for interacting with [Gemini's Files API](https://ai.google.dev/gemini-api/docs/files).
+  # The files API allows a client to upload media files once and
+  # reference them in prompts by their URL.
+  #
+  # The files API is intended to preserve bandwidth and latency,
+  # especially for large files, but it can be helpful for smaller files
+  # as well because it does not require the client to include a file
+  # in the prompt over and over again (which could be the case in a
+  # multi-turn conversation).
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat(file)
+  #   bot.chat("Describe the audio file I sent to you")
+  #   bot.chat("The audio file is the first message I sent to you.")
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat(["Describe the audio file I sent to you", file])
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  class Files
+    ##
+    # Returns a new Files object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Files]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # List all files
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.all
+    #   res.each do |file|
+    #     print "name: ", file.name, "\n"
+    #   end
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::FileList]
+    def all(**params)
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Get.new("/v1beta/files?#{query}", headers)
+      res = request(http, req)
+      LLM::Response::FileList.new(res).tap { |filelist|
+        files = filelist.body["files"]&.map do |file|
+          file = file.transform_keys { snakecase(_1) }
+          OpenStruct.from_hash(file)
+        end || []
+        filelist.files = files
+      }
+    end
+
+    ##
+    # Create a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.create file: LLM::File("/audio/haiku.mp3")
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [File] file The file
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::File]
+    def create(file:, **params)
+      req = Net::HTTP::Post.new(request_upload_url(file:), {})
+      req["content-length"] = file.bytesize
+      req["X-Goog-Upload-Offset"] = 0
+      req["X-Goog-Upload-Command"] = "upload, finalize"
+      req.body = File.binread(file.path)
+      res = request(http, req)
+      LLM::Response::File.new(res)
+    end
+
+    ##
+    # Get a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.get(file: "files/1234567890")
+    #   print "name: ", res.name, "\n"
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [#name, String] file The file to get
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::File]
+    def get(file:, **params)
+      file_id = file.respond_to?(:name) ? file.name : file.to_s
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Get.new("/v1beta/#{file_id}?#{query}", headers)
+      res = request(http, req)
+      LLM::Response::File.new(res)
+    end
+
+    ##
+    # Delete a file
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.files.delete(file: "files/1234567890")
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    # @param [#name, String] file The file to delete
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::File]
+    def delete(file:, **params)
+      file_id = file.respond_to?(:name) ? file.name : file.to_s
+      query = URI.encode_www_form(params.merge!(key: secret))
+      req = Net::HTTP::Delete.new("/v1beta/#{file_id}?#{query}", headers)
+      request(http, req)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def download
+      raise NotImplementedError
+    end
+
+    private
+
+    include LLM::Utils
+
+    def request_upload_url(file:)
+      req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{secret}", headers)
+      req["X-Goog-Upload-Protocol"] = "resumable"
+      req["X-Goog-Upload-Command"] = "start"
+      req["X-Goog-Upload-Header-Content-Length"] = file.bytesize
+      req["X-Goog-Upload-Header-Content-Type"] = file.mime_type
+      req.body = JSON.dump(file: {display_name: File.basename(file.path)})
+      res = request(http, req)
+      res["x-goog-upload-url"]
+    end
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    def secret
+      @provider.instance_variable_get(:@secret)
+    end
+
+    [:headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
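
Files#create implements the Files API's two-step resumable upload: request_upload_url opens a session ("X-Goog-Upload-Command: start") and returns the session URL from the "x-goog-upload-url" response header, and create then sends the bytes in a single "upload, finalize" request against that URL. A minimal usage sketch of the class above (the path and file name are illustrative, and it assumes the returned LLM::Response::File exposes the API's name field, as the get example suggests):

  #!/usr/bin/env ruby
  require "llm"

  llm  = LLM.gemini(ENV["KEY"])
  file = llm.files.create(file: LLM::File("/audio/haiku.mp3")) # start + finalize upload
  llm.files.all.each { print "name: ", _1.name, "\n" }         # each file is a snake_cased OpenStruct
  llm.files.get(file: file.name)                               # look a file up by name
  llm.files.delete(file: file)                                 # accepts a #name object or a String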

data/lib/llm/providers/gemini/format.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
@@ -19,16 +21,20 @@ class LLM::Gemini
     private
 
     ##
-    # @param [String, LLM::File] content
+    # @param [String, Array, LLM::Response::File, LLM::File] content
     #  The content to format
-    # @return [
+    # @return [Hash]
     #  The formatted content
     def format_content(content)
-
+      case content
+      when Array
+        content.map { format_content(_1) }
+      when LLM::Response::File
         file = content
-        {
-
-
+        {file_data: {mime_type: file.mime_type, file_uri: file.uri}}
+      when LLM::File
+        file = content
+        {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
       else
         {text: content}
       end
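
To make the dispatch concrete, here is a sketch of the part hashes each branch of format_content produces (the method is private, so this is conceptual; values are illustrative, and `uploaded` stands for an LLM::Response::File returned by llm.files.create):

  format_content("hello")               #=> {text: "hello"}
  format_content(LLM::File("cat.png"))  #=> {inline_data: {mime_type: "image/png", data: "<base64>"}}
  format_content(uploaded)              #=> {file_data: {mime_type: "audio/mpeg", file_uri: "https://..."}}
  format_content(["hi", uploaded])      #=> [{text: "hi"}, {file_data: {...}}]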

data/lib/llm/providers/gemini/images.rb
ADDED
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Images LLM::Gemini::Images} class provides an images
+  # object for interacting with [Gemini's images API](https://ai.google.dev/gemini-api/docs/image-generation).
+  # Please note that unlike OpenAI, which can return either URLs or base64-encoded strings,
+  # Gemini's images API will always return an image as a base64-encoded string that
+  # can be decoded into binary.
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.images.create prompt: "A dog on a rocket to the moon"
+  #   File.binwrite "rocket.png", res.images[0].binary
+  class Images
+    include Format
+
+    ##
+    # Returns a new Images object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Images]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # Create an image
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.images.create prompt: "A dog on a rocket to the moon"
+    #   File.binwrite "rocket.png", res.images[0].binary
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @param [String] prompt The prompt
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @note
+    #  The prompt should make it clear you want to generate an image, or you
+    #  might unexpectedly receive a purely textual response. This is due to how
+    #  Gemini implements image generation under the hood.
+    # @return [LLM::Response::Image]
+    def create(prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      req.body = JSON.dump({
+        contents: [{parts: {text: prompt}}],
+        generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
+      }.merge!(params))
+      res = request(http, req)
+      LLM::Response::Image.new(res).extend(response_parser)
+    end
+
+    ##
+    # Edit an image
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
+    #   File.binwrite "hatoncat.png", res.images[0].binary
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @param [LLM::File] image The image to edit
+    # @param [String] prompt The prompt
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @note (see LLM::Gemini::Images#create)
+    # @return [LLM::Response::Image]
+    def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
+      req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+      req.body = JSON.dump({
+        contents: [
+          {parts: [{text: prompt}, format_content(image)]}
+        ],
+        generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
+      }.merge!(params))
+      res = request(http, req)
+      LLM::Response::Image.new(res).extend(response_parser)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_variation
+      raise NotImplementedError
+    end
+
+    private
+
+    def secret
+      @provider.instance_variable_get(:@secret)
+    end
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    [:response_parser, :headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
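
Because Gemini only returns inline base64 data, each entry in res.images carries both the encoded string and its decoded bytes, so saving an image to disk is a one-liner. A sketch (prompt and file names are illustrative):

  llm = LLM.gemini(ENV["KEY"])
  res = llm.images.create(prompt: "Generate an image of a dog on a rocket to the moon")
  img = res.images[0]
  File.binwrite("rocket.png", img.binary)                  # binary is already decoded
  File.binwrite("rocket2.png", img.encoded.unpack1("m0"))  # equivalent, decoding by hand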

data/lib/llm/providers/gemini/response_parser.rb
CHANGED
@@ -1,7 +1,13 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
     def parse_embedding(body)
       {
         model: "text-embedding-004",
@@ -20,12 +26,32 @@ class LLM::Gemini
           LLM::Message.new(
             _1.dig("content", "role"),
            _1.dig("content", "parts", 0, "text"),
-            {
+            {response: self}
           )
         end,
         prompt_tokens: body.dig("usageMetadata", "promptTokenCount"),
         completion_tokens: body.dig("usageMetadata", "candidatesTokenCount")
       }
     end
+
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
+    def parse_image(body)
+      {
+        urls: [],
+        images: body["candidates"].flat_map do |candidate|
+          candidate["content"]["parts"].filter_map do
+            next unless _1.dig("inlineData", "data")
+            OpenStruct.from_hash(
+              mime_type: _1["inlineData"]["mimeType"],
+              encoded: _1["inlineData"]["data"],
+              binary: _1["inlineData"]["data"].unpack1("m0")
+            )
+          end
+        end
+      }
+    end
   end
 end

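
For context, an abridged sketch of the response body parse_image walks; text parts have no "inlineData" key and are dropped by filter_map, which is also why urls is always an empty array for this provider:

  body = {
    "candidates" => [
      {"content" => {"parts" => [
        {"text" => "Here is the image you asked for"},
        {"inlineData" => {"mimeType" => "image/png", "data" => "iVBORw0KGgo..."}}
      ]}}
    ]
  }
  # parse_image(body) => {urls: [], images: [OpenStruct(mime_type:, encoded:, binary:)]}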
data/lib/llm/providers/gemini.rb
CHANGED
@@ -3,11 +3,37 @@
 module LLM
   ##
   # The Gemini class implements a provider for
-  # [Gemini](https://ai.google.dev/)
+  # [Gemini](https://ai.google.dev/).
+  #
+  # The Gemini provider can accept multiple inputs (text, images,
+  # audio, and video). The inputs can be provided inline via the
+  # prompt for files under 20MB, or via the Gemini Files API for
+  # files that are over 20MB.
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat LLM::File("/images/capybara.png")
+  #   bot.chat "Describe the image"
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   bot = LLM::Chat.new(llm).lazy
+  #   bot.chat ["Describe the image", LLM::File("/images/capybara.png")]
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
   class Gemini < Provider
     require_relative "gemini/error_handler"
     require_relative "gemini/response_parser"
     require_relative "gemini/format"
+    require_relative "gemini/images"
+    require_relative "gemini/files"
+    require_relative "gemini/audio"
     include Format
 
     HOST = "generativelanguage.googleapis.com"
@@ -19,10 +45,14 @@ module LLM
     end
 
     ##
+    # Provides an embedding
     # @param input (see LLM::Provider#embed)
+    # @param model (see LLM::Provider#embed)
+    # @param params (see LLM::Provider#embed)
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, **params)
-      path = ["/v1beta/models
+    def embed(input, model: "text-embedding-004", **params)
+      path = ["/v1beta/models/#{model}", "embedContent?key=#{@secret}"].join(":")
       req = Net::HTTP::Post.new(path, headers)
       req.body = JSON.dump({content: {parts: [{text: input}]}})
       res = request(@http, req)
@@ -30,13 +60,17 @@ module LLM
     end
 
     ##
+    # Provides an interface to the chat completions API
     # @see https://ai.google.dev/api/generate-content#v1beta.models.generateContent Gemini docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-
-      path = ["/v1beta/models/#{params.delete(:model)}", "generateContent?key=#{@secret}"].join(":")
+    def complete(prompt, role = :user, model: "gemini-1.5-flash", **params)
+      path = ["/v1beta/models/#{model}", "generateContent?key=#{@secret}"].join(":")
       req = Net::HTTP::Post.new(path, headers)
       messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
       req.body = JSON.dump({contents: format(messages)})
@@ -44,6 +78,28 @@ module LLM
       Response::Completion.new(res).extend(response_parser)
     end
 
+    ##
+    # Provides an interface to Gemini's audio API
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    def audio
+      LLM::Gemini::Audio.new(self)
+    end
+
+    ##
+    # Provides an interface to Gemini's image generation API
+    # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
+    # @return [see LLM::Gemini::Images]
+    def images
+      LLM::Gemini::Images.new(self)
+    end
+
+    ##
+    # Provides an interface to Gemini's file management API
+    # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+    def files
+      LLM::Gemini::Files.new(self)
+    end
+
     ##
     # @return (see LLM::Provider#assistant_role)
     def assistant_role
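
A note on the signature change: the model is now a keyword argument with a sensible default instead of a value plucked out of params via params.delete(:model). A sketch (model names other than the defaults are illustrative):

  llm = LLM.gemini(ENV["KEY"])
  llm.complete("Hello")                                  # defaults to "gemini-1.5-flash"
  llm.complete("Hello", :user, model: "gemini-1.5-pro")  # any Gemini model name works here
  llm.embed("Hello")                                     # defaults to "text-embedding-004"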

data/lib/llm/providers/ollama/format.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Ollama
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
@@ -9,9 +11,11 @@ class LLM::Ollama
     def format(messages)
       messages.map do
         if Hash === _1
-          {role: _1[:role]
+          {role: _1[:role]}
+            .merge!(_1)
+            .merge!(format_content(_1[:content]))
         else
-          {role: _1.role
+          {role: _1.role}.merge! format_content(_1.content)
         end
       end
     end
@@ -24,10 +28,14 @@ class LLM::Ollama
     # @return [String, Hash]
     #  The formatted content
     def format_content(content)
-      if
-
+      if LLM::File === content
+        if content.image?
+          {content: "This message has an image associated with it", images: [content.to_b64]}
+        else
+          raise TypeError, "'#{content.path}' was not recognized as an image file."
+        end
       else
-        content
+        {content:}
      end
     end
   end
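
A conceptual sketch of the hashes Format#format now produces (format_content is private; the base64 value is elided):

  format([LLM::Message.new(:user, "Hello")])
  #=> [{role: :user, content: "Hello"}]
  format([LLM::Message.new(:user, LLM::File("capybara.png"))])
  #=> [{role: :user, content: "This message has an image associated with it", images: ["<base64>"]}]
  # A non-image LLM::File raises TypeError instead.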

data/lib/llm/providers/ollama/response_parser.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Ollama
+  ##
+  # @private
   module ResponseParser
     ##
     # @param [Hash] body
@@ -22,7 +24,7 @@ class LLM::Ollama
     def parse_completion(body)
       {
         model: body["model"],
-        choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {
+        choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {response: self})],
         prompt_tokens: body.dig("prompt_eval_count"),
         completion_tokens: body.dig("eval_count")
       }

data/lib/llm/providers/ollama.rb
CHANGED
@@ -2,8 +2,22 @@
 
 module LLM
   ##
-  # The Ollama class implements a provider for
-  #
+  # The Ollama class implements a provider for [Ollama](https://ollama.ai/).
+  #
+  # This provider supports a wide range of models, is relatively
+  # straightforward to run on your own hardware, and includes multi-modal
+  # models that can process images and text. See the example below for a
+  # demonstration of the multi-modal model `llava`.
+  #
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.ollama(nil)
+  #   bot = LLM::Chat.new(llm, model: "llava").lazy
+  #   bot.chat LLM::File("/images/capybara.png")
+  #   bot.chat "Describe the image"
+  #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
   class Ollama < Provider
     require_relative "ollama/error_handler"
     require_relative "ollama/response_parser"
@@ -19,10 +33,14 @@ module LLM
     end
 
     ##
+    # Provides an embedding
     # @param input (see LLM::Provider#embed)
+    # @param model (see LLM::Provider#embed)
+    # @param params (see LLM::Provider#embed)
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, **params)
-      params = {model:
+    def embed(input, model: "llama3.2", **params)
+      params = {model:}.merge!(params)
       req = Net::HTTP::Post.new("/v1/embeddings", headers)
       req.body = JSON.dump({input:}.merge!(params))
       res = request(@http, req)
@@ -30,15 +48,20 @@ module LLM
     end
 
     ##
+    # Provides an interface to the chat completions API
     # @see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion Ollama docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-      params = {model
+    def complete(prompt, role = :user, model: "llama3.2", **params)
+      params = {model:, stream: false}.merge!(params)
       req = Net::HTTP::Post.new("/api/chat", headers)
       messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
-      req.body = JSON.dump({messages: messages
+      req.body = JSON.dump({messages: format(messages)}.merge!(params))
       res = request(@http, req)
       Response::Completion.new(res).extend(response_parser)
     end
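
One behavioral detail worth noting: complete now sends stream: false by default, so a single JSON body comes back rather than a stream of chunks, and callers can still override it through params. A sketch (model names depend on what you have pulled locally):

  llm = LLM.ollama(nil)                                # local server; no API key required
  res = llm.complete("Hello!")                         # defaults: model "llama3.2", stream: false
  res = llm.complete("Hello!", :user, model: "llava")  # any locally pulled model works
  print res.choices[0].content, "\n"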

data/lib/llm/providers/openai/audio.rb
ADDED
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+class LLM::OpenAI
+  ##
+  # The {LLM::OpenAI::Audio LLM::OpenAI::Audio} class provides an audio
+  # object for interacting with [OpenAI's audio API](https://platform.openai.com/docs/api-reference/audio/createSpeech).
+  # @example
+  #   llm = LLM.openai(ENV["KEY"])
+  #   res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
+  #   File.binwrite("rocket.mp3", res.audio.string)
+  class Audio
+    require "stringio"
+
+    ##
+    # Returns a new Audio object
+    # @param provider [LLM::Provider]
+    # @return [LLM::OpenAI::Audio]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # Create an audio track
+    # @example
+    #   llm = LLM.openai(ENV["KEY"])
+    #   res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
+    #   File.binwrite("rocket.mp3", res.audio.string)
+    # @see https://platform.openai.com/docs/api-reference/audio/createSpeech OpenAI docs
+    # @param [String] input The text input
+    # @param [String] voice The voice to use
+    # @param [String] model The model to use
+    # @param [String] response_format The response format
+    # @param [Hash] params Other parameters (see OpenAI docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::Audio]
+    def create_speech(input:, voice: "alloy", model: "gpt-4o-mini-tts", response_format: "mp3", **params)
+      req = Net::HTTP::Post.new("/v1/audio/speech", headers)
+      req.body = JSON.dump({input:, voice:, model:, response_format:}.merge!(params))
+      io = StringIO.new("".b)
+      res = request(http, req) { _1.read_body { |chunk| io << chunk } }
+      LLM::Response::Audio.new(res).tap { _1.audio = io }
+    end
+
+    ##
+    # Create an audio transcription
+    # @example
+    #   llm = LLM.openai(ENV["KEY"])
+    #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+    #   res.text # => "A dog on a rocket to the moon"
+    # @see https://platform.openai.com/docs/api-reference/audio/createTranscription OpenAI docs
+    # @param [LLM::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see OpenAI docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::AudioTranscription]
+    def create_transcription(file:, model: "whisper-1", **params)
+      multi = LLM::Multipart.new(params.merge!(file:, model:))
+      req = Net::HTTP::Post.new("/v1/audio/transcriptions", headers)
+      req["content-type"] = multi.content_type
+      req.body = multi.body
+      res = request(http, req)
+      LLM::Response::AudioTranscription.new(res).tap { _1.text = _1.body["text"] }
+    end
+
+    ##
+    # Create an audio translation (in English)
+    # @example
+    #   # Arabic => English
+    #   llm = LLM.openai(ENV["KEY"])
+    #   res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+    #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
+    # @see https://platform.openai.com/docs/api-reference/audio/createTranslation OpenAI docs
+    # @param [LLM::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see OpenAI docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::AudioTranslation]
+    def create_translation(file:, model: "whisper-1", **params)
+      multi = LLM::Multipart.new(params.merge!(file:, model:))
+      req = Net::HTTP::Post.new("/v1/audio/translations", headers)
+      req["content-type"] = multi.content_type
+      req.body = multi.body
+      res = request(http, req)
+      LLM::Response::AudioTranslation.new(res).tap { _1.text = _1.body["text"] }
+    end
+
+    private
+
+    def http
+      @provider.instance_variable_get(:@http)
+    end
+
+    [:headers, :request].each do |m|
+      define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
+    end
+  end
+end
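
To tie the three endpoints together, a round-trip sketch (file paths are illustrative; create_speech streams the response body into a StringIO, so res.audio.string holds the raw mp3 bytes):

  llm = LLM.openai(ENV["KEY"])
  res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
  File.binwrite("rocket.mp3", res.audio.string)
  txt = llm.audio.create_transcription(file: LLM::File("rocket.mp3"))
  print txt.text, "\n"  # roughly "A dog on a rocket to the moon"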