llm.rb 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/README.md +264 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/file.rb +26 -40
  6. data/lib/llm/http_client.rb +10 -5
  7. data/lib/llm/message.rb +14 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +98 -0
  10. data/lib/llm/provider.rb +96 -19
  11. data/lib/llm/providers/anthropic/error_handler.rb +2 -0
  12. data/lib/llm/providers/anthropic/format.rb +2 -0
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +2 -0
  17. data/lib/llm/providers/gemini/files.rb +160 -0
  18. data/lib/llm/providers/gemini/format.rb +12 -6
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +2 -0
  23. data/lib/llm/providers/ollama/format.rb +13 -5
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +2 -0
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +21 -8
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +78 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai.rb +2 -2
  35. data/lib/llm/response/audio.rb +13 -0
  36. data/lib/llm/response/audio_transcription.rb +14 -0
  37. data/lib/llm/response/audio_translation.rb +14 -0
  38. data/lib/llm/response/download_file.rb +15 -0
  39. data/lib/llm/response/file.rb +42 -0
  40. data/lib/llm/response/filelist.rb +18 -0
  41. data/lib/llm/response/image.rb +29 -0
  42. data/lib/llm/response/output.rb +56 -0
  43. data/lib/llm/response.rb +18 -6
  44. data/lib/llm/utils.rb +19 -0
  45. data/lib/llm/version.rb +1 -1
  46. data/lib/llm.rb +5 -2
  47. data/llm.gemspec +1 -6
  48. data/spec/anthropic/completion_spec.rb +1 -1
  49. data/spec/gemini/completion_spec.rb +1 -1
  50. data/spec/gemini/conversation_spec.rb +31 -0
  51. data/spec/gemini/files_spec.rb +124 -0
  52. data/spec/gemini/images_spec.rb +47 -0
  53. data/spec/llm/conversation_spec.rb +101 -61
  54. data/spec/ollama/completion_spec.rb +1 -1
  55. data/spec/ollama/conversation_spec.rb +31 -0
  56. data/spec/openai/audio_spec.rb +55 -0
  57. data/spec/openai/completion_spec.rb +1 -1
  58. data/spec/openai/files_spec.rb +150 -0
  59. data/spec/openai/images_spec.rb +95 -0
  60. data/spec/openai/responses_spec.rb +51 -0
  61. data/spec/setup.rb +8 -0
  62. metadata +31 -49
  63. data/LICENSE.txt +0 -21
  64. data/lib/llm/conversation.rb +0 -90
  65. data/lib/llm/message_queue.rb +0 -54
data/lib/llm/file.rb CHANGED
@@ -1,45 +1,10 @@
 # frozen_string_literal: true
 
+##
+# The {LLM::File LLM::File} class represents a local file. It can
+# be used as a prompt with certain providers (eg: Ollama, Gemini),
+# and as an input with certain methods
 class LLM::File
-  ##
-  # @return [Hash]
-  #  Returns a hash of common file extensions and their
-  #  corresponding MIME types
-  def self.mime_types
-    @mime_types ||= {
-      # Images
-      ".png" => "image/png",
-      ".jpg" => "image/jpeg",
-      ".jpeg" => "image/jpeg",
-      ".webp" => "image/webp",
-
-      # Videos
-      ".flv" => "video/x-flv",
-      ".mov" => "video/quicktime",
-      ".mpeg" => "video/mpeg",
-      ".mpg" => "video/mpeg",
-      ".mp4" => "video/mp4",
-      ".webm" => "video/webm",
-      ".wmv" => "video/x-ms-wmv",
-      ".3gp" => "video/3gpp",
-
-      # Audio
-      ".aac" => "audio/aac",
-      ".flac" => "audio/flac",
-      ".mp3" => "audio/mpeg",
-      ".m4a" => "audio/mp4",
-      ".mpga" => "audio/mpeg",
-      ".opus" => "audio/opus",
-      ".pcm" => "audio/L16",
-      ".wav" => "audio/wav",
-      ".weba" => "audio/webm",
-
-      # Documents
-      ".pdf" => "application/pdf",
-      ".txt" => "text/plain"
-    }.freeze
-  end
-
   ##
   # @return [String]
   #  Returns the path to a file
@@ -53,7 +18,28 @@ class LLM::File
   # @return [String]
   #  Returns the MIME type of the file
   def mime_type
-    self.class.mime_types[File.extname(path)]
+    LLM::Mime[File.extname(path)]
+  end
+
+  ##
+  # @return [Boolean]
+  #  Returns true if the file is an image
+  def image?
+    mime_type.start_with?("image/")
+  end
+
+  ##
+  # @return [Integer]
+  #  Returns the size of the file in bytes
+  def bytesize
+    File.size(path)
+  end
+
+  ##
+  # @return [String]
+  #  Returns the file contents in base64
+  def to_b64
+    [File.binread(path)].pack("m0")
   end
 end
 
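A minimal sketch of the slimmed-down LLM::File API after this change, using the LLM::File() helper that appears in examples elsewhere in this diff (the path below is hypothetical):

  require "llm"

  file = LLM::File("/images/rocket.png")  # hypothetical local file
  file.mime_type # => "image/png" (now resolved through LLM::Mime)
  file.image?    # => true
  file.bytesize  # => File.size(file.path), in bytes
  file.to_b64    # => base64-encoded file contents, without newlines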
data/lib/llm/http_client.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 module LLM
+  ##
+  # @private
   module HTTPClient
     require "net/http"
     ##
@@ -9,6 +11,8 @@ module LLM
     #  The HTTP object to use for the request
     # @param [Net::HTTPRequest] req
     #  The request to send
+    # @param [Proc] b
+    #  A block to yield the response to (optional)
     # @return [Net::HTTPResponse]
     #  The response from the server
     # @raise [LLM::Error::Unauthorized]
@@ -19,11 +23,12 @@ module LLM
     #  When any other unsuccessful status code is returned
     # @raise [SystemCallError]
     #  When there is a network error at the operating system level
-    def request(http, req)
-      res = http.request(req)
-      res.tap(&:value)
-    rescue Net::HTTPClientException
-      error_handler.new(res).raise_error!
+    def request(http, req, &b)
+      res = http.request(req, &b)
+      case res
+      when Net::HTTPOK then res
+      else error_handler.new(res).raise_error!
+      end
     end
   end
 end
data/lib/llm/message.rb CHANGED
@@ -3,18 +3,22 @@
 module LLM
   class Message
     ##
+    # Returns the role of the message
     # @return [Symbol]
     attr_reader :role
 
     ##
+    # Returns the content of the message
     # @return [String]
     attr_reader :content
 
     ##
+    # Returns extra context associated with the message
     # @return [Hash]
     attr_reader :extra
 
     ##
+    # Returns a new message
     # @param [Symbol] role
     # @param [String] content
     # @param [Hash] extra
@@ -26,23 +30,17 @@ module LLM
     end
 
     ##
-    # @return [OpenStruct]
-    def logprobs
-      return nil unless extra.key?(:logprobs)
-      OpenStruct.from_hash(extra[:logprobs])
-    end
-
-    ##
+    # Returns a hash representation of the message
     # @return [Hash]
     def to_h
       {role:, content:}
     end
 
     ##
+    # Returns true when two objects have the same role and content
     # @param [Object] other
     #  The other object to compare
     # @return [Boolean]
-    #  Returns true when the "other" object has the same role and content
     def ==(other)
       if other.respond_to?(:to_h)
         to_h == other.to_h
@@ -51,5 +49,13 @@ module LLM
       end
     end
     alias_method :eql?, :==
+
+    ##
+    # Returns a string representation of the message
+    # @return [String]
+    def inspect
+      "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
+      "role=#{role.inspect} content=#{content.inspect}>"
+    end
   end
 end
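Because #== compares #to_h representations and accepts any object that responds to #to_h, a message can be compared against a plain hash; a short sketch:

  require "llm"

  msg = LLM::Message.new(:user, "Hello")
  msg.to_h                               # => {role: :user, content: "Hello"}
  msg == {role: :user, content: "Hello"} # => true (Hash responds to #to_h)
  msg.inspect                            # => "#<LLM::Message:0x... role=:user content=\"Hello\">"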
data/lib/llm/mime.rb ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+##
+# @private
+class LLM::Mime
+  ##
+  # Lookup a mime type
+  # @return [String, nil]
+  def self.[](key)
+    if key.respond_to?(:path)
+      types[File.extname(key.path)]
+    else
+      types[key]
+    end
+  end
+
+  ##
+  # Returns a Hash of mime types
+  # @return [Hash]
+  def self.types
+    @types ||= {
+      # Images
+      ".png" => "image/png",
+      ".jpg" => "image/jpeg",
+      ".jpeg" => "image/jpeg",
+      ".webp" => "image/webp",
+
+      # Videos
+      ".flv" => "video/x-flv",
+      ".mov" => "video/quicktime",
+      ".mpeg" => "video/mpeg",
+      ".mpg" => "video/mpeg",
+      ".mp4" => "video/mp4",
+      ".webm" => "video/webm",
+      ".wmv" => "video/x-ms-wmv",
+      ".3gp" => "video/3gpp",
+
+      # Audio
+      ".aac" => "audio/aac",
+      ".flac" => "audio/flac",
+      ".mp3" => "audio/mpeg",
+      ".m4a" => "audio/mp4",
+      ".mpga" => "audio/mpeg",
+      ".opus" => "audio/opus",
+      ".pcm" => "audio/L16",
+      ".wav" => "audio/wav",
+      ".weba" => "audio/webm",
+
+      # Documents
+      ".pdf" => "application/pdf",
+      ".txt" => "text/plain"
+    }
+  end
+end
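A short sketch of the lookup: LLM::Mime.[] accepts either an extension string or any object that responds to #path:

  require "llm"

  LLM::Mime[".png"]                   # => "image/png"
  LLM::Mime[LLM::File("/rocket.mp3")] # => "audio/mpeg" (extension taken via File.extname)
  LLM::Mime[".xyz"]                   # => nil for unknown extensions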
data/lib/llm/multipart.rb ADDED
@@ -0,0 +1,98 @@
+# encoding: ascii-8bit
+# frozen_string_literal: true
+
+##
+# @private
+class LLM::Multipart
+  require "llm"
+  require "securerandom"
+
+  ##
+  # @return [String]
+  attr_reader :boundary
+
+  ##
+  # @param [Hash] params
+  #  Request parameters
+  # @return [LLM::Multipart]
+  def initialize(params)
+    @boundary = "BOUNDARY__#{SecureRandom.hex(16)}"
+    @params = params
+  end
+
+  ##
+  # Returns the multipart content type
+  # @return [String]
+  def content_type
+    "multipart/form-data; boundary=#{@boundary}"
+  end
+
+  ##
+  # Returns the multipart request body parts
+  # @return [Array<String>]
+  def parts
+    params.map do |key, value|
+      locals = {key: key.to_s.b, boundary: boundary.to_s.b}
+      if value.respond_to?(:path)
+        file_part(key, value, locals)
+      else
+        data_part(key, value, locals)
+      end
+    end
+  end
+
+  ##
+  # Returns the multipart request body
+  # @return [String]
+  def body
+    [*parts, "--#{@boundary}--\r\n"].inject(&:<<)
+  end
+
+  private
+
+  attr_reader :params
+
+  def attributes(file)
+    {
+      filename: File.basename(file.path).b,
+      content_type: LLM::Mime[file].b
+    }
+  end
+
+  def multipart_header(type:, locals:)
+    if type == :file
+      str = "".b
+      str << "--#{locals[:boundary]}" \
+             "\r\n" \
+             "Content-Disposition: form-data; name=\"#{locals[:key]}\";" \
+             "filename=\"#{locals[:filename]}\"" \
+             "\r\n" \
+             "Content-Type: #{locals[:content_type]}" \
+             "\r\n\r\n"
+    elsif type == :data
+      str = "".b
+      str << "--#{locals[:boundary]}" \
+             "\r\n" \
+             "Content-Disposition: form-data; name=\"#{locals[:key]}\"" \
+             "\r\n\r\n"
+    else
+      raise "unknown type: #{type}"
+    end
+  end
+
+  def file_part(key, file, locals)
+    locals = locals.merge(attributes(file))
+    multipart_header(type: :file, locals:).tap do
+      _1 << File.binread(file.path)
+      _1 << "\r\n"
+    end
+  end
+
+  def data_part(key, value, locals)
+    locals = locals.merge(value:)
+    multipart_header(type: :data, locals:).tap do
+      _1 << value.to_s
+      _1 << "\r\n"
+    end
+  end
+end
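A hedged sketch of how this class can plug into Net::HTTP (the endpoint and field names below are hypothetical; values that respond to #path become file parts, everything else becomes plain form data):

  require "llm"
  require "net/http"

  multipart = LLM::Multipart.new(purpose: "assistants", file: LLM::File("/report.pdf"))
  req = Net::HTTP::Post.new("/v1/files")
  req["content-type"] = multipart.content_type # multipart/form-data; boundary=BOUNDARY__...
  req.body = multipart.body                    # all parts joined, plus the closing boundary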
data/lib/llm/provider.rb CHANGED
@@ -44,62 +44,139 @@ class LLM::Provider
   end
 
   ##
+  # Provides an embedding
   # @param [String, Array<String>] input
   #  The input to embed
+  # @param [String] model
+  #  The embedding model to use
+  # @param [Hash] params
+  #  Other embedding parameters
   # @raise [NotImplementedError]
   #  When the method is not implemented by a subclass
   # @return [LLM::Response::Embedding]
-  def embed(input, **params)
+  def embed(input, model:, **params)
     raise NotImplementedError
   end
 
   ##
-  # Completes a given prompt using the LLM
+  # Provides an interface to the chat completions API
   # @example
   #   llm = LLM.openai(ENV["KEY"])
-  #   context = [
-  #     {role: "system", content: "Answer all of my questions"},
-  #     {role: "system", content: "Your name is Pablo, you are 25 years old and you are my amigo"},
+  #   messages = [
+  #     {role: "system", content: "Your task is to answer all of my questions"},
+  #     {role: "system", content: "Your answers should be short and concise"},
   #   ]
-  #   res = llm.complete "What is your name and what age are you?", :user, messages: context
+  #   res = llm.complete("Hello. What is the answer to 5 + 2 ?", :user, messages:)
   #   print "[#{res.choices[0].role}]", res.choices[0].content, "\n"
   # @param [String] prompt
   #  The input prompt to be completed
   # @param [Symbol] role
   #  The role of the prompt (e.g. :user, :system)
-  # @param [Array<Hash, LLM::Message>] messages
-  #  The messages to include in the completion
+  # @param [String] model
+  #  The model to use for the completion
+  # @param [Hash] params
+  #  Other completion parameters
   # @raise [NotImplementedError]
   #  When the method is not implemented by a subclass
   # @return [LLM::Response::Completion]
-  def complete(prompt, role = :user, **params)
+  def complete(prompt, role = :user, model:, **params)
     raise NotImplementedError
   end
 
   ##
-  # Starts a new lazy conversation
+  # Starts a new lazy chat powered by the chat completions API
+  # @note
+  #  This method creates a lazy version of a
+  #  {LLM::Chat LLM::Chat} object.
+  # @param prompt (see LLM::Provider#complete)
+  # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
+  # @raise (see LLM::Provider#complete)
+  # @return [LLM::Chat]
+  def chat(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).lazy.chat(prompt, role)
+  end
+
+  ##
+  # Starts a new chat powered by the chat completions API
+  # @note
+  #  This method creates a non-lazy version of a
+  #  {LLM::Chat LLM::Chat} object.
+  # @param prompt (see LLM::Provider#complete)
+  # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
+  # @raise (see LLM::Provider#complete)
+  # @return [LLM::Chat]
+  def chat!(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).chat(prompt, role)
+  end
+
+  ##
+  # Starts a new lazy chat powered by the responses API
   # @note
   #  This method creates a lazy variant of a
-  #  {LLM::Conversation LLM::Conversation} object.
+  #  {LLM::Chat LLM::Chat} object.
   # @param prompt (see LLM::Provider#complete)
   # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
   # @raise (see LLM::Provider#complete)
-  # @return [LLM::LazyConversation]
-  def chat(prompt, role = :user, **params)
-    LLM::Conversation.new(self, params).lazy.chat(prompt, role)
+  # @return [LLM::Chat]
+  def respond(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).lazy.respond(prompt, role)
   end
 
   ##
-  # Starts a new conversation
+  # Starts a new chat powered by the responses API
   # @note
   #  This method creates a non-lazy variant of a
-  #  {LLM::Conversation LLM::Conversation} object.
+  #  {LLM::Chat LLM::Chat} object.
   # @param prompt (see LLM::Provider#complete)
   # @param role (see LLM::Provider#complete)
+  # @param model (see LLM::Provider#complete)
+  # @param [Hash] params
+  #  Other completion parameters to maintain throughout a chat
   # @raise (see LLM::Provider#complete)
-  # @return [LLM::Conversation]
-  def chat!(prompt, role = :user, **params)
-    LLM::Conversation.new(self, params).chat(prompt, role)
+  # @return [LLM::Chat]
+  def respond!(prompt, role = :user, model: nil, **params)
+    LLM::Chat.new(self, params).respond(prompt, role)
+  end
+
+  ##
+  # @note
+  #  Compared to the chat completions API, the responses API
+  #  can require less bandwidth on each turn, maintain state
+  #  server-side, and produce faster responses.
+  # @return [LLM::OpenAI::Responses]
+  #  Returns an interface to the responses API
+  def responses
+    raise NotImplementedError
+  end
+
+  ##
+  # @return [LLM::OpenAI::Images, LLM::Gemini::Images]
+  #  Returns an interface to the images API
+  def images
+    raise NotImplementedError
+  end
+
+  ##
+  # @return [LLM::OpenAI::Audio]
+  #  Returns an interface to the audio API
+  def audio
+    raise NotImplementedError
+  end
+
+  ##
+  # @return [LLM::OpenAI::Files]
+  #  Returns an interface to the files API
+  def files
+    raise NotImplementedError
   end
 
   ##
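Taken together, the new entry points pair a lazy and an eager variant for each API; a sketch using the signatures above (key and prompts are placeholders):

  require "llm"

  llm = LLM.openai(ENV["KEY"])

  # Chat completions API: chat is lazy, chat! is eager
  bot = llm.chat("Your answers should be short and concise", :system)
  bot.chat("What is the answer to 5 + 2 ?")

  # Responses API: respond is lazy, respond! is eager
  bot = llm.respond("Your answers should be short and concise", :system)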
data/lib/llm/providers/anthropic/error_handler.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]
data/lib/llm/providers/anthropic/format.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   module Format
     ##
     # @param [Array<LLM::Message>] messages
data/lib/llm/providers/anthropic/response_parser.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Anthropic
+  ##
+  # @private
   module ResponseParser
     def parse_embedding(body)
       {
@@ -19,7 +21,7 @@ class LLM::Anthropic
       model: body["model"],
       choices: body["content"].map do
         # TODO: don't hardcode role
-        LLM::Message.new("assistant", _1["text"], {completion: self})
+        LLM::Message.new("assistant", _1["text"], {response: self})
       end,
       prompt_tokens: body.dig("usage", "input_tokens"),
      completion_tokens: body.dig("usage", "output_tokens")
data/lib/llm/providers/anthropic.rb CHANGED
@@ -24,21 +24,30 @@ module LLM
     # @param input (see LLM::Provider#embed)
     # @param [String] token
     #  Valid token for the VoyageAI API
+    # @param [String] model
+    #  The embedding model to use
     # @param [Hash] params
-    #  Additional parameters to pass to the API
+    #  Other embedding parameters
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#embed)
-    def embed(input, token:, **params)
+    def embed(input, token:, model: "voyage-2", **params)
       llm = LLM.voyageai(token)
-      llm.embed(input, **params)
+      llm.embed(input, **params.merge(model:))
     end
 
     ##
+    # Provides an interface to the chat completions API
     # @see https://docs.anthropic.com/en/api/messages Anthropic docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
+    # @param model (see LLM::Provider#complete)
+    # @param max_tokens The maximum number of tokens to generate
+    # @param params (see LLM::Provider#complete)
+    # @example (see LLM::Provider#complete)
+    # @raise (see LLM::HTTPClient#request)
     # @return (see LLM::Provider#complete)
-    def complete(prompt, role = :user, **params)
-      params = {max_tokens: 1024, model: "claude-3-5-sonnet-20240620"}.merge!(params)
+    def complete(prompt, role = :user, model: "claude-3-5-sonnet-20240620", max_tokens: 1024, **params)
+      params = {max_tokens:, model:}.merge!(params)
       req = Net::HTTP::Post.new("/v1/messages", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
       req.body = JSON.dump({messages: format(messages)}.merge!(params))
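A sketch of the reworked signature, assuming an LLM.anthropic constructor that mirrors the LLM.openai and LLM.gemini constructors seen elsewhere in this diff; model: and max_tokens: now default to the values that were previously merged in as a hash:

  require "llm"

  llm = LLM.anthropic(ENV["KEY"])
  res = llm.complete("Hello. What is the answer to 5 + 2 ?", :user,
                     model: "claude-3-5-sonnet-20240620", # default
                     max_tokens: 1024)                    # default
  print "[#{res.choices[0].role}]", res.choices[0].content, "\n"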
data/lib/llm/providers/gemini/audio.rb ADDED
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+class LLM::Gemini
+  ##
+  # The {LLM::Gemini::Audio LLM::Gemini::Audio} class provides an audio
+  # object for interacting with [Gemini's audio API](https://ai.google.dev/gemini-api/docs/audio).
+  # @example
+  #   #!/usr/bin/env ruby
+  #   require "llm"
+  #
+  #   llm = LLM.gemini(ENV["KEY"])
+  #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+  #   res.text # => "A dog on a rocket to the moon"
+  class Audio
+    ##
+    # Returns a new Audio object
+    # @param provider [LLM::Provider]
+    # @return [LLM::Gemini::Audio]
+    def initialize(provider)
+      @provider = provider
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    #  This method is not implemented by Gemini
+    def create_speech
+      raise NotImplementedError
+    end
+
+    ##
+    # Create an audio transcription
+    # @example
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+    #   res.text # => "A dog on a rocket to the moon"
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::AudioTranscription]
+    def create_transcription(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to transcribe the contents of an audio file",
+        "Your response should include the transcription, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranscription
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+
+    ##
+    # Create an audio translation (in English)
+    # @example
+    #   # Arabic => English
+    #   llm = LLM.gemini(ENV["KEY"])
+    #   res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+    #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
+    # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+    # @param [LLM::File, LLM::Response::File] file The input audio
+    # @param [String] model The model to use
+    # @param [Hash] params Other parameters (see Gemini docs)
+    # @raise (see LLM::HTTPClient#request)
+    # @return [LLM::Response::AudioTranslation]
+    def create_translation(file:, model: "gemini-1.5-flash", **params)
+      res = @provider.complete [
+        "Your task is to translate the contents of an audio file into English",
+        "Your response should include the translation, and nothing else",
+        file
+      ], :user, model:, **params
+      LLM::Response::AudioTranslation
+        .new(res)
+        .tap { _1.text = res.choices[0].content }
+    end
+  end
+end
data/lib/llm/providers/gemini/error_handler.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class LLM::Gemini
+  ##
+  # @private
   class ErrorHandler
     ##
     # @return [Net::HTTPResponse]