llm.rb 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/README.md +264 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/file.rb +26 -40
  6. data/lib/llm/http_client.rb +10 -5
  7. data/lib/llm/message.rb +14 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +98 -0
  10. data/lib/llm/provider.rb +116 -12
  11. data/lib/llm/providers/anthropic/error_handler.rb +2 -0
  12. data/lib/llm/providers/anthropic/format.rb +9 -1
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +2 -0
  17. data/lib/llm/providers/gemini/files.rb +160 -0
  18. data/lib/llm/providers/gemini/format.rb +19 -7
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +2 -0
  23. data/lib/llm/providers/ollama/format.rb +18 -4
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +2 -0
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +26 -7
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +78 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai.rb +2 -2
  35. data/lib/llm/response/audio.rb +13 -0
  36. data/lib/llm/response/audio_transcription.rb +14 -0
  37. data/lib/llm/response/audio_translation.rb +14 -0
  38. data/lib/llm/response/download_file.rb +15 -0
  39. data/lib/llm/response/file.rb +42 -0
  40. data/lib/llm/response/filelist.rb +18 -0
  41. data/lib/llm/response/image.rb +29 -0
  42. data/lib/llm/response/output.rb +56 -0
  43. data/lib/llm/response.rb +18 -6
  44. data/lib/llm/utils.rb +19 -0
  45. data/lib/llm/version.rb +1 -1
  46. data/lib/llm.rb +5 -2
  47. data/llm.gemspec +1 -6
  48. data/spec/anthropic/completion_spec.rb +1 -1
  49. data/spec/gemini/completion_spec.rb +22 -1
  50. data/spec/gemini/conversation_spec.rb +31 -0
  51. data/spec/gemini/files_spec.rb +124 -0
  52. data/spec/gemini/images_spec.rb +47 -0
  53. data/spec/llm/conversation_spec.rb +133 -1
  54. data/spec/ollama/completion_spec.rb +1 -1
  55. data/spec/ollama/conversation_spec.rb +31 -0
  56. data/spec/openai/audio_spec.rb +55 -0
  57. data/spec/openai/completion_spec.rb +22 -1
  58. data/spec/openai/files_spec.rb +150 -0
  59. data/spec/openai/images_spec.rb +95 -0
  60. data/spec/openai/responses_spec.rb +51 -0
  61. data/spec/setup.rb +8 -0
  62. metadata +31 -51
  63. data/LICENSE.txt +0 -21
  64. data/lib/llm/conversation.rb +0 -50
  65. data/lib/llm/lazy_conversation.rb +0 -51
  66. data/lib/llm/message_queue.rb +0 -47
  67. data/spec/llm/lazy_conversation_spec.rb +0 -92
data/lib/llm/file.rb CHANGED
@@ -1,45 +1,10 @@
  # frozen_string_literal: true
 
+ ##
+ # The {LLM::File LLM::File} class represents a local file. It can
+ # be used as a prompt with certain providers (eg: Ollama, Gemini),
+ # and as an input with certain methods
  class LLM::File
- ##
- # @return [Hash]
- # Returns a hash of common file extensions and their
- # corresponding MIME types
- def self.mime_types
- @mime_types ||= {
- # Images
- ".png" => "image/png",
- ".jpg" => "image/jpeg",
- ".jpeg" => "image/jpeg",
- ".webp" => "image/webp",
-
- # Videos
- ".flv" => "video/x-flv",
- ".mov" => "video/quicktime",
- ".mpeg" => "video/mpeg",
- ".mpg" => "video/mpeg",
- ".mp4" => "video/mp4",
- ".webm" => "video/webm",
- ".wmv" => "video/x-ms-wmv",
- ".3gp" => "video/3gpp",
-
- # Audio
- ".aac" => "audio/aac",
- ".flac" => "audio/flac",
- ".mp3" => "audio/mpeg",
- ".m4a" => "audio/mp4",
- ".mpga" => "audio/mpeg",
- ".opus" => "audio/opus",
- ".pcm" => "audio/L16",
- ".wav" => "audio/wav",
- ".weba" => "audio/webm",
-
- # Documents
- ".pdf" => "application/pdf",
- ".txt" => "text/plain"
- }.freeze
- end
-
  ##
  # @return [String]
  # Returns the path to a file
@@ -53,7 +18,28 @@ class LLM::File
  # @return [String]
  # Returns the MIME type of the file
  def mime_type
- self.class.mime_types[File.extname(path)]
+ LLM::Mime[File.extname(path)]
+ end
+
+ ##
+ # @return [String]
+ # Returns true if the file is an image
+ def image?
+ mime_type.start_with?("image/")
+ end
+
+ ##
+ # @return [Integer]
+ # Returns the size of the file in bytes
+ def bytesize
+ File.size(path)
+ end
+
+ ##
+ # @return [String]
+ # Returns the file contents in base64
+ def to_b64
+ [File.binread(path)].pack("m0")
  end
  end
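The slimmed-down LLM::File now delegates MIME lookup to LLM::Mime and gains three small helpers. A minimal usage sketch ("photo.png" is a hypothetical local file, used for illustration only):

    require "llm"

    file = LLM::File("photo.png")
    file.mime_type # => "image/png" (resolved through LLM::Mime)
    file.image?    # => true
    file.bytesize  # => File.size("photo.png")
    file.to_b64    # => the file contents, base64-encoded
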
data/lib/llm/http_client.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true
 
  module LLM
+ ##
+ # @private
  module HTTPClient
  require "net/http"
  ##
@@ -9,6 +11,8 @@ module LLM
  # The HTTP object to use for the request
  # @param [Net::HTTPRequest] req
  # The request to send
+ # @param [Proc] b
+ # A block to yield the response to (optional)
  # @return [Net::HTTPResponse]
  # The response from the server
  # @raise [LLM::Error::Unauthorized]
@@ -19,11 +23,12 @@ module LLM
  # When any other unsuccessful status code is returned
  # @raise [SystemCallError]
  # When there is a network error at the operating system level
- def request(http, req)
- res = http.request(req)
- res.tap(&:value)
- rescue Net::HTTPClientException
- error_handler.new(res).raise_error!
+ def request(http, req, &b)
+ res = http.request(req, &b)
+ case res
+ when Net::HTTPOK then res
+ else error_handler.new(res).raise_error!
+ end
  end
  end
  end
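The rewritten request method checks the status code directly instead of relying on Net::HTTPResponse#value, and it now forwards an optional block to Net::HTTP#request. The block form is plain Net::HTTP behaviour: the response is yielded before its body is buffered, which is what makes streaming reads (eg file downloads) possible. A standalone sketch of that mechanism, independent of llm.rb (the URL is hypothetical):

    require "net/http"

    uri = URI("https://example.com/large.bin")
    Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
      req = Net::HTTP::Get.new(uri)
      # The response is yielded before the body is read,
      # so the body can be consumed chunk by chunk.
      http.request(req) do |res|
        res.read_body { |chunk| print chunk }
      end
    end

One behavioural note visible in the diff: the old code rescued Net::HTTPClientException, while the new code treats any response other than Net::HTTPOK as an error.
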
data/lib/llm/message.rb CHANGED
@@ -3,18 +3,22 @@
  module LLM
  class Message
  ##
+ # Returns the role of the message
  # @return [Symbol]
  attr_reader :role
 
  ##
+ # Returns the content of the message
  # @return [String]
  attr_reader :content
 
  ##
+ # Returns extra context associated with the message
  # @return [Hash]
  attr_reader :extra
 
  ##
+ # Returns a new message
  # @param [Symbol] role
  # @param [String] content
  # @param [Hash] extra
@@ -26,23 +30,17 @@ module LLM
  end
 
  ##
- # @return [OpenStruct]
- def logprobs
- return nil unless extra.key?(:logprobs)
- OpenStruct.from_hash(extra[:logprobs])
- end
-
- ##
+ # Returns a hash representation of the message
  # @return [Hash]
  def to_h
  {role:, content:}
  end
 
  ##
+ # Returns true when two objects have the same role and content
  # @param [Object] other
  # The other object to compare
  # @return [Boolean]
- # Returns true when the "other" object has the same role and content
  def ==(other)
  if other.respond_to?(:to_h)
  to_h == other.to_h
@@ -51,5 +49,13 @@ module LLM
  end
  end
  alias_method :eql?, :==
+
+ ##
+ # Returns a string representation of the message
+ # @return [String]
+ def inspect
+ "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
+ "role=#{role.inspect} content=#{content.inspect}>"
+ end
  end
  end
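Given the == semantics above, a message compares equal to any object whose to_h yields the same role and content. A short sketch:

    require "llm"

    msg = LLM::Message.new(:user, "Hello")
    msg == LLM::Message.new(:user, "Hello") # => true
    msg == {role: :user, content: "Hello"}  # => true (Hash responds to :to_h)
    msg.inspect # => #<LLM::Message:0x... role=:user content="Hello">
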
data/lib/llm/mime.rb ADDED
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ ##
+ # @private
+ class LLM::Mime
+ ##
+ # Lookup a mime type
+ # @return [String, nil]
+ def self.[](key)
+ if key.respond_to?(:path)
+ types[File.extname(key.path)]
+ else
+ types[key]
+ end
+ end
+
+ ##
+ # Returns a Hash of mime types
+ # @return [Hash]
+ def self.types
+ @types ||= {
+ # Images
+ ".png" => "image/png",
+ ".jpg" => "image/jpeg",
+ ".jpeg" => "image/jpeg",
+ ".webp" => "image/webp",
+
+ # Videos
+ ".flv" => "video/x-flv",
+ ".mov" => "video/quicktime",
+ ".mpeg" => "video/mpeg",
+ ".mpg" => "video/mpeg",
+ ".mp4" => "video/mp4",
+ ".webm" => "video/webm",
+ ".wmv" => "video/x-ms-wmv",
+ ".3gp" => "video/3gpp",
+
+ # Audio
+ ".aac" => "audio/aac",
+ ".flac" => "audio/flac",
+ ".mp3" => "audio/mpeg",
+ ".m4a" => "audio/mp4",
+ ".mpga" => "audio/mpeg",
+ ".opus" => "audio/opus",
+ ".pcm" => "audio/L16",
+ ".wav" => "audio/wav",
+ ".weba" => "audio/webm",
+
+ # Documents
+ ".pdf" => "application/pdf",
+ ".txt" => "text/plain"
+ }
+ end
+ end
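The MIME table that previously lived on LLM::File.mime_types now sits behind LLM::Mime.[], which accepts either an extension string or any object that responds to #path. A quick sketch ("song.mp3" is a hypothetical file):

    require "llm"

    LLM::Mime[".png"]                # => "image/png"
    LLM::Mime[LLM::File("song.mp3")] # => "audio/mpeg", via File.extname(key.path)
    LLM::Mime[".xyz"]                # => nil
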
data/lib/llm/multipart.rb ADDED
@@ -0,0 +1,98 @@
+ # encoding: ascii-8bit
+ # frozen_string_literal: true
+
+ ##
+ # @private
+ class LLM::Multipart
+ require "llm"
+ require "securerandom"
+
+ ##
+ # @return [String]
+ attr_reader :boundary
+
+ ##
+ # @param [Hash] params
+ # Request parameters
+ # @return [LLM::Multipart]
+ def initialize(params)
+ @boundary = "BOUNDARY__#{SecureRandom.hex(16)}"
+ @params = params
+ end
+
+ ##
+ # Returns the multipart content type
+ # @return [String]
+ def content_type
+ "multipart/form-data; boundary=#{@boundary}"
+ end
+
+ ##
+ # Returns the multipart request body parts
+ # @return [Array<String>]
+ def parts
+ params.map do |key, value|
+ locals = {key: key.to_s.b, boundary: boundary.to_s.b}
+ if value.respond_to?(:path)
+ file_part(key, value, locals)
+ else
+ data_part(key, value, locals)
+ end
+ end
+ end
+
+ ##
+ # Returns the multipart request body
+ # @return [String]
+ def body
+ [*parts, "--#{@boundary}--\r\n"].inject(&:<<)
+ end
+
+ private
+
+ attr_reader :params
+
+ def attributes(file)
+ {
+ filename: File.basename(file.path).b,
+ content_type: LLM::Mime[file].b
+ }
+ end
+
+ def multipart_header(type:, locals:)
+ if type == :file
+ str = "".b
+ str << "--#{locals[:boundary]}" \
+ "\r\n" \
+ "Content-Disposition: form-data; name=\"#{locals[:key]}\";" \
+ "filename=\"#{locals[:filename]}\"" \
+ "\r\n" \
+ "Content-Type: #{locals[:content_type]}" \
+ "\r\n\r\n"
+ elsif type == :data
+ str = "".b
+ str << "--#{locals[:boundary]}" \
+ "\r\n" \
+ "Content-Disposition: form-data; name=\"#{locals[:key]}\"" \
+ "\r\n\r\n"
+ else
+ raise "unknown type: #{type}"
+ end
+ end
+
+ def file_part(key, file, locals)
+ locals = locals.merge(attributes(file))
+ multipart_header(type: :file, locals:).tap do
+ _1 << File.binread(file.path)
+ _1 << "\r\n"
+ end
+ end
+
+ def data_part(key, value, locals)
+ locals = locals.merge(value:)
+ multipart_header(type: :data, locals:).tap do
+ _1 << value.to_s
+ _1 << "\r\n"
+ end
+ end
+ end
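LLM::Multipart builds a multipart/form-data body by hand: each param becomes a file part (when the value responds to #path) or a data part, and the parts are joined with a random boundary. A minimal sketch of how it pairs with Net::HTTP (the endpoint and file are hypothetical):

    require "llm"
    require "net/http"

    multipart = LLM::Multipart.new(purpose: "assistants", file: LLM::File("audio.mp3"))
    req = Net::HTTP::Post.new("/v1/files")
    req["Content-Type"] = multipart.content_type # includes the boundary
    req.body = multipart.body                    # parts + closing boundary
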
data/lib/llm/provider.rb CHANGED
@@ -2,7 +2,18 @@
 
  ##
  # The Provider class represents an abstract class for
- # LLM (Language Model) providers
+ # LLM (Language Model) providers.
+ #
+ # @note
+ # This class is not meant to be instantiated directly.
+ # Instead, use one of the subclasses that implement
+ # the methods defined here.
+ #
+ # @abstract
+ # @see LLM::Provider::OpenAI
+ # @see LLM::Provider::Anthropic
+ # @see LLM::Provider::Gemini
+ # @see LLM::Provider::Ollama
  class LLM::Provider
  require_relative "http_client"
  include LLM::HTTPClient
@@ -33,46 +44,139 @@ class LLM::Provider
  end
 
  ##
+ # Provides an embedding
  # @param [String, Array<String>] input
  # The input to embed
+ # @param [String] model
+ # The embedding model to use
+ # @param [Hash] params
+ # Other embedding parameters
  # @raise [NotImplementedError]
  # When the method is not implemented by a subclass
  # @return [LLM::Response::Embedding]
- def embed(input, **params)
+ def embed(input, model:, **params)
  raise NotImplementedError
  end
 
  ##
- # Completes a given prompt using the LLM
+ # Provides an interface to the chat completions API
+ # @example
+ # llm = LLM.openai(ENV["KEY"])
+ # messages = [
+ # {role: "system", content: "Your task is to answer all of my questions"},
+ # {role: "system", content: "Your answers should be short and concise"},
+ # ]
+ # res = llm.complete("Hello. What is the answer to 5 + 2 ?", :user, messages:)
+ # print "[#{res.choices[0].role}]", res.choices[0].content, "\n"
  # @param [String] prompt
  # The input prompt to be completed
  # @param [Symbol] role
  # The role of the prompt (e.g. :user, :system)
+ # @param [String] model
+ # The model to use for the completion
+ # @param [Hash] params
+ # Other completion parameters
  # @raise [NotImplementedError]
  # When the method is not implemented by a subclass
  # @return [LLM::Response::Completion]
- def complete(prompt, role = :user, **params)
+ def complete(prompt, role = :user, model:, **params)
  raise NotImplementedError
  end
 
  ##
- # Starts a new lazy conversation
+ # Starts a new lazy chat powered by the chat completions API
+ # @note
+ # This method creates a lazy version of a
+ # {LLM::Chat LLM::Chat} object.
  # @param prompt (see LLM::Provider#complete)
  # @param role (see LLM::Provider#complete)
+ # @param model (see LLM::Provider#complete)
+ # @param [Hash] params
+ # Other completion parameters to maintain throughout a chat
  # @raise (see LLM::Provider#complete)
- # @return [LLM::LazyConversation]
- def chat(prompt, role = :user, **params)
- LLM::LazyConversation.new(self, params).chat(prompt, role)
+ # @return [LLM::Chat]
+ def chat(prompt, role = :user, model: nil, **params)
+ LLM::Chat.new(self, params).lazy.chat(prompt, role)
  end
 
  ##
- # Starts a new conversation
+ # Starts a new chat powered by the chat completions API
+ # @note
+ # This method creates a non-lazy version of a
+ # {LLM::Chat LLM::Chat} object.
  # @param prompt (see LLM::Provider#complete)
  # @param role (see LLM::Provider#complete)
+ # @param model (see LLM::Provider#complete)
+ # @param [Hash] params
+ # Other completion parameters to maintain throughout a chat
  # @raise (see LLM::Provider#complete)
- # @return [LLM::Conversation]
- def chat!(prompt, role = :user, **params)
- LLM::Conversation.new(self, params).chat(prompt, role)
+ # @return [LLM::Chat]
+ def chat!(prompt, role = :user, model: nil, **params)
+ LLM::Chat.new(self, params).chat(prompt, role)
+ end
+
+ ##
+ # Starts a new lazy chat powered by the responses API
+ # @note
+ # This method creates a lazy variant of a
+ # {LLM::Chat LLM::Chat} object.
+ # @param prompt (see LLM::Provider#complete)
+ # @param role (see LLM::Provider#complete)
+ # @param model (see LLM::Provider#complete)
+ # @param [Hash] params
+ # Other completion parameters to maintain throughout a chat
+ # @raise (see LLM::Provider#complete)
+ # @return [LLM::Chat]
+ def respond(prompt, role = :user, model: nil, **params)
+ LLM::Chat.new(self, params).lazy.respond(prompt, role)
+ end
+
+ ##
+ # Starts a new chat powered by the responses API
+ # @note
+ # This method creates a non-lazy variant of a
+ # {LLM::Chat LLM::Chat} object.
+ # @param prompt (see LLM::Provider#complete)
+ # @param role (see LLM::Provider#complete)
+ # @param model (see LLM::Provider#complete)
+ # @param [Hash] params
+ # Other completion parameters to maintain throughout a chat
+ # @raise (see LLM::Provider#complete)
+ # @return [LLM::Chat]
+ def respond!(prompt, role = :user, model: nil, **params)
+ LLM::Chat.new(self, params).respond(prompt, role)
+ end
+
+ ##
+ # @note
+ # Compared to the chat completions API, the responses API
+ # can require less bandwidth on each turn, maintain state
+ # server-side, and produce faster responses.
+ # @return [LLM::OpenAI::Responses]
+ # Returns an interface to the responses API
+ def responses
+ raise NotImplementedError
+ end
+
+ ##
+ # @return [LLM::OpenAI::Images, LLM::Gemini::Images]
+ # Returns an interface to the images API
+ def images
+ raise NotImplementedError
+ end
+
+ ##
+ # @return [LLM::OpenAI::Audio]
+ # Returns an interface to the audio API
+ def audio
+ raise NotImplementedError
+ end
+
+ ##
+ # @return [LLM::OpenAI::Files]
+ # Returns an interface to the files API
+ def files
+ raise NotImplementedError
  end
 
  ##
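Taken together, the provider now exposes two pairs of chat constructors: chat/chat! over the chat completions API and respond/respond! over the responses API, with the bang variants being non-lazy. A minimal sketch of the lazy flow (OpenAI shown; the messages accessor is assumed to enumerate the lazy buffer added in llm/buffer.rb):

    #!/usr/bin/env ruby
    require "llm"

    llm = LLM.openai(ENV["KEY"])
    bot = llm.chat("Your answers should be short and concise", :system)
    bot.chat("What is 5 + 2?", :user)
    # Lazy: no request is sent until the messages are enumerated
    bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
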
data/lib/llm/providers/anthropic/error_handler.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true
 
  class LLM::Anthropic
+ ##
+ # @private
  class ErrorHandler
  ##
  # @return [Net::HTTPResponse]
data/lib/llm/providers/anthropic/format.rb CHANGED
@@ -1,13 +1,21 @@
  # frozen_string_literal: true
 
  class LLM::Anthropic
+ ##
+ # @private
  module Format
  ##
  # @param [Array<LLM::Message>] messages
  # The messages to format
  # @return [Array<Hash>]
  def format(messages)
- messages.map { {role: _1.role, content: format_content(_1.content)} }
+ messages.map do
+ if Hash === _1
+ {role: _1[:role], content: format_content(_1[:content])}
+ else
+ {role: _1.role, content: format_content(_1.content)}
+ end
+ end
  end
 
  private
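The widened format method means callers can mix LLM::Message objects and plain hashes in the same messages array; both shapes are normalized into {role:, content:} hashes before being sent to the API. Illustrative input (format is a private formatting module, shown here out of context):

    format([
      LLM::Message.new(:user, "Hello"),
      {role: :user, content: "Hello"} # now accepted as well
    ])
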
data/lib/llm/providers/anthropic/response_parser.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true
 
  class LLM::Anthropic
+ ##
+ # @private
  module ResponseParser
  def parse_embedding(body)
  {
@@ -19,7 +21,7 @@ class LLM::Anthropic
  model: body["model"],
  choices: body["content"].map do
  # TODO: don't hardcode role
- LLM::Message.new("assistant", _1["text"], {completion: self})
+ LLM::Message.new("assistant", _1["text"], {response: self})
  end,
  prompt_tokens: body.dig("usage", "input_tokens"),
  completion_tokens: body.dig("usage", "output_tokens")
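A small consequence of the key rename above: the parsed completion now hangs off each choice's extra hash under :response rather than :completion. A hypothetical read, assuming choices surface the extra hash via LLM::Message#extra:

    res = llm.complete("Hello", :user)
    res.choices[0].extra[:response] # => the enclosing completion (was :completion in 0.2.0)
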
data/lib/llm/providers/anthropic.rb CHANGED
@@ -24,21 +24,30 @@ module LLM
  # @param input (see LLM::Provider#embed)
  # @param [String] token
  # Valid token for the VoyageAI API
+ # @param [String] model
+ # The embedding model to use
  # @param [Hash] params
- # Additional parameters to pass to the API
+ # Other embedding parameters
+ # @raise (see LLM::HTTPClient#request)
  # @return (see LLM::Provider#embed)
- def embed(input, token:, **params)
+ def embed(input, token:, model: "voyage-2", **params)
  llm = LLM.voyageai(token)
- llm.embed(input, **params)
+ llm.embed(input, **params.merge(model:))
  end
 
  ##
+ # Provides an interface to the chat completions API
  # @see https://docs.anthropic.com/en/api/messages Anthropic docs
  # @param prompt (see LLM::Provider#complete)
  # @param role (see LLM::Provider#complete)
+ # @param model (see LLM::Provider#complete)
+ # @param max_tokens The maximum number of tokens to generate
+ # @param params (see LLM::Provider#complete)
+ # @example (see LLM::Provider#complete)
+ # @raise (see LLM::HTTPClient#request)
  # @return (see LLM::Provider#complete)
- def complete(prompt, role = :user, **params)
- params = {max_tokens: 1024, model: "claude-3-5-sonnet-20240620"}.merge!(params)
+ def complete(prompt, role = :user, model: "claude-3-5-sonnet-20240620", max_tokens: 1024, **params)
+ params = {max_tokens:, model:}.merge!(params)
  req = Net::HTTP::Post.new("/v1/messages", headers)
  messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
  req.body = JSON.dump({messages: format(messages)}.merge!(params))
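With the new signature, model and max_tokens are real keyword parameters with defaults rather than entries merged into params, so either can be overridden per call. A minimal sketch (assuming an LLM.anthropic constructor that mirrors LLM.openai and LLM.gemini):

    llm = LLM.anthropic(ENV["KEY"])
    res = llm.complete("Hello", :user, max_tokens: 512)
    print "[#{res.choices[0].role}] ", res.choices[0].content, "\n"
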
data/lib/llm/providers/gemini/audio.rb ADDED
@@ -0,0 +1,77 @@
+ # frozen_string_literal: true
+
+ class LLM::Gemini
+ ##
+ # The {LLM::Gemini::Audio LLM::Gemini::Audio} class provides an audio
+ # object for interacting with [Gemini's audio API](https://ai.google.dev/gemini-api/docs/audio).
+ # @example
+ # #!/usr/bin/env ruby
+ # require "llm"
+ #
+ # llm = LLM.gemini(ENV["KEY"])
+ # res = llm.audio.create_transcription(input: LLM::File("/rocket.mp3"))
+ # res.text # => "A dog on a rocket to the moon"
+ class Audio
+ ##
+ # Returns a new Audio object
+ # @param provider [LLM::Provider]
+ # @return [LLM::Gemini::Responses]
+ def initialize(provider)
+ @provider = provider
+ end
+
+ ##
+ # @raise [NotImplementedError]
+ # This method is not implemented by Gemini
+ def create_speech
+ raise NotImplementedError
+ end
+
+ ##
+ # Create an audio transcription
+ # @example
+ # llm = LLM.gemini(ENV["KEY"])
+ # res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+ # res.text # => "A dog on a rocket to the moon"
+ # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+ # @param [LLM::File, LLM::Response::File] file The input audio
+ # @param [String] model The model to use
+ # @param [Hash] params Other parameters (see Gemini docs)
+ # @raise (see LLM::HTTPClient#request)
+ # @return [LLM::Response::AudioTranscription]
+ def create_transcription(file:, model: "gemini-1.5-flash", **params)
+ res = @provider.complete [
+ "Your task is to transcribe the contents of an audio file",
+ "Your response should include the transcription, and nothing else",
+ file
+ ], :user, model:, **params
+ LLM::Response::AudioTranscription
+ .new(res)
+ .tap { _1.text = res.choices[0].content }
+ end
+
+ ##
+ # Create an audio translation (in English)
+ # @example
+ # # Arabic => English
+ # llm = LLM.gemini(ENV["KEY"])
+ # res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+ # res.text # => "In the name of Allah, the Beneficent, the Merciful."
+ # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+ # @param [LLM::File, LLM::Response::File] file The input audio
+ # @param [String] model The model to use
+ # @param [Hash] params Other parameters (see Gemini docs)
+ # @raise (see LLM::HTTPClient#request)
+ # @return [LLM::Response::AudioTranslation]
+ def create_translation(file:, model: "gemini-1.5-flash", **params)
+ res = @provider.complete [
+ "Your task is to translate the contents of an audio file into English",
+ "Your response should include the translation, and nothing else",
+ file
+ ], :user, model:, **params
+ LLM::Response::AudioTranslation
+ .new(res)
+ .tap { _1.text = res.choices[0].content }
+ end
+ end
+ end
data/lib/llm/providers/gemini/error_handler.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true
 
  class LLM::Gemini
+ ##
+ # @private
  class ErrorHandler
  ##
  # @return [Net::HTTPResponse]