llm.rb 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +264 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/file.rb +26 -40
  6. data/lib/llm/http_client.rb +10 -5
  7. data/lib/llm/message.rb +14 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +98 -0
  10. data/lib/llm/provider.rb +96 -19
  11. data/lib/llm/providers/anthropic/error_handler.rb +2 -0
  12. data/lib/llm/providers/anthropic/format.rb +2 -0
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +2 -0
  17. data/lib/llm/providers/gemini/files.rb +160 -0
  18. data/lib/llm/providers/gemini/format.rb +12 -6
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +2 -0
  23. data/lib/llm/providers/ollama/format.rb +13 -5
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +2 -0
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +21 -8
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +78 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai.rb +2 -2
  35. data/lib/llm/response/audio.rb +13 -0
  36. data/lib/llm/response/audio_transcription.rb +14 -0
  37. data/lib/llm/response/audio_translation.rb +14 -0
  38. data/lib/llm/response/download_file.rb +15 -0
  39. data/lib/llm/response/file.rb +42 -0
  40. data/lib/llm/response/filelist.rb +18 -0
  41. data/lib/llm/response/image.rb +29 -0
  42. data/lib/llm/response/output.rb +56 -0
  43. data/lib/llm/response.rb +18 -6
  44. data/lib/llm/utils.rb +19 -0
  45. data/lib/llm/version.rb +1 -1
  46. data/lib/llm.rb +5 -2
  47. data/llm.gemspec +1 -6
  48. data/spec/anthropic/completion_spec.rb +1 -1
  49. data/spec/gemini/completion_spec.rb +1 -1
  50. data/spec/gemini/conversation_spec.rb +31 -0
  51. data/spec/gemini/files_spec.rb +124 -0
  52. data/spec/gemini/images_spec.rb +47 -0
  53. data/spec/llm/conversation_spec.rb +101 -61
  54. data/spec/ollama/completion_spec.rb +1 -1
  55. data/spec/ollama/conversation_spec.rb +31 -0
  56. data/spec/openai/audio_spec.rb +55 -0
  57. data/spec/openai/completion_spec.rb +1 -1
  58. data/spec/openai/files_spec.rb +150 -0
  59. data/spec/openai/images_spec.rb +95 -0
  60. data/spec/openai/responses_spec.rb +51 -0
  61. data/spec/setup.rb +8 -0
  62. metadata +31 -49
  63. data/LICENSE.txt +0 -21
  64. data/lib/llm/conversation.rb +0 -90
  65. data/lib/llm/message_queue.rb +0 -54
@@ -0,0 +1,160 @@
1
+ # frozen_string_literal: true
2
+
3
+ class LLM::Gemini
4
+ ##
5
+ # The {LLM::Gemini::Files LLM::Gemini::Files} class provides a files
6
+ # object for interacting with [Gemini's Files API](https://ai.google.dev/gemini-api/docs/files).
7
+ # The files API allows a client to reference media files in prompts
8
+ # where they can be referenced by their URL.
9
+ #
10
+ # The files API is intended to preserve bandwidth and latency,
11
+ # especially for large files, but it can be helpful for smaller files
12
+ # as well because it does not require the client to include a file
13
+ # in the prompt over and over again (which could be the case in a
14
+ # multi-turn conversation).
15
+ #
16
+ # @example
17
+ # #!/usr/bin/env ruby
18
+ # require "llm"
19
+ #
20
+ # llm = LLM.gemini(ENV["KEY"])
21
+ # file = llm.files.create file: LLM::File("/audio/haiku.mp3")
22
+ # bot = LLM::Chat.new(llm).lazy
23
+ # bot.chat(file)
24
+ # bot.chat("Describe the audio file I sent to you")
25
+ # bot.chat("The audio file is the first message I sent to you.")
26
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
27
+ # @example
28
+ # #!/usr/bin/env ruby
29
+ # require "llm"
30
+ #
31
+ # llm = LLM.gemini(ENV["KEY"])
32
+ # file = llm.files.create file: LLM::File("/audio/haiku.mp3")
33
+ # bot = LLM::Chat.new(llm).lazy
34
+ # bot.chat(["Describe the audio file I sent to you", file])
35
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
36
+ class Files
37
+ ##
38
+ # Returns a new Files object
39
+ # @param provider [LLM::Provider]
40
+ # @return [LLM::Gemini::Files]
41
+ def initialize(provider)
42
+ @provider = provider
43
+ end
44
+
45
+ ##
46
+ # List all files
47
+ # @example
48
+ # llm = LLM.gemini(ENV["KEY"])
49
+ # res = llm.files.all
50
+ # res.each do |file|
51
+ # print "name: ", file.name, "\n"
52
+ # end
53
+ # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
54
+ # @param [Hash] params Other parameters (see Gemini docs)
55
+ # @raise (see LLM::HTTPClient#request)
56
+ # @return [LLM::Response::FileList]
57
+ def all(**params)
58
+ query = URI.encode_www_form(params.merge!(key: secret))
59
+ req = Net::HTTP::Get.new("/v1beta/files?#{query}", headers)
60
+ res = request(http, req)
61
+ LLM::Response::FileList.new(res).tap { |filelist|
62
+ files = filelist.body["files"]&.map do |file|
63
+ file = file.transform_keys { snakecase(_1) }
64
+ OpenStruct.from_hash(file)
65
+ end || []
66
+ filelist.files = files
67
+ }
68
+ end
69
+
70
+ ##
71
+ # Create a file
72
+ # @example
73
+ # llm = LLM.gemini(ENV["KEY"])
74
+ #  res = llm.files.create file: LLM::File("/audio/haiku.mp3")
75
+ # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
76
+ # @param [File] file The file
77
+ # @param [Hash] params Other parameters (see Gemini docs)
78
+ # @raise (see LLM::HTTPClient#request)
79
+ # @return [LLM::Response::File]
80
+ def create(file:, **params)
81
+ req = Net::HTTP::Post.new(request_upload_url(file:), {})
82
+ req["content-length"] = file.bytesize
83
+ req["X-Goog-Upload-Offset"] = 0
84
+ req["X-Goog-Upload-Command"] = "upload, finalize"
85
+ req.body = File.binread(file.path)
86
+ res = request(http, req)
87
+ LLM::Response::File.new(res)
88
+ end
89
+
90
+ ##
91
+ # Get a file
92
+ # @example
93
+ # llm = LLM.gemini(ENV["KEY"])
94
+ # res = llm.files.get(file: "files/1234567890")
95
+ # print "name: ", res.name, "\n"
96
+ # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
97
+ # @param [#name, String] file The file to get
98
+ # @param [Hash] params Other parameters (see Gemini docs)
99
+ # @raise (see LLM::HTTPClient#request)
100
+ # @return [LLM::Response::File]
101
+ def get(file:, **params)
102
+ file_id = file.respond_to?(:name) ? file.name : file.to_s
103
+ query = URI.encode_www_form(params.merge!(key: secret))
104
+ req = Net::HTTP::Get.new("/v1beta/#{file_id}?#{query}", headers)
105
+ res = request(http, req)
106
+ LLM::Response::File.new(res)
107
+ end
108
+
109
+ ##
110
+ # Delete a file
111
+ # @example
112
+ # llm = LLM.gemini(ENV["KEY"])
113
+ # res = llm.files.delete(file: "files/1234567890")
114
+ # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
115
+ # @param [#name, String] file The file to delete
116
+ # @param [Hash] params Other parameters (see Gemini docs)
117
+ # @raise (see LLM::HTTPClient#request)
118
+ # @return [LLM::Response::File]
119
+ def delete(file:, **params)
120
+ file_id = file.respond_to?(:name) ? file.name : file.to_s
121
+ query = URI.encode_www_form(params.merge!(key: secret))
122
+ req = Net::HTTP::Delete.new("/v1beta/#{file_id}?#{query}", headers)
123
+ request(http, req)
124
+ end
125
+
126
+ ##
127
+ # @raise [NotImplementedError]
128
+ # This method is not implemented by Gemini
129
+ def download
130
+ raise NotImplementedError
131
+ end
132
+
133
+ private
134
+
135
+ include LLM::Utils
136
+
137
+ def request_upload_url(file:)
138
+ req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{secret}", headers)
139
+ req["X-Goog-Upload-Protocol"] = "resumable"
140
+ req["X-Goog-Upload-Command"] = "start"
141
+ req["X-Goog-Upload-Header-Content-Length"] = file.bytesize
142
+ req["X-Goog-Upload-Header-Content-Type"] = file.mime_type
143
+ req.body = JSON.dump(file: {display_name: File.basename(file.path)})
144
+ res = request(http, req)
145
+ res["x-goog-upload-url"]
146
+ end
147
+
148
+ def http
149
+ @provider.instance_variable_get(:@http)
150
+ end
151
+
152
+ def secret
153
+ @provider.instance_variable_get(:@secret)
154
+ end
155
+
156
+ [:headers, :request].each do |m|
157
+ define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
158
+ end
159
+ end
160
+ end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class LLM::Gemini
4
+ ##
5
+ # @private
4
6
  module Format
5
7
  ##
6
8
  # @param [Array<LLM::Message>] messages
@@ -19,16 +21,20 @@ class LLM::Gemini
19
21
  private
20
22
 
21
23
  ##
22
- # @param [String, LLM::File] content
24
+ # @param [String, Array, LLM::Response::File, LLM::File] content
23
25
  # The content to format
24
- # @return [String, Hash]
26
+ # @return [Hash]
25
27
  # The formatted content
26
28
  def format_content(content)
27
- if LLM::File === content
29
+ case content
30
+ when Array
31
+ content.map { format_content(_1) }
32
+ when LLM::Response::File
28
33
  file = content
29
- {
30
- inline_data: {mime_type: file.mime_type, data: [File.binread(file.path)].pack("m0")}
31
- }
34
+ {file_data: {mime_type: file.mime_type, file_uri: file.uri}}
35
+ when LLM::File
36
+ file = content
37
+ {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
32
38
  else
33
39
  {text: content}
34
40
  end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ class LLM::Gemini
4
+ ##
5
+ # The {LLM::Gemini::Images LLM::Gemini::Images} class provides an images
6
+ # object for interacting with [Gemini's images API](https://ai.google.dev/gemini-api/docs/image-generation).
7
+ # Please note that unlike OpenAI, which can return either URLs or base64-encoded strings,
8
+ # Gemini's images API will always return an image as a base64 encoded string that
9
+ # can be decoded into binary.
10
+ # @example
11
+ # #!/usr/bin/env ruby
12
+ # require "llm"
13
+ #
14
+ # llm = LLM.gemini(ENV["KEY"])
15
+ # res = llm.images.create prompt: "A dog on a rocket to the moon"
16
+ # File.binwrite "rocket.png", res.images[0].binary
17
+ class Images
18
+ include Format
19
+
20
+ ##
21
+ # Returns a new Images object
22
+ # @param provider [LLM::Provider]
23
+ # @return [LLM::Gemini::Images]
24
+ def initialize(provider)
25
+ @provider = provider
26
+ end
27
+
28
+ ##
29
+ # Create an image
30
+ # @example
31
+ # llm = LLM.gemini(ENV["KEY"])
32
+ # res = llm.images.create prompt: "A dog on a rocket to the moon"
33
+ # File.binwrite "rocket.png", res.images[0].binary
34
+ # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
35
+ # @param [String] prompt The prompt
36
+ # @param [Hash] params Other parameters (see Gemini docs)
37
+ # @raise (see LLM::HTTPClient#request)
38
+ # @note
39
+ # The prompt should make it clear you want to generate an image, or you
40
+ # might unexpectedly receive a purely textual response. This is due to how
41
+ # Gemini implements image generation under the hood.
42
+ # @return [LLM::Response::Image]
43
+ def create(prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
44
+ req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
45
+ req.body = JSON.dump({
46
+ contents: [{parts: {text: prompt}}],
47
+ generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
48
+ }.merge!(params))
49
+ res = request(http, req)
50
+ LLM::Response::Image.new(res).extend(response_parser)
51
+ end
52
+
53
+ ##
54
+ # Edit an image
55
+ # @example
56
+ # llm = LLM.gemini(ENV["KEY"])
57
+ # res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
58
+ # File.binwrite "hatoncat.png", res.images[0].binary
59
+ # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
60
+ # @param [LLM::File] image The image to edit
61
+ # @param [String] prompt The prompt
62
+ # @param [Hash] params Other parameters (see Gemini docs)
63
+ # @raise (see LLM::HTTPClient#request)
64
+ # @note (see LLM::Gemini::Images#create)
65
+ # @return [LLM::Response::Image]
66
+ def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
67
+ req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
68
+ req.body = JSON.dump({
69
+ contents: [
70
+ {parts: [{text: prompt}, format_content(image)]}
71
+ ],
72
+ generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
73
+ }.merge!(params))
74
+ res = request(http, req)
75
+ LLM::Response::Image.new(res).extend(response_parser)
76
+ end
77
+
78
+ ##
79
+ # @raise [NotImplementedError]
80
+ # This method is not implemented by Gemini
81
+ def create_variation
82
+ raise NotImplementedError
83
+ end
84
+
85
+ private
86
+
87
+ def secret
88
+ @provider.instance_variable_get(:@secret)
89
+ end
90
+
91
+ def http
92
+ @provider.instance_variable_get(:@http)
93
+ end
94
+
95
+ [:response_parser, :headers, :request].each do |m|
96
+ define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
97
+ end
98
+ end
99
+ end
@@ -1,7 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class LLM::Gemini
4
+ ##
5
+ # @private
4
6
  module ResponseParser
7
+ ##
8
+ # @param [Hash] body
9
+ # The response body from the LLM provider
10
+ # @return [Hash]
5
11
  def parse_embedding(body)
6
12
  {
7
13
  model: "text-embedding-004",
@@ -20,12 +26,32 @@ class LLM::Gemini
20
26
  LLM::Message.new(
21
27
  _1.dig("content", "role"),
22
28
  _1.dig("content", "parts", 0, "text"),
23
- {completion: self}
29
+ {response: self}
24
30
  )
25
31
  end,
26
32
  prompt_tokens: body.dig("usageMetadata", "promptTokenCount"),
27
33
  completion_tokens: body.dig("usageMetadata", "candidatesTokenCount")
28
34
  }
29
35
  end
36
+
37
+ ##
38
+ # @param [Hash] body
39
+ # The response body from the LLM provider
40
+ # @return [Hash]
41
+ def parse_image(body)
42
+ {
43
+ urls: [],
44
+ images: body["candidates"].flat_map do |candidate|
45
+ candidate["content"]["parts"].filter_map do
46
+ next unless _1.dig("inlineData", "data")
47
+ OpenStruct.from_hash(
48
+ mime_type: _1["inlineData"]["mimeType"],
49
+ encoded: _1["inlineData"]["data"],
50
+ binary: _1["inlineData"]["data"].unpack1("m0")
51
+ )
52
+ end
53
+ end
54
+ }
55
+ end
30
56
  end
31
57
  end
@@ -3,11 +3,37 @@
3
3
  module LLM
4
4
  ##
5
5
  # The Gemini class implements a provider for
6
- # [Gemini](https://ai.google.dev/)
6
+ # [Gemini](https://ai.google.dev/).
7
+ #
8
+ # The Gemini provider can accept multiple inputs (text, images,
9
+ # audio, and video). The inputs can be provided inline via the
10
+ # prompt for files under 20MB or via the Gemini Files API for
11
+ # files that are over 20MB.
12
+ #
13
+ # @example
14
+ # #!/usr/bin/env ruby
15
+ # require "llm"
16
+ #
17
+ # llm = LLM.gemini(ENV["KEY"])
18
+ # bot = LLM::Chat.new(llm).lazy
19
+ # bot.chat LLM::File("/images/capybara.png")
20
+ # bot.chat "Describe the image"
21
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
22
+ # @example
23
+ # #!/usr/bin/env ruby
24
+ # require "llm"
25
+ #
26
+ # llm = LLM.gemini(ENV["KEY"])
27
+ # bot = LLM::Chat.new(llm).lazy
28
+ # bot.chat ["Describe the image", LLM::File("/images/capybara.png")]
29
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
7
30
  class Gemini < Provider
8
31
  require_relative "gemini/error_handler"
9
32
  require_relative "gemini/response_parser"
10
33
  require_relative "gemini/format"
34
+ require_relative "gemini/images"
35
+ require_relative "gemini/files"
36
+ require_relative "gemini/audio"
11
37
  include Format
12
38
 
13
39
  HOST = "generativelanguage.googleapis.com"
@@ -19,10 +45,14 @@ module LLM
19
45
  end
20
46
 
21
47
  ##
48
+ # Provides an embedding
22
49
  # @param input (see LLM::Provider#embed)
50
+ # @param model (see LLM::Provider#embed)
51
+ # @param params (see LLM::Provider#embed)
52
+ # @raise (see LLM::HTTPClient#request)
23
53
  # @return (see LLM::Provider#embed)
24
- def embed(input, **params)
25
- path = ["/v1beta/models/text-embedding-004", "embedContent?key=#{@secret}"].join(":")
54
+ def embed(input, model: "text-embedding-004", **params)
55
+ path = ["/v1beta/models/#{model}", "embedContent?key=#{@secret}"].join(":")
26
56
  req = Net::HTTP::Post.new(path, headers)
27
57
  req.body = JSON.dump({content: {parts: [{text: input}]}})
28
58
  res = request(@http, req)
@@ -30,13 +60,17 @@ module LLM
30
60
  end
31
61
 
32
62
  ##
63
+ # Provides an interface to the chat completions API
33
64
  # @see https://ai.google.dev/api/generate-content#v1beta.models.generateContent Gemini docs
34
65
  # @param prompt (see LLM::Provider#complete)
35
66
  # @param role (see LLM::Provider#complete)
67
+ # @param model (see LLM::Provider#complete)
68
+ # @param params (see LLM::Provider#complete)
69
+ # @example (see LLM::Provider#complete)
70
+ # @raise (see LLM::HTTPClient#request)
36
71
  # @return (see LLM::Provider#complete)
37
- def complete(prompt, role = :user, **params)
38
- params = {model: "gemini-1.5-flash"}.merge!(params)
39
- path = ["/v1beta/models/#{params.delete(:model)}", "generateContent?key=#{@secret}"].join(":")
72
+ def complete(prompt, role = :user, model: "gemini-1.5-flash", **params)
73
+ path = ["/v1beta/models/#{model}", "generateContent?key=#{@secret}"].join(":")
40
74
  req = Net::HTTP::Post.new(path, headers)
41
75
  messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
42
76
  req.body = JSON.dump({contents: format(messages)})
@@ -44,6 +78,28 @@ module LLM
44
78
  Response::Completion.new(res).extend(response_parser)
45
79
  end
46
80
 
81
+ ##
82
+ # Provides an interface to Gemini's audio API
83
+ # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
84
+ def audio
85
+ LLM::Gemini::Audio.new(self)
86
+ end
87
+
88
+ ##
89
+ # Provides an interface to Gemini's image generation API
90
+ # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
91
+ # @return [see LLM::Gemini::Images]
92
+ def images
93
+ LLM::Gemini::Images.new(self)
94
+ end
95
+
96
+ ##
97
+ # Provides an interface to Gemini's file management API
98
+ # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
99
+ def files
100
+ LLM::Gemini::Files.new(self)
101
+ end
102
+
47
103
  ##
48
104
  # @return (see LLM::Provider#assistant_role)
49
105
  def assistant_role
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class LLM::Ollama
4
+ ##
5
+ # @private
4
6
  class ErrorHandler
5
7
  ##
6
8
  # @return [Net::HTTPResponse]
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class LLM::Ollama
4
+ ##
5
+ # @private
4
6
  module Format
5
7
  ##
6
8
  # @param [Array<LLM::Message>] messages
@@ -9,9 +11,11 @@ class LLM::Ollama
9
11
  def format(messages)
10
12
  messages.map do
11
13
  if Hash === _1
12
- {role: _1[:role], content: format_content(_1[:content])}
14
+ {role: _1[:role]}
15
+ .merge!(_1)
16
+ .merge!(format_content(_1[:content]))
13
17
  else
14
- {role: _1.role, content: format_content(_1.content)}
18
+ {role: _1.role}.merge! format_content(_1.content)
15
19
  end
16
20
  end
17
21
  end
@@ -24,10 +28,14 @@ class LLM::Ollama
24
28
  # @return [String, Hash]
25
29
  # The formatted content
26
30
  def format_content(content)
27
- if URI === content
28
- [{type: :image_url, image_url: {url: content.to_s}}]
31
+ if LLM::File === content
32
+ if content.image?
33
+ {content: "This message has an image associated with it", images: [content.to_b64]}
34
+ else
35
+ raise TypeError, "'#{content.path}' was not recognized as an image file."
36
+ end
29
37
  else
30
- content
38
+ {content:}
31
39
  end
32
40
  end
33
41
  end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class LLM::Ollama
4
+ ##
5
+ # @private
4
6
  module ResponseParser
5
7
  ##
6
8
  # @param [Hash] body
@@ -22,7 +24,7 @@ class LLM::Ollama
22
24
  def parse_completion(body)
23
25
  {
24
26
  model: body["model"],
25
- choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {completion: self})],
27
+ choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {response: self})],
26
28
  prompt_tokens: body.dig("prompt_eval_count"),
27
29
  completion_tokens: body.dig("eval_count")
28
30
  }
@@ -2,8 +2,22 @@
2
2
 
3
3
  module LLM
4
4
  ##
5
- # The Ollama class implements a provider for
6
- # [Ollama](https://ollama.ai/)
5
+ # The Ollama class implements a provider for [Ollama](https://ollama.ai/).
6
+ #
7
+ # This provider supports a wide range of models; it is relatively
8
+ # straightforward to run on your own hardware, and includes multi-modal
9
+ # models that can process images and text. See the example for a demonstration
10
+ # of a multi-modal model by the name `llava`
11
+ #
12
+ # @example
13
+ # #!/usr/bin/env ruby
14
+ # require "llm"
15
+ #
16
+ # llm = LLM.ollama(nil)
17
+ # bot = LLM::Chat.new(llm, model: "llava").lazy
18
+ # bot.chat LLM::File("/images/capybara.png")
19
+ # bot.chat "Describe the image"
20
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
7
21
  class Ollama < Provider
8
22
  require_relative "ollama/error_handler"
9
23
  require_relative "ollama/response_parser"
@@ -19,10 +33,14 @@ module LLM
19
33
  end
20
34
 
21
35
  ##
36
+ # Provides an embedding
22
37
  # @param input (see LLM::Provider#embed)
38
+ # @param model (see LLM::Provider#embed)
39
+ # @param params (see LLM::Provider#embed)
40
+ # @raise (see LLM::HTTPClient#request)
23
41
  # @return (see LLM::Provider#embed)
24
- def embed(input, **params)
25
- params = {model: "llama3.2"}.merge!(params)
42
+ def embed(input, model: "llama3.2", **params)
43
+ params = {model:}.merge!(params)
26
44
  req = Net::HTTP::Post.new("/v1/embeddings", headers)
27
45
  req.body = JSON.dump({input:}.merge!(params))
28
46
  res = request(@http, req)
@@ -30,15 +48,20 @@ module LLM
30
48
  end
31
49
 
32
50
  ##
51
+ # Provides an interface to the chat completions API
33
52
  # @see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion Ollama docs
34
53
  # @param prompt (see LLM::Provider#complete)
35
54
  # @param role (see LLM::Provider#complete)
55
+ # @param model (see LLM::Provider#complete)
56
+ # @param params (see LLM::Provider#complete)
57
+ # @example (see LLM::Provider#complete)
58
+ # @raise (see LLM::HTTPClient#request)
36
59
  # @return (see LLM::Provider#complete)
37
- def complete(prompt, role = :user, **params)
38
- params = {model: "llama3.2", stream: false}.merge!(params)
60
+ def complete(prompt, role = :user, model: "llama3.2", **params)
61
+ params = {model:, stream: false}.merge!(params)
39
62
  req = Net::HTTP::Post.new("/api/chat", headers)
40
63
  messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
41
- req.body = JSON.dump({messages: messages.map(&:to_h)}.merge!(params))
64
+ req.body = JSON.dump({messages: format(messages)}.merge!(params))
42
65
  res = request(@http, req)
43
66
  Response::Completion.new(res).extend(response_parser)
44
67
  end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ class LLM::OpenAI
4
+ ##
5
+ # The {LLM::OpenAI::Audio LLM::OpenAI::Audio} class provides an audio
6
+ # object for interacting with [OpenAI's audio API](https://platform.openai.com/docs/api-reference/audio/createSpeech).
7
+ # @example
8
+ # llm = LLM.openai(ENV["KEY"])
9
+ # res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
10
+ # File.binwrite("rocket.mp3", res.audio.string)
11
+ class Audio
12
+ require "stringio"
13
+
14
+ ##
15
+ # Returns a new Audio object
16
+ # @param provider [LLM::Provider]
17
+ # @return [LLM::OpenAI::Audio]
18
+ def initialize(provider)
19
+ @provider = provider
20
+ end
21
+
22
+ ##
23
+ # Create an audio track
24
+ # @example
25
+ # llm = LLM.openai(ENV["KEY"])
26
+ #  res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
27
+ # File.binwrite("rocket.mp3", res.audio.string)
28
+ # @see https://platform.openai.com/docs/api-reference/audio/createSpeech OpenAI docs
29
+ # @param [String] input The text input
30
+ # @param [String] voice The voice to use
31
+ # @param [String] model The model to use
32
+ # @param [String] response_format The response format
33
+ # @param [Hash] params Other parameters (see OpenAI docs)
34
+ # @raise (see LLM::HTTPClient#request)
35
+ # @return [LLM::Response::Audio]
36
+ def create_speech(input:, voice: "alloy", model: "gpt-4o-mini-tts", response_format: "mp3", **params)
37
+ req = Net::HTTP::Post.new("/v1/audio/speech", headers)
38
+ req.body = JSON.dump({input:, voice:, model:, response_format:}.merge!(params))
39
+ io = StringIO.new("".b)
40
+ res = request(http, req) { _1.read_body { |chunk| io << chunk } }
41
+ LLM::Response::Audio.new(res).tap { _1.audio = io }
42
+ end
43
+
44
+ ##
45
+ # Create an audio transcription
46
+ # @example
47
+ # llm = LLM.openai(ENV["KEY"])
48
+ # res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
49
+ # res.text # => "A dog on a rocket to the moon"
50
+ # @see https://platform.openai.com/docs/api-reference/audio/createTranscription OpenAI docs
51
+ # @param [LLM::File] file The input audio
52
+ # @param [String] model The model to use
53
+ # @param [Hash] params Other parameters (see OpenAI docs)
54
+ # @raise (see LLM::HTTPClient#request)
55
+ # @return [LLM::Response::AudioTranscription]
56
+ def create_transcription(file:, model: "whisper-1", **params)
57
+ multi = LLM::Multipart.new(params.merge!(file:, model:))
58
+ req = Net::HTTP::Post.new("/v1/audio/transcriptions", headers)
59
+ req["content-type"] = multi.content_type
60
+ req.body = multi.body
61
+ res = request(http, req)
62
+ LLM::Response::AudioTranscription.new(res).tap { _1.text = _1.body["text"] }
63
+ end
64
+
65
+ ##
66
+ # Create an audio translation (in English)
67
+ # @example
68
+ # # Arabic => English
69
+ # llm = LLM.openai(ENV["KEY"])
70
+ # res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
71
+ # res.text # => "In the name of Allah, the Beneficent, the Merciful."
72
+ # @see https://platform.openai.com/docs/api-reference/audio/createTranslation OpenAI docs
73
+ # @param [LLM::File] file The input audio
74
+ # @param [String] model The model to use
75
+ # @param [Hash] params Other parameters (see OpenAI docs)
76
+ # @raise (see LLM::HTTPClient#request)
77
+ # @return [LLM::Response::AudioTranslation]
78
+ def create_translation(file:, model: "whisper-1", **params)
79
+ multi = LLM::Multipart.new(params.merge!(file:, model:))
80
+ req = Net::HTTP::Post.new("/v1/audio/translations", headers)
81
+ req["content-type"] = multi.content_type
82
+ req.body = multi.body
83
+ res = request(http, req)
84
+ LLM::Response::AudioTranslation.new(res).tap { _1.text = _1.body["text"] }
85
+ end
86
+
87
+ private
88
+
89
+ def http
90
+ @provider.instance_variable_get(:@http)
91
+ end
92
+
93
+ [:headers, :request].each do |m|
94
+ define_method(m) { |*args, &b| @provider.send(m, *args, &b) }
95
+ end
96
+ end
97
+ end