llm.rb 0.10.1 → 0.12.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/README.md +120 -119
  3. data/lib/llm/bot/builder.rb +2 -2
  4. data/lib/llm/bot.rb +13 -22
  5. data/lib/llm/buffer.rb +7 -0
  6. data/lib/llm/file.rb +22 -12
  7. data/lib/llm/function.rb +8 -7
  8. data/lib/llm/message.rb +8 -0
  9. data/lib/llm/multipart.rb +0 -1
  10. data/lib/llm/object/kernel.rb +8 -0
  11. data/lib/llm/object.rb +9 -3
  12. data/lib/llm/provider.rb +10 -12
  13. data/lib/llm/providers/anthropic/format/completion_format.rb +10 -5
  14. data/lib/llm/providers/anthropic/models.rb +4 -9
  15. data/lib/llm/providers/anthropic/response/completion.rb +39 -0
  16. data/lib/llm/providers/anthropic.rb +13 -25
  17. data/lib/llm/providers/deepseek/format/completion_format.rb +3 -3
  18. data/lib/llm/providers/deepseek.rb +16 -1
  19. data/lib/llm/providers/gemini/audio.rb +9 -13
  20. data/lib/llm/providers/gemini/files.rb +19 -34
  21. data/lib/llm/providers/gemini/format/completion_format.rb +20 -5
  22. data/lib/llm/providers/gemini/images.rb +12 -11
  23. data/lib/llm/providers/gemini/models.rb +4 -10
  24. data/lib/llm/providers/gemini/{response_parser/completion_parser.rb → response/completion.rb} +10 -24
  25. data/lib/llm/providers/gemini/response/embedding.rb +8 -0
  26. data/lib/llm/providers/gemini/response/file.rb +11 -0
  27. data/lib/llm/providers/gemini/response/image.rb +26 -0
  28. data/lib/llm/providers/gemini.rb +18 -29
  29. data/lib/llm/providers/llamacpp.rb +18 -1
  30. data/lib/llm/providers/ollama/format/completion_format.rb +8 -5
  31. data/lib/llm/providers/ollama/models.rb +2 -8
  32. data/lib/llm/providers/ollama/response/completion.rb +28 -0
  33. data/lib/llm/providers/ollama/response/embedding.rb +9 -0
  34. data/lib/llm/providers/ollama.rb +13 -19
  35. data/lib/llm/providers/openai/audio.rb +10 -10
  36. data/lib/llm/providers/openai/files.rb +22 -34
  37. data/lib/llm/providers/openai/format/completion_format.rb +11 -4
  38. data/lib/llm/providers/openai/format/moderation_format.rb +2 -2
  39. data/lib/llm/providers/openai/format/respond_format.rb +7 -4
  40. data/lib/llm/providers/openai/images.rb +18 -17
  41. data/lib/llm/providers/openai/models.rb +4 -9
  42. data/lib/llm/providers/openai/moderations.rb +9 -11
  43. data/lib/llm/providers/openai/response/audio.rb +7 -0
  44. data/lib/llm/providers/openai/{response_parser/completion_parser.rb → response/completion.rb} +14 -30
  45. data/lib/llm/providers/openai/response/embedding.rb +9 -0
  46. data/lib/llm/providers/openai/response/file.rb +7 -0
  47. data/lib/llm/providers/openai/response/image.rb +16 -0
  48. data/lib/llm/providers/openai/response/moderations.rb +34 -0
  49. data/lib/llm/providers/openai/{response_parser/respond_parser.rb → response/responds.rb} +7 -29
  50. data/lib/llm/providers/openai/responses.rb +16 -34
  51. data/lib/llm/providers/openai/stream_parser.rb +1 -0
  52. data/lib/llm/providers/openai/vector_stores.rb +188 -0
  53. data/lib/llm/providers/openai.rb +24 -9
  54. data/lib/llm/providers/xai/images.rb +58 -0
  55. data/lib/llm/providers/xai.rb +72 -0
  56. data/lib/llm/response.rb +42 -13
  57. data/lib/llm/version.rb +1 -1
  58. data/lib/llm.rb +12 -13
  59. data/llm.gemspec +5 -5
  60. metadata +29 -38
  61. data/lib/llm/model.rb +0 -32
  62. data/lib/llm/providers/anthropic/response_parser/completion_parser.rb +0 -51
  63. data/lib/llm/providers/anthropic/response_parser.rb +0 -24
  64. data/lib/llm/providers/gemini/response_parser.rb +0 -46
  65. data/lib/llm/providers/ollama/response_parser/completion_parser.rb +0 -42
  66. data/lib/llm/providers/ollama/response_parser.rb +0 -30
  67. data/lib/llm/providers/openai/response_parser.rb +0 -65
  68. data/lib/llm/providers/voyageai/error_handler.rb +0 -32
  69. data/lib/llm/providers/voyageai/response_parser.rb +0 -13
  70. data/lib/llm/providers/voyageai.rb +0 -44
  71. data/lib/llm/response/audio.rb +0 -13
  72. data/lib/llm/response/audio_transcription.rb +0 -14
  73. data/lib/llm/response/audio_translation.rb +0 -14
  74. data/lib/llm/response/completion.rb +0 -51
  75. data/lib/llm/response/download_file.rb +0 -15
  76. data/lib/llm/response/embedding.rb +0 -23
  77. data/lib/llm/response/file.rb +0 -42
  78. data/lib/llm/response/filelist.rb +0 -18
  79. data/lib/llm/response/image.rb +0 -29
  80. data/lib/llm/response/modellist.rb +0 -18
  81. data/lib/llm/response/moderationlist/moderation.rb +0 -47
  82. data/lib/llm/response/moderationlist.rb +0 -51
  83. data/lib/llm/response/respond.rb +0 -56
  84. data/lib/llm/{event_handler.rb → eventhandler.rb} +0 -0
data/lib/llm/provider.rb CHANGED
@@ -44,7 +44,7 @@ class LLM::Provider
44
44
  # Other embedding parameters
45
45
  # @raise [NotImplementedError]
46
46
  # When the method is not implemented by a subclass
47
- # @return [LLM::Response::Embedding]
47
+ # @return [LLM::Response]
48
48
  def embed(input, model: nil, **params)
49
49
  raise NotImplementedError
50
50
  end
@@ -52,7 +52,7 @@ class LLM::Provider
52
52
  ##
53
53
  # Provides an interface to the chat completions API
54
54
  # @example
55
- # llm = LLM.openai(ENV["KEY"])
55
+ # llm = LLM.openai(key: ENV["KEY"])
56
56
  # messages = [{role: "system", content: "Your task is to answer all of my questions"}]
57
57
  # res = llm.complete("5 + 2 ?", messages:)
58
58
  # print "[#{res.choices[0].role}]", res.choices[0].content, "\n"
@@ -68,7 +68,7 @@ class LLM::Provider
68
68
  # @option params [Array<LLM::Function>, nil] :tools Defaults to nil
69
69
  # @raise [NotImplementedError]
70
70
  # When the method is not implemented by a subclass
71
- # @return [LLM::Response::Completion]
71
+ # @return [LLM::Response]
72
72
  def complete(prompt, params = {})
73
73
  raise NotImplementedError
74
74
  end
@@ -174,6 +174,13 @@ class LLM::Provider
174
174
  raise NotImplementedError
175
175
  end
176
176
 
177
+ ##
178
+ # @return [LLM::OpenAI::VectorStore]
179
+ # Returns an interface to the vector stores API
180
+ def vector_stores
181
+ raise NotImplementedError
182
+ end
183
+
177
184
  ##
178
185
  # @return [String]
179
186
  # Returns the role of the assistant in the conversation.
@@ -222,15 +229,6 @@ class LLM::Provider
222
229
  raise NotImplementedError
223
230
  end
224
231
 
225
- ##
226
- # @return [Module]
227
- # Returns the module responsible for parsing a successful LLM response
228
- # @raise [NotImplementedError]
229
- # (see LLM::Provider#complete)
230
- def response_parser
231
- raise NotImplementedError
232
- end
233
-
234
232
  ##
235
233
  # @return [Class]
236
234
  # Returns the class responsible for handling an unsuccessful LLM response
data/lib/llm/providers/anthropic/format/completion_format.rb CHANGED
@@ -47,13 +47,18 @@ module LLM::Anthropic::Format
47
47
  content.empty? ? throw(:abort, nil) : content.flat_map { format_content(_1) }
48
48
  when URI
49
49
  [{type: :image, source: {type: "url", url: content.to_s}}]
50
+ when File
51
+ content.close unless content.closed?
52
+ format_content(LLM.File(content.path))
50
53
  when LLM::File
51
54
  if content.image?
52
55
  [{type: :image, source: {type: "base64", media_type: content.mime_type, data: content.to_b64}}]
56
+ elsif content.pdf?
57
+ [{type: :document, source: {type: "base64", media_type: content.mime_type, data: content.to_b64}}]
53
58
  else
54
- raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
55
- "is not an image, and therefore not supported by the " \
56
- "Anthropic API"
59
+ raise LLM::PromptError, "The given object (an instance of #{content.class}) " \
60
+ "is not an image or PDF, and therefore not supported by the " \
61
+ "Anthropic API"
57
62
  end
58
63
  when String
59
64
  [{type: :text, text: content}]
@@ -62,8 +67,8 @@ module LLM::Anthropic::Format
62
67
  when LLM::Function::Return
63
68
  [{type: "tool_result", tool_use_id: content.id, content: [{type: :text, text: JSON.dump(content.value)}]}]
64
69
  else
65
- raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
66
- "is not supported by the Anthropic API"
70
+ raise LLM::PromptError, "The given object (an instance of #{content.class}) " \
71
+ "is not supported by the Anthropic API"
67
72
  end
68
73
  end
69
74
 
data/lib/llm/providers/anthropic/models.rb CHANGED
@@ -11,7 +11,7 @@ class LLM::Anthropic
11
11
  # #!/usr/bin/env ruby
12
12
  # require "llm"
13
13
  #
14
- # llm = LLM.anthropic(ENV["KEY"])
14
+ # llm = LLM.anthropic(key: ENV["KEY"])
15
15
  # res = llm.models.all
16
16
  # res.each do |model|
17
17
  # print "id: ", model.id, "\n"
@@ -28,7 +28,7 @@ class LLM::Anthropic
28
28
  ##
29
29
  # List all models
30
30
  # @example
31
- # llm = LLM.anthropic(ENV["KEY"])
31
+ # llm = LLM.anthropic(key: ENV["KEY"])
32
32
  # res = llm.models.all
33
33
  # res.each do |model|
34
34
  # print "id: ", model.id, "\n"
@@ -36,17 +36,12 @@ class LLM::Anthropic
36
36
  # @see https://docs.anthropic.com/en/api/models-list Anthropic docs
37
37
  # @param [Hash] params Other parameters (see Anthropic docs)
38
38
  # @raise (see LLM::Provider#request)
39
- # @return [LLM::Response::FileList]
39
+ # @return [LLM::Response]
40
40
  def all(**params)
41
41
  query = URI.encode_www_form(params)
42
42
  req = Net::HTTP::Get.new("/v1/models?#{query}", headers)
43
43
  res = execute(request: req)
44
- LLM::Response::ModelList.new(res).tap { |modellist|
45
- models = modellist.body["data"].map do |model|
46
- LLM::Model.from_hash(model).tap { _1.provider = @provider }
47
- end
48
- modellist.models = models
49
- }
44
+ LLM::Response.new(res)
50
45
  end
51
46
 
52
47
  private
data/lib/llm/providers/anthropic/response/completion.rb ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LLM::Anthropic::Response
4
+ module Completion
5
+ def choices = format_choices
6
+ def role = body.role
7
+ def model = body.model
8
+ def prompt_tokens = body.usage&.input_tokens || 0
9
+ def completion_tokens = body.usage&.output_tokens || 0
10
+ def total_tokens = prompt_tokens + completion_tokens
11
+
12
+ private
13
+
14
+ def format_choices
15
+ texts.map.with_index do |choice, index|
16
+ extra = {
17
+ index:, response: self,
18
+ tool_calls: format_tool_calls(tools), original_tool_calls: tools
19
+ }
20
+ LLM::Message.new(role, choice["text"], extra)
21
+ end
22
+ end
23
+
24
+ def format_tool_calls(tools)
25
+ (tools || []).filter_map do |tool|
26
+ tool = {
27
+ id: tool.id,
28
+ name: tool.name,
29
+ arguments: tool.input
30
+ }
31
+ LLM::Object.new(tool)
32
+ end
33
+ end
34
+
35
+ def parts = body.content
36
+ def texts = @texts ||= LLM::Object.from_hash(parts.select { _1["type"] == "text" })
37
+ def tools = @tools ||= LLM::Object.from_hash(parts.select { _1["type"] == "tool_use" })
38
+ end
39
+ end
data/lib/llm/providers/anthropic.rb CHANGED
@@ -3,12 +3,21 @@
3
3
  module LLM
4
4
  ##
5
5
  # The Anthropic class implements a provider for
6
- # [Anthropic](https://www.anthropic.com)
6
+ # [Anthropic](https://www.anthropic.com).
7
+ #
8
+ # @example
9
+ # #!/usr/bin/env ruby
10
+ # require "llm"
11
+ #
12
+ # llm = LLM.anthropic(key: ENV["KEY"])
13
+ # bot = LLM::Bot.new(llm)
14
+ # bot.chat ["Tell me about this photo", File.open("/images/dog.jpg", "rb")]
15
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
7
16
  class Anthropic < Provider
17
+ require_relative "anthropic/response/completion"
8
18
  require_relative "anthropic/format"
9
19
  require_relative "anthropic/error_handler"
10
20
  require_relative "anthropic/stream_parser"
11
- require_relative "anthropic/response_parser"
12
21
  require_relative "anthropic/models"
13
22
  include Format
14
23
 
@@ -20,23 +29,6 @@ module LLM
20
29
  super(host: HOST, **)
21
30
  end
22
31
 
23
- ##
24
- # Provides an embedding via VoyageAI per
25
- # [Anthropic's recommendation](https://docs.anthropic.com/en/docs/build-with-claude/embeddings)
26
- # @param input (see LLM::Provider#embed)
27
- # @param [String] key
28
- # Valid key for the VoyageAI API
29
- # @param [String] model
30
- # The embedding model to use
31
- # @param [Hash] params
32
- # Other embedding parameters
33
- # @raise (see LLM::Provider#request)
34
- # @return (see LLM::Provider#embed)
35
- def embed(input, key:, model: "voyage-2", **params)
36
- llm = LLM.voyageai(key:)
37
- llm.embed(input, **params.merge(model:))
38
- end
39
-
40
32
  ##
41
33
  # Provides an interface to the chat completions API
42
34
  # @see https://docs.anthropic.com/en/api/messages Anthropic docs
@@ -44,7 +36,7 @@ module LLM
44
36
  # @param params (see LLM::Provider#complete)
45
37
  # @example (see LLM::Provider#complete)
46
38
  # @raise (see LLM::Provider#request)
47
- # @raise [LLM::Error::PromptError]
39
+ # @raise [LLM::PromptError]
48
40
  # When given an object a provider does not understand
49
41
  # @return (see LLM::Provider#complete)
50
42
  def complete(prompt, params = {})
@@ -57,7 +49,7 @@ module LLM
57
49
  body = JSON.dump({messages: [format(messages)].flatten}.merge!(params))
58
50
  set_body_stream(req, StringIO.new(body))
59
51
  res = execute(request: req, stream:)
60
- Response::Completion.new(res).extend(response_parser)
52
+ LLM::Response.new(res).extend(LLM::Anthropic::Response::Completion)
61
53
  end
62
54
 
63
55
  ##
@@ -92,10 +84,6 @@ module LLM
92
84
  )
93
85
  end
94
86
 
95
- def response_parser
96
- LLM::Anthropic::ResponseParser
97
- end
98
-
99
87
  def stream_parser
100
88
  LLM::Anthropic::StreamParser
101
89
  end
data/lib/llm/providers/deepseek/format/completion_format.rb CHANGED
@@ -12,7 +12,7 @@ module LLM::DeepSeek::Format
12
12
  end
13
13
 
14
14
  ##
15
- # Formats the message for the OpenAI chat completions API
15
+ # Formats the message for the DeepSeek chat completions API
16
16
  # @return [Hash]
17
17
  def format
18
18
  catch(:abort) do
@@ -37,8 +37,8 @@ module LLM::DeepSeek::Format
37
37
  when LLM::Function::Return
38
38
  throw(:abort, {role: "tool", tool_call_id: content.id, content: JSON.dump(content.value)})
39
39
  else
40
- raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
41
- "is not supported by the DeepSeek chat completions API"
40
+ raise LLM::PromptError, "The given object (an instance of #{content.class}) " \
41
+ "is not supported by the DeepSeek chat completions API"
42
42
  end
43
43
  end
44
44
 
data/lib/llm/providers/deepseek.rb CHANGED
@@ -6,8 +6,17 @@ module LLM
6
6
  ##
7
7
  # The DeepSeek class implements a provider for
8
8
  # [DeepSeek](https://deepseek.com)
9
- # through its OpenAI-compatible API provided via
9
+ # through its OpenAI-compatible API available via
10
10
  # their [web platform](https://platform.deepseek.com).
11
+ #
12
+ # @example
13
+ # #!/usr/bin/env ruby
14
+ # require "llm"
15
+ #
16
+ # llm = LLM.deepseek(key: ENV["KEY"])
17
+ # bot = LLM::Bot.new(llm)
18
+ # bot.chat ["Tell me about this photo", File.open("/images/cat.jpg", "rb")]
19
+ # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
11
20
  class DeepSeek < OpenAI
12
21
  require_relative "deepseek/format"
13
22
  include DeepSeek::Format
@@ -49,6 +58,12 @@ module LLM
49
58
  raise NotImplementedError
50
59
  end
51
60
 
61
+ ##
62
+ # @raise [NotImplementedError]
63
+ def vector_stores
64
+ raise NotImplementedError
65
+ end
66
+
52
67
  ##
53
68
  # Returns the default model for chat completions
54
69
  # @see https://api-docs.deepseek.com/quick_start/pricing deepseek-chat
data/lib/llm/providers/gemini/audio.rb CHANGED
@@ -8,7 +8,7 @@ class LLM::Gemini
8
8
  # #!/usr/bin/env ruby
9
9
  # require "llm"
10
10
  #
11
- # llm = LLM.gemini(ENV["KEY"])
11
+ # llm = LLM.gemini(key: ENV["KEY"])
12
12
  # res = llm.audio.create_transcription(input: "/audio/rocket.mp3")
13
13
  # res.text # => "A dog on a rocket to the moon"
14
14
  class Audio
@@ -30,48 +30,44 @@ class LLM::Gemini
30
30
  ##
31
31
  # Create an audio transcription
32
32
  # @example
33
- # llm = LLM.gemini(ENV["KEY"])
33
+ # llm = LLM.gemini(key: ENV["KEY"])
34
34
  # res = llm.audio.create_transcription(file: "/audio/rocket.mp3")
35
35
  # res.text # => "A dog on a rocket to the moon"
36
36
  # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
37
- # @param [String, LLM::File, LLM::Response::File] file The input audio
37
+ # @param [String, LLM::File, LLM::Response] file The input audio
38
38
  # @param [String] model The model to use
39
39
  # @param [Hash] params Other parameters (see Gemini docs)
40
40
  # @raise (see LLM::Provider#request)
41
- # @return [LLM::Response::AudioTranscription]
41
+ # @return [LLM::Response]
42
42
  def create_transcription(file:, model: "gemini-1.5-flash", **params)
43
43
  res = @provider.complete [
44
44
  "Your task is to transcribe the contents of an audio file",
45
45
  "Your response should include the transcription, and nothing else",
46
46
  LLM.File(file)
47
47
  ], params.merge(role: :user, model:)
48
- LLM::Response::AudioTranscription
49
- .new(res)
50
- .tap { _1.text = res.choices[0].content }
48
+ res.tap { _1.define_singleton_method(:text) { choices[0].content } }
51
49
  end
52
50
 
53
51
  ##
54
52
  # Create an audio translation (in English)
55
53
  # @example
56
54
  # # Arabic => English
57
- # llm = LLM.gemini(ENV["KEY"])
55
+ # llm = LLM.gemini(key: ENV["KEY"])
58
56
  # res = llm.audio.create_translation(file: "/audio/bismillah.mp3")
59
57
  # res.text # => "In the name of Allah, the Beneficent, the Merciful."
60
58
  # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
61
- # @param [String, LLM::File, LLM::Response::File] file The input audio
59
+ # @param [String, LLM::File, LLM::Response] file The input audio
62
60
  # @param [String] model The model to use
63
61
  # @param [Hash] params Other parameters (see Gemini docs)
64
62
  # @raise (see LLM::Provider#request)
65
- # @return [LLM::Response::AudioTranslation]
63
+ # @return [LLM::Response]
66
64
  def create_translation(file:, model: "gemini-1.5-flash", **params)
67
65
  res = @provider.complete [
68
66
  "Your task is to translate the contents of an audio file into English",
69
67
  "Your response should include the translation, and nothing else",
70
68
  LLM.File(file)
71
69
  ], params.merge(role: :user, model:)
72
- LLM::Response::AudioTranslation
73
- .new(res)
74
- .tap { _1.text = res.choices[0].content }
70
+ res.tap { _1.define_singleton_method(:text) { choices[0].content } }
75
71
  end
76
72
  end
77
73
  end
data/lib/llm/providers/gemini/files.rb CHANGED
@@ -17,24 +17,14 @@ class LLM::Gemini
17
17
  # #!/usr/bin/env ruby
18
18
  # require "llm"
19
19
  #
20
- # llm = LLM.gemini(ENV["KEY"])
20
+ # llm = LLM.gemini(key: ENV["KEY"])
21
21
  # bot = LLM::Bot.new(llm)
22
- # file = llm.files.create file: "/audio/haiku.mp3"
23
- # bot.chat(file)
24
- # bot.chat("Describe the audio file I sent to you")
25
- # bot.chat("The audio file is the first message I sent to you.")
26
- # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
27
- #
28
- # @example example #2
29
- # #!/usr/bin/env ruby
30
- # require "llm"
31
- #
32
- # llm = LLM.gemini(ENV["KEY"])
33
- # bot = LLM::Bot.new(llm)
34
- # file = llm.files.create file: "/audio/haiku.mp3"
35
- # bot.chat(["Describe the audio file I sent to you", file])
22
+ # file = llm.files.create(file: "/audio/haiku.mp3")
23
+ # bot.chat ["Tell me about this file", file]
36
24
  # bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
37
25
  class Files
26
+ require_relative "response/file"
27
+
38
28
  ##
39
29
  # Returns a new Files object
40
30
  # @param provider [LLM::Provider]
@@ -46,7 +36,7 @@ class LLM::Gemini
46
36
  ##
47
37
  # List all files
48
38
  # @example
49
- # llm = LLM.gemini(ENV["KEY"])
39
+ # llm = LLM.gemini(key: ENV["KEY"])
50
40
  # res = llm.files.all
51
41
  # res.each do |file|
52
42
  # print "name: ", file.name, "\n"
@@ -54,30 +44,24 @@ class LLM::Gemini
54
44
  # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
55
45
  # @param [Hash] params Other parameters (see Gemini docs)
56
46
  # @raise (see LLM::Provider#request)
57
- # @return [LLM::Response::FileList]
47
+ # @return [LLM::Response]
58
48
  def all(**params)
59
49
  query = URI.encode_www_form(params.merge!(key: key))
60
50
  req = Net::HTTP::Get.new("/v1beta/files?#{query}", headers)
61
51
  res = execute(request: req)
62
- LLM::Response::FileList.new(res).tap { |filelist|
63
- files = filelist.body["files"]&.map do |file|
64
- file = file.transform_keys { snakecase(_1) }
65
- LLM::Object.from_hash(file)
66
- end || []
67
- filelist.files = files
68
- }
52
+ LLM::Response.new(res)
69
53
  end
70
54
 
71
55
  ##
72
56
  # Create a file
73
57
  # @example
74
- # llm = LLM.gemini(ENV["KEY"])
75
- # res = llm.files.create file: "/audio/haiku.mp3"
58
+ # llm = LLM.gemini(key: ENV["KEY"])
59
+ # res = llm.files.create(file: "/audio/haiku.mp3")
76
60
  # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
77
61
  # @param [String, LLM::File] file The file
78
62
  # @param [Hash] params Other parameters (see Gemini docs)
79
63
  # @raise (see LLM::Provider#request)
80
- # @return [LLM::Response::File]
64
+ # @return [LLM::Response]
81
65
  def create(file:, **params)
82
66
  file = LLM.File(file)
83
67
  req = Net::HTTP::Post.new(request_upload_url(file:), {})
@@ -87,44 +71,45 @@ class LLM::Gemini
87
71
  file.with_io do |io|
88
72
  set_body_stream(req, io)
89
73
  res = execute(request: req)
90
- LLM::Response::File.new(res)
74
+ LLM::Response.new(res).extend(LLM::Gemini::Response::File)
91
75
  end
92
76
  end
93
77
 
94
78
  ##
95
79
  # Get a file
96
80
  # @example
97
- # llm = LLM.gemini(ENV["KEY"])
81
+ # llm = LLM.gemini(key: ENV["KEY"])
98
82
  # res = llm.files.get(file: "files/1234567890")
99
83
  # print "name: ", res.name, "\n"
100
84
  # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
101
85
  # @param [#name, String] file The file to get
102
86
  # @param [Hash] params Other parameters (see Gemini docs)
103
87
  # @raise (see LLM::Provider#request)
104
- # @return [LLM::Response::File]
88
+ # @return [LLM::Response]
105
89
  def get(file:, **params)
106
90
  file_id = file.respond_to?(:name) ? file.name : file.to_s
107
91
  query = URI.encode_www_form(params.merge!(key: key))
108
92
  req = Net::HTTP::Get.new("/v1beta/#{file_id}?#{query}", headers)
109
93
  res = execute(request: req)
110
- LLM::Response::File.new(res)
94
+ LLM::Response.new(res).extend(LLM::Gemini::Response::File)
111
95
  end
112
96
 
113
97
  ##
114
98
  # Delete a file
115
99
  # @example
116
- # llm = LLM.gemini(ENV["KEY"])
100
+ # llm = LLM.gemini(key: ENV["KEY"])
117
101
  # res = llm.files.delete(file: "files/1234567890")
118
102
  # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
119
103
  # @param [#name, String] file The file to delete
120
104
  # @param [Hash] params Other parameters (see Gemini docs)
121
105
  # @raise (see LLM::Provider#request)
122
- # @return [LLM::Response::File]
106
+ # @return [LLM::Response]
123
107
  def delete(file:, **params)
124
108
  file_id = file.respond_to?(:name) ? file.name : file.to_s
125
109
  query = URI.encode_www_form(params.merge!(key: key))
126
110
  req = Net::HTTP::Delete.new("/v1beta/#{file_id}?#{query}", headers)
127
- execute(request: req)
111
+ res = execute(request: req)
112
+ LLM::Response.new(res)
128
113
  end
129
114
 
130
115
  ##
data/lib/llm/providers/gemini/format/completion_format.rb CHANGED
@@ -30,9 +30,11 @@ module LLM::Gemini::Format
30
30
  case content
31
31
  when Array
32
32
  content.empty? ? throw(:abort, nil) : content.flat_map { format_content(_1) }
33
- when LLM::Response::File
34
- file = content
35
- [{file_data: {mime_type: file.mime_type, file_uri: file.uri}}]
33
+ when LLM::Response
34
+ format_response(content)
35
+ when File
36
+ content.close unless content.closed?
37
+ format_content(LLM.File(content.path))
36
38
  when LLM::File
37
39
  file = content
38
40
  [{inline_data: {mime_type: file.mime_type, data: file.to_b64}}]
@@ -43,11 +45,24 @@ module LLM::Gemini::Format
43
45
  when LLM::Function::Return
44
46
  [{text: JSON.dump(content.value)}]
45
47
  else
46
- raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
47
- "is not supported by the Gemini API"
48
+ prompt_error!(content)
48
49
  end
49
50
  end
50
51
 
52
+ def format_response(response)
53
+ if response.file?
54
+ file = response
55
+ [{file_data: {mime_type: file.mime_type, file_uri: file.uri}}]
56
+ else
57
+ prompt_error!(content)
58
+ end
59
+ end
60
+
61
+ def prompt_error!(object)
62
+ raise LLM::PromptError, "The given object (an instance of #{object.class}) " \
63
+ "is not supported by the Gemini API"
64
+ end
65
+
51
66
  def message = @message
52
67
  def content = message.content
53
68
  end
data/lib/llm/providers/gemini/images.rb CHANGED
@@ -11,10 +11,11 @@ class LLM::Gemini
11
11
  # #!/usr/bin/env ruby
12
12
  # require "llm"
13
13
  #
14
- # llm = LLM.gemini(ENV["KEY"])
14
+ # llm = LLM.gemini(key: ENV["KEY"])
15
15
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
16
16
  # IO.copy_stream res.images[0], "rocket.png"
17
17
  class Images
18
+ require_relative "response/image"
18
19
  include Format
19
20
 
20
21
  ##
@@ -28,7 +29,7 @@ class LLM::Gemini
28
29
  ##
29
30
  # Create an image
30
31
  # @example
31
- # llm = LLM.gemini(ENV["KEY"])
32
+ # llm = LLM.gemini(key: ENV["KEY"])
32
33
  # res = llm.images.create prompt: "A dog on a rocket to the moon"
33
34
  # IO.copy_stream res.images[0], "rocket.png"
34
35
  # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
@@ -39,23 +40,23 @@ class LLM::Gemini
39
40
  # The prompt should make it clear you want to generate an image, or you
40
41
  # might unexpectedly receive a purely textual response. This is due to how
41
42
  # Gemini implements image generation under the hood.
42
- # @return [LLM::Response::Image]
43
+ # @return [LLM::Response]
43
44
  def create(prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
44
45
  req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
45
46
  body = JSON.dump({
46
- contents: [{parts: [{text: create_prompt}, {text: prompt}]}],
47
+ contents: [{parts: [{text: system_prompt}, {text: prompt}]}],
47
48
  generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
48
49
  }.merge!(params))
49
50
  req.body = body
50
51
  res = execute(request: req)
51
- LLM::Response::Image.new(res).extend(response_parser)
52
+ LLM::Response.new(res).extend(LLM::Gemini::Response::Image)
52
53
  end
53
54
 
54
55
  ##
55
56
  # Edit an image
56
57
  # @example
57
- # llm = LLM.gemini(ENV["KEY"])
58
- # res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
58
+ # llm = LLM.gemini(key: ENV["KEY"])
59
+ # res = llm.images.edit image: "cat.png", prompt: "Add a hat to the cat"
59
60
  # IO.copy_stream res.images[0], "hatoncat.png"
60
61
  # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
61
62
  # @param [String, LLM::File] image The image to edit
@@ -63,7 +64,7 @@ class LLM::Gemini
63
64
  # @param [Hash] params Other parameters (see Gemini docs)
64
65
  # @raise (see LLM::Provider#request)
65
66
  # @note (see LLM::Gemini::Images#create)
66
- # @return [LLM::Response::Image]
67
+ # @return [LLM::Response]
67
68
  def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
68
69
  req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{key}", headers)
69
70
  image = LLM.File(image)
@@ -73,7 +74,7 @@ class LLM::Gemini
73
74
  }.merge!(params)).b
74
75
  set_body_stream(req, StringIO.new(body))
75
76
  res = execute(request: req)
76
- LLM::Response::Image.new(res).extend(response_parser)
77
+ LLM::Response.new(res).extend(LLM::Gemini::Response::Image)
77
78
  end
78
79
 
79
80
  ##
@@ -93,7 +94,7 @@ class LLM::Gemini
93
94
  @provider.instance_variable_get(:@key)
94
95
  end
95
96
 
96
- def create_prompt
97
+ def system_prompt
97
98
  <<~PROMPT
98
99
  Your task is to generate one or more image(s) from
99
100
  text I will provide to you. Your response *MUST* include
@@ -102,7 +103,7 @@ class LLM::Gemini
102
103
  PROMPT
103
104
  end
104
105
 
105
- [:response_parser, :headers, :execute, :set_body_stream].each do |m|
106
+ [:headers, :execute, :set_body_stream].each do |m|
106
107
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
107
108
  end
108
109
  end
data/lib/llm/providers/gemini/models.rb CHANGED
@@ -11,7 +11,7 @@ class LLM::Gemini
11
11
  # #!/usr/bin/env ruby
12
12
  # require "llm"
13
13
  #
14
- # llm = LLM.gemini(ENV["KEY"])
14
+ # llm = LLM.gemini(key: ENV["KEY"])
15
15
  # res = llm.models.all
16
16
  # res.each do |model|
17
17
  # print "id: ", model.id, "\n"
@@ -30,7 +30,7 @@ class LLM::Gemini
30
30
  ##
31
31
  # List all models
32
32
  # @example
33
- # llm = LLM.gemini(ENV["KEY"])
33
+ # llm = LLM.gemini(key: ENV["KEY"])
34
34
  # res = llm.models.all
35
35
  # res.each do |model|
36
36
  # print "id: ", model.id, "\n"
@@ -38,18 +38,12 @@ class LLM::Gemini
38
38
  # @see https://ai.google.dev/api/models?hl=en#method:-models.list Gemini docs
39
39
  # @param [Hash] params Other parameters (see Gemini docs)
40
40
  # @raise (see LLM::Provider#request)
41
- # @return [LLM::Response::ModelList]
41
+ # @return [LLM::Response]
42
42
  def all(**params)
43
43
  query = URI.encode_www_form(params.merge!(key: key))
44
44
  req = Net::HTTP::Get.new("/v1beta/models?#{query}", headers)
45
45
  res = execute(request: req)
46
- LLM::Response::ModelList.new(res).tap { |modellist|
47
- models = modellist.body["models"].map do |model|
48
- model = model.transform_keys { snakecase(_1) }
49
- LLM::Model.from_hash(model).tap { _1.provider = @provider }
50
- end
51
- modellist.models = models
52
- }
46
+ LLM::Response.new(res)
53
47
  end
54
48
 
55
49
  private