llm.rb 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/README.md +120 -119
  3. data/lib/llm/bot/builder.rb +2 -2
  4. data/lib/llm/bot.rb +13 -22
  5. data/lib/llm/buffer.rb +7 -0
  6. data/lib/llm/file.rb +22 -12
  7. data/lib/llm/function.rb +8 -7
  8. data/lib/llm/message.rb +8 -0
  9. data/lib/llm/multipart.rb +0 -1
  10. data/lib/llm/object/kernel.rb +8 -0
  11. data/lib/llm/object.rb +9 -3
  12. data/lib/llm/provider.rb +10 -12
  13. data/lib/llm/providers/anthropic/format/completion_format.rb +10 -5
  14. data/lib/llm/providers/anthropic/models.rb +4 -9
  15. data/lib/llm/providers/anthropic/response/completion.rb +39 -0
  16. data/lib/llm/providers/anthropic.rb +13 -25
  17. data/lib/llm/providers/deepseek/format/completion_format.rb +3 -3
  18. data/lib/llm/providers/deepseek.rb +16 -1
  19. data/lib/llm/providers/gemini/audio.rb +9 -13
  20. data/lib/llm/providers/gemini/files.rb +19 -34
  21. data/lib/llm/providers/gemini/format/completion_format.rb +20 -5
  22. data/lib/llm/providers/gemini/images.rb +12 -11
  23. data/lib/llm/providers/gemini/models.rb +4 -10
  24. data/lib/llm/providers/gemini/{response_parser/completion_parser.rb → response/completion.rb} +10 -24
  25. data/lib/llm/providers/gemini/response/embedding.rb +8 -0
  26. data/lib/llm/providers/gemini/response/file.rb +11 -0
  27. data/lib/llm/providers/gemini/response/image.rb +26 -0
  28. data/lib/llm/providers/gemini.rb +18 -29
  29. data/lib/llm/providers/llamacpp.rb +18 -1
  30. data/lib/llm/providers/ollama/format/completion_format.rb +8 -5
  31. data/lib/llm/providers/ollama/models.rb +2 -8
  32. data/lib/llm/providers/ollama/response/completion.rb +28 -0
  33. data/lib/llm/providers/ollama/response/embedding.rb +9 -0
  34. data/lib/llm/providers/ollama.rb +13 -19
  35. data/lib/llm/providers/openai/audio.rb +10 -10
  36. data/lib/llm/providers/openai/files.rb +22 -34
  37. data/lib/llm/providers/openai/format/completion_format.rb +11 -4
  38. data/lib/llm/providers/openai/format/moderation_format.rb +2 -2
  39. data/lib/llm/providers/openai/format/respond_format.rb +7 -4
  40. data/lib/llm/providers/openai/images.rb +18 -17
  41. data/lib/llm/providers/openai/models.rb +4 -9
  42. data/lib/llm/providers/openai/moderations.rb +9 -11
  43. data/lib/llm/providers/openai/response/audio.rb +7 -0
  44. data/lib/llm/providers/openai/{response_parser/completion_parser.rb → response/completion.rb} +14 -30
  45. data/lib/llm/providers/openai/response/embedding.rb +9 -0
  46. data/lib/llm/providers/openai/response/file.rb +7 -0
  47. data/lib/llm/providers/openai/response/image.rb +16 -0
  48. data/lib/llm/providers/openai/response/moderations.rb +34 -0
  49. data/lib/llm/providers/openai/{response_parser/respond_parser.rb → response/responds.rb} +7 -29
  50. data/lib/llm/providers/openai/responses.rb +16 -34
  51. data/lib/llm/providers/openai/stream_parser.rb +1 -0
  52. data/lib/llm/providers/openai/vector_stores.rb +188 -0
  53. data/lib/llm/providers/openai.rb +24 -9
  54. data/lib/llm/providers/xai/images.rb +58 -0
  55. data/lib/llm/providers/xai.rb +72 -0
  56. data/lib/llm/response.rb +42 -13
  57. data/lib/llm/version.rb +1 -1
  58. data/lib/llm.rb +12 -13
  59. data/llm.gemspec +5 -5
  60. metadata +29 -38
  61. data/lib/llm/model.rb +0 -32
  62. data/lib/llm/providers/anthropic/response_parser/completion_parser.rb +0 -51
  63. data/lib/llm/providers/anthropic/response_parser.rb +0 -24
  64. data/lib/llm/providers/gemini/response_parser.rb +0 -46
  65. data/lib/llm/providers/ollama/response_parser/completion_parser.rb +0 -42
  66. data/lib/llm/providers/ollama/response_parser.rb +0 -30
  67. data/lib/llm/providers/openai/response_parser.rb +0 -65
  68. data/lib/llm/providers/voyageai/error_handler.rb +0 -32
  69. data/lib/llm/providers/voyageai/response_parser.rb +0 -13
  70. data/lib/llm/providers/voyageai.rb +0 -44
  71. data/lib/llm/response/audio.rb +0 -13
  72. data/lib/llm/response/audio_transcription.rb +0 -14
  73. data/lib/llm/response/audio_translation.rb +0 -14
  74. data/lib/llm/response/completion.rb +0 -51
  75. data/lib/llm/response/download_file.rb +0 -15
  76. data/lib/llm/response/embedding.rb +0 -23
  77. data/lib/llm/response/file.rb +0 -42
  78. data/lib/llm/response/filelist.rb +0 -18
  79. data/lib/llm/response/image.rb +0 -29
  80. data/lib/llm/response/modellist.rb +0 -18
  81. data/lib/llm/response/moderationlist/moderation.rb +0 -47
  82. data/lib/llm/response/moderationlist.rb +0 -51
  83. data/lib/llm/response/respond.rb +0 -56
  84. /data/lib/llm/{event_handler.rb → eventhandler.rb} +0 -0
data/lib/llm/providers/gemini/{response_parser/completion_parser.rb → response/completion.rb}

@@ -1,30 +1,23 @@
  # frozen_string_literal: true

- module LLM::Gemini::ResponseParser
-   class CompletionParser
-     def initialize(body)
-       @body = LLM::Object.from_hash(body)
-     end
-
-     def format(response)
-       {
-         model:,
-         prompt_tokens:,
-         completion_tokens:,
-         total_tokens:,
-         choices: format_choices(response)
-       }
-     end
+ module LLM::Gemini::Response
+   module Completion
+     def model = body.modelVersion
+     def prompt_tokens = body.usageMetadata.promptTokenCount
+     def completion_tokens = body.usageMetadata.candidatesTokenCount
+     def total_tokens = body.usageMetadata.totalTokenCount
+     def choices = format_choices

      private

-     def format_choices(response)
+     def format_choices
        candidates.map.with_index do |choice, index|
+         choice = LLM::Object.from_hash(choice)
          content = choice.content
          role, parts = content.role, content.parts
          text = parts.filter_map { _1["text"] }.join
          tools = parts.filter_map { _1["functionCall"] }
-         extra = {index:, response:, tool_calls: format_tool_calls(tools), original_tool_calls: tools}
+         extra = {index:, response: self, tool_calls: format_tool_calls(tools), original_tool_calls: tools}
          LLM::Message.new(role, text, extra)
        end
      end
@@ -35,12 +28,5 @@ module LLM::Gemini::ResponseParser
          LLM::Object.new(function)
        end
      end
-
-     def body = @body
-     def model = body.modelVersion
-     def prompt_tokens = body.usageMetadata.promptTokenCount
-     def completion_tokens = body.usageMetadata.candidatesTokenCount
-     def total_tokens = body.usageMetadata.totalTokenCount
-     def candidates = body.candidates
    end
  end
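This refactor sets the pattern for the rest of the release: the per-endpoint parser classes are gone, and provider-specific readers are mixed into a generic LLM::Response via Object#extend. A minimal sketch of how the new module is consumed, using names taken from the hunks above and below:

  llm = LLM.gemini(key: ENV["KEY"])
  res = llm.complete("Hello")  # LLM::Response extended with LLM::Gemini::Response::Completion
  res.model                    # reads body.modelVersion
  res.total_tokens             # reads body.usageMetadata.totalTokenCount
  res.choices.first            # an LLM::Message built by format_choices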
data/lib/llm/providers/gemini/response/embedding.rb

@@ -0,0 +1,8 @@
+ # frozen_string_literal: true
+
+ module LLM::Gemini::Response
+   module Embedding
+     def model = "text-embedding-004"
+     def embeddings = body.dig("embedding", "values")
+   end
+ end
data/lib/llm/providers/gemini/response/file.rb

@@ -0,0 +1,11 @@
+ # frozen_string_literal: true
+
+ module LLM::Gemini::Response
+   module File
+     def name = respond_to?(:file) ? file.name : body.name
+     def display_name = respond_to?(:file) ? file.displayName : body.displayName
+     def mime_type = respond_to?(:file) ? file.mimeType : body.mimeType
+     def uri = respond_to?(:file) ? file.uri : body.uri
+     def file? = true
+   end
+ end
data/lib/llm/providers/gemini/response/image.rb

@@ -0,0 +1,26 @@
+ # frozen_string_literal: true
+
+ module LLM::Gemini::Response
+   module Image
+     ##
+     # @return [Array<StringIO>]
+     def images
+       candidates.flat_map do |candidate|
+         parts = candidate["content"]["parts"]
+         parts.filter_map do
+           data = _1.dig(:inlineData, :data)
+           next unless data
+           StringIO.new(data.unpack1("m0"))
+         end
+       end
+     end
+
+     ##
+     # Returns one or more image URLs, or an empty array
+     # @note
+     #   Gemini's image generation API does not return URLs, so this method
+     #   will always return an empty array.
+     # @return [Array<String>]
+     def urls = []
+   end
+ end
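The images reader above decodes Gemini's base64-encoded "inlineData" parts; "m0" is Ruby's strict base64 directive for String#unpack1. A standalone illustration of that decoding step (not part of the diff):

  require "stringio"
  io = StringIO.new("aGVsbG8=".unpack1("m0"))  # strict base64 decode
  io.string # => "hello"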
data/lib/llm/providers/gemini.rb

@@ -3,36 +3,26 @@
  module LLM
    ##
    # The Gemini class implements a provider for
-   # [Gemini](https://ai.google.dev/).
+   # [Gemini](https://ai.google.dev/). The Gemini provider
+   # can accept multiple inputs (text, images, audio, and video).
+   # The inputs can be provided inline via the prompt for files
+   # under 20MB or via the Gemini Files API for files
+   # that are over 20MB.
    #
-   # The Gemini provider can accept multiple inputs (text, images,
-   # audio, and video). The inputs can be provided inline via the
-   # prompt for files under 20MB or via the Gemini Files API for
-   # files that are over 20MB
-   #
-   # @example example #1
+   # @example
    #   #!/usr/bin/env ruby
    #   require "llm"
    #
-   #   llm = LLM.gemini(ENV["KEY"])
+   #   llm = LLM.gemini(key: ENV["KEY"])
    #   bot = LLM::Bot.new(llm)
-   #   bot.chat LLM.File("/images/capybara.png")
-   #   bot.chat "Describe the image"
-   #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
-   #
-   # @example example #2
-   #   #!/usr/bin/env ruby
-   #   require "llm"
-   #
-   #   llm = LLM.gemini(ENV["KEY"])
-   #   bot = LLM::Bot.new(llm)
-   #   bot.chat ["Describe the image", LLM::File("/images/capybara.png")]
+   #   bot.chat ["Tell me about this photo", File.open("/images/horse.jpg", "rb")]
    #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
    class Gemini < Provider
+     require_relative "gemini/response/embedding"
+     require_relative "gemini/response/completion"
      require_relative "gemini/error_handler"
      require_relative "gemini/format"
      require_relative "gemini/stream_parser"
-     require_relative "gemini/response_parser"
      require_relative "gemini/models"
      require_relative "gemini/images"
      require_relative "gemini/files"
@@ -54,14 +44,14 @@ module LLM
      # @param model (see LLM::Provider#embed)
      # @param params (see LLM::Provider#embed)
      # @raise (see LLM::Provider#request)
-     # @return (see LLM::Provider#embed)
+     # @return [LLM::Response]
      def embed(input, model: "text-embedding-004", **params)
        model = model.respond_to?(:id) ? model.id : model
        path = ["/v1beta/models/#{model}", "embedContent?key=#{@key}"].join(":")
        req = Net::HTTP::Post.new(path, headers)
        req.body = JSON.dump({content: {parts: [{text: input}]}})
        res = execute(request: req)
-       Response::Embedding.new(res).extend(response_parser)
+       LLM::Response.new(res).extend(LLM::Gemini::Response::Embedding)
      end

      ##
@@ -71,9 +61,9 @@ module LLM
      # @param params (see LLM::Provider#complete)
      # @example (see LLM::Provider#complete)
      # @raise (see LLM::Provider#request)
-     # @raise [LLM::Error::PromptError]
+     # @raise [LLM::PromptError]
      #  When given an object a provider does not understand
-     # @return (see LLM::Provider#complete)
+     # @return [LLM::Response]
      def complete(prompt, params = {})
        params = {role: :user, model: default_model}.merge!(params)
        params = [params, format_schema(params), format_tools(params)].inject({}, &:merge!).compact
@@ -86,12 +76,13 @@ module LLM
        body = JSON.dump({contents: format(messages)}.merge!(params))
        set_body_stream(req, StringIO.new(body))
        res = execute(request: req, stream:)
-       Response::Completion.new(res).extend(response_parser)
+       LLM::Response.new(res).extend(LLM::Gemini::Response::Completion)
      end

      ##
      # Provides an interface to Gemini's audio API
      # @see https://ai.google.dev/gemini-api/docs/audio Gemini docs
+     # @return [LLM::Gemini::Audio]
      def audio
        LLM::Gemini::Audio.new(self)
      end
@@ -107,6 +98,7 @@ module LLM
      ##
      # Provides an interface to Gemini's file management API
      # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
+     # @return [LLM::Gemini::Files]
      def files
        LLM::Gemini::Files.new(self)
      end
@@ -114,6 +106,7 @@ module LLM
      ##
      # Provides an interface to Gemini's models API
      # @see https://ai.google.dev/gemini-api/docs/models Gemini docs
+     # @return [LLM::Gemini::Models]
      def models
        LLM::Gemini::Models.new(self)
      end
@@ -140,10 +133,6 @@ module LLM
        )
      end

-     def response_parser
-       LLM::Gemini::ResponseParser
-     end
-
      def stream_parser
        LLM::Gemini::StreamParser
      end
data/lib/llm/providers/llamacpp.rb

@@ -7,7 +7,18 @@ module LLM
    # The LlamaCpp class implements a provider for
    # [llama.cpp](https://github.com/ggml-org/llama.cpp)
    # through the OpenAI-compatible API provided by the
-   # llama-server binary.
+   # llama-server binary. Similar to the ollama provider,
+   # this provider supports a wide range of models and
+   # is straightforward to run on your own hardware.
+   #
+   # @example
+   #   #!/usr/bin/env ruby
+   #   require "llm"
+   #
+   #   llm = LLM.llamacpp(key: nil)
+   #   bot = LLM::Bot.new(llm)
+   #   bot.chat ["Tell me about this photo", File.open("/images/frog.jpg", "rb")]
+   #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
    class LlamaCpp < OpenAI
      ##
      # @param (see LLM::Provider#initialize)
@@ -46,6 +57,12 @@ module LLM
        raise NotImplementedError
      end

+     ##
+     # @raise [NotImplementedError]
+     def vector_stores
+       raise NotImplementedError
+     end
+
      ##
      # Returns the default model for chat completions
      # @see https://ollama.com/library/qwen3 qwen3
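Since LlamaCpp subclasses the OpenAI provider, OpenAI-only endpoints are stubbed out, and the new vector_stores API (added to OpenAI in this release) joins the existing stubs. A sketch of the resulting behaviour:

  llm = LLM.llamacpp(key: nil)
  llm.vector_stores # raises NotImplementedError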
data/lib/llm/providers/ollama/format/completion_format.rb

@@ -28,13 +28,16 @@ module LLM::Ollama::Format
      def format_content(content)
        case content
+       when File
+         content.close unless content.closed?
+         format_content(LLM.File(content.path))
        when LLM::File
          if content.image?
            {content: "This message has an image associated with it", images: [content.to_b64]}
          else
-           raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                          "is not an image, and therefore not supported by the " \
-                                          "Ollama API"
+           raise LLM::PromptError, "The given object (an instance of #{content.class}) " \
+                                   "is not an image, and therefore not supported by the " \
+                                   "Ollama API"
          end
        when String
          {content:}
@@ -43,8 +46,8 @@ module LLM::Ollama::Format
        when LLM::Function::Return
          throw(:abort, {role: "tool", tool_call_id: content.id, content: JSON.dump(content.value)})
        else
-         raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                        "is not supported by the Ollama API"
+         raise LLM::PromptError, "The given object (an instance of #{content.class}) " \
+                                 "is not supported by the Ollama API"
        end
      end
data/lib/llm/providers/ollama/models.rb

@@ -39,18 +39,12 @@ class LLM::Ollama
      # @see https://ollama.com/library Ollama library
      # @param [Hash] params Other parameters (see Ollama docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::ModelList]
+     # @return [LLM::Response]
      def all(**params)
        query = URI.encode_www_form(params)
        req = Net::HTTP::Get.new("/api/tags?#{query}", headers)
        res = execute(request: req)
-       LLM::Response::ModelList.new(res).tap { |modellist|
-         models = modellist.body["models"].map do |model|
-           model = model.transform_keys { snakecase(_1) }
-           LLM::Model.from_hash(model).tap { _1.provider = @provider }
-         end
-         modellist.models = models
-       }
+       LLM::Response.new(res)
      end

      private
data/lib/llm/providers/ollama/response/completion.rb

@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module LLM::Ollama::Response
+   module Completion
+     def model = body.model
+     def prompt_tokens = body.prompt_eval_count || 0
+     def completion_tokens = body.eval_count || 0
+     def total_tokens = prompt_tokens + completion_tokens
+     def message = body.message
+     def choices = [format_choices]
+
+     private
+
+     def format_choices
+       role, content, calls = message.to_h.values_at("role", "content", "tool_calls")
+       extra = {response: self, tool_calls: format_tool_calls(calls)}
+       LLM::Message.new(role, content, extra)
+     end
+
+     def format_tool_calls(tools)
+       return [] unless tools
+       tools.filter_map do |tool|
+         next unless tool["function"]
+         LLM::Object.new(tool["function"])
+       end
+     end
+   end
+ end
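Note that Ollama reports usage as top-level prompt_eval_count and eval_count fields rather than a usage object, so the module computes the total itself. A sketch, assuming an Ollama provider instance named `llm`:

  res = llm.complete("Hello")
  res.total_tokens == res.prompt_tokens + res.completion_tokens # => true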
data/lib/llm/providers/ollama/response/embedding.rb

@@ -0,0 +1,9 @@
+ # frozen_string_literal: true
+
+ module LLM::Ollama::Response
+   module Embedding
+     def embeddings = data.map { _1["embedding"] }
+     def prompt_tokens = body.dig("usage", "prompt_tokens") || 0
+     def total_tokens = body.dig("usage", "total_tokens") || 0
+   end
+ end
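Embeddings go through Ollama's OpenAI-compatible /v1/embeddings endpoint (see the ollama.rb hunk below), so this module reads the OpenAI-style response shape. A hedged sketch, assuming the response's "data" array is reachable as `data`:

  llm = LLM.ollama(key: nil)
  res = llm.embed("Hello world")
  res.embeddings # one vector per input, from the response's "data" array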
data/lib/llm/providers/ollama.rb

@@ -2,27 +2,25 @@

  module LLM
    ##
-   # The Ollama class implements a provider for [Ollama](https://ollama.ai/).
-   #
-   # This provider supports a wide range of models, it is relatively
-   # straight forward to run on your own hardware, and includes multi-modal
-   # models that can process images and text. See the example for a demonstration
-   # of a multi-modal model by the name `llava`
+   # The Ollama class implements a provider for [Ollama](https://ollama.ai/) &ndash;
+   # and the provider supports a wide range of models. It is straight forward
+   # to run on your own hardware, and there are a number of multi-modal models
+   # that can process both images and text.
    #
    # @example
    #   #!/usr/bin/env ruby
    #   require "llm"
    #
-   #   llm = LLM.ollama(nil)
+   #   llm = LLM.ollama(key: nil)
    #   bot = LLM::Bot.new(llm, model: "llava")
-   #   bot.chat LLM::File("/images/capybara.png")
-   #   bot.chat "Describe the image"
+   #   bot.chat ["Tell me about this image", File.open("/images/parrot.png", "rb")]
    #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
    class Ollama < Provider
+     require_relative "ollama/response/embedding"
+     require_relative "ollama/response/completion"
      require_relative "ollama/error_handler"
      require_relative "ollama/format"
      require_relative "ollama/stream_parser"
-     require_relative "ollama/response_parser"
      require_relative "ollama/models"

      include Format
@@ -41,13 +39,13 @@ module LLM
      # @param model (see LLM::Provider#embed)
      # @param params (see LLM::Provider#embed)
      # @raise (see LLM::Provider#request)
-     # @return (see LLM::Provider#embed)
+     # @return [LLM::Response]
      def embed(input, model: default_model, **params)
        params = {model:}.merge!(params)
        req = Net::HTTP::Post.new("/v1/embeddings", headers)
        req.body = JSON.dump({input:}.merge!(params))
        res = execute(request: req)
-       Response::Embedding.new(res).extend(response_parser)
+       LLM::Response.new(res).extend(LLM::Ollama::Response::Embedding)
      end

      ##
@@ -57,9 +55,9 @@ module LLM
      # @param params (see LLM::Provider#complete)
      # @example (see LLM::Provider#complete)
      # @raise (see LLM::Provider#request)
-     # @raise [LLM::Error::PromptError]
+     # @raise [LLM::PromptError]
      #  When given an object a provider does not understand
-     # @return (see LLM::Provider#complete)
+     # @return [LLM::Response]
      def complete(prompt, params = {})
        params = {role: :user, model: default_model, stream: true}.merge!(params)
        params = [params, {format: params[:schema]}, format_tools(params)].inject({}, &:merge!).compact
@@ -70,7 +68,7 @@ module LLM
        body = JSON.dump({messages: [format(messages)].flatten}.merge!(params))
        set_body_stream(req, StringIO.new(body))
        res = execute(request: req, stream:)
-       Response::Completion.new(res).extend(response_parser)
+       LLM::Response.new(res).extend(LLM::Ollama::Response::Completion)
      end

      ##
@@ -104,10 +102,6 @@ module LLM
        )
      end

-     def response_parser
-       LLM::Ollama::ResponseParser
-     end
-
      def stream_parser
        LLM::Ollama::StreamParser
      end
data/lib/llm/providers/openai/audio.rb

@@ -5,7 +5,7 @@ class LLM::OpenAI
    # The {LLM::OpenAI::Audio LLM::OpenAI::Audio} class provides an audio
    # object for interacting with [OpenAI's audio API](https://platform.openai.com/docs/api-reference/audio/createSpeech).
    # @example
-   #   llm = LLM.openai(ENV["KEY"])
+   #   llm = LLM.openai(key: ENV["KEY"])
    #   res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
    #   IO.copy_stream res.audio, "rocket.mp3"
    class Audio
@@ -20,7 +20,7 @@ class LLM::OpenAI
      ##
      # Create an audio track
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.images.create_speech(input: "A dog on a rocket to the moon")
      #   File.binwrite("rocket.mp3", res.audio.string)
      # @see https://platform.openai.com/docs/api-reference/audio/createSpeech OpenAI docs
@@ -30,19 +30,19 @@ class LLM::OpenAI
      # @param [String] response_format The response format
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::Audio]
+     # @return [LLM::Response]
      def create_speech(input:, voice: "alloy", model: "gpt-4o-mini-tts", response_format: "mp3", **params)
        req = Net::HTTP::Post.new("/v1/audio/speech", headers)
        req.body = JSON.dump({input:, voice:, model:, response_format:}.merge!(params))
        io = StringIO.new("".b)
        res = execute(request: req) { _1.read_body { |chunk| io << chunk } }
-       LLM::Response::Audio.new(res).tap { _1.audio = io }
+       LLM::Response.new(res).tap { _1.define_singleton_method(:audio) { io } }
      end

      ##
      # Create an audio transcription
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.audio.create_transcription(file: "/audio/rocket.mp3")
      #   res.text # => "A dog on a rocket to the moon"
      # @see https://platform.openai.com/docs/api-reference/audio/createTranscription OpenAI docs
@@ -50,21 +50,21 @@ class LLM::OpenAI
      # @param [String] model The model to use
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::AudioTranscription]
+     # @return [LLM::Response]
      def create_transcription(file:, model: "whisper-1", **params)
        multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
        req = Net::HTTP::Post.new("/v1/audio/transcriptions", headers)
        req["content-type"] = multi.content_type
        set_body_stream(req, multi.body)
        res = execute(request: req)
-       LLM::Response::AudioTranscription.new(res).tap { _1.text = _1.body["text"] }
+       LLM::Response.new(res)
      end

      ##
      # Create an audio translation (in English)
      # @example
      #   # Arabic => English
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.audio.create_translation(file: "/audio/bismillah.mp3")
      #   res.text # => "In the name of Allah, the Beneficent, the Merciful."
      # @see https://platform.openai.com/docs/api-reference/audio/createTranslation OpenAI docs
@@ -72,14 +72,14 @@ class LLM::OpenAI
      # @param [String] model The model to use
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::AudioTranslation]
+     # @return [LLM::Response]
      def create_translation(file:, model: "whisper-1", **params)
        multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
        req = Net::HTTP::Post.new("/v1/audio/translations", headers)
        req["content-type"] = multi.content_type
        set_body_stream(req, multi.body)
        res = execute(request: req)
-       LLM::Response::AudioTranslation.new(res).tap { _1.text = _1.body["text"] }
+       LLM::Response.new(res)
      end

      private
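create_speech now attaches the buffered audio stream with define_singleton_method instead of returning a dedicated LLM::Response::Audio object, while transcription and translation return the response as-is. A sketch matching the @example blocks above:

  llm = LLM.openai(key: ENV["KEY"])
  res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
  File.binwrite("rocket.mp3", res.audio.string) # res.audio is the buffered StringIO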
data/lib/llm/providers/openai/files.rb

@@ -12,23 +12,14 @@ class LLM::OpenAI
    #   #!/usr/bin/env ruby
    #   require "llm"
    #
-   #   llm = LLM.openai(ENV["KEY"])
+   #   llm = LLM.openai(key: ENV["KEY"])
    #   bot = LLM::Bot.new(llm)
-   #   file = llm.files.create file: "/documents/freebsd.pdf"
-   #   bot.chat(file)
-   #   bot.chat("Describe the document")
-   #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
-   #
-   # @example example #2
-   #   #!/usr/bin/env ruby
-   #   require "llm"
-   #
-   #   llm = LLM.openai(ENV["KEY"])
-   #   bot = LLM::Bot.new(llm)
-   #   file = llm.files.create file: "/documents/openbsd.pdf"
-   #   bot.chat(["Describe the document I sent to you", file])
+   #   file = llm.files.create file: "/books/goodread.pdf"
+   #   bot.chat ["Tell me about this PDF", file]
    #   bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
    class Files
+     require_relative "response/file"
+
      ##
      # Returns a new Files object
      # @param provider [LLM::Provider]
@@ -40,7 +31,7 @@ class LLM::OpenAI
      ##
      # List all files
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.files.all
      #   res.each do |file|
      #     print "id: ", file.id, "\n"
@@ -48,60 +39,57 @@ class LLM::OpenAI
      # @see https://platform.openai.com/docs/api-reference/files/list OpenAI docs
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::FileList]
+     # @return [LLM::Response]
      def all(**params)
        query = URI.encode_www_form(params)
        req = Net::HTTP::Get.new("/v1/files?#{query}", headers)
        res = execute(request: req)
-       LLM::Response::FileList.new(res).tap { |filelist|
-         files = filelist.body["data"].map { LLM::Object.from_hash(_1) }
-         filelist.files = files
-       }
+       LLM::Response.new(res)
      end

      ##
      # Create a file
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.files.create file: "/documents/haiku.txt"
      # @see https://platform.openai.com/docs/api-reference/files/create OpenAI docs
-     # @param [File] file The file
+     # @param [File, LLM::File, String] file The file
      # @param [String] purpose The purpose of the file (see OpenAI docs)
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::File]
+     # @return [LLM::Response]
      def create(file:, purpose: "assistants", **params)
-       multi = LLM::Multipart.new(params.merge!(file:, purpose:))
+       multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), purpose:))
        req = Net::HTTP::Post.new("/v1/files", headers)
        req["content-type"] = multi.content_type
        set_body_stream(req, multi.body)
        res = execute(request: req)
-       LLM::Response::File.new(res)
+       LLM::Response.new(res).extend(LLM::OpenAI::Response::File)
      end

      ##
      # Get a file
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.files.get(file: "file-1234567890")
      #   print "id: ", res.id, "\n"
      # @see https://platform.openai.com/docs/api-reference/files/get OpenAI docs
      # @param [#id, #to_s] file The file ID
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::File]
+     # @return [LLM::Response]
      def get(file:, **params)
        file_id = file.respond_to?(:id) ? file.id : file
        query = URI.encode_www_form(params)
        req = Net::HTTP::Get.new("/v1/files/#{file_id}?#{query}", headers)
        res = execute(request: req)
-       LLM::Response::File.new(res)
+       LLM::Response.new(res).extend(LLM::OpenAI::Response::File)
      end

      ##
      # Download the content of a file
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.files.download(file: "file-1234567890")
      #   File.binwrite "haiku1.txt", res.file.read
      #   print res.file.read, "\n"
@@ -109,31 +97,31 @@ class LLM::OpenAI
      # @param [#id, #to_s] file The file ID
      # @param [Hash] params Other parameters (see OpenAI docs)
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Response::DownloadFile]
+     # @return [LLM::Response]
      def download(file:, **params)
        query = URI.encode_www_form(params)
        file_id = file.respond_to?(:id) ? file.id : file
        req = Net::HTTP::Get.new("/v1/files/#{file_id}/content?#{query}", headers)
        io = StringIO.new("".b)
        res = execute(request: req) { |res| res.read_body { |chunk| io << chunk } }
-       LLM::Response::DownloadFile.new(res).tap { _1.file = io }
+       LLM::Response.new(res).tap { _1.define_singleton_method(:file) { io } }
      end

      ##
      # Delete a file
      # @example
-     #   llm = LLM.openai(ENV["KEY"])
+     #   llm = LLM.openai(key: ENV["KEY"])
      #   res = llm.files.delete(file: "file-1234567890")
      #   print res.deleted, "\n"
      # @see https://platform.openai.com/docs/api-reference/files/delete OpenAI docs
      # @param [#id, #to_s] file The file ID
      # @raise (see LLM::Provider#request)
-     # @return [LLM::Object] Response body
+     # @return [LLM::Response]
      def delete(file:)
        file_id = file.respond_to?(:id) ? file.id : file
        req = Net::HTTP::Delete.new("/v1/files/#{file_id}", headers)
        res = execute(request: req)
-       LLM::Object.from_hash JSON.parse(res.body)
+       LLM::Response.new(res)
      end

      private
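As elsewhere in this release, the files endpoints now return a plain LLM::Response, with LLM::OpenAI::Response::File mixed in where file attributes are needed. A hedged sketch of the round trip, following the @example blocks above:

  llm = LLM.openai(key: ENV["KEY"])
  file = llm.files.create(file: "/documents/haiku.txt") # extended with Response::File
  llm.files.download(file:).file.read                   # buffered file content
  llm.files.delete(file:)                               # plain LLM::Response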