llm.rb 0.4.1 → 0.5.0

Files changed (53)
  1. checksums.yaml +4 -4
  2. data/README.md +134 -88
  3. data/lib/json/schema/array.rb +6 -2
  4. data/lib/json/schema/boolean.rb +4 -0
  5. data/lib/json/schema/integer.rb +23 -1
  6. data/lib/json/schema/leaf.rb +38 -0
  7. data/lib/json/schema/null.rb +4 -0
  8. data/lib/json/schema/number.rb +23 -1
  9. data/lib/json/schema/object.rb +6 -2
  10. data/lib/json/schema/string.rb +26 -1
  11. data/lib/json/schema/version.rb +8 -0
  12. data/lib/json/schema.rb +34 -23
  13. data/lib/llm/buffer.rb +28 -10
  14. data/lib/llm/chat.rb +26 -1
  15. data/lib/llm/core_ext/ostruct.rb +14 -8
  16. data/lib/llm/file.rb +6 -1
  17. data/lib/llm/function.rb +81 -0
  18. data/lib/llm/message.rb +46 -1
  19. data/lib/llm/providers/anthropic/format/completion_format.rb +73 -0
  20. data/lib/llm/providers/anthropic/format.rb +7 -33
  21. data/lib/llm/providers/anthropic/response_parser/completion_parser.rb +51 -0
  22. data/lib/llm/providers/anthropic/response_parser.rb +1 -9
  23. data/lib/llm/providers/anthropic.rb +4 -3
  24. data/lib/llm/providers/gemini/audio.rb +4 -4
  25. data/lib/llm/providers/gemini/files.rb +5 -4
  26. data/lib/llm/providers/gemini/format/completion_format.rb +54 -0
  27. data/lib/llm/providers/gemini/format.rb +28 -27
  28. data/lib/llm/providers/gemini/images.rb +9 -4
  29. data/lib/llm/providers/gemini/response_parser/completion_parser.rb +46 -0
  30. data/lib/llm/providers/gemini/response_parser.rb +13 -20
  31. data/lib/llm/providers/gemini.rb +3 -12
  32. data/lib/llm/providers/ollama/format/completion_format.rb +72 -0
  33. data/lib/llm/providers/ollama/format.rb +10 -30
  34. data/lib/llm/providers/ollama/response_parser/completion_parser.rb +42 -0
  35. data/lib/llm/providers/ollama/response_parser.rb +8 -11
  36. data/lib/llm/providers/ollama.rb +3 -11
  37. data/lib/llm/providers/openai/audio.rb +6 -6
  38. data/lib/llm/providers/openai/files.rb +3 -3
  39. data/lib/llm/providers/openai/format/completion_format.rb +81 -0
  40. data/lib/llm/providers/openai/format/respond_format.rb +69 -0
  41. data/lib/llm/providers/openai/format.rb +25 -58
  42. data/lib/llm/providers/openai/images.rb +4 -2
  43. data/lib/llm/providers/openai/response_parser/completion_parser.rb +55 -0
  44. data/lib/llm/providers/openai/response_parser/respond_parser.rb +56 -0
  45. data/lib/llm/providers/openai/response_parser.rb +8 -44
  46. data/lib/llm/providers/openai/responses.rb +10 -11
  47. data/lib/llm/providers/openai.rb +5 -16
  48. data/lib/llm/response/{output.rb → respond.rb} +2 -2
  49. data/lib/llm/response.rb +1 -1
  50. data/lib/llm/version.rb +1 -1
  51. data/lib/llm.rb +28 -0
  52. data/llm.gemspec +1 -0
  53. metadata +29 -3
data/lib/llm/providers/gemini/files.rb

@@ -19,7 +19,7 @@ class LLM::Gemini
  #
  #  llm = LLM.gemini(ENV["KEY"])
  #  bot = LLM::Chat.new(llm).lazy
- #  file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+ #  file = llm.files.create file: "/audio/haiku.mp3"
  #  bot.chat(file)
  #  bot.chat("Describe the audio file I sent to you")
  #  bot.chat("The audio file is the first message I sent to you.")
@@ -30,7 +30,7 @@ class LLM::Gemini
  #
  #  llm = LLM.gemini(ENV["KEY"])
  #  bot = LLM::Chat.new(llm).lazy
- #  file = llm.files.create file: LLM::File("/audio/haiku.mp3")
+ #  file = llm.files.create file: "/audio/haiku.mp3"
  #  bot.chat(["Describe the audio file I sent to you", file])
  #  bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
  class Files
@@ -71,13 +71,14 @@ class LLM::Gemini
  # Create a file
  # @example
  #  llm = LLM.gemini(ENV["KEY"])
- #  res = llm.files.create file: LLM::File("/audio/haiku.mp3"),
+ #  res = llm.files.create file: "/audio/haiku.mp3"
  # @see https://ai.google.dev/gemini-api/docs/files Gemini docs
- # @param [File] file The file
+ # @param [String, LLM::File] file The file
  # @param [Hash] params Other parameters (see Gemini docs)
  # @raise (see LLM::Provider#request)
  # @return [LLM::Response::File]
  def create(file:, **params)
+   file = LLM.File(file)
    req = Net::HTTP::Post.new(request_upload_url(file:), {})
    req["content-length"] = file.bytesize
    req["X-Goog-Upload-Offset"] = 0
data/lib/llm/providers/gemini/format/completion_format.rb (new file)

@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ module LLM::Gemini::Format
+   ##
+   # @private
+   class CompletionFormat
+     ##
+     # @param [LLM::Message, Hash] message
+     #  The message to format
+     def initialize(message)
+       @message = message
+     end
+
+     ##
+     # Formats the message for the Gemini chat completions API
+     # @return [Hash]
+     def format
+       catch(:abort) do
+         if Hash === message
+           {role: message[:role], parts: format_content(message[:content])}
+         elsif message.tool_call?
+           {role: message.role, parts: message.extra[:original_tool_calls].map { {"functionCall" => _1} }}
+         else
+           {role: message.role, parts: format_content(message.content)}
+         end
+       end
+     end
+
+     def format_content(content)
+       case content
+       when Array
+         content.empty? ? throw(:abort, nil) : content.flat_map { format_content(_1) }
+       when LLM::Response::File
+         file = content
+         [{file_data: {mime_type: file.mime_type, file_uri: file.uri}}]
+       when LLM::File
+         file = content
+         [{inline_data: {mime_type: file.mime_type, data: file.to_b64}}]
+       when String
+         [{text: content}]
+       when LLM::Message
+         format_content(content.content)
+       when LLM::Function::Return
+         [{text: content.value}]
+       else
+         raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+                                        "is not supported by the Gemini API"
+       end
+     end
+
+     def message = @message
+     def content = message.content
+   end
+ end
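
Tracing format through the simple cases, the shapes it produces work out as follows
(a sketch derived by reading the class above, not captured output):

    require "llm"

    fmt = LLM::Gemini::Format::CompletionFormat
    # A String becomes a single text part:
    fmt.new(LLM::Message.new("user", "Hello")).format
    #=> {role: "user", parts: [{text: "Hello"}]}
    # An empty content array throws :abort, so format returns nil and the
    # message is dropped by the caller's filter_map (see format.rb below).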
data/lib/llm/providers/gemini/format.rb

@@ -4,45 +4,46 @@ class LLM::Gemini
  ##
  # @private
  module Format
+   require_relative "format/completion_format"
+
    ##
    # @param [Array<LLM::Message>] messages
    #  The messages to format
    # @return [Array<Hash>]
    def format(messages)
-     messages.map do
-       if Hash === _1
-         {role: _1[:role], parts: [format_content(_1[:content])]}
-       else
-         {role: _1.role, parts: [format_content(_1.content)]}
-       end
+     messages.filter_map do |message|
+       CompletionFormat.new(message).format
      end
    end

    private

    ##
-   # @param [String, Array, LLM::Response::File, LLM::File] content
-   #  The content to format
+   # @param [JSON::Schema] schema
+   #  The schema to format
    # @return [Hash]
-   #  The formatted content
-   def format_content(content)
-     case content
-     when Array
-       content.map { format_content(_1) }
-     when LLM::Response::File
-       file = content
-       {file_data: {mime_type: file.mime_type, file_uri: file.uri}}
-     when LLM::File
-       file = content
-       {inline_data: {mime_type: file.mime_type, data: file.to_b64}}
-     when String
-       {text: content}
-     when LLM::Message
-       format_content(content.content)
-     else
-       raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                      "is not supported by the Gemini API"
-     end
+   def format_schema(schema)
+     return {} unless schema
+     {
+       "generationConfig" => {
+         "response_mime_type" => "application/json",
+         "response_schema" => schema
+       }
+     }
+   end
+
+   ##
+   # @param [Array<LLM::Function>] tools
+   #  The tools to format
+   # @return [Hash]
+   def format_tools(tools)
+     return {} unless tools
+     functions = tools.grep(LLM::Function)
+     {
+       "tools" => {
+         "functionDeclarations" => functions.map { _1.format(self) }
+       }
+     }
    end
  end
end
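
For reference, the request fragments these two private helpers produce, read directly
from the code above:

    format_schema(schema)
    #=> {"generationConfig" => {"response_mime_type" => "application/json",
    #                           "response_schema" => schema}}

    format_tools(tools)
    #=> {"tools" => {"functionDeclarations" => [...]}}  # one entry per LLM::Function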
data/lib/llm/providers/gemini/images.rb

@@ -58,16 +58,17 @@ class LLM::Gemini
  #  res = llm.images.edit image: LLM::File("cat.png"), prompt: "Add a hat to the cat"
  #  IO.copy_stream res.images[0], "hatoncat.png"
  # @see https://ai.google.dev/gemini-api/docs/image-generation Gemini docs
- # @param [LLM::File] image The image to edit
+ # @param [String, LLM::File] image The image to edit
  # @param [String] prompt The prompt
  # @param [Hash] params Other parameters (see Gemini docs)
  # @raise (see LLM::Provider#request)
  # @note (see LLM::Gemini::Images#create)
  # @return [LLM::Response::Image]
  def edit(image:, prompt:, model: "gemini-2.0-flash-exp-image-generation", **params)
-   req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
-   body = JSON.dump({
-     contents: [{parts: [{text: prompt}, format_content(image)]}],
+   req = Net::HTTP::Post.new("/v1beta/models/#{model}:generateContent?key=#{secret}", headers)
+   image = LLM.File(image)
+   body = JSON.dump({
+     contents: [{parts: [{text: prompt}, format.format_content(image)]}],
      generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
    }.merge!(params)).b
    set_body_stream(req, StringIO.new(body))
@@ -84,6 +85,10 @@ class LLM::Gemini

    private

+   def format
+     @format ||= CompletionFormat.new(nil)
+   end
+
    def secret
      @provider.instance_variable_get(:@secret)
    end
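
Because edit now coerces its argument with LLM.File, it also accepts a plain path.
A sketch based on the updated docstring (file names are placeholders):

    require "llm"

    llm = LLM.gemini(ENV["KEY"])
    res = llm.images.edit(image: "cat.png", prompt: "Add a hat to the cat")
    IO.copy_stream res.images[0], "hatoncat.png"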
data/lib/llm/providers/gemini/response_parser/completion_parser.rb (new file)

@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module LLM::Gemini::ResponseParser
+   class CompletionParser
+     def initialize(body)
+       @body = OpenStruct.from_hash(body)
+     end
+
+     def format(response)
+       {
+         model:,
+         prompt_tokens:,
+         completion_tokens:,
+         total_tokens:,
+         choices: format_choices(response)
+       }
+     end
+
+     private
+
+     def format_choices(response)
+       candidates.map.with_index do |choice, index|
+         content = choice.content
+         role, parts = content.role, content.parts
+         text = parts.filter_map { _1["text"] }.join
+         tools = parts.filter_map { _1["functionCall"] }
+         extra = {index:, response:, tool_calls: format_tool_calls(tools), original_tool_calls: tools}
+         LLM::Message.new(role, text, extra)
+       end
+     end
+
+     def format_tool_calls(tools)
+       (tools || []).map do |tool|
+         function = {name: tool.name, arguments: tool.args}
+         OpenStruct.new(function)
+       end
+     end
+
+     def body = @body
+     def model = body.modelVersion
+     def prompt_tokens = body.usageMetadata.promptTokenCount
+     def completion_tokens = body.usageMetadata.candidatesTokenCount
+     def total_tokens = body.usageMetadata.totalTokenCount
+     def candidates = body.candidates
+   end
+ end
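
Unlike the inline parser it replaces (next hunk), this class concatenates every text
part per candidate and surfaces functionCall parts. Roughly, with shapes read from the
code above and values illustrative:

    parsed = CompletionParser.new(body).format(response)
    parsed[:choices][0]                     # an LLM::Message per candidate
    parsed[:choices][0].extra[:tool_calls]  # [#<OpenStruct name=..., arguments=...>, ...]
    parsed[:total_tokens]                   # from usageMetadata.totalTokenCount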
data/lib/llm/providers/gemini/response_parser.rb

@@ -4,33 +4,24 @@ class LLM::Gemini
  ##
  # @private
  module ResponseParser
+   require_relative "response_parser/completion_parser"
+
    ##
    # @param [Hash] body
    #  The response body from the LLM provider
    # @return [Hash]
-   def parse_embedding(body)
-     {
-       model: "text-embedding-004",
-       embeddings: body.dig("embedding", "values")
-     }
+   def parse_completion(body)
+     CompletionParser.new(body).format(self)
    end

    ##
    # @param [Hash] body
    #  The response body from the LLM provider
    # @return [Hash]
-   def parse_completion(body)
+   def parse_embedding(body)
      {
-       model: body["modelVersion"],
-       choices: body["candidates"].map do
-         LLM::Message.new(
-           _1.dig("content", "role"),
-           _1.dig("content", "parts", 0, "text"),
-           {response: self}
-         )
-       end,
-       prompt_tokens: body.dig("usageMetadata", "promptTokenCount"),
-       completion_tokens: body.dig("usageMetadata", "candidatesTokenCount")
+       model: "text-embedding-004",
+       embeddings: body.dig("embedding", "values")
      }
    end

@@ -41,10 +32,12 @@ class LLM::Gemini
    def parse_image(body)
      {
        urls: [],
-       images: body["candidates"].flat_map do |candidate|
-         candidate["content"]["parts"].filter_map do
-           next unless _1.dig("inlineData", "data")
-           StringIO.new(_1["inlineData"]["data"].unpack1("m0"))
+       images: body["candidates"].flat_map do |c|
+         parts = c["content"]["parts"]
+         parts.filter_map do
+           data = _1.dig("inlineData", "data")
+           next unless data
+           StringIO.new(data.unpack1("m0"))
          end
        end
      }
data/lib/llm/providers/gemini.rb

@@ -74,12 +74,13 @@ module LLM
  # @raise [LLM::Error::PromptError]
  #  When given an object a provider does not understand
  # @return (see LLM::Provider#complete)
- def complete(prompt, role = :user, model: default_model, schema: nil, **params)
+ def complete(prompt, role = :user, model: default_model, schema: nil, tools: nil, **params)
+   params = [format_schema(schema), format_tools(tools), params].inject({}, &:merge!).compact
    model.respond_to?(:id) ? model.id : model
    path = ["/v1beta/models/#{model}", "generateContent?key=#{@secret}"].join(":")
    req = Net::HTTP::Post.new(path, headers)
    messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
-   body = JSON.dump({contents: format(messages)}.merge!(expand_schema(schema)))
+   body = JSON.dump({contents: format(messages)}.merge!(params))
    set_body_stream(req, StringIO.new(body))
    res = request(@http, req)
    Response::Completion.new(res).extend(response_parser)
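
With the merged params, schema: and the new tools: keyword travel in a single request
body. A sketch of a tool-enabled call; weather_function is a hypothetical LLM::Function
(the function-building API lives in the new lib/llm/function.rb, not shown here):

    require "llm"

    llm = LLM.gemini(ENV["KEY"])
    res = llm.complete("What is the weather in Tokyo?", :user, tools: [weather_function])
    res.choices[0].extra[:tool_calls]  # populated when the model invokes the function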
@@ -136,16 +137,6 @@ module LLM
    }
  end

- def expand_schema(schema)
-   return {} unless schema
-   {
-     "generationConfig" => {
-       "response_mime_type" => "application/json",
-       "response_schema" => schema
-     }
-   }
- end
-
  def response_parser
    LLM::Gemini::ResponseParser
  end
data/lib/llm/providers/ollama/format/completion_format.rb (new file)

@@ -0,0 +1,72 @@
+ # frozen_string_literal: true
+
+ module LLM::Ollama::Format
+   ##
+   # @private
+   class CompletionFormat
+     ##
+     # @param [LLM::Message] message
+     #  The message to format
+     def initialize(message)
+       @message = message
+     end
+
+     ##
+     # Returns the message for the Ollama chat completions API
+     # @return [Hash]
+     def format
+       catch(:abort) do
+         if Hash === message
+           {role: message[:role]}.merge(format_content(message[:content]))
+         else
+           format_message
+         end
+       end
+     end
+
+     private
+
+     def format_content(content)
+       case content
+       when LLM::File
+         if content.image?
+           {content: "This message has an image associated with it", images: [content.to_b64]}
+         else
+           raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+                                          "is not an image, and therefore not supported by the " \
+                                          "Ollama API"
+         end
+       when String
+         {content:}
+       when LLM::Message
+         format_content(content.content)
+       else
+         raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
+                                        "is not supported by the Ollama API"
+       end
+     end
+
+     def format_message
+       case content
+       when Array
+         format_array
+       else
+         {role: message.role}.merge(format_content(content))
+       end
+     end
+
+     def format_array
+       if content.empty?
+         nil
+       elsif returns.any?
+         returns.map { {role: "tool", tool_call_id: _1.id, content: JSON.dump(_1.value)} }
+       else
+         [{role: message.role, content: content.flat_map { format_content(_1) }}]
+       end
+     end
+
+     def message = @message
+     def content = message.content
+     def returns = content.grep(LLM::Function::Return)
+   end
+ end
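
A detail worth noting in format_array: when a message's content array holds
LLM::Function::Return objects, each becomes its own "tool"-role message so Ollama can
match results to calls. The resulting shape, with illustrative values:

    # given content = [an LLM::Function::Return with id "call_1" and value {temp: 22}]
    [{role: "tool", tool_call_id: "call_1", content: "{\"temp\":22}"}]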
data/lib/llm/providers/ollama/format.rb

@@ -4,47 +4,27 @@ class LLM::Ollama
  ##
  # @private
  module Format
+   require_relative "format/completion_format"
+
    ##
    # @param [Array<LLM::Message>] messages
    #  The messages to format
    # @return [Array<Hash>]
    def format(messages)
-     messages.map do
-       if Hash === _1
-         {role: _1[:role]}
-           .merge!(_1)
-           .merge!(format_content(_1[:content]))
-       else
-         {role: _1.role}.merge! format_content(_1.content)
-       end
+     messages.filter_map do |message|
+       CompletionFormat.new(message).format
      end
    end

    private

    ##
-   # @param [String, URI] content
-   #  The content to format
-   # @return [String, Hash]
-   #  The formatted content
-   def format_content(content)
-     case content
-     when LLM::File
-       if content.image?
-         {content: "This message has an image associated with it", images: [content.to_b64]}
-       else
-         raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                        "is not an image, and therefore not supported by the " \
-                                        "Ollama API"
-       end
-     when String
-       {content:}
-     when LLM::Message
-       format_content(content.content)
-     else
-       raise LLM::Error::PromptError, "The given object (an instance of #{content.class}) " \
-                                      "is not supported by the Ollama API"
-     end
+   # @param [Array<LLM::Function>] tools
+   #  The tools to format
+   # @return [Hash]
+   def format_tools(tools)
+     return {} unless tools
+     {tools: tools.map { _1.format(self) }}
    end
  end
end
data/lib/llm/providers/ollama/response_parser/completion_parser.rb (new file)

@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ module LLM::Ollama::ResponseParser
+   ##
+   # @private
+   class CompletionParser
+     def initialize(body)
+       @body = OpenStruct.from_hash(body)
+     end
+
+     def format(response)
+       {
+         model:,
+         choices: [format_choices(response)],
+         prompt_tokens:,
+         completion_tokens:
+       }
+     end
+
+     private
+
+     def format_choices(response)
+       role, content, calls = message.to_h.values_at(:role, :content, :tool_calls)
+       extra = {response:, tool_calls: format_tool_calls(calls)}
+       LLM::Message.new(role, content, extra)
+     end
+
+     def format_tool_calls(tools)
+       return [] unless tools
+       tools.filter_map do |tool|
+         next unless tool["function"]
+         OpenStruct.new(tool["function"])
+       end
+     end
+
+     def body = @body
+     def model = body.model
+     def prompt_tokens = body.prompt_eval_count
+     def completion_tokens = body.eval_count
+     def message = body.message
+   end
+ end
data/lib/llm/providers/ollama/response_parser.rb

@@ -4,29 +4,26 @@ class LLM::Ollama
  ##
  # @private
  module ResponseParser
+   require_relative "response_parser/completion_parser"
+
    ##
    # @param [Hash] body
    #  The response body from the LLM provider
    # @return [Hash]
-   def parse_embedding(body)
-     {
-       model: body["model"],
-       embeddings: body["data"].map { _1["embedding"] },
-       prompt_tokens: body.dig("usage", "prompt_tokens"),
-       total_tokens: body.dig("usage", "total_tokens")
-     }
+   def parse_completion(body)
+     CompletionParser.new(body).format(self)
    end

    ##
    # @param [Hash] body
    #  The response body from the LLM provider
    # @return [Hash]
-   def parse_completion(body)
+   def parse_embedding(body)
      {
        model: body["model"],
-       choices: [LLM::Message.new(*body["message"].values_at("role", "content"), {response: self})],
-       prompt_tokens: body.dig("prompt_eval_count"),
-       completion_tokens: body.dig("eval_count")
+       embeddings: body["data"].map { _1["embedding"] },
+       prompt_tokens: body.dig("usage", "prompt_tokens"),
+       total_tokens: body.dig("usage", "total_tokens")
      }
    end
  end
data/lib/llm/providers/ollama.rb

@@ -60,14 +60,11 @@ module LLM
  # @raise [LLM::Error::PromptError]
  #  When given an object a provider does not understand
  # @return (see LLM::Provider#complete)
- def complete(prompt, role = :user, model: default_model, schema: nil, **params)
-   params = {model:, stream: false}
-     .merge!(expand_schema(schema))
-     .merge!(params)
-     .compact
+ def complete(prompt, role = :user, model: default_model, schema: nil, tools: nil, **params)
+   params = [{model:, stream: false, format: schema}, format_tools(tools), params].inject({}, &:merge!).compact
    req = Net::HTTP::Post.new("/api/chat", headers)
    messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
-   body = JSON.dump({messages: format(messages)}.merge!(params))
+   body = JSON.dump({messages: [format(messages)].flatten}.merge!(params))
    set_body_stream(req, StringIO.new(body))
    res = request(@http, req)
    Response::Completion.new(res).extend(response_parser)
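
Unlike Gemini, Ollama accepts a JSON schema directly through its native format field,
so schema: is merged straight into the request parameters. A sketch, assuming a local
Ollama server and a schema object built with the gem's JSON::Schema classes:

    require "llm"

    llm = LLM.ollama(nil) # local server; no API key required
    res = llm.complete("Reply with JSON", :user, schema: schema)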
@@ -104,11 +101,6 @@ module LLM
    }
  end

- def expand_schema(schema)
-   return {} unless schema
-   {format: schema}
- end
-
  def response_parser
    LLM::Ollama::ResponseParser
  end
data/lib/llm/providers/openai/audio.rb

@@ -7,7 +7,7 @@ class LLM::OpenAI
  # @example
  #  llm = LLM.openai(ENV["KEY"])
  #  res = llm.audio.create_speech(input: "A dog on a rocket to the moon")
- #  File.binwrite("rocket.mp3", res.audio.string)
+ #  IO.copy_stream res.audio, "rocket.mp3"
  class Audio
    ##
    # Returns a new Audio object
@@ -43,16 +43,16 @@ class LLM::OpenAI
  # Create an audio transcription
  # @example
  #  llm = LLM.openai(ENV["KEY"])
- #  res = llm.audio.create_transcription(file: LLM::File("/rocket.mp3"))
+ #  res = llm.audio.create_transcription(file: "/audio/rocket.mp3")
  #  res.text # => "A dog on a rocket to the moon"
  # @see https://platform.openai.com/docs/api-reference/audio/createTranscription OpenAI docs
- # @param [LLM::File] file The input audio
+ # @param [String, LLM::File] file The input audio
  # @param [String] model The model to use
  # @param [Hash] params Other parameters (see OpenAI docs)
  # @raise (see LLM::Provider#request)
  # @return [LLM::Response::AudioTranscription]
  def create_transcription(file:, model: "whisper-1", **params)
-   multi = LLM::Multipart.new(params.merge!(file:, model:))
+   multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
    req = Net::HTTP::Post.new("/v1/audio/transcriptions", headers)
    req["content-type"] = multi.content_type
    set_body_stream(req, multi.body)
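
The same LLM.File coercion applies across the OpenAI audio endpoints, so a plain path
works here too. A sketch taken from the updated docstring (path is a placeholder):

    require "llm"

    llm = LLM.openai(ENV["KEY"])
    res = llm.audio.create_transcription(file: "/audio/rocket.mp3")
    res.text # => "A dog on a rocket to the moon"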
@@ -65,7 +65,7 @@ class LLM::OpenAI
  # @example
  #  # Arabic => English
  #  llm = LLM.openai(ENV["KEY"])
- #  res = llm.audio.create_translation(file: LLM::File("/bismillah.mp3"))
+ #  res = llm.audio.create_translation(file: "/audio/bismillah.mp3")
  #  res.text # => "In the name of Allah, the Beneficent, the Merciful."
  # @see https://platform.openai.com/docs/api-reference/audio/createTranslation OpenAI docs
  # @param [LLM::File] file The input audio
@@ -74,7 +74,7 @@ class LLM::OpenAI
  # @raise (see LLM::Provider#request)
  # @return [LLM::Response::AudioTranslation]
  def create_translation(file:, model: "whisper-1", **params)
-   multi = LLM::Multipart.new(params.merge!(file:, model:))
+   multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
    req = Net::HTTP::Post.new("/v1/audio/translations", headers)
    req["content-type"] = multi.content_type
    set_body_stream(req, multi.body)
data/lib/llm/providers/openai/files.rb

@@ -14,7 +14,7 @@ class LLM::OpenAI
  #
  #  llm = LLM.openai(ENV["KEY"])
  #  bot = LLM::Chat.new(llm).lazy
- #  file = llm.files.create file: LLM::File("/documents/freebsd.pdf")
+ #  file = llm.files.create file: "/documents/freebsd.pdf"
  #  bot.chat(file)
  #  bot.chat("Describe the document")
  #  bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
@@ -24,7 +24,7 @@ class LLM::OpenAI
  #
  #  llm = LLM.openai(ENV["KEY"])
  #  bot = LLM::Chat.new(llm).lazy
- #  file = llm.files.create file: LLM::File("/documents/openbsd.pdf")
+ #  file = llm.files.create file: "/documents/openbsd.pdf"
  #  bot.chat(["Describe the document I sent to you", file])
  #  bot.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
  class Files
@@ -62,7 +62,7 @@ class LLM::OpenAI
  # Create a file
  # @example
  #  llm = LLM.openai(ENV["KEY"])
- #  res = llm.files.create file: LLM::File("/documents/haiku.txt"),
+ #  res = llm.files.create file: "/documents/haiku.txt"
  # @see https://platform.openai.com/docs/api-reference/files/create OpenAI docs
  # @param [File] file The file
  # @param [String] purpose The purpose of the file (see OpenAI docs)