smart_prompt 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 683f259828b34a687bb598abff6fe3ef547e918954b7422eecf2d357ba237495
4
- data.tar.gz: 5410ee6c08d46643ac1c6c9ccbccc38f5729973a65063454c8b75c7787a587a6
3
+ metadata.gz: a1b5288acfef3c366b16a0e08dc0b4f43a7f8613f73879f92420a3cf60ce9332
4
+ data.tar.gz: c2fd82bf35e96c6784492dfe2dd9ee30dbf02e56b5cce41ee8d9895f4b050b13
5
5
  SHA512:
6
- metadata.gz: 25755db9db82c9af27d8007753da625767831f835bd5767726db738c2863dfea9a3429d4a600e7dbd5005729cfbd3b679b75e4657eab13dd7b6e446379984ea0
7
- data.tar.gz: 4fb6b5f037c504b7a0ae440066aa4608fafa5a97c516ac220ffcd0817ffd03e880d0d7473e0e6537b50c6e771ca59f3b08aa9cbb3b24fabb41665a06ebe69ca2
6
+ metadata.gz: ccd75b4af683bb4585ca46a24d60833fac4344e214f5fef6730e9b55e86e80a35951d39e6cbff8ad1f364623e62aade8bce065ade1cd8c6ddea7e1b1faea2126
7
+ data.tar.gz: 68cba98948160fc872d2b5661661fe16ae76f5b43025fa96ea0c546563e01be5a0930d76fd13ec327017e07b23c7b739953794a3b385185c448888862374190c
@@ -1,6 +1,7 @@
1
1
  require "yaml"
2
2
  require "retriable"
3
3
  require "numo/narray"
4
+ require "base64"
4
5
 
5
6
  module SmartPrompt
6
7
  class Conversation
@@ -234,8 +235,8 @@ module SmartPrompt
234
235
 
235
236
  def multimodal_content(text)
236
237
  parts = @pending_content_parts
237
- images_and_videos = parts.select { |part| ["image", "video"].include?(part[:type] || part["type"]) }
238
- audio_parts = parts.select { |part| (part[:type] || part["type"]) == "audio" }
238
+ images_and_videos = parts.select { |part| ["image_url", "image", "video_url", "video"].include?(part[:type] || part["type"]) }
239
+ audio_parts = parts.select { |part| ["input_audio", "audio"].include?(part[:type] || part["type"]) }
239
240
  other_parts = parts - images_and_videos - audio_parts
240
241
  normalize_content_parts(images_and_videos + other_parts + [{ type: "text", text: text.to_s }] + audio_parts)
241
242
  end
@@ -249,14 +250,26 @@ module SmartPrompt
249
250
  end
250
251
 
251
252
  def media_part(type, source, **metadata)
252
- part = { type: type }
253
253
  case type
254
254
  when "image"
255
- part[:url] = source
255
+ mime_type = detect_image_mime(source)
256
+ data = File.binread(source)
257
+ base64_data = Base64.strict_encode64(data)
258
+ url = "data:#{mime_type};base64,#{base64_data}"
259
+ part = { type: "image_url", image_url: { url: url } }
256
260
  when "audio"
257
- part[:audio] = source
261
+ format = detect_audio_format(source)
262
+ data = File.binread(source)
263
+ base64_data = Base64.strict_encode64(data)
264
+ part = { type: "input_audio", input_audio: { data: base64_data, format: format } }
258
265
  when "video"
259
- part[:video] = source
266
+ mime_type = detect_video_mime(source)
267
+ data = File.binread(source)
268
+ base64_data = Base64.strict_encode64(data)
269
+ url = "data:#{mime_type};base64,#{base64_data}"
270
+ part = { type: "video_url", video_url: { url: url } }
271
+ else
272
+ part = { type: type }
260
273
  end
261
274
  metadata.each do |key, value|
262
275
  part[key] = value unless value.nil?
@@ -264,6 +277,35 @@ module SmartPrompt
264
277
  part
265
278
  end
266
279
 
280
+ def detect_image_mime(path)
281
+ ext = File.extname(path).downcase
282
+ case ext
283
+ when ".png" then "image/png"
284
+ when ".jpg", ".jpeg" then "image/jpeg"
285
+ when ".gif" then "image/gif"
286
+ when ".webp" then "image/webp"
287
+ when ".bmp" then "image/bmp"
288
+ when ".svg" then "image/svg+xml"
289
+ else "application/octet-stream"
290
+ end
291
+ end
292
+
293
+ def detect_audio_format(path)
294
+ ext = File.extname(path).downcase.delete_prefix(".")
295
+ %w[wav mp3 ogg flac aac m4a].include?(ext) ? ext : "wav"
296
+ end
297
+
298
+ def detect_video_mime(path)
299
+ ext = File.extname(path).downcase
300
+ case ext
301
+ when ".mp4" then "video/mp4"
302
+ when ".webm" then "video/webm"
303
+ when ".mov" then "video/quicktime"
304
+ when ".avi" then "video/x-msvideo"
305
+ else "application/octet-stream"
306
+ end
307
+ end
308
+
267
309
  def thinking_system_message(message)
268
310
  message = message.to_s.sub(/\A<\|think\|>\n?/, "")
269
311
  return message if @thinking_enabled == false
@@ -1,3 +1,3 @@
1
1
  module SmartPrompt
2
- VERSION = "0.4.3"
2
+ VERSION = "0.4.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smart_prompt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - zhuang biaowei
@@ -93,6 +93,20 @@ dependencies:
93
93
  - - "~>"
94
94
  - !ruby/object:Gem::Version
95
95
  version: 0.9.2.1
96
+ - !ruby/object:Gem::Dependency
97
+ name: base64
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: 0.3.0
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 0.3.0
96
110
  description: SmartPrompt provides a flexible DSL for managing prompts, interacting
97
111
  with multiple LLMs, and creating composable task workers.
98
112
  email: