RubyGems - ruby_llm-agents - Versions diffs - 3.2.0 → 3.3.0 - Mend

ruby_llm-agents 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/README.md +1 -1
data/app/views/ruby_llm/agents/executions/_audio_player.html.erb +57 -0
data/app/views/ruby_llm/agents/executions/show.html.erb +8 -0
data/lib/ruby_llm/agents/audio/speaker/active_storage_support.rb +87 -0
data/lib/ruby_llm/agents/audio/speaker.rb +10 -0
data/lib/ruby_llm/agents/audio/transcriber.rb +10 -0
data/lib/ruby_llm/agents/core/configuration.rb +5 -1
data/lib/ruby_llm/agents/core/version.rb +1 -1
data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +49 -0
data/lib/ruby_llm/agents/results/speech_result.rb +12 -7
metadata +3 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d8c4a83ecc9e39e7df7243b98a51d1c249a963f3a1f96551ebefae13becb50c5
-  data.tar.gz: d042bda1737b7593187896e879b3065cc855e990e1406410e99d1d853819f3a9
+  metadata.gz: 463487c17c50bf1496a30c9eea51dab3c334a17010853da97da5624a6cf564b5
+  data.tar.gz: 470a6666266d17dc8190f5118eec0c5f674fb51bae8b368d8713ea65d1882025
 SHA512:
-  metadata.gz: 78b6fa31a8a656c36e0bb51f6e7a405101e90f47afc3ca35f87c392878b6a2d986b2b275c6424826ffb6b5d4bfb059f4632e8976aa5b2a473eab540619bd18cb
-  data.tar.gz: 3d5890ea864aea3531e96571b6010c9fdcedc5a15e8966c7974138cde0ebaec89a771e33fd20e3fd2d5097a1ecfc398236d833f9852f046fd3b1f720eaf7fb6e
+  metadata.gz: b1e2d4688dfc294c3b94c95df084a248fa25fbcd7d99910f31f72b85138bf37a392a8b350462ee341d5d209674b844a9d2692a177db30845857a586fa77ce3bc
+  data.tar.gz: 50130237011f12c808a073d55b9083ce8449f8e0ddf8dd800c13134104f48233ea4f0fac3ddcbcc9a8b45b91814f67db5c57632dd6d54930c1ffd19fda825e96

data/README.md CHANGED Viewed

@@ -135,7 +135,7 @@ result.save("logo.png")
 | **Attachments** | Images, PDFs, and multimodal support | [Attachments](https://github.com/adham90/ruby_llm-agents/wiki/Attachments) |
 | **Embeddings** | Vector embeddings with batching, caching, and preprocessing | [Embeddings](https://github.com/adham90/ruby_llm-agents/wiki/Embeddings) |
 | **Image Operations** | Generation, analysis, editing, pipelines with cost tracking | [Images](https://github.com/adham90/ruby_llm-agents/wiki/Image-Generation) |
-| **Audio** | Text-to-speech (OpenAI, ElevenLabs) and speech-to-text with cost tracking | [Audio](https://github.com/adham90/ruby_llm-agents/wiki/Audio) |
+| **Audio** | Text-to-speech (OpenAI, ElevenLabs), speech-to-text, dashboard audio playback | [Audio](https://github.com/adham90/ruby_llm-agents/wiki/Audio) |
 | **Alerts** | Slack, webhook, and custom notifications | [Alerts](https://github.com/adham90/ruby_llm-agents/wiki/Alerts) |
 ## Quick Start

data/app/views/ruby_llm/agents/executions/_audio_player.html.erb ADDED Viewed

@@ -0,0 +1,57 @@
+<%
+  # Audio section of the execution detail page: an audio player (when a playable
+  # source was stored) followed by a row of audio facts.
+  #
+  # Each value is read from the stored response first (string key, then symbol
+  # key) and falls back to execution metadata where a metadata key exists.
+  response = @execution.response || {}
+  # First non-blank candidate wins. A plain `||` chain would stop at "" (empty
+  # strings are truthy in Ruby) and never reach the metadata fallback.
+  first_present = ->(*candidates) { candidates.find(&:present?) }
+  # Looks a key up in the response under both key types, then in metadata when a
+  # metadata key is given.
+  audio_value = lambda do |key, metadata_key = nil|
+    first_present.call(
+      response[key.to_s],
+      response[key.to_sym],
+      (@execution.metadata&.dig(metadata_key) if metadata_key)
+    )
+  end
+  # A stored URL is preferred over an inline data URI.
+  audio_src = first_present.call(audio_value.call(:audio_url), audio_value.call(:audio_data_uri))
+  audio_format = audio_value.call(:format, "audio_format")
+  audio_duration = audio_value.call(:duration, "audio_duration_seconds")
+  audio_file_size = audio_value.call(:file_size, "audio_file_size_bytes")
+  audio_voice = audio_value.call(:voice_id, "voice_id")
+  audio_provider = audio_value.call(:provider, "audio_provider")
+  # Character count is only ever recorded in metadata.
+  audio_characters = @execution.metadata&.dig("audio_characters")
+%>
+<div class="flex items-center gap-3 mt-6 mb-3">
+  <span class="text-[10px] font-medium text-gray-400 dark:text-gray-600 uppercase tracking-widest font-mono">audio</span>
+  <div class="flex-1 border-t border-gray-200 dark:border-gray-800"></div>
+</div>
+<% if audio_src.present? %>
+  <div class="mb-3">
+    <audio controls preload="metadata" class="w-full max-w-lg" style="height: 36px;">
+      <source src="<%= audio_src %>">
+      Your browser does not support the audio element.
+    </audio>
+  </div>
+<% end %>
+<div class="flex flex-wrap items-center gap-x-4 gap-y-1 font-mono text-xs text-gray-400 dark:text-gray-500">
+  <% if audio_duration.present? %>
+    <span><span class="text-gray-800 dark:text-gray-200"><%= audio_duration.is_a?(Numeric) ? "#{audio_duration.round(1)}s" : audio_duration %></span> duration</span>
+  <% end %>
+  <% if audio_format.present? %>
+    <span><span class="text-gray-800 dark:text-gray-200"><%= audio_format %></span> format</span>
+  <% end %>
+  <% if audio_file_size.present? %>
+    <span><span class="text-gray-800 dark:text-gray-200"><%= number_to_human_size(audio_file_size) %></span> size</span>
+  <% end %>
+  <% if audio_voice.present? %>
+    <span><span class="text-gray-800 dark:text-gray-200"><%= audio_voice %></span> voice</span>
+  <% end %>
+  <% if audio_provider.present? %>
+    <span><span class="text-gray-800 dark:text-gray-200"><%= audio_provider %></span> provider</span>
+  <% end %>
+  <% if audio_characters.present? %>
+    <span><span class="text-gray-800 dark:text-gray-200"><%= number_to_human_short(audio_characters) %></span> characters</span>
+  <% end %>
+</div>
+<% if audio_src.blank? %>
+  <p class="text-xs text-gray-400 dark:text-gray-600 font-mono mt-2 italic">
+    No audio data stored. Enable <code class="text-gray-500 dark:text-gray-400">persist_audio_data</code> in config to play back Speaker audio here.
+  </p>
+<% end %>

data/app/views/ruby_llm/agents/executions/show.html.erb CHANGED Viewed

@@ -57,6 +57,14 @@
   <% end %>
 </div>
+<!-- ── audio player ──────────────────── -->
+<%# Show the audio section when the agent type name contains Speaker, Narrator or %>
+<%# Transcriber (case-insensitive), or when the execution has audio duration %>
+<%# metadata or a stored audio source (data URI or URL) in its response. %>
+<% if @execution.agent_type.to_s.match?(/Speaker|Narrator|Transcriber/i) ||
+      @execution.metadata&.dig("audio_duration_seconds").present? ||
+      @execution.response&.dig("audio_data_uri").present? ||
+      @execution.response&.dig("audio_url").present? %>
+  <%= render "ruby_llm/agents/executions/audio_player" %>
+<% end %>
 <!-- ── tokens ──────────────────────── -->
 <%
   input_tokens = @execution.input_tokens || 0

data/lib/ruby_llm/agents/audio/speaker/active_storage_support.rb ADDED Viewed

@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Agents
+    class Speaker
+      # ActiveStorage integration for speakers
+      #
+      # Provides convenience methods for generating audio and directly
+      # attaching it to ActiveStorage attachments.
+      #
+      # @example Attaching to a model
+      #   class Article < ApplicationRecord
+      #     has_one_attached :narration
+      #   end
+      #
+      #   class ArticleNarrator < RubyLLM::Agents::Speaker
+      #     include RubyLLM::Agents::Speaker::ActiveStorageSupport
+      #
+      #     provider :openai
+      #     model 'tts-1-hd'
+      #     voice 'nova'
+      #   end
+      #
+      #   article = Article.find(1)
+      #   result = ArticleNarrator.speak_and_attach(
+      #     text: article.body,
+      #     record: article,
+      #     attachment_name: :narration
+      #   )
+      #
+      module ActiveStorageSupport
+        extend ActiveSupport::Concern
+        class_methods do
+          # Generate audio and attach it to a record
+          #
+          # @param text [String] Text to convert to speech
+          # @param record [ActiveRecord::Base] The record to attach to
+          # @param attachment_name [Symbol] Name of the attachment (e.g., :narration)
+          # @param options [Hash] Additional options for generation
+          # @return [SpeechResult] The speech result with audio_url set
+          def speak_and_attach(text:, record:, attachment_name:, **options)
+            result = call(text: text, **options)
+            return result unless result.success?
+            attach_audio_to_record(result, record, attachment_name, options)
+            result
+          end
+          private
+          def attach_audio_to_record(result, record, attachment_name, options)
+            attachment = record.public_send(attachment_name)
+            filename = options[:filename] || generate_audio_filename(result)
+            attachment.attach(
+              io: StringIO.new(result.audio),
+              filename: filename,
+              content_type: result.content_type
+            )
+            result.audio_key = attachment.blob.key if attachment.respond_to?(:blob) && attachment.blob
+            result.audio_url = blob_url(attachment) if attachment.respond_to?(:blob) && attachment.blob
+          end
+          def blob_url(attachment)
+            if attachment.blob.respond_to?(:url)
+              attachment.blob.url
+            elsif attachment.blob.respond_to?(:service_url)
+              attachment.blob.service_url
+            end
+          rescue => _e
+            nil
+          end
+          def generate_audio_filename(result)
+            timestamp = Time.current.to_i
+            ext = result.format || :mp3
+            "speech_#{timestamp}.#{ext}"
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/agents/audio/speaker.rb CHANGED Viewed

@@ -336,6 +336,14 @@ module RubyLLM
         context.output_tokens = 0
         context.total_cost = calculate_cost(result)
+        # Store audio-specific metadata for execution tracking
+        context[:provider] = result[:provider].to_s
+        context[:voice_id] = (resolved_voice_id || resolved_voice).to_s
+        context[:characters] = result[:characters]
+        context[:output_format] = result[:format].to_s
+        context[:file_size] = result[:audio]&.bytesize
+        context[:audio_duration_seconds] = result[:duration] if result[:duration]
         # Build final result
         context.output = build_result(
           result,
@@ -559,3 +567,5 @@ module RubyLLM
     end
   end
 end
+require_relative "speaker/active_storage_support"

data/lib/ruby_llm/agents/audio/transcriber.rb CHANGED Viewed

@@ -318,6 +318,16 @@ module RubyLLM
         context.output_tokens = 0
         context.total_cost = calculate_cost(raw_result)
+        # Store transcription-specific metadata for execution tracking
+        context[:language] = resolved_language if resolved_language
+        context[:detected_language] = raw_result[:language] if raw_result[:language]
+        context[:audio_duration_seconds] = raw_result[:duration] if raw_result[:duration]
+        context[:audio_minutes] = (raw_result[:duration] / 60.0).round(4) if raw_result[:duration]
+        context[:output_format] = self.class.output_format.to_s
+        context[:timestamp_granularity] = self.class.include_timestamps.to_s
+        context[:segment_count] = raw_result[:segments]&.size if raw_result[:segments]
+        context[:word_count] = raw_result[:text]&.split(/\s+/)&.size if raw_result[:text]
         # Build final result
         context.output = build_result(
           raw_result,

data/lib/ruby_llm/agents/core/configuration.rb CHANGED Viewed

@@ -452,7 +452,8 @@ module RubyLLM
         :root_directory,
         :root_namespace,
         :tool_result_max_length,
-        :redaction
+        :redaction,
+        :persist_audio_data
       # Attributes with validation (readers only, custom setters below)
       attr_reader :default_temperature,
@@ -734,6 +735,9 @@ module RubyLLM
         # Redaction defaults (disabled by default)
         @redaction = nil
+        # Audio data persistence (disabled by default — base64 audio can be large)
+        @persist_audio_data = false
       end
       # Returns the configured cache store, falling back to Rails.cache

data/lib/ruby_llm/agents/core/version.rb CHANGED Viewed

@@ -4,6 +4,6 @@ module RubyLLM
   module Agents
     # Current version of the RubyLLM::Agents gem
     # @return [String] Semantic version string
-    VERSION = "3.2.0"
+    VERSION = "3.3.0"
   end
 end

data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb CHANGED Viewed

@@ -280,6 +280,9 @@ module RubyLLM
               detail_data[:response] = serialize_response(context)
             end
+            # Persist audio data for Speaker executions
+            maybe_persist_audio_response(context, detail_data)
             has_data = detail_data.values.any? { |v| v.present? && v != {} && v != [] }
             return unless has_data
@@ -376,6 +379,10 @@ module RubyLLM
             if global_config.persist_responses && context.output.respond_to?(:content)
               detail_data[:response] = serialize_response(context)
             end
+            # Persist audio data for Speaker executions
+            maybe_persist_audio_response(context, detail_data)
             data[:_detail_data] = detail_data
             data
@@ -463,6 +470,48 @@ module RubyLLM
             nil
           end
+          # Adds audio playback data to the detail record of a speech execution.
+          #
+          # Does nothing unless the context output is a SpeechResult. The audio URL,
+          # when present, is always recorded (it is just a string, no binary). The
+          # serialized audio payload, including the data URI, is recorded only when
+          # persist_audio_data is enabled and the result carries audio.
+          #
+          # The payload is merged into any response hash already present in
+          # detail_data, so values serialized before this call are kept instead of
+          # being replaced. A StandardError raised while persisting is rescued and
+          # passed to +error+.
+          #
+          # @param context [Context] The execution context
+          # @param detail_data [Hash] The detail data hash to modify (mutated in place)
+          def maybe_persist_audio_response(context, detail_data)
+            speech = context.output
+            return unless speech.is_a?(RubyLLM::Agents::SpeechResult)
+            # Always persist audio_url if present (it's just a string, no binary)
+            if speech.audio_url.present?
+              detail_data[:response] ||= {}
+              detail_data[:response][:audio_url] = speech.audio_url
+            end
+            # Persist full audio data URI only when opted in
+            return unless global_config.respond_to?(:persist_audio_data) && global_config.persist_audio_data
+            return unless speech.audio.present?
+            audio_payload = serialize_audio_response(speech)
+            existing = detail_data[:response]
+            # Merge rather than overwrite so an already-serialized response survives;
+            # a non-hash value cannot be merged and is replaced as before.
+            detail_data[:response] = existing.is_a?(Hash) ? existing.merge(audio_payload) : audio_payload
+          rescue => e
+            error("Failed to persist audio response: #{e.message}")
+          end
+          # Serializes a SpeechResult into a hash for the response column
+          #
+          # @param result [SpeechResult] The speech result to serialize
+          # @return [Hash] Serialized audio response data
+          def serialize_audio_response(result)
+            {
+              audio_data_uri: result.to_data_uri,
+              audio_url: result.audio_url,
+              format: result.format.to_s,
+              duration: result.duration,
+              file_size: result.file_size,
+              voice_id: result.voice_id,
+              provider: result.provider.to_s
+            }.compact
+          end
           # Queues async logging via background job
           #
           # @param data [Hash] Execution data

data/lib/ruby_llm/agents/results/speech_result.rb CHANGED Viewed

@@ -29,17 +29,17 @@ module RubyLLM
       #   @return [String, nil] Binary audio data
       attr_reader :audio
-      # @!attribute [r] audio_url
+      # @!attribute [rw] audio_url
       #   @return [String, nil] URL if audio was stored remotely
-      attr_reader :audio_url
+      attr_accessor :audio_url
-      # @!attribute [r] audio_key
+      # @!attribute [rw] audio_key
       #   @return [String, nil] Storage key if stored
-      attr_reader :audio_key
+      attr_accessor :audio_key
-      # @!attribute [r] audio_path
+      # @!attribute [rw] audio_path
       #   @return [String, nil] Local file path if saved
-      attr_reader :audio_path
+      attr_accessor :audio_path
       # @!endgroup
@@ -308,7 +308,12 @@ module RubyLLM
         }
       end
-      private
+      # Public MIME type reader for this result's audio format.
+      #
+      # Delegates to +mime_type_for_format+, giving callers outside the class
+      # (for example code that sets an attachment's content type) a public way
+      # to obtain the value.
+      #
+      # @return [String] MIME type
+      def content_type
+        mime_type_for_format
+      end
       # Returns MIME type for the audio format
       #

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm-agents
 version: !ruby/object:Gem::Version
-  version: 3.2.0
+  version: 3.3.0
 platform: ruby
 authors:
 - adham90
@@ -111,6 +111,7 @@ files:
 - app/views/ruby_llm/agents/dashboard/_action_center.html.erb
 - app/views/ruby_llm/agents/dashboard/_tenant_budget.html.erb
 - app/views/ruby_llm/agents/dashboard/index.html.erb
+- app/views/ruby_llm/agents/executions/_audio_player.html.erb
 - app/views/ruby_llm/agents/executions/_execution.html.erb
 - app/views/ruby_llm/agents/executions/_filters.html.erb
 - app/views/ruby_llm/agents/executions/_list.html.erb
@@ -209,6 +210,7 @@ files:
 - lib/ruby_llm-agents.rb
 - lib/ruby_llm/agents.rb
 - lib/ruby_llm/agents/audio/speaker.rb
+- lib/ruby_llm/agents/audio/speaker/active_storage_support.rb
 - lib/ruby_llm/agents/audio/speech_client.rb
 - lib/ruby_llm/agents/audio/speech_pricing.rb
 - lib/ruby_llm/agents/audio/transcriber.rb