ruby-gemini-api 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/README.md +455 -0
- data/lib/gemini/client.rb +68 -3
- data/lib/gemini/embeddings.rb +108 -17
- data/lib/gemini/live/configuration.rb +65 -0
- data/lib/gemini/live/connection.rb +83 -0
- data/lib/gemini/live/message_builder.rb +217 -0
- data/lib/gemini/live/session.rb +223 -0
- data/lib/gemini/live.rb +102 -0
- data/lib/gemini/response.rb +141 -4
- data/lib/gemini/tokens.rb +77 -0
- data/lib/gemini/tts.rb +83 -0
- data/lib/gemini/version.rb +1 -1
- data/lib/gemini.rb +3 -0
- metadata +23 -2
data/lib/gemini/embeddings.rb
CHANGED
|
@@ -1,27 +1,118 @@
|
|
|
1
1
|
# frozen_string_literal: true

module Gemini
  class Embeddings
    # Model used when the caller does not specify one.
    DEFAULT_MODEL = "gemini-embedding-001"

    # Task types accepted by the embedContent / batchEmbedContents endpoints.
    VALID_TASK_TYPES = %w[
      RETRIEVAL_QUERY
      RETRIEVAL_DOCUMENT
      SEMANTIC_SIMILARITY
      CLASSIFICATION
      CLUSTERING
      QUESTION_ANSWERING
      FACT_VERIFICATION
      CODE_RETRIEVAL_QUERY
    ].freeze

    # @param client [#json_post] API client used to perform HTTP requests
    def initialize(client:)
      @client = client
    end

    # Generate an embedding for a single content, or delegate to #batch_create
    # when +input+ is an Array.
    #
    # @param input [String, Hash, Array] text, a content/part hash, or an Array of either
    # @param model [String, Symbol] model name, with or without the "models/" prefix
    # @param task_type [String, Symbol, nil] one of VALID_TASK_TYPES (case-insensitive)
    # @param title [String, nil] optional document title
    # @param output_dimensionality [Integer, nil] optional reduced embedding size
    # @param parameters [Hash] extra fields merged verbatim into the request body
    # @return [Gemini::Response]
    # @raise [ArgumentError] if task_type is not recognized
    def create(input:, model: DEFAULT_MODEL, task_type: nil, title: nil,
               output_dimensionality: nil, **parameters)
      if input.is_a?(Array)
        return batch_create(
          inputs: input,
          model: model,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality,
          **parameters
        )
      end

      payload = build_embed_payload(
        input: input,
        task_type: task_type,
        title: title,
        output_dimensionality: output_dimensionality
      ).merge(parameters)

      response = @client.json_post(
        path: "models/#{normalize_model(model)}:embedContent",
        parameters: payload
      )
      Gemini::Response.new(response)
    end

    # Generate embeddings for multiple inputs in a single batch request.
    # Each request in the batch carries its own fully-qualified model name,
    # as required by batchEmbedContents.
    #
    # @param inputs [Array<String, Hash>] contents to embed
    # @return [Gemini::Response]
    # @raise [ArgumentError] if task_type is not recognized
    def batch_create(inputs:, model: DEFAULT_MODEL, task_type: nil, title: nil,
                     output_dimensionality: nil, **parameters)
      requests = inputs.map do |input|
        req = build_embed_payload(
          input: input,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality
        )
        req[:model] = "models/#{normalize_model(model)}"
        req
      end

      payload = { requests: requests }.merge(parameters)

      response = @client.json_post(
        path: "models/#{normalize_model(model)}:batchEmbedContents",
        parameters: payload
      )
      Gemini::Response.new(response)
    end

    private

    # Build the request body shared by single and batch embedding calls.
    def build_embed_payload(input:, task_type:, title:, output_dimensionality:)
      payload = { content: format_content(input) }

      if task_type
        validate_task_type!(task_type)
        payload[:taskType] = task_type.to_s.upcase
      end

      payload[:title] = title if title
      payload[:outputDimensionality] = output_dimensionality if output_dimensionality

      payload
    end

    # Coerce the accepted input shapes into a Content hash:
    #   "text"                              -> { parts: [{ text: "text" }] }
    #   { text:/inline_data:/file_data: }   -> wrapped in a parts array
    #   { parts: [...] }                    -> passed through unchanged
    #   any other Hash                      -> passed through unchanged
    #   anything else                       -> stringified into a text part
    def format_content(input)
      case input
      when String
        { parts: [{ text: input }] }
      when Hash
        if input.key?(:parts) || input.key?("parts")
          input
        elsif input.key?(:text) || input.key?("text") ||
              input.key?(:inline_data) || input.key?("inline_data") ||
              input.key?(:file_data) || input.key?("file_data")
          { parts: [input] }
        else
          input
        end
      else
        { parts: [{ text: input.to_s }] }
      end
    end

    # Strip an optional "models/" prefix so URL paths never double it.
    # (delete_prefix is a no-op when the prefix is absent, so no conditional
    # is needed.)
    def normalize_model(model)
      model.to_s.delete_prefix("models/")
    end

    # @raise [ArgumentError] when task_type (case-insensitively) is not in
    #   VALID_TASK_TYPES; the message includes the offending value, matching
    #   the style used elsewhere in this gem.
    def validate_task_type!(task_type)
      task_type_str = task_type.to_s.upcase
      unless VALID_TASK_TYPES.include?(task_type_str)
        raise ArgumentError,
              "task_type must be one of: #{VALID_TASK_TYPES.join(', ')} (got #{task_type.inspect})"
      end
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Gemini
  class Live
    # Configuration class for Live API sessions.
    #
    # All options may be set at construction time or mutated afterwards.
    # response_modality and voice_name are validated on every assignment
    # (previously the attr_accessor-generated setters bypassed the checks
    # the constructor enforced).
    class Configuration
      attr_accessor :model, :system_instruction, :tools,
                    :context_window_compression, :session_resumption,
                    :automatic_activity_detection,
                    :media_resolution, :output_audio_transcription
      attr_reader :response_modality, :voice_name

      VALID_MODALITIES = %w[TEXT AUDIO].freeze
      VALID_VOICES = %w[Puck Charon Kore Fenrir Aoede Leda Orus Zephyr].freeze
      # NOTE: gemini-2.5-flash-live-preview is listed in the public Live API
      # tools documentation as the recommended model, but is not currently
      # deployed (returns "model not found" on bidiGenerateContent). The
      # native-audio preview model is the only Live model on which function
      # calling currently works in practice (with AUDIO modality).
      DEFAULT_MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"

      # @param model [String] Live model name
      # @param response_modality [String, Symbol] "TEXT" or "AUDIO" (case-insensitive)
      # @param voice_name [String, nil] one of VALID_VOICES, or nil for the server default
      # @param system_instruction [String, nil] system prompt text
      # @param tools [Array, nil] tool declarations passed through to setup
      # @param context_window_compression [Hash, nil] passed through to setup
      # @param session_resumption [Hash, nil] passed through to setup
      # @param automatic_activity_detection [Boolean] disable to use manual VAD
      # @param media_resolution [String, nil] passed through to generationConfig
      # @param output_audio_transcription [Boolean] request audio transcription
      # @raise [ArgumentError] on an unknown modality or voice
      def initialize(
        model: DEFAULT_MODEL,
        response_modality: "TEXT",
        voice_name: nil,
        system_instruction: nil,
        tools: nil,
        context_window_compression: nil,
        session_resumption: nil,
        automatic_activity_detection: true,
        media_resolution: nil,
        output_audio_transcription: false
      )
        @model = model
        # Route through the validating writers so construction and later
        # mutation share one code path.
        self.response_modality = response_modality
        self.voice_name = voice_name
        @system_instruction = system_instruction
        @tools = tools
        @context_window_compression = context_window_compression
        @session_resumption = session_resumption
        @automatic_activity_detection = automatic_activity_detection
        @media_resolution = media_resolution
        @output_audio_transcription = output_audio_transcription
      end

      # Validating writer; stores the upcased modality string.
      # @raise [ArgumentError] on an unknown modality
      def response_modality=(modality)
        @response_modality = validate_modality(modality)
      end

      # Validating writer; nil clears the voice (server default).
      # @raise [ArgumentError] on an unknown voice
      def voice_name=(voice)
        @voice_name = validate_voice(voice)
      end

      private

      # Upcase and check the modality; returns the normalized string.
      def validate_modality(modality)
        modality = modality.to_s.upcase
        unless VALID_MODALITIES.include?(modality)
          raise ArgumentError, "Invalid modality: #{modality}. Must be one of: #{VALID_MODALITIES.join(', ')}"
        end
        modality
      end

      # Allow nil; otherwise require a known prebuilt voice name.
      def validate_voice(voice)
        return nil if voice.nil?
        unless VALID_VOICES.include?(voice)
          raise ArgumentError, "Invalid voice: #{voice}. Must be one of: #{VALID_VOICES.join(', ')}"
        end
        voice
      end
    end
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "websocket-client-simple"
require "json"

module Gemini
  class Live
    # WebSocket connection manager for Live API
    #
    # Thin wrapper around websocket-client-simple that forwards socket
    # events to caller-supplied callbacks and serializes outgoing
    # payloads to JSON.
    class Connection
      # v1beta bidiGenerateContent endpoint used by the Live API.
      WEBSOCKET_BASE_URL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"

      # Boolean flag toggled by the :open / :close socket events.
      attr_reader :connected

      # @param api_key [String] appended to the URL as ?key=
      # @param on_message [#call, nil] receives the raw message data
      # @param on_open [#call, nil] invoked once when the socket opens
      # @param on_error [#call, nil] receives the error object
      # @param on_close [#call, nil] receives (code, reason); either may be nil
      def initialize(api_key:, on_message:, on_open:, on_error:, on_close:)
        @api_key = api_key
        @on_message = on_message
        @on_open = on_open
        @on_error = on_error
        @on_close = on_close
        @ws = nil
        @connected = false
        # Guards concurrent writes to the socket in #send.
        @mutex = Mutex.new
      end

      # Open the WebSocket and wire up the event handlers. Returns self.
      #
      # NOTE(review): the `ws.on` blocks appear to be run by
      # websocket-client-simple with a different `self` than this object —
      # hence the local-variable captures below and the
      # instance_variable_set calls on the captured `connection` instead of
      # plain `@connected = ...` assignments. Confirm against the gem's
      # event-dispatch implementation before restructuring.
      def connect
        url = "#{WEBSOCKET_BASE_URL}?key=#{@api_key}"

        # Store callbacks in local variables for closure
        on_message_callback = @on_message
        on_open_callback = @on_open
        on_error_callback = @on_error
        on_close_callback = @on_close
        connection = self

        @ws = WebSocket::Client::Simple.connect(url) do |ws|
          ws.on :open do
            connection.instance_variable_set(:@connected, true)
            on_open_callback.call if on_open_callback
          end

          ws.on :message do |msg|
            on_message_callback.call(msg.data) if on_message_callback
          end

          ws.on :error do |e|
            on_error_callback.call(e) if on_error_callback
          end

          ws.on :close do |e|
            connection.instance_variable_set(:@connected, false)
            # The close event object may or may not expose code/reason.
            code = e.respond_to?(:code) ? e.code : nil
            reason = e.respond_to?(:reason) ? e.reason : nil
            on_close_callback.call(code, reason) if on_close_callback
          end
        end

        self
      end

      # Send a payload over the socket, serializing non-String data to JSON.
      # Returns true on success; false when not connected or when the write
      # raises (the error is also forwarded to on_error).
      #
      # NOTE(review): this deliberately shadows Object#send as the public
      # API; internal code needing Ruby's dispatch must use __send__.
      def send(data)
        return false unless @ws && @connected

        @mutex.synchronize do
          json_data = data.is_a?(String) ? data : data.to_json
          @ws.send(json_data)
        end
        true
      rescue StandardError => e
        @on_error&.call(e)
        false
      end

      # Close the underlying socket (if any) and mark the connection down.
      def close
        @ws&.close
        @connected = false
      end

      # True only while our flag is set AND the socket object itself is
      # still open — guards against the socket closing underneath us.
      def connected?
        @connected && @ws && !@ws.closed?
      end
    end
  end
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Gemini
  class Live
    # Helper class to build Live API messages (plain Hashes ready for
    # JSON serialization by the connection layer).
    class MessageBuilder
      # Scheduling values accepted on async (NON_BLOCKING) function responses.
      VALID_SCHEDULING = %w[INTERRUPT WHEN_IDLE SILENT].freeze

      class << self
        # Build the setup message from a Configuration-like object.
        #
        # @param config [#model, #response_modality, #voice_name,
        #   #system_instruction, #tools, #context_window_compression,
        #   #session_resumption, #automatic_activity_detection,
        #   #media_resolution, #output_audio_transcription]
        # @return [Hash] the { setup: ... } message
        def setup(config)
          message = {
            setup: {
              model: normalize_model_name(config.model)
            }
          }

          generation_config = build_generation_config(config)
          message[:setup][:generationConfig] = generation_config unless generation_config.empty?

          # System instruction
          if config.system_instruction
            message[:setup][:systemInstruction] = {
              parts: [{ text: config.system_instruction }]
            }
          end

          # Tools configuration
          message[:setup][:tools] = config.tools if config.tools

          # Context window compression
          if config.context_window_compression
            message[:setup][:contextWindowCompression] = config.context_window_compression
          end

          # Session resumption
          if config.session_resumption
            message[:setup][:sessionResumption] = config.session_resumption
          end

          # VAD (Voice Activity Detection) settings — only emitted when the
          # caller opts out of automatic detection.
          unless config.automatic_activity_detection
            message[:setup][:realtimeInputConfig] = {
              automaticActivityDetection: {
                disabled: true
              }
            }
          end

          message
        end

        # Build client content message (single text part, one turn).
        def client_content(text:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [
                {
                  role: role,
                  parts: [{ text: text }]
                }
              ],
              turnComplete: turn_complete
            }
          }
        end

        # Build client content with multiple caller-supplied parts.
        def client_content_parts(parts:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [
                {
                  role: role,
                  parts: parts
                }
              ],
              turnComplete: turn_complete
            }
          }
        end

        # Build realtime input message (audio/video) using the legacy
        # mediaChunks field. NOTE: mediaChunks is deprecated by the API in
        # favor of the dedicated audio/video fields built by realtime_audio
        # and realtime_video. Kept for backward compatibility with older
        # Live models that still accept it.
        def realtime_input(audio_data: nil, video_data: nil, mime_type:)
          data = audio_data || video_data
          {
            realtimeInput: {
              mediaChunks: [
                {
                  mimeType: mime_type,
                  data: data
                }
              ]
            }
          }
        end

        # Build a realtime text input message. This is the universal
        # text-input form for the Live API and is required by newer Live
        # models such as gemini-3.1-flash-live-preview, which reject the
        # turn-based clientContent payload.
        def realtime_text(text)
          { realtimeInput: { text: text.to_s } }
        end

        # Build activity start message (for manual VAD)
        def activity_start
          {
            realtimeInput: {
              activityStart: {}
            }
          }
        end

        # Build activity end message (for manual VAD)
        def activity_end
          {
            realtimeInput: {
              activityEnd: {}
            }
          }
        end

        # Build tool response message.
        #
        # Each function response hash supports:
        #   :id         - The function call id from the server
        #   :name       - The function name
        #   :response   - The function result (Hash or scalar). When using
        #                 NON_BLOCKING (async) function calls, include
        #                 `scheduling: "INTERRUPT" | "WHEN_IDLE" | "SILENT"`
        #                 inside the response hash.
        #   :scheduling - (optional) Top-level shortcut. When provided,
        #                 it is merged into the response hash as
        #                 `response[:scheduling]`. Accepts Symbol or String.
        #
        # Raises ArgumentError if scheduling is not one of the valid values.
        def tool_response(function_responses)
          {
            toolResponse: {
              functionResponses: function_responses.map { |resp| build_function_response(resp) }
            }
          }
        end

        private

        # Normalize one caller-supplied function-response hash into the
        # wire shape { id:, name:, response: }.
        def build_function_response(resp)
          # Scalars are wrapped as { result: value }; nil becomes {}.
          response_payload =
            case resp[:response]
            when Hash then resp[:response].dup
            when nil then {}
            else { result: resp[:response] }
            end

          # Top-level :scheduling wins; otherwise normalize any scheduling
          # already present in the response hash (symbol or string key).
          if (top_level_scheduling = resp[:scheduling])
            response_payload[:scheduling] = normalize_scheduling(top_level_scheduling)
          elsif (sched = response_payload[:scheduling] || response_payload["scheduling"])
            normalized = normalize_scheduling(sched)
            response_payload.delete("scheduling")
            response_payload[:scheduling] = normalized
          end

          { id: resp[:id], name: resp[:name], response: response_payload }
        end

        # Upcase and check a scheduling value; returns the normalized string.
        def normalize_scheduling(value)
          value_str = value.to_s.upcase
          unless VALID_SCHEDULING.include?(value_str)
            raise ArgumentError,
                  "scheduling must be one of: #{VALID_SCHEDULING.join(', ')} (got #{value.inspect})"
          end
          value_str
        end

        # Ensure the "models/" prefix is present exactly once. Coerces the
        # model to a String first, matching Embeddings#normalize_model —
        # the previous version called start_with? directly and broke on
        # Symbol model names.
        def normalize_model_name(model)
          model_str = model.to_s
          model_str.start_with?("models/") ? model_str : "models/#{model_str}"
        end

        # Assemble the generationConfig section of the setup message.
        def build_generation_config(config)
          generation_config = {}

          # Response modality
          generation_config[:responseModalities] = [config.response_modality]

          # Speech/Voice configuration for AUDIO modality
          if config.response_modality == "AUDIO" && config.voice_name
            generation_config[:speechConfig] = {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: config.voice_name
                }
              }
            }
          end

          # Media resolution
          if config.media_resolution
            generation_config[:mediaResolution] = config.media_resolution
          end

          # Output audio transcription
          if config.output_audio_transcription
            generation_config[:outputAudioTranscription] = {}
          end

          generation_config
        end
      end
    end
  end
end
|