ruby-gemini-api 0.1.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/gemini/client.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  module Gemini
2
2
  class Client
3
3
  include Gemini::HTTP
4
-
4
+
5
5
  SENSITIVE_ATTRIBUTES = %i[@api_key @extra_headers].freeze
6
6
  CONFIG_KEYS = %i[api_key uri_base extra_headers log_errors request_timeout].freeze
7
+ VALID_THINKING_LEVELS = %w[minimal low medium high].freeze
7
8
 
8
9
  attr_reader(*CONFIG_KEYS, :faraday_middleware)
9
10
  attr_writer :api_key
@@ -69,6 +70,16 @@ module Gemini
69
70
  @cached_content ||= Gemini::CachedContent.new(client: self)
70
71
  end
71
72
 
73
+ # Live APIアクセサ
74
+ def live
75
+ @live ||= Gemini::Live.new(client: self)
76
+ end
77
+
78
+ # Embeddings APIアクセサ
79
+ def embeddings_api
80
+ @embeddings_api ||= Gemini::Embeddings.new(client: self)
81
+ end
82
+
72
83
  def reset_headers
73
84
  @extra_headers = {}
74
85
  end
@@ -83,7 +94,18 @@ module Gemini
83
94
  # Extended to support streaming callbacks
84
95
  def chat(parameters: {}, &stream_callback)
85
96
  model = parameters.delete(:model) || "gemini-2.5-flash"
86
-
97
+
98
+ # thinking_budget / thinking_level をパラメータから抽出
99
+ thinking_budget = parameters.delete(:thinking_budget)
100
+ thinking_level = parameters.delete(:thinking_level)
101
+
102
+ # Thinking設定
103
+ thinking_config = build_thinking_config(thinking_budget, thinking_level)
104
+ if thinking_config
105
+ parameters[:generationConfig] ||= {}
106
+ parameters[:generationConfig][:thinkingConfig] = thinking_config
107
+ end
108
+
87
109
  # If streaming callback is provided
88
110
  if block_given?
89
111
  path = "models/#{model}:streamGenerateContent"
@@ -100,10 +122,25 @@ module Gemini
100
122
  end
101
123
  end
102
124
 
103
- # Method corresponding to OpenAI's embeddings
125
+ # Generate embeddings for the given input.
126
+ # input can be a String (single embed) or Array of Strings (batch embed).
127
+ # Supports task_type, title (RETRIEVAL_DOCUMENT only), and output_dimensionality.
128
+ def embed_content(input, model: Gemini::Embeddings::DEFAULT_MODEL, task_type: nil,
129
+ title: nil, output_dimensionality: nil, **parameters)
130
+ embeddings_api.create(
131
+ input: input,
132
+ model: model,
133
+ task_type: task_type,
134
+ title: title,
135
+ output_dimensionality: output_dimensionality,
136
+ **parameters
137
+ )
138
+ end
139
+
140
+ # Method corresponding to OpenAI's embeddings (kept for compatibility)
104
141
  def embeddings(parameters: {})
105
- model = parameters.delete(:model) || "text-embedding-model"
106
- path = "models/#{model}:embedContent"
142
+ model = parameters.delete(:model) || Gemini::Embeddings::DEFAULT_MODEL
143
+ path = "models/#{model.to_s.delete_prefix("models/")}:embedContent"
107
144
  response = json_post(path: path, parameters: parameters)
108
145
  Gemini::Response.new(response)
109
146
  end
@@ -121,10 +158,12 @@ module Gemini
121
158
 
122
159
  # Helper methods for convenience
123
160
 
124
- # Method with usage similar to OpenAI's chat
161
+ # Method with usage similar to OpenAI's chat
125
162
  def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
126
163
  response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
127
- url_context: false, google_search: false, **parameters, &stream_callback)
164
+ url_context: false, google_search: false,
165
+ thinking_budget: nil, thinking_level: nil,
166
+ **parameters, &stream_callback)
128
167
  content = format_content(prompt)
129
168
  params = {
130
169
  contents: [content],
@@ -144,6 +183,12 @@ module Gemini
144
183
  params[:generation_config]["response_schema"] = response_schema
145
184
  end
146
185
 
186
+ # Thinking設定を追加
187
+ thinking_config = build_thinking_config(thinking_budget, thinking_level)
188
+ if thinking_config
189
+ params[:generation_config][:thinkingConfig] = thinking_config
190
+ end
191
+
147
192
  # Handle tool shortcuts
148
193
  tools = build_tools_array(tools, url_context: url_context, google_search: google_search)
149
194
  params[:tools] = tools if tools && !tools.empty?
@@ -416,6 +461,39 @@ module Gemini
416
461
 
417
462
  private
418
463
 
464
+ # Build thinking config from budget and level options
465
+ def build_thinking_config(budget, level)
466
+ return nil unless budget || level
467
+
468
+ config = {}
469
+
470
+ if budget
471
+ validate_thinking_budget!(budget)
472
+ config[:thinkingBudget] = budget
473
+ end
474
+
475
+ if level
476
+ level_str = level.to_s
477
+ validate_thinking_level!(level_str)
478
+ config[:thinkingLevel] = level_str
479
+ end
480
+
481
+ config
482
+ end
483
+
484
+ def validate_thinking_budget!(budget)
485
+ return if budget == -1 || budget == 0
486
+ unless budget.is_a?(Integer) && budget > 0 && budget <= 32768
487
+ raise ArgumentError, "thinking_budget must be -1, 0, or 1-32768"
488
+ end
489
+ end
490
+
491
+ def validate_thinking_level!(level)
492
+ unless VALID_THINKING_LEVELS.include?(level)
493
+ raise ArgumentError, "thinking_level must be one of: #{VALID_THINKING_LEVELS.join(', ')}"
494
+ end
495
+ end
496
+
419
497
  # Build tools array from explicit tools parameter and shortcuts
420
498
  def build_tools_array(tools, url_context: false, google_search: false)
421
499
  result_tools = []
@@ -1,27 +1,118 @@
1
1
module Gemini
  # Wrapper for the Gemini embeddings endpoints (embedContent and
  # batchEmbedContents). Obtain an instance via Client#embeddings_api.
  class Embeddings
    DEFAULT_MODEL = "gemini-embedding-001".freeze

    # Task types accepted by the API's taskType field.
    VALID_TASK_TYPES = %w[
      RETRIEVAL_QUERY
      RETRIEVAL_DOCUMENT
      SEMANTIC_SIMILARITY
      CLASSIFICATION
      CLUSTERING
      QUESTION_ANSWERING
      FACT_VERIFICATION
      CODE_RETRIEVAL_QUERY
    ].freeze

    def initialize(client:)
      @client = client
    end

    # Embed a single content, or delegate to batch_create when input is an
    # Array. Returns a Gemini::Response wrapping the raw API response.
    #
    # input  - String, Hash (content or single-part form), or Array (batch).
    # model  - model name, with or without the "models/" prefix.
    # task_type, title, output_dimensionality - optional embed options.
    # Extra keyword arguments are merged into the request payload and take
    # precedence over the generated keys.
    def create(input:, model: DEFAULT_MODEL, task_type: nil, title: nil,
               output_dimensionality: nil, **parameters)
      if input.is_a?(Array)
        return batch_create(
          inputs: input,
          model: model,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality,
          **parameters
        )
      end

      body = build_embed_payload(
        input: input,
        task_type: task_type,
        title: title,
        output_dimensionality: output_dimensionality
      ).merge(parameters)

      raw = @client.json_post(
        path: "models/#{normalize_model(model)}:embedContent",
        parameters: body
      )
      Gemini::Response.new(raw)
    end

    # Embed several inputs with a single batchEmbedContents request.
    # Each per-input request carries the fully qualified model name, as the
    # batch endpoint requires.
    def batch_create(inputs:, model: DEFAULT_MODEL, task_type: nil, title: nil,
                     output_dimensionality: nil, **parameters)
      qualified_model = "models/#{normalize_model(model)}"

      requests = inputs.map do |item|
        request = build_embed_payload(
          input: item,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality
        )
        request[:model] = qualified_model
        request
      end

      raw = @client.json_post(
        path: "#{qualified_model}:batchEmbedContents",
        parameters: { requests: requests }.merge(parameters)
      )
      Gemini::Response.new(raw)
    end

    private

    # Assemble the per-request payload shared by single and batch embeds.
    def build_embed_payload(input:, task_type:, title:, output_dimensionality:)
      { content: format_content(input) }.tap do |payload|
        if task_type
          validate_task_type!(task_type)
          payload[:taskType] = task_type.to_s.upcase
        end

        payload[:title] = title if title
        payload[:outputDimensionality] = output_dimensionality if output_dimensionality
      end
    end

    # Coerce user input into the API's Content shape ({ parts: [...] }).
    # Strings become a single text part; Hashes are passed through when they
    # already look like a content, or wrapped when they look like one part;
    # anything else is stringified.
    def format_content(input)
      return { parts: [{ text: input }] } if input.is_a?(String)
      return { parts: [{ text: input.to_s }] } unless input.is_a?(Hash)

      return input if hash_key?(input, :parts)

      part_like = %i[text inline_data file_data].any? { |key| hash_key?(input, key) }
      part_like ? { parts: [input] } : input
    end

    # True when the hash contains the key as a Symbol or a String.
    def hash_key?(hash, key)
      hash.key?(key) || hash.key?(key.to_s)
    end

    # Strip an optional "models/" prefix so callers may pass either form.
    def normalize_model(model)
      model.to_s.delete_prefix("models/")
    end

    # Raise ArgumentError unless the (upcased) task type is one the API accepts.
    def validate_task_type!(task_type)
      normalized = task_type.to_s.upcase
      return if VALID_TASK_TYPES.include?(normalized)

      raise ArgumentError, "task_type must be one of: #{VALID_TASK_TYPES.join(', ')}"
    end
  end
end
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true

module Gemini
  # Helpers for continuing a conversation after a function call.
  module FunctionCallingHelper
    # Build the contents array for the follow-up request after the model
    # issued a function call. Gemini 3 requires the model turn to carry its
    # thought signature on continuation, which
    # model_response.build_function_call_parts_with_signature provides.
    #
    # @param original_contents [Array] the conversation history so far
    # @param model_response [Gemini::Response] the model reply containing the
    #   function call
    # @param function_responses [Array<Hash>] function results, each shaped
    #   as { name: "function_name", response: { ... } }
    # @return [Array] a new contents array for the continuation request
    #
    # @example
    #   contents = Gemini::FunctionCallingHelper.build_continuation(
    #     original_contents: [{ role: "user", parts: [{ text: "東京の天気を教えて" }] }],
    #     model_response: response,
    #     function_responses: [
    #       { name: "get_weather", response: { temperature: 20, condition: "晴れ" } }
    #     ]
    #   )
    def self.build_continuation(original_contents:, model_response:, function_responses:)
      # The model's turn, with thought signatures preserved.
      model_turn = {
        role: "model",
        parts: model_response.build_function_call_parts_with_signature
      }

      # The function results, wrapped as functionResponse parts.
      tool_turn = {
        role: "user",
        parts: function_responses.map { |result| { functionResponse: result } }
      }

      # Leave the caller's history untouched; return a fresh array.
      original_contents.dup.push(model_turn, tool_turn)
    end
  end
end
@@ -0,0 +1,65 @@
1
# frozen_string_literal: true

module Gemini
  class Live
    # Per-session configuration for Live API connections.
    # Modality and voice are validated at construction time; the remaining
    # attributes are passed through to the setup message untouched.
    class Configuration
      attr_accessor :model, :response_modality, :voice_name,
                    :system_instruction, :tools,
                    :context_window_compression, :session_resumption,
                    :automatic_activity_detection,
                    :media_resolution, :output_audio_transcription

      VALID_MODALITIES = %w[TEXT AUDIO].freeze
      VALID_VOICES = %w[Puck Charon Kore Fenrir Aoede Leda Orus Zephyr].freeze
      # NOTE: gemini-2.5-flash-live-preview is listed in the public Live API
      # tools documentation as the recommended model, but is not currently
      # deployed (returns "model not found" on bidiGenerateContent). The
      # native-audio preview model is the only Live model on which function
      # calling currently works in practice (with AUDIO modality).
      DEFAULT_MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"

      def initialize(
        model: DEFAULT_MODEL,
        response_modality: "TEXT",
        voice_name: nil,
        system_instruction: nil,
        tools: nil,
        context_window_compression: nil,
        session_resumption: nil,
        automatic_activity_detection: true,
        media_resolution: nil,
        output_audio_transcription: false
      )
        @model = model
        @response_modality = validate_modality(response_modality)
        @voice_name = validate_voice(voice_name)
        @system_instruction = system_instruction
        @tools = tools
        @context_window_compression = context_window_compression
        @session_resumption = session_resumption
        @automatic_activity_detection = automatic_activity_detection
        @media_resolution = media_resolution
        @output_audio_transcription = output_audio_transcription
      end

      private

      # Upcase the modality (Symbols allowed) and ensure it is supported.
      def validate_modality(modality)
        normalized = modality.to_s.upcase
        return normalized if VALID_MODALITIES.include?(normalized)

        raise ArgumentError, "Invalid modality: #{normalized}. Must be one of: #{VALID_MODALITIES.join(', ')}"
      end

      # Validate the voice name; nil means "let the API pick a default".
      def validate_voice(voice)
        return nil if voice.nil?
        return voice if VALID_VOICES.include?(voice)

        raise ArgumentError, "Invalid voice: #{voice}. Must be one of: #{VALID_VOICES.join(', ')}"
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
# frozen_string_literal: true

require "websocket-client-simple"
require "json"

module Gemini
  class Live
    # Manages the WebSocket connection to the Live (BidiGenerateContent) API.
    # All callbacks may be nil; they are invoked only when present.
    # on_message receives raw frame data; on_close receives (code, reason).
    class Connection
      WEBSOCKET_BASE_URL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"

      attr_reader :connected

      def initialize(api_key:, on_message:, on_open:, on_error:, on_close:)
        @api_key = api_key
        @on_message = on_message
        @on_open = on_open
        @on_error = on_error
        @on_close = on_close
        @ws = nil
        @connected = false
        @mutex = Mutex.new
      end

      # Open the WebSocket and wire up the event handlers. The `ws.on`
      # handler blocks execute with the socket as `self`, so the callbacks
      # and this Connection are captured in locals, and @connected is
      # flipped via instance_variable_set on the captured Connection.
      def connect
        endpoint = "#{WEBSOCKET_BASE_URL}?key=#{@api_key}"

        message_cb = @on_message
        open_cb = @on_open
        error_cb = @on_error
        close_cb = @on_close
        owner = self

        @ws = WebSocket::Client::Simple.connect(endpoint) do |ws|
          ws.on :open do
            owner.instance_variable_set(:@connected, true)
            open_cb.call if open_cb
          end

          ws.on :message do |frame|
            message_cb.call(frame.data) if message_cb
          end

          ws.on :error do |err|
            error_cb.call(err) if error_cb
          end

          ws.on :close do |event|
            owner.instance_variable_set(:@connected, false)
            if close_cb
              close_cb.call(
                event.respond_to?(:code) ? event.code : nil,
                event.respond_to?(:reason) ? event.reason : nil
              )
            end
          end
        end

        self
      end

      # Serialize data to JSON (unless it is already a String) and send it.
      # Returns true on success, false when not connected or on error.
      # NOTE(review): this shadows Object#send on Connection instances; use
      # __send__ for reflection on this object.
      def send(data)
        return false unless @ws && @connected

        @mutex.synchronize do
          frame = data.is_a?(String) ? data : data.to_json
          @ws.send(frame)
        end
        true
      rescue StandardError => e
        @on_error&.call(e)
        false
      end

      # Close the socket (if any) and mark the connection as down.
      def close
        @ws&.close
        @connected = false
      end

      def connected?
        @connected && @ws && !@ws.closed?
      end
    end
  end
end
@@ -0,0 +1,217 @@
1
# frozen_string_literal: true

module Gemini
  class Live
    # Builds the JSON message hashes exchanged over a Live API session.
    # All methods are stateless class methods returning plain Hashes ready
    # for serialization.
    class MessageBuilder
      # Scheduling values accepted for NON_BLOCKING function responses.
      VALID_SCHEDULING = %w[INTERRUPT WHEN_IDLE SILENT].freeze

      class << self
        # Build the initial setup message from a Live::Configuration.
        # Optional sections (generationConfig, systemInstruction, tools,
        # contextWindowCompression, sessionResumption, realtimeInputConfig)
        # are included only when configured.
        def setup(config)
          message = {
            setup: {
              model: normalize_model_name(config.model)
            }
          }

          generation_config = build_generation_config(config)
          message[:setup][:generationConfig] = generation_config unless generation_config.empty?

          # System instruction
          if config.system_instruction
            message[:setup][:systemInstruction] = {
              parts: [{ text: config.system_instruction }]
            }
          end

          # Tools configuration
          message[:setup][:tools] = config.tools if config.tools

          # Context window compression
          if config.context_window_compression
            message[:setup][:contextWindowCompression] = config.context_window_compression
          end

          # Session resumption
          if config.session_resumption
            message[:setup][:sessionResumption] = config.session_resumption
          end

          # Disable server-side VAD when the caller wants manual
          # activity_start / activity_end control.
          unless config.automatic_activity_detection
            message[:setup][:realtimeInputConfig] = {
              automaticActivityDetection: {
                disabled: true
              }
            }
          end

          message
        end

        # Build client content message (single text part).
        def client_content(text:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [
                {
                  role: role,
                  parts: [{ text: text }]
                }
              ],
              turnComplete: turn_complete
            }
          }
        end

        # Build client content with multiple parts.
        def client_content_parts(parts:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [
                {
                  role: role,
                  parts: parts
                }
              ],
              turnComplete: turn_complete
            }
          }
        end

        # Build realtime input message (audio/video) using the legacy
        # mediaChunks field. NOTE: mediaChunks is deprecated by the API in
        # favor of dedicated audio/video fields (no realtime_audio /
        # realtime_video builders exist in this class yet). Kept for
        # backward compatibility with older Live models that still accept it.
        #
        # When both audio_data and video_data are given, audio wins.
        # Raises ArgumentError when neither is given (previously this
        # silently built an invalid chunk with data: nil).
        def realtime_input(audio_data: nil, video_data: nil, mime_type:)
          data = audio_data || video_data
          raise ArgumentError, "audio_data or video_data is required" if data.nil?

          {
            realtimeInput: {
              mediaChunks: [
                {
                  mimeType: mime_type,
                  data: data
                }
              ]
            }
          }
        end

        # Build a realtime text input message. This is the universal
        # text-input form for the Live API and is required by newer Live
        # models such as gemini-3.1-flash-live-preview, which reject the
        # turn-based clientContent payload.
        def realtime_text(text)
          { realtimeInput: { text: text.to_s } }
        end

        # Build activity start message (for manual VAD).
        def activity_start
          {
            realtimeInput: {
              activityStart: {}
            }
          }
        end

        # Build activity end message (for manual VAD).
        def activity_end
          {
            realtimeInput: {
              activityEnd: {}
            }
          }
        end

        # Build tool response message.
        #
        # Each function response hash supports:
        #   :id         - The function call id from the server
        #   :name       - The function name
        #   :response   - The function result (Hash or scalar). When using
        #                 NON_BLOCKING (async) function calls, include
        #                 `scheduling: "INTERRUPT" | "WHEN_IDLE" | "SILENT"`
        #                 inside the response hash.
        #   :scheduling - (optional) Top-level shortcut that takes precedence
        #                 over any scheduling embedded in :response. Accepts
        #                 Symbol or String.
        #
        # Raises ArgumentError if scheduling is not one of the valid values.
        def tool_response(function_responses)
          {
            toolResponse: {
              functionResponses: function_responses.map { |resp| build_function_response(resp) }
            }
          }
        end

        private

        # Normalize one function-response entry. Both Symbol- and String-keyed
        # embedded scheduling values are removed before the normalized value
        # is written back, so the serialized JSON can never carry duplicate
        # "scheduling" keys (the previous version could when a top-level
        # scheduling coexisted with a string-keyed embedded one).
        def build_function_response(resp)
          response_payload =
            case resp[:response]
            when Hash then resp[:response].dup # shallow copy; nested values are shared
            when nil then {}
            else { result: resp[:response] }
            end

          symbol_scheduling = response_payload.delete(:scheduling)
          string_scheduling = response_payload.delete("scheduling")
          chosen = resp[:scheduling] || symbol_scheduling || string_scheduling
          response_payload[:scheduling] = normalize_scheduling(chosen) if chosen

          { id: resp[:id], name: resp[:name], response: response_payload }
        end

        # Upcase and validate a scheduling value (Symbol or String).
        def normalize_scheduling(value)
          value_str = value.to_s.upcase
          unless VALID_SCHEDULING.include?(value_str)
            raise ArgumentError,
                  "scheduling must be one of: #{VALID_SCHEDULING.join(', ')} (got #{value.inspect})"
          end
          value_str
        end

        # Ensure the model name carries the "models/" prefix the API expects.
        def normalize_model_name(model)
          model.start_with?("models/") ? model : "models/#{model}"
        end

        # Assemble the generationConfig section from the configuration.
        # Always contains responseModalities; speech, media resolution and
        # transcription settings are added only when configured.
        def build_generation_config(config)
          generation_config = {}

          # Response modality
          generation_config[:responseModalities] = [config.response_modality]

          # Speech/Voice configuration for AUDIO modality
          if config.response_modality == "AUDIO" && config.voice_name
            generation_config[:speechConfig] = {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: config.voice_name
                }
              }
            }
          end

          # Media resolution
          if config.media_resolution
            generation_config[:mediaResolution] = config.media_resolution
          end

          # Output audio transcription
          if config.output_audio_transcription
            generation_config[:outputAudioTranscription] = {}
          end

          generation_config
        end
      end
    end
  end
end