RubyGems - langchainrb - Versions diffs - 0.13.1 → 0.13.2 - Mend

langchainrb 0.13.1 → 0.13.2

Files changed (15) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/lib/langchain/assistants/assistant.rb +28 -2
data/lib/langchain/assistants/messages/anthropic_message.rb +75 -0
data/lib/langchain/llm/anthropic.rb +8 -0
data/lib/langchain/llm/google_gemini.rb +31 -0
data/lib/langchain/llm/google_vertex_ai.rb +4 -1
data/lib/langchain/llm/hugging_face.rb +19 -8
data/lib/langchain/llm/response/anthropic_response.rb +11 -1
data/lib/langchain/llm/response/google_gemini_response.rb +5 -1
data/lib/langchain/processors/xls.rb +27 -0
data/lib/langchain/tool/base.rb +12 -0
data/lib/langchain/tool/news_retriever/news_retriever.json +2 -1
data/lib/langchain/version.rb +1 -1
metadata +18 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 31daa3b09f92561f783122c10c1b48482bba75eac67e01550c71f7d76af36551
-  data.tar.gz: 355e21f33fbc3d21ac364ce046b0d2908ef111d2aa17996605df953ca25d0640
+  metadata.gz: 30a8890d61255a8d937f2e2996f2b32de8ff432697a4a946afb08e853d383e2a
+  data.tar.gz: ee561cd2fac6b60308c07bfa300f09b6d1f8092ca669f2712f81a5b36814dd31
 SHA512:
-  metadata.gz: f2bbf794a223f9b0da303f9b65a1a309213db00d45227ce6e9d5a9bc039d1150e06b786ff9730c1e4f2f2fd6d6566687d4a04d3c39f5dcd8d9e66c8e84e097ba
-  data.tar.gz: b406738ff1be88c7c545ec284d3050a3b5c0bb34a747f345ff18cbaeb63a3abf9763ec723913bd58ddd62be261c6abd88a87448fd2b9d3bde00eb53d795931e2
+  metadata.gz: 3ab2bf6b1e57754497165b931e366d9c46051d829d3c05f23f73d3f20017eb91c86bf586cc318e7329e7990ae69002b0ad16638f5f41b3b0d141560a56c7236a
+  data.tar.gz: e621507bece6ff42ee80788bb9fb486dbd685b33d0f3893a9affca3d2edaf2a55441e98278bb056b290dcb2102d12a6eac5faea1c29d36a6e6f61118c43ee121

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 ## [Unreleased]
+## [0.13.2] - 2024-05-20
+- New `Langchain::LLM::GoogleGemini#embed()` method
+- `Langchain::Assistant` works with `Langchain::LLM::Anthropic` llm
+- New XLS file processor
+- Fixes and improvements
 ## [0.13.1] - 2024-05-14
 - Better error handling for `Langchain::LLM::GoogleVertexAI`

data/lib/langchain/assistants/assistant.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module Langchain
     attr_accessor :tools
     SUPPORTED_LLMS = [
+      Langchain::LLM::Anthropic,
       Langchain::LLM::OpenAI,
       Langchain::LLM::GoogleGemini,
       Langchain::LLM::GoogleVertexAI
@@ -41,7 +42,7 @@ module Langchain
       if llm.is_a?(Langchain::LLM::OpenAI)
         add_message(role: "system", content: instructions) if instructions
       end
-      # For Google Gemini, system instructions are added to the `system:` param in the `chat` method
+      # For Google Gemini and Anthropic, system instructions are added to the `system:` param in the `chat` method
     end
     # Add a user message to the thread
@@ -137,6 +138,8 @@ module Langchain
         Langchain::Messages::OpenAIMessage::TOOL_ROLE
       elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
         Langchain::Messages::GoogleGeminiMessage::TOOL_ROLE
+      elsif llm.is_a?(Langchain::LLM::Anthropic)
+        Langchain::Messages::AnthropicMessage::TOOL_ROLE
       end
       # TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked
@@ -179,12 +182,17 @@ module Langchain
       if tools.any?
         if llm.is_a?(Langchain::LLM::OpenAI)
           params[:tools] = tools.map(&:to_openai_tools).flatten
+          params[:tool_choice] = "auto"
+        elsif llm.is_a?(Langchain::LLM::Anthropic)
+          params[:tools] = tools.map(&:to_anthropic_tools).flatten
+          params[:system] = instructions if instructions
+          params[:tool_choice] = {type: "auto"}
         elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
           params[:tools] = tools.map(&:to_google_gemini_tools).flatten
           params[:system] = instructions if instructions
+          params[:tool_choice] = "auto"
         end
         # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
-        params[:tool_choice] = "auto"
       end
       llm.chat(**params)
@@ -200,6 +208,8 @@ module Langchain
           extract_openai_tool_call(tool_call: tool_call)
         elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
           extract_google_gemini_tool_call(tool_call: tool_call)
+        elsif llm.is_a?(Langchain::LLM::Anthropic)
+          extract_anthropic_tool_call(tool_call: tool_call)
         end
         tool_instance = tools.find do |t|
@@ -234,6 +244,20 @@ module Langchain
       [tool_call_id, tool_name, method_name, tool_arguments]
     end
+    # Extract the tool call information from the Anthropic tool call hash
+    #
+    # @param tool_call [Hash] The tool call hash, format: {"type"=>"tool_use", "id"=>"toolu_01TjusbFApEbwKPRWTRwzadR", "name"=>"news_retriever__get_top_headlines", "input"=>{"country"=>"us", "page_size"=>10}}
+    # @return [Array] The tool call information
+    def extract_anthropic_tool_call(tool_call:)
+      tool_call_id = tool_call.dig("id")
+      function_name = tool_call.dig("name")
+      tool_name, method_name = function_name.split("__")
+      tool_arguments = tool_call.dig("input").transform_keys(&:to_sym)
+      [tool_call_id, tool_name, method_name, tool_arguments]
+    end
     # Extract the tool call information from the Google Gemini tool call hash
     #
     # @param tool_call [Hash] The tool call hash, format: {"functionCall"=>{"name"=>"weather__execute", "args"=>{"input"=>"NYC"}}}
@@ -260,6 +284,8 @@ module Langchain
         Langchain::Messages::OpenAIMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
       elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
         Langchain::Messages::GoogleGeminiMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
+      elsif llm.is_a?(Langchain::LLM::Anthropic)
+        Langchain::Messages::AnthropicMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
       end
     end

data/lib/langchain/assistants/messages/anthropic_message.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+module Langchain
+  module Messages
+    class AnthropicMessage < Base
+      ROLES = [
+        "assistant",
+        "user",
+        "tool_result"
+      ].freeze
+      TOOL_ROLE = "tool_result"
+      def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
+        raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
+        raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
+        @role = role
+        # Some tools return content as JSON, hence `.to_s`
+        @content = content.to_s
+        @tool_calls = tool_calls
+        @tool_call_id = tool_call_id
+      end
+      # Convert the message to an Anthropic API-compatible hash
+      #
+      # @return [Hash] The message as an Anthropic API-compatible hash
+      def to_hash
+        {}.tap do |h|
+          h[:role] = tool? ? "user" : role
+          h[:content] = if tool?
+            [
+              {
+                type: "tool_result",
+                tool_use_id: tool_call_id,
+                content: content
+              }
+            ]
+          elsif tool_calls.any?
+            tool_calls
+          else
+            content
+          end
+        end
+      end
+      # Check if the message is a tool result
+      #
+      # @return [Boolean] true/false whether this message is a tool result
+      def tool?
+        role == "tool_result"
+      end
+      # Anthropic messages have no "system" role, so a message is never a system message
+      def system?
+        false
+      end
+      # Check if the message came from an LLM
+      #
+      # @return [Boolean] true/false whether this message was produced by an LLM
+      def assistant?
+        role == "assistant"
+      end
+      # Check if the message came from an LLM
+      #
+      # @return [Boolean] true/false whether this message was produced by an LLM
+      def llm?
+        assistant?
+      end
+    end
+  end
+end

data/lib/langchain/llm/anthropic.rb CHANGED Viewed

@@ -101,6 +101,8 @@ module Langchain::LLM
     # @option params [Float] :top_p Use nucleus sampling.
     # @return [Langchain::LLM::AnthropicResponse] The chat completion
     def chat(params = {})
+      set_extra_headers! if params[:tools]
       parameters = chat_parameters.to_params(params)
       raise ArgumentError.new("messages argument is required") if Array(parameters[:messages]).empty?
@@ -111,5 +113,11 @@ module Langchain::LLM
       Langchain::LLM::AnthropicResponse.new(response)
     end
+    private
+    def set_extra_headers!
+      ::Anthropic.configuration.extra_headers = {"anthropic-beta": "tools-2024-05-16"}
+    end
   end
 end

data/lib/langchain/llm/google_gemini.rb CHANGED Viewed

@@ -6,6 +6,7 @@ module Langchain::LLM
   class GoogleGemini < Base
     DEFAULTS = {
       chat_completion_model_name: "gemini-1.5-pro-latest",
+      embeddings_model_name: "text-embedding-004",
       temperature: 0.0
     }
@@ -63,5 +64,35 @@ module Langchain::LLM
         raise StandardError.new(response)
       end
     end
+    def embed(
+      text:,
+      model: @defaults[:embeddings_model_name]
+    )
+      params = {
+        content: {
+          parts: [
+            {
+              text: text
+            }
+          ]
+        }
+      }
+      uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:embedContent?key=#{api_key}")
+      request = Net::HTTP::Post.new(uri)
+      request.content_type = "application/json"
+      request.body = params.to_json
+      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
+        http.request(request)
+      end
+      parsed_response = JSON.parse(response.body)
+      Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: model)
+    end
   end
 end

data/lib/langchain/llm/google_vertex_ai.rb CHANGED Viewed

@@ -28,7 +28,10 @@ module Langchain::LLM
     def initialize(project_id:, region:, default_options: {})
       depends_on "googleauth"
-      @authorizer = ::Google::Auth.get_application_default
+      @authorizer = ::Google::Auth.get_application_default(scope: [
+        "https://www.googleapis.com/auth/cloud-platform",
+        "https://www.googleapis.com/auth/generative-language.retriever"
+      ])
       proj_id = project_id || @authorizer.project_id || @authorizer.quota_project_id
       @url = "https://#{region}-aiplatform.googleapis.com/v1/projects/#{proj_id}/locations/#{region}/publishers/google/models/"

data/lib/langchain/llm/hugging_face.rb CHANGED Viewed

@@ -11,12 +11,12 @@ module Langchain::LLM
   #     hf = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
   #
   class HuggingFace < Base
-    # The gem does not currently accept other models:
-    # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
     DEFAULTS = {
-      temperature: 0.0,
-      embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2",
-      dimensions: 384 # Vector size generated by the above model
+      embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2"
+    }.freeze
+    EMBEDDING_SIZES = {
+      "sentence-transformers/all-MiniLM-L6-v2": 384
     }.freeze
     #
@@ -24,10 +24,21 @@ module Langchain::LLM
     #
     # @param api_key [String] The API key to use
     #
-    def initialize(api_key:)
+    def initialize(api_key:, default_options: {})
       depends_on "hugging-face", req: "hugging_face"
       @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
+      @defaults = DEFAULTS.merge(default_options)
+    end
+    # Returns the # of vector dimensions for the embeddings
+    # @return [Integer] The # of vector dimensions
+    def default_dimensions
+      # Since Hugging Face can run multiple models, look it up or generate an embedding and return the size
+      @default_dimensions ||= @defaults[:dimensions] ||
+        EMBEDDING_SIZES.fetch(@defaults[:embeddings_model_name].to_sym) do
+          embed(text: "test").embedding.size
+        end
     end
     #
@@ -39,9 +50,9 @@ module Langchain::LLM
     def embed(text:)
       response = client.embedding(
         input: text,
-        model: DEFAULTS[:embeddings_model_name]
+        model: @defaults[:embeddings_model_name]
       )
-      Langchain::LLM::HuggingFaceResponse.new(response, model: DEFAULTS[:embeddings_model_name])
+      Langchain::LLM::HuggingFaceResponse.new(response, model: @defaults[:embeddings_model_name])
     end
   end
 end

data/lib/langchain/llm/response/anthropic_response.rb CHANGED Viewed

@@ -11,7 +11,17 @@ module Langchain::LLM
     end
     def chat_completion
-      raw_response.dig("content", 0, "text")
+      chat_completion = chat_completions.find { |h| h["type"] == "text" }
+      chat_completion.dig("text")
+    end
+    def tool_calls
+      tool_call = chat_completions.find { |h| h["type"] == "tool_use" }
+      tool_call ? [tool_call] : []
+    end
+    def chat_completions
+      raw_response.dig("content")
     end
     def completions

data/lib/langchain/llm/response/google_gemini_response.rb CHANGED Viewed

@@ -27,7 +27,11 @@ module Langchain::LLM
     end
     def embeddings
-      [raw_response.dig("predictions", 0, "embeddings", "values")]
+      if raw_response.key?("embedding")
+        [raw_response.dig("embedding", "values")]
+      else
+        [raw_response.dig("predictions", 0, "embeddings", "values")]
+      end
     end
     def prompt_tokens

data/lib/langchain/processors/xls.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class Xls < Base
+      EXTENSIONS = [".xls"].freeze
+      CONTENT_TYPES = ["application/vnd.ms-excel"].freeze
+      def initialize(*)
+        depends_on "roo"
+        depends_on "roo-xls"
+      end
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Array<Array<String>>] Array of rows, each row is an array of cells
+      def parse(data)
+        xls_file = Roo::Spreadsheet.open(data)
+        xls_file.each_with_pagename.flat_map do |_, sheet|
+          sheet.map do |row|
+            row.map { |i| i.to_s.strip }
+          end
+        end
+      end
+    end
+  end
+end

data/lib/langchain/tool/base.rb CHANGED Viewed

@@ -71,6 +71,18 @@ module Langchain::Tool
       method_annotations
     end
+    # Returns the tool as a list of Anthropic formatted functions
+    #
+    # @return [Array<Hash>] List of hashes representing the tool as Anthropic formatted functions
+    def to_anthropic_tools
+      method_annotations.map do |annotation|
+        # Slice out only the content of the "function" key
+        annotation["function"]
+          # Rename "parameters" to "input_schema" key
+          .transform_keys("parameters" => "input_schema")
+      end
+    end
     # Returns the tool as a list of Google Gemini formatted functions
     #
     # @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions

data/lib/langchain/tool/news_retriever/news_retriever.json CHANGED Viewed

@@ -68,7 +68,8 @@
         "properties": {
           "country": {
             "type": "string",
-            "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
+            "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for.",
+            "enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
           },
           "category": {
             "type": "string",

data/lib/langchain/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.13.1"
+  VERSION = "0.13.2"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.13.1
+  version: 0.13.2
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-05-15 00:00:00.000000000 Z
+date: 2024-05-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -598,6 +598,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 2.10.0
+- !ruby/object:Gem::Dependency
+  name: roo-xls
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.2.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.2.0
 - !ruby/object:Gem::Dependency
   name: ruby-openai
   requirement: !ruby/object:Gem::Requirement
@@ -708,6 +722,7 @@ files:
 - README.md
 - lib/langchain.rb
 - lib/langchain/assistants/assistant.rb
+- lib/langchain/assistants/messages/anthropic_message.rb
 - lib/langchain/assistants/messages/base.rb
 - lib/langchain/assistants/messages/google_gemini_message.rb
 - lib/langchain/assistants/messages/openai_message.rb
@@ -779,6 +794,7 @@ files:
 - lib/langchain/processors/pdf.rb
 - lib/langchain/processors/pptx.rb
 - lib/langchain/processors/text.rb
+- lib/langchain/processors/xls.rb
 - lib/langchain/processors/xlsx.rb
 - lib/langchain/prompt.rb
 - lib/langchain/prompt/base.rb