langchainrb 0.13.1 → 0.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 31daa3b09f92561f783122c10c1b48482bba75eac67e01550c71f7d76af36551
4
- data.tar.gz: 355e21f33fbc3d21ac364ce046b0d2908ef111d2aa17996605df953ca25d0640
3
+ metadata.gz: 30a8890d61255a8d937f2e2996f2b32de8ff432697a4a946afb08e853d383e2a
4
+ data.tar.gz: ee561cd2fac6b60308c07bfa300f09b6d1f8092ca669f2712f81a5b36814dd31
5
5
  SHA512:
6
- metadata.gz: f2bbf794a223f9b0da303f9b65a1a309213db00d45227ce6e9d5a9bc039d1150e06b786ff9730c1e4f2f2fd6d6566687d4a04d3c39f5dcd8d9e66c8e84e097ba
7
- data.tar.gz: b406738ff1be88c7c545ec284d3050a3b5c0bb34a747f345ff18cbaeb63a3abf9763ec723913bd58ddd62be261c6abd88a87448fd2b9d3bde00eb53d795931e2
6
+ metadata.gz: 3ab2bf6b1e57754497165b931e366d9c46051d829d3c05f23f73d3f20017eb91c86bf586cc318e7329e7990ae69002b0ad16638f5f41b3b0d141560a56c7236a
7
+ data.tar.gz: e621507bece6ff42ee80788bb9fb486dbd685b33d0f3893a9affca3d2edaf2a55441e98278bb056b290dcb2102d12a6eac5faea1c29d36a6e6f61118c43ee121
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.13.2] - 2024-05-20
4
+ - New `Langchain::LLM::GoogleGemini#embed()` method
5
+ - `Langchain::Assistant` works with `Langchain::LLM::Anthropic` llm
6
+ - New XLS file processor
7
+ - Fixes and improvements
8
+
3
9
  ## [0.13.1] - 2024-05-14
4
10
  - Better error handling for `Langchain::LLM::GoogleVertexAI`
5
11
 
@@ -8,6 +8,7 @@ module Langchain
8
8
  attr_accessor :tools
9
9
 
10
10
  SUPPORTED_LLMS = [
11
+ Langchain::LLM::Anthropic,
11
12
  Langchain::LLM::OpenAI,
12
13
  Langchain::LLM::GoogleGemini,
13
14
  Langchain::LLM::GoogleVertexAI
@@ -41,7 +42,7 @@ module Langchain
41
42
  if llm.is_a?(Langchain::LLM::OpenAI)
42
43
  add_message(role: "system", content: instructions) if instructions
43
44
  end
44
- # For Google Gemini, system instructions are added to the `system:` param in the `chat` method
45
+ # For Google Gemini and Anthropic, system instructions are added to the `system:` param in the `chat` method
45
46
  end
46
47
 
47
48
  # Add a user message to the thread
@@ -137,6 +138,8 @@ module Langchain
137
138
  Langchain::Messages::OpenAIMessage::TOOL_ROLE
138
139
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
139
140
  Langchain::Messages::GoogleGeminiMessage::TOOL_ROLE
141
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
142
+ Langchain::Messages::AnthropicMessage::TOOL_ROLE
140
143
  end
141
144
 
142
145
  # TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked
@@ -179,12 +182,17 @@ module Langchain
179
182
  if tools.any?
180
183
  if llm.is_a?(Langchain::LLM::OpenAI)
181
184
  params[:tools] = tools.map(&:to_openai_tools).flatten
185
+ params[:tool_choice] = "auto"
186
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
187
+ params[:tools] = tools.map(&:to_anthropic_tools).flatten
188
+ params[:system] = instructions if instructions
189
+ params[:tool_choice] = {type: "auto"}
182
190
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
183
191
  params[:tools] = tools.map(&:to_google_gemini_tools).flatten
184
192
  params[:system] = instructions if instructions
193
+ params[:tool_choice] = "auto"
185
194
  end
186
195
  # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
187
- params[:tool_choice] = "auto"
188
196
  end
189
197
 
190
198
  llm.chat(**params)
@@ -200,6 +208,8 @@ module Langchain
200
208
  extract_openai_tool_call(tool_call: tool_call)
201
209
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
202
210
  extract_google_gemini_tool_call(tool_call: tool_call)
211
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
212
+ extract_anthropic_tool_call(tool_call: tool_call)
203
213
  end
204
214
 
205
215
  tool_instance = tools.find do |t|
@@ -234,6 +244,20 @@ module Langchain
234
244
  [tool_call_id, tool_name, method_name, tool_arguments]
235
245
  end
236
246
 
247
+ # Extract the tool call information from the Anthropic tool call hash
248
+ #
249
+ # @param tool_call [Hash] The tool call hash, format: {"type"=>"tool_use", "id"=>"toolu_01TjusbFApEbwKPRWTRwzadR", "name"=>"news_retriever__get_top_headlines", "input"=>{"country"=>"us", "page_size"=>10}}
250
+ # @return [Array] The tool call information
251
+ def extract_anthropic_tool_call(tool_call:)
252
+ tool_call_id = tool_call.dig("id")
253
+
254
+ function_name = tool_call.dig("name")
255
+ tool_name, method_name = function_name.split("__")
256
+ tool_arguments = tool_call.dig("input").transform_keys(&:to_sym)
257
+
258
+ [tool_call_id, tool_name, method_name, tool_arguments]
259
+ end
260
+
237
261
  # Extract the tool call information from the Google Gemini tool call hash
238
262
  #
239
263
  # @param tool_call [Hash] The tool call hash, format: {"functionCall"=>{"name"=>"weather__execute", "args"=>{"input"=>"NYC"}}}
@@ -260,6 +284,8 @@ module Langchain
260
284
  Langchain::Messages::OpenAIMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
261
285
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
262
286
  Langchain::Messages::GoogleGeminiMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
287
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
288
+ Langchain::Messages::AnthropicMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
263
289
  end
264
290
  end
265
291
 
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Messages
5
+ class AnthropicMessage < Base
6
+ ROLES = [
7
+ "assistant",
8
+ "user",
9
+ "tool_result"
10
+ ].freeze
11
+
12
+ TOOL_ROLE = "tool_result"
13
+
14
+ def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
15
+ raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
16
+ raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
17
+
18
+ @role = role
19
+ # Some Tools return content as JSON, hence `.to_s`
20
+ @content = content.to_s
21
+ @tool_calls = tool_calls
22
+ @tool_call_id = tool_call_id
23
+ end
24
+
25
+ # Convert the message to an Anthropic API-compatible hash
26
+ #
27
+ # @return [Hash] The message as an Anthropic API-compatible hash
28
+ def to_hash
29
+ {}.tap do |h|
30
+ h[:role] = tool? ? "user" : role
31
+
32
+ h[:content] = if tool?
33
+ [
34
+ {
35
+ type: "tool_result",
36
+ tool_use_id: tool_call_id,
37
+ content: content
38
+ }
39
+ ]
40
+ elsif tool_calls.any?
41
+ tool_calls
42
+ else
43
+ content
44
+ end
45
+ end
46
+ end
47
+
48
+ # Check if the message is a tool call
49
+ #
50
+ # @return [Boolean] true/false whether this message is a tool call
51
+ def tool?
52
+ role == "tool_result"
53
+ end
54
+
55
+ # Anthropic does not implement system prompts
56
+ def system?
57
+ false
58
+ end
59
+
60
+ # Check if the message came from an LLM
61
+ #
62
+ # @return [Boolean] true/false whether this message was produced by an LLM
63
+ def assistant?
64
+ role == "assistant"
65
+ end
66
+
67
+ # Check if the message came from an LLM
68
+ #
69
+ # @return [Boolean] true/false whether this message was produced by an LLM
70
+ def llm?
71
+ assistant?
72
+ end
73
+ end
74
+ end
75
+ end
@@ -101,6 +101,8 @@ module Langchain::LLM
101
101
  # @option params [Float] :top_p Use nucleus sampling.
102
102
  # @return [Langchain::LLM::AnthropicResponse] The chat completion
103
103
  def chat(params = {})
104
+ set_extra_headers! if params[:tools]
105
+
104
106
  parameters = chat_parameters.to_params(params)
105
107
 
106
108
  raise ArgumentError.new("messages argument is required") if Array(parameters[:messages]).empty?
@@ -111,5 +113,11 @@ module Langchain::LLM
111
113
 
112
114
  Langchain::LLM::AnthropicResponse.new(response)
113
115
  end
116
+
117
+ private
118
+
119
+ def set_extra_headers!
120
+ ::Anthropic.configuration.extra_headers = {"anthropic-beta": "tools-2024-05-16"}
121
+ end
114
122
  end
115
123
  end
@@ -6,6 +6,7 @@ module Langchain::LLM
6
6
  class GoogleGemini < Base
7
7
  DEFAULTS = {
8
8
  chat_completion_model_name: "gemini-1.5-pro-latest",
9
+ embeddings_model_name: "text-embedding-004",
9
10
  temperature: 0.0
10
11
  }
11
12
 
@@ -63,5 +64,35 @@ module Langchain::LLM
63
64
  raise StandardError.new(response)
64
65
  end
65
66
  end
67
+
68
+ def embed(
69
+ text:,
70
+ model: @defaults[:embeddings_model_name]
71
+ )
72
+
73
+ params = {
74
+ content: {
75
+ parts: [
76
+ {
77
+ text: text
78
+ }
79
+ ]
80
+ }
81
+ }
82
+
83
+ uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:embedContent?key=#{api_key}")
84
+
85
+ request = Net::HTTP::Post.new(uri)
86
+ request.content_type = "application/json"
87
+ request.body = params.to_json
88
+
89
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
90
+ http.request(request)
91
+ end
92
+
93
+ parsed_response = JSON.parse(response.body)
94
+
95
+ Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: model)
96
+ end
66
97
  end
67
98
  end
@@ -28,7 +28,10 @@ module Langchain::LLM
28
28
  def initialize(project_id:, region:, default_options: {})
29
29
  depends_on "googleauth"
30
30
 
31
- @authorizer = ::Google::Auth.get_application_default
31
+ @authorizer = ::Google::Auth.get_application_default(scope: [
32
+ "https://www.googleapis.com/auth/cloud-platform",
33
+ "https://www.googleapis.com/auth/generative-language.retriever"
34
+ ])
32
35
  proj_id = project_id || @authorizer.project_id || @authorizer.quota_project_id
33
36
  @url = "https://#{region}-aiplatform.googleapis.com/v1/projects/#{proj_id}/locations/#{region}/publishers/google/models/"
34
37
 
@@ -11,12 +11,12 @@ module Langchain::LLM
11
11
  # hf = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
12
12
  #
13
13
  class HuggingFace < Base
14
- # The gem does not currently accept other models:
15
- # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
16
14
  DEFAULTS = {
17
- temperature: 0.0,
18
- embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2",
19
- dimensions: 384 # Vector size generated by the above model
15
+ embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2"
16
+ }.freeze
17
+
18
+ EMBEDDING_SIZES = {
19
+ "sentence-transformers/all-MiniLM-L6-v2": 384
20
20
  }.freeze
21
21
 
22
22
  #
@@ -24,10 +24,21 @@ module Langchain::LLM
24
24
  #
25
25
  # @param api_key [String] The API key to use
26
26
  #
27
- def initialize(api_key:)
27
+ def initialize(api_key:, default_options: {})
28
28
  depends_on "hugging-face", req: "hugging_face"
29
29
 
30
30
  @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
31
+ @defaults = DEFAULTS.merge(default_options)
32
+ end
33
+
34
+ # Returns the # of vector dimensions for the embeddings
35
+ # @return [Integer] The # of vector dimensions
36
+ def default_dimensions
37
+ # since Hugging Face can run multiple models, look it up or generate an embedding and return the size
38
+ @default_dimensions ||= @defaults[:dimensions] ||
39
+ EMBEDDING_SIZES.fetch(@defaults[:embeddings_model_name].to_sym) do
40
+ embed(text: "test").embedding.size
41
+ end
31
42
  end
32
43
 
33
44
  #
@@ -39,9 +50,9 @@ module Langchain::LLM
39
50
  def embed(text:)
40
51
  response = client.embedding(
41
52
  input: text,
42
- model: DEFAULTS[:embeddings_model_name]
53
+ model: @defaults[:embeddings_model_name]
43
54
  )
44
- Langchain::LLM::HuggingFaceResponse.new(response, model: DEFAULTS[:embeddings_model_name])
55
+ Langchain::LLM::HuggingFaceResponse.new(response, model: @defaults[:embeddings_model_name])
45
56
  end
46
57
  end
47
58
  end
@@ -11,7 +11,17 @@ module Langchain::LLM
11
11
  end
12
12
 
13
13
  def chat_completion
14
- raw_response.dig("content", 0, "text")
14
+ chat_completion = chat_completions.find { |h| h["type"] == "text" }
15
+ chat_completion.dig("text")
16
+ end
17
+
18
+ def tool_calls
19
+ tool_call = chat_completions.find { |h| h["type"] == "tool_use" }
20
+ tool_call ? [tool_call] : []
21
+ end
22
+
23
+ def chat_completions
24
+ raw_response.dig("content")
15
25
  end
16
26
 
17
27
  def completions
@@ -27,7 +27,11 @@ module Langchain::LLM
27
27
  end
28
28
 
29
29
  def embeddings
30
- [raw_response.dig("predictions", 0, "embeddings", "values")]
30
+ if raw_response.key?("embedding")
31
+ [raw_response.dig("embedding", "values")]
32
+ else
33
+ [raw_response.dig("predictions", 0, "embeddings", "values")]
34
+ end
31
35
  end
32
36
 
33
37
  def prompt_tokens
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Xls < Base
6
+ EXTENSIONS = [".xls"].freeze
7
+ CONTENT_TYPES = ["application/vnd.ms-excel"].freeze
8
+
9
+ def initialize(*)
10
+ depends_on "roo"
11
+ depends_on "roo-xls"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [Array<Array<String>>] Array of rows, each row is an array of cells
17
+ def parse(data)
18
+ xls_file = Roo::Spreadsheet.open(data)
19
+ xls_file.each_with_pagename.flat_map do |_, sheet|
20
+ sheet.map do |row|
21
+ row.map { |i| i.to_s.strip }
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -71,6 +71,18 @@ module Langchain::Tool
71
71
  method_annotations
72
72
  end
73
73
 
74
+ # Returns the tool as a list of Anthropic formatted functions
75
+ #
76
+ # @return [Array<Hash>] List of hashes representing the tool as Anthropic formatted functions
77
+ def to_anthropic_tools
78
+ method_annotations.map do |annotation|
79
+ # Slice out only the content of the "function" key
80
+ annotation["function"]
81
+ # Rename "parameters" to "input_schema" key
82
+ .transform_keys("parameters" => "input_schema")
83
+ end
84
+ end
85
+
74
86
  # Returns the tool as a list of Google Gemini formatted functions
75
87
  #
76
88
  # @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions
@@ -68,7 +68,8 @@
68
68
  "properties": {
69
69
  "country": {
70
70
  "type": "string",
71
- "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
71
+ "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for.",
72
+ "enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
72
73
  },
73
74
  "category": {
74
75
  "type": "string",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.13.1"
4
+ VERSION = "0.13.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.1
4
+ version: 0.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-15 00:00:00.000000000 Z
11
+ date: 2024-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -598,6 +598,20 @@ dependencies:
598
598
  - - "~>"
599
599
  - !ruby/object:Gem::Version
600
600
  version: 2.10.0
601
+ - !ruby/object:Gem::Dependency
602
+ name: roo-xls
603
+ requirement: !ruby/object:Gem::Requirement
604
+ requirements:
605
+ - - "~>"
606
+ - !ruby/object:Gem::Version
607
+ version: 1.2.0
608
+ type: :development
609
+ prerelease: false
610
+ version_requirements: !ruby/object:Gem::Requirement
611
+ requirements:
612
+ - - "~>"
613
+ - !ruby/object:Gem::Version
614
+ version: 1.2.0
601
615
  - !ruby/object:Gem::Dependency
602
616
  name: ruby-openai
603
617
  requirement: !ruby/object:Gem::Requirement
@@ -708,6 +722,7 @@ files:
708
722
  - README.md
709
723
  - lib/langchain.rb
710
724
  - lib/langchain/assistants/assistant.rb
725
+ - lib/langchain/assistants/messages/anthropic_message.rb
711
726
  - lib/langchain/assistants/messages/base.rb
712
727
  - lib/langchain/assistants/messages/google_gemini_message.rb
713
728
  - lib/langchain/assistants/messages/openai_message.rb
@@ -779,6 +794,7 @@ files:
779
794
  - lib/langchain/processors/pdf.rb
780
795
  - lib/langchain/processors/pptx.rb
781
796
  - lib/langchain/processors/text.rb
797
+ - lib/langchain/processors/xls.rb
782
798
  - lib/langchain/processors/xlsx.rb
783
799
  - lib/langchain/prompt.rb
784
800
  - lib/langchain/prompt/base.rb