langchainrb 0.13.5 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +2 -17
- data/lib/langchain/assistants/assistant.rb +90 -19
- data/lib/langchain/assistants/messages/ollama_message.rb +86 -0
- data/lib/langchain/assistants/thread.rb +8 -1
- data/lib/langchain/llm/ai21.rb +0 -4
- data/lib/langchain/llm/anthropic.rb +15 -6
- data/lib/langchain/llm/azure.rb +3 -3
- data/lib/langchain/llm/base.rb +1 -0
- data/lib/langchain/llm/cohere.rb +0 -2
- data/lib/langchain/llm/google_palm.rb +1 -4
- data/lib/langchain/llm/ollama.rb +1 -1
- data/lib/langchain/llm/response/google_gemini_response.rb +1 -1
- data/lib/langchain/llm/response/ollama_response.rb +19 -1
- data/lib/langchain/vectorsearch/milvus.rb +1 -1
- data/lib/langchain/version.rb +1 -1
- metadata +5 -24
- data/lib/langchain/utils/token_length/ai21_validator.rb +0 -41
- data/lib/langchain/utils/token_length/base_validator.rb +0 -42
- data/lib/langchain/utils/token_length/cohere_validator.rb +0 -49
- data/lib/langchain/utils/token_length/google_palm_validator.rb +0 -57
- data/lib/langchain/utils/token_length/openai_validator.rb +0 -138
- data/lib/langchain/utils/token_length/token_limit_exceeded.rb +0 -17
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 68900cd116cf0fb1b77376a4906e5551f0d578ee2bb47c7ec86d32bf44f84e33
+  data.tar.gz: f68782c3cdc856799778618d78b6411a85b0c69adf6a4d33489b8025fdca3dce
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 158410fd769caaf9074eddc1143ddee9256ac5a466a510c32b74d337eba62fab80b676661cbf1673604d236014a5cb4defdd4743e71abb713a659ddea0fe5e8c
+  data.tar.gz: 2e956356a443ff37ad711f6c42f8c4940925bcee4be075b403c78c3f702b487c12790dca9ba7d68a01acaf1c245b2910650b3f938e80cedd1fc2d5af14f7ffa8
data/CHANGELOG.md
CHANGED

@@ -1,5 +1,11 @@
 ## [Unreleased]
 
+## [0.14.0] - 2024-07-12
+- Removed TokenLength validators
+- Assistant works with a Mistral LLM now
+- Assistant keeps track of tokens used
+- Misc fixes and improvements
+
 ## [0.13.5] - 2024-07-01
 - Add Milvus#remove_texts() method
 - Langchain::Assistant has a `state` now
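The token-tracking entry above is observable directly on the Assistant. A minimal sketch of reading the new counters after a run — the prompt and the token numbers are illustrative, but the `total_prompt_tokens`/`total_completion_tokens`/`total_tokens` readers come from the `attr_reader` added in `assistant.rb` further down:

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
assistant = Langchain::Assistant.new(llm: llm, instructions: "You are a helpful assistant")

assistant.add_message_and_run(content: "Say hello")

# New in 0.14.0: cumulative usage across every LLM call the Assistant has made
assistant.total_prompt_tokens     # => e.g. 25
assistant.total_completion_tokens # => e.g. 9
assistant.total_tokens            # => e.g. 34
```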
data/README.md
CHANGED

@@ -428,25 +428,10 @@ Assistants are Agent-like objects that leverage helpful instructions, LLMs, tool
 ```ruby
 llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 ```
-2. Instantiate
-```ruby
-thread = Langchain::Thread.new
-```
-You can pass old message from previously using the Assistant:
-```ruby
-thread.messages = messages
-```
-Messages contain the conversation history and the whole message history is sent to the LLM every time. A Message belongs to 1 of the 4 roles:
-* `Message(role: "system")` message usually contains the instructions.
-* `Message(role: "user")` messages come from the user.
-* `Message(role: "assistant")` messages are produced by the LLM.
-* `Message(role: "tool")` messages are sent in response to tool calls with tool outputs.
-
-3. Instantiate an Assistant
+2. Instantiate an Assistant
 ```ruby
 assistant = Langchain::Assistant.new(
   llm: llm,
-  thread: thread,
   instructions: "You are a Meteorologist Assistant that is able to pull the weather for any location",
   tools: [
     Langchain::Tool::Weather.new(api_key: ENV["OPEN_WEATHER_API_KEY"])
@@ -482,7 +467,7 @@ assistant.add_message_and_run content: "What about Sacramento, CA?", auto_tool_e
 ### Accessing Thread messages
 You can access the messages in a Thread by calling `assistant.thread.messages`.
 ```ruby
-assistant.
+assistant.messages
 ```
 
 The Assistant checks the context window limits before every request to the LLM and remove oldest thread messages one by one if the context window is exceeded.
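Putting the updated README steps together: the caller no longer builds a `Langchain::Thread`; presumably the Assistant now creates its own. A sketch under that assumption (the keyword arguments and the Weather tool are taken from the README hunk above):

```ruby
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

assistant = Langchain::Assistant.new(
  llm: llm,
  instructions: "You are a Meteorologist Assistant that is able to pull the weather for any location",
  tools: [Langchain::Tool::Weather.new(api_key: ENV["OPEN_WEATHER_API_KEY"])]
)

assistant.messages # conversation history, delegated to the internally managed thread
```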
data/lib/langchain/assistants/assistant.rb
CHANGED

@@ -16,13 +16,15 @@ module Langchain
     def_delegators :thread, :messages, :messages=
 
     attr_reader :llm, :thread, :instructions, :state
+    attr_reader :total_prompt_tokens, :total_completion_tokens, :total_tokens
     attr_accessor :tools
 
     SUPPORTED_LLMS = [
       Langchain::LLM::Anthropic,
-      Langchain::LLM::OpenAI,
       Langchain::LLM::GoogleGemini,
-      Langchain::LLM::GoogleVertexAI
+      Langchain::LLM::GoogleVertexAI,
+      Langchain::LLM::Ollama,
+      Langchain::LLM::OpenAI
     ]
 
     # Create a new assistant
@@ -40,6 +42,9 @@ module Langchain
       unless SUPPORTED_LLMS.include?(llm.class)
         raise ArgumentError, "Invalid LLM; currently only #{SUPPORTED_LLMS.join(", ")} are supported"
       end
+      if llm.is_a?(Langchain::LLM::Ollama)
+        raise ArgumentError, "Currently only `mistral:7b-instruct-v0.3-fp16` model is supported for Ollama LLM" unless llm.defaults[:completion_model_name] == "mistral:7b-instruct-v0.3-fp16"
+      end
       raise ArgumentError, "Tools must be an array of Langchain::Tool::Base instance(s)" unless tools.is_a?(Array) && tools.all? { |tool| tool.is_a?(Langchain::Tool::Base) }
 
       @llm = llm
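A hedged sketch of what the new Ollama guard means for callers; the `url:` and `default_options:` arguments are assumptions about `Langchain::LLM::Ollama.new`, while the model name and error text are quoted from the hunk above:

```ruby
llm = Langchain::LLM::Ollama.new(
  url: "http://localhost:11434",
  default_options: {completion_model_name: "mistral:7b-instruct-v0.3-fp16"}
)

# Works: the default matches the one supported model.
assistant = Langchain::Assistant.new(llm: llm, instructions: "You are a helpful assistant")

# Any other completion_model_name raises:
#   ArgumentError: Currently only `mistral:7b-instruct-v0.3-fp16` model is supported for Ollama LLM
```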
@@ -48,13 +53,15 @@ module Langchain
       @instructions = instructions
       @state = :ready
 
+      @total_prompt_tokens = 0
+      @total_completion_tokens = 0
+      @total_tokens = 0
+
       raise ArgumentError, "Thread must be an instance of Langchain::Thread" unless @thread.is_a?(Langchain::Thread)
 
       # The first message in the thread should be the system instructions
       # TODO: What if the user added old messages and the system instructions are already in there? Should this overwrite the existing instructions?
-
-      add_message(role: "system", content: instructions) if instructions
-    end
+      initialize_instructions
       # For Google Gemini, and Anthropic system instructions are added to the `system:` param in the `chat` method
     end
 
@@ -150,7 +157,6 @@ module Langchain
 
     # Handle the current state and transition to the next state
     #
-    # @param state [Symbol] The current state
     # @return [Symbol] The next state
     def handle_state
       case @state
@@ -189,7 +195,6 @@ module Langchain
 
     # Handle LLM message scenario
     #
-    # @param auto_tool_execution [Boolean] Flag to indicate if tools should be executed automatically
     # @return [Symbol] The next state
     def handle_llm_message
       thread.messages.last.tool_calls.any? ? :requires_action : :completed
@@ -208,14 +213,29 @@ module Langchain
     # @return [Symbol] The next state
     def handle_user_or_tool_message
       response = chat_with_llm
-      add_message(role: response.role, content: response.chat_completion, tool_calls: response.tool_calls)
 
+      # With Ollama, we're calling the `llm.complete()` method
+      content = if llm.is_a?(Langchain::LLM::Ollama)
+        response.completion
+      else
+        response.chat_completion
+      end
+
+      add_message(role: response.role, content: content, tool_calls: response.tool_calls)
+      record_used_tokens(response.prompt_tokens, response.completion_tokens, response.total_tokens)
+
+      set_state_for(response: response)
+    end
+
+    def set_state_for(response:)
       if response.tool_calls.any?
         :in_progress
       elsif response.chat_completion
         :completed
+      elsif response.completion # Currently only used by Ollama
+        :completed
       else
-        Langchain.logger.error("LLM response does not contain tool calls or
+        Langchain.logger.error("LLM response does not contain tool calls, chat or completion response")
         :failed
       end
     end
@@ -236,6 +256,8 @@ module Langchain
     # @return [String] The tool role
     def determine_tool_role
       case llm
+      when Langchain::LLM::Ollama
+        Langchain::Messages::OllamaMessage::TOOL_ROLE
       when Langchain::LLM::OpenAI
         Langchain::Messages::OpenAIMessage::TOOL_ROLE
       when Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI
@@ -245,31 +267,58 @@ module Langchain
       end
     end
 
+    def initialize_instructions
+      if llm.is_a?(Langchain::LLM::Ollama)
+        content = String.new # rubocop: disable Performance/UnfreezeString
+        if tools.any?
+          content << %([AVAILABLE_TOOLS] #{tools.map(&:to_openai_tools).flatten}[/AVAILABLE_TOOLS])
+        end
+        if instructions
+          content << "[INST] #{instructions}[/INST]"
+        end
+
+        add_message(role: "system", content: content)
+      elsif llm.is_a?(Langchain::LLM::OpenAI)
+        add_message(role: "system", content: instructions) if instructions
+      end
+    end
+
     # Call to the LLM#chat() method
     #
     # @return [Langchain::LLM::BaseResponse] The LLM response object
     def chat_with_llm
       Langchain.logger.info("Sending a call to #{llm.class}", for: self.class)
 
-      params = {
+      params = {}
 
-      if
-      if
+      if llm.is_a?(Langchain::LLM::OpenAI)
+        if tools.any?
           params[:tools] = tools.map(&:to_openai_tools).flatten
           params[:tool_choice] = "auto"
-
+        end
+      elsif llm.is_a?(Langchain::LLM::Anthropic)
+        if tools.any?
           params[:tools] = tools.map(&:to_anthropic_tools).flatten
-          params[:system] = instructions if instructions
           params[:tool_choice] = {type: "auto"}
-
+        end
+        params[:system] = instructions if instructions
+      elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
+        if tools.any?
           params[:tools] = tools.map(&:to_google_gemini_tools).flatten
           params[:system] = instructions if instructions
           params[:tool_choice] = "auto"
         end
-      # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
       end
+      # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
 
-      llm.
+      if llm.is_a?(Langchain::LLM::Ollama)
+        params[:raw] = true
+        params[:prompt] = thread.prompt_of_concatenated_messages
+        llm.complete(**params)
+      else
+        params[:messages] = thread.array_of_message_hashes
+        llm.chat(**params)
+      end
     end
 
     # Run the tools automatically
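Roughly what the two branches at the end of `chat_with_llm` boil down to (a sketch, not additional diff content):

```ruby
# Chat-style LLMs receive the structured message history:
response = llm.chat(messages: thread.array_of_message_hashes)

# Ollama instead receives one raw, pre-formatted Mistral prompt string, so the
# model sees the [INST]/[TOOL_RESULTS] markers from OllamaMessage#to_s verbatim:
response = llm.complete(raw: true, prompt: thread.prompt_of_concatenated_messages)
```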
@@ -278,7 +327,9 @@ module Langchain
     def run_tools(tool_calls)
       # Iterate over each function invocation and submit tool output
       tool_calls.each do |tool_call|
-        tool_call_id, tool_name, method_name, tool_arguments = if llm.is_a?(Langchain::LLM::
+        tool_call_id, tool_name, method_name, tool_arguments = if llm.is_a?(Langchain::LLM::Ollama)
+          extract_ollama_tool_call(tool_call: tool_call)
+        elsif llm.is_a?(Langchain::LLM::OpenAI)
           extract_openai_tool_call(tool_call: tool_call)
         elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
           extract_google_gemini_tool_call(tool_call: tool_call)
@@ -296,6 +347,12 @@ module Langchain
       end
     end
 
+    def extract_ollama_tool_call(tool_call:)
+      tool_name, method_name = tool_call.dig("name").split("__")
+      tool_arguments = tool_call.dig("arguments").transform_keys(&:to_sym)
+      [nil, tool_name, method_name, tool_arguments]
+    end
+
     # Extract the tool call information from the OpenAI tool call hash
     #
     # @param tool_call [Hash] The tool call hash
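How `extract_ollama_tool_call` slices up a Mistral-style tool call — the hash below is a hypothetical example, but the `"tool__method"` split and the symbolized arguments follow from the method body above:

```ruby
tool_call = {"name" => "langchain_tool_weather__execute", "arguments" => {"input" => "Boston, MA"}}

tool_name, method_name = tool_call.dig("name").split("__")
tool_name   # => "langchain_tool_weather"
method_name # => "execute"
tool_call.dig("arguments").transform_keys(&:to_sym) # => {input: "Boston, MA"}
# There is no tool_call_id in this format, hence the nil first element of the returned array.
```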
@@ -346,7 +403,9 @@ module Langchain
     # @param tool_call_id [String] The ID of the tool call to include in the message
     # @return [Langchain::Message] The Message object
     def build_message(role:, content: nil, tool_calls: [], tool_call_id: nil)
-      if llm.is_a?(Langchain::LLM::
+      if llm.is_a?(Langchain::LLM::Ollama)
+        Langchain::Messages::OllamaMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
+      elsif llm.is_a?(Langchain::LLM::OpenAI)
         Langchain::Messages::OpenAIMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
       elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
         Langchain::Messages::GoogleGeminiMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
@@ -355,6 +414,18 @@ module Langchain
       end
     end
 
+    # Increment the tokens count based on the last interaction with the LLM
+    #
+    # @param prompt_tokens [Integer] The number of used prompt tokens
+    # @param completion_tokens [Integer] The number of used completion tokens
+    # @param total_tokens [Integer] The total number of used tokens
+    # @return [Integer] The current total tokens count
+    def record_used_tokens(prompt_tokens, completion_tokens, total_tokens_from_operation)
+      @total_prompt_tokens += prompt_tokens if prompt_tokens
+      @total_completion_tokens += completion_tokens if completion_tokens
+      @total_tokens += total_tokens_from_operation if total_tokens_from_operation
+    end
+
     # TODO: Fix the message truncation when context window is exceeded
   end
 end
data/lib/langchain/assistants/messages/ollama_message.rb
ADDED

@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Messages
+    class OllamaMessage < Base
+      # OpenAI uses the following roles:
+      ROLES = [
+        "system",
+        "assistant",
+        "user",
+        "tool"
+      ].freeze
+
+      TOOL_ROLE = "tool"
+
+      # Initialize a new OpenAI message
+      #
+      # @param [String] The role of the message
+      # @param [String] The content of the message
+      # @param [Array<Hash>] The tool calls made in the message
+      # @param [String] The ID of the tool call
+      def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
+        raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
+        raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
+
+        @role = role
+        # Some Tools return content as a JSON hence `.to_s`
+        @content = content.to_s
+        @tool_calls = tool_calls
+        @tool_call_id = tool_call_id
+      end
+
+      def to_s
+        send(:"to_#{role}_message_string")
+      end
+
+      def to_system_message_string
+        content
+      end
+
+      def to_user_message_string
+        "[INST] #{content}[/INST]"
+      end
+
+      def to_tool_message_string
+        "[TOOL_RESULTS] #{content}[/TOOL_RESULTS]"
+      end
+
+      def to_assistant_message_string
+        if tool_calls.any?
+          %("[TOOL_CALLS] #{tool_calls}")
+        else
+          content
+        end
+      end
+
+      # Check if the message came from an LLM
+      #
+      # @return [Boolean] true/false whether this message was produced by an LLM
+      def llm?
+        assistant?
+      end
+
+      # Check if the message came from an LLM
+      #
+      # @return [Boolean] true/false whether this message was produced by an LLM
+      def assistant?
+        role == "assistant"
+      end
+
+      # Check if the message are system instructions
+      #
+      # @return [Boolean] true/false whether this message are system instructions
+      def system?
+        role == "system"
+      end
+
+      # Check if the message is a tool call
+      #
+      # @return [Boolean] true/false whether this message is a tool call
+      def tool?
+        role == "tool"
+      end
+    end
+  end
+end
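`OllamaMessage#to_s` renders each role into Mistral's instruction format; a small sketch of the strings it produces:

```ruby
Langchain::Messages::OllamaMessage.new(role: "user", content: "What's 2 + 2?").to_s
# => "[INST] What's 2 + 2?[/INST]"

Langchain::Messages::OllamaMessage.new(role: "tool", content: "4").to_s
# => "[TOOL_RESULTS] 4[/TOOL_RESULTS]"

Langchain::Messages::OllamaMessage.new(role: "banker")
# => ArgumentError: Role must be one of system, assistant, user, tool
```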
data/lib/langchain/assistants/thread.rb
CHANGED

@@ -17,7 +17,14 @@ module Langchain
     #
     # @return [Array<Hash>] The thread as an OpenAI API-compatible array of hashes
     def array_of_message_hashes
-      messages
+      messages
+        .map(&:to_hash)
+        .compact
+    end
+
+    # Only used by the Assistant when it calls the LLM#complete() method
+    def prompt_of_concatenated_messages
+      messages.map(&:to_s).join
     end
 
     # Add a message to the thread
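`prompt_of_concatenated_messages` just joins each message's `to_s`; assuming `Langchain::Thread.new` accepts a `messages:` array, the raw Ollama prompt for a short thread would look like:

```ruby
thread = Langchain::Thread.new(messages: [
  Langchain::Messages::OllamaMessage.new(role: "system", content: "[INST] Be terse.[/INST]"),
  Langchain::Messages::OllamaMessage.new(role: "user", content: "Hi")
])

thread.prompt_of_concatenated_messages
# => "[INST] Be terse.[/INST][INST] Hi[/INST]"
```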
data/lib/langchain/llm/ai21.rb
CHANGED

@@ -16,8 +16,6 @@ module Langchain::LLM
       model: "j2-ultra"
     }.freeze
 
-    LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AI21Validator
-
     def initialize(api_key:, default_options: {})
       depends_on "ai21"
 
@@ -35,8 +33,6 @@ module Langchain::LLM
     def complete(prompt:, **params)
       parameters = complete_parameters params
 
-      parameters[:maxTokens] = LENGTH_VALIDATOR.validate_max_tokens!(prompt, parameters[:model], {llm: client})
-
       response = client.complete(prompt, parameters)
       Langchain::LLM::AI21Response.new response, model: parameters[:model]
     end
data/lib/langchain/llm/anthropic.rb
CHANGED

@@ -5,10 +5,10 @@ module Langchain::LLM
   # Wrapper around Anthropic APIs.
   #
   # Gem requirements:
-  #     gem "anthropic", "~> 0.
+  #     gem "anthropic", "~> 0.3.0"
   #
   # Usage:
-  #
+  #     anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
   #
   class Anthropic < Base
     DEFAULTS = {
@@ -18,9 +18,6 @@ module Langchain::LLM
       max_tokens_to_sample: 256
     }.freeze
 
-    # TODO: Implement token length validator for Anthropic
-    # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
-
     # Initialize an Anthropic LLM instance
     #
     # @param api_key [String] The API key to use
@@ -81,7 +78,10 @@ module Langchain::LLM
       parameters[:metadata] = metadata if metadata
       parameters[:stream] = stream if stream
 
-      response =
+      response = with_api_error_handling do
+        client.complete(parameters: parameters)
+      end
+
       Langchain::LLM::AnthropicResponse.new(response)
     end
 
@@ -114,6 +114,15 @@ module Langchain::LLM
       Langchain::LLM::AnthropicResponse.new(response)
     end
 
+    def with_api_error_handling
+      response = yield
+      return if response.empty?
+
+      raise Langchain::LLM::ApiError.new "Anthropic API error: #{response.dig("error", "message")}" if response&.dig("error")
+
+      response
+    end
+
     private
 
     def set_extra_headers!
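A sketch of the new error path; the response hash shown in the comment is an illustrative Anthropic error payload, not taken from the diff:

```ruby
anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])

# If the API returned {"error" => {"type" => "invalid_request_error", "message" => "max_tokens: required"}},
# complete() now raises instead of wrapping the error hash in an AnthropicResponse:
#   Langchain::LLM::ApiError: Anthropic API error: max_tokens: required
anthropic.complete(prompt: "Hello")
```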
data/lib/langchain/llm/azure.rb
CHANGED

@@ -42,17 +42,17 @@ module Langchain::LLM
 
     def embed(...)
       @client = @embed_client
-      super
+      super
     end
 
     def complete(...)
       @client = @chat_client
-      super
+      super
     end
 
     def chat(...)
       @client = @chat_client
-      super
+      super
     end
   end
 end
data/lib/langchain/llm/base.rb
CHANGED

@@ -8,6 +8,7 @@ module Langchain::LLM
   # Langchain.rb provides a common interface to interact with all supported LLMs:
   #
   # - {Langchain::LLM::AI21}
+  # - {Langchain::LLM::Anthropic}
   # - {Langchain::LLM::Azure}
   # - {Langchain::LLM::Cohere}
   # - {Langchain::LLM::GooglePalm}
data/lib/langchain/llm/cohere.rb
CHANGED

@@ -74,8 +74,6 @@ module Langchain::LLM
 
       default_params.merge!(params)
 
-      default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], llm: client)
-
       response = client.generate(**default_params)
       Langchain::LLM::CohereResponse.new response, model: @defaults[:completion_model_name]
     end
data/lib/langchain/llm/google_palm.rb
CHANGED

@@ -18,7 +18,7 @@ module Langchain::LLM
       chat_completion_model_name: "chat-bison-001",
       embeddings_model_name: "embedding-gecko-001"
     }.freeze
-
+
     ROLE_MAPPING = {
       "assistant" => "ai"
     }
@@ -96,9 +96,6 @@ module Langchain::LLM
         examples: compose_examples(examples)
       }
 
-      # chat-bison-001 is the only model that currently supports countMessageTokens functions
-      LENGTH_VALIDATOR.validate_max_tokens!(default_params[:messages], "chat-bison-001", llm: self)
-
       if options[:stop_sequences]
         default_params[:stop] = options.delete(:stop_sequences)
       end
data/lib/langchain/llm/response/ollama_response.rb
CHANGED

@@ -36,7 +36,7 @@ module Langchain::LLM
     end
 
     def prompt_tokens
-      raw_response.
+      raw_response.fetch("prompt_eval_count", 0) if done?
     end
 
     def completion_tokens
@@ -47,6 +47,24 @@ module Langchain::LLM
       prompt_tokens + completion_tokens if done?
     end
 
+    def tool_calls
+      if chat_completion && (parsed_tool_calls = JSON.parse(chat_completion))
+        [parsed_tool_calls]
+      elsif completion&.include?("[TOOL_CALLS]") && (
+        parsed_tool_calls = JSON.parse(
+          completion
+            # Slice out the serialized JSON
+            .slice(/\{.*\}/)
+            # Replace hash rocket with colon
+            .gsub("=>", ":")
+        )
+      )
+        [parsed_tool_calls]
+      else
+        []
+      end
+    end
+
     private
 
     def done?
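The `[TOOL_CALLS]` parsing above turns Mistral's Ruby-hash-looking output back into JSON; a standalone sketch with a hypothetical completion string:

```ruby
require "json"

completion = '[TOOL_CALLS] {"name"=>"langchain_tool_weather__execute", "arguments"=>{"input"=>"Boston"}}'

parsed = JSON.parse(
  completion
    .slice(/\{.*\}/) # keep only the serialized hash
    .gsub("=>", ":") # hash rockets -> JSON colons
)
parsed # => {"name"=>"langchain_tool_weather__execute", "arguments"=>{"input"=>"Boston"}}
```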
data/lib/langchain/vectorsearch/milvus.rb
CHANGED

@@ -140,7 +140,7 @@ module Langchain::Vectorsearch
 
       client.search(
         collection_name: index_name,
-        output_fields: ["id", "content", "vectors"
+        output_fields: ["id", "content"], # Add "vectors" if need to have full vectors returned.
         top_k: k.to_s,
         vectors: [embedding],
         dsl_type: 1,
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.14.0
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-07-
+date: 2024-07-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -212,14 +212,14 @@ dependencies:
   requirements:
   - - "~>"
     - !ruby/object:Gem::Version
-      version: '0.
+      version: '0.3'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
     - !ruby/object:Gem::Version
-      version: '0.
+      version: '0.3'
 - !ruby/object:Gem::Dependency
   name: aws-sdk-bedrockruntime
   requirement: !ruby/object:Gem::Requirement
@@ -682,20 +682,6 @@ dependencies:
   - - "~>"
   - !ruby/object:Gem::Version
     version: 0.1.0
-- !ruby/object:Gem::Dependency
-  name: tiktoken_ruby
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-    - !ruby/object:Gem::Version
-      version: 0.0.9
-  type: :development
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-    - !ruby/object:Gem::Version
-      version: 0.0.9
 description: Build LLM-backed Ruby applications with Ruby's Langchain.rb
 email:
 - andrei.bondarev13@gmail.com
@@ -711,6 +697,7 @@ files:
 - lib/langchain/assistants/messages/anthropic_message.rb
 - lib/langchain/assistants/messages/base.rb
 - lib/langchain/assistants/messages/google_gemini_message.rb
+- lib/langchain/assistants/messages/ollama_message.rb
 - lib/langchain/assistants/messages/openai_message.rb
 - lib/langchain/assistants/thread.rb
 - lib/langchain/chunk.rb
@@ -810,12 +797,6 @@ files:
 - lib/langchain/tool/wikipedia/wikipedia.rb
 - lib/langchain/utils/cosine_similarity.rb
 - lib/langchain/utils/hash_transformer.rb
-- lib/langchain/utils/token_length/ai21_validator.rb
-- lib/langchain/utils/token_length/base_validator.rb
-- lib/langchain/utils/token_length/cohere_validator.rb
-- lib/langchain/utils/token_length/google_palm_validator.rb
-- lib/langchain/utils/token_length/openai_validator.rb
-- lib/langchain/utils/token_length/token_limit_exceeded.rb
 - lib/langchain/vectorsearch/base.rb
 - lib/langchain/vectorsearch/chroma.rb
 - lib/langchain/vectorsearch/elasticsearch.rb
@@ -1,41 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Langchain
|
|
4
|
-
module Utils
|
|
5
|
-
module TokenLength
|
|
6
|
-
#
|
|
7
|
-
# This class is meant to validate the length of the text passed in to AI21's API.
|
|
8
|
-
# It is used to validate the token length before the API call is made
|
|
9
|
-
#
|
|
10
|
-
|
|
11
|
-
class AI21Validator < BaseValidator
|
|
12
|
-
TOKEN_LIMITS = {
|
|
13
|
-
"j2-ultra" => 8192,
|
|
14
|
-
"j2-mid" => 8192,
|
|
15
|
-
"j2-light" => 8192
|
|
16
|
-
}.freeze
|
|
17
|
-
|
|
18
|
-
#
|
|
19
|
-
# Calculate token length for a given text and model name
|
|
20
|
-
#
|
|
21
|
-
# @param text [String] The text to calculate the token length for
|
|
22
|
-
# @param model_name [String] The model name to validate against
|
|
23
|
-
# @return [Integer] The token length of the text
|
|
24
|
-
#
|
|
25
|
-
def self.token_length(text, model_name, options = {})
|
|
26
|
-
res = options[:llm].tokenize(text)
|
|
27
|
-
res.dig(:tokens).length
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def self.token_limit(model_name)
|
|
31
|
-
TOKEN_LIMITS[model_name]
|
|
32
|
-
end
|
|
33
|
-
singleton_class.alias_method :completion_token_limit, :token_limit
|
|
34
|
-
|
|
35
|
-
def self.token_length_from_messages(messages, model_name, options)
|
|
36
|
-
messages.sum { |message| token_length(message.to_json, model_name, options) }
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
end
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Langchain
|
|
4
|
-
module Utils
|
|
5
|
-
module TokenLength
|
|
6
|
-
#
|
|
7
|
-
# Calculate the `max_tokens:` parameter to be set by calculating the context length of the text minus the prompt length
|
|
8
|
-
#
|
|
9
|
-
# @param content [String | Array<String>] The text or array of texts to validate
|
|
10
|
-
# @param model_name [String] The model name to validate against
|
|
11
|
-
# @return [Integer] Whether the text is valid or not
|
|
12
|
-
# @raise [TokenLimitExceeded] If the text is too long
|
|
13
|
-
#
|
|
14
|
-
class BaseValidator
|
|
15
|
-
def self.validate_max_tokens!(content, model_name, options = {})
|
|
16
|
-
text_token_length = if content.is_a?(Array)
|
|
17
|
-
token_length_from_messages(content, model_name, options)
|
|
18
|
-
else
|
|
19
|
-
token_length(content, model_name, options)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
leftover_tokens = token_limit(model_name) - text_token_length
|
|
23
|
-
|
|
24
|
-
# Some models have a separate token limit for completions (e.g. GPT-4 Turbo)
|
|
25
|
-
# We want the lower of the two limits
|
|
26
|
-
max_tokens = [leftover_tokens, completion_token_limit(model_name)].min
|
|
27
|
-
|
|
28
|
-
# Raise an error even if whole prompt is equal to the model's token limit (leftover_tokens == 0)
|
|
29
|
-
if max_tokens < 0
|
|
30
|
-
raise limit_exceeded_exception(token_limit(model_name), text_token_length)
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
max_tokens
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def self.limit_exceeded_exception(limit, length)
|
|
37
|
-
TokenLimitExceeded.new("This model's maximum context length is #{limit} tokens, but the given text is #{length} tokens long.", length - limit)
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Langchain
|
|
4
|
-
module Utils
|
|
5
|
-
module TokenLength
|
|
6
|
-
#
|
|
7
|
-
# This class is meant to validate the length of the text passed in to Cohere's API.
|
|
8
|
-
# It is used to validate the token length before the API call is made
|
|
9
|
-
#
|
|
10
|
-
|
|
11
|
-
class CohereValidator < BaseValidator
|
|
12
|
-
TOKEN_LIMITS = {
|
|
13
|
-
# Source:
|
|
14
|
-
# https://docs.cohere.com/docs/models
|
|
15
|
-
"command-light" => 4096,
|
|
16
|
-
"command" => 4096,
|
|
17
|
-
"base-light" => 2048,
|
|
18
|
-
"base" => 2048,
|
|
19
|
-
"embed-english-light-v2.0" => 512,
|
|
20
|
-
"embed-english-v2.0" => 512,
|
|
21
|
-
"embed-multilingual-v2.0" => 256,
|
|
22
|
-
"summarize-medium" => 2048,
|
|
23
|
-
"summarize-xlarge" => 2048
|
|
24
|
-
}.freeze
|
|
25
|
-
|
|
26
|
-
#
|
|
27
|
-
# Calculate token length for a given text and model name
|
|
28
|
-
#
|
|
29
|
-
# @param text [String] The text to calculate the token length for
|
|
30
|
-
# @param model_name [String] The model name to validate against
|
|
31
|
-
# @return [Integer] The token length of the text
|
|
32
|
-
#
|
|
33
|
-
def self.token_length(text, model_name, options = {})
|
|
34
|
-
res = options[:llm].tokenize(text: text)
|
|
35
|
-
res["tokens"].length
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
def self.token_limit(model_name)
|
|
39
|
-
TOKEN_LIMITS[model_name]
|
|
40
|
-
end
|
|
41
|
-
singleton_class.alias_method :completion_token_limit, :token_limit
|
|
42
|
-
|
|
43
|
-
def self.token_length_from_messages(messages, model_name, options)
|
|
44
|
-
messages.sum { |message| token_length(message.to_json, model_name, options) }
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
end
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Langchain
|
|
4
|
-
module Utils
|
|
5
|
-
module TokenLength
|
|
6
|
-
#
|
|
7
|
-
# This class is meant to validate the length of the text passed in to Google Palm's API.
|
|
8
|
-
# It is used to validate the token length before the API call is made
|
|
9
|
-
#
|
|
10
|
-
class GooglePalmValidator < BaseValidator
|
|
11
|
-
TOKEN_LIMITS = {
|
|
12
|
-
# Source:
|
|
13
|
-
# This data can be pulled when `list_models()` method is called: https://github.com/andreibondarev/google_palm_api#usage
|
|
14
|
-
|
|
15
|
-
# chat-bison-001 is the only model that currently supports countMessageTokens functions
|
|
16
|
-
"chat-bison-001" => {
|
|
17
|
-
"input_token_limit" => 4000, # 4096 is the limit but the countMessageTokens does not return anything higher than 4000
|
|
18
|
-
"output_token_limit" => 1024
|
|
19
|
-
}
|
|
20
|
-
# "text-bison-001" => {
|
|
21
|
-
# "input_token_limit" => 8196,
|
|
22
|
-
# "output_token_limit" => 1024
|
|
23
|
-
# },
|
|
24
|
-
# "embedding-gecko-001" => {
|
|
25
|
-
# "input_token_limit" => 1024
|
|
26
|
-
# }
|
|
27
|
-
}.freeze
|
|
28
|
-
|
|
29
|
-
#
|
|
30
|
-
# Calculate token length for a given text and model name
|
|
31
|
-
#
|
|
32
|
-
# @param text [String] The text to calculate the token length for
|
|
33
|
-
# @param model_name [String] The model name to validate against
|
|
34
|
-
# @param options [Hash] the options to create a message with
|
|
35
|
-
# @option options [Langchain::LLM:GooglePalm] :llm The Langchain::LLM:GooglePalm instance
|
|
36
|
-
# @return [Integer] The token length of the text
|
|
37
|
-
#
|
|
38
|
-
def self.token_length(text, model_name = "chat-bison-001", options = {})
|
|
39
|
-
response = options[:llm].client.count_message_tokens(model: model_name, prompt: text)
|
|
40
|
-
|
|
41
|
-
raise Langchain::LLM::ApiError.new(response["error"]["message"]) unless response["error"].nil?
|
|
42
|
-
|
|
43
|
-
response.dig("tokenCount")
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def self.token_length_from_messages(messages, model_name, options = {})
|
|
47
|
-
messages.sum { |message| token_length(message.to_json, model_name, options) }
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def self.token_limit(model_name)
|
|
51
|
-
TOKEN_LIMITS.dig(model_name, "input_token_limit")
|
|
52
|
-
end
|
|
53
|
-
singleton_class.alias_method :completion_token_limit, :token_limit
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
end
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "tiktoken_ruby"
|
|
4
|
-
|
|
5
|
-
module Langchain
|
|
6
|
-
module Utils
|
|
7
|
-
module TokenLength
|
|
8
|
-
#
|
|
9
|
-
# This class is meant to validate the length of the text passed in to OpenAI's API.
|
|
10
|
-
# It is used to validate the token length before the API call is made
|
|
11
|
-
#
|
|
12
|
-
class OpenAIValidator < BaseValidator
|
|
13
|
-
COMPLETION_TOKEN_LIMITS = {
|
|
14
|
-
# GPT-4 Turbo has a separate token limit for completion
|
|
15
|
-
# Source:
|
|
16
|
-
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
|
|
17
|
-
"gpt-4-1106-preview" => 4096,
|
|
18
|
-
"gpt-4-vision-preview" => 4096,
|
|
19
|
-
"gpt-3.5-turbo-1106" => 4096
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
# NOTE: The gpt-4-turbo-preview is an alias that will always point to the latest GPT 4 Turbo preview
|
|
23
|
-
# the future previews may have a different token limit!
|
|
24
|
-
TOKEN_LIMITS = {
|
|
25
|
-
# Source:
|
|
26
|
-
# https://platform.openai.com/docs/api-reference/embeddings
|
|
27
|
-
# https://platform.openai.com/docs/models/gpt-4
|
|
28
|
-
"text-embedding-3-large" => 8191,
|
|
29
|
-
"text-embedding-3-small" => 8191,
|
|
30
|
-
"text-embedding-ada-002" => 8191,
|
|
31
|
-
"gpt-3.5-turbo" => 16385,
|
|
32
|
-
"gpt-3.5-turbo-0301" => 4096,
|
|
33
|
-
"gpt-3.5-turbo-0613" => 4096,
|
|
34
|
-
"gpt-3.5-turbo-1106" => 16385,
|
|
35
|
-
"gpt-3.5-turbo-0125" => 16385,
|
|
36
|
-
"gpt-3.5-turbo-16k" => 16384,
|
|
37
|
-
"gpt-3.5-turbo-16k-0613" => 16384,
|
|
38
|
-
"text-davinci-003" => 4097,
|
|
39
|
-
"text-davinci-002" => 4097,
|
|
40
|
-
"code-davinci-002" => 8001,
|
|
41
|
-
"gpt-4" => 8192,
|
|
42
|
-
"gpt-4-0314" => 8192,
|
|
43
|
-
"gpt-4-0613" => 8192,
|
|
44
|
-
"gpt-4-32k" => 32768,
|
|
45
|
-
"gpt-4-32k-0314" => 32768,
|
|
46
|
-
"gpt-4-32k-0613" => 32768,
|
|
47
|
-
"gpt-4-1106-preview" => 128000,
|
|
48
|
-
"gpt-4-turbo" => 128000,
|
|
49
|
-
"gpt-4-turbo-2024-04-09" => 128000,
|
|
50
|
-
"gpt-4-turbo-preview" => 128000,
|
|
51
|
-
"gpt-4-0125-preview" => 128000,
|
|
52
|
-
"gpt-4-vision-preview" => 128000,
|
|
53
|
-
"gpt-4o" => 128000,
|
|
54
|
-
"gpt-4o-2024-05-13" => 128000,
|
|
55
|
-
"text-curie-001" => 2049,
|
|
56
|
-
"text-babbage-001" => 2049,
|
|
57
|
-
"text-ada-001" => 2049,
|
|
58
|
-
"davinci" => 2049,
|
|
59
|
-
"curie" => 2049,
|
|
60
|
-
"babbage" => 2049,
|
|
61
|
-
"ada" => 2049
|
|
62
|
-
}.freeze
|
|
63
|
-
|
|
64
|
-
#
|
|
65
|
-
# Calculate token length for a given text and model name
|
|
66
|
-
#
|
|
67
|
-
# @param text [String] The text to calculate the token length for
|
|
68
|
-
# @param model_name [String] The model name to validate against
|
|
69
|
-
# @return [Integer] The token length of the text
|
|
70
|
-
#
|
|
71
|
-
def self.token_length(text, model_name, options = {})
|
|
72
|
-
# tiktoken-ruby doesn't support text-embedding-3-large or text-embedding-3-small yet
|
|
73
|
-
if ["text-embedding-3-large", "text-embedding-3-small"].include?(model_name)
|
|
74
|
-
model_name = "text-embedding-ada-002"
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
encoder = Tiktoken.encoding_for_model(model_name)
|
|
78
|
-
encoder.encode(text).length
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
def self.token_limit(model_name)
|
|
82
|
-
TOKEN_LIMITS[model_name]
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def self.completion_token_limit(model_name)
|
|
86
|
-
COMPLETION_TOKEN_LIMITS[model_name] || token_limit(model_name)
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# If :max_tokens is passed in, take the lower of it and the calculated max_tokens
|
|
90
|
-
def self.validate_max_tokens!(content, model_name, options = {})
|
|
91
|
-
max_tokens = super(content, model_name, options)
|
|
92
|
-
[options[:max_tokens], max_tokens].reject(&:nil?).min
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
# Copied from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
|
|
96
|
-
# Return the number of tokens used by a list of messages
|
|
97
|
-
#
|
|
98
|
-
# @param messages [Array<Hash>] The messages to calculate the token length for
|
|
99
|
-
# @param model [String] The model name to validate against
|
|
100
|
-
# @return [Integer] The token length of the messages
|
|
101
|
-
#
|
|
102
|
-
def self.token_length_from_messages(messages, model_name, options = {})
|
|
103
|
-
encoding = Tiktoken.encoding_for_model(model_name)
|
|
104
|
-
|
|
105
|
-
if ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-4-0314", "gpt-4-32k-0314", "gpt-4-0613", "gpt-4-32k-0613"].include?(model_name)
|
|
106
|
-
tokens_per_message = 3
|
|
107
|
-
tokens_per_name = 1
|
|
108
|
-
elsif model_name == "gpt-3.5-turbo-0301"
|
|
109
|
-
tokens_per_message = 4 # every message follows {role/name}\n{content}\n
|
|
110
|
-
tokens_per_name = -1 # if there's a name, the role is omitted
|
|
111
|
-
elsif model_name.include?("gpt-3.5-turbo")
|
|
112
|
-
# puts "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613."
|
|
113
|
-
return token_length_from_messages(messages, "gpt-3.5-turbo-0613", options)
|
|
114
|
-
elsif model_name.include?("gpt-4")
|
|
115
|
-
# puts "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613."
|
|
116
|
-
return token_length_from_messages(messages, "gpt-4-0613", options)
|
|
117
|
-
else
|
|
118
|
-
raise NotImplementedError.new(
|
|
119
|
-
"token_length_from_messages() is not implemented for model #{model_name}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."
|
|
120
|
-
)
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
num_tokens = 0
|
|
124
|
-
messages.each do |message|
|
|
125
|
-
num_tokens += tokens_per_message
|
|
126
|
-
message.each do |key, value|
|
|
127
|
-
num_tokens += encoding.encode(value).length
|
|
128
|
-
num_tokens += tokens_per_name if ["name", :name].include?(key)
|
|
129
|
-
end
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
num_tokens += 3 # every reply is primed with assistant
|
|
133
|
-
num_tokens
|
|
134
|
-
end
|
|
135
|
-
end
|
|
136
|
-
end
|
|
137
|
-
end
|
|
138
|
-
end
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Langchain
|
|
4
|
-
module Utils
|
|
5
|
-
module TokenLength
|
|
6
|
-
class TokenLimitExceeded < StandardError
|
|
7
|
-
attr_reader :token_overflow
|
|
8
|
-
|
|
9
|
-
def initialize(message = "", token_overflow = 0)
|
|
10
|
-
super(message)
|
|
11
|
-
|
|
12
|
-
@token_overflow = token_overflow
|
|
13
|
-
end
|
|
14
|
-
end
|
|
15
|
-
end
|
|
16
|
-
end
|
|
17
|
-
end
|