langchainrb 0.6.3 → 0.6.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 73f980d6a7dd67d0112038a8266a05f8b5697e05c98e61a94598d38406de7c8b
-  data.tar.gz: 8abc93ad6da8ad05d76ac35eff9aaab963c33549acb94bda4dd83daddeb71f4d
+  metadata.gz: 78810f63a496c6b98208a9c838cbdae41a8c944879e68f16fc4362de90c49110
+  data.tar.gz: c95d357da62c8120a2a105a94b219ca1f3552f85fff30bb7cb3d40def336baeb
 SHA512:
-  metadata.gz: 7b5450e51ee732a1e2414e3db5f8a46d113d0b537b561f95556756e2854c9bb9175c898388acc2bb8672b2479e647625d3166580b7b1b25eb6cdc86ff6d42aee
-  data.tar.gz: b5843004533f952782946e6a753aa5306c6ad4a5f97887416f8f10f4192ca1f88d00d30624cd62581022314649e5d291d9c1ab46f2bab31f9455860fc533c83d
+  metadata.gz: ee0c549ecebd98ce940b6dc05c8aa2783c265d7cb3903ca30448be0f906e89f353e419b2bb862178fe9081baa002b42fd7aaf88ec244a63beec9bc862e3a9410
+  data.tar.gz: a4b67c5b0d268d6b96622209fe3201c8585bf44d1d44dca0bc061de3f1ba1797e87df61111ddc6565b0d75b23a06677aa3bad6e41fbd4a119ff69f6b11e756ee
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
 ## [Unreleased]
 
+## [0.6.4] - 2023-07-01
+- Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
+- Introduce `ConversationMemory`
+- Allow loading multiple files from a directory
+- Add missing `get_default_schema()`, `create_default_schema()`, and `destroy_default_schema()` methods to `Langchain::Vectorsearch::*` classes
+
 ## [0.6.3] - 2023-06-25
 - Add #destroy_default_schema() to Langchain::Vectorsearch::* classes
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.6.3)
+    langchainrb (0.6.4)
       baran (~> 0.1.6)
       colorize (~> 0.8.1)
       json-schema (~> 4.0.0)
data/examples/conversation_with_openai.rb ADDED
@@ -0,0 +1,52 @@
+require "langchain"
+require "reline"
+
+# gem install reline
+# or add `gem "reline"` to your Gemfile
+
+openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+
+chat = Langchain::Conversation.new(llm: openai)
+chat.set_context("You are a chatbot from the future")
+
+DONE = %w[done end eof exit].freeze
+
+puts "Welcome to the chatbot from the future!"
+
+def prompt_for_message
+  puts "(multiline input; type 'end' on its own line when done, or 'exit' to quit)"
+
+  user_message = Reline.readmultiline("Question: ", true) do |multiline_input|
+    last = multiline_input.split.last
+    DONE.include?(last)
+  end
+
+  return :noop unless user_message
+
+  lines = user_message.split("\n")
+  if lines.size > 1 && DONE.include?(lines.last)
+    # remove the "done" from the message
+    user_message = lines[0..-2].join("\n")
+  end
+
+  return :exit if DONE.include?(user_message.downcase)
+
+  user_message
+end
+
+begin
+  loop do
+    user_message = prompt_for_message
+
+    case user_message
+    when :noop
+      next
+    when :exit
+      break
+    end
+
+    puts chat.message(user_message)
+  end
+rescue Interrupt
+  exit 0
+end
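This is the new examples/conversation_with_openai.rb listed in the gem's file manifest below. Assuming `OPENAI_API_KEY` is exported and the reline gem is installed, it should run directly with `ruby examples/conversation_with_openai.rb`.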
data/lib/langchain/conversation.rb CHANGED
@@ -17,10 +17,7 @@ module Langchain
   #   end
   #
   class Conversation
-    attr_reader :context, :examples, :messages
-
-    # The least number of tokens we want to be under the limit by
-    TOKEN_LEEWAY = 20
+    attr_reader :options
 
     # Initialize Conversation with a LLM
     #
@@ -31,7 +28,11 @@ module Langchain
       @llm = llm
       @context = nil
       @examples = []
-      @messages = options.delete(:messages) || []
+      @memory = ConversationMemory.new(
+        llm: llm,
+        messages: options.delete(:messages) || [],
+        strategy: options.delete(:memory_strategy)
+      )
       @options = options
       @block = block
     end
@@ -39,59 +40,50 @@ module Langchain
     # Set the context of the conversation. Usually used to set the model's persona.
     # @param message [String] The context of the conversation
     def set_context(message)
-      @context = message
+      @memory.set_context message
     end
 
     # Add examples to the conversation. Used to give the model a sense of the conversation.
     # @param examples [Array<Hash>] The examples to add to the conversation
     def add_examples(examples)
-      @examples.concat examples
+      @memory.add_examples examples
    end
 
     # Message the model with a prompt and return the response.
     # @param message [String] The prompt to message the model with
     # @return [String] The response from the model
     def message(message)
-      append_user_message(message)
+      @memory.append_user_message(message)
       response = llm_response(message)
-      append_ai_message(response)
+      @memory.append_ai_message(response)
       response
     end
 
-    private
-
-    def llm_response(prompt)
-      @llm.chat(messages: @messages, context: @context, examples: @examples, **@options, &@block)
-    rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
-      raise exception if @messages.size == 1
-
-      reduce_messages(exception.token_overflow)
-      retry
+    # Messages from conversation memory
+    # @return [Array<Hash>] The messages from the conversation memory
+    def messages
+      @memory.messages
     end
 
-    def reduce_messages(token_overflow)
-      @messages = @messages.drop_while do |message|
-        proceed = token_overflow > -TOKEN_LEEWAY
-        token_overflow -= token_length(message.to_json, model_name, llm: @llm)
-
-        proceed
-      end
+    # Context from conversation memory
+    # @return [String] Context from conversation memory
+    def context
+      @memory.context
     end
 
-    def append_ai_message(message)
-      @messages << {role: "ai", content: message}
+    # Examples from conversation memory
+    # @return [Array<Hash>] Examples from the conversation memory
+    def examples
+      @memory.examples
     end
 
-    def append_user_message(message)
-      @messages << {role: "user", content: message}
-    end
-
-    def model_name
-      @options[:model] || @llm.class::DEFAULTS[:chat_completion_model_name]
-    end
+    private
 
-    def token_length(content, model_name, options)
-      @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
+    def llm_response(prompt)
+      @llm.chat(messages: @memory.messages, context: @memory.context, examples: @memory.examples, **@options, &@block)
+    rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
+      @memory.reduce_messages(exception)
+      retry
     end
   end
 end
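Callers keep the same public surface, but history now lives in `ConversationMemory` and can be tuned with the new `memory_strategy:` option consumed in the constructor above. A minimal sketch, assuming an OpenAI key in the environment:

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# :summarize compresses old history via the LLM when the token limit is hit;
# the default strategy (:truncate) drops the oldest messages instead.
chat = Langchain::Conversation.new(llm: llm, memory_strategy: :summarize)
chat.set_context("You are a concise assistant")

chat.message("What is a vector database?")
chat.messages # the reader added above; delegates to @memory.messages
```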
data/lib/langchain/conversation_memory.rb ADDED
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module Langchain
+  class ConversationMemory
+    attr_reader :examples, :messages
+
+    # The least number of tokens we want to be under the limit by
+    TOKEN_LEEWAY = 20
+
+    def initialize(llm:, messages: [], **options)
+      @llm = llm
+      @context = nil
+      @summary = nil
+      @examples = []
+      @messages = messages
+      @strategy = options.delete(:strategy) || :truncate
+      @options = options
+    end
+
+    def set_context(message)
+      @context = message
+    end
+
+    def add_examples(examples)
+      @examples.concat examples
+    end
+
+    def append_ai_message(message)
+      @messages << {role: "ai", content: message}
+    end
+
+    def append_user_message(message)
+      @messages << {role: "user", content: message}
+    end
+
+    def reduce_messages(exception)
+      case @strategy
+      when :truncate
+        truncate_messages(exception)
+      when :summarize
+        summarize_messages
+      else
+        raise "Unknown strategy: #{@strategy}"
+      end
+    end
+
+    def context
+      return if @context.nil? && @summary.nil?
+
+      [@context, @summary].compact.join("\n")
+    end
+
+    private
+
+    def truncate_messages(exception)
+      raise exception if @messages.size == 1
+
+      token_overflow = exception.token_overflow
+
+      @messages = @messages.drop_while do |message|
+        proceed = token_overflow > -TOKEN_LEEWAY
+        token_overflow -= token_length(message.to_json, model_name, llm: @llm)
+
+        proceed
+      end
+    end
+
+    def summarize_messages
+      history = [@summary, @messages.to_json].compact.join("\n")
+      partitions = [history[0, history.size / 2], history[history.size / 2, history.size]]
+
+      @summary = partitions.map { |messages| @llm.summarize(text: messages.to_json) }.join("\n")
+
+      @messages = [@messages.last]
+    end
+
+    def partition_messages
+    end
+
+    def model_name
+      @llm.class::DEFAULTS[:chat_completion_model_name]
+    end
+
+    def token_length(content, model_name, options)
+      @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
+    end
+  end
+end
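The memory object can also be exercised on its own; note that the `context` reader merges the persona with any running summary once the :summarize strategy has fired. A small sketch, reusing the `llm` instance from the previous example:

```ruby
memory = Langchain::ConversationMemory.new(llm: llm, strategy: :truncate)
memory.set_context("You are a helpful assistant")
memory.append_user_message("Hello")
memory.append_ai_message("Hi! How can I help?")

memory.messages # => [{role: "user", content: "Hello"}, {role: "ai", content: "Hi! How can I help?"}]
memory.context  # => "You are a helpful assistant" (joined with @summary once one exists)
```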
data/lib/langchain/llm/base.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 module Langchain::LLM
+  class ApiError < StandardError; end
+
   # A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
   #
   # Langchain.rb provides a common interface to interact with all supported LLMs:
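With a dedicated `ApiError` class, callers can rescue LLM failures by type instead of pattern-matching generic RuntimeError messages. A sketch, assuming the `openai` client from the example file above:

```ruby
begin
  openai.chat(prompt: "Hello")
rescue Langchain::LLM::ApiError => e
  # raised e.g. by OpenAI#chat below when the API returns an "error" payload
  Langchain.logger.warn("LLM call failed: #{e.message}")
end
```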
data/lib/langchain/llm/cohere.rb CHANGED
@@ -5,7 +5,7 @@ module Langchain::LLM
   # Wrapper around the Cohere API.
   #
   # Gem requirements:
-  #     gem "cohere-ruby", "~> 0.9.4"
+  #     gem "cohere-ruby", "~> 0.9.5"
   #
   # Usage:
   #     cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
data/lib/langchain/llm/openai.rb CHANGED
@@ -125,7 +125,7 @@ module Langchain::LLM
 
       response = client.chat(parameters: parameters)
 
-      raise "Chat completion failed: #{response}" if !response.empty? && response.dig("error")
+      raise Langchain::LLM::ApiError.new "Chat completion failed: #{response.dig("error", "message")}" if !response.empty? && response.dig("error")
 
       unless streaming
         response.dig("choices", 0, "message", "content")
data/lib/langchain/loader.rb CHANGED
@@ -51,6 +51,13 @@ module Langchain
       !!(@path =~ URI_REGEX)
     end
 
+    # Is the path a directory?
+    #
+    # @return [Boolean] true if path is a directory
+    def directory?
+      File.directory?(@path)
+    end
+
     # Load data from a file or URL
     #
     #     loader = Langchain::Loader.new("README.md")
@@ -69,15 +76,10 @@ module Langchain
     #
     # @return [Data] data that was loaded
     def load(&block)
-      @raw_data = url? ? load_from_url : load_from_path
+      return process_data(load_from_url, &block) if url?
+      return load_from_directory(&block) if directory?
 
-      data = if block
-        yield @raw_data.read, @options
-      else
-        processor_klass.new(@options).parse(@raw_data)
-      end
-
-      Langchain::Data.new(data, source: @path)
+      process_data(load_from_path, &block)
     end
 
     private
@@ -92,6 +94,27 @@ module Langchain
       File.open(@path)
     end
 
+    def load_from_directory(&block)
+      Dir.glob(File.join(@path, "**/*")).map do |file|
+        # Only load and add to result files with supported extensions
+        Langchain::Loader.new(file, @options).load(&block)
+      rescue UnknownFormatError
+        nil
+      end.flatten.compact
+    end
+
+    def process_data(data, &block)
+      @raw_data = data
+
+      result = if block
+        yield @raw_data.read, @options
+      else
+        processor_klass.new(@options).parse(@raw_data)
+      end
+
+      Langchain::Data.new(result)
+    end
+
     def processor_klass
       raise UnknownFormatError unless (kind = find_processor)
 
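With `directory?` and `load_from_directory` in place, one `Loader` call can now walk a directory tree, skipping files whose extension has no processor instead of aborting. A minimal sketch, assuming a local `docs/` folder:

```ruby
# Returns an array of Langchain::Data, one per supported file found recursively.
data = Langchain::Loader.new("docs/").load

# Single files and URLs still flow through the same entry point via process_data.
readme = Langchain::Loader.new("README.md").load
```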
data/lib/langchain/utils/token_length/google_palm_validator.rb CHANGED
@@ -37,6 +37,9 @@ module Langchain
       #
       def self.token_length(text, model_name = "chat-bison-001", options)
         response = options[:llm].client.count_message_tokens(model: model_name, prompt: text)
+
+        raise Langchain::LLM::ApiError.new(response["error"]["message"]) unless response["error"].nil?
+
         response.dig("tokenCount")
       end
 
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -98,6 +98,11 @@ module Langchain::Vectorsearch
       @llm = llm
     end
 
+    # Method supported by Vectorsearch DB to retrieve a default schema
+    def get_default_schema
+      raise NotImplementedError, "#{self.class.name} does not support retrieving a default schema"
+    end
+
     # Method supported by Vectorsearch DB to create a default schema
     def create_default_schema
       raise NotImplementedError, "#{self.class.name} does not support creating a default schema"
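Adapters that don't override the new method inherit this base implementation, so an unsupported schema lookup fails loudly rather than silently. A sketch of the calling pattern, assuming a running Chroma server and an `llm` instance as above:

```ruby
store = Langchain::Vectorsearch::Chroma.new(url: ENV["CHROMA_URL"], index_name: "documents", llm: llm)

begin
  store.get_default_schema
rescue NotImplementedError => e
  Langchain.logger.warn(e.message) # this adapter exposes no schema API
end
```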
data/lib/langchain/vectorsearch/chroma.rb CHANGED
@@ -67,10 +67,17 @@ module Langchain::Vectorsearch
       ::Chroma::Resources::Collection.create(index_name)
     end
 
-    # TODO: Uncomment and add the spec
-    # def destroy_default_schema
-    #   ::Chroma::Resources::Collection.delete(index_name)
-    # end
+    # Get the default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      ::Chroma::Resources::Collection.get(index_name)
+    end
+
+    # Delete the default schema
+    # @return [Hash] The response from the server
+    def destroy_default_schema
+      ::Chroma::Resources::Collection.delete(index_name)
+    end
 
     # Search for similar texts
     # @param query [String] The text to search for
data/lib/langchain/vectorsearch/hnswlib.rb CHANGED
@@ -10,8 +10,7 @@ module Langchain::Vectorsearch
   #     gem "hnswlib", "~> 0.8.1"
   #
   # Usage:
-  #     hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, url:, index_name:)
-  #
+  #     hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, path_to_index:)
 
     attr_reader :client, :path_to_index
data/lib/langchain/vectorsearch/milvus.rb CHANGED
@@ -79,7 +79,17 @@ module Langchain::Vectorsearch
       )
     end
 
-    # TODO: Add destroy_default_schema method
+    # Get the default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      client.collections.get(collection_name: index_name)
+    end
+
+    # Delete default schema
+    # @return [Hash] The response from the server
+    def destroy_default_schema
+      client.collections.delete(collection_name: index_name)
+    end
 
     def similarity_search(query:, k: 4)
       embedding = llm.embed(text: query)
data/lib/langchain/vectorsearch/pinecone.rb CHANGED
@@ -85,6 +85,12 @@ module Langchain::Vectorsearch
       client.delete_index(index_name)
     end
 
+    # Get the default schema
+    # @return [Pinecone::Vector] The default schema
+    def get_default_schema
+      index
+    end
+
     # Search for similar texts
     # @param query [String] The text to search for
     # @param k [Integer] The number of results to return
data/lib/langchain/vectorsearch/qdrant.rb CHANGED
@@ -32,11 +32,12 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:, ids:)
+    def add_texts(texts:, ids: [])
       batch = {ids: [], vectors: [], payloads: []}
 
       Array(texts).each_with_index do |text, i|
-        batch[:ids].push(ids[i] || SecureRandom.uuid)
+        id = ids[i] || SecureRandom.uuid
+        batch[:ids].push(id)
         batch[:vectors].push(llm.embed(text: text))
         batch[:payloads].push({content: text})
       end
@@ -51,6 +52,12 @@ module Langchain::Vectorsearch
       add_texts(texts: texts, ids: ids)
     end
 
+    # Get the default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      client.collections.get(collection_name: index_name)
+    end
+
     # Deletes the default schema
     # @return [Hash] The response from the server
     def destroy_default_schema
@@ -109,7 +116,7 @@ module Langchain::Vectorsearch
     def ask(question:)
       search_results = similarity_search(query: question)
 
-      context = search_results.dig("result").map do |result|
+      context = search_results.map do |result|
         result.dig("payload").to_s
       end
       context = context.join("\n---\n")
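The `ids: []` default is the `Qdrant#add_texts` fix called out in the changelog: the keyword is no longer required, and any text without an explicit id gets a generated UUID. A sketch, assuming a reachable Qdrant instance and the `llm` from earlier:

```ruby
qdrant = Langchain::Vectorsearch::Qdrant.new(
  url: ENV["QDRANT_URL"],
  api_key: ENV["QDRANT_API_KEY"],
  index_name: "documents",
  llm: llm
)

# Before 0.6.4 this raised ArgumentError (missing keyword: ids);
# now each text is stored under a SecureRandom.uuid.
qdrant.add_texts(texts: ["first document", "second document"])
```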
data/lib/langchain/vectorsearch/weaviate.rb CHANGED
@@ -85,6 +85,12 @@ module Langchain::Vectorsearch
       )
     end
 
+    # Get default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      client.schema.get(class_name: index_name)
+    end
+
     # Delete the index
     # @return [Boolean] Whether the index was deleted
     def destroy_default_schema
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.6.3"
+  VERSION = "0.6.4"
 end
data/lib/langchain.rb CHANGED
@@ -51,6 +51,7 @@ module Langchain
   autoload :Loader, "langchain/loader"
   autoload :Data, "langchain/data"
   autoload :Conversation, "langchain/conversation"
+  autoload :ConversationMemory, "langchain/conversation_memory"
   autoload :DependencyHelper, "langchain/dependency_helper"
   autoload :ContextualLogger, "langchain/contextual_logger"
 
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.6.3
+  version: 0.6.4
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-26 00:00:00.000000000 Z
+date: 2023-07-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -474,6 +474,7 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
+- examples/conversation_with_openai.rb
 - examples/create_and_manage_few_shot_prompt_templates.rb
 - examples/create_and_manage_prompt_templates.rb
 - examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
@@ -494,6 +495,7 @@ files:
 - lib/langchain/chunker/text.rb
 - lib/langchain/contextual_logger.rb
 - lib/langchain/conversation.rb
+- lib/langchain/conversation_memory.rb
 - lib/langchain/data.rb
 - lib/langchain/dependency_helper.rb
 - lib/langchain/llm/ai21.rb