RubyGems - langchainrb - Versions diffs - 0.6.0 → 0.6.2 - Mend

langchainrb 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/Gemfile.lock +4 -4
data/README.md +5 -5
data/lib/langchain/active_record/hooks.rb +98 -0
data/lib/langchain/railtie.rb +11 -0
data/lib/langchain/vectorsearch/base.rb +5 -0
data/lib/langchain/vectorsearch/chroma.rb +17 -4
data/lib/langchain/vectorsearch/pinecone.rb +28 -4
data/lib/langchain/vectorsearch/qdrant.rb +11 -5
data/lib/langchain/vectorsearch/weaviate.rb +60 -18
data/lib/langchain/version.rb +1 -1
data/lib/langchain.rb +6 -0
metadata +6 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
-  data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
+  metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
+  data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
 SHA512:
-  metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
-  data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
+  metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
+  data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 ## [Unreleased]
+## [0.6.2] - 2023-06-25
+- Qdrant, Chroma, and Pinecone are supported by ActiveRecord hooks
+## [0.6.1] - 2023-06-24
+- Adding support to hook vectorsearch into ActiveRecord models
 ## [0.6.0] - 2023-06-22
 - [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
 - Implement A21 token validator

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.6.0)
+    langchainrb (0.6.2)
       baran (~> 0.1.6)
       colorize (~> 0.8.1)
       json-schema (~> 4.0.0)
@@ -133,7 +133,7 @@ GEM
       faraday (>= 1.0)
       faraday_middleware
       graphql-client
-    graphql (2.0.21)
+    graphql (2.0.23)
     graphql-client (0.18.0)
       activesupport (>= 3.0)
       graphql
@@ -298,7 +298,7 @@ GEM
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.4.2)
-    weaviate-ruby (0.8.1)
+    weaviate-ruby (0.8.3)
       faraday (~> 1)
       faraday_middleware (~> 1)
       graphlient (~> 0.6.0)
@@ -346,7 +346,7 @@ DEPENDENCIES
   safe_ruby (~> 1.0.4)
   sequel (~> 5.68.0)
   standardrb
-  weaviate-ruby (~> 0.8.0)
+  weaviate-ruby (~> 0.8.3)
   wikipedia-client (~> 1.17.0)
   yard

data/README.md CHANGED Viewed

@@ -35,19 +35,19 @@ require "langchain"
 | Database | Querying           | Storage | Schema Management | Backups | Rails Integration |
 | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
-| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
+| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | :white_check_mark: |
 | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
 | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
-| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
+| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | :white_check_mark: |
 | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
-| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
-| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
+| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | :white_check_mark: |
+| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | :white_check_mark: |
 ### Using Vector Search Databases 🔍
 Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
-Add `gem "weaviate-ruby", "~> 0.8.0"`  to your Gemfile.
+Add `gem "weaviate-ruby", "~> 0.8.3"`  to your Gemfile.
 Pick the vector search database you'll be using and instantiate the client:
 ```ruby

data/lib/langchain/active_record/hooks.rb ADDED Viewed

@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+module Langchain
+  module ActiveRecord
+    # This module adds the following functionality to your ActiveRecord models:
+    # * `vectorsearch` class method to set the vector search provider
+    # * `similarity_search` class method to search for similar texts
+    # * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
+    #
+    # Usage:
+    #     class Recipe < ActiveRecord::Base
+    #       vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
+    #                    api_key: ENV["WEAVIATE_API_KEY"],
+    #                    url: ENV["WEAVIATE_URL"],
+    #                    index_name: "Recipes",
+    #                    llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+    #                 )
+    #
+    #       after_save :upsert_to_vectorsearch
+    #
+    #       # Overriding how the model is serialized before it's indexed
+    #       def as_vector
+    #         [
+    #           "Title: #{title}",
+    #           "Description: #{description}",
+    #           ...
+    #         ]
+    #         .compact
+    #         .join("\n")
+    #       end
+    #     end
+    #
+    # Create the default schema
+    #     Recipe.class_variable_get(:@@provider).create_default_schema
+    # Query the vector search provider
+    #     Recipe.similarity_search("carnivore dish")
+    # Delete the default schema to start over
+    #     Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
+    #
+    module Hooks
+      def self.included(base)
+        base.extend ClassMethods
+      end
+      # Index the text to the vector search provider
+      # You'd typically call this method in an ActiveRecord callback
+      #
+      # @return [Object] the response from the provider's add_texts or update_texts call
+      # @raise [Error] Indexing to vector search DB failed
+      def upsert_to_vectorsearch
+        if previously_new_record?
+          self.class.class_variable_get(:@@provider).add_texts(
+            texts: [as_vector],
+            ids: [id]
+          )
+        else
+          self.class.class_variable_get(:@@provider).update_texts(
+            texts: [as_vector],
+            ids: [id]
+          )
+        end
+      end
+      # Used to serialize the DB record to an indexable vector text
+      # Override this method in your model to customize the serialization
+      #
+      # @return [String] the text representation of the model
+      def as_vector
+        to_json
+      end
+      module ClassMethods
+        # Set the vector search provider
+        #
+        # @param provider [Object] The `Langchain::Vectorsearch::*` instance
+        def vectorsearch(provider:)
+          class_variable_set(:@@provider, provider)
+        end
+        # Search for similar texts
+        #
+        # @param query [String] The query to search for
+        # @param k [Integer] The number of results to return
+        # @return [ActiveRecord::Relation] The ActiveRecord relation
+        def similarity_search(query, k: 1)
+          records = class_variable_get(:@@provider).similarity_search(
+            query: query,
+            k: k
+          )
+          # We use "__id" when Weaviate is the provider
+          ids = records.map { |record| record.dig("id") || record.dig("__id") }
+          where(id: ids)
+        end
+      end
+    end
+  end
+end

data/lib/langchain/railtie.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module Langchain
+  class Railtie < Rails::Railtie
+    initializer "langchain" do
+      ActiveSupport.on_load(:active_record) do
+        ::ActiveRecord::Base.include Langchain::ActiveRecord::Hooks
+      end
+    end
+  end
+end

data/lib/langchain/vectorsearch/base.rb CHANGED Viewed

@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
       raise NotImplementedError, "#{self.class.name} does not support adding texts"
     end
+    # Method supported by Vectorsearch DB to update a list of texts in the index
+    def update_texts(...)
+      raise NotImplementedError, "#{self.class.name} does not support updating texts"
+    end
     # Method supported by Vectorsearch DB to search for similar texts in the index
     def similarity_search(...)
       raise NotImplementedError, "#{self.class.name} does not support similarity search"

data/lib/langchain/vectorsearch/chroma.rb CHANGED Viewed

@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:)
-      embeddings = Array(texts).map do |text|
+    def add_texts(texts:, ids: [])
+      embeddings = Array(texts).map.with_index do |text, i|
         ::Chroma::Resources::Embedding.new(
-          # TODO: Add support for passing your own IDs
-          id: SecureRandom.uuid,
+          id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
           embedding: llm.embed(text: text),
           # TODO: Add support for passing metadata
           metadata: [], # metadatas[index],
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
       collection.add(embeddings)
     end
+    def update_texts(texts:, ids:)
+      embeddings = Array(texts).map.with_index do |text, i|
+        ::Chroma::Resources::Embedding.new(
+          id: ids[i].to_s,
+          embedding: llm.embed(text: text),
+          # TODO: Add support for passing metadata
+          metadata: [], # metadatas[index],
+          document: text # Do we actually need to store the whole original document?
+        )
+      end
+      collection.update(embeddings)
+    end
     # Create the collection with the default schema
     # @return [Hash] The response from the server
     def create_default_schema

data/lib/langchain/vectorsearch/pinecone.rb CHANGED Viewed

@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
+    # @param ids [Array] The list of IDs to add
     # @param namespace [String] The namespace to add the texts to
     # @param metadata [Hash] The metadata to use for the texts
     # @return [Hash] The response from the server
-    def add_texts(texts:, namespace: "", metadata: nil)
-      vectors = texts.map do |text|
+    def add_texts(texts:, ids: [], namespace: "", metadata: nil)
+      vectors = texts.map.with_index do |text, i|
         {
-          # TODO: Allows passing in your own IDs
-          id: SecureRandom.uuid,
+          id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
           metadata: metadata || {content: text},
           values: llm.embed(text: text)
         }
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
       index.upsert(vectors: vectors, namespace: namespace)
     end
+    # Update a list of texts in the index
+    # @param texts [Array] The list of texts to update
+    # @param ids [Array] The list of IDs to update
+    # @param namespace [String] The namespace to update the texts in
+    # @param metadata [Hash] The metadata to use for the texts
+    # @return [Array] The response from the server
+    def update_texts(texts:, ids:, namespace: "", metadata: nil)
+      texts.map.with_index do |text, i|
+        # Pinecone::Vector#update ignores an argument when it is empty
+        index.update(
+          namespace: namespace,
+          id: ids[i].to_s,
+          values: llm.embed(text: text),
+          set_metadata: metadata
+        )
+      end
+    end
     # Create the index with the default schema
     # @return [Hash] The response from the server
     def create_default_schema
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
       llm.chat(prompt: prompt)
     end
+    # Pinecone index
+    # @return [Object] The Pinecone index
+    private def index
+      client.index(index_name)
+    end
   end
 end

data/lib/langchain/vectorsearch/qdrant.rb CHANGED Viewed

@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:)
+    def add_texts(texts:, ids:)
       batch = {ids: [], vectors: [], payloads: []}
-      Array(texts).each do |text|
-        batch[:ids].push(SecureRandom.uuid)
+      Array(texts).each_with_index do |text, i|
+        batch[:ids].push(ids[i] || SecureRandom.uuid)
         batch[:vectors].push(llm.embed(text: text))
         batch[:payloads].push({content: text})
       end
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
       )
     end
+    def update_texts(texts:, ids:)
+      add_texts(texts: texts, ids: ids)
+    end
     # Create the index with the default schema
     # @return [Hash] The response from the server
     def create_default_schema
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
       embedding:,
       k: 4
     )
-      client.points.search(
+      response = client.points.search(
         collection_name: index_name,
         limit: k,
         vector: embedding,
-        with_payload: true
+        with_payload: true,
+        with_vector: true
       )
+      response.dig("result")
     end
     # Ask a question and return the answer

data/lib/langchain/vectorsearch/weaviate.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
     #
     # Wrapper around Weaviate
     #
-    # Gem requirements: gem "weaviate-ruby", "~> 0.8.0"
+    # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
     #
     # Usage:
     # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
     # Initialize the Weaviate adapter
     # @param url [String] The URL of the Weaviate instance
     # @param api_key [String] The API key to use
-    # @param index_name [String] The name of the index to use
+    # @param index_name [String] The capitalized name of the index to use
     # @param llm [Object] The LLM client to use
     def initialize(url:, api_key:, index_name:, llm:)
       depends_on "weaviate-ruby"
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
         url: url,
         api_key: api_key
       )
+      # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
+      # TODO: Capitalize index_name
       @index_name = index_name
       super(llm: llm)
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:)
-      objects = Array(texts).map do |text|
-        {
-          class: index_name,
-          properties: {content: text},
-          vector: llm.embed(text: text)
-        }
-      end
+    def add_texts(texts:, ids: [])
       client.objects.batch_create(
-        objects: objects
+        objects: weaviate_objects(texts, ids)
       )
     end
+    # Update a list of texts in the index
+    # @param texts [Array] The list of texts to update
+    # @return [Hash] The response from the server
+    def update_texts(texts:, ids:)
+      uuids = []
+      # Retrieve the UUIDs of the objects to update
+      Array(texts).map.with_index do |text, i|
+        record = client.query.get(
+          class_name: index_name,
+          fields: "_additional { id }",
+          where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
+        )
+        uuids.push record[0].dig("_additional", "id")
+      end
+      # Update the objects
+      texts.map.with_index do |text, i|
+        client.objects.update(
+          class_name: index_name,
+          id: uuids[i],
+          properties: {
+            __id: ids[i].to_s,
+            content: text
+          },
+          vector: llm.embed(text: text)
+        )
+      end
+    end
     # Create default schema
     def create_default_schema
       client.schema.create(
         class_name: index_name,
         vectorizer: "none",
         properties: [
-          # TODO: Allow passing in your own IDs
-          {
-            dataType: ["text"],
-            name: "content"
-          }
+          # __id is used as a pointer to the original document
+          {dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
+          {dataType: ["text"], name: "content"}
         ]
       )
     end
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
         class_name: index_name,
         near_vector: near_vector,
         limit: k.to_s,
-        fields: "content _additional { id }"
+        fields: "__id content _additional { id }"
       )
     end
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
       llm.chat(prompt: prompt)
     end
+    private
+    def weaviate_objects(texts, ids = [])
+      Array(texts).map.with_index do |text, i|
+        weaviate_object(text, ids[i])
+      end
+    end
+    def weaviate_object(text, id = nil)
+      {
+        class: index_name,
+        properties: {
+          __id: id.to_s,
+          content: text
+        },
+        vector: llm.embed(text: text)
+      }
+    end
   end
 end

data/lib/langchain/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.6.0"
+  VERSION = "0.6.2"
 end

data/lib/langchain.rb CHANGED Viewed

@@ -145,6 +145,10 @@ module Langchain
     autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
   end
+  module ActiveRecord
+    autoload :Hooks, "langchain/active_record/hooks"
+  end
   module OutputParsers
     autoload :Base, "langchain/output_parsers/base"
     autoload :StructuredOutputParser, "langchain/output_parsers/structured"
@@ -154,3 +158,5 @@ module Langchain
     class BaseError < StandardError; end
   end
 end
+require "langchain/railtie" if defined?(Rails)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 0.6.2
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-23 00:00:00.000000000 Z
+date: 2023-06-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -436,14 +436,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.0
+        version: 0.8.3
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.0
+        version: 0.8.3
 - !ruby/object:Gem::Dependency
   name: wikipedia-client
   requirement: !ruby/object:Gem::Requirement
@@ -483,6 +483,7 @@ files:
 - examples/store_and_query_with_weaviate.rb
 - lefthook.yml
 - lib/langchain.rb
+- lib/langchain/active_record/hooks.rb
 - lib/langchain/agent/base.rb
 - lib/langchain/agent/react_agent/react_agent.rb
 - lib/langchain/agent/react_agent/react_agent_prompt.yaml
@@ -519,6 +520,7 @@ files:
 - lib/langchain/prompt/few_shot_prompt_template.rb
 - lib/langchain/prompt/loading.rb
 - lib/langchain/prompt/prompt_template.rb
+- lib/langchain/railtie.rb
 - lib/langchain/tool/base.rb
 - lib/langchain/tool/calculator.rb
 - lib/langchain/tool/database.rb