langchainrb 0.6.5 → 0.6.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 3404535e036c3efe68fd12706d2ebb269caed87b562fc38434122b1be01a356d
-   data.tar.gz: e3be77b32cf754235e8895fb1af60edca54cb5acb84278bfa2e39b6ed7c2abbe
+   metadata.gz: a9949f3ffd0338c90274f13b9862b0a6b9ec7b717b14b7ccaa8b6b8e0115f621
+   data.tar.gz: 43ebcb26d51b286278d5098ba50defef0c8bd1a897fa744c4519cfa10bdfdf58
  SHA512:
-   metadata.gz: b3fae04c73176c758c2d2d32c3ac538f3e094eb10f378b9a8befbbdcc62b60e55941a1bfefcb61eac7daca43ef91d0e57306dbc26bd59afbdad6ab4efff2ba89
-   data.tar.gz: 626bb4a226112ee6fe709077a6d49ba91c0483fee657848153e9cff61693183709aede5844237c24cc02c561f59be82ea1fd296fe1c3f4ee4d971494ee4dcd75
+   metadata.gz: c95f6e104aaa9a8dab30c9e78e342fdf960ccfef332a2737218f3cc186521369e6f03216d5ccd08329d5110cd15ef10e10a3f460caecc02dd50e32b1b60ff8b3
+   data.tar.gz: c8c059c760b361975ea7ba8eb8a7aa24c1dd7dde5264d7d8bdf20da4f7ec80fe3f1cf4f60dd16dd8028638f3335b1e1632b655ae6c4bdd01912d33371892b5a3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  ## [Unreleased]

+ ## [0.6.6] - 2023-07-13
+ - Langchain::Chunker::RecursiveText
+ - Fixes
+
+ ## [0.6.5] - 2023-07-06
+ - 🗣️ LLMs
+   - Introducing Llama.cpp support
+ - Langchain::OutputParsers::OutputFixingParser to wrap a Langchain::OutputParser and handle invalid response
+
  ## [0.6.4] - 2023-07-01
  - Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
  - Introduce `ConversationMemory`
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     langchainrb (0.6.5)
+     langchainrb (0.6.6)
        baran (~> 0.1.6)
        colorize (~> 0.8.1)
        json-schema (~> 4.0.0)
data/README.md CHANGED
@@ -39,7 +39,7 @@ require "langchain"
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
- | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
+ | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |

@@ -54,7 +54,7 @@ Pick the vector search database you'll be using and instantiate the client:
  client = Langchain::Vectorsearch::Weaviate.new(
    url: ENV["WEAVIATE_URL"],
    api_key: ENV["WEAVIATE_API_KEY"],
-   index: "",
+   index_name: "",
    llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
  )

@@ -427,7 +427,7 @@ agent.run(question: "How many users have a name with length greater than 5 in th
  | "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
  | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
  | "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
- | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
+ | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
  | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |

  #### Loaders 🚚
data/lib/langchain/chunker/recursive_text.rb ADDED
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ require "baran"
+
+ module Langchain
+   module Chunker
+     #
+     # Recursive text chunker. Preferentially splits on separators.
+     #
+     # Usage:
+     #     Langchain::Chunker::RecursiveText.new(text).chunks
+     #
+     class RecursiveText < Base
+       attr_reader :text, :chunk_size, :chunk_overlap, :separators
+
+       # @param [String] text
+       # @param [Integer] chunk_size
+       # @param [Integer] chunk_overlap
+       # @param [Array<String>] separators
+       def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
+         @text = text
+         @chunk_size = chunk_size
+         @chunk_overlap = chunk_overlap
+         @separators = separators
+       end
+
+       # @return [Array<String>]
+       def chunks
+         splitter = Baran::RecursiveCharacterTextSplitter.new(
+           chunk_size: chunk_size,
+           chunk_overlap: chunk_overlap,
+           separators: separators
+         )
+         splitter.chunks(text)
+       end
+     end
+   end
+ end
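For orientation, here is a minimal usage sketch of the new `Langchain::Chunker::RecursiveText` class added above. The input file, chunk sizes, and separators are illustrative values, not taken from the gem (the class itself defaults to `chunk_size: 1000`, `chunk_overlap: 200`, `separators: ["\n\n"]`):

```ruby
require "langchain"

# Hypothetical input document; any String works.
text = File.read("docs/guide.md")

# Prefer splitting on blank lines, then single newlines, delegating the
# actual splitting to Baran::RecursiveCharacterTextSplitter as shown above.
chunker = Langchain::Chunker::RecursiveText.new(
  text,
  chunk_size: 500,
  chunk_overlap: 50,
  separators: ["\n\n", "\n"]
)

chunks = chunker.chunks
puts chunks.length
```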
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -161,12 +161,16 @@ module Langchain::Vectorsearch
      end

      def add_data(paths:)
-       raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
+       raise ArgumentError, "Paths must be provided" if Array(paths).empty?

        texts = Array(paths)
          .flatten
-         .map { |path| Langchain::Loader.new(path)&.load&.value }
-         .compact
+         .map do |path|
+           data = Langchain::Loader.new(path)&.load&.chunks
+           data.map { |chunk| chunk[:text] }
+         end
+
+       texts.flatten!

        add_texts(texts: texts)
      end
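With this change, `add_data` loads each path with `Langchain::Loader`, collects the `:text` of every chunk, and flattens the result before embedding. A hedged sketch of a typical call, reusing the Weaviate client pattern from the README above (the index name and file paths are hypothetical):

```ruby
require "langchain"

client = Langchain::Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Documents", # hypothetical index name
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Each path is run through Langchain::Loader, the :text of every chunk is
# collected and flattened, and the resulting strings go to add_texts.
client.add_data(paths: ["docs/handbook.pdf", "docs/faq.md"])
```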
data/lib/langchain/vectorsearch/pgvector.rb CHANGED
@@ -40,20 +40,53 @@ module Langchain::Vectorsearch
        super(llm: llm)
      end

-     # Add a list of texts to the index
+     # Upsert a list of texts to the index
      # @param texts [Array<String>] The texts to add to the index
-     # @return [PG::Result] The response from the database
-     def add_texts(texts:)
-       data = texts.flat_map do |text|
-         [text, llm.embed(text: text)]
+     # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
+     # @return [PG::Result] The response from the database including the ids of
+     #   the added or updated texts.
+     def upsert_texts(texts:, ids:)
+       data = texts.zip(ids).flat_map do |(text, id)|
+         [id, text, llm.embed(text: text)]
        end
-       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+       values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
+       # see https://github.com/pgvector/pgvector#storing
        client.exec_params(
-         "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
+         "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
+           #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
          data
        )
      end

+     # Add a list of texts to the index
+     # @param texts [Array<String>] The texts to add to the index
+     # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
+     # @return [PG::Result] The response from the database including the ids of
+     #   the added texts.
+     def add_texts(texts:, ids: nil)
+       if ids.nil? || ids.empty?
+         data = texts.flat_map do |text|
+           [text, llm.embed(text: text)]
+         end
+         values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+         client.exec_params(
+           "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
+           data
+         )
+       else
+         upsert_texts(texts: texts, ids: ids)
+       end
+     end
+
+     # Update a list of ids and corresponding texts to the index
+     # @param texts [Array<String>] The texts to add to the index
+     # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
+     # @return [PG::Result] The response from the database including the ids of
+     #   the updated texts.
+     def update_texts(texts:, ids:)
+       upsert_texts(texts: texts, ids: ids)
+     end
+
      # Create default schema
      # @return [PG::Result] The response from the database
      def create_default_schema
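A sketch of how the new id handling might be exercised. Only `add_texts`, `upsert_texts`, and `update_texts` come from this diff; the constructor arguments below are assumptions modelled on the README's client pattern, so check the gem's docs for the exact Pgvector options:

```ruby
require "langchain"

pgvector = Langchain::Vectorsearch::Pgvector.new(
  url: ENV["POSTGRES_URL"],  # assumption: database connection URL
  index_name: "documents",   # assumption: table name
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Without ids, rows are inserted as before (the INSERT now includes RETURNING id).
pgvector.add_texts(texts: ["first document", "second document"])

# With ids, the call is routed to upsert_texts, which uses
# ON CONFLICT (id) DO UPDATE to overwrite existing rows.
pgvector.add_texts(texts: ["revised first document"], ids: [1])

# update_texts is a thin wrapper around upsert_texts.
pgvector.update_texts(texts: ["revised second document"], ids: [2])
```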
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Langchain
-   VERSION = "0.6.5"
+   VERSION = "0.6.6"
  end
data/lib/langchain.rb CHANGED
@@ -82,6 +82,7 @@ module Langchain
    module Chunker
      autoload :Base, "langchain/chunker/base"
      autoload :Text, "langchain/chunker/text"
+     autoload :RecursiveText, "langchain/chunker/recursive_text"
    end

    module Tool
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
-   version: 0.6.5
+   version: 0.6.6
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-07-06 00:00:00.000000000 Z
+ date: 2023-07-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: baran
@@ -507,6 +507,7 @@ files:
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
  - lib/langchain/chunker/base.rb
+ - lib/langchain/chunker/recursive_text.rb
  - lib/langchain/chunker/text.rb
  - lib/langchain/contextual_logger.rb
  - lib/langchain/conversation.rb