langchainrb 0.6.15 → 0.6.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 804ffbb08baabf8d2b0372e6893ca31a8c0933425dcabc78b2b48381b045d0c9
4
- data.tar.gz: a53ed993838ab79c343618b445533c285f35e186c3a1f4412f40f7da12b9911b
3
+ metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
4
+ data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
5
5
  SHA512:
6
- metadata.gz: d4aa19658c6c6ffdd5268c6ab83abe3ba17c3bb84b3880a6347bb67fa5c1b4bf0e9304b22c477b27401394450b692d0ee545f5745c6e3a2ec2e5e2ba50779584
7
- data.tar.gz: b1c918b8d28e86b11cde99e1b976cbffcca36dbc8ac354e08ce72d9056cc5eafd6ddb601f92edfcd28907687c2247c417477173746405cd1ca2b2ec0fc51df83
6
+ metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
7
+ data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.16] - 2023-10-02
4
+ - HyDE-style similarity search
5
+ - `Langchain::Chunker::Sentence` chunker
6
+ - Bug fixes
7
+
3
8
  ## [0.6.15] - 2023-09-22
4
9
  - Bump weaviate-ruby gem version
5
10
  - Ollama support
data/README.md CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
97
97
  )
98
98
  ```
99
99
  ```ruby
100
+ # Retrieve similar documents based on the query string passed in via the [HyDE technique](https://arxiv.org/abs/2212.10496)
101
+ client.similarity_search_with_hyde()
102
+ ```
103
+ ```ruby
100
104
  # Retrieve similar documents based on the embedding passed in
101
105
  client.similarity_search_by_vector(
102
106
  embedding:,
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pragmatic_segmenter"
4
+
5
+ module Langchain
6
+ module Chunker
7
+ #
8
+ # This chunker splits text by sentences.
9
+ #
10
+ # Usage:
11
+ # Langchain::Chunker::Sentence.new(text).chunks
12
+ #
13
+ class Sentence < Base
14
+ attr_reader :text
15
+
16
+ # @param text [String]
17
+ # @return [Langchain::Chunker::Sentence]
18
+ def initialize(text)
19
+ @text = text
20
+ end
21
+
22
+ # @return [Array<String>]
23
+ def chunks
24
+ ps = PragmaticSegmenter::Segmenter.new(text: text)
25
+ ps.segment
26
+ end
27
+ end
28
+ end
29
+ end
@@ -34,7 +34,7 @@ module Langchain::Prompt
34
34
  # @return [void]
35
35
  #
36
36
  def validate(template:, input_variables:)
37
- input_variables_set = @input_variables.uniq
37
+ input_variables_set = input_variables.uniq
38
38
  variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
39
39
 
40
40
  missing_variables = variables_from_template - input_variables_set
@@ -75,6 +75,7 @@ module Langchain::Prompt
75
75
  @prefix = prefix
76
76
  @suffix = suffix
77
77
  @example_separator = example_separator
78
+ @validate_template = validate_template
78
79
 
79
80
  validate(template: @prefix + @suffix, input_variables: @input_variables) if @validate_template
80
81
  end
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
128
128
  raise NotImplementedError, "#{self.class.name} does not support similarity search"
129
129
  end
130
130
 
131
+ # Paper: https://arxiv.org/abs/2212.10496
132
+ # Hypothetical Document Embeddings (HyDE)-augmented similarity search
133
+ #
134
+ # @param query [String] The query to search for
135
+ # @param k [Integer] The number of results to return
136
+ # @return [Object] Response from the underlying DB-specific similarity_search call
137
+ def similarity_search_with_hyde(query:, k: 4)
138
+ hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
139
+ similarity_search(query: hyde_completion, k: k)
140
+ end
141
+
131
142
  # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
132
143
  # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
133
144
  def similarity_search_by_vector(...)
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
142
153
  def_delegators :llm,
143
154
  :default_dimension
144
155
 
145
- def generate_prompt(question:, context:)
146
- prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
147
- prefix: "Context:",
148
- suffix: "---\nQuestion: {question}\n---\nAnswer:",
149
- example_prompt: Langchain::Prompt::PromptTemplate.new(
150
- template: "{context}",
151
- input_variables: ["context"]
152
- ),
153
- examples: [
154
- {context: context}
155
- ],
156
- input_variables: ["question"],
157
- example_separator: "\n"
156
+ # HyDE-style prompt
157
+ #
158
+ # @param question [String] User's question
159
+ # @return [String] Prompt
160
+ def generate_hyde_prompt(question:)
161
+ prompt_template = Langchain::Prompt.load_from_path(
162
+ # Zero-shot prompt to generate a hypothetical document based on a given question
163
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
158
164
  )
159
-
160
165
  prompt_template.format(question: question)
161
166
  end
162
167
 
168
+ # Retrieval Augmented Generation (RAG)
169
+ #
170
+ # @param question [String] User's question
171
+ # @param context [String] The context to synthesize the answer from
172
+ # @return [String] Prompt
173
+ def generate_rag_prompt(question:, context:)
174
+ prompt_template = Langchain::Prompt.load_from_path(
175
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
176
+ )
177
+ prompt_template.format(question: question, context: context)
178
+ end
179
+
163
180
  def add_data(paths:)
164
181
  raise ArgumentError, "Paths must be provided" if Array(paths).empty?
165
182
 
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
37
37
  id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
38
38
  embedding: llm.embed(text: text),
39
39
  # TODO: Add support for passing metadata
40
- metadata: [], # metadatas[index],
40
+ metadata: {}, # metadatas[index],
41
41
  document: text # Do we actually need to store the whole original document?
42
42
  )
43
43
  end
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
124
124
 
125
125
  context = context.join("\n---\n")
126
126
 
127
- prompt = generate_prompt(question: question, context: context)
127
+ prompt = generate_rag_prompt(question: question, context: context)
128
128
 
129
129
  llm.chat(prompt: prompt, &block)
130
130
  end
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
148
148
 
149
149
  context = content_data.join("\n---\n")
150
150
 
151
- prompt = generate_prompt(question: question, context: context)
151
+ prompt = generate_rag_prompt(question: question, context: context)
152
152
 
153
153
  llm.chat(prompt: prompt, &block)
154
154
  end
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
144
144
  end
145
145
  context = context.join("\n---\n")
146
146
 
147
- prompt = generate_prompt(question: question, context: context)
147
+ prompt = generate_rag_prompt(question: question, context: context)
148
148
 
149
149
  llm.chat(prompt: prompt, &block)
150
150
  end
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
177
177
  end
178
178
  context = context.join("\n---\n")
179
179
 
180
- prompt = generate_prompt(question: question, context: context)
180
+ prompt = generate_rag_prompt(question: question, context: context)
181
181
 
182
182
  llm.chat(prompt: prompt, &block)
183
183
  end
@@ -0,0 +1,10 @@
1
+ # Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
2
+ _type: prompt
3
+ input_variables:
4
+ - question
5
+ template: |
6
+ Please write a passage to answer the question
7
+
8
+ Question: {question}
9
+
10
+ Passage:
@@ -0,0 +1,11 @@
1
+ _type: prompt
2
+ input_variables:
3
+ - question
4
+ - context
5
+ template: |
6
+ Context:
7
+ {context}
8
+ ---
9
+ Question: {question}
10
+ ---
11
+ Answer:
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
134
134
  end
135
135
  context = context.join("\n---\n")
136
136
 
137
- prompt = generate_prompt(question: question, context: context)
137
+ prompt = generate_rag_prompt(question: question, context: context)
138
138
 
139
139
  llm.chat(prompt: prompt, &block)
140
140
  end
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
134
134
  end
135
135
  context = context.join("\n---\n")
136
136
 
137
- prompt = generate_prompt(question: question, context: context)
137
+ prompt = generate_rag_prompt(question: question, context: context)
138
138
 
139
139
  llm.chat(prompt: prompt, &block)
140
140
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.15"
4
+ VERSION = "0.6.16"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.15
4
+ version: 0.6.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-22 00:00:00.000000000 Z
11
+ date: 2023-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.1.8
19
+ version: 0.1.9
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.1.8
26
+ version: 0.1.9
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: colorize
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - '='
81
81
  - !ruby/object:Gem::Version
82
82
  version: 2.6.11
83
+ - !ruby/object:Gem::Dependency
84
+ name: pragmatic_segmenter
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.3.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.3.0
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: dotenv-rails
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -521,6 +535,7 @@ files:
521
535
  - lib/langchain/ai_message.rb
522
536
  - lib/langchain/chunker/base.rb
523
537
  - lib/langchain/chunker/recursive_text.rb
538
+ - lib/langchain/chunker/sentence.rb
524
539
  - lib/langchain/chunker/text.rb
525
540
  - lib/langchain/contextual_logger.rb
526
541
  - lib/langchain/conversation.rb
@@ -572,7 +587,6 @@ files:
572
587
  - lib/langchain/utils/token_length/base_validator.rb
573
588
  - lib/langchain/utils/token_length/cohere_validator.rb
574
589
  - lib/langchain/utils/token_length/google_palm_validator.rb
575
- - lib/langchain/utils/token_length/ollama_validator.rb
576
590
  - lib/langchain/utils/token_length/openai_validator.rb
577
591
  - lib/langchain/utils/token_length/token_limit_exceeded.rb
578
592
  - lib/langchain/vectorsearch/base.rb
@@ -581,6 +595,8 @@ files:
581
595
  - lib/langchain/vectorsearch/milvus.rb
582
596
  - lib/langchain/vectorsearch/pgvector.rb
583
597
  - lib/langchain/vectorsearch/pinecone.rb
598
+ - lib/langchain/vectorsearch/prompts/hyde.yaml
599
+ - lib/langchain/vectorsearch/prompts/rag.yaml
584
600
  - lib/langchain/vectorsearch/qdrant.rb
585
601
  - lib/langchain/vectorsearch/weaviate.rb
586
602
  - lib/langchain/version.rb
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "tiktoken_ruby"
4
-
5
- module Langchain
6
- module Utils
7
- module TokenLength
8
- #
9
- # This class is meant to validate the length of the text passed in to Ollama.
10
- # It is used to validate the token length before the API call is made
11
- #
12
- class OllamaValidator < BaseValidator
13
- end
14
- end
15
- end
16
- end