langchainrb 0.6.14 → 0.6.16

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
- metadata.gz: 7beb48b4b2bc88c4a25bef1cbc9eb0e95c0bd3eaeb02af6a12e0026c9081dd6d
- data.tar.gz: ebdb7816bf0e37e221a13ecf6cb620a335d3d49b564be76f4f714cb9849ebeb6
+ metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
+ data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
 SHA512:
- metadata.gz: 1cb2c147ffbe999eb1f027161e6cda3beea76e31b821bdec564eb36cc6a2d96e31c5d450be8d744738fabef07a9f519c8b96ab2e6dc9585fb05ceea7ebc494a2
- data.tar.gz: 9f3a1d015de4f568bea1e08637a07ed6bf2ef93bb68068ebe51a50c16ca5a1d5d3f850cf19439ad785b6078305a7dfbd740f7bf7916c1e3466efdb04060f360e
+ metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
+ data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
 ## [Unreleased]
 
+ ## [0.6.16] - 2023-10-02
+ - HyDE-style similarity search
+ - `Langchain::Chunker::Sentence` chunker
+ - Bug fixes
+
+ ## [0.6.15] - 2023-09-22
+ - Bump weaviate-ruby gem version
+ - Ollama support
+
 ## [0.6.14] - 2023-09-11
 - Add `find` method to `Langchain::Vectorsearch::Qdrant`
 - Enhance Google search output
data/README.md CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
 )
 ```
 ```ruby
+ # Retrieve similar documents for the query string using the [HyDE technique](https://arxiv.org/abs/2212.10496)
+ client.similarity_search_with_hyde(query: "...")
+ ```
+ ```ruby
 # Retrieve similar documents based on the embedding passed in
 client.similarity_search_by_vector(
 embedding:,
@@ -210,6 +214,18 @@ anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
 anthropic.complete(prompt: "What is the meaning of life?")
 ```
 
+ #### Ollama
+ ```ruby
+ ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+ ```
+
+ ```ruby
+ ollama.complete(prompt: "What is the meaning of life?")
+ ```
+ ```ruby
+ ollama.embed(text: "Hello world!")
+ ```
+
 ### Using Prompts 📋
 
 #### Prompt Templates
data/lib/langchain/chunker/sentence.rb ADDED
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ require "pragmatic_segmenter"
+
+ module Langchain
+ module Chunker
+ #
+ # This chunker splits text by sentences.
+ #
+ # Usage:
+ # Langchain::Chunker::Sentence.new(text).chunks
+ #
+ class Sentence < Base
+ attr_reader :text
+
+ # @param text [String]
+ # @return [Langchain::Chunker::Sentence]
+ def initialize(text)
+ @text = text
+ end
+
+ # @return [Array<String>]
+ def chunks
+ ps = PragmaticSegmenter::Segmenter.new(text: text)
+ ps.segment
+ end
+ end
+ end
+ end
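
`Langchain::Chunker::Sentence` delegates all sentence-boundary detection to the `pragmatic_segmenter` gem (added as a runtime dependency further down in this diff). A minimal usage sketch, assuming langchainrb 0.6.16 is installed; the sample text is illustrative:

```ruby
require "langchain"

text = "Ruby favors developer happiness. It was created by Matz. Many gems build on it."

# #chunks returns an Array<String>, one element per detected sentence.
Langchain::Chunker::Sentence.new(text).chunks
# => ["Ruby favors developer happiness.", "It was created by Matz.", "Many gems build on it."]
```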
data/lib/langchain/llm/ollama.rb ADDED
@@ -0,0 +1,79 @@
+ # frozen_string_literal: true
+
+ module Langchain::LLM
+ # Interface to Ollama API.
+ # Available models: https://ollama.ai/library
+ #
+ # Usage:
+ # ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+ #
+ class Ollama < Base
+ attr_reader :url
+
+ DEFAULTS = {
+ temperature: 0.0,
+ completion_model_name: "llama2",
+ embeddings_model_name: "llama2"
+ }.freeze
+
+ # Initialize the Ollama client
+ # @param url [String] The URL of the Ollama instance
+ def initialize(url:)
+ @url = url
+ end
+
+ # Generate the completion for a given prompt
+ # @param prompt [String] The prompt to complete
+ # @param model [String] The model to use
+ # @param options [Hash] The options to use (https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
+ # @return [String] The completed prompt
+ def complete(prompt:, model: nil, **options)
+ response = +""
+
+ client.post("api/generate") do |req|
+ req.body = {}
+ req.body["prompt"] = prompt
+ req.body["model"] = model || DEFAULTS[:completion_model_name]
+
+ req.body["options"] = options if options.any?
+
+ # TODO: Implement streaming support when a &block is passed in
+ req.options.on_data = proc do |chunk, size|
+ json_chunk = JSON.parse(chunk)
+
+ unless json_chunk.dig("done")
+ response << json_chunk.dig("response")
+ end
+ end
+ end
+
+ response
+ end
+
+ # Generate an embedding for a given text
+ # @param text [String] The text to generate an embedding for
+ # @param model [String] The model to use
+ # @param options [Hash] The options to use (https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
+ def embed(text:, model: nil, **options)
+ response = client.post("api/embeddings") do |req|
+ req.body = {}
+ req.body["prompt"] = text
+ req.body["model"] = model || DEFAULTS[:embeddings_model_name]
+
+ req.body["options"] = options if options.any?
+ end
+
+ response.body.dig("embedding")
+ end
+
+ private
+
+ def client
+ @client ||= Faraday.new(url: url) do |conn|
+ conn.request :json
+ conn.response :json
+ conn.response :raise_error
+ end
+ end
+ end
+ end
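
`complete` posts to `api/generate` and accumulates the streamed `response` fragments into a single string, while `embed` posts to `api/embeddings` and returns the `embedding` array. A short sketch against a locally running Ollama server (the URL and the extra sampling option are assumptions; any pulled model works):

```ruby
require "langchain"

# Default Ollama port; adjust to your instance.
ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

# Keyword arguments beyond prompt:/model: are forwarded as Ollama "options".
ollama.complete(prompt: "What is the meaning of life?", temperature: 0.7)

# Returns an Array of floats sized by the model's embedding dimension.
ollama.embed(text: "Hello world!")
```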
@@ -34,7 +34,7 @@ module Langchain::Prompt
 # @return [void]
 #
 def validate(template:, input_variables:)
- input_variables_set = @input_variables.uniq
+ input_variables_set = input_variables.uniq
 variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
 
 missing_variables = variables_from_template - input_variables_set
@@ -75,6 +75,7 @@ module Langchain::Prompt
 @prefix = prefix
 @suffix = suffix
 @example_separator = example_separator
+ @validate_template = validate_template
 
 validate(template: @prefix + @suffix, input_variables: @input_variables) if @validate_template
 end
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
 raise NotImplementedError, "#{self.class.name} does not support similarity search"
 end
 
+ # Paper: https://arxiv.org/abs/2212.10496
+ # Hypothetical Document Embeddings (HyDE)-augmented similarity search
+ #
+ # @param query [String] The query to search for
+ # @param k [Integer] The number of results to return
+ # @return [String] Response
+ def similarity_search_with_hyde(query:, k: 4)
+ hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
+ similarity_search(query: hyde_completion, k: k)
+ end
+
 # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
 # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
 def similarity_search_by_vector(...)
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
 def_delegators :llm,
 :default_dimension
 
- def generate_prompt(question:, context:)
- prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
- prefix: "Context:",
- suffix: "---\nQuestion: {question}\n---\nAnswer:",
- example_prompt: Langchain::Prompt::PromptTemplate.new(
- template: "{context}",
- input_variables: ["context"]
- ),
- examples: [
- {context: context}
- ],
- input_variables: ["question"],
- example_separator: "\n"
+ # HyDE-style prompt
+ #
+ # @param question [String] User's question
+ # @return [String] Prompt
+ def generate_hyde_prompt(question:)
+ prompt_template = Langchain::Prompt.load_from_path(
+ # Zero-shot prompt to generate a hypothetical document based on a given question
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
 )
-
 prompt_template.format(question: question)
 end
 
+ # Retrieval Augmented Generation (RAG)
+ #
+ # @param question [String] User's question
+ # @param context [String] The context to synthesize the answer from
+ # @return [String] Prompt
+ def generate_rag_prompt(question:, context:)
+ prompt_template = Langchain::Prompt.load_from_path(
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
+ )
+ prompt_template.format(question: question, context: context)
+ end
+
 def add_data(paths:)
 raise ArgumentError, "Paths must be provided" if Array(paths).empty?
 
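Together these helpers implement the HyDE flow: `similarity_search_with_hyde` first has the LLM draft a hypothetical passage answering the query (via `hyde.yaml`), then runs an ordinary `similarity_search` over that passage. A hedged end-to-end sketch, assuming a populated Weaviate index and an OpenAI-backed client (the index name and env vars are illustrative):

```ruby
require "langchain"

client = Langchain::Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Documents",
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Internally: llm.complete("Please write a passage to answer the question ...")
# produces a hypothetical document, which is then embedded and searched.
client.similarity_search_with_hyde(query: "How do vector indexes trade recall for speed?", k: 4)
```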
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
 id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
 embedding: llm.embed(text: text),
 # TODO: Add support for passing metadata
- metadata: [], # metadatas[index],
+ metadata: {}, # metadatas[index],
 document: text # Do we actually need to store the whole original document?
 )
 end
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
 
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
 
 context = content_data.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
data/lib/langchain/vectorsearch/prompts/hyde.yaml ADDED
@@ -0,0 +1,10 @@
+ # Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
+ _type: prompt
+ input_variables:
+ - question
+ template: |
+   Please write a passage to answer the question
+
+   Question: {question}
+
+   Passage:
data/lib/langchain/vectorsearch/prompts/rag.yaml ADDED
@@ -0,0 +1,11 @@
+ _type: prompt
+ input_variables:
+ - question
+ - context
+ template: |
+   Context:
+   {context}
+   ---
+   Question: {question}
+   ---
+   Answer:
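
Both files use the gem's serialized-prompt format, so they load with `Langchain::Prompt.load_from_path` and fill in with `#format`, exactly as `generate_hyde_prompt` and `generate_rag_prompt` do above. A small sketch with illustrative inputs:

```ruby
require "langchain"

prompt = Langchain::Prompt.load_from_path(
  file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
)

puts prompt.format(
  question: "What does HyDE stand for?",
  context: "HyDE (Hypothetical Document Embeddings) searches with an LLM-generated passage."
)
# Prints roughly:
#   Context:
#   HyDE (Hypothetical Document Embeddings) searches with an LLM-generated passage.
#   ---
#   Question: What does HyDE stand for?
#   ---
#   Answer:
```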
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
- VERSION = "0.6.14"
+ VERSION = "0.6.16"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
- version: 0.6.14
+ version: 0.6.16
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
- date: 2023-09-11 00:00:00.000000000 Z
+ date: 2023-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: baran
@@ -16,14 +16,14 @@ dependencies:
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.1.8
+ version: 0.1.9
 type: :runtime
 prerelease: false
 version_requirements: !ruby/object:Gem::Requirement
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.1.8
+ version: 0.1.9
 - !ruby/object:Gem::Dependency
 name: colorize
 requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
 - - '='
 - !ruby/object:Gem::Version
 version: 2.6.11
+ - !ruby/object:Gem::Dependency
+ name: pragmatic_segmenter
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.3.0
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.3.0
 - !ruby/object:Gem::Dependency
 name: dotenv-rails
 requirement: !ruby/object:Gem::Requirement
@@ -478,14 +492,14 @@ dependencies:
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.8.6
+ version: 0.8.7
 type: :development
 prerelease: false
 version_requirements: !ruby/object:Gem::Requirement
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.8.6
+ version: 0.8.7
 - !ruby/object:Gem::Dependency
 name: wikipedia-client
 requirement: !ruby/object:Gem::Requirement
@@ -521,6 +535,7 @@ files:
 - lib/langchain/ai_message.rb
 - lib/langchain/chunker/base.rb
 - lib/langchain/chunker/recursive_text.rb
+ - lib/langchain/chunker/sentence.rb
 - lib/langchain/chunker/text.rb
 - lib/langchain/contextual_logger.rb
 - lib/langchain/conversation.rb
@@ -535,6 +550,7 @@ files:
 - lib/langchain/llm/google_palm.rb
 - lib/langchain/llm/hugging_face.rb
 - lib/langchain/llm/llama_cpp.rb
+ - lib/langchain/llm/ollama.rb
 - lib/langchain/llm/openai.rb
 - lib/langchain/llm/prompts/summarize_template.yaml
 - lib/langchain/llm/replicate.rb
@@ -579,6 +595,8 @@ files:
 - lib/langchain/vectorsearch/milvus.rb
 - lib/langchain/vectorsearch/pgvector.rb
 - lib/langchain/vectorsearch/pinecone.rb
+ - lib/langchain/vectorsearch/prompts/hyde.yaml
+ - lib/langchain/vectorsearch/prompts/rag.yaml
 - lib/langchain/vectorsearch/qdrant.rb
 - lib/langchain/vectorsearch/weaviate.rb
 - lib/langchain/version.rb
@@ -606,7 +624,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
 version: '0'
 requirements: []
- rubygems_version: 3.2.33
+ rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Build LLM-backed Ruby applications with Ruby's LangChain