RubyGems - langchainrb - Versions diffs - 0.3.10 → 0.3.12 - Mend

langchainrb 0.3.10 → 0.3.12

Files changed (29) hide show

checksums.yaml +4 -4
data/.env.example +1 -0
data/CHANGELOG.md +11 -0
data/Gemfile +0 -4
data/Gemfile.lock +13 -13
data/README.md +30 -7
data/examples/pdf_store_and_query_with_chroma.rb +2 -2
data/lib/dependency_helper.rb +8 -4
data/lib/langchain/loader.rb +80 -0
data/lib/langchain/processors/base.rb +14 -0
data/lib/langchain/processors/docx.rb +24 -0
data/lib/langchain/processors/html.rb +28 -0
data/lib/langchain/processors/pdf.rb +26 -0
data/lib/langchain/processors/text.rb +17 -0
data/lib/langchain.rb +12 -13
data/lib/langchainrb.rb +1 -0
data/lib/llm/base.rb +2 -1
data/lib/llm/google_palm.rb +85 -0
data/lib/prompt/base.rb +4 -2
data/lib/prompt/prompt_template.rb +2 -2
data/lib/vectorsearch/base.rb +4 -12
data/lib/vectorsearch/pinecone.rb +26 -10
data/lib/version.rb +1 -1
metadata +42 -11
data/lib/loader.rb +0 -26
data/lib/loaders/base.rb +0 -19
data/lib/loaders/docx.rb +0 -34
data/lib/loaders/pdf.rb +0 -36
data/lib/loaders/text.rb +0 -24

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
-  data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
+  metadata.gz: 974f0a2b8ce3fe42144016bd740ee9d4f7e597834319cc92fbf1d50bd1f4468e
+  data.tar.gz: 3686a42c37eb117e6d7485ef4f7777c0f12968bb9cdcc3a30c7721c86c0a4325
 SHA512:
-  metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
-  data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
+  metadata.gz: a61f9b36d9d19eb6cf87af18c7fb40f55d39771257d08a6af2ec3384988419dfb158ffa8fc81c3769c0149f1ffa8b03200366bbea55b03b0d1553912af8d9ae6
+  data.tar.gz: 7dc53be923fe5b8587f61617198b24c42e8793fbd8e18c42a17035bf68279c59c37c6c691cabe13c83adc5dc2cff66ea293f198297ab9a9de30aa68ca72bd9c4

data/.env.example CHANGED Viewed

@@ -3,6 +3,7 @@ COHERE_API_KEY=
 HUGGING_FACE_API_KEY=
 MILVUS_URL=
 OPENAI_API_KEY=
+GOOGLE_PALM_API_KEY=
 PINECONE_API_KEY=
 PINECONE_ENVIRONMENT=
 REPLICATE_API_KEY=

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,16 @@
 ## [Unreleased]
+## [0.3.12] - 2023-05-25
+- 🔍 Vectorsearch
+  - Introduce namespace support for Pinecone
+- 🚚 Loaders
+  - Loaders overhaul
+## [0.3.11] - 2023-05-23
+- 🗣️ LLMs
+  - Introducing support for Google PaLM (Pathways Language Model)
+- Bug fixes and improvements
 ## [0.3.10] - 2023-05-19
 - 🗣️ LLMs
   - Introducing support for Replicate.com

data/Gemfile CHANGED Viewed

@@ -10,7 +10,3 @@ gem "rake", "~> 13.0"
 gem "rspec", "~> 3.0"
 gem "standardrb"
-# TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
-# Most likely everything will just need to be updated to `faraday 2.x`
-gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"

data/Gemfile.lock CHANGED Viewed

@@ -1,18 +1,7 @@
-GIT
-  remote: https://github.com/andreibondarev/replicate-ruby.git
-  revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
-  branch: faraday-1.x
-  specs:
-    replicate-ruby (0.2.1)
-      addressable
-      faraday (>= 1.0)
-      faraday-multipart
-      faraday-retry
 PATH
   remote: .
   specs:
-    langchainrb (0.3.10)
+    langchainrb (0.3.12)
 GEM
   remote: https://rubygems.org/
@@ -128,6 +117,9 @@ GEM
     faraday-retry (1.0.3)
     faraday_middleware (1.2.0)
       faraday (~> 1.0)
+    google_palm_api (0.1.0)
+      faraday (>= 1.0.0)
+      faraday_middleware (>= 1.0.0)
     google_search_results (2.0.1)
     graphlient (0.6.0)
       faraday (>= 1.0)
@@ -208,6 +200,11 @@ GEM
     rainbow (3.1.1)
     rake (13.0.6)
     regexp_parser (2.8.0)
+    replicate-ruby (0.2.2)
+      addressable
+      faraday (>= 1.0)
+      faraday-multipart
+      faraday-retry
     rexml (3.2.5)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
@@ -274,6 +271,7 @@ GEM
     zeitwerk (2.6.8)
 PLATFORMS
+  arm64-darwin-21
   arm64-darwin-22
   x86_64-darwin-19
   x86_64-darwin-22
@@ -285,16 +283,18 @@ DEPENDENCIES
   docx (~> 0.8.0)
   dotenv-rails (~> 2.7.6)
   eqn (~> 1.6.5)
+  google_palm_api (~> 0.1.0)
   google_search_results (~> 2.0.0)
   hugging-face (~> 0.3.4)
   langchainrb!
   milvus (~> 0.9.0)
+  nokogiri (~> 1.13)
   pdf-reader (~> 1.4)
   pinecone (~> 0.1.6)
   pry-byebug (~> 3.10.0)
   qdrant-ruby (~> 0.9.0)
   rake (~> 13.0)
-  replicate-ruby!
+  replicate-ruby (~> 0.2.2)
   rspec (~> 3.0)
   ruby-openai (~> 4.0.0)
   standardrb

data/README.md CHANGED Viewed

@@ -136,11 +136,17 @@ cohere = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
 ```
 #### Replicate
-Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
+Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
 ```ruby
 replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
 ```
+#### Google PaLM (Pathways Language Model)
+Add `gem "google_palm_api", "~> 0.1.0"` to your Gemfile.
+```ruby
+google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
+```
 ### Using Prompts 📋
 #### Prompt Templates
@@ -262,11 +268,28 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
 Need to read data from various sources? Load it up.
-| Name | Class         | Gem Requirements             |
-| ---- | ------------- | :--------------------------: |
-| docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
-| pdf  | Loaders::PDF  | `gem "pdf-reader", "~> 1.4"` |
-| text | Loaders::Text |                              |
+##### Usage
+Just call `Langchain::Loader.load` with the path to the file or a URL you want to load.
+```ruby
+Langchain::Loader.load('/path/to/file.pdf')
+```
+or
+```ruby
+Langchain::Loader.load('https://www.example.com/file.pdf')
+```
+##### Supported Formats
+| Format | Processor        |       Gem Requirements       |
+| ------ | ---------------- | :--------------------------: |
+| docx   | Processors::Docx |   `gem "docx", "~> 0.8.0"`   |
+| html   | Processors::HTML | `gem "nokogiri", "~> 1.13"`  |
+| pdf    | Processors::PDF  | `gem "pdf-reader", "~> 1.4"` |
+| text   | Processors::Text |                              |
 ## Examples
 Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -284,7 +307,7 @@ Langchain.logger.level = :info
 1. `git clone https://github.com/andreibondarev/langchainrb.git`
 2. `cp .env.example .env`, then fill out the environment variables in `.env`
-3. `rspec spec/` to ensure that the tests pass
+3. `bundle exec rake` to ensure that the tests pass and to run standardrb
 4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
 ## Core Contributors

data/examples/pdf_store_and_query_with_chroma.rb CHANGED Viewed

@@ -26,8 +26,8 @@ docs = [
 ]
 # Add data to the index. Chroma will use OpenAI to generate embeddings behind the scenes.
-chroma.add_texts(
-  texts: docs
+chroma.add_data(
+  paths: docs
 )
 # Query your data

data/lib/dependency_helper.rb CHANGED Viewed

@@ -1,12 +1,14 @@
 # frozen_string_literal: true
+VersionError = Class.new(ScriptError)
 # This method requires and loads the given gem, and then checks to see if the version of the gem meets the requirements listed in `langchain.gemspec`
 # This solution was built to avoid auto-loading every single gem in the Gemfile when the developer will most likely be using only a few of them.
 #
 # @param gem_name [String] The name of the gem to load
 # @return [Boolean] Whether or not the gem was loaded successfully
 # @raise [LoadError] If the gem is not installed
-# @raise [LoadError] If the gem is installed, but the version does not meet the requirements
+# @raise [VersionError] If the gem is installed, but the version does not meet the requirements
 #
 def depends_on(gem_name)
   gem(gem_name) # require the gem
@@ -14,10 +16,12 @@ def depends_on(gem_name)
   return(true) unless defined?(Bundler) # If we're in a non-bundler environment, we're no longer able to determine if we'll meet requirements
   gem_version = Gem.loaded_specs[gem_name].version
-  gem_requirement = Bundler.load.dependencies.find { |g| g.name == gem_name }.requirement
+  gem_requirement = Bundler.load.dependencies.find { |g| g.name == gem_name }&.requirement
+  raise LoadError unless gem_requirement
-  if !gem_requirement.satisfied_by?(gem_version)
-    raise "The #{gem_name} gem is installed, but version #{gem_requirement} is required. You have #{gem_version}."
+  unless gem_requirement.satisfied_by?(gem_version)
+    raise VersionError, "The #{gem_name} gem is installed, but version #{gem_requirement} is required. You have #{gem_version}."
   end
   true

data/lib/langchain/loader.rb ADDED Viewed

@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+require "open-uri"
+module Langchain
+  class Loader
+    class FileNotFound < StandardError; end
+    class UnknownFormatError < StandardError; end
+    URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
+    # Load data from a file or url
+    # Equivalent to Langchain::Loader.new(path).load
+    # @param path [String | Pathname] path to file or url
+    # @return [String] file content
+    def self.load(path)
+      new(path).load
+    end
+    # Initialize Langchain::Loader
+    # @param path [String | Pathname] path to file or url
+    # @return [Langchain::Loader] loader instance
+    def initialize(path)
+      @path = path
+    end
+    # Check if path is url
+    # @return [Boolean] true if path is url
+    def url?
+      return false if @path.is_a?(Pathname)
+      !!(@path =~ URI_REGEX)
+    end
+    # Load data from a file or url
+    # @return [String] file content
+    def load
+      url? ? from_url(@path) : from_path(@path)
+    end
+    private
+    def from_url(url)
+      process do
+        data = URI.parse(url).open
+        processor = find_processor(:CONTENT_TYPES, data.content_type)
+        [data, processor]
+      end
+    end
+    def from_path(path)
+      raise FileNotFound unless File.exist?(path)
+      process do
+        [File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
+      end
+    end
+    def process(&block)
+      data, processor = yield
+      raise UnknownFormatError unless processor
+      Langchain::Processors.const_get(processor).new.parse(data)
+    end
+    def find_processor(constant, value)
+      processors.find { |klass| processor_matches? "#{klass}::#{constant}", value }
+    end
+    def processor_matches?(constant, value)
+      Langchain::Processors.const_get(constant).include?(value)
+    end
+    def processors
+      Langchain::Processors.constants
+    end
+  end
+end

data/lib/langchain/processors/base.rb ADDED Viewed

@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class Base
+      EXTENSIONS = []
+      CONTENT_TYPES = []
+      def parse(data)
+        raise NotImplementedError
+      end
+    end
+  end
+end

data/lib/langchain/processors/docx.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class Docx < Base
+      EXTENSIONS = [".docx"]
+      CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
+      def initialize
+        depends_on "docx"
+        require "docx"
+      end
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        ::Docx::Document
+          .open(StringIO.new(data.read))
+          .text
+      end
+    end
+  end
+end

data/lib/langchain/processors/html.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class HTML < Base
+      EXTENSIONS = [".html", ".htm"]
+      CONTENT_TYPES = ["text/html"]
+      # We only look for headings and paragraphs
+      TEXT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p]
+      def initialize
+        depends_on "nokogiri"
+        require "nokogiri"
+      end
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        Nokogiri::HTML(data.read)
+          .css(TEXT_CONTENT_TAGS.join(","))
+          .map(&:inner_text)
+          .join("\n\n")
+      end
+    end
+  end
+end

data/lib/langchain/processors/pdf.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class PDF < Base
+      EXTENSIONS = [".pdf"]
+      CONTENT_TYPES = ["application/pdf"]
+      def initialize
+        depends_on "pdf-reader"
+        require "pdf-reader"
+      end
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        ::PDF::Reader
+          .new(StringIO.new(data.read))
+          .pages
+          .map(&:text)
+          .join("\n\n")
+      end
+    end
+  end
+end

data/lib/langchain/processors/text.rb ADDED Viewed

@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class Text < Base
+      EXTENSIONS = [".txt"]
+      CONTENT_TYPES = ["text/plain"]
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        data.read
+      end
+    end
+  end
+end

data/lib/langchain.rb CHANGED Viewed

@@ -1,12 +1,12 @@
 # frozen_string_literal: true
 require "logger"
+require "pathname"
 require_relative "./version"
 require_relative "./dependency_helper"
 module Langchain
   class << self
-    attr_accessor :default_loaders
     attr_accessor :logger
     attr_reader :root
@@ -15,6 +15,16 @@ module Langchain
   @logger ||= ::Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
   @root = Pathname.new(__dir__)
+  autoload :Loader, "langchain/loader"
+  module Processors
+    autoload :Base, "langchain/processors/base"
+    autoload :PDF, "langchain/processors/pdf"
+    autoload :HTML, "langchain/processors/html"
+    autoload :Text, "langchain/processors/text"
+    autoload :Docx, "langchain/processors/docx"
+  end
 end
 module Agent
@@ -34,6 +44,7 @@ end
 module LLM
   autoload :Base, "llm/base"
   autoload :Cohere, "llm/cohere"
+  autoload :GooglePalm, "llm/google_palm"
   autoload :HuggingFace, "llm/hugging_face"
   autoload :OpenAI, "llm/openai"
   autoload :Replicate, "llm/replicate"
@@ -53,15 +64,3 @@ module Tool
   autoload :SerpApi, "tool/serp_api"
   autoload :Wikipedia, "tool/wikipedia"
 end
-module Loaders
-  autoload :Base, "loaders/base"
-  autoload :Docx, "loaders/docx"
-  autoload :PDF, "loaders/pdf"
-  autoload :Text, "loaders/text"
-end
-autoload :Loader, "loader"
-# Load the default Loaders
-Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]

data/lib/langchainrb.rb ADDED Viewed

@@ -0,0 +1 @@
+require "langchain"

data/lib/llm/base.rb CHANGED Viewed

@@ -7,9 +7,10 @@ module LLM
     # Currently supported LLMs
     # TODO: Add support for HuggingFace and other LLMs
     LLMS = {
-      openai: "OpenAI",
       cohere: "Cohere",
+      google_palm: "GooglePalm",
       huggingface: "HuggingFace",
+      openai: "OpenAI",
       replicate: "Replicate"
     }.freeze

data/lib/llm/google_palm.rb ADDED Viewed

@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+module LLM
+  class GooglePalm < Base
+    # Wrapper around the Google PaLM (Pathways Language Model) APIs.
+    DEFAULTS = {
+      temperature: 0.0,
+      dimension: 768 # This is what the `embedding-gecko-001` model generates
+    }.freeze
+    def initialize(api_key:)
+      depends_on "google_palm_api"
+      require "google_palm_api"
+      @client = ::GooglePalmApi::Client.new(api_key: api_key)
+    end
+    #
+    # Generate an embedding for a given text
+    #
+    # @param text [String] The text to generate an embedding for
+    # @return [Array] The embedding
+    #
+    def embed(text:)
+      response = client.embed(
+        text: text
+      )
+      response.dig("embedding", "value")
+    end
+    #
+    # Generate a completion for a given prompt
+    #
+    # @param prompt [String] The prompt to generate a completion for
+    # @return [String] The completion
+    #
+    def complete(prompt:, **params)
+      default_params = {
+        prompt: prompt,
+        temperature: DEFAULTS[:temperature]
+      }
+      if params[:stop_sequences]
+        default_params[:stop_sequences] = params.delete(:stop_sequences)
+      end
+      if params[:max_tokens]
+        default_params[:max_output_tokens] = params.delete(:max_tokens)
+      end
+      default_params.merge!(params)
+      response = client.generate_text(**default_params)
+      response.dig("candidates", 0, "output")
+    end
+    #
+    # Generate a chat completion for a given prompt
+    #
+    # @param prompt [String] The prompt to generate a chat completion for
+    # @return [String] The chat completion
+    #
+    def chat(prompt:, **params)
+      # TODO: Figure out how to introduce persisted conversations
+      default_params = {
+        prompt: prompt,
+        temperature: DEFAULTS[:temperature]
+      }
+      if params[:stop_sequences]
+        default_params[:stop] = params.delete(:stop_sequences)
+      end
+      if params[:max_tokens]
+        default_params[:max_output_tokens] = params.delete(:max_tokens)
+      end
+      default_params.merge!(params)
+      response = client.generate_chat_message(**default_params)
+      response.dig("candidates", 0, "content")
+    end
+  end
+end

data/lib/prompt/base.rb CHANGED Viewed

@@ -66,6 +66,8 @@ module Prompt
     # contained within the template. Input variables are defined as text enclosed in
     # curly braces (e.g. "{variable_name}").
     #
+    # Content within two consecutive curly braces (e.g. "{{ignore_me}}") is ignored.
+    #
     # @param template [String] The template string to extract variables from.
     #
     # @return [Array<String>] An array of input variable names.
@@ -74,9 +76,9 @@ module Prompt
       input_variables = []
       scanner = StringScanner.new(template)
-      while scanner.scan_until(/\{([^{}]*)\}/)
+      while scanner.scan_until(/\{([^}]*)\}/)
         variable = scanner[1].strip
-        input_variables << variable unless variable.empty?
+        input_variables << variable unless variable.empty? || variable[0] == "{"
       end
       input_variables

data/lib/prompt/prompt_template.rb CHANGED Viewed

@@ -20,7 +20,7 @@ module Prompt
     end
     #
-    # Format the prompt with the inputs.
+    # Format the prompt with the inputs. Doubled braces ({{ and }}) are replaced with single braces ({ and }) to adhere to the f-string spec.
     #
     # @param kwargs [Hash] Any arguments to be passed to the prompt template.
     # @return [String] A formatted string.
@@ -28,7 +28,7 @@ module Prompt
     def format(**kwargs)
       result = @template
       kwargs.each { |key, value| result = result.gsub(/\{#{key}\}/, value.to_s) }
-      result
+      result.gsub(/{{/, "{").gsub(/}}/, "}")
     end
     #

data/lib/vectorsearch/base.rb CHANGED Viewed

@@ -19,8 +19,6 @@ module Vectorsearch
       @llm_api_key = llm_api_key
       @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
-      @loaders = Langchain.default_loaders
     end
     # Method supported by Vectorsearch DB to create a default schema
@@ -74,18 +72,12 @@ module Vectorsearch
       raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
       raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
-      texts =
-        Loader
-          .with(*loaders)
-          .load(path || paths)
+      texts = Array(path || paths)
+        .flatten
+        .map { |path| Langchain::Loader.new(path)&.load }
+        .compact
       add_texts(texts: texts)
     end
-    attr_reader :loaders
-    def add_loader(*loaders)
-      loaders.each { |loader| @loaders << loader }
-    end
   end
 end

data/lib/vectorsearch/pinecone.rb CHANGED Viewed

@@ -25,20 +25,22 @@ module Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
+    # @param namespace [String] The namespace to add the texts to
+    # @param metadata [Hash] The metadata to use for the texts
     # @return [Hash] The response from the server
-    def add_texts(texts:)
+    def add_texts(texts:, namespace: "", metadata: nil)
       vectors = texts.map do |text|
         {
           # TODO: Allow passing in your own IDs
           id: SecureRandom.uuid,
-          metadata: {content: text},
+          metadata: metadata || {content: text},
           values: llm_client.embed(text: text)
         }
       end
       index = client.index(index_name)
-      index.upsert(vectors: vectors)
+      index.upsert(vectors: vectors, namespace: namespace)
     end
     # Create the index with the default schema
@@ -54,40 +56,54 @@ module Vectorsearch
     # Search for similar texts
     # @param query [String] The text to search for
     # @param k [Integer] The number of results to return
+    # @param namespace [String] The namespace to search in
+    # @param filter [String] The filter to use
     # @return [Array] The list of results
     def similarity_search(
       query:,
-      k: 4
+      k: 4,
+      namespace: "",
+      filter: nil
     )
       embedding = llm_client.embed(text: query)
       similarity_search_by_vector(
         embedding: embedding,
-        k: k
+        k: k,
+        namespace: namespace,
+        filter: filter
       )
     end
     # Search for similar texts by embedding
     # @param embedding [Array] The embedding to search for
     # @param k [Integer] The number of results to return
+    # @param namespace [String] The namespace to search in
+    # @param filter [String] The filter to use
     # @return [Array] The list of results
-    def similarity_search_by_vector(embedding:, k: 4)
+    def similarity_search_by_vector(embedding:, k: 4, namespace: "", filter: nil)
       index = client.index(index_name)
-      response = index.query(
+      query_params = {
         vector: embedding,
+        namespace: namespace,
+        filter: filter,
         top_k: k,
         include_values: true,
         include_metadata: true
-      )
+      }.compact
+      response = index.query(query_params)
       response.dig("matches")
     end
     # Ask a question and return the answer
     # @param question [String] The question to ask
+    # @param namespace [String] The namespace to search in
+    # @param filter [String] The filter to use
     # @return [String] The answer to the question
-    def ask(question:)
-      search_results = similarity_search(query: question)
+    def ask(question:, namespace: "", filter: nil)
+      search_results = similarity_search(query: question, namespace: namespace, filter: filter)
       context = search_results.map do |result|
         result.dig("metadata").to_s

data/lib/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.3.10"
+  VERSION = "0.3.12"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.3.10
+  version: 0.3.12
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-21 00:00:00.000000000 Z
+date: 2023-05-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
@@ -94,6 +94,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.6.5
+- !ruby/object:Gem::Dependency
+  name: google_palm_api
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
 - !ruby/object:Gem::Dependency
   name: google_search_results
   requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.9.0
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.13'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.13'
 - !ruby/object:Gem::Dependency
   name: pdf-reader
   requirement: !ruby/object:Gem::Requirement
@@ -168,16 +196,16 @@ dependencies:
   name: replicate-ruby
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 0.2.2
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 0.2.2
 - !ruby/object:Gem::Dependency
   name: qdrant-ruby
   requirement: !ruby/object:Gem::Requirement
@@ -260,16 +288,19 @@ files:
 - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
 - lib/dependency_helper.rb
 - lib/langchain.rb
+- lib/langchain/loader.rb
+- lib/langchain/processors/base.rb
+- lib/langchain/processors/docx.rb
+- lib/langchain/processors/html.rb
+- lib/langchain/processors/pdf.rb
+- lib/langchain/processors/text.rb
+- lib/langchainrb.rb
 - lib/llm/base.rb
 - lib/llm/cohere.rb
+- lib/llm/google_palm.rb
 - lib/llm/hugging_face.rb
 - lib/llm/openai.rb
 - lib/llm/replicate.rb
-- lib/loader.rb
-- lib/loaders/base.rb
-- lib/loaders/docx.rb
-- lib/loaders/pdf.rb
-- lib/loaders/text.rb
 - lib/prompt/base.rb
 - lib/prompt/few_shot_prompt_template.rb
 - lib/prompt/loading.rb

data/lib/loader.rb DELETED Viewed

@@ -1,26 +0,0 @@
-module Loader
-  def self.with(*loaders)
-    LoaderSet.new(loaders)
-  end
-  class LoaderSet
-    def initialize(loaders)
-      @loaders = Array(loaders)
-    end
-    def load(*paths)
-      Array(paths)
-        .flatten
-        .map { |path| first_loadable_loader(path)&.load }
-        .compact
-    end
-    def first_loadable_loader(path)
-      @loaders
-        .each do |loader_klass|
-          loader_instance = loader_klass.new(path)
-          return(loader_instance) if loader_instance.loadable?
-        end
-    end
-  end
-end

data/lib/loaders/base.rb DELETED Viewed

@@ -1,19 +0,0 @@
-# frozen_string_literal: true
-# TODO: Add chunking options to the loaders
-module Loaders
-  class Base
-    def self.load(path)
-      new.load(path)
-    end
-    def initialize(path)
-      @path = path
-    end
-    def loadable?
-      raise NotImplementedError
-    end
-  end
-end

data/lib/loaders/docx.rb DELETED Viewed

@@ -1,34 +0,0 @@
-# frozen_string_literal: true
-module Loaders
-  class Docx < Base
-    #
-    # This Loader parses Docx files into text.
-    # If you'd like to use it directly you can do so like this:
-    # Loaders::Docx.new("path/to/my.docx").load
-    #
-    # This parser is also invoked when you're adding data to a Vectorsearch DB:
-    # qdrant = Vectorsearch::Qdrant.new(...)
-    # path = Langchain.root.join("path/to/my.docx")
-    # qdrant.add_data(path: path)
-    #
-    def initialize(path)
-      depends_on "docx"
-      require "docx"
-      @path = path
-    end
-    # Check that the file is a `.docx` file
-    def loadable?
-      @path.to_s.end_with?(".docx")
-    end
-    def load
-      ::Docx::Document
-        .open(@path.to_s)
-        .text
-    end
-  end
-end

data/lib/loaders/pdf.rb DELETED Viewed

@@ -1,36 +0,0 @@
-# frozen_string_literal: true
-module Loaders
-  class PDF < Base
-    #
-    # This Loader parses PDF files into text.
-    # If you'd like to use it directly you can do so like this:
-    # Loaders::PDF.new("path/to/my.pdf").load
-    #
-    # This parser is also invoked when you're adding data to a Vectorsearch DB:
-    # qdrant = Vectorsearch::Qdrant.new(...)
-    # path = Langchain.root.join("path/to/my.pdf")
-    # qdrant.add_data(path: path)
-    #
-    def initialize(path)
-      depends_on "pdf-reader"
-      require "pdf-reader"
-      @path = path
-    end
-    # Check that the file is a PDF file
-    def loadable?
-      @path.to_s.end_with?(".pdf")
-    end
-    def load
-      ::PDF::Reader
-        .new(@path)
-        .pages
-        .map(&:text)
-        .join("\n\n")
-    end
-  end
-end

data/lib/loaders/text.rb DELETED Viewed

@@ -1,24 +0,0 @@
-# frozen_string_literal: true
-module Loaders
-  class Text < Base
-    #
-    # This Loader parses .txt files.
-    # If you'd like to use it directly you can do so like this:
-    # Loaders::Text.new("path/to/my.txt").load
-    #
-    # This parser is also invoked when you're adding data to a Vectorsearch DB:
-    # qdrant = Vectorsearch::Qdrant.new(...)
-    # path = Langchain.root.join("path/to/my.txt")
-    # qdrant.add_data(path: path)
-    #
-    def loadable?
-      @path.to_s.end_with?(".txt")
-    end
-    def load
-      @path.read
-    end
-  end
-end