langchainrb 0.3.6 → 0.3.8
- checksums.yaml +4 -4
- data/.env.example +12 -0
- data/CHANGELOG.md +19 -11
- data/Gemfile.lock +23 -1
- data/README.md +36 -14
- data/examples/pdf_store_and_query_with_chroma.rb +36 -0
- data/lib/agent/chain_of_thought_agent/chain_of_thought_agent.rb +2 -2
- data/lib/dependency_helper.rb +8 -0
- data/lib/langchain.rb +26 -1
- data/lib/llm/base.rb +15 -0
- data/lib/llm/cohere.rb +4 -2
- data/lib/llm/hugging_face.rb +1 -1
- data/lib/llm/openai.rb +21 -2
- data/lib/loader.rb +26 -0
- data/lib/loaders/base.rb +17 -0
- data/lib/loaders/pdf.rb +34 -0
- data/lib/loaders/text.rb +22 -0
- data/lib/tool/base.rb +1 -2
- data/lib/vectorsearch/base.rb +39 -8
- data/lib/vectorsearch/chroma.rb +105 -0
- data/lib/vectorsearch/milvus.rb +4 -8
- data/lib/vectorsearch/pinecone.rb +3 -3
- data/lib/vectorsearch/qdrant.rb +4 -4
- data/lib/vectorsearch/weaviate.rb +2 -2
- data/lib/version.rb +1 -1
- metadata +37 -4
- data/examples/.keep +0 -0
- data/lib/logging.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
+  data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
+  data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
data/.env.example
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,14 +1,26 @@
 ## [Unreleased]
 
+## [0.3.8] - 2023-05-19
+- 🔍 Vectorsearch
+  - Introduce support for Chroma DB
+
+- 🚚 Loaders
+  - Bug fix `Loaders::Text` to only parse .txt files
+
+## [0.3.7] - 2023-05-19
+- 🚚 Loaders
+  - Introduce `Loaders::Text` to parse .txt files
+  - Introduce `Loaders::PDF` to parse .pdf files
+
 ## [0.3.6] - 2023-05-17
-- LLMs
+- 🗣️ LLMs
   - Bump `hugging-face` gem version
 
 ## [0.3.5] - 2023-05-16
 - Bug fixes
 
 ## [0.3.4] - 2023-05-16
-- LLMs
+- 🗣️ LLMs
   - Introducing support for HuggingFace
 
 ## [0.3.3] - 2023-05-16
@@ -17,32 +29,28 @@
 - Use the Ruby logger
 
 ## [0.3.2] - 2023-05-15
-- Agents
+- 🤖 Agents
   - Fix Chain of Thought prompt loader
 
 ## [0.3.1] - 2023-05-12
-- Tools
+- 🛠️ Tools
   - Introducing `Tool::Wikipedia`, a tool that looks up Wikipedia entries
 
 ## [0.3.0] - 2023-05-12
-
-- Agents
+- 🤖 Agents
   - Introducing `Agent::ChainOfThoughtAgent`, a semi-autonomous bot that uses Tools to retrieve additional information in order to make best-effort informed replies to user's questions.
-- Tools
+- 🛠️ Tools
   - Introducing `Tool::Calculator` tool that solves mathematical expressions.
   - Introducing `Tool::Search` tool that executes Google Searches.
 
 ## [0.2.0] - 2023-05-09
-
-- Prompt Templating
+- 📋 Prompt Templating
   - Ability to create prompt templates and save them to JSON files
   - Default `Prompt::FewShotPromptTemplate`
   - New examples added to `examples/`
 
 ## [0.1.4] - 2023-05-02
-
 - Backfilling missing specs
 
 ## [0.1.3] - 2023-05-01
-
 - Initial release
data/Gemfile.lock
CHANGED
@@ -1,11 +1,12 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.3.
+    langchainrb (0.3.8)
 
 GEM
   remote: https://rubygems.org/
   specs:
+    Ascii85 (1.0.3)
     actionpack (7.0.4.3)
       actionview (= 7.0.4.3)
       activesupport (= 7.0.4.3)
@@ -26,9 +27,13 @@ GEM
       tzinfo (~> 2.0)
     addressable (2.8.4)
       public_suffix (>= 2.0.2, < 6.0)
+    afm (0.2.2)
     ast (2.4.2)
     builder (3.2.4)
     byebug (11.1.3)
+    chroma-db (0.3.0)
+      dry-monads (~> 1.6)
+      ruby-next-core (>= 0.15.0)
     coderay (1.1.3)
     cohere-ruby (0.9.3)
       faraday (~> 1)
@@ -52,6 +57,10 @@ GEM
       concurrent-ruby (~> 1.0)
       dry-core (~> 1.0, < 2)
       zeitwerk (~> 2.6)
+    dry-monads (1.6.0)
+      concurrent-ruby (~> 1.0)
+      dry-core (~> 1.0, < 2)
+      zeitwerk (~> 2.6)
     dry-schema (1.13.1)
       concurrent-ruby (~> 1.0)
       dry-configurable (~> 1.0, >= 1.0.1)
@@ -114,6 +123,7 @@ GEM
     graphql-client (0.18.0)
       activesupport (>= 3.0)
       graphql
+    hashery (2.1.2)
     httparty (0.21.0)
       mini_mime (>= 1.0.0)
       multi_xml (>= 0.5.2)
@@ -144,6 +154,12 @@ GEM
     parallel (1.23.0)
     parser (3.2.2.1)
       ast (~> 2.4.1)
+    pdf-reader (1.4.1)
+      Ascii85 (~> 1.0.0)
+      afm (~> 0.2.1)
+      hashery (~> 2.0)
+      ruby-rc4
+      ttfunk
     pinecone (0.1.71)
       dry-struct (~> 1.6.0)
       dry-validation (~> 1.10.0)
@@ -207,10 +223,12 @@ GEM
     rubocop-performance (1.16.0)
       rubocop (>= 1.7.0, < 2.0)
       rubocop-ast (>= 0.4.0)
+    ruby-next-core (0.15.3)
     ruby-openai (4.0.0)
      faraday (>= 1)
      faraday-multipart (>= 1)
     ruby-progressbar (1.13.0)
+    ruby-rc4 (0.1.5)
     ruby2_keywords (0.0.5)
     standard (1.28.2)
       language_server-protocol (~> 3.17.0.2)
@@ -228,6 +246,7 @@ GEM
     thor (1.2.1)
     treetop (1.6.12)
       polyglot (~> 0.3)
+    ttfunk (1.7.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.4.2)
@@ -242,9 +261,11 @@ GEM
 PLATFORMS
   arm64-darwin-22
   x86_64-darwin-19
+  x86_64-darwin-22
   x86_64-linux
 
 DEPENDENCIES
+  chroma-db (~> 0.3.0)
   cohere-ruby (~> 0.9.3)
   dotenv-rails (~> 2.7.6)
   eqn (~> 1.6.5)
@@ -252,6 +273,7 @@ DEPENDENCIES
   hugging-face (~> 0.3.3)
   langchainrb!
   milvus (~> 0.9.0)
+  pdf-reader (~> 1.4)
   pinecone (~> 0.1.6)
   pry-byebug (~> 3.10.0)
   qdrant-ruby (~> 0.9.0)
data/README.md
CHANGED
@@ -30,10 +30,11 @@ require "langchain"
 
 | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
 | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
-
-
-
-
+| Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
+| Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
+| Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
+| Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
+| Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
 
 ### Using Vector Search Databases 🔍
 
@@ -54,6 +55,7 @@ client = Vectorsearch::Weaviate.new(
 client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
 client = Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
 client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
+client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
 ```
 
 ```ruby
@@ -62,7 +64,7 @@ client.create_default_schema
 ```
 
 ```ruby
-# Store
+# Store plain texts in your vector search database
 client.add_texts(
   texts: [
     "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
@@ -70,7 +72,13 @@ client.add_texts(
   ]
 )
 ```
+```ruby
+# Store the contents of your files in your vector search database
+my_pdf = Langchain.root.join("path/to/my.pdf")
+my_text = Langchain.root.join("path/to/my.txt")
 
+client.add_data(paths: [my_pdf, my_text])
+```
 ```ruby
 # Retrieve similar documents based on the query string passed in
 client.similarity_search(
@@ -233,12 +241,24 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
 
 #### Available Tools 🛠️
 
-| Name
-
-| "calculator" | Useful for getting the result of a math expression | |
-| "search"
-| "wikipedia"
+| Name | Description | ENV Requirements | Gem Requirements |
+| ------------ | :------------------------------------------------: | :-----------------------------------------------------------: | :---------------------------------------: |
+| "calculator" | Useful for getting the result of a math expression | | `gem "eqn", "~> 1.6.5"` |
+| "search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` | |
+| "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
+
+
+#### Loaders 🚚
 
+Need to read data from various sources? Load it up.
+
+| Name | Class | Gem Requirements |
+| ---- | ------------- | :--------------------------: |
+| pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
+| text | Loaders::Text | |
+
+## Examples
+Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
 
 ## Logging
 
@@ -251,9 +271,10 @@ Langchain.logger.level = :info
 
 ## Development
 
-
-
-
+1. `git clone https://github.com/andreibondarev/langchainrb.git`
+2. `cp .env.example .env`, then fill out the environment variables in `.env`
+3. `rspec spec/` to ensure that the tests pass
+4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
 
 ## Core Contributors
 [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
@@ -261,8 +282,9 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
 ## Honorary Contributors
 [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
 [<img style="border-radius:50%" alt="Rafael Figueiredo" src="https://avatars.githubusercontent.com/u/35845775?v=4" width="80" height="80" class="avatar">](https://github.com/rafaelqfigueiredo)
+[<img style="border-radius:50%" alt="Ricky Chilcott" src="https://avatars.githubusercontent.com/u/445759?v=4" width="80" height="80" class="avatar">](https://github.com/rickychilcott)
 
-(Criteria
+(Criteria for becoming an Honorary Contributor or Core Contributor is pending...)
 
 ## Contributing
 
data/examples/pdf_store_and_query_with_chroma.rb
ADDED
@@ -0,0 +1,36 @@
+require "langchain"
+
+# gem install chroma-db
+# or add `gem "chroma-db", "~> 0.3.0"` to your Gemfile
+
+# Instantiate the Chroma client
+chroma = Vectorsearch::Chroma.new(
+  url: ENV["CHROMA_URL"],
+  index_name: "documents",
+  llm: :openai,
+  llm_api_key: ENV["OPENAI_API_KEY"]
+)
+
+# Create the default schema.
+chroma.create_default_schema
+
+# Set up an array of PDF and TXT documents
+docs = [
+  Langchain.root.join("/docs/document.pdf"),
+  Langchain.root.join("/docs/document.txt")
+]
+
+# Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scene.
+chroma.add_texts(
+  texts: docs
+)
+
+# Query your data
+chroma.similarity_search(
+  query: "..."
+)
+
+# Interact with your index through Q&A
+chroma.ask(
+  question: "..."
+)
data/lib/agent/chain_of_thought_agent/chain_of_thought_agent.rb
CHANGED
@@ -43,7 +43,7 @@ module Agent
 
       loop do
         Langchain.logger.info("Agent: Passing the prompt to the #{llm} LLM")
-        response = llm_client.
+        response = llm_client.complete(
           prompt: prompt,
           stop_sequences: ["Observation:"],
           max_tokens: 500
@@ -100,7 +100,7 @@ module Agent
     # @return [PromptTemplate] PromptTemplate instance
     def prompt_template
       @template ||= Prompt.load_from_path(
-        file_path:
+        file_path: Langchain.root.join("agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
       )
     end
   end
data/lib/dependency_helper.rb
CHANGED
@@ -1,5 +1,13 @@
 # frozen_string_literal: true
 
+# This method requires and loads the given gem, and then checks to see if the version of the gem meets the requirements listed in `langchain.gemspec`
+# This solution was built to avoid auto-loading every single gem in the Gemfile when the developer will mostly likely be only using a few of them.
+#
+# @param gem_name [String] The name of the gem to load
+# @return [Boolean] Whether or not the gem was loaded successfully
+# @raise [LoadError] If the gem is not installed
+# @raise [LoadError] If the gem is installed, but the version does not meet the requirements
+#
 def depends_on(gem_name)
   gem(gem_name) # require the gem
 
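The doc comment above describes the `depends_on` contract (activate the gem, then verify its version against the constraint declared in `langchain.gemspec`), but the diff only shows the first line of the method body. The following is a hypothetical sketch of how such a check could be written with standard RubyGems APIs; it is not the gem's actual implementation:

```ruby
# Hypothetical sketch of the depends_on contract described above -- not the shipped code.
def depends_on(gem_name)
  gem(gem_name) # activate the gem

  # Find the version constraint that langchain.gemspec declares for this gem ...
  spec = Gem.loaded_specs["langchainrb"]
  requirement = spec&.dependencies&.find { |dep| dep.name == gem_name }&.requirement

  # ... and compare it against the version that was actually loaded.
  version = Gem.loaded_specs[gem_name]&.version
  raise LoadError, "Could not load #{gem_name}" if version.nil?

  if requirement && !requirement.satisfied_by?(version)
    raise LoadError, "#{gem_name} #{version} does not satisfy #{requirement}"
  end

  true
end
```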
data/lib/langchain.rb
CHANGED
@@ -1,8 +1,21 @@
 # frozen_string_literal: true
 
+require "logger"
+
 require_relative "./version"
 require_relative "./dependency_helper"
-
+module Langchain
+  class << self
+    attr_accessor :default_loaders
+    attr_accessor :logger
+
+    attr_reader :root
+  end
+
+  @logger ||= ::Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
+
+  @root = Pathname.new(__dir__)
+end
 
 module Agent
   autoload :Base, "agent/base"
@@ -11,6 +24,7 @@ end
 
 module Vectorsearch
   autoload :Base, "vectorsearch/base"
+  autoload :Chroma, "vectorsearch/chroma"
   autoload :Milvus, "vectorsearch/milvus"
   autoload :Pinecone, "vectorsearch/pinecone"
   autoload :Qdrant, "vectorsearch/qdrant"
@@ -38,3 +52,14 @@ module Tool
   autoload :SerpApi, "tool/serp_api"
   autoload :Wikipedia, "tool/wikipedia"
 end
+
+module Loaders
+  autoload :Base, "loaders/base"
+  autoload :PDF, "loaders/pdf"
+  autoload :Text, "loaders/text"
+end
+
+autoload :Loader, "loader"
+
+# Load the default Loaders
+Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
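The module-level accessors above replace the class-variable logger in `lib/logging.rb`, which is deleted at the bottom of this diff. A minimal usage sketch, limited to what the diff itself defines (`Langchain.logger`, `Langchain.default_loaders`, `Langchain.root`):

```ruby
require "langchain"

# Swap in your own logger and raise verbosity (the default is a $stdout logger at :warn).
Langchain.logger = Logger.new($stdout)
Langchain.logger.level = :info

# Loaders that new Vectorsearch clients pick up by default.
Langchain.default_loaders #=> [Loaders::Text, Loaders::PDF]

# Pathname rooted at the gem's lib/ directory, used for bundled assets such as prompt templates.
Langchain.root.join("agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
```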
data/lib/llm/base.rb
CHANGED
@@ -16,6 +16,21 @@ module LLM
       self.class.const_get(:DEFAULTS).dig(:dimension)
     end
 
+    # Method supported by an LLM that generates a response for a given chat-style prompt
+    def chat(...)
+      raise NotImplementedError, "#{self.class.name} does not support chat"
+    end
+
+    # Method supported by an LLM that completes a given prompt
+    def complete(...)
+      raise NotImplementedError, "#{self.class.name} does not support completion"
+    end
+
+    # Method supported by an LLM that generates an embedding for a given text or array of texts
+    def embed(...)
+      raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
+    end
+
     # Ensure that the LLM value passed in is supported
     # @param llm [Symbol] The LLM to use
     def self.validate_llm!(llm:)
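These new base methods act as guard rails: an `LLM` subclass that does not override `chat`, `complete`, or `embed` now raises a descriptive `NotImplementedError` instead of a bare `NoMethodError`. A hypothetical adapter (not part of the gem) would override only what it supports:

```ruby
module LLM
  # Hypothetical adapter, shown only to illustrate the LLM::Base contract above.
  class EchoLLM < Base
    # Supported capability: completion.
    def complete(prompt:, **_options)
      "echo: #{prompt}"
    end

    # `chat` and `embed` are intentionally not overridden, so calls to them fall
    # through to LLM::Base and raise NotImplementedError with a clear message.
  end
end
```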
data/lib/llm/cohere.rb
CHANGED
@@ -47,7 +47,9 @@ module LLM
       response.dig("generations").first.dig("text")
     end
 
-
-
+    # Cohere does not have a dedicated chat endpoint, so instead we call `complete()`
+    def chat(...)
+      complete(...)
+    end
   end
 end
data/lib/llm/hugging_face.rb
CHANGED
data/lib/llm/openai.rb
CHANGED
@@ -5,6 +5,7 @@ module LLM
     DEFAULTS = {
       temperature: 0.0,
       completion_model_name: "text-davinci-003",
+      chat_completion_model_name: "gpt-3.5-turbo",
       embeddings_model_name: "text-embedding-ada-002",
       dimension: 1536
     }.freeze
@@ -50,7 +51,25 @@ module LLM
       response.dig("choices", 0, "text")
     end
 
-
-
+    # Generate a chat completion for a given prompt
+    # @param prompt [String] The prompt to generate a chat completion for
+    # @return [String] The chat completion
+    def chat(prompt:, **params)
+      default_params = {
+        model: DEFAULTS[:chat_completion_model_name],
+        temperature: DEFAULTS[:temperature],
+        # TODO: Figure out how to introduce persisted conversations
+        messages: [{role: "user", content: prompt}]
+      }
+
+      if params[:stop_sequences]
+        default_params[:stop] = params.delete(:stop_sequences)
+      end
+
+      default_params.merge!(params)
+
+      response = client.chat(parameters: default_params)
+      response.dig("choices", 0, "message", "content")
+    end
   end
 end
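A possible call site for the new `chat` method; the client is constructed with `api_key:` exactly as `Vectorsearch::Base` does elsewhere in this diff, and `stop_sequences:` is translated into OpenAI's `stop` parameter before the request goes out:

```ruby
openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# Sends roughly: {model: "gpt-3.5-turbo", temperature: 0.0,
#                 messages: [{role: "user", content: "..."}], stop: ["Observation:"]}
answer = openai.chat(
  prompt: "What is the capital of France?",
  stop_sequences: ["Observation:"]
)
puts answer
```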
data/lib/loader.rb
ADDED
@@ -0,0 +1,26 @@
+module Loader
+  def self.with(*loaders)
+    LoaderSet.new(loaders)
+  end
+
+  class LoaderSet
+    def initialize(loaders)
+      @loaders = Array(loaders)
+    end
+
+    def load(*paths)
+      Array(paths)
+        .flatten
+        .map { |path| first_loadable_loader(path)&.load }
+        .compact
+    end
+
+    def first_loadable_loader(path)
+      @loaders
+        .each do |loader_klass|
+          loader_instance = loader_klass.new(path)
+          return(loader_instance) if loader_instance.loadable?
+        end
+    end
+  end
+end
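`Loader.with` plus `LoaderSet#load` is the plumbing that `Vectorsearch::Base#add_data` (further down in this diff) relies on: each path is handed to the first loader whose `loadable?` returns true, and paths no loader claims are dropped by `compact`. Standalone usage might look like this:

```ruby
# Parse a mixed set of files into an array of text blobs, one string per file.
texts = Loader
  .with(Loaders::PDF, Loaders::Text)
  .load(
    Langchain.root.join("path/to/my.pdf"),
    Langchain.root.join("path/to/my.txt")
  )

texts.each { |text| puts text[0, 80] } # print the first 80 characters of each document
```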
data/lib/loaders/base.rb
ADDED
data/lib/loaders/pdf.rb
ADDED
@@ -0,0 +1,34 @@
+module Loaders
+  class PDF < Base
+    #
+    # This Loader parses PDF files into text.
+    # If you'd like to use it directly you can do so like this:
+    # Loaders::PDF.new("path/to/my.pdf").load
+    #
+    # This parser is also invoked when you're adding data to a Vectorsearch DB:
+    # qdrant = Vectorsearch::Qdrant.new(...)
+    # path = Langchain.root.join("path/to/my.pdf")
+    # qdrant.add_data(path: path)
+    #
+
+    def initialize(path)
+      depends_on "pdf-reader"
+      require "pdf-reader"
+
+      @path = path
+    end
+
+    # Check that the file is a PDF file
+    def loadable?
+      @path.to_s.end_with?(".pdf")
+    end
+
+    def load
+      ::PDF::Reader
+        .new(@path)
+        .pages
+        .map(&:text)
+        .join("\n\n")
+    end
+  end
+end
data/lib/loaders/text.rb
ADDED
@@ -0,0 +1,22 @@
+module Loaders
+  class Text < Base
+    #
+    # This Loader parses .txt files.
+    # If you'd like to use it directly you can do so like this:
+    # Loaders::Text.new("path/to/my.txt").load
+    #
+    # This parser is also invoked when you're adding data to a Vectorsearch DB:
+    # qdrant = Vectorsearch::Qdrant.new(...)
+    # path = Langchain.root.join("path/to/my.txt")
+    # qdrant.add_data(path: path)
+    #
+
+    def loadable?
+      @path.to_s.end_with?(".txt")
+    end
+
+    def load
+      @path.read
+    end
+  end
+end
data/lib/tool/base.rb
CHANGED
data/lib/vectorsearch/base.rb
CHANGED
@@ -19,24 +19,37 @@ module Vectorsearch
       @llm_api_key = llm_api_key
 
       @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
+
+      @loaders = Langchain.default_loaders
     end
 
+    # Method supported by Vectorsearch DB to create a default schema
     def create_default_schema
-      raise NotImplementedError
+      raise NotImplementedError, "#{self.class.name} does not support creating a default schema"
+    end
+
+    # Method supported by Vectorsearch DB to add a list of texts to the index
+    def add_texts(...)
+      raise NotImplementedError, "#{self.class.name} does not support adding texts"
     end
 
-
-
+    # Method supported by Vectorsearch DB to search for similar texts in the index
+    def similarity_search(...)
+      raise NotImplementedError, "#{self.class.name} does not support similarity search"
     end
 
-    #
-
-
+    # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
+    # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
+    def similarity_search_by_vector(...)
+      raise NotImplementedError, "#{self.class.name} does not support similarity search by vector"
+    end
+
+    # Method supported by Vectorsearch DB to answer a question given a context (data) pulled from your Vectorsearch DB.
+    def ask(...)
+      raise NotImplementedError, "#{self.class.name} does not support asking questions"
     end
 
     def_delegators :llm_client,
-      :generate_embedding,
-      :generate_completion,
       :default_dimension
 
     def generate_prompt(question:, context:)
@@ -56,5 +69,23 @@ module Vectorsearch
 
       prompt_template.format(question: question)
     end
+
+    def add_data(path: nil, paths: nil)
+      raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
+      raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
+
+      texts =
+        Loader
+          .with(*loaders)
+          .load(path || paths)
+
+      add_texts(texts: texts)
+    end
+
+    attr_reader :loaders
+
+    def add_loader(*loaders)
+      loaders.each { |loader| @loaders << loader }
+    end
   end
 end
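`add_data` ties the loaders and the vector stores together, and `add_loader` opens the chain up to custom formats. A hedged sketch of registering one: the `Loaders::Markdown` class below is hypothetical (it assumes `Loaders::Base#initialize` stores the path in `@path`, as the shipped `PDF` and `Text` loaders do), while the `Vectorsearch::Chroma` constructor arguments mirror the example file earlier in this diff:

```ruby
module Loaders
  # Hypothetical loader, used only to illustrate the add_loader extension point.
  class Markdown < Base
    def loadable?
      @path.to_s.end_with?(".md")
    end

    def load
      @path.read
    end
  end
end

client = Vectorsearch::Chroma.new(
  url: ENV["CHROMA_URL"],
  index_name: "documents",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

# Register the extra loader, then ingest files of any supported type in one call.
client.add_loader(Loaders::Markdown)
client.add_data(paths: [
  Langchain.root.join("path/to/notes.md"),
  Langchain.root.join("path/to/my.pdf")
])
```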
data/lib/vectorsearch/chroma.rb
ADDED
@@ -0,0 +1,105 @@
+# frozen_string_literal: true
+
+module Vectorsearch
+  class Chroma < Base
+    # Initialize the Chroma client
+    # @param url [String] The URL of the Qdrant server
+    # @param api_key [String] The API key to use
+    # @param index_name [String] The name of the index to use
+    # @param llm [Symbol] The LLM to use
+    # @param llm_api_key [String] The API key for the LLM
+    def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+      depends_on "chroma-db"
+      require "chroma-db"
+
+      ::Chroma.connect_host = url
+      ::Chroma.logger = Langchain.logger
+      ::Chroma.log_level = Langchain.logger.level
+
+      @index_name = index_name
+
+      super(llm: llm, llm_api_key: llm_api_key)
+    end
+
+    # Add a list of texts to the index
+    # @param texts [Array] The list of texts to add
+    # @return [Hash] The response from the server
+    def add_texts(texts:)
+      embeddings = Array(texts).map do |text|
+        ::Chroma::Resources::Embedding.new(
+          # TODO: Add support for passing your own IDs
+          id: SecureRandom.uuid,
+          embedding: llm_client.embed(text: text),
+          # TODO: Add support for passing metadata
+          metadata: [], # metadatas[index],
+          document: text # Do we actually need to store the whole original document?
+        )
+      end
+
+      collection = ::Chroma::Resources::Collection.get(index_name)
+      collection.add(embeddings)
+    end
+
+    # Create the collection with the default schema
+    # @return [Hash] The response from the server
+    def create_default_schema
+      ::Chroma::Resources::Collection.create(index_name)
+    end
+
+    # Search for similar texts
+    # @param query [String] The text to search for
+    # @param k [Integer] The number of results to return
+    # @return [Chroma::Resources::Embedding] The response from the server
+    def similarity_search(
+      query:,
+      k: 4
+    )
+      embedding = llm_client.embed(text: query)
+
+      similarity_search_by_vector(
+        embedding: embedding,
+        k: k
+      )
+    end
+
+    # Search for similar texts by embedding
+    # @param embedding [Array] The embedding to search for
+    # @param k [Integer] The number of results to return
+    # @return [Chroma::Resources::Embedding] The response from the server
+    def similarity_search_by_vector(
+      embedding:,
+      k: 4
+    )
+      # Requesting more results than the number of documents in the collection currently throws an error in Chroma DB
+      # Temporary fix inspired by this comment: https://github.com/chroma-core/chroma/issues/301#issuecomment-1520494512
+      count = collection.count
+      n_results = [count, k].min
+
+      collection.query(query_embeddings: [embedding], results: n_results)
+    end
+
+    # Ask a question and return the answer
+    # @param question [String] The question to ask
+    # @return [String] The answer to the question
+    def ask(question:)
+      search_results = similarity_search(query: question)
+
+      context = search_results.map do |result|
+        result.document
+      end
+
+      context = context.join("\n---\n")
+
+      prompt = generate_prompt(question: question, context: context)
+
+      llm_client.chat(prompt: prompt)
+    end
+
+    private
+
+    # @return [Chroma::Resources::Collection] The collection
+    def collection
+      @collection ||= ::Chroma::Resources::Collection.get(index_name)
+    end
+  end
+end
data/lib/vectorsearch/milvus.rb
CHANGED
@@ -15,16 +15,16 @@ module Vectorsearch
     def add_texts(texts:)
       client.entities.insert(
         collection_name: index_name,
-        num_rows: texts.
+        num_rows: Array(texts).size,
         fields_data: [
           {
             field_name: "content",
             type: ::Milvus::DATA_TYPES["varchar"],
-            field: texts
+            field: Array(texts)
           }, {
             field_name: "vectors",
             type: ::Milvus::DATA_TYPES["binary_vector"],
-            field: texts.map { |text|
+            field: Array(texts).map { |text| llm_client.embed(text: text) }
           }
         ]
       )
@@ -69,7 +69,7 @@ module Vectorsearch
     end
 
     def similarity_search(query:, k: 4)
-      embedding =
+      embedding = llm_client.embed(text: query)
 
       similarity_search_by_vector(
         embedding: embedding,
@@ -88,9 +88,5 @@ module Vectorsearch
         metric_type: "L2"
       )
     end
-
-    def ask(question:)
-      raise NotImplementedError
-    end
   end
 end
data/lib/vectorsearch/pinecone.rb
CHANGED
@@ -32,7 +32,7 @@ module Vectorsearch
           # TODO: Allows passing in your own IDs
           id: SecureRandom.uuid,
           metadata: {content: text},
-          values:
+          values: llm_client.embed(text: text)
         }
       end
 
@@ -59,7 +59,7 @@ module Vectorsearch
       query:,
       k: 4
     )
-      embedding =
+      embedding = llm_client.embed(text: query)
 
       similarity_search_by_vector(
         embedding: embedding,
@@ -96,7 +96,7 @@ module Vectorsearch
 
       prompt = generate_prompt(question: question, context: context)
 
-
+      llm_client.chat(prompt: prompt)
     end
   end
 end
data/lib/vectorsearch/qdrant.rb
CHANGED
@@ -27,9 +27,9 @@ module Vectorsearch
     def add_texts(texts:)
       batch = {ids: [], vectors: [], payloads: []}
 
-      texts.each do |text|
+      Array(texts).each do |text|
         batch[:ids].push(SecureRandom.uuid)
-        batch[:vectors].push(
+        batch[:vectors].push(llm_client.embed(text: text))
         batch[:payloads].push({content: text})
       end
 
@@ -59,7 +59,7 @@ module Vectorsearch
       query:,
       k: 4
     )
-      embedding =
+      embedding = llm_client.embed(text: query)
 
       similarity_search_by_vector(
         embedding: embedding,
@@ -96,7 +96,7 @@ module Vectorsearch
 
       prompt = generate_prompt(question: question, context: context)
 
-
+      llm_client.chat(prompt: prompt)
     end
   end
 end
data/lib/vectorsearch/weaviate.rb
CHANGED
@@ -27,7 +27,7 @@ module Vectorsearch
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
     def add_texts(texts:)
-      objects = texts.map do |text|
+      objects = Array(texts).map do |text|
         {
           class: index_name,
           properties: {content: text}
@@ -113,7 +113,7 @@ module Vectorsearch
 
       prompt = generate_prompt(question: question, context: context)
 
-
+      llm_client.chat(prompt: prompt)
     end
   end
 end
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.3.
+  version: 0.3.8
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-
+date: 2023-05-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
@@ -52,6 +52,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.9.3
+- !ruby/object:Gem::Dependency
+  name: chroma-db
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.3.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.3.0
 - !ruby/object:Gem::Dependency
   name: eqn
   requirement: !ruby/object:Gem::Requirement
@@ -108,6 +122,20 @@ dependencies:
     - - "~>"
      - !ruby/object:Gem::Version
        version: 0.9.0
+- !ruby/object:Gem::Dependency
+  name: pdf-reader
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.4'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.4'
 - !ruby/object:Gem::Dependency
   name: pinecone
   requirement: !ruby/object:Gem::Requirement
@@ -185,6 +213,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".env.example"
 - ".rspec"
 - CHANGELOG.md
 - Gemfile
@@ -192,9 +221,9 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
-- examples/.keep
 - examples/create_and_manage_few_shot_prompt_templates.rb
 - examples/create_and_manage_prompt_templates.rb
+- examples/pdf_store_and_query_with_chroma.rb
 - examples/store_and_query_with_pinecone.rb
 - examples/store_and_query_with_qdrant.rb
 - examples/store_and_query_with_weaviate.rb
@@ -207,7 +236,10 @@ files:
 - lib/llm/cohere.rb
 - lib/llm/hugging_face.rb
 - lib/llm/openai.rb
-- lib/
+- lib/loader.rb
+- lib/loaders/base.rb
+- lib/loaders/pdf.rb
+- lib/loaders/text.rb
 - lib/prompt/base.rb
 - lib/prompt/few_shot_prompt_template.rb
 - lib/prompt/loading.rb
@@ -217,6 +249,7 @@ files:
 - lib/tool/serp_api.rb
 - lib/tool/wikipedia.rb
 - lib/vectorsearch/base.rb
+- lib/vectorsearch/chroma.rb
 - lib/vectorsearch/milvus.rb
 - lib/vectorsearch/pinecone.rb
 - lib/vectorsearch/qdrant.rb
data/examples/.keep
DELETED
File without changes
data/lib/logging.rb
DELETED
@@ -1,13 +0,0 @@
-# frozen_string_literal: true
-
-require "logger"
-
-module Langchain
-  def self.logger
-    @@logger ||= Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
-  end
-
-  def self.logger=(instance)
-    @@logger = instance
-  end
-end