vectorsearch 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4c9913804bc8aaadc08a60c0a250e2923f2a4ceb28633fdd99e4bf86544203b4
-  data.tar.gz: 8db3a77121d948f6ed709618da5bd4411a87a01aba26feb7942e74e0dd18f207
+  metadata.gz: ee0ed9a4527aeaefb5488bc4263f41fd4b793f410eadfcde52be4669035be78c
+  data.tar.gz: 9321cfe450003f8bd2a8e8a3ba48ac86d905c8d90ac5c56d37009dd4c27dd79e
 SHA512:
-  metadata.gz: d76d13ee23c7219483eac27a37ae61cad335bbd2d4169a76723d19a0e334ab6ac01037923cb2c07a64c853280d2449e54c0107fba7f36a231a624efaf1b68b46
-  data.tar.gz: 43612526795e54138ec0c891e5bbdef2ef712ac287a4776680d26d94e8cabcb9d5976774b5144cbeb3ae95b367e26460230931041f011f1a69671e014060684f
+  metadata.gz: 3a54fada2b58a0da4d0b34bd73595ebbe2076c2a5039cc690ad076f705610efb85bf62d1709dca8157a718ccf09ac35cea6fdd51096b4fc374d32d51705b43c8
+  data.tar.gz: 83b1a9844757253457bc2d6186b5f4ac1ba4217843eccca23d9773d180e2316f5442715d3e83342c16894bda3606e1d76d054b20be7e2c998fde7d1311dafae2
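The entries above are ordinary SHA256/SHA512 hex digests of the two archives inside the gem. As a sketch (computed over an in-memory string rather than a real `.gem` member, using only Ruby's stdlib `digest`), the same style of digest looks like this:

```ruby
require "digest"

# Sketch: producing the kind of hex digests recorded in checksums.yaml.
# The input string is a stand-in for the real metadata.gz / data.tar.gz bytes.
data = "example archive contents"

sha256 = Digest::SHA256.hexdigest(data)  # 64 hex characters
sha512 = Digest::SHA512.hexdigest(data)  # 128 hex characters

puts sha256
puts sha512
```

Comparing a freshly computed digest against the published value is how a registry client verifies a downloaded package was not corrupted or tampered with.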
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    vectorsearch (0.1.0)
+    vectorsearch (0.1.1)
       cohere-ruby (~> 0.9.1)
       milvus (~> 0.9.0)
       pinecone (~> 0.1.6)
data/README.md CHANGED
@@ -1,6 +1,7 @@
 # Vectorsearch
+![Tests status](https://github.com/andreibondarev/vectorsearch/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/vectorsearch.svg)](https://badge.fury.io/rb/vectorsearch)
 
-Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straight-forward DSL and abstract developers away from overly complex machine learning/data science-specific configurations.
+Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk data, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straight-forward DSL and abstracts away overly complicated machine learning/data science-specific configurations and concepts
 
 ## Installation
 
@@ -20,12 +21,12 @@ require "vectorsearch"
 
 List of currently supported vector search databases and features:
 
-| Database | Querying | Storage |
-| -------------------------------------- |
-| Weaviate | :white_check_mark: | WIP |
-| Qdrant | :white_check_mark: | WIP |
-| Milvus | :white_check_mark: | WIP |
-| Pinecone | :white_check_mark: | WIP |
+| Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
+| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
+| Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
+| Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
+| Milvus | :white_check_mark: | WIP | WIP | WIP | | |
+| Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
 
 ### Create an instance
 
@@ -48,8 +49,8 @@ client = Vectorsearch::Pinecone.new(...)
 
 ```ruby
 # Store your documents in your vector search database
-client.add_documents(
-  documents: []
+client.add_texts(
+  texts: []
 )
 ```
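The rename from `add_documents(documents:)` to `add_texts(texts:)` is a breaking change for callers. A hypothetical stub (not the real Vectorsearch client) shows the failure mode old call sites will hit after upgrading:

```ruby
# Hypothetical stub standing in for a Vectorsearch client after the rename;
# the real class wires these texts through to the vector database.
class StubClient
  def add_texts(texts:)
    texts.length
  end
end

client = StubClient.new
client.add_texts(texts: ["doc one", "doc two"])  # the new 0.1.1 API

begin
  client.add_documents(documents: [])            # the 0.1.0 spelling is gone
rescue NoMethodError => e
  puts e.class
end
```

Call sites using the old method name raise `NoMethodError` at runtime, so the upgrade requires a search-and-replace rather than a gradual migration.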
 
@@ -7,8 +7,12 @@ module Vectorsearch
   class Base
     attr_reader :client, :index_name, :llm, :llm_api_key
 
+    # Currently supported LLMs
+    # TODO: Add support for HuggingFace
     LLMS = %i[openai cohere].freeze
 
+    # @param llm [Symbol] The LLM to use
+    # @param llm_api_key [String] The API key for the LLM
     def initialize(llm:, llm_api_key:)
       validate_llm!(llm: llm)
 
@@ -16,27 +20,84 @@ module Vectorsearch
       @llm_api_key = llm_api_key
     end
 
+    # TODO
+    def add_texts(texts:)
+      raise NotImplementedError
+    end
+
+    # NotImplementedError will be raised if the subclass does not implement the `ask()` method
+    def ask(question:)
+      raise NotImplementedError
+    end
+
+    # Generate an embedding for a given text
+    # Currently supports OpenAI and Cohere
+    # The LLM-related method will most likely need to be abstracted out into a separate class
+    # @param text [String] The text to generate an embedding for
+    # @return [String] The embedding
     def generate_embedding(text:)
       case llm
       when :openai
-        response = OpenAI::Client.new(access_token: llm_api_key)
-          .embeddings(
-            parameters: {
-              model: "text-embedding-ada-002",
-              input: text
-            }
-          )
+        response = openai_client.embeddings(
+          parameters: {
+            model: "text-embedding-ada-002",
+            input: text
+          }
+        )
         response.dig("data").first.dig("embedding")
       when :cohere
-        response = Cohere::Client.new(api_key: llm_api_key)
-          .embed(
-            texts: [text],
-            model: "small"
-          )
+        response = cohere_client.embed(
+          texts: [text],
+          model: "small"
+        )
         response.dig("embeddings").first
       end
     end
 
+    # Generate a completion for a given prompt
+    # Currently supports OpenAI and Cohere
+    # The LLM-related method will most likely need to be abstracted out into a separate class
+    # @param prompt [String] The prompt to generate a completion for
+    # @return [String] The completion
+    def generate_completion(prompt:)
+      case llm
+      when :openai
+        response = openai_client.completions(
+          parameters: {
+            model: "text-davinci-003",
+            temperature: 0.0,
+            prompt: prompt
+          }
+        )
+        response.dig("choices").first.dig("text")
+      when :cohere
+        response = cohere_client.generate(
+          prompt: prompt,
+          temperature: 0.0
+        )
+        response.dig("generations").first.dig("text")
+      end
+    end
+
+    def generate_prompt(question:, context:)
+      "Context:\n" +
+        "#{context}\n" +
+        "---\n" +
+        "Question: #{question}\n" +
+        "---\n" +
+        "Answer:"
+    end
+
+    private
+
+    def openai_client
+      @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
+    end
+
+    def cohere_client
+      @cohere_client ||= Cohere::Client.new(api_key: llm_api_key)
+    end
+
     def validate_llm!(llm:)
       raise ArgumentError, "LLM must be one of #{LLMS}" unless LLMS.include?(llm)
     end
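The new `generate_prompt` helper is plain string concatenation with no gem dependencies, so its template can be reproduced standalone (same body as in the diff above):

```ruby
# Standalone copy of the prompt template added to Vectorsearch::Base.
def generate_prompt(question:, context:)
  "Context:\n" +
    "#{context}\n" +
    "---\n" +
    "Question: #{question}\n" +
    "---\n" +
    "Answer:"
end

puts generate_prompt(
  question: "What is Ruby?",
  context: "Ruby is a programming language."
)
```

The template ends with a bare `Answer:` so that the completion model's continuation is the answer itself, with no extra parsing needed.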
@@ -49,7 +49,16 @@ module Vectorsearch
     end
 
     def ask(question:)
-      raise NotImplementedError
+      search_results = similarity_search(query: question)
+
+      context = search_results.dig("matches").map do |result|
+        result.dig("metadata").to_s
+      end
+      context = context.join("\n---\n")
+
+      prompt = generate_prompt(question: question, context: context)
+
+      generate_completion(prompt: prompt)
     end
   end
 end
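The context-building step in this `ask` implementation is plain Hash/Array work. A sketch with a hand-written `search_results` hash (a hypothetical stand-in for a real `similarity_search` response carrying `matches`/`metadata` keys) isolates it:

```ruby
# Hypothetical similarity_search response shaped like the one this ask()
# implementation consumes ("matches" entries, each carrying "metadata").
search_results = {
  "matches" => [
    { "metadata" => { "text" => "Ruby is dynamic." } },
    { "metadata" => { "text" => "Ruby is object-oriented." } }
  ]
}

# Same transformation as the diff: stringify each match's metadata,
# then join the pieces with a "---" separator for the prompt.
context = search_results.dig("matches").map do |result|
  result.dig("metadata").to_s
end
context = context.join("\n---\n")

puts context
```

The joined string is then handed to `generate_prompt` as the `context:` argument, making this a minimal retrieval-augmented-generation loop.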
@@ -45,7 +45,16 @@ module Vectorsearch
     end
 
     def ask(question:)
-      raise NotImplementedError
+      search_results = similarity_search(query: question)
+
+      context = search_results.dig("result").map do |result|
+        result.dig("payload").to_s
+      end
+      context = context.join("\n---\n")
+
+      prompt = generate_prompt(question: question, context: context)
+
+      generate_completion(prompt: prompt)
     end
   end
 end
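Both `ask` implementations lean on the memoized `openai_client` / `cohere_client` helpers added to `Base`. The `@x ||=` pattern they use can be sketched in isolation (the `ExpensiveClient` class below is hypothetical, standing in for an API client that is costly to construct):

```ruby
# Hypothetical stand-in for an API client that is costly to construct.
class ExpensiveClient
end

class Holder
  attr_reader :builds

  def initialize
    @builds = 0
  end

  # Same memoization shape as the new openai_client / cohere_client helpers:
  # the client is constructed on first access and reused afterwards.
  def client
    @client ||= begin
      @builds += 1
      ExpensiveClient.new
    end
  end
end

holder = Holder.new
first = holder.client
second = holder.client
puts holder.builds  # the client was constructed exactly once
```

One caveat of `||=` memoization: if the memoized expression could legitimately return `nil` or `false`, it would be re-evaluated on every call; that is not an issue here since client constructors return objects.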
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Vectorsearch
-  VERSION = "0.1.0"
+  VERSION = "0.1.1"
 end
@@ -22,26 +22,15 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
 
-    def add_texts(
-      texts:
-    )
-      texts.each do |text|
-        text['class'] = index_name
-      end
-
-      client.batch_create(
-        objects: texts
-      )
-    end
-
     # Return documents similar to the query
+    # @param query [String] The query to search for
+    # @param k [Integer|String] The number of results to return
+    # @return [Hash] The search results
     def similarity_search(
       query:,
       k: 4
     )
-      near_text = "{
-        concepts: [\"#{query}\"],
-      }"
+      near_text = "{ concepts: [\"#{query}\"] }"
 
       client.query.get(
         class_name: index_name,
@@ -51,6 +40,10 @@ module Vectorsearch
       )
     end
 
+    # Return documents similar to the vector
+    # @param embedding [Array] The vector to search for
+    # @param k [Integer|String] The number of results to return
+    # @return [Hash] The search results
     def similarity_search_by_vector(
       embedding:,
       k: 4
@@ -65,17 +58,34 @@ module Vectorsearch
       )
     end
 
+    # Ask a question and return the answer
+    # @param question [String] The question to ask
+    # @return [Hash] The answer
     def ask(
       question:
     )
-      ask_object = "{ question: \"#{question}\" }"
+      # Weaviate currently supports the `ask:` parameter only for the OpenAI LLM (with `qna-openai` module enabled).
+      if llm == :openai
+        ask_object = "{ question: \"#{question}\" }"
 
-      client.query.get(
-        class_name: index_name,
-        ask: ask_object,
-        limit: "1",
-        fields: "_additional { answer { result } }"
-      )
+        client.query.get(
+          class_name: index_name,
+          ask: ask_object,
+          limit: "1",
+          fields: "_additional { answer { result } }"
+        )
+      elsif llm == :cohere
+        search_results = similarity_search(query: question)
+
+        context = search_results.map do |result|
+          result.dig("content").to_s
+        end
+        context = context.join("\n---\n")
+
+        prompt = generate_prompt(question: question, context: context)
+
+        generate_completion(prompt: prompt)
+      end
    end
   end
 end
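The `near_text` and `ask_object` values in the Weaviate adapter are GraphQL argument fragments built by plain string interpolation. A dependency-free sketch of both (no Weaviate client assumed):

```ruby
query = "What is a vector database?"

# Compacted single-line form introduced in 0.1.1:
near_text = "{ concepts: [\"#{query}\"] }"

# Built only on the :openai branch of ask():
ask_object = "{ question: \"#{query}\" }"

puts near_text
puts ask_object
```

Note the raw interpolation: a query containing a double quote or backslash would break the GraphQL fragment, so callers passing untrusted input may want to escape it first.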
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: vectorsearch
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Andrei Bondarev