RubyGems - vectorsearch - Versions diffs - 0.1.1 → 0.1.2 - Mend

vectorsearch 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/README.md +10 -2
data/lib/vectorsearch/base.rb +16 -0
data/lib/vectorsearch/milvus.rb +59 -1
data/lib/vectorsearch/pinecone.rb +32 -1
data/lib/vectorsearch/qdrant.rb +32 -0
data/lib/vectorsearch/version.rb +1 -1
data/lib/vectorsearch/weaviate.rb +32 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ee0ed9a4527aeaefb5488bc4263f41fd4b793f410eadfcde52be4669035be78c
-  data.tar.gz: 9321cfe450003f8bd2a8e8a3ba48ac86d905c8d90ac5c56d37009dd4c27dd79e
+  metadata.gz: 160b98d1553c63fae2e50c07dae83e80376eade573874e0fcb28a0d1f7f476ea
+  data.tar.gz: 21fbcbb750cd878ceedec2646ef8db116a839b422f751ce3959aa6da8da78ff1
 SHA512:
-  metadata.gz: 3a54fada2b58a0da4d0b34bd73595ebbe2076c2a5039cc690ad076f705610efb85bf62d1709dca8157a718ccf09ac35cea6fdd51096b4fc374d32d51705b43c8
-  data.tar.gz: 83b1a9844757253457bc2d6186b5f4ac1ba4217843eccca23d9773d180e2316f5442715d3e83342c16894bda3606e1d76d054b20be7e2c998fde7d1311dafae2
+  metadata.gz: 40991aab084c3eb16d8029b598f70227946edc72411c74de0906289d7e23b16a83fbbd7aa9325c44b6ebc42ee0cac3146d55cf9bf5b1eeb23e862f0f66da66c2
+  data.tar.gz: 1c23318876143377b826c8e619978b83e61da68b10dbed34c203ae0f35a10efeb6fa48843fada3782b3fd7c1785802b0cda1332281ebcc39230bb7ed10c0905d

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    vectorsearch (0.1.1)
+    vectorsearch (0.1.2)
       cohere-ruby (~> 0.9.1)
       milvus (~> 0.9.0)
       pinecone (~> 0.1.6)

data/README.md CHANGED Viewed

@@ -41,16 +41,24 @@ client = Vectorsearch::Weaviate.new(
     llm_api_key: ENV["OPENAI_API_KEY"]
 )
-# You instantiate any other supported vector search database:
+# You can instantiate any other supported vector search database:
 client = Vectorsearch::Milvus.new(...)
 client = Vectorsearch::Qdrant.new(...)
 client = Vectorsearch::Pinecone.new(...)
 ```
+```ruby
+# Creating the default schema
+client.create_default_schema
+```
 ```ruby
 # Store your documents in your vector search database
 client.add_texts(
-    texts: []
+    texts: [
+        "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
+        "In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
+    ]
 )
 ```

data/lib/vectorsearch/base.rb CHANGED Viewed

@@ -7,6 +7,10 @@ module Vectorsearch
   class Base
     attr_reader :client, :index_name, :llm, :llm_api_key
+    DEFAULT_METRIC = "cosine".freeze
+    DEFAULT_COHERE_DIMENSION = 1024
+    DEFAULT_OPENAI_DIMENSION = 1536
     # Currently supported LLMs
     # TODO: Add support for HuggingFace
     LLMS = %i[openai cohere].freeze
@@ -20,6 +24,10 @@ module Vectorsearch
       @llm_api_key = llm_api_key
     end
+    # Create the default schema/index in the vector search database.
+    # The base implementation always raises; the database-specific classes
+    # provide their own version.
+    # @raise [NotImplementedError] always
+    def create_default_schema
+      raise NotImplementedError, "#{self.class.name} does not implement create_default_schema"
+    end
     # TODO
     def add_texts(texts:)
       raise NotImplementedError
@@ -90,6 +98,14 @@ module Vectorsearch
     private
+    # Embedding dimension matching the configured LLM.
+    # @return [Integer, nil] DEFAULT_OPENAI_DIMENSION for :openai,
+    #   DEFAULT_COHERE_DIMENSION for :cohere, nil for any other value
+    def default_dimension
+      case llm
+      when :openai then DEFAULT_OPENAI_DIMENSION
+      when :cohere then DEFAULT_COHERE_DIMENSION
+      end
+    end
     def openai_client
       @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
     end

data/lib/vectorsearch/milvus.rb CHANGED Viewed

@@ -19,6 +19,64 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
+    # Embed each text and insert the texts together with their embeddings
+    # into the collection named by index_name
+    # @param texts [Array] The list of texts to add
+    # @return The result of client.entities.insert
+    def add_texts(
+      texts:
+    )
+      client.entities.insert(
+        collection_name: index_name,
+        num_rows: texts.count,
+        fields_data: [
+          {
+            field_name: "content",
+            type: ::Milvus::DATA_TYPES["varchar"],
+            field: texts
+          }, {
+            field_name: "vectors",
+            # LLM embeddings are arrays of floats, so this column must be a
+            # float vector; it has to match the type declared in
+            # create_default_schema below.
+            type: ::Milvus::DATA_TYPES["float_vector"],
+            field: texts.map { |text| generate_embedding(text: text) }
+          }
+        ]
+      )
+    end
+    # Create the default collection: an auto-generated int64 primary key,
+    # a varchar "content" field and a float "vectors" field sized by
+    # default_dimension
+    # @return [Hash] The response from the server
+    def create_default_schema
+      client.collections.create(
+        auto_id: true,
+        collection_name: index_name,
+        description: "Default schema created by Vectorsearch",
+        fields: [
+          {
+            name: "id",
+            is_primary_key: true,
+            autoID: true,
+            data_type: ::Milvus::DATA_TYPES["int64"]
+          }, {
+            name: "content",
+            is_primary_key: false,
+            data_type: ::Milvus::DATA_TYPES["varchar"],
+            type_params: [
+              {
+                key: "max_length",
+                value: "32768" # Largest allowed value
+              }
+            ]
+          }, {
+            name: "vectors",
+            # Float vector (not binary): the field stores LLM embeddings and
+            # must agree with the type used by add_texts above.
+            data_type: ::Milvus::DATA_TYPES["float_vector"],
+            is_primary_key: false,
+            type_params: [
+              {
+                key: "dim",
+                value: default_dimension.to_s
+              }
+            ]
+          }
+        ]
+      )
+    end
     def similarity_search(
       query:,
       k: 4
@@ -41,7 +99,7 @@ module Vectorsearch
         vectors: [ embedding ],
         dsl_type: 1,
         params: "{\"nprobe\": 10}",
-        anns_field: "book_intro", # Should it get all abstracted away to "content" field?
+        anns_field: "content",
         metric_type: "L2"
       )
     end

data/lib/vectorsearch/pinecone.rb CHANGED Viewed

@@ -22,6 +22,36 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
+    # Embed every text and upsert the resulting vectors into the index;
+    # the original text is kept in each vector's metadata under :content
+    # @param texts [Array] The list of texts to add
+    # @return [Hash] The response from the server
+    def add_texts(
+      texts:
+    )
+      # TODO: Allow passing in your own IDs
+      records = texts.map do |text|
+        { id: SecureRandom.uuid, metadata: { content: text }, values: generate_embedding(text: text) }
+      end
+      client.index(index_name).upsert(vectors: records)
+    end
+    # Create the index named by index_name, using DEFAULT_METRIC and the
+    # dimension that matches the configured LLM
+    # @return [Hash] The response from the server
+    def create_default_schema
+      client.create_index(metric: DEFAULT_METRIC, name: index_name, dimension: default_dimension)
+    end
     def similarity_search(
       query:,
       k: 4
@@ -40,12 +70,13 @@ module Vectorsearch
     )
       index = client.index(index_name)
-      index.query(
+      response = index.query(
         vector: embedding,
         top_k: k,
         include_values: true,
         include_metadata: true
       )
+      response.dig("matches")
     end
     def ask(question:)

data/lib/vectorsearch/qdrant.rb CHANGED Viewed

@@ -20,6 +20,38 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
+    # Embed every text and upsert it as one batch: each point gets a
+    # generated UUID, its embedding, and the original text as payload
+    # @param texts [Array] The list of texts to add
+    # @return [Hash] The response from the server
+    def add_texts(
+      texts:
+    )
+      points = texts.each_with_object({ ids: [], vectors: [], payloads: [] }) do |text, acc|
+        acc[:ids] << SecureRandom.uuid
+        acc[:vectors] << generate_embedding(text: text)
+        acc[:payloads] << { content: text }
+      end
+      client.points.upsert(collection_name: index_name, batch: points)
+    end
+    # Create the collection named by index_name with the default vector
+    # settings (capitalized DEFAULT_METRIC as distance, default_dimension as size)
+    # @return [Hash] The response from the server
+    def create_default_schema
+      client.collections.create(
+        collection_name: index_name,
+        vectors: { distance: DEFAULT_METRIC.capitalize, size: default_dimension }
+      )
+    end
     def similarity_search(
       query:,
       k: 4

data/lib/vectorsearch/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Vectorsearch
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

data/lib/vectorsearch/weaviate.rb CHANGED Viewed

@@ -22,6 +22,37 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
+    # Store every text as an object of the index_name class in one batch request
+    # @param texts [Array] The list of texts to add
+    # @return The result of client.objects.batch_create
+    def add_texts(
+      texts:
+    )
+      objects = texts.map do |text|
+        {
+          # The batch payload is forwarded to Weaviate as-is, and Weaviate
+          # identifies the target class by the "class" key.
+          class: index_name,
+          properties: { content: text }
+        }
+      end
+      client.objects.batch_create(objects: objects)
+    end
+    # Create the index_name class with a single text property, "content",
+    # vectorized by the text2vec module named after the configured LLM
+    # @return The result of client.schema.create
+    def create_default_schema
+      client.schema.create(
+        class_name: index_name,
+        vectorizer: "text2vec-#{llm}",
+        properties: [{ dataType: ["text"], name: "content" }]
+      )
+    end
     # Return documents similar to the query
     # @param query [String] The query to search for
     # @param k [Integer|String] The number of results to return
@@ -36,7 +67,7 @@ module Vectorsearch
         class_name: index_name,
         near_text: near_text,
         limit: k.to_s,
-        fields: "content recipe_id"
+        fields: "content _additional { id }"
       )
     end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: vectorsearch
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-04-30 00:00:00.000000000 Z
+date: 2023-05-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pry-byebug