vectorsearch 0.1.1 → 0.1.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee0ed9a4527aeaefb5488bc4263f41fd4b793f410eadfcde52be4669035be78c
4
- data.tar.gz: 9321cfe450003f8bd2a8e8a3ba48ac86d905c8d90ac5c56d37009dd4c27dd79e
3
+ metadata.gz: 160b98d1553c63fae2e50c07dae83e80376eade573874e0fcb28a0d1f7f476ea
4
+ data.tar.gz: 21fbcbb750cd878ceedec2646ef8db116a839b422f751ce3959aa6da8da78ff1
5
5
  SHA512:
6
- metadata.gz: 3a54fada2b58a0da4d0b34bd73595ebbe2076c2a5039cc690ad076f705610efb85bf62d1709dca8157a718ccf09ac35cea6fdd51096b4fc374d32d51705b43c8
7
- data.tar.gz: 83b1a9844757253457bc2d6186b5f4ac1ba4217843eccca23d9773d180e2316f5442715d3e83342c16894bda3606e1d76d054b20be7e2c998fde7d1311dafae2
6
+ metadata.gz: 40991aab084c3eb16d8029b598f70227946edc72411c74de0906289d7e23b16a83fbbd7aa9325c44b6ebc42ee0cac3146d55cf9bf5b1eeb23e862f0f66da66c2
7
+ data.tar.gz: 1c23318876143377b826c8e619978b83e61da68b10dbed34c203ae0f35a10efeb6fa48843fada3782b3fd7c1785802b0cda1332281ebcc39230bb7ed10c0905d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- vectorsearch (0.1.1)
4
+ vectorsearch (0.1.2)
5
5
  cohere-ruby (~> 0.9.1)
6
6
  milvus (~> 0.9.0)
7
7
  pinecone (~> 0.1.6)
data/README.md CHANGED
@@ -41,16 +41,24 @@ client = Vectorsearch::Weaviate.new(
41
41
  llm_api_key: ENV["OPENAI_API_KEY"]
42
42
  )
43
43
 
44
- # You instantiate any other supported vector search database:
44
+ # You can instantiate any other supported vector search database:
45
45
  client = Vectorsearch::Milvus.new(...)
46
46
  client = Vectorsearch::Qdrant.new(...)
47
47
  client = Vectorsearch::Pinecone.new(...)
48
48
  ```
49
49
 
50
+ ```ruby
51
+ # Creating the default schema
52
+ client.create_default_schema
53
+ ```
54
+
50
55
  ```ruby
51
56
  # Store your documents in your vector search database
52
57
  client.add_texts(
53
- texts: []
58
+ texts: [
59
+ "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
60
+ "In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
61
+ ]
54
62
  )
55
63
  ```
56
64
 
@@ -7,6 +7,10 @@ module Vectorsearch
7
7
  class Base
8
8
  attr_reader :client, :index_name, :llm, :llm_api_key
9
9
 
10
+ DEFAULT_METRIC = "cosine".freeze
11
+ DEFAULT_COHERE_DIMENSION = 1024
12
+ DEFAULT_OPENAI_DIMENSION = 1536
13
+
10
14
  # Currently supported LLMs
11
15
  # TODO: Add support for HuggingFace
12
16
  LLMS = %i[openai cohere].freeze
@@ -20,6 +24,10 @@ module Vectorsearch
20
24
  @llm_api_key = llm_api_key
21
25
  end
22
26
 
27
+ def create_default_schema
28
+ raise NotImplementedError
29
+ end
30
+
23
31
  # TODO
24
32
  def add_texts(texts:)
25
33
  raise NotImplementedError
@@ -90,6 +98,14 @@ module Vectorsearch
90
98
 
91
99
  private
92
100
 
101
+ def default_dimension
102
+ if llm == :openai
103
+ DEFAULT_OPENAI_DIMENSION
104
+ elsif llm == :cohere
105
+ DEFAULT_COHERE_DIMENSION
106
+ end
107
+ end
108
+
93
109
  def openai_client
94
110
  @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
95
111
  end
@@ -19,6 +19,64 @@ module Vectorsearch
19
19
  super(llm: llm, llm_api_key: llm_api_key)
20
20
  end
21
21
 
22
+ def add_texts(
23
+ texts:
24
+ )
25
+ client.entities.insert(
26
+ collection_name: index_name,
27
+ num_rows: texts.count,
28
+ fields_data: [
29
+ {
30
+ field_name: "content",
31
+ type: ::Milvus::DATA_TYPES["varchar"],
32
+ field: texts
33
+ }, {
34
+ field_name: "vectors",
35
+ type: ::Milvus::DATA_TYPES["binary_vector"],
36
+ field: texts.map { |text| generate_embedding(text: text) }
37
+ }
38
+ ]
39
+ )
40
+ end
41
+
42
+ # Create default schema
43
+ # @return [Hash] The response from the server
44
+ def create_default_schema
45
+ client.collections.create(
46
+ auto_id: true,
47
+ collection_name: index_name,
48
+ description: "Default schema created by Vectorsearch",
49
+ fields: [
50
+ {
51
+ name: "id",
52
+ is_primary_key: true,
53
+ autoID: true,
54
+ data_type: ::Milvus::DATA_TYPES["int64"]
55
+ }, {
56
+ name: "content",
57
+ is_primary_key: false,
58
+ data_type: ::Milvus::DATA_TYPES["varchar"],
59
+ type_params: [
60
+ {
61
+ key: "max_length",
62
+ value: "32768" # Largest allowed value
63
+ }
64
+ ]
65
+ }, {
66
+ name: "vectors",
67
+ data_type: ::Milvus::DATA_TYPES["binary_vector"],
68
+ is_primary_key: false,
69
+ type_params: [
70
+ {
71
+ key: "dim",
72
+ value: default_dimension.to_s
73
+ }
74
+ ]
75
+ }
76
+ ]
77
+ )
78
+ end
79
+
22
80
  def similarity_search(
23
81
  query:,
24
82
  k: 4
@@ -41,7 +99,7 @@ module Vectorsearch
41
99
  vectors: [ embedding ],
42
100
  dsl_type: 1,
43
101
  params: "{\"nprobe\": 10}",
44
- anns_field: "book_intro", # Should it get all abstracted away to "content" field?
102
+ anns_field: "content",
45
103
  metric_type: "L2"
46
104
  )
47
105
  end
@@ -22,6 +22,36 @@ module Vectorsearch
22
22
  super(llm: llm, llm_api_key: llm_api_key)
23
23
  end
24
24
 
25
+ # Add a list of texts to the index
26
+ # @param texts [Array] The list of texts to add
27
+ # @return [Hash] The response from the server
28
+ def add_texts(
29
+ texts:
30
+ )
31
+ vectors = texts.map do |text|
32
+ {
33
+ # TODO: Allows passing in your own IDs
34
+ id: SecureRandom.uuid,
35
+ metadata: { content: text },
36
+ values: generate_embedding(text: text)
37
+ }
38
+ end
39
+
40
+ index = client.index(index_name)
41
+
42
+ index.upsert(vectors: vectors)
43
+ end
44
+
45
+ # Create the index with the default schema
46
+ # @return [Hash] The response from the server
47
+ def create_default_schema
48
+ client.create_index(
49
+ metric: DEFAULT_METRIC,
50
+ name: index_name,
51
+ dimension: default_dimension
52
+ )
53
+ end
54
+
25
55
  def similarity_search(
26
56
  query:,
27
57
  k: 4
@@ -40,12 +70,13 @@ module Vectorsearch
40
70
  )
41
71
  index = client.index(index_name)
42
72
 
43
- index.query(
73
+ response = index.query(
44
74
  vector: embedding,
45
75
  top_k: k,
46
76
  include_values: true,
47
77
  include_metadata: true
48
78
  )
79
+ response.dig("matches")
49
80
  end
50
81
 
51
82
  def ask(question:)
@@ -20,6 +20,38 @@ module Vectorsearch
20
20
  super(llm: llm, llm_api_key: llm_api_key)
21
21
  end
22
22
 
23
+ # Add a list of texts to the index
24
+ # @param texts [Array] The list of texts to add
25
+ # @return [Hash] The response from the server
26
+ def add_texts(
27
+ texts:
28
+ )
29
+ batch = { ids: [], vectors: [], payloads: [] }
30
+
31
+ texts.each do |text|
32
+ batch[:ids].push(SecureRandom.uuid)
33
+ batch[:vectors].push(generate_embedding(text: text))
34
+ batch[:payloads].push({ content: text })
35
+ end
36
+
37
+ client.points.upsert(
38
+ collection_name: index_name,
39
+ batch: batch
40
+ )
41
+ end
42
+
43
+ # Create the index with the default schema
44
+ # @return [Hash] The response from the server
45
+ def create_default_schema
46
+ client.collections.create(
47
+ collection_name: index_name,
48
+ vectors: {
49
+ distance: DEFAULT_METRIC.capitalize,
50
+ size: default_dimension
51
+ }
52
+ )
53
+ end
54
+
23
55
  def similarity_search(
24
56
  query:,
25
57
  k: 4
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vectorsearch
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
@@ -22,6 +22,37 @@ module Vectorsearch
22
22
  super(llm: llm, llm_api_key: llm_api_key)
23
23
  end
24
24
 
25
+ def add_texts(
26
+ texts:
27
+ )
28
+ objects = []
29
+ texts.each do |text|
30
+ objects.push({
31
+ class_name: index_name,
32
+ properties: {
33
+ content: text
34
+ }
35
+ })
36
+ end
37
+
38
+ client.objects.batch_create(
39
+ objects: objects
40
+ )
41
+ end
42
+
43
+ def create_default_schema
44
+ client.schema.create(
45
+ class_name: index_name,
46
+ vectorizer: "text2vec-#{llm.to_s}",
47
+ properties: [
48
+ {
49
+ dataType: ["text"],
50
+ name: "content"
51
+ }
52
+ ]
53
+ )
54
+ end
55
+
25
56
  # Return documents similar to the query
26
57
  # @param query [String] The query to search for
27
58
  # @param k [Integer|String] The number of results to return
@@ -36,7 +67,7 @@ module Vectorsearch
36
67
  class_name: index_name,
37
68
  near_text: near_text,
38
69
  limit: k.to_s,
39
- fields: "content recipe_id"
70
+ fields: "content _additional { id }"
40
71
  )
41
72
  end
42
73
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vectorsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-04-30 00:00:00.000000000 Z
11
+ date: 2023-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pry-byebug