vectorsearch 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee0ed9a4527aeaefb5488bc4263f41fd4b793f410eadfcde52be4669035be78c
4
- data.tar.gz: 9321cfe450003f8bd2a8e8a3ba48ac86d905c8d90ac5c56d37009dd4c27dd79e
3
+ metadata.gz: 160b98d1553c63fae2e50c07dae83e80376eade573874e0fcb28a0d1f7f476ea
4
+ data.tar.gz: 21fbcbb750cd878ceedec2646ef8db116a839b422f751ce3959aa6da8da78ff1
5
5
  SHA512:
6
- metadata.gz: 3a54fada2b58a0da4d0b34bd73595ebbe2076c2a5039cc690ad076f705610efb85bf62d1709dca8157a718ccf09ac35cea6fdd51096b4fc374d32d51705b43c8
7
- data.tar.gz: 83b1a9844757253457bc2d6186b5f4ac1ba4217843eccca23d9773d180e2316f5442715d3e83342c16894bda3606e1d76d054b20be7e2c998fde7d1311dafae2
6
+ metadata.gz: 40991aab084c3eb16d8029b598f70227946edc72411c74de0906289d7e23b16a83fbbd7aa9325c44b6ebc42ee0cac3146d55cf9bf5b1eeb23e862f0f66da66c2
7
+ data.tar.gz: 1c23318876143377b826c8e619978b83e61da68b10dbed34c203ae0f35a10efeb6fa48843fada3782b3fd7c1785802b0cda1332281ebcc39230bb7ed10c0905d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- vectorsearch (0.1.1)
4
+ vectorsearch (0.1.2)
5
5
  cohere-ruby (~> 0.9.1)
6
6
  milvus (~> 0.9.0)
7
7
  pinecone (~> 0.1.6)
data/README.md CHANGED
@@ -41,16 +41,24 @@ client = Vectorsearch::Weaviate.new(
41
41
  llm_api_key: ENV["OPENAI_API_KEY"]
42
42
  )
43
43
 
44
- # You instantiate any other supported vector search database:
44
+ # You can instantiate any other supported vector search database:
45
45
  client = Vectorsearch::Milvus.new(...)
46
46
  client = Vectorsearch::Qdrant.new(...)
47
47
  client = Vectorsearch::Pinecone.new(...)
48
48
  ```
49
49
 
50
+ ```ruby
51
+ # Creating the default schema
52
+ client.create_default_schema
53
+ ```
54
+
50
55
  ```ruby
51
56
  # Store your documents in your vector search database
52
57
  client.add_texts(
53
- texts: []
58
+ texts: [
59
+ "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
60
+ "In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
61
+ ]
54
62
  )
55
63
  ```
56
64
 
@@ -7,6 +7,10 @@ module Vectorsearch
7
7
  class Base
8
8
  attr_reader :client, :index_name, :llm, :llm_api_key
9
9
 
10
+ DEFAULT_METRIC = "cosine".freeze
11
+ DEFAULT_COHERE_DIMENSION = 1024
12
+ DEFAULT_OPENAI_DIMENSION = 1536
13
+
10
14
  # Currently supported LLMs
11
15
  # TODO: Add support for HuggingFace
12
16
  LLMS = %i[openai cohere].freeze
@@ -20,6 +24,10 @@ module Vectorsearch
20
24
  @llm_api_key = llm_api_key
21
25
  end
22
26
 
27
+ def create_default_schema
28
+ raise NotImplementedError
29
+ end
30
+
23
31
  # TODO
24
32
  def add_texts(texts:)
25
33
  raise NotImplementedError
@@ -90,6 +98,14 @@ module Vectorsearch
90
98
 
91
99
  private
92
100
 
101
+ def default_dimension
102
+ if llm == :openai
103
+ DEFAULT_OPENAI_DIMENSION
104
+ elsif llm == :cohere
105
+ DEFAULT_COHERE_DIMENSION
106
+ end
107
+ end
108
+
93
109
  def openai_client
94
110
  @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
95
111
  end
@@ -19,6 +19,64 @@ module Vectorsearch
19
19
  super(llm: llm, llm_api_key: llm_api_key)
20
20
  end
21
21
 
22
+ def add_texts(
23
+ texts:
24
+ )
25
+ client.entities.insert(
26
+ collection_name: index_name,
27
+ num_rows: texts.count,
28
+ fields_data: [
29
+ {
30
+ field_name: "content",
31
+ type: ::Milvus::DATA_TYPES["varchar"],
32
+ field: texts
33
+ }, {
34
+ field_name: "vectors",
35
+ type: ::Milvus::DATA_TYPES["binary_vector"],
36
+ field: texts.map { |text| generate_embedding(text: text) }
37
+ }
38
+ ]
39
+ )
40
+ end
41
+
42
+ # Create default schema
43
+ # @return [Hash] The response from the server
44
+ def create_default_schema
45
+ client.collections.create(
46
+ auto_id: true,
47
+ collection_name: index_name,
48
+ description: "Default schema created by Vectorsearch",
49
+ fields: [
50
+ {
51
+ name: "id",
52
+ is_primary_key: true,
53
+ autoID: true,
54
+ data_type: ::Milvus::DATA_TYPES["int64"]
55
+ }, {
56
+ name: "content",
57
+ is_primary_key: false,
58
+ data_type: ::Milvus::DATA_TYPES["varchar"],
59
+ type_params: [
60
+ {
61
+ key: "max_length",
62
+ value: "32768" # Largest allowed value
63
+ }
64
+ ]
65
+ }, {
66
+ name: "vectors",
67
+ data_type: ::Milvus::DATA_TYPES["binary_vector"],
68
+ is_primary_key: false,
69
+ type_params: [
70
+ {
71
+ key: "dim",
72
+ value: default_dimension.to_s
73
+ }
74
+ ]
75
+ }
76
+ ]
77
+ )
78
+ end
79
+
22
80
  def similarity_search(
23
81
  query:,
24
82
  k: 4
@@ -41,7 +99,7 @@ module Vectorsearch
41
99
  vectors: [ embedding ],
42
100
  dsl_type: 1,
43
101
  params: "{\"nprobe\": 10}",
44
- anns_field: "book_intro", # Should it get all abstracted away to "content" field?
102
+ anns_field: "content",
45
103
  metric_type: "L2"
46
104
  )
47
105
  end
@@ -22,6 +22,36 @@ module Vectorsearch
22
22
  super(llm: llm, llm_api_key: llm_api_key)
23
23
  end
24
24
 
25
+ # Add a list of texts to the index
26
+ # @param texts [Array] The list of texts to add
27
+ # @return [Hash] The response from the server
28
+ def add_texts(
29
+ texts:
30
+ )
31
+ vectors = texts.map do |text|
32
+ {
33
+ # TODO: Allows passing in your own IDs
34
+ id: SecureRandom.uuid,
35
+ metadata: { content: text },
36
+ values: generate_embedding(text: text)
37
+ }
38
+ end
39
+
40
+ index = client.index(index_name)
41
+
42
+ index.upsert(vectors: vectors)
43
+ end
44
+
45
+ # Create the index with the default schema
46
+ # @return [Hash] The response from the server
47
+ def create_default_schema
48
+ client.create_index(
49
+ metric: DEFAULT_METRIC,
50
+ name: index_name,
51
+ dimension: default_dimension
52
+ )
53
+ end
54
+
25
55
  def similarity_search(
26
56
  query:,
27
57
  k: 4
@@ -40,12 +70,13 @@ module Vectorsearch
40
70
  )
41
71
  index = client.index(index_name)
42
72
 
43
- index.query(
73
+ response = index.query(
44
74
  vector: embedding,
45
75
  top_k: k,
46
76
  include_values: true,
47
77
  include_metadata: true
48
78
  )
79
+ response.dig("matches")
49
80
  end
50
81
 
51
82
  def ask(question:)
@@ -20,6 +20,38 @@ module Vectorsearch
20
20
  super(llm: llm, llm_api_key: llm_api_key)
21
21
  end
22
22
 
23
+ # Add a list of texts to the index
24
+ # @param texts [Array] The list of texts to add
25
+ # @return [Hash] The response from the server
26
+ def add_texts(
27
+ texts:
28
+ )
29
+ batch = { ids: [], vectors: [], payloads: [] }
30
+
31
+ texts.each do |text|
32
+ batch[:ids].push(SecureRandom.uuid)
33
+ batch[:vectors].push(generate_embedding(text: text))
34
+ batch[:payloads].push({ content: text })
35
+ end
36
+
37
+ client.points.upsert(
38
+ collection_name: index_name,
39
+ batch: batch
40
+ )
41
+ end
42
+
43
+ # Create the index with the default schema
44
+ # @return [Hash] The response from the server
45
+ def create_default_schema
46
+ client.collections.create(
47
+ collection_name: index_name,
48
+ vectors: {
49
+ distance: DEFAULT_METRIC.capitalize,
50
+ size: default_dimension
51
+ }
52
+ )
53
+ end
54
+
23
55
  def similarity_search(
24
56
  query:,
25
57
  k: 4
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vectorsearch
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
@@ -22,6 +22,37 @@ module Vectorsearch
22
22
  super(llm: llm, llm_api_key: llm_api_key)
23
23
  end
24
24
 
25
+ def add_texts(
26
+ texts:
27
+ )
28
+ objects = []
29
+ texts.each do |text|
30
+ objects.push({
31
+ class_name: index_name,
32
+ properties: {
33
+ content: text
34
+ }
35
+ })
36
+ end
37
+
38
+ client.objects.batch_create(
39
+ objects: objects
40
+ )
41
+ end
42
+
43
+ def create_default_schema
44
+ client.schema.create(
45
+ class_name: index_name,
46
+ vectorizer: "text2vec-#{llm.to_s}",
47
+ properties: [
48
+ {
49
+ dataType: ["text"],
50
+ name: "content"
51
+ }
52
+ ]
53
+ )
54
+ end
55
+
25
56
  # Return documents similar to the query
26
57
  # @param query [String] The query to search for
27
58
  # @param k [Integer|String] The number of results to return
@@ -36,7 +67,7 @@ module Vectorsearch
36
67
  class_name: index_name,
37
68
  near_text: near_text,
38
69
  limit: k.to_s,
39
- fields: "content recipe_id"
70
+ fields: "content _additional { id }"
40
71
  )
41
72
  end
42
73
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vectorsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-04-30 00:00:00.000000000 Z
11
+ date: 2023-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pry-byebug