vectorsearch 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +10 -2
- data/lib/vectorsearch/base.rb +16 -0
- data/lib/vectorsearch/milvus.rb +59 -1
- data/lib/vectorsearch/pinecone.rb +32 -1
- data/lib/vectorsearch/qdrant.rb +32 -0
- data/lib/vectorsearch/version.rb +1 -1
- data/lib/vectorsearch/weaviate.rb +32 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 160b98d1553c63fae2e50c07dae83e80376eade573874e0fcb28a0d1f7f476ea
+  data.tar.gz: 21fbcbb750cd878ceedec2646ef8db116a839b422f751ce3959aa6da8da78ff1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 40991aab084c3eb16d8029b598f70227946edc72411c74de0906289d7e23b16a83fbbd7aa9325c44b6ebc42ee0cac3146d55cf9bf5b1eeb23e862f0f66da66c2
+  data.tar.gz: 1c23318876143377b826c8e619978b83e61da68b10dbed34c203ae0f35a10efeb6fa48843fada3782b3fd7c1785802b0cda1332281ebcc39230bb7ed10c0905d
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -41,16 +41,24 @@ client = Vectorsearch::Weaviate.new(
   llm_api_key: ENV["OPENAI_API_KEY"]
 )
 
-# You instantiate any other supported vector search database:
+# You can instantiate any other supported vector search database:
 client = Vectorsearch::Milvus.new(...)
 client = Vectorsearch::Qdrant.new(...)
 client = Vectorsearch::Pinecone.new(...)
 ```
 
+```ruby
+# Creating the default schema
+client.create_default_schema
+```
+
 ```ruby
 # Store your documents in your vector search database
 client.add_texts(
-  texts: [
+  texts: [
+    "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
+    "In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
+  ]
 )
 ```
 
data/lib/vectorsearch/base.rb
CHANGED
@@ -7,6 +7,10 @@ module Vectorsearch
   class Base
     attr_reader :client, :index_name, :llm, :llm_api_key
 
+    DEFAULT_METRIC = "cosine".freeze
+    DEFAULT_COHERE_DIMENSION = 1024
+    DEFAULT_OPENAI_DIMENSION = 1536
+
     # Currently supported LLMs
     # TODO: Add support for HuggingFace
     LLMS = %i[openai cohere].freeze

@@ -20,6 +24,10 @@ module Vectorsearch
       @llm_api_key = llm_api_key
     end
 
+    def create_default_schema
+      raise NotImplementedError
+    end
+
     # TODO
     def add_texts(texts:)
       raise NotImplementedError

@@ -90,6 +98,14 @@ module Vectorsearch
 
     private
 
+    def default_dimension
+      if llm == :openai
+        DEFAULT_OPENAI_DIMENSION
+      elsif llm == :cohere
+        DEFAULT_COHERE_DIMENSION
+      end
+    end
+
     def openai_client
       @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
     end
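These additions turn `Base` into a small template: `create_default_schema` is now an abstract hook that each adapter overrides, and the private `default_dimension` helper picks the embedding size from the configured LLM. As an illustration only, a hypothetical adapter (not part of this gem) could plug into the new helpers like this:

```ruby
# Illustrative sketch only -- InMemoryStore is a hypothetical adapter,
# not shipped with the vectorsearch gem.
require "vectorsearch"

module Vectorsearch
  class InMemoryStore < Base
    def initialize(index_name:, llm:, llm_api_key:)
      @index_name = index_name
      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Overrides the new Base hook, which otherwise raises NotImplementedError.
    def create_default_schema
      {
        name: index_name,
        metric: DEFAULT_METRIC,       # "cosine"
        dimension: default_dimension  # 1536 for :openai, 1024 for :cohere
      }
    end
  end
end
```

The concrete adapters changed in this release (Milvus, Pinecone, Qdrant, Weaviate) override the same hook against their respective clients.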
data/lib/vectorsearch/milvus.rb
CHANGED
@@ -19,6 +19,64 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
 
+    def add_texts(
+      texts:
+    )
+      client.entities.insert(
+        collection_name: index_name,
+        num_rows: texts.count,
+        fields_data: [
+          {
+            field_name: "content",
+            type: ::Milvus::DATA_TYPES["varchar"],
+            field: texts
+          }, {
+            field_name: "vectors",
+            type: ::Milvus::DATA_TYPES["binary_vector"],
+            field: texts.map { |text| generate_embedding(text: text) }
+          }
+        ]
+      )
+    end
+
+    # Create default schema
+    # @return [Hash] The response from the server
+    def create_default_schema
+      client.collections.create(
+        auto_id: true,
+        collection_name: index_name,
+        description: "Default schema created by Vectorsearch",
+        fields: [
+          {
+            name: "id",
+            is_primary_key: true,
+            autoID: true,
+            data_type: ::Milvus::DATA_TYPES["int64"]
+          }, {
+            name: "content",
+            is_primary_key: false,
+            data_type: ::Milvus::DATA_TYPES["varchar"],
+            type_params: [
+              {
+                key: "max_length",
+                value: "32768" # Largest allowed value
+              }
+            ]
+          }, {
+            name: "vectors",
+            data_type: ::Milvus::DATA_TYPES["binary_vector"],
+            is_primary_key: false,
+            type_params: [
+              {
+                key: "dim",
+                value: default_dimension.to_s
+              }
+            ]
+          }
+        ]
+      )
+    end
+
     def similarity_search(
       query:,
       k: 4

@@ -41,7 +99,7 @@ module Vectorsearch
         vectors: [ embedding ],
         dsl_type: 1,
         params: "{\"nprobe\": 10}",
-        anns_field: "
+        anns_field: "content",
         metric_type: "L2"
       )
     end
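A hedged usage sketch of the new Milvus methods. Only the `llm:` and `llm_api_key:` keywords are confirmed by this diff (via the `super` call); the other constructor arguments shown are assumptions for illustration.

```ruby
require "vectorsearch"

# url: and index_name: are assumed keyword arguments; only llm: and
# llm_api_key: are visible in this diff.
milvus = Vectorsearch::Milvus.new(
  url: ENV["MILVUS_URL"],
  index_name: "Recipes",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

# Creates the collection with the default fields: an auto-generated int64
# "id" primary key, a varchar "content" field (max_length 32768), and a
# "vectors" field sized via default_dimension.
milvus.create_default_schema

# Embeds each text with the configured LLM and inserts content + vectors
# through client.entities.insert.
milvus.add_texts(texts: ["First document", "Second document"])
```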
data/lib/vectorsearch/pinecone.rb
CHANGED
@@ -22,6 +22,36 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
 
+    # Add a list of texts to the index
+    # @param texts [Array] The list of texts to add
+    # @return [Hash] The response from the server
+    def add_texts(
+      texts:
+    )
+      vectors = texts.map do |text|
+        {
+          # TODO: Allows passing in your own IDs
+          id: SecureRandom.uuid,
+          metadata: { content: text },
+          values: generate_embedding(text: text)
+        }
+      end
+
+      index = client.index(index_name)
+
+      index.upsert(vectors: vectors)
+    end
+
+    # Create the index with the default schema
+    # @return [Hash] The response from the server
+    def create_default_schema
+      client.create_index(
+        metric: DEFAULT_METRIC,
+        name: index_name,
+        dimension: default_dimension
+      )
+    end
+
     def similarity_search(
       query:,
       k: 4

@@ -40,12 +70,13 @@ module Vectorsearch
       )
       index = client.index(index_name)
 
-      index.query(
+      response = index.query(
         vector: embedding,
         top_k: k,
         include_values: true,
         include_metadata: true
       )
+      response.dig("matches")
     end
 
     def ask(question:)
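A hedged usage sketch of the new Pinecone methods; constructor keywords other than `llm:`/`llm_api_key:` are assumptions, not shown in this diff.

```ruby
require "vectorsearch"

# api_key:, environment: and index_name: are assumed keywords for
# illustration; the diff only confirms llm: and llm_api_key:.
pinecone = Vectorsearch::Pinecone.new(
  api_key: ENV["PINECONE_API_KEY"],
  environment: ENV["PINECONE_ENVIRONMENT"],
  index_name: "recipes",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

# Creates the index with the "cosine" metric and the LLM-dependent dimension.
pinecone.create_default_schema

# Each text gets a random UUID, its embedding as the vector values, and the
# original text stored under metadata[:content].
pinecone.add_texts(texts: ["First document", "Second document"])

# similarity_search now returns only the "matches" array from the response.
matches = pinecone.similarity_search(query: "stuffed chicken", k: 4)
```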
data/lib/vectorsearch/qdrant.rb
CHANGED
@@ -20,6 +20,38 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
 
+    # Add a list of texts to the index
+    # @param texts [Array] The list of texts to add
+    # @return [Hash] The response from the server
+    def add_texts(
+      texts:
+    )
+      batch = { ids: [], vectors: [], payloads: [] }
+
+      texts.each do |text|
+        batch[:ids].push(SecureRandom.uuid)
+        batch[:vectors].push(generate_embedding(text: text))
+        batch[:payloads].push({ content: text })
+      end
+
+      client.points.upsert(
+        collection_name: index_name,
+        batch: batch
+      )
+    end
+
+    # Create the index with the default schema
+    # @return [Hash] The response from the server
+    def create_default_schema
+      client.collections.create(
+        collection_name: index_name,
+        vectors: {
+          distance: DEFAULT_METRIC.capitalize,
+          size: default_dimension
+        }
+      )
+    end
+
     def similarity_search(
       query:,
       k: 4
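A hedged usage sketch of the new Qdrant methods; constructor keywords beyond `llm:`/`llm_api_key:` are assumptions.

```ruby
require "vectorsearch"

# url:, api_key: and index_name: are assumed keywords for illustration.
qdrant = Vectorsearch::Qdrant.new(
  url: ENV["QDRANT_URL"],
  api_key: ENV["QDRANT_API_KEY"],
  index_name: "recipes",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

# Creates the collection with distance "Cosine" (DEFAULT_METRIC.capitalize)
# and the LLM-dependent vector size.
qdrant.create_default_schema

# Builds a single batch of UUIDs, embeddings and { content: text } payloads,
# then upserts it via client.points.upsert.
qdrant.add_texts(texts: ["First document", "Second document"])
```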
data/lib/vectorsearch/version.rb
CHANGED
data/lib/vectorsearch/weaviate.rb
CHANGED
@@ -22,6 +22,37 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
 
+    def add_texts(
+      texts:
+    )
+      objects = []
+      texts.each do |text|
+        objects.push({
+          class_name: index_name,
+          properties: {
+            content: text
+          }
+        })
+      end
+
+      client.objects.batch_create(
+        objects: objects
+      )
+    end
+
+    def create_default_schema
+      client.schema.create(
+        class_name: index_name,
+        vectorizer: "text2vec-#{llm.to_s}",
+        properties: [
+          {
+            dataType: ["text"],
+            name: "content"
+          }
+        ]
+      )
+    end
+
     # Return documents similar to the query
     # @param query [String] The query to search for
     # @param k [Integer|String] The number of results to return

@@ -36,7 +67,7 @@ module Vectorsearch
         class_name: index_name,
         near_text: near_text,
         limit: k.to_s,
-        fields: "content
+        fields: "content _additional { id }"
       )
     end
 
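A hedged usage sketch of the new Weaviate methods; constructor keywords beyond `llm:`/`llm_api_key:` follow the README example and are otherwise assumptions. Unlike the other adapters, `add_texts` here sends only the raw text: the class is created with a `text2vec-openai` or `text2vec-cohere` vectorizer, so Weaviate computes the embeddings itself.

```ruby
require "vectorsearch"

# url: and api_key:/index_name: are assumed keywords; llm: and llm_api_key:
# come from the super call in this diff.
weaviate = Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Recipes",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

# Creates the class with a "text2vec-openai" vectorizer and a single
# "content" text property.
weaviate.create_default_schema

# Batch-creates objects whose only property is the text; vectorization is
# delegated to Weaviate's configured vectorizer module.
weaviate.add_texts(texts: ["First document", "Second document"])

# The GraphQL query now also returns each object's id via
# "content _additional { id }".
results = weaviate.similarity_search(query: "stuffed chicken", k: 4)
```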
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: vectorsearch
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-
+date: 2023-05-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pry-byebug