vectorsearch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +185 -0
- data/LICENSE.txt +21 -0
- data/README.md +89 -0
- data/Rakefile +8 -0
- data/lib/vectorsearch/base.rb +44 -0
- data/lib/vectorsearch/milvus.rb +53 -0
- data/lib/vectorsearch/pinecone.rb +55 -0
- data/lib/vectorsearch/qdrant.rb +51 -0
- data/lib/vectorsearch/version.rb +5 -0
- data/lib/vectorsearch/weaviate.rb +81 -0
- data/lib/vectorsearch.rb +13 -0
- data/sig/vectorsearch.rbs +4 -0
- metadata +187 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 4c9913804bc8aaadc08a60c0a250e2923f2a4ceb28633fdd99e4bf86544203b4
|
|
4
|
+
data.tar.gz: 8db3a77121d948f6ed709618da5bd4411a87a01aba26feb7942e74e0dd18f207
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: d76d13ee23c7219483eac27a37ae61cad335bbd2d4169a76723d19a0e334ab6ac01037923cb2c07a64c853280d2449e54c0107fba7f36a231a624efaf1b68b46
|
|
7
|
+
data.tar.gz: 43612526795e54138ec0c891e5bbdef2ef712ac287a4776680d26d94e8cabcb9d5976774b5144cbeb3ae95b367e26460230931041f011f1a69671e014060684f
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
vectorsearch (0.1.0)
|
|
5
|
+
cohere-ruby (~> 0.9.1)
|
|
6
|
+
milvus (~> 0.9.0)
|
|
7
|
+
pinecone (~> 0.1.6)
|
|
8
|
+
qdrant-ruby (~> 0.9.0)
|
|
9
|
+
ruby-openai (~> 4.0.0)
|
|
10
|
+
tokenizers (~> 0.3.3)
|
|
11
|
+
weaviate-ruby (~> 0.8.0)
|
|
12
|
+
|
|
13
|
+
GEM
|
|
14
|
+
remote: https://rubygems.org/
|
|
15
|
+
specs:
|
|
16
|
+
actionpack (7.0.4.3)
|
|
17
|
+
actionview (= 7.0.4.3)
|
|
18
|
+
activesupport (= 7.0.4.3)
|
|
19
|
+
rack (~> 2.0, >= 2.2.0)
|
|
20
|
+
rack-test (>= 0.6.3)
|
|
21
|
+
rails-dom-testing (~> 2.0)
|
|
22
|
+
rails-html-sanitizer (~> 1.0, >= 1.2.0)
|
|
23
|
+
actionview (7.0.4.3)
|
|
24
|
+
activesupport (= 7.0.4.3)
|
|
25
|
+
builder (~> 3.1)
|
|
26
|
+
erubi (~> 1.4)
|
|
27
|
+
rails-dom-testing (~> 2.0)
|
|
28
|
+
rails-html-sanitizer (~> 1.1, >= 1.2.0)
|
|
29
|
+
activesupport (7.0.4.3)
|
|
30
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
31
|
+
i18n (>= 1.6, < 2)
|
|
32
|
+
minitest (>= 5.1)
|
|
33
|
+
tzinfo (~> 2.0)
|
|
34
|
+
builder (3.2.4)
|
|
35
|
+
byebug (11.1.3)
|
|
36
|
+
coderay (1.1.3)
|
|
37
|
+
cohere-ruby (0.9.1)
|
|
38
|
+
faraday (~> 2.7.0)
|
|
39
|
+
concurrent-ruby (1.2.2)
|
|
40
|
+
crass (1.0.6)
|
|
41
|
+
diff-lcs (1.5.0)
|
|
42
|
+
dotenv (2.7.6)
|
|
43
|
+
dotenv-rails (2.7.6)
|
|
44
|
+
dotenv (= 2.7.6)
|
|
45
|
+
railties (>= 3.2)
|
|
46
|
+
dry-configurable (0.16.1)
|
|
47
|
+
dry-core (~> 0.6)
|
|
48
|
+
zeitwerk (~> 2.6)
|
|
49
|
+
dry-container (0.11.0)
|
|
50
|
+
concurrent-ruby (~> 1.0)
|
|
51
|
+
dry-core (0.9.1)
|
|
52
|
+
concurrent-ruby (~> 1.0)
|
|
53
|
+
zeitwerk (~> 2.6)
|
|
54
|
+
dry-inflector (0.3.0)
|
|
55
|
+
dry-initializer (3.1.1)
|
|
56
|
+
dry-logic (1.3.0)
|
|
57
|
+
concurrent-ruby (~> 1.0)
|
|
58
|
+
dry-core (~> 0.9, >= 0.9)
|
|
59
|
+
zeitwerk (~> 2.6)
|
|
60
|
+
dry-schema (1.11.3)
|
|
61
|
+
concurrent-ruby (~> 1.0)
|
|
62
|
+
dry-configurable (~> 0.16, >= 0.16)
|
|
63
|
+
dry-core (~> 0.9, >= 0.9)
|
|
64
|
+
dry-initializer (~> 3.0)
|
|
65
|
+
dry-logic (~> 1.3)
|
|
66
|
+
dry-types (~> 1.6)
|
|
67
|
+
zeitwerk (~> 2.6)
|
|
68
|
+
dry-struct (1.5.2)
|
|
69
|
+
dry-core (~> 0.9, >= 0.9)
|
|
70
|
+
dry-types (~> 1.6)
|
|
71
|
+
ice_nine (~> 0.11)
|
|
72
|
+
zeitwerk (~> 2.6)
|
|
73
|
+
dry-types (1.6.1)
|
|
74
|
+
concurrent-ruby (~> 1.0)
|
|
75
|
+
dry-container (~> 0.3)
|
|
76
|
+
dry-core (~> 0.9, >= 0.9)
|
|
77
|
+
dry-inflector (~> 0.1, >= 0.1.2)
|
|
78
|
+
dry-logic (~> 1.3, >= 1.3)
|
|
79
|
+
zeitwerk (~> 2.6)
|
|
80
|
+
dry-validation (1.9.0)
|
|
81
|
+
concurrent-ruby (~> 1.0)
|
|
82
|
+
dry-container (~> 0.7, >= 0.7.1)
|
|
83
|
+
dry-core (~> 0.9, >= 0.9)
|
|
84
|
+
dry-initializer (~> 3.0)
|
|
85
|
+
dry-schema (~> 1.11, >= 1.11.0)
|
|
86
|
+
zeitwerk (~> 2.6)
|
|
87
|
+
erubi (1.12.0)
|
|
88
|
+
faraday (2.7.4)
|
|
89
|
+
faraday-net_http (>= 2.0, < 3.1)
|
|
90
|
+
ruby2_keywords (>= 0.0.4)
|
|
91
|
+
faraday-multipart (1.0.4)
|
|
92
|
+
multipart-post (~> 2)
|
|
93
|
+
faraday-net_http (3.0.2)
|
|
94
|
+
graphlient (0.7.0)
|
|
95
|
+
faraday (~> 2.0)
|
|
96
|
+
graphql-client
|
|
97
|
+
graphql (2.0.21)
|
|
98
|
+
graphql-client (0.18.0)
|
|
99
|
+
activesupport (>= 3.0)
|
|
100
|
+
graphql
|
|
101
|
+
httparty (0.21.0)
|
|
102
|
+
mini_mime (>= 1.0.0)
|
|
103
|
+
multi_xml (>= 0.5.2)
|
|
104
|
+
i18n (1.13.0)
|
|
105
|
+
concurrent-ruby (~> 1.0)
|
|
106
|
+
ice_nine (0.11.2)
|
|
107
|
+
loofah (2.20.0)
|
|
108
|
+
crass (~> 1.0.2)
|
|
109
|
+
nokogiri (>= 1.5.9)
|
|
110
|
+
method_source (1.0.0)
|
|
111
|
+
milvus (0.9.0)
|
|
112
|
+
faraday (~> 2.7.0)
|
|
113
|
+
mini_mime (1.1.2)
|
|
114
|
+
minitest (5.18.0)
|
|
115
|
+
multi_xml (0.6.0)
|
|
116
|
+
multipart-post (2.3.0)
|
|
117
|
+
nokogiri (1.14.3-x86_64-darwin)
|
|
118
|
+
racc (~> 1.4)
|
|
119
|
+
pinecone (0.1.6)
|
|
120
|
+
dry-struct (~> 1.5.0)
|
|
121
|
+
dry-validation (~> 1.9.0)
|
|
122
|
+
httparty (~> 0.21.0)
|
|
123
|
+
pry (0.14.2)
|
|
124
|
+
coderay (~> 1.1)
|
|
125
|
+
method_source (~> 1.0)
|
|
126
|
+
pry-byebug (3.10.1)
|
|
127
|
+
byebug (~> 11.0)
|
|
128
|
+
pry (>= 0.13, < 0.15)
|
|
129
|
+
qdrant-ruby (0.9.0)
|
|
130
|
+
faraday (~> 2.7)
|
|
131
|
+
racc (1.6.2)
|
|
132
|
+
rack (2.2.7)
|
|
133
|
+
rack-test (2.1.0)
|
|
134
|
+
rack (>= 1.3)
|
|
135
|
+
rails-dom-testing (2.0.3)
|
|
136
|
+
activesupport (>= 4.2.0)
|
|
137
|
+
nokogiri (>= 1.6)
|
|
138
|
+
rails-html-sanitizer (1.5.0)
|
|
139
|
+
loofah (~> 2.19, >= 2.19.1)
|
|
140
|
+
railties (7.0.4.3)
|
|
141
|
+
actionpack (= 7.0.4.3)
|
|
142
|
+
activesupport (= 7.0.4.3)
|
|
143
|
+
method_source
|
|
144
|
+
rake (>= 12.2)
|
|
145
|
+
thor (~> 1.0)
|
|
146
|
+
zeitwerk (~> 2.5)
|
|
147
|
+
rake (13.0.6)
|
|
148
|
+
rspec (3.12.0)
|
|
149
|
+
rspec-core (~> 3.12.0)
|
|
150
|
+
rspec-expectations (~> 3.12.0)
|
|
151
|
+
rspec-mocks (~> 3.12.0)
|
|
152
|
+
rspec-core (3.12.2)
|
|
153
|
+
rspec-support (~> 3.12.0)
|
|
154
|
+
rspec-expectations (3.12.3)
|
|
155
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
156
|
+
rspec-support (~> 3.12.0)
|
|
157
|
+
rspec-mocks (3.12.5)
|
|
158
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
159
|
+
rspec-support (~> 3.12.0)
|
|
160
|
+
rspec-support (3.12.0)
|
|
161
|
+
ruby-openai (4.0.0)
|
|
162
|
+
faraday (>= 1)
|
|
163
|
+
faraday-multipart (>= 1)
|
|
164
|
+
ruby2_keywords (0.0.5)
|
|
165
|
+
thor (1.2.1)
|
|
166
|
+
tokenizers (0.3.3-x86_64-darwin)
|
|
167
|
+
tzinfo (2.0.6)
|
|
168
|
+
concurrent-ruby (~> 1.0)
|
|
169
|
+
weaviate-ruby (0.8.0)
|
|
170
|
+
faraday (~> 2.7)
|
|
171
|
+
graphlient (~> 0.7.0)
|
|
172
|
+
zeitwerk (2.6.8)
|
|
173
|
+
|
|
174
|
+
PLATFORMS
|
|
175
|
+
x86_64-darwin-19
|
|
176
|
+
|
|
177
|
+
DEPENDENCIES
|
|
178
|
+
dotenv-rails (~> 2.7.6)
|
|
179
|
+
pry-byebug (~> 3.10.0)
|
|
180
|
+
rake (~> 13.0)
|
|
181
|
+
rspec (~> 3.0)
|
|
182
|
+
vectorsearch!
|
|
183
|
+
|
|
184
|
+
BUNDLED WITH
|
|
185
|
+
2.4.0
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Andrei Bondarev
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Vectorsearch
|
|
2
|
+
|
|
3
|
+
The Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straightforward DSL and abstracts developers away from overly complex machine learning/data science-specific configurations.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Install the gem and add to the application's Gemfile by executing:
|
|
8
|
+
|
|
9
|
+
$ bundle add vectorsearch
|
|
10
|
+
|
|
11
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
12
|
+
|
|
13
|
+
$ gem install vectorsearch
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```ruby
|
|
18
|
+
require "vectorsearch"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
List of currently supported vector search databases and features:
|
|
22
|
+
|
|
23
|
+
| Database | Querying | Storage |
|
|
24
|
+
| -------- | ------------------ | ------- |
|
|
25
|
+
| Weaviate | :white_check_mark: | WIP |
|
|
26
|
+
| Qdrant | :white_check_mark: | WIP |
|
|
27
|
+
| Milvus | :white_check_mark: | WIP |
|
|
28
|
+
| Pinecone | :white_check_mark: | WIP |
|
|
29
|
+
|
|
30
|
+
### Create an instance
|
|
31
|
+
|
|
32
|
+
Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
|
|
33
|
+
|
|
34
|
+
Pick the vector search database you'll be using and instantiate the client:
|
|
35
|
+
```ruby
|
|
36
|
+
client = Vectorsearch::Weaviate.new(
|
|
37
|
+
url: ENV["WEAVIATE_URL"],
|
|
38
|
+
api_key: ENV["WEAVIATE_API_KEY"],
|
|
39
|
+
llm: :openai, # or :cohere
|
|
40
|
+
llm_api_key: ENV["OPENAI_API_KEY"]
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# You can instantiate any other supported vector search database:
|
|
44
|
+
client = Vectorsearch::Milvus.new(...)
|
|
45
|
+
client = Vectorsearch::Qdrant.new(...)
|
|
46
|
+
client = Vectorsearch::Pinecone.new(...)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
# Store your documents in your vector search database
|
|
51
|
+
client.add_texts(
|
|
52
|
+
texts: []
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
# Retrieve similar documents based on the query string passed in
|
|
58
|
+
client.similarity_search(
|
|
59
|
+
query:,
|
|
60
|
+
k: # number of results to be retrieved
|
|
61
|
+
)
|
|
62
|
+
```
|
|
63
|
+
```ruby
|
|
64
|
+
# Retrieve similar documents based on the embedding passed in
|
|
65
|
+
client.similarity_search_by_vector(
|
|
66
|
+
embedding:,
|
|
67
|
+
k: # number of results to be retrieved
|
|
68
|
+
)
|
|
69
|
+
```
|
|
70
|
+
```ruby
|
|
71
|
+
# Q&A-style querying based on the question passed in
|
|
72
|
+
client.ask(
|
|
73
|
+
question:
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Development
|
|
78
|
+
|
|
79
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
80
|
+
|
|
81
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
82
|
+
|
|
83
|
+
## Contributing
|
|
84
|
+
|
|
85
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/vectorsearch.
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
|
|
89
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "openai"
|
|
4
|
+
require "cohere"
|
|
5
|
+
|
|
6
|
+
module Vectorsearch
  # Common foundation for the vector search adapters: remembers which LLM
  # provider to use and turns text into an embedding through that provider.
  class Base
    # LLM providers accepted by #initialize.
    LLMS = %i[openai cohere].freeze

    attr_reader :client, :index_name, :llm, :llm_api_key

    # @param llm [Symbol] provider to use; must be listed in LLMS
    # @param llm_api_key [String] API key passed to the provider's client
    # @raise [ArgumentError] when +llm+ is not listed in LLMS
    def initialize(llm:, llm_api_key:)
      validate_llm!(llm: llm)

      @llm = llm
      @llm_api_key = llm_api_key
    end

    # Requests an embedding for +text+ from the configured provider.
    #
    # @param text [String] text to embed
    # @return the first embedding found in the provider's response
    def generate_embedding(text:)
      case llm
      when :openai
        openai = OpenAI::Client.new(access_token: llm_api_key)
        payload = {
          model: "text-embedding-ada-002",
          input: text
        }

        openai.embeddings(parameters: payload).dig("data").first.dig("embedding")
      when :cohere
        cohere = Cohere::Client.new(api_key: llm_api_key)

        # Cohere takes a batch of texts; a single-element batch is sent and
        # its only embedding is returned.
        cohere.embed(texts: [text], model: "small").dig("embeddings").first
      end
    end

    # Guards against unsupported providers.
    #
    # @param llm [Symbol] provider to check
    # @raise [ArgumentError] when +llm+ is not listed in LLMS
    def validate_llm!(llm:)
      return if LLMS.include?(llm)

      raise ArgumentError, "LLM must be one of #{LLMS}"
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "milvus"
|
|
4
|
+
|
|
5
|
+
module Vectorsearch
  # Adapter that runs similarity searches against a Milvus collection.
  class Milvus < Base
    # Vector field searched when the caller does not name one. Kept for
    # backward compatibility with the previously hard-coded value.
    DEFAULT_ANNS_FIELD = "book_intro"
    # Distance metric used when the caller does not name one.
    DEFAULT_METRIC_TYPE = "L2"

    # @param url [String] address handed to ::Milvus::Client
    # @param api_key [String, nil] accepted for signature parity with the
    #   other adapters; it is not passed on to the Milvus client
    # @param index_name [String] collection that searches are run against
    # @param llm [Symbol] LLM provider, validated by Base
    # @param llm_api_key [String] API key for the LLM provider
    # @raise [ArgumentError] when +llm+ is not supported by Base
    def initialize(
      url:,
      api_key: nil,
      index_name:,
      llm:,
      llm_api_key:
    )
      @client = ::Milvus::Client.new(
        url: url
      )
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Embeds +query+ with the configured LLM and searches by that vector.
    #
    # @param query [String] text to search for
    # @param k [Integer] number of results to request
    # @param anns_field [String] name of the vector field to search
    # @param metric_type [String] distance metric sent to Milvus
    # @return whatever the Milvus client returns for the search
    def similarity_search(
      query:,
      k: 4,
      anns_field: DEFAULT_ANNS_FIELD,
      metric_type: DEFAULT_METRIC_TYPE
    )
      embedding = generate_embedding(text: query)

      similarity_search_by_vector(
        embedding: embedding,
        k: k,
        anns_field: anns_field,
        metric_type: metric_type
      )
    end

    # Searches the collection for the vectors closest to +embedding+.
    #
    # @param embedding [Array] query vector
    # @param k [Integer] number of results to request
    # @param anns_field [String] name of the vector field to search
    # @param metric_type [String] distance metric sent to Milvus
    # @return whatever the Milvus client returns for the search
    def similarity_search_by_vector(
      embedding:,
      k: 4,
      anns_field: DEFAULT_ANNS_FIELD,
      metric_type: DEFAULT_METRIC_TYPE
    )
      client.search(
        collection_name: index_name,
        # top_k is sent as a string, as in the original implementation.
        top_k: k.to_s,
        vectors: [embedding],
        dsl_type: 1,
        params: "{\"nprobe\": 10}",
        anns_field: anns_field,
        metric_type: metric_type
      )
    end

    # Q&A-style querying is not available for this adapter.
    #
    # @raise [NotImplementedError] always
    def ask(question:)
      raise NotImplementedError
    end
  end
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pinecone"
|
|
4
|
+
|
|
5
|
+
module Vectorsearch
  # Adapter that runs similarity searches against a Pinecone index.
  class Pinecone < Base
    # @param environment [String] Pinecone environment name
    # @param api_key [String] Pinecone API key
    # @param index_name [String] index that searches are run against
    # @param llm [Symbol] LLM provider, validated by Base
    # @param llm_api_key [String] API key for the LLM provider
    # @raise [ArgumentError] when +llm+ is not supported by Base
    def initialize(environment:, api_key:, index_name:, llm:, llm_api_key:)
      # Credentials are set through ::Pinecone.configure before the client
      # is created.
      ::Pinecone.configure do |settings|
        settings.api_key = api_key
        settings.environment = environment
      end

      @client = ::Pinecone::Client.new
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Embeds +query+ with the configured LLM and searches by that vector.
    #
    # @param query [String] text to search for
    # @param k [Integer] number of results to request
    # @return whatever the Pinecone index returns for the query
    def similarity_search(query:, k: 4)
      vector = generate_embedding(text: query)

      similarity_search_by_vector(embedding: vector, k: k)
    end

    # Queries the index for the vectors closest to +embedding+, asking for
    # both values and metadata in the response.
    #
    # @param embedding [Array] query vector
    # @param k [Integer] number of results to request
    # @return whatever the Pinecone index returns for the query
    def similarity_search_by_vector(embedding:, k: 4)
      client.index(index_name).query(
        vector: embedding,
        top_k: k,
        include_values: true,
        include_metadata: true
      )
    end

    # Q&A-style querying is not available for this adapter.
    #
    # @raise [NotImplementedError] always
    def ask(question:)
      raise NotImplementedError
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "qdrant"
|
|
4
|
+
|
|
5
|
+
module Vectorsearch
  # Adapter that runs similarity searches against a Qdrant collection.
  class Qdrant < Base
    # @param url [String] address handed to ::Qdrant::Client
    # @param api_key [String] Qdrant API key
    # @param index_name [String] collection that searches are run against
    # @param llm [Symbol] LLM provider, validated by Base
    # @param llm_api_key [String] API key for the LLM provider
    # @raise [ArgumentError] when +llm+ is not supported by Base
    def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
      @client = ::Qdrant::Client.new(url: url, api_key: api_key)
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Embeds +query+ with the configured LLM and searches by that vector.
    #
    # @param query [String] text to search for
    # @param k [Integer] number of results to request
    # @return whatever the Qdrant client returns for the search
    def similarity_search(query:, k: 4)
      vector = generate_embedding(text: query)

      similarity_search_by_vector(embedding: vector, k: k)
    end

    # Searches the collection for the points closest to +embedding+,
    # asking for payloads to be included in the response.
    #
    # @param embedding [Array] query vector
    # @param k [Integer] maximum number of results to request
    # @return whatever the Qdrant client returns for the search
    def similarity_search_by_vector(embedding:, k: 4)
      client.points.search(
        collection_name: index_name,
        limit: k,
        vector: embedding,
        with_payload: true
      )
    end

    # Q&A-style querying is not available for this adapter.
    #
    # @raise [NotImplementedError] always
    def ask(question:)
      raise NotImplementedError
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "weaviate"
|
|
4
|
+
|
|
5
|
+
# JSON is used to escape user-supplied text embedded in GraphQL arguments.
require "json"

module Vectorsearch
  # Adapter that stores objects in, and queries, a Weaviate class.
  class Weaviate < Base
    # @param url [String] address handed to ::Weaviate::Client
    # @param api_key [String] Weaviate API key
    # @param index_name [String] Weaviate class that is written to and queried
    # @param llm [Symbol] LLM provider; also passed to the client as model_service
    # @param llm_api_key [String] API key for the LLM provider
    # @raise [ArgumentError] when +llm+ is not supported by Base
    def initialize(
      url:,
      api_key:,
      index_name:,
      llm:,
      llm_api_key:
    )
      @client = ::Weaviate::Client.new(
        url: url,
        api_key: api_key,
        model_service: llm,
        model_service_api_key: llm_api_key
      )
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Tags every object with this adapter's class name and creates them in
    # one batch. Copies are tagged, so the caller's objects are not modified.
    #
    # @param texts [Enumerable] objects to store; each must support #[]=
    # @return whatever the Weaviate client returns for the batch
    def add_texts(
      texts:
    )
      objects = texts.map do |text|
        text.dup.tap { |object| object["class"] = index_name }
      end

      client.batch_create(
        objects: objects
      )
    end

    # Returns documents similar to the query.
    #
    # @param query [String] text to search for
    # @param k [Integer] number of results to request
    # @return whatever the Weaviate client returns for the query
    def similarity_search(
      query:,
      k: 4
    )
      # The query is escaped so quotes, backslashes or newlines in it cannot
      # break out of the GraphQL string literal.
      near_text = "{ concepts: [#{graphql_string(query)}] }"

      client.query.get(
        class_name: index_name,
        near_text: near_text,
        limit: k.to_s,
        # NOTE(review): "recipe_id" looks schema-specific -- confirm.
        fields: "content recipe_id"
      )
    end

    # Returns documents whose vectors are closest to +embedding+.
    #
    # @param embedding [Array] query vector
    # @param k [Integer] number of results to request
    # @return whatever the Weaviate client returns for the query
    def similarity_search_by_vector(
      embedding:,
      k: 4
    )
      near_vector = "{ vector: #{embedding} }"

      client.query.get(
        class_name: index_name,
        near_vector: near_vector,
        limit: k.to_s,
        # NOTE(review): "recipe_id" looks schema-specific -- confirm.
        fields: "content recipe_id"
      )
    end

    # Q&A-style query: asks Weaviate to answer +question+ and requests the
    # answer result for a single object.
    #
    # @param question [String] question to ask
    # @return whatever the Weaviate client returns for the query
    def ask(
      question:
    )
      ask_object = "{ question: #{graphql_string(question)} }"

      client.query.get(
        class_name: index_name,
        ask: ask_object,
        limit: "1",
        fields: "_additional { answer { result } }"
      )
    end

    private

    # Renders +value+ as a double-quoted, escaped string literal that is
    # safe to embed in a GraphQL argument.
    def graphql_string(value)
      value.to_s.to_json
    end
  end
end
|
data/lib/vectorsearch.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "vectorsearch/version"
|
|
4
|
+
|
|
5
|
+
module Vectorsearch
  # Base error class for the gem.
  class Error < StandardError
  end

  # Adapter classes are registered for autoloading, so each file is only
  # loaded when its constant is first referenced.
  {
    Base: "vectorsearch/base",
    Milvus: "vectorsearch/milvus",
    Pinecone: "vectorsearch/pinecone",
    Qdrant: "vectorsearch/qdrant",
    Weaviate: "vectorsearch/weaviate"
  }.each do |const_name, path|
    autoload const_name, path
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: vectorsearch
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Andrei Bondarev
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2023-04-30 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: pry-byebug
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 3.10.0
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 3.10.0
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: dotenv-rails
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 2.7.6
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 2.7.6
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: weaviate-ruby
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: 0.8.0
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: 0.8.0
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: qdrant-ruby
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: 0.9.0
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 0.9.0
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: tokenizers
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 0.3.3
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 0.3.3
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: ruby-openai
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: 4.0.0
|
|
90
|
+
type: :runtime
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: 4.0.0
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: cohere-ruby
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: 0.9.1
|
|
104
|
+
type: :runtime
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: 0.9.1
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: milvus
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - "~>"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 0.9.0
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - "~>"
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 0.9.0
|
|
125
|
+
- !ruby/object:Gem::Dependency
|
|
126
|
+
name: pinecone
|
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - "~>"
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: 0.1.6
|
|
132
|
+
type: :runtime
|
|
133
|
+
prerelease: false
|
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
+
requirements:
|
|
136
|
+
- - "~>"
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
version: 0.1.6
|
|
139
|
+
description: Vector Search backed by your vector search DB of choice.
|
|
140
|
+
email:
|
|
141
|
+
- andrei.bondarev13@gmail.com
|
|
142
|
+
executables: []
|
|
143
|
+
extensions: []
|
|
144
|
+
extra_rdoc_files: []
|
|
145
|
+
files:
|
|
146
|
+
- ".rspec"
|
|
147
|
+
- CHANGELOG.md
|
|
148
|
+
- Gemfile
|
|
149
|
+
- Gemfile.lock
|
|
150
|
+
- LICENSE.txt
|
|
151
|
+
- README.md
|
|
152
|
+
- Rakefile
|
|
153
|
+
- lib/vectorsearch.rb
|
|
154
|
+
- lib/vectorsearch/base.rb
|
|
155
|
+
- lib/vectorsearch/milvus.rb
|
|
156
|
+
- lib/vectorsearch/pinecone.rb
|
|
157
|
+
- lib/vectorsearch/qdrant.rb
|
|
158
|
+
- lib/vectorsearch/version.rb
|
|
159
|
+
- lib/vectorsearch/weaviate.rb
|
|
160
|
+
- sig/vectorsearch.rbs
|
|
161
|
+
homepage: https://github.com/andreibondarev/vectorsearch
|
|
162
|
+
licenses:
|
|
163
|
+
- MIT
|
|
164
|
+
metadata:
|
|
165
|
+
homepage_uri: https://github.com/andreibondarev/vectorsearch
|
|
166
|
+
source_code_uri: https://github.com/andreibondarev/vectorsearch
|
|
167
|
+
changelog_uri: https://github.com/andreibondarev/vectorsearch/CHANGELOG.md
|
|
168
|
+
post_install_message:
|
|
169
|
+
rdoc_options: []
|
|
170
|
+
require_paths:
|
|
171
|
+
- lib
|
|
172
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
173
|
+
requirements:
|
|
174
|
+
- - ">="
|
|
175
|
+
- !ruby/object:Gem::Version
|
|
176
|
+
version: 2.6.0
|
|
177
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
|
+
requirements:
|
|
179
|
+
- - ">="
|
|
180
|
+
- !ruby/object:Gem::Version
|
|
181
|
+
version: '0'
|
|
182
|
+
requirements: []
|
|
183
|
+
rubygems_version: 3.2.3
|
|
184
|
+
signing_key:
|
|
185
|
+
specification_version: 4
|
|
186
|
+
summary: Vector Search backed by your vector search DB of choice.
|
|
187
|
+
test_files: []
|