vectorsearch 0.1.0 → 0.1.1
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +10 -9
- data/lib/vectorsearch/base.rb +73 -12
- data/lib/vectorsearch/pinecone.rb +10 -1
- data/lib/vectorsearch/qdrant.rb +10 -1
- data/lib/vectorsearch/version.rb +1 -1
- data/lib/vectorsearch/weaviate.rb +32 -22
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ee0ed9a4527aeaefb5488bc4263f41fd4b793f410eadfcde52be4669035be78c
+  data.tar.gz: 9321cfe450003f8bd2a8e8a3ba48ac86d905c8d90ac5c56d37009dd4c27dd79e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3a54fada2b58a0da4d0b34bd73595ebbe2076c2a5039cc690ad076f705610efb85bf62d1709dca8157a718ccf09ac35cea6fdd51096b4fc374d32d51705b43c8
+  data.tar.gz: 83b1a9844757253457bc2d6186b5f4ac1ba4217843eccca23d9773d180e2316f5442715d3e83342c16894bda3606e1d76d054b20be7e2c998fde7d1311dafae2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,7 @@
 # Vectorsearch
+![Tests status](https://github.com/andreibondarev/vectorsearch/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/vectorsearch.svg)](https://badge.fury.io/rb/vectorsearch)
 
-Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straight-forward DSL and
+Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk data, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straight-forward DSL and abstracts away overly complicated machine learning/data science-specific configurations and concepts
 
 ## Installation
 
@@ -20,12 +21,12 @@ require "vectorsearch"
 
 List of currently supported vector search databases and features:
 
-| Database | Querying
-|
-| Weaviate | :white_check_mark: | WIP |
-| Qdrant | :white_check_mark: | WIP |
-| Milvus | :white_check_mark: | WIP |
-| Pinecone | :white_check_mark: | WIP |
+| Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
+| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
+| Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
+| Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
+| Milvus | :white_check_mark: | WIP | WIP | WIP | | |
+| Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
 
 ### Create an instance
 
@@ -48,8 +49,8 @@ client = Vectorsearch::Pinecone.new(...)
 
 ```ruby
 # Store your documents in your vector search database
-client.
-
+client.add_texts(
+  texts: []
 )
 ```
 
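Taken together, the README fix turns the truncated `client.` call into a complete `add_texts` invocation. A minimal sketch of how that call is used, assuming the constructor shown earlier in the README (its arguments are elided there with `...`, so the index/credential keyword names below are assumptions, not part of the diff):

```ruby
require "vectorsearch"

# `llm:` and `llm_api_key:` are confirmed by Base#initialize in this release;
# `index_name:` and `api_key:` are assumed keywords for illustration only.
client = Vectorsearch::Pinecone.new(
  index_name: "recipes",             # assumed keyword
  api_key: ENV["PINECONE_API_KEY"],  # assumed keyword
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

# Each string is embedded via the configured LLM and stored in the index.
client.add_texts(texts: ["Text of document one", "Text of document two"])
```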
data/lib/vectorsearch/base.rb
CHANGED
@@ -7,8 +7,12 @@ module Vectorsearch
   class Base
     attr_reader :client, :index_name, :llm, :llm_api_key
 
+    # Currently supported LLMs
+    # TODO: Add support for HuggingFace
     LLMS = %i[openai cohere].freeze
 
+    # @param llm [Symbol] The LLM to use
+    # @param llm_api_key [String] The API key for the LLM
     def initialize(llm:, llm_api_key:)
       validate_llm!(llm: llm)
 
@@ -16,27 +20,84 @@ module Vectorsearch
       @llm_api_key = llm_api_key
     end
 
+    # TODO
+    def add_texts(texts:)
+      raise NotImplementedError
+    end
+
+    # NotImplementedError will be raised if the subclass does not implement the `ask()` method
+    def ask(question:)
+      raise NotImplementedError
+    end
+
+    # Generate an embedding for a given text
+    # Currently supports OpenAI and Cohere
+    # The LLM-related method will most likely need to be abstracted out into a separate class
+    # @param text [String] The text to generate an embedding for
+    # @return [String] The embedding
     def generate_embedding(text:)
       case llm
       when :openai
-        response =
-
-
-
-
-        )
+        response = openai_client.embeddings(
+          parameters: {
+            model: "text-embedding-ada-002",
+            input: text
+          }
+        )
         response.dig("data").first.dig("embedding")
       when :cohere
-        response =
-
-
-        )
+        response = cohere_client.embed(
+          texts: [text],
+          model: "small"
+        )
         response.dig("embeddings").first
       end
     end
 
+    # Generate a completion for a given prompt
+    # Currently supports OpenAI and Cohere
+    # The LLM-related method will most likely need to be abstracted out into a separate class
+    # @param prompt [String] The prompt to generate a completion for
+    # @return [String] The completion
+    def generate_completion(prompt:)
+      case llm
+      when :openai
+        response = openai_client.completions(
+          parameters: {
+            model: "text-davinci-003",
+            temperature: 0.0,
+            prompt: prompt
+          }
+        )
+        response.dig("choices").first.dig("text")
+      when :cohere
+        response = cohere_client.generate(
+          prompt: prompt,
+          temperature: 0.0
+        )
+        response.dig("generations").first.dig("text")
+      end
+    end
+
+    def generate_prompt(question:, context:)
+      "Context:\n" +
+      "#{context}\n" +
+      "---\n" +
+      "Question: #{question}\n" +
+      "---\n" +
+      "Answer:"
+    end
+
+    private
+
+    def openai_client
+      @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
+    end
+
+    def cohere_client
+      @cohere_client ||= Cohere::Client.new(api_key: llm_api_key)
+    end
+
     def validate_llm!(llm:)
       raise ArgumentError, "LLM must be one of #{LLMS}" unless LLMS.include?(llm)
     end
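Since the new `generate_prompt` is plain string concatenation, the prompt it hands to `generate_completion` is easy to pin down. A small sketch that reproduces the format with invented sample values:

```ruby
# Reproduces Base#generate_prompt from the hunk above with sample values.
question = "What is Vectorsearch?"
context  = "Passage one\n---\nPassage two"

prompt = "Context:\n" +
         "#{context}\n" +
         "---\n" +
         "Question: #{question}\n" +
         "---\n" +
         "Answer:"

puts prompt
# Context:
# Passage one
# ---
# Passage two
# ---
# Question: What is Vectorsearch?
# ---
# Answer:
```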
data/lib/vectorsearch/pinecone.rb
CHANGED
@@ -49,7 +49,16 @@ module Vectorsearch
     end
 
     def ask(question:)
-
+      search_results = similarity_search(query: question)
+
+      context = search_results.dig("matches").map do |result|
+        result.dig("metadata").to_s
+      end
+      context = context.join("\n---\n")
+
+      prompt = generate_prompt(question: question, context: context)
+
+      generate_completion(prompt: prompt)
     end
   end
 end
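The new `Pinecone#ask` is a retrieval-augmented flow: search, flatten each match's metadata into a context string, build a prompt, complete. Replaying the context-building step against a canned response in the shape the code above expects (values invented):

```ruby
# Canned stand-in for a Pinecone query response, shaped per the code above.
search_results = {
  "matches" => [
    { "metadata" => { "title" => "Doc A", "text" => "First passage" } },
    { "metadata" => { "title" => "Doc B", "text" => "Second passage" } }
  ]
}

context = search_results.dig("matches").map do |result|
  result.dig("metadata").to_s
end
puts context.join("\n---\n")
# {"title"=>"Doc A", "text"=>"First passage"}
# ---
# {"title"=>"Doc B", "text"=>"Second passage"}
```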
data/lib/vectorsearch/qdrant.rb
CHANGED
@@ -45,7 +45,16 @@ module Vectorsearch
     end
 
     def ask(question:)
-
+      search_results = similarity_search(query: question)
+
+      context = search_results.dig("result").map do |result|
+        result.dig("payload").to_s
+      end
+      context = context.join("\n---\n")
+
+      prompt = generate_prompt(question: question, context: context)
+
+      generate_completion(prompt: prompt)
     end
   end
 end
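`Qdrant#ask` is the same flow; only the response shape differs, with hits under `"result"` and the stored document under each hit's `"payload"`. The equivalent canned shape (values invented):

```ruby
# Qdrant counterpart of the Pinecone example above.
search_results = {
  "result" => [
    { "payload" => { "text" => "First passage" } },
    { "payload" => { "text" => "Second passage" } }
  ]
}

context = search_results.dig("result")
                        .map { |result| result.dig("payload").to_s }
                        .join("\n---\n")
# `context` then flows into generate_prompt and generate_completion.
```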
data/lib/vectorsearch/version.rb
CHANGED
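(The version.rb hunk is not shown here. Given the 0.1.0 → 0.1.1 bump in the title and the +1/-1 count in the file list, the change is presumably just the version constant; a sketch assuming the conventional gem layout, not the verbatim file:)

```ruby
# data/lib/vectorsearch/version.rb (assumed layout)
module Vectorsearch
  VERSION = "0.1.1" # previously "0.1.0"
end
```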
data/lib/vectorsearch/weaviate.rb
CHANGED
@@ -22,26 +22,15 @@ module Vectorsearch
       super(llm: llm, llm_api_key: llm_api_key)
     end
 
-    def add_texts(
-      texts:
-    )
-      texts.each do |text|
-        text['class'] = index_name
-      end
-
-      client.batch_create(
-        objects: texts
-      )
-    end
-
     # Return documents similar to the query
+    # @param query [String] The query to search for
+    # @param k [Integer|String] The number of results to return
+    # @return [Hash] The search results
     def similarity_search(
       query:,
       k: 4
     )
-      near_text = "{
-        concepts: [\"#{query}\"],
-      }"
+      near_text = "{ concepts: [\"#{query}\"] }"
 
       client.query.get(
         class_name: index_name,
@@ -51,6 +40,10 @@ module Vectorsearch
       )
     end
 
+    # Return documents similar to the vector
+    # @param embedding [Array] The vector to search for
+    # @param k [Integer|String] The number of results to return
+    # @return [Hash] The search results
     def similarity_search_by_vector(
       embedding:,
       k: 4
@@ -65,17 +58,34 @@ module Vectorsearch
       )
     end
 
+    # Ask a question and return the answer
+    # @param question [String] The question to ask
+    # @return [Hash] The answer
     def ask(
       question:
     )
-
+      # Weaviate currently supports the `ask:` parameter only for the OpenAI LLM (with `qna-openai` module enabled).
+      if llm == :openai
+        ask_object = "{ question: \"#{question}\" }"
 
-
-
-
-
-
-
+        client.query.get(
+          class_name: index_name,
+          ask: ask_object,
+          limit: "1",
+          fields: "_additional { answer { result } }"
+        )
+      elsif llm == :cohere
+        search_results = similarity_search(query: question)
+
+        context = search_results.map do |result|
+          result.dig("content").to_s
+        end
+        context = context.join("\n---\n")
+
+        prompt = generate_prompt(question: question, context: context)
+
+        generate_completion(prompt: prompt)
+      end
     end
   end
 end
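`Weaviate#ask` branches: with `:openai` it delegates question answering to Weaviate itself through the GraphQL `ask` operator (the inline comment notes this requires the `qna-openai` module), while `:cohere` falls back to the generic search-then-complete flow. Roughly the query shape the OpenAI branch hands to `client.query.get`, sketched with an invented class name; the exact GraphQL the weaviate client gem emits may differ:

```ruby
question   = "What is Vectorsearch?"
ask_object = "{ question: \"#{question}\" }"

# Approximate query shape; "Document" stands in for the real index_name.
puts <<~GRAPHQL
  {
    Get {
      Document(ask: #{ask_object}, limit: 1) {
        _additional { answer { result } }
      }
    }
  }
GRAPHQL
```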