vectorsearch 0.1.0 → 0.1.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 4c9913804bc8aaadc08a60c0a250e2923f2a4ceb28633fdd99e4bf86544203b4
-   data.tar.gz: 8db3a77121d948f6ed709618da5bd4411a87a01aba26feb7942e74e0dd18f207
+   metadata.gz: ee0ed9a4527aeaefb5488bc4263f41fd4b793f410eadfcde52be4669035be78c
+   data.tar.gz: 9321cfe450003f8bd2a8e8a3ba48ac86d905c8d90ac5c56d37009dd4c27dd79e
  SHA512:
-   metadata.gz: d76d13ee23c7219483eac27a37ae61cad335bbd2d4169a76723d19a0e334ab6ac01037923cb2c07a64c853280d2449e54c0107fba7f36a231a624efaf1b68b46
-   data.tar.gz: 43612526795e54138ec0c891e5bbdef2ef712ac287a4776680d26d94e8cabcb9d5976774b5144cbeb3ae95b367e26460230931041f011f1a69671e014060684f
+   metadata.gz: 3a54fada2b58a0da4d0b34bd73595ebbe2076c2a5039cc690ad076f705610efb85bf62d1709dca8157a718ccf09ac35cea6fdd51096b4fc374d32d51705b43c8
+   data.tar.gz: 83b1a9844757253457bc2d6186b5f4ac1ba4217843eccca23d9773d180e2316f5442715d3e83342c16894bda3606e1d76d054b20be7e2c998fde7d1311dafae2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     vectorsearch (0.1.0)
+     vectorsearch (0.1.1)
        cohere-ruby (~> 0.9.1)
        milvus (~> 0.9.0)
        pinecone (~> 0.1.6)
data/README.md CHANGED
@@ -1,6 +1,7 @@
  # Vectorsearch
+ ![Tests status](https://github.com/andreibondarev/vectorsearch/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/vectorsearch.svg)](https://badge.fury.io/rb/vectorsearch)
 
- Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straight-forward DSL and abstract developers away from overly complex machine learning/data science-specific configurations.
+ The Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk data, generate embeddings, and store, search, query, and retrieve data from vector search databases. Vectorsearch offers a straightforward DSL and abstracts away overly complicated machine learning/data science-specific configurations and concepts.
 
  ## Installation
 
@@ -20,12 +21,12 @@ require "vectorsearch"
 
  List of currently supported vector search databases and features:
 
- | Database | Querying | Storage |
- | -------------------------------------- |
- | Weaviate | :white_check_mark: | WIP |
- | Qdrant | :white_check_mark: | WIP |
- | Milvus | :white_check_mark: | WIP |
- | Pinecone | :white_check_mark: | WIP |
+ | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
+ | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
+ | Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
+ | Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
+ | Milvus | :white_check_mark: | WIP | WIP | WIP | | |
+ | Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
 
  ### Create an instance
 
@@ -48,8 +49,8 @@ client = Vectorsearch::Pinecone.new(...)
 
  ```ruby
  # Store your documents in your vector search database
- client.add_documents(
-   documents: []
+ client.add_texts(
+   texts: []
  )
  ```
 
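A short usage sketch of the renamed method follows. It assumes a `client` built as in the (elided) constructor examples above; the sample strings are placeholders:

```ruby
# `client` is any Vectorsearch client created as shown earlier in the README,
# e.g. client = Vectorsearch::Pinecone.new(...) with the arguments elided there.

# 0.1.0 exposed add_documents(documents:); 0.1.1 renames it to add_texts(texts:).
client.add_texts(
  texts: [
    "First chunk of text to embed and store.",
    "Second chunk of text to embed and store."
  ]
)
```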
data/lib/vectorsearch/base.rb CHANGED
@@ -7,8 +7,12 @@ module Vectorsearch
    class Base
      attr_reader :client, :index_name, :llm, :llm_api_key
 
+     # Currently supported LLMs
+     # TODO: Add support for HuggingFace
      LLMS = %i[openai cohere].freeze
 
+     # @param llm [Symbol] The LLM to use
+     # @param llm_api_key [String] The API key for the LLM
      def initialize(llm:, llm_api_key:)
        validate_llm!(llm: llm)
 
@@ -16,27 +20,84 @@ module Vectorsearch
        @llm_api_key = llm_api_key
      end
 
+     # TODO
+     def add_texts(texts:)
+       raise NotImplementedError
+     end
+
+     # NotImplementedError is raised if the subclass does not implement the `ask()` method
+     def ask(question:)
+       raise NotImplementedError
+     end
+
+     # Generate an embedding for a given text
+     # Currently supports OpenAI and Cohere
+     # The LLM-related methods will most likely need to be abstracted out into a separate class
+     # @param text [String] The text to generate an embedding for
+     # @return [Array] The embedding
      def generate_embedding(text:)
        case llm
        when :openai
-         response = OpenAI::Client.new(access_token: llm_api_key)
-           .embeddings(
-             parameters: {
-               model: "text-embedding-ada-002",
-               input: text
-             }
-           )
+         response = openai_client.embeddings(
+           parameters: {
+             model: "text-embedding-ada-002",
+             input: text
+           }
+         )
          response.dig("data").first.dig("embedding")
        when :cohere
-         response = Cohere::Client.new(api_key: llm_api_key)
-           .embed(
-             texts: [text],
-             model: "small"
-           )
+         response = cohere_client.embed(
+           texts: [text],
+           model: "small"
+         )
          response.dig("embeddings").first
        end
      end
 
+     # Generate a completion for a given prompt
+     # Currently supports OpenAI and Cohere
+     # The LLM-related methods will most likely need to be abstracted out into a separate class
+     # @param prompt [String] The prompt to generate a completion for
+     # @return [String] The completion
+     def generate_completion(prompt:)
+       case llm
+       when :openai
+         response = openai_client.completions(
+           parameters: {
+             model: "text-davinci-003",
+             temperature: 0.0,
+             prompt: prompt
+           }
+         )
+         response.dig("choices").first.dig("text")
+       when :cohere
+         response = cohere_client.generate(
+           prompt: prompt,
+           temperature: 0.0
+         )
+         response.dig("generations").first.dig("text")
+       end
+     end
+
+     # Assemble the retrieval-augmented prompt from a question and its context
+     def generate_prompt(question:, context:)
+       "Context:\n" +
+       "#{context}\n" +
+       "---\n" +
+       "Question: #{question}\n" +
+       "---\n" +
+       "Answer:"
+     end
+
+     private
+
+     def openai_client
+       @openai_client ||= OpenAI::Client.new(access_token: llm_api_key)
+     end
+
+     def cohere_client
+       @cohere_client ||= Cohere::Client.new(api_key: llm_api_key)
+     end
+
      def validate_llm!(llm:)
        raise ArgumentError, "LLM must be one of #{LLMS}" unless LLMS.include?(llm)
      end
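To make the refactor concrete, here is a minimal sketch of the `Base` surface using only the signatures visible in this hunk. The API key and inputs are placeholders, and the first two calls need valid credentials to actually run:

```ruby
require "vectorsearch"

# llm: and llm_api_key: are the two keywords accepted by Base#initialize above.
base = Vectorsearch::Base.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"])

# Both helpers now go through one memoized API client per instance,
# instead of building a new OpenAI::Client/Cohere::Client on every call.
embedding = base.generate_embedding(text: "What is a vector database?") # => Array of floats
answer    = base.generate_completion(prompt: "Say hello.")              # => String

# generate_prompt is pure string assembly and needs no API key:
puts base.generate_prompt(question: "Who wrote this?", context: "Doc A\n---\nDoc B")
```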
data/lib/vectorsearch/pinecone.rb CHANGED
@@ -49,7 +49,16 @@ module Vectorsearch
      end
 
      def ask(question:)
-       raise NotImplementedError
+       search_results = similarity_search(query: question)
+
+       context = search_results.dig("matches").map do |result|
+         result.dig("metadata").to_s
+       end
+       context = context.join("\n---\n")
+
+       prompt = generate_prompt(question: question, context: context)
+
+       generate_completion(prompt: prompt)
      end
    end
  end
data/lib/vectorsearch/qdrant.rb CHANGED
@@ -45,7 +45,16 @@ module Vectorsearch
      end
 
      def ask(question:)
-       raise NotImplementedError
+       search_results = similarity_search(query: question)
+
+       context = search_results.dig("result").map do |result|
+         result.dig("payload").to_s
+       end
+       context = context.join("\n---\n")
+
+       prompt = generate_prompt(question: question, context: context)
+
+       generate_completion(prompt: prompt)
      end
    end
  end
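Both adapters implement the same retrieve-then-generate flow, differing only in the response keys they read (`matches`/`metadata` for Pinecone, `result`/`payload` for Qdrant). A hedged usage sketch, with the client construction elided as in the README:

```ruby
# client = Vectorsearch::Qdrant.new(...)  # or Vectorsearch::Pinecone.new(...)

# ask() now 1) runs similarity_search(query: question),
#           2) joins the hits' stored metadata/payload into one context string,
#           3) builds a prompt via generate_prompt, and
#           4) returns generate_completion(prompt:).
answer = client.ask(question: "What is a vector database?")
puts answer # completion text from the configured LLM
```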
data/lib/vectorsearch/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Vectorsearch
-   VERSION = "0.1.0"
+   VERSION = "0.1.1"
  end
data/lib/vectorsearch/weaviate.rb CHANGED
@@ -22,26 +22,15 @@ module Vectorsearch
        super(llm: llm, llm_api_key: llm_api_key)
      end
 
-     def add_texts(
-       texts:
-     )
-       texts.each do |text|
-         text['class'] = index_name
-       end
-
-       client.batch_create(
-         objects: texts
-       )
-     end
-
      # Return documents similar to the query
+     # @param query [String] The query to search for
+     # @param k [Integer|String] The number of results to return
+     # @return [Hash] The search results
      def similarity_search(
        query:,
        k: 4
      )
-       near_text = "{
-         concepts: [\"#{query}\"],
-       }"
+       near_text = "{ concepts: [\"#{query}\"] }"
 
        client.query.get(
          class_name: index_name,
@@ -51,6 +40,10 @@ module Vectorsearch
        )
      end
 
+     # Return documents similar to the vector
+     # @param embedding [Array] The vector to search for
+     # @param k [Integer|String] The number of results to return
+     # @return [Hash] The search results
      def similarity_search_by_vector(
        embedding:,
        k: 4
@@ -65,17 +58,34 @@ module Vectorsearch
        )
      end
 
+     # Ask a question and return the answer
+     # @param question [String] The question to ask
+     # @return [Hash] The answer
      def ask(
        question:
      )
-       ask_object = "{ question: \"#{question}\" }"
+       # Weaviate currently supports the `ask:` parameter only for the OpenAI LLM (with the `qna-openai` module enabled).
+       if llm == :openai
+         ask_object = "{ question: \"#{question}\" }"
 
-       client.query.get(
-         class_name: index_name,
-         ask: ask_object,
-         limit: "1",
-         fields: "_additional { answer { result } }"
-       )
+         client.query.get(
+           class_name: index_name,
+           ask: ask_object,
+           limit: "1",
+           fields: "_additional { answer { result } }"
+         )
+       elsif llm == :cohere
+         search_results = similarity_search(query: question)
+
+         context = search_results.map do |result|
+           result.dig("content").to_s
+         end
+         context = context.join("\n---\n")
+
+         prompt = generate_prompt(question: question, context: context)
+
+         generate_completion(prompt: prompt)
+       end
      end
    end
  end
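The practical effect of the new branch, sketched under the same assumptions (a configured `weaviate` client; the question text is a placeholder):

```ruby
answer = weaviate.ask(question: "What is a vector database?")

# llm: :openai -> delegates to Weaviate's native GraphQL `ask` clause
#                 (requires the qna-openai module on the Weaviate server) and
#                 returns the query result with _additional { answer { result } }.
# llm: :cohere -> falls back to the retrieve-then-generate flow also used by
#                 the Pinecone and Qdrant adapters and returns the completion String.
```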
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: vectorsearch
  version: !ruby/object:Gem::Version
-   version: 0.1.0
+   version: 0.1.1
  platform: ruby
  authors:
  - Andrei Bondarev