boxcars 0.2.12 → 0.2.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69712266f9506d71ed3ad1fdcbfeef5a389bdbb7157d88e3f703f9a9b3ad8323
4
- data.tar.gz: d7a5d0796d2963b737dc018c644042fe1e744ec7bd230f581367baf84df60f76
3
+ metadata.gz: 1039f86c58712c10143cc26438da14571013081bcff08aed3d9fcac4b1e84060
4
+ data.tar.gz: fca9f08855cae8e4e8a4171c043e92884e245582b049b4cc75bfa4d2cd98e51a
5
5
  SHA512:
6
- metadata.gz: 85876f5e1053bb8100795020c33da778a06668f9e3be856a8689d90d13728cef73e437ee6d5c0888b4a5483f698ee8288c061573a93fdff93559080e525c4254
7
- data.tar.gz: 99e15b3fe0c5d5277c5ed123e5569bca1f1ddfca3a1b3ec054504b855bc7a005d6eb9a8f7ba71989d16ded297959fa09cddf7b31879ff37df78df5dfb21b3240
6
+ metadata.gz: 5f24a578f4004d99a0d71c05f4a0ed520dda2e562d1327ec68bdf8682a927b741e870ed348a1a273319003f515521ef8a12c8778b70994764a945e31f906f807
7
+ data.tar.gz: be067a2ba1ba2e032a58d32f46071f5510a5ce0f15fbc89a06fe84f4fd854ec825a0ea0c786a90ba87c20bb1893ff59512170ac2e338080e25f85b2474b6fb64
data/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  # Changelog
2
2
 
3
+ ## [v0.2.13](https://github.com/BoxcarsAI/boxcars/tree/v0.2.13) (2023-05-24)
4
+
5
+ [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.12...v0.2.13)
6
+
7
+ **Closed issues:**
8
+
9
+ - Typo "Boscar.error" should be "Boxcars.error" [\#82](https://github.com/BoxcarsAI/boxcars/issues/82)
10
+
11
+ **Merged pull requests:**
12
+
13
+ - Add vector answer boxcar [\#79](https://github.com/BoxcarsAI/boxcars/pull/79) ([francis](https://github.com/francis))
14
+
15
+ ## [v0.2.12](https://github.com/BoxcarsAI/boxcars/tree/v0.2.12) (2023-05-22)
16
+
17
+ [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.11...v0.2.12)
18
+
19
+ **Closed issues:**
20
+
21
+ - GPT-4 support? [\#71](https://github.com/BoxcarsAI/boxcars/issues/71)
22
+ - add PgVector Vector Store [\#68](https://github.com/BoxcarsAI/boxcars/issues/68)
23
+
24
+ **Merged pull requests:**
25
+
26
+ - issue\_82 typo "Boscar" instead of "Boxcars" [\#83](https://github.com/BoxcarsAI/boxcars/pull/83) ([MadBomber](https://github.com/MadBomber))
27
+ - Update boxcars.rb config example [\#81](https://github.com/BoxcarsAI/boxcars/pull/81) ([nhorton](https://github.com/nhorton))
28
+ - Feature- added pgvector vector store [\#80](https://github.com/BoxcarsAI/boxcars/pull/80) ([jaigouk](https://github.com/jaigouk))
29
+ - drop support for pre ruby 3 version [\#75](https://github.com/BoxcarsAI/boxcars/pull/75) ([francis](https://github.com/francis))
30
+ - Chore - refine VectorSearch [\#74](https://github.com/BoxcarsAI/boxcars/pull/74) ([jaigouk](https://github.com/jaigouk))
31
+ - raise error if OpenAI API returns error or nil. closes \#71 [\#72](https://github.com/BoxcarsAI/boxcars/pull/72) ([francis](https://github.com/francis))
32
+
3
33
  ## [v0.2.11](https://github.com/BoxcarsAI/boxcars/tree/v0.2.11) (2023-05-05)
4
34
 
5
35
  [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.10...v0.2.11)
data/Gemfile CHANGED
@@ -5,6 +5,10 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in boxcars.gemspec
6
6
  gemspec
7
7
 
8
+ gem "debug", "~> 1.1"
9
+
10
+ gem "dotenv", "~> 2.8"
11
+
8
12
  gem "rake", "~> 13.0"
9
13
 
10
14
  gem "sqlite3", "~> 1.6"
data/Gemfile.lock CHANGED
@@ -1,10 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- boxcars (0.2.12)
4
+ boxcars (0.2.14)
5
5
  google_search_results (~> 2.2)
6
6
  gpt4all (~> 0.0.4)
7
- ruby-openai (~> 4.0)
7
+ hnswlib (~> 0.8)
8
+ pgvector (~> 0.2)
9
+ ruby-openai (~> 4.1)
8
10
 
9
11
  GEM
10
12
  remote: https://rubygems.org/
@@ -86,11 +88,9 @@ GEM
86
88
  i18n (1.13.0)
87
89
  concurrent-ruby (~> 1.0)
88
90
  io-console (0.6.0)
89
- io-console (0.6.0-java)
90
91
  irb (1.6.4)
91
92
  reline (>= 0.3.0)
92
93
  json (2.6.3)
93
- json (2.6.3-java)
94
94
  mime-types (3.4.1)
95
95
  mime-types-data (~> 3.2015)
96
96
  mime-types-data (3.2023.0218.1)
@@ -100,7 +100,6 @@ GEM
100
100
  multipart-post (2.3.0)
101
101
  netrc (0.11.0)
102
102
  nio4r (2.5.9)
103
- nio4r (2.5.9-java)
104
103
  octokit (4.25.1)
105
104
  faraday (>= 1, < 3)
106
105
  sawyer (~> 0.9)
@@ -142,7 +141,7 @@ GEM
142
141
  diff-lcs (>= 1.2.0, < 2.0)
143
142
  rspec-support (~> 3.12.0)
144
143
  rspec-support (3.12.0)
145
- rubocop (1.50.2)
144
+ rubocop (1.51.0)
146
145
  json (~> 2.3)
147
146
  parallel (~> 1.10)
148
147
  parser (>= 3.2.0.0)
@@ -156,7 +155,7 @@ GEM
156
155
  parser (>= 3.2.1.0)
157
156
  rubocop-capybara (2.18.0)
158
157
  rubocop (~> 1.41)
159
- rubocop-factory_bot (2.22.0)
158
+ rubocop-factory_bot (2.23.1)
160
159
  rubocop (~> 1.33)
161
160
  rubocop-rake (0.6.0)
162
161
  rubocop (~> 1.0)
@@ -164,7 +163,7 @@ GEM
164
163
  rubocop (~> 1.33)
165
164
  rubocop-capybara (~> 2.17)
166
165
  rubocop-factory_bot (~> 2.22)
167
- ruby-openai (4.0.0)
166
+ ruby-openai (4.1.0)
168
167
  faraday (>= 1)
169
168
  faraday-multipart (>= 1)
170
169
  ruby-progressbar (1.13.0)
@@ -172,11 +171,8 @@ GEM
172
171
  sawyer (0.9.2)
173
172
  addressable (>= 2.3.5)
174
173
  faraday (>= 0.17.3, < 3)
175
- sqlite3 (1.6.2)
174
+ sqlite3 (1.6.3)
176
175
  mini_portile2 (~> 2.8.0)
177
- sqlite3 (1.6.2-arm64-darwin)
178
- sqlite3 (1.6.2-x86_64-darwin)
179
- sqlite3 (1.6.2-x86_64-linux)
180
176
  strings-ansi (0.2.0)
181
177
  timers (4.3.5)
182
178
  traces (0.9.1)
@@ -191,7 +187,6 @@ GEM
191
187
  concurrent-ruby (~> 1.0)
192
188
  unf (0.1.4)
193
189
  unf_ext
194
- unf (0.1.4-java)
195
190
  unf_ext (0.0.8.2)
196
191
  unicode-display_width (2.4.2)
197
192
  vcr (6.1.0)
data/README.md CHANGED
@@ -3,14 +3,14 @@
3
3
  <h4 align="center">
4
4
  <a href="https://www.boxcars.ai">Website</a> |
5
5
  <a href="https://www.boxcars.ai/blog">Blog</a> |
6
- <a href="https://github.com/BoxcarsAI/boxcars/wiki">Documentation</a>
6
+ <a href="https://github.com/BoxcarsAI/boxcars/wiki">Documentation</a>
7
7
  </h4>
8
8
 
9
9
  <p align="center">
10
10
  <a href="https://github.com/BoxcarsAI/boxcars/blob/main/LICENSE.txt"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
11
11
  </p>
12
12
 
13
- Boxcars is a gem that enables you to create new systems with AI composability, using various concepts such as OpenAI, Search, SQL, Rails Active Record and more. This can even be extended with your concepts as well (including your concepts).
13
+ Boxcars is a gem that enables you to create new systems with AI composability, using various concepts such as OpenAI, Search, SQL, Rails Active Record, Vector Search and more. This can even be extended with your own concepts as well.
14
14
 
15
15
  This gem was inspired by the popular Python library Langchain. However, we wanted to give it a Ruby spin and make it more user-friendly for beginners to get started.
16
16
 
@@ -57,6 +57,9 @@ require "boxcars"
57
57
  Note: if you want to try out the examples below, run this command and then paste in the code segments of interest:
58
58
  ```bash
59
59
  irb -r dotenv/load -r boxcars
60
+
61
+ # or, if you prefer to use the local repository
62
+ irb -r dotenv/load -r ./lib/boxcars
60
63
  ```
61
64
 
62
65
  ### Direct Boxcar Use
@@ -107,7 +110,7 @@ Produces:
107
110
  ```text
108
111
  > Entering Zero Shot#run
109
112
  What is pi times the square root of the average temperature in Austin TX in January?
110
- Thought: We need to find the average temperature in Austin TX in January and then multiply it by pi and the square root of the average temperature. We can use a search engine to find the average temperature in Austin TX in January and a calculator to perform the multiplication.
113
+ Thought: We need to find the average temperature in Austin TX in January and then multiply it by pi and the square root of the average temperature. We can use a search engine to find the average temperature in Austin TX in January and a calculator to perform the multiplication.
111
114
  Question: Average temperature in Austin TX in January
112
115
  Answer: January Weather in Austin Texas, United States. Daily high temperatures increase by 2°F, from 62°F to 64°F, rarely falling below 45°F or exceeding 76° ...
113
116
  Observation: January Weather in Austin Texas, United States. Daily high temperatures increase by 2°F, from 62°F to 64°F, rarely falling below 45°F or exceeding 76° ...
@@ -135,7 +138,7 @@ See [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/boxcars_exam
135
138
 
136
139
  For the Swagger boxcar, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/swagger_examples.ipynb) Jupyter Notebook.
137
140
 
138
- For simple vector storage and search, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/vector_store_examples.ipynb) Jupyter Notebook.
141
+ For simple vector storage and search, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/vector_search_examples.ipynb) Jupyter Notebook.
139
142
 
140
143
  Note, some folks that we talked to didn't know that you could run Ruby Jupyter notebooks. [You can](https://github.com/SciRuby/iruby).
141
144
 
data/boxcars.gemspec CHANGED
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- # dev / test dependencies
34
- spec.add_development_dependency "debug", "~> 1.1"
35
- spec.add_development_dependency "dotenv", "~> 2.8"
36
- spec.add_development_dependency "rspec", "~> 3.2"
37
-
38
33
  # runtime dependencies
39
34
  spec.add_dependency "google_search_results", "~> 2.2"
40
35
  spec.add_dependency "gpt4all", "~> 0.0.4"
41
- spec.add_dependency "ruby-openai", "~> 4.0"
36
+ spec.add_dependency "hnswlib", "~> 0.8"
37
+ spec.add_dependency "pgvector", "~> 0.2"
38
+ spec.add_dependency "ruby-openai", "~> 4.1"
42
39
 
43
40
  # For more information and examples about making a new gem, checkout our
44
41
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -29,7 +29,7 @@ module Boxcars
29
29
  end
30
30
 
31
31
  # @return Hash The additional variables for this boxcar.
32
- def prediction_additional
32
+ def prediction_additional(_inputs)
33
33
  { model_info: model_info }.merge super
34
34
  end
35
35
 
@@ -114,14 +114,14 @@ module Boxcars
114
114
  end
115
115
 
116
116
  # @return Hash The additional variables for this boxcar.
117
- def prediction_additional
117
+ def prediction_additional(_inputs)
118
118
  { stop: stop, top_k: top_k }
119
119
  end
120
120
 
121
121
  # @param inputs [Hash] The inputs to the boxcar.
122
122
  # @return Hash The variables for this boxcar.
123
123
  def prediction_variables(inputs)
124
- prediction_additional.merge(inputs)
124
+ prediction_additional(inputs).merge(inputs)
125
125
  end
126
126
 
127
127
  # remove backticks or triple backticks from the code
@@ -26,7 +26,7 @@ module Boxcars
26
26
  end
27
27
 
28
28
  # @return Hash The additional variables for this boxcar.
29
- def prediction_additional
29
+ def prediction_additional(_inputs)
30
30
  { schema: schema, dialect: dialect }.merge super
31
31
  end
32
32
 
@@ -25,7 +25,7 @@ module Boxcars
25
25
  end
26
26
 
27
27
  # @return Hash The additional variables for this boxcar.
28
- def prediction_additional
28
+ def prediction_additional(_inputs)
29
29
  { swagger_url: swagger_url, context: context }.merge super
30
30
  end
31
31
 
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ # A Boxcar that answers a question using content retrieved by a vector search
6
+ class VectorAnswer < EngineBoxcar
7
+ # the description of this engine boxcar
8
+ DESC = "useful for when you need to answer questions from vector search results."
9
+
10
+ attr_reader :embeddings, :vector_documents, :search_content
11
+
12
+ # @param embeddings [Hash] The vector embeddings to use for this boxcar.
13
+ # @param vector_documents [Hash] The vector documents to use for this boxcar.
14
+ # @param engine [Boxcars::Engine] The engine to use for this boxcar. Can be inherited from a train if nil.
15
+ # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
16
+ # @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
17
+ def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
18
+ the_prompt = prompt || my_prompt
19
+ @embeddings = embeddings
20
+ @vector_documents = vector_documents
21
+ kwargs[:stop] ||= ["```output"]
22
+ kwargs[:name] ||= "VectorAnswer"
23
+ kwargs[:description] ||= DESC
24
+ super(engine: engine, prompt: the_prompt, **kwargs)
25
+ end
26
+
27
+ # @param inputs [Hash] The inputs to use for the prediction.
28
+ # @return Hash The additional variables for this boxcar.
29
+ def prediction_additional(inputs)
30
+ { search_content: get_search_content(inputs[:question]) }.merge super
31
+ end
32
+
33
+ private
34
+
35
+ # @param results [Array] The results from the vector search.
36
+ # @return [String] The content of the search results.
37
+ def get_results_content(results)
38
+ results&.map do |result|
39
+ result[:document].content
40
+ end.to_a.join("\n\n")
41
+ end
42
+
43
+ # return the content of the search results for count results
44
+ # @param question [String] The question to search for.
45
+ # @param count [Integer] The number of results to return.
46
+ # @return [String] The content of the search results.
47
+ def get_search_content(question, count: 1)
48
+ search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
49
+ results = search.call query: question, count: count
50
+ @search_content = get_results_content(results)
51
+ end
52
+
53
+ # our template
54
+ CTEMPLATE = [
55
+ syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
56
+ "```text\n%<search_content>s\n```\n\n",
57
+ "Using the above, just answer the question as if you were answering directly."),
58
+ user("%<question>s")
59
+ ].freeze
60
+
61
+ # The prompt to use for the engine.
62
+ def my_prompt
63
+ @conversation ||= Conversation.new(lines: CTEMPLATE)
64
+ @my_prompt ||= ConversationPrompt.new(
65
+ conversation: @conversation,
66
+ input_variables: [:question],
67
+ other_inputs: [:search_content],
68
+ output_variables: [:answer])
69
+ end
70
+ end
71
+ end
@@ -158,3 +158,4 @@ require "boxcars/boxcar/swagger"
158
158
  require "boxcars/boxcar/active_record"
159
159
  require "boxcars/vector_store"
160
160
  require "boxcars/vector_search"
161
+ require "boxcars/boxcar/vector_answer"
@@ -19,7 +19,7 @@ module Boxcars
19
19
  end
20
20
 
21
21
  # @return Hash The additional variables for this boxcar.
22
- def prediction_additional
22
+ def prediction_additional(_inputs)
23
23
  { boxcar_names: boxcar_names, boxcar_descriptions: boxcar_descriptions }.merge super
24
24
  end
25
25
 
data/lib/boxcars/train.rb CHANGED
@@ -69,7 +69,7 @@ module Boxcars
69
69
  # @return [Boxcars::Action] Action specifying what boxcar to use.
70
70
  def plan(intermediate_steps, **kwargs)
71
71
  thoughts = construct_scratchpad(intermediate_steps)
72
- full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
72
+ full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
73
73
  action = get_next_action(full_inputs)
74
74
  return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
75
75
 
@@ -4,13 +4,41 @@
4
4
  module Boxcars
5
5
  # Searches a set of vector documents for the entries closest to a query.
6
6
  class VectorSearch
7
+ # initialize the vector search with the following parameters:
8
+ # @param params [Hash] A Hash containing the initial configuration.
9
+ # @option params [Hash] :vector_documents The vector documents to search.
10
+ # example:
11
+ # {
12
+ # type: :in_memory,
13
+ # vector_store: [
14
+ # Boxcars::VectorStore::Document.new(
15
+ # content: "hello",
16
+ # embedding: [0.1, 0.2, 0.3],
17
+ # metadata: { a: 1 }
18
+ # )
19
+ # ]
20
+ # }
7
21
  def initialize(params)
8
22
  @vector_documents = params[:vector_documents]
9
23
  @embedding_tool = params[:embedding_tool] || :openai
10
24
  @vector_search_instance = vector_search_instance
11
- @openai_connection = params[:openai_connection] || default_connection(openai_access_token: openai_access_token)
25
+ @openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
12
26
  end
13
27
 
28
+ # @param query [String] The query to search for.
29
+ # @param count [Integer] The number of results to return.
30
+ # @return [Array] array of hashes with :document and :distance keys
31
+ # @example
32
+ # [
33
+ # {
34
+ # document: Boxcars::VectorStore::Document.new(
35
+ # content: "hello",
36
+ # embedding: [0.1, 0.2, 0.3],
37
+ # metadata: { a: 1 }
38
+ # ),
39
+ # distance: 0.1
40
+ # }
41
+ # ]
14
42
  def call(query:, count: 1)
15
43
  validate_query(query)
16
44
  query_vector = convert_query_to_vector(query)
@@ -16,13 +16,10 @@ module Boxcars
16
16
 
17
17
  def initialize(params)
18
18
  @split_chunk_size = params[:split_chunk_size] || 2000
19
- @training_data_path = File.absolute_path(params[:training_data_path])
20
- @index_file_path = File.absolute_path(params[:index_file_path])
19
+ @base_dir_path, @index_file_path, @json_doc_file_path =
20
+ validate_params(params[:training_data_path], params[:index_file_path], split_chunk_size)
21
21
 
22
- validate_params(@training_data_path, @index_file_path, split_chunk_size)
23
-
24
- @json_doc_file_path = absolute_json_doc_file_path(@index_file_path, params[:json_doc_file_path])
25
- @force_rebuild = params[:force_rebuild] || true
22
+ @force_rebuild = params[:force_rebuild] || false
26
23
  @hnsw_vectors = []
27
24
  end
28
25
 
@@ -32,9 +29,13 @@ module Boxcars
32
29
  else
33
30
  puts "Building Hnswlib vector store..."
34
31
  data = load_data_files(training_data_path)
32
+ Boxcars.debug("Loaded #{data.length} files from #{training_data_path}")
35
33
  texts = split_text_into_chunks(data)
34
+ Boxcars.debug("Split #{data.length} files into #{texts.length} chunks")
36
35
  vectors = generate_vectors(texts)
36
+ Boxcars.debug("Generated #{vectors.length} vectors")
37
37
  add_vectors(vectors, texts)
38
+ Boxcars.debug("Added #{vectors.length} vectors to vector store")
38
39
  save_vector_store
39
40
 
40
41
  {
@@ -46,24 +47,29 @@ module Boxcars
46
47
 
47
48
  private
48
49
 
49
- attr_reader :training_data_path, :index_file_path, :split_chunk_size, :json_doc_file_path, :force_rebuild, :hnsw_vectors
50
+ attr_reader :training_data_path, :index_file_path, :base_dir_path,
51
+ :split_chunk_size, :json_doc_file_path, :force_rebuild, :hnsw_vectors
50
52
 
51
53
  def validate_params(training_data_path, index_file_path, split_chunk_size)
52
- training_data_dir = File.dirname(training_data_path.gsub(/\*{1,2}/, ''))
54
+ validate_string(training_data_path, 'training_data_path')
55
+ validate_string(index_file_path, 'index_file_path')
56
+
57
+ absolute_data_path = File.absolute_path(training_data_path)
58
+ base_data_dir_path = File.dirname(absolute_data_path.gsub(/\*{1,2}/, ''))
59
+ @training_data_path = training_data_path
53
60
 
54
- raise_argument_error('training_data_path parent directory must exist') unless File.directory?(training_data_dir)
55
- raise_argument_error('No files found at the training_data_path pattern') if Dir.glob(training_data_path).empty?
61
+ raise_argument_error('training_data_path parent directory must exist') unless File.directory?(base_data_dir_path)
62
+ raise_argument_error('No files found at the training_data_path pattern') if Dir.glob(absolute_data_path).empty?
56
63
 
57
- index_dir = File.dirname(index_file_path)
64
+ absolute_index_path = File.absolute_path(index_file_path)
65
+ index_parent_dir = File.dirname(absolute_index_path)
58
66
 
59
- raise_argument_error('index_file_path parent directory must exist') unless File.directory?(index_dir)
67
+ raise_argument_error('index_file_path parent directory must exist') unless File.directory?(index_parent_dir)
60
68
  raise_argument_error('split_chunk_size must be an integer') unless split_chunk_size.is_a?(Integer)
61
- end
62
69
 
63
- def absolute_json_doc_file_path(index_file_path, json_doc_file_path)
64
- return index_file_path.gsub(/\.bin$/, '.json') unless json_doc_file_path
70
+ json_doc_file_path = index_file_path.gsub(/\.bin$/, '.json')
65
71
 
66
- File.absolute_path(json_doc_file_path)
72
+ [index_parent_dir, index_file_path, json_doc_file_path]
67
73
  end
68
74
 
69
75
  def add_vectors(vectors, texts)
@@ -76,6 +82,7 @@ module Boxcars
76
82
  dim: vector[:dim],
77
83
  metric: 'l2',
78
84
  max_item: 10000,
85
+ base_dir_path: base_dir_path,
79
86
  index_file_path: index_file_path,
80
87
  json_doc_file_path: json_doc_file_path
81
88
  }
@@ -90,6 +97,7 @@ module Boxcars
90
97
 
91
98
  def load_existing_vector_store
92
99
  Boxcars::VectorStore::Hnswlib::LoadFromDisk.call(
100
+ base_dir_path: base_dir_path,
93
101
  index_file_path: index_file_path,
94
102
  json_doc_file_path: json_doc_file_path
95
103
  )
@@ -10,11 +10,13 @@ module Boxcars
10
10
  class LoadFromDisk
11
11
  include VectorStore
12
12
 
13
+ # params:
14
+ # base_dir_path: string (absolute path to the directory containing the index_file_path and json_doc_file_path),
15
+ # index_file_path: string (relative path to the index file from the base_dir_path),
16
+ # json_doc_file_path: string (relative path to the json file from the base_dir_path)
13
17
  def initialize(params)
14
- validate_params(params[:index_file_path], params[:json_doc_file_path])
15
-
16
- @index_file_path = File.absolute_path(params[:index_file_path])
17
- @json_doc_file_path = File.absolute_path(params[:json_doc_file_path])
18
+ @base_dir_path, @index_file_path, @json_doc_file_path =
19
+ validate_params(params)
18
20
  end
19
21
 
20
22
  def call
@@ -29,14 +31,34 @@ module Boxcars
29
31
 
30
32
  private
31
33
 
32
- attr_reader :index_file_path, :json_doc_file_path
34
+ attr_reader :base_dir_path, :index_file_path, :json_doc_file_path
35
+
36
+ def validate_params(params)
37
+ base_dir_path = params[:base_dir_path]
38
+ index_file_path = remove_relative_path(params[:index_file_path])
39
+ json_doc_file_path = remove_relative_path(params[:json_doc_file_path])
40
+ # we omit base_dir validation in case of loading the data from other environments
41
+ validate_string(index_file_path, "index_file_path")
42
+ validate_string(json_doc_file_path, "json_doc_file_path")
43
+
44
+ absolute_index_path = validate_file_existence(base_dir_path, index_file_path, "index_file_path")
45
+ abosolute_json_path = validate_file_existence(base_dir_path, json_doc_file_path, "json_doc_file_path")
46
+
47
+ [base_dir_path, absolute_index_path, abosolute_json_path]
48
+ end
49
+
50
+ def remove_relative_path(path)
51
+ path.start_with?('./') ? path[2..] : path
52
+ end
53
+
54
+ def validate_file_existence(base_dir, file_path, name)
55
+ file =
56
+ base_dir.to_s.empty? ? file_path : File.join(base_dir, file_path)
57
+ complete_path = File.absolute_path(file)
33
58
 
34
- def validate_params(index_file_path, json_doc_file_path)
35
- raise_argument_error("index_file_path must be a string") unless index_file_path.is_a?(String)
36
- raise_argument_error("json_doc_file_path must be a string") unless json_doc_file_path.is_a?(String)
59
+ raise raise_argument_error("#{name} does not exist at #{complete_path}") unless File.exist?(complete_path)
37
60
 
38
- raise_argument_error("index_file_path must exist") unless File.exist?(index_file_path)
39
- raise_argument_error("json_doc_file_path must exist") unless File.exist?(json_doc_file_path)
61
+ complete_path
40
62
  end
41
63
 
42
64
  def load_as_hnsw_vectors(vectors)
@@ -47,7 +69,11 @@ module Boxcars
47
69
  embedding: vector[:embedding],
48
70
  metadata: vector[:metadata]
49
71
  )
50
- hnsw_vectors[vectors.first[:doc_id].to_i] = hnsw_vector
72
+ if vector[:metadata][:doc_id]
73
+ hnsw_vectors[vector[:metadata][:doc_id]] = hnsw_vector
74
+ else
75
+ hnsw_vectors << hnsw_vector
76
+ end
51
77
  end
52
78
  hnsw_vectors
53
79
  end
@@ -9,19 +9,35 @@ module Boxcars
9
9
  class Search
10
10
  include VectorStore
11
11
 
12
+ # initialize the vector store search with the following parameters:
13
+ # @param params [Hash] A Hash containing the initial configuration.
14
+ # example:
15
+ # {
16
+ # type: :hnswlib,
17
+ # vector_store: [
18
+ # Boxcars::VectorStore::Document.new(
19
+ # content: "hello",
20
+ # embedding: [0.1, 0.2, 0.3],
21
+ # metadata: { a: 1 }
22
+ # )
23
+ # ]
24
+ # }
12
25
  def initialize(params)
13
- validate_params(params[:vector_documents])
14
- @vector_documents = params[:vector_documents]
15
- @search_index = load_index(params[:vector_documents])
26
+ @vector_store = validate_params(params[:vector_documents])
27
+ @metadata, @index_file = validate_files(vector_store)
28
+ @search_index = load_index(metadata, index_file)
16
29
  end
17
30
 
31
+ # @param query_vector [Array] The query vector to search for.
32
+ # @param count [Integer] The number of results to return.
33
+ # @return [Array] array of hashes with :document and :distance keys
18
34
  def call(query_vector:, count: 1)
19
35
  search(query_vector, count)
20
36
  end
21
37
 
22
38
  private
23
39
 
24
- attr_reader :vector_documents, :vector_store, :json_doc, :search_index, :metadata
40
+ attr_reader :vector_store, :index_file, :search_index, :metadata
25
41
 
26
42
  def validate_params(vector_documents)
27
43
  raise_argument_error('vector_documents is nil') unless vector_documents
@@ -34,27 +50,47 @@ module Boxcars
34
50
  raise_arugment_error('vector_store must be an array of Document objects')
35
51
  end
36
52
 
37
- true
53
+ vector_documents[:vector_store]
38
54
  end
39
55
 
40
- def load_index(vector_documents)
41
- @metadata = vector_documents[:vector_store].first.metadata
42
- @json_doc = @metadata[:json_doc_file_path]
56
+ def validate_files(vector_store)
57
+ metadata = vector_store.first.metadata
58
+ raise_arugment_error('metadata must be a hash') unless metadata.is_a?(Hash)
59
+ raise_arugment_error('metadata is empty') if metadata.empty?
43
60
 
61
+ validate_string(metadata[:index_file_path], "index_file_path")
62
+ validate_string(metadata[:json_doc_file_path], "json_doc_file_path")
63
+
64
+ base_dir = metadata[:base_dir_path]
65
+ index_file_file_path = metadata[:index_file_path]
66
+ index_file =
67
+ if !index_file_file_path.to_s.empty? && File.exist?(index_file_file_path)
68
+ index_file_file_path
69
+ else
70
+ File.join(base_dir.to_s, index_file_file_path.to_s)
71
+ end
72
+
73
+ raise_argument_error('index_file does not exist') unless File.exist?(index_file)
74
+
75
+ [metadata, index_file]
76
+ end
77
+
78
+ def load_index(metadata, index_file)
44
79
  search_index = ::Hnswlib::HierarchicalNSW.new(
45
80
  space: metadata[:metric],
46
81
  dim: metadata[:dim]
47
82
  )
48
- search_index.load_index(metadata[:index_file_path])
49
- @search_index = search_index
50
- @vector_store = vector_documents[:vector_store]
51
-
83
+ search_index.load_index(index_file)
52
84
  search_index
53
85
  end
54
86
 
55
87
  def search(query_vector, num_neighbors)
56
88
  raw_results = search_index.search_knn(query_vector, num_neighbors)
57
- raw_results.map { |doc_id, distance| lookup_embedding(doc_id, distance) }.compact
89
+
90
+ raw_results.map { |doc_id, distance| lookup_embedding(doc_id, distance) }
91
+ .compact
92
+ .first(num_neighbors)
93
+ .sort_by { |result| result[:distance] }
58
94
  rescue StandardError => e
59
95
  raise_argument_error("Error searching for #{query_vector}: #{e.message}")
60
96
  end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Boxcars
4
+ module VectorStore
5
+ module InMemory
6
+ class BuildFromArray
7
+ include VectorStore
8
+
9
+ # @param embedding_tool [Symbol] :openai or other embedding tools
10
+ # @param input_array [Array] array of hashes with :content and :metadata keys
11
+ # each hash item should have content and metadata
12
+ # [
13
+ # { content: "hello", metadata: { a: 1 } },
14
+ # { content: "hi", metadata: { a: 1 } },
15
+ # { content: "bye", metadata: { a: 1 } },
16
+ # { content: "what's this", metadata: { a: 1 } }
17
+ # ]
18
+ # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
19
+ def initialize(embedding_tool: :openai, input_array: nil)
20
+ validate_params(embedding_tool, input_array)
21
+ @embedding_tool = embedding_tool
22
+ @input_array = input_array
23
+ @memory_vectors = []
24
+ end
25
+
26
+ # @return [Hash] vector_store: array of Boxcars::VectorStore::Document
27
+ def call
28
+ texts = input_array.map { |doc| doc[:content] }
29
+ vectors = generate_vectors(texts)
30
+ add_vectors(vectors, input_array)
31
+
32
+ {
33
+ type: :in_memory,
34
+ vector_store: memory_vectors
35
+ }
36
+ end
37
+
38
+ private
39
+
40
+ attr_reader :input_array, :memory_vectors
41
+
42
+ def validate_params(embedding_tool, input_array)
43
+ raise_argument_error('input_array is nil') unless input_array
44
+ raise_argument_error('input_array must be an array') unless input_array.is_a?(Array)
45
+ unless proper_document_array?(input_array)
46
+ raise_argument_error('items in input_array needs to have content and metadata')
47
+ end
48
+
49
+ return if %i[openai tensorflow].include?(embedding_tool)
50
+
51
+ raise_argument_error('embedding_tool is invalid')
52
+ end
53
+
54
+ def proper_document_array?(input_array)
55
+ return false unless
56
+ input_array.all? { |hash| hash.key?(:content) && hash.key?(:metadata) }
57
+
58
+ true
59
+ end
60
+
61
+ # returns array of documents with vectors
62
+ def add_vectors(vectors, input_array)
63
+ vectors.zip(input_array).each do |vector, doc|
64
+ memory_vector = Document.new(
65
+ content: doc[:content],
66
+ embedding: vector[:embedding],
67
+ metadata: doc[:metadata].merge(dim: vector[:dim])
68
+ )
69
+ @memory_vectors << memory_vector
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
@@ -6,6 +6,12 @@ module Boxcars
6
6
  class BuildFromFiles
7
7
  include VectorStore
8
8
 
9
+ # initialize the vector store with the following parameters:
10
+ # @param params [Hash] A Hash containing the initial configuration.
11
+ # @option params [Symbol] :embedding_tool The embedding tool to use.
12
+ # @option params [String] :training_data_path The path to the training data files.
13
+ # @option params [Integer] :split_chunk_size The number of characters to split the text into.
14
+ # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
9
15
  def initialize(params)
10
16
  @split_chunk_size = params[:split_chunk_size] || 2000
11
17
  @training_data_path = File.absolute_path(params[:training_data_path])
@@ -15,6 +21,7 @@ module Boxcars
15
21
  @memory_vectors = []
16
22
  end
17
23
 
24
+ # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
18
25
  def call
19
26
  data = load_data_files(training_data_path)
20
27
  texts = split_text_into_chunks(data)
@@ -6,6 +6,10 @@ module Boxcars
6
6
  class Search
7
7
  include VectorStore
8
8
 
9
+ # initialize the vector store InMemory::Search with the following parameters:
10
+ # @param params [Hash] A Hash containing the initial configuration.
11
+ # @option params [Hash] :vector_documents The vector documents to search.
12
+ # @option params [Hash] :vector_store The vector store to search.
9
13
  def initialize(params)
10
14
  validate_params(params[:vector_documents])
11
15
  @vector_documents = params[:vector_documents]
@@ -7,15 +7,24 @@ module Boxcars
7
7
  class BuildFromArray
8
8
  include VectorStore
9
9
 
10
- # params = {
11
- # embedding_tool: embedding_tool,
12
- # input_array: input_array,
13
- # database_url: db_url,
14
- # table_name: table_name,
15
- # embedding_column_name: embedding_column_name,
16
- # content_column_name: content_column_name,
17
- # metadata_column_name: metadata_column_name
18
- # }
10
+ # initialize the vector store with the following parameters:
11
+ #
12
+ # @param params [Hash] A Hash containing the initial configuration.
13
+ #
14
+ # @option params [Symbol] :embedding_tool The embedding tool to use (:openai or :tensorflow). Defaults to :openai.
15
+ # @option params [Array] :input_array The array of inputs to use for the embedding tool. Must be provided.
16
+ # each hash item should have content and metadata
17
+ # [
18
+ # { content: "hello", metadata: { a: 1 } },
19
+ # { content: "hi", metadata: { a: 1 } },
20
+ # { content: "bye", metadata: { a: 1 } },
21
+ # { content: "what's this", metadata: { a: 1 } }
22
+ # ]
23
+ # @option params [String] :database_url The URL of the database where embeddings are stored. Must be provided.
24
+ # @option params [String] :table_name The name of the database table where embeddings are stored. Must be provided.
25
+ # @option params [String] :embedding_column_name The name of the database column where embeddings are stored. Must be provided.
26
+ # @option params [String] :content_column_name The name of the database column where content is stored. Must be provided.
27
+ # @option params [String] :metadata_column_name The name of the database column where metadata is stored. Must be provided.
19
28
  def initialize(params)
20
29
  @embedding_tool = params[:embedding_tool] || :openai
21
30
 
@@ -31,10 +40,11 @@ module Boxcars
31
40
  @pg_vectors = []
32
41
  end
33
42
 
43
+ # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
34
44
  def call
35
- texts = input_array
45
+ texts = input_array.map { |doc| doc[:content] }
36
46
  vectors = generate_vectors(texts)
37
- add_vectors(vectors, texts)
47
+ add_vectors(vectors, input_array)
38
48
  documents = save_vector_store
39
49
 
40
50
  {
@@ -51,15 +61,18 @@ module Boxcars
51
61
 
52
62
  def validate_params(embedding_tool, input_array)
53
63
  raise_argument_error('input_array is nil') unless input_array
64
+ raise_argument_error('input_array must be an array') unless input_array.is_a?(Array)
65
+ raise_argument_error('items in input_array needs to have content and metadata') unless proper_input_array?(input_array)
54
66
  return if %i[openai tensorflow].include?(embedding_tool)
55
67
 
56
68
  raise_argument_error('embedding_tool is invalid') unless %i[openai tensorflow].include?(embedding_tool)
69
+ end
57
70
 
58
- input_array.each do |item|
59
- next if item.key?(:content) && item.key?(:metadata)
71
+ def proper_input_array?(input_array)
72
+ return false unless
73
+ input_array.all? { |hash| hash.key?(:content) && hash.key?(:metadata) }
60
74
 
61
- return raise_argument_error('embedding_tool is invalid')
62
- end
75
+ true
63
76
  end
64
77
 
65
78
  def add_vectors(vectors, texts)
@@ -10,15 +10,15 @@ module Boxcars
10
10
  class BuildFromFiles
11
11
  include VectorStore
12
12
 
13
- # params = {
14
- # training_data_path: training_data_path,
15
- # split_chunk_size: 200,
16
- # embedding_tool: embedding_tool,
17
- # database_url: db_url,
18
- # table_name: table_name,
19
- # embedding_column_name: embedding_column_name,
20
- # content_column_name: content_column_name
21
- # }
13
+ # @param training_data_path [String] path to training data files
14
+ # @param split_chunk_size [Integer] number of characters to split the text into
15
+ # @param embedding_tool [Symbol] embedding tool to use
16
+ # @param database_url [String] database url
17
+ # @param table_name [String] table name
18
+ # @param embedding_column_name [String] embedding column name
19
+ # @param content_column_name [String] content column name
20
+ # @param metadata_column_name [String] metadata column name
21
+ # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
22
22
  def initialize(params)
23
23
  @split_chunk_size = params[:split_chunk_size] || 2000
24
24
  @training_data_path = File.absolute_path(params[:training_data_path])
@@ -35,6 +35,7 @@ module Boxcars
35
35
  @pg_vectors = []
36
36
  end
37
37
 
38
+ # @return [Hash] vector_store: array of Boxcars::VectorStore::Document
38
39
  def call
39
40
  data = load_data_files(training_data_path)
40
41
  texts = split_text_into_chunks(data)
@@ -57,7 +58,7 @@ module Boxcars
57
58
  def validate_params(embedding_tool, training_data_path)
58
59
  training_data_dir = File.dirname(training_data_path.gsub(/\*{1,2}/, ''))
59
60
 
60
- raise_argument_error('training_data_path parent directory must exist') unless File.directory?(training_data_dir)
61
+ raise_argument_error('training_data_path parent directory must exist') unless Dir.exist?(training_data_dir)
61
62
  raise_argument_error('No files found at the training_data_path pattern') if Dir.glob(training_data_path).empty?
62
63
  return if %i[openai tensorflow].include?(embedding_tool)
63
64
 
@@ -9,15 +9,14 @@ module Boxcars
9
9
  class SaveToDatabase
10
10
  include VectorStore
11
11
 
12
- # params = {
13
- # pg_vectors: pg_vectors,
14
- # database_url: db_url,
15
- # table_name: table_name,
16
- # embedding_column_name: embedding_column_name,
17
- # content_column_name: content_column_name
18
- # }
12
+ # @param pg_vectors [Array] array of Boxcars::VectorStore::Document
13
+ # @param database_url [String] database url
14
+ # @param table_name [String] table name
15
+ # @param embedding_column_name [String] embedding column name
16
+ # @param content_column_name [String] content column name
17
+ # @param metadata_column_name [String] metadata column name
18
+ # @return [Array] array of Boxcars::VectorStore::Document
19
19
  def initialize(params)
20
- @errors = []
21
20
  validate_param_types(params)
22
21
  @db_connection = test_db_params(params)
23
22
 
@@ -29,9 +28,8 @@ module Boxcars
29
28
  @pg_vectors = params[:pg_vectors]
30
29
  end
31
30
 
31
+ # @return [Array] array of Boxcars::VectorStore::Document
32
32
  def call
33
- return { success: false, error: errors } unless errors.empty?
34
-
35
33
  add_vectors_to_database
36
34
  end
37
35
 
@@ -39,7 +37,7 @@ module Boxcars
39
37
 
40
38
  attr_reader :database_url, :pg_vectors, :db_connection, :table_name,
41
39
  :embedding_column_name, :content_column_name,
42
- :metadata_column_name, :errors
40
+ :metadata_column_name
43
41
 
44
42
  def validate_param_types(params)
45
43
  pg_vectors = params[:pg_vectors]
@@ -9,17 +9,21 @@ module Boxcars
9
9
  class Search
10
10
  include VectorStore
11
11
 
12
- # required params:
12
+ # initialize the vector store with the following parameters:
13
+ # @param params [Hash] A Hash containing the initial configuration.
14
+ # @option params [Hash] :vector_documents The vector documents to search.
15
+ # example:
13
16
  # {
14
17
  # type: :pgvector,
15
18
  # vector_store: {
16
- # database_url: database_url,
17
- # table_name: table_name,
18
- # embedding_column_name: embedding_column_name,
19
- # content_column_name: content_column_name,
20
- # metadata_column_name: metadata_column_name
19
+ # table_name: "vector_store",
20
+ # embedding_column_name: "embedding",
21
+ # content_column_name: "content",
22
+ # database_url: ENV['DATABASE_URL']
21
23
  # }
22
24
  # }
25
+ #
26
+ # @option params [Hash] :vector_store The vector store to search.
23
27
  def initialize(params)
24
28
  vector_store = validate_params(params)
25
29
  db_url = validate_vector_store(vector_store)
@@ -28,6 +32,20 @@ module Boxcars
28
32
  @vector_documents = params[:vector_documents]
29
33
  end
30
34
 
35
+ # @param query_vector [Array] The query vector to search for.
36
+ # @param count [Integer] The number of results to return.
37
+ # @return [Array] array of hashes with :document and :distance keys
38
+ # @example
39
+ # [
40
+ # {
41
+ # document: Boxcars::VectorStore::Document.new(
42
+ # content: "hello",
43
+ # embedding: [0.1, 0.2, 0.3],
44
+ # metadata: { a: 1 }
45
+ # ),
46
+ # distance: 0.1
47
+ # }
48
+ # ]
31
49
  def call(query_vector:, count: 1)
32
50
  raise ::Boxcars::ArgumentError, 'query_vector is empty' if query_vector.empty?
33
51
 
@@ -54,7 +54,7 @@ module Boxcars
54
54
 
55
55
  file_content = File.read(file_path)
56
56
  JSON.parse(file_content, symbolize_names: true)
57
- rescue JSON::ParserError => e
57
+ rescue JSON::ParserError, Errno::ENOENT => e
58
58
  raise_argument_error("Error parsing #{file_path}: #{e.message}")
59
59
  end
60
60
 
@@ -80,6 +80,11 @@ module Boxcars
80
80
  end
81
81
  docs
82
82
  end
83
+
84
+ def validate_string(value, name)
85
+ raise raise_argument_error("#{name} must be a string") unless value.is_a?(String)
86
+ raise raise_argument_error("#{name} is empty") if value.empty?
87
+ end
83
88
  end
84
89
  end
85
90
 
@@ -92,7 +97,7 @@ require_relative "vector_store/hnswlib/save_to_hnswlib"
92
97
  require_relative "vector_store/hnswlib/build_from_files"
93
98
  require_relative "vector_store/hnswlib/search"
94
99
  require_relative "vector_store/in_memory/build_from_files"
95
- require_relative "vector_store/in_memory/build_from_document_array"
100
+ require_relative "vector_store/in_memory/build_from_array"
96
101
  require_relative "vector_store/in_memory/search"
97
102
  require_relative "vector_store/pgvector/build_from_files"
98
103
  require_relative "vector_store/pgvector/build_from_array"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Boxcars
4
4
  # The current version of the gem.
5
- VERSION = "0.2.12"
5
+ VERSION = "0.2.14"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boxcars
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Sullivan
@@ -9,92 +9,78 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-05-22 00:00:00.000000000 Z
12
+ date: 2023-06-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: debug
16
- requirement: !ruby/object:Gem::Requirement
17
- requirements:
18
- - - "~>"
19
- - !ruby/object:Gem::Version
20
- version: '1.1'
21
- type: :development
22
- prerelease: false
23
- version_requirements: !ruby/object:Gem::Requirement
24
- requirements:
25
- - - "~>"
26
- - !ruby/object:Gem::Version
27
- version: '1.1'
28
- - !ruby/object:Gem::Dependency
29
- name: dotenv
15
+ name: google_search_results
30
16
  requirement: !ruby/object:Gem::Requirement
31
17
  requirements:
32
18
  - - "~>"
33
19
  - !ruby/object:Gem::Version
34
- version: '2.8'
35
- type: :development
20
+ version: '2.2'
21
+ type: :runtime
36
22
  prerelease: false
37
23
  version_requirements: !ruby/object:Gem::Requirement
38
24
  requirements:
39
25
  - - "~>"
40
26
  - !ruby/object:Gem::Version
41
- version: '2.8'
27
+ version: '2.2'
42
28
  - !ruby/object:Gem::Dependency
43
- name: rspec
29
+ name: gpt4all
44
30
  requirement: !ruby/object:Gem::Requirement
45
31
  requirements:
46
32
  - - "~>"
47
33
  - !ruby/object:Gem::Version
48
- version: '3.2'
49
- type: :development
34
+ version: 0.0.4
35
+ type: :runtime
50
36
  prerelease: false
51
37
  version_requirements: !ruby/object:Gem::Requirement
52
38
  requirements:
53
39
  - - "~>"
54
40
  - !ruby/object:Gem::Version
55
- version: '3.2'
41
+ version: 0.0.4
56
42
  - !ruby/object:Gem::Dependency
57
- name: google_search_results
43
+ name: hnswlib
58
44
  requirement: !ruby/object:Gem::Requirement
59
45
  requirements:
60
46
  - - "~>"
61
47
  - !ruby/object:Gem::Version
62
- version: '2.2'
48
+ version: '0.8'
63
49
  type: :runtime
64
50
  prerelease: false
65
51
  version_requirements: !ruby/object:Gem::Requirement
66
52
  requirements:
67
53
  - - "~>"
68
54
  - !ruby/object:Gem::Version
69
- version: '2.2'
55
+ version: '0.8'
70
56
  - !ruby/object:Gem::Dependency
71
- name: gpt4all
57
+ name: pgvector
72
58
  requirement: !ruby/object:Gem::Requirement
73
59
  requirements:
74
60
  - - "~>"
75
61
  - !ruby/object:Gem::Version
76
- version: 0.0.4
62
+ version: '0.2'
77
63
  type: :runtime
78
64
  prerelease: false
79
65
  version_requirements: !ruby/object:Gem::Requirement
80
66
  requirements:
81
67
  - - "~>"
82
68
  - !ruby/object:Gem::Version
83
- version: 0.0.4
69
+ version: '0.2'
84
70
  - !ruby/object:Gem::Dependency
85
71
  name: ruby-openai
86
72
  requirement: !ruby/object:Gem::Requirement
87
73
  requirements:
88
74
  - - "~>"
89
75
  - !ruby/object:Gem::Version
90
- version: '4.0'
76
+ version: '4.1'
91
77
  type: :runtime
92
78
  prerelease: false
93
79
  version_requirements: !ruby/object:Gem::Requirement
94
80
  requirements:
95
81
  - - "~>"
96
82
  - !ruby/object:Gem::Version
97
- version: '4.0'
83
+ version: '4.1'
98
84
  description: You simply set an OpenAI key, give a number of Boxcars to a Train, and
99
85
  magic ensues when you run it.
100
86
  email:
@@ -124,6 +110,7 @@ files:
124
110
  - lib/boxcars/boxcar/google_search.rb
125
111
  - lib/boxcars/boxcar/sql.rb
126
112
  - lib/boxcars/boxcar/swagger.rb
113
+ - lib/boxcars/boxcar/vector_answer.rb
127
114
  - lib/boxcars/boxcar/wikipedia_search.rb
128
115
  - lib/boxcars/conversation.rb
129
116
  - lib/boxcars/conversation_prompt.rb
@@ -148,7 +135,7 @@ files:
148
135
  - lib/boxcars/vector_store/hnswlib/load_from_disk.rb
149
136
  - lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb
150
137
  - lib/boxcars/vector_store/hnswlib/search.rb
151
- - lib/boxcars/vector_store/in_memory/build_from_document_array.rb
138
+ - lib/boxcars/vector_store/in_memory/build_from_array.rb
152
139
  - lib/boxcars/vector_store/in_memory/build_from_files.rb
153
140
  - lib/boxcars/vector_store/in_memory/search.rb
154
141
  - lib/boxcars/vector_store/pgvector/build_from_array.rb
@@ -180,7 +167,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
167
  - !ruby/object:Gem::Version
181
168
  version: '0'
182
169
  requirements: []
183
- rubygems_version: 3.4.10
170
+ rubygems_version: 3.2.32
184
171
  signing_key:
185
172
  specification_version: 4
186
173
  summary: Boxcars is a gem that enables you to create new systems with AI composability.
@@ -1,51 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Boxcars
4
- module VectorStore
5
- module InMemory
6
- class BuildFromDocumentArray
7
- include VectorStore
8
-
9
- def initialize(embedding_tool: :openai, documents: nil)
10
- validate_params(embedding_tool, documents)
11
- @embedding_tool = embedding_tool
12
- @documents = documents
13
- @memory_vectors = []
14
- end
15
-
16
- def call
17
- texts = documents
18
- vectors = generate_vectors(texts)
19
- add_vectors(vectors, documents)
20
- {
21
- type: :in_memory,
22
- vector_store: memory_vectors
23
- }
24
- end
25
-
26
- private
27
-
28
- attr_reader :documents, :memory_vectors
29
-
30
- def validate_params(embedding_tool, documents)
31
- raise_argument_error('documents is nil') unless documents
32
- return if %i[openai tensorflow].include?(embedding_tool)
33
-
34
- raise_argument_error('embedding_tool is invalid')
35
- end
36
-
37
- # returns array of documents with vectors
38
- def add_vectors(vectors, documents)
39
- vectors.zip(documents).each do |vector, doc|
40
- memory_vector = Document.new(
41
- content: doc[:content],
42
- embedding: vector[:embedding],
43
- metadata: doc[:metadata].merge(dim: vector[:dim])
44
- )
45
- @memory_vectors << memory_vector
46
- end
47
- end
48
- end
49
- end
50
- end
51
- end