boxcars 0.2.11 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.env_sample +1 -0
  3. data/.rubocop.yml +16 -0
  4. data/CHANGELOG.md +12 -0
  5. data/Gemfile +15 -11
  6. data/Gemfile.lock +40 -32
  7. data/README.md +4 -1
  8. data/boxcars.gemspec +4 -7
  9. data/lib/boxcars/boxcar/active_record.rb +2 -2
  10. data/lib/boxcars/boxcar/engine_boxcar.rb +2 -2
  11. data/lib/boxcars/boxcar/sql.rb +1 -1
  12. data/lib/boxcars/boxcar/swagger.rb +1 -1
  13. data/lib/boxcars/boxcar/vector_answer.rb +71 -0
  14. data/lib/boxcars/boxcar.rb +2 -0
  15. data/lib/boxcars/engine/openai.rb +8 -1
  16. data/lib/boxcars/train/zero_shot.rb +1 -1
  17. data/lib/boxcars/train.rb +1 -1
  18. data/lib/boxcars/vector_search.rb +66 -2
  19. data/lib/boxcars/vector_store/document.rb +3 -2
  20. data/lib/boxcars/vector_store/embed_via_open_ai.rb +2 -2
  21. data/lib/boxcars/vector_store/hnswlib/build_from_files.rb +104 -0
  22. data/lib/boxcars/vector_store/hnswlib/load_from_disk.rb +57 -0
  23. data/lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb +48 -38
  24. data/lib/boxcars/vector_store/hnswlib/search.rb +70 -0
  25. data/lib/boxcars/vector_store/in_memory/build_from_document_array.rb +51 -0
  26. data/lib/boxcars/vector_store/in_memory/build_from_files.rb +61 -0
  27. data/lib/boxcars/vector_store/in_memory/search.rb +29 -49
  28. data/lib/boxcars/vector_store/pgvector/build_from_array.rb +95 -0
  29. data/lib/boxcars/vector_store/pgvector/build_from_files.rb +97 -0
  30. data/lib/boxcars/vector_store/pgvector/save_to_database.rb +152 -0
  31. data/lib/boxcars/vector_store/pgvector/search.rb +144 -0
  32. data/lib/boxcars/vector_store/split_text.rb +2 -3
  33. data/lib/boxcars/vector_store.rb +73 -7
  34. data/lib/boxcars/version.rb +1 -1
  35. data/lib/boxcars.rb +1 -1
  36. metadata +31 -40
  37. data/lib/boxcars/vector_store/hnswlib/build_vector_store.rb +0 -157
  38. data/lib/boxcars/vector_store/hnswlib/hnswlib_config.rb +0 -56
  39. data/lib/boxcars/vector_store/hnswlib/hnswlib_search.rb +0 -54
  40. data/lib/boxcars/vector_store/in_memory/add_documents.rb +0 -67
  41. data/lib/boxcars/vector_store/similarity_search.rb +0 -55
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: da3d8b9838602151837c0cc5bb9f3cae841ba24d1c338eade82c7807d913d4bb
4
- data.tar.gz: f7be434c18f0ff2c95625fe32fae25f3a5df265331425d0c1f0430ab75761578
3
+ metadata.gz: 74f14f8575e4670d2be6196c5196d41dd9728b5a44a0d4e199dfb705dfc77ed5
4
+ data.tar.gz: 06f2e8178f9696831870b5d8d5ea40bda8ba74a2fcc27283849f49124c51a06b
5
5
  SHA512:
6
- metadata.gz: 57dd238c56f13f63a4665d4469efdabfa5f3c54f82cb6832c4158858d4b307a80c57f619633cdad6934d64186d560dcab7a62efa9adc727edfa61afbc5acc188
7
- data.tar.gz: d2c782acf20c6b6b13cbfadf8f5406363b347be90a058626ec1bb21fe32baf1acb57a4a72c4770a7ad820700b465c0474a498080604e23e5d0270001d5d4aec1
6
+ metadata.gz: 9ff6e759f3d942f859de85763ffe9bc0ccf5636914d894205f78cffc99abb1b27dea47463f0fc968eba6746c055f796c95884d5e421486e06374fb0519eb8c63
7
+ data.tar.gz: 03bf42b1fbd6dac1eff4734bd2443a4a2a9f7cb931c99a2fe3f9453f5a23f0853b4eb26c817e1757a16629617eff4704f276b99fb165689ef6016ace86c2fb56
data/.env_sample CHANGED
@@ -1,2 +1,3 @@
1
1
  openai_access_token: ''
2
2
  serpapi_api_key: ''
3
+ DATABASE_URL: 'postgres://postgres:postgres@localhost:5432/boxcars_test'
data/.rubocop.yml CHANGED
@@ -3,6 +3,7 @@ require:
3
3
  - rubocop-rake
4
4
 
5
5
  AllCops:
6
+ TargetRubyVersion: 3
6
7
  Exclude:
7
8
  - 'bin/{rails,rake}'
8
9
  - 'node_modules/**/*'
@@ -152,3 +153,18 @@ Style/SlicingWithRange:
152
153
 
153
154
  Bundler/OrderedGems:
154
155
  Enabled: false
156
+
157
+ RSpec/MultipleMemoizedHelpers:
158
+ Enabled: false
159
+
160
+ RSpec/PendingWithoutReason:
161
+ Enabled: false
162
+
163
+ RSpec/NestedGroups:
164
+ Enabled: false
165
+
166
+ RSpec/ExampleLength:
167
+ Enabled: false
168
+
169
+ RSpec/MultipleExpectations:
170
+ Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [v0.2.11](https://github.com/BoxcarsAI/boxcars/tree/v0.2.11) (2023-05-05)
4
+
5
+ [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.10...v0.2.11)
6
+
7
+ **Closed issues:**
8
+
9
+ - Chore: move vector store to top level [\#67](https://github.com/BoxcarsAI/boxcars/issues/67)
10
+
11
+ **Merged pull requests:**
12
+
13
+ - Move vector store [\#69](https://github.com/BoxcarsAI/boxcars/pull/69) ([francis](https://github.com/francis))
14
+
3
15
  ## [v0.2.10](https://github.com/BoxcarsAI/boxcars/tree/v0.2.10) (2023-05-05)
4
16
 
5
17
  [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...v0.2.10)
data/Gemfile CHANGED
@@ -5,19 +5,11 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in boxcars.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 13.0"
9
-
10
- gem "rspec", "~> 3.2"
11
-
12
- gem "rubocop", "~> 1.21"
13
-
14
- gem "vcr", "~> 6.1.0"
8
+ gem "debug", "~> 1.1"
15
9
 
16
- gem "webmock", "~> 3.18.1"
10
+ gem "dotenv", "~> 2.8"
17
11
 
18
- gem "rubocop-rake", "~> 0.6.0"
19
-
20
- gem "rubocop-rspec", "~> 2.17"
12
+ gem "rake", "~> 13.0"
21
13
 
22
14
  gem "sqlite3", "~> 1.6"
23
15
 
@@ -32,3 +24,15 @@ gem "activesupport", "~> 7.0"
32
24
  gem "rest-client", "~> 2.1"
33
25
 
34
26
  gem "hnswlib", "~> 0.8.1"
27
+
28
+ gem "pg", "~> 1.5", ">= 1.5.3"
29
+ gem "pgvector", "~> 0.2.0"
30
+
31
+ group :development, :test do
32
+ gem "rspec", "~> 3.2"
33
+ gem "rubocop", "~> 1.21"
34
+ gem "vcr", "~> 6.1.0"
35
+ gem "webmock", "~> 3.18.1"
36
+ gem "rubocop-rake", "~> 0.6.0"
37
+ gem "rubocop-rspec", "~> 2.17"
38
+ end
data/Gemfile.lock CHANGED
@@ -1,10 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- boxcars (0.2.11)
4
+ boxcars (0.2.13)
5
5
  google_search_results (~> 2.2)
6
6
  gpt4all (~> 0.0.4)
7
- ruby-openai (~> 3.0)
7
+ hnswlib (~> 0.8)
8
+ pgvector (~> 0.2)
9
+ ruby-openai (~> 4.1)
8
10
 
9
11
  GEM
10
12
  remote: https://rubygems.org/
@@ -19,7 +21,7 @@ GEM
19
21
  i18n (>= 1.6, < 2)
20
22
  minitest (>= 5.1)
21
23
  tzinfo (~> 2.0)
22
- addressable (2.8.1)
24
+ addressable (2.8.4)
23
25
  public_suffix (>= 2.0.2, < 6.0)
24
26
  ast (2.4.2)
25
27
  async (1.31.0)
@@ -34,7 +36,7 @@ GEM
34
36
  protocol-http1 (~> 0.15.0)
35
37
  protocol-http2 (~> 0.15.0)
36
38
  traces (>= 0.8.0)
37
- async-http-faraday (0.11.0)
39
+ async-http-faraday (0.12.0)
38
40
  async-http (~> 0.42)
39
41
  faraday
40
42
  async-io (1.34.3)
@@ -46,7 +48,7 @@ GEM
46
48
  fiber-local
47
49
  crack (0.4.5)
48
50
  rexml
49
- debug (1.7.2)
51
+ debug (1.8.0)
50
52
  irb (>= 1.5.0)
51
53
  reline (>= 0.3.1)
52
54
  diff-lcs (1.5.0)
@@ -56,8 +58,10 @@ GEM
56
58
  faraday (2.7.4)
57
59
  faraday-net_http (>= 2.0, < 3.1)
58
60
  ruby2_keywords (>= 0.0.4)
59
- faraday-http-cache (2.4.1)
61
+ faraday-http-cache (2.5.0)
60
62
  faraday (>= 0.8)
63
+ faraday-multipart (1.0.4)
64
+ multipart-post (~> 2)
61
65
  faraday-net_http (3.0.2)
62
66
  faraday-retry (2.1.0)
63
67
  faraday (~> 2.0)
@@ -81,35 +85,33 @@ GEM
81
85
  http-accept (1.7.0)
82
86
  http-cookie (1.0.5)
83
87
  domain_name (~> 0.5)
84
- httparty (0.21.0)
85
- mini_mime (>= 1.0.0)
86
- multi_xml (>= 0.5.2)
87
- i18n (1.12.0)
88
+ i18n (1.13.0)
88
89
  concurrent-ruby (~> 1.0)
89
90
  io-console (0.6.0)
90
91
  io-console (0.6.0-java)
91
- irb (1.6.3)
92
+ irb (1.6.4)
92
93
  reline (>= 0.3.0)
93
94
  json (2.6.3)
94
95
  json (2.6.3-java)
95
96
  mime-types (3.4.1)
96
97
  mime-types-data (~> 3.2015)
97
98
  mime-types-data (3.2023.0218.1)
98
- mini_mime (1.1.2)
99
- mini_portile2 (2.8.1)
99
+ mini_portile2 (2.8.2)
100
100
  minitest (5.18.0)
101
101
  multi_json (1.15.0)
102
- multi_xml (0.6.0)
102
+ multipart-post (2.3.0)
103
103
  netrc (0.11.0)
104
- nio4r (2.5.8)
105
- nio4r (2.5.8-java)
104
+ nio4r (2.5.9)
105
+ nio4r (2.5.9-java)
106
106
  octokit (4.25.1)
107
107
  faraday (>= 1, < 3)
108
108
  sawyer (~> 0.9)
109
109
  os (1.1.4)
110
- parallel (1.22.1)
111
- parser (3.2.1.1)
110
+ parallel (1.23.0)
111
+ parser (3.2.2.1)
112
112
  ast (~> 2.4.1)
113
+ pg (1.5.3)
114
+ pgvector (0.2.0)
113
115
  protocol-hpack (1.4.2)
114
116
  protocol-http (0.24.1)
115
117
  protocol-http1 (0.15.0)
@@ -120,7 +122,7 @@ GEM
120
122
  public_suffix (5.0.1)
121
123
  rainbow (3.1.1)
122
124
  rake (13.0.6)
123
- regexp_parser (2.7.0)
125
+ regexp_parser (2.8.0)
124
126
  reline (0.3.3)
125
127
  io-console (~> 0.5)
126
128
  rest-client (2.1.0)
@@ -133,46 +135,50 @@ GEM
133
135
  rspec-core (~> 3.12.0)
134
136
  rspec-expectations (~> 3.12.0)
135
137
  rspec-mocks (~> 3.12.0)
136
- rspec-core (3.12.1)
138
+ rspec-core (3.12.2)
137
139
  rspec-support (~> 3.12.0)
138
- rspec-expectations (3.12.2)
140
+ rspec-expectations (3.12.3)
139
141
  diff-lcs (>= 1.2.0, < 2.0)
140
142
  rspec-support (~> 3.12.0)
141
143
  rspec-mocks (3.12.5)
142
144
  diff-lcs (>= 1.2.0, < 2.0)
143
145
  rspec-support (~> 3.12.0)
144
146
  rspec-support (3.12.0)
145
- rubocop (1.48.1)
147
+ rubocop (1.51.0)
146
148
  json (~> 2.3)
147
149
  parallel (~> 1.10)
148
150
  parser (>= 3.2.0.0)
149
151
  rainbow (>= 2.2.2, < 4.0)
150
152
  regexp_parser (>= 1.8, < 3.0)
151
153
  rexml (>= 3.2.5, < 4.0)
152
- rubocop-ast (>= 1.26.0, < 2.0)
154
+ rubocop-ast (>= 1.28.0, < 2.0)
153
155
  ruby-progressbar (~> 1.7)
154
156
  unicode-display_width (>= 2.4.0, < 3.0)
155
- rubocop-ast (1.28.0)
157
+ rubocop-ast (1.28.1)
156
158
  parser (>= 3.2.1.0)
157
- rubocop-capybara (2.17.1)
159
+ rubocop-capybara (2.18.0)
158
160
  rubocop (~> 1.41)
161
+ rubocop-factory_bot (2.23.1)
162
+ rubocop (~> 1.33)
159
163
  rubocop-rake (0.6.0)
160
164
  rubocop (~> 1.0)
161
- rubocop-rspec (2.19.0)
165
+ rubocop-rspec (2.22.0)
162
166
  rubocop (~> 1.33)
163
167
  rubocop-capybara (~> 2.17)
164
- ruby-openai (3.7.0)
165
- httparty (>= 0.18.1)
168
+ rubocop-factory_bot (~> 2.22)
169
+ ruby-openai (4.1.0)
170
+ faraday (>= 1)
171
+ faraday-multipart (>= 1)
166
172
  ruby-progressbar (1.13.0)
167
173
  ruby2_keywords (0.0.5)
168
174
  sawyer (0.9.2)
169
175
  addressable (>= 2.3.5)
170
176
  faraday (>= 0.17.3, < 3)
171
- sqlite3 (1.6.2)
177
+ sqlite3 (1.6.3)
172
178
  mini_portile2 (~> 2.8.0)
173
- sqlite3 (1.6.2-arm64-darwin)
174
- sqlite3 (1.6.2-x86_64-darwin)
175
- sqlite3 (1.6.2-x86_64-linux)
179
+ sqlite3 (1.6.3-arm64-darwin)
180
+ sqlite3 (1.6.3-x86_64-darwin)
181
+ sqlite3 (1.6.3-x86_64-linux)
176
182
  strings-ansi (0.2.0)
177
183
  timers (4.3.5)
178
184
  traces (0.9.1)
@@ -212,6 +218,8 @@ DEPENDENCIES
212
218
  faraday-retry (~> 2.0)
213
219
  github_changelog_generator (~> 1.16)
214
220
  hnswlib (~> 0.8.1)
221
+ pg (~> 1.5, >= 1.5.3)
222
+ pgvector (~> 0.2.0)
215
223
  rake (~> 13.0)
216
224
  rest-client (~> 2.1)
217
225
  rspec (~> 3.2)
data/README.md CHANGED
@@ -21,6 +21,7 @@ All of these concepts are in a module named Boxcars:
21
21
  - Train - Given a list of Boxcars and optionally an Engine, a Train breaks down a problem into pieces for individual Boxcars to solve. The individual results are then combined until a final answer is found. ZeroShot is the only current implementation of Train (but we are adding more soon), and you can either construct it directly or use `Boxcars::train` when you want to build a Train.
22
22
  - Prompt - used by an Engine to generate text results. Our Boxcars have built-in prompts, but you have the flexibility to change or augment them if you so desire.
23
23
  - Engine - an entity that generates text from a Prompt. OpenAI's LLM text generator is the default Engine if no other is specified, and you can override the default engine if so desired (`Boxcar.configuration.default_engine`).
24
+ - VectorStore - a place to store and query vectors.
24
25
 
25
26
  ## Security
26
27
  Currently, our system is designed for individuals who already possess administrative privileges for their project. It is likely possible to manipulate the system's prompts to carry out malicious actions, but if you already have administrative access, you can perform such actions without requiring boxcars in the first place.
@@ -132,7 +133,9 @@ Next Actions:
132
133
  ### More Examples
133
134
  See [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/boxcars_examples.ipynb) Jupyter Notebook for more examples.
134
135
 
135
- For the new Swagger boxcar, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/swagger_examples.ipynb) Jupyter Notebook.
136
+ For the Swagger boxcar, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/swagger_examples.ipynb) Jupyter Notebook.
137
+
138
+ For simple vector storage and search, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/vector_store_examples.ipynb) Jupyter Notebook.
136
139
 
137
140
  Note, some folks that we talked to didn't know that you could run Ruby Jupyter notebooks. [You can](https://github.com/SciRuby/iruby).
138
141
 
data/boxcars.gemspec CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
12
12
  spec.description = "You simply set an OpenAI key, give a number of Boxcars to a Train, and magic ensues when you run it."
13
13
  spec.homepage = "https://github.com/BoxcarsAI/boxcars"
14
14
  spec.license = "MIT"
15
- spec.required_ruby_version = ">= 2.6.0"
15
+ spec.required_ruby_version = ">= 3.0"
16
16
 
17
17
  spec.metadata["homepage_uri"] = spec.homepage
18
18
  spec.metadata["source_code_uri"] = spec.homepage
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- # dev / test dependencies
34
- spec.add_development_dependency "debug", "~> 1.1"
35
- spec.add_development_dependency "dotenv", "~> 2.8"
36
- spec.add_development_dependency "rspec", "~> 3.2"
37
-
38
33
  # runtime dependencies
39
34
  spec.add_dependency "google_search_results", "~> 2.2"
40
35
  spec.add_dependency "gpt4all", "~> 0.0.4"
41
- spec.add_dependency "ruby-openai", "~> 3.0"
36
+ spec.add_dependency "hnswlib", "~> 0.8"
37
+ spec.add_dependency "ruby-openai", "~> 4.1"
38
+ spec.add_dependency "pgvector", "~> 0.2"
42
39
 
43
40
  # For more information and examples about making a new gem, checkout our
44
41
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -29,7 +29,7 @@ module Boxcars
29
29
  end
30
30
 
31
31
  # @return Hash The additional variables for this boxcar.
32
- def prediction_additional
32
+ def prediction_additional(_inputs)
33
33
  { model_info: model_info }.merge super
34
34
  end
35
35
 
@@ -161,7 +161,7 @@ module Boxcars
161
161
  begin
162
162
  return true unless changes&.positive?
163
163
  rescue StandardError => e
164
- Boscar.error "Error while computing change count: #{e.message}", :red
164
+ Boxcars.error "Error while computing change count: #{e.message}", :red
165
165
  end
166
166
 
167
167
  Boxcars.debug "#{name}(Pending Changes): #{changes}", :yellow
@@ -114,14 +114,14 @@ module Boxcars
114
114
  end
115
115
 
116
116
  # @return Hash The additional variables for this boxcar.
117
- def prediction_additional
117
+ def prediction_additional(_inputs)
118
118
  { stop: stop, top_k: top_k }
119
119
  end
120
120
 
121
121
  # @param inputs [Hash] The inputs to the boxcar.
122
122
  # @return Hash The variables for this boxcar.
123
123
  def prediction_variables(inputs)
124
- prediction_additional.merge(inputs)
124
+ prediction_additional(inputs).merge(inputs)
125
125
  end
126
126
 
127
127
  # remove backticks or triple backticks from the code
@@ -26,7 +26,7 @@ module Boxcars
26
26
  end
27
27
 
28
28
  # @return Hash The additional variables for this boxcar.
29
- def prediction_additional
29
+ def prediction_additional(_inputs)
30
30
  { schema: schema, dialect: dialect }.merge super
31
31
  end
32
32
 
@@ -25,7 +25,7 @@ module Boxcars
25
25
  end
26
26
 
27
27
  # @return Hash The additional variables for this boxcar.
28
- def prediction_additional
28
+ def prediction_additional(_inputs)
29
29
  { swagger_url: swagger_url, context: context }.merge super
30
30
  end
31
31
 
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ # A Boxcar that interprets a prompt and executes ruby code to do math
6
+ class VectorAnswer < EngineBoxcar
7
+ # the description of this engine boxcar
8
+ DESC = "useful for when you need to answer questions from vector search results."
9
+
10
+ attr_reader :embeddings, :vector_documents, :search_content
11
+
12
+ # @param embeddings [Hash] The vector embeddings to use for this boxcar.
13
+ # @param vector_documents [Hash] The vector documents to use for this boxcar.
14
+ # @param engine [Boxcars::Engine] The engine to user for this boxcar. Can be inherited from a train if nil.
15
+ # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
16
+ # @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
17
+ def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
18
+ the_prompt = prompt || my_prompt
19
+ @embeddings = embeddings
20
+ @vector_documents = vector_documents
21
+ kwargs[:stop] ||= ["```output"]
22
+ kwargs[:name] ||= "VectorAnswer"
23
+ kwargs[:description] ||= DESC
24
+ super(engine: engine, prompt: the_prompt, **kwargs)
25
+ end
26
+
27
+ # @param inputs [Hash] The inputs to use for the prediction.
28
+ # @return Hash The additional variables for this boxcar.
29
+ def prediction_additional(inputs)
30
+ { search_content: get_search_content(inputs[:question]) }.merge super
31
+ end
32
+
33
+ private
34
+
35
+ # @param results [Array] The results from the vector search.
36
+ # @return [String] The content of the search results.
37
+ def get_results_content(results)
38
+ results&.map do |result|
39
+ result[:document].content
40
+ end.to_a.join("\n\n")
41
+ end
42
+
43
+ # return the content of the search results for count results
44
+ # @param question [String] The question to search for.
45
+ # @param count [Integer] The number of results to return.
46
+ # @return [String] The content of the search results.
47
+ def get_search_content(question, count: 1)
48
+ search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
49
+ results = search.call query: question, count: count
50
+ @search_content = get_search_content(results)
51
+ end
52
+
53
+ # our template
54
+ CTEMPLATE = [
55
+ syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
56
+ "```text\n%<search_content>s\n```\n\n",
57
+ "Using the above, just answer the question as if you were answering directly."),
58
+ user("%<question>s")
59
+ ].freeze
60
+
61
+ # The prompt to use for the engine.
62
+ def my_prompt
63
+ @conversation ||= Conversation.new(lines: CTEMPLATE)
64
+ @my_prompt ||= ConversationPrompt.new(
65
+ conversation: @conversation,
66
+ input_variables: [:question],
67
+ other_inputs: [:search_content],
68
+ output_variables: [:answer])
69
+ end
70
+ end
71
+ end
@@ -156,4 +156,6 @@ require "boxcars/boxcar/wikipedia_search"
156
156
  require "boxcars/boxcar/sql"
157
157
  require "boxcars/boxcar/swagger"
158
158
  require "boxcars/boxcar/active_record"
159
+ require "boxcars/vector_store"
159
160
  require "boxcars/vector_search"
161
+ require "boxcars/boxcar/vector_answer"
@@ -43,6 +43,10 @@ module Boxcars
43
43
  ::OpenAI::Client.new(access_token: access_token, organization_id: organization_id)
44
44
  end
45
45
 
46
+ def conversation_model?(model)
47
+ ["gpt-3.5-turbo", "gpt-4"].include?(model)
48
+ end
49
+
46
50
  # Get an answer from the engine.
47
51
  # @param prompt [String] The prompt to use when asking the engine.
48
52
  # @param openai_access_token [String] The access token to use when asking the engine.
@@ -51,7 +55,7 @@ module Boxcars
51
55
  def client(prompt:, inputs: {}, openai_access_token: nil, **kwargs)
52
56
  clnt = Openai.open_ai_client(openai_access_token: openai_access_token)
53
57
  params = open_ai_params.merge(kwargs)
54
- if params[:model] == "gpt-3.5-turbo"
58
+ if conversation_model?(params[:model])
55
59
  prompt = prompt.first if prompt.is_a?(Array)
56
60
  params = prompt.as_messages(inputs).merge(params)
57
61
  if Boxcars.configuration.log_prompts
@@ -71,6 +75,9 @@ module Boxcars
71
75
  def run(question, **kwargs)
72
76
  prompt = Prompt.new(template: question)
73
77
  response = client(prompt: prompt, **kwargs)
78
+ raise Error, "OpenAI: No response from API" unless response
79
+ raise Error, "OpenAI: #{response['error']}" if response["error"]
80
+
74
81
  answer = response["choices"].map { |c| c.dig("message", "content") || c["text"] }.join("\n").strip
75
82
  puts answer
76
83
  answer
@@ -19,7 +19,7 @@ module Boxcars
19
19
  end
20
20
 
21
21
  # @return Hash The additional variables for this boxcar.
22
- def prediction_additional
22
+ def prediction_additional(_inputs)
23
23
  { boxcar_names: boxcar_names, boxcar_descriptions: boxcar_descriptions }.merge super
24
24
  end
25
25
 
data/lib/boxcars/train.rb CHANGED
@@ -69,7 +69,7 @@ module Boxcars
69
69
  # @return [Boxcars::Action] Action specifying what boxcar to use.
70
70
  def plan(intermediate_steps, **kwargs)
71
71
  thoughts = construct_scratchpad(intermediate_steps)
72
- full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
72
+ full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
73
73
  action = get_next_action(full_inputs)
74
74
  return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
75
75
 
@@ -3,8 +3,72 @@
3
3
  # Boxcars is a framework for running a series of tools to get an answer to a question.
4
4
  module Boxcars
5
5
  # Embeds a text query and runs a similarity search against a vector store (hnswlib, in-memory, or pgvector).
6
- class VectorSearch < Boxcar
7
- Error = Class.new(StandardError)
6
+ class VectorSearch
7
+ def initialize(params)
8
+ @vector_documents = params[:vector_documents]
9
+ @embedding_tool = params[:embedding_tool] || :openai
10
+ @vector_search_instance = vector_search_instance
11
+ @openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
12
+ end
13
+
14
+ def call(query:, count: 1)
15
+ validate_query(query)
16
+ query_vector = convert_query_to_vector(query)
17
+ @vector_search_instance.call(query_vector: query_vector, count: count)
18
+ end
19
+
20
+ private
21
+
22
+ attr_reader :vector_documents, :embedding_tool, :openai_connection
23
+
24
+ def vector_search_instance
25
+ case vector_documents[:type]
26
+ when :hnswlib
27
+ Boxcars::VectorStore::Hnswlib::Search.new(
28
+ vector_documents: vector_documents
29
+ )
30
+ when :in_memory
31
+ Boxcars::VectorStore::InMemory::Search.new(
32
+ vector_documents: vector_documents
33
+ )
34
+ when :pgvector
35
+ Boxcars::VectorStore::Pgvector::Search.new(
36
+ vector_documents: vector_documents
37
+ )
38
+ else
39
+ raise_argument_error('Unsupported vector store provided')
40
+ end
41
+ end
42
+
43
+ def default_connection(openai_access_token: nil)
44
+ Openai.open_ai_client(openai_access_token: openai_access_token)
45
+ end
46
+
47
+ def validate_query(query)
48
+ raise_argument_error('query must be a string') unless query.is_a?(String)
49
+ raise_argument_error('query must not be empty') if query.empty?
50
+ end
51
+
52
+ def convert_query_to_vector(query)
53
+ tool = embeddings_method(embedding_tool)
54
+ res = tool[:klass].call(
55
+ texts: [query], client: tool[:client]
56
+ ).first
57
+ res[:embedding]
58
+ end
59
+
60
+ def embeddings_method(embedding_tool)
61
+ case embedding_tool
62
+ when :openai
63
+ { klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_connection }
64
+ when :tensorflow
65
+ { klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
66
+ end
67
+ end
68
+
69
+ def raise_argument_error(message)
70
+ raise ::Boxcars::ArgumentError, message
71
+ end
8
72
  end
9
73
  end
10
74
 
@@ -3,10 +3,11 @@
3
3
  module Boxcars
4
4
  module VectorStore
5
5
  class Document
6
- attr_accessor :page_content, :metadata
6
+ attr_accessor :content, :metadata, :embedding
7
7
 
8
8
  def initialize(fields = {})
9
- @page_content = fields[:page_content] || ""
9
+ @content = fields[:content] || ""
10
+ @embedding = fields[:embedding] || []
10
11
  @metadata = fields[:metadata] || {}
11
12
  end
12
13
  end
@@ -7,8 +7,6 @@ module Boxcars
7
7
  class EmbedViaOpenAI
8
8
  include VectorStore
9
9
 
10
- attr_accessor :texts, :client, :model
11
-
12
10
  def initialize(texts:, client:, model: 'text-embedding-ada-002')
13
11
  validate_params(texts, client)
14
12
  @texts = texts
@@ -28,6 +26,8 @@ module Boxcars
28
26
 
29
27
  private
30
28
 
29
+ attr_accessor :texts, :client, :model
30
+
31
31
  def validate_params(texts, client)
32
32
  raise_error 'texts must be an array of strings' unless texts.is_a?(Array) && texts.all? { |text| text.is_a?(String) }
33
33
  raise_error 'openai_connection must be an OpenAI::Client' unless client.is_a?(OpenAI::Client)