boxcars 0.2.11 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env_sample +1 -0
- data/.rubocop.yml +16 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +15 -11
- data/Gemfile.lock +40 -32
- data/README.md +4 -1
- data/boxcars.gemspec +4 -7
- data/lib/boxcars/boxcar/active_record.rb +2 -2
- data/lib/boxcars/boxcar/engine_boxcar.rb +2 -2
- data/lib/boxcars/boxcar/sql.rb +1 -1
- data/lib/boxcars/boxcar/swagger.rb +1 -1
- data/lib/boxcars/boxcar/vector_answer.rb +71 -0
- data/lib/boxcars/boxcar.rb +2 -0
- data/lib/boxcars/engine/openai.rb +8 -1
- data/lib/boxcars/train/zero_shot.rb +1 -1
- data/lib/boxcars/train.rb +1 -1
- data/lib/boxcars/vector_search.rb +66 -2
- data/lib/boxcars/vector_store/document.rb +3 -2
- data/lib/boxcars/vector_store/embed_via_open_ai.rb +2 -2
- data/lib/boxcars/vector_store/hnswlib/build_from_files.rb +104 -0
- data/lib/boxcars/vector_store/hnswlib/load_from_disk.rb +57 -0
- data/lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb +48 -38
- data/lib/boxcars/vector_store/hnswlib/search.rb +70 -0
- data/lib/boxcars/vector_store/in_memory/build_from_document_array.rb +51 -0
- data/lib/boxcars/vector_store/in_memory/build_from_files.rb +61 -0
- data/lib/boxcars/vector_store/in_memory/search.rb +29 -49
- data/lib/boxcars/vector_store/pgvector/build_from_array.rb +95 -0
- data/lib/boxcars/vector_store/pgvector/build_from_files.rb +97 -0
- data/lib/boxcars/vector_store/pgvector/save_to_database.rb +152 -0
- data/lib/boxcars/vector_store/pgvector/search.rb +144 -0
- data/lib/boxcars/vector_store/split_text.rb +2 -3
- data/lib/boxcars/vector_store.rb +73 -7
- data/lib/boxcars/version.rb +1 -1
- data/lib/boxcars.rb +1 -1
- metadata +31 -40
- data/lib/boxcars/vector_store/hnswlib/build_vector_store.rb +0 -157
- data/lib/boxcars/vector_store/hnswlib/hnswlib_config.rb +0 -56
- data/lib/boxcars/vector_store/hnswlib/hnswlib_search.rb +0 -54
- data/lib/boxcars/vector_store/in_memory/add_documents.rb +0 -67
- data/lib/boxcars/vector_store/similarity_search.rb +0 -55
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74f14f8575e4670d2be6196c5196d41dd9728b5a44a0d4e199dfb705dfc77ed5
|
4
|
+
data.tar.gz: 06f2e8178f9696831870b5d8d5ea40bda8ba74a2fcc27283849f49124c51a06b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ff6e759f3d942f859de85763ffe9bc0ccf5636914d894205f78cffc99abb1b27dea47463f0fc968eba6746c055f796c95884d5e421486e06374fb0519eb8c63
|
7
|
+
data.tar.gz: 03bf42b1fbd6dac1eff4734bd2443a4a2a9f7cb931c99a2fe3f9453f5a23f0853b4eb26c817e1757a16629617eff4704f276b99fb165689ef6016ace86c2fb56
|
data/.env_sample
CHANGED
data/.rubocop.yml
CHANGED
@@ -3,6 +3,7 @@ require:
|
|
3
3
|
- rubocop-rake
|
4
4
|
|
5
5
|
AllCops:
|
6
|
+
TargetRubyVersion: 3
|
6
7
|
Exclude:
|
7
8
|
- 'bin/{rails,rake}'
|
8
9
|
- 'node_modules/**/*'
|
@@ -152,3 +153,18 @@ Style/SlicingWithRange:
|
|
152
153
|
|
153
154
|
Bundler/OrderedGems:
|
154
155
|
Enabled: false
|
156
|
+
|
157
|
+
RSpec/MultipleMemoizedHelpers:
|
158
|
+
Enabled: false
|
159
|
+
|
160
|
+
RSpec/PendingWithoutReason:
|
161
|
+
Enabled: false
|
162
|
+
|
163
|
+
RSpec/NestedGroups:
|
164
|
+
Enabled: false
|
165
|
+
|
166
|
+
RSpec/ExampleLength:
|
167
|
+
Enabled: false
|
168
|
+
|
169
|
+
RSpec/MultipleExpectations:
|
170
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v0.2.11](https://github.com/BoxcarsAI/boxcars/tree/v0.2.11) (2023-05-05)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.10...v0.2.11)
|
6
|
+
|
7
|
+
**Closed issues:**
|
8
|
+
|
9
|
+
- Chore: move vector store to top level [\#67](https://github.com/BoxcarsAI/boxcars/issues/67)
|
10
|
+
|
11
|
+
**Merged pull requests:**
|
12
|
+
|
13
|
+
- Move vector store [\#69](https://github.com/BoxcarsAI/boxcars/pull/69) ([francis](https://github.com/francis))
|
14
|
+
|
3
15
|
## [v0.2.10](https://github.com/BoxcarsAI/boxcars/tree/v0.2.10) (2023-05-05)
|
4
16
|
|
5
17
|
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...v0.2.10)
|
data/Gemfile
CHANGED
@@ -5,19 +5,11 @@ source "https://rubygems.org"
|
|
5
5
|
# Specify your gem's dependencies in boxcars.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem "
|
9
|
-
|
10
|
-
gem "rspec", "~> 3.2"
|
11
|
-
|
12
|
-
gem "rubocop", "~> 1.21"
|
13
|
-
|
14
|
-
gem "vcr", "~> 6.1.0"
|
8
|
+
gem "debug", "~> 1.1"
|
15
9
|
|
16
|
-
gem "
|
10
|
+
gem "dotenv", "~> 2.8"
|
17
11
|
|
18
|
-
gem "
|
19
|
-
|
20
|
-
gem "rubocop-rspec", "~> 2.17"
|
12
|
+
gem "rake", "~> 13.0"
|
21
13
|
|
22
14
|
gem "sqlite3", "~> 1.6"
|
23
15
|
|
@@ -32,3 +24,15 @@ gem "activesupport", "~> 7.0"
|
|
32
24
|
gem "rest-client", "~> 2.1"
|
33
25
|
|
34
26
|
gem "hnswlib", "~> 0.8.1"
|
27
|
+
|
28
|
+
gem "pg", "~> 1.5", ">= 1.5.3"
|
29
|
+
gem "pgvector", "~> 0.2.0"
|
30
|
+
|
31
|
+
group :development, :test do
|
32
|
+
gem "rspec", "~> 3.2"
|
33
|
+
gem "rubocop", "~> 1.21"
|
34
|
+
gem "vcr", "~> 6.1.0"
|
35
|
+
gem "webmock", "~> 3.18.1"
|
36
|
+
gem "rubocop-rake", "~> 0.6.0"
|
37
|
+
gem "rubocop-rspec", "~> 2.17"
|
38
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
boxcars (0.2.
|
4
|
+
boxcars (0.2.13)
|
5
5
|
google_search_results (~> 2.2)
|
6
6
|
gpt4all (~> 0.0.4)
|
7
|
-
|
7
|
+
hnswlib (~> 0.8)
|
8
|
+
pgvector (~> 0.2)
|
9
|
+
ruby-openai (~> 4.1)
|
8
10
|
|
9
11
|
GEM
|
10
12
|
remote: https://rubygems.org/
|
@@ -19,7 +21,7 @@ GEM
|
|
19
21
|
i18n (>= 1.6, < 2)
|
20
22
|
minitest (>= 5.1)
|
21
23
|
tzinfo (~> 2.0)
|
22
|
-
addressable (2.8.
|
24
|
+
addressable (2.8.4)
|
23
25
|
public_suffix (>= 2.0.2, < 6.0)
|
24
26
|
ast (2.4.2)
|
25
27
|
async (1.31.0)
|
@@ -34,7 +36,7 @@ GEM
|
|
34
36
|
protocol-http1 (~> 0.15.0)
|
35
37
|
protocol-http2 (~> 0.15.0)
|
36
38
|
traces (>= 0.8.0)
|
37
|
-
async-http-faraday (0.
|
39
|
+
async-http-faraday (0.12.0)
|
38
40
|
async-http (~> 0.42)
|
39
41
|
faraday
|
40
42
|
async-io (1.34.3)
|
@@ -46,7 +48,7 @@ GEM
|
|
46
48
|
fiber-local
|
47
49
|
crack (0.4.5)
|
48
50
|
rexml
|
49
|
-
debug (1.
|
51
|
+
debug (1.8.0)
|
50
52
|
irb (>= 1.5.0)
|
51
53
|
reline (>= 0.3.1)
|
52
54
|
diff-lcs (1.5.0)
|
@@ -56,8 +58,10 @@ GEM
|
|
56
58
|
faraday (2.7.4)
|
57
59
|
faraday-net_http (>= 2.0, < 3.1)
|
58
60
|
ruby2_keywords (>= 0.0.4)
|
59
|
-
faraday-http-cache (2.
|
61
|
+
faraday-http-cache (2.5.0)
|
60
62
|
faraday (>= 0.8)
|
63
|
+
faraday-multipart (1.0.4)
|
64
|
+
multipart-post (~> 2)
|
61
65
|
faraday-net_http (3.0.2)
|
62
66
|
faraday-retry (2.1.0)
|
63
67
|
faraday (~> 2.0)
|
@@ -81,35 +85,33 @@ GEM
|
|
81
85
|
http-accept (1.7.0)
|
82
86
|
http-cookie (1.0.5)
|
83
87
|
domain_name (~> 0.5)
|
84
|
-
|
85
|
-
mini_mime (>= 1.0.0)
|
86
|
-
multi_xml (>= 0.5.2)
|
87
|
-
i18n (1.12.0)
|
88
|
+
i18n (1.13.0)
|
88
89
|
concurrent-ruby (~> 1.0)
|
89
90
|
io-console (0.6.0)
|
90
91
|
io-console (0.6.0-java)
|
91
|
-
irb (1.6.
|
92
|
+
irb (1.6.4)
|
92
93
|
reline (>= 0.3.0)
|
93
94
|
json (2.6.3)
|
94
95
|
json (2.6.3-java)
|
95
96
|
mime-types (3.4.1)
|
96
97
|
mime-types-data (~> 3.2015)
|
97
98
|
mime-types-data (3.2023.0218.1)
|
98
|
-
|
99
|
-
mini_portile2 (2.8.1)
|
99
|
+
mini_portile2 (2.8.2)
|
100
100
|
minitest (5.18.0)
|
101
101
|
multi_json (1.15.0)
|
102
|
-
|
102
|
+
multipart-post (2.3.0)
|
103
103
|
netrc (0.11.0)
|
104
|
-
nio4r (2.5.
|
105
|
-
nio4r (2.5.
|
104
|
+
nio4r (2.5.9)
|
105
|
+
nio4r (2.5.9-java)
|
106
106
|
octokit (4.25.1)
|
107
107
|
faraday (>= 1, < 3)
|
108
108
|
sawyer (~> 0.9)
|
109
109
|
os (1.1.4)
|
110
|
-
parallel (1.
|
111
|
-
parser (3.2.
|
110
|
+
parallel (1.23.0)
|
111
|
+
parser (3.2.2.1)
|
112
112
|
ast (~> 2.4.1)
|
113
|
+
pg (1.5.3)
|
114
|
+
pgvector (0.2.0)
|
113
115
|
protocol-hpack (1.4.2)
|
114
116
|
protocol-http (0.24.1)
|
115
117
|
protocol-http1 (0.15.0)
|
@@ -120,7 +122,7 @@ GEM
|
|
120
122
|
public_suffix (5.0.1)
|
121
123
|
rainbow (3.1.1)
|
122
124
|
rake (13.0.6)
|
123
|
-
regexp_parser (2.
|
125
|
+
regexp_parser (2.8.0)
|
124
126
|
reline (0.3.3)
|
125
127
|
io-console (~> 0.5)
|
126
128
|
rest-client (2.1.0)
|
@@ -133,46 +135,50 @@ GEM
|
|
133
135
|
rspec-core (~> 3.12.0)
|
134
136
|
rspec-expectations (~> 3.12.0)
|
135
137
|
rspec-mocks (~> 3.12.0)
|
136
|
-
rspec-core (3.12.
|
138
|
+
rspec-core (3.12.2)
|
137
139
|
rspec-support (~> 3.12.0)
|
138
|
-
rspec-expectations (3.12.
|
140
|
+
rspec-expectations (3.12.3)
|
139
141
|
diff-lcs (>= 1.2.0, < 2.0)
|
140
142
|
rspec-support (~> 3.12.0)
|
141
143
|
rspec-mocks (3.12.5)
|
142
144
|
diff-lcs (>= 1.2.0, < 2.0)
|
143
145
|
rspec-support (~> 3.12.0)
|
144
146
|
rspec-support (3.12.0)
|
145
|
-
rubocop (1.
|
147
|
+
rubocop (1.51.0)
|
146
148
|
json (~> 2.3)
|
147
149
|
parallel (~> 1.10)
|
148
150
|
parser (>= 3.2.0.0)
|
149
151
|
rainbow (>= 2.2.2, < 4.0)
|
150
152
|
regexp_parser (>= 1.8, < 3.0)
|
151
153
|
rexml (>= 3.2.5, < 4.0)
|
152
|
-
rubocop-ast (>= 1.
|
154
|
+
rubocop-ast (>= 1.28.0, < 2.0)
|
153
155
|
ruby-progressbar (~> 1.7)
|
154
156
|
unicode-display_width (>= 2.4.0, < 3.0)
|
155
|
-
rubocop-ast (1.28.
|
157
|
+
rubocop-ast (1.28.1)
|
156
158
|
parser (>= 3.2.1.0)
|
157
|
-
rubocop-capybara (2.
|
159
|
+
rubocop-capybara (2.18.0)
|
158
160
|
rubocop (~> 1.41)
|
161
|
+
rubocop-factory_bot (2.23.1)
|
162
|
+
rubocop (~> 1.33)
|
159
163
|
rubocop-rake (0.6.0)
|
160
164
|
rubocop (~> 1.0)
|
161
|
-
rubocop-rspec (2.
|
165
|
+
rubocop-rspec (2.22.0)
|
162
166
|
rubocop (~> 1.33)
|
163
167
|
rubocop-capybara (~> 2.17)
|
164
|
-
|
165
|
-
|
168
|
+
rubocop-factory_bot (~> 2.22)
|
169
|
+
ruby-openai (4.1.0)
|
170
|
+
faraday (>= 1)
|
171
|
+
faraday-multipart (>= 1)
|
166
172
|
ruby-progressbar (1.13.0)
|
167
173
|
ruby2_keywords (0.0.5)
|
168
174
|
sawyer (0.9.2)
|
169
175
|
addressable (>= 2.3.5)
|
170
176
|
faraday (>= 0.17.3, < 3)
|
171
|
-
sqlite3 (1.6.
|
177
|
+
sqlite3 (1.6.3)
|
172
178
|
mini_portile2 (~> 2.8.0)
|
173
|
-
sqlite3 (1.6.
|
174
|
-
sqlite3 (1.6.
|
175
|
-
sqlite3 (1.6.
|
179
|
+
sqlite3 (1.6.3-arm64-darwin)
|
180
|
+
sqlite3 (1.6.3-x86_64-darwin)
|
181
|
+
sqlite3 (1.6.3-x86_64-linux)
|
176
182
|
strings-ansi (0.2.0)
|
177
183
|
timers (4.3.5)
|
178
184
|
traces (0.9.1)
|
@@ -212,6 +218,8 @@ DEPENDENCIES
|
|
212
218
|
faraday-retry (~> 2.0)
|
213
219
|
github_changelog_generator (~> 1.16)
|
214
220
|
hnswlib (~> 0.8.1)
|
221
|
+
pg (~> 1.5, >= 1.5.3)
|
222
|
+
pgvector (~> 0.2.0)
|
215
223
|
rake (~> 13.0)
|
216
224
|
rest-client (~> 2.1)
|
217
225
|
rspec (~> 3.2)
|
data/README.md
CHANGED
@@ -21,6 +21,7 @@ All of these concepts are in a module named Boxcars:
|
|
21
21
|
- Train - Given a list of Boxcars and optionally an Engine, a Train breaks down a problem into pieces for individual Boxcars to solve. The individual results are then combined until a final answer is found. ZeroShot is the only current implementation of Train (but we are adding more soon), and you can either construct it directly or use `Boxcars::train` when you want to build a Train.
|
22
22
|
- Prompt - used by an Engine to generate text results. Our Boxcars have built-in prompts, but you have the flexibility to change or augment them if you so desire.
|
23
23
|
- Engine - an entity that generates text from a Prompt. OpenAI's LLM text generator is the default Engine if no other is specified, and you can override the default engine if so desired (`Boxcars.configuration.default_engine`).
|
24
|
+
- VectorStore - a place to store and query vectors.
|
24
25
|
|
25
26
|
## Security
|
26
27
|
Currently, our system is designed for individuals who already possess administrative privileges for their project. It is likely possible to manipulate the system's prompts to carry out malicious actions, but if you already have administrative access, you can perform such actions without requiring boxcars in the first place.
|
@@ -132,7 +133,9 @@ Next Actions:
|
|
132
133
|
### More Examples
|
133
134
|
See [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/boxcars_examples.ipynb) Jupyter Notebook for more examples.
|
134
135
|
|
135
|
-
For the
|
136
|
+
For the Swagger boxcar, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/swagger_examples.ipynb) Jupyter Notebook.
|
137
|
+
|
138
|
+
For simple vector storage and search, see [this](https://github.com/BoxcarsAI/boxcars/blob/main/notebooks/vector_store_examples.ipynb) Jupyter Notebook.
|
136
139
|
|
137
140
|
Note, some folks that we talked to didn't know that you could run Ruby Jupyter notebooks. [You can](https://github.com/SciRuby/iruby).
|
138
141
|
|
data/boxcars.gemspec
CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = "You simply set an OpenAI key, give a number of Boxcars to a Train, and magic ensues when you run it."
|
13
13
|
spec.homepage = "https://github.com/BoxcarsAI/boxcars"
|
14
14
|
spec.license = "MIT"
|
15
|
-
spec.required_ruby_version = ">=
|
15
|
+
spec.required_ruby_version = ">= 3.0"
|
16
16
|
|
17
17
|
spec.metadata["homepage_uri"] = spec.homepage
|
18
18
|
spec.metadata["source_code_uri"] = spec.homepage
|
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
32
|
|
33
|
-
# dev / test dependencies
|
34
|
-
spec.add_development_dependency "debug", "~> 1.1"
|
35
|
-
spec.add_development_dependency "dotenv", "~> 2.8"
|
36
|
-
spec.add_development_dependency "rspec", "~> 3.2"
|
37
|
-
|
38
33
|
# runtime dependencies
|
39
34
|
spec.add_dependency "google_search_results", "~> 2.2"
|
40
35
|
spec.add_dependency "gpt4all", "~> 0.0.4"
|
41
|
-
spec.add_dependency "
|
36
|
+
spec.add_dependency "hnswlib", "~> 0.8"
|
37
|
+
spec.add_dependency "ruby-openai", "~> 4.1"
|
38
|
+
spec.add_dependency "pgvector", "~> 0.2"
|
42
39
|
|
43
40
|
# For more information and examples about making a new gem, checkout our
|
44
41
|
# guide at: https://bundler.io/guides/creating_gem.html
|
@@ -29,7 +29,7 @@ module Boxcars
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# @return Hash The additional variables for this boxcar.
|
32
|
-
def prediction_additional
|
32
|
+
def prediction_additional(_inputs)
|
33
33
|
{ model_info: model_info }.merge super
|
34
34
|
end
|
35
35
|
|
@@ -161,7 +161,7 @@ module Boxcars
|
|
161
161
|
begin
|
162
162
|
return true unless changes&.positive?
|
163
163
|
rescue StandardError => e
|
164
|
-
|
164
|
+
Boxcars.error "Error while computing change count: #{e.message}", :red
|
165
165
|
end
|
166
166
|
|
167
167
|
Boxcars.debug "#{name}(Pending Changes): #{changes}", :yellow
|
@@ -114,14 +114,14 @@ module Boxcars
|
|
114
114
|
end
|
115
115
|
|
116
116
|
# @return Hash The additional variables for this boxcar.
|
117
|
-
def prediction_additional
|
117
|
+
def prediction_additional(_inputs)
|
118
118
|
{ stop: stop, top_k: top_k }
|
119
119
|
end
|
120
120
|
|
121
121
|
# @param inputs [Hash] The inputs to the boxcar.
|
122
122
|
# @return Hash The variables for this boxcar.
|
123
123
|
def prediction_variables(inputs)
|
124
|
-
prediction_additional.merge(inputs)
|
124
|
+
prediction_additional(inputs).merge(inputs)
|
125
125
|
end
|
126
126
|
|
127
127
|
# remove backticks or triple backticks from the code
|
data/lib/boxcars/boxcar/sql.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
+
module Boxcars
|
5
|
+
# A Boxcar that answers a question using content retrieved from a vector search
|
6
|
+
class VectorAnswer < EngineBoxcar
|
7
|
+
# the description of this engine boxcar
|
8
|
+
DESC = "useful for when you need to answer questions from vector search results."
|
9
|
+
|
10
|
+
attr_reader :embeddings, :vector_documents, :search_content
|
11
|
+
|
12
|
+
# @param embeddings [Hash] The vector embeddings to use for this boxcar.
|
13
|
+
# @param vector_documents [Hash] The vector documents to use for this boxcar.
|
14
|
+
# @param engine [Boxcars::Engine] The engine to use for this boxcar. Can be inherited from a train if nil.
|
15
|
+
# @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
|
16
|
+
# @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
|
17
|
+
def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
|
18
|
+
the_prompt = prompt || my_prompt
|
19
|
+
@embeddings = embeddings
|
20
|
+
@vector_documents = vector_documents
|
21
|
+
kwargs[:stop] ||= ["```output"]
|
22
|
+
kwargs[:name] ||= "VectorAnswer"
|
23
|
+
kwargs[:description] ||= DESC
|
24
|
+
super(engine: engine, prompt: the_prompt, **kwargs)
|
25
|
+
end
|
26
|
+
|
27
|
+
# @param inputs [Hash] The inputs to use for the prediction.
|
28
|
+
# @return Hash The additional variables for this boxcar.
|
29
|
+
def prediction_additional(inputs)
|
30
|
+
{ search_content: get_search_content(inputs[:question]) }.merge super
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
# @param results [Array] The results from the vector search.
|
36
|
+
# @return [String] The content of the search results.
|
37
|
+
def get_results_content(results)
|
38
|
+
results&.map do |result|
|
39
|
+
result[:document].content
|
40
|
+
end.to_a.join("\n\n")
|
41
|
+
end
|
42
|
+
|
43
|
+
# return the content of the search results for count results
|
44
|
+
# @param question [String] The question to search for.
|
45
|
+
# @param count [Integer] The number of results to return.
|
46
|
+
# @return [String] The content of the search results.
|
47
|
+
def get_search_content(question, count: 1)
|
48
|
+
search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
|
49
|
+
results = search.call query: question, count: count
|
50
|
+
@search_content = get_search_content(results)
|
51
|
+
end
|
52
|
+
|
53
|
+
# our template
|
54
|
+
CTEMPLATE = [
|
55
|
+
syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
|
56
|
+
"```text\n%<search_content>s\n```\n\n",
|
57
|
+
"Using the above, just answer the question as if you were answering directly."),
|
58
|
+
user("%<question>s")
|
59
|
+
].freeze
|
60
|
+
|
61
|
+
# The prompt to use for the engine.
|
62
|
+
def my_prompt
|
63
|
+
@conversation ||= Conversation.new(lines: CTEMPLATE)
|
64
|
+
@my_prompt ||= ConversationPrompt.new(
|
65
|
+
conversation: @conversation,
|
66
|
+
input_variables: [:question],
|
67
|
+
other_inputs: [:search_content],
|
68
|
+
output_variables: [:answer])
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
data/lib/boxcars/boxcar.rb
CHANGED
@@ -156,4 +156,6 @@ require "boxcars/boxcar/wikipedia_search"
|
|
156
156
|
require "boxcars/boxcar/sql"
|
157
157
|
require "boxcars/boxcar/swagger"
|
158
158
|
require "boxcars/boxcar/active_record"
|
159
|
+
require "boxcars/vector_store"
|
159
160
|
require "boxcars/vector_search"
|
161
|
+
require "boxcars/boxcar/vector_answer"
|
@@ -43,6 +43,10 @@ module Boxcars
|
|
43
43
|
::OpenAI::Client.new(access_token: access_token, organization_id: organization_id)
|
44
44
|
end
|
45
45
|
|
46
|
+
def conversation_model?(model)
|
47
|
+
["gpt-3.5-turbo", "gpt-4"].include?(model)
|
48
|
+
end
|
49
|
+
|
46
50
|
# Get an answer from the engine.
|
47
51
|
# @param prompt [String] The prompt to use when asking the engine.
|
48
52
|
# @param openai_access_token [String] The access token to use when asking the engine.
|
@@ -51,7 +55,7 @@ module Boxcars
|
|
51
55
|
def client(prompt:, inputs: {}, openai_access_token: nil, **kwargs)
|
52
56
|
clnt = Openai.open_ai_client(openai_access_token: openai_access_token)
|
53
57
|
params = open_ai_params.merge(kwargs)
|
54
|
-
if params[:model]
|
58
|
+
if conversation_model?(params[:model])
|
55
59
|
prompt = prompt.first if prompt.is_a?(Array)
|
56
60
|
params = prompt.as_messages(inputs).merge(params)
|
57
61
|
if Boxcars.configuration.log_prompts
|
@@ -71,6 +75,9 @@ module Boxcars
|
|
71
75
|
def run(question, **kwargs)
|
72
76
|
prompt = Prompt.new(template: question)
|
73
77
|
response = client(prompt: prompt, **kwargs)
|
78
|
+
raise Error, "OpenAI: No response from API" unless response
|
79
|
+
raise Error, "OpenAI: #{response['error']}" if response["error"]
|
80
|
+
|
74
81
|
answer = response["choices"].map { |c| c.dig("message", "content") || c["text"] }.join("\n").strip
|
75
82
|
puts answer
|
76
83
|
answer
|
data/lib/boxcars/train.rb
CHANGED
@@ -69,7 +69,7 @@ module Boxcars
|
|
69
69
|
# @return [Boxcars::Action] Action specifying what boxcar to use.
|
70
70
|
def plan(intermediate_steps, **kwargs)
|
71
71
|
thoughts = construct_scratchpad(intermediate_steps)
|
72
|
-
full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
|
72
|
+
full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
|
73
73
|
action = get_next_action(full_inputs)
|
74
74
|
return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
|
75
75
|
|
@@ -3,8 +3,72 @@
|
|
3
3
|
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
4
|
module Boxcars
|
5
5
|
# Converts a text query into an embedding vector and searches the configured vector store with it.
|
6
|
-
class VectorSearch
|
7
|
-
|
6
|
+
class VectorSearch
|
7
|
+
def initialize(params)
|
8
|
+
@vector_documents = params[:vector_documents]
|
9
|
+
@embedding_tool = params[:embedding_tool] || :openai
|
10
|
+
@vector_search_instance = vector_search_instance
|
11
|
+
@openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
|
12
|
+
end
|
13
|
+
|
14
|
+
def call(query:, count: 1)
|
15
|
+
validate_query(query)
|
16
|
+
query_vector = convert_query_to_vector(query)
|
17
|
+
@vector_search_instance.call(query_vector: query_vector, count: count)
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
attr_reader :vector_documents, :embedding_tool, :openai_connection
|
23
|
+
|
24
|
+
def vector_search_instance
|
25
|
+
case vector_documents[:type]
|
26
|
+
when :hnswlib
|
27
|
+
Boxcars::VectorStore::Hnswlib::Search.new(
|
28
|
+
vector_documents: vector_documents
|
29
|
+
)
|
30
|
+
when :in_memory
|
31
|
+
Boxcars::VectorStore::InMemory::Search.new(
|
32
|
+
vector_documents: vector_documents
|
33
|
+
)
|
34
|
+
when :pgvector
|
35
|
+
Boxcars::VectorStore::Pgvector::Search.new(
|
36
|
+
vector_documents: vector_documents
|
37
|
+
)
|
38
|
+
else
|
39
|
+
raise_argument_error('Unsupported vector store provided')
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def default_connection(openai_access_token: nil)
|
44
|
+
Openai.open_ai_client(openai_access_token: openai_access_token)
|
45
|
+
end
|
46
|
+
|
47
|
+
def validate_query(query)
|
48
|
+
raise_argument_error('query must be a string') unless query.is_a?(String)
|
49
|
+
raise_argument_error('query must not be empty') if query.empty?
|
50
|
+
end
|
51
|
+
|
52
|
+
def convert_query_to_vector(query)
|
53
|
+
tool = embeddings_method(embedding_tool)
|
54
|
+
res = tool[:klass].call(
|
55
|
+
texts: [query], client: tool[:client]
|
56
|
+
).first
|
57
|
+
res[:embedding]
|
58
|
+
end
|
59
|
+
|
60
|
+
def embeddings_method(embedding_tool)
|
61
|
+
case embedding_tool
|
62
|
+
when :openai
|
63
|
+
{ klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_connection }
|
64
|
+
when :tensorflow
|
65
|
+
{ klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def raise_argument_error(message)
|
70
|
+
raise ::Boxcars::ArgumentError, message
|
71
|
+
end
|
8
72
|
end
|
9
73
|
end
|
10
74
|
|
@@ -3,10 +3,11 @@
|
|
3
3
|
module Boxcars
|
4
4
|
module VectorStore
|
5
5
|
class Document
|
6
|
-
attr_accessor :
|
6
|
+
attr_accessor :content, :metadata, :embedding
|
7
7
|
|
8
8
|
def initialize(fields = {})
|
9
|
-
@
|
9
|
+
@content = fields[:content] || ""
|
10
|
+
@embedding = fields[:embedding] || []
|
10
11
|
@metadata = fields[:metadata] || {}
|
11
12
|
end
|
12
13
|
end
|
@@ -7,8 +7,6 @@ module Boxcars
|
|
7
7
|
class EmbedViaOpenAI
|
8
8
|
include VectorStore
|
9
9
|
|
10
|
-
attr_accessor :texts, :client, :model
|
11
|
-
|
12
10
|
def initialize(texts:, client:, model: 'text-embedding-ada-002')
|
13
11
|
validate_params(texts, client)
|
14
12
|
@texts = texts
|
@@ -28,6 +26,8 @@ module Boxcars
|
|
28
26
|
|
29
27
|
private
|
30
28
|
|
29
|
+
attr_accessor :texts, :client, :model
|
30
|
+
|
31
31
|
def validate_params(texts, client)
|
32
32
|
raise_error 'texts must be an array of strings' unless texts.is_a?(Array) && texts.all? { |text| text.is_a?(String) }
|
33
33
|
raise_error 'openai_connection must be an OpenAI::Client' unless client.is_a?(OpenAI::Client)
|