boxcars 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/Gemfile.lock +11 -9
- data/boxcars.gemspec +3 -6
- data/lib/boxcars/boxcar/active_record.rb +1 -1
- data/lib/boxcars/boxcar/engine_boxcar.rb +2 -2
- data/lib/boxcars/boxcar/sql.rb +1 -1
- data/lib/boxcars/boxcar/swagger.rb +1 -1
- data/lib/boxcars/boxcar/vector_answer.rb +71 -0
- data/lib/boxcars/boxcar.rb +1 -0
- data/lib/boxcars/train/zero_shot.rb +1 -1
- data/lib/boxcars/train.rb +1 -1
- data/lib/boxcars/vector_search.rb +1 -1
- data/lib/boxcars/vector_store/hnswlib/build_from_files.rb +5 -1
- data/lib/boxcars/version.rb +1 -1
- metadata +21 -34
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 74f14f8575e4670d2be6196c5196d41dd9728b5a44a0d4e199dfb705dfc77ed5
|
|
4
|
+
data.tar.gz: 06f2e8178f9696831870b5d8d5ea40bda8ba74a2fcc27283849f49124c51a06b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9ff6e759f3d942f859de85763ffe9bc0ccf5636914d894205f78cffc99abb1b27dea47463f0fc968eba6746c055f796c95884d5e421486e06374fb0519eb8c63
|
|
7
|
+
data.tar.gz: 03bf42b1fbd6dac1eff4734bd2443a4a2a9f7cb931c99a2fe3f9453f5a23f0853b4eb26c817e1757a16629617eff4704f276b99fb165689ef6016ace86c2fb56
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
boxcars (0.2.12)
|
|
4
|
+
boxcars (0.2.13)
|
|
5
5
|
google_search_results (~> 2.2)
|
|
6
6
|
gpt4all (~> 0.0.4)
|
|
7
|
-
|
|
7
|
+
hnswlib (~> 0.8)
|
|
8
|
+
pgvector (~> 0.2)
|
|
9
|
+
ruby-openai (~> 4.1)
|
|
8
10
|
|
|
9
11
|
GEM
|
|
10
12
|
remote: https://rubygems.org/
|
|
@@ -142,7 +144,7 @@ GEM
|
|
|
142
144
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
143
145
|
rspec-support (~> 3.12.0)
|
|
144
146
|
rspec-support (3.12.0)
|
|
145
|
-
rubocop (1.
|
|
147
|
+
rubocop (1.51.0)
|
|
146
148
|
json (~> 2.3)
|
|
147
149
|
parallel (~> 1.10)
|
|
148
150
|
parser (>= 3.2.0.0)
|
|
@@ -156,7 +158,7 @@ GEM
|
|
|
156
158
|
parser (>= 3.2.1.0)
|
|
157
159
|
rubocop-capybara (2.18.0)
|
|
158
160
|
rubocop (~> 1.41)
|
|
159
|
-
rubocop-factory_bot (2.
|
|
161
|
+
rubocop-factory_bot (2.23.1)
|
|
160
162
|
rubocop (~> 1.33)
|
|
161
163
|
rubocop-rake (0.6.0)
|
|
162
164
|
rubocop (~> 1.0)
|
|
@@ -164,7 +166,7 @@ GEM
|
|
|
164
166
|
rubocop (~> 1.33)
|
|
165
167
|
rubocop-capybara (~> 2.17)
|
|
166
168
|
rubocop-factory_bot (~> 2.22)
|
|
167
|
-
ruby-openai (4.
|
|
169
|
+
ruby-openai (4.1.0)
|
|
168
170
|
faraday (>= 1)
|
|
169
171
|
faraday-multipart (>= 1)
|
|
170
172
|
ruby-progressbar (1.13.0)
|
|
@@ -172,11 +174,11 @@ GEM
|
|
|
172
174
|
sawyer (0.9.2)
|
|
173
175
|
addressable (>= 2.3.5)
|
|
174
176
|
faraday (>= 0.17.3, < 3)
|
|
175
|
-
sqlite3 (1.6.
|
|
177
|
+
sqlite3 (1.6.3)
|
|
176
178
|
mini_portile2 (~> 2.8.0)
|
|
177
|
-
sqlite3 (1.6.
|
|
178
|
-
sqlite3 (1.6.
|
|
179
|
-
sqlite3 (1.6.
|
|
179
|
+
sqlite3 (1.6.3-arm64-darwin)
|
|
180
|
+
sqlite3 (1.6.3-x86_64-darwin)
|
|
181
|
+
sqlite3 (1.6.3-x86_64-linux)
|
|
180
182
|
strings-ansi (0.2.0)
|
|
181
183
|
timers (4.3.5)
|
|
182
184
|
traces (0.9.1)
|
data/boxcars.gemspec
CHANGED
|
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
31
31
|
spec.require_paths = ["lib"]
|
|
32
32
|
|
|
33
|
-
# dev / test dependencies
|
|
34
|
-
spec.add_development_dependency "debug", "~> 1.1"
|
|
35
|
-
spec.add_development_dependency "dotenv", "~> 2.8"
|
|
36
|
-
spec.add_development_dependency "rspec", "~> 3.2"
|
|
37
|
-
|
|
38
33
|
# runtime dependencies
|
|
39
34
|
spec.add_dependency "google_search_results", "~> 2.2"
|
|
40
35
|
spec.add_dependency "gpt4all", "~> 0.0.4"
|
|
41
|
-
spec.add_dependency "
|
|
36
|
+
spec.add_dependency "hnswlib", "~> 0.8"
|
|
37
|
+
spec.add_dependency "ruby-openai", "~> 4.1"
|
|
38
|
+
spec.add_dependency "pgvector", "~> 0.2"
|
|
42
39
|
|
|
43
40
|
# For more information and examples about making a new gem, checkout our
|
|
44
41
|
# guide at: https://bundler.io/guides/creating_gem.html
|
|
@@ -114,14 +114,14 @@ module Boxcars
|
|
|
114
114
|
end
|
|
115
115
|
|
|
116
116
|
# @return Hash The additional variables for this boxcar.
|
|
117
|
-
def prediction_additional
|
|
117
|
+
def prediction_additional(_inputs)
|
|
118
118
|
{ stop: stop, top_k: top_k }
|
|
119
119
|
end
|
|
120
120
|
|
|
121
121
|
# @param inputs [Hash] The inputs to the boxcar.
|
|
122
122
|
# @return Hash The variables for this boxcar.
|
|
123
123
|
def prediction_variables(inputs)
|
|
124
|
-
prediction_additional.merge(inputs)
|
|
124
|
+
prediction_additional(inputs).merge(inputs)
|
|
125
125
|
end
|
|
126
126
|
|
|
127
127
|
# remove backticks or triple backticks from the code
|
data/lib/boxcars/boxcar/sql.rb
CHANGED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
|
4
|
+
module Boxcars
|
|
5
|
+
# A Boxcar that interprets a prompt and executes ruby code to do math
|
|
6
|
+
class VectorAnswer < EngineBoxcar
|
|
7
|
+
# the description of this engine boxcar
|
|
8
|
+
DESC = "useful for when you need to answer questions from vector search results."
|
|
9
|
+
|
|
10
|
+
attr_reader :embeddings, :vector_documents, :search_content
|
|
11
|
+
|
|
12
|
+
# @param embeddings [Hash] The vector embeddings to use for this boxcar.
|
|
13
|
+
# @param vector_documents [Hash] The vector documents to use for this boxcar.
|
|
14
|
+
# @param engine [Boxcars::Engine] The engine to user for this boxcar. Can be inherited from a train if nil.
|
|
15
|
+
# @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
|
|
16
|
+
# @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
|
|
17
|
+
def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
|
|
18
|
+
the_prompt = prompt || my_prompt
|
|
19
|
+
@embeddings = embeddings
|
|
20
|
+
@vector_documents = vector_documents
|
|
21
|
+
kwargs[:stop] ||= ["```output"]
|
|
22
|
+
kwargs[:name] ||= "VectorAnswer"
|
|
23
|
+
kwargs[:description] ||= DESC
|
|
24
|
+
super(engine: engine, prompt: the_prompt, **kwargs)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# @param inputs [Hash] The inputs to use for the prediction.
|
|
28
|
+
# @return Hash The additional variables for this boxcar.
|
|
29
|
+
def prediction_additional(inputs)
|
|
30
|
+
{ search_content: get_search_content(inputs[:question]) }.merge super
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
# @param results [Array] The results from the vector search.
|
|
36
|
+
# @return [String] The content of the search results.
|
|
37
|
+
def get_results_content(results)
|
|
38
|
+
results&.map do |result|
|
|
39
|
+
result[:document].content
|
|
40
|
+
end.to_a.join("\n\n")
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# return the content of the search results for count results
|
|
44
|
+
# @params question [String] The question to search for.
|
|
45
|
+
# @params count [Integer] The number of results to return.
|
|
46
|
+
# @return [String] The content of the search results.
|
|
47
|
+
def get_search_content(question, count: 1)
|
|
48
|
+
search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
|
|
49
|
+
results = search.call query: question, count: count
|
|
50
|
+
@search_content = get_search_content(results)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# our template
|
|
54
|
+
CTEMPLATE = [
|
|
55
|
+
syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
|
|
56
|
+
"```text\n%<search_content>s\n```\n\n",
|
|
57
|
+
"Using the above, just answer the question as if you were answering directly."),
|
|
58
|
+
user("%<question>s")
|
|
59
|
+
].freeze
|
|
60
|
+
|
|
61
|
+
# The prompt to use for the engine.
|
|
62
|
+
def my_prompt
|
|
63
|
+
@conversation ||= Conversation.new(lines: CTEMPLATE)
|
|
64
|
+
@my_prompt ||= ConversationPrompt.new(
|
|
65
|
+
conversation: @conversation,
|
|
66
|
+
input_variables: [:question],
|
|
67
|
+
other_inputs: [:search_content],
|
|
68
|
+
output_variables: [:answer])
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
data/lib/boxcars/boxcar.rb
CHANGED
data/lib/boxcars/train.rb
CHANGED
|
@@ -69,7 +69,7 @@ module Boxcars
|
|
|
69
69
|
# @return [Boxcars::Action] Action specifying what boxcar to use.
|
|
70
70
|
def plan(intermediate_steps, **kwargs)
|
|
71
71
|
thoughts = construct_scratchpad(intermediate_steps)
|
|
72
|
-
full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
|
|
72
|
+
full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
|
|
73
73
|
action = get_next_action(full_inputs)
|
|
74
74
|
return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
|
|
75
75
|
|
|
@@ -8,7 +8,7 @@ module Boxcars
|
|
|
8
8
|
@vector_documents = params[:vector_documents]
|
|
9
9
|
@embedding_tool = params[:embedding_tool] || :openai
|
|
10
10
|
@vector_search_instance = vector_search_instance
|
|
11
|
-
@openai_connection = params[:openai_connection] || default_connection(openai_access_token: openai_access_token)
|
|
11
|
+
@openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def call(query:, count: 1)
|
|
@@ -22,7 +22,7 @@ module Boxcars
|
|
|
22
22
|
validate_params(@training_data_path, @index_file_path, split_chunk_size)
|
|
23
23
|
|
|
24
24
|
@json_doc_file_path = absolute_json_doc_file_path(@index_file_path, params[:json_doc_file_path])
|
|
25
|
-
@force_rebuild = params[:force_rebuild]
|
|
25
|
+
@force_rebuild = params.key?(:force_rebuild) ? params[:force_rebuild] : true
|
|
26
26
|
@hnsw_vectors = []
|
|
27
27
|
end
|
|
28
28
|
|
|
@@ -32,9 +32,13 @@ module Boxcars
|
|
|
32
32
|
else
|
|
33
33
|
puts "Building Hnswlib vector store..."
|
|
34
34
|
data = load_data_files(training_data_path)
|
|
35
|
+
Boxcars.debug("Loaded #{data.length} files from #{training_data_path}")
|
|
35
36
|
texts = split_text_into_chunks(data)
|
|
37
|
+
Boxcars.debug("Split #{data.length} files into #{texts.length} chunks")
|
|
36
38
|
vectors = generate_vectors(texts)
|
|
39
|
+
Boxcars.debug("Generated #{vectors.length} vectors")
|
|
37
40
|
add_vectors(vectors, texts)
|
|
41
|
+
Boxcars.debug("Added #{vectors.length} vectors to vector store")
|
|
38
42
|
save_vector_store
|
|
39
43
|
|
|
40
44
|
{
|
data/lib/boxcars/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: boxcars
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.12
|
|
4
|
+
version: 0.2.13
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Francis Sullivan
|
|
@@ -9,92 +9,78 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2023-05-
|
|
12
|
+
date: 2023-05-24 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
|
-
name: debug
|
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
|
17
|
-
requirements:
|
|
18
|
-
- - "~>"
|
|
19
|
-
- !ruby/object:Gem::Version
|
|
20
|
-
version: '1.1'
|
|
21
|
-
type: :development
|
|
22
|
-
prerelease: false
|
|
23
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
24
|
-
requirements:
|
|
25
|
-
- - "~>"
|
|
26
|
-
- !ruby/object:Gem::Version
|
|
27
|
-
version: '1.1'
|
|
28
|
-
- !ruby/object:Gem::Dependency
|
|
29
|
-
name: dotenv
|
|
15
|
+
name: google_search_results
|
|
30
16
|
requirement: !ruby/object:Gem::Requirement
|
|
31
17
|
requirements:
|
|
32
18
|
- - "~>"
|
|
33
19
|
- !ruby/object:Gem::Version
|
|
34
|
-
version: '2.8'
|
|
35
|
-
type: :development
|
|
20
|
+
version: '2.2'
|
|
21
|
+
type: :runtime
|
|
36
22
|
prerelease: false
|
|
37
23
|
version_requirements: !ruby/object:Gem::Requirement
|
|
38
24
|
requirements:
|
|
39
25
|
- - "~>"
|
|
40
26
|
- !ruby/object:Gem::Version
|
|
41
|
-
version: '2.8'
|
|
27
|
+
version: '2.2'
|
|
42
28
|
- !ruby/object:Gem::Dependency
|
|
43
|
-
name: rspec
|
|
29
|
+
name: gpt4all
|
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
|
45
31
|
requirements:
|
|
46
32
|
- - "~>"
|
|
47
33
|
- !ruby/object:Gem::Version
|
|
48
|
-
version: '3.2'
|
|
49
|
-
type: :development
|
|
34
|
+
version: 0.0.4
|
|
35
|
+
type: :runtime
|
|
50
36
|
prerelease: false
|
|
51
37
|
version_requirements: !ruby/object:Gem::Requirement
|
|
52
38
|
requirements:
|
|
53
39
|
- - "~>"
|
|
54
40
|
- !ruby/object:Gem::Version
|
|
55
|
-
version: '3.2'
|
|
41
|
+
version: 0.0.4
|
|
56
42
|
- !ruby/object:Gem::Dependency
|
|
57
|
-
name: google_search_results
|
|
43
|
+
name: hnswlib
|
|
58
44
|
requirement: !ruby/object:Gem::Requirement
|
|
59
45
|
requirements:
|
|
60
46
|
- - "~>"
|
|
61
47
|
- !ruby/object:Gem::Version
|
|
62
|
-
version: '2.2'
|
|
48
|
+
version: '0.8'
|
|
63
49
|
type: :runtime
|
|
64
50
|
prerelease: false
|
|
65
51
|
version_requirements: !ruby/object:Gem::Requirement
|
|
66
52
|
requirements:
|
|
67
53
|
- - "~>"
|
|
68
54
|
- !ruby/object:Gem::Version
|
|
69
|
-
version: '2.2'
|
|
55
|
+
version: '0.8'
|
|
70
56
|
- !ruby/object:Gem::Dependency
|
|
71
|
-
name: gpt4all
|
|
57
|
+
name: ruby-openai
|
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|
|
73
59
|
requirements:
|
|
74
60
|
- - "~>"
|
|
75
61
|
- !ruby/object:Gem::Version
|
|
76
|
-
version: 0.0.4
|
|
62
|
+
version: '4.1'
|
|
77
63
|
type: :runtime
|
|
78
64
|
prerelease: false
|
|
79
65
|
version_requirements: !ruby/object:Gem::Requirement
|
|
80
66
|
requirements:
|
|
81
67
|
- - "~>"
|
|
82
68
|
- !ruby/object:Gem::Version
|
|
83
|
-
version: 0.0.4
|
|
69
|
+
version: '4.1'
|
|
84
70
|
- !ruby/object:Gem::Dependency
|
|
85
|
-
name:
|
|
71
|
+
name: pgvector
|
|
86
72
|
requirement: !ruby/object:Gem::Requirement
|
|
87
73
|
requirements:
|
|
88
74
|
- - "~>"
|
|
89
75
|
- !ruby/object:Gem::Version
|
|
90
|
-
version: '
|
|
76
|
+
version: '0.2'
|
|
91
77
|
type: :runtime
|
|
92
78
|
prerelease: false
|
|
93
79
|
version_requirements: !ruby/object:Gem::Requirement
|
|
94
80
|
requirements:
|
|
95
81
|
- - "~>"
|
|
96
82
|
- !ruby/object:Gem::Version
|
|
97
|
-
version: '
|
|
83
|
+
version: '0.2'
|
|
98
84
|
description: You simply set an OpenAI key, give a number of Boxcars to a Train, and
|
|
99
85
|
magic ensues when you run it.
|
|
100
86
|
email:
|
|
@@ -124,6 +110,7 @@ files:
|
|
|
124
110
|
- lib/boxcars/boxcar/google_search.rb
|
|
125
111
|
- lib/boxcars/boxcar/sql.rb
|
|
126
112
|
- lib/boxcars/boxcar/swagger.rb
|
|
113
|
+
- lib/boxcars/boxcar/vector_answer.rb
|
|
127
114
|
- lib/boxcars/boxcar/wikipedia_search.rb
|
|
128
115
|
- lib/boxcars/conversation.rb
|
|
129
116
|
- lib/boxcars/conversation_prompt.rb
|
|
@@ -180,7 +167,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
180
167
|
- !ruby/object:Gem::Version
|
|
181
168
|
version: '0'
|
|
182
169
|
requirements: []
|
|
183
|
-
rubygems_version: 3.
|
|
170
|
+
rubygems_version: 3.2.32
|
|
184
171
|
signing_key:
|
|
185
172
|
specification_version: 4
|
|
186
173
|
summary: Boxcars is a gem that enables you to create new systems with AI composability.
|