boxcars 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/Gemfile.lock +11 -9
- data/boxcars.gemspec +3 -6
- data/lib/boxcars/boxcar/active_record.rb +1 -1
- data/lib/boxcars/boxcar/engine_boxcar.rb +2 -2
- data/lib/boxcars/boxcar/sql.rb +1 -1
- data/lib/boxcars/boxcar/swagger.rb +1 -1
- data/lib/boxcars/boxcar/vector_answer.rb +71 -0
- data/lib/boxcars/boxcar.rb +1 -0
- data/lib/boxcars/train/zero_shot.rb +1 -1
- data/lib/boxcars/train.rb +1 -1
- data/lib/boxcars/vector_search.rb +1 -1
- data/lib/boxcars/vector_store/hnswlib/build_from_files.rb +5 -1
- data/lib/boxcars/version.rb +1 -1
- metadata +21 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74f14f8575e4670d2be6196c5196d41dd9728b5a44a0d4e199dfb705dfc77ed5
|
4
|
+
data.tar.gz: 06f2e8178f9696831870b5d8d5ea40bda8ba74a2fcc27283849f49124c51a06b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ff6e759f3d942f859de85763ffe9bc0ccf5636914d894205f78cffc99abb1b27dea47463f0fc968eba6746c055f796c95884d5e421486e06374fb0519eb8c63
|
7
|
+
data.tar.gz: 03bf42b1fbd6dac1eff4734bd2443a4a2a9f7cb931c99a2fe3f9453f5a23f0853b4eb26c817e1757a16629617eff4704f276b99fb165689ef6016ace86c2fb56
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
boxcars (0.2.12)
|
4
|
+
boxcars (0.2.13)
|
5
5
|
google_search_results (~> 2.2)
|
6
6
|
gpt4all (~> 0.0.4)
|
7
|
-
|
7
|
+
hnswlib (~> 0.8)
|
8
|
+
pgvector (~> 0.2)
|
9
|
+
ruby-openai (~> 4.1)
|
8
10
|
|
9
11
|
GEM
|
10
12
|
remote: https://rubygems.org/
|
@@ -142,7 +144,7 @@ GEM
|
|
142
144
|
diff-lcs (>= 1.2.0, < 2.0)
|
143
145
|
rspec-support (~> 3.12.0)
|
144
146
|
rspec-support (3.12.0)
|
145
|
-
rubocop (1.
|
147
|
+
rubocop (1.51.0)
|
146
148
|
json (~> 2.3)
|
147
149
|
parallel (~> 1.10)
|
148
150
|
parser (>= 3.2.0.0)
|
@@ -156,7 +158,7 @@ GEM
|
|
156
158
|
parser (>= 3.2.1.0)
|
157
159
|
rubocop-capybara (2.18.0)
|
158
160
|
rubocop (~> 1.41)
|
159
|
-
rubocop-factory_bot (2.
|
161
|
+
rubocop-factory_bot (2.23.1)
|
160
162
|
rubocop (~> 1.33)
|
161
163
|
rubocop-rake (0.6.0)
|
162
164
|
rubocop (~> 1.0)
|
@@ -164,7 +166,7 @@ GEM
|
|
164
166
|
rubocop (~> 1.33)
|
165
167
|
rubocop-capybara (~> 2.17)
|
166
168
|
rubocop-factory_bot (~> 2.22)
|
167
|
-
ruby-openai (4.
|
169
|
+
ruby-openai (4.1.0)
|
168
170
|
faraday (>= 1)
|
169
171
|
faraday-multipart (>= 1)
|
170
172
|
ruby-progressbar (1.13.0)
|
@@ -172,11 +174,11 @@ GEM
|
|
172
174
|
sawyer (0.9.2)
|
173
175
|
addressable (>= 2.3.5)
|
174
176
|
faraday (>= 0.17.3, < 3)
|
175
|
-
sqlite3 (1.6.
|
177
|
+
sqlite3 (1.6.3)
|
176
178
|
mini_portile2 (~> 2.8.0)
|
177
|
-
sqlite3 (1.6.
|
178
|
-
sqlite3 (1.6.
|
179
|
-
sqlite3 (1.6.
|
179
|
+
sqlite3 (1.6.3-arm64-darwin)
|
180
|
+
sqlite3 (1.6.3-x86_64-darwin)
|
181
|
+
sqlite3 (1.6.3-x86_64-linux)
|
180
182
|
strings-ansi (0.2.0)
|
181
183
|
timers (4.3.5)
|
182
184
|
traces (0.9.1)
|
data/boxcars.gemspec
CHANGED
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
32
|
|
33
|
-
# dev / test dependencies
|
34
|
-
spec.add_development_dependency "debug", "~> 1.1"
|
35
|
-
spec.add_development_dependency "dotenv", "~> 2.8"
|
36
|
-
spec.add_development_dependency "rspec", "~> 3.2"
|
37
|
-
|
38
33
|
# runtime dependencies
|
39
34
|
spec.add_dependency "google_search_results", "~> 2.2"
|
40
35
|
spec.add_dependency "gpt4all", "~> 0.0.4"
|
41
|
-
spec.add_dependency "
|
36
|
+
spec.add_dependency "hnswlib", "~> 0.8"
|
37
|
+
spec.add_dependency "ruby-openai", "~> 4.1"
|
38
|
+
spec.add_dependency "pgvector", "~> 0.2"
|
42
39
|
|
43
40
|
# For more information and examples about making a new gem, checkout our
|
44
41
|
# guide at: https://bundler.io/guides/creating_gem.html
|
@@ -114,14 +114,14 @@ module Boxcars
|
|
114
114
|
end
|
115
115
|
|
116
116
|
# @return Hash The additional variables for this boxcar.
|
117
|
-
def prediction_additional
|
117
|
+
def prediction_additional(_inputs)
|
118
118
|
{ stop: stop, top_k: top_k }
|
119
119
|
end
|
120
120
|
|
121
121
|
# @param inputs [Hash] The inputs to the boxcar.
|
122
122
|
# @return Hash The variables for this boxcar.
|
123
123
|
def prediction_variables(inputs)
|
124
|
-
prediction_additional.merge(inputs)
|
124
|
+
prediction_additional(inputs).merge(inputs)
|
125
125
|
end
|
126
126
|
|
127
127
|
# remove backticks or triple backticks from the code
|
data/lib/boxcars/boxcar/sql.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
+
module Boxcars
  # A Boxcar that answers a question from the content of vector search results.
  # For each prediction it runs a vector search for the question and hands the
  # matching document content to the engine through the :search_content variable.
  class VectorAnswer < EngineBoxcar
    # the description of this engine boxcar
    DESC = "useful for when you need to answer questions from vector search results."

    attr_reader :embeddings, :vector_documents, :search_content

    # @param embeddings [Hash] The vector embeddings to use for this boxcar.
    # @param vector_documents [Hash] The vector documents to use for this boxcar.
    # @param engine [Boxcars::Engine] The engine to use for this boxcar. Can be inherited from a train if nil.
    # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
    # @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
    def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
      the_prompt = prompt || my_prompt
      @embeddings = embeddings
      @vector_documents = vector_documents
      kwargs[:stop] ||= ["```output"]
      kwargs[:name] ||= "VectorAnswer"
      kwargs[:description] ||= DESC
      super(engine: engine, prompt: the_prompt, **kwargs)
    end

    # @param inputs [Hash] The inputs to use for the prediction. Reads inputs[:question].
    # @return [Hash] The additional variables for this boxcar: :search_content merged
    #   with whatever the parent class returns (parent values win on key clashes).
    def prediction_additional(inputs)
      { search_content: get_search_content(inputs[:question]) }.merge super
    end

    private

    # @param results [Array, nil] The results from the vector search.
    # @return [String] The content of the search results, separated by blank lines.
    #   An empty string when results is nil or empty.
    def get_results_content(results)
      (results || []).map { |result| result[:document].content }.join("\n\n")
    end

    # Return the content of the search results for count results.
    # The content is also remembered in @search_content.
    # @param question [String] The question to search for.
    # @param count [Integer] The number of results to return.
    # @return [String] The content of the search results.
    def get_search_content(question, count: 1)
      search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
      results = search.call query: question, count: count
      # Extract the text with get_results_content; calling get_search_content here
      # would search again with the results as the query and never terminate.
      @search_content = get_results_content(results)
    end

    # our template
    CTEMPLATE = [
      syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
           "```text\n%<search_content>s\n```\n\n",
           "Using the above, just answer the question as if you were answering directly."),
      user("%<question>s")
    ].freeze

    # The prompt to use for the engine. Built once and memoized.
    def my_prompt
      @conversation ||= Conversation.new(lines: CTEMPLATE)
      @my_prompt ||= ConversationPrompt.new(
        conversation: @conversation,
        input_variables: [:question],
        other_inputs: [:search_content],
        output_variables: [:answer])
    end
  end
end
|
data/lib/boxcars/boxcar.rb
CHANGED
data/lib/boxcars/train.rb
CHANGED
@@ -69,7 +69,7 @@ module Boxcars
|
|
69
69
|
# @return [Boxcars::Action] Action specifying what boxcar to use.
|
70
70
|
def plan(intermediate_steps, **kwargs)
|
71
71
|
thoughts = construct_scratchpad(intermediate_steps)
|
72
|
-
full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
|
72
|
+
full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
|
73
73
|
action = get_next_action(full_inputs)
|
74
74
|
return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
|
75
75
|
|
@@ -8,7 +8,7 @@ module Boxcars
|
|
8
8
|
@vector_documents = params[:vector_documents]
|
9
9
|
@embedding_tool = params[:embedding_tool] || :openai
|
10
10
|
@vector_search_instance = vector_search_instance
|
11
|
-
@openai_connection = params[:openai_connection] || default_connection(openai_access_token: openai_access_token)
|
11
|
+
@openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
|
12
12
|
end
|
13
13
|
|
14
14
|
def call(query:, count: 1)
|
@@ -22,7 +22,7 @@ module Boxcars
|
|
22
22
|
validate_params(@training_data_path, @index_file_path, split_chunk_size)
|
23
23
|
|
24
24
|
@json_doc_file_path = absolute_json_doc_file_path(@index_file_path, params[:json_doc_file_path])
|
25
|
-
@force_rebuild = params[:force_rebuild]
|
25
|
+
@force_rebuild = params.key?(:force_rebuild) ? params[:force_rebuild] : true
|
26
26
|
@hnsw_vectors = []
|
27
27
|
end
|
28
28
|
|
@@ -32,9 +32,13 @@ module Boxcars
|
|
32
32
|
else
|
33
33
|
puts "Building Hnswlib vector store..."
|
34
34
|
data = load_data_files(training_data_path)
|
35
|
+
Boxcars.debug("Loaded #{data.length} files from #{training_data_path}")
|
35
36
|
texts = split_text_into_chunks(data)
|
37
|
+
Boxcars.debug("Split #{data.length} files into #{texts.length} chunks")
|
36
38
|
vectors = generate_vectors(texts)
|
39
|
+
Boxcars.debug("Generated #{vectors.length} vectors")
|
37
40
|
add_vectors(vectors, texts)
|
41
|
+
Boxcars.debug("Added #{vectors.length} vectors to vector store")
|
38
42
|
save_vector_store
|
39
43
|
|
40
44
|
{
|
data/lib/boxcars/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: boxcars
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Sullivan
|
@@ -9,92 +9,78 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-05-
|
12
|
+
date: 2023-05-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
requirements:
|
18
|
-
- - "~>"
|
19
|
-
- !ruby/object:Gem::Version
|
20
|
-
version: '1.1'
|
21
|
-
type: :development
|
22
|
-
prerelease: false
|
23
|
-
version_requirements: !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - "~>"
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: '1.1'
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: dotenv
|
15
|
+
name: google_search_results
|
30
16
|
requirement: !ruby/object:Gem::Requirement
|
31
17
|
requirements:
|
32
18
|
- - "~>"
|
33
19
|
- !ruby/object:Gem::Version
|
34
|
-
version: '2.
|
35
|
-
type: :
|
20
|
+
version: '2.2'
|
21
|
+
type: :runtime
|
36
22
|
prerelease: false
|
37
23
|
version_requirements: !ruby/object:Gem::Requirement
|
38
24
|
requirements:
|
39
25
|
- - "~>"
|
40
26
|
- !ruby/object:Gem::Version
|
41
|
-
version: '2.
|
27
|
+
version: '2.2'
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
29
|
+
name: gpt4all
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
45
31
|
requirements:
|
46
32
|
- - "~>"
|
47
33
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
49
|
-
type: :
|
34
|
+
version: 0.0.4
|
35
|
+
type: :runtime
|
50
36
|
prerelease: false
|
51
37
|
version_requirements: !ruby/object:Gem::Requirement
|
52
38
|
requirements:
|
53
39
|
- - "~>"
|
54
40
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
41
|
+
version: 0.0.4
|
56
42
|
- !ruby/object:Gem::Dependency
|
57
|
-
name:
|
43
|
+
name: hnswlib
|
58
44
|
requirement: !ruby/object:Gem::Requirement
|
59
45
|
requirements:
|
60
46
|
- - "~>"
|
61
47
|
- !ruby/object:Gem::Version
|
62
|
-
version: '
|
48
|
+
version: '0.8'
|
63
49
|
type: :runtime
|
64
50
|
prerelease: false
|
65
51
|
version_requirements: !ruby/object:Gem::Requirement
|
66
52
|
requirements:
|
67
53
|
- - "~>"
|
68
54
|
- !ruby/object:Gem::Version
|
69
|
-
version: '
|
55
|
+
version: '0.8'
|
70
56
|
- !ruby/object:Gem::Dependency
|
71
|
-
name:
|
57
|
+
name: ruby-openai
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|
73
59
|
requirements:
|
74
60
|
- - "~>"
|
75
61
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
62
|
+
version: '4.1'
|
77
63
|
type: :runtime
|
78
64
|
prerelease: false
|
79
65
|
version_requirements: !ruby/object:Gem::Requirement
|
80
66
|
requirements:
|
81
67
|
- - "~>"
|
82
68
|
- !ruby/object:Gem::Version
|
83
|
-
version:
|
69
|
+
version: '4.1'
|
84
70
|
- !ruby/object:Gem::Dependency
|
85
|
-
name:
|
71
|
+
name: pgvector
|
86
72
|
requirement: !ruby/object:Gem::Requirement
|
87
73
|
requirements:
|
88
74
|
- - "~>"
|
89
75
|
- !ruby/object:Gem::Version
|
90
|
-
version: '
|
76
|
+
version: '0.2'
|
91
77
|
type: :runtime
|
92
78
|
prerelease: false
|
93
79
|
version_requirements: !ruby/object:Gem::Requirement
|
94
80
|
requirements:
|
95
81
|
- - "~>"
|
96
82
|
- !ruby/object:Gem::Version
|
97
|
-
version: '
|
83
|
+
version: '0.2'
|
98
84
|
description: You simply set an OpenAI key, give a number of Boxcars to a Train, and
|
99
85
|
magic ensues when you run it.
|
100
86
|
email:
|
@@ -124,6 +110,7 @@ files:
|
|
124
110
|
- lib/boxcars/boxcar/google_search.rb
|
125
111
|
- lib/boxcars/boxcar/sql.rb
|
126
112
|
- lib/boxcars/boxcar/swagger.rb
|
113
|
+
- lib/boxcars/boxcar/vector_answer.rb
|
127
114
|
- lib/boxcars/boxcar/wikipedia_search.rb
|
128
115
|
- lib/boxcars/conversation.rb
|
129
116
|
- lib/boxcars/conversation_prompt.rb
|
@@ -180,7 +167,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
180
167
|
- !ruby/object:Gem::Version
|
181
168
|
version: '0'
|
182
169
|
requirements: []
|
183
|
-
rubygems_version: 3.
|
170
|
+
rubygems_version: 3.2.32
|
184
171
|
signing_key:
|
185
172
|
specification_version: 4
|
186
173
|
summary: Boxcars is a gem that enables you to create new systems with AI composability.
|