boxcars 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69712266f9506d71ed3ad1fdcbfeef5a389bdbb7157d88e3f703f9a9b3ad8323
4
- data.tar.gz: d7a5d0796d2963b737dc018c644042fe1e744ec7bd230f581367baf84df60f76
3
+ metadata.gz: 74f14f8575e4670d2be6196c5196d41dd9728b5a44a0d4e199dfb705dfc77ed5
4
+ data.tar.gz: 06f2e8178f9696831870b5d8d5ea40bda8ba74a2fcc27283849f49124c51a06b
5
5
  SHA512:
6
- metadata.gz: 85876f5e1053bb8100795020c33da778a06668f9e3be856a8689d90d13728cef73e437ee6d5c0888b4a5483f698ee8288c061573a93fdff93559080e525c4254
7
- data.tar.gz: 99e15b3fe0c5d5277c5ed123e5569bca1f1ddfca3a1b3ec054504b855bc7a005d6eb9a8f7ba71989d16ded297959fa09cddf7b31879ff37df78df5dfb21b3240
6
+ metadata.gz: 9ff6e759f3d942f859de85763ffe9bc0ccf5636914d894205f78cffc99abb1b27dea47463f0fc968eba6746c055f796c95884d5e421486e06374fb0519eb8c63
7
+ data.tar.gz: 03bf42b1fbd6dac1eff4734bd2443a4a2a9f7cb931c99a2fe3f9453f5a23f0853b4eb26c817e1757a16629617eff4704f276b99fb165689ef6016ace86c2fb56
data/Gemfile CHANGED
@@ -5,6 +5,10 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in boxcars.gemspec
6
6
  gemspec
7
7
 
8
+ gem "debug", "~> 1.1"
9
+
10
+ gem "dotenv", "~> 2.8"
11
+
8
12
  gem "rake", "~> 13.0"
9
13
 
10
14
  gem "sqlite3", "~> 1.6"
data/Gemfile.lock CHANGED
@@ -1,10 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- boxcars (0.2.12)
4
+ boxcars (0.2.13)
5
5
  google_search_results (~> 2.2)
6
6
  gpt4all (~> 0.0.4)
7
- ruby-openai (~> 4.0)
7
+ hnswlib (~> 0.8)
8
+ pgvector (~> 0.2)
9
+ ruby-openai (~> 4.1)
8
10
 
9
11
  GEM
10
12
  remote: https://rubygems.org/
@@ -142,7 +144,7 @@ GEM
142
144
  diff-lcs (>= 1.2.0, < 2.0)
143
145
  rspec-support (~> 3.12.0)
144
146
  rspec-support (3.12.0)
145
- rubocop (1.50.2)
147
+ rubocop (1.51.0)
146
148
  json (~> 2.3)
147
149
  parallel (~> 1.10)
148
150
  parser (>= 3.2.0.0)
@@ -156,7 +158,7 @@ GEM
156
158
  parser (>= 3.2.1.0)
157
159
  rubocop-capybara (2.18.0)
158
160
  rubocop (~> 1.41)
159
- rubocop-factory_bot (2.22.0)
161
+ rubocop-factory_bot (2.23.1)
160
162
  rubocop (~> 1.33)
161
163
  rubocop-rake (0.6.0)
162
164
  rubocop (~> 1.0)
@@ -164,7 +166,7 @@ GEM
164
166
  rubocop (~> 1.33)
165
167
  rubocop-capybara (~> 2.17)
166
168
  rubocop-factory_bot (~> 2.22)
167
- ruby-openai (4.0.0)
169
+ ruby-openai (4.1.0)
168
170
  faraday (>= 1)
169
171
  faraday-multipart (>= 1)
170
172
  ruby-progressbar (1.13.0)
@@ -172,11 +174,11 @@ GEM
172
174
  sawyer (0.9.2)
173
175
  addressable (>= 2.3.5)
174
176
  faraday (>= 0.17.3, < 3)
175
- sqlite3 (1.6.2)
177
+ sqlite3 (1.6.3)
176
178
  mini_portile2 (~> 2.8.0)
177
- sqlite3 (1.6.2-arm64-darwin)
178
- sqlite3 (1.6.2-x86_64-darwin)
179
- sqlite3 (1.6.2-x86_64-linux)
179
+ sqlite3 (1.6.3-arm64-darwin)
180
+ sqlite3 (1.6.3-x86_64-darwin)
181
+ sqlite3 (1.6.3-x86_64-linux)
180
182
  strings-ansi (0.2.0)
181
183
  timers (4.3.5)
182
184
  traces (0.9.1)
data/boxcars.gemspec CHANGED
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- # dev / test dependencies
34
- spec.add_development_dependency "debug", "~> 1.1"
35
- spec.add_development_dependency "dotenv", "~> 2.8"
36
- spec.add_development_dependency "rspec", "~> 3.2"
37
-
38
33
  # runtime dependencies
39
34
  spec.add_dependency "google_search_results", "~> 2.2"
40
35
  spec.add_dependency "gpt4all", "~> 0.0.4"
41
- spec.add_dependency "ruby-openai", "~> 4.0"
36
+ spec.add_dependency "hnswlib", "~> 0.8"
37
+ spec.add_dependency "ruby-openai", "~> 4.1"
38
+ spec.add_dependency "pgvector", "~> 0.2"
42
39
 
43
40
  # For more information and examples about making a new gem, checkout our
44
41
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -29,7 +29,7 @@ module Boxcars
29
29
  end
30
30
 
31
31
  # @return Hash The additional variables for this boxcar.
32
- def prediction_additional
32
+ def prediction_additional(_inputs)
33
33
  { model_info: model_info }.merge super
34
34
  end
35
35
 
@@ -114,14 +114,14 @@ module Boxcars
114
114
  end
115
115
 
116
116
  # @return Hash The additional variables for this boxcar.
117
- def prediction_additional
117
+ def prediction_additional(_inputs)
118
118
  { stop: stop, top_k: top_k }
119
119
  end
120
120
 
121
121
  # @param inputs [Hash] The inputs to the boxcar.
122
122
  # @return Hash The variables for this boxcar.
123
123
  def prediction_variables(inputs)
124
- prediction_additional.merge(inputs)
124
+ prediction_additional(inputs).merge(inputs)
125
125
  end
126
126
 
127
127
  # remove backticks or triple backticks from the code
@@ -26,7 +26,7 @@ module Boxcars
26
26
  end
27
27
 
28
28
  # @return Hash The additional variables for this boxcar.
29
- def prediction_additional
29
+ def prediction_additional(_inputs)
30
30
  { schema: schema, dialect: dialect }.merge super
31
31
  end
32
32
 
@@ -25,7 +25,7 @@ module Boxcars
25
25
  end
26
26
 
27
27
  # @return Hash The additional variables for this boxcar.
28
- def prediction_additional
28
+ def prediction_additional(_inputs)
29
29
  { swagger_url: swagger_url, context: context }.merge super
30
30
  end
31
31
 
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ # A Boxcar that interprets a prompt and executes ruby code to do math
6
+ class VectorAnswer < EngineBoxcar
7
+ # the description of this engine boxcar
8
+ DESC = "useful for when you need to answer questions from vector search results."
9
+
10
+ attr_reader :embeddings, :vector_documents, :search_content
11
+
12
+ # @param embeddings [Hash] The vector embeddings to use for this boxcar.
13
+ # @param vector_documents [Hash] The vector documents to use for this boxcar.
14
+ # @param engine [Boxcars::Engine] The engine to user for this boxcar. Can be inherited from a train if nil.
15
+ # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
16
+ # @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
17
+ def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
18
+ the_prompt = prompt || my_prompt
19
+ @embeddings = embeddings
20
+ @vector_documents = vector_documents
21
+ kwargs[:stop] ||= ["```output"]
22
+ kwargs[:name] ||= "VectorAnswer"
23
+ kwargs[:description] ||= DESC
24
+ super(engine: engine, prompt: the_prompt, **kwargs)
25
+ end
26
+
27
+ # @param inputs [Hash] The inputs to use for the prediction.
28
+ # @return Hash The additional variables for this boxcar.
29
+ def prediction_additional(inputs)
30
+ { search_content: get_search_content(inputs[:question]) }.merge super
31
+ end
32
+
33
+ private
34
+
35
+ # @param results [Array] The results from the vector search.
36
+ # @return [String] The content of the search results.
37
+ def get_results_content(results)
38
+ results&.map do |result|
39
+ result[:document].content
40
+ end.to_a.join("\n\n")
41
+ end
42
+
43
+ # return the content of the search results for count results
44
+ # @params question [String] The question to search for.
45
+ # @params count [Integer] The number of results to return.
46
+ # @return [String] The content of the search results.
47
+ def get_search_content(question, count: 1)
48
+ search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
49
+ results = search.call query: question, count: count
50
+ @search_content = get_search_content(results)
51
+ end
52
+
53
+ # our template
54
+ CTEMPLATE = [
55
+ syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
56
+ "```text\n%<search_content>s\n```\n\n",
57
+ "Using the above, just answer the question as if you were answering directly."),
58
+ user("%<question>s")
59
+ ].freeze
60
+
61
+ # The prompt to use for the engine.
62
+ def my_prompt
63
+ @conversation ||= Conversation.new(lines: CTEMPLATE)
64
+ @my_prompt ||= ConversationPrompt.new(
65
+ conversation: @conversation,
66
+ input_variables: [:question],
67
+ other_inputs: [:search_content],
68
+ output_variables: [:answer])
69
+ end
70
+ end
71
+ end
@@ -158,3 +158,4 @@ require "boxcars/boxcar/swagger"
158
158
  require "boxcars/boxcar/active_record"
159
159
  require "boxcars/vector_store"
160
160
  require "boxcars/vector_search"
161
+ require "boxcars/boxcar/vector_answer"
@@ -19,7 +19,7 @@ module Boxcars
19
19
  end
20
20
 
21
21
  # @return Hash The additional variables for this boxcar.
22
- def prediction_additional
22
+ def prediction_additional(_inputs)
23
23
  { boxcar_names: boxcar_names, boxcar_descriptions: boxcar_descriptions }.merge super
24
24
  end
25
25
 
data/lib/boxcars/train.rb CHANGED
@@ -69,7 +69,7 @@ module Boxcars
69
69
  # @return [Boxcars::Action] Action specifying what boxcar to use.
70
70
  def plan(intermediate_steps, **kwargs)
71
71
  thoughts = construct_scratchpad(intermediate_steps)
72
- full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
72
+ full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
73
73
  action = get_next_action(full_inputs)
74
74
  return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
75
75
 
@@ -8,7 +8,7 @@ module Boxcars
8
8
  @vector_documents = params[:vector_documents]
9
9
  @embedding_tool = params[:embedding_tool] || :openai
10
10
  @vector_search_instance = vector_search_instance
11
- @openai_connection = params[:openai_connection] || default_connection(openai_access_token: openai_access_token)
11
+ @openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
12
12
  end
13
13
 
14
14
  def call(query:, count: 1)
@@ -22,7 +22,7 @@ module Boxcars
22
22
  validate_params(@training_data_path, @index_file_path, split_chunk_size)
23
23
 
24
24
  @json_doc_file_path = absolute_json_doc_file_path(@index_file_path, params[:json_doc_file_path])
25
- @force_rebuild = params[:force_rebuild] || true
25
+ @force_rebuild = params.key?(:force_rebuild) ? params[:force_rebuild] : true
26
26
  @hnsw_vectors = []
27
27
  end
28
28
 
@@ -32,9 +32,13 @@ module Boxcars
32
32
  else
33
33
  puts "Building Hnswlib vector store..."
34
34
  data = load_data_files(training_data_path)
35
+ Boxcars.debug("Loaded #{data.length} files from #{training_data_path}")
35
36
  texts = split_text_into_chunks(data)
37
+ Boxcars.debug("Split #{data.length} files into #{texts.length} chunks")
36
38
  vectors = generate_vectors(texts)
39
+ Boxcars.debug("Generated #{vectors.length} vectors")
37
40
  add_vectors(vectors, texts)
41
+ Boxcars.debug("Added #{vectors.length} vectors to vector store")
38
42
  save_vector_store
39
43
 
40
44
  {
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Boxcars
4
4
  # The current version of the gem.
5
- VERSION = "0.2.12"
5
+ VERSION = "0.2.13"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boxcars
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Sullivan
@@ -9,92 +9,78 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-05-22 00:00:00.000000000 Z
12
+ date: 2023-05-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: debug
16
- requirement: !ruby/object:Gem::Requirement
17
- requirements:
18
- - - "~>"
19
- - !ruby/object:Gem::Version
20
- version: '1.1'
21
- type: :development
22
- prerelease: false
23
- version_requirements: !ruby/object:Gem::Requirement
24
- requirements:
25
- - - "~>"
26
- - !ruby/object:Gem::Version
27
- version: '1.1'
28
- - !ruby/object:Gem::Dependency
29
- name: dotenv
15
+ name: google_search_results
30
16
  requirement: !ruby/object:Gem::Requirement
31
17
  requirements:
32
18
  - - "~>"
33
19
  - !ruby/object:Gem::Version
34
- version: '2.8'
35
- type: :development
20
+ version: '2.2'
21
+ type: :runtime
36
22
  prerelease: false
37
23
  version_requirements: !ruby/object:Gem::Requirement
38
24
  requirements:
39
25
  - - "~>"
40
26
  - !ruby/object:Gem::Version
41
- version: '2.8'
27
+ version: '2.2'
42
28
  - !ruby/object:Gem::Dependency
43
- name: rspec
29
+ name: gpt4all
44
30
  requirement: !ruby/object:Gem::Requirement
45
31
  requirements:
46
32
  - - "~>"
47
33
  - !ruby/object:Gem::Version
48
- version: '3.2'
49
- type: :development
34
+ version: 0.0.4
35
+ type: :runtime
50
36
  prerelease: false
51
37
  version_requirements: !ruby/object:Gem::Requirement
52
38
  requirements:
53
39
  - - "~>"
54
40
  - !ruby/object:Gem::Version
55
- version: '3.2'
41
+ version: 0.0.4
56
42
  - !ruby/object:Gem::Dependency
57
- name: google_search_results
43
+ name: hnswlib
58
44
  requirement: !ruby/object:Gem::Requirement
59
45
  requirements:
60
46
  - - "~>"
61
47
  - !ruby/object:Gem::Version
62
- version: '2.2'
48
+ version: '0.8'
63
49
  type: :runtime
64
50
  prerelease: false
65
51
  version_requirements: !ruby/object:Gem::Requirement
66
52
  requirements:
67
53
  - - "~>"
68
54
  - !ruby/object:Gem::Version
69
- version: '2.2'
55
+ version: '0.8'
70
56
  - !ruby/object:Gem::Dependency
71
- name: gpt4all
57
+ name: ruby-openai
72
58
  requirement: !ruby/object:Gem::Requirement
73
59
  requirements:
74
60
  - - "~>"
75
61
  - !ruby/object:Gem::Version
76
- version: 0.0.4
62
+ version: '4.1'
77
63
  type: :runtime
78
64
  prerelease: false
79
65
  version_requirements: !ruby/object:Gem::Requirement
80
66
  requirements:
81
67
  - - "~>"
82
68
  - !ruby/object:Gem::Version
83
- version: 0.0.4
69
+ version: '4.1'
84
70
  - !ruby/object:Gem::Dependency
85
- name: ruby-openai
71
+ name: pgvector
86
72
  requirement: !ruby/object:Gem::Requirement
87
73
  requirements:
88
74
  - - "~>"
89
75
  - !ruby/object:Gem::Version
90
- version: '4.0'
76
+ version: '0.2'
91
77
  type: :runtime
92
78
  prerelease: false
93
79
  version_requirements: !ruby/object:Gem::Requirement
94
80
  requirements:
95
81
  - - "~>"
96
82
  - !ruby/object:Gem::Version
97
- version: '4.0'
83
+ version: '0.2'
98
84
  description: You simply set an OpenAI key, give a number of Boxcars to a Train, and
99
85
  magic ensues when you run it.
100
86
  email:
@@ -124,6 +110,7 @@ files:
124
110
  - lib/boxcars/boxcar/google_search.rb
125
111
  - lib/boxcars/boxcar/sql.rb
126
112
  - lib/boxcars/boxcar/swagger.rb
113
+ - lib/boxcars/boxcar/vector_answer.rb
127
114
  - lib/boxcars/boxcar/wikipedia_search.rb
128
115
  - lib/boxcars/conversation.rb
129
116
  - lib/boxcars/conversation_prompt.rb
@@ -180,7 +167,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
167
  - !ruby/object:Gem::Version
181
168
  version: '0'
182
169
  requirements: []
183
- rubygems_version: 3.4.10
170
+ rubygems_version: 3.2.32
184
171
  signing_key:
185
172
  specification_version: 4
186
173
  summary: Boxcars is a gem that enables you to create new systems with AI composability.