boxcars 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69712266f9506d71ed3ad1fdcbfeef5a389bdbb7157d88e3f703f9a9b3ad8323
4
- data.tar.gz: d7a5d0796d2963b737dc018c644042fe1e744ec7bd230f581367baf84df60f76
3
+ metadata.gz: 74f14f8575e4670d2be6196c5196d41dd9728b5a44a0d4e199dfb705dfc77ed5
4
+ data.tar.gz: 06f2e8178f9696831870b5d8d5ea40bda8ba74a2fcc27283849f49124c51a06b
5
5
  SHA512:
6
- metadata.gz: 85876f5e1053bb8100795020c33da778a06668f9e3be856a8689d90d13728cef73e437ee6d5c0888b4a5483f698ee8288c061573a93fdff93559080e525c4254
7
- data.tar.gz: 99e15b3fe0c5d5277c5ed123e5569bca1f1ddfca3a1b3ec054504b855bc7a005d6eb9a8f7ba71989d16ded297959fa09cddf7b31879ff37df78df5dfb21b3240
6
+ metadata.gz: 9ff6e759f3d942f859de85763ffe9bc0ccf5636914d894205f78cffc99abb1b27dea47463f0fc968eba6746c055f796c95884d5e421486e06374fb0519eb8c63
7
+ data.tar.gz: 03bf42b1fbd6dac1eff4734bd2443a4a2a9f7cb931c99a2fe3f9453f5a23f0853b4eb26c817e1757a16629617eff4704f276b99fb165689ef6016ace86c2fb56
data/Gemfile CHANGED
@@ -5,6 +5,10 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in boxcars.gemspec
6
6
  gemspec
7
7
 
8
+ gem "debug", "~> 1.1"
9
+
10
+ gem "dotenv", "~> 2.8"
11
+
8
12
  gem "rake", "~> 13.0"
9
13
 
10
14
  gem "sqlite3", "~> 1.6"
data/Gemfile.lock CHANGED
@@ -1,10 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- boxcars (0.2.12)
4
+ boxcars (0.2.13)
5
5
  google_search_results (~> 2.2)
6
6
  gpt4all (~> 0.0.4)
7
- ruby-openai (~> 4.0)
7
+ hnswlib (~> 0.8)
8
+ pgvector (~> 0.2)
9
+ ruby-openai (~> 4.1)
8
10
 
9
11
  GEM
10
12
  remote: https://rubygems.org/
@@ -142,7 +144,7 @@ GEM
142
144
  diff-lcs (>= 1.2.0, < 2.0)
143
145
  rspec-support (~> 3.12.0)
144
146
  rspec-support (3.12.0)
145
- rubocop (1.50.2)
147
+ rubocop (1.51.0)
146
148
  json (~> 2.3)
147
149
  parallel (~> 1.10)
148
150
  parser (>= 3.2.0.0)
@@ -156,7 +158,7 @@ GEM
156
158
  parser (>= 3.2.1.0)
157
159
  rubocop-capybara (2.18.0)
158
160
  rubocop (~> 1.41)
159
- rubocop-factory_bot (2.22.0)
161
+ rubocop-factory_bot (2.23.1)
160
162
  rubocop (~> 1.33)
161
163
  rubocop-rake (0.6.0)
162
164
  rubocop (~> 1.0)
@@ -164,7 +166,7 @@ GEM
164
166
  rubocop (~> 1.33)
165
167
  rubocop-capybara (~> 2.17)
166
168
  rubocop-factory_bot (~> 2.22)
167
- ruby-openai (4.0.0)
169
+ ruby-openai (4.1.0)
168
170
  faraday (>= 1)
169
171
  faraday-multipart (>= 1)
170
172
  ruby-progressbar (1.13.0)
@@ -172,11 +174,11 @@ GEM
172
174
  sawyer (0.9.2)
173
175
  addressable (>= 2.3.5)
174
176
  faraday (>= 0.17.3, < 3)
175
- sqlite3 (1.6.2)
177
+ sqlite3 (1.6.3)
176
178
  mini_portile2 (~> 2.8.0)
177
- sqlite3 (1.6.2-arm64-darwin)
178
- sqlite3 (1.6.2-x86_64-darwin)
179
- sqlite3 (1.6.2-x86_64-linux)
179
+ sqlite3 (1.6.3-arm64-darwin)
180
+ sqlite3 (1.6.3-x86_64-darwin)
181
+ sqlite3 (1.6.3-x86_64-linux)
180
182
  strings-ansi (0.2.0)
181
183
  timers (4.3.5)
182
184
  traces (0.9.1)
data/boxcars.gemspec CHANGED
@@ -30,15 +30,12 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- # dev / test dependencies
34
- spec.add_development_dependency "debug", "~> 1.1"
35
- spec.add_development_dependency "dotenv", "~> 2.8"
36
- spec.add_development_dependency "rspec", "~> 3.2"
37
-
38
33
  # runtime dependencies
39
34
  spec.add_dependency "google_search_results", "~> 2.2"
40
35
  spec.add_dependency "gpt4all", "~> 0.0.4"
41
- spec.add_dependency "ruby-openai", "~> 4.0"
36
+ spec.add_dependency "hnswlib", "~> 0.8"
37
+ spec.add_dependency "ruby-openai", "~> 4.1"
38
+ spec.add_dependency "pgvector", "~> 0.2"
42
39
 
43
40
  # For more information and examples about making a new gem, checkout our
44
41
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -29,7 +29,7 @@ module Boxcars
29
29
  end
30
30
 
31
31
  # @return Hash The additional variables for this boxcar.
32
- def prediction_additional
32
+ def prediction_additional(_inputs)
33
33
  { model_info: model_info }.merge super
34
34
  end
35
35
 
@@ -114,14 +114,14 @@ module Boxcars
114
114
  end
115
115
 
116
116
  # @return Hash The additional variables for this boxcar.
117
- def prediction_additional
117
+ def prediction_additional(_inputs)
118
118
  { stop: stop, top_k: top_k }
119
119
  end
120
120
 
121
121
  # @param inputs [Hash] The inputs to the boxcar.
122
122
  # @return Hash The variables for this boxcar.
123
123
  def prediction_variables(inputs)
124
- prediction_additional.merge(inputs)
124
+ prediction_additional(inputs).merge(inputs)
125
125
  end
126
126
 
127
127
  # remove backticks or triple backticks from the code
@@ -26,7 +26,7 @@ module Boxcars
26
26
  end
27
27
 
28
28
  # @return Hash The additional variables for this boxcar.
29
- def prediction_additional
29
+ def prediction_additional(_inputs)
30
30
  { schema: schema, dialect: dialect }.merge super
31
31
  end
32
32
 
@@ -25,7 +25,7 @@ module Boxcars
25
25
  end
26
26
 
27
27
  # @return Hash The additional variables for this boxcar.
28
- def prediction_additional
28
+ def prediction_additional(_inputs)
29
29
  { swagger_url: swagger_url, context: context }.merge super
30
30
  end
31
31
 
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ # A Boxcar that answers a question using the content of vector search results
6
+ class VectorAnswer < EngineBoxcar
7
+ # the description of this engine boxcar
8
+ DESC = "useful for when you need to answer questions from vector search results."
9
+
10
+ attr_reader :embeddings, :vector_documents, :search_content
11
+
12
+ # @param embeddings [Hash] The vector embeddings to use for this boxcar.
13
+ # @param vector_documents [Hash] The vector documents to use for this boxcar.
14
+ # @param engine [Boxcars::Engine] The engine to use for this boxcar. Can be inherited from a train if nil.
15
+ # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
16
+ # @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
17
+ def initialize(embeddings:, vector_documents:, engine: nil, prompt: nil, **kwargs)
18
+ the_prompt = prompt || my_prompt
19
+ @embeddings = embeddings
20
+ @vector_documents = vector_documents
21
+ kwargs[:stop] ||= ["```output"]
22
+ kwargs[:name] ||= "VectorAnswer"
23
+ kwargs[:description] ||= DESC
24
+ super(engine: engine, prompt: the_prompt, **kwargs)
25
+ end
26
+
27
+ # @param inputs [Hash] The inputs to use for the prediction; inputs[:question] is used as the search query.
28
+ # @return Hash The additional variables for this boxcar, including :search_content.
29
+ def prediction_additional(inputs)
30
+ { search_content: get_search_content(inputs[:question]) }.merge super
31
+ end
32
+
33
+ private
34
+
35
+ # @param results [Array] The results from the vector search (nil yields an empty string).
36
+ # @return [String] The content of each result's :document, joined by blank lines.
37
+ def get_results_content(results)
38
+ results&.map do |result|
39
+ result[:document].content
40
+ end.to_a.join("\n\n")
41
+ end
42
+
43
+ # run a vector search for the question and store the joined content of the top count results in @search_content
44
+ # @param question [String] The question to search for.
45
+ # @param count [Integer] The number of results to return.
46
+ # @return [String] The content of the search results.
47
+ def get_search_content(question, count: 1)
48
+ search = Boxcars::VectorSearch.new(embeddings: embeddings, vector_documents: vector_documents)
49
+ results = search.call query: question, count: count
50
+ @search_content = get_results_content(results)
51
+ end
52
+
53
+ # our template
54
+ CTEMPLATE = [
55
+ syst("You are tasked with answering a question using these possibly relevant excerpts from a large volume of text:\n" \
56
+ "```text\n%<search_content>s\n```\n\n",
57
+ "Using the above, just answer the question as if you were answering directly."),
58
+ user("%<question>s")
59
+ ].freeze
60
+
61
+ # The prompt to use for the engine.
62
+ def my_prompt
63
+ @conversation ||= Conversation.new(lines: CTEMPLATE)
64
+ @my_prompt ||= ConversationPrompt.new(
65
+ conversation: @conversation,
66
+ input_variables: [:question],
67
+ other_inputs: [:search_content],
68
+ output_variables: [:answer])
69
+ end
70
+ end
71
+ end
@@ -158,3 +158,4 @@ require "boxcars/boxcar/swagger"
158
158
  require "boxcars/boxcar/active_record"
159
159
  require "boxcars/vector_store"
160
160
  require "boxcars/vector_search"
161
+ require "boxcars/boxcar/vector_answer"
@@ -19,7 +19,7 @@ module Boxcars
19
19
  end
20
20
 
21
21
  # @return Hash The additional variables for this boxcar.
22
- def prediction_additional
22
+ def prediction_additional(_inputs)
23
23
  { boxcar_names: boxcar_names, boxcar_descriptions: boxcar_descriptions }.merge super
24
24
  end
25
25
 
data/lib/boxcars/train.rb CHANGED
@@ -69,7 +69,7 @@ module Boxcars
69
69
  # @return [Boxcars::Action] Action specifying what boxcar to use.
70
70
  def plan(intermediate_steps, **kwargs)
71
71
  thoughts = construct_scratchpad(intermediate_steps)
72
- full_inputs = prediction_additional.merge(kwargs).merge(agent_scratchpad: thoughts)
72
+ full_inputs = prediction_additional(kwargs).merge(kwargs).merge(agent_scratchpad: thoughts)
73
73
  action = get_next_action(full_inputs)
74
74
  return TrainFinish.new({ output: action.boxcar_input }, log: action.log) if action.boxcar == finish_boxcar_name
75
75
 
@@ -8,7 +8,7 @@ module Boxcars
8
8
  @vector_documents = params[:vector_documents]
9
9
  @embedding_tool = params[:embedding_tool] || :openai
10
10
  @vector_search_instance = vector_search_instance
11
- @openai_connection = params[:openai_connection] || default_connection(openai_access_token: openai_access_token)
11
+ @openai_connection = params[:openai_connection] || default_connection(openai_access_token: params[:openai_access_token])
12
12
  end
13
13
 
14
14
  def call(query:, count: 1)
@@ -22,7 +22,7 @@ module Boxcars
22
22
  validate_params(@training_data_path, @index_file_path, split_chunk_size)
23
23
 
24
24
  @json_doc_file_path = absolute_json_doc_file_path(@index_file_path, params[:json_doc_file_path])
25
- @force_rebuild = params[:force_rebuild] || true
25
+ @force_rebuild = params.key?(:force_rebuild) ? params[:force_rebuild] : true
26
26
  @hnsw_vectors = []
27
27
  end
28
28
 
@@ -32,9 +32,13 @@ module Boxcars
32
32
  else
33
33
  puts "Building Hnswlib vector store..."
34
34
  data = load_data_files(training_data_path)
35
+ Boxcars.debug("Loaded #{data.length} files from #{training_data_path}")
35
36
  texts = split_text_into_chunks(data)
37
+ Boxcars.debug("Split #{data.length} files into #{texts.length} chunks")
36
38
  vectors = generate_vectors(texts)
39
+ Boxcars.debug("Generated #{vectors.length} vectors")
37
40
  add_vectors(vectors, texts)
41
+ Boxcars.debug("Added #{vectors.length} vectors to vector store")
38
42
  save_vector_store
39
43
 
40
44
  {
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Boxcars
4
4
  # The current version of the gem.
5
- VERSION = "0.2.12"
5
+ VERSION = "0.2.13"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boxcars
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Sullivan
@@ -9,92 +9,78 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-05-22 00:00:00.000000000 Z
12
+ date: 2023-05-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: debug
16
- requirement: !ruby/object:Gem::Requirement
17
- requirements:
18
- - - "~>"
19
- - !ruby/object:Gem::Version
20
- version: '1.1'
21
- type: :development
22
- prerelease: false
23
- version_requirements: !ruby/object:Gem::Requirement
24
- requirements:
25
- - - "~>"
26
- - !ruby/object:Gem::Version
27
- version: '1.1'
28
- - !ruby/object:Gem::Dependency
29
- name: dotenv
15
+ name: google_search_results
30
16
  requirement: !ruby/object:Gem::Requirement
31
17
  requirements:
32
18
  - - "~>"
33
19
  - !ruby/object:Gem::Version
34
- version: '2.8'
35
- type: :development
20
+ version: '2.2'
21
+ type: :runtime
36
22
  prerelease: false
37
23
  version_requirements: !ruby/object:Gem::Requirement
38
24
  requirements:
39
25
  - - "~>"
40
26
  - !ruby/object:Gem::Version
41
- version: '2.8'
27
+ version: '2.2'
42
28
  - !ruby/object:Gem::Dependency
43
- name: rspec
29
+ name: gpt4all
44
30
  requirement: !ruby/object:Gem::Requirement
45
31
  requirements:
46
32
  - - "~>"
47
33
  - !ruby/object:Gem::Version
48
- version: '3.2'
49
- type: :development
34
+ version: 0.0.4
35
+ type: :runtime
50
36
  prerelease: false
51
37
  version_requirements: !ruby/object:Gem::Requirement
52
38
  requirements:
53
39
  - - "~>"
54
40
  - !ruby/object:Gem::Version
55
- version: '3.2'
41
+ version: 0.0.4
56
42
  - !ruby/object:Gem::Dependency
57
- name: google_search_results
43
+ name: hnswlib
58
44
  requirement: !ruby/object:Gem::Requirement
59
45
  requirements:
60
46
  - - "~>"
61
47
  - !ruby/object:Gem::Version
62
- version: '2.2'
48
+ version: '0.8'
63
49
  type: :runtime
64
50
  prerelease: false
65
51
  version_requirements: !ruby/object:Gem::Requirement
66
52
  requirements:
67
53
  - - "~>"
68
54
  - !ruby/object:Gem::Version
69
- version: '2.2'
55
+ version: '0.8'
70
56
  - !ruby/object:Gem::Dependency
71
- name: gpt4all
57
+ name: ruby-openai
72
58
  requirement: !ruby/object:Gem::Requirement
73
59
  requirements:
74
60
  - - "~>"
75
61
  - !ruby/object:Gem::Version
76
- version: 0.0.4
62
+ version: '4.1'
77
63
  type: :runtime
78
64
  prerelease: false
79
65
  version_requirements: !ruby/object:Gem::Requirement
80
66
  requirements:
81
67
  - - "~>"
82
68
  - !ruby/object:Gem::Version
83
- version: 0.0.4
69
+ version: '4.1'
84
70
  - !ruby/object:Gem::Dependency
85
- name: ruby-openai
71
+ name: pgvector
86
72
  requirement: !ruby/object:Gem::Requirement
87
73
  requirements:
88
74
  - - "~>"
89
75
  - !ruby/object:Gem::Version
90
- version: '4.0'
76
+ version: '0.2'
91
77
  type: :runtime
92
78
  prerelease: false
93
79
  version_requirements: !ruby/object:Gem::Requirement
94
80
  requirements:
95
81
  - - "~>"
96
82
  - !ruby/object:Gem::Version
97
- version: '4.0'
83
+ version: '0.2'
98
84
  description: You simply set an OpenAI key, give a number of Boxcars to a Train, and
99
85
  magic ensues when you run it.
100
86
  email:
@@ -124,6 +110,7 @@ files:
124
110
  - lib/boxcars/boxcar/google_search.rb
125
111
  - lib/boxcars/boxcar/sql.rb
126
112
  - lib/boxcars/boxcar/swagger.rb
113
+ - lib/boxcars/boxcar/vector_answer.rb
127
114
  - lib/boxcars/boxcar/wikipedia_search.rb
128
115
  - lib/boxcars/conversation.rb
129
116
  - lib/boxcars/conversation_prompt.rb
@@ -180,7 +167,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
167
  - !ruby/object:Gem::Version
181
168
  version: '0'
182
169
  requirements: []
183
- rubygems_version: 3.4.10
170
+ rubygems_version: 3.2.32
184
171
  signing_key:
185
172
  specification_version: 4
186
173
  summary: Boxcars is a gem that enables you to create new systems with AI composability.