boxcars 0.2.5 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +39 -1
- data/README.md +22 -19
- data/boxcars.gemspec +1 -0
- data/lib/boxcars/boxcar/active_record.rb +21 -4
- data/lib/boxcars/boxcar/embeddings/document.rb +14 -0
- data/lib/boxcars/boxcar/embeddings/embed_via_open_ai.rb +50 -0
- data/lib/boxcars/boxcar/embeddings/hnswlib/build_vector_store.rb +159 -0
- data/lib/boxcars/boxcar/embeddings/hnswlib/hnswlib_config.rb +56 -0
- data/lib/boxcars/boxcar/embeddings/hnswlib/hnswlib_search.rb +54 -0
- data/lib/boxcars/boxcar/embeddings/hnswlib/save_to_hnswlib.rb +80 -0
- data/lib/boxcars/boxcar/embeddings/similarity_search.rb +51 -0
- data/lib/boxcars/boxcar/embeddings/split_text.rb +104 -0
- data/lib/boxcars/boxcar/embeddings.rb +31 -0
- data/lib/boxcars/boxcar/engine_boxcar.rb +1 -1
- data/lib/boxcars/boxcar/google_search.rb +13 -5
- data/lib/boxcars/boxcar/sql.rb +4 -2
- data/lib/boxcars/boxcar/swagger.rb +80 -0
- data/lib/boxcars/boxcar/wikipedia_search.rb +39 -0
- data/lib/boxcars/boxcar.rb +6 -1
- data/lib/boxcars/embedding.rb +11 -0
- data/lib/boxcars/engine/gpt4all_eng.rb +56 -0
- data/lib/boxcars/engine.rb +1 -0
- data/lib/boxcars/ruby_repl.rb +1 -0
- data/lib/boxcars/version.rb +1 -1
- data/lib/boxcars.rb +1 -0
- metadata +29 -2
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'hnswlib'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
module Boxcars
  module Embeddings
    module Hnswlib
      # Queries an hnswlib index for nearest neighbours and resolves every hit
      # to the document stored under the same doc_id in a JSON document file.
      class HnswlibSearch
        # @param vector_store [::Hnswlib::HierarchicalNSW] The index to query.
        # @param options [Hash] Optional settings:
        #   :json_doc_path - path of the JSON file that holds the documents,
        #   :num_neighbors - number of neighbours to request (defaults to 1).
        # @raise [ArgumentError] if vector_store is not an Hnswlib::HierarchicalNSW.
        def initialize(vector_store:, options: {})
          validate_params(vector_store)
          @vector_store = vector_store
          @json_doc_path = options[:json_doc_path]
          @num_neighbors = options[:num_neighbors] || 1
        end

        # @param query The query vector; handed to the index's search_knn unchanged.
        # @return [Array<Hash>] One { document:, distance: } hash per hit whose
        #   doc_id is present in the JSON document file; unknown ids are dropped.
        # @raise [ArgumentError] if a document has to be looked up but no
        #   :json_doc_path was given.
        def call(query)
          search(query)
        end

        private

        attr_reader :json_doc_path, :vector_store, :num_neighbors

        def validate_params(vector_store)
          raise_error 'vector_store must be an Hnswlib::HierarchicalNSW' unless vector_store.is_a?(::Hnswlib::HierarchicalNSW)
        end

        # NOTE(review): this assumes search_knn yields [doc_id, distance] pairs -
        # confirm against the hnswlib gem's return shape.
        def search(query)
          raw_results = vector_store.search_knn(query, num_neighbors)
          raw_results.map { |doc_id, distance| lookup_embedding(doc_id, distance) }.compact
        end

        def lookup_embedding(doc_id, distance)
          embedding_data = documents_by_id[doc_id]
          return unless embedding_data

          { document: embedding_data[:document], distance: distance }
        end

        # Index of the parsed documents keyed by doc_id, built once so each hit
        # is a hash lookup instead of a scan over every stored document.
        # The first entry wins when a doc_id occurs more than once.
        def documents_by_id
          @documents_by_id ||= parsed_data.each_with_object({}) do |entry, index|
            index[entry[:doc_id]] ||= entry
          end
        end

        # The JSON document file is read lazily, and only once.
        def parsed_data
          raise_error 'json_doc_path is required to look up documents' unless json_doc_path

          @parsed_data ||= JSON.parse(
            File.read(json_doc_path),
            symbolize_names: true
          )
        end

        def raise_error(message)
          raise ArgumentError, message
        end
      end
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'hnswlib'
|
4
|
+
require 'json'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
module Boxcars
  module Embeddings
    module Hnswlib
      # Adds pre-computed document embeddings to a new hnswlib index and writes
      # the index, the documents (as JSON) and the index config to disk.
      class SaveToHnswlib
        include Embeddings

        # @param document_embeddings [Array] An array of hashes containing the document id, document text, and embedding.
        # @param index_file_path [String] The path to the index file.
        # @param hnswlib_config [Boxcars::Embeddings::Hnswlib::Config] The config object for the hnswlib index.
        # @param json_doc_file_path [String] Optional. The path to the json file containing the document text.
        #   Defaults to index_file_path with a trailing ".bin" replaced by ".json".
        # @raise [Boxcars::ValueError] if an argument has the wrong type or a
        #   target file's parent directory does not exist.
        def initialize(document_embeddings:, index_file_path:, hnswlib_config:, json_doc_file_path: nil)
          @document_embeddings = document_embeddings
          @index_file_path = index_file_path
          @hnswlib_config = hnswlib_config

          # Type checks run before the arguments are used, so bad input is
          # reported as a ValueError rather than e.g. a NoMethodError from gsub.
          validate_types
          @json_doc_file_path = json_doc_file_path || index_file_path.gsub(/\.bin$/, '.json')
          validate_directories

          @index = ::Hnswlib::HnswIndex.new(
            n_features: hnswlib_config.dim,
            max_item: hnswlib_config.max_item,
            metric: hnswlib_config.metric
          )
        end

        # Adds every embedding to the index, then writes all output files.
        # @return whatever the index's save call returns.
        def call
          document_embeddings.each do |embedding|
            index.add_item(embedding[:doc_id], embedding[:embedding])
          end

          write_files(index, document_records)
        end

        private

        attr_reader :index, :document_embeddings, :index_file_path, :json_doc_file_path, :hnswlib_config

        # The entries written to the JSON document file: id, embedding and text.
        def document_records
          document_embeddings.map do |embedding|
            { doc_id: embedding[:doc_id], embedding: embedding[:embedding], document: embedding[:document] }
          end
        end

        # Writes the JSON documents, then hnswlib_config.json next to the index
        # file, and finally the index itself.
        def write_files(index, document_texts)
          FileUtils.mkdir_p(File.dirname(json_doc_file_path))
          File.write(json_doc_file_path, document_texts.to_json)

          FileUtils.mkdir_p(File.dirname(index_file_path))
          File.write("#{File.dirname(index_file_path)}/hnswlib_config.json", hnswlib_config.to_json)

          index.save(index_file_path)
        end

        def validate_types
          raise_error("document_embeddings must be an array") unless document_embeddings.is_a?(Array)
          raise_error("dim must be an integer") unless hnswlib_config.dim.is_a?(Integer)
          raise_error("index_file_path must be a string") unless index_file_path.is_a?(String)
        end

        def validate_directories
          [index_file_path, json_doc_file_path].each do |path|
            check_parent_directory(path)
          end
        end

        def check_parent_directory(path)
          return unless path

          parent_dir = File.dirname(path)
          raise_error('parent directory must exist') unless File.directory?(parent_dir)
        end

        def raise_error(message)
          raise ::Boxcars::ValueError, message
        end
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'hnswlib'
|
4
|
+
|
5
|
+
module Boxcars
  module Embeddings
    # Embeds a text query and runs it against a vector store to find the
    # stored documents closest to it.
    class SimilaritySearch
      # @param embeddings [String] Passed to the hnswlib search as its json_doc_path.
      # @param vector_store [::Hnswlib::HierarchicalNSW] The index to search.
      # @param openai_connection The connection handed to EmbedViaOpenAI when embedding the query.
      # @param num_neighbors [Integer] How many neighbours to request per query. Defaults to 2.
      # @raise [ArgumentError] if the vector store type is not supported.
      def initialize(embeddings:, vector_store:, openai_connection:, num_neighbors: 2)
        @embeddings = embeddings
        @vector_store = vector_store
        @openai_connection = openai_connection
        @num_neighbors = num_neighbors
        @similarity_search_instance = create_similarity_search_instance
      end

      # @param query [String] The non-empty text to search for.
      # @return The result of the underlying search object for the embedded query.
      # @raise [ArgumentError] if query is not a String or is empty.
      def call(query:)
        validate_query(query)
        query_vector = convert_query_to_vector(query)
        @similarity_search_instance.call(query_vector)
      end

      private

      attr_reader :embeddings, :vector_store, :openai_connection, :num_neighbors

      def validate_query(query)
        raise_error 'query must be a string' unless query.is_a?(String)
        raise_error 'query must not be empty' if query.empty?
      end

      # Embeds the single query text and returns the :embedding of the first result.
      def convert_query_to_vector(query)
        Boxcars::Embeddings::EmbedViaOpenAI.call(texts: [query], openai_connection: openai_connection).first[:embedding]
      end

      # Picks the search implementation that matches the vector store's class.
      def create_similarity_search_instance
        case vector_store
        when ::Hnswlib::HierarchicalNSW
          Boxcars::Embeddings::Hnswlib::HnswlibSearch.new(
            vector_store: vector_store,
            options: { json_doc_path: embeddings, num_neighbors: num_neighbors }
          )
        else
          raise_error 'Unsupported vector store provided'
        end
      end

      def raise_error(message)
        raise ArgumentError, message
      end
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Boxcars
  module Embeddings
    # Split a text into chunks of a given size.
    #
    # The text is cut at every separator and the pieces are merged back into
    # chunks. Sizes are measured as the summed length of the pieces only; the
    # separators re-inserted when joining are not counted.
    class SplitText
      include Embeddings

      attr_reader :separator, :chunk_size, :chunk_overlap, :text

      # @param separator [String] The string to use to split the text.
      # @param chunk_size [Integer] The size of each chunk.
      # @param chunk_overlap [Integer] The amount of overlap between chunks.
      #   Must be zero or more and smaller than chunk_size.
      # @param text [String] The text to split.
      # @raise [Boxcars::ValueError] if any argument is invalid.
      def initialize(separator: "Search", chunk_size: 7, chunk_overlap: 3, text: "")
        validate_params(separator, chunk_size, chunk_overlap, text)

        @separator = separator
        @chunk_size = chunk_size
        @chunk_overlap = chunk_overlap
        @text = text
      end

      # @return [Array<String>] The merged chunks, ordered by Array#sort
      #   (i.e. not necessarily in document order).
      def call
        splits = text.split(separator)
        merge_splits(splits, separator).sort
      end

      private

      def validate_params(separator, chunk_size, chunk_overlap, text)
        raise_error("separator must be a string") unless separator.is_a?(String)
        raise_error("chunk_size must be an integer") unless chunk_size.is_a?(Integer)
        raise_error("chunk_overlap must be an integer") unless chunk_overlap.is_a?(Integer)
        raise_error("text must be a string") unless text.is_a?(String)
        # A negative overlap would keep remove_overlap looping after the pending
        # pieces are exhausted and fail on nil, so reject it here.
        raise_error("chunk_overlap must not be negative") if chunk_overlap.negative?
        raise_error("chunk_overlap must be less than chunk_size") if chunk_overlap >= chunk_size
      end

      def raise_error(message)
        raise ::Boxcars::ValueError, message
      end

      # Walks the pieces in order, collecting them in current_doc and emitting
      # a chunk whenever the next piece would reach chunk_size.
      def merge_splits(splits, separator)
        merged_splits = []
        current_doc = []
        total = 0

        splits.each do |split|
          split_len = split.length
          total = process_split(total, split_len, current_doc, merged_splits, separator)
          current_doc << split
          total += split_len
        end

        add_remaining_doc(current_doc, merged_splits, separator)
        merged_splits
      end

      # Emits the pending chunk if adding split_len would reach chunk_size.
      # @return [Integer] the length still pending in current_doc afterwards.
      def process_split(total, split_len, current_doc, merged_splits, separator)
        return total if total + split_len < chunk_size

        warn_if_chunk_too_large(total)
        handle_large_chunk(total, split_len, current_doc, merged_splits, separator)
      end

      def warn_if_chunk_too_large(total)
        return unless total > chunk_size

        puts "Created a chunk of size #{total}, which is longer than the specified #{chunk_size}"
      end

      # Stores the pending pieces as a chunk, then trims them down to the overlap.
      def handle_large_chunk(total, split_len, current_doc, merged_splits, separator)
        return total if current_doc.empty?

        doc = join_docs(current_doc, separator)
        merged_splits << doc if doc
        remove_overlap(total, split_len, current_doc)
      end

      # Drops pieces from the front of current_doc (mutating it) until what is
      # left fits within chunk_overlap and leaves room for the next piece.
      def remove_overlap(total, split_len, current_doc)
        while total > chunk_overlap || (total + split_len > chunk_size && total.positive?)
          total -= current_doc.first.length
          current_doc.shift
        end
        total
      end

      def add_remaining_doc(current_doc, merged_splits, separator)
        doc = join_docs(current_doc, separator)
        merged_splits << doc if doc
      end

      # @return [String, nil] the joined, stripped pieces, or nil when blank.
      def join_docs(docs, separator)
        joined = docs.join(separator).strip
        joined.empty? ? nil : joined
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
+
module Boxcars
  # Mixin for the embeddings service objects: a class that includes it is run
  # through Klass.call(...) and can no longer be instantiated with Klass.new
  # from the outside.
  module Embeddings
    # Class-level API added to every class that includes Embeddings.
    module ClassMethods
      EmbeddingsError = Class.new(StandardError)

      # Builds an instance with the given arguments and returns the result of
      # its #call.
      def call(*args, **kw_args)
        instance = new(*args, **kw_args)
        instance.call
      end
    end

    # Hook run on include: adds ClassMethods to the including class and makes
    # its .new private.
    def self.included(base)
      base.extend(ClassMethods)
      base.private_class_method :new
    end
  end
end
|
23
|
+
|
24
|
+
require_relative "embeddings/document"
|
25
|
+
require_relative "embeddings/embed_via_open_ai"
|
26
|
+
require_relative "embeddings/split_text"
|
27
|
+
require_relative "embeddings/similarity_search"
|
28
|
+
require_relative "embeddings/hnswlib/hnswlib_config"
|
29
|
+
require_relative "embeddings/hnswlib/save_to_hnswlib"
|
30
|
+
require_relative "embeddings/hnswlib/build_vector_store"
|
31
|
+
require_relative "embeddings/hnswlib/hnswlib_search"
|
@@ -121,7 +121,7 @@ module Boxcars
|
|
121
121
|
# @param inputs [Hash] The inputs to the boxcar.
|
122
122
|
# @return Hash The variables for this boxcar.
|
123
123
|
def prediction_variables(inputs)
|
124
|
-
|
124
|
+
prediction_additional.merge(inputs)
|
125
125
|
end
|
126
126
|
|
127
127
|
# remove backticks or triple backticks from the code
|
@@ -34,11 +34,11 @@ module Boxcars
|
|
34
34
|
# @param question [String] The question to ask Google.
|
35
35
|
# @return [String] The location found.
|
36
36
|
def get_location(question)
|
37
|
+
Boxcars.debug "Question: #{question}", :yellow
|
37
38
|
search = ::GoogleSearch.new(q: question, limit: 3)
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
rv
|
39
|
+
answer = search.get_location
|
40
|
+
Boxcars.debug "Answer: #{answer}", :yellow, style: :bold
|
41
|
+
answer
|
42
42
|
end
|
43
43
|
|
44
44
|
private
|
@@ -57,7 +57,15 @@ module Boxcars
|
|
57
57
|
raise Error, "Got error from SerpAPI: {res[:error]}" if res[:error]
|
58
58
|
|
59
59
|
ANSWER_LOCATIONS.each do |path|
|
60
|
-
|
60
|
+
next unless res.dig(*path)
|
61
|
+
|
62
|
+
Boxcars.debug("Found SERP answer at #{path}", :cyan)
|
63
|
+
path_link = path.dup
|
64
|
+
last_word = path_link.pop
|
65
|
+
path_link << :link
|
66
|
+
return { last_word => res.dig(*path), url: res.dig(*path_link) } if last_word.is_a?(Symbol) && res.dig(*path_link)
|
67
|
+
|
68
|
+
return res.dig(*path)
|
61
69
|
end
|
62
70
|
"No good search result found"
|
63
71
|
end
|
data/lib/boxcars/boxcar/sql.rb
CHANGED
@@ -41,6 +41,8 @@ module Boxcars
|
|
41
41
|
end
|
42
42
|
elsif rtables
|
43
43
|
raise ArgumentError, "tables needs to be an array of Strings"
|
44
|
+
else
|
45
|
+
@requested_tables = tables
|
44
46
|
end
|
45
47
|
@except_models = LOCKED_OUT_TABLES + exceptions.to_a
|
46
48
|
end
|
@@ -55,8 +57,8 @@ module Boxcars
|
|
55
57
|
");"].join("\n")
|
56
58
|
end
|
57
59
|
|
58
|
-
def schema
|
59
|
-
wanted_tables =
|
60
|
+
def schema
|
61
|
+
wanted_tables = @requested_tables - @except_models
|
60
62
|
wanted_tables.map(&method(:table_schema)).join("\n")
|
61
63
|
end
|
62
64
|
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
+
module Boxcars
  # A Boxcar that interprets a prompt and executes API calls to get an answer.
  class Swagger < EngineBoxcar
    # the description of this engine boxcar
    DESC = "useful for when you need to make Open API calls to get an answer."

    attr_accessor :swagger_url, :context

    # @param swagger_url [String] The URL of the Open API Swagger file to use.
    # @param engine [Boxcars::Engine] The engine to use for this boxcar. Can be inherited from a train if nil.
    # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar. Defaults to built-in prompt.
    # @param context [String] Additional context to use for the prompt.
    # @param kwargs [Hash] Any other keyword arguments to pass to the parent class.
    #   :stop, :name and :description get defaults when not supplied.
    def initialize(swagger_url:, engine: nil, prompt: nil, context: "", **kwargs)
      @swagger_url = swagger_url
      @context = context
      the_prompt = prompt || my_prompt
      kwargs[:stop] ||= ["```output"]
      kwargs[:name] ||= "Swagger API"
      kwargs[:description] ||= DESC
      super(engine: engine, prompt: the_prompt, **kwargs)
    end

    # @return Hash The additional variables for this boxcar.
    def prediction_additional
      { swagger_url: swagger_url, context: context }.merge super
    end

    private

    # Pulls the code out of the last ```ruby fence in the text and runs it.
    # NOTE(review): the code comes from the engine's reply and is handed to
    # Boxcars::RubyREPL for execution - confirm that running it is acceptable
    # in the deployment environment.
    # @param text [String] The engine's reply.
    # @return The RubyREPL result, or an error Result when the fence holds no code.
    def get_embedded_ruby_answer(text)
      # to_s guards the cases where a split yields nothing (e.g. an empty fence),
      # which would otherwise fail with a NoMethodError on nil.
      code = text.split("```ruby\n").last.to_s.split("```").first.to_s.strip
      if code.empty?
        return Result.new(status: :error,
                          explanation: "Error: your '```ruby' block did not contain any code. Try answering the question again.")
      end

      ruby_executor = Boxcars::RubyREPL.new
      ruby_executor.call(code: code)
    end

    # Turns the engine's reply into a result: a line starting with ```ruby is
    # executed, a line starting with Answer: is taken as the answer, and
    # anything else is reported back as an error.
    def get_answer(text)
      case text
      when /^```ruby/
        get_embedded_ruby_answer(text)
      when /^Answer:/
        Result.from_text(text)
      else
        Result.new(status: :error,
                   explanation: "Error: expecting your response to begin with '```ruby'. Try answering the question again.")
      end
    end

    # our template
    CTEMPLATE = [
      syst("Study this Open API Swagger file %<swagger_url>s\n",
           "and write a Ruby Program that prints the answer to the following questions using the appropriate API calls:\n",
           "Additional context that you might need in the Ruby program: (%<context>s)\n",
           "Use the following format:\n",
           "${{Question needing API calls and code}}\n",
           "reply only with the following format:\n",
           "```ruby\n${{Ruby code with API calls and code that prints the answer}}\n```\n",
           "```output\n${{Output of your code}}\n```\n\n",
           "Otherwise, if you know the answer and do not need any API calls, you should use this simpler format:\n",
           "${{Question not needing API calls}}\n",
           "Answer: ${{Answer}}\n\n",
           "Do not give an explanation of the answer and make sure your answer starts with either 'Answer:' or '```ruby'. ",
           "Make use of the rest-client gem to make your requests to the API. Just print the answer."),
      user("%<question>s")
    ].freeze

    # The prompt to use for the engine.
    def my_prompt
      @conversation ||= Conversation.new(lines: CTEMPLATE)
      @my_prompt ||= ConversationPrompt.new(
        conversation: @conversation,
        input_variables: [:question],
        other_inputs: [:context, :swagger_url],
        output_variables: [:answer])
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "net/http"
|
4
|
+
module Boxcars
  # A Boxcar that uses the Wikipedia search API to get answers to questions.
  class WikipediaSearch < Boxcar
    # the description of this boxcar
    # (the trailing space keeps the two sentences apart once the literals are joined)
    WDESC = "useful for when you need to answer questions about topics from Wikipedia. " \
            "You should ask targeted questions"

    # implements a boxcar that uses the Wikipedia Search to get answers to questions.
    # @param name [String] The name of the boxcar. Defaults to "Wikipedia".
    # @param description [String] A description of the boxcar. Defaults to WDESC.
    def initialize(name: "Wikipedia", description: WDESC)
      super(name: name, description: description)
    end

    # Get an answer from the Wikipedia search API.
    # @param question [String] The question to search Wikipedia for.
    # @return [String] The snippet of the first search hit with its markup tags
    #   removed, followed by a "url:" line for the page when a page id is present.
    #   Empty when the search returned no hit.
    # @raise [RuntimeError] if Wikipedia does not answer with a success status.
    def run(question)
      Boxcars.debug "Question: #{question}", :yellow
      uri = URI("https://en.wikipedia.org/w/api.php")
      params = { action: "query", list: "search", srsearch: question, format: "json" }
      uri.query = URI.encode_www_form(params)

      res = Net::HTTP.get_response(uri)
      raise "Error getting response from Wikipedia: #{res.body}" unless res.is_a?(Net::HTTPSuccess)

      response = JSON.parse res.body
      # Only the first hit is used; the gsub strips the tags from the snippet.
      answer = response.dig("query", "search", 0, "snippet").to_s.gsub(/<[^>]*>/, "")
      pageid = response.dig("query", "search", 0, "pageid")
      answer = "#{answer}\nurl: https://en.wikipedia.org/?curid=#{pageid}" if pageid
      Boxcars.debug "Answer: #{answer}", :yellow, style: :bold
      answer
    end
  end
end
|
data/lib/boxcars/boxcar.rb
CHANGED
@@ -119,7 +119,10 @@ module Boxcars
|
|
119
119
|
|
120
120
|
return run_boxcar(inputs: args[0])[output_keys.first]
|
121
121
|
end
|
122
|
-
|
122
|
+
if args.empty?
|
123
|
+
ans = run_boxcar(inputs: kwargs)
|
124
|
+
return ans[output_keys.first]
|
125
|
+
end
|
123
126
|
|
124
127
|
raise Boxcars::ArgumentError, "run supported with either positional or keyword arguments but not both. Got args" \
|
125
128
|
": #{args} and kwargs: #{kwargs}."
|
@@ -149,5 +152,7 @@ require "boxcars/result"
|
|
149
152
|
require "boxcars/boxcar/engine_boxcar"
|
150
153
|
require "boxcars/boxcar/calculator"
|
151
154
|
require "boxcars/boxcar/google_search"
|
155
|
+
require "boxcars/boxcar/wikipedia_search"
|
152
156
|
require "boxcars/boxcar/sql"
|
157
|
+
require "boxcars/boxcar/swagger"
|
153
158
|
require "boxcars/boxcar/active_record"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
+
module Boxcars
  # A Boxcar subclass for embeddings; it only declares its own error class here.
  class Embedding < Boxcar
    # Embedding-specific error, a plain StandardError subclass.
    class Error < StandardError; end
  end
end
|
10
|
+
|
11
|
+
require "boxcars/boxcar/embeddings"
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'gpt4all'
|
4
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
5
|
+
module Boxcars
  # An engine that uses the local GPT4All API.
  class Gpt4allEng < Engine
    attr_reader :prompts, :model_kwargs, :batch_size

    # the default name of the engine
    DEFAULT_NAME = "Gpt4all engine"
    # the default description of the engine
    DEFAULT_DESCRIPTION = "useful for when you need to use local AI to answer questions. " \
                          "You should ask targeted questions"

    # An engine is a container for a single tool to run.
    # @param name [String] The name of the engine. Defaults to DEFAULT_NAME.
    # @param description [String] A description of the engine. Defaults to DEFAULT_DESCRIPTION.
    # @param prompts [Array<String>] The prompts to use when asking the engine. Defaults to [].
    # @param batch_size [Integer] The number of prompts to send to the engine at once. Defaults to 2.
    def initialize(name: DEFAULT_NAME, description: DEFAULT_DESCRIPTION, prompts: [], batch_size: 2, **_kwargs)
      @prompts = prompts
      @batch_size = batch_size
      super(description: description, name: name)
    end

    # Get an answer from the engine.
    # Starts a GPT4All bot, sends it the formatted prompt and always stops the
    # bot again. Errors are reported through Boxcars.error instead of raised.
    # @param prompt [String] The prompt to use when asking the engine.
    # @param inputs [Hash] The values used to format the prompt.
    # @param _kwargs [Hash] Accepted for interface compatibility and ignored.
    def client(prompt:, inputs: {}, **_kwargs)
      gpt4all = Gpt4all::ConversationalAI.new
      gpt4all.prepare_resources(force_download: false)
      gpt4all.start_bot
      input_text = prompt.as_prompt(inputs)[:prompt]
      Boxcars.debug("Prompt after formatting:\n#{input_text}", :cyan) if Boxcars.configuration.log_prompts
      gpt4all.prompt(input_text)
    rescue StandardError => e
      # Array(...).last(5) also copes with a missing backtrace or one shorter
      # than five frames.
      Boxcars.error(["Error from gpt4all engine: #{e}", Array(e.backtrace).last(5)].flatten.join("\n "))
    ensure
      # gpt4all is still nil when the constructor itself failed.
      gpt4all&.stop_bot
    end

    # get an answer from the engine for a question.
    # @param question [String] The question to ask the engine.
    # @param kwargs [Hash] Additional parameters to pass to the engine if wanted.
    def run(question, **kwargs)
      prompt = Prompt.new(template: question)
      answer = client(prompt: prompt, **kwargs)
      Boxcars.debug("Answer: #{answer}", :cyan)
      answer
    end
  end
end
|
data/lib/boxcars/engine.rb
CHANGED
data/lib/boxcars/ruby_repl.rb
CHANGED
@@ -20,6 +20,7 @@ module Boxcars
|
|
20
20
|
Boxcars.debug output, :red
|
21
21
|
Result.from_error(output, code: code)
|
22
22
|
else
|
23
|
+
output = ::Regexp.last_match(1) if output =~ /^\s*Answer:\s*(.*)$/m
|
23
24
|
Boxcars.debug "Answer: #{output}", :yellow, style: :bold
|
24
25
|
Result.from_text(output, code: code)
|
25
26
|
end
|
data/lib/boxcars/version.rb
CHANGED
data/lib/boxcars.rb
CHANGED