RubyGems - boxcars - Versions diffs - 0.1.0 - Mend

boxcars 0.1.0

Files changed (31) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +154 -0
data/CHANGELOG.md +5 -0
data/CODE_OF_CONDUCT.md +84 -0
data/Gemfile +24 -0
data/Gemfile.lock +119 -0
data/LICENSE.txt +21 -0
data/README.md +43 -0
data/Rakefile +12 -0
data/bin/console +15 -0
data/bin/setup +8 -0
data/lib/boxcars/boxcar/calculator.rb +102 -0
data/lib/boxcars/boxcar/engine_boxcar.rb +90 -0
data/lib/boxcars/boxcar/serp.rb +64 -0
data/lib/boxcars/boxcar/sql.rb +126 -0
data/lib/boxcars/boxcar.rb +108 -0
data/lib/boxcars/conductor/conductor_action.rb +14 -0
data/lib/boxcars/conductor/conductor_executer.rb +95 -0
data/lib/boxcars/conductor/conductor_finish.rb +13 -0
data/lib/boxcars/conductor/zero_shot.rb +81 -0
data/lib/boxcars/conductor.rb +147 -0
data/lib/boxcars/engine/engine_result.rb +13 -0
data/lib/boxcars/engine/openai.rb +156 -0
data/lib/boxcars/engine.rb +23 -0
data/lib/boxcars/generation.rb +13 -0
data/lib/boxcars/prompt.rb +45 -0
data/lib/boxcars/ruby_repl.rb +22 -0
data/lib/boxcars/version.rb +5 -0
data/lib/boxcars.rb +93 -0
metadata +148 -0

data/lib/boxcars/boxcar/engine_boxcar.rb ADDED Viewed

@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+# Boxcars is a framework for running a series of tools to get an answer to a question.
+module Boxcars
+  # For Boxcars that use an engine to do their work.
+  # @abstract
+  class EngineBoxcar < Boxcars::Boxcar
+    attr_accessor :prompt, :engine, :output_key
+    # A Boxcar is a container for a single tool to run.
+    # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar with sane defaults.
+    # @param name [String] The name of the boxcar. Defaults to classname.
+    # @param description [String] A description of the boxcar.
+    # @param engine [Boxcars::Engine] The engine to user for this boxcar. Can be inherited from a conductor if nil.
+    def initialize(prompt:, engine:, output_key: "text", name: nil, description: nil)
+      @prompt = prompt
+      @engine = engine
+      @output_key = output_key
+      super(name: name, description: description)
+    end
+    def input_keys
+      prompt.input_variables
+    end
+    def output_keys
+      [output_key]
+    end
+    # # Check that all inputs are present.
+    # def validate_inputs(inputs:)
+    #   missing_keys = input_keys - inputs.keys
+    #   raise Boxcars::ArgumentError, "Missing some input keys: #{missing_keys}" if missing_keys.any?
+    #   inputs
+    # end
+    # def validate_outputs(outputs:)
+    #   return if outputs.sort == output_keys.sort
+    #   raise Boxcars::ArgumentError, "Did not get out keys that were expected, got: #{outputs}. Expected: #{output_keys}"
+    # end
+    def generate(input_list:)
+      stop = input_list[0][:stop]
+      prompts = []
+      input_list.each do |inputs|
+        new_prompt = prompt.format(**inputs)
+        # puts "Prompt after formatting:\n#{new_prompt.colorize(:cyan)}"
+        prompts.push(new_prompt)
+      end
+      engine.generate(prompts: prompts, stop: stop)
+    end
+    def apply(input_list:)
+      response = generate(input_list: input_list)
+      response.generations.to_h do |generation|
+        [output_key, generation[0].text]
+      end
+    end
+    def predict(**kwargs)
+      apply(input_list: [kwargs])[output_key]
+    end
+    def predict_and_parse(**kwargs)
+      result = predict(**kwargs)
+      if prompt.output_parser
+        prompt.output_parser.parse(result)
+      else
+        result
+      end
+    end
+    def apply_and_parse(input_list:)
+      result = apply(input_list: input_list)
+      if prompt.output_parser
+        result.map { |r| prompt.output_parser.parse(r[output_key]) }
+      else
+        result
+      end
+    end
+    def check_output_keys
+      return unless output_keys.length != 1
+      raise Boxcars::ArgumentError, "run not supported when there is not exactly one output key. Got #{output_keys}."
+    end
+  end
+end

data/lib/boxcars/boxcar/serp.rb ADDED Viewed

@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+require 'google_search_results'
+module Boxcars
+  # A Boxcar that uses the Google SerpAPI to get answers to questions.
+  class Serp < Boxcar
+    SERPDESC = "useful for when you need to answer questions about current events." \
+               "You should ask targeted questions"
+    # implements a boxcar that uses the Google SerpAPI to get answers to questions.
+    # @param name [String] The name of the boxcar. Defaults to classname.
+    # @param description [String] A description of the boxcar. Defaults to SERPDESC.
+    # @param engine [Boxcars::Engine] The engine to user for this boxcar. Can be inherited from a Conductor if nil.
+    #
+    def initialize(name: "Search", description: SERPDESC, serpapi_api_key: "not set")
+      super(name: name, description: description)
+      api_key = Boxcars.configuration.serpapi_api_key(serpapi_api_key: serpapi_api_key)
+      GoogleSearch.api_key = api_key
+    end
+    # Get an answer from Google using the SerpAPI.
+    # @param question [String] The question to ask Google.
+    # @return [String] The answer to the question.
+    def run(question)
+      search = GoogleSearch.new(q: question)
+      rv = find_answer(search.get_hash)
+      puts "Question: #{question}"
+      puts "Answer: #{rv}"
+      rv
+    end
+    # Get the location of an answer from Google using the SerpAPI.
+    # @param question [String] The question to ask Google.
+    # @return [String] The location found.
+    def get_location(question)
+      search = GoogleSearch.new(q: question, limit: 3)
+      rv = search.get_location
+      puts "Question: #{question}"
+      puts "Answer: #{rv}"
+      rv
+    end
+    private
+    ANSWER_LOCATIONS = [
+      %i[answer_box answer],
+      %i[answer_box snippet],
+      [:answer_box, :snippet_highlighted_words, 0],
+      %i[sports_results game_spotlight],
+      %i[knowledge_graph description],
+      [:organic_results, 0, :snippet_highlighted_words, 0],
+      [:organic_results, 0, :snippet]
+    ].freeze
+    def find_answer(res)
+      raise Error, "Got error from SerpAPI: {res[:error]}" if res[:error]
+      ANSWER_LOCATIONS.each do |path|
+        return res.dig(*path) if res.dig(*path)
+      end
+      "No good search result found"
+    end
+  end
+end

data/lib/boxcars/boxcar/sql.rb ADDED Viewed

@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+# Boxcars is a framework for running a series of tools to get an answer to a question.
+module Boxcars
+  # A Boxcar that interprets a prompt and executes SQL code to get answers
+  class SQL < EngineBoxcar
+    SQLDESC = "useful for when you need to query a SQL database"
+    attr_accessor :connection, :input_key
+    # @param connection [ActiveRecord::Connection] The SQL connection to use for this boxcar.
+    # @param prompt [Boxcars::Prompt] The prompt to use for this boxcar.
+    # @param name [String] The name of the boxcar. Defaults to classname.
+    # @param description [String] A description of the boxcar.
+    # @param engine [Boxcars::Engine] The engine to user for this boxcar. Can be inherited from a conductor if nil.
+    # @param input_key [Symbol] The key to use for the input. Defaults to :question.
+    # @param output_key [Symbol] The key to use for the output. Defaults to :answer.
+    def initialize(connection:, engine: nil, input_key: :question, output_key: :answer, **kwargs)
+      @connection = connection
+      @input_key = input_key
+      the_prompt = kwargs[prompt] || my_prompt
+      super(name: kwargs[:name] || "SQLdatabase",
+            description: kwargs[:description] || SQLDESC,
+            engine: engine,
+            prompt: the_prompt,
+            output_key: output_key)
+    end
+    def input_keys
+      [input_key]
+    end
+    def output_keys
+      [output_key]
+    end
+    def call(inputs:)
+      t = predict(question: inputs[input_key], dialect: dialect, top_k: 5, table_info: schema, stop: ["SQLQuery:"]).strip
+      answer = get_answer(t)
+      puts answer.colorize(:magenta)
+      { output_key => answer }
+    end
+    private
+    def tables
+      connection&.tables
+    end
+    def table_schema(table)
+      ["CREATE TABLE #{table} (",
+       connection&.columns(table)&.map { |c| " #{c.name} #{c.sql_type} #{c.null ? "NULL" : "NOT NULL"}" }&.join(",\n"),
+       ");"].join("\n")
+    end
+    def schema(except_tables: ['ar_internal_metadata'])
+      wanted_tables = tables.to_a - except_tables
+      wanted_tables.map(&method(:table_schema)).join("\n")
+    end
+    def dialect
+      # connection.instance_variable_get "@config"[:adapter]
+      connection.class.name.split("::").last.sub("Adapter", "")
+    end
+    def get_embedded_sql_answer(text)
+      code = text[/^SQLQuery: (.*)/, 1]
+      puts code.colorize(:yellow)
+      output = connection.exec_query(code).to_a
+      puts "Answer: #{output}"
+      "Answer: #{output}"
+    end
+    def get_answer(text)
+      case text
+      when /^SQLQuery:/
+        get_embedded_sql_answer(text)
+      when /^Answer:/
+        text
+      else
+        raise Boxcars::Error "Unknown format from engine: #{text}"
+      end
+    end
+    TEMPLATE = <<~IPT
+      Given an input question, first create a syntactically correct %<dialect>s query to run,
+      then look at the results of the query and return the answer. Unless the user specifies
+      in his question a specific number of examples he wishes to obtain, always limit your query
+      to at most %<top_k>s results using a LIMIT clause. You can order the results by a relevant column
+      to return the most interesting examples in the database.
+      Never query for all the columns from a specific table, only ask for a the few relevant columns given the question.
+      Pay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist.
+      Also, pay attention to which column is in which table.
+      Use the following format:
+      Question: "Question here"
+      SQLQuery: "SQL Query to run"
+      SQLResult: "Result of the SQLQuery"
+      Answer: "Final answer here"
+      Only use the following tables:
+      %<table_info>s
+      Question: %<question>s
+    IPT
+    # The prompt to use for the engine.
+    def my_prompt
+      @my_prompt ||= Prompt.new(input_variables: [:question, :dialect, :top_k], template: TEMPLATE)
+    end
+    # DECIDER_TEMPLATE = <<~DPT
+    #   Given the below input question and list of potential tables, output a comma separated list of the table names that may
+    #   be necessary to answer this question.
+    #   Question: %<query>s
+    #   Table Names: %<table_names>s
+    #   Relevant Table Names:
+    # DPT
+    # DECIDER_PROMPT = Prompt.new(
+    #   input_variables: %i[query table_names],
+    #   template: DECIDER_TEMPLATE,
+    #   output_parser: CommaSeparatedListOutputParser
+    # )
+  end
+end

data/lib/boxcars/boxcar.rb ADDED Viewed

@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+module Boxcars
+  # @abstract
+  class Boxcar
+    attr_reader :name, :description, :return_direct
+    # A Boxcar is a container for a single tool to run.
+    # @param name [String] The name of the boxcar. Defaults to classname.
+    # @param description [String] A description of the boxcar.
+    # @param return_direct [Boolean] If true, return the output of this boxcar directly, without merging it with the inputs.
+    def initialize(description:, name: nil, return_direct: false)
+      @name = name || self.class.name
+      @description = description
+      @return_direct = return_direct
+    end
+    # Input keys this chain expects.
+    def input_keys
+      raise NotImplementedError
+    end
+    # Output keys this chain expects.
+    def output_keys
+      raise NotImplementedError
+    end
+    # Check that all inputs are present.
+    def validate_inputs(inputs:)
+      missing_keys = input_keys - inputs.keys
+      raise "Missing some input keys: #{missing_keys}" if missing_keys.any?
+      inputs
+    end
+    def validate_outputs(outputs:)
+      return if outputs.sort == output_keys.sort
+      raise "Did not get output keys that were expected, got: #{outputs}. Expected: #{output_keys}"
+    end
+    # Run the logic of this chain and return the output.
+    def call(inputs:)
+      raise NotImplementedError
+    end
+    def do_call(inputs:, return_only_outputs: false)
+      inputs = our_inputs(inputs)
+      output = nil
+      begin
+        output = call(inputs: inputs)
+      rescue StandardError => e
+        raise e
+      end
+      validate_outputs(outputs: output.keys)
+      # memory&.save_convext(inputs: inputs, outputs: outputs)
+      return output if return_only_outputs
+      inputs.merge(output)
+    end
+    def apply(input_list:)
+      input_list.map { |inputs| new(**inputs) }
+    end
+    # Get an answer from the boxcar.
+    # @param question [String] The question to ask the boxcar.
+    # @return [String] The answer to the question.
+    def run(*args, **kwargs)
+      puts "> Enterning #{name} boxcar#run".colorize(:gray, style: :bold)
+      rv = do_run(*args, **kwargs)
+      puts "< Exiting #{name} boxcar#run".colorize(:gray, style: :bold)
+      rv
+    end
+    private
+    def do_run(*args, **kwargs)
+      if kwargs.empty?
+        raise Boxcars::ArgumentError, "run supports only one positional argument." if args.length != 1
+        return do_call(inputs: args[0])[output_keys.first]
+      end
+      return do_call(**kwargs)[output_keys].first if args.empty?
+      raise Boxcars::ArgumentError, "run supported with either positional or keyword arguments but not both. Got args" \
+                                    ": #{args} and kwargs: #{kwargs}."
+    end
+    def our_inputs(inputs)
+      if inputs.is_a?(String)
+        puts inputs.colorize(:blue) # the question
+        if input_keys.length != 1
+          raise Boxcars::ArgumentError, "A single string input was passed in, but this boxcar expects " \
+                                        "multiple inputs (#{input_keys}). When a boxcar expects " \
+                                        "multiple inputs, please call it by passing in a hash, eg: `boxcar({'foo': 1, 'bar': 2})`"
+        end
+        inputs = { input_keys.first => inputs }
+      end
+      validate_inputs(inputs: inputs)
+    end
+  end
+end
+require "boxcars/boxcar/engine_boxcar"
+require "boxcars/boxcar/calculator"
+require "boxcars/boxcar/serp"
+require "boxcars/boxcar/sql"

data/lib/boxcars/conductor/conductor_action.rb ADDED Viewed

@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+module Boxcars
+  # Conductor's action to take.
+  class ConductorAction
+    attr_accessor :boxcar, :boxcar_input, :log
+    def initialize(boxcar: nil, boxcar_input: nil, log: nil)
+      @boxcar = boxcar
+      @boxcar_input = boxcar_input
+      @log = log
+    end
+  end
+end

data/lib/boxcars/conductor/conductor_executer.rb ADDED Viewed

@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+module Boxcars
+  # Consists of an conductor using boxcars.
+  class ConductorExecuter < EngineBoxcar
+    attr_accessor :conductor, :boxcars, :return_intermediate_steps, :max_iterations, :early_stopping_method
+    # @param conductor [Boxcars::Conductor] The conductor to use.
+    # @param boxcars [Array<Boxcars::Boxcar>] The boxcars to use.
+    # @param return_intermediate_steps [Boolean] Whether to return the intermediate steps. Defaults to false.
+    # @param max_iterations [Integer] The maximum number of iterations to run. Defaults to nil.
+    # @param early_stopping_method [String] The early stopping method to use. Defaults to "force".
+    def initialize(conductor:, boxcars:, return_intermediate_steps: false, max_iterations: nil,
+                   early_stopping_method: "force")
+      @conductor = conductor
+      @boxcars = boxcars
+      @return_intermediate_steps = return_intermediate_steps
+      @max_iterations = max_iterations
+      @early_stopping_method = early_stopping_method
+      # def initialize(prompt:, engine:, output_key: "text", name: nil, description: nil)
+      super(prompt: conductor.prompt, engine: conductor.engine, name: conductor.name, description: conductor.description)
+    end
+    def same_boxcars?(boxcar_names)
+      conductor.allowed_boxcars.sort == boxcar_names
+    end
+    def validate_boxcars
+      boxcar_names = boxcars.map(&:name).sort
+      return if same_boxcars?(boxcar_names)
+      raise "Allowed boxcars (#{conductor.allowed_boxcars}) different than provided boxcars (#{boxcar_names})"
+    end
+    def input_keys
+      conductor.input_keys
+    end
+    def output_keys
+      return conductor.return_values + ["intermediate_steps"] if return_intermediate_steps
+      conductor.return_values
+    end
+    def should_continue?(iterations)
+      return true if max_iterations.nil?
+      iterations < max_iterations
+    end
+    # handler before returning
+    def pre_return(output, intermediate_steps)
+      puts output.log.colorize(:yellow)
+      final_output = output.return_values
+      final_output["intermediate_steps"] = intermediate_steps if return_intermediate_steps
+      final_output
+    end
+    def engine_prefix(return_direct)
+      return_direct ? "" : conductor.engine_prefix
+    end
+    def call(inputs:)
+      conductor.prepare_for_new_call
+      name_to_boxcar_map = boxcars.to_h { |boxcar| [boxcar.name, boxcar] }
+      intermediate_steps = []
+      iterations = 0
+      while should_continue?(iterations)
+        output = conductor.plan(intermediate_steps, **inputs)
+        return pre_return(output, intermediate_steps) if output.is_a?(ConductorFinish)
+        if (boxcar = name_to_boxcar_map[output.boxcar])
+          begin
+            observation = boxcar.run(output.boxcar_input)
+            return_direct = boxcar.return_direct
+          rescue StandardError => e
+            raise e
+          end
+        else
+          observation = "#{output.boxcar} is not a valid boxcar, try another one."
+          return_direct = false
+        end
+        puts "#Observation: #{observation}".colorize(:green)
+        intermediate_steps.append([output, observation])
+        if return_direct
+          output = ConductorFinish.new({ conductor.return_values[0] => observation }, "")
+          return pre_return(output, intermediate_steps)
+        end
+        iterations += 1
+      end
+      output = conductor.return_stopped_response(early_stopping_method, intermediate_steps, **inputs)
+      pre_return(output, intermediate_steps)
+    end
+  end
+end

data/lib/boxcars/conductor/conductor_finish.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+module Boxcars
+  # Conductor's return value
+  class ConductorFinish
+    attr_accessor :return_values, :log
+    def initialize(return_values, log:)
+      @return_values = return_values
+      @log = log
+    end
+  end
+end

data/lib/boxcars/conductor/zero_shot.rb ADDED Viewed

@@ -0,0 +1,81 @@
+# Agent for the MRKL chain
+module Boxcars
+  # A Conductor using the zero-shot react method.
+  class ZeroShot < Conductor
+    attr_reader :boxcars, :observation_prefix, :engine_prefix
+    PREFIX = "Answer the following questions as best you can. You have access to the following actions:".freeze
+    FORMAT_INSTRUCTIONS = <<~FINPUT.freeze
+      Use the following format:
+      Question: the input question you must answer
+      Thought: you should always think about what to do
+      Action: the action to take, should be one of [%<boxcar_names>s]
+      Action Input: the input to the action
+      Observation: the result of the action
+      ... (this Thought/Action/Action Input/Observation sequence can repeat N times)
+      Thought: I now know the final answer
+      Final Answer: the final answer to the original input question
+    FINPUT
+    SUFFIX = <<~SINPUT.freeze
+      Begin!
+      Question: %<input>s
+      Thought:%<agent_scratchpad>s
+    SINPUT
+    def initialize(boxcars:, engine:, name: 'Zero Shot', description: 'Zero Shot Conductor')
+      @observation_prefix = 'Observation: '
+      @engine_prefix = 'Thought:'
+      prompt = self.class.create_prompt(boxcars: boxcars)
+      super(engine: engine, boxcars: boxcars, prompt: prompt, name: name, description: description)
+    end
+    # Create prompt in the style of the zero shot agent.
+    #   Args:
+    #     boxcars: List of boxcars the agent will have access to, used to format the prompt.
+    #     prefix: String to put before the list of boxcars.
+    #     suffix: String to put after the list of boxcars.
+    #     input_variables: List of input variables the final prompt will expect.
+    #   Returns:
+    #     A Prompt with the template assembled from the pieces here.
+    def self.create_prompt(boxcars:, prefix: PREFIX, suffix: SUFFIX, input_variables: [:input, :agent_scratchpad])
+      boxcar_strings = boxcars.map { |boxcar| "#{boxcar.name}: #{boxcar.description}" }.join("\n")
+      boxcar_names = boxcars.map(&:name)
+      format_instructions = format(FORMAT_INSTRUCTIONS, boxcar_names: boxcar_names.join(", "))
+      template = [prefix, boxcar_strings, format_instructions, suffix].join("\n\n")
+      Prompt.new(template: template, input_variables: input_variables)
+    end
+    FINAL_ANSWER_ACTION = "Final Answer:".freeze
+    # Parse out the action and input from the engine output.
+    def get_action_and_input(engine_output:)
+      # NOTE: if you're specifying a custom prompt for the ZeroShotAgent,
+      #   you will need to ensure that it meets the following Regex requirements.
+      #   The string starting with "Action:" and the following string starting
+      #   with "Action Input:" should be separated by a newline.
+      if engine_output.include?(FINAL_ANSWER_ACTION)
+        answer = engine_output.split(FINAL_ANSWER_ACTION).last.strip
+        ['Final Answer', answer]
+      else
+        regex = /Action: (?<action>.*)\nAction Input: (?<action_input>.*)/
+        match = regex.match(engine_output)
+        raise ValueError, "Could not parse engine output: #{engine_output}" unless match
+        action = match[:action].strip
+        action_input = match[:action_input].strip
+        # [action, action_input.strip(" ").strip('"')]
+        [action, action_input]
+      end
+    end
+    def extract_boxcar_and_input(text)
+      get_action_and_input(engine_output: text)
+    end
+  end
+end