RubyGems - boxcars - Versions diffs - 0.3.1 → 0.3.2 - Mend

boxcars 0.3.1 → 0.3.2

Files changed (16) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +7 -2
data/Gemfile.lock +12 -1
data/boxcars.gemspec +1 -0
data/lib/boxcars/boxcar/active_record.rb +1 -1
data/lib/boxcars/boxcar/url_text.rb +58 -0
data/lib/boxcars/boxcar.rb +23 -5
data/lib/boxcars/conversation.rb +5 -2
data/lib/boxcars/train/xml_train.rb +107 -0
data/lib/boxcars/train/xml_zero_shot.rb +60 -0
data/lib/boxcars/train/zero_shot.rb +3 -19
data/lib/boxcars/train.rb +54 -17
data/lib/boxcars/version.rb +1 -1
data/lib/boxcars/x_node.rb +75 -0
data/lib/boxcars.rb +4 -0
metadata +20 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a2448419a2e348f8111fa19bd7b9bb1a05ab19b30d3dc9c0255ae0bd8673c156
-  data.tar.gz: fd6577be64b72941a87cbdb8c1c1ca31a7c88383869fa67f86d21cf721749168
+  metadata.gz: 463f57f1436cfea29e0a60bbfce071afd355a5d0cadbe1d69f96adc7393eacd6
+  data.tar.gz: 282e528fb8cb8b532b621db5c2e7ce2b82c4607057c01f0feb1eaacfe98ba09b
 SHA512:
-  metadata.gz: fd21eb8cd3ea3cc2fdf29140a3dce7dec26a7e4ffbd791d7aa7dd63aaa267c34d4ead764f85d3f7dbb9d1038330e3614bbfaf7f8c34818544b0d282568ad66f5
-  data.tar.gz: e12626132060202679533c3a101f4ec4fb1c9846f7d54beb9bc84ce7d71cb4f56be10f16a2c229480cc6c4378a074566f26a7ccbeeadc0c7ffc27ab81dfc6ee4
+  metadata.gz: c76d2772db0925f71779c0bac1a2e92af20bcf398cc6e7cf943edeadb2014426554fba607b034fd68ac88bd26f2d42eef114de57332e83a4fe76558a9b2f0cea
+  data.tar.gz: '0489fffef87c32fbf6a7a9af4fbca77ef991437353211174617b6ea8e229e49accc85c39af93414ce1628a4472cc208d8075570abba5e8269b30cfb66000a40e'

data/CHANGELOG.md CHANGED Viewed

@@ -1,12 +1,17 @@
 # Changelog
-## [Unreleased](https://github.com/BoxcarsAI/boxcars/tree/HEAD)
+## [v0.3.1](https://github.com/BoxcarsAI/boxcars/tree/v0.3.1) (2023-07-01)
-[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.16...HEAD)
+[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.16...v0.3.1)
 **Closed issues:**
 - Add a running log of prompts for debugging [\#99](https://github.com/BoxcarsAI/boxcars/issues/99)
+- Anyway to create conversation? [\#73](https://github.com/BoxcarsAI/boxcars/issues/73)
+**Merged pull requests:**
+- now, when you call run on a train multiple times, it remembers the ru… [\#101](https://github.com/BoxcarsAI/boxcars/pull/101) ([francis](https://github.com/francis))
 ## [v0.2.16](https://github.com/BoxcarsAI/boxcars/tree/v0.2.16) (2023-06-26)

data/Gemfile.lock CHANGED Viewed

@@ -1,10 +1,11 @@
 PATH
   remote: .
   specs:
-    boxcars (0.3.1)
+    boxcars (0.3.2)
       google_search_results (~> 2.2)
       gpt4all (~> 0.0.4)
       hnswlib (~> 0.8)
+      nokogiri (~> 1.15)
       pgvector (~> 0.2)
       ruby-openai (~> 4.1)
@@ -103,6 +104,14 @@ GEM
     netrc (0.11.0)
     nio4r (2.5.9)
     nio4r (2.5.9-java)
+    nokogiri (1.15.2-arm64-darwin)
+      racc (~> 1.4)
+    nokogiri (1.15.2-java)
+      racc (~> 1.4)
+    nokogiri (1.15.2-x86_64-darwin)
+      racc (~> 1.4)
+    nokogiri (1.15.2-x86_64-linux)
+      racc (~> 1.4)
     octokit (4.25.1)
       faraday (>= 1, < 3)
       sawyer (~> 0.9)
@@ -120,6 +129,8 @@ GEM
       protocol-hpack (~> 1.4)
       protocol-http (~> 0.18)
     public_suffix (5.0.1)
+    racc (1.7.1)
+    racc (1.7.1-java)
     rainbow (3.1.1)
     rake (13.0.6)
     regexp_parser (2.8.0)

data/boxcars.gemspec CHANGED Viewed

@@ -34,6 +34,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency "google_search_results", "~> 2.2"
   spec.add_dependency "gpt4all", "~> 0.0.4"
   spec.add_dependency "hnswlib", "~> 0.8"
+  spec.add_dependency "nokogiri", "~> 1.15"
   spec.add_dependency "pgvector", "~> 0.2"
   spec.add_dependency "ruby-openai", "~> 4.1"

data/lib/boxcars/boxcar/active_record.rb CHANGED Viewed

@@ -147,7 +147,7 @@ module Boxcars
     end
     def change_count(changes_code)
-      return 0 unless changes_code && changes_code != "None"
+      return 0 if changes_code.nil? || changes_code.empty? || changes_code =~ %r{^(None|N/A)$}i
       rollback_after_running do
         Boxcars.debug "computing change count with: #{changes_code}", :yellow

data/lib/boxcars/boxcar/url_text.rb ADDED Viewed

@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+module Boxcars
+  # A Boxcar that reads text from a URL.
+  class URLText < Boxcar
+    # the description of this boxcar
+    DESC = "useful when you want to get text from a URL."
+    # implements a boxcar that fetches a URL and returns the text found there.
+    # @param name [String] The name of the boxcar. Defaults to "FetchURL".
+    # @param description [String] A description of the boxcar. Defaults to DESC.
+    def initialize(name: "FetchURL", description: DESC)
+      super(name: name, description: description)
+    end
+    # Get text from a url.
+    # @param url [String] The url
+    # @return [String] The text for the url.
+    def run(url)
+      url = URI.parse(url)
+      get_answer(url)
+    end
+    private
+    def html_to_text(url, response)
+      Nokogiri::HTML(response.body).css(%w[h1 h2 h3 h4 h5 h6 p a].join(",")).map do |e|
+        itxt = e.inner_text.strip
+        itxt = itxt.gsub(/[[:space:]]+/, " ") # remove extra spaces
+        # next if itxt.nil? || itxt.empty?
+        if e.name == "a"
+          href = e.attributes["href"]&.value
+          href = URI.join(url, href).to_s if href =~ %r{^/}
+          "[#{itxt}](#{href})" # if e.attributes["href"]&.value =~ /^http/
+        else
+          itxt
+        end
+      end.compact.join("\n\n")
+    end
+    def get_answer(url)
+      response = Net::HTTP.get_response(url)
+      if response.is_a?(Net::HTTPSuccess)
+        return Result.from_text(response.body) if response.content_type == "text/plain"
+        if response.content_type == "text/html"
+          # return only the top level text
+          txt = html_to_text(url, response)
+          Result.from_text(txt)
+        else
+          Result.from_text(response.body)
+        end
+      else
+        Result.new(status: :error, explanation: "Error with url: #{response.code} #{response.message}")
+      end
+    end
+  end
+end

data/lib/boxcars/boxcar.rb CHANGED Viewed

@@ -17,12 +17,12 @@ module Boxcars
     # Input keys this chain expects.
     def input_keys
-      raise NotImplementedError
+      [:question]
     end
     # Output keys this chain expects.
     def output_keys
-      raise NotImplementedError
+      [:answer]
     end
     # Check that all inputs are present.
@@ -116,6 +116,20 @@ module Boxcars
     end
     # rubocop:enable Security/YAMLLoad
+    # Build an XML description of this boxcar for use in a prompt.
+    # Each entry of input_keys becomes a required String <param> element whose
+    # name and description are both the key itself.
+    # @return [String] A frozen XML snippet containing a single <tool> element.
+    def schema
+      params = input_keys.map do |key|
+        "<param name=\"#{key}\" data-type=\"String\" required=\"true\" description=\"#{key}\" />"
+      end.join("\n")
+      # exactly one opening <tool> tag, so it pairs with the single </tool> below
+      <<~SCHEMA.freeze
+        <tool name="#{name}" version="0.1" description="#{description}">
+          <params>
+            #{params}
+          </params>
+        </tool>
+      SCHEMA
+    end
     private
     # remember the history of this boxcar. Take the current intermediate steps and
@@ -126,18 +140,21 @@ module Boxcars
       # insert conversation history into the prompt
       history = []
-      history << Boxcar.user("Question: #{current_results[:input]}")
+      history << Boxcar.user(key_and_value_text(question_prefix, current_results[:input]))
       current_results[:intermediate_steps].each do |action, obs|
         if action.is_a?(TrainAction)
           obs = Observation.new(status: :ok, note: obs) if obs.is_a?(String)
           next if obs.status != :ok
-          history << Boxcar.assi("Thought: #{action.log}\n", "Observation: #{obs.note}")
+          history << Boxcar.assi("#{thought_prefix}#{action.log}", "\n",
+                                 key_and_value_text(observation_prefix, obs.note))
         else
           Boxcars.error "Unknown action: #{action}", :red
         end
       end
-      history << Boxcar.assi("Thought: I know the final answer\nFinal Answer: #{current_results[:output]}")
+      final_answer = key_and_value_text(final_answer_prefix, current_results[:output])
+      history << Boxcar.assi(
+        key_and_value_text(thought_prefix, "I know the final answer\n#{final_answer}\n"))
       prompt.add_history(history)
     end
@@ -196,6 +213,7 @@ require "boxcars/result"
 require "boxcars/boxcar/engine_boxcar"
 require "boxcars/boxcar/calculator"
 require "boxcars/boxcar/google_search"
+require "boxcars/boxcar/url_text"
 require "boxcars/boxcar/wikipedia_search"
 require "boxcars/boxcar/sql_base"
 require "boxcars/boxcar/sql_active_record"

data/lib/boxcars/conversation.rb CHANGED Viewed

@@ -64,12 +64,15 @@ module Boxcars
       @lines += conversation.lines
     end
-    # insert converation above history line
+    # insert conversation above history line if it is present
     # @param conversation [Conversation] The conversation to add
     def add_history(conversation)
-      @lines = @lines.dup
       # find the history line
       hi = lines.rindex { |ln| ln[0] == :history }
+      return unless hi
+      @lines = @lines.dup
       # insert the conversation above the history line
       @lines.insert(hi, *conversation.lines)
     end

data/lib/boxcars/train/xml_train.rb ADDED Viewed

@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+require "nokogiri"
+# base class for all XML trains
+module Boxcars
+  # A Train using XML for prompting and execution.
+  class XMLTrain < Train
+    # A Train will use a engine to run a series of boxcars.
+    # @param boxcars [Array<Boxcars::Boxcar>] The boxcars to run.
+    # @param prompt [Boxcars::Prompt] The prompt to use.
+    # @param engine [Boxcars::Engine] The engine to use for this train.
+    # @param kwargs [Hash] Additional arguments including: name, description, top_k, return_direct, and stop
+    # @abstract
+    def initialize(boxcars:, prompt:, engine: nil, **kwargs)
+      super
+    end
+    def init_prefixes
+      @thought_prefix ||= "<thought>"
+      @observation_prefix ||= "<observation>"
+      @final_answer_prefix ||= "<final_answer>"
+      @answer_prefix ||= "<answer>"
+      @question_prefix ||= "<question>"
+      @output_prefix ||= "<output>"
+    end
+    def close_tag(tag)
+      tag.to_s.sub("<", "</") if tag.to_s[0] == "<"
+    end
+    # the xml to describe the boxcars
+    def boxcars_xml
+      schema = boxcars.map(&:schema).join("\n")
+      "<boxcars>\n#{schema}</boxcars>"
+    end
+    # @return Hash The additional variables for this boxcar.
+    def prediction_additional(_inputs)
+      { boxcars_xml: boxcars_xml, next_actions: next_actions }.merge super
+    end
+    def build_output(text)
+      if text =~ /#{close_tag(thought_prefix)}/
+        "<data>#{engine_prefix}#{text}</data>"
+      else
+        "<data>#{text}</data>"
+      end
+    end
+    # Extract the boxcar and input from the engine output.
+    # @param text [String] The output from the engine.
+    # @return [Array<Boxcars::Boxcar, String>] The boxcar and input.
+    def extract_boxcar_and_input(text)
+      get_action_and_input(engine_output: build_output(text))
+    rescue StandardError => e
+      Boxcars.debug("Error: #{e.message}", :red)
+      [:error, e.message]
+    end
+    private
+    def parse_output(engine_output)
+      doc = Nokogiri::XML("<data>#{engine_prefix}#{engine_output}\n</data>")
+      keys = doc.element_children.first.element_children.map(&:name).map(&:to_sym)
+      keys.to_h do |key|
+        [key, doc.at_xpath("//#{key}")&.text]
+      end
+    end
+    def child_keys(xnode)
+      xnode.children.map(&:name).map(&:to_sym)
+    end
+    # get next action and input using an XNode
+    # @param xnode [XNode] The XNode to use.
+    # @return [Array<String, String>] The action and input.
+    def xn_get_action_and_input(xnode)
+      action = xnode.xtext("//action")
+      action_input = xnode.xtext("//action_input")
+      thought = xnode.xtext("//thought")
+      final_answer = xnode.xtext("//final_answer")
+      # the thought should be the first line here if it doesn't start with "Action:"
+      Boxcars.debug("Thought: #{thought}", :yellow)
+      if final_answer.present?
+        Result.new(status: :ok, answer: final_answer, explanation: final_answer)
+      else
+        # we have an unexpected output from the engine
+        unless action.present? && action_input.present?
+          return [:error, "You gave me an improperly formatted answer or didn't use tags."]
+        end
+        Boxcars.debug("Action: #{action}\nAction Input: #{action_input}", :yellow)
+        [action, action_input]
+      end
+    end
+    # Parse out the action and input from the engine output.
+    # @param engine_output [String] The output from the engine.
+    # @return [Array<String>] The action and input.
+    def get_action_and_input(engine_output:)
+      xn_get_action_and_input(XNode.from_xml(engine_output))
+    end
+  end
+end

data/lib/boxcars/train/xml_zero_shot.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+require "nokogiri"
+# Agent for the MRKL chain
+module Boxcars
+  # A Train using the zero-shot react method and only XML in the prompt.
+  class XMLZeroShot < XMLTrain
+    attr_reader :boxcars
+    attr_accessor :wants_next_actions
+    # @param boxcars [Array<Boxcars::Boxcar>] The boxcars to run.
+    # @param engine [Boxcars::Engine] The engine to use for this train.
+    # @param name [String] The name of the train. Defaults to 'Zero Shot XML'.
+    # @param description [String] The description of the train. Defaults to a short description of this XML train.
+    # @param prompt [Boxcars::Prompt] The prompt to use. Defaults to the built-in prompt.
+    # @param kwargs [Hash] Additional arguments to pass to the train. wants_next_actions: true
+    def initialize(boxcars:, engine: nil, name: 'Zero Shot XML', description: 'Zero Shot Train with XML', prompt: nil, **kwargs)
+      @engine_prefix = '<thought>'
+      # fetch (not delete) leaves :wants_next_actions in kwargs, so it is still forwarded to super
+      @wants_next_actions = kwargs.fetch(:wants_next_actions, false)
+      # fall back to the built-in XML conversation prompt when the caller supplies none
+      prompt ||= my_prompt
+      super(engine: engine, boxcars: boxcars, prompt: prompt, name: name, description: description, **kwargs)
+    end
+    private
+    CTEMPLATE = [
+      syst("<training>Answer the following questions as best you can. You have access to the following tools for actions:\n",
+           "%<boxcars_xml>s",
+           "Use the following format making sure all open tags have closing tags:\n",
+           " <question>the input question you must answer</question>\n",
+           " <thought>you should always think about what to do</thought>\n",
+           " <action>the action to take, from this action list above</action>\n",
+           " <action_input>input to the action</action_input>\n",
+           " <observation>the result of the action</observation>\n",
+           " ... (this Thought/Action/Action Input/Observation sequence can repeat N times)\n",
+           " <thought>I know the final answer</thought>\n",
+           " <final_answer>the final answer to the original input question</final_answer>\n",
+           "-- FORMAT END -\n",
+           "Your answer should always have begin and end tags for each element.\n",
+           "Also make sure to specify a question for the action_input.\n",
+           "Finally, if you can deduct the answer from the question or observation, you can ",
+           "jump to final_answer and give me the answer.\n",
+           "</training>"),
+      hist, # insert thoughts here from previous runs
+      user("<question>%<input>s</question>"),
+      assi("<thought>%<agent_scratchpad>s")
+    ].freeze
+    # The prompt to use for the train.
+    def my_prompt
+      @conversation ||= Conversation.new(lines: CTEMPLATE)
+      @my_prompt ||= ConversationPrompt.new(
+        conversation: @conversation,
+        input_variables: [:input],
+        other_inputs: [:boxcars_xml, :next_actions, :agent_scratchpad],
+        output_variables: [:answer])
+    end
+  end
+end

data/lib/boxcars/train/zero_shot.rb CHANGED Viewed

@@ -4,7 +4,7 @@
 module Boxcars
   # A Train using the zero-shot react method.
   class ZeroShot < Train
-    attr_reader :boxcars, :observation_prefix, :engine_prefix
+    attr_reader :boxcars, :observation_prefix
     attr_accessor :wants_next_actions
     # @param boxcars [Array<Boxcars::Boxcar>] The boxcars to run.
@@ -14,8 +14,6 @@ module Boxcars
     # @param prompt [Boxcars::Prompt] The prompt to use. Defaults to the built-in prompt.
     # @param kwargs [Hash] Additional arguments to pass to the train. wants_next_actions: true
     def initialize(boxcars:, engine: nil, name: 'Zero Shot', description: 'Zero Shot Train', prompt: nil, **kwargs)
-      @observation_prefix = 'Observation: '
-      @engine_prefix = 'Thought:'
       @wants_next_actions = kwargs.fetch(:wants_next_actions, false)
       prompt ||= my_prompt
       super(engine: engine, boxcars: boxcars, prompt: prompt, name: name, description: description, **kwargs)
@@ -31,6 +29,8 @@ module Boxcars
     # @return [Array<Boxcars::Boxcar, String>] The boxcar and input.
     def extract_boxcar_and_input(text)
       get_action_and_input(engine_output: text)
+    rescue StandardError => e
+      [:error, e.message]
     end
     private
@@ -94,22 +94,6 @@ module Boxcars
       assi("Thought: %<agent_scratchpad>s")
     ].freeze
-    def boxcar_names
-      @boxcar_names ||= "[#{boxcars.map(&:name).join(', ')}]"
-    end
-    def boxcar_descriptions
-      @boxcar_descriptions ||= boxcars.map { |boxcar| "#{boxcar.name}: #{boxcar.description}" }.join("\n")
-    end
-    def next_actions
-      if wants_next_actions
-        "Next Actions: Up to 3 logical suggested next questions for the user to ask after getting this answer.\n"
-      else
-        ""
-      end
-    end
     # The prompt to use for the train.
     def my_prompt
       @conversation ||= Conversation.new(lines: CTEMPLATE)

data/lib/boxcars/train.rb CHANGED Viewed

@@ -4,7 +4,8 @@ module Boxcars
   # @abstract
   class Train < EngineBoxcar
     attr_reader :boxcars, :return_values, :return_intermediate_steps,
-                :max_iterations, :early_stopping_method, :name_to_boxcar_map
+                :max_iterations, :early_stopping_method, :name_to_boxcar_map,
+                :observation_prefix, :thought_prefix, :final_answer_prefix, :answer_prefix, :question_prefix, :engine_prefix
     # A Train will use a engine to run a series of boxcars.
     # @param boxcars [Array<Boxcars::Boxcar>] The boxcars to run.
@@ -20,12 +21,21 @@ module Boxcars
       kwargs.delete(:return_intermediate_steps)
       @max_iterations = kwargs.delete(:max_iterations) || 25
       @early_stopping_method = kwargs.delete(:early_stopping_method) || "force"
-      kwargs[:stop] ||= ["\n#{observation_prefix}"]
+      init_prefixes
+      kwargs[:stop] = ["\n#{observation_prefix}"] unless kwargs.key?(:stop)
       super(prompt: prompt, engine: engine, **kwargs)
     end
-    # Extract the boxcar name and input from the text.
+    def init_prefixes
+      @thought_prefix ||= "Thought: "
+      @observation_prefix ||= "Observation: "
+      @final_answer_prefix ||= "Final Answer: "
+      @answer_prefix ||= "Answer:"
+      @question_prefix ||= "Question: "
+    end
+    # Callback to process the action/action input of a train.
     # @param text [String] The text to extract from.
     def extract_boxcar_and_input(text)
       Result.new(status: :ok, answer: text, explanation: engine_output)
@@ -34,16 +44,14 @@ module Boxcars
     # build the scratchpad for the engine
     # @param intermediate_steps [Array] The intermediate steps to build the scratchpad from.
     # @return [String] The scratchpad.
-    # rubocop:disable Lint/RedundantStringCoercion
     def construct_scratchpad(intermediate_steps)
       thoughts = ""
       intermediate_steps.each do |action, observation|
         thoughts += action.is_a?(String) ? action : " #{action.log}"
-        thoughts += "\n#{observation_prefix}#{observation.to_s}\n#{engine_prefix}"
+        thoughts += "\n#{observation_text(observation)}\n#{engine_prefix}"
       end
       thoughts
     end
-    # rubocop:enable Lint/RedundantStringCoercion
     # determine the next action
     # @param full_inputs [Hash] The inputs to the engine.
@@ -91,9 +99,7 @@ module Boxcars
     # the input keys
     # @return [Array<Symbol>] The input keys.
     def input_keys
-      list = prompt.input_variables
-      list.delete(:agent_scratchpad)
-      list
+      prompt.input_variables - [:agent_scratchpad]
     end
     # the output keys
@@ -123,13 +129,6 @@ module Boxcars
       final_output
     end
-    # the prefix for the engine
-    # @param return_direct [Boolean] Whether to return directly.
-    # @return [String] The prefix.
-    def engine_prefix(return_direct)
-      return_direct ? "" : engine_prefix
-    end
     # validate the prompt
     # @param values [Hash] The values to validate.
     # @return [Hash] The validated values.
@@ -162,7 +161,7 @@ module Boxcars
         thoughts = ""
         intermediate_steps.each do |action, observation|
           thoughts += action.log
-          thoughts += "\n#{observation_prefix}#{observation}\n#{engine_prefix}"
+          thoughts += "\n#{observation_text(observation)}\n#{engine_prefix}"
         end
         thoughts += "\n\nI now need to return a final answer based on the previous steps:"
         new_inputs = { agent_scratchpad: thoughts, stop: _stop }
@@ -173,6 +172,7 @@ module Boxcars
           TrainFinish.new({ output: full_output }, full_output)
         else
           boxcar, boxcar_input = parsed_output
+          Boxcars.debug "Got boxcar #{boxcar} and input #{boxcar_input}"
           if boxcar == finish_boxcar_name
             TrainFinish.new({ output: boxcar_input }, full_output)
           else
@@ -225,9 +225,46 @@ module Boxcars
       output = return_stopped_response(early_stopping_method, intermediate_steps, **inputs)
       pre_return(output, intermediate_steps)
     end
+    def key_and_value_text(key, value)
+      value = value.to_s
+      if key =~ /^<(?<tag_name>[[:word:]]+)>$/
+        # we need a close tag too
+        "#{key}#{value}</#{Regexp.last_match[:tag_name]}>"
+      else
+        "#{key}#{value}"
+      end
+    end
+    # this is for the scratchpad
+    def observation_text(observation)
+      key_and_value_text(observation_prefix, observation)
+    end
+    def question_text(question)
+      key_and_value_text(question_prefix, question)
+    end
+    def boxcar_names
+      @boxcar_names ||= boxcars.map(&:name).join(', ')
+    end
+    def boxcar_descriptions
+      @boxcar_descriptions ||= boxcars.map { |boxcar| "#{boxcar.name}: #{boxcar.description}" }.join("\n")
+    end
+    def next_actions
+      if wants_next_actions
+        "Next Actions: Up to 3 logical suggested next questions for the user to ask after getting this answer.\n"
+      else
+        ""
+      end
+    end
   end
 end
 require "boxcars/train/train_action"
 require "boxcars/train/train_finish"
 require "boxcars/train/zero_shot"
+require "boxcars/train/xml_train"
+require "boxcars/train/xml_zero_shot"

data/lib/boxcars/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Boxcars
   # The current version of the gem.
-  VERSION = "0.3.1"
+  VERSION = "0.3.2"
 end

data/lib/boxcars/x_node.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+require 'nokogiri'
+module Boxcars
+  class XNode
+    attr_accessor :node, :children, :attributes
+    def initialize(node)
+      @node = node
+      @valid_names = []
+      @children = {}
+      # @attributes = node.attributes.transform_values(&:value)
+      @attributes = node.attributes.values.to_h { |a| [a.name.to_sym, a.value] }
+      node.children.each do |child|
+        next if child.text?
+        child_node = XNode.new(child)
+        if @children[child.name].nil?
+          @valid_names << child.name.to_sym
+          @children[child.name] = child_node
+        elsif @children[child.name].is_a?(Array)
+          @children[child.name] << child_node
+        else
+          @children[child.name] = [@children[child.name], child_node]
+        end
+      end
+    end
+    def self.from_xml(xml)
+      doc = Nokogiri::XML.parse(xml)
+      raise XmlError, "XML is not valid: #{doc.errors.map { |e| "#{e.line}:#{e.column} #{e.message}" }}" if doc.errors.any?
+      XNode.new(doc.root)
+    end
+    def xml
+      @node.to_xml
+    end
+    def text
+      @node.text
+    end
+    def xpath(path)
+      @node.xpath(path)
+    end
+    def xtext(path)
+      rv = xpath(path)&.text&.gsub(/[[:space:]]+/, " ")&.strip
+      return nil if rv.empty?
+      rv
+    end
+    def stext
+      @stext ||= text.gsub(/[[:space:]]+/, " ").strip # remove extra spaces
+    end
+    def [](key)
+      @children[key.to_s]
+    end
+    def method_missing(name, *args)
+      return @children[name.to_s] if @children.key?(name.to_s)
+      super
+    end
+    # Report the child element names served by method_missing as supported methods.
+    # @param method_name [Symbol] The method being queried.
+    # @param include_private [Boolean] Whether private methods should be considered.
+    # @return [Boolean] true when method_name is a recorded child name, otherwise whatever super reports.
+    def respond_to_missing?(method_name, include_private = false)
+      # compare against the method_name argument; a bare `method` is Kernel#method,
+      # which raises ArgumentError when called without arguments
+      @valid_names.include?(method_name) || super
+    end
+  end
+end

data/lib/boxcars.rb CHANGED Viewed

@@ -22,6 +22,9 @@ module Boxcars
   # Error class for all Boxcars key errors.
   class KeyError < Error; end
+  # Error class for all Boxcars XML errors.
+  class XmlError < Error; end
   # Configuration contains gem settings
   class Configuration
     attr_writer :openai_access_token, :serpapi_api_key
@@ -179,6 +182,7 @@ module Boxcars
 end
 require "boxcars/version"
+require "boxcars/x_node"
 require "boxcars/prompt"
 require "boxcars/conversation_prompt"
 require "boxcars/conversation"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: boxcars
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.2
 platform: ruby
 authors:
 - Francis Sullivan
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-07-01 00:00:00.000000000 Z
+date: 2023-07-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google_search_results
@@ -53,6 +53,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.8'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.15'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.15'
 - !ruby/object:Gem::Dependency
   name: pgvector
   requirement: !ruby/object:Gem::Requirement
@@ -113,6 +127,7 @@ files:
 - lib/boxcars/boxcar/sql_base.rb
 - lib/boxcars/boxcar/sql_sequel.rb
 - lib/boxcars/boxcar/swagger.rb
+- lib/boxcars/boxcar/url_text.rb
 - lib/boxcars/boxcar/vector_answer.rb
 - lib/boxcars/boxcar/wikipedia_search.rb
 - lib/boxcars/conversation.rb
@@ -129,6 +144,8 @@ files:
 - lib/boxcars/train.rb
 - lib/boxcars/train/train_action.rb
 - lib/boxcars/train/train_finish.rb
+- lib/boxcars/train/xml_train.rb
+- lib/boxcars/train/xml_zero_shot.rb
 - lib/boxcars/train/zero_shot.rb
 - lib/boxcars/vector_search.rb
 - lib/boxcars/vector_store.rb
@@ -148,6 +165,7 @@ files:
 - lib/boxcars/vector_store/pgvector/search.rb
 - lib/boxcars/vector_store/split_text.rb
 - lib/boxcars/version.rb
+- lib/boxcars/x_node.rb
 homepage: https://github.com/BoxcarsAI/boxcars
 licenses:
 - MIT