saxxy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +22 -0
 - data/.travis.yml +5 -0
 - data/Gemfile +13 -0
 - data/LICENSE +22 -0
 - data/README.md +117 -0
 - data/Rakefile +12 -0
 - data/lib/saxxy.rb +2 -0
 - data/lib/saxxy/activatable.rb +160 -0
 - data/lib/saxxy/callbacks/libxml.rb +26 -0
 - data/lib/saxxy/callbacks/nokogiri.rb +30 -0
 - data/lib/saxxy/callbacks/ox.rb +66 -0
 - data/lib/saxxy/callbacks/sax.rb +86 -0
 - data/lib/saxxy/context.rb +88 -0
 - data/lib/saxxy/context_tree.rb +85 -0
 - data/lib/saxxy/event.rb +83 -0
 - data/lib/saxxy/event_registry.rb +122 -0
 - data/lib/saxxy/node_action.rb +59 -0
 - data/lib/saxxy/node_rule.rb +90 -0
 - data/lib/saxxy/parsers/base.rb +28 -0
 - data/lib/saxxy/parsers/libxml.rb +52 -0
 - data/lib/saxxy/parsers/nokogiri.rb +28 -0
 - data/lib/saxxy/parsers/ox.rb +30 -0
 - data/lib/saxxy/service.rb +47 -0
 - data/lib/saxxy/utils/agent.rb +66 -0
 - data/lib/saxxy/utils/callback_array.rb +27 -0
 - data/lib/saxxy/utils/helpers.rb +13 -0
 - data/lib/saxxy/version.rb +3 -0
 - data/saxxy.gemspec +21 -0
 - data/spec/saxxy/activatable_spec.rb +344 -0
 - data/spec/saxxy/callbacks/sax_spec.rb +456 -0
 - data/spec/saxxy/context_spec.rb +51 -0
 - data/spec/saxxy/context_tree_spec.rb +68 -0
 - data/spec/saxxy/event_registry_spec.rb +137 -0
 - data/spec/saxxy/event_spec.rb +49 -0
 - data/spec/saxxy/node_action_spec.rb +46 -0
 - data/spec/saxxy/node_rule_spec.rb +99 -0
 - data/spec/saxxy/parsers/libxml_spec.rb +104 -0
 - data/spec/saxxy/parsers/nokogiri_spec.rb +200 -0
 - data/spec/saxxy/parsers/ox_spec.rb +175 -0
 - data/spec/saxxy/utils/agent_spec.rb +63 -0
 - data/spec/spec_helper.rb +28 -0
 - data/spec/support/agent_macros.rb +24 -0
 - metadata +155 -0
 
| 
         @@ -0,0 +1,59 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Saxxy

              ##
              # @author rubymaniac
              #
              # A NodeAction couples an activation rule with a block of code.
              # The rule answers whether the action applies to a given node and
              # the block is what gets evaluated, in the scope of a context
              # object, when the action is called.
              #
              # @!attribute [r] activation_rule
              #   @return [#matches] the rule that {#matches} delegates to
              #
              # @!attribute [r] action
              #   @return [Proc] the callable that {#call} evaluates
              ##
              class NodeAction
                attr_reader :activation_rule, :action

                # Builds a NodeAction from a rule, an evaluation context and a block.
                #
                # @param activation_rule [#matches] the object asked whether this
                #   action applies to a node
                #
                # @param context [Object] the receiver the block is evaluated on;
                #   when omitted it is the NodeAction being constructed
                #
                # @param block [Proc] the code to evaluate on `context`; when no
                #   block is given an identity lambda is used instead
                #
                def initialize(activation_rule, context = self, &block)
                  @activation_rule = activation_rule
                  @ctx = context
                  # `block` is nil exactly when no block was passed.
                  @action = block || ->(e) { e }
                end

                # Asks the activation rule whether it matches the described node.
                #
                # @param element_name [String] the name of a node
                #
                # @param attributes [Hash<String, String>] the attributes of a node
                #
                # @return [Boolean] whatever the activation rule answers
                #
                def matches(element_name, attributes)
                  @activation_rule.matches(element_name, attributes)
                end

                # Evaluates the action on the context given to the constructor.
                #
                # The collected arguments are handed to the action as ONE Array.
                # A block declaring several parameters has that Array spread over
                # them, while the default identity lambda returns the Array itself.
                #
                # @param args [Array] the arguments to hand over to the action
                #
                # @return [Object] the value returned by the action
                #
                def call(*args)
                  @ctx.instance_exec(args, &@action)
                end
              end

            end
         
     | 
| 
         @@ -0,0 +1,90 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Saxxy

              ##
              # @author rubymaniac
              #
              # A NodeRule is a predicate that is tested against an XML node to
              # decide whether the node satisfies it.
              #
              # It has two parts. The `element` part constrains the node's name
              # and is either a String (the name must be equal to it) or a Regexp
              # (the name must match it).
              #
              # The `attributes` part constrains the node's attributes. It is a
              # set of key-value pairs where the key names the attribute to check
              # and the value is a String, a Regexp, or nil (the attribute must
              # then be absent from the node).
              #
              # @!attribute [r] element
              #   @return [String|Regexp] node's name rule
              #
              # @!attribute [r] attributes
              #   @return [Hash<String, String|Regexp>] node's attributes rule
              ##
              class NodeRule
                attr_reader :element, :attributes

                # Builds a NodeRule from its `element` and `attributes` parts.
                #
                # @param element [String|Regexp] constraint on the node name
                # @param attributes [Hash<String, String|Regexp>]
                #   constraints on the node's attributes; passed through
                #   Saxxy::Helpers.stringify_keys before being stored
                #
                def initialize(element, attributes = {})
                  @element = element
                  @attributes = Saxxy::Helpers.stringify_keys(attributes)
                end

                # Tests both the name and the attributes of a node.
                #
                # @param element_name [String] node's name
                # @param attrs [Hash<String, String>] node's attributes
                #
                # @return [Boolean] whether this NodeRule matches the node
                #
                def matches(element_name, attrs = {})
                  name_ok = match_element_name(element_name)
                  name_ok && match_attributes(attrs)
                end

                # Compares this NodeRule with another one, part by part.
                #
                # @param rule [NodeRule] the other NodeRule
                #
                # @return [Boolean] whether both parts are equal
                #
                def equals(rule)
                  same_element = element == rule.element
                  same_element && attributes == rule.attributes
                end

                # Tests only the name of a node.
                #
                # @param element_name [String] node's name
                #
                # @return [Boolean] whether this NodeRule matches node's name
                #
                def match_element_name(element_name)
                  match(element, element_name)
                end

                # Tests only the attributes of a node. Every pair of the rule
                # must hold; attributes of the node that the rule does not
                # mention are ignored.
                #
                # @param attrs [Hash<String, String>] node's attributes
                #
                # @return [Boolean] whether this NodeRule matches node's attributes
                #
                def match_attributes(attrs)
                  node_attrs = Saxxy::Helpers.stringify_keys(attrs)
                  attributes.all? do |name, expected|
                    actual = node_attrs[name]
                    # A missing attribute only satisfies a rule value of nil.
                    actual.nil? ? expected.nil? : match(expected, actual)
                  end
                end

                private

                # Regexp rules must match the value, anything else must equal it.
                def match(obj, value)
                  case obj
                  when Regexp then !obj.match(value).nil?
                  else obj == value
                  end
                end
              end

            end
         
     | 
| 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Saxxy
              module Parsers

                # Raised by the parse methods of Base, which have no implementation.
                NotImplemented = Class.new(StandardError)

                # Common ancestor of the parsers. It stores the context tree and
                # the options it is given and declares the three parsing entry
                # points, each of which raises NotImplemented here.
                #
                # @!attribute [r] context_tree
                #   @return [Object] the context tree given to the constructor
                #
                # @!attribute [r] options
                #   @return [Hash] the options given to the constructor
                class Base
                  attr_reader :context_tree, :options

                  # @param context_tree [Object] stored as-is
                  # @param options [Hash] stored as-is, empty by default
                  def initialize(context_tree, options = {})
                    @context_tree, @options = context_tree, options
                  end

                  # @param path_to_file [String] unused by this implementation
                  # @raise [NotImplemented] always
                  def parse_file(path_to_file)
                    raise NotImplemented
                  end

                  # @param string [String] unused by this implementation
                  # @raise [NotImplemented] always
                  def parse_string(string)
                    raise NotImplemented
                  end

                  # @param io [IO] unused by this implementation
                  # @raise [NotImplemented] always
                  def parse_io(io)
                    raise NotImplemented
                  end
                end

              end
            end
         
     | 
| 
         @@ -0,0 +1,52 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "saxxy/parsers/base"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "saxxy/callbacks/libxml"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module Saxxy
              module Parsers

                # Parser that drives a LibXML SAX parser with Saxxy's LibXML
                # callbacks, built from the root of the context tree.
                class Libxml < Base
                  # Stores the context tree and options and precomputes the
                  # LibXML parser option flags from `options[:mode]`:
                  #
                  # * `:html` or no mode - RECOVER, NOERROR, NOWARNING and NONET
                  # * `:xml`             - RECOVER and NONET
                  #
                  # Any other mode leaves the flags nil.
                  # NOTE(review): confirm how LibXML reacts to nil options.
                  #
                  # @param context_tree [Object] must respond to `root`
                  # @param options [Hash] only `:mode` is read here
                  def initialize(context_tree, options = {})
                    super
                    @__internal_context_options =
                      case options[:mode]
                      when :html, nil
                        combine_flags(:RECOVER, :NOERROR, :NOWARNING, :NONET)
                      when :xml
                        combine_flags(:RECOVER, :NONET)
                      end
                  end

                  # Parses a document held in a String.
                  #
                  # @param string [String] the document
                  # @param encoding [Object] a LibXML encoding constant
                  def parse_string(string, encoding = LibXML::XML::Encoding::UTF_8)
                    context = build_context(:string, string, encoding)
                    parse_with(LibXML::XML::SaxParser.new(context))
                  end

                  # Parses the document stored at a path.
                  #
                  # @param path_to_file [String] path of the document
                  # @param encoding [Object] a LibXML encoding constant
                  def parse_file(path_to_file, encoding = LibXML::XML::Encoding::UTF_8)
                    context = build_context(:file, path_to_file, encoding)
                    parse_with(LibXML::XML::SaxParser.new(context))
                  end

                  # Parses a document read from an IO.
                  #
                  # @param io [IO] the source of the document
                  # @param encoding [Object] a LibXML encoding constant
                  def parse_io(io, encoding = LibXML::XML::Encoding::UTF_8)
                    context = build_context(:io, io, encoding)
                    parse_with(LibXML::XML::SaxParser.new(context))
                  end

                  private

                  # ORs together the named LibXML::XML::Parser::Options constants.
                  def combine_flags(*names)
                    names.map { |name| LibXML::XML::Parser::Options.const_get(name) }.reduce(:|)
                  end

                  # Creates a LibXML parser context through the given factory
                  # method (:string, :file or :io) and configures its options,
                  # encoding and recovery flag.
                  def build_context(method, obj, encoding)
                    ctx = LibXML::XML::Parser::Context.public_send(method, obj)
                    ctx.options = @__internal_context_options
                    ctx.encoding = encoding
                    ctx.recovery = true
                    ctx
                  end

                  # Installs fresh Saxxy callbacks on the parser and runs it.
                  def parse_with(parser)
                    handler = Saxxy::Callbacks::Libxml.new(context_tree.root)
                    parser.callbacks = handler
                    parser.parse
                  end
                end

              end
            end
         
     | 
| 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "saxxy/parsers/base"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "saxxy/callbacks/nokogiri"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module Saxxy
              module Parsers

                # Parser that drives Nokogiri's HTML SAX parser with Saxxy's
                # Nokogiri callbacks. A new Nokogiri parser is created for every
                # parse call.
                class Nokogiri < Base
                  # Parses a document held in a String.
                  #
                  # @param string [String] the document
                  # @param encoding [String] encoding name, 'UTF-8' by default
                  # @param block [Proc] forwarded to Nokogiri's `parse_memory`
                  def parse_string(string, encoding = 'UTF-8', &block)
                    sax = new_parser
                    sax.parse_memory(string, encoding, &block)
                  end

                  # Parses the document stored at a path.
                  #
                  # @param path_to_file [String] path of the document
                  # @param encoding [String] encoding name, 'UTF-8' by default
                  # @param block [Proc] forwarded to Nokogiri's `parse_file`
                  def parse_file(path_to_file, encoding = 'UTF-8', &block)
                    sax = new_parser
                    sax.parse_file(path_to_file, encoding, &block)
                  end

                  # Parses a document read from an IO.
                  #
                  # @param io [IO] the source of the document
                  # @param encoding [String] encoding name, 'UTF-8' by default
                  # @param block [Proc] forwarded to Nokogiri's `parse_io`
                  def parse_io(io, encoding = 'UTF-8', &block)
                    sax = new_parser
                    sax.parse_io(io, encoding, &block)
                  end

                  private

                  # Builds a Nokogiri HTML SAX parser whose document handler is a
                  # Saxxy callbacks object created from the root of the context tree.
                  def new_parser
                    handler = Saxxy::Callbacks::Nokogiri.new(context_tree.root)
                    ::Nokogiri::HTML::SAX::Parser.new(handler)
                  end
                end

              end
            end
         
     | 
| 
         @@ -0,0 +1,30 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "saxxy/parsers/base"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "saxxy/callbacks/ox"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module Saxxy
              module Parsers

                # Parser that feeds a document to Ox's SAX parser using Saxxy's
                # Ox callbacks, built from the root of the context tree.
                class Ox < Base
                  # Parses a document held in a String by wrapping it in a StringIO.
                  # NOTE(review): StringIO is used without a visible
                  # `require "stringio"` in this file - confirm it is loaded elsewhere.
                  #
                  # @param string [String] the document
                  # @param encoding [String, Encoding, nil] when given, it is set
                  #   on the IO before parsing
                  def parse_string(string, encoding = nil)
                    parse(StringIO.new(string), encoding)
                  end

                  # Parses the document stored at a path.
                  #
                  # The file is opened with the block form of File.open so that it
                  # is closed once parsing finishes, even when parsing raises.
                  #
                  # @param path_to_file [String] path of the document
                  # @param encoding [String, Encoding, nil] when given, it is set
                  #   on the file before parsing
                  # @return [Object] whatever the parse returns
                  def parse_file(path_to_file, encoding = nil)
                    File.open(path_to_file) { |file| parse(file, encoding) }
                  end

                  # Parses a document read from an IO. The IO is left open; when
                  # an encoding is given it is set on the caller's IO.
                  #
                  # @param io [IO] the source of the document
                  # @param encoding [String, Encoding, nil] when given, it is set
                  #   on the IO before parsing
                  def parse_io(io, encoding = nil)
                    parse(io, encoding)
                  end

                  private

                  # Runs Ox's SAX parser over the IO with fresh Saxxy callbacks.
                  # The parser's options override the `smart: true` default.
                  def parse(io, encoding)
                    io.set_encoding(encoding) if encoding
                    callbacks = Saxxy::Callbacks::Ox.new(context_tree.root)
                    ::Ox.sax_parse(callbacks, io, {smart: true}.merge(options))
                  end
                end

              end
            end
         
     | 
| 
         @@ -0,0 +1,47 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "saxxy/context_tree"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            module Saxxy
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
              # Namespace for the concrete parser backends. Each backend is registered
              # for autoloading, so its file is only required the first time the
              # constant is referenced.
              module Parsers
                {
                  :Nokogiri => "saxxy/parsers/nokogiri",
                  :Ox       => "saxxy/parsers/ox",
                  :Libxml   => "saxxy/parsers/libxml"
                }.each do |const_name, feature_path|
                  autoload const_name, feature_path
                end
              end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
              # Front door for parsing: builds a parser backend around a context tree
              # created from the given block and forwards the parse_* calls to it.
              class Service
                attr_reader :parser

                # parser:  a Symbol or String naming a constant under Saxxy::Parsers
                #          (after camelization), or any other object, which is then
                #          used directly as the parser class.
                # options: passed untouched as the second argument of the parser
                #          class' constructor.
                # block:   required; handed to Saxxy::ContextTree.new together with the
                #          object that is `self` where the block was written.
                # Raises ArgumentError when no block is given.
                def initialize(parser, options = {}, &block)
                  @parser = build_parser(parser, options, &block)
                end

                # Forwards all arguments to the underlying parser's parse_file.
                def parse_file(*args)
                  @parser.parse_file(*args)
                end

                # Forwards all arguments to the underlying parser's parse_string.
                def parse_string(*args)
                  @parser.parse_string(*args)
                end

                # Forwards all arguments to the underlying parser's parse_io.
                def parse_io(*args)
                  @parser.parse_io(*args)
                end

                private

                # Instantiates the parser class with a fresh context tree.
                def build_parser(parser, options, &block)
                  # Without a block there is no binding to take the context from; fail
                  # with a clear message instead of a NoMethodError on nil.
                  raise ArgumentError, "a block is required to build the context tree" unless block

                  # The receiver of the block's defining scope becomes the tree context.
                  ctx = block.binding.eval("self")
                  parser_class_from(parser).new(Saxxy::ContextTree.new(ctx, &block), options)
                end

                # Resolves +obj+ to a parser class: names are looked up under
                # Saxxy::Parsers, anything else is returned unchanged.
                def parser_class_from(obj)
                  case obj
                  when Symbol, String
                    Saxxy::Parsers.const_get(Saxxy::Helpers.camelize(obj))
                  else
                    obj
                  end
                end
              end
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,66 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "net/http"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "uri"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module Saxxy
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              # The Agent is a thin wrapper over Net::HTTP (or a Net::HTTP::Proxy class
              # when proxy options are given) meant for crawling purposes. It supports
              # GET and POST through its get and post methods and remembers the last
              # response it received.
              class Agent
                attr_reader :url, :uri, :proxy, :agent, :response

                # Initializes an agent with optional proxy options.
                # Url: A string (or URI object) the agent will use for issuing requests.
                #      It can be changed later through the uri= writer.
                # Options:
                # - proxy:
                #   - address: The address of the proxy.
                #   - port: The port the proxy will use.
                #   - username: The username if the proxy needs auth.
                #   - password: The password if the proxy needs auth.
                def initialize(url, opts = {})
                  @proxy = opts[:proxy] || {}
                  @agent = proxy.empty? ? Net::HTTP : Net::HTTP::Proxy(proxy[:address], proxy[:port], proxy[:username], proxy[:password])
                  self.uri = url
                end

                # Sets both uri and url from the argument, which may be either a URI
                # object or a string holding a valid URL.
                def uri=(url_or_uri)
                  @uri = url_or_uri.is_a?(URI) ? url_or_uri : URI(url_or_uri)
                  @url = uri.to_s
                end

                # Issues a GET request to the url given as an argument or, when none is
                # given, to the one the agent currently holds. Returns the response body.
                # Note: if the provided url differs from the agent's, the agent's url is
                #       updated as well. See set_uri_for.
                def get(url = nil)
                  issue_request(url, :get_response)
                end

                # Issues a POST request to the url given as an argument or, when none is
                # given, to the one the agent currently holds. Uses the agent's post_form
                # method and forwards opts to it as the form data. Returns the response
                # body.
                # Note: if the provided url differs from the agent's, the agent's url is
                #       updated as well. See set_uri_for.
                def post(url = nil, opts = {})
                  issue_request(url, :post_form, opts)
                end

                private

                # Replaces the agent's uri/url when a url is given; a nil url keeps the
                # current one.
                def set_uri_for(url = nil)
                  self.uri = url if url
                end

                # Performs the request named by request_method (:get_response or
                # :post_form) on the underlying agent, stores the result in @response
                # and returns its body.
                #
                # Only the uri update is conditional. The request itself is always sent,
                # so asking for the url the agent already holds neither fails on a
                # missing response nor hands back a stale body from an earlier call.
                def issue_request(new_url_or_uri, request_method, *request_args)
                  set_uri_for(new_url_or_uri) if new_url_or_uri.to_s != url
                  @response = agent.public_send(request_method, uri, *request_args)
                  response.body
                end
              end
         
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Saxxy
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              # An Array that can notify listeners when elements are appended with <<
              # or removed with >>. Listeners are registered through on_add and
              # on_remove; each registration replaces the previous one.
              class CallbackArray < Array
                # Registers the block invoked with every object appended via <<.
                # Returns the array itself so calls can be chained.
                def on_add(&block)
                  @add_callback = block
                  self
                end

                # Registers the block invoked with every object passed to >>.
                # Returns the array itself so calls can be chained.
                def on_remove(&block)
                  @remove_callback = block
                  self
                end

                # Appends obj, then fires the add callback (if any). Returns self.
                def <<(obj)
                  super
                  fire(@add_callback, obj)
                end

                # Deletes every element equal to obj, then fires the remove callback
                # (if any) regardless of whether something was actually deleted.
                # Returns self.
                def >>(obj)
                  delete(obj)
                  fire(@remove_callback, obj)
                end

                private

                # Calls listener with obj when a listener is set; always returns self.
                def fire(listener, obj)
                  listener.call(obj) if listener
                  self
                end
              end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            end
         
     |