RubyGems - imw - Versions diffs - 0.1.0 → 0.1.1 - Mend

imw 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

data/README.rdoc +194 -31
data/VERSION +1 -1
data/bin/imw +5 -0
data/lib/imw/boot.rb +0 -15
data/lib/imw/dataset/paths.rb +38 -0
data/lib/imw/dataset/task.rb +21 -18
data/lib/imw/dataset/workflow.rb +126 -65
data/lib/imw/dataset.rb +56 -82
data/lib/imw/files/basicfile.rb +3 -3
data/lib/imw/files/compressed_files_and_archives.rb +23 -37
data/lib/imw/files/csv.rb +2 -1
data/lib/imw/files/directory.rb +62 -0
data/lib/imw/files/excel.rb +84 -0
data/lib/imw/files/sgml.rb +4 -23
data/lib/imw/files.rb +62 -47
data/lib/imw/packagers/archiver.rb +19 -1
data/lib/imw/packagers/s3_mover.rb +8 -0
data/lib/imw/parsers/html_parser/matchers.rb +251 -268
data/lib/imw/parsers/html_parser.rb +181 -176
data/lib/imw/parsers.rb +1 -1
data/lib/imw/repository.rb +35 -0
data/lib/imw/runner.rb +114 -0
data/lib/imw/utils/extensions/core.rb +0 -16
data/lib/imw/utils/paths.rb +0 -28
data/lib/imw.rb +21 -32
metadata +11 -19
data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +0 -37
data/lib/imw/dataset/datamapper.rb +0 -66
data/lib/imw/dataset/loaddump.rb +0 -50
data/lib/imw/dataset/old/file_collection.rb +0 -88
data/lib/imw/dataset/old/file_collection_utils.rb +0 -71
data/lib/imw/dataset/scaffold.rb +0 -132
data/lib/imw/dataset/scraped_uri.rb +0 -305
data/lib/imw/dataset/scrub/old_working_scrubber.rb +0 -87
data/lib/imw/dataset/scrub/scrub.rb +0 -147
data/lib/imw/dataset/scrub/scrub_simple_url.rb +0 -38
data/lib/imw/dataset/scrub/scrub_test.rb +0 -60
data/lib/imw/dataset/scrub/slug.rb +0 -101
data/lib/imw/dataset/stats/counter.rb +0 -23
data/lib/imw/dataset/stats.rb +0 -73

data/lib/imw/packagers/archiver.rb CHANGED Viewed

@@ -26,6 +26,10 @@ module IMW
         add_inputs inputs
       end
+      # FIXME Instead of requiring +new_inputs+ to be either an Array
+      # or Hash just iterate through whatever it is using +each+ and
+      # see if the iterate can be interpreted as a mapping between
+      # strings.
       def add_inputs new_inputs
         @inputs ||= {}
         if new_inputs.is_a?(Array)
@@ -70,6 +74,10 @@ module IMW
         @dir ||= File.join(tmp_dir, name.to_s)
       end
+      # FIXME This needs to be made idempotent -- calling prepare
+      # twice should not do any work the second time (unless the user
+      # is insistent and passes a :force option -- or maybe use bang
+      # and not-bang versions of the method for this distinction).
       def prepare!
         FileUtils.mkdir_p dir unless File.exist?(dir)
         inputs.each_pair do |path, basename|
@@ -87,7 +95,17 @@ module IMW
           end
         end
       end
+      # Package the contents of the temporary directory to an archive
+      # at +output+ but return exceptions instead of raising them.
+      def package output, options={}
+        begin
+          package! output, options={}
+        rescue RuntimeError => e
+          return e
+        end
+      end
       # Package the contents of the temporary directory to an archive
       # at +output+.
       def package! output, options={}

data/lib/imw/packagers/s3_mover.rb CHANGED Viewed

@@ -19,6 +19,14 @@ module IMW
         last_response && last_response.response.class == Net::HTTPOK
       end
+      def upload local_path, remote_path
+        begin
+          upload! local_path, remote_path
+        rescue RuntimeError => e
+          return e
+        end
+      end
       def upload! local_path, remote_path
         @last_response = AWS::S3::S3Object.store(remote_path, open(local_path), bucket_name)
       end

data/lib/imw/parsers/html_parser/matchers.rb CHANGED Viewed

@@ -1,305 +1,288 @@
-#
-# h2. lib/imw/parsers/html_parser/matcher.rb -- utility classes for html parser
-#
-# == About
-#
-# This file defines the <tt>IMW::HTMLParserMatcher::Matcher</tt>
-# abstract class and some concrete subclasses which perform specific
-# kinds of matches against HTML documents using the
-# Hpricot[https://code.whytheluckystiff.net/hpricot/] library.
-#
-# Author::    (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
-# Copyright:: Copyright (c) 2008 infochimps.org
-# License::   GPL 3.0
-# Website::   http://infinitemonkeywrench.org/
-#
-# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
 require 'imw/utils/extensions/hpricot'
 module IMW
-  module HTMLParserMatcher
+  module Parsers
+    module HtmlMatchers
-    # An abstract class from which to subclass specific HTML matchers.
-    #
-    # A subclass is initialized with a +selector+ and an optional
-    # +matcher+.  The +selector+ is an HTML path specification used to
-    # collect elements from the document.  If initialized with a
-    # +matcher+, the +matcher+ is used to return match information
-    # from the elements; else the inner HTML is returned.  Subclasses
-    # decide how the +selector+ will collect elements.
-    class Matcher
-      attr_accessor :selector
-      attr_accessor :matcher
-      attr_accessor :options
-      def initialize selector, matcher=nil, options={}
-        self.selector = selector
-        self.matcher  = matcher
-        self.options  = options
-      end
+      # An abstract class from which to subclass specific HTML matchers.
+      #
+      # A subclass is initialized with a +selector+ and an optional
+      # +matcher+.  The +selector+ is an HTML path specification used to
+      # collect elements from the document.  If initialized with a
+      # +matcher+, the +matcher+ is used to return match information
+      # from the elements; else the inner HTML is returned.  Subclasses
+      # decide how the +selector+ will collect elements.
+      class Matcher
+        attr_accessor :selector
+        attr_accessor :matcher
+        attr_accessor :options
+        def initialize selector, matcher=nil, options={}
+          self.selector = selector
+          self.matcher  = matcher
+          self.options  = options
+        end
-      def match doc
-        raise "Abstract class #{self.class}"
+        def match doc
+          raise "Abstract class #{self.class}"
+        end
       end
-    end
-    # Concrete subclass of <tt>IMW::HTMLParserMatcher::Matcher</tt>
-    # for matching against the first element of a document matching a
-    # selector.
-    class MatchFirstElement < Matcher
-      # Grab the first element from +doc+ matching the +selector+ this
-      # class was initialized with.  If initialized with a +matcher+,
-      # then return the +matcher+'s match against the first element,
-      # else just return the inner HTML of the first element.
-      #
-      #   m = MatchFirstElement.new('span#bio/a.homepage')
-      #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
-      #   # => 'My Homepage'
-      def match doc
-        doc = Hpricot(doc) if doc.is_a?(String)
-        el = doc.at(selector) or return nil
-        if matcher
-          matcher.match(el)
-        else
-          options[:html] ? el.inner_html : el.inner_text.strip
+      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
+      # for matching against the first element of a document matching a
+      # selector.
+      class MatchFirstElement < Matcher
+        # Grab the first element from +doc+ matching the +selector+ this
+        # class was initialized with.  If initialized with a +matcher+,
+        # then return the +matcher+'s match against the first element,
+        # else just return the inner HTML of the first element.
+        #
+        #   m = MatchFirstElement.new('span#bio/a.homepage')
+        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
+        #   # => 'My Homepage'
+        def match doc
+          doc = Hpricot(doc) if doc.is_a?(String)
+          el = doc.at(selector) or return nil
+          if matcher
+            matcher.match(el)
+          else
+            options[:html] ? el : el.inner_text.strip
+          end
         end
       end
-    end
-    # FIXME is there really a need for this separate class?  why can't
-    # MatchFirstElement.match accept a block?
-    class MatchProc < MatchFirstElement
-      attr_accessor :proc
-      attr_accessor :options
-      def initialize selector, proc, matcher=nil, options={}
-        super selector, matcher
-        self.options = options
-        self.proc = proc
-      end
-      def match doc
-        val = super doc
-        val ? self.proc.call(val) : self.proc.call(doc)
-      end
-    end
+      # FIXME is there really a need for this separate class?  why can't
+      # MatchFirstElement.match accept a block?
+      class MatchProc < MatchFirstElement
+        attr_accessor :proc
+        attr_accessor :options
+        def initialize selector, proc, matcher=nil, options={}
+          super selector, matcher
+          self.options = options
+          self.proc = proc
+        end
+        def match doc
+          val = super doc
+          val ? self.proc.call(val) : self.proc.call(doc)
+        end
+      end
-    # Concrete subclass of <tt>IMW::HTMLParserMatcher::Matcher</tt>
-    # for matching each element of a document matching a selector.
-    class MatchArray < Matcher
-      # Grab each element from +doc+ matching the +selector+ this
-      # class was initialized with.  If initialized with a +matcher+,
-      # then return an array consisting of the +matcher+'s match
-      # against each element, else just return an array consisting of
-      # the inner HTML of each element.
-      #
-      #   m = MatchArray.new('span#bio/a.homepage')
-      #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>
-      #            <span id="bio"><a class="homepage" href="http://foo.baz">Your Homepage</a></span>
-      #            <span id="bio"><a class="homepage" href="http://foo.qux">Their Homepage</a></span>')
-      #   # => ["My Homepage", "Your Homepage", "Their Homepage"]
-      def match doc
-        doc = Hpricot(doc) if doc.is_a?(String)
-        subdoc = (doc/selector) or return nil
-        if matcher
-          subdoc.map{|el| matcher.match(el)}
-        else
-          if options[:html]
-            subdoc.map{|el| el.inner_html }
+      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
+      # for matching each element of a document matching a selector.
+      class MatchArray < Matcher
+        # Grab each element from +doc+ matching the +selector+ this
+        # class was initialized with.  If initialized with a +matcher+,
+        # then return an array consisting of the +matcher+'s match
+        # against each element, else just return an array consisting of
+        # the inner HTML of each element.
+        #
+        #   m = MatchArray.new('span#bio/a.homepage')
+        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>
+        #            <span id="bio"><a class="homepage" href="http://foo.baz">Your Homepage</a></span>
+        #            <span id="bio"><a class="homepage" href="http://foo.qux">Their Homepage</a></span>')
+        #   # => ["My Homepage", "Your Homepage", "Their Homepage"]
+        def match doc
+          doc = Hpricot(doc) if doc.is_a?(String)
+          subdoc = (doc/selector) or return nil
+          if matcher
+            subdoc.map{|el| matcher.match(el)}
           else
-            subdoc.map{|el| el.inner_text.strip }
+            if options[:html]
+              subdoc.map{|el| el }
+            else
+              subdoc.map{|el| el.inner_text.strip }
+            end
           end
         end
       end
-    end
-    # Concrete subclass of <tt>IMW::HTMLParserMatcher::Matcher</tt>
-    # for matching an attribute of the first element of a document
-    # matching a selector.
-    class MatchAttribute < Matcher
+      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
+      # for matching an attribute of the first element of a document
+      # matching a selector.
+      class MatchAttribute < Matcher
-      attr_accessor :attribute
+        attr_accessor :attribute
-      # Unlike <tt>IMW::HTMLParserMatcher::Matcher</tt>,
-      # <tt>IMW::HTMLParserMatcher::MatchAttribute</tt> is initialized
-      # with three arguments: the +selector+ which collects elements
-      # from an HTML document, an +attribute+ to extract, and
-      # (optionally) a +matcher+ to perform the matching.
-      def initialize selector, attribute, matcher=nil
-        super selector, matcher
-        self.attribute = attribute.to_s
-      end
-      # Grab the first element from +doc+ matching the +selector+ this
-      # class was initialized with.  If initialized with a +matcher+,
-      # then return the +matcher+'s match against the value of the
-      # +attribute+ this class was initialized with, else just return
-      # the value of the +attribute+.
-      #
-      #   m = MatchAttribute.new('span#bio/a.homepage', 'href')
-      #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
-      #   # => 'http://foo.bar'
-      def match doc
-        doc = Hpricot(doc) if doc.is_a?(String)
-        val = doc.path_attr(selector, attribute)
-        matcher ? matcher.match(val) : val
+        # Unlike <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>,
+        # <tt>IMW::Parsers::HtmlMatchers::MatchAttribute</tt> is initialized
+        # with three arguments: the +selector+ which collects elements
+        # from an HTML document, an +attribute+ to extract, and
+        # (optionally) a +matcher+ to perform the matching.
+        def initialize selector, attribute, matcher=nil
+          super selector, matcher
+          self.attribute = attribute.to_s
+        end
+        # Grab the first element from +doc+ matching the +selector+ this
+        # class was initialized with.  If initialized with a +matcher+,
+        # then return the +matcher+'s match against the value of the
+        # +attribute+ this class was initialized with, else just return
+        # the value of the +attribute+.
+        #
+        #   m = MatchAttribute.new('span#bio/a.homepage', 'href')
+        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
+        #   # => 'http://foo.bar'
+        def match doc
+          doc = Hpricot(doc) if doc.is_a?(String)
+          val = doc.path_attr(selector, attribute)
+          matcher ? matcher.match(val) : val
+        end
       end
-    end
-    # Concrete subclass of <tt>IMW::HTMLParserMatcher::Matcher</tt>
-    # for using a regular expression to match against text in an HTML
-    # document.
-    class MatchRegexp < Matcher
-      attr_accessor :re
-      attr_accessor :options
+      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
+      # for using a regular expression to match against text in an HTML
+      # document.
+      class MatchRegexp < Matcher
+        attr_accessor :re
+        attr_accessor :options
-      # Use the regular expression +re+ to return captures from the
-      # elements collected by +selector+ (treated as text) used on an
-      # HTML document (if +selector+ is +nil+ then match against the
-      # full text of the document).  If the keyword argument
-      # <tt>:capture</tt> is specified then return the corresponding
-      # group (indexing is that of regular expressions; "1" is the
-      # first capture), else return an array of all captures.  If
-      # +matcher+, then use it on the capture(s) before returning.
-      #
-      # FIXME Shouldn't the matcher come BEFORE the regexp capture,
-      # not after?
-      def initialize selector, re, matcher=nil, options={}
-        super selector, matcher
-        self.options = options
-        self.re = re
-      end
+        # Use the regular expression +re+ to return captures from the
+        # elements collected by +selector+ (treated as text) used on an
+        # HTML document (if +selector+ is +nil+ then match against the
+        # full text of the document).  If the keyword argument
+        # <tt>:capture</tt> is specified then return the corresponding
+        # group (indexing is that of regular expressions; "1" is the
+        # first capture), else return an array of all captures.  If
+        # +matcher+, then use it on the capture(s) before returning.
+        #
+        # FIXME Shouldn't the matcher come BEFORE the regexp capture,
+        # not after?
+        def initialize selector, re, matcher=nil, options={}
+          super selector, matcher
+          self.options = options
+          self.re = re
+        end
-      # Grab the first element from +doc+ matching the +selector+ this
-      # object was initialized with.  Use the +re+ and the (optional)
-      # capture group this object was initialized with to capture a
-      # string (or array of strings if no capture group was specified)
-      # from the collected element (treated as text). If initialized
-      # with a +matcher+, then return the +matcher+'s match against
-      # the value of the capture(s), else just return the capture(s).
-      #
-      #   m = MatchRegexp.new('span#bio/a.homepage', /Homepage of (.*)$/, nil, :capture => 1 )
-      #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">Homepage of John Chimpo</a></span>')
-      #   # => "John Chimpo"
-      def match doc
-        doc = Hpricot(doc) if doc.is_a?(String)
-        el = selector ? doc.contents_of(selector) : doc
-        m = re.match(el.to_s)
-        val = case
-              when m.nil? then nil
-              when self.options.key?(:capture) then m.captures[self.options[:capture] - 1] # -1 to match regexp indexing
-              else m.captures
-              end
-        # pass to matcher, if any
-        matcher ? matcher.match(val) : val
+        # Grab the first element from +doc+ matching the +selector+ this
+        # object was initialized with.  Use the +re+ and the (optional)
+        # capture group this object was initialized with to capture a
+        # string (or array of strings if no capture group was specified)
+        # from the collected element (treated as text). If initialized
+        # with a +matcher+, then return the +matcher+'s match against
+        # the value of the capture(s), else just return the capture(s).
+        #
+        #   m = MatchRegexp.new('span#bio/a.homepage', /Homepage of (.*)$/, nil, :capture => 1 )
+        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">Homepage of John Chimpo</a></span>')
+        #   # => "John Chimpo"
+        def match doc
+          doc = Hpricot(doc) if doc.is_a?(String)
+          el = selector ? doc.contents_of(selector) : doc
+          m = re.match(el.to_s)
+          val = case
+                when m.nil? then nil
+                when self.options.key?(:capture) then m.captures[self.options[:capture] - 1] # -1 to match regexp indexing
+                else m.captures
+                end
+          # pass to matcher, if any
+          matcher ? matcher.match(val) : val
+        end
       end
-    end
-    class MatchRegexpRepeatedly < Matcher
-      attr_accessor :re
-      def initialize selector, re, matcher=nil
-        super selector, matcher
-        self.re = re
-      end
-      def match doc
-        doc = Hpricot(doc) if doc.is_a?(String)
-        # apply selector, if any
-        el = selector ? doc.contents_of(selector) : doc
-        return unless el
-        # get all matches
-        val = el.to_s.scan(re)
-        # if there's only one capture group, flatten the array
-        val = val.flatten if val.first && val.first.length == 1
-        # pass to matcher, if any
-        matcher ? matcher.match(val) : val
+      class MatchRegexpRepeatedly < Matcher
+        attr_accessor :re
+        def initialize selector, re, matcher=nil
+          super selector, matcher
+          self.re = re
+        end
+        def match doc
+          doc = Hpricot(doc) if doc.is_a?(String)
+          # apply selector, if any
+          el = selector ? doc.contents_of(selector) : doc
+          return unless el
+          # get all matches
+          val = el.to_s.scan(re)
+          # if there's only one capture group, flatten the array
+          val = val.flatten if val.first && val.first.length == 1
+          # pass to matcher, if any
+          matcher ? matcher.match(val) : val
+        end
       end
-    end
-    # Class for building a hash of values by using appropriate
-    # matchers against an HTML document.
-    class MatchHash
+      # Class for building a hash of values by using appropriate
+      # matchers against an HTML document.
+      class MatchHash
+        attr_accessor :match_hash
-      attr_accessor :match_hash
+        # The +match_hash+ must be a +Hash+ of symbols matched to HTML
+        # matchers (subclasses of
+        # <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>).
+        def initialize match_hash
+          # Kludge? maybe.
+          raise "MatchHash requires a hash of :attributes => matchers." unless match_hash.is_a?(Hash)
+          self.match_hash = match_hash
+        end
-      # The +match_hash+ must be a +Hash+ of symbols matched to HTML
-      # matchers (subclasses of
-      # <tt>IMW::HTMLParserMatcher::Matcher</tt>).
-      def initialize match_hash
-        # Kludge? maybe.
-        raise "MatchHash requires a hash of :attributes => matchers." unless match_hash.is_a?(Hash)
-        self.match_hash = match_hash
+        # Use the +match_hash+ this +MatchHash+ was initialized with to
+        # select elements from +doc+ and extract information from them:
+        #
+        #   m = MatchHash.new({
+        #       :name         => MatchFirstElement.new('li/span.customer'),
+        #       :order_status => MatchAttribute.new('li/ul[@status]','status'),
+        #       :products     => MatchArray.new('li/ul/li')
+        #     })
+        #   m.match('<li><span class="customer">John Chimpo</span>
+        #                <ul status="shipped">
+        #                  <li>bananas</li>
+        #                  <li>mangos</li>
+        #                  <li>banangos</li>
+        #                </ul></li>')
+        #   # => {
+        #         :name         => "John Chimpo",
+        #         :order_status => "shipped",
+        #         :products     => ["bananas", "mangos", "banangos"]
+        #        }
+        def match doc
+          doc = Hpricot(doc) if doc.is_a?(String)
+          hsh = { }
+          match_hash.each do |attr, m|
+            val = m.match(doc)
+            case attr
+            when Array then hsh.merge!(Hash.zip(attr, val).reject{|k,v| v.nil? }) if val
+            else            hsh[attr] = val  end
+          end
+          self.class.scrub!(hsh)
+        end
+        # kill off keys with nil values
+        def self.scrub! hsh
+          hsh # .reject{|k,v| v.nil? }
+        end
       end
-      # Use the +match_hash+ this +MatchHash+ was initialized with to
-      # select elements from +doc+ and extract information from them:
       #
-      #   m = MatchHash.new({
-      #       :name         => MatchFirstElement.new('li/span.customer'),
-      #       :order_status => MatchAttribute.new('li/ul[@status]','status'),
-      #       :products     => MatchArray.new('li/ul/li')
-      #     })
-      #   m.match('<li><span class="customer">John Chimpo</span>
-      #                <ul status="shipped">
-      #                  <li>bananas</li>
-      #                  <li>mangos</li>
-      #                  <li>banangos</li>
-      #                </ul></li>')
-      #   # => {
-      #         :name         => "John Chimpo",
-      #         :order_status => "shipped",
-      #         :products     => ["bananas", "mangos", "banangos"]
-      #        }
-      def match doc
-        doc = Hpricot(doc) if doc.is_a?(String)
+      # construct the downstream part of a hash matcher
+      #
+      def self.build_match_hash spec_hash
         hsh = { }
-        match_hash.each do |attr, m|
-          val = m.match(doc)
-          case attr
-          when Array then hsh.merge!(Hash.zip(attr, val).reject{|k,v| v.nil? }) if val
-          else            hsh[attr] = val  end
+        spec_hash.each do |attr, spec|
+          hsh[attr] = build_parse_tree(spec)
         end
-        self.class.scrub!(hsh)
-      end
-      # kill off keys with nil values
-      def self.scrub! hsh
-        hsh # .reject{|k,v| v.nil? }
+        hsh
       end
-    end
-    #
-    # construct the downstream part of a hash matcher
-    #
-    def self.build_match_hash spec_hash
-      hsh = { }
-      spec_hash.each do |attr, spec|
-        hsh[attr] = build_parse_tree(spec)
-      end
-      hsh
-    end
-    #
-    # recursively build a tree of matchers
-    #
-    def self.build_parse_tree spec
-      case spec
-      when nil            then nil
-      when Matcher        then spec
-      when Hash           then MatchHash.new(build_match_hash(spec))
-      when Array          then
-        return nil if spec.empty?
-        raise "Array spec must be a single selector or a selector and another match specification" unless (spec.length <= 2)
-        MatchArray.new(spec[0].to_s, build_parse_tree(spec[1]))
-      when String         then MatchFirstElement.new(spec)
-      when Proc           then MatchProc.new(nil, spec)
-      when Regexp         then MatchRegexp.new(nil, spec, nil, :capture => 1)
-      else raise "Don't know how to parse #{spec.inspect}"
+      #
+      # recursively build a tree of matchers
+      #
+      def self.build_parse_tree spec
+        case spec
+        when nil            then nil
+        when Matcher        then spec
+        when Hash           then MatchHash.new(build_match_hash(spec))
+        when Array          then
+          return nil if spec.empty?
+          raise "Array spec must be a single selector or a selector and another match specification" unless (spec.length <= 2)
+          MatchArray.new(spec[0].to_s, build_parse_tree(spec[1]))
+        when String         then MatchFirstElement.new(spec)
+        when Proc           then MatchProc.new(nil, spec)
+        when Regexp         then MatchRegexp.new(nil, spec, nil, :capture => 1)
+        when Symbol         then MatchAttribute.new(nil, spec, nil)
+        else raise "Don't know how to parse #{spec.inspect}"
+        end
       end
     end
   end