RubyGems - imw - Versions diffs - 0.2.18 → 0.3.0 - Mend

imw 0.2.18 → 0.3.0

Files changed (172) hide show

data/Gemfile +7 -26
data/Gemfile.lock +13 -38
data/{LICENSE → LICENSE.txt} +1 -1
data/README.textile +35 -0
data/Rakefile +45 -22
data/VERSION +1 -1
data/examples/foo.rb +19 -0
data/examples/html_selector.rb +22 -0
data/examples/nes_game_list.csv +625 -0
data/examples/nes_gamespot.csv +1371 -0
data/examples/nes_nintendo.csv +624 -0
data/examples/nes_unlicensed.csv +89 -0
data/examples/nes_wikipedia.csv +710 -0
data/examples/nibbler_test.rb +24 -0
data/examples/script.rb +19 -0
data/lib/imw.rb +28 -140
data/lib/imw/error.rb +9 -0
data/lib/imw/recordizer.rb +8 -0
data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
data/lib/imw/resource.rb +3 -119
data/lib/imw/serializer.rb +7 -0
data/lib/imw/serializer/json_serializer.rb +17 -0
data/lib/imw/uri.rb +41 -0
data/spec/resource_spec.rb +78 -0
data/spec/uri_spec.rb +55 -0
metadata +81 -232
data/README.rdoc +0 -371
data/bin/imw +0 -5
data/bin/tsv_to_json.rb +0 -29
data/etc/imwrc.rb +0 -26
data/examples/dataset.rb +0 -12
data/examples/metadata.yml +0 -10
data/lib/imw/archives.rb +0 -120
data/lib/imw/archives/rar.rb +0 -19
data/lib/imw/archives/tar.rb +0 -19
data/lib/imw/archives/tarbz2.rb +0 -73
data/lib/imw/archives/targz.rb +0 -73
data/lib/imw/archives/zip.rb +0 -51
data/lib/imw/boot.rb +0 -87
data/lib/imw/compressed_files.rb +0 -94
data/lib/imw/compressed_files/bz2.rb +0 -16
data/lib/imw/compressed_files/compressible.rb +0 -75
data/lib/imw/compressed_files/gz.rb +0 -16
data/lib/imw/dataset.rb +0 -125
data/lib/imw/dataset/paths.rb +0 -29
data/lib/imw/dataset/workflow.rb +0 -195
data/lib/imw/formats.rb +0 -33
data/lib/imw/formats/delimited.rb +0 -170
data/lib/imw/formats/excel.rb +0 -100
data/lib/imw/formats/json.rb +0 -41
data/lib/imw/formats/pdf.rb +0 -71
data/lib/imw/formats/sgml.rb +0 -69
data/lib/imw/formats/yaml.rb +0 -41
data/lib/imw/metadata.rb +0 -83
data/lib/imw/metadata/contains_metadata.rb +0 -54
data/lib/imw/metadata/dsl.rb +0 -111
data/lib/imw/metadata/field.rb +0 -37
data/lib/imw/metadata/has_metadata.rb +0 -98
data/lib/imw/metadata/has_summary.rb +0 -57
data/lib/imw/metadata/schema.rb +0 -17
data/lib/imw/parsers.rb +0 -8
data/lib/imw/parsers/flat.rb +0 -44
data/lib/imw/parsers/html_parser.rb +0 -387
data/lib/imw/parsers/html_parser/matchers.rb +0 -289
data/lib/imw/parsers/line_parser.rb +0 -87
data/lib/imw/parsers/regexp_parser.rb +0 -72
data/lib/imw/repository.rb +0 -12
data/lib/imw/runner.rb +0 -118
data/lib/imw/schemes.rb +0 -23
data/lib/imw/schemes/ftp.rb +0 -142
data/lib/imw/schemes/hdfs.rb +0 -251
data/lib/imw/schemes/http.rb +0 -165
data/lib/imw/schemes/local.rb +0 -409
data/lib/imw/schemes/remote.rb +0 -119
data/lib/imw/schemes/s3.rb +0 -143
data/lib/imw/schemes/sql.rb +0 -129
data/lib/imw/tools.rb +0 -12
data/lib/imw/tools/aggregator.rb +0 -148
data/lib/imw/tools/archiver.rb +0 -220
data/lib/imw/tools/downloader.rb +0 -63
data/lib/imw/tools/extension_analyzer.rb +0 -114
data/lib/imw/tools/summarizer.rb +0 -83
data/lib/imw/tools/transferer.rb +0 -167
data/lib/imw/utils.rb +0 -74
data/lib/imw/utils/dynamically_extendable.rb +0 -137
data/lib/imw/utils/error.rb +0 -59
data/lib/imw/utils/extensions/hpricot.rb +0 -34
data/lib/imw/utils/has_uri.rb +0 -131
data/lib/imw/utils/log.rb +0 -92
data/lib/imw/utils/misc.rb +0 -57
data/lib/imw/utils/paths.rb +0 -146
data/lib/imw/utils/uri.rb +0 -59
data/lib/imw/utils/uuid.rb +0 -33
data/lib/imw/utils/validate.rb +0 -38
data/lib/imw/utils/version.rb +0 -11
data/spec/data/formats/delimited/sample.csv +0 -131
data/spec/data/formats/delimited/sample.tsv +0 -131
data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
data/spec/data/formats/excel/sample.xls +0 -0
data/spec/data/formats/json/sample.json +0 -1
data/spec/data/formats/none/sample +0 -650
data/spec/data/formats/sgml/sample.xml +0 -617
data/spec/data/formats/text/sample.txt +0 -650
data/spec/data/formats/yaml/sample.yaml +0 -410
data/spec/data/schema-tabular.yaml +0 -11
data/spec/imw/archives/rar_spec.rb +0 -16
data/spec/imw/archives/tar_spec.rb +0 -16
data/spec/imw/archives/tarbz2_spec.rb +0 -24
data/spec/imw/archives/targz_spec.rb +0 -21
data/spec/imw/archives/zip_spec.rb +0 -16
data/spec/imw/archives_spec.rb +0 -77
data/spec/imw/compressed_files/bz2_spec.rb +0 -15
data/spec/imw/compressed_files/compressible_spec.rb +0 -36
data/spec/imw/compressed_files/gz_spec.rb +0 -15
data/spec/imw/compressed_files_spec.rb +0 -47
data/spec/imw/dataset/paths_spec.rb +0 -32
data/spec/imw/dataset/workflow_spec.rb +0 -41
data/spec/imw/formats/delimited_spec.rb +0 -44
data/spec/imw/formats/excel_spec.rb +0 -55
data/spec/imw/formats/json_spec.rb +0 -18
data/spec/imw/formats/sgml_spec.rb +0 -24
data/spec/imw/formats/yaml_spec.rb +0 -19
data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
data/spec/imw/metadata/field_spec.rb +0 -25
data/spec/imw/metadata/has_metadata_spec.rb +0 -58
data/spec/imw/metadata/has_summary_spec.rb +0 -32
data/spec/imw/metadata/schema_spec.rb +0 -24
data/spec/imw/metadata_spec.rb +0 -86
data/spec/imw/parsers/line_parser_spec.rb +0 -96
data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
data/spec/imw/resource_spec.rb +0 -32
data/spec/imw/schemes/hdfs_spec.rb +0 -67
data/spec/imw/schemes/http_spec.rb +0 -19
data/spec/imw/schemes/local_spec.rb +0 -165
data/spec/imw/schemes/remote_spec.rb +0 -38
data/spec/imw/schemes/s3_spec.rb +0 -31
data/spec/imw/schemes/sql_spec.rb +0 -3
data/spec/imw/tools/aggregator_spec.rb +0 -71
data/spec/imw/tools/archiver_spec.rb +0 -120
data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
data/spec/imw/tools/summarizer_spec.rb +0 -8
data/spec/imw/tools/transferer_spec.rb +0 -195
data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
data/spec/imw/utils/has_uri_spec.rb +0 -61
data/spec/imw/utils/paths_spec.rb +0 -10
data/spec/imw/utils/shared_paths_spec.rb +0 -29
data/spec/imw_spec.rb +0 -14
data/spec/rcov.opts +0 -1
data/spec/spec_helper.rb +0 -31
data/spec/support/custom_matchers.rb +0 -28
data/spec/support/file_contents_matcher.rb +0 -30
data/spec/support/paths_matcher.rb +0 -66
data/spec/support/random.rb +0 -213
data/spec/support/without_regard_to_order_matcher.rb +0 -41

data/lib/imw/parsers/html_parser/matchers.rb DELETED

@@ -1,289 +0,0 @@
-require 'imw/utils/extensions/hpricot'
-module IMW
-  module Parsers
-    module HtmlMatchers
-      # An abstract class from which to subclass specific HTML matchers.
-      #
-      # A subclass is initialized with a +selector+ and an optional
-      # +matcher+.  The +selector+ is an HTML path specification used to
-      # collect elements from the document.  If initialized with a
-      # +matcher+, the +matcher+ is used to return match information
-      # from the elements; else the inner HTML is returned.  Subclasses
-      # decide how the +selector+ will collect elements.
-      class Matcher
-        attr_accessor :selector
-        attr_accessor :matcher
-        attr_accessor :options
-        def initialize selector, matcher=nil, options={}
-          self.selector = selector
-          self.matcher  = matcher
-          self.options  = options
-        end
-        def match doc
-          raise "Abstract class #{self.class}"
-        end
-      end
-      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
-      # for matching against the first element of a document matching a
-      # selector.
-      class MatchFirstElement < Matcher
-        # Grab the first element from +doc+ matching the +selector+ this
-        # class was initialized with.  If initialized with a +matcher+,
-        # then return the +matcher+'s match against the first element,
-        # else just return the inner HTML of the first element.
-        #
-        #   m = MatchFirstElement.new('span#bio/a.homepage')
-        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
-        #   # => 'My Homepage'
-        def match doc
-          doc = Hpricot(doc) if doc.is_a?(String)
-          el = doc.at(selector) or return nil
-          if matcher
-            matcher.match(el)
-          else
-            options[:html] ? el : el.inner_text.strip
-          end
-        end
-      end
-      # FIXME is there really a need for this separate class?  why can't
-      # MatchFirstElement.match accept a block?
-      class MatchProc < MatchFirstElement
-        attr_accessor :proc
-        attr_accessor :options
-        def initialize selector, proc, matcher=nil, options={}
-          super selector, matcher
-          self.options = options
-          self.proc = proc
-        end
-        def match doc
-          val = super doc
-          val ? self.proc.call(val) : self.proc.call(doc)
-        end
-      end
-      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
-      # for matching each element of a document matching a selector.
-      class MatchArray < Matcher
-        # Grab each element from +doc+ matching the +selector+ this
-        # class was initialized with.  If initialized with a +matcher+,
-        # then return an array consisting of the +matcher+'s match
-        # against each element, else just return an array consisting of
-        # the inner HTML of each element.
-        #
-        #   m = MatchArray.new('span#bio/a.homepage')
-        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>
-        #            <span id="bio"><a class="homepage" href="http://foo.baz">Your Homepage</a></span>
-        #            <span id="bio"><a class="homepage" href="http://foo.qux">Their Homepage</a></span>')
-        #   # => ["My Homepage", "Your Homepage", "Their Homepage"]
-        def match doc
-          doc = Hpricot(doc) if doc.is_a?(String)
-          subdoc = (doc/selector) or return nil
-          if matcher
-            subdoc.map{|el| matcher.match(el)}
-          else
-            if options[:html]
-              subdoc.map{|el| el }
-            else
-              subdoc.map{|el| el.inner_text.strip }
-            end
-          end
-        end
-      end
-      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
-      # for matching an attribute of the first element of a document
-      # matching a selector.
-      class MatchAttribute < Matcher
-        attr_accessor :attribute
-        # Unlike <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>,
-        # <tt>IMW::Parsers::HtmlMatchers::MatchAttribute</tt> is initialized
-        # with three arguments: the +selector+ which collects elements
-        # from an HTML document, an +attribute+ to extract, and
-        # (optionally) a +matcher+ to perform the matching.
-        def initialize selector, attribute, matcher=nil
-          super selector, matcher
-          self.attribute = attribute.to_s
-        end
-        # Grab the first element from +doc+ matching the +selector+ this
-        # class was initialized with.  If initialized with a +matcher+,
-        # then return the +matcher+'s match against the value of the
-        # +attribute+ this class was initialized with, else just return
-        # the value of the +attribute+.
-        #
-        #   m = MatchAttribute.new('span#bio/a.homepage', 'href')
-        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
-        #   # => 'http://foo.bar'
-        def match doc
-          doc = Hpricot(doc) if doc.is_a?(String)
-          val = doc.path_attr(selector, attribute)
-          matcher ? matcher.match(val) : val
-        end
-      end
-      # Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
-      # for using a regular expression to match against text in an HTML
-      # document.
-      class MatchRegexp < Matcher
-        attr_accessor :re
-        attr_accessor :options
-        # Use the regular expression +re+ to return captures from the
-        # elements collected by +selector+ (treated as text) used on an
-        # HTML document (if +selector+ is +nil+ then match against the
-        # full text of the document).  If the keyword argument
-        # <tt>:capture</tt> is specified then return the corresponding
-        # group (indexing is that of regular expressions; "1" is the
-        # first capture), else return an array of all captures.  If
-        # +matcher+, then use it on the capture(s) before returning.
-        #
-        # FIXME Shouldn't the matcher come BEFORE the regexp capture,
-        # not after?
-        def initialize selector, re, matcher=nil, options={}
-          super selector, matcher
-          self.options = options
-          self.re = re
-        end
-        # Grab the first element from +doc+ matching the +selector+ this
-        # object was initialized with.  Use the +re+ and the (optional)
-        # capture group this object was initialized with to capture a
-        # string (or array of strings if no capture group was specified)
-        # from the collected element (treated as text). If initialized
-        # with a +matcher+, then return the +matcher+'s match against
-        # the value of the capture(s), else just return the capture(s).
-        #
-        #   m = MatchRegexp.new('span#bio/a.homepage', /Homepage of (.*)$/, nil, :capture => 1 )
-        #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">Homepage of John Chimpo</a></span>')
-        #   # => "John Chimpo"
-        def match doc
-          doc = Hpricot(doc) if doc.is_a?(String)
-          el = selector ? doc.contents_of(selector) : doc
-          m = re.match(el.to_s)
-          val = case
-                when m.nil? then nil
-                when self.options.key?(:capture) then m.captures[self.options[:capture] - 1] # -1 to match regexp indexing
-                else m.captures
-                end
-          # pass to matcher, if any
-          matcher ? matcher.match(val) : val
-        end
-      end
-      class MatchRegexpRepeatedly < Matcher
-        attr_accessor :re
-        def initialize selector, re, matcher=nil
-          super selector, matcher
-          self.re = re
-        end
-        def match doc
-          doc = Hpricot(doc) if doc.is_a?(String)
-          # apply selector, if any
-          el = selector ? doc.contents_of(selector) : doc
-          return unless el
-          # get all matches
-          val = el.to_s.scan(re)
-          # if there's only one capture group, flatten the array
-          val = val.flatten if val.first && val.first.length == 1
-          # pass to matcher, if any
-          matcher ? matcher.match(val) : val
-        end
-      end
-      # Class for building a hash of values by using appropriate
-      # matchers against an HTML document.
-      class MatchHash
-        attr_accessor :match_hash
-        # The +match_hash+ must be a +Hash+ of symbols matched to HTML
-        # matchers (subclasses of
-        # <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>).
-        def initialize match_hash
-          # Kludge? maybe.
-          raise "MatchHash requires a hash of :attributes => matchers." unless match_hash.is_a?(Hash)
-          self.match_hash = match_hash
-        end
-        # Use the +match_hash+ this +MatchHash+ was initialized with to
-        # select elements from +doc+ and extract information from them:
-        #
-        #   m = MatchHash.new({
-        #       :name         => MatchFirstElement.new('li/span.customer'),
-        #       :order_status => MatchAttribute.new('li/ul[@status]','status'),
-        #       :products     => MatchArray.new('li/ul/li')
-        #     })
-        #   m.match('<li><span class="customer">John Chimpo</span>
-        #                <ul status="shipped">
-        #                  <li>bananas</li>
-        #                  <li>mangos</li>
-        #                  <li>banangos</li>
-        #                </ul></li>')
-        #   # => {
-        #         :name         => "John Chimpo",
-        #         :order_status => "shipped",
-        #         :products     => ["bananas", "mangos", "banangos"]
-        #        }
-        def match doc
-          doc = Hpricot(doc) if doc.is_a?(String)
-          hsh = { }
-          match_hash.each do |attr, m|
-            val = m.match(doc)
-            case attr
-            when Array then hsh.merge!(Hash.zip(attr, val).reject{|k,v| v.nil? }) if val
-            else            hsh[attr] = val  end
-          end
-          self.class.scrub!(hsh)
-        end
-        # kill off keys with nil values
-        def self.scrub! hsh
-          hsh # .reject{|k,v| v.nil? }
-        end
-      end
-      #
-      # construct the downstream part of a hash matcher
-      #
-      def self.build_match_hash spec_hash
-        hsh = { }
-        spec_hash.each do |attr, spec|
-          hsh[attr] = build_parse_tree(spec)
-        end
-        hsh
-      end
-      #
-      # recursively build a tree of matchers
-      #
-      def self.build_parse_tree spec
-        case spec
-        when nil            then nil
-        when Matcher        then spec
-        when Hash           then MatchHash.new(build_match_hash(spec))
-        when Array          then
-          return nil if spec.empty?
-          raise "Array spec must be a single selector or a selector and another match specification" unless (spec.length <= 2)
-          MatchArray.new(spec[0].to_s, build_parse_tree(spec[1]))
-        when String         then MatchFirstElement.new(spec)
-        when Proc           then MatchProc.new(nil, spec)
-        when Regexp         then MatchRegexp.new(nil, spec, nil, :capture => 1)
-        when Symbol         then MatchAttribute.new(nil, spec, nil)
-        else raise "Don't know how to parse #{spec.inspect}"
-        end
-      end
-    end
-  end
-end

data/lib/imw/parsers/line_parser.rb DELETED

@@ -1,87 +0,0 @@
-module IMW
-  module Parsers
-    # This is an abstract class for a line-oriented parser intended to
-    # read and emit lines sequentially from a file.
-    #
-    # To leverage the functionality of this class, subclass it and
-    # define a +parse_line+ method.
-    class LineParser
-      # The number of lines to skip on each file parsed.
-      attr_accessor :skip_first
-      # The class to parse each line into.  The +new+ method of this
-      # class must accept a hash.
-      attr_accessor :klass
-      # If called with the option <tt>:skip_first</tt> then skip the
-      # corresponding number of lines at the beginning of the file when
-      # parsing.
-      def initialize options={}
-        @skip_first = options[:skip_first] || 0
-        @klass      = options[:of]         || options[:klass]
-      end
-      # Parse the given file.  If the option <tt>:lines</tt> is passed
-      # in then only parse that many lines.  If given a block then
-      # yield the result of each line to the block; else just return
-      # an array of results.
-      #
-      # If this parser has a +klass+ attribute then each parsed line
-      # will first be turned into an instance of that class (the class
-      # must accept a hash of values in its initializer).
-      def parse! file, options={}, &block
-        skip_lines!(file)
-        if options[:lines]
-          case
-          when klass && block_given?
-            options[:lines].times do
-              yield klass.new(parse_line(file.readline))
-            end
-          when block_given?
-            options[:lines].times do
-              yield parse_line(file.readline)
-            end
-          when klass
-            options[:lines].times do
-              klass.new(parse_line(file.readline))
-            end
-          else
-            options[:lines].times.map do
-              parse_line(file.readline)
-            end
-          end
-        else
-          case
-          when klass && block_given?
-            file.each do |line|
-              yield klass.new(parse_line(line))
-            end
-          when block_given?
-            file.each do |line|
-              yield parse_line(line)
-            end
-          when klass
-            file.map do |line|
-              klass.new(parse_line(line))
-            end
-          else
-            file.map do |line|
-              parse_line(line)
-            end
-          end
-        end
-      end
-      def parse_line line
-        raise IMW::NotImplementedError.new("Subclass the LineParser and redefine this method to create a true parser.")
-      end
-      protected
-      def skip_lines! file
-        skip_first.times { file.readline }
-      end
-    end
-  end
-end

data/lib/imw/parsers/regexp_parser.rb DELETED

@@ -1,72 +0,0 @@
-require 'imw/parsers/line_parser'
-module IMW
-  module Parsers
-    # A RegexpParser is a line-oriented parser which uses a regular
-    # expression to extract data from a line into either a hash or an
-    # object obeying hash semantics.
-    #
-    # As an example, a flat file with one record per line in the
-    # following format (this is a simplified version of common
-    # webserver log formats)
-    #
-    #   151.199.53.145 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/main.php HTTP/1.0
-    #   81.227.179.120 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/libraries/select_lang.lib.php HTTP/1.0
-    #   81.3.107.173 14-Oct-2007:13:54:26-0500 GET / HTTP/1.1
-    #   ...
-    #
-    # could be parsed as follows
-    #
-    #   file   = File.new '/path/to/file.log'
-    #   parser = IMW::Parsers::RegexpParser.new :by_regexp   => %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$},
-    #                                           :into_fields => [:ip, :timestamp, :verb, :url, :version]
-    #   parser.parse file #=> [{:ip => '151.199.53.145', :timestamp => '14-Oct-2007:13:34:34-0500', :verb => 'GET', :url => '/phpmyadmin/main.php', :version => "1.0"}, ... ]
-    #
-    # Consecutive captures from the regular expression will be pushed
-    # into a hash with keys given by the +into_fields+ property of
-    # this parser.
-    #
-    # If the parser is instantiated with the <tt>:of</tt> keyword then
-    # the parsed hash from each line is used to instantiate a new
-    # object of the corresponding class:
-    #
-    #   require 'ostruct'
-    #
-    #   PageView = Class.new(OpenStruct)
-    #
-    #   parser = IMW::Parsers::RegexpParser.new :by_regexp   => %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$},
-    #                                           :into_fields => [:ip, :timestamp, :verb, :url, :version],
-    #                                           :of          => PageView
-    #
-    #   parser.parse! file #=> [#<PageView ip="151.199.53.145", timestamp="14-Oct-2007:13:34:34-0500", verb="GET", url="/phpmyadmin/main.php", version="1.0">, ... ]
-    #
-    # The option <tt>:strictly</tt> can also be set to force the
-    # parser to raise an error if it finds a line which doesn't match
-    # its regexp.
-    class RegexpParser < LineParser
-      attr_accessor :regexp, :fields, :strict
-      def initialize options={}
-        @regexp = options[:regexp] || options[:by_regexp]
-        @fields = options[:fields] || options[:into_fields]
-        @strict = options[:strict] || options[:strictly]
-        super options
-      end
-      def parse_line line
-        match_data = regexp.match(line.chomp)
-        {}.tap do |hsh|
-          if match_data
-            match_data.captures.each_with_index do |capture, index|
-              hsh[fields[index]] = capture
-            end
-          else
-            raise IMW::ParseError.new("Could not parse the following line:\n\n#{line}\n\nusing regexp\n\n#{regexp.to_s}") if strict
-          end
-        end
-      end
-    end
-  end
-end

data/lib/imw/repository.rb DELETED

@@ -1,12 +0,0 @@
-module IMW
-  # A Repository is a collection of datasets.  It is used by the
-  # command-line +imw+ tool.
-  class Repository < Hash
-    alias_method :datasets, :values
-    alias_method :handles,  :keys
-  end
-end