RubyGems - imw - Versions diffs - 0.2.18 → 0.3.0 - Mend

imw 0.2.18 → 0.3.0

Files changed (172) hide show

data/Gemfile +7 -26
data/Gemfile.lock +13 -38
data/{LICENSE → LICENSE.txt} +1 -1
data/README.textile +35 -0
data/Rakefile +45 -22
data/VERSION +1 -1
data/examples/foo.rb +19 -0
data/examples/html_selector.rb +22 -0
data/examples/nes_game_list.csv +625 -0
data/examples/nes_gamespot.csv +1371 -0
data/examples/nes_nintendo.csv +624 -0
data/examples/nes_unlicensed.csv +89 -0
data/examples/nes_wikipedia.csv +710 -0
data/examples/nibbler_test.rb +24 -0
data/examples/script.rb +19 -0
data/lib/imw.rb +28 -140
data/lib/imw/error.rb +9 -0
data/lib/imw/recordizer.rb +8 -0
data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
data/lib/imw/resource.rb +3 -119
data/lib/imw/serializer.rb +7 -0
data/lib/imw/serializer/json_serializer.rb +17 -0
data/lib/imw/uri.rb +41 -0
data/spec/resource_spec.rb +78 -0
data/spec/uri_spec.rb +55 -0
metadata +81 -232
data/README.rdoc +0 -371
data/bin/imw +0 -5
data/bin/tsv_to_json.rb +0 -29
data/etc/imwrc.rb +0 -26
data/examples/dataset.rb +0 -12
data/examples/metadata.yml +0 -10
data/lib/imw/archives.rb +0 -120
data/lib/imw/archives/rar.rb +0 -19
data/lib/imw/archives/tar.rb +0 -19
data/lib/imw/archives/tarbz2.rb +0 -73
data/lib/imw/archives/targz.rb +0 -73
data/lib/imw/archives/zip.rb +0 -51
data/lib/imw/boot.rb +0 -87
data/lib/imw/compressed_files.rb +0 -94
data/lib/imw/compressed_files/bz2.rb +0 -16
data/lib/imw/compressed_files/compressible.rb +0 -75
data/lib/imw/compressed_files/gz.rb +0 -16
data/lib/imw/dataset.rb +0 -125
data/lib/imw/dataset/paths.rb +0 -29
data/lib/imw/dataset/workflow.rb +0 -195
data/lib/imw/formats.rb +0 -33
data/lib/imw/formats/delimited.rb +0 -170
data/lib/imw/formats/excel.rb +0 -100
data/lib/imw/formats/json.rb +0 -41
data/lib/imw/formats/pdf.rb +0 -71
data/lib/imw/formats/sgml.rb +0 -69
data/lib/imw/formats/yaml.rb +0 -41
data/lib/imw/metadata.rb +0 -83
data/lib/imw/metadata/contains_metadata.rb +0 -54
data/lib/imw/metadata/dsl.rb +0 -111
data/lib/imw/metadata/field.rb +0 -37
data/lib/imw/metadata/has_metadata.rb +0 -98
data/lib/imw/metadata/has_summary.rb +0 -57
data/lib/imw/metadata/schema.rb +0 -17
data/lib/imw/parsers.rb +0 -8
data/lib/imw/parsers/flat.rb +0 -44
data/lib/imw/parsers/html_parser.rb +0 -387
data/lib/imw/parsers/html_parser/matchers.rb +0 -289
data/lib/imw/parsers/line_parser.rb +0 -87
data/lib/imw/parsers/regexp_parser.rb +0 -72
data/lib/imw/repository.rb +0 -12
data/lib/imw/runner.rb +0 -118
data/lib/imw/schemes.rb +0 -23
data/lib/imw/schemes/ftp.rb +0 -142
data/lib/imw/schemes/hdfs.rb +0 -251
data/lib/imw/schemes/http.rb +0 -165
data/lib/imw/schemes/local.rb +0 -409
data/lib/imw/schemes/remote.rb +0 -119
data/lib/imw/schemes/s3.rb +0 -143
data/lib/imw/schemes/sql.rb +0 -129
data/lib/imw/tools.rb +0 -12
data/lib/imw/tools/aggregator.rb +0 -148
data/lib/imw/tools/archiver.rb +0 -220
data/lib/imw/tools/downloader.rb +0 -63
data/lib/imw/tools/extension_analyzer.rb +0 -114
data/lib/imw/tools/summarizer.rb +0 -83
data/lib/imw/tools/transferer.rb +0 -167
data/lib/imw/utils.rb +0 -74
data/lib/imw/utils/dynamically_extendable.rb +0 -137
data/lib/imw/utils/error.rb +0 -59
data/lib/imw/utils/extensions/hpricot.rb +0 -34
data/lib/imw/utils/has_uri.rb +0 -131
data/lib/imw/utils/log.rb +0 -92
data/lib/imw/utils/misc.rb +0 -57
data/lib/imw/utils/paths.rb +0 -146
data/lib/imw/utils/uri.rb +0 -59
data/lib/imw/utils/uuid.rb +0 -33
data/lib/imw/utils/validate.rb +0 -38
data/lib/imw/utils/version.rb +0 -11
data/spec/data/formats/delimited/sample.csv +0 -131
data/spec/data/formats/delimited/sample.tsv +0 -131
data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
data/spec/data/formats/excel/sample.xls +0 -0
data/spec/data/formats/json/sample.json +0 -1
data/spec/data/formats/none/sample +0 -650
data/spec/data/formats/sgml/sample.xml +0 -617
data/spec/data/formats/text/sample.txt +0 -650
data/spec/data/formats/yaml/sample.yaml +0 -410
data/spec/data/schema-tabular.yaml +0 -11
data/spec/imw/archives/rar_spec.rb +0 -16
data/spec/imw/archives/tar_spec.rb +0 -16
data/spec/imw/archives/tarbz2_spec.rb +0 -24
data/spec/imw/archives/targz_spec.rb +0 -21
data/spec/imw/archives/zip_spec.rb +0 -16
data/spec/imw/archives_spec.rb +0 -77
data/spec/imw/compressed_files/bz2_spec.rb +0 -15
data/spec/imw/compressed_files/compressible_spec.rb +0 -36
data/spec/imw/compressed_files/gz_spec.rb +0 -15
data/spec/imw/compressed_files_spec.rb +0 -47
data/spec/imw/dataset/paths_spec.rb +0 -32
data/spec/imw/dataset/workflow_spec.rb +0 -41
data/spec/imw/formats/delimited_spec.rb +0 -44
data/spec/imw/formats/excel_spec.rb +0 -55
data/spec/imw/formats/json_spec.rb +0 -18
data/spec/imw/formats/sgml_spec.rb +0 -24
data/spec/imw/formats/yaml_spec.rb +0 -19
data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
data/spec/imw/metadata/field_spec.rb +0 -25
data/spec/imw/metadata/has_metadata_spec.rb +0 -58
data/spec/imw/metadata/has_summary_spec.rb +0 -32
data/spec/imw/metadata/schema_spec.rb +0 -24
data/spec/imw/metadata_spec.rb +0 -86
data/spec/imw/parsers/line_parser_spec.rb +0 -96
data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
data/spec/imw/resource_spec.rb +0 -32
data/spec/imw/schemes/hdfs_spec.rb +0 -67
data/spec/imw/schemes/http_spec.rb +0 -19
data/spec/imw/schemes/local_spec.rb +0 -165
data/spec/imw/schemes/remote_spec.rb +0 -38
data/spec/imw/schemes/s3_spec.rb +0 -31
data/spec/imw/schemes/sql_spec.rb +0 -3
data/spec/imw/tools/aggregator_spec.rb +0 -71
data/spec/imw/tools/archiver_spec.rb +0 -120
data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
data/spec/imw/tools/summarizer_spec.rb +0 -8
data/spec/imw/tools/transferer_spec.rb +0 -195
data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
data/spec/imw/utils/has_uri_spec.rb +0 -61
data/spec/imw/utils/paths_spec.rb +0 -10
data/spec/imw/utils/shared_paths_spec.rb +0 -29
data/spec/imw_spec.rb +0 -14
data/spec/rcov.opts +0 -1
data/spec/spec_helper.rb +0 -31
data/spec/support/custom_matchers.rb +0 -28
data/spec/support/file_contents_matcher.rb +0 -30
data/spec/support/paths_matcher.rb +0 -66
data/spec/support/random.rb +0 -213
data/spec/support/without_regard_to_order_matcher.rb +0 -41

@@ -1,33 +0,0 @@
-module IMW
-  module Formats
-    autoload :Csv,   'imw/formats/delimited'
-    autoload :Tsv,   'imw/formats/delimited'
-    autoload :Excel, 'imw/formats/excel'
-    autoload :Json,  'imw/formats/json'
-    autoload :Xml,   'imw/formats/sgml'
-    autoload :Xsl,   'imw/formats/sgml'
-    autoload :Html,  'imw/formats/sgml'
-    autoload :Xhtml, 'imw/formats/sgml'
-    autoload :Rdf,   'imw/formats/sgml'
-    autoload :Yaml,  'imw/formats/yaml'
-    autoload :Pdf,   'imw/formats/pdf'
-    # Handlers which augment a resource with data format specific
-    # methods.
-    HANDLERS = [
-                [ "Formats::Csv",   /\.csv$/i    ],
-                [ "Formats::Tsv",   /\.tsv$/i    ],
-                [ "Formats::Excel", /\.xlsx?$/i  ],
-                [ "Formats::Json",  /\.json$/i   ],
-                [ "Formats::Xml",   /\.xml$/i    ],
-                [ "Formats::Xsl",   /\.xsl$/i    ],
-                [ "Formats::Html",  /\.html?$/i  ],
-                [ "Formats::Xhtml", /\.xhtml?$/i ],
-                [ "Formats::Rdf",   /\.rdf?$/i   ],
-                [ "Formats::Yaml",  /\.ya?ml$/i  ],
-                [ "Formats::Pdf",   /\.pdf$/i    ]
-               ]
-  end
-end

data/lib/imw/formats/delimited.rb DELETED

@@ -1,170 +0,0 @@
-module IMW
-  module Formats
-    # Defines methods used for parsing and writing delimited data
-    # formats (CSV, TSV, &c.)  with the FasterCSV library.  This
-    # module is not used to directly extend a resource.  Instead,
-    # more specific modules (e.g. IMW::Formats::Csv)
-    # include this one and also define +delimited_options+ which is
-    # actually what's passed to FasterCSV.
-    #
-    # @abstract
-    module Delimited
-      # Default options to be passed to
-      # FasterCSV[http://fastercsv.rubyforge.org/]; see its
-      # documentation for more information.
-      #
-      # @return [Hash]
-      def delimited_options
-        @delimited_options ||= {
-          :headers        => fields && fields.map { |field| field['name'] }
-        }.merge(resource_options_compatible_with_faster_csv)
-      end
-      # Return the data in this delimited resource as an array of
-      # arrays.
-      #
-      # Yield each outer array (row) if passed a block.
-      #
-      # @return [Array] the full data matrix
-      # @yield [Array] each row of the data
-      def load &block
-        require 'fastercsv'
-        FasterCSV.parse(read, delimited_options, &block)
-      end
-      # Gives us goodies!  Needs +each+ below.
-      include Enumerable
-      # Call +block+ with each row in this delimited resource.
-      def each &block
-        require 'fastercsv'
-        FasterCSV.new(io, delimited_options).each(&block)
-      end
-      # Emit a single array or an array of arrays into this resource.
-      #
-      # @param [Array<Array>, Array] data array or array of arrays to emit
-      # @param [Hash] options
-      # @option options [true, false] :persist Keep this resource's IO object open after emitting
-      def emit data, options={}
-        require 'fastercsv'
-        data = [data] unless data.first.is_a?(Array)
-        data.each do |row|
-          write(FasterCSV.generate_line(row, delimited_options))
-        end
-        self
-      end
-      alias_method :<<, :emit
-      # Do a heuristic check to determine whether or not the first row
-      # of this delimited data is a row of headers.
-      #
-      # @return [true, false]
-      def fields_in_first_line?
-        # grab the header and up to 10 body rows
-        require 'fastercsv'
-        copy  = FasterCSV.new(io, resource_options_compatible_with_faster_csv.merge(:headers => false))
-        header = (copy.shift || []) rescue []
-        body   = 10.times.map { (copy.shift || []) rescue []}.flatten
-        # guess how many elements in a row
-        #size_guess = ((header.size + body.map(&:size).inject(0.0) { |e, s| s += e }).to_f / (1 + body.length).to_f).to_i
-        # calculate the fraction of bytes that are [-A-Za-z_] (letters +
-        # underscore + hyphen) for header and body and compute a
-        # threshold determinant
-        header_chars           = header.map(&:to_s).join
-        header_schema_bytes    = header_chars.bytes.find_all { |byte| (byte >= 65 && byte <= 90) || (byte >= 97 && byte <= 122) || byte == 95 || byte == 45 }
-        body_chars             = body.map(&:to_s).join
-        body_schema_bytes      = body_chars.bytes.find_all { |byte| (byte >= 65 && byte <= 90) || (byte >= 97 && byte <= 122) || byte == 95 || byte == 45 }
-        header_schema_fraction = header_schema_bytes.size.to_f / header_chars.size.to_f    rescue nil
-        body_schema_fraction   = body_schema_bytes.size.to_f   / body_chars.size.to_f      rescue nil
-        determinant            = (body_schema_fraction - header_schema_fraction).abs / 2.0 rescue nil
-        # decide, setting the threshold at 0.05 based on some guesswork...
-        determinant && determinant >= 0.05
-      end
-      # If it seems like there are fields in the first line of this
-      # data then go ahead and use them to define this resource's
-      # fields.
-      #
-      # Will overwrite any fields already present for this resource.
-      def guess_fields!
-        return unless fields_in_first_line?
-        copy                        = FasterCSV.new(io, resource_options_compatible_with_faster_csv.merge(:headers => false))
-        names                       = (copy.shift || []) rescue []
-        self.fields                 = names.map { |n| { 'name' => n } }
-        delimited_options[:headers] = names
-      end
-      # Return a sample of up to 101 rows, skipping rows that are more than half blank.
-      #
-      # @return [Array<Array>]
-      def snippet
-        require 'fastercsv'
-        [].tap do |rows|
-          rows_sampled = 0
-          begin
-            each do |row|
-              begin
-                break if rows_sampled > 100
-                row_size = row.size.to_f
-                if (row.reject(&:blank?).size.to_f / row_size) >= 0.5
-                  rows << row.size.times.map { |index| row[index] }
-                  rows_sampled += 1
-                end
-              rescue => e
-                next
-              end
-            end
-          rescue => e
-          end
-        end
-      end
-      protected
-      # An array of option names used by FasterCSV.
-      FASTER_CSV_OPTION_NAMES = %w[col_sep row_sep quote_char encoding field_size_limit converters unconverted_fields headers return_headers write_headers header_converters skip_blanks force_quotes].map(&:to_sym)
-      # Return the subset of options this resource was initialized
-      # with that are compatible with FasterCSV (it complains when you
-      # give it keywords it doesn't know).
-      #
-      # @return [Hash]
-      def resource_options_compatible_with_faster_csv
-        @compatible_options ||= {}.tap do |compatible_options|
-          FASTER_CSV_OPTION_NAMES.each do |option_name|
-            compatible_options[option_name] = resource_options[option_name] if resource_options.has_key?(option_name.to_sym)
-          end
-        end
-      end
-    end
-    # A module for working with CSV (comma-separated value) formatted
-    # data.
-    #
-    # @see IMW::Formats::Delimited
-    module Csv
-      include Delimited
-      def delimited_options
-        @delimited_options ||= {:col_sep => ","}.merge(super())
-      end
-    end
-    # A module for working with TSV (tab-separated value) formatted
-    # data.
-    #
-    # @see IMW::Formats::Delimited
-    module Tsv
-      include Delimited
-      def delimited_options
-        @delimited_options ||= {
-          :col_sep        => "\t",
-        }.merge(super())
-      end
-    end
-  end
-end

data/lib/imw/formats/excel.rb DELETED

@@ -1,100 +0,0 @@
-module IMW
-  module Formats
-    # Defines methods for reading and writing Microsoft Excel data.
-    module Excel
-      # Ensure that this Excel resource is described by an ordered
-      # collection of flat fields.
-      def validate_schema!
-        raise IMW::SchemaError.new("#{self.class} resources must be described by an ordered set of flat fields") if schema.any?(&:nested?)
-      end
-      # Return the data in this Excel document as an array of arrays.
-      #
-      # Data from consecutive worksheets will be concatenated into a
-      # single outer array.
-      #
-      # @return [Array<Array>]
-      def load
-        require 'spreadsheet'
-        data = []
-        Spreadsheet.open(path).worksheets.each do |worksheet|
-          data += worksheet.map do |row|
-            row.to_a
-          end
-        end
-        data
-      end
-      # Gives us goodies!  Needs +each+ below.
-      include Enumerable
-      # Yield each row of this Excel document.
-      #
-      # Will loop from one worksheet to the next.
-      #
-      # @yield [Spreadsheet::Excel::Row]
-      def each &block
-        require 'spreadsheet'
-        Spreadsheet.open(path).worksheets.each do |worksheet|
-          worksheet.each(&block)
-        end
-      end
-      # Return the number of lines in this Excel document.
-      #
-      # Measured across worksheets.
-      #
-      # @return [Integer]
-      def num_lines
-        require 'spreadsheet'
-        Spreadsheet.open(path).worksheets.inject(0) do |sum, worksheet|
-          sum += worksheet.row_count
-        end
-      end
-      # TODO
-      #
-      # def emit
-      # end
-      # TODO
-      #
-      # Extract the following methods from delimited into a module and
-      # let both Excel and Delimited use them.
-      #
-      # Or let Excel include Delimited and let it override
-      # appropriately.
-      #
-      #   headers_in_first_line?
-      #   guess_schema!
-      #
-      #
-      #
-      def snippet
-        require 'spreadsheet'
-        [].tap do |snip|
-          rows_sampled = 0
-          Spreadsheet.open(path).worksheets.each do |worksheet|
-            worksheet.each do |row|
-              begin
-                break if rows_sampled > 100
-                row_size = row.size.to_f
-                if (row.reject(&:blank?).size.to_f / row_size) > 0.5
-                  snip << row.to_a
-                  rows_sampled += 1
-                end
-              rescue => e
-                next
-              end
-            end
-            break if rows_sampled > 10
-          end
-        end
-      end
-    end
-  end
-end

data/lib/imw/formats/json.rb DELETED

@@ -1,41 +0,0 @@
-module IMW
-  module Formats
-    # Defines methods for reading and writing JSON data.
-    module Json
-      include Enumerable
-      # Return the content of this resource.
-      #
-      # Will pass a block to the outermost JSON data structure's each
-      # method.
-      #
-      # @return [Hash, Array, String, Fixnum] whatever the JSON contained
-      def load &block
-        require 'json'
-        json = JSON.parse(read)
-        if block_given?
-          json.each(&block)
-        else
-          json
-        end
-      end
-      # Iterate over the elements in the JSON.
-      def each &block
-        load(&block)
-      end
-      # Emit the +data+ into this resource.  It must be opened for
-      # writing.
-      #
-      # @param [Hash, String, Array, Fixnum] data the Ruby object to emit
-      def emit data, options={}
-        require 'json'
-        write(data.to_json)
-        self
-      end
-    end
-  end
-end

data/lib/imw/formats/pdf.rb DELETED

@@ -1,71 +0,0 @@
-module IMW
-  module Formats
-    # Defines methods for parsing and generating PDF.
-    #
-    # Uses PDF::Reader for parsing and Prawn for generating.
-    module Pdf
-      # Return a snippet of text from this PDF.
-      #
-      # @return [String]
-      def snippet
-        begin
-          require 'pdf/reader'
-          snippetizer = Snippetizer.new
-          PDF::Reader.file(path, snippetizer)
-          snippetizer.snippet
-        rescue Snippetizer::SnippetEndError
-          snippetizer.snippet
-        rescue
-          ''
-        end
-      end
-      # A receiver class used by PDF::Reader which agglomerates text
-      # up to 1024 bytes and then bails.
-      class Snippetizer
-        # A custom error class that can be raised while receiving text
-        # from PDF::Reader to cut short walking large PDF documents.
-        SnippetEndError = Class.new(IMW::Error)
-        # The snippet being built by this snippetizer.
-        attr_accessor :snippet
-        def initialize
-          @snippet = ''
-        end
-        # Agglomerates text from PDF::Reader up to a fixed size of
-        # 1024 bytes.
-        #
-        # Will convert a single-space line from PDF::Reader into a
-        # newline character.
-        #
-        # FIXME How does the receiver ask PDF::Reader to abort walking
-        # the document now that enough text has been returned?  Till a
-        # more graceful way is found this method simply raises an
-        # error, creating a GOTO...
-        def show_text *params
-          params.each do |string|
-            if @snippet.size < 1024
-              if string == ' '
-                @snippet += "\n"
-              else
-                @snippet += string[0..1024]
-              end
-            else
-              raise SnippetEndError.new
-            end
-          end
-        end
-        alias_method :show_text_with_positioning,      :show_text
-        alias_method :move_to_next_line_and_show_text, :show_text
-        alias_method :set_spacing_next_line_show_text, :show_text
-      end
-    end
-  end
-end

data/lib/imw/formats/sgml.rb DELETED

@@ -1,69 +0,0 @@
-module IMW
-  module Formats
-    # Defines methods to parse SGML-derived data formats (XML, HTML,
-    # &c.).  This module isn't directly used to extend resources.
-    # Instead, more specific modules (e.g.
-    # IMW::Formats::Xml) are used.
-    module Sgml
-      # Parse this resource using Hpricot and return (or yield if
-      # given a block) the resulting Hpricot::Doc.
-      #
-      # @return [Hpricot::Doc]
-      # @yield [Hpricot::Doc]
-      def load &block
-        require 'hpricot'
-        sgml = Hpricot(io)
-        if block_given?
-          yield sgml
-        else
-          sgml
-        end
-      end
-      # Parse the Hpricot::Doc of this resource with the given
-      # +parser+.
-      #
-      # The parser can either be an IMW::Parsers::HtmlParser or a
-      # hash which will be used to build such a parser.  See the
-      # documentation for IMW::Parsers::HtmlParser for more
-      # information.
-      #
-      # @param [Hash, IMW::Parsers::HtmlParser] parser
-      # @return [Hash] the parser's output
-      def parse parser
-        if parser.is_a?(IMW::Parsers::HtmlParser)
-          parser.parse(load)
-        else
-          IMW::Parsers::HtmlParser.new(parser).parse(load)
-        end
-      end
-    end
-    # Defines methods for XML data.
-    module Xml
-      include Sgml
-    end
-    # Defines methods for XSL data.
-    module Xsl
-      include Sgml
-    end
-    # Defines methods for XHTML data.
-    module Xhtml
-      include Sgml
-    end
-    # Defines methods for HTML data.
-    module Html
-      include Sgml
-    end
-    # Defines methods for RDF data.
-    module Rdf
-      include Sgml
-    end
-  end
-end