html_massage 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
 - data/Gemfile +4 -0
 - data/README.md +18 -0
 - data/Rakefile +1 -0
 - data/html_massage.gemspec +23 -0
 - data/lib/html_massage/version.rb +3 -0
 - data/lib/html_massage.rb +176 -0
 - metadata +82 -0
 
    
        data/.gitignore
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,18 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # html_massage
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Give your HTML a massage, in just the ways it loves:
         
     | 
| 
      
 4 
     | 
    
         
            +
             * Remove headers and footers and navigation, and strip to only the "content" part of the HTML
         
     | 
| 
      
 5 
     | 
    
         
            +
             * Sanitize tags, removing javascript and styling
         
     | 
| 
      
 6 
     | 
    
         
            +
             * Convert your HTML to nicely-formatted plain text
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                require 'rubygems'
         
     | 
| 
      
 11 
     | 
    
         
            +
                require 'html_massage'
         
     | 
| 
      
 12 
     | 
    
         
            +
                html = "<html><body><div id='header'>My Site</div><div>This is some great content!</div></body></html>"
         
     | 
| 
      
 13 
     | 
    
         
            +
                html_massage = HtmlMassage.new( html, :ignored_selectors => [ '#header' ] )
         
     | 
| 
      
 14 
     | 
    
         
            +
                 # => #<HtmlMassager::HtmlMassage ... >
         
     | 
| 
      
 15 
     | 
    
         
            +
                html_massage.to_html
         
     | 
| 
      
 16 
     | 
    
         
            +
                 # => "<div>This is some great content!</div>"
         
     | 
| 
      
 17 
     | 
    
         
            +
                html_massage.to_text
         
     | 
| 
      
 18 
     | 
    
         
            +
                 # => "This is some great content!\n"
         
     | 
    
        data/Rakefile
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'bundler/gem_tasks'
         
     | 
| 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
            # Gem specification for html_massage.
            # Puts ./lib on the load path so the version constant can be read,
            # then declares the gem's identity, runtime dependencies and the
            # git-tracked files that make up the package.
            $LOAD_PATH.push File.expand_path("../lib", __FILE__)
            require "html_massage/version"

            Gem::Specification.new do |spec|
              # Identity
              spec.name        = "html_massage"
              spec.version     = HtmlMassager::VERSION
              spec.authors     = ["Harlan Knight Wood"]
              spec.email       = ["code@hkw7.org"]
              spec.homepage    = "https://github.com/onesunone/html_massage"
              spec.summary     = "Massages HTML how you want to."
              spec.description = "Massages HTML how you want to: sanitize tags, remove headers and footers, convert to plain text."

              spec.rubyforge_project = "html_massage"

              # Runtime dependencies
              spec.add_dependency "nokogiri", ">= 1.4.4"
              spec.add_dependency "sanitize", ">= 2.0.0"

              # Package contents: whatever git currently tracks
              spec.files         = %x(git ls-files).split("\n")
              spec.test_files    = %x(git ls-files -- {test,spec,features}/*).split("\n")
              spec.executables   = %x(git ls-files -- bin/*).split("\n").map { |path| File.basename(path) }
              spec.require_paths = ["lib"]
            end
         
     | 
    
        data/lib/html_massage.rb
    ADDED
    
    | 
         @@ -0,0 +1,176 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "cgi"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "nokogiri"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require "sanitize"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "html_massage/version"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module HtmlMassager
              # Reduces an HTML document to its "content" part, sanitizes it, and
              # exposes the result either as cleaned HTML (#to_html) or as plain
              # text (#to_text).
              class HtmlMassage
                # html    - String of HTML to massage.
                # options - Hash of options (optional):
                #           :source_url        - URL the HTML was fetched from; when
                #                                given, relative <a href> values are
                #                                made absolute against it.
                #           :ignored_selectors - CSS selector (or Array of selectors)
                #                                whose matches are removed from the body.
                def initialize( html, options = {} )
                  @source_url        = options[ :source_url ]
                  @ignored_selectors = options[ :ignored_selectors ]
                  @clean_html = massage_html( html )
                end

                # Runs the full pipeline: extract content, sanitize, and (only when a
                # source URL was supplied) absolutify links. Returns the cleaned HTML.
                def massage_html( html )
                  html = content_only( html )
                  html = sanitize_html( html )
                  html = absolutify_links( html ) if @source_url
                  html
                end

                # Returns the inner HTML of the element(s) matching '#content' inside
                # <body>, or of the whole <body> when nothing matches '#content'.
                # Elements matching any ignored selector are removed first.
                def content_only( content )
                  doc  = Nokogiri::HTML( content )
                  body = doc / 'html' / 'body'

                  # Array() accepts nil, a single selector String, or an Array alike
                  # (String#to_a does not exist on Ruby 1.9+).
                  Array( @ignored_selectors ).each do |ignored_selector|
                    ( body / ignored_selector ).remove
                  end

                  main = body / '#content'
                  main = body if main.empty?
                  main.inner_html
                end

                # Returns a sanitized copy of html: script/noscript/style elements are
                # dropped together with their contents, then everything not on the
                # element/attribute/protocol whitelist below is removed by Sanitize.
                def sanitize_html(html)
                  html = html.dup

                  # Sanitize would keep the text inside these elements, so remove the
                  # whole element up front. \b keeps e.g. <styled> from matching.
                  %w[ script noscript style ].each do |tag|
                    html.gsub!( %r{<#{tag}\b[^>]*>.*?</#{tag}\s*>}mi, '' )
                  end

                  Sanitize.clean(
                      html,
                      {
                          :elements => [
                              'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
                              'blockquote', 'br', 'button', 'caption', 'center', 'cite',
                              'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir',
                              'div', 'dl', 'dt', 'em', 'fieldset', 'form', 'h1',
                              'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
                              'img',
                              'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu',
                              'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
                              'select', 'small', 'span', 'strike', 'strong', 'sub',
                              'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
                              'thead', 'tr', 'tt', 'u', 'ul', 'var',
                          ],
                          :attributes => {
                              'a' => ['href'],
                              'img' => ['src'],
                              :all => ['abbr', 'accept', 'accept-charset',
                                       'accesskey', 'action', 'align', 'alt', 'axis',
                                       'border', 'cellpadding', 'cellspacing', 'char',
                                       'charoff', 'class', 'charset', 'checked', 'cite',
                                       'clear', 'cols', 'colspan', 'color',
                                       'compact', 'coords', 'datetime', 'dir',
                                       'disabled', 'enctype', 'for', 'frame',
                                       'headers', 'height', 'hreflang',
                                       'hspace', 'id', 'ismap', 'label', 'lang',
                                       'longdesc', 'maxlength', 'media', 'method',
                                       'multiple', 'name', 'nohref', 'noshade',
                                       'nowrap', 'prompt', 'readonly', 'rel', 'rev',
                                       'rows', 'rowspan', 'rules', 'scope',
                                       'selected', 'shape', 'size', 'span',
                                       'start', 'summary', 'tabindex', 'target',
                                       'title', 'type', 'usemap', 'valign', 'value',
                                       'vspace', 'width']
                          },
                          :protocols => {
                              'a' => {'href' => ['http', 'https', 'mailto', :relative]},
                              'img' => {'src' => ['http', 'https', :relative]}
                          },

                          # consider including for deprecated/historical/or spam-suspect pages:
                          # Gollum has a nice way to add this to your config optionally, see:
                          # https://github.com/github/gollum/blob/master/lib/gollum/sanitization.rb
                          #
                          #        :add_attributes => {
                          #            'a' => {'rel' => 'nofollow'}
                          #        }
                      }
                  )
                end

                # Rewrites the href of every <a> in html so that relative links become
                # absolute with respect to @source_url. Returns html unchanged when
                # @source_url does not look like "scheme://host...".
                def absolutify_links( html )
                  base_url, resource_dir_url = split_source_url( @source_url )
                  return html unless base_url

                  dom = Nokogiri::HTML.fragment( html )
                  ( dom / 'a' ).each do |link|
                    href = link[ 'href' ]
                    link[ 'href' ] = absolutify_href( href, base_url, resource_dir_url ) if href
                  end
                  dom.to_s
                end

                # Returns the cleaned HTML produced at construction time.
                def to_html
                  @clean_html
                end

                # Converts the cleaned HTML to plain text: block-level tags become
                # newlines, <li> becomes "* ", <hr> becomes a dashed rule, all other
                # tags are dropped, and whitespace is normalized. The result always
                # ends with exactly one newline.
                def to_text
                  text = @clean_html.dup

                  # normalize newlines
                  text.gsub!( /\r\n?/, "\n" )

                  # nbsp entity => ' ' (CGI.unescapeHTML does not decode named nbsp)
                  text.gsub!( /&nbsp;/, ' ' )

                  text.gsub!( /\s+/, ' ' )   # all whitespace, including newlines, becomes a single space

                  # replace some tags with newlines
                  text.gsub!( %r{<br(\s[^>]*)?/?>}i, "\n" )
                  text.gsub!( %r{<p(\s[^>]*)?/?>}i, "\n\n" )
                  text.gsub!( %r{</(h\d|p|div|ol|ul)[^>]*>}i, "\n\n" )

                  # replace some tags with meaningful text markup
                  text.gsub!( /<hr[^>]*>/i, "\n\n-------------------------\n\n" )
                  text.gsub!( /<li[^>]*>/i, "\n* " )

                  # remove some tags and their inner html; /m because the steps above
                  # may already have inserted newlines inside the element
                  text.gsub!( %r{<noscript\b.*?</noscript>}mi, '' )

                  # strip out all remaining tags
                  text.gsub!( /<[^>]+>/, '' )

                  # Decode entities only AFTER the tags are gone: decoding first would
                  # turn escaped text such as "1 &lt; 2 and 3 &gt; 2" into something
                  # that looks like a tag and gets deleted by the step above.
                  text = CGI.unescapeHTML( text )

                  # literal no-break space (e.g. decoded from &#160;) => ' '
                  text.gsub!( "\u00A0", ' ' ) if text.encoding == Encoding::UTF_8

                  # normalize whitespace
                  text.gsub!( / +/, ' ' )
                  text = strip_lines( text )
                  text.gsub!( /\n{3,}/, "\n\n" )
                  text.strip!

                  "#{text}\n"
                end

                # Strips leading/trailing whitespace from every line of text and from
                # the text as a whole.
                def strip_lines( text )
                  text.split( "\n" ).map { |line| line.strip }.join( "\n" ).strip
                end

                private

                # Splits source_url into [ base_url, resource_dir_url ]:
                #   "http://host/a/b/c.html" => [ "http://host", "http://host/a/b/" ]
                #   "http://host/page.html"  => [ "http://host", "http://host/" ]
                #   "http://host"            => [ "http://host", "http://host" ]
                # Returns nil when source_url does not start with "scheme://host".
                # Any query string or fragment is ignored.
                def split_source_url( source_url )
                  match = source_url.to_s.match( %r{\A([a-z][a-z0-9+.\-]*://[^/?#]+)(/(?:[^?#]*/)?)?}i )
                  return nil unless match
                  [ match[ 1 ], match[ 0 ] ]
                end

                # Returns the absolute form of a single href value.
                #  * already-absolute ("scheme:..."), protocol-relative ("//host/..."),
                #    fragment-only ("#..."), query-only ("?...") and empty values are
                #    returned untouched;
                #  * root-relative ("/path") values are joined to base_url;
                #  * everything else ("../x", "page.html") is joined to resource_dir_url.
                def absolutify_href( href, base_url, resource_dir_url )
                  candidate = href.strip
                  case candidate
                    when '', /\A[#?]/, %r{\A//}, %r{\A[a-z][a-z0-9+.\-]*:}i
                      href
                    when %r{\A/}
                      File.join( base_url, candidate )
                    else
                      File.join( resource_dir_url, candidate )
                  end
                end

              end
            end
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
            # Mix HtmlMassager into the top-level object so that HtmlMassage can be
            # referenced without the HtmlMassager:: prefix.
            include HtmlMassager
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,82 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification 
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: html_massage
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version 
         
     | 
| 
      
 4 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
      
 5 
     | 
    
         
            +
              version: 0.0.2
         
     | 
| 
      
 6 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 7 
     | 
    
         
            +
            authors: 
         
     | 
| 
      
 8 
     | 
    
         
            +
            - Harlan Knight Wood
         
     | 
| 
      
 9 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 10 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 11 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            date: 2011-06-18 00:00:00 Z
         
     | 
| 
      
 14 
     | 
    
         
            +
            dependencies: 
         
     | 
| 
      
 15 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 16 
     | 
    
         
            +
              name: nokogiri
         
     | 
| 
      
 17 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 18 
     | 
    
         
            +
              requirement: &id001 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 19 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 20 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 21 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 22 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 23 
     | 
    
         
            +
                    version: 1.4.4
         
     | 
| 
      
 24 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 25 
     | 
    
         
            +
              version_requirements: *id001
         
     | 
| 
      
 26 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 27 
     | 
    
         
            +
              name: sanitize
         
     | 
| 
      
 28 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: &id002 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 30 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 31 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 32 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 33 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 34 
     | 
    
         
            +
                    version: 2.0.0
         
     | 
| 
      
 35 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: *id002
         
     | 
| 
      
 37 
     | 
    
         
            +
            description: "Massages HTML how you want to: sanitize tags, remove headers and footers, convert to plain text."
         
     | 
| 
      
 38 
     | 
    
         
            +
            email: 
         
     | 
| 
      
 39 
     | 
    
         
            +
            - code@hkw7.org
         
     | 
| 
      
 40 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            files: 
         
     | 
| 
      
 47 
     | 
    
         
            +
            - .gitignore
         
     | 
| 
      
 48 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 49 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 50 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 51 
     | 
    
         
            +
            - html_massage.gemspec
         
     | 
| 
      
 52 
     | 
    
         
            +
            - lib/html_massage.rb
         
     | 
| 
      
 53 
     | 
    
         
            +
            - lib/html_massage/version.rb
         
     | 
| 
      
 54 
     | 
    
         
            +
            homepage: https://github.com/onesunone/html_massage
         
     | 
| 
      
 55 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 58 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
            require_paths: 
         
     | 
| 
      
 61 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 62 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 63 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 64 
     | 
    
         
            +
              requirements: 
         
     | 
| 
      
 65 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 66 
     | 
    
         
            +
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 67 
     | 
    
         
            +
                  version: "0"
         
     | 
| 
      
 68 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 69 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 70 
     | 
    
         
            +
              requirements: 
         
     | 
| 
      
 71 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 72 
     | 
    
         
            +
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 73 
     | 
    
         
            +
                  version: "0"
         
     | 
| 
      
 74 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
            rubyforge_project: html_massage
         
     | 
| 
      
 77 
     | 
    
         
            +
            rubygems_version: 1.8.5
         
     | 
| 
      
 78 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 79 
     | 
    
         
            +
            specification_version: 3
         
     | 
| 
      
 80 
     | 
    
         
            +
            summary: Massages HTML how you want to.
         
     | 
| 
      
 81 
     | 
    
         
            +
            test_files: []
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     |