parsehtml 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
 - data/Manifest.txt +19 -0
 - data/PostInstall.txt +2 -0
 - data/README.rdoc +52 -0
 - data/Rakefile +28 -0
 - data/config/website.yml +2 -0
 - data/lib/parsehtml.rb +452 -0
 - data/script/console +10 -0
 - data/script/destroy +14 -0
 - data/script/generate +14 -0
 - data/script/txt2html +71 -0
 - data/test/test_helper.rb +3 -0
 - data/test/test_parse.rb +40 -0
 - data/test/test_parsehtml.rb +11 -0
 - data/website/index.html +90 -0
 - data/website/index.txt +62 -0
 - data/website/javascripts/rounded_corners_lite.inc.js +285 -0
 - data/website/stylesheets/screen.css +159 -0
 - data/website/template.html.erb +50 -0
 - metadata +98 -0
 
    
        data/History.txt
    ADDED
    
    
    
        data/Manifest.txt
    ADDED
    
    | 
         @@ -0,0 +1,19 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            History.txt
         
     | 
| 
      
 2 
     | 
    
         
            +
            Manifest.txt
         
     | 
| 
      
 3 
     | 
    
         
            +
            PostInstall.txt
         
     | 
| 
      
 4 
     | 
    
         
            +
            README.rdoc
         
     | 
| 
      
 5 
     | 
    
         
            +
            Rakefile
         
     | 
| 
      
 6 
     | 
    
         
            +
            config/website.yml
         
     | 
| 
      
 7 
     | 
    
         
            +
            lib/parsehtml.rb
         
     | 
| 
      
 8 
     | 
    
         
            +
            script/console
         
     | 
| 
      
 9 
     | 
    
         
            +
            script/destroy
         
     | 
| 
      
 10 
     | 
    
         
            +
            script/generate
         
     | 
| 
      
 11 
     | 
    
         
            +
            script/txt2html
         
     | 
| 
      
 12 
     | 
    
         
            +
            test/test_helper.rb
         
     | 
| 
      
 13 
     | 
    
         
            +
            test/test_parsehtml.rb
         
     | 
| 
      
 14 
     | 
    
         
            +
            test/test_parse.rb
         
     | 
| 
      
 15 
     | 
    
         
            +
            website/index.html
         
     | 
| 
      
 16 
     | 
    
         
            +
            website/index.txt
         
     | 
| 
      
 17 
     | 
    
         
            +
            website/javascripts/rounded_corners_lite.inc.js
         
     | 
| 
      
 18 
     | 
    
         
            +
            website/stylesheets/screen.css
         
     | 
| 
      
 19 
     | 
    
         
            +
            website/template.html.erb
         
     | 
    
        data/PostInstall.txt
    ADDED
    
    
    
        data/README.rdoc
    ADDED
    
    | 
         @@ -0,0 +1,52 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            = parsehtml
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            http://parsehtml.rubyforge.org
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            == DESCRIPTION:
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ParseHTML is an HTML parser which works with Ruby 1.8 and above.  ParseHTML will even try to handle invalid HTML to some degree.
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            == SYNOPSIS:
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
              FIX (code sample of usage)
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            == REQUIREMENTS:
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            Ruby 1.8
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            == INSTALL:
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            sudo gem install parsehtml
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            == DEVELOPERS:
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            Craig P Jolicoeur - http://github.com/cpjolicoeur
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            == ACKNOWLEDGEMENTS:
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            ParseHTML is heavily based on the ParseHTML PHP library by Milian Wolff (http://milianw.de)
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            == LICENSE:
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
            (The MIT License)
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            Copyright (c) 2008 Craig P Jolicoeur
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
            Permission is hereby granted, free of charge, to any person obtaining
         
     | 
| 
      
 36 
     | 
    
         
            +
            a copy of this software and associated documentation files (the
         
     | 
| 
      
 37 
     | 
    
         
            +
            'Software'), to deal in the Software without restriction, including
         
     | 
| 
      
 38 
     | 
    
         
            +
            without limitation the rights to use, copy, modify, merge, publish,
         
     | 
| 
      
 39 
     | 
    
         
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         
     | 
| 
      
 40 
     | 
    
         
            +
            permit persons to whom the Software is furnished to do so, subject to
         
     | 
| 
      
 41 
     | 
    
         
            +
            the following conditions:
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            The above copyright notice and this permission notice shall be
         
     | 
| 
      
 44 
     | 
    
         
            +
            included in all copies or substantial portions of the Software.
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 47 
     | 
    
         
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 48 
     | 
    
         
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
         
     | 
| 
      
 49 
     | 
    
         
            +
            IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
         
     | 
| 
      
 50 
     | 
    
         
            +
            CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
         
     | 
| 
      
 51 
     | 
    
         
            +
            TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
         
     | 
| 
      
 52 
     | 
    
         
            +
            SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         
     | 
    
        data/Rakefile
    ADDED
    
    | 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            %w[rubygems rake rake/clean fileutils newgem rubigen].each { |f| require f }
         
     | 
| 
      
 2 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/lib/parsehtml'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            # Generate all the Rake tasks
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Run 'rake -T' to see list of generated tasks (from gem root directory)
         
     | 
| 
      
 6 
     | 
    
         
            +
            $hoe = Hoe.new('parsehtml', ParseHTML::VERSION) do |p|
         
     | 
| 
      
 7 
     | 
    
         
            +
              p.developer('ParseHTML', 'cpjolicoeur@gmail.com')
         
     | 
| 
      
 8 
     | 
    
         
            +
              p.changes              = p.paragraphs_of("History.txt", 0..1).join("\n\n")
         
     | 
| 
      
 9 
     | 
    
         
            +
              p.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
         
     | 
| 
      
 10 
     | 
    
         
            +
              p.rubyforge_name       = p.name # TODO this is default value
         
     | 
| 
      
 11 
     | 
    
         
            +
              # p.extra_deps         = [
         
     | 
| 
      
 12 
     | 
    
         
            +
              #   ['activesupport','>= 2.0.2'],
         
     | 
| 
      
 13 
     | 
    
         
            +
              # ]
         
     | 
| 
      
 14 
     | 
    
         
            +
              p.extra_dev_deps = [
         
     | 
| 
      
 15 
     | 
    
         
            +
                ['newgem', ">= #{::Newgem::VERSION}"]
         
     | 
| 
      
 16 
     | 
    
         
            +
              ]
         
     | 
| 
      
 17 
     | 
    
         
            +
              
         
     | 
| 
      
 18 
     | 
    
         
            +
              p.clean_globs |= %w[**/.DS_Store tmp *.log]
         
     | 
| 
      
 19 
     | 
    
         
            +
              path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
         
     | 
| 
      
 20 
     | 
    
         
            +
              p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/,''), 'rdoc')
         
     | 
| 
      
 21 
     | 
    
         
            +
              p.rsync_args = '-av --delete --ignore-errors'
         
     | 
| 
      
 22 
     | 
    
         
            +
            end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            require 'newgem/tasks' # load /tasks/*.rake
         
     | 
| 
      
 25 
     | 
    
         
            +
            Dir['tasks/**/*.rake'].each { |t| load t }
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            # TODO - want other tests/tasks run by default? Add them to the list
         
     | 
| 
      
 28 
     | 
    
         
            +
            # task :default => [:spec, :features]
         
     | 
    
        data/config/website.yml
    ADDED
    
    
    
        data/lib/parsehtml.rb
    ADDED
    
    | 
         @@ -0,0 +1,452 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            $:.unshift(File.dirname(__FILE__)) unless
         
     | 
| 
      
 2 
     | 
    
         
            +
              $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            class ParseHTML
         
     | 
| 
      
 5 
     | 
    
         
            +
              VERSION = '1.12.0'
         
     | 
| 
      
 6 
     | 
    
         
            +
              
         
     | 
| 
      
 7 
     | 
    
         
            +
              # tags which are always empty (<br />, etc.)
         
     | 
| 
      
 8 
     | 
    
         
            +
              EMPTY_TAGS = %w(br hr input img area link meta param)
         
     | 
| 
      
 9 
     | 
    
         
            +
              
         
     | 
| 
      
 10 
     | 
    
         
            +
              # tags with preformatted text - whitespace won't be touched in them
         
     | 
| 
      
 11 
     | 
    
         
            +
              PREFORMATTED_TAGS = %w(script style pre code)
         
     | 
| 
      
 12 
     | 
    
         
            +
              
         
     | 
| 
      
 13 
     | 
    
         
            +
              # list of block elements
         
     | 
| 
      
 14 
     | 
    
         
            +
              # - tag_name => bool (is block level)
         
     | 
| 
      
 15 
     | 
    
         
            +
              BLOCK_ELEMENTS = {'address' => true,
         
     | 
| 
      
 16 
     | 
    
         
            +
                            		'blockquote' => true,
         
     | 
| 
      
 17 
     | 
    
         
            +
                            		'center' => true,
         
     | 
| 
      
 18 
     | 
    
         
            +
                            		'del' => true,
         
     | 
| 
      
 19 
     | 
    
         
            +
                            		'dir' => true,
         
     | 
| 
      
 20 
     | 
    
         
            +
                            		'div' => true,
         
     | 
| 
      
 21 
     | 
    
         
            +
                            		'dl' => true,
         
     | 
| 
      
 22 
     | 
    
         
            +
                            		'fieldset' => true,
         
     | 
| 
      
 23 
     | 
    
         
            +
                            		'form' => true,
         
     | 
| 
      
 24 
     | 
    
         
            +
                            		'h1' => true,
         
     | 
| 
      
 25 
     | 
    
         
            +
                            		'h2' => true,
         
     | 
| 
      
 26 
     | 
    
         
            +
                            		'h3' => true,
         
     | 
| 
      
 27 
     | 
    
         
            +
                            		'h4' => true,
         
     | 
| 
      
 28 
     | 
    
         
            +
                            		'h5' => true,
         
     | 
| 
      
 29 
     | 
    
         
            +
                            		'h6' => true,
         
     | 
| 
      
 30 
     | 
    
         
            +
                            		'hr' => true,
         
     | 
| 
      
 31 
     | 
    
         
            +
                            		'ins' => true,
         
     | 
| 
      
 32 
     | 
    
         
            +
                            		'isindex' => true,
         
     | 
| 
      
 33 
     | 
    
         
            +
                            		'menu' => true,
         
     | 
| 
      
 34 
     | 
    
         
            +
                            		'noframes' => true,
         
     | 
| 
      
 35 
     | 
    
         
            +
                            		'noscript' => true,
         
     | 
| 
      
 36 
     | 
    
         
            +
                            		'ol' => true,
         
     | 
| 
      
 37 
     | 
    
         
            +
                            		'p' => true,
         
     | 
| 
      
 38 
     | 
    
         
            +
                            		'pre' => true,
         
     | 
| 
      
 39 
     | 
    
         
            +
                            		'table' => true,
         
     | 
| 
      
 40 
     | 
    
         
            +
                            		'ul' => true,
         
     | 
| 
      
 41 
     | 
    
         
            +
                            		# set table elements and list items to block as well
         
     | 
| 
      
 42 
     | 
    
         
            +
                            		'thead' => true,
         
     | 
| 
      
 43 
     | 
    
         
            +
                            		'tbody' => true,
         
     | 
| 
      
 44 
     | 
    
         
            +
                            		'tfoot' => true,
         
     | 
| 
      
 45 
     | 
    
         
            +
                            		'td' => true,
         
     | 
| 
      
 46 
     | 
    
         
            +
                            		'tr' => true,
         
     | 
| 
      
 47 
     | 
    
         
            +
                            		'th' => true,
         
     | 
| 
      
 48 
     | 
    
         
            +
                            		'li' => true,
         
     | 
| 
      
 49 
     | 
    
         
            +
                            		'dd' => true,
         
     | 
| 
      
 50 
     | 
    
         
            +
                            		'dt' => true,
         
     | 
| 
      
 51 
     | 
    
         
            +
                            		# header items and html / body as well
         
     | 
| 
      
 52 
     | 
    
         
            +
                            		'html' => true,
         
     | 
| 
      
 53 
     | 
    
         
            +
                            		'body' => true,
         
     | 
| 
      
 54 
     | 
    
         
            +
                            		'head' => true,
         
     | 
| 
      
 55 
     | 
    
         
            +
                            		'meta' => true,
         
     | 
| 
      
 56 
     | 
    
         
            +
                            		'link' => true,
         
     | 
| 
      
 57 
     | 
    
         
            +
                            		'style' => true,
         
     | 
| 
      
 58 
     | 
    
         
            +
                            		'title' => true,
         
     | 
| 
      
 59 
     | 
    
         
            +
                            		# media tags to render as block
         
     | 
| 
      
 60 
     | 
    
         
            +
                            		'map' => true,
         
     | 
| 
      
 61 
     | 
    
         
            +
                            		'object' => true,
         
     | 
| 
      
 62 
     | 
    
         
            +
                            		'param' => true,
         
     | 
| 
      
 63 
     | 
    
         
            +
                            		'embed' => true,
         
     | 
| 
      
 64 
     | 
    
         
            +
                            		'area' => true,
         
     | 
| 
      
 65 
     | 
    
         
            +
                            		# inline elements
         
     | 
| 
      
 66 
     | 
    
         
            +
                            		'a' => false,
         
     | 
| 
      
 67 
     | 
    
         
            +
                            		'abbr' => false,
         
     | 
| 
      
 68 
     | 
    
         
            +
                            		'acronym' => false,
         
     | 
| 
      
 69 
     | 
    
         
            +
                            		'applet' => false,
         
     | 
| 
      
 70 
     | 
    
         
            +
                            		'b' => false,
         
     | 
| 
      
 71 
     | 
    
         
            +
                            		'basefont' => false,
         
     | 
| 
      
 72 
     | 
    
         
            +
                            		'bdo' => false,
         
     | 
| 
      
 73 
     | 
    
         
            +
                            		'big' => false,
         
     | 
| 
      
 74 
     | 
    
         
            +
                            		'br' => false,
         
     | 
| 
      
 75 
     | 
    
         
            +
                            		'button' => false,
         
     | 
| 
      
 76 
     | 
    
         
            +
                            		'cite' => false,
         
     | 
| 
      
 77 
     | 
    
         
            +
                            		'code' => false,
         
     | 
| 
      
 78 
     | 
    
         
            +
                            		'del' => false,
         
     | 
| 
      
 79 
     | 
    
         
            +
                            		'dfn' => false,
         
     | 
| 
      
 80 
     | 
    
         
            +
                            		'em' => false,
         
     | 
| 
      
 81 
     | 
    
         
            +
                            		'font' => false,
         
     | 
| 
      
 82 
     | 
    
         
            +
                            		'i' => false,
         
     | 
| 
      
 83 
     | 
    
         
            +
                            		'img' => false,
         
     | 
| 
      
 84 
     | 
    
         
            +
                            		'ins' => false,
         
     | 
| 
      
 85 
     | 
    
         
            +
                            		'input' => false,
         
     | 
| 
      
 86 
     | 
    
         
            +
                            		'iframe' => false,
         
     | 
| 
      
 87 
     | 
    
         
            +
                            		'kbd' => false,
         
     | 
| 
      
 88 
     | 
    
         
            +
                            		'label' => false,
         
     | 
| 
      
 89 
     | 
    
         
            +
                            		'q' => false,
         
     | 
| 
      
 90 
     | 
    
         
            +
                            		'samp' => false,
         
     | 
| 
      
 91 
     | 
    
         
            +
                            		'script' => false,
         
     | 
| 
      
 92 
     | 
    
         
            +
                            		'select' => false,
         
     | 
| 
      
 93 
     | 
    
         
            +
                            		'small' => false,
         
     | 
| 
      
 94 
     | 
    
         
            +
                            		'span' => false,
         
     | 
| 
      
 95 
     | 
    
         
            +
                            		'strong' => false,
         
     | 
| 
      
 96 
     | 
    
         
            +
                            		'sub' => false,
         
     | 
| 
      
 97 
     | 
    
         
            +
                            		'sup' => false,
         
     | 
| 
      
 98 
     | 
    
         
            +
                            		'textarea' => false,
         
     | 
| 
      
 99 
     | 
    
         
            +
                            		'tt' => false,
         
     | 
| 
      
 100 
     | 
    
         
            +
                            		'var' => false}
         
     | 
| 
      
 101 
     | 
    
         
            +
              
         
     | 
| 
      
 102 
     | 
    
         
            +
              # html to be parsed
         
     | 
| 
      
 103 
     | 
    
         
            +
              attr_accessor :html
         
     | 
| 
      
 104 
     | 
    
         
            +
              
         
     | 
| 
      
 105 
     | 
    
         
            +
              # node type:
         
     | 
| 
      
 106 
     | 
    
         
            +
              # - tag (see is_start_tag)
         
     | 
| 
      
 107 
     | 
    
         
            +
              # - text (includes CDATA)
         
     | 
| 
      
 108 
     | 
    
         
            +
              # - comment
         
     | 
| 
      
 109 
     | 
    
         
            +
              # - doctype
         
     | 
| 
      
 110 
     | 
    
         
            +
              # - pi (processing instruction)
         
     | 
| 
      
 111 
     | 
    
         
            +
              attr_reader :node_type
         
     | 
| 
      
 112 
     | 
    
         
            +
              
         
     | 
| 
      
 113 
     | 
    
         
            +
              # current node context
         
     | 
| 
      
 114 
     | 
    
         
            +
              # - either a simple string (text node) or something like
         
     | 
| 
      
 115 
     | 
    
         
            +
              # - <tag attrib="value"...>
         
     | 
| 
      
 116 
     | 
    
         
            +
              attr_accessor :node
         
     | 
| 
      
 117 
     | 
    
         
            +
              
         
     | 
| 
      
 118 
     | 
    
         
            +
              # suppress HTML tags inside preformatted tags
         
     | 
| 
      
 119 
     | 
    
         
            +
              attr_accessor :no_tags_in_code
         
     | 
| 
      
 120 
     | 
    
         
            +
              
         
     | 
| 
      
 121 
     | 
    
         
            +
              # whether the current node is an opening tag (<a>) or not (</a>)
         
     | 
| 
      
 122 
     | 
    
         
            +
              # - set to nil if current node is not a tag
         
     | 
| 
      
 123 
     | 
    
         
            +
              # - NOTE: empty tags (<br />) set this to true as well!
         
     | 
| 
      
 124 
     | 
    
         
            +
              attr_reader :is_start_tag
         
     | 
| 
      
 125 
     | 
    
         
            +
              
         
     | 
| 
      
 126 
     | 
    
         
            +
              # whether current node is an empty tag (<br />) or not (<a></a>)
         
     | 
| 
      
 127 
     | 
    
         
            +
              attr_reader :is_empty_tag
         
     | 
| 
      
 128 
     | 
    
         
            +
              
         
     | 
| 
      
 129 
     | 
    
         
            +
              # whether the current tag is a block level element
         
     | 
| 
      
 130 
     | 
    
         
            +
              attr_reader :is_block_element
         
     | 
| 
      
 131 
     | 
    
         
            +
              
         
     | 
| 
      
 132 
     | 
    
         
            +
              # tag name
         
     | 
| 
      
 133 
     | 
    
         
            +
              attr_reader :tag_name
         
     | 
| 
      
 134 
     | 
    
         
            +
              
         
     | 
| 
      
 135 
     | 
    
         
            +
              # attributes of current_tag (in hash)
         
     | 
| 
      
 136 
     | 
    
         
            +
              attr_reader :tag_attributes
         
     | 
| 
      
 137 
     | 
    
         
            +
              
         
     | 
| 
      
 138 
     | 
    
         
            +
              # keep whitespace formatting
         
     | 
| 
      
 139 
     | 
    
         
            +
              attr_reader :keep_whitespace
         
     | 
| 
      
 140 
     | 
    
         
            +
              
         
     | 
| 
      
 141 
     | 
    
         
            +
              # list of open tags (array)
         
     | 
| 
      
 142 
     | 
    
         
            +
              # - count this to get current depth
         
     | 
| 
      
 143 
     | 
    
         
            +
              attr_reader :open_tags
         
     | 
| 
      
 144 
     | 
    
         
            +
              
         
     | 
| 
      
 145 
     | 
    
         
            +
             
     | 
| 
      
 146 
     | 
    
         
            +
              def initialize(html = '')
         
     | 
| 
      
 147 
     | 
    
         
            +
                @html = html
         
     | 
| 
      
 148 
     | 
    
         
            +
                @open_tags = []
         
     | 
| 
      
 149 
     | 
    
         
            +
                @node_type, @node, @tag_name = '', '', ''
         
     | 
| 
      
 150 
     | 
    
         
            +
                @is_start_tag, @is_empty_tag, @is_block_element, @no_tags_in_code = false, false, false, false
         
     | 
| 
      
 151 
     | 
    
         
            +
                @tag_attributes = nil
         
     | 
| 
      
 152 
     | 
    
         
            +
                @keep_whitespace = 0
         
     | 
| 
      
 153 
     | 
    
         
            +
              end
         
     | 
| 
      
 154 
     | 
    
         
            +
              
         
     | 
| 
      
 155 
     | 
    
         
            +
              # get next node
         
     | 
| 
      
 156 
     | 
    
         
            +
              def next_node
                # Advance the parser by one node.
                #
                # Cuts the next node (pi, comment, doctype, CDATA text, tag or plain
                # text) off the front of @html and stores it in @node / @node_type.
                # Returns true when a node was produced and false once @html is
                # exhausted.
                return false if @html.nil? || @html.empty?

                # The "skip whitespace-only text" state has to survive between calls
                # (a block tag seen in one call affects the text node read in the
                # next one), so it lives in an instance variable instead of a local.
                @skip_whitespace = true if @skip_whitespace.nil?

                # The previous node was an opening tag: record it as open now.
                if @is_start_tag && !@is_empty_tag
                  @open_tags << @tag_name
                  @keep_whitespace += 1 if PREFORMATTED_TAGS.include?(@tag_name)
                end

                if @html[0, 1] == '<'
                  token = @html[0, 9]

                  if token[0, 2] == '<?'
                    # xml prolog or other processing instruction
                    # TODO: trigger error (this might need some work)
                    close = @html.index('>')
                    # An unterminated pi swallows the rest of the input.
                    set_node('pi', close ? close + 1 : @html.size)
                    return true
                  end

                  if token[0, 4] == '<!--'
                    # HTML comment
                    close = @html.index('-->')
                    if close
                      close += 3
                    else
                      # could not find a closing -->, use the next '>' instead
                      # (this is what firefox does with its parsing)
                      gt = @html.index('>')
                      close = gt ? gt + 1 : @html.size
                    end
                    set_node('comment', close)
                    return true
                  end

                  if token == '<!DOCTYPE'
                    close = @html.index('>')
                    set_node('doctype', close ? close + 1 : @html.size)
                    @skip_whitespace = true
                    return true
                  end

                  if token == '<![CDATA['
                    # cdata is handed out as a text node
                    # remove leading <![CDATA[
                    @html = @html[9, @html.size - 9]
                    close = @html.index(']]>')
                    if close
                      set_node('text', close + 3)
                      # drop the trailing ]]> (String#[] with a negative length
                      # returns nil, so slice by the known content length)
                      @node = @node[0, close]
                    else
                      # unterminated section: everything left is its content
                      set_node('text', @html.size)
                    end
                    handle_whitespaces
                    @skip_whitespace = true
                    return true
                  end

                  if parse_tag
                    # whitespace directly after a block element is insignificant
                    @skip_whitespace = @is_block_element ? true : false
                    return true
                  end
                end

                # 0 is truthy in Ruby, so the counter must be compared explicitly.
                @skip_whitespace = false if @keep_whitespace.to_i > 0

                # When we get here it is a text node. If @html still starts with '<'
                # the tag was rejected by parse_tag; treat that '<' as literal text
                # and search from the next character so the parser always advances.
                start = (@html[0, 1] == '<') ? 1 : 0
                pos = @html.index('<', start) || @html.size

                set_node('text', pos)
                handle_whitespaces
                return next_node if @skip_whitespace && @node == ' '
                @skip_whitespace = false
                true
              end # end next_node
         
     | 
| 
      
 225 
     | 
    
         
            +
              
         
     | 
| 
      
 226 
     | 
    
         
            +
              # normalize self.node
         
     | 
| 
      
 227 
     | 
    
         
            +
              def normalize_node
                # Rebuild @node from the parsed tag state (@tag_name, @tag_attributes,
                # @is_start_tag, @is_empty_tag) in a canonical form: double-quoted
                # attribute values and " />" for empty tags.
                # Returns the rebuilt node string.
                node = '<'.dup

                unless @is_start_tag
                  node << "/#{@tag_name}>"
                  return @node = node
                end

                node << @tag_name.to_s
                (@tag_attributes || {}).each do |name, value|
                  # A literal double quote would terminate the attribute early,
                  # so it has to be written as an entity.
                  node << " #{name}=\"" << value.to_s.gsub('"', '&quot;') << '"'
                end
                node << ' /' if @is_empty_tag
                node << '>'
                @node = node
              end
         
     | 
| 
      
 241 
     | 
    
         
            +
              
         
     | 
| 
      
 242 
     | 
    
         
            +
              private
         
     | 
| 
      
 243 
     | 
    
         
            +
              
         
     | 
| 
      
 244 
     | 
    
         
            +
              # parse tag, set tag name and attributes, check for closing tag, etc...
         
     | 
| 
      
 245 
     | 
    
         
            +
              def parse_tag
                # Try to parse the tag at the start of @html (which begins with '<').
                #
                # On success the tag is cut off @html into @node, the tag state
                # (@tag_name, @tag_attributes, @is_start_tag, @is_empty_tag,
                # @is_block_element, @node_type) is updated and true is returned.
                # On failure invalid_tag is called and false is returned.
                size = @html.size
                pos = 1
                is_start_tag = (@html[pos, 1] != '/')
                pos += 1 unless is_start_tag

                # get tag name: letters, plus digits after the first character.
                # The loop is bounded by the string size because String#[] returns
                # "" (truthy) at the end of the string, not nil.
                tag_name = ''
                while pos < size
                  char = @html[pos, 1].downcase
                  break unless char =~ /\A[a-z]\z/ || (!tag_name.empty? && char =~ /\A[0-9]\z/)
                  tag_name << char
                  pos += 1
                end
                # step back onto the last character of the name; the attribute
                # loop below pre-increments
                pos -= 1

                if tag_name.empty? || !BLOCK_ELEMENTS.include?(tag_name)
                  # something went wrong, invalid tag
                  invalid_tag
                  return false
                end

                if @no_tags_in_code && @open_tags.last == 'code' && !(tag_name == 'code' && !is_start_tag)
                  # suppress all HTML tags inside code tags
                  invalid_tag
                  return false
                end

                # get tag attributes
                # TODO: in HTML 4 attributes dont need to be quoted
                is_empty_tag = false
                attributes = {}
                curr_attribute = ''
                name_closed = false # whitespace was seen after the current name
                while pos + 1 < size
                  pos += 1

                  # close tag
                  if @html[pos, 1] == '>' || @html[pos, 2] == '/>'
                    if @html[pos, 1] == '/'
                      is_empty_tag = true
                      pos += 1
                    end
                    break
                  end

                  char = @html[pos, 1].downcase
                  if char =~ /\A[a-z:\-]\z/
                    # attribute name (':' and '-' for xml:lang and http-equiv)
                    if name_closed
                      # the previous name had no value, e.g. <option selected value="x">
                      attributes[curr_attribute] = curr_attribute
                      curr_attribute = ''
                      name_closed = false
                    end
                    curr_attribute << char
                  elsif [' ', "\t", "\n", "\r"].include?(char)
                    # drop whitespace, but remember that it ended a name
                    name_closed = true unless curr_attribute.empty?
                  elsif ['="', "='"].include?(@html[pos, 2])
                    # get attribute value
                    pos += 1
                    await = @html[pos, 1] # single or double quote
                    pos += 1
                    close = @html.index(await, pos) || size
                    attributes[curr_attribute] = @html[pos...close]
                    pos = close
                    curr_attribute = ''
                    name_closed = false
                  else
                    invalid_tag
                    return false
                  end
                end # end while

                if @html[pos, 1] != '>'
                  # ran out of input before the tag was closed
                  invalid_tag
                  return false
                end

                unless curr_attribute.empty?
                  # html4 allows something like <option selected> instead of <option selected="selected">
                  attributes[curr_attribute] = curr_attribute
                end

                unless is_start_tag
                  if !attributes.empty? || tag_name != @open_tags.last
                    # end tags must not contain any attributes
                    # or maybe we did not expect a different tag to be closed
                    invalid_tag
                    return false
                  end
                  @open_tags.pop
                  @keep_whitespace -= 1 if PREFORMATTED_TAGS.include?(tag_name)
                end

                pos += 1
                @node = @html[0, pos]
                @html = @html[pos, size - pos]
                @tag_name = tag_name
                @tag_attributes = attributes
                @is_start_tag = is_start_tag
                @is_empty_tag = is_empty_tag || EMPTY_TAGS.include?(tag_name)
                if @is_empty_tag
                  # might not be well formed; \z anchors to the real end of the
                  # node so a '>' at a line end inside an attribute value is left alone
                  @node.sub!(/ *\/? *>\z/, ' />')
                end
                @node_type = 'tag'
                @is_block_element = BLOCK_ELEMENTS[tag_name]
                true
              end
         
     | 
| 
      
 359 
     | 
    
         
            +
              
         
     | 
| 
      
 360 
     | 
    
         
            +
              # handle invalid tags
         
     | 
| 
      
 361 
     | 
    
         
            +
              def invalid_tag
                # Neutralise the '<' at the start of @html by replacing it with the
                # entity "&lt;", so the rejected tag is subsequently read as plain
                # text instead of being parsed as a tag again.
                @html = '&lt;' + @html.to_s[1..-1].to_s
              end
         
     | 
| 
      
 364 
     | 
    
         
            +
              
         
     | 
| 
      
 365 
     | 
    
         
            +
              # update all variables and make @html shorter
         
     | 
| 
      
 366 
     | 
    
         
            +
              # - param type => @nodeType
         
     | 
| 
      
 367 
     | 
    
         
            +
              # - param pos  => which position to cut at
         
     | 
| 
      
 368 
     | 
    
         
            +
              def set_node(type, pos)
                # Store the first +pos+ characters of @html as the current node of
                # the given +type+ and drop them from @html.
                # - type => new value of @node_type
                # - pos  => position to cut @html at
                previous_was_tag = (@node_type == 'tag')
                if previous_was_tag
                  # Leaving a tag node: clear the tag specific state. Tag nodes
                  # themselves are never set through this method (parse_tag does
                  # that), so only the previous type needs checking.
                  @tag_name, @tag_attributes = nil, nil
                  @is_start_tag = @is_empty_tag = @is_block_element = false
                end

                head = @html.slice(0, pos)
                tail = @html[pos..-1]

                @node_type = type
                @node = head
                @html = tail
              end # end set_node
         
     | 
| 
      
 383 
     | 
    
         
            +
              
         
     | 
| 
      
 384 
     | 
    
         
            +
              # check if @html begins with a specific string
         
     | 
| 
      
 385 
     | 
    
         
            +
              def match?(str)
                # True when the unparsed input in @html starts with +str+.
                prefix = @html[0, str.length]
                prefix == str
              end
         
     | 
| 
      
 388 
     | 
    
         
            +
              
         
     | 
| 
      
 389 
     | 
    
         
            +
              # truncate whitespaces
         
     | 
| 
      
 390 
     | 
    
         
            +
              def handle_whitespaces
                # Collapse every run of whitespace in @node to a single space.
                # Nothing is changed while @keep_whitespace is positive (the counter
                # is incremented for open PREFORMATTED_TAGS), or when there is no
                # current node.
                return if @keep_whitespace.to_i > 0
                return if @node.nil?
                @node = @node.gsub(/\s+/, ' ')
              end
         
     | 
| 
      
 394 
     | 
    
         
            +
              
         
     | 
| 
      
 395 
     | 
    
         
            +
              # check if a string is a valid numeric value
         
     | 
| 
      
 396 
     | 
    
         
            +
              def is_numeric?(val)
                # Try to convert +val+ with Kernel#Float. Returns the converted
                # Float (truthy) on success and false when the conversion raises.
                begin
                  Float(val)
                rescue StandardError
                  false
                end
              end
         
     | 
| 
      
 399 
     | 
    
         
            +
              
         
     | 
| 
      
 400 
     | 
    
         
            +
              # indent HTML properly
         
     | 
| 
      
 401 
     | 
    
         
            +
              def self.indent_html(html, indent = '  ', no_tags_in_code = false)
                # Re-serialise +html+ with one block element per line, indented by
                # nesting depth.
                # - html            => markup to format
                # - indent          => string emitted once per nesting level
                # - no_tags_in_code => passed to the parser's no_tags_in_code setting
                # Returns the formatted markup as a new string.
                parser = ParseHTML.new(html)
                parser.no_tags_in_code = no_tags_in_code
                out = ''
                last = true # last tag was block element
                depth = 0   # number of currently open block elements

                while parser.next_node
                  is_tag = (parser.node_type == 'tag')
                  parser.normalize_node if is_tag

                  if is_tag && parser.is_block_element
                    is_pre_or_code = ['code', 'pre'].include?(parser.tag_name)
                    if parser.keep_whitespace.zero? && !last && !is_pre_or_code
                      # inline content came before this block: finish its line
                      out = out.rstrip + "\n"
                    end

                    if parser.is_start_tag
                      # exactly +indent+ per level, with no separator between levels
                      out << (indent * depth)
                      depth += 1 unless parser.is_empty_tag
                    else
                      depth -= 1 if depth > 0
                      out << (indent * depth) unless is_pre_or_code
                    end

                    out << parser.node
                    if parser.keep_whitespace.zero? && !(is_pre_or_code && parser.is_start_tag)
                      out << "\n"
                    end
                    last = true
                  else
                    if is_tag && parser.tag_name == 'br'
                      out << (parser.node + "\n")
                      last = true
                      next
                    elsif last && parser.keep_whitespace.zero?
                      # first inline node on a fresh line
                      out << (indent * depth)
                      parser.node = parser.node.lstrip
                    end
                    out << parser.node

                    if ['comment', 'pi', 'doctype'].include?(parser.node_type)
                      out << "\n"
                    else
                      last = false
                    end
                  end
                end # end while

                out
              end
         
     | 
| 
      
 451 
     | 
    
         
            +
              
         
     | 
| 
      
 452 
     | 
    
         
            +
            end # end class ParseHTML
         
     |