RubyGems - html5 - Versions diffs - 0.1.0 - Mend

html5 0.1.0

Files changed (59) hide show

data/History.txt +3 -0
data/Manifest.txt +58 -0
data/README +9 -0
data/Rakefile.rb +17 -0
data/lib/html5/constants.rb +818 -0
data/lib/html5/filters/base.rb +10 -0
data/lib/html5/filters/inject_meta_charset.rb +82 -0
data/lib/html5/filters/optionaltags.rb +198 -0
data/lib/html5/filters/sanitizer.rb +15 -0
data/lib/html5/filters/whitespace.rb +36 -0
data/lib/html5/html5parser/after_body_phase.rb +46 -0
data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
data/lib/html5/html5parser/after_head_phase.rb +50 -0
data/lib/html5/html5parser/before_head_phase.rb +41 -0
data/lib/html5/html5parser/in_body_phase.rb +607 -0
data/lib/html5/html5parser/in_caption_phase.rb +68 -0
data/lib/html5/html5parser/in_cell_phase.rb +78 -0
data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
data/lib/html5/html5parser/in_head_phase.rb +138 -0
data/lib/html5/html5parser/in_row_phase.rb +87 -0
data/lib/html5/html5parser/in_select_phase.rb +84 -0
data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
data/lib/html5/html5parser/in_table_phase.rb +110 -0
data/lib/html5/html5parser/initial_phase.rb +134 -0
data/lib/html5/html5parser/phase.rb +158 -0
data/lib/html5/html5parser/root_element_phase.rb +42 -0
data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
data/lib/html5/html5parser.rb +248 -0
data/lib/html5/inputstream.rb +654 -0
data/lib/html5/liberalxmlparser.rb +158 -0
data/lib/html5/sanitizer.rb +188 -0
data/lib/html5/serializer/htmlserializer.rb +180 -0
data/lib/html5/serializer/xhtmlserializer.rb +20 -0
data/lib/html5/serializer.rb +2 -0
data/lib/html5/tokenizer.rb +968 -0
data/lib/html5/treebuilders/base.rb +334 -0
data/lib/html5/treebuilders/hpricot.rb +231 -0
data/lib/html5/treebuilders/rexml.rb +208 -0
data/lib/html5/treebuilders/simpletree.rb +185 -0
data/lib/html5/treebuilders.rb +24 -0
data/lib/html5/treewalkers/base.rb +154 -0
data/lib/html5/treewalkers/hpricot.rb +48 -0
data/lib/html5/treewalkers/rexml.rb +48 -0
data/lib/html5/treewalkers/simpletree.rb +48 -0
data/lib/html5/treewalkers.rb +26 -0
data/lib/html5.rb +13 -0
data/parse.rb +217 -0
data/tests/preamble.rb +82 -0
data/tests/test_encoding.rb +35 -0
data/tests/test_lxp.rb +263 -0
data/tests/test_parser.rb +68 -0
data/tests/test_sanitizer.rb +142 -0
data/tests/test_serializer.rb +68 -0
data/tests/test_stream.rb +62 -0
data/tests/test_tokenizer.rb +94 -0
data/tests/test_treewalkers.rb +116 -0
data/tests/tokenizer_test_parser.rb +63 -0
metadata +120 -0

data/tests/test_sanitizer.rb ADDED Viewed

@@ -0,0 +1,142 @@
+#!/usr/bin/env ruby
+require File.join(File.dirname(__FILE__), 'preamble')
+require 'html5/html5parser'
+require 'html5/liberalxmlparser'
+require 'html5/treewalkers'
+require 'html5/serializer'
+require 'html5/sanitizer'
+class SanitizeTest < Test::Unit::TestCase
+  include HTML5
+  def sanitize_xhtml stream
+    XHTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
+  end
+  def sanitize_html stream
+    HTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
+  end
+  def sanitize_rexml stream
+    require 'rexml/document'
+    doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
+    tokens = TreeWalkers.get_tree_walker('rexml').new(doc)
+    XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
+      :quote_char => "'",
+      :inject_meta_charset => false,
+      :sanitize => true}).gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
+  rescue REXML::ParseException
+    return "Ill-formed XHTML!"
+  end
+  def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
+    assert_equal htmloutput, sanitize_html(input)
+    assert_equal xhtmloutput, sanitize_xhtml(input)
+    assert_equal rexmloutput, sanitize_rexml(input)
+  end
+  HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
+    define_method "test_should_allow_#{tag_name}_tag" do
+      input       = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
+      htmloutput  = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
+      xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
+      rexmloutput = xhtmloutput
+      if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
+        htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
+        xhtmloutput = htmloutput
+      elsif tag_name == 'col'
+        htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
+        xhtmloutput = htmloutput
+        rexmloutput = "<col title='1' />"
+      elsif tag_name == 'table'
+        htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
+        xhtmloutput = htmloutput
+      elsif tag_name == 'image'
+        htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
+        xhtmloutput = htmloutput
+        rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
+      elsif VOID_ELEMENTS.include?(tag_name)
+        htmloutput = "<#{tag_name} title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
+        xhtmloutput = htmloutput
+        htmloutput += '<br/>' if tag_name == 'br'
+        rexmloutput =  "<#{tag_name} title='1' />"
+      end
+      check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
+    end
+  end
+  HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
+    define_method "test_should_forbid_#{tag_name.upcase}_tag" do
+      input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
+      output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
+      check_sanitization(input, output, output, output)
+    end
+  end
+  HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
+    next if attribute_name == 'style'
+    define_method "test_should_allow_#{attribute_name}_attribute" do
+      input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
+      output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
+      htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
+      check_sanitization(input, htmloutput, output, output)
+    end
+  end
+  HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
+    define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
+      input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
+      output =  "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
+      check_sanitization(input, output, output, output)
+    end
+  end
+  HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol|
+    define_method "test_should_allow_#{protocol}_uris" do
+      input = %(<a href="#{protocol}">foo</a>)
+      output = "<a href='#{protocol}'>foo</a>"
+      check_sanitization(input, output, output, output)
+    end
+  end
+  HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol|
+    define_method "test_should_allow_uppercase_#{protocol}_uris" do
+      input = %(<a href="#{protocol.upcase}">foo</a>)
+      output = "<a href='#{protocol.upcase}'>foo</a>"
+      check_sanitization(input, output, output, output)
+    end
+  end
+  def test_should_handle_astral_plane_characters
+    input = "<p>&#x1d4b5; &#x1d538;</p>"
+    output = "<p>\360\235\222\265 \360\235\224\270</p>"
+    check_sanitization(input, output, output, output)
+    input = "<p><tspan>\360\235\224\270</tspan> a</p>"
+    output = "<p><tspan>\360\235\224\270</tspan> a</p>"
+    check_sanitization(input, output, output, output)
+  end
+# This affects only NS4. Is it worth fixing?
+#  def test_javascript_includes
+#    input = %(<div size="&{alert('XSS')}">foo</div>)
+#    output = "<div>foo</div>"
+#    check_sanitization(input, output, output, output)
+#  end
+  html5_test_files('sanitizer').each do |filename|
+    JSON::parse(open(filename).read).each do |test|
+      define_method "test_#{test['name']}" do
+        check_sanitization(
+          test['input'],
+          test['output'],
+          test['xhtml'] || test['output'],
+          test['rexml'] || test['output']
+        )
+      end
+    end
+  end
+end

data/tests/test_serializer.rb ADDED Viewed

@@ -0,0 +1,68 @@
+require File.join(File.dirname(__FILE__), 'preamble')
+require 'html5/html5parser'
+require 'html5/serializer'
+require 'html5/treewalkers'
+# Flag named for toggling the serializer error checks; nothing visible in this test file reads it.
+checkSerializeErrors = false
+class JsonWalker < HTML5::TreeWalkers::Base
+  def each
+    @tree.each do |token|
+      case token[0]
+      when 'StartTag'
+        yield start_tag(token[1], token[2])
+      when 'EndTag'
+        yield end_tag(token[1])
+      when 'EmptyTag'
+        yield empty_tag(token[1], token[2])
+      when 'Comment'
+        yield comment(token[1])
+      when 'Characters', 'SpaceCharacters'
+        text(token[1]) {|textToken| yield textToken}
+      when 'Doctype'
+        yield doctype(token[1], token[2], token[3])
+      else
+        raise "Unknown token type: " + token[0]
+      end
+    end
+  end
+end
+class Html5SerializeTestcase < Test::Unit::TestCase
+  html5_test_files('serializer').each do |filename|
+    test_name = File.basename(filename).sub('.test', '')
+    tests = JSON::parse(open(filename).read)
+    tests['tests'].each_with_index do |test, index|
+      define_method "test_#{test_name}_#{index+1}" do
+        if test["options"] and test["options"]["encoding"]
+          test["options"][:encoding] = test["options"]["encoding"]
+        end
+        result = HTML5::HTMLSerializer.
+          serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
+        expected = test["expected"]
+        if expected.length == 1
+          assert_equal(expected[0], result, test["description"])
+        elsif !expected.include?(result)
+          flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
+        end
+        return if test_name == 'optionaltags'
+        result = HTML5::XHTMLSerializer.
+          serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
+        expected = test["xhtml"] || test["expected"]
+        if expected.length == 1
+          assert_equal(expected[0], result, test["description"])
+        elsif !expected.include?(result)
+          flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
+        end
+      end
+    end
+  end
+end

data/tests/test_stream.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require File.join(File.dirname(__FILE__), 'preamble')
+require 'html5/inputstream'
+class HTMLInputStreamTest < Test::Unit::TestCase
+  include HTML5
+  def test_char_ascii
+    stream = HTMLInputStream.new("'", :encoding=>'ascii')
+    assert_equal('ascii', stream.char_encoding)
+    assert_equal("'", stream.char)
+  end
+  def test_char_null
+    stream = HTMLInputStream.new("\x00")
+    assert_equal("\xef\xbf\xbd", stream.char)
+  end
+  def test_char_utf8
+    stream = HTMLInputStream.new("\xe2\x80\x98", :encoding=>'utf-8')
+    assert_equal('utf-8', stream.char_encoding)
+    assert_equal("\xe2\x80\x98", stream.char)
+  end
+  def test_char_win1252
+    stream = HTMLInputStream.new("\xa2\xc5\xf1\x92\x86")
+    assert_equal('windows-1252', stream.char_encoding)
+    assert_equal("\xc2\xa2", stream.char)
+    assert_equal("\xc3\x85", stream.char)
+    assert_equal("\xc3\xb1", stream.char)
+    assert_equal("\xe2\x80\x99", stream.char)
+    assert_equal("\xe2\x80\xa0", stream.char)
+  end
+  def test_bom
+    stream = HTMLInputStream.new("\xef\xbb\xbf" + "'")
+    assert_equal('utf-8', stream.char_encoding)
+    assert_equal("'", stream.char)
+  end
+  begin
+    require 'iconv'
+    def test_utf_16
+      stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
+      assert(stream.char_encoding, 'utf-16-le')
+      assert_equal(1025, stream.chars_until(' ',true).length)
+    end
+  rescue LoadError
+    puts "iconv not found, skipping iconv tests"
+  end
+  def test_newlines
+    stream = HTMLInputStream.new("\xef\xbb\xbf" + "a\nbb\r\nccc\rdddd")
+    assert_equal([1,0], stream.position)
+    assert_equal("a\nbb\n", stream.chars_until('c'))
+    assert_equal([3,0], stream.position)
+    assert_equal("ccc\ndddd", stream.chars_until('x'))
+    assert_equal([4,4], stream.position)
+    assert_equal([1,2,3], stream.instance_eval {@line_lengths})
+  end
+end

data/tests/test_tokenizer.rb ADDED Viewed

@@ -0,0 +1,94 @@
+require File.join(File.dirname(__FILE__), 'preamble')
+require 'html5/tokenizer'
+require 'tokenizer_test_parser'
+class Html5TokenizerTestCase < Test::Unit::TestCase
+  def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
+    if !ignoreErrorOrder
+      return expectedTokens == receivedTokens
+    else
+      #Sort the tokens into two groups; non-parse errors and parse errors
+      expected = [[],[]]
+      received = [[],[]]
+      for token in expectedTokens
+        if token != "ParseError"
+          expected[0] << token
+        else
+          expected[1] << token
+        end
+      end
+      for token in receivedTokens
+        if token != "ParseError"
+          received[0] << token
+        else
+          received[1] << token
+        end
+      end
+      assert_equal expected, received, message
+    end
+  end
+  def type_of?(token_name, token)
+    token != 'ParseError' and token_name == token.first
+  end
+  def convert_attribute_arrays_to_hashes(tokens)
+    tokens.inject([]) do |tokens, token|
+      token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
+      tokens << token
+    end
+  end
+  def concatenate_consecutive_characters(tokens)
+    tokens.inject([]) do |tokens, token|
+      if type_of?('Character', token) and tokens.any? and type_of?('Character', tokens.last)
+        tokens.last[1] = tokens.last[1] + token[1]
+        next tokens
+      end
+      tokens << token
+    end
+  end
+  def tokenizer_test(data)
+    (data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
+      message = [
+        '', 'Description:', data['description'],
+        '', 'Input:', data['input'],
+        '', 'Content Model Flag:', content_model_flag,
+        '' ] * "\n"
+      assert_nothing_raised message do
+        tokenizer = HTML5::HTMLTokenizer.new(data['input'])
+        tokenizer.content_model_flag = content_model_flag.to_sym
+        tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
+        tokens = TokenizerTestParser.new(tokenizer).parse
+        actual = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))
+        expected = concatenate_consecutive_characters(data['output'])
+        assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
+      end
+    end
+  end
+  html5_test_files('tokenizer').each do |test_file|
+    test_name = File.basename(test_file).sub('.test', '')
+    tests = JSON.parse(File.read(test_file))['tests']
+    tests.each_with_index do |data, index|
+      define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
+    end
+  end
+end

data/tests/test_treewalkers.rb ADDED Viewed

@@ -0,0 +1,116 @@
+require File.join(File.dirname(__FILE__), 'preamble')
+require 'html5/html5parser'
+require 'html5/treewalkers'
+require 'html5/treebuilders'
+$tree_types_to_test = {
+  'simpletree' =>
+    {:builder => HTML5::TreeBuilders['simpletree'],
+     :walker  => HTML5::TreeWalkers['simpletree']},
+  'rexml' =>
+    {:builder => HTML5::TreeBuilders['rexml'],
+     :walker  => HTML5::TreeWalkers['rexml']},
+  'hpricot' =>
+    {:builder => HTML5::TreeBuilders['hpricot'],
+     :walker  => HTML5::TreeWalkers['hpricot']},
+}
+puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '
+class TestTreeWalkers < Test::Unit::TestCase
+  include HTML5::TestSupport
+  def concatenateCharacterTokens(tokens)
+    charactersToken = nil
+    for token in tokens
+        type = token[:type]
+        if [:Characters, :SpaceCharacters].include?(type)
+            if charactersToken == nil
+                charactersToken = {:type => :Characters, :data => token[:data]}
+            else
+                charactersToken[:data] += token[:data]
+            end
+        else
+            if charactersToken != nil
+                yield charactersToken
+                charactersToken = nil
+            end
+            yield token
+        end
+    end
+    yield charactersToken if charactersToken != nil
+  end
+  def convertTokens(tokens)
+    output = []
+    indent = 0
+    concatenateCharacterTokens(tokens) do |token|
+      case token[:type]
+        when :StartTag, :EmptyTag
+            output << "#{' '*indent}<#{token[:name]}>"
+            indent += 2
+            for name, value in token[:data].to_a.sort
+                next if name=='xmlns'
+                output << "#{' '*indent}#{name}=\"#{value}\""
+            end
+            indent -= 2 if token[:type] == :EmptyTag
+        when :EndTag
+            indent -= 2
+        when :Comment
+            output << "#{' '*indent}<!-- #{token[:data]} -->"
+        when :Doctype
+            if token[:name] and token[:name].any?
+              output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
+            else
+              output << "#{' '*indent}<!DOCTYPE >"
+            end
+        when :Characters, :SpaceCharacters
+            output << "#{' '*indent}\"#{token[:data]}\""
+        else
+            # TODO: what to do with errors?
+      end
+    end
+    return output.join("\n")
+  end
+  html5_test_files('tree-construction').each do |test_file|
+    test_name = File.basename(test_file).sub('.dat', '')
+    next if test_name == 'tests5' # TODO
+    TestData.new(test_file, %w(data errors document-fragment document)).
+      each_with_index do |(input, errors, inner_html, expected), index|
+      expected = expected.gsub("\n| ","\n")[2..-1]
+      $tree_types_to_test.each do |tree_name, tree_class|
+        define_method "test_#{test_name}_#{index}_#{tree_name}" do
+          parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
+          if inner_html
+            parser.parse_fragment(input, inner_html)
+          else
+            parser.parse(input)
+          end
+          document = parser.tree.get_document
+          begin
+            output = sortattrs(convertTokens(tree_class[:walker].new(document)))
+            expected = sortattrs(expected)
+            assert_equal expected, output, [
+              '', 'Input:', input,
+              '', 'Expected:', expected,
+              '', 'Recieved:', output
+            ].join("\n")
+          rescue NotImplementedError
+            # Amnesty for those that confess...
+          end
+        end
+      end
+   end
+  end
+end

data/tests/tokenizer_test_parser.rb ADDED Viewed

@@ -0,0 +1,63 @@
+require 'html5/constants'
+class TokenizerTestParser
+  def initialize(tokenizer)
+    @tokenizer = tokenizer
+  end
+  def parse
+    @outputTokens = []
+    debug = nil
+    for token in @tokenizer
+      debug = token.inspect if token[:type] == :ParseError
+      send(('process' + token[:type].to_s), token)
+    end
+    return @outputTokens
+  end
+  def processDoctype(token)
+    @outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
+      token[:systemId], token[:correct]])
+  end
+  def processStartTag(token)
+    @outputTokens.push(["StartTag", token[:name], token[:data]])
+  end
+  def processEmptyTag(token)
+    if not HTML5::VOID_ELEMENTS.include? token[:name]
+      @outputTokens.push("ParseError")
+    end
+    @outputTokens.push(["StartTag", token[:name], token[:data]])
+  end
+  def processEndTag(token)
+    if token[:data].length > 0
+      self.processParseError(token)
+    end
+    @outputTokens.push(["EndTag", token[:name]])
+  end
+  def processComment(token)
+    @outputTokens.push(["Comment", token[:data]])
+  end
+  def processCharacters(token)
+    @outputTokens.push(["Character", token[:data]])
+  end
+  alias processSpaceCharacters processCharacters
+  def processCharacters(token)
+    @outputTokens.push(["Character", token[:data]])
+  end
+  def process_eof(token)
+  end
+  def processParseError(token)
+    @outputTokens.push("ParseError")
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,120 @@
+--- !ruby/object:Gem::Specification
+rubygems_version: 0.9.2
+specification_version: 1
+name: html5
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+date: 2007-08-07 00:00:00 -07:00
+summary: HTML5 parser/tokenizer.
+require_paths:
+- lib
+email: ryan@theryanking.com
+homepage: http://code.google.com/p/html5lib
+rubyforge_project: html5
+description: A ruby based HTML parser/tokenizer based on the WHATWG HTML5 specification for maximum compatibility with major desktop web browsers.
+autorequire:
+default_executable:
+bindir: bin
+has_rdoc: true
+required_ruby_version: !ruby/object:Gem::Version::Requirement
+  requirements:
+  - - ">"
+    - !ruby/object:Gem::Version
+      version: 0.0.0
+  version:
+platform: ruby
+signing_key:
+cert_chain:
+post_install_message:
+authors:
+- Ryan King
+files:
+- History.txt
+- Manifest.txt
+- README
+- Rakefile.rb
+- lib/html5.rb
+- lib/html5/constants.rb
+- lib/html5/filters/base.rb
+- lib/html5/filters/inject_meta_charset.rb
+- lib/html5/filters/optionaltags.rb
+- lib/html5/filters/sanitizer.rb
+- lib/html5/filters/whitespace.rb
+- lib/html5/html5parser.rb
+- lib/html5/html5parser/after_body_phase.rb
+- lib/html5/html5parser/after_frameset_phase.rb
+- lib/html5/html5parser/after_head_phase.rb
+- lib/html5/html5parser/before_head_phase.rb
+- lib/html5/html5parser/in_body_phase.rb
+- lib/html5/html5parser/in_caption_phase.rb
+- lib/html5/html5parser/in_cell_phase.rb
+- lib/html5/html5parser/in_column_group_phase.rb
+- lib/html5/html5parser/in_frameset_phase.rb
+- lib/html5/html5parser/in_head_phase.rb
+- lib/html5/html5parser/in_row_phase.rb
+- lib/html5/html5parser/in_select_phase.rb
+- lib/html5/html5parser/in_table_body_phase.rb
+- lib/html5/html5parser/in_table_phase.rb
+- lib/html5/html5parser/initial_phase.rb
+- lib/html5/html5parser/phase.rb
+- lib/html5/html5parser/root_element_phase.rb
+- lib/html5/html5parser/trailing_end_phase.rb
+- lib/html5/inputstream.rb
+- lib/html5/liberalxmlparser.rb
+- lib/html5/sanitizer.rb
+- lib/html5/serializer.rb
+- lib/html5/serializer/htmlserializer.rb
+- lib/html5/serializer/xhtmlserializer.rb
+- lib/html5/tokenizer.rb
+- lib/html5/treebuilders.rb
+- lib/html5/treebuilders/base.rb
+- lib/html5/treebuilders/hpricot.rb
+- lib/html5/treebuilders/rexml.rb
+- lib/html5/treebuilders/simpletree.rb
+- lib/html5/treewalkers.rb
+- lib/html5/treewalkers/base.rb
+- lib/html5/treewalkers/hpricot.rb
+- lib/html5/treewalkers/rexml.rb
+- lib/html5/treewalkers/simpletree.rb
+- parse.rb
+- tests/preamble.rb
+- tests/test_encoding.rb
+- tests/test_lxp.rb
+- tests/test_parser.rb
+- tests/test_sanitizer.rb
+- tests/test_serializer.rb
+- tests/test_stream.rb
+- tests/test_tokenizer.rb
+- tests/test_treewalkers.rb
+- tests/tokenizer_test_parser.rb
+test_files: []
+rdoc_options: []
+extra_rdoc_files: []
+executables: []
+extensions: []
+requirements: []
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: chardet
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Version::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.9.0
+    version:
+- !ruby/object:Gem::Dependency
+  name: hoe
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Version::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.2.0
+    version: