html5 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +58 -0
  3. data/README +9 -0
  4. data/Rakefile.rb +17 -0
  5. data/lib/html5/constants.rb +818 -0
  6. data/lib/html5/filters/base.rb +10 -0
  7. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  8. data/lib/html5/filters/optionaltags.rb +198 -0
  9. data/lib/html5/filters/sanitizer.rb +15 -0
  10. data/lib/html5/filters/whitespace.rb +36 -0
  11. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  12. data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
  13. data/lib/html5/html5parser/after_head_phase.rb +50 -0
  14. data/lib/html5/html5parser/before_head_phase.rb +41 -0
  15. data/lib/html5/html5parser/in_body_phase.rb +607 -0
  16. data/lib/html5/html5parser/in_caption_phase.rb +68 -0
  17. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  18. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  19. data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  20. data/lib/html5/html5parser/in_head_phase.rb +138 -0
  21. data/lib/html5/html5parser/in_row_phase.rb +87 -0
  22. data/lib/html5/html5parser/in_select_phase.rb +84 -0
  23. data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
  24. data/lib/html5/html5parser/in_table_phase.rb +110 -0
  25. data/lib/html5/html5parser/initial_phase.rb +134 -0
  26. data/lib/html5/html5parser/phase.rb +158 -0
  27. data/lib/html5/html5parser/root_element_phase.rb +42 -0
  28. data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  29. data/lib/html5/html5parser.rb +248 -0
  30. data/lib/html5/inputstream.rb +654 -0
  31. data/lib/html5/liberalxmlparser.rb +158 -0
  32. data/lib/html5/sanitizer.rb +188 -0
  33. data/lib/html5/serializer/htmlserializer.rb +180 -0
  34. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  35. data/lib/html5/serializer.rb +2 -0
  36. data/lib/html5/tokenizer.rb +968 -0
  37. data/lib/html5/treebuilders/base.rb +334 -0
  38. data/lib/html5/treebuilders/hpricot.rb +231 -0
  39. data/lib/html5/treebuilders/rexml.rb +208 -0
  40. data/lib/html5/treebuilders/simpletree.rb +185 -0
  41. data/lib/html5/treebuilders.rb +24 -0
  42. data/lib/html5/treewalkers/base.rb +154 -0
  43. data/lib/html5/treewalkers/hpricot.rb +48 -0
  44. data/lib/html5/treewalkers/rexml.rb +48 -0
  45. data/lib/html5/treewalkers/simpletree.rb +48 -0
  46. data/lib/html5/treewalkers.rb +26 -0
  47. data/lib/html5.rb +13 -0
  48. data/parse.rb +217 -0
  49. data/tests/preamble.rb +82 -0
  50. data/tests/test_encoding.rb +35 -0
  51. data/tests/test_lxp.rb +263 -0
  52. data/tests/test_parser.rb +68 -0
  53. data/tests/test_sanitizer.rb +142 -0
  54. data/tests/test_serializer.rb +68 -0
  55. data/tests/test_stream.rb +62 -0
  56. data/tests/test_tokenizer.rb +94 -0
  57. data/tests/test_treewalkers.rb +116 -0
  58. data/tests/tokenizer_test_parser.rb +63 -0
  59. metadata +120 -0
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ == 0.1.0
2
+
3
+ * first gem release
data/Manifest.txt ADDED
@@ -0,0 +1,58 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README
4
+ Rakefile.rb
5
+ lib/html5.rb
6
+ lib/html5/constants.rb
7
+ lib/html5/filters/base.rb
8
+ lib/html5/filters/inject_meta_charset.rb
9
+ lib/html5/filters/optionaltags.rb
10
+ lib/html5/filters/sanitizer.rb
11
+ lib/html5/filters/whitespace.rb
12
+ lib/html5/html5parser.rb
13
+ lib/html5/html5parser/after_body_phase.rb
14
+ lib/html5/html5parser/after_frameset_phase.rb
15
+ lib/html5/html5parser/after_head_phase.rb
16
+ lib/html5/html5parser/before_head_phase.rb
17
+ lib/html5/html5parser/in_body_phase.rb
18
+ lib/html5/html5parser/in_caption_phase.rb
19
+ lib/html5/html5parser/in_cell_phase.rb
20
+ lib/html5/html5parser/in_column_group_phase.rb
21
+ lib/html5/html5parser/in_frameset_phase.rb
22
+ lib/html5/html5parser/in_head_phase.rb
23
+ lib/html5/html5parser/in_row_phase.rb
24
+ lib/html5/html5parser/in_select_phase.rb
25
+ lib/html5/html5parser/in_table_body_phase.rb
26
+ lib/html5/html5parser/in_table_phase.rb
27
+ lib/html5/html5parser/initial_phase.rb
28
+ lib/html5/html5parser/phase.rb
29
+ lib/html5/html5parser/root_element_phase.rb
30
+ lib/html5/html5parser/trailing_end_phase.rb
31
+ lib/html5/inputstream.rb
32
+ lib/html5/liberalxmlparser.rb
33
+ lib/html5/sanitizer.rb
34
+ lib/html5/serializer.rb
35
+ lib/html5/serializer/htmlserializer.rb
36
+ lib/html5/serializer/xhtmlserializer.rb
37
+ lib/html5/tokenizer.rb
38
+ lib/html5/treebuilders.rb
39
+ lib/html5/treebuilders/base.rb
40
+ lib/html5/treebuilders/hpricot.rb
41
+ lib/html5/treebuilders/rexml.rb
42
+ lib/html5/treebuilders/simpletree.rb
43
+ lib/html5/treewalkers.rb
44
+ lib/html5/treewalkers/base.rb
45
+ lib/html5/treewalkers/hpricot.rb
46
+ lib/html5/treewalkers/rexml.rb
47
+ lib/html5/treewalkers/simpletree.rb
48
+ parse.rb
49
+ tests/preamble.rb
50
+ tests/test_encoding.rb
51
+ tests/test_lxp.rb
52
+ tests/test_parser.rb
53
+ tests/test_sanitizer.rb
54
+ tests/test_serializer.rb
55
+ tests/test_stream.rb
56
+ tests/test_tokenizer.rb
57
+ tests/test_treewalkers.rb
58
+ tests/tokenizer_test_parser.rb
data/README ADDED
@@ -0,0 +1,9 @@
1
+ = HTML5lib
2
+
3
+ == Basic Usage
4
+
5
+ require 'html5'
6
+
7
+ doc = HTML5.parse('<html>...</html>')
8
+
9
+ doc.class # REXML::Document
data/Rakefile.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'rake'
2
+ require 'hoe'
3
+ require 'lib/html5/version'
4
+
5
+ Hoe.new("html5", HTML5::VERSION::STRING) do |p|
6
+ p.description = "A ruby based HTML parser/tokenizer based on the WHATWG HTML5 specification for maximum compatibility with major desktop web browsers."
7
+ p.summary = "HTML5 parser/tokenizer."
8
+
9
+ p.author = ['Ryan King'] # TODO: add more names
10
+ p.email = 'ryan@theryanking.com'
11
+ p.url = 'http://code.google.com/p/html5lib'
12
+ p.need_zip = true
13
+
14
+ p.extra_deps << ['chardet', '>= 0.9.0']
15
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
16
+ p p.paragraphs_of('History.txt', 0..1).join("\n\n")
17
+ end