feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -1,4 +1,29 @@
|
|
1
1
|
require 'rexml/document'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
module YAML
  # Serializes +obj+ to YAML.
  #
  # When +obj+ is a FeedTools::Feed or FeedTools::FeedItem, its
  # @xml_document, @root_node and @channel_node instance variables are
  # set to nil on the object itself before it is written, so they never
  # end up in the output.
  #
  # Returns +io+ when one is given; otherwise returns the YAML text that
  # was written to an internal StringIO.
  def self.dump( obj, io = nil )
    is_feed_object =
      obj.kind_of?(FeedTools::Feed) || obj.kind_of?(FeedTools::FeedItem)
    if is_feed_object
      # These are deliberately never serialized.
      %w(@xml_document @root_node @channel_node).each do |ivar|
        obj.instance_variable_set(ivar, nil)
      end
    end

    if io
      obj.to_yaml(io)
      io
    else
      buffer = StringIO.new
      obj.to_yaml(buffer)
      buffer.rewind
      buffer.read
    end
  end

  # Loads a YAML document from +io+ via +parser.load+.
  #
  # When the loaded object is a FeedTools::Feed or FeedTools::FeedItem,
  # the same three instance variables are reset to nil, so they are never
  # restored from serialized data either.
  #
  # Returns the loaded object.
  def self.load( io )
    loaded = parser.load( io )
    is_feed_object =
      loaded.kind_of?(FeedTools::Feed) || loaded.kind_of?(FeedTools::FeedItem)
    if is_feed_object
      # These are deliberately never deserialized.
      %w(@xml_document @root_node @channel_node).each do |ivar|
        loaded.instance_variable_set(ivar, nil)
      end
    end
    loaded
  end
end
|
2
27
|
|
3
28
|
module REXML # :nodoc:
|
4
29
|
class LiberalXPathParser < XPathParser # :nodoc:
|
@@ -640,4 +665,5 @@ module REXML # :nodoc:
|
|
640
665
|
end
|
641
666
|
end
|
642
667
|
end
|
643
|
-
end
|
668
|
+
end
|
669
|
+
|
@@ -0,0 +1,117 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README
|
4
|
+
Rakefile.rb
|
5
|
+
bin/html5
|
6
|
+
lib/core_ext/string.rb
|
7
|
+
lib/html5.rb
|
8
|
+
lib/html5/constants.rb
|
9
|
+
lib/html5/filters/base.rb
|
10
|
+
lib/html5/filters/inject_meta_charset.rb
|
11
|
+
lib/html5/filters/iso639codes.rb
|
12
|
+
lib/html5/filters/optionaltags.rb
|
13
|
+
lib/html5/filters/rfc2046.rb
|
14
|
+
lib/html5/filters/rfc3987.rb
|
15
|
+
lib/html5/filters/sanitizer.rb
|
16
|
+
lib/html5/filters/validator.rb
|
17
|
+
lib/html5/filters/whitespace.rb
|
18
|
+
lib/html5/html5parser.rb
|
19
|
+
lib/html5/html5parser/after_body_phase.rb
|
20
|
+
lib/html5/html5parser/after_frameset_phase.rb
|
21
|
+
lib/html5/html5parser/after_head_phase.rb
|
22
|
+
lib/html5/html5parser/before_head_phase.rb
|
23
|
+
lib/html5/html5parser/in_body_phase.rb
|
24
|
+
lib/html5/html5parser/in_caption_phase.rb
|
25
|
+
lib/html5/html5parser/in_cell_phase.rb
|
26
|
+
lib/html5/html5parser/in_column_group_phase.rb
|
27
|
+
lib/html5/html5parser/in_frameset_phase.rb
|
28
|
+
lib/html5/html5parser/in_head_phase.rb
|
29
|
+
lib/html5/html5parser/in_row_phase.rb
|
30
|
+
lib/html5/html5parser/in_select_phase.rb
|
31
|
+
lib/html5/html5parser/in_table_body_phase.rb
|
32
|
+
lib/html5/html5parser/in_table_phase.rb
|
33
|
+
lib/html5/html5parser/initial_phase.rb
|
34
|
+
lib/html5/html5parser/phase.rb
|
35
|
+
lib/html5/html5parser/root_element_phase.rb
|
36
|
+
lib/html5/html5parser/trailing_end_phase.rb
|
37
|
+
lib/html5/inputstream.rb
|
38
|
+
lib/html5/liberalxmlparser.rb
|
39
|
+
lib/html5/sanitizer.rb
|
40
|
+
lib/html5/serializer.rb
|
41
|
+
lib/html5/serializer/htmlserializer.rb
|
42
|
+
lib/html5/serializer/xhtmlserializer.rb
|
43
|
+
lib/html5/sniffer.rb
|
44
|
+
lib/html5/tokenizer.rb
|
45
|
+
lib/html5/treebuilders.rb
|
46
|
+
lib/html5/treebuilders/base.rb
|
47
|
+
lib/html5/treebuilders/hpricot.rb
|
48
|
+
lib/html5/treebuilders/rexml.rb
|
49
|
+
lib/html5/treebuilders/simpletree.rb
|
50
|
+
lib/html5/treewalkers.rb
|
51
|
+
lib/html5/treewalkers/base.rb
|
52
|
+
lib/html5/treewalkers/hpricot.rb
|
53
|
+
lib/html5/treewalkers/rexml.rb
|
54
|
+
lib/html5/treewalkers/simpletree.rb
|
55
|
+
lib/html5/version.rb
|
56
|
+
testdata/encoding/chardet/test_big5.txt
|
57
|
+
testdata/encoding/test-yahoo-jp.dat
|
58
|
+
testdata/encoding/tests1.dat
|
59
|
+
testdata/encoding/tests2.dat
|
60
|
+
testdata/sanitizer/tests1.dat
|
61
|
+
testdata/serializer/core.test
|
62
|
+
testdata/serializer/injectmeta.test
|
63
|
+
testdata/serializer/optionaltags.test
|
64
|
+
testdata/serializer/options.test
|
65
|
+
testdata/serializer/whitespace.test
|
66
|
+
testdata/sites/google-results.htm
|
67
|
+
testdata/sites/python-ref-import.htm
|
68
|
+
testdata/sites/web-apps-old.htm
|
69
|
+
testdata/sites/web-apps.htm
|
70
|
+
testdata/sniffer/htmlOrFeed.json
|
71
|
+
testdata/tokenizer/contentModelFlags.test
|
72
|
+
testdata/tokenizer/entities.test
|
73
|
+
testdata/tokenizer/escapeFlag.test
|
74
|
+
testdata/tokenizer/test1.test
|
75
|
+
testdata/tokenizer/test2.test
|
76
|
+
testdata/tokenizer/test3.test
|
77
|
+
testdata/tokenizer/test4.test
|
78
|
+
testdata/tree-construction/tests1.dat
|
79
|
+
testdata/tree-construction/tests2.dat
|
80
|
+
testdata/tree-construction/tests3.dat
|
81
|
+
testdata/tree-construction/tests4.dat
|
82
|
+
testdata/tree-construction/tests5.dat
|
83
|
+
testdata/tree-construction/tests6.dat
|
84
|
+
testdata/validator/attributes.test
|
85
|
+
testdata/validator/base-href-attribute.test
|
86
|
+
testdata/validator/base-target-attribute.test
|
87
|
+
testdata/validator/blockquote-cite-attribute.test
|
88
|
+
testdata/validator/classattribute.test
|
89
|
+
testdata/validator/contenteditableattribute.test
|
90
|
+
testdata/validator/contextmenuattribute.test
|
91
|
+
testdata/validator/dirattribute.test
|
92
|
+
testdata/validator/draggableattribute.test
|
93
|
+
testdata/validator/html-xmlns-attribute.test
|
94
|
+
testdata/validator/idattribute.test
|
95
|
+
testdata/validator/inputattributes.test
|
96
|
+
testdata/validator/irrelevantattribute.test
|
97
|
+
testdata/validator/langattribute.test
|
98
|
+
testdata/validator/li-value-attribute.test
|
99
|
+
testdata/validator/link-href-attribute.test
|
100
|
+
testdata/validator/link-hreflang-attribute.test
|
101
|
+
testdata/validator/link-rel-attribute.test
|
102
|
+
testdata/validator/ol-start-attribute.test
|
103
|
+
testdata/validator/starttags.test
|
104
|
+
testdata/validator/style-scoped-attribute.test
|
105
|
+
testdata/validator/tabindexattribute.test
|
106
|
+
tests/preamble.rb
|
107
|
+
tests/test_encoding.rb
|
108
|
+
tests/test_lxp.rb
|
109
|
+
tests/test_parser.rb
|
110
|
+
tests/test_sanitizer.rb
|
111
|
+
tests/test_serializer.rb
|
112
|
+
tests/test_sniffer.rb
|
113
|
+
tests/test_stream.rb
|
114
|
+
tests/test_tokenizer.rb
|
115
|
+
tests/test_treewalkers.rb
|
116
|
+
tests/test_validator.rb
|
117
|
+
tests/tokenizer_test_parser.rb
|
@@ -0,0 +1,45 @@
|
|
1
|
+
html5
|
2
|
+
by Ryan King, et al
|
3
|
+
http://code.google.com/p/html5lib
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
A Ruby implementation of the parsing algorithm in HTML5.
|
8
|
+
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
== SYNOPSIS:
|
15
|
+
|
16
|
+
TODO
|
17
|
+
|
18
|
+
== REQUIREMENTS:
|
19
|
+
|
20
|
+
* chardet, only tested with 0.9.0
|
21
|
+
|
22
|
+
== INSTALL:
|
23
|
+
|
24
|
+
* sudo gem install html5
|
25
|
+
|
26
|
+
== LICENSE:
|
27
|
+
|
28
|
+
Copyright (c) 2006-2007 The Authors
|
29
|
+
|
30
|
+
Contributors:
|
31
|
+
James Graham - jg307@cam.ac.uk
|
32
|
+
Anne van Kesteren - annevankesteren@gmail.com
|
33
|
+
Lachlan Hunt - lachlan.hunt@lachy.id.au
|
34
|
+
Matt McDonald - kanashii@kanashii.ca
|
35
|
+
Sam Ruby - rubys@intertwingly.net
|
36
|
+
Ian Hickson (Google) - ian@hixie.ch
|
37
|
+
Thomas Broyer - t.broyer@ltgt.net
|
38
|
+
Jacques Distler - distler@golem.ph.utexas.edu
|
39
|
+
Ryan King - ryan@theryanking.com
|
40
|
+
|
41
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
42
|
+
|
43
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
44
|
+
|
45
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'hoe'
|
3
|
+
require 'lib/html5/version'
|
4
|
+
|
5
|
+
# Gem specification for html5, declared through Hoe.
Hoe.new("html5", HTML5::VERSION) do |spec|
  paragraph_break = "\n\n"

  spec.name = "html5"
  # The description is taken from paragraphs 2..5 of the README.
  spec.description = spec.paragraphs_of('README', 2..5).join(paragraph_break)
  spec.summary = "HTML5 parser/tokenizer."

  spec.author = ['Ryan King'] # TODO: add more names
  spec.email = 'ryan@theryanking.com'
  spec.url = 'http://code.google.com/p/html5lib'
  spec.need_zip = true

  spec.extra_deps << ['chardet', '>= 0.9.0']
  # The change notes are taken from paragraphs 0..1 of History.txt.
  spec.changes = spec.paragraphs_of('History.txt', 0..1).join(paragraph_break)
end
|
18
|
+
|
19
|
+
require 'rcov/rcovtask'
|
20
|
+
|
21
|
+
# Coverage tasks: test:coverage:clean removes the aggregate data file, and
# test:coverage (which depends on it) runs tests/test_*.rb under rcov.
namespace :test do
  namespace :coverage do
    desc "Delete aggregate coverage data."
    task :clean do
      rm_f "coverage.data"
    end
  end

  desc 'Aggregate code coverage for unit, functional and integration tests'
  Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |rcov|
    rcov.libs << "tests"
    rcov.test_files = FileList["tests/test_*.rb"]
    rcov.output_dir = "tests/coverage/"
    rcov.verbose = true
  end
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Extend the load path BEFORE requiring anything from the library, so the
# script also works when lib/ is not already on the load path.  The two
# original entries keep their precedence; the third locates lib/ relative to
# this file rather than the current working directory.
$:.unshift File.dirname(__FILE__), 'lib',
           File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'core_ext/string'
|
5
|
+
|
6
|
+
# Opens the input named by the last command-line argument, builds the
# requested parser and runs it.
#
# opts - option object; read for treebuilder, output, parsemethod,
#        container, profile and time.
# args - remaining command-line arguments; only the last one is used, as
#        the input name.
#
# The input name may be an http:// or https:// URL (fetched with open-uri,
# whose reported charset is passed on as the encoding), "-" for standard
# input, or a file path.  When no input name is given, a message is written
# to standard error and the process exits with status 1.
#
# With opts.profile set the parse is profiled and the profile is printed to
# standard error; the parsed document is not printed in that mode.  In every
# other mode the result is handed to print_output.
def parse(opts, args)
  encoding = nil

  f = args[-1]
  if f
    begin
      if f =~ %r{\Ahttps?://}
        require 'open-uri'
        f = URI.parse(f).open
        encoding = f.charset
      elsif f == '-'
        f = $stdin
      else
        # File.open rather than Kernel#open: a name starting with "|" must
        # not be run as a command.
        f = File.open(f)
      end
    rescue
      # Deliberate best effort: when the input cannot be opened, f is left
      # as the original string and handed to the parser unchanged.
    end
  else
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  require 'html5/treebuilders'
  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  if opts.output == :xml
    require 'html5/liberalxmlparser'
    parser = HTML5::XMLParser.new(:tree=>treebuilder)
  else
    require 'html5/html5parser'
    parser = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  # Fragment parsing takes the container element name as an extra argument.
  if opts.parsemethod == :parse
    parse_args = [f, encoding]
  else
    parse_args = [f, (opts.container || 'div'), encoding]
  end

  if opts.profile
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *parse_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    t0 = Time.new
    document = parser.send(opts.parsemethod, *parse_args)
    t1 = Time.new
    print_output(parser, document, opts)
    t2 = Time.new
    puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
  else
    document = parser.send(opts.parsemethod, *parse_args)
    print_output(parser, document, opts)
  end
end
|
64
|
+
|
65
|
+
# Prints the parsed document in the format selected by opts.output and,
# when opts.error is set, a list of the parser's errors.
#
# parser   - the parser that produced +document+; read for
#            tokenizer.stream.char_encoding, tree and errors.
# document - the parse result (a single node or a collection of fragments).
# opts     - option object; read for encoding, output, treebuilder,
#            serializer and error.
#
# Output formats: :xml and :hilite print the document (or its +hilite+)
# directly, :html serializes it through a tree walker, and :tree prints the
# tree builder's test serialization of each fragment.  Any other value
# prints no document.
def print_output(parser, document, opts)
  puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding

  case opts.output
  when :xml
    print document
  when :html
    require 'html5/treewalkers'
    tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each { |fragment| puts parser.tree.testSerializer(fragment) }
  end

  return unless opts.error

  messages = parser.errors.map do |pos, errorcode, datavars|
    template = HTML5::E[errorcode]
    # Only a known message template is formatted with datavars.  The
    # unknown-code fallback embeds the raw code, which must not be treated
    # as a format string (a "%" in it would raise).
    detail = template ? template % datavars : "Unknown error \"#{errorcode}\""
    "Line #{pos[0]} Col #{pos[1]} " + detail
  end
  $stdout.write("\nParse errors:\n" + messages.join("\n") + "\n")
end
|
91
|
+
|
92
|
+
require 'ostruct'
require 'optparse'

# Defaults for every setting the command line can override.
options = OpenStruct.new(
  :profile     => false,
  :time        => false,
  :output      => :html,
  :treebuilder => 'simpletree',
  :error       => false,
  :encoding    => false,
  :parsemethod => :parse,
  :serializer  => {
    :encoding            => 'utf-8',
    :omit_optional_tags  => false,
    :inject_meta_charset => false
  }
)

# Switches whose only effect is to store their value under a key of
# options.serializer: [switch, description, serializer key].
filter_switches = [
  ["--[no-]inject-meta-charset", "inject <meta charset>", :inject_meta_charset],
  ["--[no-]strip-whitespace", "strip unnecessary whitespace", :strip_whitespace],
  ["--[no-]sanitize", "escape unsafe tags", :sanitize]
]

serialization_switches = [
  ["--[no-]omit-optional-tags", "Omit optional tags", :omit_optional_tags],
  ["--[no-]quote-attr-values", "Quote attribute values", :quote_attr_values],
  ["--[no-]use-best-quote-char", "Use best quote character", :use_best_quote_char],
  ["--quote-char C", "Use specified quote character", :quote_char],
  ["--[no-]minimize-boolean-attributes", "Minimize boolean attributes", :minimize_boolean_attributes],
  ["--[no-]use-trailing-solidus", "Use trailing solidus", :use_trailing_solidus],
  ["--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values", :escape_lt_in_attrs],
  ["--[no-]escape-rcdata", "Escape rcdata element values", :escape_rcdata]
]

# Registers each [switch, description, key] entry on the given parser.
register_serializer_switches = lambda do |target, switches|
  switches.each do |switch, description, key|
    target.on(switch, description) { |value| options.serializer[key] = value }
  end
end

cli = OptionParser.new do |o|
  o.separator ""
  o.separator "Parse Options:"

  o.on("-b", "--treebuilder NAME") { |name| options.treebuilder = name }

  o.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
    options.parsemethod = :parse_fragment
    options.container = container if container
  end

  o.separator ""
  o.separator "Filter Options:"

  register_serializer_switches.call(o, filter_switches)

  o.separator ""
  o.separator "Output Options:"

  o.on("--tree", "output as debug tree") { |_tree| options.output = :tree }

  # XML output also switches the tree builder to rexml.
  o.on("-x", "--xml", "output as xml") do |_xml|
    options.output = :xml
    options.treebuilder = "rexml"
  end

  o.on("--[no-]html", "Output as html") do |html|
    options.output = (html ? :html : nil)
  end

  o.on("--hilite", "Output as formatted highlighted code.") do |_hilite|
    options.output = :hilite
  end

  o.on("-e", "--error", "Print a list of parse errors") { |flag| options.error = flag }

  o.separator ""
  o.separator "Serialization Options:"

  register_serializer_switches.call(o, serialization_switches)

  o.separator ""
  o.separator "Other Options:"

  o.on("-p", "--[no-]profile", "Profile the run") { |flag| options.profile = flag }

  o.on("-t", "--[no-]time", "Time the run") { |flag| options.time = flag }

  o.on("-c", "--[no-]encoding", "Print character encoding used") do |flag|
    options.encoding = flag
  end

  o.on_tail("-h", "--help", "Show this message") do
    puts o
    exit
  end
end

# Recognized switches are removed from ARGV; whatever remains is handed to
# parse together with the collected options.
cli.parse!(ARGV)
parse(options, ARGV)
|