feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -1,48 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
module HTree
|
3
|
-
class Name; include HTree end
|
4
|
-
class Context; include HTree end
|
5
|
-
|
6
|
-
module Tag; include HTree end
|
7
|
-
class STag; include Tag end
|
8
|
-
class ETag; include Tag end
|
9
|
-
|
10
|
-
module Node; include HTree end
|
11
|
-
module Container; include Node end
|
12
|
-
class Doc; include Container end
|
13
|
-
class Elem; include Container end
|
14
|
-
module Leaf; include Node end
|
15
|
-
class Text; include Leaf end
|
16
|
-
class XMLDecl; include Leaf end
|
17
|
-
class DocType; include Leaf end
|
18
|
-
class ProcIns; include Leaf end
|
19
|
-
class Comment; include Leaf end
|
20
|
-
class BogusETag; include Leaf end
|
21
|
-
|
22
|
-
module Traverse end
|
23
|
-
module Container::Trav; include Traverse end
|
24
|
-
module Leaf::Trav; include Traverse end
|
25
|
-
class Doc; module Trav; include Container::Trav end; include Trav end
|
26
|
-
class Elem; module Trav; include Container::Trav end; include Trav end
|
27
|
-
class Text; module Trav; include Leaf::Trav end; include Trav end
|
28
|
-
class XMLDecl; module Trav; include Leaf::Trav end; include Trav end
|
29
|
-
class DocType; module Trav; include Leaf::Trav end; include Trav end
|
30
|
-
class ProcIns; module Trav; include Leaf::Trav end; include Trav end
|
31
|
-
class Comment; module Trav; include Leaf::Trav end; include Trav end
|
32
|
-
class BogusETag; module Trav; include Leaf::Trav end; include Trav end
|
33
|
-
|
34
|
-
class Location; include HTree end
|
35
|
-
module Container::Loc end
|
36
|
-
module Leaf::Loc end
|
37
|
-
class Doc; class Loc < Location; include Trav, Container::Loc end end
|
38
|
-
class Elem; class Loc < Location; include Trav, Container::Loc end end
|
39
|
-
class Text; class Loc < Location; include Trav, Leaf::Loc end end
|
40
|
-
class XMLDecl; class Loc < Location; include Trav, Leaf::Loc end end
|
41
|
-
class DocType; class Loc < Location; include Trav, Leaf::Loc end end
|
42
|
-
class ProcIns; class Loc < Location; include Trav, Leaf::Loc end end
|
43
|
-
class Comment; class Loc < Location; include Trav, Leaf::Loc end end
|
44
|
-
class BogusETag; class Loc < Location; include Trav, Leaf::Loc end end
|
45
|
-
|
46
|
-
class Error < StandardError; end
|
47
|
-
end
|
48
|
-
# :startdoc:
|
@@ -1,124 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
require 'htree/scan' # for Pat::Nmtoken
|
3
|
-
require 'htree/context'
|
4
|
-
|
5
|
-
module HTree # :nodoc:
|
6
|
-
# Name represents a element name and attribute name.
|
7
|
-
# It consists of a namespace prefix, a namespace URI and a local name.
|
8
|
-
class Name # :nodoc:
|
9
|
-
=begin
|
10
|
-
element name prefix uri localname
|
11
|
-
{u}n, n with xmlns=u nil 'u' 'n'
|
12
|
-
p{u}n, p:n with xmlns:p=u 'p' 'u' 'n'
|
13
|
-
n with xmlns='' nil '' 'n'
|
14
|
-
|
15
|
-
attribute name
|
16
|
-
xmlns= 'xmlns' nil nil
|
17
|
-
xmlns:n= 'xmlns' nil 'n'
|
18
|
-
p{u}n=, p:n= with xmlns:p=u 'p' 'u' 'n'
|
19
|
-
n= nil '' 'n'
|
20
|
-
=end
|
21
|
-
def Name.parse_element_name(name, context)
|
22
|
-
if /\{(.*)\}/ =~ name
|
23
|
-
# "{u}n" means "use default namespace",
|
24
|
-
# "p{u}n" means "use the specified prefix p"
|
25
|
-
$` == '' ? Name.new(nil, $1, $') : Name.new($`, $1, $')
|
26
|
-
elsif /:/ =~ name && !context.namespace_uri($`).empty?
|
27
|
-
Name.new($`, context.namespace_uri($`), $')
|
28
|
-
elsif !context.namespace_uri(nil).empty?
|
29
|
-
Name.new(nil, context.namespace_uri(nil), name)
|
30
|
-
else
|
31
|
-
Name.new(nil, '', name)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def Name.parse_attribute_name(name, context)
|
36
|
-
if name == 'xmlns'
|
37
|
-
Name.new('xmlns', nil, nil)
|
38
|
-
elsif /\Axmlns:/ =~ name
|
39
|
-
Name.new('xmlns', nil, $')
|
40
|
-
elsif /\{(.*)\}/ =~ name
|
41
|
-
case $`
|
42
|
-
when ''; Name.new(nil, $1, $')
|
43
|
-
else Name.new($`, $1, $')
|
44
|
-
end
|
45
|
-
elsif /:/ =~ name && !context.namespace_uri($`).empty?
|
46
|
-
Name.new($`, context.namespace_uri($`), $')
|
47
|
-
else
|
48
|
-
Name.new(nil, '', name)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
NameCache = {}
|
53
|
-
def Name.new(namespace_prefix, namespace_uri, local_name)
|
54
|
-
key = [namespace_prefix, namespace_uri, local_name, self]
|
55
|
-
NameCache.fetch(key) {
|
56
|
-
0.upto(2) {|i| key[i] = key[i].dup.freeze if key[i] }
|
57
|
-
NameCache[key] = super(key[0], key[1], key[2])
|
58
|
-
}
|
59
|
-
end
|
60
|
-
|
61
|
-
def initialize(namespace_prefix, namespace_uri, local_name)
|
62
|
-
@namespace_prefix = namespace_prefix
|
63
|
-
@namespace_uri = namespace_uri
|
64
|
-
@local_name = local_name
|
65
|
-
if @namespace_prefix && /\A#{Pat::Nmtoken}\z/o !~ @namespace_prefix
|
66
|
-
raise HTree::Error, "invalid namespace prefix: #{@namespace_prefix.inspect}"
|
67
|
-
end
|
68
|
-
if @local_name && /\A#{Pat::Nmtoken}\z/o !~ @local_name
|
69
|
-
raise HTree::Error, "invalid local name: #{@local_name.inspect}"
|
70
|
-
end
|
71
|
-
if @namespace_prefix == 'xmlns'
|
72
|
-
unless @namespace_uri == nil
|
73
|
-
raise HTree::Error, "Name object for xmlns:* must not have namespace URI: #{@namespace_uri.inspect}"
|
74
|
-
end
|
75
|
-
else
|
76
|
-
unless String === @namespace_uri
|
77
|
-
raise HTree::Error, "invalid namespace URI: #{@namespace_uri.inspect}"
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
attr_reader :namespace_prefix, :namespace_uri, :local_name
|
82
|
-
|
83
|
-
def xmlns?
|
84
|
-
@namespace_prefix == 'xmlns' && @namespace_uri == nil
|
85
|
-
end
|
86
|
-
|
87
|
-
def universal_name
|
88
|
-
if @namespace_uri && !@namespace_uri.empty?
|
89
|
-
"{#{@namespace_uri}}#{@local_name}"
|
90
|
-
else
|
91
|
-
@local_name.dup
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def qualified_name
|
96
|
-
if @namespace_uri && !@namespace_uri.empty?
|
97
|
-
if @namespace_prefix
|
98
|
-
"#{@namespace_prefix}:#{@local_name}"
|
99
|
-
else
|
100
|
-
@local_name.dup
|
101
|
-
end
|
102
|
-
elsif @local_name
|
103
|
-
@local_name.dup
|
104
|
-
else
|
105
|
-
"xmlns"
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def to_s
|
110
|
-
if @namespace_uri && !@namespace_uri.empty?
|
111
|
-
if @namespace_prefix
|
112
|
-
"#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}"
|
113
|
-
else
|
114
|
-
"{#{@namespace_uri}}#{@local_name}"
|
115
|
-
end
|
116
|
-
elsif @local_name
|
117
|
-
@local_name.dup
|
118
|
-
else
|
119
|
-
"xmlns"
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
123
|
-
end
|
124
|
-
# :startdoc:
|
@@ -1,207 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
require 'htree/encoder'
|
3
|
-
require 'htree/doc'
|
4
|
-
require 'htree/elem'
|
5
|
-
require 'htree/leaf'
|
6
|
-
require 'htree/text'
|
7
|
-
|
8
|
-
module HTree # :nodoc:
|
9
|
-
|
10
|
-
class Text # :nodoc:
|
11
|
-
ChRef = {
|
12
|
-
'>' => '&gt;',
|
13
|
-
'<' => '&lt;',
|
14
|
-
'"' => '&quot;',
|
15
|
-
}
|
16
|
-
|
17
|
-
def output(out, context)
|
18
|
-
out.output_text @rcdata.gsub(/[<>]/) {|s| ChRef[s] }
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_attvalue_content
|
22
|
-
@rcdata.gsub(/[<>"]/) {|s| ChRef[s] }
|
23
|
-
end
|
24
|
-
|
25
|
-
def output_attvalue(out, context)
|
26
|
-
out.output_string '"'
|
27
|
-
out.output_text to_attvalue_content
|
28
|
-
out.output_string '"'
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
class Name # :nodoc:
|
33
|
-
def output(out, context)
|
34
|
-
# xxx: validate namespace prefix
|
35
|
-
if xmlns?
|
36
|
-
if @local_name
|
37
|
-
out.output_string "xmlns:#{@local_name}"
|
38
|
-
else
|
39
|
-
out.output_string "xmlns"
|
40
|
-
end
|
41
|
-
else
|
42
|
-
out.output_string qualified_name
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def output_attribute(text, out, context)
|
47
|
-
output(out, context)
|
48
|
-
out.output_string '='
|
49
|
-
text.output_attvalue(out, context)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
class Doc # :nodoc:
|
54
|
-
def output(out, context)
|
55
|
-
context = DefaultContext # discard outer context
|
56
|
-
xmldecl = false
|
57
|
-
doctypedecl = false
|
58
|
-
@children.each {|n|
|
59
|
-
if n.respond_to? :output_prolog_xmldecl
|
60
|
-
n.output_prolog_xmldecl(out, context) unless xmldecl # xxx: encoding?
|
61
|
-
xmldecl = true
|
62
|
-
elsif n.respond_to? :output_prolog_doctypedecl
|
63
|
-
n.output_prolog_doctypedecl(out, context) unless doctypedecl
|
64
|
-
doctypedecl = true
|
65
|
-
else
|
66
|
-
n.output(out, context)
|
67
|
-
end
|
68
|
-
}
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
class Elem # :nodoc:
|
73
|
-
def output(out, context)
|
74
|
-
if @empty
|
75
|
-
@stag.output_emptytag(out, context)
|
76
|
-
else
|
77
|
-
children_context = @stag.output_stag(out, context)
|
78
|
-
@children.each {|n| n.output(out, children_context) }
|
79
|
-
@stag.output_etag(out, context)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
class STag # :nodoc:
|
85
|
-
def output_attributes(out, context)
|
86
|
-
@attributes.each {|aname, text|
|
87
|
-
next if aname.xmlns?
|
88
|
-
out.output_string ' '
|
89
|
-
aname.output_attribute(text, out, context)
|
90
|
-
}
|
91
|
-
@context.output_namespaces(out, context)
|
92
|
-
end
|
93
|
-
|
94
|
-
def output_emptytag(out, context)
|
95
|
-
out.output_string '<'
|
96
|
-
@name.output(out, context)
|
97
|
-
children_context = output_attributes(out, context)
|
98
|
-
out.output_string "\n/>"
|
99
|
-
children_context
|
100
|
-
end
|
101
|
-
|
102
|
-
def output_stag(out, context)
|
103
|
-
out.output_string '<'
|
104
|
-
@name.output(out, context)
|
105
|
-
children_context = output_attributes(out, context)
|
106
|
-
out.output_string "\n>"
|
107
|
-
children_context
|
108
|
-
end
|
109
|
-
|
110
|
-
def output_etag(out, context)
|
111
|
-
out.output_string '</'
|
112
|
-
@name.output(out, context)
|
113
|
-
out.output_string "\n>"
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
class Context # :nodoc:
|
118
|
-
def output_namespaces(out, outer_context)
|
119
|
-
unknown_namespaces = {}
|
120
|
-
@namespaces.each {|prefix, uri|
|
121
|
-
outer_uri = outer_context.namespace_uri(prefix)
|
122
|
-
if outer_uri == nil
|
123
|
-
unknown_namespaces[prefix] = uri
|
124
|
-
elsif outer_uri != uri
|
125
|
-
if prefix
|
126
|
-
out.output_string " xmlns:#{prefix}="
|
127
|
-
else
|
128
|
-
out.output_string " xmlns="
|
129
|
-
end
|
130
|
-
Text.new(uri).output_attvalue(out, outer_context)
|
131
|
-
end
|
132
|
-
}
|
133
|
-
unless unknown_namespaces.empty?
|
134
|
-
out.output_xmlns(unknown_namespaces)
|
135
|
-
end
|
136
|
-
outer_context.subst_namespaces(@namespaces)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
class BogusETag # :nodoc:
|
141
|
-
# don't output anything.
|
142
|
-
def output(out, context)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
class XMLDecl # :nodoc:
|
147
|
-
# don't output anything.
|
148
|
-
def output(out, context)
|
149
|
-
end
|
150
|
-
|
151
|
-
def output_prolog_xmldecl(out, context)
|
152
|
-
out.output_string "<?xml version=\"#{@version}\""
|
153
|
-
if @encoding
|
154
|
-
out.output_string " encoding=\"#{@encoding}\""
|
155
|
-
end
|
156
|
-
if @standalone != nil
|
157
|
-
out.output_string " standalone=\"#{@standalone ? 'yes' : 'no'}\""
|
158
|
-
end
|
159
|
-
out.output_string "?>"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
class DocType # :nodoc:
|
164
|
-
# don't output anything.
|
165
|
-
def output(out, context)
|
166
|
-
end
|
167
|
-
|
168
|
-
def generate_content # :nodoc:
|
169
|
-
result = ''
|
170
|
-
if @public_identifier
|
171
|
-
result << "PUBLIC \"#{@public_identifier}\""
|
172
|
-
else
|
173
|
-
result << "SYSTEM"
|
174
|
-
end
|
175
|
-
# Although a system identifier is not omissible in XML,
|
176
|
-
# we cannot output it if it is not given.
|
177
|
-
if @system_identifier
|
178
|
-
if /"/ !~ @system_identifier
|
179
|
-
result << " \"#{@system_identifier}\""
|
180
|
-
else
|
181
|
-
result << " '#{@system_identifier}'"
|
182
|
-
end
|
183
|
-
end
|
184
|
-
result
|
185
|
-
end
|
186
|
-
|
187
|
-
def output_prolog_doctypedecl(out, context)
|
188
|
-
out.output_string "<!DOCTYPE #{@root_element_name} #{generate_content}>"
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
class ProcIns # :nodoc:
|
193
|
-
def output(out, context)
|
194
|
-
out.output_string "<?#{@target}"
|
195
|
-
out.output_string " #{@content}" if @content
|
196
|
-
out.output_string "?>"
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
class Comment # :nodoc:
|
201
|
-
def output(out, context)
|
202
|
-
out.output_string "<!--#{@content}-->"
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
|
-
end
|
207
|
-
# :startdoc:
|
@@ -1,409 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
require 'htree/scan'
|
3
|
-
require 'htree/htmlinfo'
|
4
|
-
require 'htree/text'
|
5
|
-
require 'htree/tag'
|
6
|
-
require 'htree/leaf'
|
7
|
-
require 'htree/doc'
|
8
|
-
require 'htree/elem'
|
9
|
-
require 'htree/raw_string'
|
10
|
-
require 'htree/context'
|
11
|
-
require 'htree/encoder'
|
12
|
-
require 'htree/fstr'
|
13
|
-
|
14
|
-
module HTree # :nodoc:
|
15
|
-
# HTree.parse parses <i>input</i> and return a document tree.
|
16
|
-
# represented by HTree::Doc.
|
17
|
-
#
|
18
|
-
# <i>input</i> should be a String or
|
19
|
-
# an object which respond to read or open method.
|
20
|
-
# For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable.
|
21
|
-
# Note that the URIs need open-uri.
|
22
|
-
#
|
23
|
-
# HTree.parse guesses <i>input</i> is HTML or not and XML or not.
|
24
|
-
#
|
25
|
-
# If it is guessed as HTML, the default namespace in the result is set to http://www.w3.org/1999/xhtml
|
26
|
-
# regardless of <i>input</i> has XML namespace declaration or not nor even it is pre-XML HTML.
|
27
|
-
#
|
28
|
-
# If it is guessed as HTML and not XML, all element and attribute names are downcaseed.
|
29
|
-
#
|
30
|
-
# If opened file or read content has charset method,
|
31
|
-
# HTree.parse decode it according to $KCODE before parsing.
|
32
|
-
# Otherwise HTree.parse assumes the character encoding of the content is
|
33
|
-
# compatible to $KCODE.
|
34
|
-
# Note that the charset method is provided by URI::HTTP with open-uri.
|
35
|
-
def HTree.parse(input)
|
36
|
-
HTree.with_frozen_string_hash {
|
37
|
-
parse_as(input, false)
|
38
|
-
}
|
39
|
-
end
|
40
|
-
|
41
|
-
# HTree.parse_xml parses <i>input</i> as XML and
|
42
|
-
# return a document tree represented by HTree::Doc.
|
43
|
-
#
|
44
|
-
# It behaves almost same as HTree.parse but it assumes <i>input</> is XML
|
45
|
-
# even if no XML declaration.
|
46
|
-
# The assumption causes following differences.
|
47
|
-
# * doesn't downcase element name.
|
48
|
-
# * The content of <script> and <style> element is PCDATA, not CDATA.
|
49
|
-
def HTree.parse_xml(input)
|
50
|
-
HTree.with_frozen_string_hash {
|
51
|
-
parse_as(input, true)
|
52
|
-
}
|
53
|
-
end
|
54
|
-
|
55
|
-
def HTree.parse_as(input, is_xml)
|
56
|
-
input_charset = nil
|
57
|
-
if input.tainted? && 1 <= $SAFE
|
58
|
-
raise SecurityError, "input tainted"
|
59
|
-
end
|
60
|
-
if input.respond_to? :read # IO, StringIO
|
61
|
-
input = input.read.untaint
|
62
|
-
input_charset = input.charset if input.respond_to? :charset
|
63
|
-
elsif input.respond_to? :open # Pathname, URI with open-uri
|
64
|
-
input.open {|f|
|
65
|
-
input = f.read.untaint
|
66
|
-
input_charset = f.charset if f.respond_to? :charset
|
67
|
-
}
|
68
|
-
end
|
69
|
-
if input_charset && input_charset != Encoder.internal_charset
|
70
|
-
input = Iconv.conv(Encoder.internal_charset, input_charset, input)
|
71
|
-
end
|
72
|
-
|
73
|
-
tokens = []
|
74
|
-
is_xml, is_html = HTree.scan(input, is_xml) {|token|
|
75
|
-
tokens << token
|
76
|
-
}
|
77
|
-
context = is_html ? HTMLContext: DefaultContext
|
78
|
-
structure_list = parse_pairs(tokens, is_xml, is_html)
|
79
|
-
structure_list = fix_structure_list(structure_list, is_xml, is_html)
|
80
|
-
nodes = structure_list.map {|s| build_node(s, is_xml, is_html, context) }
|
81
|
-
Doc.new(nodes)
|
82
|
-
end
|
83
|
-
|
84
|
-
def HTree.parse_pairs(tokens, is_xml, is_html)
|
85
|
-
stack = [[nil, nil, []]]
|
86
|
-
tokens.each {|token|
|
87
|
-
case token[0]
|
88
|
-
when :stag
|
89
|
-
stag_raw_string = token[1]
|
90
|
-
stagname = stag_raw_string[Pat::Name]
|
91
|
-
stagname = stagname.downcase if !is_xml && is_html
|
92
|
-
stagname = HTree.frozen_string(stagname)
|
93
|
-
stack << [stagname, stag_raw_string, []]
|
94
|
-
when :etag
|
95
|
-
etag_raw_string = token[1]
|
96
|
-
etagname = etag_raw_string[Pat::Name]
|
97
|
-
etagname = etagname.downcase if !is_xml && is_html
|
98
|
-
etagname = HTree.frozen_string(etagname)
|
99
|
-
matched_elem = nil
|
100
|
-
stack.reverse_each {|elem|
|
101
|
-
stagname, _, _ = elem
|
102
|
-
if stagname == etagname
|
103
|
-
matched_elem = elem
|
104
|
-
break
|
105
|
-
end
|
106
|
-
}
|
107
|
-
if matched_elem
|
108
|
-
# This line breaks in Rails 1.1.
|
109
|
-
#until matched_elem.equal? stack.last
|
110
|
-
until matched_elem.object_id == stack.last.object_id
|
111
|
-
stagname, stag_raw_string, children = stack.pop
|
112
|
-
stack.last[2] << [:elem, stag_raw_string, children]
|
113
|
-
end
|
114
|
-
stagname, stag_raw_string, children = stack.pop
|
115
|
-
stack.last[2] << [:elem, stag_raw_string, children, etag_raw_string]
|
116
|
-
else
|
117
|
-
stack.last[2] << [:bogus_etag, etag_raw_string]
|
118
|
-
end
|
119
|
-
else
|
120
|
-
stack.last[2] << token
|
121
|
-
end
|
122
|
-
}
|
123
|
-
elem = nil
|
124
|
-
while 1 < stack.length
|
125
|
-
stagname, stag_raw_string, children = stack.pop
|
126
|
-
stack.last[2] << [:elem, stag_raw_string, children]
|
127
|
-
end
|
128
|
-
stack[0][2]
|
129
|
-
end
|
130
|
-
|
131
|
-
def HTree.fix_structure_list(structure_list, is_xml, is_html)
|
132
|
-
result = []
|
133
|
-
rest = structure_list.dup
|
134
|
-
until rest.empty?
|
135
|
-
structure = rest.shift
|
136
|
-
if structure[0] == :elem
|
137
|
-
elem, rest2 = fix_element(structure, [], [], is_xml, is_html)
|
138
|
-
result << elem
|
139
|
-
rest = rest2 + rest
|
140
|
-
else
|
141
|
-
result << structure
|
142
|
-
end
|
143
|
-
end
|
144
|
-
result
|
145
|
-
end
|
146
|
-
|
147
|
-
def HTree.fix_element(elem, excluded_tags, included_tags, is_xml, is_html)
|
148
|
-
stag_raw_string = elem[1]
|
149
|
-
children = elem[2]
|
150
|
-
if etag_raw_string = elem[3]
|
151
|
-
return [:elem, stag_raw_string, fix_structure_list(children, is_xml, is_html), etag_raw_string], []
|
152
|
-
else
|
153
|
-
tagname = stag_raw_string[Pat::Name]
|
154
|
-
tagname = tagname.downcase if !is_xml && is_html
|
155
|
-
if ElementContent[tagname] == :EMPTY
|
156
|
-
return [:elem, stag_raw_string, []], children
|
157
|
-
else
|
158
|
-
if ElementContent[tagname] == :CDATA
|
159
|
-
possible_tags = []
|
160
|
-
else
|
161
|
-
possible_tags = ElementContent[tagname]
|
162
|
-
end
|
163
|
-
if possible_tags
|
164
|
-
excluded_tags2 = ElementExclusions[tagname]
|
165
|
-
included_tags2 = ElementInclusions[tagname]
|
166
|
-
excluded_tags |= excluded_tags2 if excluded_tags2
|
167
|
-
included_tags |= included_tags2 if included_tags2
|
168
|
-
containable_tags = (possible_tags | included_tags) - excluded_tags
|
169
|
-
uncontainable_tags = ElementContent.keys - containable_tags
|
170
|
-
else
|
171
|
-
# If the tagname is unknown, it is assumed that any element
|
172
|
-
# except excluded can be contained.
|
173
|
-
uncontainable_tags = excluded_tags
|
174
|
-
end
|
175
|
-
fixed_children = []
|
176
|
-
rest = children
|
177
|
-
until rest.empty?
|
178
|
-
if rest[0][0] == :elem
|
179
|
-
elem = rest.shift
|
180
|
-
elem_tagname = elem[1][Pat::Name]
|
181
|
-
elem_tagname = elem_tagname.downcase if !is_xml && is_html
|
182
|
-
if uncontainable_tags.include? elem_tagname
|
183
|
-
rest.unshift elem
|
184
|
-
break
|
185
|
-
else
|
186
|
-
fixed_elem, rest2 = fix_element(elem, excluded_tags, included_tags, is_xml, is_html)
|
187
|
-
fixed_children << fixed_elem
|
188
|
-
rest = rest2 + rest
|
189
|
-
end
|
190
|
-
else
|
191
|
-
fixed_children << rest.shift
|
192
|
-
end
|
193
|
-
end
|
194
|
-
return [:elem, stag_raw_string, fixed_children], rest
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
# Converts one parsed structure (an array whose first slot is a kind
# symbol and whose second slot is the raw source string) into the
# corresponding node object.
#
# Element structures are built recursively: their children are converted
# with the context of the parsed start tag.  +inherited_context+ is
# passed on to STag.parse for elements and empty tags.
#
# Raises Exception for a structure kind that is not handled here.
def HTree.build_node(structure, is_xml, is_html, inherited_context=DefaultContext)
  kind = structure[0]
  case kind
  when :elem
    _, stag_raw, kids, etag_raw = structure
    # The end tag is parsed before the start tag.
    etag = etag_raw && ETag.parse(etag_raw, is_xml, is_html)
    stag = STag.parse(stag_raw, true, is_xml, is_html, inherited_context)
    if kids.empty? && !etag
      Elem.new!(stag)
    else
      built = kids.map {|kid| build_node(kid, is_xml, is_html, stag.context) }
      Elem.new!(stag, built, etag)
    end
  when :emptytag
    Elem.new!(STag.parse(structure[1], false, is_xml, is_html, inherited_context))
  when :text_pcdata then Text.parse_pcdata(structure[1])
  when :text_cdata_content then Text.parse_cdata_content(structure[1])
  when :text_cdata_section then Text.parse_cdata_section(structure[1])
  when :bogus_etag then BogusETag.parse(structure[1], is_xml, is_html)
  when :xmldecl then XMLDecl.parse(structure[1])
  when :doctype then DocType.parse(structure[1], is_xml, is_html)
  when :procins then ProcIns.parse(structure[1])
  when :comment then Comment.parse(structure[1])
  else
    raise Exception, "[bug] unknown structure: #{structure.inspect}"
  end
end
|
234
|
-
|
235
|
-
# Parses the raw text of a start tag (+is_stag+ true) or an empty-element
# tag (+is_stag+ false) into an STag.
#
# The well-formed tag pattern is tried first, then the lenient "invalid"
# pattern; if neither matches, HTree::Error is raised.  For HTML that is
# not XML the element name and attribute names are lower-cased.  An
# attribute written as a bare value gets its name from
# OmittedAttrName[qname] when such a table exists, otherwise the value
# itself is used as the name.
#
# The returned STag has its raw_string set to +raw_string+.
def STag.parse(raw_string, is_stag, is_xml, is_html, inherited_context=DefaultContext)
  attrs = []

  # Builds a [name, value] pair from one attribute match.  Group 5 is a
  # bare value with no name; otherwise group 1 is the name and the value
  # is whichever of groups 2..4 matched.
  pair_from = lambda do |m|
    m[5] ? [nil, m[5]] : [m[1], m[2] || m[3] || m[4]]
  end

  if (is_stag ? /\A#{Pat::ValidStartTag_C}\z/o : /\A#{Pat::ValidEmptyTag_C}\z/o) =~ raw_string
    qname = Regexp.last_match(1)
    Regexp.last_match(2).scan(Pat::ValidAttr_C) { attrs << pair_from.call(Regexp.last_match) }
  elsif (is_stag ? /\A#{Pat::InvalidStartTag_C}\z/o : /\A#{Pat::InvalidEmptyTag_C}\z/o) =~ raw_string
    qname = Regexp.last_match(1)
    # Saved before scanning: the scan below replaces the last match.
    trailing_attr = Regexp.last_match(3)
    Regexp.last_match(2).scan(Pat::InvalidAttr1_C) { attrs << pair_from.call(Regexp.last_match) }
    if trailing_attr
      /#{Pat::InvalidAttr1End_C}/o =~ trailing_attr
      attrs << [Regexp.last_match(1), Regexp.last_match(2) || Regexp.last_match(3)]
    end
  else
    raise HTree::Error, "cannot recognize as start tag or empty tag: #{raw_string.inspect}"
  end

  fold_case = !is_xml && is_html
  qname = qname.downcase if fold_case

  attrs.map! do |aname, aval|
    if aname
      [fold_case ? aname.downcase : aname, Text.parse_pcdata(aval)]
    else
      implied_name =
        if (val2name = OmittedAttrName[qname])
          lowered = aval.downcase
          val2name.fetch(lowered, lowered)
        else
          aval
        end
      [implied_name, Text.new(aval)]
    end
  end

  stag = STag.new(qname, attrs, inherited_context)
  stag.raw_string = raw_string
  stag
end
|
277
|
-
|
278
|
-
# Parses the raw text of an end tag into an ETag.
#
# The element name is lower-cased for HTML that is not XML.  The returned
# object keeps +raw_string+ in its raw_string attribute.
# Raises HTree::Error when +raw_string+ does not look like an end tag.
def ETag.parse(raw_string, is_xml, is_html)
  matched = /\A#{Pat::EndTag_C}\z/o.match(raw_string)
  raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}" unless matched

  name = matched[1]
  name = name.downcase if is_html && !is_xml

  etag = new(name)
  etag.raw_string = raw_string
  etag
end
|
290
|
-
|
291
|
-
# Parses the raw text of an end tag into a BogusETag.
#
# The element name is lower-cased for HTML that is not XML.  The returned
# object keeps +raw_string+ in its raw_string attribute.
# Raises HTree::Error when +raw_string+ does not look like an end tag.
def BogusETag.parse(raw_string, is_xml, is_html)
  matched = /\A#{Pat::EndTag_C}\z/o.match(raw_string)
  raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}" unless matched

  name = matched[1]
  name = name.downcase if is_html && !is_xml

  bogus = new(name)
  bogus.raw_string = raw_string
  bogus
end
|
303
|
-
|
304
|
-
# Builds a Text node from raw parsed character data, repairing sloppy
# character and entity references on the way:
#
# * a reference that already ends with ";" is kept unchanged;
# * a numeric reference without ";" gets the ";" appended;
# * a name matching NamedCharactersPattern without ";" gets the ";"
#   appended;
# * a lone "&", or "&" followed by a name that is not a known named
#   character, is not a reference at all, so the ampersand is escaped
#   as "&amp;".
#
# The repaired text is handed to Text.new_internal (presumably the
# constructor for already-escaped text -- confirm against Text), and the
# untouched input is kept in the node's raw_string.
def Text.parse_pcdata(raw_string)
  fixed = raw_string.gsub(/&(?:(?:#[0-9]+|#x[0-9a-fA-F]+|([A-Za-z][A-Za-z0-9]*));?)?/o) {|s|
    # Grab the entity name first: the regexp tests in the case below
    # overwrite the last-match variables.
    name = $1
    case s
    when /;\z/
      s
    when /\A&#/
      "#{s};"
    when '&'
      # Stray ampersand: escape it instead of passing it through.
      '&amp;'
    else
      if NamedCharactersPattern =~ name
        "&#{name};"
      else
        # Unknown name: treat the "&" as literal text.
        "&amp;#{name}"
      end
    end
  }
  # Reuse the caller's string object when nothing had to be repaired.
  fixed = raw_string if fixed == raw_string
  result = Text.new_internal(fixed)
  result.raw_string = raw_string
  result
end
|
327
|
-
|
328
|
-
# Builds a Text node from CDATA element content.  The string is passed to
# Text.new unchanged and is also stored as the node's raw_string.
def Text.parse_cdata_content(raw_string)
  text = Text.new(raw_string)
  text.raw_string = raw_string
  return text
end
|
333
|
-
|
334
|
-
# Builds a Text node from the raw text of a CDATA section.  The node's
# content is the first capture of Pat::CDATA_C; the complete section is
# kept in raw_string.
# Raises HTree::Error when +raw_string+ is not a CDATA section.
def Text.parse_cdata_section(raw_string)
  matched = /\A#{Pat::CDATA_C}\z/o.match(raw_string)
  raise HTree::Error, "cannot recognize as CDATA section: #{raw_string.inspect}" unless matched

  text = Text.new(matched[1])
  text.raw_string = raw_string
  text
end
|
345
|
-
|
346
|
-
# Parses the raw text of an XML declaration into an XMLDecl.
#
# Version, encoding and standalone are each taken from whichever of their
# two capture groups matched.  standalone becomes true for 'yes', false
# for 'no' and nil otherwise.
# Raises HTree::Error when +raw_string+ is not an XML declaration.
def XMLDecl.parse(raw_string)
  matched = /\A#{Pat::XmlDecl_C}\z/o.match(raw_string)
  raise HTree::Error, "cannot recognize as XML declaration: #{raw_string.inspect}" unless matched

  version = matched[1] || matched[2]
  encoding = matched[3] || matched[4]
  # Any value other than 'yes'/'no' (including a missing one) maps to nil.
  standalone = { 'yes' => true, 'no' => false }[matched[5] || matched[6]]

  decl = XMLDecl.new(version, encoding, standalone)
  decl.raw_string = raw_string
  decl
end
|
366
|
-
|
367
|
-
# Parses the raw text of a document type declaration into a DocType.
#
# The root element name is the first capture of Pat::DocType_C; the
# public and system identifiers are each taken from whichever of their
# two capture groups matched.  The root element name is lower-cased for
# HTML that is not XML.  The returned object keeps +raw_string+ in its
# raw_string attribute.
#
# Raises HTree::Error when +raw_string+ is not a document type
# declaration.
def DocType.parse(raw_string, is_xml, is_html)
  unless /\A#{Pat::DocType_C}\z/o =~ raw_string
    # The message names the construct that actually failed to parse.
    raise HTree::Error, "cannot recognize as document type declaration: #{raw_string.inspect}"
  end

  root_element_name = $1
  public_identifier = $2 || $3
  system_identifier = $4 || $5

  root_element_name = root_element_name.downcase if !is_xml && is_html

  result = DocType.new(root_element_name, public_identifier, system_identifier)
  result.raw_string = raw_string
  result
end
|
382
|
-
|
383
|
-
# Parses the raw text of a processing instruction into a ProcIns.  The
# first capture of Pat::XmlProcIns_C is the target and the second is the
# content; the complete text is kept in raw_string.
# Raises HTree::Error when +raw_string+ is not a processing instruction.
def ProcIns.parse(raw_string)
  matched = /\A#{Pat::XmlProcIns_C}\z/o.match(raw_string)
  raise HTree::Error, "cannot recognize as processing instruction: #{raw_string.inspect}" unless matched

  pi = ProcIns.new(matched[1], matched[2])
  pi.raw_string = raw_string
  pi
end
|
395
|
-
|
396
|
-
# Parses the raw text of a comment into a Comment.  The node's content is
# the first capture of Pat::Comment_C; the complete text is kept in
# raw_string.
# Raises HTree::Error when +raw_string+ is not a comment.
def Comment.parse(raw_string)
  matched = /\A#{Pat::Comment_C}\z/o.match(raw_string)
  raise HTree::Error, "cannot recognize as comment: #{raw_string.inspect}" unless matched

  comment = Comment.new(matched[1])
  comment.raw_string = raw_string
  comment
end
|
407
|
-
|
408
|
-
end
|
409
|
-
# :startdoc:
|