feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -1,124 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
require 'htree/modules'
|
3
|
-
require 'htree/fstr'
|
4
|
-
|
5
|
-
module HTree # :nodoc:
|
6
|
-
module Node # :nodoc:
  # Returns the source string recorded for this node by parsing, or +nil+
  # if the node was not constructed via parsing.
  #
  # The text is collected by +raw_string_internal+, which appends to the
  # buffer it is handed. An implementation signals "no recorded source" by
  # throwing +:raw_string_tag+ (Tag and Leaf do so when +@raw_string+ is
  # unset); that aborts the collection and +nil+ is returned.
  #
  # The buffer itself is returned instead of the value produced by
  # +raw_string_internal+: container implementations end with an +each+
  # loop or a conditional append, so their return value is not reliably
  # the accumulated string.
  def raw_string
    buffer = ''.dup
    catch(:raw_string_tag) {
      raw_string_internal(buffer)
      return buffer
    }
    nil
  end
end
|
16
|
-
|
17
|
-
class Doc # :nodoc:
  # Appends the recorded source text of every child, in order, to +result+.
  #
  # Returns +result+ so that callers receive the accumulated string; a bare
  # +each+ would hand back the children array instead.
  def raw_string_internal(result)
    @children.each { |child| child.raw_string_internal(result) }
    result
  end
end
|
24
|
-
|
25
|
-
class Elem # :nodoc:
  # Appends the recorded source text of the start tag, each child and,
  # when present, the end tag to +result+.
  #
  # Returns +result+ explicitly: the trailing conditional append evaluates
  # to +nil+ for an element without an end tag, which would otherwise be
  # reported as "no recorded source".
  def raw_string_internal(result)
    @stag.raw_string_internal(result)
    @children.each { |child| child.raw_string_internal(result) }
    @etag.raw_string_internal(result) if @etag
    result
  end
end
|
32
|
-
|
33
|
-
module Tag # :nodoc:
  # Marks the tag as having no recorded source text.
  def init_raw_string
    @raw_string = nil
  end

  # Records +arg+ as the tag's source text, stored via HTree.frozen_string.
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source text to +result+ and returns +result+.
  # Throws +:raw_string_tag+ when no source text has been recorded.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
|
41
|
-
|
42
|
-
module Leaf # :nodoc:
  # Resets the leaf so that it carries no recorded source text.
  def init_raw_string
    @raw_string = nil
  end

  # Stores +arg+ (passed through HTree.frozen_string) as the source text.
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the stored source text to +result+; the append's value (the
  # buffer) is returned. Without stored text, +:raw_string_tag+ is thrown.
  def raw_string_internal(result)
    throw(:raw_string_tag) unless @raw_string
    result << @raw_string
  end
end
|
50
|
-
|
51
|
-
class Text # :nodoc:
  # Records the source text of this text node.
  #
  # When +arg+ equals +@rcdata+ the existing +@rcdata+ object is reused as
  # the raw string; any other value is delegated to the inherited setter.
  def raw_string=(arg)
    return super unless arg == @rcdata
    @raw_string = @rcdata
  end
end
|
60
|
-
|
61
|
-
module Node # :nodoc:
  # Default implementation: always raises NotImplementedError. The concrete
  # node classes in this file each define their own version.
  def eliminate_raw_string
    raise(NotImplementedError)
  end
end
|
66
|
-
|
67
|
-
class Doc # :nodoc:
  # Builds a new Doc from the result of calling +eliminate_raw_string+ on
  # each child.
  def eliminate_raw_string
    stripped_children = @children.map { |child| child.eliminate_raw_string }
    Doc.new(stripped_children)
  end
end
|
72
|
-
|
73
|
-
class Elem # :nodoc:
  # Builds a new element via +Elem.new!+ from the +eliminate_raw_string+
  # results of the start tag, the children (+nil+ when +@empty+ is set)
  # and the end tag (left as-is when it is absent).
  def eliminate_raw_string
    start_tag = @stag.eliminate_raw_string
    children = @empty ? nil : @children.map { |child| child.eliminate_raw_string }
    end_tag = @etag && @etag.eliminate_raw_string
    Elem.new!(start_tag, children, end_tag)
  end
end
|
81
|
-
|
82
|
-
class Text # :nodoc:
  # Returns a text node created by +Text.new_internal+ from +@rcdata+ alone.
  def eliminate_raw_string
    content = @rcdata
    Text.new_internal(content)
  end
end
|
87
|
-
|
88
|
-
class STag # :nodoc:
  # Returns a new start tag built from this tag's element name, attributes
  # and inherited context, without any recorded source text.
  #
  # The element name is read from +@name+, which is the instance variable
  # STag#initialize assigns; +@qualified_name+ is not assigned by the STag
  # constructor shown in tag.rb, so passing it would hand +nil+ to the new tag.
  # NOTE(review): confirm no other file assigns @qualified_name on STag.
  def eliminate_raw_string
    STag.new(@name, @attributes, @inherited_context)
  end
end
|
93
|
-
|
94
|
-
class ETag # :nodoc:
  # Returns a new instance of the receiver's own class built from
  # +@qualified_name+ only.
  def eliminate_raw_string
    tag_class = self.class
    tag_class.new(@qualified_name)
  end
end
|
99
|
-
|
100
|
-
class XMLDecl # :nodoc:
  # Returns a new XMLDecl built from +@version+, +@encoding+ and
  # +@standalone+.
  def eliminate_raw_string
    fields = [@version, @encoding, @standalone]
    XMLDecl.new(*fields)
  end
end
|
105
|
-
|
106
|
-
class DocType # :nodoc:
  # Returns a new DocType built from the root element name and the public
  # and system identifiers held by this node.
  def eliminate_raw_string
    fields = [@root_element_name, @public_identifier, @system_identifier]
    DocType.new(*fields)
  end
end
|
111
|
-
|
112
|
-
class ProcIns # :nodoc:
  # Returns a new ProcIns built from +@target+ and +@content+.
  def eliminate_raw_string
    target, content = @target, @content
    ProcIns.new(target, content)
  end
end
|
117
|
-
|
118
|
-
class Comment # :nodoc:
  # Returns a new Comment built from +@content+.
  def eliminate_raw_string
    text = @content
    Comment.new(text)
  end
end
|
123
|
-
end
|
124
|
-
# :startdoc:
|
@@ -1,130 +0,0 @@
|
|
1
|
-
# = REXML Tree Generator
|
2
|
-
#
|
3
|
-
# HTree::Node#to_rexml is used for converting HTree to REXML.
|
4
|
-
#
|
5
|
-
# == Method Summary
|
6
|
-
#
|
7
|
-
# - HTree::Node#to_rexml -> REXML::Child
|
8
|
-
#
|
9
|
-
# == Example
|
10
|
-
#
|
11
|
-
# HTree.parse(...).to_rexml #=> REXML::Document
|
12
|
-
#
|
13
|
-
# == Comparison between HTree and REXML.
|
14
|
-
#
|
15
|
-
# - The HTree parser is a permissive HTML/XML parser.
|
16
|
-
# The REXML parser is a strict XML parser.
|
17
|
-
# HTree is recommended if you need to parse real-world HTML.
|
18
|
-
# REXML is recommended if you need strict error checking.
|
19
|
-
# - HTree object is immutable.
|
20
|
-
# REXML object is mutable.
|
21
|
-
# REXML should be used if you need modification.
|
22
|
-
#
|
23
|
-
|
24
|
-
# :stopdoc:
|
25
|
-
require 'htree/modules'
|
26
|
-
require 'htree/output' # HTree::DocType#generate_content
|
27
|
-
|
28
|
-
module HTree # :nodoc:
|
29
|
-
module Node # :nodoc:
  # Converts this node to a REXML tree.
  #
  # REXML is required on first use rather than at load time. The conversion
  # itself is done by +to_rexml_internal+, called with no parent and
  # DefaultContext.
  def to_rexml
    require 'rexml/document'
    parent = nil
    to_rexml_internal(parent, DefaultContext)
  end
end
|
36
|
-
|
37
|
-
class Doc # :nodoc:
  # Converts the document to a REXML::Document.
  #
  # +parent+ must be +nil+ (ArgumentError otherwise). Every child is
  # converted with the new document as its parent and +context+ passed
  # through unchanged. Returns the REXML::Document.
  def to_rexml_internal(parent, context)
    raise ArgumentError, "parent must be nil" if parent != nil

    document = REXML::Document.new
    children.each do |child|
      child.to_rexml_internal(document, context)
    end
    document
  end
end
|
47
|
-
|
48
|
-
class Elem # :nodoc:
  # Converts the element, its attributes and its children to a
  # REXML::Element attached to +parent+.
  #
  # Namespace bindings used by the element name or by prefixed attributes
  # that differ from what +context+ reports are collected and declared on
  # the new REXML element; children are converted with a context extended
  # by those bindings. Returns the REXML::Element.
  def to_rexml_internal(parent, context)
    ename = element_name
    pending_ns = {}

    # Element name: declare its namespace if the context disagrees.
    elem_prefix = ename.namespace_prefix
    unless context.namespace_uri(elem_prefix) == ename.namespace_uri
      pending_ns[elem_prefix] = ename.namespace_uri
    end

    tag = elem_prefix ? "#{elem_prefix}:#{ename.local_name}" : ename.local_name
    node = REXML::Element.new(tag, parent)

    # Attributes: only prefixed attributes can introduce a namespace binding.
    each_attribute do |aname, atext|
      attr_prefix = aname.namespace_prefix
      if attr_prefix
        unless context.namespace_uri(attr_prefix) == aname.namespace_uri
          pending_ns[attr_prefix] = aname.namespace_uri
        end
        node.add_attribute("#{attr_prefix}:#{aname.local_name}", atext.to_s)
      else
        node.add_attribute(aname.local_name, atext.to_s)
      end
    end

    # A nil prefix stands for the default namespace.
    pending_ns.each do |prefix, uri|
      if prefix
        node.add_namespace(prefix, uri)
      else
        node.add_namespace(uri)
      end
    end

    inner_context = context.subst_namespaces(pending_ns)
    children.each { |child| child.to_rexml_internal(node, inner_context) }
    node
  end
end
|
88
|
-
|
89
|
-
class Text # :nodoc:
  # Converts the text node to a REXML::Text attached to +parent+.
  #
  # Any literal "<" or ">" left in the rcdata is replaced through
  # Encoder::ChRef before the text is handed to REXML::Text.new (called
  # with +true+ for both its second and fourth arguments).
  def to_rexml_internal(parent, context)
    escaped = rcdata.gsub(/[<>]/) { |bracket| Encoder::ChRef[bracket] }
    REXML::Text.new(escaped, true, parent, true)
  end
end
|
95
|
-
|
96
|
-
class XMLDecl # :nodoc:
  # Converts the declaration to a REXML::XMLDecl built from version,
  # encoding and standalone; it is appended to +parent+ when one is given.
  # Returns the REXML::XMLDecl.
  def to_rexml_internal(parent, context)
    decl = REXML::XMLDecl.new(version, encoding, standalone)
    if parent
      parent << decl
    end
    decl
  end
end
|
103
|
-
|
104
|
-
class DocType # :nodoc:
  # Converts the doctype to a REXML::DocType attached to +parent+, built
  # from the root element name and the text returned by +generate_content+.
  def to_rexml_internal(parent, context)
    name_and_content = [root_element_name, generate_content]
    REXML::DocType.new(name_and_content, parent)
  end
end
|
109
|
-
|
110
|
-
class ProcIns # :nodoc:
  # Converts the processing instruction to a REXML::Instruction built from
  # target and content; it is appended to +parent+ when one is given.
  # Returns the REXML::Instruction.
  def to_rexml_internal(parent, context)
    instruction = REXML::Instruction.new(target, content)
    if parent
      parent << instruction
    end
    instruction
  end
end
|
117
|
-
|
118
|
-
class Comment # :nodoc:
  # Converts the comment to a REXML::Comment with the same content,
  # attached to +parent+.
  def to_rexml_internal(parent, context)
    text = content
    REXML::Comment.new(text, parent)
  end
end
|
123
|
-
|
124
|
-
class BogusETag # :nodoc:
  # A bogus end tag contributes nothing to the REXML tree: both arguments
  # are ignored and +nil+ is returned.
  def to_rexml_internal(parent, context)
    return nil
  end
end
|
129
|
-
end
|
130
|
-
# :startdoc:
|
@@ -1,166 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
require 'htree/htmlinfo'
|
3
|
-
require 'htree/regexp-util'
|
4
|
-
require 'htree/fstr'
|
5
|
-
|
6
|
-
module HTree # :nodoc:
|
7
|
-
module Pat # :nodoc:
  # Regular expressions used to tokenize HTML/XML source.
  #
  # Constants ending in +_C+ contain capture groups. Their unsuffixed
  # twins are derived with Regexp#disable_capture (defined outside this
  # file; presumably it yields the same pattern without captures).

  # --- names ---------------------------------------------------------
  NameChar = /[-A-Za-z0-9._:]/
  Name = /[A-Za-z_:]#{NameChar}*/
  Nmtoken = /#{NameChar}+/

  # --- comments and CDATA sections -----------------------------------
  Comment_C = /<!--(.*?)-->/m
  Comment = Comment_C.disable_capture
  CDATA_C = /<!\[CDATA\[(.*?)\]\]>/m
  CDATA = CDATA_C.disable_capture

  # --- attributes, from strictest to most permissive -----------------
  QuotedAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)')/
  QuotedAttr = QuotedAttr_C.disable_capture
  ValidAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)'|(#{NameChar}*))|(#{Nmtoken})/
  ValidAttr = ValidAttr_C.disable_capture
  InvalidAttr1_C = /(#{Name})\s*=\s*(?:'([^'<>]*)'|"([^"<>]*)"|([^\s<>"']*))|(#{Nmtoken})/
  InvalidAttr1 = InvalidAttr1_C.disable_capture
  InvalidAttr1End_C = /(#{Name})(?:\s*=\s*(?:'([^'<>]*)|"([^"<>]*)))/
  InvalidAttr1End = InvalidAttr1End_C.disable_capture

  # --- start tags ----------------------------------------------------
  QuotedStartTag_C = /<(#{Name})((?:\s+#{QuotedAttr})*)\s*>/
  QuotedStartTag = QuotedStartTag_C.disable_capture
  ValidStartTag_C = /<(#{Name})((?:\s+#{ValidAttr})*)\s*>/
  ValidStartTag = ValidStartTag_C.disable_capture
  InvalidStartTag_C = /<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*>/
  InvalidStartTag = InvalidStartTag_C.disable_capture
  StartTag = /#{QuotedStartTag}|#{ValidStartTag}|#{InvalidStartTag}/

  # --- empty-element tags --------------------------------------------
  QuotedEmptyTag_C = %r{<(#{Name})((?:\s+#{QuotedAttr})*)\s*/>}
  QuotedEmptyTag = QuotedEmptyTag_C.disable_capture
  ValidEmptyTag_C = %r{<(#{Name})((?:\s+#{ValidAttr})*)\s*/>}
  ValidEmptyTag = ValidEmptyTag_C.disable_capture
  InvalidEmptyTag_C = %r{<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*/>}
  InvalidEmptyTag = InvalidEmptyTag_C.disable_capture
  EmptyTag = /#{QuotedEmptyTag}|#{ValidEmptyTag}|#{InvalidEmptyTag}/

  # --- end tags ------------------------------------------------------
  EndTag_C = %r{</(#{Name})\s*>}
  EndTag = EndTag_C.disable_capture

  # --- XML declaration -----------------------------------------------
  XmlVersionNum = /[a-zA-Z0-9_.:-]+/
  XmlVersionInfo_C = /\s+version\s*=\s*(?:'(#{XmlVersionNum})'|"(#{XmlVersionNum})")/
  XmlVersionInfo = XmlVersionInfo_C.disable_capture
  XmlEncName = /[A-Za-z][A-Za-z0-9._-]*/
  XmlEncodingDecl_C = /\s+encoding\s*=\s*(?:"(#{XmlEncName})"|'(#{XmlEncName})')/
  XmlEncodingDecl = XmlEncodingDecl_C.disable_capture
  XmlSDDecl_C = /\s+standalone\s*=\s*(?:'(yes|no)'|"(yes|no)")/
  XmlSDDecl = XmlSDDecl_C.disable_capture
  XmlDecl_C = /<\?xml#{XmlVersionInfo_C}#{XmlEncodingDecl_C}?#{XmlSDDecl_C}?\s*\?>/
  XmlDecl = /<\?xml#{XmlVersionInfo}#{XmlEncodingDecl}?#{XmlSDDecl}?\s*\?>/

  # --- document type declaration -------------------------------------
  # xxx: internal DTD subset is not recognized: '[' (markupdecl | DeclSep)* ']' S?)?
  SystemLiteral_C = /"([^"]*)"|'([^']*)'/
  PubidLiteral_C = %r{"([\sa-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*)"|'([\sa-zA-Z0-9\-()+,./:=?;!*\#@$_%]*)'}
  ExternalID_C = /(?:SYSTEM|PUBLIC\s+#{PubidLiteral_C})(?:\s+#{SystemLiteral_C})?/
  DocType_C = /<!DOCTYPE\s+(#{Name})(?:\s+#{ExternalID_C})?\s*(?:\[.*?\]\s*)?>/m
  DocType = DocType_C.disable_capture

  # --- processing instructions ---------------------------------------
  XmlProcIns_C = /<\?(#{Name})(?:\s+(.*?))?\?>/m
  XmlProcIns = XmlProcIns_C.disable_capture
  #ProcIns = /<\?([^>]*)>/m
end
|
67
|
-
|
68
|
-
# Tokenizes +input+, yielding one two-element array per token:
# <tt>[kind, frozen_source_text]</tt>. The kinds are +:xmldecl+,
# +:doctype+, +:procins+, +:stag+, +:etag+, +:emptytag+, +:comment+,
# +:text_cdata_section+, +:text_pcdata+ and +:text_cdata_content+.
#
# +is_xml+ seeds the XML guess. It is switched on by an XML declaration,
# by an XHTML public identifier in the doctype, or when the first start
# tag is not one of a fixed list of HTML head-level element names.
#
# While not in XML mode, a start tag whose ElementContent entry is
# +:CDATA+ makes everything up to the matching end tag a single
# +:text_cdata_content+ token.
#
# Returns the two-element array <tt>[is_xml, is_html]</tt>.
def HTree.scan(input, is_xml=false)
  is_html = false
  raw_text_element = nil        # name of the element whose raw content is being skipped
  cursor = 0                    # offset just past the last token emitted
  awaiting_first_element = true

  # Capture-group numbers of the alternatives in the scanner regexp below.
  grp_xmldecl, grp_doctype, grp_procins, grp_quoted_stag, grp_quoted_emptytag,
    grp_stag, grp_etag, grp_emptytag, grp_comment, grp_cdata = (1..10).to_a

  input.scan(/(#{Pat::XmlDecl})
             |(#{Pat::DocType})
             |(#{Pat::XmlProcIns})
             |(#{Pat::QuotedStartTag})
             |(#{Pat::QuotedEmptyTag})
             |(#{Pat::StartTag})
             |(#{Pat::EndTag})
             |(#{Pat::EmptyTag})
             |(#{Pat::Comment})
             |(#{Pat::CDATA})
             /ox) do
    # Keep the MatchData: later regexp operations overwrite $~.
    match = $~
    token = match[0]
    token_start = match.begin(0)

    if raw_text_element
      # Inside raw content only the matching end tag counts; every other
      # match is left alone so that it stays part of the text run.
      if match.begin(grp_etag) && token[Pat::Name] == raw_text_element
        if cursor < token_start
          yield [:text_cdata_content, HTree.frozen_string(input[cursor...token_start])]
        end
        yield [:etag, HTree.frozen_string(token)]
        cursor = match.end(0)
        raw_text_element = nil
      end
      next
    end

    # Text between the previous token and this one.
    if cursor < token_start
      yield [:text_pcdata, HTree.frozen_string(input[cursor...token_start])]
    end
    cursor = match.end(0)

    case
    when match.begin(grp_xmldecl)
      yield [:xmldecl, HTree.frozen_string(token)]
      is_xml = true
    when match.begin(grp_doctype)
      # Re-match with the capturing pattern to pull out the parts.
      Pat::DocType_C =~ token
      root_element_name = Regexp.last_match(1)
      public_identifier = Regexp.last_match(2) || Regexp.last_match(3)
      is_html = true if /\Ahtml\z/i =~ root_element_name
      is_xml = true if public_identifier && %r{\A-//W3C//DTD XHTML } =~ public_identifier
      yield [:doctype, HTree.frozen_string(token)]
    when match.begin(grp_procins)
      yield [:procins, HTree.frozen_string(token)]
    when match.begin(grp_stag) || match.begin(grp_quoted_stag)
      yield [:stag, HTree.frozen_string(token)]
      tagname = token[Pat::Name]
      if awaiting_first_element
        if /\A(?:html|head|title|isindex|base|script|style|meta|link|object)\z/i =~ tagname
          is_html = true
        else
          is_xml = true
        end
        awaiting_first_element = false
      end
      raw_text_element = tagname if !is_xml && ElementContent[tagname] == :CDATA
    when match.begin(grp_etag)
      yield [:etag, HTree.frozen_string(token)]
    when match.begin(grp_emptytag) || match.begin(grp_quoted_emptytag)
      yield [:emptytag, HTree.frozen_string(token)]
      awaiting_first_element = false
      #is_xml = true
    when match.begin(grp_comment)
      yield [:comment, HTree.frozen_string(token)]
    when match.begin(grp_cdata)
      yield [:text_cdata_section, HTree.frozen_string(token)]
    else
      raise Exception, "unknown match [bug]"
    end
  end

  # Whatever follows the last token is trailing text.
  input_end = input.length
  if cursor < input_end
    trailing_kind = raw_text_element ? :text_cdata_content : :text_pcdata
    yield [trailing_kind, HTree.frozen_string(input[cursor...input_end])]
  end

  return is_xml, is_html
end
|
165
|
-
end
|
166
|
-
# :startdoc:
|
@@ -1,111 +0,0 @@
|
|
1
|
-
# :stopdoc:
|
2
|
-
require 'htree/raw_string'
|
3
|
-
require 'htree/text'
|
4
|
-
require 'htree/scan' # for Pat::Name and Pat::Nmtoken
|
5
|
-
require 'htree/context'
|
6
|
-
require 'htree/name'
|
7
|
-
require 'htree/fstr'
|
8
|
-
|
9
|
-
module HTree # :nodoc:
|
10
|
-
class STag # :nodoc:
  attr_reader :attributes, :inherited_context, :context

  # Builds a start tag.
  #
  # +name+ is either a Name or a string that is parsed against the tag's
  # own context. +attributes+ is a list of <tt>[name, value]</tt> pairs;
  # values are coerced to Text. +inherited_context+ is the namespace
  # context the tag lives in.
  #
  # Raises HTree::Error for an attribute name that is neither a Name nor
  # well-formed, and for an unprefixed attribute that resolves to a
  # non-empty namespace URI. Raises ArgumentError when one prefix is bound
  # to two different URIs by the supplied Name objects.
  def initialize(name, attributes=[], inherited_context=DefaultContext)
    init_raw_string

    # normalize xml declaration name and attribute value.
    attributes = attributes.map do |aname, val|
      unless Name === aname
        if /\A(?:#{Pat::Name}?\{.*\})?#{Pat::Nmtoken}\z/o !~ aname
          raise HTree::Error, "invalid attribute name: #{aname.inspect}"
        end
        # xmlns declarations are parsed right away; they are needed below
        # to build the context other names are resolved against.
        aname = Name.parse_attribute_name(aname, nil) if /\Axmlns(?:\z|:)/ =~ aname
      end
      val = val.to_node if HTree::Location === val
      val = Text.new(val) unless Text === val
      [aname, val]
    end

    @inherited_context = inherited_context
    @xmlns_decls = {}

    # validate namespace consistency of given Name objects.
    @xmlns_decls[name.namespace_prefix] = name.namespace_uri if Name === name

    attributes.each do |aname, _text|
      next unless Name === aname
      next if aname.xmlns?
      next unless aname.namespace_prefix && aname.namespace_uri

      if @xmlns_decls.include? aname.namespace_prefix
        if @xmlns_decls[aname.namespace_prefix] != aname.namespace_uri
          raise ArgumentError, "inconsistent namespace use: #{aname.namespace_prefix} is used as #{@xmlns_decls[aname.namespace_prefix]} and #{aname.namespace_uri}"
        end
      else
        @xmlns_decls[aname.namespace_prefix] = aname.namespace_uri
      end
    end

    # Explicit xmlns attributes only fill in prefixes not bound above.
    attributes.each do |aname, text|
      next unless Name === aname
      next unless aname.xmlns?
      next if @xmlns_decls.include? aname.local_name

      # A missing local name denotes the default namespace (key nil).
      declared_prefix = aname.local_name || nil
      @xmlns_decls[declared_prefix] = text.to_s
    end

    @context = make_context(@inherited_context)

    @name = Name === name ? name : Name.parse_element_name(name, @context)

    @attributes = attributes.map do |aname, text|
      aname = Name.parse_attribute_name(aname, @context) unless Name === aname
      if !aname.namespace_prefix && !aname.namespace_uri.empty?
        # xxx: should recover error?
        raise HTree::Error, "global attribute without namespace prefix: #{aname.inspect}"
      end
      [aname, text]
    end
    @attributes.freeze
  end

  # The element's name, as stored by the constructor.
  def element_name
    @name
  end

  # Returns +inherited_context+ with this tag's namespace declarations
  # substituted in.
  def make_context(inherited_context)
    inherited_context.subst_namespaces(@xmlns_decls)
  end

  # Yields each namespace binding of this tag as (prefix, uri).
  # Returns +nil+.
  def each_namespace_attribute
    @xmlns_decls.each do |prefix, uri|
      yield prefix, uri
    end
    nil
  end

  # Yields each attribute as (name, text), skipping xmlns declarations.
  # Returns +nil+.
  def each_attribute
    @attributes.each do |aname, text|
      yield aname, text unless aname.xmlns?
    end
    nil
  end
end
|
102
|
-
|
103
|
-
class ETag # :nodoc:
  attr_reader :qualified_name

  # Builds an end tag for +qualified_name+. The name is stored through
  # HTree.frozen_string and the tag starts without recorded source text.
  def initialize(qualified_name)
    init_raw_string
    frozen_name = HTree.frozen_string(qualified_name)
    @qualified_name = frozen_name
  end
end
|
110
|
-
end
|
111
|
-
# :startdoc:
|