feedtools 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +11 -0
- data/lib/feed_tools.rb +2496 -810
- data/lib/feed_tools/vendor/builder.rb +2 -0
- data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
- data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
- data/lib/feed_tools/vendor/htree.rb +97 -0
- data/lib/feed_tools/vendor/htree/container.rb +10 -0
- data/lib/feed_tools/vendor/htree/context.rb +67 -0
- data/lib/feed_tools/vendor/htree/display.rb +27 -0
- data/lib/feed_tools/vendor/htree/doc.rb +149 -0
- data/lib/feed_tools/vendor/htree/elem.rb +262 -0
- data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
- data/lib/feed_tools/vendor/htree/equality.rb +218 -0
- data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
- data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
- data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
- data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
- data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
- data/lib/feed_tools/vendor/htree/loc.rb +367 -0
- data/lib/feed_tools/vendor/htree/modules.rb +48 -0
- data/lib/feed_tools/vendor/htree/name.rb +124 -0
- data/lib/feed_tools/vendor/htree/output.rb +207 -0
- data/lib/feed_tools/vendor/htree/parse.rb +407 -0
- data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
- data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
- data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
- data/lib/feed_tools/vendor/htree/scan.rb +166 -0
- data/lib/feed_tools/vendor/htree/tag.rb +111 -0
- data/lib/feed_tools/vendor/htree/template.rb +909 -0
- data/lib/feed_tools/vendor/htree/text.rb +115 -0
- data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
- data/rakefile +1 -1
- data/test/rss_test.rb +97 -0
- metadata +30 -1
@@ -0,0 +1,124 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'htree/modules'
|
3
|
+
require 'htree/fstr'
|
4
|
+
|
5
|
+
module HTree # :nodoc:
|
6
|
+
module Node # :nodoc:
  # raw_string returns the source string recorded by parsing.
  # It returns +nil+ if the node was not constructed via parsing.
  #
  # The text is collected into a local buffer and that buffer is what gets
  # returned; the return value of raw_string_internal is deliberately
  # ignored, since implementations only promise to append to the buffer.
  def raw_string
    buffer = ''.dup
    # A node without a recorded raw string throws :raw_string_tag, which
    # abandons the traversal and makes this method fall through to nil.
    catch(:raw_string_tag) {
      raw_string_internal(buffer)
      return buffer
    }
    nil
  end
end
|
16
|
+
|
17
|
+
class Doc # :nodoc:
  # Appends the recorded source text of every child, in order, to +result+
  # and returns +result+ (not the children array that Array#each yields).
  # Propagates the :raw_string_tag throw of any child that has no recorded
  # source text.
  def raw_string_internal(result)
    @children.each {|n|
      n.raw_string_internal(result)
    }
    result
  end
end
|
24
|
+
|
25
|
+
class Elem # :nodoc:
  # Appends the recorded source text of the start tag, of each child, and
  # of the end tag (when there is one) to +result+, then returns +result+.
  # The explicit return matters for elements without an end tag: the
  # trailing modifier-if would otherwise make the method evaluate to nil.
  def raw_string_internal(result)
    @stag.raw_string_internal(result)
    @children.each {|n| n.raw_string_internal(result) }
    @etag.raw_string_internal(result) if @etag
    result
  end
end
|
32
|
+
|
33
|
+
module Tag # :nodoc:
  # Marks the tag as having no recorded source text.
  def init_raw_string
    @raw_string = nil
  end

  # Records +arg+ as the tag's source text, stored via HTree.frozen_string.
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source text to +result+.
  # Throws :raw_string_tag when no source text has been recorded.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
|
41
|
+
|
42
|
+
module Leaf # :nodoc:
  # Marks the leaf as having no recorded source text.
  def init_raw_string
    @raw_string = nil
  end

  # Records +arg+ as the leaf's source text, stored via HTree.frozen_string.
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source text to +result+.
  # Throws :raw_string_tag when no source text has been recorded.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
|
50
|
+
|
51
|
+
class Text # :nodoc:
  # Records the source text of this text node.
  # When +arg+ is equal to the stored rcdata, the rcdata object itself is
  # reused as the raw string; otherwise the inherited writer is used.
  def raw_string=(arg)
    return super unless arg == @rcdata
    @raw_string = @rcdata
  end
end
|
60
|
+
|
61
|
+
module Node # :nodoc:
  # Placeholder for node types that do not supply their own
  # eliminate_raw_string: always raises NotImplementedError.
  def eliminate_raw_string
    raise(NotImplementedError)
  end
end
|
66
|
+
|
67
|
+
class Doc # :nodoc:
  # Builds a new Doc from the result of calling eliminate_raw_string on
  # each child.
  def eliminate_raw_string
    stripped_children = @children.map do |child|
      child.eliminate_raw_string
    end
    Doc.new(stripped_children)
  end
end
|
72
|
+
|
73
|
+
class Elem # :nodoc:
  # Builds a new Elem via Elem.new! from the start tag, the children and the
  # end tag, each obtained through its own eliminate_raw_string.
  # The children argument is nil when @empty is set, and the end tag
  # argument is whatever falsy value @etag holds when there is no end tag.
  def eliminate_raw_string
    fresh_stag = @stag.eliminate_raw_string
    fresh_children =
      if @empty
        nil
      else
        @children.map {|child| child.eliminate_raw_string }
      end
    fresh_etag = @etag && @etag.eliminate_raw_string
    Elem.new!(fresh_stag, fresh_children, fresh_etag)
  end
end
|
81
|
+
|
82
|
+
class Text # :nodoc:
  # Builds a new Text from this node's rcdata via Text.new_internal,
  # passing nothing else along.
  def eliminate_raw_string
    source_rcdata = @rcdata
    Text.new_internal(source_rcdata)
  end
end
|
87
|
+
|
88
|
+
class STag # :nodoc:
  # Builds a new STag with the same element name, attributes and inherited
  # context but without a recorded raw string.
  #
  # The element name is read from @name, which is the instance variable
  # STag#initialize assigns (and element_name returns). STag never assigns
  # @qualified_name, so reading it here yielded nil and the copy could not
  # be constructed from the original's name.
  def eliminate_raw_string
    STag.new(@name, @attributes, @inherited_context)
  end
end
|
93
|
+
|
94
|
+
class ETag # :nodoc:
  # Builds a new end tag of the receiver's own class (so subclasses are
  # preserved) from the qualified name alone.
  def eliminate_raw_string
    tag_class = self.class
    tag_class.new(@qualified_name)
  end
end
|
99
|
+
|
100
|
+
class XMLDecl # :nodoc:
  # Builds a new XMLDecl from the version, encoding and standalone values
  # of this declaration.
  def eliminate_raw_string
    fields = [@version, @encoding, @standalone]
    XMLDecl.new(*fields)
  end
end
|
105
|
+
|
106
|
+
class DocType # :nodoc:
  # Builds a new DocType from the root element name and the public and
  # system identifiers of this declaration.
  def eliminate_raw_string
    fields = [@root_element_name, @public_identifier, @system_identifier]
    DocType.new(*fields)
  end
end
|
111
|
+
|
112
|
+
class ProcIns # :nodoc:
  # Builds a new ProcIns from this instruction's target and content.
  def eliminate_raw_string
    fields = [@target, @content]
    ProcIns.new(*fields)
  end
end
|
117
|
+
|
118
|
+
class Comment # :nodoc:
  # Builds a new Comment from this comment's content.
  def eliminate_raw_string
    body = @content
    Comment.new(body)
  end
end
|
123
|
+
end
|
124
|
+
# :startdoc:
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# = REXML Tree Generator
|
2
|
+
#
|
3
|
+
# HTree::Node#to_rexml is used for converting HTree to REXML.
|
4
|
+
#
|
5
|
+
# == Method Summary
|
6
|
+
#
|
7
|
+
# - HTree::Node#to_rexml -> REXML::Child
|
8
|
+
#
|
9
|
+
# == Example
|
10
|
+
#
|
11
|
+
# HTree.parse(...).to_rexml #=> REXML::Document
|
12
|
+
#
|
13
|
+
# == Comparison between HTree and REXML.
|
14
|
+
#
|
15
|
+
# - The HTree parser is a permissive HTML/XML parser.
|
16
|
+
# The REXML parser is a strict XML parser.
|
17
|
+
# HTree is recommended if you need to parse real-world HTML.
|
18
|
+
# REXML is recommended if you need strict error checking.
|
19
|
+
# - HTree object is immutable.
|
20
|
+
# REXML object is mutable.
|
21
|
+
# REXML should be used if you need modification.
|
22
|
+
#
|
23
|
+
|
24
|
+
# :stopdoc:
|
25
|
+
require 'htree/modules'
|
26
|
+
require 'htree/output' # HTree::DocType#generate_content
|
27
|
+
|
28
|
+
module HTree # :nodoc:
|
29
|
+
module Node # :nodoc:
  # Converts this node to a REXML tree.
  # rexml/document is required on each call rather than at file load time;
  # the conversion itself is delegated to to_rexml_internal with no parent
  # node and DefaultContext as the starting context.
  def to_rexml
    require 'rexml/document'
    no_parent = nil
    to_rexml_internal(no_parent, DefaultContext)
  end
end
|
36
|
+
|
37
|
+
class Doc # :nodoc:
  # Creates a REXML::Document and lets every child add itself to it.
  # +parent+ must be nil, because a document is always the root; anything
  # else raises ArgumentError. +context+ is handed to each child unchanged.
  # Returns the new REXML::Document.
  def to_rexml_internal(parent, context)
    if parent != nil
      raise ArgumentError, "parent must be nil"
    end
    document = REXML::Document.new
    self.children.each do |child|
      child.to_rexml_internal(document, context)
    end
    document
  end
end
|
47
|
+
|
48
|
+
class Elem # :nodoc:
  # Creates a REXML::Element for this element under +parent+, copies its
  # attributes, declares every namespace binding that +context+ does not
  # already provide, and then converts the children with the extended
  # context. Returns the new REXML::Element.
  def to_rexml_internal(parent, context)
    ename = self.element_name
    # prefix => uri bindings that must be declared on this element; a nil
    # key stands for the default namespace.
    pending_ns = {}
    unless context.namespace_uri(ename.namespace_prefix) == ename.namespace_uri
      pending_ns[ename.namespace_prefix] = ename.namespace_uri
    end

    tag_label =
      if ename.namespace_prefix
        "#{ename.namespace_prefix}:#{ename.local_name}"
      else
        ename.local_name
      end
    element = REXML::Element.new(tag_label, parent)

    self.each_attribute do |aname, atext|
      if aname.namespace_prefix
        unless context.namespace_uri(aname.namespace_prefix) == aname.namespace_uri
          pending_ns[aname.namespace_prefix] = aname.namespace_uri
        end
        element.add_attribute("#{aname.namespace_prefix}:#{aname.local_name}", atext.to_s)
      else
        element.add_attribute(aname.local_name, atext.to_s)
      end
    end

    pending_ns.each do |ns_prefix, ns_uri|
      if ns_prefix
        element.add_namespace(ns_prefix, ns_uri)
      else
        element.add_namespace(ns_uri)
      end
    end

    # Children are converted in a context that includes the bindings
    # declared on this element.
    child_context = context.subst_namespaces(pending_ns)
    self.children.each do |child|
      child.to_rexml_internal(element, child_context)
    end
    element
  end
end
|
88
|
+
|
89
|
+
class Text # :nodoc:
  # Creates a REXML::Text under +parent+ from this node's rcdata, after
  # replacing every '<' and '>' with its entry in Encoder::ChRef.
  # The positional arguments given to REXML::Text.new are reproduced as is.
  # +context+ is not used.
  def to_rexml_internal(parent, context)
    escaped = self.rcdata.gsub(/[<>]/) {|bracket| Encoder::ChRef[bracket] }
    REXML::Text.new(escaped, true, parent, true)
  end
end
|
95
|
+
|
96
|
+
class XMLDecl # :nodoc:
  # Creates a REXML::XMLDecl from this declaration's version, encoding and
  # standalone values, appends it to +parent+ when one is given, and
  # returns it. +context+ is not used.
  def to_rexml_internal(parent, context)
    decl = REXML::XMLDecl.new(self.version, self.encoding, self.standalone)
    if parent
      parent << decl
    end
    decl
  end
end
|
103
|
+
|
104
|
+
class DocType # :nodoc:
  # Creates a REXML::DocType under +parent+ from a two-element array holding
  # the root element name and the result of generate_content.
  # +context+ is not used.
  def to_rexml_internal(parent, context)
    spec = [self.root_element_name, self.generate_content]
    REXML::DocType.new(spec, parent)
  end
end
|
109
|
+
|
110
|
+
class ProcIns # :nodoc:
  # Creates a REXML::Instruction from this instruction's target and
  # content, appends it to +parent+ when one is given, and returns it.
  # +context+ is not used.
  def to_rexml_internal(parent, context)
    instruction = REXML::Instruction.new(self.target, self.content)
    if parent
      parent << instruction
    end
    instruction
  end
end
|
117
|
+
|
118
|
+
class Comment # :nodoc:
  # Creates a REXML::Comment under +parent+ from this comment's content.
  # +context+ is not used.
  def to_rexml_internal(parent, context)
    body = self.content
    REXML::Comment.new(body, parent)
  end
end
|
123
|
+
|
124
|
+
class BogusETag # :nodoc:
  # A bogus end tag contributes nothing to the REXML tree: both arguments
  # are ignored and nil is returned.
  def to_rexml_internal(parent, context)
    return nil
  end
end
|
129
|
+
end
|
130
|
+
# :startdoc:
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'htree/htmlinfo'
|
3
|
+
require 'htree/regexp-util'
|
4
|
+
require 'htree/fstr'
|
5
|
+
|
6
|
+
module HTree # :nodoc:
|
7
|
+
module Pat # :nodoc:
  # Lexical patterns used to tokenize HTML/XML source text.
  #
  # Naming convention visible below: a constant ending in _C contains
  # capture groups, and the constant with the same name minus the suffix is
  # derived from it with Regexp#disable_capture (from htree/regexp-util;
  # presumably it yields the same pattern without capturing groups so it
  # can be embedded in a larger regexp -- TODO confirm in regexp-util).

  # A single name character (ASCII letters, digits, '-', '.', '_', ':').
  NameChar = /[-A-Za-z0-9._:]/
  # A name: a letter, '_' or ':' followed by any number of name characters.
  Name = /[A-Za-z_:]#{NameChar}*/
  # A name token: one or more name characters.
  Nmtoken = /#{NameChar}+/

  # <!-- ... -->; group 1 is the comment body (non-greedy, spans lines).
  Comment_C = /<!--(.*?)-->/m
  Comment = Comment_C.disable_capture
  # <![CDATA[ ... ]]>; group 1 is the section body (non-greedy, spans lines).
  CDATA_C = /<!\[CDATA\[(.*?)\]\]>/m
  CDATA = CDATA_C.disable_capture

  # name = "value" or name = 'value'.
  # Groups: 1 name, 2 double-quoted value, 3 single-quoted value.
  QuotedAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)')/
  QuotedAttr = QuotedAttr_C.disable_capture
  # Like QuotedAttr, but also accepts an unquoted value made of name
  # characters (group 4) and a bare name token with no value (group 5).
  ValidAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)'|(#{NameChar}*))|(#{Nmtoken})/
  ValidAttr = ValidAttr_C.disable_capture
  # Permissive attribute: quoted values may not contain '<' or '>', and an
  # unquoted value (group 4) is any run without whitespace, '<', '>' or
  # quotes. Groups: 1 name, 2 single-quoted, 3 double-quoted, 4 unquoted,
  # 5 bare name token.
  InvalidAttr1_C = /(#{Name})\s*=\s*(?:'([^'<>]*)'|"([^"<>]*)"|([^\s<>"']*))|(#{Nmtoken})/
  InvalidAttr1 = InvalidAttr1_C.disable_capture
  # A final attribute whose opening quote is never closed.
  # Groups: 1 name, 2 value after "'", 3 value after '"'.
  InvalidAttr1End_C = /(#{Name})(?:\s*=\s*(?:'([^'<>]*)|"([^"<>]*)))/
  InvalidAttr1End = InvalidAttr1End_C.disable_capture

  # Start tags, from strictest to most permissive attribute syntax.
  # Groups: 1 tag name, 2 the attribute text (InvalidStartTag_C also has
  # group 3 for a trailing unterminated attribute).
  QuotedStartTag_C = /<(#{Name})((?:\s+#{QuotedAttr})*)\s*>/
  QuotedStartTag = QuotedStartTag_C.disable_capture
  ValidStartTag_C = /<(#{Name})((?:\s+#{ValidAttr})*)\s*>/
  ValidStartTag = ValidStartTag_C.disable_capture
  InvalidStartTag_C = /<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*>/
  InvalidStartTag = InvalidStartTag_C.disable_capture
  # Any start tag; the alternatives are tried in the order listed.
  StartTag = /#{QuotedStartTag}|#{ValidStartTag}|#{InvalidStartTag}/

  # Empty-element tags (ending in "/>"), mirroring the start tag patterns.
  QuotedEmptyTag_C = %r{<(#{Name})((?:\s+#{QuotedAttr})*)\s*/>}
  QuotedEmptyTag = QuotedEmptyTag_C.disable_capture
  ValidEmptyTag_C = %r{<(#{Name})((?:\s+#{ValidAttr})*)\s*/>}
  ValidEmptyTag = ValidEmptyTag_C.disable_capture
  InvalidEmptyTag_C = %r{<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*/>}
  InvalidEmptyTag = InvalidEmptyTag_C.disable_capture
  # Any empty-element tag; the alternatives are tried in the order listed.
  EmptyTag = /#{QuotedEmptyTag}|#{ValidEmptyTag}|#{InvalidEmptyTag}/

  # </name>; group 1 is the tag name.
  EndTag_C = %r{</(#{Name})\s*>}
  EndTag = EndTag_C.disable_capture

  # Pieces of the XML declaration. Each quoted value is captured in one of
  # two groups depending on which quote character was used.
  XmlVersionNum = /[a-zA-Z0-9_.:-]+/
  XmlVersionInfo_C = /\s+version\s*=\s*(?:'(#{XmlVersionNum})'|"(#{XmlVersionNum})")/
  XmlVersionInfo = XmlVersionInfo_C.disable_capture
  XmlEncName = /[A-Za-z][A-Za-z0-9._-]*/
  XmlEncodingDecl_C = /\s+encoding\s*=\s*(?:"(#{XmlEncName})"|'(#{XmlEncName})')/
  XmlEncodingDecl = XmlEncodingDecl_C.disable_capture
  XmlSDDecl_C = /\s+standalone\s*=\s*(?:'(yes|no)'|"(yes|no)")/
  XmlSDDecl = XmlSDDecl_C.disable_capture
  # <?xml version=... [encoding=...] [standalone=...] ?>
  # XmlDecl is assembled from the non-capturing parts rather than derived
  # with disable_capture.
  XmlDecl_C = /<\?xml#{XmlVersionInfo_C}#{XmlEncodingDecl_C}?#{XmlSDDecl_C}?\s*\?>/
  XmlDecl = /<\?xml#{XmlVersionInfo}#{XmlEncodingDecl}?#{XmlSDDecl}?\s*\?>/

  # xxx: internal DTD subset is not recognized: '[' (markupdecl | DeclSep)* ']' S?)?
  # (DocType_C below skips over a bracketed section without capturing it.)
  SystemLiteral_C = /"([^"]*)"|'([^']*)'/
  PubidLiteral_C = %r{"([\sa-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*)"|'([\sa-zA-Z0-9\-()+,./:=?;!*\#@$_%]*)'}
  ExternalID_C = /(?:SYSTEM|PUBLIC\s+#{PubidLiteral_C})(?:\s+#{SystemLiteral_C})?/
  # <!DOCTYPE name [external id] [internal subset]>
  # Groups: 1 root element name, 2 or 3 public identifier (double- or
  # single-quoted), 4 or 5 system identifier (double- or single-quoted).
  DocType_C = /<!DOCTYPE\s+(#{Name})(?:\s+#{ExternalID_C})?\s*(?:\[.*?\]\s*)?>/m
  DocType = DocType_C.disable_capture

  # <?target content?>; groups: 1 target, 2 content (may be absent).
  XmlProcIns_C = /<\?(#{Name})(?:\s+(.*?))?\?>/m
  XmlProcIns = XmlProcIns_C.disable_capture
  #ProcIns = /<\?([^>]*)>/m
end
|
67
|
+
|
68
|
+
# Tokenizes +input+ and yields each token to the given block as a
# two-element array [type, string], where the string has been passed
# through HTree.frozen_string.
#
# Token types yielded: :xmldecl, :doctype, :procins, :stag, :etag,
# :emptytag, :comment, :text_cdata_section, :text_pcdata and
# :text_cdata_content. Text between markup matches is yielded as
# :text_pcdata, or as :text_cdata_content while inside an element whose
# ElementContent entry is :CDATA.
#
# +is_xml+ is the initial value of the XML flag. The flag is switched on by
# an XML declaration, by a doctype whose public identifier starts with
# "-//W3C//DTD XHTML ", or by a first start tag whose name is not in the
# HTML-specific list below.
#
# Returns the two values is_xml, is_html.
def HTree.scan(input, is_xml=false)
  is_html = false
  # Name of the currently open element with CDATA content, or nil.
  cdata_content = nil
  # Offset in +input+ where the not-yet-yielded text begins.
  text_start = 0
  first_element = true
  # Capture group numbers of the alternatives in the regexp below. This
  # numbering relies on the interpolated Pat patterns adding no capture
  # groups of their own.
  index_xmldecl = 1
  index_doctype = 2
  index_xmlprocins = 3
  index_quotedstarttag = 4
  index_quotedemptytag = 5
  index_starttag = 6
  index_endtag = 7
  index_emptytag = 8
  index_comment = 9
  index_cdata = 10
  input.scan(/(#{Pat::XmlDecl})
             |(#{Pat::DocType})
             |(#{Pat::XmlProcIns})
             |(#{Pat::QuotedStartTag})
             |(#{Pat::QuotedEmptyTag})
             |(#{Pat::StartTag})
             |(#{Pat::EndTag})
             |(#{Pat::EmptyTag})
             |(#{Pat::Comment})
             |(#{Pat::CDATA})
             /ox) {
    match = $~
    if cdata_content
      # Inside CDATA content every match is ignored (it stays part of the
      # pending text) except an end tag naming the open element.
      str = $&
      if match.begin(index_endtag) && str[Pat::Name] == cdata_content
        text_end = match.begin(0)
        if text_start < text_end
          yield [:text_cdata_content, HTree.frozen_string(input[text_start...text_end])]
        end
        yield [:etag, HTree.frozen_string(str)]
        text_start = match.end(0)
        cdata_content = nil
      end
    else
      str = match[0]
      # Flush the text that precedes this piece of markup.
      text_end = match.begin(0)
      if text_start < text_end
        yield [:text_pcdata, HTree.frozen_string(input[text_start...text_end])]
      end
      text_start = match.end(0)
      if match.begin(index_xmldecl)
        yield [:xmldecl, HTree.frozen_string(str)]
        is_xml = true
      elsif match.begin(index_doctype)
        # Match again with the capturing pattern to extract the parts.
        Pat::DocType_C =~ str
        root_element_name = $1
        public_identifier = $2 || $3
        system_identifier = $4 || $5
        is_html = true if /\Ahtml\z/i =~ root_element_name
        is_xml = true if public_identifier && %r{\A-//W3C//DTD XHTML } =~ public_identifier
        yield [:doctype, HTree.frozen_string(str)]
      elsif match.begin(index_xmlprocins)
        yield [:procins, HTree.frozen_string(str)]
      elsif match.begin(index_starttag) || match.begin(index_quotedstarttag)
        # NOTE(review): the local +stag+ is not read again in this method.
        yield stag = [:stag, HTree.frozen_string(str)]
        tagname = str[Pat::Name]
        if first_element
          # Heuristic on the first start tag: these names mark the input
          # as HTML, any other name marks it as XML.
          if /\A(?:html|head|title|isindex|base|script|style|meta|link|object)\z/i =~ tagname
            is_html = true
          else
            is_xml = true
          end
          first_element = false
        end
        # Outside XML, an element declared as :CDATA in ElementContent
        # switches the scanner into CDATA-content mode.
        if !is_xml && ElementContent[tagname] == :CDATA
          cdata_content = tagname
        end
      elsif match.begin(index_endtag)
        yield [:etag, HTree.frozen_string(str)]
      elsif match.begin(index_emptytag) || match.begin(index_quotedemptytag)
        yield [:emptytag, HTree.frozen_string(str)]
        first_element = false
        #is_xml = true
      elsif match.begin(index_comment)
        yield [:comment, HTree.frozen_string(str)]
      elsif match.begin(index_cdata)
        yield [:text_cdata_section, HTree.frozen_string(str)]
      else
        raise Exception, "unknown match [bug]"
      end
    end
  }
  # Flush whatever text remains after the last match.
  text_end = input.length
  if text_start < text_end
    if cdata_content
      yield [:text_cdata_content, HTree.frozen_string(input[text_start...text_end])]
    else
      yield [:text_pcdata, HTree.frozen_string(input[text_start...text_end])]
    end
  end
  return is_xml, is_html
end
|
165
|
+
end
|
166
|
+
# :startdoc:
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'htree/raw_string'
|
3
|
+
require 'htree/text'
|
4
|
+
require 'htree/scan' # for Pat::Name and Pat::Nmtoken
|
5
|
+
require 'htree/context'
|
6
|
+
require 'htree/name'
|
7
|
+
require 'htree/fstr'
|
8
|
+
|
9
|
+
module HTree # :nodoc:
|
10
|
+
class STag # :nodoc:
  attr_reader :attributes, :inherited_context, :context

  # Builds a start tag.
  #
  # +name+ is either a Name or a string that is parsed as an element name in
  # the context created by this tag. +attributes+ is a list of
  # [name, value] pairs; names may be Name objects or strings, values are
  # wrapped in Text unless they already are one. +inherited_context+ is the
  # namespace context the tag is placed in.
  #
  # Raises HTree::Error for a syntactically invalid attribute name or for a
  # global attribute without a namespace prefix, and ArgumentError when one
  # prefix is bound to two different namespace URIs.
  def initialize(name, attributes=[], inherited_context=DefaultContext)
    init_raw_string

    # Normalize xmlns declaration names and attribute values.
    normalized = attributes.map do |aname, val|
      unless Name === aname
        if /\A(?:#{Pat::Name}?\{.*\})?#{Pat::Nmtoken}\z/o !~ aname
          raise HTree::Error, "invalid attribute name: #{aname.inspect}"
        end
        aname = Name.parse_attribute_name(aname, nil) if /\Axmlns(?:\z|:)/ =~ aname
      end
      val = val.to_node if HTree::Location === val
      val = Text.new(val) unless Text === val
      [aname, val]
    end

    @inherited_context = inherited_context
    @xmlns_decls = {}

    # Validate namespace consistency of the given Name objects: bindings
    # implied by the element name and by non-xmlns attribute names.
    @xmlns_decls[name.namespace_prefix] = name.namespace_uri if Name === name
    normalized.each do |aname, _text|
      next unless Name === aname
      next if aname.xmlns?
      next unless aname.namespace_prefix && aname.namespace_uri
      if !@xmlns_decls.include?(aname.namespace_prefix)
        @xmlns_decls[aname.namespace_prefix] = aname.namespace_uri
      elsif @xmlns_decls[aname.namespace_prefix] != aname.namespace_uri
        raise ArgumentError, "inconsistent namespace use: #{aname.namespace_prefix} is used as #{@xmlns_decls[aname.namespace_prefix]} and #{aname.namespace_uri}"
      end
    end

    # Explicit xmlns attributes fill in the prefixes not bound above; a
    # missing local name maps to the nil key, i.e. the default namespace.
    normalized.each do |aname, text|
      next unless Name === aname
      next unless aname.xmlns?
      next if @xmlns_decls.include?(aname.local_name)
      @xmlns_decls[aname.local_name || nil] = text.to_s
    end

    @context = make_context(@inherited_context)

    @name = Name === name ? name : Name.parse_element_name(name, @context)

    resolved = normalized.map do |aname, text|
      aname = Name.parse_attribute_name(aname, @context) unless Name === aname
      if !aname.namespace_prefix && !aname.namespace_uri.empty?
        # xxx: should recover error?
        raise HTree::Error, "global attribute without namespace prefix: #{aname.inspect}"
      end
      [aname, text]
    end
    @attributes = resolved
    @attributes.freeze
  end

  # The Name of the element this tag starts.
  def element_name
    @name
  end

  # Returns +inherited_context+ extended with this tag's namespace
  # declarations.
  def make_context(inherited_context)
    inherited_context.subst_namespaces(@xmlns_decls)
  end

  # Yields each namespace declaration of this tag as prefix, uri.
  # Returns nil.
  def each_namespace_attribute
    @xmlns_decls.each do |prefix, uri|
      yield prefix, uri
    end
    nil
  end

  # Yields each attribute that is not an xmlns declaration as name, text.
  # Returns nil.
  def each_attribute
    @attributes.each do |aname, text|
      yield aname, text unless aname.xmlns?
    end
    nil
  end
end
|
102
|
+
|
103
|
+
class ETag # :nodoc:
  attr_reader :qualified_name

  # Builds an end tag for +qualified_name+, which is stored via
  # HTree.frozen_string. The tag starts without a recorded raw string.
  def initialize(qualified_name)
    init_raw_string
    @qualified_name = HTree.frozen_string(qualified_name)
  end
end
|
110
|
+
end
|
111
|
+
# :startdoc:
|