rubysl-rexml 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/lib/rexml/attlistdecl.rb +56 -56
- data/lib/rexml/attribute.rb +155 -149
- data/lib/rexml/cdata.rb +48 -48
- data/lib/rexml/child.rb +82 -82
- data/lib/rexml/comment.rb +59 -59
- data/lib/rexml/doctype.rb +22 -24
- data/lib/rexml/document.rb +185 -129
- data/lib/rexml/dtd/attlistdecl.rb +7 -7
- data/lib/rexml/dtd/dtd.rb +41 -41
- data/lib/rexml/dtd/elementdecl.rb +13 -13
- data/lib/rexml/dtd/entitydecl.rb +49 -49
- data/lib/rexml/dtd/notationdecl.rb +32 -32
- data/lib/rexml/element.rb +122 -107
- data/lib/rexml/encoding.rb +37 -58
- data/lib/rexml/entity.rb +144 -144
- data/lib/rexml/formatters/default.rb +6 -4
- data/lib/rexml/formatters/pretty.rb +11 -8
- data/lib/rexml/formatters/transitive.rb +4 -3
- data/lib/rexml/functions.rb +33 -21
- data/lib/rexml/instruction.rb +49 -49
- data/lib/rexml/light/node.rb +190 -191
- data/lib/rexml/namespace.rb +39 -39
- data/lib/rexml/node.rb +38 -38
- data/lib/rexml/output.rb +17 -12
- data/lib/rexml/parent.rb +26 -25
- data/lib/rexml/parseexception.rb +4 -4
- data/lib/rexml/parsers/baseparser.rb +90 -61
- data/lib/rexml/parsers/lightparser.rb +41 -43
- data/lib/rexml/parsers/pullparser.rb +1 -1
- data/lib/rexml/parsers/sax2parser.rb +233 -198
- data/lib/rexml/parsers/streamparser.rb +6 -2
- data/lib/rexml/parsers/treeparser.rb +9 -6
- data/lib/rexml/parsers/ultralightparser.rb +40 -40
- data/lib/rexml/parsers/xpathparser.rb +51 -52
- data/lib/rexml/quickpath.rb +247 -248
- data/lib/rexml/rexml.rb +9 -10
- data/lib/rexml/sax2listener.rb +92 -92
- data/lib/rexml/security.rb +27 -0
- data/lib/rexml/source.rb +95 -50
- data/lib/rexml/streamlistener.rb +90 -90
- data/lib/rexml/syncenumerator.rb +3 -4
- data/lib/rexml/text.rb +157 -76
- data/lib/rexml/validation/relaxng.rb +18 -18
- data/lib/rexml/validation/validation.rb +5 -5
- data/lib/rexml/xmldecl.rb +59 -63
- data/lib/rexml/xmltokens.rb +14 -14
- data/lib/rexml/xpath.rb +67 -53
- data/lib/rexml/xpath_parser.rb +49 -38
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +1 -1
- data/rubysl-rexml.gemspec +3 -1
- metadata +19 -28
- data/lib/rexml/encodings/CP-1252.rb +0 -103
- data/lib/rexml/encodings/EUC-JP.rb +0 -35
- data/lib/rexml/encodings/ICONV.rb +0 -22
- data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
- data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
- data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
- data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
- data/lib/rexml/encodings/UNILE.rb +0 -34
- data/lib/rexml/encodings/US-ASCII.rb +0 -30
- data/lib/rexml/encodings/UTF-16.rb +0 -35
- data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/streamlistener.rb
CHANGED
@@ -1,92 +1,92 @@
|
|
1
1
|
module REXML
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
2
|
+
# A template for stream parser listeners.
|
3
|
+
# Note that the declarations (attlistdecl, elementdecl, etc) are trivially
|
4
|
+
# processed; REXML doesn't yet handle doctype entity declarations, so you
|
5
|
+
# have to parse them out yourself.
|
6
|
+
module StreamListener
|
7
|
+
# Called when a tag is encountered.
|
8
|
+
# @p name the tag name
|
9
|
+
# @p attrs an array of arrays of attribute/value pairs, suitable for
|
10
|
+
# use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
|
11
|
+
# will result in
|
12
|
+
# tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
|
13
|
+
def tag_start name, attrs
|
14
|
+
end
|
15
|
+
# Called when the end tag is reached. In the case of <tag/>, tag_end
|
16
|
+
# will be called immediately after tag_start
|
17
|
+
# @p the name of the tag
|
18
|
+
def tag_end name
|
19
|
+
end
|
20
|
+
# Called when text is encountered in the document
|
21
|
+
# @p text the text content.
|
22
|
+
def text text
|
23
|
+
end
|
24
|
+
# Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
|
25
|
+
# @p name the instruction name; in the example, "xsl"
|
26
|
+
# @p instruction the rest of the instruction. In the example,
|
27
|
+
# "sheet='foo'"
|
28
|
+
def instruction name, instruction
|
29
|
+
end
|
30
|
+
# Called when a comment is encountered.
|
31
|
+
# @p comment The content of the comment
|
32
|
+
def comment comment
|
33
|
+
end
|
34
|
+
# Handles a doctype declaration. Any attributes of the doctype which are
|
35
|
+
# not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
|
36
|
+
# @p name the name of the doctype; EG, "me"
|
37
|
+
# @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
|
38
|
+
# @p long_name the supplied long name, or nil. EG, "foo"
|
39
|
+
# @p uri the uri of the doctype, or nil. EG, "bar"
|
40
|
+
def doctype name, pub_sys, long_name, uri
|
41
|
+
end
|
42
|
+
# Called when the doctype is done
|
43
|
+
def doctype_end
|
44
|
+
end
|
45
|
+
# If a doctype includes an ATTLIST declaration, it will cause this
|
46
|
+
# method to be called. The content is the declaration itself, unparsed.
|
47
|
+
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
|
48
|
+
# attr CDATA #REQUIRED". This is the same for all of the .*decl
|
49
|
+
# methods.
|
50
|
+
def attlistdecl element_name, attributes, raw_content
|
51
|
+
end
|
52
|
+
# <!ELEMENT ...>
|
53
|
+
def elementdecl content
|
54
|
+
end
|
55
|
+
# <!ENTITY ...>
|
56
|
+
# The argument passed to this method is an array of the entity
|
57
|
+
# declaration. It can be in a number of formats, but in general it
|
58
|
+
# returns (example, result):
|
59
|
+
# <!ENTITY % YN '"Yes"'>
|
60
|
+
# ["YN", "\"Yes\"", "%"]
|
61
|
+
# <!ENTITY % YN 'Yes'>
|
62
|
+
# ["YN", "Yes", "%"]
|
63
|
+
# <!ENTITY WhatHeSaid "He said %YN;">
|
64
|
+
# ["WhatHeSaid", "He said %YN;"]
|
65
|
+
# <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
|
66
|
+
# ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
|
67
|
+
# <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
|
68
|
+
# ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
|
69
|
+
# <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
|
70
|
+
# ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
|
71
|
+
def entitydecl content
|
72
|
+
end
|
73
|
+
# <!NOTATION ...>
|
74
|
+
def notationdecl content
|
75
|
+
end
|
76
|
+
# Called when %foo; is encountered in a doctype declaration.
|
77
|
+
# @p content "foo"
|
78
|
+
def entity content
|
79
|
+
end
|
80
|
+
# Called when <![CDATA[ ... ]]> is encountered in a document.
|
81
|
+
# @p content "..."
|
82
|
+
def cdata content
|
83
|
+
end
|
84
|
+
# Called when an XML PI is encountered in the document.
|
85
|
+
# EG: <?xml version="1.0" encoding="utf"?>
|
86
|
+
# @p version the version attribute value. EG, "1.0"
|
87
|
+
# @p encoding the encoding attribute value, or nil. EG, "utf"
|
88
|
+
# @p standalone the standalone attribute value, or nil. EG, nil
|
89
|
+
def xmldecl version, encoding, standalone
|
90
|
+
end
|
91
|
+
end
|
92
92
|
end
|
data/lib/rexml/syncenumerator.rb
CHANGED
@@ -6,8 +6,7 @@ module REXML
|
|
6
6
|
# Enumerable objects.
|
7
7
|
def initialize(*enums)
|
8
8
|
@gens = enums
|
9
|
-
@
|
10
|
-
@gens.each {|x| @biggest = x if x.size > @biggest.size }
|
9
|
+
@length = @gens.collect {|x| x.size }.max
|
11
10
|
end
|
12
11
|
|
13
12
|
# Returns the number of enumerated Enumerable objects, i.e. the size
|
@@ -24,8 +23,8 @@ module REXML
|
|
24
23
|
|
25
24
|
# Enumerates rows of the Enumerable objects.
|
26
25
|
def each
|
27
|
-
@
|
28
|
-
yield
|
26
|
+
@length.times {|i|
|
27
|
+
yield @gens.collect {|x| x[i]}
|
29
28
|
}
|
30
29
|
self
|
31
30
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rexml/security'
|
1
2
|
require 'rexml/entity'
|
2
3
|
require 'rexml/doctype'
|
3
4
|
require 'rexml/child'
|
@@ -18,25 +19,57 @@ module REXML
|
|
18
19
|
# If +raw+ is true, then REXML leaves the value alone
|
19
20
|
attr_accessor :raw
|
20
21
|
|
21
|
-
|
22
|
-
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
22
|
+
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
23
|
+
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
24
|
+
VALID_CHAR = [
|
25
|
+
0x9, 0xA, 0xD,
|
26
|
+
(0x20..0xD7FF),
|
27
|
+
(0xE000..0xFFFD),
|
28
|
+
(0x10000..0x10FFFF)
|
29
|
+
]
|
30
|
+
|
31
|
+
if String.method_defined? :encode
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Fixnum
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
42
|
+
else
|
43
|
+
VALID_XML_CHARS = /^(
|
44
|
+
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
45
|
+
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
46
|
+
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
47
|
+
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
48
|
+
| \xEF[\x80-\xBE]{2} #
|
49
|
+
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
50
|
+
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
51
|
+
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
52
|
+
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
53
|
+
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
54
|
+
)*$/nx;
|
55
|
+
end
|
23
56
|
|
24
57
|
# Constructor
|
25
58
|
# +arg+ if a String, the content is set to the String. If a Text,
|
26
|
-
# the object is shallowly cloned.
|
59
|
+
# the object is shallowly cloned.
|
27
60
|
#
|
28
61
|
# +respect_whitespace+ (boolean, false) if true, whitespace is
|
29
62
|
# respected
|
30
63
|
#
|
31
64
|
# +parent+ (nil) if this is a Parent object, the parent
|
32
|
-
# will be set to this.
|
65
|
+
# will be set to this.
|
33
66
|
#
|
34
67
|
# +raw+ (nil) This argument can be given three values.
|
35
|
-
# If true, then the value of used to construct this object is expected to
|
36
|
-
# contain no unescaped XML markup, and REXML will not change the text. If
|
68
|
+
# If true, then the value of used to construct this object is expected to
|
69
|
+
# contain no unescaped XML markup, and REXML will not change the text. If
|
37
70
|
# this value is false, the string may contain any characters, and REXML will
|
38
71
|
# escape any and all defined entities whose values are contained in the
|
39
|
-
# text. If this value is nil (the default), then the raw value of the
|
72
|
+
# text. If this value is nil (the default), then the raw value of the
|
40
73
|
# parent will be used as the raw value for this node. If there is no raw
|
41
74
|
# value for the parent, and no value is supplied, the default is false.
|
42
75
|
# Use this field if you have entities defined for some text, and you don't
|
@@ -56,25 +89,24 @@ module REXML
|
|
56
89
|
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
|
57
90
|
# In the last example, the +entity_filter+ argument is ignored.
|
58
91
|
#
|
59
|
-
# +
|
60
|
-
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
61
|
-
entity_filter=nil, illegal=
|
92
|
+
# +illegal+ INTERNAL USE ONLY
|
93
|
+
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
94
|
+
entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
|
62
95
|
|
63
96
|
@raw = false
|
97
|
+
@parent = nil
|
64
98
|
|
65
99
|
if parent
|
66
100
|
super( parent )
|
67
|
-
@raw = parent.raw
|
68
|
-
else
|
69
|
-
@parent = nil
|
101
|
+
@raw = parent.raw
|
70
102
|
end
|
71
103
|
|
72
104
|
@raw = raw unless raw.nil?
|
73
105
|
@entity_filter = entity_filter
|
74
|
-
|
106
|
+
clear_cache
|
75
107
|
|
76
108
|
if arg.kind_of? String
|
77
|
-
@string = arg.
|
109
|
+
@string = arg.dup
|
78
110
|
@string.squeeze!(" \n\t") unless respect_whitespace
|
79
111
|
elsif arg.kind_of? Text
|
80
112
|
@string = arg.to_s
|
@@ -85,10 +117,55 @@ module REXML
|
|
85
117
|
|
86
118
|
@string.gsub!( /\r\n?/, "\n" )
|
87
119
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
120
|
+
Text.check(@string, illegal, doctype) if @raw
|
121
|
+
end
|
122
|
+
|
123
|
+
def parent= parent
|
124
|
+
super(parent)
|
125
|
+
Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
|
126
|
+
end
|
127
|
+
|
128
|
+
# check for illegal characters
|
129
|
+
def Text.check string, pattern, doctype
|
130
|
+
|
131
|
+
# illegal anywhere
|
132
|
+
if string !~ VALID_XML_CHARS
|
133
|
+
if String.method_defined? :encode
|
134
|
+
string.chars.each do |c|
|
135
|
+
case c.ord
|
136
|
+
when *VALID_CHAR
|
137
|
+
else
|
138
|
+
raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
139
|
+
end
|
140
|
+
end
|
141
|
+
else
|
142
|
+
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
143
|
+
case c.unpack('U')
|
144
|
+
when *VALID_CHAR
|
145
|
+
else
|
146
|
+
raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
# context sensitive
|
153
|
+
string.scan(pattern) do
|
154
|
+
if $1[-1] != ?;
|
155
|
+
raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
156
|
+
elsif $1[0] == ?&
|
157
|
+
if $5 and $5[0] == ?#
|
158
|
+
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
|
159
|
+
when *VALID_CHAR
|
160
|
+
else
|
161
|
+
raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
162
|
+
end
|
163
|
+
# FIXME: below can't work but this needs API change.
|
164
|
+
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
165
|
+
# if !doctype or !doctype.entities.has_key?($3)
|
166
|
+
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
167
|
+
# end
|
168
|
+
end
|
92
169
|
end
|
93
170
|
end
|
94
171
|
end
|
@@ -109,8 +186,13 @@ module REXML
|
|
109
186
|
|
110
187
|
# Appends text to this text node. The text is appended in the +raw+ mode
|
111
188
|
# of this text node.
|
189
|
+
#
|
190
|
+
# +returns+ the text itself to enable method chain like
|
191
|
+
# 'text << "XXX" << "YYY"'.
|
112
192
|
def <<( to_append )
|
113
193
|
@string << to_append.gsub( /\r\n?/, "\n" )
|
194
|
+
clear_cache
|
195
|
+
self
|
114
196
|
end
|
115
197
|
|
116
198
|
|
@@ -120,17 +202,24 @@ module REXML
|
|
120
202
|
to_s() <=> other.to_s
|
121
203
|
end
|
122
204
|
|
205
|
+
def doctype
|
206
|
+
if @parent
|
207
|
+
doc = @parent.document
|
208
|
+
doc.doctype if doc
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
123
212
|
REFERENCE = /#{Entity::REFERENCE}/
|
124
213
|
# Returns the string value of this text node. This string is always
|
125
214
|
# escaped, meaning that it is a valid XML text node string, and all
|
126
215
|
# entities that can be escaped, have been inserted. This method respects
|
127
216
|
# the entity filter set in the constructor.
|
128
|
-
#
|
129
|
-
# # Assume that the entity "s" is defined to be "sean", and that the
|
217
|
+
#
|
218
|
+
# # Assume that the entity "s" is defined to be "sean", and that the
|
130
219
|
# # entity "r" is defined to be "russell"
|
131
|
-
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
220
|
+
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
132
221
|
# t.to_s #-> "&lt; &amp; &s; russell"
|
133
|
-
# t = Text.new( "< & &s; russell", false, nil, false )
|
222
|
+
# t = Text.new( "< & &s; russell", false, nil, false )
|
134
223
|
# t.to_s #-> "&lt; &amp; &s; russell"
|
135
224
|
# u = Text.new( "sean russell", false, nil, true )
|
136
225
|
# u.to_s #-> "sean russell"
|
@@ -138,12 +227,6 @@ module REXML
|
|
138
227
|
return @string if @raw
|
139
228
|
return @normalized if @normalized
|
140
229
|
|
141
|
-
doctype = nil
|
142
|
-
if @parent
|
143
|
-
doc = @parent.document
|
144
|
-
doctype = doc.doctype if doc
|
145
|
-
end
|
146
|
-
|
147
230
|
@normalized = Text::normalize( @string, doctype, @entity_filter )
|
148
231
|
end
|
149
232
|
|
@@ -156,25 +239,20 @@ module REXML
|
|
156
239
|
# console. This ignores the 'raw' attribute setting, and any
|
157
240
|
# entity_filter.
|
158
241
|
#
|
159
|
-
# # Assume that the entity "s" is defined to be "sean", and that the
|
242
|
+
# # Assume that the entity "s" is defined to be "sean", and that the
|
160
243
|
# # entity "r" is defined to be "russell"
|
161
|
-
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
244
|
+
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
162
245
|
# t.value #-> "< & sean russell"
|
163
246
|
# t = Text.new( "< & &s; russell", false, nil, false )
|
164
247
|
# t.value #-> "< & sean russell"
|
165
248
|
# u = Text.new( "sean russell", false, nil, true )
|
166
249
|
# u.value #-> "sean russell"
|
167
250
|
def value
|
168
|
-
@unnormalized if @unnormalized
|
169
|
-
doctype = nil
|
170
|
-
if @parent
|
171
|
-
doc = @parent.document
|
172
|
-
doctype = doc.doctype if doc
|
173
|
-
end
|
251
|
+
return @unnormalized if @unnormalized
|
174
252
|
@unnormalized = Text::unnormalize( @string, doctype )
|
175
253
|
end
|
176
254
|
|
177
|
-
# Sets the contents of this text node. This expects the text to be
|
255
|
+
# Sets the contents of this text node. This expects the text to be
|
178
256
|
# unnormalized. It returns self.
|
179
257
|
#
|
180
258
|
# e = Element.new( "a" )
|
@@ -183,11 +261,10 @@ module REXML
|
|
183
261
|
# e[0].value = "<a>" # <a>&lt;a&gt;</a>
|
184
262
|
def value=( val )
|
185
263
|
@string = val.gsub( /\r\n?/, "\n" )
|
186
|
-
|
187
|
-
@normalized = nil
|
264
|
+
clear_cache
|
188
265
|
@raw = false
|
189
266
|
end
|
190
|
-
|
267
|
+
|
191
268
|
def wrap(string, width, addnewline=false)
|
192
269
|
# Recursively wrap string at width.
|
193
270
|
return string if string.length <= width
|
@@ -202,7 +279,7 @@ module REXML
|
|
202
279
|
def indent_text(string, level=1, style="\t", indentfirstline=true)
|
203
280
|
return string if level < 0
|
204
281
|
new_string = ''
|
205
|
-
string.
|
282
|
+
string.each_line { |line|
|
206
283
|
indent_string = style * level
|
207
284
|
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
208
285
|
new_string << new_line
|
@@ -210,11 +287,11 @@ module REXML
|
|
210
287
|
new_string.strip! unless indentfirstline
|
211
288
|
return new_string
|
212
289
|
end
|
213
|
-
|
290
|
+
|
214
291
|
# == DEPRECATED
|
215
292
|
# See REXML::Formatters
|
216
293
|
#
|
217
|
-
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
294
|
+
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
218
295
|
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
|
219
296
|
formatter = if indent > -1
|
220
297
|
REXML::Formatters::Pretty.new( indent )
|
@@ -258,6 +335,12 @@ module REXML
|
|
258
335
|
out << copy
|
259
336
|
end
|
260
337
|
|
338
|
+
private
|
339
|
+
def clear_cache
|
340
|
+
@normalized = nil
|
341
|
+
@unnormalized = nil
|
342
|
+
end
|
343
|
+
|
261
344
|
# Reads text, substituting entities
|
262
345
|
def Text::read_with_substitution( input, illegal=nil )
|
263
346
|
copy = input.clone
|
@@ -265,7 +348,7 @@ module REXML
|
|
265
348
|
if copy =~ illegal
|
266
349
|
raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
|
267
350
|
end if illegal
|
268
|
-
|
351
|
+
|
269
352
|
copy.gsub!( /\r\n?/, "\n" )
|
270
353
|
if copy.include? ?&
|
271
354
|
copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
|
@@ -273,7 +356,7 @@ module REXML
|
|
273
356
|
copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
|
274
357
|
copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
|
275
358
|
copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
|
276
|
-
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
|
359
|
+
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
|
277
360
|
m=$1
|
278
361
|
#m='0' if m==''
|
279
362
|
m = "0#{m}" if m[0] == ?x
|
@@ -293,9 +376,9 @@ module REXML
|
|
293
376
|
if doctype
|
294
377
|
# Replace all ampersands that aren't part of an entity
|
295
378
|
doctype.entities.each_value do |entity|
|
296
|
-
copy = copy.gsub( entity.value,
|
297
|
-
"&#{entity.name};" ) if entity.value and
|
298
|
-
not( entity_filter and entity_filter.include?(entity) )
|
379
|
+
copy = copy.gsub( entity.value,
|
380
|
+
"&#{entity.name};" ) if entity.value and
|
381
|
+
not( entity_filter and entity_filter.include?(entity.name) )
|
299
382
|
end
|
300
383
|
else
|
301
384
|
# Replace all ampersands that aren't part of an entity
|
@@ -308,37 +391,35 @@ module REXML
|
|
308
391
|
|
309
392
|
# Unescapes all possible entities
|
310
393
|
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
394
|
+
sum = 0
|
395
|
+
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
396
|
+
s = Text.expand($&, doctype, filter)
|
397
|
+
if sum + s.bytesize > Security.entity_expansion_text_limit
|
398
|
+
raise "entity expansion has grown too large"
|
399
|
+
else
|
400
|
+
sum += s.bytesize
|
401
|
+
end
|
402
|
+
s
|
319
403
|
}
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
re = /&#{entity_reference};/
|
327
|
-
rv.gsub!( re, entity_value ) if entity_value
|
328
|
-
end
|
329
|
-
end
|
404
|
+
end
|
405
|
+
|
406
|
+
def Text.expand(ref, doctype, filter)
|
407
|
+
if ref[1] == ?#
|
408
|
+
if ref[2] == ?x
|
409
|
+
[ref[3...-1].to_i(16)].pack('U*')
|
330
410
|
else
|
331
|
-
|
332
|
-
unless filter and filter.include?(entity_reference)
|
333
|
-
entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
|
334
|
-
re = /&#{entity_reference};/
|
335
|
-
rv.gsub!( re, entity_value.value ) if entity_value
|
336
|
-
end
|
337
|
-
end
|
411
|
+
[ref[2...-1].to_i].pack('U*')
|
338
412
|
end
|
339
|
-
|
413
|
+
elsif ref == '&amp;'
|
414
|
+
'&'
|
415
|
+
elsif filter and filter.include?( ref[1...-1] )
|
416
|
+
ref
|
417
|
+
elsif doctype
|
418
|
+
doctype.entity( ref[1...-1] ) or ref
|
419
|
+
else
|
420
|
+
entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
|
421
|
+
entity_value ? entity_value.value : ref
|
340
422
|
end
|
341
|
-
rv
|
342
423
|
end
|
343
424
|
end
|
344
425
|
end
|