rubysl-rexml 1.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/lib/rexml/attlistdecl.rb +56 -56
- data/lib/rexml/attribute.rb +155 -149
- data/lib/rexml/cdata.rb +48 -48
- data/lib/rexml/child.rb +82 -82
- data/lib/rexml/comment.rb +59 -59
- data/lib/rexml/doctype.rb +22 -24
- data/lib/rexml/document.rb +185 -129
- data/lib/rexml/dtd/attlistdecl.rb +7 -7
- data/lib/rexml/dtd/dtd.rb +41 -41
- data/lib/rexml/dtd/elementdecl.rb +13 -13
- data/lib/rexml/dtd/entitydecl.rb +49 -49
- data/lib/rexml/dtd/notationdecl.rb +32 -32
- data/lib/rexml/element.rb +122 -107
- data/lib/rexml/encoding.rb +37 -58
- data/lib/rexml/entity.rb +144 -144
- data/lib/rexml/formatters/default.rb +6 -4
- data/lib/rexml/formatters/pretty.rb +11 -8
- data/lib/rexml/formatters/transitive.rb +4 -3
- data/lib/rexml/functions.rb +33 -21
- data/lib/rexml/instruction.rb +49 -49
- data/lib/rexml/light/node.rb +190 -191
- data/lib/rexml/namespace.rb +39 -39
- data/lib/rexml/node.rb +38 -38
- data/lib/rexml/output.rb +17 -12
- data/lib/rexml/parent.rb +26 -25
- data/lib/rexml/parseexception.rb +4 -4
- data/lib/rexml/parsers/baseparser.rb +90 -61
- data/lib/rexml/parsers/lightparser.rb +41 -43
- data/lib/rexml/parsers/pullparser.rb +1 -1
- data/lib/rexml/parsers/sax2parser.rb +233 -198
- data/lib/rexml/parsers/streamparser.rb +6 -2
- data/lib/rexml/parsers/treeparser.rb +9 -6
- data/lib/rexml/parsers/ultralightparser.rb +40 -40
- data/lib/rexml/parsers/xpathparser.rb +51 -52
- data/lib/rexml/quickpath.rb +247 -248
- data/lib/rexml/rexml.rb +9 -10
- data/lib/rexml/sax2listener.rb +92 -92
- data/lib/rexml/security.rb +27 -0
- data/lib/rexml/source.rb +95 -50
- data/lib/rexml/streamlistener.rb +90 -90
- data/lib/rexml/syncenumerator.rb +3 -4
- data/lib/rexml/text.rb +157 -76
- data/lib/rexml/validation/relaxng.rb +18 -18
- data/lib/rexml/validation/validation.rb +5 -5
- data/lib/rexml/xmldecl.rb +59 -63
- data/lib/rexml/xmltokens.rb +14 -14
- data/lib/rexml/xpath.rb +67 -53
- data/lib/rexml/xpath_parser.rb +49 -38
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +1 -1
- data/rubysl-rexml.gemspec +3 -1
- metadata +19 -28
- data/lib/rexml/encodings/CP-1252.rb +0 -103
- data/lib/rexml/encodings/EUC-JP.rb +0 -35
- data/lib/rexml/encodings/ICONV.rb +0 -22
- data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
- data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
- data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
- data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
- data/lib/rexml/encodings/UNILE.rb +0 -34
- data/lib/rexml/encodings/US-ASCII.rb +0 -30
- data/lib/rexml/encodings/UTF-16.rb +0 -35
- data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/streamlistener.rb
CHANGED
@@ -1,92 +1,92 @@
|
|
1
1
|
module REXML
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
2
|
+
# A template for stream parser listeners.
|
3
|
+
# Note that the declarations (attlistdecl, elementdecl, etc) are trivially
|
4
|
+
# processed; REXML doesn't yet handle doctype entity declarations, so you
|
5
|
+
# have to parse them out yourself.
|
6
|
+
module StreamListener
|
7
|
+
# Called when a tag is encountered.
|
8
|
+
# @p name the tag name
|
9
|
+
# @p attrs an array of arrays of attribute/value pairs, suitable for
|
10
|
+
# use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
|
11
|
+
# will result in
|
12
|
+
# tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
|
13
|
+
def tag_start name, attrs
|
14
|
+
end
|
15
|
+
# Called when the end tag is reached. In the case of <tag/>, tag_end
|
16
|
+
# will be called immediately after tag_start
|
17
|
+
# @p the name of the tag
|
18
|
+
def tag_end name
|
19
|
+
end
|
20
|
+
# Called when text is encountered in the document
|
21
|
+
# @p text the text content.
|
22
|
+
def text text
|
23
|
+
end
|
24
|
+
# Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
|
25
|
+
# @p name the instruction name; in the example, "xsl"
|
26
|
+
# @p instruction the rest of the instruction. In the example,
|
27
|
+
# "sheet='foo'"
|
28
|
+
def instruction name, instruction
|
29
|
+
end
|
30
|
+
# Called when a comment is encountered.
|
31
|
+
# @p comment The content of the comment
|
32
|
+
def comment comment
|
33
|
+
end
|
34
|
+
# Handles a doctype declaration. Any attributes of the doctype which are
|
35
|
+
# not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
|
36
|
+
# @p name the name of the doctype; EG, "me"
|
37
|
+
# @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
|
38
|
+
# @p long_name the supplied long name, or nil. EG, "foo"
|
39
|
+
# @p uri the uri of the doctype, or nil. EG, "bar"
|
40
|
+
def doctype name, pub_sys, long_name, uri
|
41
|
+
end
|
42
|
+
# Called when the doctype is done
|
43
|
+
def doctype_end
|
44
|
+
end
|
45
|
+
# If a doctype includes an ATTLIST declaration, it will cause this
|
46
|
+
# method to be called. The content is the declaration itself, unparsed.
|
47
|
+
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
|
48
|
+
# attr CDATA #REQUIRED". This is the same for all of the .*decl
|
49
|
+
# methods.
|
50
|
+
def attlistdecl element_name, attributes, raw_content
|
51
|
+
end
|
52
|
+
# <!ELEMENT ...>
|
53
|
+
def elementdecl content
|
54
|
+
end
|
55
|
+
# <!ENTITY ...>
|
56
|
+
# The argument passed to this method is an array of the entity
|
57
|
+
# declaration. It can be in a number of formats, but in general it
|
58
|
+
# returns (example, result):
|
59
|
+
# <!ENTITY % YN '"Yes"'>
|
60
|
+
# ["YN", "\"Yes\"", "%"]
|
61
|
+
# <!ENTITY % YN 'Yes'>
|
62
|
+
# ["YN", "Yes", "%"]
|
63
|
+
# <!ENTITY WhatHeSaid "He said %YN;">
|
64
|
+
# ["WhatHeSaid", "He said %YN;"]
|
65
|
+
# <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
|
66
|
+
# ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
|
67
|
+
# <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
|
68
|
+
# ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
|
69
|
+
# <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
|
70
|
+
# ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
|
71
|
+
def entitydecl content
|
72
|
+
end
|
73
|
+
# <!NOTATION ...>
|
74
|
+
def notationdecl content
|
75
|
+
end
|
76
|
+
# Called when %foo; is encountered in a doctype declaration.
|
77
|
+
# @p content "foo"
|
78
|
+
def entity content
|
79
|
+
end
|
80
|
+
# Called when <![CDATA[ ... ]]> is encountered in a document.
|
81
|
+
# @p content "..."
|
82
|
+
def cdata content
|
83
|
+
end
|
84
|
+
# Called when an XML PI is encountered in the document.
|
85
|
+
# EG: <?xml version="1.0" encoding="utf"?>
|
86
|
+
# @p version the version attribute value. EG, "1.0"
|
87
|
+
# @p encoding the encoding attribute value, or nil. EG, "utf"
|
88
|
+
# @p standalone the standalone attribute value, or nil. EG, nil
|
89
|
+
def xmldecl version, encoding, standalone
|
90
|
+
end
|
91
|
+
end
|
92
92
|
end
|
data/lib/rexml/syncenumerator.rb
CHANGED
@@ -6,8 +6,7 @@ module REXML
|
|
6
6
|
# Enumerable objects.
|
7
7
|
def initialize(*enums)
|
8
8
|
@gens = enums
|
9
|
-
@
|
10
|
-
@gens.each {|x| @biggest = x if x.size > @biggest.size }
|
9
|
+
@length = @gens.collect {|x| x.size }.max
|
11
10
|
end
|
12
11
|
|
13
12
|
# Returns the number of enumerated Enumerable objects, i.e. the size
|
@@ -24,8 +23,8 @@ module REXML
|
|
24
23
|
|
25
24
|
# Enumerates rows of the Enumerable objects.
|
26
25
|
def each
|
27
|
-
@
|
28
|
-
yield
|
26
|
+
@length.times {|i|
|
27
|
+
yield @gens.collect {|x| x[i]}
|
29
28
|
}
|
30
29
|
self
|
31
30
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rexml/security'
|
1
2
|
require 'rexml/entity'
|
2
3
|
require 'rexml/doctype'
|
3
4
|
require 'rexml/child'
|
@@ -18,25 +19,57 @@ module REXML
|
|
18
19
|
# If +raw+ is true, then REXML leaves the value alone
|
19
20
|
attr_accessor :raw
|
20
21
|
|
21
|
-
|
22
|
-
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
22
|
+
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
23
|
+
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
24
|
+
VALID_CHAR = [
|
25
|
+
0x9, 0xA, 0xD,
|
26
|
+
(0x20..0xD7FF),
|
27
|
+
(0xE000..0xFFFD),
|
28
|
+
(0x10000..0x10FFFF)
|
29
|
+
]
|
30
|
+
|
31
|
+
if String.method_defined? :encode
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Fixnum
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
42
|
+
else
|
43
|
+
VALID_XML_CHARS = /^(
|
44
|
+
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
45
|
+
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
46
|
+
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
47
|
+
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
48
|
+
| \xEF[\x80-\xBE]{2} #
|
49
|
+
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
50
|
+
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
51
|
+
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
52
|
+
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
53
|
+
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
54
|
+
)*$/nx;
|
55
|
+
end
|
23
56
|
|
24
57
|
# Constructor
|
25
58
|
# +arg+ if a String, the content is set to the String. If a Text,
|
26
|
-
# the object is shallowly cloned.
|
59
|
+
# the object is shallowly cloned.
|
27
60
|
#
|
28
61
|
# +respect_whitespace+ (boolean, false) if true, whitespace is
|
29
62
|
# respected
|
30
63
|
#
|
31
64
|
# +parent+ (nil) if this is a Parent object, the parent
|
32
|
-
# will be set to this.
|
65
|
+
# will be set to this.
|
33
66
|
#
|
34
67
|
# +raw+ (nil) This argument can be given three values.
|
35
|
-
# If true, then the value of used to construct this object is expected to
|
36
|
-
# contain no unescaped XML markup, and REXML will not change the text. If
|
68
|
+
# If true, then the value used to construct this object is expected to
|
69
|
+
# contain no unescaped XML markup, and REXML will not change the text. If
|
37
70
|
# this value is false, the string may contain any characters, and REXML will
|
38
71
|
# escape any and all defined entities whose values are contained in the
|
39
|
-
# text. If this value is nil (the default), then the raw value of the
|
72
|
+
# text. If this value is nil (the default), then the raw value of the
|
40
73
|
# parent will be used as the raw value for this node. If there is no raw
|
41
74
|
# value for the parent, and no value is supplied, the default is false.
|
42
75
|
# Use this field if you have entities defined for some text, and you don't
|
@@ -56,25 +89,24 @@ module REXML
|
|
56
89
|
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
|
57
90
|
# In the last example, the +entity_filter+ argument is ignored.
|
58
91
|
#
|
59
|
-
# +
|
60
|
-
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
61
|
-
entity_filter=nil, illegal=
|
92
|
+
# +illegal+ INTERNAL USE ONLY
|
93
|
+
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
94
|
+
entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
|
62
95
|
|
63
96
|
@raw = false
|
97
|
+
@parent = nil
|
64
98
|
|
65
99
|
if parent
|
66
100
|
super( parent )
|
67
|
-
@raw = parent.raw
|
68
|
-
else
|
69
|
-
@parent = nil
|
101
|
+
@raw = parent.raw
|
70
102
|
end
|
71
103
|
|
72
104
|
@raw = raw unless raw.nil?
|
73
105
|
@entity_filter = entity_filter
|
74
|
-
|
106
|
+
clear_cache
|
75
107
|
|
76
108
|
if arg.kind_of? String
|
77
|
-
@string = arg.
|
109
|
+
@string = arg.dup
|
78
110
|
@string.squeeze!(" \n\t") unless respect_whitespace
|
79
111
|
elsif arg.kind_of? Text
|
80
112
|
@string = arg.to_s
|
@@ -85,10 +117,55 @@ module REXML
|
|
85
117
|
|
86
118
|
@string.gsub!( /\r\n?/, "\n" )
|
87
119
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
120
|
+
Text.check(@string, illegal, doctype) if @raw
|
121
|
+
end
|
122
|
+
|
123
|
+
def parent= parent
|
124
|
+
super(parent)
|
125
|
+
Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
|
126
|
+
end
|
127
|
+
|
128
|
+
# check for illegal characters
|
129
|
+
def Text.check string, pattern, doctype
|
130
|
+
|
131
|
+
# illegal anywhere
|
132
|
+
if string !~ VALID_XML_CHARS
|
133
|
+
if String.method_defined? :encode
|
134
|
+
string.chars.each do |c|
|
135
|
+
case c.ord
|
136
|
+
when *VALID_CHAR
|
137
|
+
else
|
138
|
+
raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
139
|
+
end
|
140
|
+
end
|
141
|
+
else
|
142
|
+
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
143
|
+
case c.unpack('U')
|
144
|
+
when *VALID_CHAR
|
145
|
+
else
|
146
|
+
raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
# context sensitive
|
153
|
+
string.scan(pattern) do
|
154
|
+
if $1[-1] != ?;
|
155
|
+
raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
156
|
+
elsif $1[0] == ?&
|
157
|
+
if $5 and $5[0] == ?#
|
158
|
+
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
|
159
|
+
when *VALID_CHAR
|
160
|
+
else
|
161
|
+
raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
162
|
+
end
|
163
|
+
# FIXME: below can't work but this needs API change.
|
164
|
+
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
165
|
+
# if !doctype or !doctype.entities.has_key?($3)
|
166
|
+
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
167
|
+
# end
|
168
|
+
end
|
92
169
|
end
|
93
170
|
end
|
94
171
|
end
|
@@ -109,8 +186,13 @@ module REXML
|
|
109
186
|
|
110
187
|
# Appends text to this text node. The text is appended in the +raw+ mode
|
111
188
|
# of this text node.
|
189
|
+
#
|
190
|
+
# +returns+ the text itself to enable method chain like
|
191
|
+
# 'text << "XXX" << "YYY"'.
|
112
192
|
def <<( to_append )
|
113
193
|
@string << to_append.gsub( /\r\n?/, "\n" )
|
194
|
+
clear_cache
|
195
|
+
self
|
114
196
|
end
|
115
197
|
|
116
198
|
|
@@ -120,17 +202,24 @@ module REXML
|
|
120
202
|
to_s() <=> other.to_s
|
121
203
|
end
|
122
204
|
|
205
|
+
def doctype
|
206
|
+
if @parent
|
207
|
+
doc = @parent.document
|
208
|
+
doc.doctype if doc
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
123
212
|
REFERENCE = /#{Entity::REFERENCE}/
|
124
213
|
# Returns the string value of this text node. This string is always
|
125
214
|
# escaped, meaning that it is a valid XML text node string, and all
|
126
215
|
# entities that can be escaped, have been inserted. This method respects
|
127
216
|
# the entity filter set in the constructor.
|
128
|
-
#
|
129
|
-
# # Assume that the entity "s" is defined to be "sean", and that the
|
217
|
+
#
|
218
|
+
# # Assume that the entity "s" is defined to be "sean", and that the
|
130
219
|
# # entity "r" is defined to be "russell"
|
131
|
-
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
220
|
+
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
132
221
|
# t.to_s #-> "&lt; &amp; &s; russell"
|
133
|
-
# t = Text.new( "< & &s; russell", false, nil, false )
|
222
|
+
# t = Text.new( "< & &s; russell", false, nil, false )
|
134
223
|
# t.to_s #-> "&lt; &amp; &s; russell"
|
135
224
|
# u = Text.new( "sean russell", false, nil, true )
|
136
225
|
# u.to_s #-> "sean russell"
|
@@ -138,12 +227,6 @@ module REXML
|
|
138
227
|
return @string if @raw
|
139
228
|
return @normalized if @normalized
|
140
229
|
|
141
|
-
doctype = nil
|
142
|
-
if @parent
|
143
|
-
doc = @parent.document
|
144
|
-
doctype = doc.doctype if doc
|
145
|
-
end
|
146
|
-
|
147
230
|
@normalized = Text::normalize( @string, doctype, @entity_filter )
|
148
231
|
end
|
149
232
|
|
@@ -156,25 +239,20 @@ module REXML
|
|
156
239
|
# console. This ignores the 'raw' attribute setting, and any
|
157
240
|
# entity_filter.
|
158
241
|
#
|
159
|
-
# # Assume that the entity "s" is defined to be "sean", and that the
|
242
|
+
# # Assume that the entity "s" is defined to be "sean", and that the
|
160
243
|
# # entity "r" is defined to be "russell"
|
161
|
-
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
244
|
+
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
162
245
|
# t.value #-> "< & sean russell"
|
163
246
|
# t = Text.new( "< & &s; russell", false, nil, false )
|
164
247
|
# t.value #-> "< & sean russell"
|
165
248
|
# u = Text.new( "sean russell", false, nil, true )
|
166
249
|
# u.value #-> "sean russell"
|
167
250
|
def value
|
168
|
-
@unnormalized if @unnormalized
|
169
|
-
doctype = nil
|
170
|
-
if @parent
|
171
|
-
doc = @parent.document
|
172
|
-
doctype = doc.doctype if doc
|
173
|
-
end
|
251
|
+
return @unnormalized if @unnormalized
|
174
252
|
@unnormalized = Text::unnormalize( @string, doctype )
|
175
253
|
end
|
176
254
|
|
177
|
-
# Sets the contents of this text node. This expects the text to be
|
255
|
+
# Sets the contents of this text node. This expects the text to be
|
178
256
|
# unnormalized. It returns self.
|
179
257
|
#
|
180
258
|
# e = Element.new( "a" )
|
@@ -183,11 +261,10 @@ module REXML
|
|
183
261
|
# e[0].value = "<a>" # <a>&lt;a&gt;</a>
|
184
262
|
def value=( val )
|
185
263
|
@string = val.gsub( /\r\n?/, "\n" )
|
186
|
-
|
187
|
-
@normalized = nil
|
264
|
+
clear_cache
|
188
265
|
@raw = false
|
189
266
|
end
|
190
|
-
|
267
|
+
|
191
268
|
def wrap(string, width, addnewline=false)
|
192
269
|
# Recursively wrap string at width.
|
193
270
|
return string if string.length <= width
|
@@ -202,7 +279,7 @@ module REXML
|
|
202
279
|
def indent_text(string, level=1, style="\t", indentfirstline=true)
|
203
280
|
return string if level < 0
|
204
281
|
new_string = ''
|
205
|
-
string.
|
282
|
+
string.each_line { |line|
|
206
283
|
indent_string = style * level
|
207
284
|
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
208
285
|
new_string << new_line
|
@@ -210,11 +287,11 @@ module REXML
|
|
210
287
|
new_string.strip! unless indentfirstline
|
211
288
|
return new_string
|
212
289
|
end
|
213
|
-
|
290
|
+
|
214
291
|
# == DEPRECATED
|
215
292
|
# See REXML::Formatters
|
216
293
|
#
|
217
|
-
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
294
|
+
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
218
295
|
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
|
219
296
|
formatter = if indent > -1
|
220
297
|
REXML::Formatters::Pretty.new( indent )
|
@@ -258,6 +335,12 @@ module REXML
|
|
258
335
|
out << copy
|
259
336
|
end
|
260
337
|
|
338
|
+
private
|
339
|
+
def clear_cache
|
340
|
+
@normalized = nil
|
341
|
+
@unnormalized = nil
|
342
|
+
end
|
343
|
+
|
261
344
|
# Reads text, substituting entities
|
262
345
|
def Text::read_with_substitution( input, illegal=nil )
|
263
346
|
copy = input.clone
|
@@ -265,7 +348,7 @@ module REXML
|
|
265
348
|
if copy =~ illegal
|
266
349
|
raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
|
267
350
|
end if illegal
|
268
|
-
|
351
|
+
|
269
352
|
copy.gsub!( /\r\n?/, "\n" )
|
270
353
|
if copy.include? ?&
|
271
354
|
copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
|
@@ -273,7 +356,7 @@ module REXML
|
|
273
356
|
copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
|
274
357
|
copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
|
275
358
|
copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
|
276
|
-
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
|
359
|
+
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
|
277
360
|
m=$1
|
278
361
|
#m='0' if m==''
|
279
362
|
m = "0#{m}" if m[0] == ?x
|
@@ -293,9 +376,9 @@ module REXML
|
|
293
376
|
if doctype
|
294
377
|
# Replace all ampersands that aren't part of an entity
|
295
378
|
doctype.entities.each_value do |entity|
|
296
|
-
copy = copy.gsub( entity.value,
|
297
|
-
"&#{entity.name};" ) if entity.value and
|
298
|
-
not( entity_filter and entity_filter.include?(entity) )
|
379
|
+
copy = copy.gsub( entity.value,
|
380
|
+
"&#{entity.name};" ) if entity.value and
|
381
|
+
not( entity_filter and entity_filter.include?(entity.name) )
|
299
382
|
end
|
300
383
|
else
|
301
384
|
# Replace all ampersands that aren't part of an entity
|
@@ -308,37 +391,35 @@ module REXML
|
|
308
391
|
|
309
392
|
# Unescapes all possible entities
|
310
393
|
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
394
|
+
sum = 0
|
395
|
+
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
396
|
+
s = Text.expand($&, doctype, filter)
|
397
|
+
if sum + s.bytesize > Security.entity_expansion_text_limit
|
398
|
+
raise "entity expansion has grown too large"
|
399
|
+
else
|
400
|
+
sum += s.bytesize
|
401
|
+
end
|
402
|
+
s
|
319
403
|
}
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
re = /&#{entity_reference};/
|
327
|
-
rv.gsub!( re, entity_value ) if entity_value
|
328
|
-
end
|
329
|
-
end
|
404
|
+
end
|
405
|
+
|
406
|
+
def Text.expand(ref, doctype, filter)
|
407
|
+
if ref[1] == ?#
|
408
|
+
if ref[2] == ?x
|
409
|
+
[ref[3...-1].to_i(16)].pack('U*')
|
330
410
|
else
|
331
|
-
|
332
|
-
unless filter and filter.include?(entity_reference)
|
333
|
-
entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
|
334
|
-
re = /&#{entity_reference};/
|
335
|
-
rv.gsub!( re, entity_value.value ) if entity_value
|
336
|
-
end
|
337
|
-
end
|
411
|
+
[ref[2...-1].to_i].pack('U*')
|
338
412
|
end
|
339
|
-
|
413
|
+
elsif ref == '&amp;'
|
414
|
+
'&'
|
415
|
+
elsif filter and filter.include?( ref[1...-1] )
|
416
|
+
ref
|
417
|
+
elsif doctype
|
418
|
+
doctype.entity( ref[1...-1] ) or ref
|
419
|
+
else
|
420
|
+
entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
|
421
|
+
entity_value ? entity_value.value : ref
|
340
422
|
end
|
341
|
-
rv
|
342
423
|
end
|
343
424
|
end
|
344
425
|
end
|