rubysl-rexml 1.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
@@ -1,92 +1,92 @@
1
1
  module REXML
2
- # A template for stream parser listeners.
3
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
- # processed; REXML doesn't yet handle doctype entity declarations, so you
5
- # have to parse them out yourself.
6
- module StreamListener
7
- # Called when a tag is encountered.
8
- # @p name the tag name
9
- # @p attrs an array of arrays of attribute/value pairs, suitable for
10
- # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
11
- # will result in
12
- # tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
13
- def tag_start name, attrs
14
- end
15
- # Called when the end tag is reached. In the case of <tag/>, tag_end
16
- # will be called immidiately after tag_start
17
- # @p the name of the tag
18
- def tag_end name
19
- end
20
- # Called when text is encountered in the document
21
- # @p text the text content.
22
- def text text
23
- end
24
- # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
25
- # @p name the instruction name; in the example, "xsl"
26
- # @p instruction the rest of the instruction. In the example,
27
- # "sheet='foo'"
28
- def instruction name, instruction
29
- end
30
- # Called when a comment is encountered.
31
- # @p comment The content of the comment
32
- def comment comment
33
- end
34
- # Handles a doctype declaration. Any attributes of the doctype which are
35
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
36
- # @p name the name of the doctype; EG, "me"
37
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
38
- # @p long_name the supplied long name, or nil. EG, "foo"
39
- # @p uri the uri of the doctype, or nil. EG, "bar"
40
- def doctype name, pub_sys, long_name, uri
41
- end
42
- # Called when the doctype is done
43
- def doctype_end
44
- end
45
- # If a doctype includes an ATTLIST declaration, it will cause this
46
- # method to be called. The content is the declaration itself, unparsed.
47
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
48
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
49
- # methods.
50
- def attlistdecl element_name, attributes, raw_content
51
- end
52
- # <!ELEMENT ...>
53
- def elementdecl content
54
- end
55
- # <!ENTITY ...>
56
- # The argument passed to this method is an array of the entity
57
- # declaration. It can be in a number of formats, but in general it
58
- # returns (example, result):
59
- # <!ENTITY % YN '"Yes"'>
60
- # ["%", "YN", "'\"Yes\"'", "\""]
61
- # <!ENTITY % YN 'Yes'>
62
- # ["%", "YN", "'Yes'", "s"]
63
- # <!ENTITY WhatHeSaid "He said %YN;">
64
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
65
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
66
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
67
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
69
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
70
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
71
- def entitydecl content
72
- end
73
- # <!NOTATION ...>
74
- def notationdecl content
75
- end
76
- # Called when %foo; is encountered in a doctype declaration.
77
- # @p content "foo"
78
- def entity content
79
- end
80
- # Called when <![CDATA[ ... ]]> is encountered in a document.
81
- # @p content "..."
82
- def cdata content
83
- end
84
- # Called when an XML PI is encountered in the document.
85
- # EG: <?xml version="1.0" encoding="utf"?>
86
- # @p version the version attribute value. EG, "1.0"
87
- # @p encoding the encoding attribute value, or nil. EG, "utf"
88
- # @p standalone the standalone attribute value, or nil. EG, nil
89
- def xmldecl version, encoding, standalone
90
- end
91
- end
2
+ # A template for stream parser listeners.
3
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
5
+ # have to parse them out yourself.
6
+ module StreamListener
7
+ # Called when a tag is encountered.
8
+ # @p name the tag name
9
+ # @p attrs an array of arrays of attribute/value pairs, suitable for
10
+ # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
11
+ # will result in
12
+ # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
13
+ def tag_start name, attrs
14
+ end
15
+ # Called when the end tag is reached. In the case of <tag/>, tag_end
16
+ # will be called immediately after tag_start
17
+ # @p name the name of the tag
18
+ def tag_end name
19
+ end
20
+ # Called when text is encountered in the document
21
+ # @p text the text content.
22
+ def text text
23
+ end
24
+ # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
25
+ # @p name the instruction name; in the example, "xsl"
26
+ # @p instruction the rest of the instruction. In the example,
27
+ # "sheet='foo'"
28
+ def instruction name, instruction
29
+ end
30
+ # Called when a comment is encountered.
31
+ # @p comment The content of the comment
32
+ def comment comment
33
+ end
34
+ # Handles a doctype declaration. Any attributes of the doctype which are
35
+ # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
36
+ # @p name the name of the doctype; EG, "me"
37
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
38
+ # @p long_name the supplied long name, or nil. EG, "foo"
39
+ # @p uri the uri of the doctype, or nil. EG, "bar"
40
+ def doctype name, pub_sys, long_name, uri
41
+ end
42
+ # Called when the doctype is done
43
+ def doctype_end
44
+ end
45
+ # If a doctype includes an ATTLIST declaration, it will cause this
46
+ # method to be called. The content is the declaration itself, unparsed.
47
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
48
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
49
+ # methods.
50
+ def attlistdecl element_name, attributes, raw_content
51
+ end
52
+ # <!ELEMENT ...>
53
+ def elementdecl content
54
+ end
55
+ # <!ENTITY ...>
56
+ # The argument passed to this method is an array of the entity
57
+ # declaration. It can be in a number of formats, but in general it
58
+ # returns (example, result):
59
+ # <!ENTITY % YN '"Yes"'>
60
+ # ["YN", "\"Yes\"", "%"]
61
+ # <!ENTITY % YN 'Yes'>
62
+ # ["YN", "Yes", "%"]
63
+ # <!ENTITY WhatHeSaid "He said %YN;">
64
+ # ["WhatHeSaid", "He said %YN;"]
65
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
66
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
67
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
69
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
70
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
71
+ def entitydecl content
72
+ end
73
+ # <!NOTATION ...>
74
+ def notationdecl content
75
+ end
76
+ # Called when %foo; is encountered in a doctype declaration.
77
+ # @p content "foo"
78
+ def entity content
79
+ end
80
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
81
+ # @p content "..."
82
+ def cdata content
83
+ end
84
+ # Called when an XML PI is encountered in the document.
85
+ # EG: <?xml version="1.0" encoding="utf"?>
86
+ # @p version the version attribute value. EG, "1.0"
87
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
88
+ # @p standalone the standalone attribute value, or nil. EG, nil
89
+ def xmldecl version, encoding, standalone
90
+ end
91
+ end
92
92
  end
@@ -6,8 +6,7 @@ module REXML
6
6
  # Enumerable objects.
7
7
  def initialize(*enums)
8
8
  @gens = enums
9
- @biggest = @gens[0]
10
- @gens.each {|x| @biggest = x if x.size > @biggest.size }
9
+ @length = @gens.collect {|x| x.size }.max
11
10
  end
12
11
 
13
12
  # Returns the number of enumerated Enumerable objects, i.e. the size
@@ -24,8 +23,8 @@ module REXML
24
23
 
25
24
  # Enumerates rows of the Enumerable objects.
26
25
  def each
27
- @biggest.zip( *@gens ) {|a|
28
- yield(*a[1..-1])
26
+ @length.times {|i|
27
+ yield @gens.collect {|x| x[i]}
29
28
  }
30
29
  self
31
30
  end
data/lib/rexml/text.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'rexml/security'
1
2
  require 'rexml/entity'
2
3
  require 'rexml/doctype'
3
4
  require 'rexml/child'
@@ -18,25 +19,57 @@ module REXML
18
19
  # If +raw+ is true, then REXML leaves the value alone
19
20
  attr_accessor :raw
20
21
 
21
- ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
22
- NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
22
+ NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
23
+ NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
24
+ VALID_CHAR = [
25
+ 0x9, 0xA, 0xD,
26
+ (0x20..0xD7FF),
27
+ (0xE000..0xFFFD),
28
+ (0x10000..0x10FFFF)
29
+ ]
30
+
31
+ if String.method_defined? :encode
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Fixnum
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
42
+ else
43
+ VALID_XML_CHARS = /^(
44
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
45
+ | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
46
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
47
+ | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
48
+ | \xEF[\x80-\xBE]{2} #
49
+ | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
50
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
51
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
52
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
53
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
54
+ )*$/nx;
55
+ end
23
56
 
24
57
  # Constructor
25
58
  # +arg+ if a String, the content is set to the String. If a Text,
26
- # the object is shallowly cloned.
59
+ # the object is shallowly cloned.
27
60
  #
28
61
  # +respect_whitespace+ (boolean, false) if true, whitespace is
29
62
  # respected
30
63
  #
31
64
  # +parent+ (nil) if this is a Parent object, the parent
32
- # will be set to this.
65
+ # will be set to this.
33
66
  #
34
67
  # +raw+ (nil) This argument can be given three values.
35
- # If true, then the value of used to construct this object is expected to
36
- # contain no unescaped XML markup, and REXML will not change the text. If
68
+ # If true, then the value used to construct this object is expected to
69
+ # contain no unescaped XML markup, and REXML will not change the text. If
37
70
  # this value is false, the string may contain any characters, and REXML will
38
71
  # escape any and all defined entities whose values are contained in the
39
- # text. If this value is nil (the default), then the raw value of the
72
+ # text. If this value is nil (the default), then the raw value of the
40
73
  # parent will be used as the raw value for this node. If there is no raw
41
74
  # value for the parent, and no value is supplied, the default is false.
42
75
  # Use this field if you have entities defined for some text, and you don't
@@ -56,25 +89,24 @@ module REXML
56
89
  # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
57
90
  # In the last example, the +entity_filter+ argument is ignored.
58
91
  #
59
- # +pattern+ INTERNAL USE ONLY
60
- def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
61
- entity_filter=nil, illegal=ILLEGAL )
92
+ # +illegal+ INTERNAL USE ONLY
93
+ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
94
+ entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
62
95
 
63
96
  @raw = false
97
+ @parent = nil
64
98
 
65
99
  if parent
66
100
  super( parent )
67
- @raw = parent.raw
68
- else
69
- @parent = nil
101
+ @raw = parent.raw
70
102
  end
71
103
 
72
104
  @raw = raw unless raw.nil?
73
105
  @entity_filter = entity_filter
74
- @normalized = @unnormalized = nil
106
+ clear_cache
75
107
 
76
108
  if arg.kind_of? String
77
- @string = arg.clone
109
+ @string = arg.dup
78
110
  @string.squeeze!(" \n\t") unless respect_whitespace
79
111
  elsif arg.kind_of? Text
80
112
  @string = arg.to_s
@@ -85,10 +117,55 @@ module REXML
85
117
 
86
118
  @string.gsub!( /\r\n?/, "\n" )
87
119
 
88
- # check for illegal characters
89
- if @raw
90
- if @string =~ illegal
91
- raise "Illegal character '#{$1}' in raw string \"#{@string}\""
120
+ Text.check(@string, illegal, doctype) if @raw
121
+ end
122
+
123
+ def parent= parent
124
+ super(parent)
125
+ Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
126
+ end
127
+
128
+ # check for illegal characters
129
+ def Text.check string, pattern, doctype
130
+
131
+ # illegal anywhere
132
+ if string !~ VALID_XML_CHARS
133
+ if String.method_defined? :encode
134
+ string.chars.each do |c|
135
+ case c.ord
136
+ when *VALID_CHAR
137
+ else
138
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
139
+ end
140
+ end
141
+ else
142
+ string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
143
+ case c.unpack('U')
144
+ when *VALID_CHAR
145
+ else
146
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
147
+ end
148
+ end
149
+ end
150
+ end
151
+
152
+ # context sensitive
153
+ string.scan(pattern) do
154
+ if $1[-1] != ?;
155
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
156
+ elsif $1[0] == ?&
157
+ if $5 and $5[0] == ?#
158
+ case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
159
+ when *VALID_CHAR
160
+ else
161
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
162
+ end
163
+ # FIXME: below can't work but this needs API change.
164
+ # elsif @parent and $3 and !SUBSTITUTES.include?($1)
165
+ # if !doctype or !doctype.entities.has_key?($3)
166
+ # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
167
+ # end
168
+ end
92
169
  end
93
170
  end
94
171
  end
@@ -109,8 +186,13 @@ module REXML
109
186
 
110
187
  # Appends text to this text node. The text is appended in the +raw+ mode
111
188
  # of this text node.
189
+ #
190
+ # +returns+ the text itself to enable method chain like
191
+ # 'text << "XXX" << "YYY"'.
112
192
  def <<( to_append )
113
193
  @string << to_append.gsub( /\r\n?/, "\n" )
194
+ clear_cache
195
+ self
114
196
  end
115
197
 
116
198
 
@@ -120,17 +202,24 @@ module REXML
120
202
  to_s() <=> other.to_s
121
203
  end
122
204
 
205
+ def doctype
206
+ if @parent
207
+ doc = @parent.document
208
+ doc.doctype if doc
209
+ end
210
+ end
211
+
123
212
  REFERENCE = /#{Entity::REFERENCE}/
124
213
  # Returns the string value of this text node. This string is always
125
214
  # escaped, meaning that it is a valid XML text node string, and all
126
215
  # entities that can be escaped, have been inserted. This method respects
127
216
  # the entity filter set in the constructor.
128
- #
129
- # # Assume that the entity "s" is defined to be "sean", and that the
217
+ #
218
+ # # Assume that the entity "s" is defined to be "sean", and that the
130
219
  # # entity "r" is defined to be "russell"
131
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
220
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
132
221
  # t.to_s #-> "&lt; &amp; &s; russell"
133
- # t = Text.new( "< & &s; russell", false, nil, false )
222
+ # t = Text.new( "< & &s; russell", false, nil, false )
134
223
  # t.to_s #-> "&lt; &amp; &s; russell"
135
224
  # u = Text.new( "sean russell", false, nil, true )
136
225
  # u.to_s #-> "sean russell"
@@ -138,12 +227,6 @@ module REXML
138
227
  return @string if @raw
139
228
  return @normalized if @normalized
140
229
 
141
- doctype = nil
142
- if @parent
143
- doc = @parent.document
144
- doctype = doc.doctype if doc
145
- end
146
-
147
230
  @normalized = Text::normalize( @string, doctype, @entity_filter )
148
231
  end
149
232
 
@@ -156,25 +239,20 @@ module REXML
156
239
  # console. This ignores the 'raw' attribute setting, and any
157
240
  # entity_filter.
158
241
  #
159
- # # Assume that the entity "s" is defined to be "sean", and that the
242
+ # # Assume that the entity "s" is defined to be "sean", and that the
160
243
  # # entity "r" is defined to be "russell"
161
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
244
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
162
245
  # t.value #-> "< & sean russell"
163
246
  # t = Text.new( "< & &s; russell", false, nil, false )
164
247
  # t.value #-> "< & sean russell"
165
248
  # u = Text.new( "sean russell", false, nil, true )
166
249
  # u.value #-> "sean russell"
167
250
  def value
168
- @unnormalized if @unnormalized
169
- doctype = nil
170
- if @parent
171
- doc = @parent.document
172
- doctype = doc.doctype if doc
173
- end
251
+ return @unnormalized if @unnormalized
174
252
  @unnormalized = Text::unnormalize( @string, doctype )
175
253
  end
176
254
 
177
- # Sets the contents of this text node. This expects the text to be
255
+ # Sets the contents of this text node. This expects the text to be
178
256
  # unnormalized. It returns self.
179
257
  #
180
258
  # e = Element.new( "a" )
@@ -183,11 +261,10 @@ module REXML
183
261
  # e[0].value = "<a>" # <a>&lt;a&gt;</a>
184
262
  def value=( val )
185
263
  @string = val.gsub( /\r\n?/, "\n" )
186
- @unnormalized = nil
187
- @normalized = nil
264
+ clear_cache
188
265
  @raw = false
189
266
  end
190
-
267
+
191
268
  def wrap(string, width, addnewline=false)
192
269
  # Recursively wrap string at width.
193
270
  return string if string.length <= width
@@ -202,7 +279,7 @@ module REXML
202
279
  def indent_text(string, level=1, style="\t", indentfirstline=true)
203
280
  return string if level < 0
204
281
  new_string = ''
205
- string.each { |line|
282
+ string.each_line { |line|
206
283
  indent_string = style * level
207
284
  new_line = (indent_string + line).sub(/[\s]+$/,'')
208
285
  new_string << new_line
@@ -210,11 +287,11 @@ module REXML
210
287
  new_string.strip! unless indentfirstline
211
288
  return new_string
212
289
  end
213
-
290
+
214
291
  # == DEPRECATED
215
292
  # See REXML::Formatters
216
293
  #
217
- def write( writer, indent=-1, transitive=false, ie_hack=false )
294
+ def write( writer, indent=-1, transitive=false, ie_hack=false )
218
295
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
219
296
  formatter = if indent > -1
220
297
  REXML::Formatters::Pretty.new( indent )
@@ -258,6 +335,12 @@ module REXML
258
335
  out << copy
259
336
  end
260
337
 
338
+ private
339
+ def clear_cache
340
+ @normalized = nil
341
+ @unnormalized = nil
342
+ end
343
+
261
344
  # Reads text, substituting entities
262
345
  def Text::read_with_substitution( input, illegal=nil )
263
346
  copy = input.clone
@@ -265,7 +348,7 @@ module REXML
265
348
  if copy =~ illegal
266
349
  raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
267
350
  end if illegal
268
-
351
+
269
352
  copy.gsub!( /\r\n?/, "\n" )
270
353
  if copy.include? ?&
271
354
  copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
@@ -273,7 +356,7 @@ module REXML
273
356
  copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
274
357
  copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
275
358
  copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
276
- copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
359
+ copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
277
360
  m=$1
278
361
  #m='0' if m==''
279
362
  m = "0#{m}" if m[0] == ?x
@@ -293,9 +376,9 @@ module REXML
293
376
  if doctype
294
377
  # Replace all ampersands that aren't part of an entity
295
378
  doctype.entities.each_value do |entity|
296
- copy = copy.gsub( entity.value,
297
- "&#{entity.name};" ) if entity.value and
298
- not( entity_filter and entity_filter.include?(entity) )
379
+ copy = copy.gsub( entity.value,
380
+ "&#{entity.name};" ) if entity.value and
381
+ not( entity_filter and entity_filter.include?(entity.name) )
299
382
  end
300
383
  else
301
384
  # Replace all ampersands that aren't part of an entity
@@ -308,37 +391,35 @@ module REXML
308
391
 
309
392
  # Unescapes all possible entities
310
393
  def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
311
- rv = string.clone
312
- rv.gsub!( /\r\n?/, "\n" )
313
- matches = rv.scan( REFERENCE )
314
- return rv if matches.size == 0
315
- rv.gsub!( NUMERICENTITY ) {|m|
316
- m=$1
317
- m = "0#{m}" if m[0] == ?x
318
- [Integer(m)].pack('U*')
394
+ sum = 0
395
+ string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
396
+ s = Text.expand($&, doctype, filter)
397
+ if sum + s.bytesize > Security.entity_expansion_text_limit
398
+ raise "entity expansion has grown too large"
399
+ else
400
+ sum += s.bytesize
401
+ end
402
+ s
319
403
  }
320
- matches.collect!{|x|x[0]}.compact!
321
- if matches.size > 0
322
- if doctype
323
- matches.each do |entity_reference|
324
- unless filter and filter.include?(entity_reference)
325
- entity_value = doctype.entity( entity_reference )
326
- re = /&#{entity_reference};/
327
- rv.gsub!( re, entity_value ) if entity_value
328
- end
329
- end
404
+ end
405
+
406
+ def Text.expand(ref, doctype, filter)
407
+ if ref[1] == ?#
408
+ if ref[2] == ?x
409
+ [ref[3...-1].to_i(16)].pack('U*')
330
410
  else
331
- matches.each do |entity_reference|
332
- unless filter and filter.include?(entity_reference)
333
- entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
334
- re = /&#{entity_reference};/
335
- rv.gsub!( re, entity_value.value ) if entity_value
336
- end
337
- end
411
+ [ref[2...-1].to_i].pack('U*')
338
412
  end
339
- rv.gsub!( /&amp;/, '&' )
413
+ elsif ref == '&amp;'
414
+ '&'
415
+ elsif filter and filter.include?( ref[1...-1] )
416
+ ref
417
+ elsif doctype
418
+ doctype.entity( ref[1...-1] ) or ref
419
+ else
420
+ entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
421
+ entity_value ? entity_value.value : ref
340
422
  end
341
- rv
342
423
  end
343
424
  end
344
425
  end