rubysl-rexml 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
@@ -1,92 +1,92 @@
1
1
  module REXML
2
- # A template for stream parser listeners.
3
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
- # processed; REXML doesn't yet handle doctype entity declarations, so you
5
- # have to parse them out yourself.
6
- module StreamListener
7
- # Called when a tag is encountered.
8
- # @p name the tag name
9
- # @p attrs an array of arrays of attribute/value pairs, suitable for
10
- # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
11
- # will result in
12
- # tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
13
- def tag_start name, attrs
14
- end
15
- # Called when the end tag is reached. In the case of <tag/>, tag_end
16
- # will be called immidiately after tag_start
17
- # @p the name of the tag
18
- def tag_end name
19
- end
20
- # Called when text is encountered in the document
21
- # @p text the text content.
22
- def text text
23
- end
24
- # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
25
- # @p name the instruction name; in the example, "xsl"
26
- # @p instruction the rest of the instruction. In the example,
27
- # "sheet='foo'"
28
- def instruction name, instruction
29
- end
30
- # Called when a comment is encountered.
31
- # @p comment The content of the comment
32
- def comment comment
33
- end
34
- # Handles a doctype declaration. Any attributes of the doctype which are
35
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
36
- # @p name the name of the doctype; EG, "me"
37
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
38
- # @p long_name the supplied long name, or nil. EG, "foo"
39
- # @p uri the uri of the doctype, or nil. EG, "bar"
40
- def doctype name, pub_sys, long_name, uri
41
- end
42
- # Called when the doctype is done
43
- def doctype_end
44
- end
45
- # If a doctype includes an ATTLIST declaration, it will cause this
46
- # method to be called. The content is the declaration itself, unparsed.
47
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
48
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
49
- # methods.
50
- def attlistdecl element_name, attributes, raw_content
51
- end
52
- # <!ELEMENT ...>
53
- def elementdecl content
54
- end
55
- # <!ENTITY ...>
56
- # The argument passed to this method is an array of the entity
57
- # declaration. It can be in a number of formats, but in general it
58
- # returns (example, result):
59
- # <!ENTITY % YN '"Yes"'>
60
- # ["%", "YN", "'\"Yes\"'", "\""]
61
- # <!ENTITY % YN 'Yes'>
62
- # ["%", "YN", "'Yes'", "s"]
63
- # <!ENTITY WhatHeSaid "He said %YN;">
64
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
65
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
66
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
67
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
69
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
70
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
71
- def entitydecl content
72
- end
73
- # <!NOTATION ...>
74
- def notationdecl content
75
- end
76
- # Called when %foo; is encountered in a doctype declaration.
77
- # @p content "foo"
78
- def entity content
79
- end
80
- # Called when <![CDATA[ ... ]]> is encountered in a document.
81
- # @p content "..."
82
- def cdata content
83
- end
84
- # Called when an XML PI is encountered in the document.
85
- # EG: <?xml version="1.0" encoding="utf"?>
86
- # @p version the version attribute value. EG, "1.0"
87
- # @p encoding the encoding attribute value, or nil. EG, "utf"
88
- # @p standalone the standalone attribute value, or nil. EG, nil
89
- def xmldecl version, encoding, standalone
90
- end
91
- end
2
+ # A template for stream parser listeners.
3
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
5
+ # have to parse them out yourself.
6
+ module StreamListener
7
+ # Called when a tag is encountered.
8
+ # @p name the tag name
9
+ # @p attrs an array of arrays of attribute/value pairs, suitable for
10
+ # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
11
+ # will result in
12
+ # tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
13
+ def tag_start name, attrs
14
+ end
15
+ # Called when the end tag is reached. In the case of <tag/>, tag_end
16
+ # will be called immidiately after tag_start
17
+ # @p the name of the tag
18
+ def tag_end name
19
+ end
20
+ # Called when text is encountered in the document
21
+ # @p text the text content.
22
+ def text text
23
+ end
24
+ # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
25
+ # @p name the instruction name; in the example, "xsl"
26
+ # @p instruction the rest of the instruction. In the example,
27
+ # "sheet='foo'"
28
+ def instruction name, instruction
29
+ end
30
+ # Called when a comment is encountered.
31
+ # @p comment The content of the comment
32
+ def comment comment
33
+ end
34
+ # Handles a doctype declaration. Any attributes of the doctype which are
35
+ # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
36
+ # @p name the name of the doctype; EG, "me"
37
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
38
+ # @p long_name the supplied long name, or nil. EG, "foo"
39
+ # @p uri the uri of the doctype, or nil. EG, "bar"
40
+ def doctype name, pub_sys, long_name, uri
41
+ end
42
+ # Called when the doctype is done
43
+ def doctype_end
44
+ end
45
+ # If a doctype includes an ATTLIST declaration, it will cause this
46
+ # method to be called. The content is the declaration itself, unparsed.
47
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
48
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
49
+ # methods.
50
+ def attlistdecl element_name, attributes, raw_content
51
+ end
52
+ # <!ELEMENT ...>
53
+ def elementdecl content
54
+ end
55
+ # <!ENTITY ...>
56
+ # The argument passed to this method is an array of the entity
57
+ # declaration. It can be in a number of formats, but in general it
58
+ # returns (example, result):
59
+ # <!ENTITY % YN '"Yes"'>
60
+ # ["YN", "\"Yes\"", "%"]
61
+ # <!ENTITY % YN 'Yes'>
62
+ # ["YN", "Yes", "%"]
63
+ # <!ENTITY WhatHeSaid "He said %YN;">
64
+ # ["WhatHeSaid", "He said %YN;"]
65
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
66
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
67
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
69
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
70
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
71
+ def entitydecl content
72
+ end
73
+ # <!NOTATION ...>
74
+ def notationdecl content
75
+ end
76
+ # Called when %foo; is encountered in a doctype declaration.
77
+ # @p content "foo"
78
+ def entity content
79
+ end
80
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
81
+ # @p content "..."
82
+ def cdata content
83
+ end
84
+ # Called when an XML PI is encountered in the document.
85
+ # EG: <?xml version="1.0" encoding="utf"?>
86
+ # @p version the version attribute value. EG, "1.0"
87
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
88
+ # @p standalone the standalone attribute value, or nil. EG, nil
89
+ def xmldecl version, encoding, standalone
90
+ end
91
+ end
92
92
  end
@@ -6,8 +6,7 @@ module REXML
6
6
  # Enumerable objects.
7
7
  def initialize(*enums)
8
8
  @gens = enums
9
- @biggest = @gens[0]
10
- @gens.each {|x| @biggest = x if x.size > @biggest.size }
9
+ @length = @gens.collect {|x| x.size }.max
11
10
  end
12
11
 
13
12
  # Returns the number of enumerated Enumerable objects, i.e. the size
@@ -24,8 +23,8 @@ module REXML
24
23
 
25
24
  # Enumerates rows of the Enumerable objects.
26
25
  def each
27
- @biggest.zip( *@gens ) {|a|
28
- yield(*a[1..-1])
26
+ @length.times {|i|
27
+ yield @gens.collect {|x| x[i]}
29
28
  }
30
29
  self
31
30
  end
data/lib/rexml/text.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'rexml/security'
1
2
  require 'rexml/entity'
2
3
  require 'rexml/doctype'
3
4
  require 'rexml/child'
@@ -18,25 +19,57 @@ module REXML
18
19
  # If +raw+ is true, then REXML leaves the value alone
19
20
  attr_accessor :raw
20
21
 
21
- ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
22
- NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
22
+ NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
23
+ NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
24
+ VALID_CHAR = [
25
+ 0x9, 0xA, 0xD,
26
+ (0x20..0xD7FF),
27
+ (0xE000..0xFFFD),
28
+ (0x10000..0x10FFFF)
29
+ ]
30
+
31
+ if String.method_defined? :encode
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Fixnum
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
42
+ else
43
+ VALID_XML_CHARS = /^(
44
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
45
+ | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
46
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
47
+ | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
48
+ | \xEF[\x80-\xBE]{2} #
49
+ | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
50
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
51
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
52
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
53
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
54
+ )*$/nx;
55
+ end
23
56
 
24
57
  # Constructor
25
58
  # +arg+ if a String, the content is set to the String. If a Text,
26
- # the object is shallowly cloned.
59
+ # the object is shallowly cloned.
27
60
  #
28
61
  # +respect_whitespace+ (boolean, false) if true, whitespace is
29
62
  # respected
30
63
  #
31
64
  # +parent+ (nil) if this is a Parent object, the parent
32
- # will be set to this.
65
+ # will be set to this.
33
66
  #
34
67
  # +raw+ (nil) This argument can be given three values.
35
- # If true, then the value of used to construct this object is expected to
36
- # contain no unescaped XML markup, and REXML will not change the text. If
68
+ # If true, then the value of used to construct this object is expected to
69
+ # contain no unescaped XML markup, and REXML will not change the text. If
37
70
  # this value is false, the string may contain any characters, and REXML will
38
71
  # escape any and all defined entities whose values are contained in the
39
- # text. If this value is nil (the default), then the raw value of the
72
+ # text. If this value is nil (the default), then the raw value of the
40
73
  # parent will be used as the raw value for this node. If there is no raw
41
74
  # value for the parent, and no value is supplied, the default is false.
42
75
  # Use this field if you have entities defined for some text, and you don't
@@ -56,25 +89,24 @@ module REXML
56
89
  # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
57
90
  # In the last example, the +entity_filter+ argument is ignored.
58
91
  #
59
- # +pattern+ INTERNAL USE ONLY
60
- def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
61
- entity_filter=nil, illegal=ILLEGAL )
92
+ # +illegal+ INTERNAL USE ONLY
93
+ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
94
+ entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
62
95
 
63
96
  @raw = false
97
+ @parent = nil
64
98
 
65
99
  if parent
66
100
  super( parent )
67
- @raw = parent.raw
68
- else
69
- @parent = nil
101
+ @raw = parent.raw
70
102
  end
71
103
 
72
104
  @raw = raw unless raw.nil?
73
105
  @entity_filter = entity_filter
74
- @normalized = @unnormalized = nil
106
+ clear_cache
75
107
 
76
108
  if arg.kind_of? String
77
- @string = arg.clone
109
+ @string = arg.dup
78
110
  @string.squeeze!(" \n\t") unless respect_whitespace
79
111
  elsif arg.kind_of? Text
80
112
  @string = arg.to_s
@@ -85,10 +117,55 @@ module REXML
85
117
 
86
118
  @string.gsub!( /\r\n?/, "\n" )
87
119
 
88
- # check for illegal characters
89
- if @raw
90
- if @string =~ illegal
91
- raise "Illegal character '#{$1}' in raw string \"#{@string}\""
120
+ Text.check(@string, illegal, doctype) if @raw
121
+ end
122
+
123
+ def parent= parent
124
+ super(parent)
125
+ Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
126
+ end
127
+
128
+ # check for illegal characters
129
+ def Text.check string, pattern, doctype
130
+
131
+ # illegal anywhere
132
+ if string !~ VALID_XML_CHARS
133
+ if String.method_defined? :encode
134
+ string.chars.each do |c|
135
+ case c.ord
136
+ when *VALID_CHAR
137
+ else
138
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
139
+ end
140
+ end
141
+ else
142
+ string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
143
+ case c.unpack('U')
144
+ when *VALID_CHAR
145
+ else
146
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
147
+ end
148
+ end
149
+ end
150
+ end
151
+
152
+ # context sensitive
153
+ string.scan(pattern) do
154
+ if $1[-1] != ?;
155
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
156
+ elsif $1[0] == ?&
157
+ if $5 and $5[0] == ?#
158
+ case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
159
+ when *VALID_CHAR
160
+ else
161
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
162
+ end
163
+ # FIXME: below can't work but this needs API change.
164
+ # elsif @parent and $3 and !SUBSTITUTES.include?($1)
165
+ # if !doctype or !doctype.entities.has_key?($3)
166
+ # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
167
+ # end
168
+ end
92
169
  end
93
170
  end
94
171
  end
@@ -109,8 +186,13 @@ module REXML
109
186
 
110
187
  # Appends text to this text node. The text is appended in the +raw+ mode
111
188
  # of this text node.
189
+ #
190
+ # +returns+ the text itself to enable method chain like
191
+ # 'text << "XXX" << "YYY"'.
112
192
  def <<( to_append )
113
193
  @string << to_append.gsub( /\r\n?/, "\n" )
194
+ clear_cache
195
+ self
114
196
  end
115
197
 
116
198
 
@@ -120,17 +202,24 @@ module REXML
120
202
  to_s() <=> other.to_s
121
203
  end
122
204
 
205
+ def doctype
206
+ if @parent
207
+ doc = @parent.document
208
+ doc.doctype if doc
209
+ end
210
+ end
211
+
123
212
  REFERENCE = /#{Entity::REFERENCE}/
124
213
  # Returns the string value of this text node. This string is always
125
214
  # escaped, meaning that it is a valid XML text node string, and all
126
215
  # entities that can be escaped, have been inserted. This method respects
127
216
  # the entity filter set in the constructor.
128
- #
129
- # # Assume that the entity "s" is defined to be "sean", and that the
217
+ #
218
+ # # Assume that the entity "s" is defined to be "sean", and that the
130
219
  # # entity "r" is defined to be "russell"
131
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
220
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
132
221
  # t.to_s #-> "&lt; &amp; &s; russell"
133
- # t = Text.new( "< & &s; russell", false, nil, false )
222
+ # t = Text.new( "< & &s; russell", false, nil, false )
134
223
  # t.to_s #-> "&lt; &amp; &s; russell"
135
224
  # u = Text.new( "sean russell", false, nil, true )
136
225
  # u.to_s #-> "sean russell"
@@ -138,12 +227,6 @@ module REXML
138
227
  return @string if @raw
139
228
  return @normalized if @normalized
140
229
 
141
- doctype = nil
142
- if @parent
143
- doc = @parent.document
144
- doctype = doc.doctype if doc
145
- end
146
-
147
230
  @normalized = Text::normalize( @string, doctype, @entity_filter )
148
231
  end
149
232
 
@@ -156,25 +239,20 @@ module REXML
156
239
  # console. This ignores the 'raw' attribute setting, and any
157
240
  # entity_filter.
158
241
  #
159
- # # Assume that the entity "s" is defined to be "sean", and that the
242
+ # # Assume that the entity "s" is defined to be "sean", and that the
160
243
  # # entity "r" is defined to be "russell"
161
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
244
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
162
245
  # t.value #-> "< & sean russell"
163
246
  # t = Text.new( "< & &s; russell", false, nil, false )
164
247
  # t.value #-> "< & sean russell"
165
248
  # u = Text.new( "sean russell", false, nil, true )
166
249
  # u.value #-> "sean russell"
167
250
  def value
168
- @unnormalized if @unnormalized
169
- doctype = nil
170
- if @parent
171
- doc = @parent.document
172
- doctype = doc.doctype if doc
173
- end
251
+ return @unnormalized if @unnormalized
174
252
  @unnormalized = Text::unnormalize( @string, doctype )
175
253
  end
176
254
 
177
- # Sets the contents of this text node. This expects the text to be
255
+ # Sets the contents of this text node. This expects the text to be
178
256
  # unnormalized. It returns self.
179
257
  #
180
258
  # e = Element.new( "a" )
@@ -183,11 +261,10 @@ module REXML
183
261
  # e[0].value = "<a>" # <a>&lt;a&gt;</a>
184
262
  def value=( val )
185
263
  @string = val.gsub( /\r\n?/, "\n" )
186
- @unnormalized = nil
187
- @normalized = nil
264
+ clear_cache
188
265
  @raw = false
189
266
  end
190
-
267
+
191
268
  def wrap(string, width, addnewline=false)
192
269
  # Recursively wrap string at width.
193
270
  return string if string.length <= width
@@ -202,7 +279,7 @@ module REXML
202
279
  def indent_text(string, level=1, style="\t", indentfirstline=true)
203
280
  return string if level < 0
204
281
  new_string = ''
205
- string.each { |line|
282
+ string.each_line { |line|
206
283
  indent_string = style * level
207
284
  new_line = (indent_string + line).sub(/[\s]+$/,'')
208
285
  new_string << new_line
@@ -210,11 +287,11 @@ module REXML
210
287
  new_string.strip! unless indentfirstline
211
288
  return new_string
212
289
  end
213
-
290
+
214
291
  # == DEPRECATED
215
292
  # See REXML::Formatters
216
293
  #
217
- def write( writer, indent=-1, transitive=false, ie_hack=false )
294
+ def write( writer, indent=-1, transitive=false, ie_hack=false )
218
295
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
219
296
  formatter = if indent > -1
220
297
  REXML::Formatters::Pretty.new( indent )
@@ -258,6 +335,12 @@ module REXML
258
335
  out << copy
259
336
  end
260
337
 
338
+ private
339
+ def clear_cache
340
+ @normalized = nil
341
+ @unnormalized = nil
342
+ end
343
+
261
344
  # Reads text, substituting entities
262
345
  def Text::read_with_substitution( input, illegal=nil )
263
346
  copy = input.clone
@@ -265,7 +348,7 @@ module REXML
265
348
  if copy =~ illegal
266
349
  raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
267
350
  end if illegal
268
-
351
+
269
352
  copy.gsub!( /\r\n?/, "\n" )
270
353
  if copy.include? ?&
271
354
  copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
@@ -273,7 +356,7 @@ module REXML
273
356
  copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
274
357
  copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
275
358
  copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
276
- copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
359
+ copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
277
360
  m=$1
278
361
  #m='0' if m==''
279
362
  m = "0#{m}" if m[0] == ?x
@@ -293,9 +376,9 @@ module REXML
293
376
  if doctype
294
377
  # Replace all ampersands that aren't part of an entity
295
378
  doctype.entities.each_value do |entity|
296
- copy = copy.gsub( entity.value,
297
- "&#{entity.name};" ) if entity.value and
298
- not( entity_filter and entity_filter.include?(entity) )
379
+ copy = copy.gsub( entity.value,
380
+ "&#{entity.name};" ) if entity.value and
381
+ not( entity_filter and entity_filter.include?(entity.name) )
299
382
  end
300
383
  else
301
384
  # Replace all ampersands that aren't part of an entity
@@ -308,37 +391,35 @@ module REXML
308
391
 
309
392
  # Unescapes all possible entities
310
393
  def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
311
- rv = string.clone
312
- rv.gsub!( /\r\n?/, "\n" )
313
- matches = rv.scan( REFERENCE )
314
- return rv if matches.size == 0
315
- rv.gsub!( NUMERICENTITY ) {|m|
316
- m=$1
317
- m = "0#{m}" if m[0] == ?x
318
- [Integer(m)].pack('U*')
394
+ sum = 0
395
+ string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
396
+ s = Text.expand($&, doctype, filter)
397
+ if sum + s.bytesize > Security.entity_expansion_text_limit
398
+ raise "entity expansion has grown too large"
399
+ else
400
+ sum += s.bytesize
401
+ end
402
+ s
319
403
  }
320
- matches.collect!{|x|x[0]}.compact!
321
- if matches.size > 0
322
- if doctype
323
- matches.each do |entity_reference|
324
- unless filter and filter.include?(entity_reference)
325
- entity_value = doctype.entity( entity_reference )
326
- re = /&#{entity_reference};/
327
- rv.gsub!( re, entity_value ) if entity_value
328
- end
329
- end
404
+ end
405
+
406
+ def Text.expand(ref, doctype, filter)
407
+ if ref[1] == ?#
408
+ if ref[2] == ?x
409
+ [ref[3...-1].to_i(16)].pack('U*')
330
410
  else
331
- matches.each do |entity_reference|
332
- unless filter and filter.include?(entity_reference)
333
- entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
334
- re = /&#{entity_reference};/
335
- rv.gsub!( re, entity_value.value ) if entity_value
336
- end
337
- end
411
+ [ref[2...-1].to_i].pack('U*')
338
412
  end
339
- rv.gsub!( /&amp;/, '&' )
413
+ elsif ref == '&amp;'
414
+ '&'
415
+ elsif filter and filter.include?( ref[1...-1] )
416
+ ref
417
+ elsif doctype
418
+ doctype.entity( ref[1...-1] ) or ref
419
+ else
420
+ entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
421
+ entity_value ? entity_value.value : ref
340
422
  end
341
- rv
342
423
  end
343
424
  end
344
425
  end