rexml 3.2.7 → 3.3.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c54a60c677a865a023fc0bf1fc403419b530cbc7b306bc7da18f1489e02cd79
4
- data.tar.gz: 5dbbae05d90151d6d4ea9d8b5a4a3097e144ab79bb346c30e75c3d62cbc05dd7
3
+ metadata.gz: 1a402bb00d8bf352521fb6ca5354ba92a22d110feedcba40a50e2de5abad277a
4
+ data.tar.gz: 51f7b5893eef8d8183eb14c719064368029b18c9909b3454047e308c7425ce5b
5
5
  SHA512:
6
- metadata.gz: 5579b5fe5f6a5488d78d0ed19cdad1498aeb44bbe0b72dca9895391d1a3d1aaaed353fa14e7366d3c08ab6f723e4bb11d6cdb7a667fd310d5cdcec954bb0e77e
7
- data.tar.gz: 2db805399a3cf3c6cf5bced1157e3c84539c5f3d12d806db951c5c3fd6aaadb86b3a4feaa0ea60a2771432009f873df3be3a688947156be9a63039a5f9bf449c
6
+ metadata.gz: ff091fe421748562931d65301e66dc1d4d313e1c28cce753bc9f31a1f9bac65c0b4939db70117e47f2c3158daa24b708e2519a98a9638114f4e5a1c0d1265e7c
7
+ data.tar.gz: 720bc72a86eacebbe9a990152d4d0dfcde2e50c71b3fbabaaba44dec91b2f6ff7ca6180b86622cf0ffb36355ab5e5d43f8948e67c70ab4fca1f8bf0882a3585d
data/NEWS.md CHANGED
@@ -1,5 +1,274 @@
1
1
  # News
2
2
 
3
+ ## 3.3.7 - 2024-09-04 {#version-3-3-7}
4
+
5
+ ### Improvements
6
+
7
+ * Added local entity expansion limit methods
8
+ * GH-192
9
+ * GH-202
10
+ * Reported by takuya kodama.
11
+ * Patch by NAITOH Jun.
12
+
13
+ * Removed explicit strscan dependency
14
+ * GH-204
15
+ * Patch by Bo Anderson.
16
+
17
+ ### Thanks
18
+
19
+ * takuya kodama
20
+
21
+ * NAITOH Jun
22
+
23
+ * Bo Anderson
24
+
25
+ ## 3.3.6 - 2024-08-22 {#version-3-3-6}
26
+
27
+ ### Improvements
28
+
29
+ * Removed duplicated entity expansions for performance.
30
+ * GH-194
31
+ * Patch by Viktor Ivarsson.
32
+
33
+ * Improved namespace conflicted attribute check performance. It was
34
+ too slow for deep elements.
35
+ * Reported by l33thaxor.
36
+
37
+ ### Fixes
38
+
39
+ * Fixed a bug that default entity expansions are counted for
40
+ security check. Default entity expansions should not be counted
41
+ because they don't have a security risk.
42
+ * GH-198
43
+ * GH-199
44
+ * Patch by Viktor Ivarsson.
45
+
46
+ * Fixed a parser bug that parameter entity references in internal
47
+ subsets are expanded. It's not allowed in the XML specification.
48
+ * GH-191
49
+ * Patch by NAITOH Jun.
50
+
51
+ * Fixed a stream parser bug that user-defined entity references in
52
+ text aren't expanded.
53
+ * GH-200
54
+ * Patch by NAITOH Jun.
55
+
56
+ ### Thanks
57
+
58
+ * Viktor Ivarsson
59
+
60
+ * NAITOH Jun
61
+
62
+ * l33thaxor
63
+
64
+ ## 3.3.5 - 2024-08-12 {#version-3-3-5}
65
+
66
+ ### Fixes
67
+
68
+ * Fixed a bug that `REXML::Security.entity_expansion_text_limit`
69
+ check has wrong text size calculation in SAX and pull parsers.
70
+ * GH-193
71
+ * GH-195
72
+ * Reported by Viktor Ivarsson.
73
+ * Patch by NAITOH Jun.
74
+
75
+ ### Thanks
76
+
77
+ * Viktor Ivarsson
78
+
79
+ * NAITOH Jun
80
+
81
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
82
+
83
+ ### Fixes
84
+
85
+ * Fixed a bug that `REXML::Security` isn't defined when
86
+ `REXML::Parsers::StreamParser` is used and
87
+ `rexml/parsers/streamparser` is only required.
88
+ * GH-189
89
+ * Patch by takuya kodama.
90
+
91
+ ### Thanks
92
+
93
+ * takuya kodama
94
+
95
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
96
+
97
+ ### Improvements
98
+
99
+ * Added support for detecting invalid XML that has unsupported
100
+ content before root element
101
+ * GH-184
102
+ * Patch by NAITOH Jun.
103
+
104
+ * Added support for `REXML::Security.entity_expansion_limit=` and
105
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
106
+ parsers
107
+ * GH-187
108
+ * Patch by NAITOH Jun.
109
+
110
+ * Added more tests for invalid XMLs.
111
+ * GH-183
112
+ * Patch by Watson.
113
+
114
+ * Added more performance tests.
115
+ * Patch by Watson.
116
+
117
+ * Improved parse performance.
118
+ * GH-186
119
+ * Patch by tomoya ishida.
120
+
121
+ ### Thanks
122
+
123
+ * NAITOH Jun
124
+
125
+ * Watson
126
+
127
+ * tomoya ishida
128
+
129
+ ## 3.3.2 - 2024-07-16 {#version-3-3-2}
130
+
131
+ ### Improvements
132
+
133
+ * Improved parse performance.
134
+ * GH-160
135
+ * Patch by NAITOH Jun.
136
+
137
+ * Improved parse performance.
138
+ * GH-169
139
+ * GH-170
140
+ * GH-171
141
+ * GH-172
142
+ * GH-173
143
+ * GH-174
144
+ * GH-175
145
+ * GH-176
146
+ * GH-177
147
+ * Patch by Watson.
148
+
149
+ * Added support for raising a parse exception when an XML has extra
150
+ content after the root element.
151
+ * GH-161
152
+ * Patch by NAITOH Jun.
153
+
154
+ * Added support for raising a parse exception when an XML
155
+ declaration exists in wrong position.
156
+ * GH-162
157
+ * Patch by NAITOH Jun.
158
+
159
+ * Removed a needless space after the XML declaration in pretty print mode.
160
+ * GH-164
161
+ * Patch by NAITOH Jun.
162
+
163
+ * Stopped emitting `:text` events after the root element.
164
+ * GH-167
165
+ * Patch by NAITOH Jun.
166
+
167
+ ### Fixes
168
+
169
+ * Fixed a bug that SAX2 parser doesn't expand predefined entities for
170
+ `characters` callback.
171
+ * GH-168
172
+ * Patch by NAITOH Jun.
173
+
174
+ ### Thanks
175
+
176
+ * NAITOH Jun
177
+
178
+ * Watson
179
+
180
+ ## 3.3.1 - 2024-06-25 {#version-3-3-1}
181
+
182
+ ### Improvements
183
+
184
+ * Added support for detecting malformed top-level comments.
185
+ * GH-145
186
+ * Patch by Hiroya Fujinami.
187
+
188
+ * Improved `REXML::Element#attribute` performance.
189
+ * GH-146
190
+ * Patch by Hiroya Fujinami.
191
+
192
+ * Added support for detecting malformed `<!-->` comments.
193
+ * GH-147
194
+ * Patch by Hiroya Fujinami.
195
+
196
+ * Added support for detecting unclosed `DOCTYPE`.
197
+ * GH-152
198
+ * Patch by Hiroya Fujinami.
199
+
200
+ * Added `changelog_uri` metadata to gemspec.
201
+ * GH-156
202
+ * Patch by fynsta.
203
+
204
+ * Improved parse performance.
205
+ * GH-157
206
+ * GH-158
207
+ * Patch by NAITOH Jun.
208
+
209
+ ### Fixes
210
+
211
+ * Fixed a bug that large XML can't be parsed.
212
+ * GH-154
213
+ * Patch by NAITOH Jun.
214
+
215
+ * Fixed a bug that private constants are visible.
216
+ * GH-155
217
+ * Patch by NAITOH Jun.
218
+
219
+ ### Thanks
220
+
221
+ * Hiroya Fujinami
222
+
223
+ * NAITOH Jun
224
+
225
+ * fynsta
226
+
227
+ ## 3.3.0 - 2024-06-11 {#version-3-3-0}
228
+
229
+ ### Improvements
230
+
231
+ * Added support for strscan 0.7.0 installed with Ruby 2.6.
232
+ * GH-142
233
+ * Reported by Fernando Trigoso.
234
+
235
+ ### Thanks
236
+
237
+ * Fernando Trigoso
238
+
239
+ ## 3.2.9 - 2024-06-09 {#version-3-2-9}
240
+
241
+ ### Improvements
242
+
243
+ * Added support for old strscan.
244
+ * GH-132
245
+ * Reported by Adam.
246
+
247
+ * Improved attribute value parse performance.
248
+ * GH-135
249
+ * Patch by NAITOH Jun.
250
+
251
+ * Improved `REXML::Node#each_recursive` performance.
252
+ * GH-134
253
+ * GH-139
254
+ * Patch by Hiroya Fujinami.
255
+
256
+ * Improved text parse performance.
257
+ * Reported by mprogrammer.
258
+
259
+ ### Thanks
260
+
261
+ * Adam
262
+ * NAITOH Jun
263
+ * Hiroya Fujinami
264
+ * mprogrammer
265
+
266
+ ## 3.2.8 - 2024-05-16 {#version-3-2-8}
267
+
268
+ ### Fixes
269
+
270
+ * Suppressed a warning
271
+
3
272
  ## 3.2.7 - 2024-05-16 {#version-3-2-7}
4
273
 
5
274
  ### Improvements
@@ -24,7 +293,7 @@
24
293
 
25
294
  * Improved parse performance when an attribute has many `<`s.
26
295
 
27
- * GH-124
296
+ * GH-126
28
297
 
29
298
  ### Fixes
30
299
 
@@ -52,7 +321,14 @@
52
321
  * Reported by DuKewu.
53
322
  * Patch by NAITOH Jun.
54
323
 
55
- w## 3.2.6 - 2023-07-27 {#version-3-2-6}
324
+ ### Thanks
325
+
326
+ * NAITOH Jun
327
+ * flatisland
328
+ * jcavalieri
329
+ * DuKewu
330
+
331
+ ## 3.2.6 - 2023-07-27 {#version-3-2-6}
56
332
 
57
333
  ### Improvements
58
334
 
@@ -148,8 +148,9 @@ module REXML
148
148
  # have been expanded to their values
149
149
  def value
150
150
  return @unnormalized if @unnormalized
151
- @unnormalized = Text::unnormalize( @normalized, doctype )
152
- @unnormalized
151
+
152
+ @unnormalized = Text::unnormalize(@normalized, doctype,
153
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
153
154
  end
154
155
 
155
156
  # The normalized value of this attribute. That is, the attribute with
@@ -91,6 +91,8 @@ module REXML
91
91
  #
92
92
  def initialize( source = nil, context = {} )
93
93
  @entity_expansion_count = 0
94
+ @entity_expansion_limit = Security.entity_expansion_limit
95
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
94
96
  super()
95
97
  @context = context
96
98
  return if source.nil?
@@ -431,10 +433,12 @@ module REXML
431
433
  end
432
434
 
433
435
  attr_reader :entity_expansion_count
436
+ attr_writer :entity_expansion_limit
437
+ attr_accessor :entity_expansion_text_limit
434
438
 
435
439
  def record_entity_expansion
436
440
  @entity_expansion_count += 1
437
- if @entity_expansion_count > Security.entity_expansion_limit
441
+ if @entity_expansion_count > @entity_expansion_limit
438
442
  raise "number of entity expansions exceeded, processing aborted."
439
443
  end
440
444
  end
data/lib/rexml/element.rb CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
7
7
  require_relative "parseexception"
8
8
 
9
9
  module REXML
10
- # An implementation note about namespaces:
11
- # As we parse, when we find namespaces we put them in a hash and assign
12
- # them a unique ID. We then convert the namespace prefix for the node
13
- # to the unique ID. This makes namespace lookup much faster for the
14
- # cost of extra memory use. We save the namespace prefix for the
15
- # context node and convert it back when we write it.
16
- @@namespaces = {}
17
-
18
10
  # An \REXML::Element object represents an XML element.
19
11
  #
20
12
  # An element:
@@ -449,9 +441,14 @@ module REXML
449
441
  # Related: #root_node, #document.
450
442
  #
451
443
  def root
452
- return elements[1] if self.kind_of? Document
453
- return self if parent.kind_of? Document or parent.nil?
454
- return parent.root
444
+ target = self
445
+ while target
446
+ return target.elements[1] if target.kind_of? Document
447
+ parent = target.parent
448
+ return target if parent.kind_of? Document or parent.nil?
449
+ target = parent
450
+ end
451
+ nil
455
452
  end
456
453
 
457
454
  # :call-seq:
@@ -627,8 +624,12 @@ module REXML
627
624
  else
628
625
  prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
629
626
  end
630
- ns = attributes[ prefix ]
631
- ns = parent.namespace(prefix) if ns.nil? and parent
627
+ ns = nil
628
+ target = self
629
+ while ns.nil? and target
630
+ ns = target.attributes[prefix]
631
+ target = target.parent
632
+ end
632
633
  ns = '' if ns.nil? and prefix == 'xmlns'
633
634
  return ns
634
635
  end
@@ -1284,16 +1285,11 @@ module REXML
1284
1285
  # document.root.attribute("x", "a") # => a:x='a:x'
1285
1286
  #
1286
1287
  def attribute( name, namespace=nil )
1287
- prefix = nil
1288
- if namespaces.respond_to? :key
1289
- prefix = namespaces.key(namespace) if namespace
1290
- else
1291
- prefix = namespaces.index(namespace) if namespace
1292
- end
1288
+ prefix = namespaces.key(namespace) if namespace
1293
1289
  prefix = nil if prefix == 'xmlns'
1294
1290
 
1295
1291
  ret_val =
1296
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
1292
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
1297
1293
 
1298
1294
  return ret_val unless ret_val.nil?
1299
1295
  return nil if prefix.nil?
@@ -2388,17 +2384,6 @@ module REXML
2388
2384
  elsif old_attr.kind_of? Hash
2389
2385
  old_attr[value.prefix] = value
2390
2386
  elsif old_attr.prefix != value.prefix
2391
- # Check for conflicting namespaces
2392
- if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
2393
- old_namespace = old_attr.namespace
2394
- new_namespace = value.namespace
2395
- if old_namespace == new_namespace
2396
- raise ParseException.new(
2397
- "Namespace conflict in adding attribute \"#{value.name}\": "+
2398
- "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
2399
- "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
2400
- end
2401
- end
2402
2387
  store value.name, {old_attr.prefix => old_attr,
2403
2388
  value.prefix => value}
2404
2389
  else
data/lib/rexml/entity.rb CHANGED
@@ -12,6 +12,7 @@ module REXML
12
12
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
13
13
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
14
14
  PEREFERENCE = "%#{NAME};"
15
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
15
16
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
16
17
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
17
18
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ module REXML
19
20
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
20
21
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
21
22
 
22
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
23
24
 
24
25
  # Create a new entity. Simple entities can be constructed by passing a
25
26
  # name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,14 @@ module REXML
68
69
  end
69
70
 
70
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
71
- # all entities -- both %ent; and &ent; entities. This differs from
72
- # +value()+ in that +value+ only replaces %ent; entities.
72
+ # &ent; entities.
73
73
  def unnormalized
74
- document.record_entity_expansion unless document.nil?
75
- v = value()
76
- return nil if v.nil?
77
- @unnormalized = Text::unnormalize(v, parent)
78
- @unnormalized
74
+ document&.record_entity_expansion
75
+
76
+ return nil if @value.nil?
77
+
78
+ @unnormalized = Text::unnormalize(@value, parent,
79
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
79
80
  end
80
81
 
81
82
  #once :unnormalized
@@ -121,46 +122,6 @@ module REXML
121
122
  write rv
122
123
  rv
123
124
  end
124
-
125
- PEREFERENCE_RE = /#{PEREFERENCE}/um
126
- # Returns the value of this entity. At the moment, only internal entities
127
- # are processed. If the value contains internal references (IE,
128
- # %blah;), those are replaced with their values. IE, if the doctype
129
- # contains:
130
- # <!ENTITY % foo "bar">
131
- # <!ENTITY yada "nanoo %foo; nanoo>
132
- # then:
133
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
- def value
135
- @resolved_value ||= resolve_value
136
- end
137
-
138
- def parent=(other)
139
- @resolved_value = nil
140
- super
141
- end
142
-
143
- private
144
- def resolve_value
145
- return nil if @value.nil?
146
- return @value unless @value.match?(PEREFERENCE_RE)
147
-
148
- matches = @value.scan(PEREFERENCE_RE)
149
- rv = @value.clone
150
- if @parent
151
- sum = 0
152
- matches.each do |entity_reference|
153
- entity_value = @parent.entity( entity_reference[0] )
154
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
- raise "entity expansion has grown too large"
156
- else
157
- sum += entity_value.bytesize
158
- end
159
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160
- end
161
- end
162
- rv
163
- end
164
125
  end
165
126
 
166
127
  # This is a set of entity constants -- the ones defined in the XML
@@ -111,7 +111,7 @@ module REXML
111
111
  # itself, then we don't need a carriage return... which makes this
112
112
  # logic more complex.
113
113
  node.children.each { |child|
114
- next if child == node.children[-1] and child.instance_of?(Text)
114
+ next if child.instance_of?(Text)
115
115
  unless child == node.children[0] or child.instance_of?(Text) or
116
116
  (child == node.children[1] and !node.children[0].writethis)
117
117
  output << "\n"
data/lib/rexml/node.rb CHANGED
@@ -52,10 +52,14 @@ module REXML
52
52
 
53
53
  # Visit all subnodes of +self+ recursively
54
54
  def each_recursive(&block) # :yields: node
55
- self.elements.each {|node|
56
- block.call(node)
57
- node.each_recursive(&block)
58
- }
55
+ stack = []
56
+ each { |child| stack.unshift child if child.node_type == :element }
57
+ until stack.empty?
58
+ child = stack.pop
59
+ yield child
60
+ n = stack.size
61
+ child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
62
+ end
59
63
  end
60
64
 
61
65
  # Find (and return) first subnode (recursively) for which the block