rexml 3.2.6 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2583ae302aa5e698f0887a689c416e5debe0533ac472a9f96fce6a8912040fd8
4
- data.tar.gz: b0ffa6301fd899969a78e060ccaeafebfc2169e3c63ff499ebc6170468866475
3
+ metadata.gz: 582bb5339257c81f2ce9c076155c01d7adfe8fb169c09bc7f5f489f6a76bca80
4
+ data.tar.gz: 160de8899d8d1f995bafca23631e9e4ab928ebbffa21684e3b61dad805a6187b
5
5
  SHA512:
6
- metadata.gz: f63fb0b84ef51e790cc6310244f2106d8c47ec9a00687c58c743afda82b60be9986d503c6f56f947db06f6758707facccd03405c4d1009376e856080aa26d0e4
7
- data.tar.gz: db62bea7391837a7ab4cfc5cb5a412ed4deb8d232653ca66d93a323a5a76383eed520cd4ced5b20204f29b04e84678791cd6f807195868f5d4a5e519a73d2aaf
6
+ metadata.gz: e2b095792523f54301e8a6af2f1682a9ad24d92cdd5d94c9e6088b27520e3c03b68fe06061b6ff2fd96b001b9cb947c57e4095244d83206a83fc2a1829dd4243
7
+ data.tar.gz: 4f335d2b1e58c1da233c3f0a0588def502c8cb2660633e0e06b4d0930bbcedcaae36b52dc550923704b4525d94a1011f4b5f4e87a81e5d689cce24ee89210a23
data/NEWS.md CHANGED
@@ -1,5 +1,404 @@
1
1
  # News
2
2
 
3
+ ## 3.4.0 - 2024-12-15 {#version-3-4-0}
4
+
5
+ ### Improvements
6
+
7
+ * Improved performance.
8
+ * GH-216
9
+ * Patch by NAITOH Jun
10
+
11
+ * JRuby: Improved parse performance.
12
+ * GH-219
13
+ * Patch by João Duarte
14
+
15
+ * Added support for reusing pull parser.
16
+ * GH-214
17
+ * GH-220
18
+ * Patch by Dmitry Pogrebnoy
19
+
20
+ * Improved error handling when source is `IO`.
21
+ * GH-221
22
+ * Patch by NAITOH Jun
23
+
24
+ ### Thanks
25
+
26
+ * NAITOH Jun
27
+
28
+ * João Duarte
29
+
30
+ * Dmitry Pogrebnoy
31
+
32
+ ## 3.3.9 - 2024-10-24 {#version-3-3-9}
33
+
34
+ ### Improvements
35
+
36
+ * Improved performance.
37
+ * GH-210
38
+ * Patch by NAITOH Jun.
39
+
40
+ ### Fixes
41
+
42
+ * Fixed a parse bug for text only invalid XML.
43
+ * GH-215
44
+ * Patch by NAITOH Jun.
45
+
46
+ * Fixed a parse bug that `&#0x...;` is accepted as a character
47
+ reference.
48
+
49
+ ### Thanks
50
+
51
+ * NAITOH Jun
52
+
53
+ ## 3.3.8 - 2024-09-29 {#version-3-3-8}
54
+
55
+ ### Improvements
56
+
57
+ * SAX2: Improved parse performance.
58
+ * GH-207
59
+ * Patch by NAITOH Jun.
60
+
61
+ ### Fixes
62
+
63
+ * Fixed a bug that an unexpected attribute namespace conflict error
64
+ was reported for the predefined "xml" namespace.
65
+ * GH-208
66
+ * Patch by KITAITI Makoto
67
+
68
+ ### Thanks
69
+
70
+ * NAITOH Jun
71
+
72
+ * KITAITI Makoto
73
+
74
+ ## 3.3.7 - 2024-09-04 {#version-3-3-7}
75
+
76
+ ### Improvements
77
+
78
+ * Added local entity expansion limit methods
79
+ * GH-192
80
+ * GH-202
81
+ * Reported by takuya kodama.
82
+ * Patch by NAITOH Jun.
83
+
84
+ * Removed explicit strscan dependency
85
+ * GH-204
86
+ * Patch by Bo Anderson.
87
+
88
+ ### Thanks
89
+
90
+ * takuya kodama
91
+
92
+ * NAITOH Jun
93
+
94
+ * Bo Anderson
95
+
96
+ ## 3.3.6 - 2024-08-22 {#version-3-3-6}
97
+
98
+ ### Improvements
99
+
100
+ * Removed duplicated entity expansions for performance.
101
+ * GH-194
102
+ * Patch by Viktor Ivarsson.
103
+
104
+ * Improved namespace conflicted attribute check performance. It was
105
+ too slow for deep elements.
106
+ * Reported by l33thaxor.
107
+
108
+ ### Fixes
109
+
110
+ * Fixed a bug that default entity expansions are counted for
111
+ security check. Default entity expansions should not be counted
112
+ because they don't have a security risk.
113
+ * GH-198
114
+ * GH-199
115
+ * Patch by Viktor Ivarsson.
116
+
117
+ * Fixed a parser bug that parameter entity references in internal
118
+ subsets are expanded. It's not allowed in the XML specification.
119
+ * GH-191
120
+ * Patch by NAITOH Jun.
121
+
122
+ * Fixed a stream parser bug that user-defined entity references in
123
+ text aren't expanded.
124
+ * GH-200
125
+ * Patch by NAITOH Jun.
126
+
127
+ ### Thanks
128
+
129
+ * Viktor Ivarsson
130
+
131
+ * NAITOH Jun
132
+
133
+ * l33thaxor
134
+
135
+ ## 3.3.5 - 2024-08-12 {#version-3-3-5}
136
+
137
+ ### Fixes
138
+
139
+ * Fixed a bug that `REXML::Security.entity_expansion_text_limit`
140
+ check has wrong text size calculation in SAX and pull parsers.
141
+ * GH-193
142
+ * GH-195
143
+ * Reported by Viktor Ivarsson.
144
+ * Patch by NAITOH Jun.
145
+
146
+ ### Thanks
147
+
148
+ * Viktor Ivarsson
149
+
150
+ * NAITOH Jun
151
+
152
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
153
+
154
+ ### Fixes
155
+
156
+ * Fixed a bug that `REXML::Security` isn't defined when
157
+ `REXML::Parsers::StreamParser` is used and
158
+ `rexml/parsers/streamparser` is only required.
159
+ * GH-189
160
+ * Patch by takuya kodama.
161
+
162
+ ### Thanks
163
+
164
+ * takuya kodama
165
+
166
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
167
+
168
+ ### Improvements
169
+
170
+ * Added support for detecting invalid XML that has unsupported
171
+ content before root element
172
+ * GH-184
173
+ * Patch by NAITOH Jun.
174
+
175
+ * Added support for `REXML::Security.entity_expansion_limit=` and
176
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
177
+ parsers
178
+ * GH-187
179
+ * Patch by NAITOH Jun.
180
+
181
+ * Added more tests for invalid XMLs.
182
+ * GH-183
183
+ * Patch by Watson.
184
+
185
+ * Added more performance tests.
186
+ * Patch by Watson.
187
+
188
+ * Improved parse performance.
189
+ * GH-186
190
+ * Patch by tomoya ishida.
191
+
192
+ ### Thanks
193
+
194
+ * NAITOH Jun
195
+
196
+ * Watson
197
+
198
+ * tomoya ishida
199
+
200
+ ## 3.3.2 - 2024-07-16 {#version-3-3-2}
201
+
202
+ ### Improvements
203
+
204
+ * Improved parse performance.
205
+ * GH-160
206
+ * Patch by NAITOH Jun.
207
+
208
+ * Improved parse performance.
209
+ * GH-169
210
+ * GH-170
211
+ * GH-171
212
+ * GH-172
213
+ * GH-173
214
+ * GH-174
215
+ * GH-175
216
+ * GH-176
217
+ * GH-177
218
+ * Patch by Watson.
219
+
220
+ * Added support for raising a parse exception when an XML has extra
221
+ content after the root element.
222
+ * GH-161
223
+ * Patch by NAITOH Jun.
224
+
225
+ * Added support for raising a parse exception when an XML
226
+ declaration exists in wrong position.
227
+ * GH-162
228
+ * Patch by NAITOH Jun.
229
+
230
+ * Removed a needless space after the XML declaration in pretty print mode.
231
+ * GH-164
232
+ * Patch by NAITOH Jun.
233
+
234
+ * Stopped emitting `:text` events after the root element.
235
+ * GH-167
236
+ * Patch by NAITOH Jun.
237
+
238
+ ### Fixes
239
+
240
+ * Fixed a bug that SAX2 parser doesn't expand predefined entities for
241
+ `characters` callback.
242
+ * GH-168
243
+ * Patch by NAITOH Jun.
244
+
245
+ ### Thanks
246
+
247
+ * NAITOH Jun
248
+
249
+ * Watson
250
+
251
+ ## 3.3.1 - 2024-06-25 {#version-3-3-1}
252
+
253
+ ### Improvements
254
+
255
+ * Added support for detecting malformed top-level comments.
256
+ * GH-145
257
+ * Patch by Hiroya Fujinami.
258
+
259
+ * Improved `REXML::Element#attribute` performance.
260
+ * GH-146
261
+ * Patch by Hiroya Fujinami.
262
+
263
+ * Added support for detecting malformed `<!-->` comments.
264
+ * GH-147
265
+ * Patch by Hiroya Fujinami.
266
+
267
+ * Added support for detecting unclosed `DOCTYPE`.
268
+ * GH-152
269
+ * Patch by Hiroya Fujinami.
270
+
271
+ * Added `changelog_uri` metadata to gemspec.
272
+ * GH-156
273
+ * Patch by fynsta.
274
+
275
+ * Improved parse performance.
276
+ * GH-157
277
+ * GH-158
278
+ * Patch by NAITOH Jun.
279
+
280
+ ### Fixes
281
+
282
+ * Fixed a bug that large XML can't be parsed.
283
+ * GH-154
284
+ * Patch by NAITOH Jun.
285
+
286
+ * Fixed a bug that private constants are visible.
287
+ * GH-155
288
+ * Patch by NAITOH Jun.
289
+
290
+ ### Thanks
291
+
292
+ * Hiroya Fujinami
293
+
294
+ * NAITOH Jun
295
+
296
+ * fynsta
297
+
298
+ ## 3.3.0 - 2024-06-11 {#version-3-3-0}
299
+
300
+ ### Improvements
301
+
302
+ * Added support for strscan 0.7.0 installed with Ruby 2.6.
303
+ * GH-142
304
+ * Reported by Fernando Trigoso.
305
+
306
+ ### Thanks
307
+
308
+ * Fernando Trigoso
309
+
310
+ ## 3.2.9 - 2024-06-09 {#version-3-2-9}
311
+
312
+ ### Improvements
313
+
314
+ * Added support for old strscan.
315
+ * GH-132
316
+ * Reported by Adam.
317
+
318
+ * Improved attribute value parse performance.
319
+ * GH-135
320
+ * Patch by NAITOH Jun.
321
+
322
+ * Improved `REXML::Node#each_recursive` performance.
323
+ * GH-134
324
+ * GH-139
325
+ * Patch by Hiroya Fujinami.
326
+
327
+ * Improved text parse performance.
328
+ * Reported by mprogrammer.
329
+
330
+ ### Thanks
331
+
332
+ * Adam
333
+ * NAITOH Jun
334
+ * Hiroya Fujinami
335
+ * mprogrammer
336
+
337
+ ## 3.2.8 - 2024-05-16 {#version-3-2-8}
338
+
339
+ ### Fixes
340
+
341
+ * Suppressed a warning
342
+
343
+ ## 3.2.7 - 2024-05-16 {#version-3-2-7}
344
+
345
+ ### Improvements
346
+
347
+ * Improved parse performance by using `StringScanner`.
348
+
349
+ * GH-106
350
+ * GH-107
351
+ * GH-108
352
+ * GH-109
353
+ * GH-112
354
+ * GH-113
355
+ * GH-114
356
+ * GH-115
357
+ * GH-116
358
+ * GH-117
359
+ * GH-118
360
+ * GH-119
361
+ * GH-121
362
+
363
+ * Patch by NAITOH Jun.
364
+
365
+ * Improved parse performance when an attribute has many `<`s.
366
+
367
+ * GH-126
368
+
369
+ ### Fixes
370
+
371
+ * XPath: Fixed a bug of `normalize_space(array)`.
372
+
373
+ * GH-110
374
+ * GH-111
375
+
376
+ * Patch by flatisland.
377
+
378
+ * XPath: Fixed a bug that wrong position is used with nested path.
379
+
380
+ * GH-110
381
+ * GH-122
382
+
383
+ * Reported by jcavalieri.
384
+ * Patch by NAITOH Jun.
385
+
386
+ * Fixed a bug that an exception message can't be generated for
387
+ invalid encoding XML.
388
+
389
+ * GH-29
390
+ * GH-123
391
+
392
+ * Reported by DuKewu.
393
+ * Patch by NAITOH Jun.
394
+
395
+ ### Thanks
396
+
397
+ * NAITOH Jun
398
+ * flatisland
399
+ * jcavalieri
400
+ * DuKewu
401
+
3
402
  ## 3.2.6 - 2023-07-27 {#version-3-2-6}
4
403
 
5
404
  ### Improvements
@@ -148,8 +148,9 @@ module REXML
148
148
  # have been expanded to their values
149
149
  def value
150
150
  return @unnormalized if @unnormalized
151
- @unnormalized = Text::unnormalize( @normalized, doctype )
152
- @unnormalized
151
+
152
+ @unnormalized = Text::unnormalize(@normalized, doctype,
153
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
153
154
  end
154
155
 
155
156
  # The normalized value of this attribute. That is, the attribute with
@@ -91,6 +91,8 @@ module REXML
91
91
  #
92
92
  def initialize( source = nil, context = {} )
93
93
  @entity_expansion_count = 0
94
+ @entity_expansion_limit = Security.entity_expansion_limit
95
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
94
96
  super()
95
97
  @context = context
96
98
  return if source.nil?
@@ -431,10 +433,12 @@ module REXML
431
433
  end
432
434
 
433
435
  attr_reader :entity_expansion_count
436
+ attr_writer :entity_expansion_limit
437
+ attr_accessor :entity_expansion_text_limit
434
438
 
435
439
  def record_entity_expansion
436
440
  @entity_expansion_count += 1
437
- if @entity_expansion_count > Security.entity_expansion_limit
441
+ if @entity_expansion_count > @entity_expansion_limit
438
442
  raise "number of entity expansions exceeded, processing aborted."
439
443
  end
440
444
  end
data/lib/rexml/element.rb CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
7
7
  require_relative "parseexception"
8
8
 
9
9
  module REXML
10
- # An implementation note about namespaces:
11
- # As we parse, when we find namespaces we put them in a hash and assign
12
- # them a unique ID. We then convert the namespace prefix for the node
13
- # to the unique ID. This makes namespace lookup much faster for the
14
- # cost of extra memory use. We save the namespace prefix for the
15
- # context node and convert it back when we write it.
16
- @@namespaces = {}
17
-
18
10
  # An \REXML::Element object represents an XML element.
19
11
  #
20
12
  # An element:
@@ -449,9 +441,14 @@ module REXML
449
441
  # Related: #root_node, #document.
450
442
  #
451
443
  def root
452
- return elements[1] if self.kind_of? Document
453
- return self if parent.kind_of? Document or parent.nil?
454
- return parent.root
444
+ target = self
445
+ while target
446
+ return target.elements[1] if target.kind_of? Document
447
+ parent = target.parent
448
+ return target if parent.kind_of? Document or parent.nil?
449
+ target = parent
450
+ end
451
+ nil
455
452
  end
456
453
 
457
454
  # :call-seq:
@@ -627,8 +624,12 @@ module REXML
627
624
  else
628
625
  prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
629
626
  end
630
- ns = attributes[ prefix ]
631
- ns = parent.namespace(prefix) if ns.nil? and parent
627
+ ns = nil
628
+ target = self
629
+ while ns.nil? and target
630
+ ns = target.attributes[prefix]
631
+ target = target.parent
632
+ end
632
633
  ns = '' if ns.nil? and prefix == 'xmlns'
633
634
  return ns
634
635
  end
@@ -1284,16 +1285,11 @@ module REXML
1284
1285
  # document.root.attribute("x", "a") # => a:x='a:x'
1285
1286
  #
1286
1287
  def attribute( name, namespace=nil )
1287
- prefix = nil
1288
- if namespaces.respond_to? :key
1289
- prefix = namespaces.key(namespace) if namespace
1290
- else
1291
- prefix = namespaces.index(namespace) if namespace
1292
- end
1288
+ prefix = namespaces.key(namespace) if namespace
1293
1289
  prefix = nil if prefix == 'xmlns'
1294
1290
 
1295
1291
  ret_val =
1296
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
1292
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
1297
1293
 
1298
1294
  return ret_val unless ret_val.nil?
1299
1295
  return nil if prefix.nil?
@@ -2388,17 +2384,6 @@ module REXML
2388
2384
  elsif old_attr.kind_of? Hash
2389
2385
  old_attr[value.prefix] = value
2390
2386
  elsif old_attr.prefix != value.prefix
2391
- # Check for conflicting namespaces
2392
- if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
2393
- old_namespace = old_attr.namespace
2394
- new_namespace = value.namespace
2395
- if old_namespace == new_namespace
2396
- raise ParseException.new(
2397
- "Namespace conflict in adding attribute \"#{value.name}\": "+
2398
- "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
2399
- "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
2400
- end
2401
- end
2402
2387
  store value.name, {old_attr.prefix => old_attr,
2403
2388
  value.prefix => value}
2404
2389
  else
data/lib/rexml/entity.rb CHANGED
@@ -12,6 +12,7 @@ module REXML
12
12
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
13
13
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
14
14
  PEREFERENCE = "%#{NAME};"
15
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
15
16
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
16
17
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
17
18
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ module REXML
19
20
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
20
21
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
21
22
 
22
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
23
24
 
24
25
  # Create a new entity. Simple entities can be constructed by passing a
25
26
  # name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,14 @@ module REXML
68
69
  end
69
70
 
70
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
71
- # all entities -- both %ent; and &ent; entities. This differs from
72
- # +value()+ in that +value+ only replaces %ent; entities.
72
+ # &ent; entities.
73
73
  def unnormalized
74
- document.record_entity_expansion unless document.nil?
75
- v = value()
76
- return nil if v.nil?
77
- @unnormalized = Text::unnormalize(v, parent)
78
- @unnormalized
74
+ document&.record_entity_expansion
75
+
76
+ return nil if @value.nil?
77
+
78
+ @unnormalized = Text::unnormalize(@value, parent,
79
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
79
80
  end
80
81
 
81
82
  #once :unnormalized
@@ -121,46 +122,6 @@ module REXML
121
122
  write rv
122
123
  rv
123
124
  end
124
-
125
- PEREFERENCE_RE = /#{PEREFERENCE}/um
126
- # Returns the value of this entity. At the moment, only internal entities
127
- # are processed. If the value contains internal references (IE,
128
- # %blah;), those are replaced with their values. IE, if the doctype
129
- # contains:
130
- # <!ENTITY % foo "bar">
131
- # <!ENTITY yada "nanoo %foo; nanoo>
132
- # then:
133
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
- def value
135
- @resolved_value ||= resolve_value
136
- end
137
-
138
- def parent=(other)
139
- @resolved_value = nil
140
- super
141
- end
142
-
143
- private
144
- def resolve_value
145
- return nil if @value.nil?
146
- return @value unless @value.match?(PEREFERENCE_RE)
147
-
148
- matches = @value.scan(PEREFERENCE_RE)
149
- rv = @value.clone
150
- if @parent
151
- sum = 0
152
- matches.each do |entity_reference|
153
- entity_value = @parent.entity( entity_reference[0] )
154
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
- raise "entity expansion has grown too large"
156
- else
157
- sum += entity_value.bytesize
158
- end
159
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160
- end
161
- end
162
- rv
163
- end
164
125
  end
165
126
 
166
127
  # This is a set of entity constants -- the ones defined in the XML
@@ -111,7 +111,7 @@ module REXML
111
111
  # itself, then we don't need a carriage return... which makes this
112
112
  # logic more complex.
113
113
  node.children.each { |child|
114
- next if child == node.children[-1] and child.instance_of?(Text)
114
+ next if child.instance_of?(Text)
115
115
  unless child == node.children[0] or child.instance_of?(Text) or
116
116
  (child == node.children[1] and !node.children[0].writethis)
117
117
  output << "\n"
@@ -262,11 +262,10 @@ module REXML
262
262
  string(string).length
263
263
  end
264
264
 
265
- # UNTESTED
266
265
  def Functions::normalize_space( string=nil )
267
266
  string = string(@@context[:node]) if string.nil?
268
267
  if string.kind_of? Array
269
- string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
268
+ string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
270
269
  else
271
270
  string.to_s.strip.gsub(/\s+/um, ' ')
272
271
  end
data/lib/rexml/node.rb CHANGED
@@ -52,10 +52,14 @@ module REXML
52
52
 
53
53
  # Visit all subnodes of +self+ recursively
54
54
  def each_recursive(&block) # :yields: node
55
- self.elements.each {|node|
56
- block.call(node)
57
- node.each_recursive(&block)
58
- }
55
+ stack = []
56
+ each { |child| stack.unshift child if child.node_type == :element }
57
+ until stack.empty?
58
+ child = stack.pop
59
+ yield child
60
+ n = stack.size
61
+ child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
62
+ end
59
63
  end
60
64
 
61
65
  # Find (and return) first subnode (recursively) for which the block
@@ -29,6 +29,7 @@ module REXML
29
29
  err << "\nLine: #{line}\n"
30
30
  err << "Position: #{position}\n"
31
31
  err << "Last 80 unconsumed characters:\n"
32
+ err.force_encoding("ASCII-8BIT")
32
33
  err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
33
34
  end
34
35