rexml 3.2.6 → 3.3.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2583ae302aa5e698f0887a689c416e5debe0533ac472a9f96fce6a8912040fd8
4
- data.tar.gz: b0ffa6301fd899969a78e060ccaeafebfc2169e3c63ff499ebc6170468866475
3
+ metadata.gz: 9d8de4465de1e9548d66ad026772932f724b9747dc8b1c62960d8efeaeaa8412
4
+ data.tar.gz: 1cb29aaa36dcef98ba8bd4e9fa249959405f67fdb6bed54d12b466fdf43f57af
5
5
  SHA512:
6
- metadata.gz: f63fb0b84ef51e790cc6310244f2106d8c47ec9a00687c58c743afda82b60be9986d503c6f56f947db06f6758707facccd03405c4d1009376e856080aa26d0e4
7
- data.tar.gz: db62bea7391837a7ab4cfc5cb5a412ed4deb8d232653ca66d93a323a5a76383eed520cd4ced5b20204f29b04e84678791cd6f807195868f5d4a5e519a73d2aaf
6
+ metadata.gz: 78c881a10f12e46e1b6710d6ec75e42e4311c233376a7587756bc098063d21f52a4d82bcac8201001bf7e39079b3db4015482dae5b4ba46e561ef75fa15b15a0
7
+ data.tar.gz: 8d7a4b94937ce7b0bdf6ed83152fe207098dfe45333498a64e50d5fe9a686dffa2f66913c1edf265470b2b6a04cfae20857f1cffa404c278249784eeb533d594
data/NEWS.md CHANGED
@@ -1,5 +1,375 @@
1
1
  # News
2
2
 
3
+ ## 3.3.9 - 2024-10-24 {#version-3-3-9}
4
+
5
+ ### Improvements
6
+
7
+ * Improved performance.
8
+ * GH-210
9
+ * Patch by NAITOH Jun.
10
+
11
+ ### Fixes
12
+
13
+ * Fixed a parse bug for text-only invalid XML.
14
+ * GH-215
15
+ * Patch by NAITOH Jun.
16
+
17
+ * Fixed a parse bug that `&#0x...;` is accepted as a character
18
+ reference.
19
+
20
+ ### Thanks
21
+
22
+ * NAITOH Jun
23
+
24
+ ## 3.3.8 - 2024-09-29 {#version-3-3-8}
25
+
26
+ ### Improvements
27
+
28
+ * SAX2: Improve parse performance.
29
+ * GH-207
30
+ * Patch by NAITOH Jun.
31
+
32
+ ### Fixes
33
+
34
+ * Fixed a bug that unexpected attribute namespace conflict error for
35
+ the predefined "xml" namespace is reported.
36
+ * GH-208
37
+ * Patch by KITAITI Makoto.
38
+
39
+ ### Thanks
40
+
41
+ * NAITOH Jun
42
+
43
+ * KITAITI Makoto
44
+
45
+ ## 3.3.7 - 2024-09-04 {#version-3-3-7}
46
+
47
+ ### Improvements
48
+
49
+ * Added local entity expansion limit methods
50
+ * GH-192
51
+ * GH-202
52
+ * Reported by takuya kodama.
53
+ * Patch by NAITOH Jun.
54
+
55
+ * Removed explicit strscan dependency
56
+ * GH-204
57
+ * Patch by Bo Anderson.
58
+
59
+ ### Thanks
60
+
61
+ * takuya kodama
62
+
63
+ * NAITOH Jun
64
+
65
+ * Bo Anderson
66
+
67
+ ## 3.3.6 - 2024-08-22 {#version-3-3-6}
68
+
69
+ ### Improvements
70
+
71
+ * Removed duplicated entity expansions for performance.
72
+ * GH-194
73
+ * Patch by Viktor Ivarsson.
74
+
75
+ * Improved namespace conflicted attribute check performance. It was
76
+ too slow for deep elements.
77
+ * Reported by l33thaxor.
78
+
79
+ ### Fixes
80
+
81
+ * Fixed a bug that default entity expansions are counted for
82
+ security check. Default entity expansions should not be counted
83
+ because they don't have a security risk.
84
+ * GH-198
85
+ * GH-199
86
+ * Patch by Viktor Ivarsson.
87
+
88
+ * Fixed a parser bug that parameter entity references in internal
89
+ subsets are expanded. It's not allowed in the XML specification.
90
+ * GH-191
91
+ * Patch by NAITOH Jun.
92
+
93
+ * Fixed a stream parser bug that user-defined entity references in
94
+ text aren't expanded.
95
+ * GH-200
96
+ * Patch by NAITOH Jun.
97
+
98
+ ### Thanks
99
+
100
+ * Viktor Ivarsson
101
+
102
+ * NAITOH Jun
103
+
104
+ * l33thaxor
105
+
106
+ ## 3.3.5 - 2024-08-12 {#version-3-3-5}
107
+
108
+ ### Fixes
109
+
110
+ * Fixed a bug that `REXML::Security.entity_expansion_text_limit`
111
+ check has wrong text size calculation in SAX and pull parsers.
112
+ * GH-193
113
+ * GH-195
114
+ * Reported by Viktor Ivarsson.
115
+ * Patch by NAITOH Jun.
116
+
117
+ ### Thanks
118
+
119
+ * Viktor Ivarsson
120
+
121
+ * NAITOH Jun
122
+
123
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
124
+
125
+ ### Fixes
126
+
127
+ * Fixed a bug that `REXML::Security` isn't defined when
128
+ `REXML::Parsers::StreamParser` is used and
129
+ `rexml/parsers/streamparser` is only required.
130
+ * GH-189
131
+ * Patch by takuya kodama.
132
+
133
+ ### Thanks
134
+
135
+ * takuya kodama
136
+
137
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
138
+
139
+ ### Improvements
140
+
141
+ * Added support for detecting invalid XML that has unsupported
142
+ content before root element
143
+ * GH-184
144
+ * Patch by NAITOH Jun.
145
+
146
+ * Added support for `REXML::Security.entity_expansion_limit=` and
147
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
148
+ parsers
149
+ * GH-187
150
+ * Patch by NAITOH Jun.
151
+
152
+ * Added more tests for invalid XMLs.
153
+ * GH-183
154
+ * Patch by Watson.
155
+
156
+ * Added more performance tests.
157
+ * Patch by Watson.
158
+
159
+ * Improved parse performance.
160
+ * GH-186
161
+ * Patch by tomoya ishida.
162
+
163
+ ### Thanks
164
+
165
+ * NAITOH Jun
166
+
167
+ * Watson
168
+
169
+ * tomoya ishida
170
+
171
+ ## 3.3.2 - 2024-07-16 {#version-3-3-2}
172
+
173
+ ### Improvements
174
+
175
+ * Improved parse performance.
176
+ * GH-160
177
+ * Patch by NAITOH Jun.
178
+
179
+ * Improved parse performance.
180
+ * GH-169
181
+ * GH-170
182
+ * GH-171
183
+ * GH-172
184
+ * GH-173
185
+ * GH-174
186
+ * GH-175
187
+ * GH-176
188
+ * GH-177
189
+ * Patch by Watson.
190
+
191
+ * Added support for raising a parse exception when an XML has extra
192
+ content after the root element.
193
+ * GH-161
194
+ * Patch by NAITOH Jun.
195
+
196
+ * Added support for raising a parse exception when an XML
197
+ declaration exists in wrong position.
198
+ * GH-162
199
+ * Patch by NAITOH Jun.
200
+
201
+ * Removed a needless space after the XML declaration in pretty print mode.
202
+ * GH-164
203
+ * Patch by NAITOH Jun.
204
+
205
+ * Stopped emitting `:text` events after the root element.
206
+ * GH-167
207
+ * Patch by NAITOH Jun.
208
+
209
+ ### Fixes
210
+
211
+ * Fixed a bug that SAX2 parser doesn't expand predefined entities for
212
+ `characters` callback.
213
+ * GH-168
214
+ * Patch by NAITOH Jun.
215
+
216
+ ### Thanks
217
+
218
+ * NAITOH Jun
219
+
220
+ * Watson
221
+
222
+ ## 3.3.1 - 2024-06-25 {#version-3-3-1}
223
+
224
+ ### Improvements
225
+
226
+ * Added support for detecting malformed top-level comments.
227
+ * GH-145
228
+ * Patch by Hiroya Fujinami.
229
+
230
+ * Improved `REXML::Element#attribute` performance.
231
+ * GH-146
232
+ * Patch by Hiroya Fujinami.
233
+
234
+ * Added support for detecting malformed `<!-->` comments.
235
+ * GH-147
236
+ * Patch by Hiroya Fujinami.
237
+
238
+ * Added support for detecting unclosed `DOCTYPE`.
239
+ * GH-152
240
+ * Patch by Hiroya Fujinami.
241
+
242
+ * Added `changelog_uri` metadata to gemspec.
243
+ * GH-156
244
+ * Patch by fynsta.
245
+
246
+ * Improved parse performance.
247
+ * GH-157
248
+ * GH-158
249
+ * Patch by NAITOH Jun.
250
+
251
+ ### Fixes
252
+
253
+ * Fixed a bug that large XML can't be parsed.
254
+ * GH-154
255
+ * Patch by NAITOH Jun.
256
+
257
+ * Fixed a bug that private constants are visible.
258
+ * GH-155
259
+ * Patch by NAITOH Jun.
260
+
261
+ ### Thanks
262
+
263
+ * Hiroya Fujinami
264
+
265
+ * NAITOH Jun
266
+
267
+ * fynsta
268
+
269
+ ## 3.3.0 - 2024-06-11 {#version-3-3-0}
270
+
271
+ ### Improvements
272
+
273
+ * Added support for strscan 0.7.0 installed with Ruby 2.6.
274
+ * GH-142
275
+ * Reported by Fernando Trigoso.
276
+
277
+ ### Thanks
278
+
279
+ * Fernando Trigoso
280
+
281
+ ## 3.2.9 - 2024-06-09 {#version-3-2-9}
282
+
283
+ ### Improvements
284
+
285
+ * Added support for old strscan.
286
+ * GH-132
287
+ * Reported by Adam.
288
+
289
+ * Improved attribute value parse performance.
290
+ * GH-135
291
+ * Patch by NAITOH Jun.
292
+
293
+ * Improved `REXML::Node#each_recursive` performance.
294
+ * GH-134
295
+ * GH-139
296
+ * Patch by Hiroya Fujinami.
297
+
298
+ * Improved text parse performance.
299
+ * Reported by mprogrammer.
300
+
301
+ ### Thanks
302
+
303
+ * Adam
304
+ * NAITOH Jun
305
+ * Hiroya Fujinami
306
+ * mprogrammer
307
+
308
+ ## 3.2.8 - 2024-05-16 {#version-3-2-8}
309
+
310
+ ### Fixes
311
+
312
+ * Suppressed a warning
313
+
314
+ ## 3.2.7 - 2024-05-16 {#version-3-2-7}
315
+
316
+ ### Improvements
317
+
318
+ * Improve parse performance by using `StringScanner`.
319
+
320
+ * GH-106
321
+ * GH-107
322
+ * GH-108
323
+ * GH-109
324
+ * GH-112
325
+ * GH-113
326
+ * GH-114
327
+ * GH-115
328
+ * GH-116
329
+ * GH-117
330
+ * GH-118
331
+ * GH-119
332
+ * GH-121
333
+
334
+ * Patch by NAITOH Jun.
335
+
336
+ * Improved parse performance when an attribute has many `<`s.
337
+
338
+ * GH-126
339
+
340
+ ### Fixes
341
+
342
+ * XPath: Fixed a bug of `normalize_space(array)`.
343
+
344
+ * GH-110
345
+ * GH-111
346
+
347
+ * Patch by flatisland.
348
+
349
+ * XPath: Fixed a bug that wrong position is used with nested path.
350
+
351
+ * GH-110
352
+ * GH-122
353
+
354
+ * Reported by jcavalieri.
355
+ * Patch by NAITOH Jun.
356
+
357
+ * Fixed a bug that an exception message can't be generated for
358
+ invalid encoding XML.
359
+
360
+ * GH-29
361
+ * GH-123
362
+
363
+ * Reported by DuKewu.
364
+ * Patch by NAITOH Jun.
365
+
366
+ ### Thanks
367
+
368
+ * NAITOH Jun
369
+ * flatisland
370
+ * jcavalieri
371
+ * DuKewu
372
+
3
373
  ## 3.2.6 - 2023-07-27 {#version-3-2-6}
4
374
 
5
375
  ### Improvements
@@ -148,8 +148,9 @@ module REXML
148
148
  # have been expanded to their values
149
149
  def value
150
150
  return @unnormalized if @unnormalized
151
- @unnormalized = Text::unnormalize( @normalized, doctype )
152
- @unnormalized
151
+
152
+ @unnormalized = Text::unnormalize(@normalized, doctype,
153
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
153
154
  end
154
155
 
155
156
  # The normalized value of this attribute. That is, the attribute with
@@ -91,6 +91,8 @@ module REXML
91
91
  #
92
92
  def initialize( source = nil, context = {} )
93
93
  @entity_expansion_count = 0
94
+ @entity_expansion_limit = Security.entity_expansion_limit
95
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
94
96
  super()
95
97
  @context = context
96
98
  return if source.nil?
@@ -431,10 +433,12 @@ module REXML
431
433
  end
432
434
 
433
435
  attr_reader :entity_expansion_count
436
+ attr_writer :entity_expansion_limit
437
+ attr_accessor :entity_expansion_text_limit
434
438
 
435
439
  def record_entity_expansion
436
440
  @entity_expansion_count += 1
437
- if @entity_expansion_count > Security.entity_expansion_limit
441
+ if @entity_expansion_count > @entity_expansion_limit
438
442
  raise "number of entity expansions exceeded, processing aborted."
439
443
  end
440
444
  end
data/lib/rexml/element.rb CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
7
7
  require_relative "parseexception"
8
8
 
9
9
  module REXML
10
- # An implementation note about namespaces:
11
- # As we parse, when we find namespaces we put them in a hash and assign
12
- # them a unique ID. We then convert the namespace prefix for the node
13
- # to the unique ID. This makes namespace lookup much faster for the
14
- # cost of extra memory use. We save the namespace prefix for the
15
- # context node and convert it back when we write it.
16
- @@namespaces = {}
17
-
18
10
  # An \REXML::Element object represents an XML element.
19
11
  #
20
12
  # An element:
@@ -449,9 +441,14 @@ module REXML
449
441
  # Related: #root_node, #document.
450
442
  #
451
443
  def root
452
- return elements[1] if self.kind_of? Document
453
- return self if parent.kind_of? Document or parent.nil?
454
- return parent.root
444
+ target = self
445
+ while target
446
+ return target.elements[1] if target.kind_of? Document
447
+ parent = target.parent
448
+ return target if parent.kind_of? Document or parent.nil?
449
+ target = parent
450
+ end
451
+ nil
455
452
  end
456
453
 
457
454
  # :call-seq:
@@ -627,8 +624,12 @@ module REXML
627
624
  else
628
625
  prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
629
626
  end
630
- ns = attributes[ prefix ]
631
- ns = parent.namespace(prefix) if ns.nil? and parent
627
+ ns = nil
628
+ target = self
629
+ while ns.nil? and target
630
+ ns = target.attributes[prefix]
631
+ target = target.parent
632
+ end
632
633
  ns = '' if ns.nil? and prefix == 'xmlns'
633
634
  return ns
634
635
  end
@@ -1284,16 +1285,11 @@ module REXML
1284
1285
  # document.root.attribute("x", "a") # => a:x='a:x'
1285
1286
  #
1286
1287
  def attribute( name, namespace=nil )
1287
- prefix = nil
1288
- if namespaces.respond_to? :key
1289
- prefix = namespaces.key(namespace) if namespace
1290
- else
1291
- prefix = namespaces.index(namespace) if namespace
1292
- end
1288
+ prefix = namespaces.key(namespace) if namespace
1293
1289
  prefix = nil if prefix == 'xmlns'
1294
1290
 
1295
1291
  ret_val =
1296
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
1292
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
1297
1293
 
1298
1294
  return ret_val unless ret_val.nil?
1299
1295
  return nil if prefix.nil?
@@ -2388,17 +2384,6 @@ module REXML
2388
2384
  elsif old_attr.kind_of? Hash
2389
2385
  old_attr[value.prefix] = value
2390
2386
  elsif old_attr.prefix != value.prefix
2391
- # Check for conflicting namespaces
2392
- if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
2393
- old_namespace = old_attr.namespace
2394
- new_namespace = value.namespace
2395
- if old_namespace == new_namespace
2396
- raise ParseException.new(
2397
- "Namespace conflict in adding attribute \"#{value.name}\": "+
2398
- "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
2399
- "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
2400
- end
2401
- end
2402
2387
  store value.name, {old_attr.prefix => old_attr,
2403
2388
  value.prefix => value}
2404
2389
  else
data/lib/rexml/entity.rb CHANGED
@@ -12,6 +12,7 @@ module REXML
12
12
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
13
13
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
14
14
  PEREFERENCE = "%#{NAME};"
15
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
15
16
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
16
17
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
17
18
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ module REXML
19
20
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
20
21
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
21
22
 
22
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
23
24
 
24
25
  # Create a new entity. Simple entities can be constructed by passing a
25
26
  # name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,14 @@ module REXML
68
69
  end
69
70
 
70
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
71
- # all entities -- both %ent; and &ent; entities. This differs from
72
- # +value()+ in that +value+ only replaces %ent; entities.
72
+ # &ent; entities.
73
73
  def unnormalized
74
- document.record_entity_expansion unless document.nil?
75
- v = value()
76
- return nil if v.nil?
77
- @unnormalized = Text::unnormalize(v, parent)
78
- @unnormalized
74
+ document&.record_entity_expansion
75
+
76
+ return nil if @value.nil?
77
+
78
+ @unnormalized = Text::unnormalize(@value, parent,
79
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
79
80
  end
80
81
 
81
82
  #once :unnormalized
@@ -121,46 +122,6 @@ module REXML
121
122
  write rv
122
123
  rv
123
124
  end
124
-
125
- PEREFERENCE_RE = /#{PEREFERENCE}/um
126
- # Returns the value of this entity. At the moment, only internal entities
127
- # are processed. If the value contains internal references (IE,
128
- # %blah;), those are replaced with their values. IE, if the doctype
129
- # contains:
130
- # <!ENTITY % foo "bar">
131
- # <!ENTITY yada "nanoo %foo; nanoo>
132
- # then:
133
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
- def value
135
- @resolved_value ||= resolve_value
136
- end
137
-
138
- def parent=(other)
139
- @resolved_value = nil
140
- super
141
- end
142
-
143
- private
144
- def resolve_value
145
- return nil if @value.nil?
146
- return @value unless @value.match?(PEREFERENCE_RE)
147
-
148
- matches = @value.scan(PEREFERENCE_RE)
149
- rv = @value.clone
150
- if @parent
151
- sum = 0
152
- matches.each do |entity_reference|
153
- entity_value = @parent.entity( entity_reference[0] )
154
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
- raise "entity expansion has grown too large"
156
- else
157
- sum += entity_value.bytesize
158
- end
159
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160
- end
161
- end
162
- rv
163
- end
164
125
  end
165
126
 
166
127
  # This is a set of entity constants -- the ones defined in the XML
@@ -111,7 +111,7 @@ module REXML
111
111
  # itself, then we don't need a carriage return... which makes this
112
112
  # logic more complex.
113
113
  node.children.each { |child|
114
- next if child == node.children[-1] and child.instance_of?(Text)
114
+ next if child.instance_of?(Text)
115
115
  unless child == node.children[0] or child.instance_of?(Text) or
116
116
  (child == node.children[1] and !node.children[0].writethis)
117
117
  output << "\n"
@@ -262,11 +262,10 @@ module REXML
262
262
  string(string).length
263
263
  end
264
264
 
265
- # UNTESTED
266
265
  def Functions::normalize_space( string=nil )
267
266
  string = string(@@context[:node]) if string.nil?
268
267
  if string.kind_of? Array
269
- string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
268
+ string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
270
269
  else
271
270
  string.to_s.strip.gsub(/\s+/um, ' ')
272
271
  end
data/lib/rexml/node.rb CHANGED
@@ -52,10 +52,14 @@ module REXML
52
52
 
53
53
  # Visit all subnodes of +self+ recursively
54
54
  def each_recursive(&block) # :yields: node
55
- self.elements.each {|node|
56
- block.call(node)
57
- node.each_recursive(&block)
58
- }
55
+ stack = []
56
+ each { |child| stack.unshift child if child.node_type == :element }
57
+ until stack.empty?
58
+ child = stack.pop
59
+ yield child
60
+ n = stack.size
61
+ child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
62
+ end
59
63
  end
60
64
 
61
65
  # Find (and return) first subnode (recursively) for which the block
@@ -29,6 +29,7 @@ module REXML
29
29
  err << "\nLine: #{line}\n"
30
30
  err << "Position: #{position}\n"
31
31
  err << "Last 80 unconsumed characters:\n"
32
+ err.force_encoding("ASCII-8BIT")
32
33
  err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
33
34
  end
34
35