rexml 3.3.6 → 3.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +117 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/entity.rb +5 -2
- data/lib/rexml/parsers/baseparser.rb +87 -52
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +10 -0
- data/lib/rexml/parsers/streamparser.rb +8 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +61 -6
- data/lib/rexml/text.rb +20 -43
- metadata +5 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
|
4
|
+
data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
|
7
|
+
data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
|
data/NEWS.md
CHANGED
@@ -1,5 +1,122 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.1 - 2025-02-16 {#version-3-4-1}
|
4
|
+
|
5
|
+
### Improvement
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-226
|
9
|
+
* GH-227
|
10
|
+
* GH-237
|
11
|
+
* Patch by NAITOH Jun
|
12
|
+
|
13
|
+
### Fixes
|
14
|
+
|
15
|
+
* Fix serialization of ATTLIST is incorrect
|
16
|
+
* GH-233
|
17
|
+
* GH-234
|
18
|
+
* Patch by OlofKalufs
|
19
|
+
* Reported by OlofKalufs
|
20
|
+
|
21
|
+
### Thanks
|
22
|
+
|
23
|
+
* NAITOH Jun
|
24
|
+
|
25
|
+
* OlofKalufs
|
26
|
+
|
27
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
28
|
+
|
29
|
+
### Improvement
|
30
|
+
|
31
|
+
* Improved performance.
|
32
|
+
* GH-216
|
33
|
+
* Patch by NAITOH Jun
|
34
|
+
|
35
|
+
* JRuby: Improved parse performance.
|
36
|
+
* GH-219
|
37
|
+
* Patch by João Duarte
|
38
|
+
|
39
|
+
* Added support for reusing pull parser.
|
40
|
+
* GH-214
|
41
|
+
* GH-220
|
42
|
+
* Patch by Dmitry Pogrebnoy
|
43
|
+
|
44
|
+
* Improved error handling when source is `IO`.
|
45
|
+
* GH-221
|
46
|
+
* Patch by NAITOH Jun
|
47
|
+
|
48
|
+
### Thanks
|
49
|
+
|
50
|
+
* NAITOH Jun
|
51
|
+
|
52
|
+
* João Duarte
|
53
|
+
|
54
|
+
* Dmitry Pogrebnoy
|
55
|
+
|
56
|
+
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
57
|
+
|
58
|
+
### Improvements
|
59
|
+
|
60
|
+
* Improved performance.
|
61
|
+
* GH-210
|
62
|
+
* Patch by NAITOH Jun.
|
63
|
+
|
64
|
+
### Fixes
|
65
|
+
|
66
|
+
* Fixed a parse bug for text only invalid XML.
|
67
|
+
* GH-215
|
68
|
+
* Patch by NAITOH Jun.
|
69
|
+
|
70
|
+
* Fixed a parse bug that `&#0x...;` is accepted as a character
|
71
|
+
reference.
|
72
|
+
|
73
|
+
### Thanks
|
74
|
+
|
75
|
+
* NAITOH Jun
|
76
|
+
|
77
|
+
## 3.3.8 - 2024-09-29 {#version-3-3-8}
|
78
|
+
|
79
|
+
### Improvements
|
80
|
+
|
81
|
+
* SAX2: Improve parse performance.
|
82
|
+
* GH-207
|
83
|
+
* Patch by NAITOH Jun.
|
84
|
+
|
85
|
+
### Fixes
|
86
|
+
|
87
|
+
* Fixed a bug that unexpected attribute namespace conflict error for
|
88
|
+
the predefined "xml" namespace is reported.
|
89
|
+
* GH-208
|
90
|
+
* Patch by KITAITI Makoto
|
91
|
+
|
92
|
+
### Thanks
|
93
|
+
|
94
|
+
* NAITOH Jun
|
95
|
+
|
96
|
+
* KITAITI Makoto
|
97
|
+
|
98
|
+
## 3.3.7 - 2024-09-04 {#version-3-3-7}
|
99
|
+
|
100
|
+
### Improvements
|
101
|
+
|
102
|
+
* Added local entity expansion limit methods
|
103
|
+
* GH-192
|
104
|
+
* GH-202
|
105
|
+
* Reported by takuya kodama.
|
106
|
+
* Patch by NAITOH Jun.
|
107
|
+
|
108
|
+
* Removed explicit strscan dependency
|
109
|
+
* GH-204
|
110
|
+
* Patch by Bo Anderson.
|
111
|
+
|
112
|
+
### Thanks
|
113
|
+
|
114
|
+
* takuya kodama
|
115
|
+
|
116
|
+
* NAITOH Jun
|
117
|
+
|
118
|
+
* Bo Anderson
|
119
|
+
|
3
120
|
## 3.3.6 - 2024-08-22 {#version-3-3-6}
|
4
121
|
|
5
122
|
### Improvements
|
data/lib/rexml/attribute.rb
CHANGED
@@ -148,8 +148,9 @@ module REXML
|
|
148
148
|
# have been expanded to their values
|
149
149
|
def value
|
150
150
|
return @unnormalized if @unnormalized
|
151
|
-
|
152
|
-
@unnormalized
|
151
|
+
|
152
|
+
@unnormalized = Text::unnormalize(@normalized, doctype,
|
153
|
+
entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
|
153
154
|
end
|
154
155
|
|
155
156
|
# The normalized value of this attribute. That is, the attribute with
|
data/lib/rexml/document.rb
CHANGED
@@ -91,6 +91,8 @@ module REXML
|
|
91
91
|
#
|
92
92
|
def initialize( source = nil, context = {} )
|
93
93
|
@entity_expansion_count = 0
|
94
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
95
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
94
96
|
super()
|
95
97
|
@context = context
|
96
98
|
return if source.nil?
|
@@ -431,10 +433,12 @@ module REXML
|
|
431
433
|
end
|
432
434
|
|
433
435
|
attr_reader :entity_expansion_count
|
436
|
+
attr_writer :entity_expansion_limit
|
437
|
+
attr_accessor :entity_expansion_text_limit
|
434
438
|
|
435
439
|
def record_entity_expansion
|
436
440
|
@entity_expansion_count += 1
|
437
|
-
if @entity_expansion_count > Security.entity_expansion_limit
|
441
|
+
if @entity_expansion_count > @entity_expansion_limit
|
438
442
|
raise "number of entity expansions exceeded, processing aborted."
|
439
443
|
end
|
440
444
|
end
|
data/lib/rexml/entity.rb
CHANGED
@@ -71,9 +71,12 @@ module REXML
|
|
71
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
72
72
|
# &ent; entities.
|
73
73
|
def unnormalized
|
74
|
-
document
|
74
|
+
document&.record_entity_expansion
|
75
|
+
|
75
76
|
return nil if @value.nil?
|
76
|
-
|
77
|
+
|
78
|
+
@unnormalized = Text::unnormalize(@value, parent,
|
79
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
77
80
|
end
|
78
81
|
|
79
82
|
#once :unnormalized
|
data/lib/rexml/parsers/baseparser.rb
CHANGED
@@ -150,12 +150,13 @@ module REXML
|
|
150
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
151
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
152
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
-
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
154
|
DEFAULT_ENTITIES_PATTERNS = {}
|
155
155
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
156
|
default_entities.each do |term|
|
157
157
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
158
|
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
159
160
|
end
|
160
161
|
private_constant :Private
|
161
162
|
|
@@ -164,6 +165,9 @@ module REXML
|
|
164
165
|
@listeners = []
|
165
166
|
@prefixes = Set.new
|
166
167
|
@entity_expansion_count = 0
|
168
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
169
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
167
171
|
end
|
168
172
|
|
169
173
|
def add_listener( listener )
|
@@ -172,16 +176,22 @@ module REXML
|
|
172
176
|
|
173
177
|
attr_reader :source
|
174
178
|
attr_reader :entity_expansion_count
|
179
|
+
attr_writer :entity_expansion_limit
|
180
|
+
attr_writer :entity_expansion_text_limit
|
175
181
|
|
176
182
|
def stream=( source )
|
177
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
178
188
|
@closed = nil
|
179
189
|
@have_root = false
|
180
190
|
@document_status = nil
|
181
191
|
@tags = []
|
182
192
|
@stack = []
|
183
193
|
@entities = []
|
184
|
-
@namespaces = {}
|
194
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
185
195
|
@namespaces_restore_stack = []
|
186
196
|
end
|
187
197
|
|
@@ -263,10 +273,10 @@ module REXML
|
|
263
273
|
@source.ensure_buffer
|
264
274
|
if @document_status == nil
|
265
275
|
start_position = @source.position
|
266
|
-
if @source.match("<?", true)
|
276
|
+
if @source.match?("<?", true)
|
267
277
|
return process_instruction
|
268
|
-
elsif @source.match("<!", true)
|
269
|
-
if @source.match("--", true)
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
270
280
|
md = @source.match(/(.*?)-->/um, true)
|
271
281
|
if md.nil?
|
272
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
@@ -275,10 +285,10 @@ module REXML
|
|
275
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
276
286
|
end
|
277
287
|
return [ :comment, md[1] ]
|
278
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
279
289
|
base_error_message = "Malformed DOCTYPE"
|
280
|
-
unless @source.match(/\s+/um, true)
|
281
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
282
292
|
message = "#{base_error_message}: name is missing"
|
283
293
|
else
|
284
294
|
message = "#{base_error_message}: invalid name"
|
@@ -287,10 +297,11 @@ module REXML
|
|
287
297
|
raise REXML::ParseException.new(message, @source)
|
288
298
|
end
|
289
299
|
name = parse_name(base_error_message)
|
290
|
-
|
300
|
+
@source.match?(/\s*/um, true) # skip spaces
|
301
|
+
if @source.match?("[", true)
|
291
302
|
id = [nil, nil, nil]
|
292
303
|
@document_status = :in_doctype
|
293
|
-
elsif @source.match(
|
304
|
+
elsif @source.match?(">", true)
|
294
305
|
id = [nil, nil, nil]
|
295
306
|
@document_status = :after_doctype
|
296
307
|
@source.ensure_buffer
|
@@ -302,9 +313,10 @@ module REXML
|
|
302
313
|
# For backward compatibility
|
303
314
|
id[1], id[2] = id[2], nil
|
304
315
|
end
|
305
|
-
|
316
|
+
@source.match?(/\s*/um, true) # skip spaces
|
317
|
+
if @source.match?("[", true)
|
306
318
|
@document_status = :in_doctype
|
307
|
-
elsif @source.match(
|
319
|
+
elsif @source.match?(">", true)
|
308
320
|
@document_status = :after_doctype
|
309
321
|
@source.ensure_buffer
|
310
322
|
else
|
@@ -314,7 +326,7 @@ module REXML
|
|
314
326
|
end
|
315
327
|
args = [:start_doctype, name, *id]
|
316
328
|
if @document_status == :after_doctype
|
317
|
-
@source.match(/\s*/um, true)
|
329
|
+
@source.match?(/\s*/um, true)
|
318
330
|
@stack << [ :end_doctype ]
|
319
331
|
end
|
320
332
|
return args
|
@@ -325,14 +337,14 @@ module REXML
|
|
325
337
|
end
|
326
338
|
end
|
327
339
|
if @document_status == :in_doctype
|
328
|
-
@source.match(/\s*/um, true) # skip spaces
|
340
|
+
@source.match?(/\s*/um, true) # skip spaces
|
329
341
|
start_position = @source.position
|
330
|
-
if @source.match("<!", true)
|
331
|
-
if @source.match("ELEMENT", true)
|
342
|
+
if @source.match?("<!", true)
|
343
|
+
if @source.match?("ELEMENT", true)
|
332
344
|
md = @source.match(/(.*?)>/um, true)
|
333
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
334
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
335
|
-
elsif @source.match("ENTITY", true)
|
347
|
+
elsif @source.match?("ENTITY", true)
|
336
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
337
349
|
unless match_data
|
338
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
@@ -364,11 +376,11 @@ module REXML
|
|
364
376
|
end
|
365
377
|
match << '%' if ref
|
366
378
|
return match
|
367
|
-
elsif @source.match("ATTLIST", true)
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
368
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
369
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
370
382
|
element = md[1]
|
371
|
-
contents = md[0]
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
372
384
|
|
373
385
|
pairs = {}
|
374
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
@@ -384,10 +396,10 @@ module REXML
|
|
384
396
|
end
|
385
397
|
end
|
386
398
|
return [ :attlistdecl, element, pairs, contents ]
|
387
|
-
elsif @source.match("NOTATION", true)
|
399
|
+
elsif @source.match?("NOTATION", true)
|
388
400
|
base_error_message = "Malformed notation declaration"
|
389
|
-
unless @source.match(/\s+/um, true)
|
390
|
-
if @source.match(">")
|
401
|
+
unless @source.match?(/\s+/um, true)
|
402
|
+
if @source.match?(">")
|
391
403
|
message = "#{base_error_message}: name is missing"
|
392
404
|
else
|
393
405
|
message = "#{base_error_message}: invalid name"
|
@@ -399,7 +411,8 @@ module REXML
|
|
399
411
|
id = parse_id(base_error_message,
|
400
412
|
accept_external_id: true,
|
401
413
|
accept_public_id: true)
|
402
|
-
|
414
|
+
@source.match?(/\s*/um, true) # skip spaces
|
415
|
+
unless @source.match?(">", true)
|
403
416
|
message = "#{base_error_message}: garbage before end >"
|
404
417
|
raise REXML::ParseException.new(message, @source)
|
405
418
|
end
|
@@ -413,7 +426,7 @@ module REXML
|
|
413
426
|
end
|
414
427
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
415
428
|
return [ :externalentity, match[1] ]
|
416
|
-
elsif @source.match(/\]\s*>/um, true)
|
429
|
+
elsif @source.match?(/\]\s*>/um, true)
|
417
430
|
@document_status = :after_doctype
|
418
431
|
return [ :end_doctype ]
|
419
432
|
end
|
@@ -422,16 +435,16 @@ module REXML
|
|
422
435
|
end
|
423
436
|
end
|
424
437
|
if @document_status == :after_doctype
|
425
|
-
@source.match(/\s*/um, true)
|
438
|
+
@source.match?(/\s*/um, true)
|
426
439
|
end
|
427
440
|
begin
|
428
441
|
start_position = @source.position
|
429
|
-
if @source.match("<", true)
|
442
|
+
if @source.match?("<", true)
|
430
443
|
# :text's read_until may remain only "<" in buffer. In the
|
431
444
|
# case, buffer is empty here. So we need to fill buffer
|
432
445
|
# here explicitly.
|
433
446
|
@source.ensure_buffer
|
434
|
-
if @source.match("/", true)
|
447
|
+
if @source.match?("/", true)
|
435
448
|
@namespaces_restore_stack.pop
|
436
449
|
last_tag = @tags.pop
|
437
450
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
@@ -446,7 +459,7 @@ module REXML
|
|
446
459
|
raise REXML::ParseException.new(message, @source)
|
447
460
|
end
|
448
461
|
return [ :end_element, last_tag ]
|
449
|
-
elsif @source.match("!", true)
|
462
|
+
elsif @source.match?("!", true)
|
450
463
|
md = @source.match(/([^>]*>)/um)
|
451
464
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
452
465
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
@@ -464,7 +477,7 @@ module REXML
|
|
464
477
|
end
|
465
478
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
466
479
|
"in the doctype declaration.", @source)
|
467
|
-
elsif @source.match("?", true)
|
480
|
+
elsif @source.match?("?", true)
|
468
481
|
return process_instruction
|
469
482
|
else
|
470
483
|
# Get the next tag
|
@@ -564,8 +577,12 @@ module REXML
|
|
564
577
|
return rv if matches.size == 0
|
565
578
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
566
579
|
m=$1
|
567
|
-
|
568
|
-
|
580
|
+
if m.start_with?("x")
|
581
|
+
code_point = Integer(m[1..-1], 16)
|
582
|
+
else
|
583
|
+
code_point = Integer(m, 10)
|
584
|
+
end
|
585
|
+
[code_point].pack('U*')
|
569
586
|
}
|
570
587
|
matches.collect!{|x|x[0]}.compact!
|
571
588
|
if filter
|
@@ -585,7 +602,7 @@ module REXML
|
|
585
602
|
end
|
586
603
|
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
587
604
|
rv.gsub!( re, entity_value )
|
588
|
-
if rv.bytesize > Security.entity_expansion_text_limit
|
605
|
+
if rv.bytesize > @entity_expansion_text_limit
|
589
606
|
raise "entity expansion has grown too large"
|
590
607
|
end
|
591
608
|
else
|
@@ -627,7 +644,7 @@ module REXML
|
|
627
644
|
|
628
645
|
def record_entity_expansion(delta=1)
|
629
646
|
@entity_expansion_count += delta
|
630
|
-
if @entity_expansion_count > Security.entity_expansion_limit
|
647
|
+
if @entity_expansion_count > @entity_expansion_limit
|
631
648
|
raise "number of entity expansions exceeded, processing aborted."
|
632
649
|
end
|
633
650
|
end
|
@@ -641,7 +658,7 @@ module REXML
|
|
641
658
|
def parse_name(base_error_message)
|
642
659
|
md = @source.match(Private::NAME_PATTERN, true)
|
643
660
|
unless md
|
644
|
-
if @source.match(/\S/um)
|
661
|
+
if @source.match?(/\S/um)
|
645
662
|
message = "#{base_error_message}: invalid name"
|
646
663
|
else
|
647
664
|
message = "#{base_error_message}: name is missing"
|
@@ -683,34 +700,34 @@ module REXML
|
|
683
700
|
accept_public_id:)
|
684
701
|
public = /\A\s*PUBLIC/um
|
685
702
|
system = /\A\s*SYSTEM/um
|
686
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
687
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
703
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
704
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
688
705
|
return "public ID literal is missing"
|
689
706
|
end
|
690
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
707
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
691
708
|
return "invalid public ID literal"
|
692
709
|
end
|
693
710
|
if accept_public_id
|
694
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
711
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
695
712
|
return "system ID literal is missing"
|
696
713
|
end
|
697
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
714
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
698
715
|
return "invalid system literal"
|
699
716
|
end
|
700
717
|
"garbage after system literal"
|
701
718
|
else
|
702
719
|
"garbage after public ID literal"
|
703
720
|
end
|
704
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
705
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
721
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
722
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
706
723
|
return "system literal is missing"
|
707
724
|
end
|
708
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
725
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
709
726
|
return "invalid system literal"
|
710
727
|
end
|
711
728
|
"garbage after system literal"
|
712
729
|
else
|
713
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
730
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
714
731
|
return "invalid ID type"
|
715
732
|
end
|
716
733
|
"ID type is missing"
|
@@ -719,7 +736,7 @@ module REXML
|
|
719
736
|
|
720
737
|
def process_instruction
|
721
738
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
722
|
-
if @source.match(/\s+/um, true)
|
739
|
+
if @source.match?(/\s+/um, true)
|
723
740
|
match_data = @source.match(/(.*?)\?>/um, true)
|
724
741
|
unless match_data
|
725
742
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
@@ -727,7 +744,7 @@ module REXML
|
|
727
744
|
content = match_data[1]
|
728
745
|
else
|
729
746
|
content = nil
|
730
|
-
unless @source.match("?>", true)
|
747
|
+
unless @source.match?("?>", true)
|
731
748
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
732
749
|
end
|
733
750
|
end
|
@@ -752,14 +769,33 @@ module REXML
|
|
752
769
|
[:processing_instruction, name, content]
|
753
770
|
end
|
754
771
|
|
772
|
+
if StringScanner::Version < "3.1.1"
|
773
|
+
def scan_quote
|
774
|
+
@source.match(/(['"])/, true)&.[](1)
|
775
|
+
end
|
776
|
+
else
|
777
|
+
def scan_quote
|
778
|
+
case @source.peek_byte
|
779
|
+
when 34 # '"'.ord
|
780
|
+
@source.scan_byte
|
781
|
+
'"'
|
782
|
+
when 39 # "'".ord
|
783
|
+
@source.scan_byte
|
784
|
+
"'"
|
785
|
+
else
|
786
|
+
nil
|
787
|
+
end
|
788
|
+
end
|
789
|
+
end
|
790
|
+
|
755
791
|
def parse_attributes(prefixes)
|
756
792
|
attributes = {}
|
757
793
|
expanded_names = {}
|
758
794
|
closed = false
|
759
795
|
while true
|
760
|
-
if @source.match(">", true)
|
796
|
+
if @source.match?(">", true)
|
761
797
|
return attributes, closed
|
762
|
-
elsif @source.match("/>", true)
|
798
|
+
elsif @source.match?("/>", true)
|
763
799
|
closed = true
|
764
800
|
return attributes, closed
|
765
801
|
elsif match = @source.match(QNAME, true)
|
@@ -767,15 +803,14 @@ module REXML
|
|
767
803
|
prefix = match[2]
|
768
804
|
local_part = match[3]
|
769
805
|
|
770
|
-
unless @source.match(/\s*=\s*/um, true)
|
806
|
+
unless @source.match?(/\s*=\s*/um, true)
|
771
807
|
message = "Missing attribute equal: <#{name}>"
|
772
808
|
raise REXML::ParseException.new(message, @source)
|
773
809
|
end
|
774
|
-
unless
|
810
|
+
unless quote = scan_quote
|
775
811
|
message = "Missing attribute value start quote: <#{name}>"
|
776
812
|
raise REXML::ParseException.new(message, @source)
|
777
813
|
end
|
778
|
-
quote = match[1]
|
779
814
|
start_position = @source.position
|
780
815
|
value = @source.read_until(quote)
|
781
816
|
unless value.chomp!(quote)
|
@@ -783,10 +818,10 @@ module REXML
|
|
783
818
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
784
819
|
raise REXML::ParseException.new(message, @source)
|
785
820
|
end
|
786
|
-
@source.match(/\s*/um, true)
|
821
|
+
@source.match?(/\s*/um, true)
|
787
822
|
if prefix == "xmlns"
|
788
823
|
if local_part == "xml"
|
789
|
-
if value !=
|
824
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
790
825
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
791
826
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
792
827
|
raise REXML::ParseException.new( msg, @source, self )
|
data/lib/rexml/parsers/pullparser.rb
CHANGED
@@ -51,6 +51,14 @@ module REXML
|
|
51
51
|
@parser.entity_expansion_count
|
52
52
|
end
|
53
53
|
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
54
62
|
def each
|
55
63
|
while has_next?
|
56
64
|
yield self.pull
|
@@ -85,6 +93,10 @@ module REXML
|
|
85
93
|
def unshift token
|
86
94
|
@my_stack.unshift token
|
87
95
|
end
|
96
|
+
|
97
|
+
def reset
|
98
|
+
@parser.reset
|
99
|
+
end
|
88
100
|
end
|
89
101
|
|
90
102
|
# A parsing event. The contents of the event are accessed as an +Array?,
|
data/lib/rexml/parsers/sax2parser.rb
CHANGED
@@ -26,6 +26,14 @@ module REXML
|
|
26
26
|
@parser.entity_expansion_count
|
27
27
|
end
|
28
28
|
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
29
37
|
def add_listener( listener )
|
30
38
|
@parser.add_listener( listener )
|
31
39
|
end
|
@@ -251,6 +259,8 @@ module REXML
|
|
251
259
|
end
|
252
260
|
|
253
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
254
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
255
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
256
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
data/lib/rexml/parsers/streamparser.rb
CHANGED
@@ -18,6 +18,14 @@ module REXML
|
|
18
18
|
@parser.entity_expansion_count
|
19
19
|
end
|
20
20
|
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
21
29
|
def parse
|
22
30
|
# entity string
|
23
31
|
while true
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -58,8 +68,14 @@ module REXML
|
|
58
68
|
SCANNER_RESET_SIZE = 100000
|
59
69
|
PRE_DEFINED_TERM_PATTERNS = {}
|
60
70
|
pre_defined_terms = ["'", '"', "<"]
|
61
|
-
|
62
|
-
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
63
79
|
end
|
64
80
|
end
|
65
81
|
private_constant :Private
|
@@ -77,6 +93,7 @@ module REXML
|
|
77
93
|
detect_encoding
|
78
94
|
end
|
79
95
|
@line = 0
|
96
|
+
@encoded_terms = {}
|
80
97
|
end
|
81
98
|
|
82
99
|
# The current buffer (what we're going to read next)
|
@@ -125,6 +142,14 @@ module REXML
|
|
125
142
|
end
|
126
143
|
end
|
127
144
|
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
128
153
|
def position
|
129
154
|
@scanner.pos
|
130
155
|
end
|
@@ -133,6 +158,14 @@ module REXML
|
|
133
158
|
@scanner.pos = pos
|
134
159
|
end
|
135
160
|
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
136
169
|
# @return true if the Source is exhausted
|
137
170
|
def empty?
|
138
171
|
@scanner.eos?
|
@@ -227,7 +260,7 @@ module REXML
|
|
227
260
|
|
228
261
|
def read_until(term)
|
229
262
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
230
|
-
term = encode(term)
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
231
264
|
until str = @scanner.scan_until(pattern)
|
232
265
|
break if @source.nil?
|
233
266
|
break if @source.eof?
|
@@ -266,6 +299,23 @@ module REXML
|
|
266
299
|
md.nil? ? nil : @scanner
|
267
300
|
end
|
268
301
|
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
269
319
|
def empty?
|
270
320
|
super and ( @source.nil? || @source.eof? )
|
271
321
|
end
|
@@ -285,7 +335,7 @@ module REXML
|
|
285
335
|
rescue
|
286
336
|
end
|
287
337
|
@er_source.seek(pos)
|
288
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
289
339
|
pos = -1
|
290
340
|
line = -1
|
291
341
|
end
|
@@ -294,14 +344,19 @@ module REXML
|
|
294
344
|
|
295
345
|
private
|
296
346
|
def readline(term = nil)
|
297
|
-
str = @source.readline(term || @line_break)
|
298
347
|
if @pending_buffer
|
348
|
+
begin
|
349
|
+
str = @source.readline(term || @line_break)
|
350
|
+
rescue IOError
|
351
|
+
end
|
299
352
|
if str.nil?
|
300
353
|
str = @pending_buffer
|
301
354
|
else
|
302
355
|
str = @pending_buffer + str
|
303
356
|
end
|
304
357
|
@pending_buffer = nil
|
358
|
+
else
|
359
|
+
str = @source.readline(term || @line_break)
|
305
360
|
end
|
306
361
|
return nil if str.nil?
|
307
362
|
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
@@ -268,7 +243,8 @@ module REXML
|
|
268
243
|
# u = Text.new( "sean russell", false, nil, true )
|
269
244
|
# u.value #-> "sean russell"
|
270
245
|
def value
|
271
|
-
@unnormalized ||= Text::unnormalize(
|
246
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
247
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
272
248
|
end
|
273
249
|
|
274
250
|
# Sets the contents of this text node. This expects the text to be
|
@@ -411,11 +387,12 @@ module REXML
|
|
411
387
|
end
|
412
388
|
|
413
389
|
# Unescapes all possible entities
|
414
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
390
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
391
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
415
392
|
sum = 0
|
416
393
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
417
394
|
s = Text.expand($&, doctype, filter)
|
418
|
-
if sum + s.bytesize > Security.entity_expansion_text_limit
|
395
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
419
396
|
raise "entity expansion has grown too large"
|
420
397
|
else
|
421
398
|
sum += s.bytesize
|
metadata
CHANGED
@@ -1,28 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.6
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
11
|
-
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: strscan
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - ">="
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: '0'
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - ">="
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: '0'
|
10
|
+
date: 2025-02-16 00:00:00.000000000 Z
|
11
|
+
dependencies: []
|
26
12
|
description: An XML toolkit for Ruby
|
27
13
|
email:
|
28
14
|
- kou@cozmixng.org
|
@@ -116,7 +102,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
102
|
licenses:
|
117
103
|
- BSD-2-Clause
|
118
104
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
|
120
106
|
rdoc_options:
|
121
107
|
- "--main"
|
122
108
|
- README.md
|
@@ -133,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
119
|
- !ruby/object:Gem::Version
|
134
120
|
version: '0'
|
135
121
|
requirements: []
|
136
|
-
rubygems_version: 3.6.
|
122
|
+
rubygems_version: 3.6.2
|
137
123
|
specification_version: 4
|
138
124
|
summary: An XML toolkit for Ruby
|
139
125
|
test_files: []
|