rexml 3.3.6 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +117 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/entity.rb +5 -2
- data/lib/rexml/parsers/baseparser.rb +87 -52
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +10 -0
- data/lib/rexml/parsers/streamparser.rb +8 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +61 -6
- data/lib/rexml/text.rb +20 -43
- metadata +5 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
|
4
|
+
data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
|
7
|
+
data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
|
data/NEWS.md
CHANGED
@@ -1,5 +1,122 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.1 - 2025-02-16 {#version-3-4-1}
|
4
|
+
|
5
|
+
### Improvement
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-226
|
9
|
+
* GH-227
|
10
|
+
* GH-237
|
11
|
+
* Patch by NAITOH Jun
|
12
|
+
|
13
|
+
### Fixes
|
14
|
+
|
15
|
+
* Fix serialization of ATTLIST is incorrect
|
16
|
+
* GH-233
|
17
|
+
* GH-234
|
18
|
+
* Patch by OlofKalufs
|
19
|
+
* Reported by OlofKalufs
|
20
|
+
|
21
|
+
### Thanks
|
22
|
+
|
23
|
+
* NAITOH Jun
|
24
|
+
|
25
|
+
* OlofKalufs
|
26
|
+
|
27
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
28
|
+
|
29
|
+
### Improvement
|
30
|
+
|
31
|
+
* Improved performance.
|
32
|
+
* GH-216
|
33
|
+
* Patch by NAITOH Jun
|
34
|
+
|
35
|
+
* JRuby: Improved parse performance.
|
36
|
+
* GH-219
|
37
|
+
* Patch by João Duarte
|
38
|
+
|
39
|
+
* Added support for reusing pull parser.
|
40
|
+
* GH-214
|
41
|
+
* GH-220
|
42
|
+
* Patch by Dmitry Pogrebnoy
|
43
|
+
|
44
|
+
* Improved error handling when source is `IO`.
|
45
|
+
* GH-221
|
46
|
+
* Patch by NAITOH Jun
|
47
|
+
|
48
|
+
### Thanks
|
49
|
+
|
50
|
+
* NAITOH Jun
|
51
|
+
|
52
|
+
* João Duarte
|
53
|
+
|
54
|
+
* Dmitry Pogrebnoy
|
55
|
+
|
56
|
+
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
57
|
+
|
58
|
+
### Improvements
|
59
|
+
|
60
|
+
* Improved performance.
|
61
|
+
* GH-210
|
62
|
+
* Patch by NAITOH Jun.
|
63
|
+
|
64
|
+
### Fixes
|
65
|
+
|
66
|
+
* Fixed a parse bug for text only invalid XML.
|
67
|
+
* GH-215
|
68
|
+
* Patch by NAITOH Jun.
|
69
|
+
|
70
|
+
* Fixed a parse bug that `&#0x...;` is accepted as a character
|
71
|
+
reference.
|
72
|
+
|
73
|
+
### Thanks
|
74
|
+
|
75
|
+
* NAITOH Jun
|
76
|
+
|
77
|
+
## 3.3.8 - 2024-09-29 {#version-3-3-8}
|
78
|
+
|
79
|
+
### Improvements
|
80
|
+
|
81
|
+
* SAX2: Improve parse performance.
|
82
|
+
* GH-207
|
83
|
+
* Patch by NAITOH Jun.
|
84
|
+
|
85
|
+
### Fixes
|
86
|
+
|
87
|
+
* Fixed a bug that unexpected attribute namespace conflict error for
|
88
|
+
the predefined "xml" namespace is reported.
|
89
|
+
* GH-208
|
90
|
+
* Patch by KITAITI Makoto
|
91
|
+
|
92
|
+
### Thanks
|
93
|
+
|
94
|
+
* NAITOH Jun
|
95
|
+
|
96
|
+
* KITAITI Makoto
|
97
|
+
|
98
|
+
## 3.3.7 - 2024-09-04 {#version-3-3-7}
|
99
|
+
|
100
|
+
### Improvements
|
101
|
+
|
102
|
+
* Added local entity expansion limit methods
|
103
|
+
* GH-192
|
104
|
+
* GH-202
|
105
|
+
* Reported by takuya kodama.
|
106
|
+
* Patch by NAITOH Jun.
|
107
|
+
|
108
|
+
* Removed explicit strscan dependency
|
109
|
+
* GH-204
|
110
|
+
* Patch by Bo Anderson.
|
111
|
+
|
112
|
+
### Thanks
|
113
|
+
|
114
|
+
* takuya kodama
|
115
|
+
|
116
|
+
* NAITOH Jun
|
117
|
+
|
118
|
+
* Bo Anderson
|
119
|
+
|
3
120
|
## 3.3.6 - 2024-08-22 {#version-3-3-6}
|
4
121
|
|
5
122
|
### Improvements
|
data/lib/rexml/attribute.rb
CHANGED
@@ -148,8 +148,9 @@ module REXML
|
|
148
148
|
# have been expanded to their values
|
149
149
|
def value
|
150
150
|
return @unnormalized if @unnormalized
|
151
|
-
|
152
|
-
@unnormalized
|
151
|
+
|
152
|
+
@unnormalized = Text::unnormalize(@normalized, doctype,
|
153
|
+
entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
|
153
154
|
end
|
154
155
|
|
155
156
|
# The normalized value of this attribute. That is, the attribute with
|
data/lib/rexml/document.rb
CHANGED
@@ -91,6 +91,8 @@ module REXML
|
|
91
91
|
#
|
92
92
|
def initialize( source = nil, context = {} )
|
93
93
|
@entity_expansion_count = 0
|
94
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
95
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
94
96
|
super()
|
95
97
|
@context = context
|
96
98
|
return if source.nil?
|
@@ -431,10 +433,12 @@ module REXML
|
|
431
433
|
end
|
432
434
|
|
433
435
|
attr_reader :entity_expansion_count
|
436
|
+
attr_writer :entity_expansion_limit
|
437
|
+
attr_accessor :entity_expansion_text_limit
|
434
438
|
|
435
439
|
def record_entity_expansion
|
436
440
|
@entity_expansion_count += 1
|
437
|
-
if @entity_expansion_count >
|
441
|
+
if @entity_expansion_count > @entity_expansion_limit
|
438
442
|
raise "number of entity expansions exceeded, processing aborted."
|
439
443
|
end
|
440
444
|
end
|
data/lib/rexml/entity.rb
CHANGED
@@ -71,9 +71,12 @@ module REXML
|
|
71
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
72
72
|
# &ent; entities.
|
73
73
|
def unnormalized
|
74
|
-
document
|
74
|
+
document&.record_entity_expansion
|
75
|
+
|
75
76
|
return nil if @value.nil?
|
76
|
-
|
77
|
+
|
78
|
+
@unnormalized = Text::unnormalize(@value, parent,
|
79
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
77
80
|
end
|
78
81
|
|
79
82
|
#once :unnormalized
|
@@ -150,12 +150,13 @@ module REXML
|
|
150
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
151
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
152
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
-
CHARACTER_REFERENCES = /&#
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
154
|
DEFAULT_ENTITIES_PATTERNS = {}
|
155
155
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
156
|
default_entities.each do |term|
|
157
157
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
158
|
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
159
160
|
end
|
160
161
|
private_constant :Private
|
161
162
|
|
@@ -164,6 +165,9 @@ module REXML
|
|
164
165
|
@listeners = []
|
165
166
|
@prefixes = Set.new
|
166
167
|
@entity_expansion_count = 0
|
168
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
169
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
167
171
|
end
|
168
172
|
|
169
173
|
def add_listener( listener )
|
@@ -172,16 +176,22 @@ module REXML
|
|
172
176
|
|
173
177
|
attr_reader :source
|
174
178
|
attr_reader :entity_expansion_count
|
179
|
+
attr_writer :entity_expansion_limit
|
180
|
+
attr_writer :entity_expansion_text_limit
|
175
181
|
|
176
182
|
def stream=( source )
|
177
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
178
188
|
@closed = nil
|
179
189
|
@have_root = false
|
180
190
|
@document_status = nil
|
181
191
|
@tags = []
|
182
192
|
@stack = []
|
183
193
|
@entities = []
|
184
|
-
@namespaces = {}
|
194
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
185
195
|
@namespaces_restore_stack = []
|
186
196
|
end
|
187
197
|
|
@@ -263,10 +273,10 @@ module REXML
|
|
263
273
|
@source.ensure_buffer
|
264
274
|
if @document_status == nil
|
265
275
|
start_position = @source.position
|
266
|
-
if @source.match("<?", true)
|
276
|
+
if @source.match?("<?", true)
|
267
277
|
return process_instruction
|
268
|
-
elsif @source.match("<!", true)
|
269
|
-
if @source.match("--", true)
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
270
280
|
md = @source.match(/(.*?)-->/um, true)
|
271
281
|
if md.nil?
|
272
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
@@ -275,10 +285,10 @@ module REXML
|
|
275
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
276
286
|
end
|
277
287
|
return [ :comment, md[1] ]
|
278
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
279
289
|
base_error_message = "Malformed DOCTYPE"
|
280
|
-
unless @source.match(/\s+/um, true)
|
281
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
282
292
|
message = "#{base_error_message}: name is missing"
|
283
293
|
else
|
284
294
|
message = "#{base_error_message}: invalid name"
|
@@ -287,10 +297,11 @@ module REXML
|
|
287
297
|
raise REXML::ParseException.new(message, @source)
|
288
298
|
end
|
289
299
|
name = parse_name(base_error_message)
|
290
|
-
|
300
|
+
@source.match?(/\s*/um, true) # skip spaces
|
301
|
+
if @source.match?("[", true)
|
291
302
|
id = [nil, nil, nil]
|
292
303
|
@document_status = :in_doctype
|
293
|
-
elsif @source.match(
|
304
|
+
elsif @source.match?(">", true)
|
294
305
|
id = [nil, nil, nil]
|
295
306
|
@document_status = :after_doctype
|
296
307
|
@source.ensure_buffer
|
@@ -302,9 +313,10 @@ module REXML
|
|
302
313
|
# For backward compatibility
|
303
314
|
id[1], id[2] = id[2], nil
|
304
315
|
end
|
305
|
-
|
316
|
+
@source.match?(/\s*/um, true) # skip spaces
|
317
|
+
if @source.match?("[", true)
|
306
318
|
@document_status = :in_doctype
|
307
|
-
elsif @source.match(
|
319
|
+
elsif @source.match?(">", true)
|
308
320
|
@document_status = :after_doctype
|
309
321
|
@source.ensure_buffer
|
310
322
|
else
|
@@ -314,7 +326,7 @@ module REXML
|
|
314
326
|
end
|
315
327
|
args = [:start_doctype, name, *id]
|
316
328
|
if @document_status == :after_doctype
|
317
|
-
@source.match(/\s*/um, true)
|
329
|
+
@source.match?(/\s*/um, true)
|
318
330
|
@stack << [ :end_doctype ]
|
319
331
|
end
|
320
332
|
return args
|
@@ -325,14 +337,14 @@ module REXML
|
|
325
337
|
end
|
326
338
|
end
|
327
339
|
if @document_status == :in_doctype
|
328
|
-
@source.match(/\s*/um, true) # skip spaces
|
340
|
+
@source.match?(/\s*/um, true) # skip spaces
|
329
341
|
start_position = @source.position
|
330
|
-
if @source.match("<!", true)
|
331
|
-
if @source.match("ELEMENT", true)
|
342
|
+
if @source.match?("<!", true)
|
343
|
+
if @source.match?("ELEMENT", true)
|
332
344
|
md = @source.match(/(.*?)>/um, true)
|
333
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
334
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
335
|
-
elsif @source.match("ENTITY", true)
|
347
|
+
elsif @source.match?("ENTITY", true)
|
336
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
337
349
|
unless match_data
|
338
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
@@ -364,11 +376,11 @@ module REXML
|
|
364
376
|
end
|
365
377
|
match << '%' if ref
|
366
378
|
return match
|
367
|
-
elsif @source.match("ATTLIST", true)
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
368
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
369
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
370
382
|
element = md[1]
|
371
|
-
contents = md[0]
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
372
384
|
|
373
385
|
pairs = {}
|
374
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
@@ -384,10 +396,10 @@ module REXML
|
|
384
396
|
end
|
385
397
|
end
|
386
398
|
return [ :attlistdecl, element, pairs, contents ]
|
387
|
-
elsif @source.match("NOTATION", true)
|
399
|
+
elsif @source.match?("NOTATION", true)
|
388
400
|
base_error_message = "Malformed notation declaration"
|
389
|
-
unless @source.match(/\s+/um, true)
|
390
|
-
if @source.match(">")
|
401
|
+
unless @source.match?(/\s+/um, true)
|
402
|
+
if @source.match?(">")
|
391
403
|
message = "#{base_error_message}: name is missing"
|
392
404
|
else
|
393
405
|
message = "#{base_error_message}: invalid name"
|
@@ -399,7 +411,8 @@ module REXML
|
|
399
411
|
id = parse_id(base_error_message,
|
400
412
|
accept_external_id: true,
|
401
413
|
accept_public_id: true)
|
402
|
-
|
414
|
+
@source.match?(/\s*/um, true) # skip spaces
|
415
|
+
unless @source.match?(">", true)
|
403
416
|
message = "#{base_error_message}: garbage before end >"
|
404
417
|
raise REXML::ParseException.new(message, @source)
|
405
418
|
end
|
@@ -413,7 +426,7 @@ module REXML
|
|
413
426
|
end
|
414
427
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
415
428
|
return [ :externalentity, match[1] ]
|
416
|
-
elsif @source.match(/\]\s*>/um, true)
|
429
|
+
elsif @source.match?(/\]\s*>/um, true)
|
417
430
|
@document_status = :after_doctype
|
418
431
|
return [ :end_doctype ]
|
419
432
|
end
|
@@ -422,16 +435,16 @@ module REXML
|
|
422
435
|
end
|
423
436
|
end
|
424
437
|
if @document_status == :after_doctype
|
425
|
-
@source.match(/\s*/um, true)
|
438
|
+
@source.match?(/\s*/um, true)
|
426
439
|
end
|
427
440
|
begin
|
428
441
|
start_position = @source.position
|
429
|
-
if @source.match("<", true)
|
442
|
+
if @source.match?("<", true)
|
430
443
|
# :text's read_until may remain only "<" in buffer. In the
|
431
444
|
# case, buffer is empty here. So we need to fill buffer
|
432
445
|
# here explicitly.
|
433
446
|
@source.ensure_buffer
|
434
|
-
if @source.match("/", true)
|
447
|
+
if @source.match?("/", true)
|
435
448
|
@namespaces_restore_stack.pop
|
436
449
|
last_tag = @tags.pop
|
437
450
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
@@ -446,7 +459,7 @@ module REXML
|
|
446
459
|
raise REXML::ParseException.new(message, @source)
|
447
460
|
end
|
448
461
|
return [ :end_element, last_tag ]
|
449
|
-
elsif @source.match("!", true)
|
462
|
+
elsif @source.match?("!", true)
|
450
463
|
md = @source.match(/([^>]*>)/um)
|
451
464
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
452
465
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
@@ -464,7 +477,7 @@ module REXML
|
|
464
477
|
end
|
465
478
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
466
479
|
"in the doctype declaration.", @source)
|
467
|
-
elsif @source.match("?", true)
|
480
|
+
elsif @source.match?("?", true)
|
468
481
|
return process_instruction
|
469
482
|
else
|
470
483
|
# Get the next tag
|
@@ -564,8 +577,12 @@ module REXML
|
|
564
577
|
return rv if matches.size == 0
|
565
578
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
566
579
|
m=$1
|
567
|
-
|
568
|
-
|
580
|
+
if m.start_with?("x")
|
581
|
+
code_point = Integer(m[1..-1], 16)
|
582
|
+
else
|
583
|
+
code_point = Integer(m, 10)
|
584
|
+
end
|
585
|
+
[code_point].pack('U*')
|
569
586
|
}
|
570
587
|
matches.collect!{|x|x[0]}.compact!
|
571
588
|
if filter
|
@@ -585,7 +602,7 @@ module REXML
|
|
585
602
|
end
|
586
603
|
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
587
604
|
rv.gsub!( re, entity_value )
|
588
|
-
if rv.bytesize >
|
605
|
+
if rv.bytesize > @entity_expansion_text_limit
|
589
606
|
raise "entity expansion has grown too large"
|
590
607
|
end
|
591
608
|
else
|
@@ -627,7 +644,7 @@ module REXML
|
|
627
644
|
|
628
645
|
def record_entity_expansion(delta=1)
|
629
646
|
@entity_expansion_count += delta
|
630
|
-
if @entity_expansion_count >
|
647
|
+
if @entity_expansion_count > @entity_expansion_limit
|
631
648
|
raise "number of entity expansions exceeded, processing aborted."
|
632
649
|
end
|
633
650
|
end
|
@@ -641,7 +658,7 @@ module REXML
|
|
641
658
|
def parse_name(base_error_message)
|
642
659
|
md = @source.match(Private::NAME_PATTERN, true)
|
643
660
|
unless md
|
644
|
-
if @source.match(/\S/um)
|
661
|
+
if @source.match?(/\S/um)
|
645
662
|
message = "#{base_error_message}: invalid name"
|
646
663
|
else
|
647
664
|
message = "#{base_error_message}: name is missing"
|
@@ -683,34 +700,34 @@ module REXML
|
|
683
700
|
accept_public_id:)
|
684
701
|
public = /\A\s*PUBLIC/um
|
685
702
|
system = /\A\s*SYSTEM/um
|
686
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
687
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
703
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
704
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
688
705
|
return "public ID literal is missing"
|
689
706
|
end
|
690
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
707
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
691
708
|
return "invalid public ID literal"
|
692
709
|
end
|
693
710
|
if accept_public_id
|
694
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
711
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
695
712
|
return "system ID literal is missing"
|
696
713
|
end
|
697
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
714
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
698
715
|
return "invalid system literal"
|
699
716
|
end
|
700
717
|
"garbage after system literal"
|
701
718
|
else
|
702
719
|
"garbage after public ID literal"
|
703
720
|
end
|
704
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
705
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
721
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
722
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
706
723
|
return "system literal is missing"
|
707
724
|
end
|
708
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
725
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
709
726
|
return "invalid system literal"
|
710
727
|
end
|
711
728
|
"garbage after system literal"
|
712
729
|
else
|
713
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
730
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
714
731
|
return "invalid ID type"
|
715
732
|
end
|
716
733
|
"ID type is missing"
|
@@ -719,7 +736,7 @@ module REXML
|
|
719
736
|
|
720
737
|
def process_instruction
|
721
738
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
722
|
-
if @source.match(/\s+/um, true)
|
739
|
+
if @source.match?(/\s+/um, true)
|
723
740
|
match_data = @source.match(/(.*?)\?>/um, true)
|
724
741
|
unless match_data
|
725
742
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
@@ -727,7 +744,7 @@ module REXML
|
|
727
744
|
content = match_data[1]
|
728
745
|
else
|
729
746
|
content = nil
|
730
|
-
unless @source.match("?>", true)
|
747
|
+
unless @source.match?("?>", true)
|
731
748
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
732
749
|
end
|
733
750
|
end
|
@@ -752,14 +769,33 @@ module REXML
|
|
752
769
|
[:processing_instruction, name, content]
|
753
770
|
end
|
754
771
|
|
772
|
+
if StringScanner::Version < "3.1.1"
|
773
|
+
def scan_quote
|
774
|
+
@source.match(/(['"])/, true)&.[](1)
|
775
|
+
end
|
776
|
+
else
|
777
|
+
def scan_quote
|
778
|
+
case @source.peek_byte
|
779
|
+
when 34 # '"'.ord
|
780
|
+
@source.scan_byte
|
781
|
+
'"'
|
782
|
+
when 39 # "'".ord
|
783
|
+
@source.scan_byte
|
784
|
+
"'"
|
785
|
+
else
|
786
|
+
nil
|
787
|
+
end
|
788
|
+
end
|
789
|
+
end
|
790
|
+
|
755
791
|
def parse_attributes(prefixes)
|
756
792
|
attributes = {}
|
757
793
|
expanded_names = {}
|
758
794
|
closed = false
|
759
795
|
while true
|
760
|
-
if @source.match(">", true)
|
796
|
+
if @source.match?(">", true)
|
761
797
|
return attributes, closed
|
762
|
-
elsif @source.match("/>", true)
|
798
|
+
elsif @source.match?("/>", true)
|
763
799
|
closed = true
|
764
800
|
return attributes, closed
|
765
801
|
elsif match = @source.match(QNAME, true)
|
@@ -767,15 +803,14 @@ module REXML
|
|
767
803
|
prefix = match[2]
|
768
804
|
local_part = match[3]
|
769
805
|
|
770
|
-
unless @source.match(/\s*=\s*/um, true)
|
806
|
+
unless @source.match?(/\s*=\s*/um, true)
|
771
807
|
message = "Missing attribute equal: <#{name}>"
|
772
808
|
raise REXML::ParseException.new(message, @source)
|
773
809
|
end
|
774
|
-
unless
|
810
|
+
unless quote = scan_quote
|
775
811
|
message = "Missing attribute value start quote: <#{name}>"
|
776
812
|
raise REXML::ParseException.new(message, @source)
|
777
813
|
end
|
778
|
-
quote = match[1]
|
779
814
|
start_position = @source.position
|
780
815
|
value = @source.read_until(quote)
|
781
816
|
unless value.chomp!(quote)
|
@@ -783,10 +818,10 @@ module REXML
|
|
783
818
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
784
819
|
raise REXML::ParseException.new(message, @source)
|
785
820
|
end
|
786
|
-
@source.match(/\s*/um, true)
|
821
|
+
@source.match?(/\s*/um, true)
|
787
822
|
if prefix == "xmlns"
|
788
823
|
if local_part == "xml"
|
789
|
-
if value !=
|
824
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
790
825
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
791
826
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
792
827
|
raise REXML::ParseException.new( msg, @source, self )
|
@@ -51,6 +51,14 @@ module REXML
|
|
51
51
|
@parser.entity_expansion_count
|
52
52
|
end
|
53
53
|
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
54
62
|
def each
|
55
63
|
while has_next?
|
56
64
|
yield self.pull
|
@@ -85,6 +93,10 @@ module REXML
|
|
85
93
|
def unshift token
|
86
94
|
@my_stack.unshift token
|
87
95
|
end
|
96
|
+
|
97
|
+
def reset
|
98
|
+
@parser.reset
|
99
|
+
end
|
88
100
|
end
|
89
101
|
|
90
102
|
# A parsing event. The contents of the event are accessed as an +Array?,
|
@@ -26,6 +26,14 @@ module REXML
|
|
26
26
|
@parser.entity_expansion_count
|
27
27
|
end
|
28
28
|
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
29
37
|
def add_listener( listener )
|
30
38
|
@parser.add_listener( listener )
|
31
39
|
end
|
@@ -251,6 +259,8 @@ module REXML
|
|
251
259
|
end
|
252
260
|
|
253
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
254
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
255
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
256
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
@@ -18,6 +18,14 @@ module REXML
|
|
18
18
|
@parser.entity_expansion_count
|
19
19
|
end
|
20
20
|
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
21
29
|
def parse
|
22
30
|
# entity string
|
23
31
|
while true
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -58,8 +68,14 @@ module REXML
|
|
58
68
|
SCANNER_RESET_SIZE = 100000
|
59
69
|
PRE_DEFINED_TERM_PATTERNS = {}
|
60
70
|
pre_defined_terms = ["'", '"', "<"]
|
61
|
-
|
62
|
-
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
63
79
|
end
|
64
80
|
end
|
65
81
|
private_constant :Private
|
@@ -77,6 +93,7 @@ module REXML
|
|
77
93
|
detect_encoding
|
78
94
|
end
|
79
95
|
@line = 0
|
96
|
+
@encoded_terms = {}
|
80
97
|
end
|
81
98
|
|
82
99
|
# The current buffer (what we're going to read next)
|
@@ -125,6 +142,14 @@ module REXML
|
|
125
142
|
end
|
126
143
|
end
|
127
144
|
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
128
153
|
def position
|
129
154
|
@scanner.pos
|
130
155
|
end
|
@@ -133,6 +158,14 @@ module REXML
|
|
133
158
|
@scanner.pos = pos
|
134
159
|
end
|
135
160
|
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
136
169
|
# @return true if the Source is exhausted
|
137
170
|
def empty?
|
138
171
|
@scanner.eos?
|
@@ -227,7 +260,7 @@ module REXML
|
|
227
260
|
|
228
261
|
def read_until(term)
|
229
262
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
230
|
-
term = encode(term)
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
231
264
|
until str = @scanner.scan_until(pattern)
|
232
265
|
break if @source.nil?
|
233
266
|
break if @source.eof?
|
@@ -266,6 +299,23 @@ module REXML
|
|
266
299
|
md.nil? ? nil : @scanner
|
267
300
|
end
|
268
301
|
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
269
319
|
def empty?
|
270
320
|
super and ( @source.nil? || @source.eof? )
|
271
321
|
end
|
@@ -285,7 +335,7 @@ module REXML
|
|
285
335
|
rescue
|
286
336
|
end
|
287
337
|
@er_source.seek(pos)
|
288
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
289
339
|
pos = -1
|
290
340
|
line = -1
|
291
341
|
end
|
@@ -294,14 +344,19 @@ module REXML
|
|
294
344
|
|
295
345
|
private
|
296
346
|
def readline(term = nil)
|
297
|
-
str = @source.readline(term || @line_break)
|
298
347
|
if @pending_buffer
|
348
|
+
begin
|
349
|
+
str = @source.readline(term || @line_break)
|
350
|
+
rescue IOError
|
351
|
+
end
|
299
352
|
if str.nil?
|
300
353
|
str = @pending_buffer
|
301
354
|
else
|
302
355
|
str = @pending_buffer + str
|
303
356
|
end
|
304
357
|
@pending_buffer = nil
|
358
|
+
else
|
359
|
+
str = @source.readline(term || @line_break)
|
305
360
|
end
|
306
361
|
return nil if str.nil?
|
307
362
|
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
@@ -268,7 +243,8 @@ module REXML
|
|
268
243
|
# u = Text.new( "sean russell", false, nil, true )
|
269
244
|
# u.value #-> "sean russell"
|
270
245
|
def value
|
271
|
-
@unnormalized ||= Text::unnormalize(
|
246
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
247
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
272
248
|
end
|
273
249
|
|
274
250
|
# Sets the contents of this text node. This expects the text to be
|
@@ -411,11 +387,12 @@ module REXML
|
|
411
387
|
end
|
412
388
|
|
413
389
|
# Unescapes all possible entities
|
414
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
390
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
391
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
415
392
|
sum = 0
|
416
393
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
417
394
|
s = Text.expand($&, doctype, filter)
|
418
|
-
if sum + s.bytesize >
|
395
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
419
396
|
raise "entity expansion has grown too large"
|
420
397
|
else
|
421
398
|
sum += s.bytesize
|
metadata
CHANGED
@@ -1,28 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
11
|
-
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: strscan
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - ">="
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: '0'
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - ">="
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: '0'
|
10
|
+
date: 2025-02-16 00:00:00.000000000 Z
|
11
|
+
dependencies: []
|
26
12
|
description: An XML toolkit for Ruby
|
27
13
|
email:
|
28
14
|
- kou@cozmixng.org
|
@@ -116,7 +102,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
102
|
licenses:
|
117
103
|
- BSD-2-Clause
|
118
104
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
|
120
106
|
rdoc_options:
|
121
107
|
- "--main"
|
122
108
|
- README.md
|
@@ -133,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
119
|
- !ruby/object:Gem::Version
|
134
120
|
version: '0'
|
135
121
|
requirements: []
|
136
|
-
rubygems_version: 3.6.
|
122
|
+
rubygems_version: 3.6.2
|
137
123
|
specification_version: 4
|
138
124
|
summary: An XML toolkit for Ruby
|
139
125
|
test_files: []
|