rexml 3.2.9 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +109 -2
- data/lib/rexml/element.rb +2 -15
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/parsers/baseparser.rb +76 -27
- data/lib/rexml/parsers/sax2parser.rb +2 -19
- data/lib/rexml/parsers/streamparser.rb +2 -2
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +30 -4
- data/lib/rexml/text.rb +34 -14
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
|
4
|
+
data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
|
7
|
+
data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
|
data/NEWS.md
CHANGED
@@ -1,12 +1,119 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
-
## 3.2
|
3
|
+
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improved parse performance.
|
8
|
+
* GH-160
|
9
|
+
* Patch by NAITOH Jun.
|
10
|
+
|
11
|
+
* Improved parse performance.
|
12
|
+
* GH-169
|
13
|
+
* GH-170
|
14
|
+
* GH-171
|
15
|
+
* GH-172
|
16
|
+
* GH-173
|
17
|
+
* GH-174
|
18
|
+
* Patch by Watson.
|
19
|
+
|
20
|
+
* Added support for raising a parse exception when an XML has extra
|
21
|
+
content after the root element.
|
22
|
+
* GH-161
|
23
|
+
* Patch by NAITOH Jun.
|
24
|
+
|
25
|
+
* Added support for raising a parse exception when an XML
|
26
|
+
declaration exists in the wrong position.
|
27
|
+
* GH-162
|
28
|
+
* Patch by NAITOH Jun.
|
29
|
+
|
30
|
+
* Removed a needless space after the XML declaration in pretty print mode.
|
31
|
+
* GH-164
|
32
|
+
* Patch by NAITOH Jun.
|
33
|
+
|
34
|
+
* Stopped emitting `:text` events after the root element.
|
35
|
+
* GH-167
|
36
|
+
* Patch by NAITOH Jun.
|
37
|
+
|
38
|
+
### Fixes
|
39
|
+
|
40
|
+
* Fixed a bug that SAX2 parser doesn't expand predefined entities for
|
41
|
+
`characters` callback.
|
42
|
+
* GH-168
|
43
|
+
* Patch by NAITOH Jun.
|
44
|
+
|
45
|
+
### Thanks
|
46
|
+
|
47
|
+
* NAITOH Jun
|
48
|
+
|
49
|
+
* Watson
|
50
|
+
|
51
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
52
|
+
|
53
|
+
### Improvements
|
54
|
+
|
55
|
+
* Added support for detecting malformed top-level comments.
|
56
|
+
* GH-145
|
57
|
+
* Patch by Hiroya Fujinami.
|
58
|
+
|
59
|
+
* Improved `REXML::Element#attribute` performance.
|
60
|
+
* GH-146
|
61
|
+
* Patch by Hiroya Fujinami.
|
62
|
+
|
63
|
+
* Added support for detecting malformed `<!-->` comments.
|
64
|
+
* GH-147
|
65
|
+
* Patch by Hiroya Fujinami.
|
66
|
+
|
67
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
68
|
+
* GH-152
|
69
|
+
* Patch by Hiroya Fujinami.
|
70
|
+
|
71
|
+
* Added `changelog_uri` metadata to gemspec.
|
72
|
+
* GH-156
|
73
|
+
* Patch by fynsta.
|
74
|
+
|
75
|
+
* Improved parse performance.
|
76
|
+
* GH-157
|
77
|
+
* GH-158
|
78
|
+
* Patch by NAITOH Jun.
|
79
|
+
|
80
|
+
### Fixes
|
81
|
+
|
82
|
+
* Fixed a bug that large XML can't be parsed.
|
83
|
+
* GH-154
|
84
|
+
* Patch by NAITOH Jun.
|
85
|
+
|
86
|
+
* Fixed a bug that private constants are visible.
|
87
|
+
* GH-155
|
88
|
+
* Patch by NAITOH Jun.
|
89
|
+
|
90
|
+
### Thanks
|
91
|
+
|
92
|
+
* Hiroya Fujinami
|
93
|
+
|
94
|
+
* NAITOH Jun
|
95
|
+
|
96
|
+
* fynsta
|
97
|
+
|
98
|
+
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
99
|
+
|
100
|
+
### Improvements
|
101
|
+
|
102
|
+
* Added support for strscan 0.7.0 installed with Ruby 2.6.
|
103
|
+
* GH-142
|
104
|
+
* Reported by Fernando Trigoso.
|
105
|
+
|
106
|
+
### Thanks
|
107
|
+
|
108
|
+
* Fernando Trigoso
|
109
|
+
|
110
|
+
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
4
111
|
|
5
112
|
### Improvements
|
6
113
|
|
7
114
|
* Added support for old strscan.
|
8
115
|
* GH-132
|
9
|
-
* Reported by Adam
|
116
|
+
* Reported by Adam.
|
10
117
|
|
11
118
|
* Improved attribute value parse performance.
|
12
119
|
* GH-135
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -1284,16 +1276,11 @@ module REXML
|
|
1284
1276
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1277
|
#
|
1286
1278
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1279
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1280
|
prefix = nil if prefix == 'xmlns'
|
1294
1281
|
|
1295
1282
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1283
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1284
|
|
1298
1285
|
return ret_val unless ret_val.nil?
|
1299
1286
|
return nil if prefix.nil?
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
@@ -124,6 +124,14 @@ module REXML
|
|
124
124
|
}
|
125
125
|
|
126
126
|
module Private
|
127
|
+
# Terminal requires two or more letters.
|
128
|
+
INSTRUCTION_TERM = "?>"
|
129
|
+
COMMENT_TERM = "-->"
|
130
|
+
CDATA_TERM = "]]>"
|
131
|
+
DOCTYPE_TERM = "]>"
|
132
|
+
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
+
ENTITY_TERM = DOCTYPE_TERM
|
134
|
+
|
127
135
|
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
128
136
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
129
137
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
@@ -132,13 +140,20 @@ module REXML
|
|
132
140
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
133
141
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
134
142
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
143
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
144
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
145
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
146
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
147
|
+
default_entities.each do |term|
|
148
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
149
|
+
end
|
135
150
|
end
|
136
151
|
private_constant :Private
|
137
|
-
include Private
|
138
152
|
|
139
153
|
def initialize( source )
|
140
154
|
self.stream = source
|
141
155
|
@listeners = []
|
156
|
+
@prefixes = Set.new
|
142
157
|
end
|
143
158
|
|
144
159
|
def add_listener( listener )
|
@@ -150,6 +165,7 @@ module REXML
|
|
150
165
|
def stream=( source )
|
151
166
|
@source = SourceFactory.create_from( source )
|
152
167
|
@closed = nil
|
168
|
+
@have_root = false
|
153
169
|
@document_status = nil
|
154
170
|
@tags = []
|
155
171
|
@stack = []
|
@@ -204,6 +220,8 @@ module REXML
|
|
204
220
|
|
205
221
|
# Returns the next event. This is a +PullEvent+ object.
|
206
222
|
def pull
|
223
|
+
@source.drop_parsed_content
|
224
|
+
|
207
225
|
pull_event.tap do |event|
|
208
226
|
@listeners.each do |listener|
|
209
227
|
listener.receive event
|
@@ -216,7 +234,12 @@ module REXML
|
|
216
234
|
x, @closed = @closed, nil
|
217
235
|
return [ :end_element, x ]
|
218
236
|
end
|
219
|
-
|
237
|
+
if empty?
|
238
|
+
if @document_status == :in_doctype
|
239
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
240
|
+
end
|
241
|
+
return [ :end_document ]
|
242
|
+
end
|
220
243
|
return @stack.shift if @stack.size > 0
|
221
244
|
#STDERR.puts @source.encoding
|
222
245
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -228,7 +251,14 @@ module REXML
|
|
228
251
|
return process_instruction(start_position)
|
229
252
|
elsif @source.match("<!", true)
|
230
253
|
if @source.match("--", true)
|
231
|
-
|
254
|
+
md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
255
|
+
if md.nil?
|
256
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
|
+
end
|
258
|
+
if /--|-\z/.match?(md[1])
|
259
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
260
|
+
end
|
261
|
+
return [ :comment, md[1] ]
|
232
262
|
elsif @source.match("DOCTYPE", true)
|
233
263
|
base_error_message = "Malformed DOCTYPE"
|
234
264
|
unless @source.match(/\s+/um, true)
|
@@ -240,7 +270,7 @@ module REXML
|
|
240
270
|
@source.position = start_position
|
241
271
|
raise REXML::ParseException.new(message, @source)
|
242
272
|
end
|
243
|
-
@nsstack.unshift(
|
273
|
+
@nsstack.unshift(Set.new)
|
244
274
|
name = parse_name(base_error_message)
|
245
275
|
if @source.match(/\s*\[/um, true)
|
246
276
|
id = [nil, nil, nil]
|
@@ -288,7 +318,7 @@ module REXML
|
|
288
318
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
289
319
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
290
320
|
elsif @source.match("ENTITY", true)
|
291
|
-
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
321
|
+
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
|
292
322
|
ref = false
|
293
323
|
if match[1] == '%'
|
294
324
|
ref = true
|
@@ -314,13 +344,13 @@ module REXML
|
|
314
344
|
match << '%' if ref
|
315
345
|
return match
|
316
346
|
elsif @source.match("ATTLIST", true)
|
317
|
-
md = @source.match(ATTLISTDECL_END, true)
|
347
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
318
348
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
319
349
|
element = md[1]
|
320
350
|
contents = md[0]
|
321
351
|
|
322
352
|
pairs = {}
|
323
|
-
values = md[0].scan( ATTDEF_RE )
|
353
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
324
354
|
values.each do |attdef|
|
325
355
|
unless attdef[3] == "#IMPLIED"
|
326
356
|
attdef.compact!
|
@@ -353,19 +383,22 @@ module REXML
|
|
353
383
|
raise REXML::ParseException.new(message, @source)
|
354
384
|
end
|
355
385
|
return [:notationdecl, name, *id]
|
356
|
-
elsif md = @source.match(/--(.*?)-->/um, true)
|
386
|
+
elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
357
387
|
case md[1]
|
358
388
|
when /--/, /-\z/
|
359
389
|
raise REXML::ParseException.new("Malformed comment", @source)
|
360
390
|
end
|
361
391
|
return [ :comment, md[1] ] if md
|
362
392
|
end
|
363
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
393
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
|
364
394
|
return [ :externalentity, match[1] ]
|
365
395
|
elsif @source.match(/\]\s*>/um, true)
|
366
396
|
@document_status = :after_doctype
|
367
397
|
return [ :end_doctype ]
|
368
398
|
end
|
399
|
+
if @document_status == :in_doctype
|
400
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
401
|
+
end
|
369
402
|
end
|
370
403
|
if @document_status == :after_doctype
|
371
404
|
@source.match(/\s*/um, true)
|
@@ -380,7 +413,7 @@ module REXML
|
|
380
413
|
if @source.match("/", true)
|
381
414
|
@nsstack.shift
|
382
415
|
last_tag = @tags.pop
|
383
|
-
md = @source.match(CLOSE_PATTERN, true)
|
416
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
384
417
|
if md and !last_tag
|
385
418
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
386
419
|
raise REXML::ParseException.new(message, @source)
|
@@ -397,16 +430,15 @@ module REXML
|
|
397
430
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
398
431
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
399
432
|
if md[0][0] == ?-
|
400
|
-
md = @source.match(/--(.*?)-->/um, true)
|
433
|
+
md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
401
434
|
|
402
|
-
|
403
|
-
when /--/, /-\z/
|
435
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
404
436
|
raise REXML::ParseException.new("Malformed comment", @source)
|
405
437
|
end
|
406
438
|
|
407
|
-
return [ :comment, md[1] ]
|
439
|
+
return [ :comment, md[1] ]
|
408
440
|
else
|
409
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
441
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
|
410
442
|
return [ :cdata, md[1] ] if md
|
411
443
|
end
|
412
444
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
@@ -415,19 +447,19 @@ module REXML
|
|
415
447
|
return process_instruction(start_position)
|
416
448
|
else
|
417
449
|
# Get the next tag
|
418
|
-
md = @source.match(TAG_PATTERN, true)
|
450
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
419
451
|
unless md
|
420
452
|
@source.position = start_position
|
421
453
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
422
454
|
end
|
423
455
|
tag = md[1]
|
424
456
|
@document_status = :in_element
|
425
|
-
prefixes
|
426
|
-
prefixes << md[2] if md[2]
|
457
|
+
@prefixes.clear
|
458
|
+
@prefixes << md[2] if md[2]
|
427
459
|
@nsstack.unshift(curr_ns=Set.new)
|
428
|
-
attributes, closed = parse_attributes(prefixes, curr_ns)
|
460
|
+
attributes, closed = parse_attributes(@prefixes, curr_ns)
|
429
461
|
# Verify that all of the prefixes have been defined
|
430
|
-
for prefix in prefixes
|
462
|
+
for prefix in @prefixes
|
431
463
|
unless @nsstack.find{|k| k.member?(prefix)}
|
432
464
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
433
465
|
end
|
@@ -437,8 +469,12 @@ module REXML
|
|
437
469
|
@closed = tag
|
438
470
|
@nsstack.shift
|
439
471
|
else
|
472
|
+
if @tags.empty? and @have_root
|
473
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
474
|
+
end
|
440
475
|
@tags.push( tag )
|
441
476
|
end
|
477
|
+
@have_root = true
|
442
478
|
return [ :start_element, tag, attributes ]
|
443
479
|
end
|
444
480
|
else
|
@@ -446,6 +482,12 @@ module REXML
|
|
446
482
|
if text.chomp!("<")
|
447
483
|
@source.position -= "<".bytesize
|
448
484
|
end
|
485
|
+
if @tags.empty? and @have_root
|
486
|
+
unless /\A\s*\z/.match?(text)
|
487
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
488
|
+
end
|
489
|
+
return pull_event
|
490
|
+
end
|
449
491
|
return [ :text, text ]
|
450
492
|
end
|
451
493
|
rescue REXML::UndefinedNamespaceException
|
@@ -488,10 +530,14 @@ module REXML
|
|
488
530
|
|
489
531
|
# Unescapes all possible entities
|
490
532
|
def unnormalize( string, entities=nil, filter=nil )
|
491
|
-
|
533
|
+
if string.include?("\r")
|
534
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
535
|
+
else
|
536
|
+
rv = string.dup
|
537
|
+
end
|
492
538
|
matches = rv.scan( REFERENCE_RE )
|
493
539
|
return rv if matches.size == 0
|
494
|
-
rv.gsub!(
|
540
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
495
541
|
m=$1
|
496
542
|
m = "0#{m}" if m[0] == ?x
|
497
543
|
[Integer(m)].pack('U*')
|
@@ -502,7 +548,7 @@ module REXML
|
|
502
548
|
unless filter and filter.include?(entity_reference)
|
503
549
|
entity_value = entity( entity_reference, entities )
|
504
550
|
if entity_value
|
505
|
-
re = /&#{entity_reference};/
|
551
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
506
552
|
rv.gsub!( re, entity_value )
|
507
553
|
else
|
508
554
|
er = DEFAULT_ENTITIES[entity_reference]
|
@@ -510,7 +556,7 @@ module REXML
|
|
510
556
|
end
|
511
557
|
end
|
512
558
|
end
|
513
|
-
rv.gsub!(
|
559
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
514
560
|
end
|
515
561
|
rv
|
516
562
|
end
|
@@ -523,7 +569,7 @@ module REXML
|
|
523
569
|
end
|
524
570
|
|
525
571
|
def parse_name(base_error_message)
|
526
|
-
md = @source.match(NAME_PATTERN, true)
|
572
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
527
573
|
unless md
|
528
574
|
if @source.match(/\s*\S/um)
|
529
575
|
message = "#{base_error_message}: invalid name"
|
@@ -602,13 +648,16 @@ module REXML
|
|
602
648
|
end
|
603
649
|
|
604
650
|
def process_instruction(start_position)
|
605
|
-
match_data = @source.match(INSTRUCTION_END, true)
|
651
|
+
match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
|
606
652
|
unless match_data
|
607
653
|
message = "Invalid processing instruction node"
|
608
654
|
@source.position = start_position
|
609
655
|
raise REXML::ParseException.new(message, @source)
|
610
656
|
end
|
611
|
-
if
|
657
|
+
if match_data[1] == "xml"
|
658
|
+
if @document_status
|
659
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
|
+
end
|
612
661
|
content = match_data[2]
|
613
662
|
version = VERSION.match(content)
|
614
663
|
version = version[1] unless version.nil?
|
@@ -157,25 +157,8 @@ module REXML
|
|
157
157
|
end
|
158
158
|
end
|
159
159
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
160
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
161
|
+
handle( :characters, unnormalized )
|
179
162
|
when :entitydecl
|
180
163
|
handle_entitydecl( event )
|
181
164
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -36,8 +36,8 @@ module REXML
|
|
36
36
|
@listener.tag_end( event[1] )
|
37
37
|
@tag_stack.pop
|
38
38
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
39
|
+
unnormalized = @parser.unnormalize( event[1] )
|
40
|
+
@listener.text( unnormalized )
|
41
41
|
when :processing_instruction
|
42
42
|
@listener.instruction( *event[1,2] )
|
43
43
|
when :start_doctype
|
@@ -16,7 +16,6 @@ module REXML
|
|
16
16
|
|
17
17
|
def parse
|
18
18
|
tag_stack = []
|
19
|
-
in_doctype = false
|
20
19
|
entities = nil
|
21
20
|
begin
|
22
21
|
while true
|
@@ -39,17 +38,15 @@ module REXML
|
|
39
38
|
tag_stack.pop
|
40
39
|
@build_context = @build_context.parent
|
41
40
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
41
|
+
if @build_context[-1].instance_of? Text
|
42
|
+
@build_context[-1] << event[1]
|
43
|
+
else
|
44
|
+
@build_context.add(
|
45
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
46
|
+
) unless (
|
47
|
+
@build_context.ignore_whitespace_nodes and
|
48
|
+
event[1].strip.size==0
|
49
|
+
)
|
53
50
|
end
|
54
51
|
when :comment
|
55
52
|
c = Comment.new( event[1] )
|
@@ -60,14 +57,12 @@ module REXML
|
|
60
57
|
when :processing_instruction
|
61
58
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
59
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
60
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
61
|
@build_context = @build_context.parent
|
66
62
|
when :start_doctype
|
67
63
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
64
|
@build_context = doctype
|
69
65
|
entities = {}
|
70
|
-
in_doctype = true
|
71
66
|
when :attlistdecl
|
72
67
|
n = AttlistDecl.new( event[1..-1] )
|
73
68
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -35,6 +55,7 @@ module REXML
|
|
35
55
|
attr_reader :encoding
|
36
56
|
|
37
57
|
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
38
59
|
PRE_DEFINED_TERM_PATTERNS = {}
|
39
60
|
pre_defined_terms = ["'", '"', "<"]
|
40
61
|
pre_defined_terms.each do |term|
|
@@ -42,7 +63,6 @@ module REXML
|
|
42
63
|
end
|
43
64
|
end
|
44
65
|
private_constant :Private
|
45
|
-
include Private
|
46
66
|
|
47
67
|
# Constructor
|
48
68
|
# @param arg must be a String, and should be a valid XML document
|
@@ -64,6 +84,12 @@ module REXML
|
|
64
84
|
@scanner.rest
|
65
85
|
end
|
66
86
|
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
67
93
|
def buffer_encoding=(encoding)
|
68
94
|
@scanner.string.force_encoding(encoding)
|
69
95
|
end
|
@@ -91,7 +117,7 @@ module REXML
|
|
91
117
|
def ensure_buffer
|
92
118
|
end
|
93
119
|
|
94
|
-
def match(pattern, cons=false)
|
120
|
+
def match(pattern, cons=false, term: nil)
|
95
121
|
if cons
|
96
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
97
123
|
else
|
@@ -214,7 +240,7 @@ module REXML
|
|
214
240
|
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
215
241
|
# - ">"
|
216
242
|
# - "XXX>" (X is any string excluding '>')
|
217
|
-
def match( pattern, cons=false )
|
243
|
+
def match( pattern, cons=false, term: nil )
|
218
244
|
while true
|
219
245
|
if cons
|
220
246
|
md = @scanner.scan(pattern)
|
@@ -224,7 +250,7 @@ module REXML
|
|
224
250
|
break if md
|
225
251
|
return nil if pattern.is_a?(String)
|
226
252
|
return nil if @source.nil?
|
227
|
-
return nil unless read
|
253
|
+
return nil unless read(term)
|
228
254
|
end
|
229
255
|
|
230
256
|
md.nil? ? nil : @scanner
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.9
|
4
|
+
version: 3.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-
|
10
|
+
date: 2024-07-16 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -115,7 +115,8 @@ files:
|
|
115
115
|
homepage: https://github.com/ruby/rexml
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
|
-
metadata:
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
119
120
|
rdoc_options:
|
120
121
|
- "--main"
|
121
122
|
- README.md
|