rexml 3.2.8 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +87 -2
- data/lib/rexml/element.rb +2 -15
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +65 -23
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +56 -9
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
|
4
|
+
data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
|
7
|
+
data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
|
data/NEWS.md
CHANGED
@@ -1,5 +1,91 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for detecting malformed top-level comments.
|
8
|
+
* GH-145
|
9
|
+
* Patch by Hiroya Fujinami.
|
10
|
+
|
11
|
+
* Improved `REXML::Element#attribute` performance.
|
12
|
+
* GH-146
|
13
|
+
* Patch by Hiroya Fujinami.
|
14
|
+
|
15
|
+
* Added support for detecting malformed `<!-->` comments.
|
16
|
+
* GH-147
|
17
|
+
* Patch by Hiroya Fujinami.
|
18
|
+
|
19
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
20
|
+
* GH-152
|
21
|
+
* Patch by Hiroya Fujinami.
|
22
|
+
|
23
|
+
* Added `changelog_uri` metadata to gemspec.
|
24
|
+
* GH-156
|
25
|
+
* Patch by fynsta.
|
26
|
+
|
27
|
+
* Improved parse performance.
|
28
|
+
* GH-157
|
29
|
+
* GH-158
|
30
|
+
* Patch by NAITOH Jun.
|
31
|
+
|
32
|
+
### Fixes
|
33
|
+
|
34
|
+
* Fixed a bug that large XML can't be parsed.
|
35
|
+
* GH-154
|
36
|
+
* Patch by NAITOH Jun.
|
37
|
+
|
38
|
+
* Fixed a bug that private constants are visible.
|
39
|
+
* GH-155
|
40
|
+
* Patch by NAITOH Jun.
|
41
|
+
|
42
|
+
### Thanks
|
43
|
+
|
44
|
+
* Hiroya Fujinami
|
45
|
+
|
46
|
+
* NAITOH Jun
|
47
|
+
|
48
|
+
* fynsta
|
49
|
+
|
50
|
+
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
51
|
+
|
52
|
+
### Improvements
|
53
|
+
|
54
|
+
* Added support for strscan 0.7.0 installed with Ruby 2.6.
|
55
|
+
* GH-142
|
56
|
+
* Reported by Fernando Trigoso.
|
57
|
+
|
58
|
+
### Thanks
|
59
|
+
|
60
|
+
* Fernando Trigoso
|
61
|
+
|
62
|
+
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
63
|
+
|
64
|
+
### Improvements
|
65
|
+
|
66
|
+
* Added support for old strscan.
|
67
|
+
* GH-132
|
68
|
+
* Reported by Adam.
|
69
|
+
|
70
|
+
* Improved attribute value parse performance.
|
71
|
+
* GH-135
|
72
|
+
* Patch by NAITOH Jun.
|
73
|
+
|
74
|
+
* Improved `REXML::Node#each_recursive` performance.
|
75
|
+
* GH-134
|
76
|
+
* GH-139
|
77
|
+
* Patch by Hiroya Fujinami.
|
78
|
+
|
79
|
+
* Improved text parse performance.
|
80
|
+
* Reported by mprogrammer.
|
81
|
+
|
82
|
+
### Thanks
|
83
|
+
|
84
|
+
* Adam
|
85
|
+
* NAITOH Jun
|
86
|
+
* Hiroya Fujinami
|
87
|
+
* mprogrammer
|
88
|
+
|
3
89
|
## 3.2.8 - 2024-05-16 {#version-3-2-8}
|
4
90
|
|
5
91
|
### Fixes
|
@@ -30,7 +116,7 @@
|
|
30
116
|
|
31
117
|
* Improved parse performance when an attribute has many `<`s.
|
32
118
|
|
33
|
-
* GH-
|
119
|
+
* GH-126
|
34
120
|
|
35
121
|
### Fixes
|
36
122
|
|
@@ -65,7 +151,6 @@
|
|
65
151
|
* jcavalieri
|
66
152
|
* DuKewu
|
67
153
|
|
68
|
-
|
69
154
|
## 3.2.6 - 2023-07-27 {#version-3-2-6}
|
70
155
|
|
71
156
|
### Improvements
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -1284,16 +1276,11 @@ module REXML
|
|
1284
1276
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1277
|
#
|
1286
1278
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1279
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1280
|
prefix = nil if prefix == 'xmlns'
|
1294
1281
|
|
1295
1282
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1283
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1284
|
|
1298
1285
|
return ret_val unless ret_val.nil?
|
1299
1286
|
return nil if prefix.nil?
|
data/lib/rexml/node.rb
CHANGED
@@ -52,10 +52,14 @@ module REXML
|
|
52
52
|
|
53
53
|
# Visit all subnodes of +self+ recursively
|
54
54
|
def each_recursive(&block) # :yields: node
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
stack = []
|
56
|
+
each { |child| stack.unshift child if child.node_type == :element }
|
57
|
+
until stack.empty?
|
58
|
+
child = stack.pop
|
59
|
+
yield child
|
60
|
+
n = stack.size
|
61
|
+
child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Find (and return) first subnode (recursively) for which the block
|
@@ -7,6 +7,17 @@ require "strscan"
|
|
7
7
|
|
8
8
|
module REXML
|
9
9
|
module Parsers
|
10
|
+
if StringScanner::Version < "3.0.8"
|
11
|
+
module StringScannerCaptures
|
12
|
+
refine StringScanner do
|
13
|
+
def captures
|
14
|
+
values_at(*(1...size))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
using StringScannerCaptures
|
19
|
+
end
|
20
|
+
|
10
21
|
# = Using the Pull Parser
|
11
22
|
# <em>This API is experimental, and subject to change.</em>
|
12
23
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -121,13 +132,20 @@ module REXML
|
|
121
132
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
133
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
134
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
135
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
136
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
137
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
138
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
139
|
+
default_entities.each do |term|
|
140
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
141
|
+
end
|
124
142
|
end
|
125
143
|
private_constant :Private
|
126
|
-
include Private
|
127
144
|
|
128
145
|
def initialize( source )
|
129
146
|
self.stream = source
|
130
147
|
@listeners = []
|
148
|
+
@prefixes = Set.new
|
131
149
|
end
|
132
150
|
|
133
151
|
def add_listener( listener )
|
@@ -193,6 +211,8 @@ module REXML
|
|
193
211
|
|
194
212
|
# Returns the next event. This is a +PullEvent+ object.
|
195
213
|
def pull
|
214
|
+
@source.drop_parsed_content
|
215
|
+
|
196
216
|
pull_event.tap do |event|
|
197
217
|
@listeners.each do |listener|
|
198
218
|
listener.receive event
|
@@ -205,7 +225,12 @@ module REXML
|
|
205
225
|
x, @closed = @closed, nil
|
206
226
|
return [ :end_element, x ]
|
207
227
|
end
|
208
|
-
|
228
|
+
if empty?
|
229
|
+
if @document_status == :in_doctype
|
230
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
231
|
+
end
|
232
|
+
return [ :end_document ]
|
233
|
+
end
|
209
234
|
return @stack.shift if @stack.size > 0
|
210
235
|
#STDERR.puts @source.encoding
|
211
236
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -217,7 +242,14 @@ module REXML
|
|
217
242
|
return process_instruction(start_position)
|
218
243
|
elsif @source.match("<!", true)
|
219
244
|
if @source.match("--", true)
|
220
|
-
|
245
|
+
md = @source.match(/(.*?)-->/um, true)
|
246
|
+
if md.nil?
|
247
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
248
|
+
end
|
249
|
+
if /--|-\z/.match?(md[1])
|
250
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
251
|
+
end
|
252
|
+
return [ :comment, md[1] ]
|
221
253
|
elsif @source.match("DOCTYPE", true)
|
222
254
|
base_error_message = "Malformed DOCTYPE"
|
223
255
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +261,7 @@ module REXML
|
|
229
261
|
@source.position = start_position
|
230
262
|
raise REXML::ParseException.new(message, @source)
|
231
263
|
end
|
232
|
-
@nsstack.unshift(
|
264
|
+
@nsstack.unshift(Set.new)
|
233
265
|
name = parse_name(base_error_message)
|
234
266
|
if @source.match(/\s*\[/um, true)
|
235
267
|
id = [nil, nil, nil]
|
@@ -277,7 +309,7 @@ module REXML
|
|
277
309
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
310
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
311
|
elsif @source.match("ENTITY", true)
|
280
|
-
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
312
|
+
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
|
281
313
|
ref = false
|
282
314
|
if match[1] == '%'
|
283
315
|
ref = true
|
@@ -303,7 +335,7 @@ module REXML
|
|
303
335
|
match << '%' if ref
|
304
336
|
return match
|
305
337
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
338
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
339
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
340
|
element = md[1]
|
309
341
|
contents = md[0]
|
@@ -355,6 +387,9 @@ module REXML
|
|
355
387
|
@document_status = :after_doctype
|
356
388
|
return [ :end_doctype ]
|
357
389
|
end
|
390
|
+
if @document_status == :in_doctype
|
391
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
392
|
+
end
|
358
393
|
end
|
359
394
|
if @document_status == :after_doctype
|
360
395
|
@source.match(/\s*/um, true)
|
@@ -362,10 +397,14 @@ module REXML
|
|
362
397
|
begin
|
363
398
|
start_position = @source.position
|
364
399
|
if @source.match("<", true)
|
400
|
+
# :text's read_until may leave only "<" in the buffer. In that
|
401
|
+
# case, the buffer is empty here, so we need to fill the buffer
|
402
|
+
# here explicitly.
|
403
|
+
@source.ensure_buffer
|
365
404
|
if @source.match("/", true)
|
366
405
|
@nsstack.shift
|
367
406
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
407
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
408
|
if md and !last_tag
|
370
409
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
410
|
raise REXML::ParseException.new(message, @source)
|
@@ -384,12 +423,11 @@ module REXML
|
|
384
423
|
if md[0][0] == ?-
|
385
424
|
md = @source.match(/--(.*?)-->/um, true)
|
386
425
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
426
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
427
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
428
|
end
|
391
429
|
|
392
|
-
return [ :comment, md[1] ]
|
430
|
+
return [ :comment, md[1] ]
|
393
431
|
else
|
394
432
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
395
433
|
return [ :cdata, md[1] ] if md
|
@@ -400,19 +438,19 @@ module REXML
|
|
400
438
|
return process_instruction(start_position)
|
401
439
|
else
|
402
440
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
441
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
442
|
unless md
|
405
443
|
@source.position = start_position
|
406
444
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
445
|
end
|
408
446
|
tag = md[1]
|
409
447
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
448
|
+
@prefixes.clear
|
449
|
+
@prefixes << md[2] if md[2]
|
412
450
|
@nsstack.unshift(curr_ns=Set.new)
|
413
|
-
attributes, closed = parse_attributes(prefixes, curr_ns)
|
451
|
+
attributes, closed = parse_attributes(@prefixes, curr_ns)
|
414
452
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
453
|
+
for prefix in @prefixes
|
416
454
|
unless @nsstack.find{|k| k.member?(prefix)}
|
417
455
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
456
|
end
|
@@ -427,8 +465,10 @@ module REXML
|
|
427
465
|
return [ :start_element, tag, attributes ]
|
428
466
|
end
|
429
467
|
else
|
430
|
-
|
431
|
-
text
|
468
|
+
text = @source.read_until("<")
|
469
|
+
if text.chomp!("<")
|
470
|
+
@source.position -= "<".bytesize
|
471
|
+
end
|
432
472
|
return [ :text, text ]
|
433
473
|
end
|
434
474
|
rescue REXML::UndefinedNamespaceException
|
@@ -471,10 +511,10 @@ module REXML
|
|
471
511
|
|
472
512
|
# Unescapes all possible entities
|
473
513
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
rv = string.gsub(
|
514
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
475
515
|
matches = rv.scan( REFERENCE_RE )
|
476
516
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
517
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
518
|
m=$1
|
479
519
|
m = "0#{m}" if m[0] == ?x
|
480
520
|
[Integer(m)].pack('U*')
|
@@ -485,7 +525,7 @@ module REXML
|
|
485
525
|
unless filter and filter.include?(entity_reference)
|
486
526
|
entity_value = entity( entity_reference, entities )
|
487
527
|
if entity_value
|
488
|
-
re = /&#{entity_reference};/
|
528
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
489
529
|
rv.gsub!( re, entity_value )
|
490
530
|
else
|
491
531
|
er = DEFAULT_ENTITIES[entity_reference]
|
@@ -493,7 +533,7 @@ module REXML
|
|
493
533
|
end
|
494
534
|
end
|
495
535
|
end
|
496
|
-
rv.gsub!(
|
536
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
537
|
end
|
498
538
|
rv
|
499
539
|
end
|
@@ -506,7 +546,7 @@ module REXML
|
|
506
546
|
end
|
507
547
|
|
508
548
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
549
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
550
|
unless md
|
511
551
|
if @source.match(/\s*\S/um)
|
512
552
|
message = "#{base_error_message}: invalid name"
|
@@ -585,7 +625,7 @@ module REXML
|
|
585
625
|
end
|
586
626
|
|
587
627
|
def process_instruction(start_position)
|
588
|
-
match_data = @source.match(INSTRUCTION_END, true)
|
628
|
+
match_data = @source.match(Private::INSTRUCTION_END, true)
|
589
629
|
unless match_data
|
590
630
|
message = "Invalid processing instruction node"
|
591
631
|
@source.position = start_position
|
@@ -633,8 +673,10 @@ module REXML
|
|
633
673
|
raise REXML::ParseException.new(message, @source)
|
634
674
|
end
|
635
675
|
quote = match[1]
|
676
|
+
start_position = @source.position
|
636
677
|
value = @source.read_until(quote)
|
637
678
|
unless value.chomp!(quote)
|
679
|
+
@source.position = start_position
|
638
680
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
681
|
raise REXML::ParseException.new(message, @source)
|
640
682
|
end
|
@@ -16,7 +16,6 @@ module REXML
|
|
16
16
|
|
17
17
|
def parse
|
18
18
|
tag_stack = []
|
19
|
-
in_doctype = false
|
20
19
|
entities = nil
|
21
20
|
begin
|
22
21
|
while true
|
@@ -39,17 +38,15 @@ module REXML
|
|
39
38
|
tag_stack.pop
|
40
39
|
@build_context = @build_context.parent
|
41
40
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
41
|
+
if @build_context[-1].instance_of? Text
|
42
|
+
@build_context[-1] << event[1]
|
43
|
+
else
|
44
|
+
@build_context.add(
|
45
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
46
|
+
) unless (
|
47
|
+
@build_context.ignore_whitespace_nodes and
|
48
|
+
event[1].strip.size==0
|
49
|
+
)
|
53
50
|
end
|
54
51
|
when :comment
|
55
52
|
c = Comment.new( event[1] )
|
@@ -60,14 +57,12 @@ module REXML
|
|
60
57
|
when :processing_instruction
|
61
58
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
59
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
60
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
61
|
@build_context = @build_context.parent
|
66
62
|
when :start_doctype
|
67
63
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
64
|
@build_context = doctype
|
69
65
|
entities = {}
|
70
|
-
in_doctype = true
|
71
66
|
when :attlistdecl
|
72
67
|
n = AttlistDecl.new( event[1..-1] )
|
73
68
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -34,6 +54,16 @@ module REXML
|
|
34
54
|
attr_reader :line
|
35
55
|
attr_reader :encoding
|
36
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
37
67
|
# Constructor
|
38
68
|
# @param arg must be a String, and should be a valid XML document
|
39
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
@@ -54,6 +84,12 @@ module REXML
|
|
54
84
|
@scanner.rest
|
55
85
|
end
|
56
86
|
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
57
93
|
def buffer_encoding=(encoding)
|
58
94
|
@scanner.string.force_encoding(encoding)
|
59
95
|
end
|
@@ -69,7 +105,13 @@ module REXML
|
|
69
105
|
end
|
70
106
|
|
71
107
|
def read_until(term)
|
72
|
-
|
108
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
109
|
+
data = @scanner.scan_until(pattern)
|
110
|
+
unless data
|
111
|
+
data = @scanner.rest
|
112
|
+
@scanner.pos = @scanner.string.bytesize
|
113
|
+
end
|
114
|
+
data
|
73
115
|
end
|
74
116
|
|
75
117
|
def ensure_buffer
|
@@ -163,6 +205,7 @@ module REXML
|
|
163
205
|
end
|
164
206
|
|
165
207
|
def read(term = nil)
|
208
|
+
term = encode(term) if term
|
166
209
|
begin
|
167
210
|
@scanner << readline(term)
|
168
211
|
true
|
@@ -173,16 +216,20 @@ module REXML
|
|
173
216
|
end
|
174
217
|
|
175
218
|
def read_until(term)
|
176
|
-
pattern = Regexp.
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
219
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
220
|
+
term = encode(term)
|
221
|
+
until str = @scanner.scan_until(pattern)
|
222
|
+
break if @source.nil?
|
223
|
+
break if @source.eof?
|
224
|
+
@scanner << readline(term)
|
225
|
+
end
|
226
|
+
if str
|
184
227
|
read if @scanner.eos? and !@source.eof?
|
185
228
|
str
|
229
|
+
else
|
230
|
+
rest = @scanner.rest
|
231
|
+
@scanner.pos = @scanner.string.bytesize
|
232
|
+
rest
|
186
233
|
end
|
187
234
|
end
|
188
235
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-
|
10
|
+
date: 2024-06-25 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
18
|
+
version: '0'
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version:
|
25
|
+
version: '0'
|
26
26
|
description: An XML toolkit for Ruby
|
27
27
|
email:
|
28
28
|
- kou@cozmixng.org
|
@@ -115,7 +115,8 @@ files:
|
|
115
115
|
homepage: https://github.com/ruby/rexml
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
|
-
metadata:
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
119
120
|
rdoc_options:
|
120
121
|
- "--main"
|
121
122
|
- README.md
|