rexml 3.3.2 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -247,6 +259,8 @@ module REXML
247
259
  end
248
260
 
249
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
250
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
251
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
252
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -7,36 +7,41 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- unnormalized = @parser.unnormalize( event[1] )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
40
45
  @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]
@@ -15,7 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
18
  entities = nil
20
19
  begin
21
20
  while true
@@ -23,19 +22,13 @@ module REXML
23
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
24
23
  case event[0]
25
24
  when :end_document
26
- unless tag_stack.empty?
27
- raise ParseException.new("No close tag for #{@build_context.xpath}",
28
- @parser.source, @parser)
29
- end
30
25
  return
31
26
  when :start_element
32
- tag_stack.push(event[1])
33
27
  el = @build_context = @build_context.add_element( event[1] )
34
28
  event[2].each do |key, value|
35
29
  el.attributes[key]=Attribute.new(key,value,self)
36
30
  end
37
31
  when :end_element
38
- tag_stack.pop
39
32
  @build_context = @build_context.parent
40
33
  when :text
41
34
  if @build_context[-1].instance_of? Text
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.2"
34
+ VERSION = "3.4.0"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -77,6 +87,7 @@ module REXML
77
87
  detect_encoding
78
88
  end
79
89
  @line = 0
90
+ @encoded_terms = {}
80
91
  end
81
92
 
82
93
  # The current buffer (what we're going to read next)
@@ -117,7 +128,7 @@ module REXML
117
128
  def ensure_buffer
118
129
  end
119
130
 
120
- def match(pattern, cons=false, term: nil)
131
+ def match(pattern, cons=false)
121
132
  if cons
122
133
  @scanner.scan(pattern).nil? ? nil : @scanner
123
134
  else
@@ -125,6 +136,14 @@ module REXML
125
136
  end
126
137
  end
127
138
 
139
+ def match?(pattern, cons=false)
140
+ if cons
141
+ !@scanner.skip(pattern).nil?
142
+ else
143
+ !@scanner.match?(pattern).nil?
144
+ end
145
+ end
146
+
128
147
  def position
129
148
  @scanner.pos
130
149
  end
@@ -204,10 +223,20 @@ module REXML
204
223
  end
205
224
  end
206
225
 
207
- def read(term = nil)
226
+ def read(term = nil, min_bytes = 1)
208
227
  term = encode(term) if term
209
228
  begin
210
- @scanner << readline(term)
229
+ str = readline(term)
230
+ @scanner << str
231
+ read_bytes = str.bytesize
232
+ begin
233
+ while read_bytes < min_bytes
234
+ str = readline(term)
235
+ @scanner << str
236
+ read_bytes += str.bytesize
237
+ end
238
+ rescue IOError
239
+ end
211
240
  true
212
241
  rescue Exception, NameError
213
242
  @source = nil
@@ -217,7 +246,7 @@ module REXML
217
246
 
218
247
  def read_until(term)
219
248
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
220
- term = encode(term)
249
+ term = @encoded_terms[term] ||= encode(term)
221
250
  until str = @scanner.scan_until(pattern)
222
251
  break if @source.nil?
223
252
  break if @source.eof?
@@ -237,10 +266,9 @@ module REXML
237
266
  read if @scanner.eos? && @source
238
267
  end
239
268
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
- def match( pattern, cons=false, term: nil )
269
+ def match( pattern, cons=false )
270
+ # To avoid performance issue, we need to increase bytes to read per scan
271
+ min_bytes = 1
244
272
  while true
245
273
  if cons
246
274
  md = @scanner.scan(pattern)
@@ -250,12 +278,30 @@ module REXML
250
278
  break if md
251
279
  return nil if pattern.is_a?(String)
252
280
  return nil if @source.nil?
253
- return nil unless read(term)
281
+ return nil unless read(nil, min_bytes)
282
+ min_bytes *= 2
254
283
  end
255
284
 
256
285
  md.nil? ? nil : @scanner
257
286
  end
258
287
 
288
+ def match?( pattern, cons=false )
289
+ # To avoid performance issue, we need to increase bytes to read per scan
290
+ min_bytes = 1
291
+ while true
292
+ if cons
293
+ n_matched_bytes = @scanner.skip(pattern)
294
+ else
295
+ n_matched_bytes = @scanner.match?(pattern)
296
+ end
297
+ return true if n_matched_bytes
298
+ return false if pattern.is_a?(String)
299
+ return false if @source.nil?
300
+ return false unless read(nil, min_bytes)
301
+ min_bytes *= 2
302
+ end
303
+ end
304
+
259
305
  def empty?
260
306
  super and ( @source.nil? || @source.eof? )
261
307
  end
@@ -275,7 +321,7 @@ module REXML
275
321
  rescue
276
322
  end
277
323
  @er_source.seek(pos)
278
- rescue IOError
324
+ rescue IOError, SystemCallError
279
325
  pos = -1
280
326
  line = -1
281
327
  end
@@ -284,14 +330,19 @@ module REXML
284
330
 
285
331
  private
286
332
  def readline(term = nil)
287
- str = @source.readline(term || @line_break)
288
333
  if @pending_buffer
334
+ begin
335
+ str = @source.readline(term || @line_break)
336
+ rescue IOError
337
+ end
289
338
  if str.nil?
290
339
  str = @pending_buffer
291
340
  else
292
341
  str = @pending_buffer + str
293
342
  end
294
343
  @pending_buffer = nil
344
+ else
345
+ str = @source.readline(term || @line_break)
295
346
  end
296
347
  return nil if str.nil?
297
348
 
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
@@ -268,7 +243,8 @@ module REXML
268
243
  # u = Text.new( "sean russell", false, nil, true )
269
244
  # u.value #-> "sean russell"
270
245
  def value
271
- @unnormalized ||= Text::unnormalize( @string, doctype )
246
+ @unnormalized ||= Text::unnormalize(@string, doctype,
247
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
272
248
  end
273
249
 
274
250
  # Sets the contents of this text node. This expects the text to be
@@ -411,11 +387,12 @@ module REXML
411
387
  end
412
388
 
413
389
  # Unescapes all possible entities
414
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
390
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
391
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
415
392
  sum = 0
416
393
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
417
394
  s = Text.expand($&, doctype, filter)
418
- if sum + s.bytesize > Security.entity_expansion_text_limit
395
+ if sum + s.bytesize > entity_expansion_text_limit
419
396
  raise "entity expansion has grown too large"
420
397
  else
421
398
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.2
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
+ autorequire:
8
9
  bindir: bin
9
10
  cert_chain: []
10
- date: 2024-07-16 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: '0'
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: '0'
11
+ date: 2024-12-15 00:00:00.000000000 Z
12
+ dependencies: []
26
13
  description: An XML toolkit for Ruby
27
14
  email:
28
15
  - kou@cozmixng.org
@@ -116,7 +103,8 @@ homepage: https://github.com/ruby/rexml
116
103
  licenses:
117
104
  - BSD-2-Clause
118
105
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
106
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
107
+ post_install_message:
120
108
  rdoc_options:
121
109
  - "--main"
122
110
  - README.md
@@ -133,7 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
121
  - !ruby/object:Gem::Version
134
122
  version: '0'
135
123
  requirements: []
136
- rubygems_version: 3.6.0.dev
124
+ rubygems_version: 3.5.22
125
+ signing_key:
137
126
  specification_version: 4
138
127
  summary: An XML toolkit for Ruby
139
128
  test_files: []