rexml 3.3.2 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -247,6 +259,8 @@ module REXML
247
259
  end
248
260
 
249
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
250
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
251
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
252
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -7,36 +7,41 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- unnormalized = @parser.unnormalize( event[1] )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
40
45
  @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]
@@ -15,7 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
18
  entities = nil
20
19
  begin
21
20
  while true
@@ -23,19 +22,13 @@ module REXML
23
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
24
23
  case event[0]
25
24
  when :end_document
26
- unless tag_stack.empty?
27
- raise ParseException.new("No close tag for #{@build_context.xpath}",
28
- @parser.source, @parser)
29
- end
30
25
  return
31
26
  when :start_element
32
- tag_stack.push(event[1])
33
27
  el = @build_context = @build_context.add_element( event[1] )
34
28
  event[2].each do |key, value|
35
29
  el.attributes[key]=Attribute.new(key,value,self)
36
30
  end
37
31
  when :end_element
38
- tag_stack.pop
39
32
  @build_context = @build_context.parent
40
33
  when :text
41
34
  if @build_context[-1].instance_of? Text
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.2"
34
+ VERSION = "3.4.0"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -77,6 +87,7 @@ module REXML
77
87
  detect_encoding
78
88
  end
79
89
  @line = 0
90
+ @encoded_terms = {}
80
91
  end
81
92
 
82
93
  # The current buffer (what we're going to read next)
@@ -117,7 +128,7 @@ module REXML
117
128
  def ensure_buffer
118
129
  end
119
130
 
120
- def match(pattern, cons=false, term: nil)
131
+ def match(pattern, cons=false)
121
132
  if cons
122
133
  @scanner.scan(pattern).nil? ? nil : @scanner
123
134
  else
@@ -125,6 +136,14 @@ module REXML
125
136
  end
126
137
  end
127
138
 
139
+ def match?(pattern, cons=false)
140
+ if cons
141
+ !@scanner.skip(pattern).nil?
142
+ else
143
+ !@scanner.match?(pattern).nil?
144
+ end
145
+ end
146
+
128
147
  def position
129
148
  @scanner.pos
130
149
  end
@@ -204,10 +223,20 @@ module REXML
204
223
  end
205
224
  end
206
225
 
207
- def read(term = nil)
226
+ def read(term = nil, min_bytes = 1)
208
227
  term = encode(term) if term
209
228
  begin
210
- @scanner << readline(term)
229
+ str = readline(term)
230
+ @scanner << str
231
+ read_bytes = str.bytesize
232
+ begin
233
+ while read_bytes < min_bytes
234
+ str = readline(term)
235
+ @scanner << str
236
+ read_bytes += str.bytesize
237
+ end
238
+ rescue IOError
239
+ end
211
240
  true
212
241
  rescue Exception, NameError
213
242
  @source = nil
@@ -217,7 +246,7 @@ module REXML
217
246
 
218
247
  def read_until(term)
219
248
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
220
- term = encode(term)
249
+ term = @encoded_terms[term] ||= encode(term)
221
250
  until str = @scanner.scan_until(pattern)
222
251
  break if @source.nil?
223
252
  break if @source.eof?
@@ -237,10 +266,9 @@ module REXML
237
266
  read if @scanner.eos? && @source
238
267
  end
239
268
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
- def match( pattern, cons=false, term: nil )
269
+ def match( pattern, cons=false )
270
+ # To avoid performance issue, we need to increase bytes to read per scan
271
+ min_bytes = 1
244
272
  while true
245
273
  if cons
246
274
  md = @scanner.scan(pattern)
@@ -250,12 +278,30 @@ module REXML
250
278
  break if md
251
279
  return nil if pattern.is_a?(String)
252
280
  return nil if @source.nil?
253
- return nil unless read(term)
281
+ return nil unless read(nil, min_bytes)
282
+ min_bytes *= 2
254
283
  end
255
284
 
256
285
  md.nil? ? nil : @scanner
257
286
  end
258
287
 
288
+ def match?( pattern, cons=false )
289
+ # To avoid performance issue, we need to increase bytes to read per scan
290
+ min_bytes = 1
291
+ while true
292
+ if cons
293
+ n_matched_bytes = @scanner.skip(pattern)
294
+ else
295
+ n_matched_bytes = @scanner.match?(pattern)
296
+ end
297
+ return true if n_matched_bytes
298
+ return false if pattern.is_a?(String)
299
+ return false if @source.nil?
300
+ return false unless read(nil, min_bytes)
301
+ min_bytes *= 2
302
+ end
303
+ end
304
+
259
305
  def empty?
260
306
  super and ( @source.nil? || @source.eof? )
261
307
  end
@@ -275,7 +321,7 @@ module REXML
275
321
  rescue
276
322
  end
277
323
  @er_source.seek(pos)
278
- rescue IOError
324
+ rescue IOError, SystemCallError
279
325
  pos = -1
280
326
  line = -1
281
327
  end
@@ -284,14 +330,19 @@ module REXML
284
330
 
285
331
  private
286
332
  def readline(term = nil)
287
- str = @source.readline(term || @line_break)
288
333
  if @pending_buffer
334
+ begin
335
+ str = @source.readline(term || @line_break)
336
+ rescue IOError
337
+ end
289
338
  if str.nil?
290
339
  str = @pending_buffer
291
340
  else
292
341
  str = @pending_buffer + str
293
342
  end
294
343
  @pending_buffer = nil
344
+ else
345
+ str = @source.readline(term || @line_break)
295
346
  end
296
347
  return nil if str.nil?
297
348
 
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
@@ -268,7 +243,8 @@ module REXML
268
243
  # u = Text.new( "sean russell", false, nil, true )
269
244
  # u.value #-> "sean russell"
270
245
  def value
271
- @unnormalized ||= Text::unnormalize( @string, doctype )
246
+ @unnormalized ||= Text::unnormalize(@string, doctype,
247
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
272
248
  end
273
249
 
274
250
  # Sets the contents of this text node. This expects the text to be
@@ -411,11 +387,12 @@ module REXML
411
387
  end
412
388
 
413
389
  # Unescapes all possible entities
414
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
390
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
391
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
415
392
  sum = 0
416
393
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
417
394
  s = Text.expand($&, doctype, filter)
418
- if sum + s.bytesize > Security.entity_expansion_text_limit
395
+ if sum + s.bytesize > entity_expansion_text_limit
419
396
  raise "entity expansion has grown too large"
420
397
  else
421
398
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.2
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
+ autorequire:
8
9
  bindir: bin
9
10
  cert_chain: []
10
- date: 2024-07-16 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: '0'
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: '0'
11
+ date: 2024-12-15 00:00:00.000000000 Z
12
+ dependencies: []
26
13
  description: An XML toolkit for Ruby
27
14
  email:
28
15
  - kou@cozmixng.org
@@ -116,7 +103,8 @@ homepage: https://github.com/ruby/rexml
116
103
  licenses:
117
104
  - BSD-2-Clause
118
105
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
106
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
107
+ post_install_message:
120
108
  rdoc_options:
121
109
  - "--main"
122
110
  - README.md
@@ -133,7 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
121
  - !ruby/object:Gem::Version
134
122
  version: '0'
135
123
  requirements: []
136
- rubygems_version: 3.6.0.dev
124
+ rubygems_version: 3.5.22
125
+ signing_key:
137
126
  specification_version: 4
138
127
  summary: An XML toolkit for Ruby
139
128
  test_files: []