rexml 3.2.7 → 3.3.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

@@ -15,8 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
- in_doctype = false
20
18
  entities = nil
21
19
  begin
22
20
  while true
@@ -24,32 +22,24 @@ module REXML
24
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
25
23
  case event[0]
26
24
  when :end_document
27
- unless tag_stack.empty?
28
- raise ParseException.new("No close tag for #{@build_context.xpath}",
29
- @parser.source, @parser)
30
- end
31
25
  return
32
26
  when :start_element
33
- tag_stack.push(event[1])
34
27
  el = @build_context = @build_context.add_element( event[1] )
35
28
  event[2].each do |key, value|
36
29
  el.attributes[key]=Attribute.new(key,value,self)
37
30
  end
38
31
  when :end_element
39
- tag_stack.pop
40
32
  @build_context = @build_context.parent
41
33
  when :text
42
- if not in_doctype
43
- if @build_context[-1].instance_of? Text
44
- @build_context[-1] << event[1]
45
- else
46
- @build_context.add(
47
- Text.new(event[1], @build_context.whitespace, nil, true)
48
- ) unless (
49
- @build_context.ignore_whitespace_nodes and
50
- event[1].strip.size==0
51
- )
52
- end
34
+ if @build_context[-1].instance_of? Text
35
+ @build_context[-1] << event[1]
36
+ else
37
+ @build_context.add(
38
+ Text.new(event[1], @build_context.whitespace, nil, true)
39
+ ) unless (
40
+ @build_context.ignore_whitespace_nodes and
41
+ event[1].strip.size==0
42
+ )
53
43
  end
54
44
  when :comment
55
45
  c = Comment.new( event[1] )
@@ -60,14 +50,12 @@ module REXML
60
50
  when :processing_instruction
61
51
  @build_context.add( Instruction.new( event[1], event[2] ) )
62
52
  when :end_doctype
63
- in_doctype = false
64
53
  entities.each { |k,v| entities[k] = @build_context.entities[k].value }
65
54
  @build_context = @build_context.parent
66
55
  when :start_doctype
67
56
  doctype = DocType.new( event[1..-1], @build_context )
68
57
  @build_context = doctype
69
58
  entities = {}
70
- in_doctype = true
71
59
  when :attlistdecl
72
60
  n = AttlistDecl.new( event[1..-1] )
73
61
  @build_context.add( n )
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.2.7"
34
+ VERSION = "3.3.7"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,8 +1,28 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
+
4
+ require "strscan"
5
+
3
6
  require_relative 'encoding'
4
7
 
5
8
  module REXML
9
+ if StringScanner::Version < "1.0.0"
10
+ module StringScannerCheckScanString
11
+ refine StringScanner do
12
+ def check(pattern)
13
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
14
+ super(pattern)
15
+ end
16
+
17
+ def scan(pattern)
18
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
+ super(pattern)
20
+ end
21
+ end
22
+ end
23
+ using StringScannerCheckScanString
24
+ end
25
+
6
26
  # Generates Source-s. USE THIS CLASS.
7
27
  class SourceFactory
8
28
  # Generates a Source object
@@ -34,6 +54,16 @@ module REXML
34
54
  attr_reader :line
35
55
  attr_reader :encoding
36
56
 
57
+ module Private
58
+ SCANNER_RESET_SIZE = 100000
59
+ PRE_DEFINED_TERM_PATTERNS = {}
60
+ pre_defined_terms = ["'", '"', "<"]
61
+ pre_defined_terms.each do |term|
62
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
63
+ end
64
+ end
65
+ private_constant :Private
66
+
37
67
  # Constructor
38
68
  # @param arg must be a String, and should be a valid XML document
39
69
  # @param encoding if non-null, sets the encoding of the source to this
@@ -54,6 +84,12 @@ module REXML
54
84
  @scanner.rest
55
85
  end
56
86
 
87
+ def drop_parsed_content
88
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
89
+ @scanner.string = @scanner.rest
90
+ end
91
+ end
92
+
57
93
  def buffer_encoding=(encoding)
58
94
  @scanner.string.force_encoding(encoding)
59
95
  end
@@ -69,7 +105,13 @@ module REXML
69
105
  end
70
106
 
71
107
  def read_until(term)
72
- @scanner.scan_until(Regexp.union(term)) or @scanner.rest
108
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
109
+ data = @scanner.scan_until(pattern)
110
+ unless data
111
+ data = @scanner.rest
112
+ @scanner.pos = @scanner.string.bytesize
113
+ end
114
+ data
73
115
  end
74
116
 
75
117
  def ensure_buffer
@@ -162,9 +204,20 @@ module REXML
162
204
  end
163
205
  end
164
206
 
165
- def read(term = nil)
207
+ def read(term = nil, min_bytes = 1)
208
+ term = encode(term) if term
166
209
  begin
167
- @scanner << readline(term)
210
+ str = readline(term)
211
+ @scanner << str
212
+ read_bytes = str.bytesize
213
+ begin
214
+ while read_bytes < min_bytes
215
+ str = readline(term)
216
+ @scanner << str
217
+ read_bytes += str.bytesize
218
+ end
219
+ rescue IOError
220
+ end
168
221
  true
169
222
  rescue Exception, NameError
170
223
  @source = nil
@@ -173,17 +226,20 @@ module REXML
173
226
  end
174
227
 
175
228
  def read_until(term)
176
- pattern = Regexp.union(term)
177
- data = []
178
- begin
179
- until str = @scanner.scan_until(pattern)
180
- @scanner << readline(term)
181
- end
182
- rescue EOFError
183
- @scanner.rest
184
- else
229
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
230
+ term = encode(term)
231
+ until str = @scanner.scan_until(pattern)
232
+ break if @source.nil?
233
+ break if @source.eof?
234
+ @scanner << readline(term)
235
+ end
236
+ if str
185
237
  read if @scanner.eos? and !@source.eof?
186
238
  str
239
+ else
240
+ rest = @scanner.rest
241
+ @scanner.pos = @scanner.string.bytesize
242
+ rest
187
243
  end
188
244
  end
189
245
 
@@ -191,10 +247,9 @@ module REXML
191
247
  read if @scanner.eos? && @source
192
248
  end
193
249
 
194
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
195
- # - ">"
196
- # - "XXX>" (X is any string excluding '>')
197
250
  def match( pattern, cons=false )
251
+ # To avoid performance issue, we need to increase bytes to read per scan
252
+ min_bytes = 1
198
253
  while true
199
254
  if cons
200
255
  md = @scanner.scan(pattern)
@@ -204,7 +259,8 @@ module REXML
204
259
  break if md
205
260
  return nil if pattern.is_a?(String)
206
261
  return nil if @source.nil?
207
- return nil unless read
262
+ return nil unless read(nil, min_bytes)
263
+ min_bytes *= 2
208
264
  end
209
265
 
210
266
  md.nil? ? nil : @scanner
data/lib/rexml/text.rb CHANGED
@@ -151,25 +151,45 @@ module REXML
151
151
  end
152
152
  end
153
153
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while (index = string.index(/<|&/, pos))
156
+ if string[index] == "<"
157
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158
+ end
159
+
160
+ unless (end_index = string.index(/[^\s];/, index + 1))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
+ end
163
+
164
+ value = string[(index + 1)..end_index]
165
+ if /\s/.match?(value)
166
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167
+ end
168
+
169
+ if value[0] == "#"
170
+ character_reference = value[1..-1]
171
+
172
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173
+ if character_reference[0] == "x" || character_reference[-1] == "x"
174
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
175
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
177
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
  end
179
+
180
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184
+ end
185
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
187
  end
188
+
189
+ pos = end_index + 1
172
190
  end
191
+
192
+ string
173
193
  end
174
194
 
175
195
  def node_type
@@ -248,7 +268,8 @@ module REXML
248
268
  # u = Text.new( "sean russell", false, nil, true )
249
269
  # u.value #-> "sean russell"
250
270
  def value
251
- @unnormalized ||= Text::unnormalize( @string, doctype )
271
+ @unnormalized ||= Text::unnormalize(@string, doctype,
272
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
252
273
  end
253
274
 
254
275
  # Sets the contents of this text node. This expects the text to be
@@ -391,11 +412,12 @@ module REXML
391
412
  end
392
413
 
393
414
  # Unescapes all possible entities
394
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
415
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
416
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
395
417
  sum = 0
396
418
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
397
419
  s = Text.expand($&, doctype, filter)
398
- if sum + s.bytesize > Security.entity_expansion_text_limit
420
+ if sum + s.bytesize > entity_expansion_text_limit
399
421
  raise "entity expansion has grown too large"
400
422
  else
401
423
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.7
4
+ version: 3.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-05-16 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: 3.0.9
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: 3.0.9
10
+ date: 2024-09-04 00:00:00.000000000 Z
11
+ dependencies: []
26
12
  description: An XML toolkit for Ruby
27
13
  email:
28
14
  - kou@cozmixng.org
@@ -115,7 +101,8 @@ files:
115
101
  homepage: https://github.com/ruby/rexml
116
102
  licenses:
117
103
  - BSD-2-Clause
118
- metadata: {}
104
+ metadata:
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.7
119
106
  rdoc_options:
120
107
  - "--main"
121
108
  - README.md