rexml 3.2.8 → 3.3.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,8 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
- in_doctype = false
20
18
  entities = nil
21
19
  begin
22
20
  while true
@@ -24,32 +22,24 @@ module REXML
24
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
25
23
  case event[0]
26
24
  when :end_document
27
- unless tag_stack.empty?
28
- raise ParseException.new("No close tag for #{@build_context.xpath}",
29
- @parser.source, @parser)
30
- end
31
25
  return
32
26
  when :start_element
33
- tag_stack.push(event[1])
34
27
  el = @build_context = @build_context.add_element( event[1] )
35
28
  event[2].each do |key, value|
36
29
  el.attributes[key]=Attribute.new(key,value,self)
37
30
  end
38
31
  when :end_element
39
- tag_stack.pop
40
32
  @build_context = @build_context.parent
41
33
  when :text
42
- if not in_doctype
43
- if @build_context[-1].instance_of? Text
44
- @build_context[-1] << event[1]
45
- else
46
- @build_context.add(
47
- Text.new(event[1], @build_context.whitespace, nil, true)
48
- ) unless (
49
- @build_context.ignore_whitespace_nodes and
50
- event[1].strip.size==0
51
- )
52
- end
34
+ if @build_context[-1].instance_of? Text
35
+ @build_context[-1] << event[1]
36
+ else
37
+ @build_context.add(
38
+ Text.new(event[1], @build_context.whitespace, nil, true)
39
+ ) unless (
40
+ @build_context.ignore_whitespace_nodes and
41
+ event[1].strip.size==0
42
+ )
53
43
  end
54
44
  when :comment
55
45
  c = Comment.new( event[1] )
@@ -60,14 +50,12 @@ module REXML
60
50
  when :processing_instruction
61
51
  @build_context.add( Instruction.new( event[1], event[2] ) )
62
52
  when :end_doctype
63
- in_doctype = false
64
53
  entities.each { |k,v| entities[k] = @build_context.entities[k].value }
65
54
  @build_context = @build_context.parent
66
55
  when :start_doctype
67
56
  doctype = DocType.new( event[1..-1], @build_context )
68
57
  @build_context = doctype
69
58
  entities = {}
70
- in_doctype = true
71
59
  when :attlistdecl
72
60
  n = AttlistDecl.new( event[1..-1] )
73
61
  @build_context.add( n )
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.2.8"
34
+ VERSION = "3.3.9"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,8 +1,28 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
+
4
+ require "strscan"
5
+
3
6
  require_relative 'encoding'
4
7
 
5
8
  module REXML
9
+ if StringScanner::Version < "1.0.0"
10
+ module StringScannerCheckScanString
11
+ refine StringScanner do
12
+ def check(pattern)
13
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
14
+ super(pattern)
15
+ end
16
+
17
+ def scan(pattern)
18
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
+ super(pattern)
20
+ end
21
+ end
22
+ end
23
+ using StringScannerCheckScanString
24
+ end
25
+
6
26
  # Generates Source-s. USE THIS CLASS.
7
27
  class SourceFactory
8
28
  # Generates a Source object
@@ -34,6 +54,16 @@ module REXML
34
54
  attr_reader :line
35
55
  attr_reader :encoding
36
56
 
57
+ module Private
58
+ SCANNER_RESET_SIZE = 100000
59
+ PRE_DEFINED_TERM_PATTERNS = {}
60
+ pre_defined_terms = ["'", '"', "<"]
61
+ pre_defined_terms.each do |term|
62
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
63
+ end
64
+ end
65
+ private_constant :Private
66
+
37
67
  # Constructor
38
68
  # @param arg must be a String, and should be a valid XML document
39
69
  # @param encoding if non-null, sets the encoding of the source to this
@@ -47,6 +77,7 @@ module REXML
47
77
  detect_encoding
48
78
  end
49
79
  @line = 0
80
+ @term_encord = {}
50
81
  end
51
82
 
52
83
  # The current buffer (what we're going to read next)
@@ -54,6 +85,12 @@ module REXML
54
85
  @scanner.rest
55
86
  end
56
87
 
88
+ def drop_parsed_content
89
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
90
+ @scanner.string = @scanner.rest
91
+ end
92
+ end
93
+
57
94
  def buffer_encoding=(encoding)
58
95
  @scanner.string.force_encoding(encoding)
59
96
  end
@@ -69,7 +106,13 @@ module REXML
69
106
  end
70
107
 
71
108
  def read_until(term)
72
- @scanner.scan_until(Regexp.union(term)) or @scanner.rest
109
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
110
+ data = @scanner.scan_until(pattern)
111
+ unless data
112
+ data = @scanner.rest
113
+ @scanner.pos = @scanner.string.bytesize
114
+ end
115
+ data
73
116
  end
74
117
 
75
118
  def ensure_buffer
@@ -162,9 +205,20 @@ module REXML
162
205
  end
163
206
  end
164
207
 
165
- def read(term = nil)
208
+ def read(term = nil, min_bytes = 1)
209
+ term = encode(term) if term
166
210
  begin
167
- @scanner << readline(term)
211
+ str = readline(term)
212
+ @scanner << str
213
+ read_bytes = str.bytesize
214
+ begin
215
+ while read_bytes < min_bytes
216
+ str = readline(term)
217
+ @scanner << str
218
+ read_bytes += str.bytesize
219
+ end
220
+ rescue IOError
221
+ end
168
222
  true
169
223
  rescue Exception, NameError
170
224
  @source = nil
@@ -173,16 +227,20 @@ module REXML
173
227
  end
174
228
 
175
229
  def read_until(term)
176
- pattern = Regexp.union(term)
177
- begin
178
- until str = @scanner.scan_until(pattern)
179
- @scanner << readline(term)
180
- end
181
- rescue EOFError
182
- @scanner.rest
183
- else
230
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
231
+ term = @term_encord[term] ||= encode(term)
232
+ until str = @scanner.scan_until(pattern)
233
+ break if @source.nil?
234
+ break if @source.eof?
235
+ @scanner << readline(term)
236
+ end
237
+ if str
184
238
  read if @scanner.eos? and !@source.eof?
185
239
  str
240
+ else
241
+ rest = @scanner.rest
242
+ @scanner.pos = @scanner.string.bytesize
243
+ rest
186
244
  end
187
245
  end
188
246
 
@@ -190,10 +248,9 @@ module REXML
190
248
  read if @scanner.eos? && @source
191
249
  end
192
250
 
193
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
194
- # - ">"
195
- # - "XXX>" (X is any string excluding '>')
196
251
  def match( pattern, cons=false )
252
+ # To avoid performance issue, we need to increase bytes to read per scan
253
+ min_bytes = 1
197
254
  while true
198
255
  if cons
199
256
  md = @scanner.scan(pattern)
@@ -203,7 +260,8 @@ module REXML
203
260
  break if md
204
261
  return nil if pattern.is_a?(String)
205
262
  return nil if @source.nil?
206
- return nil unless read
263
+ return nil unless read(nil, min_bytes)
264
+ min_bytes *= 2
207
265
  end
208
266
 
209
267
  md.nil? ? nil : @scanner
@@ -237,14 +295,19 @@ module REXML
237
295
 
238
296
  private
239
297
  def readline(term = nil)
240
- str = @source.readline(term || @line_break)
241
298
  if @pending_buffer
299
+ begin
300
+ str = @source.readline(term || @line_break)
301
+ rescue IOError
302
+ end
242
303
  if str.nil?
243
304
  str = @pending_buffer
244
305
  else
245
306
  str = @pending_buffer + str
246
307
  end
247
308
  @pending_buffer = nil
309
+ else
310
+ str = @source.readline(term || @line_break)
248
311
  end
249
312
  return nil if str.nil?
250
313
 
data/lib/rexml/text.rb CHANGED
@@ -151,25 +151,45 @@ module REXML
151
151
  end
152
152
  end
153
153
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while (index = string.index(/<|&/, pos))
156
+ if string[index] == "<"
157
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158
+ end
159
+
160
+ unless (end_index = string.index(/[^\s];/, index + 1))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
+ end
163
+
164
+ value = string[(index + 1)..end_index]
165
+ if /\s/.match?(value)
166
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167
+ end
168
+
169
+ if value[0] == "#"
170
+ character_reference = value[1..-1]
171
+
172
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173
+ if character_reference[0] == "x" || character_reference[-1] == "x"
174
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
175
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
177
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
  end
179
+
180
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184
+ end
185
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
187
  end
188
+
189
+ pos = end_index + 1
172
190
  end
191
+
192
+ string
173
193
  end
174
194
 
175
195
  def node_type
@@ -248,7 +268,8 @@ module REXML
248
268
  # u = Text.new( "sean russell", false, nil, true )
249
269
  # u.value #-> "sean russell"
250
270
  def value
251
- @unnormalized ||= Text::unnormalize( @string, doctype )
271
+ @unnormalized ||= Text::unnormalize(@string, doctype,
272
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
252
273
  end
253
274
 
254
275
  # Sets the contents of this text node. This expects the text to be
@@ -391,11 +412,12 @@ module REXML
391
412
  end
392
413
 
393
414
  # Unescapes all possible entities
394
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
415
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
416
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
395
417
  sum = 0
396
418
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
397
419
  s = Text.expand($&, doctype, filter)
398
- if sum + s.bytesize > Security.entity_expansion_text_limit
420
+ if sum + s.bytesize > entity_expansion_text_limit
399
421
  raise "entity expansion has grown too large"
400
422
  else
401
423
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.8
4
+ version: 3.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-05-16 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: 3.0.9
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: 3.0.9
10
+ date: 2024-10-24 00:00:00.000000000 Z
11
+ dependencies: []
26
12
  description: An XML toolkit for Ruby
27
13
  email:
28
14
  - kou@cozmixng.org
@@ -115,7 +101,8 @@ files:
115
101
  homepage: https://github.com/ruby/rexml
116
102
  licenses:
117
103
  - BSD-2-Clause
118
- metadata: {}
104
+ metadata:
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
119
106
  rdoc_options:
120
107
  - "--main"
121
108
  - README.md