rexml 3.2.8 → 3.3.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -15,8 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
- in_doctype = false
20
18
  entities = nil
21
19
  begin
22
20
  while true
@@ -24,32 +22,24 @@ module REXML
24
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
25
23
  case event[0]
26
24
  when :end_document
27
- unless tag_stack.empty?
28
- raise ParseException.new("No close tag for #{@build_context.xpath}",
29
- @parser.source, @parser)
30
- end
31
25
  return
32
26
  when :start_element
33
- tag_stack.push(event[1])
34
27
  el = @build_context = @build_context.add_element( event[1] )
35
28
  event[2].each do |key, value|
36
29
  el.attributes[key]=Attribute.new(key,value,self)
37
30
  end
38
31
  when :end_element
39
- tag_stack.pop
40
32
  @build_context = @build_context.parent
41
33
  when :text
42
- if not in_doctype
43
- if @build_context[-1].instance_of? Text
44
- @build_context[-1] << event[1]
45
- else
46
- @build_context.add(
47
- Text.new(event[1], @build_context.whitespace, nil, true)
48
- ) unless (
49
- @build_context.ignore_whitespace_nodes and
50
- event[1].strip.size==0
51
- )
52
- end
34
+ if @build_context[-1].instance_of? Text
35
+ @build_context[-1] << event[1]
36
+ else
37
+ @build_context.add(
38
+ Text.new(event[1], @build_context.whitespace, nil, true)
39
+ ) unless (
40
+ @build_context.ignore_whitespace_nodes and
41
+ event[1].strip.size==0
42
+ )
53
43
  end
54
44
  when :comment
55
45
  c = Comment.new( event[1] )
@@ -60,14 +50,12 @@ module REXML
60
50
  when :processing_instruction
61
51
  @build_context.add( Instruction.new( event[1], event[2] ) )
62
52
  when :end_doctype
63
- in_doctype = false
64
53
  entities.each { |k,v| entities[k] = @build_context.entities[k].value }
65
54
  @build_context = @build_context.parent
66
55
  when :start_doctype
67
56
  doctype = DocType.new( event[1..-1], @build_context )
68
57
  @build_context = doctype
69
58
  entities = {}
70
- in_doctype = true
71
59
  when :attlistdecl
72
60
  n = AttlistDecl.new( event[1..-1] )
73
61
  @build_context.add( n )
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.2.8"
34
+ VERSION = "3.3.9"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,8 +1,28 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
+
4
+ require "strscan"
5
+
3
6
  require_relative 'encoding'
4
7
 
5
8
  module REXML
9
+ if StringScanner::Version < "1.0.0"
10
+ module StringScannerCheckScanString
11
+ refine StringScanner do
12
+ def check(pattern)
13
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
14
+ super(pattern)
15
+ end
16
+
17
+ def scan(pattern)
18
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
+ super(pattern)
20
+ end
21
+ end
22
+ end
23
+ using StringScannerCheckScanString
24
+ end
25
+
6
26
  # Generates Source-s. USE THIS CLASS.
7
27
  class SourceFactory
8
28
  # Generates a Source object
@@ -34,6 +54,16 @@ module REXML
34
54
  attr_reader :line
35
55
  attr_reader :encoding
36
56
 
57
+ module Private
58
+ SCANNER_RESET_SIZE = 100000
59
+ PRE_DEFINED_TERM_PATTERNS = {}
60
+ pre_defined_terms = ["'", '"', "<"]
61
+ pre_defined_terms.each do |term|
62
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
63
+ end
64
+ end
65
+ private_constant :Private
66
+
37
67
  # Constructor
38
68
  # @param arg must be a String, and should be a valid XML document
39
69
  # @param encoding if non-null, sets the encoding of the source to this
@@ -47,6 +77,7 @@ module REXML
47
77
  detect_encoding
48
78
  end
49
79
  @line = 0
80
+ @term_encord = {}
50
81
  end
51
82
 
52
83
  # The current buffer (what we're going to read next)
@@ -54,6 +85,12 @@ module REXML
54
85
  @scanner.rest
55
86
  end
56
87
 
88
+ def drop_parsed_content
89
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
90
+ @scanner.string = @scanner.rest
91
+ end
92
+ end
93
+
57
94
  def buffer_encoding=(encoding)
58
95
  @scanner.string.force_encoding(encoding)
59
96
  end
@@ -69,7 +106,13 @@ module REXML
69
106
  end
70
107
 
71
108
  def read_until(term)
72
- @scanner.scan_until(Regexp.union(term)) or @scanner.rest
109
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
110
+ data = @scanner.scan_until(pattern)
111
+ unless data
112
+ data = @scanner.rest
113
+ @scanner.pos = @scanner.string.bytesize
114
+ end
115
+ data
73
116
  end
74
117
 
75
118
  def ensure_buffer
@@ -162,9 +205,20 @@ module REXML
162
205
  end
163
206
  end
164
207
 
165
- def read(term = nil)
208
+ def read(term = nil, min_bytes = 1)
209
+ term = encode(term) if term
166
210
  begin
167
- @scanner << readline(term)
211
+ str = readline(term)
212
+ @scanner << str
213
+ read_bytes = str.bytesize
214
+ begin
215
+ while read_bytes < min_bytes
216
+ str = readline(term)
217
+ @scanner << str
218
+ read_bytes += str.bytesize
219
+ end
220
+ rescue IOError
221
+ end
168
222
  true
169
223
  rescue Exception, NameError
170
224
  @source = nil
@@ -173,16 +227,20 @@ module REXML
173
227
  end
174
228
 
175
229
  def read_until(term)
176
- pattern = Regexp.union(term)
177
- begin
178
- until str = @scanner.scan_until(pattern)
179
- @scanner << readline(term)
180
- end
181
- rescue EOFError
182
- @scanner.rest
183
- else
230
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
231
+ term = @term_encord[term] ||= encode(term)
232
+ until str = @scanner.scan_until(pattern)
233
+ break if @source.nil?
234
+ break if @source.eof?
235
+ @scanner << readline(term)
236
+ end
237
+ if str
184
238
  read if @scanner.eos? and !@source.eof?
185
239
  str
240
+ else
241
+ rest = @scanner.rest
242
+ @scanner.pos = @scanner.string.bytesize
243
+ rest
186
244
  end
187
245
  end
188
246
 
@@ -190,10 +248,9 @@ module REXML
190
248
  read if @scanner.eos? && @source
191
249
  end
192
250
 
193
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
194
- # - ">"
195
- # - "XXX>" (X is any string excluding '>')
196
251
  def match( pattern, cons=false )
252
+ # To avoid performance issue, we need to increase bytes to read per scan
253
+ min_bytes = 1
197
254
  while true
198
255
  if cons
199
256
  md = @scanner.scan(pattern)
@@ -203,7 +260,8 @@ module REXML
203
260
  break if md
204
261
  return nil if pattern.is_a?(String)
205
262
  return nil if @source.nil?
206
- return nil unless read
263
+ return nil unless read(nil, min_bytes)
264
+ min_bytes *= 2
207
265
  end
208
266
 
209
267
  md.nil? ? nil : @scanner
@@ -237,14 +295,19 @@ module REXML
237
295
 
238
296
  private
239
297
  def readline(term = nil)
240
- str = @source.readline(term || @line_break)
241
298
  if @pending_buffer
299
+ begin
300
+ str = @source.readline(term || @line_break)
301
+ rescue IOError
302
+ end
242
303
  if str.nil?
243
304
  str = @pending_buffer
244
305
  else
245
306
  str = @pending_buffer + str
246
307
  end
247
308
  @pending_buffer = nil
309
+ else
310
+ str = @source.readline(term || @line_break)
248
311
  end
249
312
  return nil if str.nil?
250
313
 
data/lib/rexml/text.rb CHANGED
@@ -151,25 +151,45 @@ module REXML
151
151
  end
152
152
  end
153
153
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while (index = string.index(/<|&/, pos))
156
+ if string[index] == "<"
157
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158
+ end
159
+
160
+ unless (end_index = string.index(/[^\s];/, index + 1))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
+ end
163
+
164
+ value = string[(index + 1)..end_index]
165
+ if /\s/.match?(value)
166
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167
+ end
168
+
169
+ if value[0] == "#"
170
+ character_reference = value[1..-1]
171
+
172
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173
+ if character_reference[0] == "x" || character_reference[-1] == "x"
174
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
175
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
177
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
  end
179
+
180
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184
+ end
185
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
187
  end
188
+
189
+ pos = end_index + 1
172
190
  end
191
+
192
+ string
173
193
  end
174
194
 
175
195
  def node_type
@@ -248,7 +268,8 @@ module REXML
248
268
  # u = Text.new( "sean russell", false, nil, true )
249
269
  # u.value #-> "sean russell"
250
270
  def value
251
- @unnormalized ||= Text::unnormalize( @string, doctype )
271
+ @unnormalized ||= Text::unnormalize(@string, doctype,
272
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
252
273
  end
253
274
 
254
275
  # Sets the contents of this text node. This expects the text to be
@@ -391,11 +412,12 @@ module REXML
391
412
  end
392
413
 
393
414
  # Unescapes all possible entities
394
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
415
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
416
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
395
417
  sum = 0
396
418
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
397
419
  s = Text.expand($&, doctype, filter)
398
- if sum + s.bytesize > Security.entity_expansion_text_limit
420
+ if sum + s.bytesize > entity_expansion_text_limit
399
421
  raise "entity expansion has grown too large"
400
422
  else
401
423
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.8
4
+ version: 3.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-05-16 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: 3.0.9
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: 3.0.9
10
+ date: 2024-10-24 00:00:00.000000000 Z
11
+ dependencies: []
26
12
  description: An XML toolkit for Ruby
27
13
  email:
28
14
  - kou@cozmixng.org
@@ -115,7 +101,8 @@ files:
115
101
  homepage: https://github.com/ruby/rexml
116
102
  licenses:
117
103
  - BSD-2-Clause
118
- metadata: {}
104
+ metadata:
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
119
106
  rdoc_options:
120
107
  - "--main"
121
108
  - README.md