rexml 3.2.6 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,44 +117,54 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
153
128
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
129
+ pos = 0
130
+ while (index = string.index(/<|&/, pos))
131
+ if string[index] == "<"
132
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
133
+ end
134
+
135
+ unless (end_index = string.index(/[^\s];/, index + 1))
136
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
137
+ end
138
+
139
+ value = string[(index + 1)..end_index]
140
+ if /\s/.match?(value)
141
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
142
+ end
143
+
144
+ if value[0] == "#"
145
+ character_reference = value[1..-1]
146
+
147
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
148
+ if character_reference[0] == "x" || character_reference[-1] == "x"
149
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
150
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
151
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
152
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
153
  end
154
+
155
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
156
+ when *VALID_CHAR
157
+ else
158
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
159
+ end
160
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
162
  end
163
+
164
+ pos = end_index + 1
172
165
  end
166
+
167
+ string
173
168
  end
174
169
 
175
170
  def node_type
@@ -248,7 +243,8 @@ module REXML
248
243
  # u = Text.new( "sean russell", false, nil, true )
249
244
  # u.value #-> "sean russell"
250
245
  def value
251
- @unnormalized ||= Text::unnormalize( @string, doctype )
246
+ @unnormalized ||= Text::unnormalize(@string, doctype,
247
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
252
248
  end
253
249
 
254
250
  # Sets the contents of this text node. This expects the text to be
@@ -391,11 +387,12 @@ module REXML
391
387
  end
392
388
 
393
389
  # Unescapes all possible entities
394
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
390
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
391
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
395
392
  sum = 0
396
393
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
397
394
  s = Text.expand($&, doctype, filter)
398
- if sum + s.bytesize > Security.entity_expansion_text_limit
395
+ if sum + s.bytesize > entity_expansion_text_limit
399
396
  raise "entity expansion has grown too large"
400
397
  else
401
398
  sum += s.bytesize
@@ -590,6 +590,7 @@ module REXML
590
590
 
591
591
  def evaluate_predicate(expression, nodesets)
592
592
  enter(:predicate, expression, nodesets) if @debug
593
+ new_nodeset_count = 0
593
594
  new_nodesets = nodesets.collect do |nodeset|
594
595
  new_nodeset = []
595
596
  subcontext = { :size => nodeset.size }
@@ -606,17 +607,20 @@ module REXML
606
607
  result = result[0] if result.kind_of? Array and result.length == 1
607
608
  if result.kind_of? Numeric
608
609
  if result == node.position
609
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
610
+ new_nodeset_count += 1
611
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
610
612
  end
611
613
  elsif result.instance_of? Array
612
614
  if result.size > 0 and result.inject(false) {|k,s| s or k}
613
615
  if result.size > 0
614
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
616
+ new_nodeset_count += 1
617
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
615
618
  end
616
619
  end
617
620
  else
618
621
  if result
619
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
622
+ new_nodeset_count += 1
623
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
620
624
  end
621
625
  end
622
626
  end
metadata CHANGED
@@ -1,57 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2023-07-27 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: test-unit
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
10
+ date: 2025-02-16 00:00:00.000000000 Z
11
+ dependencies: []
55
12
  description: An XML toolkit for Ruby
56
13
  email:
57
14
  - kou@cozmixng.org
@@ -144,8 +101,8 @@ files:
144
101
  homepage: https://github.com/ruby/rexml
145
102
  licenses:
146
103
  - BSD-2-Clause
147
- metadata: {}
148
- post_install_message:
104
+ metadata:
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
149
106
  rdoc_options:
150
107
  - "--main"
151
108
  - README.md
@@ -162,8 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
119
  - !ruby/object:Gem::Version
163
120
  version: '0'
164
121
  requirements: []
165
- rubygems_version: 3.5.0.dev
166
- signing_key:
122
+ rubygems_version: 3.6.2
167
123
  specification_version: 4
168
124
  summary: An XML toolkit for Ruby
169
125
  test_files: []