rexml 3.2.6 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,44 +117,54 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
153
128
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
129
+ pos = 0
130
+ while (index = string.index(/<|&/, pos))
131
+ if string[index] == "<"
132
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
133
+ end
134
+
135
+ unless (end_index = string.index(/[^\s];/, index + 1))
136
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
137
+ end
138
+
139
+ value = string[(index + 1)..end_index]
140
+ if /\s/.match?(value)
141
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
142
+ end
143
+
144
+ if value[0] == "#"
145
+ character_reference = value[1..-1]
146
+
147
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
148
+ if character_reference[0] == "x" || character_reference[-1] == "x"
149
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
150
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
151
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
152
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
153
  end
154
+
155
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
156
+ when *VALID_CHAR
157
+ else
158
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
159
+ end
160
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
162
  end
163
+
164
+ pos = end_index + 1
172
165
  end
166
+
167
+ string
173
168
  end
174
169
 
175
170
  def node_type
@@ -248,7 +243,8 @@ module REXML
248
243
  # u = Text.new( "sean russell", false, nil, true )
249
244
  # u.value #-> "sean russell"
250
245
  def value
251
- @unnormalized ||= Text::unnormalize( @string, doctype )
246
+ @unnormalized ||= Text::unnormalize(@string, doctype,
247
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
252
248
  end
253
249
 
254
250
  # Sets the contents of this text node. This expects the text to be
@@ -391,11 +387,12 @@ module REXML
391
387
  end
392
388
 
393
389
  # Unescapes all possible entities
394
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
390
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
391
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
395
392
  sum = 0
396
393
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
397
394
  s = Text.expand($&, doctype, filter)
398
- if sum + s.bytesize > Security.entity_expansion_text_limit
395
+ if sum + s.bytesize > entity_expansion_text_limit
399
396
  raise "entity expansion has grown too large"
400
397
  else
401
398
  sum += s.bytesize
@@ -590,6 +590,7 @@ module REXML
590
590
 
591
591
  def evaluate_predicate(expression, nodesets)
592
592
  enter(:predicate, expression, nodesets) if @debug
593
+ new_nodeset_count = 0
593
594
  new_nodesets = nodesets.collect do |nodeset|
594
595
  new_nodeset = []
595
596
  subcontext = { :size => nodeset.size }
@@ -606,17 +607,20 @@ module REXML
606
607
  result = result[0] if result.kind_of? Array and result.length == 1
607
608
  if result.kind_of? Numeric
608
609
  if result == node.position
609
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
610
+ new_nodeset_count += 1
611
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
610
612
  end
611
613
  elsif result.instance_of? Array
612
614
  if result.size > 0 and result.inject(false) {|k,s| s or k}
613
615
  if result.size > 0
614
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
616
+ new_nodeset_count += 1
617
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
615
618
  end
616
619
  end
617
620
  else
618
621
  if result
619
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
622
+ new_nodeset_count += 1
623
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
620
624
  end
621
625
  end
622
626
  end
metadata CHANGED
@@ -1,57 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-27 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: test-unit
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
11
+ date: 2024-12-15 00:00:00.000000000 Z
12
+ dependencies: []
55
13
  description: An XML toolkit for Ruby
56
14
  email:
57
15
  - kou@cozmixng.org
@@ -144,7 +102,8 @@ files:
144
102
  homepage: https://github.com/ruby/rexml
145
103
  licenses:
146
104
  - BSD-2-Clause
147
- metadata: {}
105
+ metadata:
106
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
148
107
  post_install_message:
149
108
  rdoc_options:
150
109
  - "--main"
@@ -162,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
121
  - !ruby/object:Gem::Version
163
122
  version: '0'
164
123
  requirements: []
165
- rubygems_version: 3.5.0.dev
124
+ rubygems_version: 3.5.22
166
125
  signing_key:
167
126
  specification_version: 4
168
127
  summary: An XML toolkit for Ruby