rexml 3.2.6 → 3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +399 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +446 -274
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +171 -100
- data/lib/rexml/text.rb +54 -57
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +6 -47
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 582bb5339257c81f2ce9c076155c01d7adfe8fb169c09bc7f5f489f6a76bca80
|
4
|
+
data.tar.gz: 160de8899d8d1f995bafca23631e9e4ab928ebbffa21684e3b61dad805a6187b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2b095792523f54301e8a6af2f1682a9ad24d92cdd5d94c9e6088b27520e3c03b68fe06061b6ff2fd96b001b9cb947c57e4095244d83206a83fc2a1829dd4243
|
7
|
+
data.tar.gz: 4f335d2b1e58c1da233c3f0a0588def502c8cb2660633e0e06b4d0930bbcedcaae36b52dc550923704b4525d94a1011f4b5f4e87a81e5d689cce24ee89210a23
|
data/NEWS.md
CHANGED
@@ -1,5 +1,404 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-216
|
9
|
+
* Patch by NAITOH Jun
|
10
|
+
|
11
|
+
* JRuby: Improved parse performance.
|
12
|
+
* GH-219
|
13
|
+
* Patch by João Duarte
|
14
|
+
|
15
|
+
* Added support for reusing pull parser.
|
16
|
+
* GH-214
|
17
|
+
* GH-220
|
18
|
+
* Patch by Dmitry Pogrebnoy
|
19
|
+
|
20
|
+
* Improved error handling when source is `IO`.
|
21
|
+
* GH-221
|
22
|
+
* Patch by NAITOH Jun
|
23
|
+
|
24
|
+
### Thanks
|
25
|
+
|
26
|
+
* NAITOH Jun
|
27
|
+
|
28
|
+
* João Duarte
|
29
|
+
|
30
|
+
* Dmitry Pogrebnoy
|
31
|
+
|
32
|
+
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
33
|
+
|
34
|
+
### Improvements
|
35
|
+
|
36
|
+
* Improved performance.
|
37
|
+
* GH-210
|
38
|
+
* Patch by NAITOH Jun.
|
39
|
+
|
40
|
+
### Fixes
|
41
|
+
|
42
|
+
* Fixed a parse bug for text only invalid XML.
|
43
|
+
* GH-215
|
44
|
+
* Patch by NAITOH Jun.
|
45
|
+
|
46
|
+
* Fixed a parse bug that `&#0x...;` is accepted as a character
|
47
|
+
reference.
|
48
|
+
|
49
|
+
### Thanks
|
50
|
+
|
51
|
+
* NAITOH Jun
|
52
|
+
|
53
|
+
## 3.3.8 - 2024-09-29 {#version-3-3-8}
|
54
|
+
|
55
|
+
### Improvements
|
56
|
+
|
57
|
+
* SAX2: Improve parse performance.
|
58
|
+
* GH-207
|
59
|
+
* Patch by NAITOH Jun.
|
60
|
+
|
61
|
+
### Fixes
|
62
|
+
|
63
|
+
* Fixed a bug that unexpected attribute namespace conflict error for
|
64
|
+
the predefined "xml" namespace is reported.
|
65
|
+
* GH-208
|
66
|
+
* Patch by KITAITI Makoto
|
67
|
+
|
68
|
+
### Thanks
|
69
|
+
|
70
|
+
* NAITOH Jun
|
71
|
+
|
72
|
+
* KITAITI Makoto
|
73
|
+
|
74
|
+
## 3.3.7 - 2024-09-04 {#version-3-3-7}
|
75
|
+
|
76
|
+
### Improvements
|
77
|
+
|
78
|
+
* Added local entity expansion limit methods
|
79
|
+
* GH-192
|
80
|
+
* GH-202
|
81
|
+
* Reported by takuya kodama.
|
82
|
+
* Patch by NAITOH Jun.
|
83
|
+
|
84
|
+
* Removed explicit strscan dependency
|
85
|
+
* GH-204
|
86
|
+
* Patch by Bo Anderson.
|
87
|
+
|
88
|
+
### Thanks
|
89
|
+
|
90
|
+
* takuya kodama
|
91
|
+
|
92
|
+
* NAITOH Jun
|
93
|
+
|
94
|
+
* Bo Anderson
|
95
|
+
|
96
|
+
## 3.3.6 - 2024-08-22 {#version-3-3-6}
|
97
|
+
|
98
|
+
### Improvements
|
99
|
+
|
100
|
+
* Removed duplicated entity expansions for performance.
|
101
|
+
* GH-194
|
102
|
+
* Patch by Viktor Ivarsson.
|
103
|
+
|
104
|
+
* Improved namespace conflicted attribute check performance. It was
|
105
|
+
too slow for deep elements.
|
106
|
+
* Reported by l33thaxor.
|
107
|
+
|
108
|
+
### Fixes
|
109
|
+
|
110
|
+
* Fixed a bug that default entity expansions are counted for
|
111
|
+
security check. Default entity expansions should not be counted
|
112
|
+
because they don't have a security risk.
|
113
|
+
* GH-198
|
114
|
+
* GH-199
|
115
|
+
* Patch by Viktor Ivarsson
|
116
|
+
|
117
|
+
* Fixed a parser bug that parameter entity references in internal
|
118
|
+
subsets are expanded. It's not allowed in the XML specification.
|
119
|
+
* GH-191
|
120
|
+
* Patch by NAITOH Jun.
|
121
|
+
|
122
|
+
* Fixed a stream parser bug that user-defined entity references in
|
123
|
+
text aren't expanded.
|
124
|
+
* GH-200
|
125
|
+
* Patch by NAITOH Jun.
|
126
|
+
|
127
|
+
### Thanks
|
128
|
+
|
129
|
+
* Viktor Ivarsson
|
130
|
+
|
131
|
+
* NAITOH Jun
|
132
|
+
|
133
|
+
* l33thaxor
|
134
|
+
|
135
|
+
## 3.3.5 - 2024-08-12 {#version-3-3-5}
|
136
|
+
|
137
|
+
### Fixes
|
138
|
+
|
139
|
+
* Fixed a bug that `REXML::Security.entity_expansion_text_limit`
|
140
|
+
check has wrong text size calculation in SAX and pull parsers.
|
141
|
+
* GH-193
|
142
|
+
* GH-195
|
143
|
+
* Reported by Viktor Ivarsson.
|
144
|
+
* Patch by NAITOH Jun.
|
145
|
+
|
146
|
+
### Thanks
|
147
|
+
|
148
|
+
* Viktor Ivarsson
|
149
|
+
|
150
|
+
* NAITOH Jun
|
151
|
+
|
152
|
+
## 3.3.4 - 2024-08-01 {#version-3-3-4}
|
153
|
+
|
154
|
+
### Fixes
|
155
|
+
|
156
|
+
* Fixed a bug that `REXML::Security` isn't defined when
|
157
|
+
`REXML::Parsers::StreamParser` is used and
|
158
|
+
`rexml/parsers/streamparser` is only required.
|
159
|
+
* GH-189
|
160
|
+
* Patch by takuya kodama.
|
161
|
+
|
162
|
+
### Thanks
|
163
|
+
|
164
|
+
* takuya kodama
|
165
|
+
|
166
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
167
|
+
|
168
|
+
### Improvements
|
169
|
+
|
170
|
+
* Added support for detecting invalid XML that has unsupported
|
171
|
+
content before root element
|
172
|
+
* GH-184
|
173
|
+
* Patch by NAITOH Jun.
|
174
|
+
|
175
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
176
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
177
|
+
parsers
|
178
|
+
* GH-187
|
179
|
+
* Patch by NAITOH Jun.
|
180
|
+
|
181
|
+
* Added more tests for invalid XMLs.
|
182
|
+
* GH-183
|
183
|
+
* Patch by Watson.
|
184
|
+
|
185
|
+
* Added more performance tests.
|
186
|
+
* Patch by Watson.
|
187
|
+
|
188
|
+
* Improved parse performance.
|
189
|
+
* GH-186
|
190
|
+
* Patch by tomoya ishida.
|
191
|
+
|
192
|
+
### Thanks
|
193
|
+
|
194
|
+
* NAITOH Jun
|
195
|
+
|
196
|
+
* Watson
|
197
|
+
|
198
|
+
* tomoya ishida
|
199
|
+
|
200
|
+
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
201
|
+
|
202
|
+
### Improvements
|
203
|
+
|
204
|
+
* Improved parse performance.
|
205
|
+
* GH-160
|
206
|
+
* Patch by NAITOH Jun.
|
207
|
+
|
208
|
+
* Improved parse performance.
|
209
|
+
* GH-169
|
210
|
+
* GH-170
|
211
|
+
* GH-171
|
212
|
+
* GH-172
|
213
|
+
* GH-173
|
214
|
+
* GH-174
|
215
|
+
* GH-175
|
216
|
+
* GH-176
|
217
|
+
* GH-177
|
218
|
+
* Patch by Watson.
|
219
|
+
|
220
|
+
* Added support for raising a parse exception when an XML has extra
|
221
|
+
content after the root element.
|
222
|
+
* GH-161
|
223
|
+
* Patch by NAITOH Jun.
|
224
|
+
|
225
|
+
* Added support for raising a parse exception when an XML
|
226
|
+
declaration exists in wrong position.
|
227
|
+
* GH-162
|
228
|
+
* Patch by NAITOH Jun.
|
229
|
+
|
230
|
+
* Removed a needless space after the XML declaration in pretty print mode.
|
231
|
+
* GH-164
|
232
|
+
* Patch by NAITOH Jun.
|
233
|
+
|
234
|
+
* Stopped emitting `:text` events after the root element.
|
235
|
+
* GH-167
|
236
|
+
* Patch by NAITOH Jun.
|
237
|
+
|
238
|
+
### Fixes
|
239
|
+
|
240
|
+
* Fixed a bug that SAX2 parser doesn't expand predefined entities for
|
241
|
+
`characters` callback.
|
242
|
+
* GH-168
|
243
|
+
* Patch by NAITOH Jun.
|
244
|
+
|
245
|
+
### Thanks
|
246
|
+
|
247
|
+
* NAITOH Jun
|
248
|
+
|
249
|
+
* Watson
|
250
|
+
|
251
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
252
|
+
|
253
|
+
### Improvements
|
254
|
+
|
255
|
+
* Added support for detecting malformed top-level comments.
|
256
|
+
* GH-145
|
257
|
+
* Patch by Hiroya Fujinami.
|
258
|
+
|
259
|
+
* Improved `REXML::Element#attribute` performance.
|
260
|
+
* GH-146
|
261
|
+
* Patch by Hiroya Fujinami.
|
262
|
+
|
263
|
+
* Added support for detecting malformed `<!-->` comments.
|
264
|
+
* GH-147
|
265
|
+
* Patch by Hiroya Fujinami.
|
266
|
+
|
267
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
268
|
+
* GH-152
|
269
|
+
* Patch by Hiroya Fujinami.
|
270
|
+
|
271
|
+
* Added `changelog_uri` metadata to gemspec.
|
272
|
+
* GH-156
|
273
|
+
* Patch by fynsta.
|
274
|
+
|
275
|
+
* Improved parse performance.
|
276
|
+
* GH-157
|
277
|
+
* GH-158
|
278
|
+
* Patch by NAITOH Jun.
|
279
|
+
|
280
|
+
### Fixes
|
281
|
+
|
282
|
+
* Fixed a bug that large XML can't be parsed.
|
283
|
+
* GH-154
|
284
|
+
* Patch by NAITOH Jun.
|
285
|
+
|
286
|
+
* Fixed a bug that private constants are visible.
|
287
|
+
* GH-155
|
288
|
+
* Patch by NAITOH Jun.
|
289
|
+
|
290
|
+
### Thanks
|
291
|
+
|
292
|
+
* Hiroya Fujinami
|
293
|
+
|
294
|
+
* NAITOH Jun
|
295
|
+
|
296
|
+
* fynsta
|
297
|
+
|
298
|
+
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
299
|
+
|
300
|
+
### Improvements
|
301
|
+
|
302
|
+
* Added support for strscan 0.7.0 installed with Ruby 2.6.
|
303
|
+
* GH-142
|
304
|
+
* Reported by Fernando Trigoso.
|
305
|
+
|
306
|
+
### Thanks
|
307
|
+
|
308
|
+
* Fernando Trigoso
|
309
|
+
|
310
|
+
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
311
|
+
|
312
|
+
### Improvements
|
313
|
+
|
314
|
+
* Added support for old strscan.
|
315
|
+
* GH-132
|
316
|
+
* Reported by Adam.
|
317
|
+
|
318
|
+
* Improved attribute value parse performance.
|
319
|
+
* GH-135
|
320
|
+
* Patch by NAITOH Jun.
|
321
|
+
|
322
|
+
* Improved `REXML::Node#each_recursive` performance.
|
323
|
+
* GH-134
|
324
|
+
* GH-139
|
325
|
+
* Patch by Hiroya Fujinami.
|
326
|
+
|
327
|
+
* Improved text parse performance.
|
328
|
+
* Reported by mprogrammer.
|
329
|
+
|
330
|
+
### Thanks
|
331
|
+
|
332
|
+
* Adam
|
333
|
+
* NAITOH Jun
|
334
|
+
* Hiroya Fujinami
|
335
|
+
* mprogrammer
|
336
|
+
|
337
|
+
## 3.2.8 - 2024-05-16 {#version-3-2-8}
|
338
|
+
|
339
|
+
### Fixes
|
340
|
+
|
341
|
+
* Suppressed a warning
|
342
|
+
|
343
|
+
## 3.2.7 - 2024-05-16 {#version-3-2-7}
|
344
|
+
|
345
|
+
### Improvements
|
346
|
+
|
347
|
+
* Improve parse performance by using `StringScanner`.
|
348
|
+
|
349
|
+
* GH-106
|
350
|
+
* GH-107
|
351
|
+
* GH-108
|
352
|
+
* GH-109
|
353
|
+
* GH-112
|
354
|
+
* GH-113
|
355
|
+
* GH-114
|
356
|
+
* GH-115
|
357
|
+
* GH-116
|
358
|
+
* GH-117
|
359
|
+
* GH-118
|
360
|
+
* GH-119
|
361
|
+
* GH-121
|
362
|
+
|
363
|
+
* Patch by NAITOH Jun.
|
364
|
+
|
365
|
+
* Improved parse performance when an attribute has many `<`s.
|
366
|
+
|
367
|
+
* GH-126
|
368
|
+
|
369
|
+
### Fixes
|
370
|
+
|
371
|
+
* XPath: Fixed a bug of `normalize_space(array)`.
|
372
|
+
|
373
|
+
* GH-110
|
374
|
+
* GH-111
|
375
|
+
|
376
|
+
* Patch by flatisland.
|
377
|
+
|
378
|
+
* XPath: Fixed a bug that wrong position is used with nested path.
|
379
|
+
|
380
|
+
* GH-110
|
381
|
+
* GH-122
|
382
|
+
|
383
|
+
* Reported by jcavalieri.
|
384
|
+
* Patch by NAITOH Jun.
|
385
|
+
|
386
|
+
* Fixed a bug that an exception message can't be generated for
|
387
|
+
invalid encoding XML.
|
388
|
+
|
389
|
+
* GH-29
|
390
|
+
* GH-123
|
391
|
+
|
392
|
+
* Reported by DuKewu.
|
393
|
+
* Patch by NAITOH Jun.
|
394
|
+
|
395
|
+
### Thanks
|
396
|
+
|
397
|
+
* NAITOH Jun
|
398
|
+
* flatisland
|
399
|
+
* jcavalieri
|
400
|
+
* DuKewu
|
401
|
+
|
3
402
|
## 3.2.6 - 2023-07-27 {#version-3-2-6}
|
4
403
|
|
5
404
|
### Improvements
|
data/lib/rexml/attribute.rb
CHANGED
@@ -148,8 +148,9 @@ module REXML
|
|
148
148
|
# have been expanded to their values
|
149
149
|
def value
|
150
150
|
return @unnormalized if @unnormalized
|
151
|
-
|
152
|
-
@unnormalized
|
151
|
+
|
152
|
+
@unnormalized = Text::unnormalize(@normalized, doctype,
|
153
|
+
entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
|
153
154
|
end
|
154
155
|
|
155
156
|
# The normalized value of this attribute. That is, the attribute with
|
data/lib/rexml/document.rb
CHANGED
@@ -91,6 +91,8 @@ module REXML
|
|
91
91
|
#
|
92
92
|
def initialize( source = nil, context = {} )
|
93
93
|
@entity_expansion_count = 0
|
94
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
95
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
94
96
|
super()
|
95
97
|
@context = context
|
96
98
|
return if source.nil?
|
@@ -431,10 +433,12 @@ module REXML
|
|
431
433
|
end
|
432
434
|
|
433
435
|
attr_reader :entity_expansion_count
|
436
|
+
attr_writer :entity_expansion_limit
|
437
|
+
attr_accessor :entity_expansion_text_limit
|
434
438
|
|
435
439
|
def record_entity_expansion
|
436
440
|
@entity_expansion_count += 1
|
437
|
-
if @entity_expansion_count >
|
441
|
+
if @entity_expansion_count > @entity_expansion_limit
|
438
442
|
raise "number of entity expansions exceeded, processing aborted."
|
439
443
|
end
|
440
444
|
end
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -449,9 +441,14 @@ module REXML
|
|
449
441
|
# Related: #root_node, #document.
|
450
442
|
#
|
451
443
|
def root
|
452
|
-
|
453
|
-
|
454
|
-
|
444
|
+
target = self
|
445
|
+
while target
|
446
|
+
return target.elements[1] if target.kind_of? Document
|
447
|
+
parent = target.parent
|
448
|
+
return target if parent.kind_of? Document or parent.nil?
|
449
|
+
target = parent
|
450
|
+
end
|
451
|
+
nil
|
455
452
|
end
|
456
453
|
|
457
454
|
# :call-seq:
|
@@ -627,8 +624,12 @@ module REXML
|
|
627
624
|
else
|
628
625
|
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
|
629
626
|
end
|
630
|
-
ns =
|
631
|
-
|
627
|
+
ns = nil
|
628
|
+
target = self
|
629
|
+
while ns.nil? and target
|
630
|
+
ns = target.attributes[prefix]
|
631
|
+
target = target.parent
|
632
|
+
end
|
632
633
|
ns = '' if ns.nil? and prefix == 'xmlns'
|
633
634
|
return ns
|
634
635
|
end
|
@@ -1284,16 +1285,11 @@ module REXML
|
|
1284
1285
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1286
|
#
|
1286
1287
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1288
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1289
|
prefix = nil if prefix == 'xmlns'
|
1294
1290
|
|
1295
1291
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1292
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1293
|
|
1298
1294
|
return ret_val unless ret_val.nil?
|
1299
1295
|
return nil if prefix.nil?
|
@@ -2388,17 +2384,6 @@ module REXML
|
|
2388
2384
|
elsif old_attr.kind_of? Hash
|
2389
2385
|
old_attr[value.prefix] = value
|
2390
2386
|
elsif old_attr.prefix != value.prefix
|
2391
|
-
# Check for conflicting namespaces
|
2392
|
-
if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
2393
|
-
old_namespace = old_attr.namespace
|
2394
|
-
new_namespace = value.namespace
|
2395
|
-
if old_namespace == new_namespace
|
2396
|
-
raise ParseException.new(
|
2397
|
-
"Namespace conflict in adding attribute \"#{value.name}\": "+
|
2398
|
-
"Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
2399
|
-
"prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
2400
|
-
end
|
2401
|
-
end
|
2402
2387
|
store value.name, {old_attr.prefix => old_attr,
|
2403
2388
|
value.prefix => value}
|
2404
2389
|
else
|
data/lib/rexml/entity.rb
CHANGED
@@ -12,6 +12,7 @@ module REXML
|
|
12
12
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
13
13
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
14
14
|
PEREFERENCE = "%#{NAME};"
|
15
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
15
16
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
16
17
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
17
18
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
@@ -19,7 +20,7 @@ module REXML
|
|
19
20
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
20
21
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
21
22
|
|
22
|
-
attr_reader :name, :external, :ref, :ndata, :pubid
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
|
23
24
|
|
24
25
|
# Create a new entity. Simple entities can be constructed by passing a
|
25
26
|
# name, value to the constructor; this creates a generic, plain entity
|
@@ -68,14 +69,14 @@ module REXML
|
|
68
69
|
end
|
69
70
|
|
70
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
71
|
-
#
|
72
|
-
# +value()+ in that +value+ only replaces %ent; entities.
|
72
|
+
# &ent; entities.
|
73
73
|
def unnormalized
|
74
|
-
document
|
75
|
-
|
76
|
-
return nil if
|
77
|
-
|
78
|
-
@unnormalized
|
74
|
+
document&.record_entity_expansion
|
75
|
+
|
76
|
+
return nil if @value.nil?
|
77
|
+
|
78
|
+
@unnormalized = Text::unnormalize(@value, parent,
|
79
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
79
80
|
end
|
80
81
|
|
81
82
|
#once :unnormalized
|
@@ -121,46 +122,6 @@ module REXML
|
|
121
122
|
write rv
|
122
123
|
rv
|
123
124
|
end
|
124
|
-
|
125
|
-
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
126
|
-
# Returns the value of this entity. At the moment, only internal entities
|
127
|
-
# are processed. If the value contains internal references (IE,
|
128
|
-
# %blah;), those are replaced with their values. IE, if the doctype
|
129
|
-
# contains:
|
130
|
-
# <!ENTITY % foo "bar">
|
131
|
-
# <!ENTITY yada "nanoo %foo; nanoo>
|
132
|
-
# then:
|
133
|
-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
|
-
def value
|
135
|
-
@resolved_value ||= resolve_value
|
136
|
-
end
|
137
|
-
|
138
|
-
def parent=(other)
|
139
|
-
@resolved_value = nil
|
140
|
-
super
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
def resolve_value
|
145
|
-
return nil if @value.nil?
|
146
|
-
return @value unless @value.match?(PEREFERENCE_RE)
|
147
|
-
|
148
|
-
matches = @value.scan(PEREFERENCE_RE)
|
149
|
-
rv = @value.clone
|
150
|
-
if @parent
|
151
|
-
sum = 0
|
152
|
-
matches.each do |entity_reference|
|
153
|
-
entity_value = @parent.entity( entity_reference[0] )
|
154
|
-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
155
|
-
raise "entity expansion has grown too large"
|
156
|
-
else
|
157
|
-
sum += entity_value.bytesize
|
158
|
-
end
|
159
|
-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
160
|
-
end
|
161
|
-
end
|
162
|
-
rv
|
163
|
-
end
|
164
125
|
end
|
165
126
|
|
166
127
|
# This is a set of entity constants -- the ones defined in the XML
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
data/lib/rexml/functions.rb
CHANGED
@@ -262,11 +262,10 @@ module REXML
|
|
262
262
|
string(string).length
|
263
263
|
end
|
264
264
|
|
265
|
-
# UNTESTED
|
266
265
|
def Functions::normalize_space( string=nil )
|
267
266
|
string = string(@@context[:node]) if string.nil?
|
268
267
|
if string.kind_of? Array
|
269
|
-
string.collect{|x|
|
268
|
+
string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
|
270
269
|
else
|
271
270
|
string.to_s.strip.gsub(/\s+/um, ' ')
|
272
271
|
end
|
data/lib/rexml/node.rb
CHANGED
@@ -52,10 +52,14 @@ module REXML
|
|
52
52
|
|
53
53
|
# Visit all subnodes of +self+ recursively
|
54
54
|
def each_recursive(&block) # :yields: node
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
stack = []
|
56
|
+
each { |child| stack.unshift child if child.node_type == :element }
|
57
|
+
until stack.empty?
|
58
|
+
child = stack.pop
|
59
|
+
yield child
|
60
|
+
n = stack.size
|
61
|
+
child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Find (and return) first subnode (recursively) for which the block
|
data/lib/rexml/parseexception.rb
CHANGED