rexml 3.2.6 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +423 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +466 -273
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +185 -100
- data/lib/rexml/text.rb +54 -57
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +6 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
|
4
|
+
data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
|
7
|
+
data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
|
data/NEWS.md
CHANGED
@@ -1,5 +1,428 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.1 - 2025-02-16 {#version-3-4-1}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-226
|
9
|
+
* GH-227
|
10
|
+
* GH-237
|
11
|
+
* Patch by NAITOH Jun
|
12
|
+
|
13
|
+
### Fixes
|
14
|
+
|
15
|
+
* Fixed a bug that serialization of ATTLIST is incorrect.
|
16
|
+
* GH-233
|
17
|
+
* GH-234
|
18
|
+
* Patch by OlofKalufs
|
19
|
+
* Reported by OlofKalufs
|
20
|
+
|
21
|
+
### Thanks
|
22
|
+
|
23
|
+
* NAITOH Jun
|
24
|
+
|
25
|
+
* OlofKalufs
|
26
|
+
|
27
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
28
|
+
|
29
|
+
### Improvements
|
30
|
+
|
31
|
+
* Improved performance.
|
32
|
+
* GH-216
|
33
|
+
* Patch by NAITOH Jun
|
34
|
+
|
35
|
+
* JRuby: Improved parse performance.
|
36
|
+
* GH-219
|
37
|
+
* Patch by João Duarte
|
38
|
+
|
39
|
+
* Added support for reusing pull parser.
|
40
|
+
* GH-214
|
41
|
+
* GH-220
|
42
|
+
* Patch by Dmitry Pogrebnoy
|
43
|
+
|
44
|
+
* Improved error handling when source is `IO`.
|
45
|
+
* GH-221
|
46
|
+
* Patch by NAITOH Jun
|
47
|
+
|
48
|
+
### Thanks
|
49
|
+
|
50
|
+
* NAITOH Jun
|
51
|
+
|
52
|
+
* João Duarte
|
53
|
+
|
54
|
+
* Dmitry Pogrebnoy
|
55
|
+
|
56
|
+
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
57
|
+
|
58
|
+
### Improvements
|
59
|
+
|
60
|
+
* Improved performance.
|
61
|
+
* GH-210
|
62
|
+
* Patch by NAITOH Jun.
|
63
|
+
|
64
|
+
### Fixes
|
65
|
+
|
66
|
+
* Fixed a parse bug for text only invalid XML.
|
67
|
+
* GH-215
|
68
|
+
* Patch by NAITOH Jun.
|
69
|
+
|
70
|
+
* Fixed a parse bug that `&#0x...;` is accepted as a character
|
71
|
+
reference.
|
72
|
+
|
73
|
+
### Thanks
|
74
|
+
|
75
|
+
* NAITOH Jun
|
76
|
+
|
77
|
+
## 3.3.8 - 2024-09-29 {#version-3-3-8}
|
78
|
+
|
79
|
+
### Improvements
|
80
|
+
|
81
|
+
* SAX2: Improve parse performance.
|
82
|
+
* GH-207
|
83
|
+
* Patch by NAITOH Jun.
|
84
|
+
|
85
|
+
### Fixes
|
86
|
+
|
87
|
+
* Fixed a bug that unexpected attribute namespace conflict error for
|
88
|
+
the predefined "xml" namespace is reported.
|
89
|
+
* GH-208
|
90
|
+
* Patch by KITAITI Makoto
|
91
|
+
|
92
|
+
### Thanks
|
93
|
+
|
94
|
+
* NAITOH Jun
|
95
|
+
|
96
|
+
* KITAITI Makoto
|
97
|
+
|
98
|
+
## 3.3.7 - 2024-09-04 {#version-3-3-7}
|
99
|
+
|
100
|
+
### Improvements
|
101
|
+
|
102
|
+
* Added local entity expansion limit methods
|
103
|
+
* GH-192
|
104
|
+
* GH-202
|
105
|
+
* Reported by takuya kodama.
|
106
|
+
* Patch by NAITOH Jun.
|
107
|
+
|
108
|
+
* Removed explicit strscan dependency
|
109
|
+
* GH-204
|
110
|
+
* Patch by Bo Anderson.
|
111
|
+
|
112
|
+
### Thanks
|
113
|
+
|
114
|
+
* takuya kodama
|
115
|
+
|
116
|
+
* NAITOH Jun
|
117
|
+
|
118
|
+
* Bo Anderson
|
119
|
+
|
120
|
+
## 3.3.6 - 2024-08-22 {#version-3-3-6}
|
121
|
+
|
122
|
+
### Improvements
|
123
|
+
|
124
|
+
* Removed duplicated entity expansions for performance.
|
125
|
+
* GH-194
|
126
|
+
* Patch by Viktor Ivarsson.
|
127
|
+
|
128
|
+
* Improved namespace conflicted attribute check performance. It was
|
129
|
+
too slow for deep elements.
|
130
|
+
* Reported by l33thaxor.
|
131
|
+
|
132
|
+
### Fixes
|
133
|
+
|
134
|
+
* Fixed a bug that default entity expansions are counted for
|
135
|
+
security check. Default entity expansions should not be counted
|
136
|
+
because they don't have a security risk.
|
137
|
+
* GH-198
|
138
|
+
* GH-199
|
139
|
+
* Patch by Viktor Ivarsson.
|
140
|
+
|
141
|
+
* Fixed a parser bug that parameter entity references in internal
|
142
|
+
subsets are expanded. It's not allowed in the XML specification.
|
143
|
+
* GH-191
|
144
|
+
* Patch by NAITOH Jun.
|
145
|
+
|
146
|
+
* Fixed a stream parser bug that user-defined entity references in
|
147
|
+
text aren't expanded.
|
148
|
+
* GH-200
|
149
|
+
* Patch by NAITOH Jun.
|
150
|
+
|
151
|
+
### Thanks
|
152
|
+
|
153
|
+
* Viktor Ivarsson
|
154
|
+
|
155
|
+
* NAITOH Jun
|
156
|
+
|
157
|
+
* l33thaxor
|
158
|
+
|
159
|
+
## 3.3.5 - 2024-08-12 {#version-3-3-5}
|
160
|
+
|
161
|
+
### Fixes
|
162
|
+
|
163
|
+
* Fixed a bug that `REXML::Security.entity_expansion_text_limit`
|
164
|
+
check has wrong text size calculation in SAX and pull parsers.
|
165
|
+
* GH-193
|
166
|
+
* GH-195
|
167
|
+
* Reported by Viktor Ivarsson.
|
168
|
+
* Patch by NAITOH Jun.
|
169
|
+
|
170
|
+
### Thanks
|
171
|
+
|
172
|
+
* Viktor Ivarsson
|
173
|
+
|
174
|
+
* NAITOH Jun
|
175
|
+
|
176
|
+
## 3.3.4 - 2024-08-01 {#version-3-3-4}
|
177
|
+
|
178
|
+
### Fixes
|
179
|
+
|
180
|
+
* Fixed a bug that `REXML::Security` isn't defined when
|
181
|
+
`REXML::Parsers::StreamParser` is used and
|
182
|
+
`rexml/parsers/streamparser` is only required.
|
183
|
+
* GH-189
|
184
|
+
* Patch by takuya kodama.
|
185
|
+
|
186
|
+
### Thanks
|
187
|
+
|
188
|
+
* takuya kodama
|
189
|
+
|
190
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
191
|
+
|
192
|
+
### Improvements
|
193
|
+
|
194
|
+
* Added support for detecting invalid XML that has unsupported
|
195
|
+
content before root element
|
196
|
+
* GH-184
|
197
|
+
* Patch by NAITOH Jun.
|
198
|
+
|
199
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
200
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
201
|
+
parsers
|
202
|
+
* GH-187
|
203
|
+
* Patch by NAITOH Jun.
|
204
|
+
|
205
|
+
* Added more tests for invalid XMLs.
|
206
|
+
* GH-183
|
207
|
+
* Patch by Watson.
|
208
|
+
|
209
|
+
* Added more performance tests.
|
210
|
+
* Patch by Watson.
|
211
|
+
|
212
|
+
* Improved parse performance.
|
213
|
+
* GH-186
|
214
|
+
* Patch by tomoya ishida.
|
215
|
+
|
216
|
+
### Thanks
|
217
|
+
|
218
|
+
* NAITOH Jun
|
219
|
+
|
220
|
+
* Watson
|
221
|
+
|
222
|
+
* tomoya ishida
|
223
|
+
|
224
|
+
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
225
|
+
|
226
|
+
### Improvements
|
227
|
+
|
228
|
+
* Improved parse performance.
|
229
|
+
* GH-160
|
230
|
+
* Patch by NAITOH Jun.
|
231
|
+
|
232
|
+
* Improved parse performance.
|
233
|
+
* GH-169
|
234
|
+
* GH-170
|
235
|
+
* GH-171
|
236
|
+
* GH-172
|
237
|
+
* GH-173
|
238
|
+
* GH-174
|
239
|
+
* GH-175
|
240
|
+
* GH-176
|
241
|
+
* GH-177
|
242
|
+
* Patch by Watson.
|
243
|
+
|
244
|
+
* Added support for raising a parse exception when an XML has extra
|
245
|
+
content after the root element.
|
246
|
+
* GH-161
|
247
|
+
* Patch by NAITOH Jun.
|
248
|
+
|
249
|
+
* Added support for raising a parse exception when an XML
|
250
|
+
declaration exists in wrong position.
|
251
|
+
* GH-162
|
252
|
+
* Patch by NAITOH Jun.
|
253
|
+
|
254
|
+
* Removed a needless space after the XML declaration in pretty print mode.
|
255
|
+
* GH-164
|
256
|
+
* Patch by NAITOH Jun.
|
257
|
+
|
258
|
+
* Stopped emitting `:text` events after the root element.
|
259
|
+
* GH-167
|
260
|
+
* Patch by NAITOH Jun.
|
261
|
+
|
262
|
+
### Fixes
|
263
|
+
|
264
|
+
* Fixed a bug that SAX2 parser doesn't expand predefined entities for
|
265
|
+
`characters` callback.
|
266
|
+
* GH-168
|
267
|
+
* Patch by NAITOH Jun.
|
268
|
+
|
269
|
+
### Thanks
|
270
|
+
|
271
|
+
* NAITOH Jun
|
272
|
+
|
273
|
+
* Watson
|
274
|
+
|
275
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
276
|
+
|
277
|
+
### Improvements
|
278
|
+
|
279
|
+
* Added support for detecting malformed top-level comments.
|
280
|
+
* GH-145
|
281
|
+
* Patch by Hiroya Fujinami.
|
282
|
+
|
283
|
+
* Improved `REXML::Element#attribute` performance.
|
284
|
+
* GH-146
|
285
|
+
* Patch by Hiroya Fujinami.
|
286
|
+
|
287
|
+
* Added support for detecting malformed `<!-->` comments.
|
288
|
+
* GH-147
|
289
|
+
* Patch by Hiroya Fujinami.
|
290
|
+
|
291
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
292
|
+
* GH-152
|
293
|
+
* Patch by Hiroya Fujinami.
|
294
|
+
|
295
|
+
* Added `changelog_uri` metadata to gemspec.
|
296
|
+
* GH-156
|
297
|
+
* Patch by fynsta.
|
298
|
+
|
299
|
+
* Improved parse performance.
|
300
|
+
* GH-157
|
301
|
+
* GH-158
|
302
|
+
* Patch by NAITOH Jun.
|
303
|
+
|
304
|
+
### Fixes
|
305
|
+
|
306
|
+
* Fixed a bug that large XML can't be parsed.
|
307
|
+
* GH-154
|
308
|
+
* Patch by NAITOH Jun.
|
309
|
+
|
310
|
+
* Fixed a bug that private constants are visible.
|
311
|
+
* GH-155
|
312
|
+
* Patch by NAITOH Jun.
|
313
|
+
|
314
|
+
### Thanks
|
315
|
+
|
316
|
+
* Hiroya Fujinami
|
317
|
+
|
318
|
+
* NAITOH Jun
|
319
|
+
|
320
|
+
* fynsta
|
321
|
+
|
322
|
+
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
323
|
+
|
324
|
+
### Improvements
|
325
|
+
|
326
|
+
* Added support for strscan 0.7.0 installed with Ruby 2.6.
|
327
|
+
* GH-142
|
328
|
+
* Reported by Fernando Trigoso.
|
329
|
+
|
330
|
+
### Thanks
|
331
|
+
|
332
|
+
* Fernando Trigoso
|
333
|
+
|
334
|
+
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
335
|
+
|
336
|
+
### Improvements
|
337
|
+
|
338
|
+
* Added support for old strscan.
|
339
|
+
* GH-132
|
340
|
+
* Reported by Adam.
|
341
|
+
|
342
|
+
* Improved attribute value parse performance.
|
343
|
+
* GH-135
|
344
|
+
* Patch by NAITOH Jun.
|
345
|
+
|
346
|
+
* Improved `REXML::Node#each_recursive` performance.
|
347
|
+
* GH-134
|
348
|
+
* GH-139
|
349
|
+
* Patch by Hiroya Fujinami.
|
350
|
+
|
351
|
+
* Improved text parse performance.
|
352
|
+
* Reported by mprogrammer.
|
353
|
+
|
354
|
+
### Thanks
|
355
|
+
|
356
|
+
* Adam
|
357
|
+
* NAITOH Jun
|
358
|
+
* Hiroya Fujinami
|
359
|
+
* mprogrammer
|
360
|
+
|
361
|
+
## 3.2.8 - 2024-05-16 {#version-3-2-8}
|
362
|
+
|
363
|
+
### Fixes
|
364
|
+
|
365
|
+
* Suppressed a warning
|
366
|
+
|
367
|
+
## 3.2.7 - 2024-05-16 {#version-3-2-7}
|
368
|
+
|
369
|
+
### Improvements
|
370
|
+
|
371
|
+
* Improve parse performance by using `StringScanner`.
|
372
|
+
|
373
|
+
* GH-106
|
374
|
+
* GH-107
|
375
|
+
* GH-108
|
376
|
+
* GH-109
|
377
|
+
* GH-112
|
378
|
+
* GH-113
|
379
|
+
* GH-114
|
380
|
+
* GH-115
|
381
|
+
* GH-116
|
382
|
+
* GH-117
|
383
|
+
* GH-118
|
384
|
+
* GH-119
|
385
|
+
* GH-121
|
386
|
+
|
387
|
+
* Patch by NAITOH Jun.
|
388
|
+
|
389
|
+
* Improved parse performance when an attribute has many `<`s.
|
390
|
+
|
391
|
+
* GH-126
|
392
|
+
|
393
|
+
### Fixes
|
394
|
+
|
395
|
+
* XPath: Fixed a bug of `normalize_space(array)`.
|
396
|
+
|
397
|
+
* GH-110
|
398
|
+
* GH-111
|
399
|
+
|
400
|
+
* Patch by flatisland.
|
401
|
+
|
402
|
+
* XPath: Fixed a bug that wrong position is used with nested path.
|
403
|
+
|
404
|
+
* GH-110
|
405
|
+
* GH-122
|
406
|
+
|
407
|
+
* Reported by jcavalieri.
|
408
|
+
* Patch by NAITOH Jun.
|
409
|
+
|
410
|
+
* Fixed a bug that an exception message can't be generated for
|
411
|
+
invalid encoding XML.
|
412
|
+
|
413
|
+
* GH-29
|
414
|
+
* GH-123
|
415
|
+
|
416
|
+
* Reported by DuKewu.
|
417
|
+
* Patch by NAITOH Jun.
|
418
|
+
|
419
|
+
### Thanks
|
420
|
+
|
421
|
+
* NAITOH Jun
|
422
|
+
* flatisland
|
423
|
+
* jcavalieri
|
424
|
+
* DuKewu
|
425
|
+
|
3
426
|
## 3.2.6 - 2023-07-27 {#version-3-2-6}
|
4
427
|
|
5
428
|
### Improvements
|
data/lib/rexml/attribute.rb
CHANGED
@@ -148,8 +148,9 @@ module REXML
|
|
148
148
|
# have been expanded to their values
|
149
149
|
def value
|
150
150
|
return @unnormalized if @unnormalized
|
151
|
-
|
152
|
-
@unnormalized
|
151
|
+
|
152
|
+
@unnormalized = Text::unnormalize(@normalized, doctype,
|
153
|
+
entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
|
153
154
|
end
|
154
155
|
|
155
156
|
# The normalized value of this attribute. That is, the attribute with
|
data/lib/rexml/document.rb
CHANGED
@@ -91,6 +91,8 @@ module REXML
|
|
91
91
|
#
|
92
92
|
def initialize( source = nil, context = {} )
|
93
93
|
@entity_expansion_count = 0
|
94
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
95
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
94
96
|
super()
|
95
97
|
@context = context
|
96
98
|
return if source.nil?
|
@@ -431,10 +433,12 @@ module REXML
|
|
431
433
|
end
|
432
434
|
|
433
435
|
attr_reader :entity_expansion_count
|
436
|
+
attr_writer :entity_expansion_limit
|
437
|
+
attr_accessor :entity_expansion_text_limit
|
434
438
|
|
435
439
|
def record_entity_expansion
|
436
440
|
@entity_expansion_count += 1
|
437
|
-
if @entity_expansion_count >
|
441
|
+
if @entity_expansion_count > @entity_expansion_limit
|
438
442
|
raise "number of entity expansions exceeded, processing aborted."
|
439
443
|
end
|
440
444
|
end
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -449,9 +441,14 @@ module REXML
|
|
449
441
|
# Related: #root_node, #document.
|
450
442
|
#
|
451
443
|
def root
|
452
|
-
|
453
|
-
|
454
|
-
|
444
|
+
target = self
|
445
|
+
while target
|
446
|
+
return target.elements[1] if target.kind_of? Document
|
447
|
+
parent = target.parent
|
448
|
+
return target if parent.kind_of? Document or parent.nil?
|
449
|
+
target = parent
|
450
|
+
end
|
451
|
+
nil
|
455
452
|
end
|
456
453
|
|
457
454
|
# :call-seq:
|
@@ -627,8 +624,12 @@ module REXML
|
|
627
624
|
else
|
628
625
|
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
|
629
626
|
end
|
630
|
-
ns =
|
631
|
-
|
627
|
+
ns = nil
|
628
|
+
target = self
|
629
|
+
while ns.nil? and target
|
630
|
+
ns = target.attributes[prefix]
|
631
|
+
target = target.parent
|
632
|
+
end
|
632
633
|
ns = '' if ns.nil? and prefix == 'xmlns'
|
633
634
|
return ns
|
634
635
|
end
|
@@ -1284,16 +1285,11 @@ module REXML
|
|
1284
1285
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1286
|
#
|
1286
1287
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1288
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1289
|
prefix = nil if prefix == 'xmlns'
|
1294
1290
|
|
1295
1291
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1292
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1293
|
|
1298
1294
|
return ret_val unless ret_val.nil?
|
1299
1295
|
return nil if prefix.nil?
|
@@ -2388,17 +2384,6 @@ module REXML
|
|
2388
2384
|
elsif old_attr.kind_of? Hash
|
2389
2385
|
old_attr[value.prefix] = value
|
2390
2386
|
elsif old_attr.prefix != value.prefix
|
2391
|
-
# Check for conflicting namespaces
|
2392
|
-
if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
2393
|
-
old_namespace = old_attr.namespace
|
2394
|
-
new_namespace = value.namespace
|
2395
|
-
if old_namespace == new_namespace
|
2396
|
-
raise ParseException.new(
|
2397
|
-
"Namespace conflict in adding attribute \"#{value.name}\": "+
|
2398
|
-
"Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
2399
|
-
"prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
2400
|
-
end
|
2401
|
-
end
|
2402
2387
|
store value.name, {old_attr.prefix => old_attr,
|
2403
2388
|
value.prefix => value}
|
2404
2389
|
else
|
data/lib/rexml/entity.rb
CHANGED
@@ -12,6 +12,7 @@ module REXML
|
|
12
12
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
13
13
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
14
14
|
PEREFERENCE = "%#{NAME};"
|
15
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
15
16
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
16
17
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
17
18
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
@@ -19,7 +20,7 @@ module REXML
|
|
19
20
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
20
21
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
21
22
|
|
22
|
-
attr_reader :name, :external, :ref, :ndata, :pubid
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
|
23
24
|
|
24
25
|
# Create a new entity. Simple entities can be constructed by passing a
|
25
26
|
# name, value to the constructor; this creates a generic, plain entity
|
@@ -68,14 +69,14 @@ module REXML
|
|
68
69
|
end
|
69
70
|
|
70
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
71
|
-
#
|
72
|
-
# +value()+ in that +value+ only replaces %ent; entities.
|
72
|
+
# &ent; entities.
|
73
73
|
def unnormalized
|
74
|
-
document
|
75
|
-
|
76
|
-
return nil if
|
77
|
-
|
78
|
-
@unnormalized
|
74
|
+
document&.record_entity_expansion
|
75
|
+
|
76
|
+
return nil if @value.nil?
|
77
|
+
|
78
|
+
@unnormalized = Text::unnormalize(@value, parent,
|
79
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
79
80
|
end
|
80
81
|
|
81
82
|
#once :unnormalized
|
@@ -121,46 +122,6 @@ module REXML
|
|
121
122
|
write rv
|
122
123
|
rv
|
123
124
|
end
|
124
|
-
|
125
|
-
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
126
|
-
# Returns the value of this entity. At the moment, only internal entities
|
127
|
-
# are processed. If the value contains internal references (IE,
|
128
|
-
# %blah;), those are replaced with their values. IE, if the doctype
|
129
|
-
# contains:
|
130
|
-
# <!ENTITY % foo "bar">
|
131
|
-
# <!ENTITY yada "nanoo %foo; nanoo>
|
132
|
-
# then:
|
133
|
-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
|
-
def value
|
135
|
-
@resolved_value ||= resolve_value
|
136
|
-
end
|
137
|
-
|
138
|
-
def parent=(other)
|
139
|
-
@resolved_value = nil
|
140
|
-
super
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
def resolve_value
|
145
|
-
return nil if @value.nil?
|
146
|
-
return @value unless @value.match?(PEREFERENCE_RE)
|
147
|
-
|
148
|
-
matches = @value.scan(PEREFERENCE_RE)
|
149
|
-
rv = @value.clone
|
150
|
-
if @parent
|
151
|
-
sum = 0
|
152
|
-
matches.each do |entity_reference|
|
153
|
-
entity_value = @parent.entity( entity_reference[0] )
|
154
|
-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
155
|
-
raise "entity expansion has grown too large"
|
156
|
-
else
|
157
|
-
sum += entity_value.bytesize
|
158
|
-
end
|
159
|
-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
160
|
-
end
|
161
|
-
end
|
162
|
-
rv
|
163
|
-
end
|
164
125
|
end
|
165
126
|
|
166
127
|
# This is a set of entity constants -- the ones defined in the XML
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
data/lib/rexml/functions.rb
CHANGED
@@ -262,11 +262,10 @@ module REXML
|
|
262
262
|
string(string).length
|
263
263
|
end
|
264
264
|
|
265
|
-
# UNTESTED
|
266
265
|
def Functions::normalize_space( string=nil )
|
267
266
|
string = string(@@context[:node]) if string.nil?
|
268
267
|
if string.kind_of? Array
|
269
|
-
string.collect{|x|
|
268
|
+
string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
|
270
269
|
else
|
271
270
|
string.to_s.strip.gsub(/\s+/um, ' ')
|
272
271
|
end
|
data/lib/rexml/node.rb
CHANGED
@@ -52,10 +52,14 @@ module REXML
|
|
52
52
|
|
53
53
|
# Visit all subnodes of +self+ recursively
|
54
54
|
def each_recursive(&block) # :yields: node
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
stack = []
|
56
|
+
each { |child| stack.unshift child if child.node_type == :element }
|
57
|
+
until stack.empty?
|
58
|
+
child = stack.pop
|
59
|
+
yield child
|
60
|
+
n = stack.size
|
61
|
+
child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Find (and return) first subnode (recursively) for which the block
|
data/lib/rexml/parseexception.rb
CHANGED