magic-xml 0.2013.04.14 → 0.2016.05.07
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -7
- data/lib/magic_xml.rb +1218 -1228
- metadata +50 -33
- data/test.xml +0 -1
- data/tests.rb +0 -836
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
5
|
-
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0436ef9cb1270e202381dec66b3fa5f895ac5b6f
|
4
|
+
data.tar.gz: 211561626d9196c738ee1fc5c6103101c9521082
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f0f4df9bebae54afcc59f60658f2d9914ef71d2326344dbd5c98a7b0ad8f5269966e38cf8dadef229c26c412a6f53674bfd5a307f762f25bae7efcace992a9ef
|
7
|
+
data.tar.gz: acf49469ff1336c11624a0a6ce2a12f1b39f9414109965064fdcd62a2115f8ec0d84e12a8b3b2b107f3a9912ebba1235ef824a4da1dcbd6091838ea730e63f4e
|
data/lib/magic_xml.rb
CHANGED
@@ -6,359 +6,351 @@ require 'net/http'
|
|
6
6
|
|
7
7
|
# FIXME: Make comment formatting RDoc-friendly. It's not always so now.
|
8
8
|
|
9
|
-
# In Ruby 2 Symbol will be a subclass of String, and
|
10
|
-
# this won't be needed any more. Before then...
|
11
9
|
class Symbol
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
alias_method :eqeqeq_before_magic_xml, :===
|
19
|
-
def ===(*args, &blk)
|
20
|
-
if args.size >= 1 and args[0].is_a? XML
|
21
|
-
self == args[0].name
|
22
|
-
else
|
23
|
-
eqeqeq_before_magic_xml(*args, &blk)
|
24
|
-
end
|
10
|
+
alias_method :eqeqeq_before_magic_xml, :===
|
11
|
+
def ===(*args, &blk)
|
12
|
+
if args.size >= 1 and args[0].is_a? XML
|
13
|
+
self == args[0].name
|
14
|
+
else
|
15
|
+
eqeqeq_before_magic_xml(*args, &blk)
|
25
16
|
end
|
17
|
+
end
|
26
18
|
end
|
27
19
|
|
28
20
|
class Hash
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
end
|
21
|
+
alias_method :eqeqeq_before_magic_xml, :===
|
22
|
+
def ===(*args, &blk)
|
23
|
+
if args.size >= 1 and args[0].is_a? XML
|
24
|
+
all?{|k,v| v === args[0][k]}
|
25
|
+
else
|
26
|
+
eqeqeq_before_magic_xml(*args, &blk)
|
36
27
|
end
|
28
|
+
end
|
37
29
|
end
|
38
30
|
|
39
31
|
class String
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
32
|
+
# Escape string for output as XML text (< > &)
|
33
|
+
def xml_escape
|
34
|
+
replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
|
35
|
+
gsub(/([<>&])/) { replacements[$1] }
|
36
|
+
end
|
37
|
+
# Escape characters for output as XML attribute values (< > & ' ")
|
38
|
+
def xml_attr_escape
|
39
|
+
replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
|
40
|
+
gsub(/([<>&\'\"])/) { replacements[$1] }
|
41
|
+
end
|
42
|
+
# Unescape entities
|
43
|
+
# Supports:
|
44
|
+
# * Full set of HTML-compatible named entities
|
45
|
+
# * Decimal entities &#1234;
|
46
|
+
# * Hex entities &#xA0B1;
|
47
|
+
def xml_unescape(extra_entities=nil)
|
48
|
+
@@xhtml_entity_replacements ||= {
|
49
|
+
'nbsp' => 160,
|
50
|
+
'iexcl' => 161,
|
51
|
+
'cent' => 162,
|
52
|
+
'pound' => 163,
|
53
|
+
'curren' => 164,
|
54
|
+
'yen' => 165,
|
55
|
+
'brvbar' => 166,
|
56
|
+
'sect' => 167,
|
57
|
+
'uml' => 168,
|
58
|
+
'copy' => 169,
|
59
|
+
'ordf' => 170,
|
60
|
+
'laquo' => 171,
|
61
|
+
'not' => 172,
|
62
|
+
'shy' => 173,
|
63
|
+
'reg' => 174,
|
64
|
+
'macr' => 175,
|
65
|
+
'deg' => 176,
|
66
|
+
'plusmn' => 177,
|
67
|
+
'sup2' => 178,
|
68
|
+
'sup3' => 179,
|
69
|
+
'acute' => 180,
|
70
|
+
'micro' => 181,
|
71
|
+
'para' => 182,
|
72
|
+
'middot' => 183,
|
73
|
+
'cedil' => 184,
|
74
|
+
'sup1' => 185,
|
75
|
+
'ordm' => 186,
|
76
|
+
'raquo' => 187,
|
77
|
+
'frac14' => 188,
|
78
|
+
'frac12' => 189,
|
79
|
+
'frac34' => 190,
|
80
|
+
'iquest' => 191,
|
81
|
+
'Agrave' => 192,
|
82
|
+
'Aacute' => 193,
|
83
|
+
'Acirc' => 194,
|
84
|
+
'Atilde' => 195,
|
85
|
+
'Auml' => 196,
|
86
|
+
'Aring' => 197,
|
87
|
+
'AElig' => 198,
|
88
|
+
'Ccedil' => 199,
|
89
|
+
'Egrave' => 200,
|
90
|
+
'Eacute' => 201,
|
91
|
+
'Ecirc' => 202,
|
92
|
+
'Euml' => 203,
|
93
|
+
'Igrave' => 204,
|
94
|
+
'Iacute' => 205,
|
95
|
+
'Icirc' => 206,
|
96
|
+
'Iuml' => 207,
|
97
|
+
'ETH' => 208,
|
98
|
+
'Ntilde' => 209,
|
99
|
+
'Ograve' => 210,
|
100
|
+
'Oacute' => 211,
|
101
|
+
'Ocirc' => 212,
|
102
|
+
'Otilde' => 213,
|
103
|
+
'Ouml' => 214,
|
104
|
+
'times' => 215,
|
105
|
+
'Oslash' => 216,
|
106
|
+
'Ugrave' => 217,
|
107
|
+
'Uacute' => 218,
|
108
|
+
'Ucirc' => 219,
|
109
|
+
'Uuml' => 220,
|
110
|
+
'Yacute' => 221,
|
111
|
+
'THORN' => 222,
|
112
|
+
'szlig' => 223,
|
113
|
+
'agrave' => 224,
|
114
|
+
'aacute' => 225,
|
115
|
+
'acirc' => 226,
|
116
|
+
'atilde' => 227,
|
117
|
+
'auml' => 228,
|
118
|
+
'aring' => 229,
|
119
|
+
'aelig' => 230,
|
120
|
+
'ccedil' => 231,
|
121
|
+
'egrave' => 232,
|
122
|
+
'eacute' => 233,
|
123
|
+
'ecirc' => 234,
|
124
|
+
'euml' => 235,
|
125
|
+
'igrave' => 236,
|
126
|
+
'iacute' => 237,
|
127
|
+
'icirc' => 238,
|
128
|
+
'iuml' => 239,
|
129
|
+
'eth' => 240,
|
130
|
+
'ntilde' => 241,
|
131
|
+
'ograve' => 242,
|
132
|
+
'oacute' => 243,
|
133
|
+
'ocirc' => 244,
|
134
|
+
'otilde' => 245,
|
135
|
+
'ouml' => 246,
|
136
|
+
'divide' => 247,
|
137
|
+
'oslash' => 248,
|
138
|
+
'ugrave' => 249,
|
139
|
+
'uacute' => 250,
|
140
|
+
'ucirc' => 251,
|
141
|
+
'uuml' => 252,
|
142
|
+
'yacute' => 253,
|
143
|
+
'thorn' => 254,
|
144
|
+
'yuml' => 255,
|
145
|
+
'quot' => 34,
|
146
|
+
'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
|
147
|
+
'amp' => 38,
|
148
|
+
'lt' => 60,
|
149
|
+
'gt' => 62,
|
150
|
+
'OElig' => 338,
|
151
|
+
'oelig' => 339,
|
152
|
+
'Scaron' => 352,
|
153
|
+
'scaron' => 353,
|
154
|
+
'Yuml' => 376,
|
155
|
+
'circ' => 710,
|
156
|
+
'tilde' => 732,
|
157
|
+
'ensp' => 8194,
|
158
|
+
'emsp' => 8195,
|
159
|
+
'thinsp' => 8201,
|
160
|
+
'zwnj' => 8204,
|
161
|
+
'zwj' => 8205,
|
162
|
+
'lrm' => 8206,
|
163
|
+
'rlm' => 8207,
|
164
|
+
'ndash' => 8211,
|
165
|
+
'mdash' => 8212,
|
166
|
+
'lsquo' => 8216,
|
167
|
+
'rsquo' => 8217,
|
168
|
+
'sbquo' => 8218,
|
169
|
+
'ldquo' => 8220,
|
170
|
+
'rdquo' => 8221,
|
171
|
+
'bdquo' => 8222,
|
172
|
+
'dagger' => 8224,
|
173
|
+
'Dagger' => 8225,
|
174
|
+
'permil' => 8240,
|
175
|
+
'lsaquo' => 8249,
|
176
|
+
'rsaquo' => 8250,
|
177
|
+
'euro' => 8364,
|
178
|
+
'fnof' => 402,
|
179
|
+
'Alpha' => 913,
|
180
|
+
'Beta' => 914,
|
181
|
+
'Gamma' => 915,
|
182
|
+
'Delta' => 916,
|
183
|
+
'Epsilon' => 917,
|
184
|
+
'Zeta' => 918,
|
185
|
+
'Eta' => 919,
|
186
|
+
'Theta' => 920,
|
187
|
+
'Iota' => 921,
|
188
|
+
'Kappa' => 922,
|
189
|
+
'Lambda' => 923,
|
190
|
+
'Mu' => 924,
|
191
|
+
'Nu' => 925,
|
192
|
+
'Xi' => 926,
|
193
|
+
'Omicron' => 927,
|
194
|
+
'Pi' => 928,
|
195
|
+
'Rho' => 929,
|
196
|
+
'Sigma' => 931,
|
197
|
+
'Tau' => 932,
|
198
|
+
'Upsilon' => 933,
|
199
|
+
'Phi' => 934,
|
200
|
+
'Chi' => 935,
|
201
|
+
'Psi' => 936,
|
202
|
+
'Omega' => 937,
|
203
|
+
'alpha' => 945,
|
204
|
+
'beta' => 946,
|
205
|
+
'gamma' => 947,
|
206
|
+
'delta' => 948,
|
207
|
+
'epsilon' => 949,
|
208
|
+
'zeta' => 950,
|
209
|
+
'eta' => 951,
|
210
|
+
'theta' => 952,
|
211
|
+
'iota' => 953,
|
212
|
+
'kappa' => 954,
|
213
|
+
'lambda' => 955,
|
214
|
+
'mu' => 956,
|
215
|
+
'nu' => 957,
|
216
|
+
'xi' => 958,
|
217
|
+
'omicron' => 959,
|
218
|
+
'pi' => 960,
|
219
|
+
'rho' => 961,
|
220
|
+
'sigmaf' => 962,
|
221
|
+
'sigma' => 963,
|
222
|
+
'tau' => 964,
|
223
|
+
'upsilon' => 965,
|
224
|
+
'phi' => 966,
|
225
|
+
'chi' => 967,
|
226
|
+
'psi' => 968,
|
227
|
+
'omega' => 969,
|
228
|
+
'thetasym' => 977,
|
229
|
+
'upsih' => 978,
|
230
|
+
'piv' => 982,
|
231
|
+
'bull' => 8226,
|
232
|
+
'hellip' => 8230,
|
233
|
+
'prime' => 8242,
|
234
|
+
'Prime' => 8243,
|
235
|
+
'oline' => 8254,
|
236
|
+
'frasl' => 8260,
|
237
|
+
'weierp' => 8472,
|
238
|
+
'image' => 8465,
|
239
|
+
'real' => 8476,
|
240
|
+
'trade' => 8482,
|
241
|
+
'alefsym' => 8501,
|
242
|
+
'larr' => 8592,
|
243
|
+
'uarr' => 8593,
|
244
|
+
'rarr' => 8594,
|
245
|
+
'darr' => 8595,
|
246
|
+
'harr' => 8596,
|
247
|
+
'crarr' => 8629,
|
248
|
+
'lArr' => 8656,
|
249
|
+
'uArr' => 8657,
|
250
|
+
'rArr' => 8658,
|
251
|
+
'dArr' => 8659,
|
252
|
+
'hArr' => 8660,
|
253
|
+
'forall' => 8704,
|
254
|
+
'part' => 8706,
|
255
|
+
'exist' => 8707,
|
256
|
+
'empty' => 8709,
|
257
|
+
'nabla' => 8711,
|
258
|
+
'isin' => 8712,
|
259
|
+
'notin' => 8713,
|
260
|
+
'ni' => 8715,
|
261
|
+
'prod' => 8719,
|
262
|
+
'sum' => 8721,
|
263
|
+
'minus' => 8722,
|
264
|
+
'lowast' => 8727,
|
265
|
+
'radic' => 8730,
|
266
|
+
'prop' => 8733,
|
267
|
+
'infin' => 8734,
|
268
|
+
'ang' => 8736,
|
269
|
+
'and' => 8743,
|
270
|
+
'or' => 8744,
|
271
|
+
'cap' => 8745,
|
272
|
+
'cup' => 8746,
|
273
|
+
'int' => 8747,
|
274
|
+
'there4' => 8756,
|
275
|
+
'sim' => 8764,
|
276
|
+
'cong' => 8773,
|
277
|
+
'asymp' => 8776,
|
278
|
+
'ne' => 8800,
|
279
|
+
'equiv' => 8801,
|
280
|
+
'le' => 8804,
|
281
|
+
'ge' => 8805,
|
282
|
+
'sub' => 8834,
|
283
|
+
'sup' => 8835,
|
284
|
+
'nsub' => 8836,
|
285
|
+
'sube' => 8838,
|
286
|
+
'supe' => 8839,
|
287
|
+
'oplus' => 8853,
|
288
|
+
'otimes' => 8855,
|
289
|
+
'perp' => 8869,
|
290
|
+
'sdot' => 8901,
|
291
|
+
'lceil' => 8968,
|
292
|
+
'rceil' => 8969,
|
293
|
+
'lfloor' => 8970,
|
294
|
+
'rfloor' => 8971,
|
295
|
+
'lang' => 9001,
|
296
|
+
'rang' => 9002,
|
297
|
+
'loz' => 9674,
|
298
|
+
'spades' => 9824,
|
299
|
+
'clubs' => 9827,
|
300
|
+
'hearts' => 9829,
|
301
|
+
'diams' => 9830,
|
302
|
+
}
|
303
|
+
gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
|
304
|
+
if $1 then
|
305
|
+
v = @@xhtml_entity_replacements[$1]
|
306
|
+
# Nonstandard entity
|
307
|
+
unless v
|
308
|
+
if extra_entities.is_a? Proc
|
309
|
+
v = extra_entities.call($1)
|
310
|
+
# Well, we expect a Hash here, but any container will do.
|
311
|
+
# As long as it's not a nil.
|
312
|
+
elsif extra_entities
|
313
|
+
v = extra_entities[$1]
|
314
|
+
end
|
315
|
+
end
|
316
|
+
raise "Unknown escape #{$1}" unless v
|
317
|
+
elsif $2
|
318
|
+
v = $2.to_i
|
319
|
+
else
|
320
|
+
v = $3.hex
|
321
|
+
end
|
322
|
+
# v can be a String or an Integer
|
323
|
+
if v.is_a? String then v else [v].pack('U') end
|
324
|
+
}
|
325
|
+
end
|
326
|
+
def xml_parse
|
327
|
+
XML.parse(self)
|
328
|
+
end
|
337
329
|
end
|
338
330
|
|
339
331
|
class File
|
340
|
-
|
341
|
-
|
342
|
-
|
332
|
+
def xml_parse
|
333
|
+
XML.parse(self)
|
334
|
+
end
|
343
335
|
end
|
344
336
|
|
345
337
|
class Array
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
338
|
+
# children of any element
|
339
|
+
def children(*args, &blk)
|
340
|
+
res = []
|
341
|
+
each{|c|
|
342
|
+
res += c.children(*args, &blk) if c.is_a? XML
|
343
|
+
}
|
344
|
+
res
|
345
|
+
end
|
346
|
+
# descendants of any element
|
347
|
+
def descendants(*args, &blk)
|
348
|
+
res = []
|
349
|
+
each{|c|
|
350
|
+
res += c.descendants(*args, &blk) if c.is_a? XML
|
351
|
+
}
|
352
|
+
res
|
353
|
+
end
|
362
354
|
end
|
363
355
|
|
364
356
|
# Methods of Enumerable.
|
@@ -372,942 +364,942 @@ end
|
|
372
364
|
#
|
373
365
|
# FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
|
374
366
|
class XML
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
# Iterate over children, possibly with a selector
|
380
|
-
def each(*selector, &blk)
|
381
|
-
children(*selector, &blk)
|
382
|
-
self
|
383
|
-
end
|
367
|
+
include Enumerable
|
368
|
+
# Default any? is ok
|
369
|
+
# Default all? is ok
|
384
370
|
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
371
|
+
# Iterate over children, possibly with a selector
|
372
|
+
def each(*selector, &blk)
|
373
|
+
children(*selector, &blk)
|
374
|
+
self
|
375
|
+
end
|
389
376
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
377
|
+
# Sort XML children of XML element.
|
378
|
+
def sort_by(*args, &blk)
|
379
|
+
self.dup{ @contents = @contents.select{|c| c.is_a? XML}.sort_by(*args, &blk) }
|
380
|
+
end
|
394
381
|
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
382
|
+
# Sort children of XML element.
|
383
|
+
def children_sort_by(*args, &blk)
|
384
|
+
self.dup{ @contents = @contents.sort_by(*args, &blk) }
|
385
|
+
end
|
386
|
+
|
387
|
+
# Sort children of XML element.
|
388
|
+
#
|
389
|
+
# Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
|
390
|
+
# Use sort_by instead.
|
391
|
+
#
|
392
|
+
# Unless you define your own XML#<=> operator, or do something equally weird.
|
393
|
+
def sort(*args, &blk)
|
394
|
+
self.dup{ @contents = @contents.sort(*args, &blk) }
|
395
|
+
end
|
404
396
|
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
397
|
+
#collect/map
|
398
|
+
#detect/find
|
399
|
+
#each_cons
|
400
|
+
#each_slice
|
401
|
+
#each_with_index
|
402
|
+
#to_a
|
403
|
+
#entries
|
404
|
+
#enum_cons
|
405
|
+
#enum_slice
|
406
|
+
#enum
|
407
|
+
# grep
|
408
|
+
# include?/member?
|
409
|
+
# inject
|
410
|
+
# max/min
|
411
|
+
# max_by/min_by - Ruby 1.9
|
412
|
+
# partition
|
413
|
+
# reject
|
414
|
+
# sort
|
415
|
+
# sort_by
|
416
|
+
# to_set
|
417
|
+
# zip
|
418
|
+
# And Enumerable::Enumerator-generating methods
|
427
419
|
end
|
428
420
|
|
429
421
|
# Class methods
|
430
422
|
class XML
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
end
|
423
|
+
# XML.foo! == xml!(:foo)
|
424
|
+
# XML.foo == xml(:foo)
|
425
|
+
def self.method_missing(meth, *args, &blk)
|
426
|
+
if meth.to_s =~ /^(.*)!$/
|
427
|
+
xml!($1.to_sym, *args, &blk)
|
428
|
+
else
|
429
|
+
XML.new(meth, *args, &blk)
|
439
430
|
end
|
431
|
+
end
|
440
432
|
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
433
|
+
# Read file and parse
|
434
|
+
def self.from_file(file)
|
435
|
+
file = File.open(file) if file.is_a? String
|
436
|
+
parse(file)
|
437
|
+
end
|
438
|
+
|
439
|
+
# Fetch URL and parse
|
440
|
+
# Supported:
|
441
|
+
# http://.../
|
442
|
+
# https://.../
|
443
|
+
# file:foo.xml
|
444
|
+
# string:<foo/>
|
445
|
+
def self.from_url(url)
|
446
|
+
if url =~ /^string:(.*)$/m
|
447
|
+
parse($1)
|
448
|
+
elsif url =~ /^file:(.*)$/m
|
449
|
+
from_file($1)
|
450
|
+
elsif url =~ /^http(s?):/
|
451
|
+
ssl = ($1 == "s")
|
452
|
+
# No, seriously - Ruby needs something better than net/http
|
453
|
+
# Something that groks basic auth and queries and redirects automatically:
|
454
|
+
# HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
|
455
|
+
# URI parsing must go inside the library, client programs
|
456
|
+
# should have nothing to do with it
|
457
|
+
|
458
|
+
# net/http is really inconvenient to use here
|
459
|
+
u = URI.parse(url)
|
460
|
+
# You're not seeing this:
|
461
|
+
if u.query then
|
462
|
+
path = u.path + "?" + u.query
|
463
|
+
else
|
464
|
+
path = u.path
|
465
|
+
end
|
466
|
+
req = Net::HTTP::Get.new(path)
|
467
|
+
if u.userinfo
|
468
|
+
username, passwd = u.userinfo.split(/:/,2)
|
469
|
+
req.basic_auth username, passwd
|
470
|
+
end
|
471
|
+
if ssl
|
472
|
+
# NOTE: You need libopenssl-ruby installed
|
473
|
+
# if you want to use HTTPS. Ubuntu is broken
|
474
|
+
# as it doesn't provide it in the default packages.
|
475
|
+
require 'net/https'
|
476
|
+
http = Net::HTTP.new(u.host, u.port)
|
477
|
+
http.use_ssl = true
|
478
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
479
|
+
else
|
480
|
+
http = Net::HTTP.new(u.host, u.port)
|
481
|
+
end
|
482
|
+
|
483
|
+
res = http.start {|http_conn| http_conn.request(req) }
|
484
|
+
# TODO: Throw a more meaningful exception
|
485
|
+
parse(res.body)
|
486
|
+
else
|
487
|
+
raise "URL protocol #{url} not supported (http, https, file, string are supported)"
|
445
488
|
end
|
489
|
+
end
|
446
490
|
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
# Something that groks basic auth and queries and redirects automatically:
|
462
|
-
# HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
|
463
|
-
# URI parsing must go inside the library, client programs
|
464
|
-
# should have nothing to do with it
|
465
|
-
|
466
|
-
# net/http is really inconvenient to use here
|
467
|
-
u = URI.parse(url)
|
468
|
-
# You're not seeing this:
|
469
|
-
if u.query then
|
470
|
-
path = u.path + "?" + u.query
|
471
|
-
else
|
472
|
-
path = u.path
|
473
|
-
end
|
474
|
-
req = Net::HTTP::Get.new(path)
|
475
|
-
if u.userinfo
|
476
|
-
username, passwd = u.userinfo.split(/:/,2)
|
477
|
-
req.basic_auth username, passwd
|
478
|
-
end
|
479
|
-
if ssl
|
480
|
-
# NOTE: You need libopenssl-ruby installed
|
481
|
-
# if you want to use HTTPS. Ubuntu is broken
|
482
|
-
# as it doesn't provide it in the default packages.
|
483
|
-
require 'net/https'
|
484
|
-
http = Net::HTTP.new(u.host, u.port)
|
485
|
-
http.use_ssl = true
|
486
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
487
|
-
else
|
488
|
-
http = Net::HTTP.new(u.host, u.port)
|
489
|
-
end
|
490
|
-
|
491
|
-
res = http.start {|http| http.request(req) }
|
492
|
-
# TODO: Throw a more meaningful exception
|
493
|
-
parse(res.body)
|
494
|
-
else
|
495
|
-
raise "URL protocol #{url} not supported (http, https, file, string are supported)"
|
496
|
-
end
|
491
|
+
# Like CDuce load_xml
|
492
|
+
# The path can be:
|
493
|
+
# * file handler
|
494
|
+
# * URL (a string with :)
|
495
|
+
# * file name (a string without :)
|
496
|
+
def self.load(obj)
|
497
|
+
if obj.is_a? String
|
498
|
+
if obj.include? ":"
|
499
|
+
from_url(obj)
|
500
|
+
else
|
501
|
+
from_file(obj)
|
502
|
+
end
|
503
|
+
else
|
504
|
+
parse(obj)
|
497
505
|
end
|
506
|
+
end
|
498
507
|
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
508
|
+
# Parse XML in mixed stream/tree mode
|
509
|
+
# Basically the idea is that every time we get start element,
|
510
|
+
# we ask the block what to do about it.
|
511
|
+
# If it wants a tree below it, it should call e.tree
|
512
|
+
# If a tree was requested, elements below the current one
|
513
|
+
# are *not* processed. If it wasn't, they are.
|
514
|
+
#
|
515
|
+
# For example:
|
516
|
+
# <foo><bar/></foo><foo2/>
|
517
|
+
# yield <foo> ... </foo>
|
518
|
+
# .complete! called
|
519
|
+
# process <foo2> next
|
520
|
+
#
|
521
|
+
# But:
|
522
|
+
# <foo><bar/></foo><foo2/>
|
523
|
+
# yield <foo> ... </foo>
|
524
|
+
# .complete! not called
|
525
|
+
# process <bar> next
|
526
|
+
#
|
527
|
+
# FIXME: yielded values are not reusable for now
|
528
|
+
# FIXME: make more object-oriented
|
529
|
+
def self.parse_as_twigs(stream)
|
530
|
+
parser = REXML::Parsers::BaseParser.new stream
|
531
|
+
# We don't really need to keep the stack ;-)
|
532
|
+
stack = []
|
533
|
+
while true
|
534
|
+
event = parser.pull
|
535
|
+
case event[0]
|
536
|
+
when :start_element
|
537
|
+
# Now the evil part evil
|
538
|
+
attrs = {}
|
539
|
+
event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
|
540
|
+
node = XML.new(event[1].to_sym, attrs, *event[3..-1])
|
541
|
+
|
542
|
+
# I can't say it's superelegant
|
543
|
+
class <<node
|
544
|
+
attr_accessor :do_complete
|
545
|
+
def complete!
|
546
|
+
if @do_complete
|
547
|
+
@do_complete.call
|
548
|
+
@do_complete = nil
|
510
549
|
end
|
511
|
-
|
512
|
-
parse(obj)
|
550
|
+
end
|
513
551
|
end
|
514
|
-
|
552
|
+
node.do_complete = proc{
|
553
|
+
parse_subtree(node, parser)
|
554
|
+
}
|
515
555
|
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
# If a tree was requested, elements below the current one
|
521
|
-
# are *not* processed. If it wasn't, they are.
|
522
|
-
#
|
523
|
-
# For example:
|
524
|
-
# <foo><bar/></foo><foo2/>
|
525
|
-
# yield <foo> ... </foo>
|
526
|
-
# .complete! called
|
527
|
-
# process <foo2> next
|
528
|
-
#
|
529
|
-
# But:
|
530
|
-
# <foo><bar/></foo><foo2/>
|
531
|
-
# yield <foo> ... </foo>
|
532
|
-
# .complete! not called
|
533
|
-
# process <bar> next
|
534
|
-
#
|
535
|
-
# FIXME: yielded values are not reusable for now
|
536
|
-
# FIXME: make more object-oriented
|
537
|
-
def self.parse_as_twigs(stream)
|
538
|
-
parser = REXML::Parsers::BaseParser.new stream
|
539
|
-
# We don't really need to keep the stack ;-)
|
540
|
-
stack = []
|
541
|
-
while true
|
542
|
-
event = parser.pull
|
543
|
-
case event[0]
|
544
|
-
when :start_element
|
545
|
-
# Now the evil part evil
|
546
|
-
attrs = {}
|
547
|
-
event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
|
548
|
-
node = XML.new(event[1].to_sym, attrs, *event[3..-1])
|
549
|
-
|
550
|
-
# I can't say it's superelegant
|
551
|
-
class <<node
|
552
|
-
attr_accessor :do_complete
|
553
|
-
def complete!
|
554
|
-
if @do_complete
|
555
|
-
@do_complete.call
|
556
|
-
@do_complete = nil
|
557
|
-
end
|
558
|
-
end
|
559
|
-
end
|
560
|
-
node.do_complete = proc{
|
561
|
-
parse_subtree(node, parser)
|
562
|
-
}
|
563
|
-
|
564
|
-
yield(node)
|
565
|
-
if node.do_complete
|
566
|
-
stack.push node
|
567
|
-
node.do_complete = nil # It's too late, complete! shouldn't do anything now
|
568
|
-
end
|
569
|
-
when :end_element
|
570
|
-
stack.pop
|
571
|
-
when :end_document
|
572
|
-
return
|
573
|
-
else
|
574
|
-
# FIXME: Do the right thing.
|
575
|
-
# For now, ignore *everything* else
|
576
|
-
# This is totally incorrect, user might want to
|
577
|
-
# see text, comments and stuff like that anyway
|
578
|
-
end
|
556
|
+
yield(node)
|
557
|
+
if node.do_complete
|
558
|
+
stack.push node
|
559
|
+
node.do_complete = nil # It's too late, complete! shouldn't do anything now
|
579
560
|
end
|
561
|
+
when :end_element
|
562
|
+
stack.pop
|
563
|
+
when :end_document
|
564
|
+
return
|
565
|
+
else
|
566
|
+
# FIXME: Do the right thing.
|
567
|
+
# For now, ignore *everything* else
|
568
|
+
# This is totally incorrect, user might want to
|
569
|
+
# see text, comments and stuff like that anyway
|
570
|
+
end
|
580
571
|
end
|
581
|
-
|
582
|
-
# Basically it's a copy of self.parse, ugly ...
|
583
|
-
def self.parse_subtree(start_node, parser)
|
584
|
-
stack = [start_node]
|
585
|
-
res = nil
|
586
|
-
while true
|
587
|
-
event = parser.pull
|
588
|
-
case event[0]
|
589
|
-
when :start_element
|
590
|
-
attrs = {}
|
591
|
-
event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
|
592
|
-
stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
|
593
|
-
if stack.size == 1
|
594
|
-
res = stack[0]
|
595
|
-
else
|
596
|
-
stack[-2] << stack[-1]
|
597
|
-
end
|
598
|
-
when :end_element
|
599
|
-
stack.pop
|
600
|
-
return if stack == []
|
601
|
-
# Needs unescaping
|
602
|
-
when :text
|
603
|
-
# Ignore whitespace
|
604
|
-
if stack.size == 0
|
605
|
-
next if event[1] !~ /\S/
|
606
|
-
raise "Non-whitespace text out of document root"
|
607
|
-
end
|
608
|
-
stack[-1] << event[1].xml_unescape
|
609
|
-
# CDATA is already unescaped
|
610
|
-
when :cdata
|
611
|
-
if stack.size == 0
|
612
|
-
raise "CDATA out of the document root"
|
613
|
-
end
|
614
|
-
stack[-1] << event[1]
|
615
|
-
when :end_document
|
616
|
-
raise "Parse error: end_document inside a subtree, tags are not balanced"
|
617
|
-
when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
|
618
|
-
# Positivery ignore
|
619
|
-
when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
|
620
|
-
# Ignore ???
|
621
|
-
#print "Ignored XML event #{event[0]} when parsing\n"
|
622
|
-
else
|
623
|
-
# Huh ? What's that ?
|
624
|
-
#print "Unknown XML event #{event[0]} when parsing\n"
|
625
|
-
end
|
626
|
-
end
|
627
|
-
res
|
572
|
+
end
|
628
573
|
|
574
|
+
# Basically it's a copy of self.parse, ugly ...
|
575
|
+
def self.parse_subtree(start_node, parser)
|
576
|
+
stack = [start_node]
|
577
|
+
res = nil
|
578
|
+
while true
|
579
|
+
event = parser.pull
|
580
|
+
case event[0]
|
581
|
+
when :start_element
|
582
|
+
attrs = {}
|
583
|
+
event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
|
584
|
+
stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
|
585
|
+
if stack.size == 1
|
586
|
+
res = stack[0]
|
587
|
+
else
|
588
|
+
stack[-2] << stack[-1]
|
589
|
+
end
|
590
|
+
when :end_element
|
591
|
+
stack.pop
|
592
|
+
return if stack == []
|
593
|
+
# Needs unescaping
|
594
|
+
when :text
|
595
|
+
# Ignore whitespace
|
596
|
+
if stack.size == 0
|
597
|
+
next if event[1] !~ /\S/
|
598
|
+
raise "Non-whitespace text out of document root"
|
599
|
+
end
|
600
|
+
stack[-1] << event[1].xml_unescape
|
601
|
+
# CDATA is already unescaped
|
602
|
+
when :cdata
|
603
|
+
if stack.size == 0
|
604
|
+
raise "CDATA out of the document root"
|
605
|
+
end
|
606
|
+
stack[-1] << event[1]
|
607
|
+
when :end_document
|
608
|
+
raise "Parse error: end_document inside a subtree, tags are not balanced"
|
609
|
+
when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
|
610
|
+
# Positivery ignore
|
611
|
+
when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
|
612
|
+
# Ignore ???
|
613
|
+
#print "Ignored XML event #{event[0]} when parsing\n"
|
614
|
+
else
|
615
|
+
# Huh ? What's that ?
|
616
|
+
#print "Unknown XML event #{event[0]} when parsing\n"
|
617
|
+
end
|
629
618
|
end
|
619
|
+
res
|
630
620
|
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
when :cdata
|
684
|
-
e = event[1]
|
685
|
-
if stack.size > 1 or options[:multiple_roots]
|
686
|
-
stack[-1] << e
|
687
|
-
else
|
688
|
-
raise "CDATA out of the document root"
|
689
|
-
end
|
690
|
-
when :comment
|
691
|
-
next unless options[:comments]
|
692
|
-
e = XML_Comment.new(event[1])
|
693
|
-
if stack.size > 1 or options[:multiple_roots]
|
694
|
-
stack[-1] << e
|
695
|
-
else
|
696
|
-
# FIXME: Ugly !
|
697
|
-
raise "Comments out of the document root"
|
698
|
-
end
|
699
|
-
when :processing_instruction
|
700
|
-
# FIXME: Real PI node
|
701
|
-
next unless options[:pi]
|
702
|
-
e = XML_PI.new(event[1], event[2])
|
703
|
-
if stack.size > 1 or options[:multiple_roots]
|
704
|
-
stack[-1] << e
|
705
|
-
else
|
706
|
-
# FIXME: Ugly !
|
707
|
-
raise "Processing instruction out of the document root"
|
708
|
-
end
|
709
|
-
when :end_document
|
710
|
-
break
|
711
|
-
when :xmldecl,:start_doctype,:end_doctype,:elementdecl
|
712
|
-
# Positivery ignore
|
713
|
-
when :externalentity,:entity,:attlistdecl,:notationdecl
|
714
|
-
# Ignore ???
|
715
|
-
#print "Ignored XML event #{event[0]} when parsing\n"
|
716
|
-
else
|
717
|
-
# Huh ? What's that ?
|
718
|
-
#print "Unknown XML event #{event[0]} when parsing\n"
|
719
|
-
end
|
621
|
+
end
|
622
|
+
|
623
|
+
# Parse XML using REXML. Available options:
|
624
|
+
# * :extra_entities => Proc or Hash (default = nil)
|
625
|
+
# * :remove_pretty_printing => true/false (default = false)
|
626
|
+
# * :comments => true/false (default = false)
|
627
|
+
# * :pi => true/false (default = false)
|
628
|
+
# * :normalize => true/false (default = false) - normalize
|
629
|
+
# * :multiple_roots => true/false (default=false) - document
|
630
|
+
# can have any number of roots (instread of one).
|
631
|
+
# Return all in an array instead of root/nil.
|
632
|
+
# Also include non-elements (String/PI/Comment) in the return set !!!
|
633
|
+
#
|
634
|
+
# FIXME: :comments/:pi will break everything
|
635
|
+
# if there are comments/PIs outside document root.
|
636
|
+
# Now PIs are outside the document root more often than not,
|
637
|
+
# so we're pretty much screwed here.
|
638
|
+
#
|
639
|
+
# FIXME: Integrate all kinds of parse, and make them support extra options
|
640
|
+
#
|
641
|
+
# FIXME: Benchmark normalize!
|
642
|
+
#
|
643
|
+
# FIXME: Benchmark dup-based Enumerable methods
|
644
|
+
#
|
645
|
+
# FIXME: Make it possible to include bogus XML_Document superparent,
|
646
|
+
# and to make it support out-of-root PIs/Comments
|
647
|
+
def self.parse(stream, options={})
|
648
|
+
extra_entities = options[:extra_entities]
|
649
|
+
|
650
|
+
parser = REXML::Parsers::BaseParser.new stream
|
651
|
+
stack = [[]]
|
652
|
+
|
653
|
+
while true
|
654
|
+
event = parser.pull
|
655
|
+
case event[0]
|
656
|
+
when :start_element
|
657
|
+
attrs = {}
|
658
|
+
event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
|
659
|
+
stack << XML.new(event[1].to_sym, attrs, event[3..-1])
|
660
|
+
stack[-2] << stack[-1]
|
661
|
+
when :end_element
|
662
|
+
stack.pop
|
663
|
+
# Needs unescaping
|
664
|
+
when :text
|
665
|
+
e = event[1].xml_unescape(extra_entities)
|
666
|
+
# Either inside root or in multi-root mode
|
667
|
+
if stack.size > 1 or options[:multiple_roots]
|
668
|
+
stack[-1] << e
|
669
|
+
elsif event[1] !~ /\S/
|
670
|
+
# Ignore out-of-root whitespace in single-root mode
|
671
|
+
else
|
672
|
+
raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
|
720
673
|
end
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
if options[:multiple_roots]
|
727
|
-
roots
|
674
|
+
# CDATA is already unescaped
|
675
|
+
when :cdata
|
676
|
+
e = event[1]
|
677
|
+
if stack.size > 1 or options[:multiple_roots]
|
678
|
+
stack[-1] << e
|
728
679
|
else
|
729
|
-
|
680
|
+
raise "CDATA out of the document root"
|
730
681
|
end
|
682
|
+
when :comment
|
683
|
+
next unless options[:comments]
|
684
|
+
e = XML_Comment.new(event[1])
|
685
|
+
if stack.size > 1 or options[:multiple_roots]
|
686
|
+
stack[-1] << e
|
687
|
+
else
|
688
|
+
# FIXME: Ugly !
|
689
|
+
raise "Comments out of the document root"
|
690
|
+
end
|
691
|
+
when :processing_instruction
|
692
|
+
# FIXME: Real PI node
|
693
|
+
next unless options[:pi]
|
694
|
+
e = XML_PI.new(event[1], event[2])
|
695
|
+
if stack.size > 1 or options[:multiple_roots]
|
696
|
+
stack[-1] << e
|
697
|
+
else
|
698
|
+
# FIXME: Ugly !
|
699
|
+
raise "Processing instruction out of the document root"
|
700
|
+
end
|
701
|
+
when :end_document
|
702
|
+
break
|
703
|
+
when :xmldecl,:start_doctype,:end_doctype,:elementdecl
|
704
|
+
# Positively ignore
|
705
|
+
when :externalentity,:entity,:attlistdecl,:notationdecl
|
706
|
+
# Ignore ???
|
707
|
+
#print "Ignored XML event #{event[0]} when parsing\n"
|
708
|
+
else
|
709
|
+
# Huh ? What's that ?
|
710
|
+
#print "Unknown XML event #{event[0]} when parsing\n"
|
711
|
+
end
|
731
712
|
end
|
713
|
+
roots = stack[0]
|
732
714
|
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
715
|
+
roots.each{|root| root.remove_pretty_printing!} if options[:remove_pretty_printing]
|
716
|
+
# :remove_pretty_printing does :normalize anyway
|
717
|
+
roots.each{|root| root.normalize!} if options[:normalize]
|
718
|
+
if options[:multiple_roots]
|
719
|
+
roots
|
720
|
+
else
|
721
|
+
roots[0]
|
738
722
|
end
|
723
|
+
end
|
739
724
|
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
725
|
+
# Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
|
726
|
+
def self.parse_sequence(stream, options={})
|
727
|
+
o = options.dup
|
728
|
+
o[:multiple_roots] = true
|
729
|
+
parse(stream, o)
|
730
|
+
end
|
744
731
|
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
732
|
+
# Renormalize a string containing XML document
|
733
|
+
def self.renormalize(stream)
|
734
|
+
parse(stream).to_s
|
735
|
+
end
|
736
|
+
|
737
|
+
# Renormalize a string containing a sequence of XML documents
|
738
|
+
# and strings
|
739
|
+
# XML.renormalize_sequence("<hello />, <world></world>!") =>
|
740
|
+
# "<hello/>, <world/>!"
|
741
|
+
def self.renormalize_sequence(stream)
|
742
|
+
parse_sequence(stream).join
|
743
|
+
end
|
752
744
|
end
|
753
745
|
|
754
746
|
# Instance methods (other than those of Enumerable)
|
755
747
|
class XML
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
end
|
748
|
+
attr_accessor :name, :attrs, :contents
|
749
|
+
|
750
|
+
# initialize can be run in many ways
|
751
|
+
# * XML.new
|
752
|
+
# * XML.new(:tag_symbol)
|
753
|
+
# * XML.new(:tag_symbol, {attributes})
|
754
|
+
# * XML.new(:tag_symbol, "children", "more", XML.new(...))
|
755
|
+
# * XML.new(:tag_symbol, {attributes}, "and", "children")
|
756
|
+
# * XML.new(:tag_symbol) { monadic code }
|
757
|
+
# * XML.new(:tag_symbol, {attributes}) { monadic code }
|
758
|
+
#
|
759
|
+
# Or even:
|
760
|
+
# * XML.new(:tag_symbol, "children") { and some monadic code }
|
761
|
+
# * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
|
762
|
+
# But typically you won't be mixing these two styles
|
763
|
+
#
|
764
|
+
# Attribute values will be converted to strings
|
765
|
+
def initialize(*args, &blk)
|
766
|
+
@name = nil
|
767
|
+
@attrs = {}
|
768
|
+
@contents = []
|
769
|
+
@name = args.shift if args.size != 0
|
770
|
+
if args.size != 0 and args[0].is_a? Hash
|
771
|
+
args.shift.each{|k,v|
|
772
|
+
# Do automatic conversion here
|
773
|
+
# This also assures that the hashes are *not* shared
|
774
|
+
self[k] = v
|
775
|
+
}
|
776
|
+
end
|
777
|
+
# Expand Arrays passed as arguments
|
778
|
+
self << args
|
779
|
+
# FIXME: We'd rather not have people say @name = :foo there :-)
|
780
|
+
if blk
|
781
|
+
instance_eval(&blk)
|
791
782
|
end
|
783
|
+
end
|
792
784
|
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
end
|
785
|
+
# Convert to a well-formatted XML
|
786
|
+
def to_s
|
787
|
+
"<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
|
788
|
+
if @contents.size == 0
|
789
|
+
"/>"
|
790
|
+
else
|
791
|
+
">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.to_s end}.join + "</#{name}>"
|
801
792
|
end
|
793
|
+
end
|
802
794
|
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
end
|
795
|
+
# Convert to a well-formatted XML, but without children information.
|
796
|
+
# This is a reasonable format for irb and debugging.
|
797
|
+
# If you want to see a few levels of children, call inspect(2) and so on
|
798
|
+
def inspect(include_children=0)
|
799
|
+
"<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
|
800
|
+
if @contents.size == 0
|
801
|
+
"/>"
|
802
|
+
elsif include_children == 0
|
803
|
+
">...</#{name}>"
|
804
|
+
else
|
805
|
+
">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.inspect(include_children-1) end}.join + "</#{name}>"
|
815
806
|
end
|
807
|
+
end
|
816
808
|
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
end
|
809
|
+
# Read attributes.
|
810
|
+
# Also works with pseudoattributes:
|
811
|
+
# img[:@x] == img.child(:x).text # or nil if there isn't any.
|
812
|
+
def [](key)
|
813
|
+
if key.to_s[0] == ?@
|
814
|
+
tag = key.to_s[1..-1].to_sym
|
815
|
+
c = child(tag)
|
816
|
+
if c
|
817
|
+
c.text
|
818
|
+
else
|
819
|
+
nil
|
820
|
+
end
|
821
|
+
else
|
822
|
+
@attrs[key]
|
832
823
|
end
|
824
|
+
end
|
833
825
|
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
end
|
826
|
+
# Set attributes.
|
827
|
+
# Value is automatically converted to String, so you can say:
|
828
|
+
# img[:x] = 200
|
829
|
+
# Also works with pseudoattributes:
|
830
|
+
# foo[:@bar] = "x"
|
831
|
+
def []=(key, value)
|
832
|
+
if key.to_s[0] == ?@
|
833
|
+
tag = key.to_s[1..-1].to_sym
|
834
|
+
c = child(tag)
|
835
|
+
if c
|
836
|
+
c.contents = [value.to_s]
|
837
|
+
else
|
838
|
+
self << XML.new(tag, value.to_s)
|
839
|
+
end
|
840
|
+
else
|
841
|
+
@attrs[key] = value.to_s
|
851
842
|
end
|
843
|
+
end
|
852
844
|
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
end
|
871
|
-
self
|
845
|
+
# Add children.
|
846
|
+
# Possible uses:
|
847
|
+
# * Add single element
|
848
|
+
# self << xml(...)
|
849
|
+
# self << "foo"
|
850
|
+
# Add nothing:
|
851
|
+
# self << nil
|
852
|
+
# Add multiple elements (also works recursively):
|
853
|
+
# self << [a, b, c]
|
854
|
+
# self << [a, [b, c], d]
|
855
|
+
def <<(cnt)
|
856
|
+
if cnt.nil?
|
857
|
+
# skip
|
858
|
+
elsif cnt.is_a? Array
|
859
|
+
cnt.each{|elem| self << elem}
|
860
|
+
else
|
861
|
+
@contents << cnt
|
872
862
|
end
|
863
|
+
self
|
864
|
+
end
|
873
865
|
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
866
|
+
# Equality test, works as if XMLs were normalized, so:
|
867
|
+
# XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
|
868
|
+
def ==(x)
|
869
|
+
return false unless x.is_a? XML
|
870
|
+
return false unless name == x.name and attrs == x.attrs
|
871
|
+
# Now the hard part, strings can be split in different ways
|
872
|
+
# empty string children are possible etc.
|
873
|
+
self_i = 0
|
874
|
+
othr_i = 0
|
875
|
+
while self_i != contents.size or othr_i != x.contents.size
|
876
|
+
# Ignore ""s
|
877
|
+
if contents[self_i].is_a? String and contents[self_i] == ""
|
878
|
+
self_i += 1
|
879
|
+
next
|
880
|
+
end
|
881
|
+
if x.contents[othr_i].is_a? String and x.contents[othr_i] == ""
|
882
|
+
othr_i += 1
|
883
|
+
next
|
884
|
+
end
|
893
885
|
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
# Are they both Strings ?
|
899
|
-
# Strings can be divided in different ways, and calling normalize!
|
900
|
-
# here would be rather expensive, so let's use this complicated
|
901
|
-
# algorithm
|
902
|
-
if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
|
903
|
-
a = contents[self_i]
|
904
|
-
b = x.contents[othr_i]
|
905
|
-
self_i += 1
|
906
|
-
othr_i += 1
|
907
|
-
while a != "" or b != ""
|
908
|
-
if a == b
|
909
|
-
a = ""
|
910
|
-
b = ""
|
911
|
-
elsif a.size > b.size and a[0, b.size] == b
|
912
|
-
a = a[b.size..-1]
|
913
|
-
if x.contents[othr_i].is_a? String
|
914
|
-
b = x.contents[othr_i]
|
915
|
-
othr_i += 1
|
916
|
-
next
|
917
|
-
end
|
918
|
-
elsif b.size > a.size and b[0, a.size] == a
|
919
|
-
b = b[a.size..-1]
|
920
|
-
if contents[self_i].is_a? String
|
921
|
-
a = contents[self_i]
|
922
|
-
self_i += 1
|
923
|
-
next
|
924
|
-
end
|
925
|
-
else
|
926
|
-
return false
|
927
|
-
end
|
928
|
-
end
|
929
|
-
next
|
930
|
-
end
|
886
|
+
# If one is finished and the other contains non-empty elements,
|
887
|
+
# they are not equal
|
888
|
+
return false if self_i == contents.size or othr_i == x.contents.size
|
931
889
|
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
890
|
+
# Are they both Strings ?
|
891
|
+
# Strings can be divided in different ways, and calling normalize!
|
892
|
+
# here would be rather expensive, so let's use this complicated
|
893
|
+
# algorithm
|
894
|
+
if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
|
895
|
+
a = contents[self_i]
|
896
|
+
b = x.contents[othr_i]
|
897
|
+
self_i += 1
|
898
|
+
othr_i += 1
|
899
|
+
while a != "" or b != ""
|
900
|
+
if a == b
|
901
|
+
a = ""
|
902
|
+
b = ""
|
903
|
+
elsif a.size > b.size and a[0, b.size] == b
|
904
|
+
a = a[b.size..-1]
|
905
|
+
if x.contents[othr_i].is_a? String
|
906
|
+
b = x.contents[othr_i]
|
907
|
+
othr_i += 1
|
908
|
+
next
|
909
|
+
end
|
910
|
+
elsif b.size > a.size and b[0, a.size] == a
|
911
|
+
b = b[a.size..-1]
|
912
|
+
if contents[self_i].is_a? String
|
913
|
+
a = contents[self_i]
|
914
|
+
self_i += 1
|
915
|
+
next
|
916
|
+
end
|
917
|
+
else
|
918
|
+
return false
|
919
|
+
end
|
946
920
|
end
|
947
|
-
|
948
|
-
|
921
|
+
next
|
922
|
+
end
|
949
923
|
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
924
|
+
# OK, so at least one of them is not a String.
|
925
|
+
# Hopefully they're either both XMLs or one is an XML and the
|
926
|
+
# other is a String. It is also possible that contents contains
|
927
|
+
# something illegal, but we aren't catching that,
|
928
|
+
# so xml(:foo, Garbage.new) is going to at least equal itself.
|
929
|
+
# And we aren't, because xml(:foo, Garbage.new) == xml(:bar, Garbage.new)
|
930
|
+
# is going to return an honest false, and incoherent sanity
|
931
|
+
# check is worse than no sanity check.
|
932
|
+
#
|
933
|
+
# Oh yeah, they can be XML_PI or XML_Comment. In such case, this
|
934
|
+
# is ok.
|
935
|
+
return false unless contents[self_i] == x.contents[othr_i]
|
936
|
+
self_i += 1
|
937
|
+
othr_i += 1
|
959
938
|
end
|
939
|
+
return true
|
940
|
+
end
|
960
941
|
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
942
|
+
alias_method :real_method_missing, :method_missing
|
943
|
+
# Define all foo!-methods for monadic interface, so you can write:
|
944
|
+
#   node.foo!("bar")   # equivalent to: node << XML.new(:foo, "bar")
|
945
|
+
def method_missing(meth, *args, &blk)
|
946
|
+
if meth.to_s =~ /^(.*)!$/
|
947
|
+
self << XML.new($1.to_sym, *args, &blk)
|
948
|
+
else
|
949
|
+
real_method_missing(meth, *args, &blk)
|
967
950
|
end
|
951
|
+
end
|
968
952
|
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
# the result tree.
|
977
|
-
# Either start or end can be nil.
|
978
|
-
# * If both start and end are nil, return whole tree.
|
979
|
-
# * If start is nil, return subtree up to range_end.
|
980
|
-
# * If start is not inside the tree, return nil.
|
981
|
-
# * If end is nil, return subtree from start
|
982
|
-
# * If end is not inside the tree, return subtree from start.
|
983
|
-
# * If end is before or below start, or they're the same node, the result is unspecified.
|
984
|
-
# * if end comes directly after start, or as first node when start==nil, return path reaching there.
|
985
|
-
def range(range_start, range_end, end_reached_cb=nil)
|
986
|
-
if range_start == nil
|
987
|
-
result = XML.new(name, attrs)
|
988
|
-
else
|
989
|
-
result = nil
|
990
|
-
end
|
991
|
-
@contents.each {|c|
|
992
|
-
# end reached !
|
993
|
-
if range_end and c.object_id == range_end.object_id
|
994
|
-
end_reached_cb.call if end_reached_cb
|
995
|
-
break
|
996
|
-
end
|
997
|
-
# start reached !
|
998
|
-
if range_start and c.object_id == range_start.object_id
|
999
|
-
result = XML.new(name, attrs)
|
1000
|
-
next
|
1001
|
-
end
|
1002
|
-
if result # We already started
|
1003
|
-
if c.is_a? XML
|
1004
|
-
break_me = false
|
1005
|
-
result.add! c.range(nil, range_end, lambda{ break_me = true })
|
1006
|
-
if break_me
|
1007
|
-
end_reached_cb.call if end_reached_cb
|
1008
|
-
break
|
1009
|
-
end
|
1010
|
-
else # String/XML_PI/XML_Comment
|
1011
|
-
result.add! c
|
1012
|
-
end
|
1013
|
-
else
|
1014
|
-
# Strings/XML_PI/XML_Comment obviously cannot start a range
|
1015
|
-
if c.is_a? XML
|
1016
|
-
break_me = false
|
1017
|
-
r = c.range(range_start, range_end, lambda{ break_me = true })
|
1018
|
-
if r
|
1019
|
-
# start reached !
|
1020
|
-
result = XML.new(name, attrs, r)
|
1021
|
-
end
|
1022
|
-
if break_me
|
1023
|
-
# end reached !
|
1024
|
-
end_reached_cb.call if end_reached_cb
|
1025
|
-
break
|
1026
|
-
end
|
1027
|
-
end
|
1028
|
-
end
|
1029
|
-
}
|
1030
|
-
return result
|
1031
|
-
end
|
953
|
+
# Make monadic interface more "official"
|
954
|
+
# * node.exec! { foo!; bar! }
|
955
|
+
# is equivalent to
|
956
|
+
# * node << xml(:foo) << xml(:bar)
|
957
|
+
def exec!(&blk)
|
958
|
+
instance_eval(&blk)
|
959
|
+
end
|
1032
960
|
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
break
|
1055
|
-
end
|
1056
|
-
if range_start and range_start.object_id == c.object_id
|
1057
|
-
start_seen = true
|
1058
|
-
start_seen_cb.call if start_seen_cb
|
1059
|
-
next
|
1060
|
-
end
|
1061
|
-
if start_seen
|
1062
|
-
if c.is_a? XML
|
1063
|
-
break_me = false
|
1064
|
-
result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
|
1065
|
-
break if break_me
|
1066
|
-
else # String/XML_PI/XML_Comment
|
1067
|
-
result << c
|
1068
|
-
end
|
1069
|
-
else
|
1070
|
-
# String/XML_PI/XML_Comment cannot start a subsequence
|
1071
|
-
if c.is_a? XML
|
1072
|
-
break_me = false
|
1073
|
-
result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
|
1074
|
-
break if break_me
|
1075
|
-
end
|
1076
|
-
end
|
1077
|
-
}
|
1078
|
-
# Include starting tag if it was right from the range_start
|
1079
|
-
# Otherwise, return just the raw sequence
|
1080
|
-
result = [XML.new(@name, @attrs, result)] if range_start == nil
|
1081
|
-
return result
|
961
|
+
# Select a subtree
|
962
|
+
# NOTE: Uses object_id of the start/end tags !
|
963
|
+
# They have to be the very same objects, not just equal ones !
|
964
|
+
# <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
|
965
|
+
# returns
|
966
|
+
# <foo><b/><c/></foo>
|
967
|
+
# start and end and their descendants are not included in
|
968
|
+
# the result tree.
|
969
|
+
# Either start or end can be nil.
|
970
|
+
# * If both start and end are nil, return whole tree.
|
971
|
+
# * If start is nil, return subtree up to range_end.
|
972
|
+
# * If start is not inside the tree, return nil.
|
973
|
+
# * If end is nil, return subtree from start
|
974
|
+
# * If end is not inside the tree, return subtree from start.
|
975
|
+
# * If end is before or below start, or they're the same node, the result is unspecified.
|
976
|
+
# * if end comes directly after start, or as first node when start==nil, return path reaching there.
|
977
|
+
def range(range_start, range_end, end_reached_cb=nil)
|
978
|
+
if range_start == nil
|
979
|
+
result = XML.new(name, attrs)
|
980
|
+
else
|
981
|
+
result = nil
|
1082
982
|
end
|
983
|
+
@contents.each {|c|
|
984
|
+
# end reached !
|
985
|
+
if range_end and c.object_id == range_end.object_id
|
986
|
+
end_reached_cb.call if end_reached_cb
|
987
|
+
break
|
988
|
+
end
|
989
|
+
# start reached !
|
990
|
+
if range_start and c.object_id == range_start.object_id
|
991
|
+
result = XML.new(name, attrs)
|
992
|
+
next
|
993
|
+
end
|
994
|
+
if result # We already started
|
995
|
+
if c.is_a? XML
|
996
|
+
break_me = false
|
997
|
+
result.add! c.range(nil, range_end, lambda{ break_me = true })
|
998
|
+
if break_me
|
999
|
+
end_reached_cb.call if end_reached_cb
|
1000
|
+
break
|
1001
|
+
end
|
1002
|
+
else # String/XML_PI/XML_Comment
|
1003
|
+
result.add! c
|
1004
|
+
end
|
1005
|
+
else
|
1006
|
+
# Strings/XML_PI/XML_Comment obviously cannot start a range
|
1007
|
+
if c.is_a? XML
|
1008
|
+
break_me = false
|
1009
|
+
r = c.range(range_start, range_end, lambda{ break_me = true })
|
1010
|
+
if r
|
1011
|
+
# start reached !
|
1012
|
+
result = XML.new(name, attrs, r)
|
1013
|
+
end
|
1014
|
+
if break_me
|
1015
|
+
# end reached !
|
1016
|
+
end_reached_cb.call if end_reached_cb
|
1017
|
+
break
|
1018
|
+
end
|
1019
|
+
end
|
1020
|
+
end
|
1021
|
+
}
|
1022
|
+
return result
|
1023
|
+
end
|
1083
1024
|
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1025
|
+
# XML#subsequence is similar to XML#range, but instead of
|
1026
|
+
# a trimmed subtree it returns a list of elements.
|
1027
|
+
# The same elements are included in both cases, but here
|
1028
|
+
# we do not include any parents !
|
1029
|
+
#
|
1030
|
+
# <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
|
1031
|
+
# <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
|
1032
|
+
#
|
1033
|
+
# <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
|
1034
|
+
# <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
|
1035
|
+
#
|
1036
|
+
# <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
|
1037
|
+
# <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
|
1038
|
+
#
|
1039
|
+
# And we return [], not nil if nothing matches
|
1040
|
+
def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
|
1041
|
+
result = []
|
1042
|
+
start_seen = range_start.nil?
|
1043
|
+
@contents.each{|c|
|
1044
|
+
if range_end and range_end.object_id == c.object_id
|
1045
|
+
end_seen_cb.call if end_seen_cb
|
1046
|
+
break
|
1047
|
+
end
|
1048
|
+
if range_start and range_start.object_id == c.object_id
|
1049
|
+
start_seen = true
|
1050
|
+
start_seen_cb.call if start_seen_cb
|
1051
|
+
next
|
1052
|
+
end
|
1053
|
+
if start_seen
|
1054
|
+
if c.is_a? XML
|
1055
|
+
break_me = false
|
1056
|
+
result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
|
1057
|
+
break if break_me
|
1058
|
+
else # String/XML_PI/XML_Comment
|
1059
|
+
result << c
|
1092
1060
|
end
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1061
|
+
else
|
1062
|
+
# String/XML_PI/XML_Comment cannot start a subsequence
|
1063
|
+
if c.is_a? XML
|
1064
|
+
break_me = false
|
1065
|
+
result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
|
1066
|
+
break if break_me
|
1067
|
+
end
|
1068
|
+
end
|
1069
|
+
}
|
1070
|
+
# Include starting tag if it was right from the range_start
|
1071
|
+
# Otherwise, return just the raw sequence
|
1072
|
+
result = [XML.new(@name, @attrs, result)] if range_start == nil
|
1073
|
+
return result
|
1074
|
+
end
|
1101
1075
|
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
c.gsub!(/\s+/, " ")
|
1111
|
-
elsif c.is_a? XML_PI or c.is_a? XML_Comment
|
1112
|
-
else
|
1113
|
-
c.real_remove_pretty_printing!(exceptions)
|
1114
|
-
end
|
1115
|
-
}
|
1076
|
+
# =~ for a few reasonable patterns
|
1077
|
+
def =~(pattern)
|
1078
|
+
if pattern.is_a? Symbol
|
1079
|
+
@name == pattern
|
1080
|
+
elsif pattern.is_a? Regexp
|
1081
|
+
text =~ pattern
|
1082
|
+
else # Hash, Pattern_any, Pattern_all
|
1083
|
+
pattern === self
|
1116
1084
|
end
|
1085
|
+
end
|
1117
1086
|
|
1118
|
-
|
1087
|
+
# Get rid of pretty-printing whitespace. Also normalizes the XML.
|
1088
|
+
def remove_pretty_printing!(exceptions=nil)
|
1089
|
+
normalize!
|
1090
|
+
real_remove_pretty_printing!(exceptions)
|
1091
|
+
normalize!
|
1092
|
+
end
|
1119
1093
|
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
}
|
1136
|
-
@contents = @contents.inject([]){|children, c| children + ["\n#{indent} ", c]}+["\n#{indent}"]
|
1137
|
-
end
|
1094
|
+
# normalize! is already recursive, so only one call at top level is needed.
|
1095
|
+
# This helper method lets us avoid extra calls to normalize!.
|
1096
|
+
def real_remove_pretty_printing!(exceptions=nil)
|
1097
|
+
return if exceptions and exceptions.include? @name
|
1098
|
+
each{|c|
|
1099
|
+
if c.is_a? String
|
1100
|
+
c.sub!(/^\s+/, "")
|
1101
|
+
c.sub!(/\s+$/, "")
|
1102
|
+
c.gsub!(/\s+/, " ")
|
1103
|
+
elsif c.is_a? XML_PI or c.is_a? XML_Comment
|
1104
|
+
else
|
1105
|
+
c.real_remove_pretty_printing!(exceptions)
|
1106
|
+
end
|
1107
|
+
}
|
1108
|
+
end
|
1138
1109
|
|
1139
|
-
|
1140
|
-
|
1141
|
-
alias_method :raw_dup, :dup
|
1142
|
-
# This is not a trivial method - first it does a *deep* copy,
|
1143
|
-
# second it takes a block which is instance_eval'ed,
|
1144
|
-
# so you can do things like:
|
1145
|
-
# * node.dup{ @name = :foo }
|
1146
|
-
# * node.dup{ self[:color] = "blue" }
|
1147
|
-
def dup(&blk)
|
1148
|
-
new_obj = self.raw_dup
|
1149
|
-
# Attr values stay shared - ugly
|
1150
|
-
new_obj.attrs = new_obj.attrs.dup
|
1151
|
-
new_obj.contents = new_obj.contents.map{|c| c.dup}
|
1152
|
-
|
1153
|
-
new_obj.instance_eval(&blk) if blk
|
1154
|
-
return new_obj
|
1155
|
-
end
|
1110
|
+
protected :real_remove_pretty_printing!
|
1156
1111
|
|
1112
|
+
# Add pretty-printing whitespace. Also normalizes the XML.
|
1113
|
+
def add_pretty_printing!
|
1114
|
+
normalize!
|
1115
|
+
real_add_pretty_printing!
|
1116
|
+
normalize!
|
1117
|
+
end
|
1157
1118
|
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1119
|
+
def real_add_pretty_printing!(indent = "")
|
1120
|
+
return if @contents.empty?
|
1121
|
+
each{|c|
|
1122
|
+
if c.is_a? XML
|
1123
|
+
c.real_add_pretty_printing!(indent+" ")
|
1124
|
+
elsif c.is_a? String
|
1125
|
+
c.gsub!(/\n\s*/, "\n#{indent} ")
|
1126
|
+
end
|
1127
|
+
}
|
1128
|
+
@contents = @contents.inject([]){|children, c| children + ["\n#{indent} ", c]}+["\n#{indent}"]
|
1129
|
+
end
|
1166
1130
|
|
1167
|
-
|
1168
|
-
|
1169
|
-
# Normalization means joining strings
|
1170
|
-
# and getting rid of ""s, recursively
|
1171
|
-
def normalize!
|
1172
|
-
new_contents = []
|
1173
|
-
@contents.each{|c|
|
1174
|
-
if c.is_a? String
|
1175
|
-
next if c == ""
|
1176
|
-
if new_contents[-1].is_a? String
|
1177
|
-
new_contents[-1] += c
|
1178
|
-
next
|
1179
|
-
end
|
1180
|
-
else
|
1181
|
-
c.normalize!
|
1182
|
-
end
|
1183
|
-
new_contents.push c
|
1184
|
-
}
|
1185
|
-
@contents = new_contents
|
1186
|
-
end
|
1131
|
+
protected :real_add_pretty_printing!
|
1187
1132
|
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
}
|
1200
|
-
res
|
1201
|
-
end
|
1133
|
+
alias_method :raw_dup, :dup
|
1134
|
+
# This is not a trivial method - first it does a *deep* copy,
|
1135
|
+
# second it takes a block which is instance_eval'ed,
|
1136
|
+
# so you can do things like:
|
1137
|
+
# * node.dup{ @name = :foo }
|
1138
|
+
# * node.dup{ self[:color] = "blue" }
|
1139
|
+
def dup(&blk)
|
1140
|
+
new_obj = self.raw_dup
|
1141
|
+
# Attr values stay shared - ugly
|
1142
|
+
new_obj.attrs = new_obj.attrs.dup
|
1143
|
+
new_obj.contents = new_obj.contents.map{|c| c.dup}
|
1202
1144
|
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
children(pat, *rest) {|c|
|
1207
|
-
return c
|
1208
|
-
}
|
1209
|
-
return nil
|
1210
|
-
end
|
1145
|
+
new_obj.instance_eval(&blk) if blk
|
1146
|
+
return new_obj
|
1147
|
+
end
|
1211
1148
|
|
1212
|
-
# Equivalent to node.descendants(pat, *rest)[0]
|
1213
|
-
# Returns nil if there aren't any matching descendants
|
1214
|
-
def descendant(pat=nil, *rest)
|
1215
|
-
descendants(pat, *rest) {|c|
|
1216
|
-
return c
|
1217
|
-
}
|
1218
|
-
return nil
|
1219
|
-
end
|
1220
1149
|
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1150
|
+
# Add some String children (all arguments get to_s'ed)
|
1151
|
+
def text!(*args)
|
1152
|
+
args.each{|s| self << s.to_s}
|
1153
|
+
end
|
1154
|
+
# Add XML child
|
1155
|
+
def xml!(*args, &blk)
|
1156
|
+
@contents << XML.new(*args, &blk)
|
1157
|
+
end
|
1158
|
+
|
1159
|
+
alias_method :add!, :<<
|
1160
|
+
|
1161
|
+
# Normalization means joining strings
|
1162
|
+
# and getting rid of ""s, recursively
|
1163
|
+
def normalize!
|
1164
|
+
new_contents = []
|
1165
|
+
@contents.each{|c|
|
1166
|
+
if c.is_a? String
|
1167
|
+
next if c == ""
|
1168
|
+
if new_contents[-1].is_a? String
|
1169
|
+
new_contents[-1] += c
|
1170
|
+
next
|
1171
|
+
end
|
1172
|
+
else
|
1173
|
+
c.normalize!
|
1174
|
+
end
|
1175
|
+
new_contents.push c
|
1176
|
+
}
|
1177
|
+
@contents = new_contents
|
1178
|
+
end
|
1179
|
+
|
1180
|
+
# Return text below the node, stripping all XML tags,
|
1181
|
+
# "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
|
1182
|
+
# returns "Hello, world!"
|
1183
|
+
def text
|
1184
|
+
res = ""
|
1185
|
+
@contents.each{|c|
|
1186
|
+
if c.is_a? XML
|
1187
|
+
res << c.text
|
1188
|
+
elsif c.is_a? String
|
1189
|
+
res << c
|
1190
|
+
end # Ignore XML_PI/XML_Comment
|
1191
|
+
}
|
1192
|
+
res
|
1193
|
+
end
|
1194
|
+
|
1195
|
+
# Equivalent to node.children(pat, *rest)[0]
|
1196
|
+
# Returns nil if there aren't any matching children
|
1197
|
+
def child(pat=nil, *rest)
|
1198
|
+
children(pat, *rest) {|c|
|
1199
|
+
return c
|
1200
|
+
}
|
1201
|
+
return nil
|
1202
|
+
end
|
1203
|
+
|
1204
|
+
# Equivalent to node.descendants(pat, *rest)[0]
|
1205
|
+
# Returns nil if there aren't any matching descendants
|
1206
|
+
def descendant(pat=nil, *rest)
|
1207
|
+
descendants(pat, *rest) {|c|
|
1208
|
+
return c
|
1209
|
+
}
|
1210
|
+
return nil
|
1211
|
+
end
|
1212
|
+
|
1213
|
+
# XML#children(pattern, more_patterns)
|
1214
|
+
# Return all children of a node matching the pattern.
|
1215
|
+
# Also:
|
1216
|
+
# * children(:a, :b) == children(:a).children(:b)
|
1217
|
+
# * children(:a, :*, :c) == children(:a).descendants(:c)
|
1218
|
+
def children(pat=nil, *rest, &blk)
|
1219
|
+
return descendants(*rest, &blk) if pat == :*
|
1220
|
+
res = []
|
1221
|
+
@contents.each{|c|
|
1222
|
+
if pat.nil? or pat === c
|
1223
|
+
if rest == []
|
1224
|
+
res << c
|
1225
|
+
yield c if block_given?
|
1272
1226
|
else
|
1273
|
-
|
1274
|
-
each{|c|
|
1275
|
-
if c.is_a? XML
|
1276
|
-
r << c.deep_map(pat, &blk)
|
1277
|
-
else
|
1278
|
-
r << c
|
1279
|
-
end
|
1280
|
-
}
|
1281
|
-
r
|
1227
|
+
res += c.children(*rest, &blk)
|
1282
1228
|
end
|
1283
|
-
|
1229
|
+
end
|
1230
|
+
}
|
1231
|
+
res
|
1232
|
+
end
|
1284
1233
|
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1234
|
+
# * XML#descendants
|
1235
|
+
# * XML#descendants(pattern)
|
1236
|
+
# * XML#descendants(pattern, more_patterns)
|
1237
|
+
#
|
1238
|
+
# Return all descendants of a node matching the pattern.
|
1239
|
+
# If pattern==nil, simply return all descendants.
|
1240
|
+
# Optionally run a block on each of them if a block was given.
|
1241
|
+
# If pattern==nil, also match Strings !
|
1242
|
+
def descendants(pat=nil, *rest, &blk)
|
1243
|
+
res = []
|
1244
|
+
@contents.each{|c|
|
1245
|
+
if pat.nil? or pat === c
|
1246
|
+
if rest == []
|
1247
|
+
res << c
|
1248
|
+
yield c if block_given?
|
1249
|
+
else
|
1250
|
+
res += c.children(*rest, &blk)
|
1251
|
+
end
|
1252
|
+
end
|
1253
|
+
if c.is_a? XML
|
1254
|
+
res += c.descendants(pat, *rest, &blk)
|
1255
|
+
end
|
1256
|
+
}
|
1257
|
+
res
|
1258
|
+
end
|
1259
|
+
|
1260
|
+
# Change elements based on pattern
|
1261
|
+
def deep_map(pat, &blk)
|
1262
|
+
if self =~ pat
|
1263
|
+
yield self
|
1264
|
+
else
|
1265
|
+
r = XML.new(self.name, self.attrs)
|
1266
|
+
each{|c|
|
1267
|
+
if c.is_a? XML
|
1268
|
+
r << c.deep_map(pat, &blk)
|
1269
|
+
else
|
1270
|
+
r << c
|
1271
|
+
end
|
1272
|
+
}
|
1273
|
+
r
|
1297
1274
|
end
|
1275
|
+
end
|
1276
|
+
|
1277
|
+
# FIXME: do we want a shallow or a deep copy here ?
|
1278
|
+
# Map children, but leave the name/attributes
|
1279
|
+
def map(pat=nil)
|
1280
|
+
r = XML.new(self.name, self.attrs)
|
1281
|
+
each{|c|
|
1282
|
+
if !pat || (c.is_a?(XML) && c =~ pat)
|
1283
|
+
r << yield(c)
|
1284
|
+
else
|
1285
|
+
r << c
|
1286
|
+
end
|
1287
|
+
}
|
1288
|
+
r
|
1289
|
+
end
|
1298
1290
|
end
|
1299
1291
|
|
1300
1292
|
# FIXME: Is this even sane ?
|
1301
1293
|
# * What about escaping and all that stuff ?
|
1302
1294
|
# * Rest of the code assumes that everything is either XML or String
|
1303
1295
|
# Processing-instruction node: holds two values and serializes them
# between "<?" and "?>".
class XML_PI
  # c and t are stored untouched; #to_s writes them back to back
  # (presumably the PI target and its text -- TODO confirm against the parser).
  def initialize(c, t)
    @c, @t = c, t
  end

  # Render as "<?" + c + t + "?>"; no separator or escaping is added.
  def to_s
    format('<?%s%s?>', @c, @t)
  end
end
|
1312
1304
|
|
1313
1305
|
# FIXME: Is this even sane ?
|
@@ -1315,25 +1307,25 @@ end
|
|
1315
1307
|
# * Rest of the code assumes that everything is either XML or String
|
1316
1308
|
# * XML forbids "--" inside a comment (and a "-" right before the closing "-->"); nothing here checks for that, so go easy on hyphens.
|
1317
1309
|
# Comment node: wraps one value and serializes it as an XML comment.
class XML_Comment
  # c is the comment text; it is stored as given.
  def initialize(c)
    @c = c
  end

  # Render as "<!--" + c + "-->"; the text is not validated or escaped.
  def to_s
    format('<!--%s-->', @c)
  end
end
|
1325
1317
|
|
1326
1318
|
# Syntactic sugar for XML.new: every argument and the block are passed
# through unchanged, and the new XML node is returned.
def xml(*node_args, &builder)
  XML.new(*node_args, &builder)
end
|
1330
1322
|
|
1331
1323
|
# xml! in XML { ... } - context adds node to parent
# xml! in main context (this definition) builds the node with xml,
# prints it, and returns it anyway.
def xml!(*args, &blk)
  xml(*args, &blk).tap { |node| print node }
end
|
1338
1330
|
|
1339
1331
|
# Perl 6 is supposed to have native support for something like that.
|
@@ -1341,20 +1333,19 @@ end
|
|
1341
1333
|
#
|
1342
1334
|
# Usage:
|
1343
1335
|
# case foo
|
1344
|
-
# when
|
1336
|
+
# when All[:foo, {:color => 'blue'}, /Hello/]
|
1345
1337
|
# print foo
|
1346
1338
|
# end
|
1347
|
-
class
|
1348
|
-
|
1349
|
-
|
1350
|
-
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1356
|
-
|
1357
|
-
Patterns_all.new(*patterns)
|
1339
|
+
# Conjunction of patterns: matches an object only when every wrapped
# pattern matches it with ===, so it can be used in case/when.
class All
  class << self
    # All[p1, p2, ...] is shorthand for All.new(p1, p2, ...).
    def [](*patterns)
      new(*patterns)
    end
  end

  def initialize(*patterns)
    @patterns = patterns
  end

  # true when no pattern rejects obj (so true for an empty pattern list);
  # stops at the first pattern that does not match.
  def ===(obj)
    @patterns.each { |pattern| return false unless pattern === obj }
    true
  end
end
|
1359
1350
|
|
1360
1351
|
# Perl 6 is supposed to have native support for something like that.
|
@@ -1362,18 +1353,17 @@ end
|
|
1362
1353
|
#
|
1363
1354
|
# Usage:
|
1364
1355
|
# case foo
|
1365
|
-
# when
|
1356
|
+
# when All[:foo, Any[{:color => 'blue'}, {:color => 'red'}], /Hello/]
|
1366
1357
|
# print foo
|
1367
1358
|
# end
|
1368
|
-
class
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1373
|
-
|
1374
|
-
|
1375
|
-
|
1376
|
-
|
1377
|
-
|
1378
|
-
Patterns_any.new(*patterns)
|
1359
|
+
# Disjunction of patterns: matches an object when at least one wrapped
# pattern matches it with ===, so it can be used in case/when.
class Any
  class << self
    # Any[p1, p2, ...] is shorthand for Any.new(p1, p2, ...).
    def [](*patterns)
      new(*patterns)
    end
  end

  def initialize(*patterns)
    @patterns = patterns
  end

  # true as soon as one pattern accepts obj; false when none does
  # (so false for an empty pattern list).
  def ===(obj)
    @patterns.each { |pattern| return true if pattern === obj }
    false
  end
end
|