magic-xml 0.2013.04.14 → 0.2016.05.07

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. checksums.yaml +7 -7
  2. data/lib/magic_xml.rb +1218 -1228
  3. metadata +50 -33
  4. data/test.xml +0 -1
  5. data/tests.rb +0 -836
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
- ---
2
- SHA512:
3
- metadata.gz: 7bd51bf68b4fbc33e90776c1ed25a241940533549758d98110635611ad6e86b0110d57d0b5fd80423d7e8c62bee96c113e80d9c951f6f3d0e79a40d8b2a6ba8f
4
- data.tar.gz: ccf0ccd42e5ed6c43be62935965d02ffcfcf106946205b5f2f069971da9fdf49bbddce2663d38dc38cfc6d71bebb2ec21c5679c44c2de7fa32c8a66544eef668
5
- SHA1:
6
- metadata.gz: 3a3f3eda28edb3476dbdb92e2e7b3387290e6df6
7
- data.tar.gz: 75594336326d79a1b8465717f411141dfdae8e96
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0436ef9cb1270e202381dec66b3fa5f895ac5b6f
4
+ data.tar.gz: 211561626d9196c738ee1fc5c6103101c9521082
5
+ SHA512:
6
+ metadata.gz: f0f4df9bebae54afcc59f60658f2d9914ef71d2326344dbd5c98a7b0ad8f5269966e38cf8dadef229c26c412a6f53674bfd5a307f762f25bae7efcace992a9ef
7
+ data.tar.gz: acf49469ff1336c11624a0a6ce2a12f1b39f9414109965064fdcd62a2115f8ec0d84e12a8b3b2b107f3a9912ebba1235ef824a4da1dcbd6091838ea730e63f4e
data/lib/magic_xml.rb CHANGED
@@ -6,359 +6,351 @@ require 'net/http'
6
6
 
7
7
  # FIXME: Make comment formatting RDoc-friendly. It's not always so now.
8
8
 
9
- # In Ruby 2 Symbol will be a subclass of String, and
10
- # this won't be needed any more. Before then...
11
9
  class Symbol
12
- include Comparable
13
- def <=>(other)
14
- raise ArgumentError.new("comparison of #{self.class} with #{other.class} failed") unless other.is_a? Symbol
15
- to_s <=> other.to_s
16
- end
17
-
18
- alias_method :eqeqeq_before_magic_xml, :===
19
- def ===(*args, &blk)
20
- if args.size >= 1 and args[0].is_a? XML
21
- self == args[0].name
22
- else
23
- eqeqeq_before_magic_xml(*args, &blk)
24
- end
10
+ alias_method :eqeqeq_before_magic_xml, :===
11
+ def ===(*args, &blk)
12
+ if args.size >= 1 and args[0].is_a? XML
13
+ self == args[0].name
14
+ else
15
+ eqeqeq_before_magic_xml(*args, &blk)
25
16
  end
17
+ end
26
18
  end
27
19
 
28
20
  class Hash
29
- alias_method :eqeqeq_before_magic_xml, :===
30
- def ===(*args, &blk)
31
- if args.size >= 1 and args[0].is_a? XML
32
- all?{|k,v| v === args[0][k]}
33
- else
34
- eqeqeq_before_magic_xml(*args, &blk)
35
- end
21
+ alias_method :eqeqeq_before_magic_xml, :===
22
+ def ===(*args, &blk)
23
+ if args.size >= 1 and args[0].is_a? XML
24
+ all?{|k,v| v === args[0][k]}
25
+ else
26
+ eqeqeq_before_magic_xml(*args, &blk)
36
27
  end
28
+ end
37
29
  end
38
30
 
39
31
  class String
40
- # Escape string for output as XML text (< > &)
41
- def xml_escape
42
- replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
43
- gsub(/([<>&])/) { replacements[$1] }
44
- end
45
- # Escape characters for output as XML attribute values (< > & ' ")
46
- def xml_attr_escape
47
- replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
48
- gsub(/([<>&\'\"])/) { replacements[$1] }
49
- end
50
- # Unescape entities
51
- # Supports:
52
- # * Full set of HTML-compatible named entities
53
- # * Decimal entities &#1234;
54
- # * Hex entities &#xA0b1;
55
- def xml_unescape(extra_entities=nil)
56
- @@xhtml_entity_replacements ||= {
57
- 'nbsp' => 160,
58
- 'iexcl' => 161,
59
- 'cent' => 162,
60
- 'pound' => 163,
61
- 'curren' => 164,
62
- 'yen' => 165,
63
- 'brvbar' => 166,
64
- 'sect' => 167,
65
- 'uml' => 168,
66
- 'copy' => 169,
67
- 'ordf' => 170,
68
- 'laquo' => 171,
69
- 'not' => 172,
70
- 'shy' => 173,
71
- 'reg' => 174,
72
- 'macr' => 175,
73
- 'deg' => 176,
74
- 'plusmn' => 177,
75
- 'sup2' => 178,
76
- 'sup3' => 179,
77
- 'acute' => 180,
78
- 'micro' => 181,
79
- 'para' => 182,
80
- 'middot' => 183,
81
- 'cedil' => 184,
82
- 'sup1' => 185,
83
- 'ordm' => 186,
84
- 'raquo' => 187,
85
- 'frac14' => 188,
86
- 'frac12' => 189,
87
- 'frac34' => 190,
88
- 'iquest' => 191,
89
- 'Agrave' => 192,
90
- 'Aacute' => 193,
91
- 'Acirc' => 194,
92
- 'Atilde' => 195,
93
- 'Auml' => 196,
94
- 'Aring' => 197,
95
- 'AElig' => 198,
96
- 'Ccedil' => 199,
97
- 'Egrave' => 200,
98
- 'Eacute' => 201,
99
- 'Ecirc' => 202,
100
- 'Euml' => 203,
101
- 'Igrave' => 204,
102
- 'Iacute' => 205,
103
- 'Icirc' => 206,
104
- 'Iuml' => 207,
105
- 'ETH' => 208,
106
- 'Ntilde' => 209,
107
- 'Ograve' => 210,
108
- 'Oacute' => 211,
109
- 'Ocirc' => 212,
110
- 'Otilde' => 213,
111
- 'Ouml' => 214,
112
- 'times' => 215,
113
- 'Oslash' => 216,
114
- 'Ugrave' => 217,
115
- 'Uacute' => 218,
116
- 'Ucirc' => 219,
117
- 'Uuml' => 220,
118
- 'Yacute' => 221,
119
- 'THORN' => 222,
120
- 'szlig' => 223,
121
- 'agrave' => 224,
122
- 'aacute' => 225,
123
- 'acirc' => 226,
124
- 'atilde' => 227,
125
- 'auml' => 228,
126
- 'aring' => 229,
127
- 'aelig' => 230,
128
- 'ccedil' => 231,
129
- 'egrave' => 232,
130
- 'eacute' => 233,
131
- 'ecirc' => 234,
132
- 'euml' => 235,
133
- 'igrave' => 236,
134
- 'iacute' => 237,
135
- 'icirc' => 238,
136
- 'iuml' => 239,
137
- 'eth' => 240,
138
- 'ntilde' => 241,
139
- 'ograve' => 242,
140
- 'oacute' => 243,
141
- 'ocirc' => 244,
142
- 'otilde' => 245,
143
- 'ouml' => 246,
144
- 'divide' => 247,
145
- 'oslash' => 248,
146
- 'ugrave' => 249,
147
- 'uacute' => 250,
148
- 'ucirc' => 251,
149
- 'uuml' => 252,
150
- 'yacute' => 253,
151
- 'thorn' => 254,
152
- 'yuml' => 255,
153
- 'quot' => 34,
154
- 'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
155
- 'amp' => 38,
156
- 'lt' => 60,
157
- 'gt' => 62,
158
- 'OElig' => 338,
159
- 'oelig' => 339,
160
- 'Scaron' => 352,
161
- 'scaron' => 353,
162
- 'Yuml' => 376,
163
- 'circ' => 710,
164
- 'tilde' => 732,
165
- 'ensp' => 8194,
166
- 'emsp' => 8195,
167
- 'thinsp' => 8201,
168
- 'zwnj' => 8204,
169
- 'zwj' => 8205,
170
- 'lrm' => 8206,
171
- 'rlm' => 8207,
172
- 'ndash' => 8211,
173
- 'mdash' => 8212,
174
- 'lsquo' => 8216,
175
- 'rsquo' => 8217,
176
- 'sbquo' => 8218,
177
- 'ldquo' => 8220,
178
- 'rdquo' => 8221,
179
- 'bdquo' => 8222,
180
- 'dagger' => 8224,
181
- 'Dagger' => 8225,
182
- 'permil' => 8240,
183
- 'lsaquo' => 8249,
184
- 'rsaquo' => 8250,
185
- 'euro' => 8364,
186
- 'fnof' => 402,
187
- 'Alpha' => 913,
188
- 'Beta' => 914,
189
- 'Gamma' => 915,
190
- 'Delta' => 916,
191
- 'Epsilon' => 917,
192
- 'Zeta' => 918,
193
- 'Eta' => 919,
194
- 'Theta' => 920,
195
- 'Iota' => 921,
196
- 'Kappa' => 922,
197
- 'Lambda' => 923,
198
- 'Mu' => 924,
199
- 'Nu' => 925,
200
- 'Xi' => 926,
201
- 'Omicron' => 927,
202
- 'Pi' => 928,
203
- 'Rho' => 929,
204
- 'Sigma' => 931,
205
- 'Tau' => 932,
206
- 'Upsilon' => 933,
207
- 'Phi' => 934,
208
- 'Chi' => 935,
209
- 'Psi' => 936,
210
- 'Omega' => 937,
211
- 'alpha' => 945,
212
- 'beta' => 946,
213
- 'gamma' => 947,
214
- 'delta' => 948,
215
- 'epsilon' => 949,
216
- 'zeta' => 950,
217
- 'eta' => 951,
218
- 'theta' => 952,
219
- 'iota' => 953,
220
- 'kappa' => 954,
221
- 'lambda' => 955,
222
- 'mu' => 956,
223
- 'nu' => 957,
224
- 'xi' => 958,
225
- 'omicron' => 959,
226
- 'pi' => 960,
227
- 'rho' => 961,
228
- 'sigmaf' => 962,
229
- 'sigma' => 963,
230
- 'tau' => 964,
231
- 'upsilon' => 965,
232
- 'phi' => 966,
233
- 'chi' => 967,
234
- 'psi' => 968,
235
- 'omega' => 969,
236
- 'thetasym' => 977,
237
- 'upsih' => 978,
238
- 'piv' => 982,
239
- 'bull' => 8226,
240
- 'hellip' => 8230,
241
- 'prime' => 8242,
242
- 'Prime' => 8243,
243
- 'oline' => 8254,
244
- 'frasl' => 8260,
245
- 'weierp' => 8472,
246
- 'image' => 8465,
247
- 'real' => 8476,
248
- 'trade' => 8482,
249
- 'alefsym' => 8501,
250
- 'larr' => 8592,
251
- 'uarr' => 8593,
252
- 'rarr' => 8594,
253
- 'darr' => 8595,
254
- 'harr' => 8596,
255
- 'crarr' => 8629,
256
- 'lArr' => 8656,
257
- 'uArr' => 8657,
258
- 'rArr' => 8658,
259
- 'dArr' => 8659,
260
- 'hArr' => 8660,
261
- 'forall' => 8704,
262
- 'part' => 8706,
263
- 'exist' => 8707,
264
- 'empty' => 8709,
265
- 'nabla' => 8711,
266
- 'isin' => 8712,
267
- 'notin' => 8713,
268
- 'ni' => 8715,
269
- 'prod' => 8719,
270
- 'sum' => 8721,
271
- 'minus' => 8722,
272
- 'lowast' => 8727,
273
- 'radic' => 8730,
274
- 'prop' => 8733,
275
- 'infin' => 8734,
276
- 'ang' => 8736,
277
- 'and' => 8743,
278
- 'or' => 8744,
279
- 'cap' => 8745,
280
- 'cup' => 8746,
281
- 'int' => 8747,
282
- 'there4' => 8756,
283
- 'sim' => 8764,
284
- 'cong' => 8773,
285
- 'asymp' => 8776,
286
- 'ne' => 8800,
287
- 'equiv' => 8801,
288
- 'le' => 8804,
289
- 'ge' => 8805,
290
- 'sub' => 8834,
291
- 'sup' => 8835,
292
- 'nsub' => 8836,
293
- 'sube' => 8838,
294
- 'supe' => 8839,
295
- 'oplus' => 8853,
296
- 'otimes' => 8855,
297
- 'perp' => 8869,
298
- 'sdot' => 8901,
299
- 'lceil' => 8968,
300
- 'rceil' => 8969,
301
- 'lfloor' => 8970,
302
- 'rfloor' => 8971,
303
- 'lang' => 9001,
304
- 'rang' => 9002,
305
- 'loz' => 9674,
306
- 'spades' => 9824,
307
- 'clubs' => 9827,
308
- 'hearts' => 9829,
309
- 'diams' => 9830,
310
- }
311
- gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
312
- if $1 then
313
- v = @@xhtml_entity_replacements[$1]
314
- # Nonstandard entity
315
- unless v
316
- if extra_entities.is_a? Proc
317
- v = extra_entities.call($1)
318
- # Well, we expect a Hash here, but any container will do.
319
- # As long as it's not a nil.
320
- elsif extra_entities
321
- v = extra_entities[$1]
322
- end
323
- end
324
- raise "Unknown escape #{$1}" unless v
325
- elsif $2
326
- v = $2.to_i
327
- else
328
- v = $3.hex
329
- end
330
- # v can be a String or an Integer
331
- if v.is_a? String then v else [v].pack('U') end
332
- }
333
- end
334
- def xml_parse
335
- XML.parse(self)
336
- end
32
+ # Escape string for output as XML text (< > &)
33
+ def xml_escape
34
+ replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
35
+ gsub(/([<>&])/) { replacements[$1] }
36
+ end
37
+ # Escape characters for output as XML attribute values (< > & ' ")
38
+ def xml_attr_escape
39
+ replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
40
+ gsub(/([<>&\'\"])/) { replacements[$1] }
41
+ end
42
+ # Unescape entities
43
+ # Supports:
44
+ # * Full set of HTML-compatible named entities
45
+ # * Decimal entities &#1234;
46
+ # * Hex entities &#xA0b1;
47
+ def xml_unescape(extra_entities=nil)
48
+ @@xhtml_entity_replacements ||= {
49
+ 'nbsp' => 160,
50
+ 'iexcl' => 161,
51
+ 'cent' => 162,
52
+ 'pound' => 163,
53
+ 'curren' => 164,
54
+ 'yen' => 165,
55
+ 'brvbar' => 166,
56
+ 'sect' => 167,
57
+ 'uml' => 168,
58
+ 'copy' => 169,
59
+ 'ordf' => 170,
60
+ 'laquo' => 171,
61
+ 'not' => 172,
62
+ 'shy' => 173,
63
+ 'reg' => 174,
64
+ 'macr' => 175,
65
+ 'deg' => 176,
66
+ 'plusmn' => 177,
67
+ 'sup2' => 178,
68
+ 'sup3' => 179,
69
+ 'acute' => 180,
70
+ 'micro' => 181,
71
+ 'para' => 182,
72
+ 'middot' => 183,
73
+ 'cedil' => 184,
74
+ 'sup1' => 185,
75
+ 'ordm' => 186,
76
+ 'raquo' => 187,
77
+ 'frac14' => 188,
78
+ 'frac12' => 189,
79
+ 'frac34' => 190,
80
+ 'iquest' => 191,
81
+ 'Agrave' => 192,
82
+ 'Aacute' => 193,
83
+ 'Acirc' => 194,
84
+ 'Atilde' => 195,
85
+ 'Auml' => 196,
86
+ 'Aring' => 197,
87
+ 'AElig' => 198,
88
+ 'Ccedil' => 199,
89
+ 'Egrave' => 200,
90
+ 'Eacute' => 201,
91
+ 'Ecirc' => 202,
92
+ 'Euml' => 203,
93
+ 'Igrave' => 204,
94
+ 'Iacute' => 205,
95
+ 'Icirc' => 206,
96
+ 'Iuml' => 207,
97
+ 'ETH' => 208,
98
+ 'Ntilde' => 209,
99
+ 'Ograve' => 210,
100
+ 'Oacute' => 211,
101
+ 'Ocirc' => 212,
102
+ 'Otilde' => 213,
103
+ 'Ouml' => 214,
104
+ 'times' => 215,
105
+ 'Oslash' => 216,
106
+ 'Ugrave' => 217,
107
+ 'Uacute' => 218,
108
+ 'Ucirc' => 219,
109
+ 'Uuml' => 220,
110
+ 'Yacute' => 221,
111
+ 'THORN' => 222,
112
+ 'szlig' => 223,
113
+ 'agrave' => 224,
114
+ 'aacute' => 225,
115
+ 'acirc' => 226,
116
+ 'atilde' => 227,
117
+ 'auml' => 228,
118
+ 'aring' => 229,
119
+ 'aelig' => 230,
120
+ 'ccedil' => 231,
121
+ 'egrave' => 232,
122
+ 'eacute' => 233,
123
+ 'ecirc' => 234,
124
+ 'euml' => 235,
125
+ 'igrave' => 236,
126
+ 'iacute' => 237,
127
+ 'icirc' => 238,
128
+ 'iuml' => 239,
129
+ 'eth' => 240,
130
+ 'ntilde' => 241,
131
+ 'ograve' => 242,
132
+ 'oacute' => 243,
133
+ 'ocirc' => 244,
134
+ 'otilde' => 245,
135
+ 'ouml' => 246,
136
+ 'divide' => 247,
137
+ 'oslash' => 248,
138
+ 'ugrave' => 249,
139
+ 'uacute' => 250,
140
+ 'ucirc' => 251,
141
+ 'uuml' => 252,
142
+ 'yacute' => 253,
143
+ 'thorn' => 254,
144
+ 'yuml' => 255,
145
+ 'quot' => 34,
146
+ 'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
147
+ 'amp' => 38,
148
+ 'lt' => 60,
149
+ 'gt' => 62,
150
+ 'OElig' => 338,
151
+ 'oelig' => 339,
152
+ 'Scaron' => 352,
153
+ 'scaron' => 353,
154
+ 'Yuml' => 376,
155
+ 'circ' => 710,
156
+ 'tilde' => 732,
157
+ 'ensp' => 8194,
158
+ 'emsp' => 8195,
159
+ 'thinsp' => 8201,
160
+ 'zwnj' => 8204,
161
+ 'zwj' => 8205,
162
+ 'lrm' => 8206,
163
+ 'rlm' => 8207,
164
+ 'ndash' => 8211,
165
+ 'mdash' => 8212,
166
+ 'lsquo' => 8216,
167
+ 'rsquo' => 8217,
168
+ 'sbquo' => 8218,
169
+ 'ldquo' => 8220,
170
+ 'rdquo' => 8221,
171
+ 'bdquo' => 8222,
172
+ 'dagger' => 8224,
173
+ 'Dagger' => 8225,
174
+ 'permil' => 8240,
175
+ 'lsaquo' => 8249,
176
+ 'rsaquo' => 8250,
177
+ 'euro' => 8364,
178
+ 'fnof' => 402,
179
+ 'Alpha' => 913,
180
+ 'Beta' => 914,
181
+ 'Gamma' => 915,
182
+ 'Delta' => 916,
183
+ 'Epsilon' => 917,
184
+ 'Zeta' => 918,
185
+ 'Eta' => 919,
186
+ 'Theta' => 920,
187
+ 'Iota' => 921,
188
+ 'Kappa' => 922,
189
+ 'Lambda' => 923,
190
+ 'Mu' => 924,
191
+ 'Nu' => 925,
192
+ 'Xi' => 926,
193
+ 'Omicron' => 927,
194
+ 'Pi' => 928,
195
+ 'Rho' => 929,
196
+ 'Sigma' => 931,
197
+ 'Tau' => 932,
198
+ 'Upsilon' => 933,
199
+ 'Phi' => 934,
200
+ 'Chi' => 935,
201
+ 'Psi' => 936,
202
+ 'Omega' => 937,
203
+ 'alpha' => 945,
204
+ 'beta' => 946,
205
+ 'gamma' => 947,
206
+ 'delta' => 948,
207
+ 'epsilon' => 949,
208
+ 'zeta' => 950,
209
+ 'eta' => 951,
210
+ 'theta' => 952,
211
+ 'iota' => 953,
212
+ 'kappa' => 954,
213
+ 'lambda' => 955,
214
+ 'mu' => 956,
215
+ 'nu' => 957,
216
+ 'xi' => 958,
217
+ 'omicron' => 959,
218
+ 'pi' => 960,
219
+ 'rho' => 961,
220
+ 'sigmaf' => 962,
221
+ 'sigma' => 963,
222
+ 'tau' => 964,
223
+ 'upsilon' => 965,
224
+ 'phi' => 966,
225
+ 'chi' => 967,
226
+ 'psi' => 968,
227
+ 'omega' => 969,
228
+ 'thetasym' => 977,
229
+ 'upsih' => 978,
230
+ 'piv' => 982,
231
+ 'bull' => 8226,
232
+ 'hellip' => 8230,
233
+ 'prime' => 8242,
234
+ 'Prime' => 8243,
235
+ 'oline' => 8254,
236
+ 'frasl' => 8260,
237
+ 'weierp' => 8472,
238
+ 'image' => 8465,
239
+ 'real' => 8476,
240
+ 'trade' => 8482,
241
+ 'alefsym' => 8501,
242
+ 'larr' => 8592,
243
+ 'uarr' => 8593,
244
+ 'rarr' => 8594,
245
+ 'darr' => 8595,
246
+ 'harr' => 8596,
247
+ 'crarr' => 8629,
248
+ 'lArr' => 8656,
249
+ 'uArr' => 8657,
250
+ 'rArr' => 8658,
251
+ 'dArr' => 8659,
252
+ 'hArr' => 8660,
253
+ 'forall' => 8704,
254
+ 'part' => 8706,
255
+ 'exist' => 8707,
256
+ 'empty' => 8709,
257
+ 'nabla' => 8711,
258
+ 'isin' => 8712,
259
+ 'notin' => 8713,
260
+ 'ni' => 8715,
261
+ 'prod' => 8719,
262
+ 'sum' => 8721,
263
+ 'minus' => 8722,
264
+ 'lowast' => 8727,
265
+ 'radic' => 8730,
266
+ 'prop' => 8733,
267
+ 'infin' => 8734,
268
+ 'ang' => 8736,
269
+ 'and' => 8743,
270
+ 'or' => 8744,
271
+ 'cap' => 8745,
272
+ 'cup' => 8746,
273
+ 'int' => 8747,
274
+ 'there4' => 8756,
275
+ 'sim' => 8764,
276
+ 'cong' => 8773,
277
+ 'asymp' => 8776,
278
+ 'ne' => 8800,
279
+ 'equiv' => 8801,
280
+ 'le' => 8804,
281
+ 'ge' => 8805,
282
+ 'sub' => 8834,
283
+ 'sup' => 8835,
284
+ 'nsub' => 8836,
285
+ 'sube' => 8838,
286
+ 'supe' => 8839,
287
+ 'oplus' => 8853,
288
+ 'otimes' => 8855,
289
+ 'perp' => 8869,
290
+ 'sdot' => 8901,
291
+ 'lceil' => 8968,
292
+ 'rceil' => 8969,
293
+ 'lfloor' => 8970,
294
+ 'rfloor' => 8971,
295
+ 'lang' => 9001,
296
+ 'rang' => 9002,
297
+ 'loz' => 9674,
298
+ 'spades' => 9824,
299
+ 'clubs' => 9827,
300
+ 'hearts' => 9829,
301
+ 'diams' => 9830,
302
+ }
303
+ gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
304
+ if $1 then
305
+ v = @@xhtml_entity_replacements[$1]
306
+ # Nonstandard entity
307
+ unless v
308
+ if extra_entities.is_a? Proc
309
+ v = extra_entities.call($1)
310
+ # Well, we expect a Hash here, but any container will do.
311
+ # As long as it's not a nil.
312
+ elsif extra_entities
313
+ v = extra_entities[$1]
314
+ end
315
+ end
316
+ raise "Unknown escape #{$1}" unless v
317
+ elsif $2
318
+ v = $2.to_i
319
+ else
320
+ v = $3.hex
321
+ end
322
+ # v can be a String or an Integer
323
+ if v.is_a? String then v else [v].pack('U') end
324
+ }
325
+ end
326
+ def xml_parse
327
+ XML.parse(self)
328
+ end
337
329
  end
338
330
 
339
331
  class File
340
- def xml_parse
341
- XML.parse(self)
342
- end
332
+ def xml_parse
333
+ XML.parse(self)
334
+ end
343
335
  end
344
336
 
345
337
  class Array
346
- # children of any element
347
- def children(*args, &blk)
348
- res = []
349
- each{|c|
350
- res += c.children(*args, &blk) if c.is_a? XML
351
- }
352
- res
353
- end
354
- # descendants of any element
355
- def descendants(*args, &blk)
356
- res = []
357
- each{|c|
358
- res += c.descendants(*args, &blk) if c.is_a? XML
359
- }
360
- res
361
- end
338
+ # children of any element
339
+ def children(*args, &blk)
340
+ res = []
341
+ each{|c|
342
+ res += c.children(*args, &blk) if c.is_a? XML
343
+ }
344
+ res
345
+ end
346
+ # descendants of any element
347
+ def descendants(*args, &blk)
348
+ res = []
349
+ each{|c|
350
+ res += c.descendants(*args, &blk) if c.is_a? XML
351
+ }
352
+ res
353
+ end
362
354
  end
363
355
 
364
356
  # Methods of Enumerable.
@@ -372,942 +364,942 @@ end
372
364
  #
373
365
  # FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
374
366
  class XML
375
- include Enumerable
376
- # Default any? is ok
377
- # Default all? is ok
378
-
379
- # Iterate over children, possibly with a selector
380
- def each(*selector, &blk)
381
- children(*selector, &blk)
382
- self
383
- end
367
+ include Enumerable
368
+ # Default any? is ok
369
+ # Default all? is ok
384
370
 
385
- # Sort XML children of XML element.
386
- def sort_by(*args, &blk)
387
- self.dup{ @contents = @contents.select{|c| c.is_a? XML}.sort_by(*args, &blk) }
388
- end
371
+ # Iterate over children, possibly with a selector
372
+ def each(*selector, &blk)
373
+ children(*selector, &blk)
374
+ self
375
+ end
389
376
 
390
- # Sort children of XML element.
391
- def children_sort_by(*args, &blk)
392
- self.dup{ @contents = @contents.sort_by(*args, &blk) }
393
- end
377
+ # Sort XML children of XML element.
378
+ def sort_by(*args, &blk)
379
+ self.dup{ @contents = @contents.select{|c| c.is_a? XML}.sort_by(*args, &blk) }
380
+ end
394
381
 
395
- # Sort children of XML element.
396
- #
397
- # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
398
- # Use sort_by instead.
399
- #
400
- # Unless you define your own XML#<=> operator, or do something equally weird.
401
- def sort(*args, &blk)
402
- self.dup{ @contents = @contents.sort(*args, &blk) }
403
- end
382
+ # Sort children of XML element.
383
+ def children_sort_by(*args, &blk)
384
+ self.dup{ @contents = @contents.sort_by(*args, &blk) }
385
+ end
386
+
387
+ # Sort children of XML element.
388
+ #
389
+ # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
390
+ # Use sort_by instead.
391
+ #
392
+ # Unless you define your own XML#<=> operator, or do something equally weird.
393
+ def sort(*args, &blk)
394
+ self.dup{ @contents = @contents.sort(*args, &blk) }
395
+ end
404
396
 
405
- #collect/map
406
- #detect/find
407
- #each_cons
408
- #each_slice
409
- #each_with_index
410
- #to_a
411
- #entries
412
- #enum_cons
413
- #enum_slice
414
- #enum
415
- # grep
416
- # include?/member?
417
- # inject
418
- # max/min
419
- # max_by/min_by - Ruby 1.9
420
- # partition
421
- # reject
422
- # sort
423
- # sort_by
424
- # to_set
425
- # zip
426
- # And Enumerable::Enumerator-generating methods
397
+ #collect/map
398
+ #detect/find
399
+ #each_cons
400
+ #each_slice
401
+ #each_with_index
402
+ #to_a
403
+ #entries
404
+ #enum_cons
405
+ #enum_slice
406
+ #enum
407
+ # grep
408
+ # include?/member?
409
+ # inject
410
+ # max/min
411
+ # max_by/min_by - Ruby 1.9
412
+ # partition
413
+ # reject
414
+ # sort
415
+ # sort_by
416
+ # to_set
417
+ # zip
418
+ # And Enumerable::Enumerator-generating methods
427
419
  end
428
420
 
429
421
  # Class methods
430
422
  class XML
431
- # XML.foo! == xml!(:foo)
432
- # XML.foo == xml(:foo)
433
- def self.method_missing(meth, *args, &blk)
434
- if meth.to_s =~ /^(.*)!$/
435
- xml!($1.to_sym, *args, &blk)
436
- else
437
- XML.new(meth, *args, &blk)
438
- end
423
+ # XML.foo! == xml!(:foo)
424
+ # XML.foo == xml(:foo)
425
+ def self.method_missing(meth, *args, &blk)
426
+ if meth.to_s =~ /^(.*)!$/
427
+ xml!($1.to_sym, *args, &blk)
428
+ else
429
+ XML.new(meth, *args, &blk)
439
430
  end
431
+ end
440
432
 
441
- # Read file and parse
442
- def self.from_file(file)
443
- file = File.open(file) if file.is_a? String
444
- parse(file)
433
+ # Read file and parse
434
+ def self.from_file(file)
435
+ file = File.open(file) if file.is_a? String
436
+ parse(file)
437
+ end
438
+
439
+ # Fetch URL and parse
440
+ # Supported:
441
+ # http://.../
442
+ # https://.../
443
+ # file:foo.xml
444
+ # string:<foo/>
445
+ def self.from_url(url)
446
+ if url =~ /^string:(.*)$/m
447
+ parse($1)
448
+ elsif url =~ /^file:(.*)$/m
449
+ from_file($1)
450
+ elsif url =~ /^http(s?):/
451
+ ssl = ($1 == "s")
452
+ # No, seriously - Ruby needs something better than net/http
453
+ # Something that groks basic auth and queries and redirects automatically:
454
+ # HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
455
+ # URI parsing must go inside the library, client programs
456
+ # should have nothing to do with it
457
+
458
+ # net/http is really inconvenient to use here
459
+ u = URI.parse(url)
460
+ # You're not seeing this:
461
+ if u.query then
462
+ path = u.path + "?" + u.query
463
+ else
464
+ path = u.path
465
+ end
466
+ req = Net::HTTP::Get.new(path)
467
+ if u.userinfo
468
+ username, passwd = u.userinfo.split(/:/,2)
469
+ req.basic_auth username, passwd
470
+ end
471
+ if ssl
472
+ # NOTE: You need libopenssl-ruby installed
473
+ # if you want to use HTTPS. Ubuntu is broken
474
+ # as it doesn't provide it in the default packages.
475
+ require 'net/https'
476
+ http = Net::HTTP.new(u.host, u.port)
477
+ http.use_ssl = true
478
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
479
+ else
480
+ http = Net::HTTP.new(u.host, u.port)
481
+ end
482
+
483
+ res = http.start {|http_conn| http_conn.request(req) }
484
+ # TODO: Throw a more meaningful exception
485
+ parse(res.body)
486
+ else
487
+ raise "URL protocol #{url} not supported (http, https, file, string are supported)"
445
488
  end
489
+ end
446
490
 
447
- # Fetch URL and parse
448
- # Supported:
449
- # http://.../
450
- # https://.../
451
- # file:foo.xml
452
- # string:<foo/>
453
- def self.from_url(url)
454
- if url =~ /^string:(.*)$/m
455
- parse($1)
456
- elsif url =~ /^file:(.*)$/m
457
- from_file($1)
458
- elsif url =~ /^http(s?):/
459
- ssl = ($1 == "s")
460
- # No, seriously - Ruby needs something better than net/http
461
- # Something that groks basic auth and queries and redirects automatically:
462
- # HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
463
- # URI parsing must go inside the library, client programs
464
- # should have nothing to do with it
465
-
466
- # net/http is really inconvenient to use here
467
- u = URI.parse(url)
468
- # You're not seeing this:
469
- if u.query then
470
- path = u.path + "?" + u.query
471
- else
472
- path = u.path
473
- end
474
- req = Net::HTTP::Get.new(path)
475
- if u.userinfo
476
- username, passwd = u.userinfo.split(/:/,2)
477
- req.basic_auth username, passwd
478
- end
479
- if ssl
480
- # NOTE: You need libopenssl-ruby installed
481
- # if you want to use HTTPS. Ubuntu is broken
482
- # as it doesn't provide it in the default packages.
483
- require 'net/https'
484
- http = Net::HTTP.new(u.host, u.port)
485
- http.use_ssl = true
486
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
487
- else
488
- http = Net::HTTP.new(u.host, u.port)
489
- end
490
-
491
- res = http.start {|http| http.request(req) }
492
- # TODO: Throw a more meaningful exception
493
- parse(res.body)
494
- else
495
- raise "URL protocol #{url} not supported (http, https, file, string are supported)"
496
- end
491
+ # Like CDuce load_xml
492
+ # The path can be:
493
+ # * file handler
494
+ # * URL (a string with :)
495
+ # * file name (a string without :)
496
+ def self.load(obj)
497
+ if obj.is_a? String
498
+ if obj.include? ":"
499
+ from_url(obj)
500
+ else
501
+ from_file(obj)
502
+ end
503
+ else
504
+ parse(obj)
497
505
  end
506
+ end
498
507
 
499
- # Like CDuce load_xml
500
- # The path can be:
501
- # * file handler
502
- # * URL (a string with :)
503
- # * file name (a string without :)
504
- def self.load(obj)
505
- if obj.is_a? String
506
- if obj.include? ":"
507
- from_url(obj)
508
- else
509
- from_file(obj)
508
+ # Parse XML in mixed stream/tree mode
509
+ # Basically the idea is that every time we get start element,
510
+ # we ask the block what to do about it.
511
+ # If it wants a tree below it, it should call e.tree
512
+ # If a tree was requested, elements below the current one
513
+ # are *not* processed. If it wasn't, they are.
514
+ #
515
+ # For example:
516
+ # <foo><bar/></foo><foo2/>
517
+ # yield <foo> ... </foo>
518
+ # .complete! called
519
+ # process <foo2> next
520
+ #
521
+ # But:
522
+ # <foo><bar/></foo><foo2/>
523
+ # yield <foo> ... </foo>
524
+ # .complete! not called
525
+ # process <bar> next
526
+ #
527
+ # FIXME: yielded values are not reusable for now
528
+ # FIXME: make more object-oriented
529
+ def self.parse_as_twigs(stream)
530
+ parser = REXML::Parsers::BaseParser.new stream
531
+ # We don't really need to keep the stack ;-)
532
+ stack = []
533
+ while true
534
+ event = parser.pull
535
+ case event[0]
536
+ when :start_element
537
+ # Now the evil part evil
538
+ attrs = {}
539
+ event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
540
+ node = XML.new(event[1].to_sym, attrs, *event[3..-1])
541
+
542
+ # I can't say it's superelegant
543
+ class <<node
544
+ attr_accessor :do_complete
545
+ def complete!
546
+ if @do_complete
547
+ @do_complete.call
548
+ @do_complete = nil
510
549
  end
511
- else
512
- parse(obj)
550
+ end
513
551
  end
514
- end
552
+ node.do_complete = proc{
553
+ parse_subtree(node, parser)
554
+ }
515
555
 
516
- # Parse XML in mixed stream/tree mode
517
- # Basically the idea is that every time we get start element,
518
- # we ask the block what to do about it.
519
- # If it wants a tree below it, it should call e.tree
520
- # If a tree was requested, elements below the current one
521
- # are *not* processed. If it wasn't, they are.
522
- #
523
- # For example:
524
- # <foo><bar/></foo><foo2/>
525
- # yield <foo> ... </foo>
526
- # .complete! called
527
- # process <foo2> next
528
- #
529
- # But:
530
- # <foo><bar/></foo><foo2/>
531
- # yield <foo> ... </foo>
532
- # .complete! not called
533
- # process <bar> next
534
- #
535
- # FIXME: yielded values are not reusable for now
536
- # FIXME: make more object-oriented
537
- def self.parse_as_twigs(stream)
538
- parser = REXML::Parsers::BaseParser.new stream
539
- # We don't really need to keep the stack ;-)
540
- stack = []
541
- while true
542
- event = parser.pull
543
- case event[0]
544
- when :start_element
545
- # Now the evil part evil
546
- attrs = {}
547
- event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
548
- node = XML.new(event[1].to_sym, attrs, *event[3..-1])
549
-
550
- # I can't say it's superelegant
551
- class <<node
552
- attr_accessor :do_complete
553
- def complete!
554
- if @do_complete
555
- @do_complete.call
556
- @do_complete = nil
557
- end
558
- end
559
- end
560
- node.do_complete = proc{
561
- parse_subtree(node, parser)
562
- }
563
-
564
- yield(node)
565
- if node.do_complete
566
- stack.push node
567
- node.do_complete = nil # It's too late, complete! shouldn't do anything now
568
- end
569
- when :end_element
570
- stack.pop
571
- when :end_document
572
- return
573
- else
574
- # FIXME: Do the right thing.
575
- # For now, ignore *everything* else
576
- # This is totally incorrect, user might want to
577
- # see text, comments and stuff like that anyway
578
- end
556
+ yield(node)
557
+ if node.do_complete
558
+ stack.push node
559
+ node.do_complete = nil # It's too late, complete! shouldn't do anything now
579
560
  end
561
+ when :end_element
562
+ stack.pop
563
+ when :end_document
564
+ return
565
+ else
566
+ # FIXME: Do the right thing.
567
+ # For now, ignore *everything* else
568
+ # This is totally incorrect, user might want to
569
+ # see text, comments and stuff like that anyway
570
+ end
580
571
  end
581
-
582
- # Basically it's a copy of self.parse, ugly ...
583
- def self.parse_subtree(start_node, parser)
584
- stack = [start_node]
585
- res = nil
586
- while true
587
- event = parser.pull
588
- case event[0]
589
- when :start_element
590
- attrs = {}
591
- event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
592
- stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
593
- if stack.size == 1
594
- res = stack[0]
595
- else
596
- stack[-2] << stack[-1]
597
- end
598
- when :end_element
599
- stack.pop
600
- return if stack == []
601
- # Needs unescaping
602
- when :text
603
- # Ignore whitespace
604
- if stack.size == 0
605
- next if event[1] !~ /\S/
606
- raise "Non-whitespace text out of document root"
607
- end
608
- stack[-1] << event[1].xml_unescape
609
- # CDATA is already unescaped
610
- when :cdata
611
- if stack.size == 0
612
- raise "CDATA out of the document root"
613
- end
614
- stack[-1] << event[1]
615
- when :end_document
616
- raise "Parse error: end_document inside a subtree, tags are not balanced"
617
- when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
618
- # Positivery ignore
619
- when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
620
- # Ignore ???
621
- #print "Ignored XML event #{event[0]} when parsing\n"
622
- else
623
- # Huh ? What's that ?
624
- #print "Unknown XML event #{event[0]} when parsing\n"
625
- end
626
- end
627
- res
572
+ end
628
573
 
574
+ # Basically it's a copy of self.parse, ugly ...
575
+ def self.parse_subtree(start_node, parser)
576
+ stack = [start_node]
577
+ res = nil
578
+ while true
579
+ event = parser.pull
580
+ case event[0]
581
+ when :start_element
582
+ attrs = {}
583
+ event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
584
+ stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
585
+ if stack.size == 1
586
+ res = stack[0]
587
+ else
588
+ stack[-2] << stack[-1]
589
+ end
590
+ when :end_element
591
+ stack.pop
592
+ return if stack == []
593
+ # Needs unescaping
594
+ when :text
595
+ # Ignore whitespace
596
+ if stack.size == 0
597
+ next if event[1] !~ /\S/
598
+ raise "Non-whitespace text out of document root"
599
+ end
600
+ stack[-1] << event[1].xml_unescape
601
+ # CDATA is already unescaped
602
+ when :cdata
603
+ if stack.size == 0
604
+ raise "CDATA out of the document root"
605
+ end
606
+ stack[-1] << event[1]
607
+ when :end_document
608
+ raise "Parse error: end_document inside a subtree, tags are not balanced"
609
+ when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
610
+ # Positivery ignore
611
+ when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
612
+ # Ignore ???
613
+ #print "Ignored XML event #{event[0]} when parsing\n"
614
+ else
615
+ # Huh ? What's that ?
616
+ #print "Unknown XML event #{event[0]} when parsing\n"
617
+ end
629
618
  end
619
+ res
630
620
 
631
- # Parse XML using REXML. Available options:
632
- # * :extra_entities => Proc or Hash (default = nil)
633
- # * :remove_pretty_printing => true/false (default = false)
634
- # * :comments => true/false (default = false)
635
- # * :pi => true/false (default = false)
636
- # * :normalize => true/false (default = false) - normalize
637
- # * :multiple_roots => true/false (default=false) - document
638
- # can have any number of roots (instread of one).
639
- # Return all in an array instead of root/nil.
640
- # Also include non-elements (String/PI/Comment) in the return set !!!
641
- #
642
- # FIXME: :comments/:pi will break everything
643
- # if there are comments/PIs outside document root.
644
- # Now PIs are outside the document root more often than not,
645
- # so we're pretty much screwed here.
646
- #
647
- # FIXME: Integrate all kinds of parse, and make them support extra options
648
- #
649
- # FIXME: Benchmark normalize!
650
- #
651
- # FIXME: Benchmark dup-based Enumerable methods
652
- #
653
- # FIXME: Make it possible to include bogus XML_Document superparent,
654
- # and to make it support out-of-root PIs/Comments
655
- def self.parse(stream, options={})
656
- extra_entities = options[:extra_entities]
657
-
658
- parser = REXML::Parsers::BaseParser.new stream
659
- stack = [[]]
660
-
661
- while true
662
- event = parser.pull
663
- case event[0]
664
- when :start_element
665
- attrs = {}
666
- event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
667
- stack << XML.new(event[1].to_sym, attrs, event[3..-1])
668
- stack[-2] << stack[-1]
669
- when :end_element
670
- stack.pop
671
- # Needs unescaping
672
- when :text
673
- e = event[1].xml_unescape(extra_entities)
674
- # Either inside root or in multi-root mode
675
- if stack.size > 1 or options[:multiple_roots]
676
- stack[-1] << e
677
- elsif event[1] !~ /\S/
678
- # Ignore out-of-root whitespace in single-root mode
679
- else
680
- raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
681
- end
682
- # CDATA is already unescaped
683
- when :cdata
684
- e = event[1]
685
- if stack.size > 1 or options[:multiple_roots]
686
- stack[-1] << e
687
- else
688
- raise "CDATA out of the document root"
689
- end
690
- when :comment
691
- next unless options[:comments]
692
- e = XML_Comment.new(event[1])
693
- if stack.size > 1 or options[:multiple_roots]
694
- stack[-1] << e
695
- else
696
- # FIXME: Ugly !
697
- raise "Comments out of the document root"
698
- end
699
- when :processing_instruction
700
- # FIXME: Real PI node
701
- next unless options[:pi]
702
- e = XML_PI.new(event[1], event[2])
703
- if stack.size > 1 or options[:multiple_roots]
704
- stack[-1] << e
705
- else
706
- # FIXME: Ugly !
707
- raise "Processing instruction out of the document root"
708
- end
709
- when :end_document
710
- break
711
- when :xmldecl,:start_doctype,:end_doctype,:elementdecl
712
- # Positivery ignore
713
- when :externalentity,:entity,:attlistdecl,:notationdecl
714
- # Ignore ???
715
- #print "Ignored XML event #{event[0]} when parsing\n"
716
- else
717
- # Huh ? What's that ?
718
- #print "Unknown XML event #{event[0]} when parsing\n"
719
- end
621
+ end
622
+
623
+ # Parse XML using REXML. Available options:
624
+ # * :extra_entities => Proc or Hash (default = nil)
625
+ # * :remove_pretty_printing => true/false (default = false)
626
+ # * :comments => true/false (default = false)
627
+ # * :pi => true/false (default = false)
628
+ # * :normalize => true/false (default = false) - normalize
629
+ # * :multiple_roots => true/false (default=false) - document
630
+ # can have any number of roots (instread of one).
631
+ # Return all in an array instead of root/nil.
632
+ # Also include non-elements (String/PI/Comment) in the return set !!!
633
+ #
634
+ # FIXME: :comments/:pi will break everything
635
+ # if there are comments/PIs outside document root.
636
+ # Now PIs are outside the document root more often than not,
637
+ # so we're pretty much screwed here.
638
+ #
639
+ # FIXME: Integrate all kinds of parse, and make them support extra options
640
+ #
641
+ # FIXME: Benchmark normalize!
642
+ #
643
+ # FIXME: Benchmark dup-based Enumerable methods
644
+ #
645
+ # FIXME: Make it possible to include bogus XML_Document superparent,
646
+ # and to make it support out-of-root PIs/Comments
647
+ def self.parse(stream, options={})
648
+ extra_entities = options[:extra_entities]
649
+
650
+ parser = REXML::Parsers::BaseParser.new stream
651
+ stack = [[]]
652
+
653
+ while true
654
+ event = parser.pull
655
+ case event[0]
656
+ when :start_element
657
+ attrs = {}
658
+ event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
659
+ stack << XML.new(event[1].to_sym, attrs, event[3..-1])
660
+ stack[-2] << stack[-1]
661
+ when :end_element
662
+ stack.pop
663
+ # Needs unescaping
664
+ when :text
665
+ e = event[1].xml_unescape(extra_entities)
666
+ # Either inside root or in multi-root mode
667
+ if stack.size > 1 or options[:multiple_roots]
668
+ stack[-1] << e
669
+ elsif event[1] !~ /\S/
670
+ # Ignore out-of-root whitespace in single-root mode
671
+ else
672
+ raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
720
673
  end
721
- roots = stack[0]
722
-
723
- roots.each{|root| root.remove_pretty_printing!} if options[:remove_pretty_printing]
724
- # :remove_pretty_printing does :normalize anyway
725
- roots.each{|root| root.normalize!} if options[:normalize]
726
- if options[:multiple_roots]
727
- roots
674
+ # CDATA is already unescaped
675
+ when :cdata
676
+ e = event[1]
677
+ if stack.size > 1 or options[:multiple_roots]
678
+ stack[-1] << e
728
679
  else
729
- roots[0]
680
+ raise "CDATA out of the document root"
730
681
  end
682
+ when :comment
683
+ next unless options[:comments]
684
+ e = XML_Comment.new(event[1])
685
+ if stack.size > 1 or options[:multiple_roots]
686
+ stack[-1] << e
687
+ else
688
+ # FIXME: Ugly !
689
+ raise "Comments out of the document root"
690
+ end
691
+ when :processing_instruction
692
+ # FIXME: Real PI node
693
+ next unless options[:pi]
694
+ e = XML_PI.new(event[1], event[2])
695
+ if stack.size > 1 or options[:multiple_roots]
696
+ stack[-1] << e
697
+ else
698
+ # FIXME: Ugly !
699
+ raise "Processing instruction out of the document root"
700
+ end
701
+ when :end_document
702
+ break
703
+ when :xmldecl,:start_doctype,:end_doctype,:elementdecl
704
+ # Positivery ignore
705
+ when :externalentity,:entity,:attlistdecl,:notationdecl
706
+ # Ignore ???
707
+ #print "Ignored XML event #{event[0]} when parsing\n"
708
+ else
709
+ # Huh ? What's that ?
710
+ #print "Unknown XML event #{event[0]} when parsing\n"
711
+ end
731
712
  end
713
+ roots = stack[0]
732
714
 
733
- # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
734
- def self.parse_sequence(stream, options={})
735
- o = options.dup
736
- o[:multiple_roots] = true
737
- parse(stream, o)
715
+ roots.each{|root| root.remove_pretty_printing!} if options[:remove_pretty_printing]
716
+ # :remove_pretty_printing does :normalize anyway
717
+ roots.each{|root| root.normalize!} if options[:normalize]
718
+ if options[:multiple_roots]
719
+ roots
720
+ else
721
+ roots[0]
738
722
  end
723
+ end
739
724
 
740
- # Renormalize a string containing XML document
741
- def self.renormalize(stream)
742
- parse(stream).to_s
743
- end
725
+ # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
726
+ def self.parse_sequence(stream, options={})
727
+ o = options.dup
728
+ o[:multiple_roots] = true
729
+ parse(stream, o)
730
+ end
744
731
 
745
- # Renormalize a string containing a sequence of XML documents
746
- # and strings
747
- # XMLrenormalize_sequence("<hello />, <world></world>!") =>
748
- # "<hello/>, <world/>!"
749
- def self.renormalize_sequence(stream)
750
- parse_sequence(stream).join
751
- end
732
+ # Renormalize a string containing XML document
733
+ def self.renormalize(stream)
734
+ parse(stream).to_s
735
+ end
736
+
737
+ # Renormalize a string containing a sequence of XML documents
738
+ # and strings
739
+ # XMLrenormalize_sequence("<hello />, <world></world>!") =>
740
+ # "<hello/>, <world/>!"
741
+ def self.renormalize_sequence(stream)
742
+ parse_sequence(stream).join
743
+ end
752
744
  end
753
745
 
754
746
  # Instance methods (other than those of Enumerable)
755
747
  class XML
756
- attr_accessor :name, :attrs, :contents
757
-
758
- # initialize can be run in many ways
759
- # * XML.new
760
- # * XML.new(:tag_symbol)
761
- # * XML.new(:tag_symbol, {attributes})
762
- # * XML.new(:tag_symbol, "children", "more", XML.new(...))
763
- # * XML.new(:tag_symbol, {attributes}, "and", "children")
764
- # * XML.new(:tag_symbol) { monadic code }
765
- # * XML.new(:tag_symbol, {attributes}) { monadic code }
766
- #
767
- # Or even:
768
- # * XML.new(:tag_symbol, "children") { and some monadic code }
769
- # * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
770
- # But typically you won't be mixing these two style
771
- #
772
- # Attribute values can will be converted to strings
773
- def initialize(*args, &blk)
774
- @name = nil
775
- @attrs = {}
776
- @contents = []
777
- @name = args.shift if args.size != 0
778
- if args.size != 0 and args[0].is_a? Hash
779
- args.shift.each{|k,v|
780
- # Do automatic conversion here
781
- # This also assures that the hashes are *not* shared
782
- self[k] = v
783
- }
784
- end
785
- # Expand Arrays passed as arguments
786
- self << args
787
- # FIXME: We'd rather not have people say @name = :foo there :-)
788
- if blk
789
- instance_eval(&blk)
790
- end
748
+ attr_accessor :name, :attrs, :contents
749
+
750
+ # initialize can be run in many ways
751
+ # * XML.new
752
+ # * XML.new(:tag_symbol)
753
+ # * XML.new(:tag_symbol, {attributes})
754
+ # * XML.new(:tag_symbol, "children", "more", XML.new(...))
755
+ # * XML.new(:tag_symbol, {attributes}, "and", "children")
756
+ # * XML.new(:tag_symbol) { monadic code }
757
+ # * XML.new(:tag_symbol, {attributes}) { monadic code }
758
+ #
759
+ # Or even:
760
+ # * XML.new(:tag_symbol, "children") { and some monadic code }
761
+ # * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
762
+ # But typically you won't be mixing these two style
763
+ #
764
+ # Attribute values can will be converted to strings
765
+ def initialize(*args, &blk)
766
+ @name = nil
767
+ @attrs = {}
768
+ @contents = []
769
+ @name = args.shift if args.size != 0
770
+ if args.size != 0 and args[0].is_a? Hash
771
+ args.shift.each{|k,v|
772
+ # Do automatic conversion here
773
+ # This also assures that the hashes are *not* shared
774
+ self[k] = v
775
+ }
776
+ end
777
+ # Expand Arrays passed as arguments
778
+ self << args
779
+ # FIXME: We'd rather not have people say @name = :foo there :-)
780
+ if blk
781
+ instance_eval(&blk)
791
782
  end
783
+ end
792
784
 
793
- # Convert to a well-formatted XML
794
- def to_s
795
- "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
796
- if @contents.size == 0
797
- "/>"
798
- else
799
- ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.to_s end}.join + "</#{name}>"
800
- end
785
+ # Convert to a well-formatted XML
786
+ def to_s
787
+ "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
788
+ if @contents.size == 0
789
+ "/>"
790
+ else
791
+ ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.to_s end}.join + "</#{name}>"
801
792
  end
793
+ end
802
794
 
803
- # Convert to a well-formatted XML, but without children information.
804
- # This is a reasonable format for irb and debugging.
805
- # If you want to see a few levels of children, call inspect(2) and so on
806
- def inspect(include_children=0)
807
- "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
808
- if @contents.size == 0
809
- "/>"
810
- elsif include_children == 0
811
- ">...</#{name}>"
812
- else
813
- ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.inspect(include_children-1) end}.join + "</#{name}>"
814
- end
795
+ # Convert to a well-formatted XML, but without children information.
796
+ # This is a reasonable format for irb and debugging.
797
+ # If you want to see a few levels of children, call inspect(2) and so on
798
+ def inspect(include_children=0)
799
+ "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
800
+ if @contents.size == 0
801
+ "/>"
802
+ elsif include_children == 0
803
+ ">...</#{name}>"
804
+ else
805
+ ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.inspect(include_children-1) end}.join + "</#{name}>"
815
806
  end
807
+ end
816
808
 
817
- # Read attributes.
818
- # Also works with pseudoattributes:
819
- # img[:@x] == img.child(:x).text # or nil if there isn't any.
820
- def [](key)
821
- if key.to_s[0] == ?@
822
- tag = key.to_s[1..-1].to_sym
823
- c = child(tag)
824
- if c
825
- c.text
826
- else
827
- nil
828
- end
829
- else
830
- @attrs[key]
831
- end
809
+ # Read attributes.
810
+ # Also works with pseudoattributes:
811
+ # img[:@x] == img.child(:x).text # or nil if there isn't any.
812
+ def [](key)
813
+ if key.to_s[0] == ?@
814
+ tag = key.to_s[1..-1].to_sym
815
+ c = child(tag)
816
+ if c
817
+ c.text
818
+ else
819
+ nil
820
+ end
821
+ else
822
+ @attrs[key]
832
823
  end
824
+ end
833
825
 
834
- # Set attributes.
835
- # Value is automatically converted to String, so you can say:
836
- # img[:x] = 200
837
- # Also works with pseudoattributes:
838
- # foo[:@bar] = "x"
839
- def []=(key, value)
840
- if key.to_s[0] == ?@
841
- tag = key.to_s[1..-1].to_sym
842
- c = child(tag)
843
- if c
844
- c.contents = [value.to_s]
845
- else
846
- self << XML.new(tag, value.to_s)
847
- end
848
- else
849
- @attrs[key] = value.to_s
850
- end
826
+ # Set attributes.
827
+ # Value is automatically converted to String, so you can say:
828
+ # img[:x] = 200
829
+ # Also works with pseudoattributes:
830
+ # foo[:@bar] = "x"
831
+ def []=(key, value)
832
+ if key.to_s[0] == ?@
833
+ tag = key.to_s[1..-1].to_sym
834
+ c = child(tag)
835
+ if c
836
+ c.contents = [value.to_s]
837
+ else
838
+ self << XML.new(tag, value.to_s)
839
+ end
840
+ else
841
+ @attrs[key] = value.to_s
851
842
  end
843
+ end
852
844
 
853
- # Add children.
854
- # Possible uses:
855
- # * Add single element
856
- # self << xml(...)
857
- # self << "foo"
858
- # Add nothing:
859
- # self << nil
860
- # Add multiple elements (also works recursively):
861
- # self << [a, b, c]
862
- # self << [a, [b, c], d]
863
- def <<(cnt)
864
- if cnt.nil?
865
- # skip
866
- elsif cnt.is_a? Array
867
- cnt.each{|elem| self << elem}
868
- else
869
- @contents << cnt
870
- end
871
- self
845
+ # Add children.
846
+ # Possible uses:
847
+ # * Add single element
848
+ # self << xml(...)
849
+ # self << "foo"
850
+ # Add nothing:
851
+ # self << nil
852
+ # Add multiple elements (also works recursively):
853
+ # self << [a, b, c]
854
+ # self << [a, [b, c], d]
855
+ def <<(cnt)
856
+ if cnt.nil?
857
+ # skip
858
+ elsif cnt.is_a? Array
859
+ cnt.each{|elem| self << elem}
860
+ else
861
+ @contents << cnt
872
862
  end
863
+ self
864
+ end
873
865
 
874
- # Equality test, works as if XMLs were normalized, so:
875
- # XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
876
- def ==(x)
877
- return false unless x.is_a? XML
878
- return false unless name == x.name and attrs == x.attrs
879
- # Now the hard part, strings can be split in different ways
880
- # empty string children are possible etc.
881
- self_i = 0
882
- othr_i = 0
883
- while self_i != contents.size or othr_i != x.contents.size
884
- # Ignore ""s
885
- if contents[self_i].is_a? String and contents[self_i] == ""
886
- self_i += 1
887
- next
888
- end
889
- if x.contents[othr_i].is_a? String and x.contents[othr_i] == ""
890
- othr_i += 1
891
- next
892
- end
866
+ # Equality test, works as if XMLs were normalized, so:
867
+ # XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
868
+ def ==(x)
869
+ return false unless x.is_a? XML
870
+ return false unless name == x.name and attrs == x.attrs
871
+ # Now the hard part, strings can be split in different ways
872
+ # empty string children are possible etc.
873
+ self_i = 0
874
+ othr_i = 0
875
+ while self_i != contents.size or othr_i != x.contents.size
876
+ # Ignore ""s
877
+ if contents[self_i].is_a? String and contents[self_i] == ""
878
+ self_i += 1
879
+ next
880
+ end
881
+ if x.contents[othr_i].is_a? String and x.contents[othr_i] == ""
882
+ othr_i += 1
883
+ next
884
+ end
893
885
 
894
- # If one is finished and the other contains non-empty elements,
895
- # they are not equal
896
- return false if self_i == contents.size or othr_i == x.contents.size
897
-
898
- # Are they both Strings ?
899
- # Strings can be divided in different ways, and calling normalize!
900
- # here would be rather expensive, so let's use this complicated
901
- # algorithm
902
- if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
903
- a = contents[self_i]
904
- b = x.contents[othr_i]
905
- self_i += 1
906
- othr_i += 1
907
- while a != "" or b != ""
908
- if a == b
909
- a = ""
910
- b = ""
911
- elsif a.size > b.size and a[0, b.size] == b
912
- a = a[b.size..-1]
913
- if x.contents[othr_i].is_a? String
914
- b = x.contents[othr_i]
915
- othr_i += 1
916
- next
917
- end
918
- elsif b.size > a.size and b[0, a.size] == a
919
- b = b[a.size..-1]
920
- if contents[self_i].is_a? String
921
- a = contents[self_i]
922
- self_i += 1
923
- next
924
- end
925
- else
926
- return false
927
- end
928
- end
929
- next
930
- end
886
+ # If one is finished and the other contains non-empty elements,
887
+ # they are not equal
888
+ return false if self_i == contents.size or othr_i == x.contents.size
931
889
 
932
- # OK, so at least one of them is not a String.
933
- # Hopefully they're either both XMLs or one is an XML and the
934
- # other is a String. It is also possible that contents contains
935
- # something illegal, but we aren't catching that,
936
- # so xml(:foo, Garbage.new) is going to at least equal itself.
937
- # And we aren't, because xml(:foo, Garbage.new) == xml(:bar, Garbage.new)
938
- # is going to return an honest false, and incoherent sanity
939
- # check is worse than no sanity check.
940
- #
941
- # Oh yeah, they can be XML_PI or XML_Comment. In such case, this
942
- # is ok.
943
- return false unless contents[self_i] == x.contents[othr_i]
944
- self_i += 1
945
- othr_i += 1
890
+ # Are they both Strings ?
891
+ # Strings can be divided in different ways, and calling normalize!
892
+ # here would be rather expensive, so let's use this complicated
893
+ # algorithm
894
+ if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
895
+ a = contents[self_i]
896
+ b = x.contents[othr_i]
897
+ self_i += 1
898
+ othr_i += 1
899
+ while a != "" or b != ""
900
+ if a == b
901
+ a = ""
902
+ b = ""
903
+ elsif a.size > b.size and a[0, b.size] == b
904
+ a = a[b.size..-1]
905
+ if x.contents[othr_i].is_a? String
906
+ b = x.contents[othr_i]
907
+ othr_i += 1
908
+ next
909
+ end
910
+ elsif b.size > a.size and b[0, a.size] == a
911
+ b = b[a.size..-1]
912
+ if contents[self_i].is_a? String
913
+ a = contents[self_i]
914
+ self_i += 1
915
+ next
916
+ end
917
+ else
918
+ return false
919
+ end
946
920
  end
947
- return true
948
- end
921
+ next
922
+ end
949
923
 
950
- alias_method :real_method_missing, :method_missing
951
- # Define all foo!-methods for monadic interface, so you can write:
952
- #
953
- def method_missing(meth, *args, &blk)
954
- if meth.to_s =~ /^(.*)!$/
955
- self << XML.new($1.to_sym, *args, &blk)
956
- else
957
- real_method_missing(meth, *args, &blk)
958
- end
924
+ # OK, so at least one of them is not a String.
925
+ # Hopefully they're either both XMLs or one is an XML and the
926
+ # other is a String. It is also possible that contents contains
927
+ # something illegal, but we aren't catching that,
928
+ # so xml(:foo, Garbage.new) is going to at least equal itself.
929
+ # And we aren't, because xml(:foo, Garbage.new) == xml(:bar, Garbage.new)
930
+ # is going to return an honest false, and incoherent sanity
931
+ # check is worse than no sanity check.
932
+ #
933
+ # Oh yeah, they can be XML_PI or XML_Comment. In such case, this
934
+ # is ok.
935
+ return false unless contents[self_i] == x.contents[othr_i]
936
+ self_i += 1
937
+ othr_i += 1
959
938
  end
939
+ return true
940
+ end
960
941
 
961
- # Make monadic interface more "official"
962
- # * node.exec! { foo!; bar! }
963
- # is equivalent to
964
- # * node << xml(:foo) << xml(:bar)
965
- def exec!(&blk)
966
- instance_eval(&blk)
942
+ alias_method :real_method_missing, :method_missing
943
+ # Define all foo!-methods for monadic interface, so you can write:
944
+ #
945
+ def method_missing(meth, *args, &blk)
946
+ if meth.to_s =~ /^(.*)!$/
947
+ self << XML.new($1.to_sym, *args, &blk)
948
+ else
949
+ real_method_missing(meth, *args, &blk)
967
950
  end
951
+ end
968
952
 
969
- # Select a subtree
970
- # NOTE: Uses object_id of the start/end tags !
971
- # They have to be the same, not just identical !
972
- # <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
973
- # returns
974
- # <foo><b/><c/></foo>
975
- # start and end and their descendants are not included in
976
- # the result tree.
977
- # Either start or end can be nil.
978
- # * If both start and end are nil, return whole tree.
979
- # * If start is nil, return subtree up to range_end.
980
- # * If start is not inside the tree, return nil.
981
- # * If end is nil, return subtree from start
982
- # * If end is not inside the tree, return subtree from start.
983
- # * If end is before or below start, or they're the same node, the result is unspecified.
984
- # * if end comes directly after start, or as first node when start==nil, return path reaching there.
985
- def range(range_start, range_end, end_reached_cb=nil)
986
- if range_start == nil
987
- result = XML.new(name, attrs)
988
- else
989
- result = nil
990
- end
991
- @contents.each {|c|
992
- # end reached !
993
- if range_end and c.object_id == range_end.object_id
994
- end_reached_cb.call if end_reached_cb
995
- break
996
- end
997
- # start reached !
998
- if range_start and c.object_id == range_start.object_id
999
- result = XML.new(name, attrs)
1000
- next
1001
- end
1002
- if result # We already started
1003
- if c.is_a? XML
1004
- break_me = false
1005
- result.add! c.range(nil, range_end, lambda{ break_me = true })
1006
- if break_me
1007
- end_reached_cb.call if end_reached_cb
1008
- break
1009
- end
1010
- else # String/XML_PI/XML_Comment
1011
- result.add! c
1012
- end
1013
- else
1014
- # Strings/XML_PI/XML_Comment obviously cannot start a range
1015
- if c.is_a? XML
1016
- break_me = false
1017
- r = c.range(range_start, range_end, lambda{ break_me = true })
1018
- if r
1019
- # start reached !
1020
- result = XML.new(name, attrs, r)
1021
- end
1022
- if break_me
1023
- # end reached !
1024
- end_reached_cb.call if end_reached_cb
1025
- break
1026
- end
1027
- end
1028
- end
1029
- }
1030
- return result
1031
- end
953
+ # Make monadic interface more "official"
954
+ # * node.exec! { foo!; bar! }
955
+ # is equivalent to
956
+ # * node << xml(:foo) << xml(:bar)
957
+ def exec!(&blk)
958
+ instance_eval(&blk)
959
+ end
1032
960
 
1033
- # XML#subsequence is similar to XML#range, but instead of
1034
- # trimmed subtree in returns a list of elements
1035
- # The same elements are included in both cases, but here
1036
- # we do not include any parents !
1037
- #
1038
- # <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
1039
- # <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
1040
- #
1041
- # <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
1042
- # <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
1043
- #
1044
- # <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
1045
- # <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
1046
- #
1047
- # And we return [], not nil if nothing matches
1048
- def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
1049
- result = []
1050
- start_seen = range_start.nil?
1051
- @contents.each{|c|
1052
- if range_end and range_end.object_id == c.object_id
1053
- end_seen_cb.call if end_seen_cb
1054
- break
1055
- end
1056
- if range_start and range_start.object_id == c.object_id
1057
- start_seen = true
1058
- start_seen_cb.call if start_seen_cb
1059
- next
1060
- end
1061
- if start_seen
1062
- if c.is_a? XML
1063
- break_me = false
1064
- result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
1065
- break if break_me
1066
- else # String/XML_PI/XML_Comment
1067
- result << c
1068
- end
1069
- else
1070
- # String/XML_PI/XML_Comment cannot start a subsequence
1071
- if c.is_a? XML
1072
- break_me = false
1073
- result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
1074
- break if break_me
1075
- end
1076
- end
1077
- }
1078
- # Include starting tag if it was right from the range_start
1079
- # Otherwise, return just the raw sequence
1080
- result = [XML.new(@name, @attrs, result)] if range_start == nil
1081
- return result
961
+ # Select a subtree
962
+ # NOTE: Uses object_id of the start/end tags !
963
+ # They have to be the same, not just identical !
964
+ # <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
965
+ # returns
966
+ # <foo><b/><c/></foo>
967
+ # start and end and their descendants are not included in
968
+ # the result tree.
969
+ # Either start or end can be nil.
970
+ # * If both start and end are nil, return whole tree.
971
+ # * If start is nil, return subtree up to range_end.
972
+ # * If start is not inside the tree, return nil.
973
+ # * If end is nil, return subtree from start
974
+ # * If end is not inside the tree, return subtree from start.
975
+ # * If end is before or below start, or they're the same node, the result is unspecified.
976
+ # * if end comes directly after start, or as first node when start==nil, return path reaching there.
977
+ def range(range_start, range_end, end_reached_cb=nil)
978
+ if range_start == nil
979
+ result = XML.new(name, attrs)
980
+ else
981
+ result = nil
1082
982
  end
983
+ @contents.each {|c|
984
+ # end reached !
985
+ if range_end and c.object_id == range_end.object_id
986
+ end_reached_cb.call if end_reached_cb
987
+ break
988
+ end
989
+ # start reached !
990
+ if range_start and c.object_id == range_start.object_id
991
+ result = XML.new(name, attrs)
992
+ next
993
+ end
994
+ if result # We already started
995
+ if c.is_a? XML
996
+ break_me = false
997
+ result.add! c.range(nil, range_end, lambda{ break_me = true })
998
+ if break_me
999
+ end_reached_cb.call if end_reached_cb
1000
+ break
1001
+ end
1002
+ else # String/XML_PI/XML_Comment
1003
+ result.add! c
1004
+ end
1005
+ else
1006
+ # Strings/XML_PI/XML_Comment obviously cannot start a range
1007
+ if c.is_a? XML
1008
+ break_me = false
1009
+ r = c.range(range_start, range_end, lambda{ break_me = true })
1010
+ if r
1011
+ # start reached !
1012
+ result = XML.new(name, attrs, r)
1013
+ end
1014
+ if break_me
1015
+ # end reached !
1016
+ end_reached_cb.call if end_reached_cb
1017
+ break
1018
+ end
1019
+ end
1020
+ end
1021
+ }
1022
+ return result
1023
+ end
1083
1024
 
1084
- # =~ for a few reasonable patterns
1085
- def =~(pattern)
1086
- if pattern.is_a? Symbol
1087
- @name == pattern
1088
- elsif pattern.is_a? Regexp
1089
- rv = text =~ pattern
1090
- else # Hash, Pattern_any, Pattern_all
1091
- pattern === self
1025
+ # XML#subsequence is similar to XML#range, but instead of
1026
+ # trimmed subtree in returns a list of elements
1027
+ # The same elements are included in both cases, but here
1028
+ # we do not include any parents !
1029
+ #
1030
+ # <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
1031
+ # <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
1032
+ #
1033
+ # <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
1034
+ # <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
1035
+ #
1036
+ # <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
1037
+ # <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
1038
+ #
1039
+ # And we return [], not nil if nothing matches
1040
+ def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
1041
+ result = []
1042
+ start_seen = range_start.nil?
1043
+ @contents.each{|c|
1044
+ if range_end and range_end.object_id == c.object_id
1045
+ end_seen_cb.call if end_seen_cb
1046
+ break
1047
+ end
1048
+ if range_start and range_start.object_id == c.object_id
1049
+ start_seen = true
1050
+ start_seen_cb.call if start_seen_cb
1051
+ next
1052
+ end
1053
+ if start_seen
1054
+ if c.is_a? XML
1055
+ break_me = false
1056
+ result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
1057
+ break if break_me
1058
+ else # String/XML_PI/XML_Comment
1059
+ result << c
1092
1060
  end
1093
- end
1094
-
1095
- # Get rid of pretty-printing whitespace. Also normalizes the XML.
1096
- def remove_pretty_printing!(exceptions=nil)
1097
- normalize!
1098
- real_remove_pretty_printing!(exceptions)
1099
- normalize!
1100
- end
1061
+ else
1062
+ # String/XML_PI/XML_Comment cannot start a subsequence
1063
+ if c.is_a? XML
1064
+ break_me = false
1065
+ result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
1066
+ break if break_me
1067
+ end
1068
+ end
1069
+ }
1070
+ # Include starting tag if it was right from the range_start
1071
+ # Otherwise, return just the raw sequence
1072
+ result = [XML.new(@name, @attrs, result)] if range_start == nil
1073
+ return result
1074
+ end
1101
1075
 
1102
- # normalize! is already recursive, so only one call at top level is needed.
1103
- # This helper method lets us avoid extra calls to normalize!.
1104
- def real_remove_pretty_printing!(exceptions=nil)
1105
- return if exceptions and exceptions.include? @name
1106
- each{|c|
1107
- if c.is_a? String
1108
- c.sub!(/^\s+/, "")
1109
- c.sub!(/\s+$/, "")
1110
- c.gsub!(/\s+/, " ")
1111
- elsif c.is_a? XML_PI or c.is_a? XML_Comment
1112
- else
1113
- c.real_remove_pretty_printing!(exceptions)
1114
- end
1115
- }
1076
+ # =~ for a few reasonable patterns
1077
+ def =~(pattern)
1078
+ if pattern.is_a? Symbol
1079
+ @name == pattern
1080
+ elsif pattern.is_a? Regexp
1081
+ text =~ pattern
1082
+ else # Hash, Pattern_any, Pattern_all
1083
+ pattern === self
1116
1084
  end
1085
+ end
1117
1086
 
1118
- protected :real_remove_pretty_printing!
1087
+ # Get rid of pretty-printing whitespace. Also normalizes the XML.
1088
+ def remove_pretty_printing!(exceptions=nil)
1089
+ normalize!
1090
+ real_remove_pretty_printing!(exceptions)
1091
+ normalize!
1092
+ end
1119
1093
 
1120
- # Add pretty-printing whitespace. Also normalizes the XML.
1121
- def add_pretty_printing!
1122
- normalize!
1123
- real_add_pretty_printing!
1124
- normalize!
1125
- end
1126
-
1127
- def real_add_pretty_printing!(indent = "")
1128
- return if @contents.empty?
1129
- each{|c|
1130
- if c.is_a? XML
1131
- c.real_add_pretty_printing!(indent+" ")
1132
- elsif c.is_a? String
1133
- c.gsub!(/\n\s*/, "\n#{indent} ")
1134
- end
1135
- }
1136
- @contents = @contents.inject([]){|children, c| children + ["\n#{indent} ", c]}+["\n#{indent}"]
1137
- end
1094
+ # normalize! is already recursive, so only one call at top level is needed.
1095
+ # This helper method lets us avoid extra calls to normalize!.
1096
+ def real_remove_pretty_printing!(exceptions=nil)
1097
+ return if exceptions and exceptions.include? @name
1098
+ each{|c|
1099
+ if c.is_a? String
1100
+ c.sub!(/^\s+/, "")
1101
+ c.sub!(/\s+$/, "")
1102
+ c.gsub!(/\s+/, " ")
1103
+ elsif c.is_a? XML_PI or c.is_a? XML_Comment
1104
+ else
1105
+ c.real_remove_pretty_printing!(exceptions)
1106
+ end
1107
+ }
1108
+ end
1138
1109
 
1139
- protected :real_add_pretty_printing!
1140
-
1141
- alias_method :raw_dup, :dup
1142
- # This is not a trivial method - first it does a *deep* copy,
1143
- # second it takes a block which is instance_eval'ed,
1144
- # so you can do things like:
1145
- # * node.dup{ @name = :foo }
1146
- # * node.dup{ self[:color] = "blue" }
1147
- def dup(&blk)
1148
- new_obj = self.raw_dup
1149
- # Attr values stay shared - ugly
1150
- new_obj.attrs = new_obj.attrs.dup
1151
- new_obj.contents = new_obj.contents.map{|c| c.dup}
1152
-
1153
- new_obj.instance_eval(&blk) if blk
1154
- return new_obj
1155
- end
1110
+ protected :real_remove_pretty_printing!
1156
1111
 
1112
+ # Add pretty-printing whitespace. Also normalizes the XML.
1113
+ def add_pretty_printing!
1114
+ normalize!
1115
+ real_add_pretty_printing!
1116
+ normalize!
1117
+ end
1157
1118
 
1158
- # Add some String children (all attributes get to_s'ed)
1159
- def text!(*args)
1160
- args.each{|s| self << s.to_s}
1161
- end
1162
- # Add XML child
1163
- def xml!(*args, &blk)
1164
- @contents << XML.new(*args, &blk)
1165
- end
1119
+ def real_add_pretty_printing!(indent = "")
1120
+ return if @contents.empty?
1121
+ each{|c|
1122
+ if c.is_a? XML
1123
+ c.real_add_pretty_printing!(indent+" ")
1124
+ elsif c.is_a? String
1125
+ c.gsub!(/\n\s*/, "\n#{indent} ")
1126
+ end
1127
+ }
1128
+ @contents = @contents.inject([]){|children, c| children + ["\n#{indent} ", c]}+["\n#{indent}"]
1129
+ end
1166
1130
 
1167
- alias_method :add!, :<<
1168
-
1169
- # Normalization means joining strings
1170
- # and getting rid of ""s, recursively
1171
- def normalize!
1172
- new_contents = []
1173
- @contents.each{|c|
1174
- if c.is_a? String
1175
- next if c == ""
1176
- if new_contents[-1].is_a? String
1177
- new_contents[-1] += c
1178
- next
1179
- end
1180
- else
1181
- c.normalize!
1182
- end
1183
- new_contents.push c
1184
- }
1185
- @contents = new_contents
1186
- end
1131
+ protected :real_add_pretty_printing!
1187
1132
 
1188
- # Return text below the node, stripping all XML tags,
1189
- # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1190
- # returns "Hello, world!"
1191
- def text
1192
- res = ""
1193
- @contents.each{|c|
1194
- if c.is_a? XML
1195
- res << c.text
1196
- elsif c.is_a? String
1197
- res << c
1198
- end # Ignore XML_PI/XML_Comment
1199
- }
1200
- res
1201
- end
1133
+ alias_method :raw_dup, :dup
1134
+ # This is not a trivial method - first it does a *deep* copy,
1135
+ # second it takes a block which is instance_eval'ed,
1136
+ # so you can do things like:
1137
+ # * node.dup{ @name = :foo }
1138
+ # * node.dup{ self[:color] = "blue" }
1139
+ def dup(&blk)
1140
+ new_obj = self.raw_dup
1141
+ # Attr values stay shared - ugly
1142
+ new_obj.attrs = new_obj.attrs.dup
1143
+ new_obj.contents = new_obj.contents.map{|c| c.dup}
1202
1144
 
1203
- # Equivalent to node.children(pat, *rest)[0]
1204
- # Returns nil if there aren't any matching children
1205
- def child(pat=nil, *rest)
1206
- children(pat, *rest) {|c|
1207
- return c
1208
- }
1209
- return nil
1210
- end
1145
+ new_obj.instance_eval(&blk) if blk
1146
+ return new_obj
1147
+ end
1211
1148
 
1212
- # Equivalent to node.descendants(pat, *rest)[0]
1213
- # Returns nil if there aren't any matching descendants
1214
- def descendant(pat=nil, *rest)
1215
- descendants(pat, *rest) {|c|
1216
- return c
1217
- }
1218
- return nil
1219
- end
1220
1149
 
1221
- # XML#children(pattern, more_patterns)
1222
- # Return all children of a node with tags matching tag.
1223
- # Also:
1224
- # * children(:a, :b) == children(:a).children(:b)
1225
- # * children(:a, :*, :c) == children(:a).descendants(:c)
1226
- def children(pat=nil, *rest, &blk)
1227
- return descendants(*rest, &blk) if pat == :*
1228
- res = []
1229
- @contents.each{|c|
1230
- if pat.nil? or pat === c
1231
- if rest == []
1232
- res << c
1233
- yield c if block_given?
1234
- else
1235
- res += c.children(*rest, &blk)
1236
- end
1237
- end
1238
- }
1239
- res
1240
- end
1241
-
1242
- # * XML#descendants
1243
- # * XML#descendants(pattern)
1244
- # * XML#descendants(pattern, more_patterns)
1245
- #
1246
- # Return all descendants of a node matching the pattern.
1247
- # If pattern==nil, simply return all descendants.
1248
- # Optionally run a block on each of them if a block was given.
1249
- # If pattern==nil, also match Strings !
1250
- def descendants(pat=nil, *rest, &blk)
1251
- res = []
1252
- @contents.each{|c|
1253
- if pat.nil? or pat === c
1254
- if rest == []
1255
- res << c
1256
- yield c if block_given?
1257
- else
1258
- res += c.children(*rest, &blk)
1259
- end
1260
- end
1261
- if c.is_a? XML
1262
- res += c.descendants(pat, *rest, &blk)
1263
- end
1264
- }
1265
- res
1266
- end
1267
-
1268
- # Change elements based on pattern
1269
- def deep_map(pat, &blk)
1270
- if self =~ pat
1271
- yield self
1150
+ # Add some String children (all attributes get to_s'ed)
1151
+ def text!(*args)
1152
+ args.each{|s| self << s.to_s}
1153
+ end
1154
+ # Add XML child
1155
+ def xml!(*args, &blk)
1156
+ @contents << XML.new(*args, &blk)
1157
+ end
1158
+
1159
+ alias_method :add!, :<<
1160
+
1161
+ # Normalization means joining strings
1162
+ # and getting rid of ""s, recursively
1163
+ def normalize!
1164
+ new_contents = []
1165
+ @contents.each{|c|
1166
+ if c.is_a? String
1167
+ next if c == ""
1168
+ if new_contents[-1].is_a? String
1169
+ new_contents[-1] += c
1170
+ next
1171
+ end
1172
+ else
1173
+ c.normalize!
1174
+ end
1175
+ new_contents.push c
1176
+ }
1177
+ @contents = new_contents
1178
+ end
1179
+
1180
+ # Return text below the node, stripping all XML tags,
1181
+ # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1182
+ # returns "Hello, world!"
1183
+ def text
1184
+ res = ""
1185
+ @contents.each{|c|
1186
+ if c.is_a? XML
1187
+ res << c.text
1188
+ elsif c.is_a? String
1189
+ res << c
1190
+ end # Ignore XML_PI/XML_Comment
1191
+ }
1192
+ res
1193
+ end
1194
+
1195
+ # Equivalent to node.children(pat, *rest)[0]
1196
+ # Returns nil if there aren't any matching children
1197
+ def child(pat=nil, *rest)
1198
+ children(pat, *rest) {|c|
1199
+ return c
1200
+ }
1201
+ return nil
1202
+ end
1203
+
1204
+ # Equivalent to node.descendants(pat, *rest)[0]
1205
+ # Returns nil if there aren't any matching descendants
1206
+ def descendant(pat=nil, *rest)
1207
+ descendants(pat, *rest) {|c|
1208
+ return c
1209
+ }
1210
+ return nil
1211
+ end
1212
+
1213
+ # XML#children(pattern, more_patterns)
1214
+ # Return all children of a node with tags matching tag.
1215
+ # Also:
1216
+ # * children(:a, :b) == children(:a).children(:b)
1217
+ # * children(:a, :*, :c) == children(:a).descendants(:c)
1218
+ def children(pat=nil, *rest, &blk)
1219
+ return descendants(*rest, &blk) if pat == :*
1220
+ res = []
1221
+ @contents.each{|c|
1222
+ if pat.nil? or pat === c
1223
+ if rest == []
1224
+ res << c
1225
+ yield c if block_given?
1272
1226
  else
1273
- r = XML.new(self.name, self.attrs)
1274
- each{|c|
1275
- if c.is_a? XML
1276
- r << c.deep_map(pat, &blk)
1277
- else
1278
- r << c
1279
- end
1280
- }
1281
- r
1227
+ res += c.children(*rest, &blk)
1282
1228
  end
1283
- end
1229
+ end
1230
+ }
1231
+ res
1232
+ end
1284
1233
 
1285
- # FIXME: do we want a shallow or a deep copy here ?
1286
- # Map children, but leave the name/attributes
1287
- def map(pat=nil)
1288
- r = XML.new(self.name, self.attrs)
1289
- each{|c|
1290
- if !pat || (c.is_a?(XML) && c =~ pat)
1291
- r << yield(c)
1292
- else
1293
- r << c
1294
- end
1295
- }
1296
- r
1234
+ # * XML#descendants
1235
+ # * XML#descendants(pattern)
1236
+ # * XML#descendants(pattern, more_patterns)
1237
+ #
1238
+ # Return all descendants of a node matching the pattern.
1239
+ # If pattern==nil, simply return all descendants.
1240
+ # Optionally run a block on each of them if a block was given.
1241
+ # If pattern==nil, also match Strings !
1242
+ def descendants(pat=nil, *rest, &blk)
1243
+ res = []
1244
+ @contents.each{|c|
1245
+ if pat.nil? or pat === c
1246
+ if rest == []
1247
+ res << c
1248
+ yield c if block_given?
1249
+ else
1250
+ res += c.children(*rest, &blk)
1251
+ end
1252
+ end
1253
+ if c.is_a? XML
1254
+ res += c.descendants(pat, *rest, &blk)
1255
+ end
1256
+ }
1257
+ res
1258
+ end
1259
+
1260
+ # Change elements based on pattern
1261
+ def deep_map(pat, &blk)
1262
+ if self =~ pat
1263
+ yield self
1264
+ else
1265
+ r = XML.new(self.name, self.attrs)
1266
+ each{|c|
1267
+ if c.is_a? XML
1268
+ r << c.deep_map(pat, &blk)
1269
+ else
1270
+ r << c
1271
+ end
1272
+ }
1273
+ r
1297
1274
  end
1275
+ end
1276
+
1277
+ # FIXME: do we want a shallow or a deep copy here ?
1278
+ # Map children, but leave the name/attributes
1279
+ def map(pat=nil)
1280
+ r = XML.new(self.name, self.attrs)
1281
+ each{|c|
1282
+ if !pat || (c.is_a?(XML) && c =~ pat)
1283
+ r << yield(c)
1284
+ else
1285
+ r << c
1286
+ end
1287
+ }
1288
+ r
1289
+ end
1298
1290
  end
1299
1291
 
1300
1292
  # FIXME: Is this even sane ?
1301
1293
  # * What about escaping and all that stuff ?
1302
1294
  # * Rest of the code assumes that everything is either XML or String
1303
1295
  class XML_PI
1304
- def initialize(c, t)
1305
- @c = c
1306
- @t = t
1307
- end
1308
- def to_s
1309
- "<?#{@c}#{@t}?>"
1310
- end
1296
+ def initialize(c, t)
1297
+ @c = c
1298
+ @t = t
1299
+ end
1300
+ def to_s
1301
+ "<?#{@c}#{@t}?>"
1302
+ end
1311
1303
  end
1312
1304
 
1313
1305
  # FIXME: Is this even sane ?
@@ -1315,25 +1307,25 @@ end
1315
1307
  # * Rest of the code assumes that everything is either XML or String
1316
1308
  # * There are some limitations on where one can put -s in the comment. Do not overdo.
1317
1309
  class XML_Comment
1318
- def initialize(c)
1319
- @c = c
1320
- end
1321
- def to_s
1322
- "<!--#{@c}-->"
1323
- end
1310
+ def initialize(c)
1311
+ @c = c
1312
+ end
1313
+ def to_s
1314
+ "<!--#{@c}-->"
1315
+ end
1324
1316
  end
1325
1317
 
1326
1318
  # Syntactic sugar for XML.new
1327
1319
  def xml(*args, &blk)
1328
- XML.new(*args, &blk)
1320
+ XML.new(*args, &blk)
1329
1321
  end
1330
1322
 
1331
1323
  # xml! in XML { ... } - context adds node to parent
1332
1324
  # xml! in main context prints the argument (and returns it anyway)
1333
1325
  def xml!(*args, &blk)
1334
- node = xml(*args, &blk)
1335
- print node
1336
- node
1326
+ node = xml(*args, &blk)
1327
+ print node
1328
+ node
1337
1329
  end
1338
1330
 
1339
1331
  # Perl 6 is supposed to have native support for something like that.
@@ -1341,20 +1333,19 @@ end
1341
1333
  #
1342
1334
  # Usage:
1343
1335
  # case foo
1344
- # when all(:foo, {:color => 'blue'}, /Hello/)
1336
+ # when All[:foo, {:color => 'blue'}, /Hello/]
1345
1337
  # print foo
1346
1338
  # end
1347
- class Patterns_all
1348
- def initialize(*patterns)
1349
- @patterns = patterns
1350
- end
1351
- def ===(obj)
1352
- @patterns.all?{|p| p === obj}
1353
- end
1354
- end
1355
-
1356
- def all(*patterns)
1357
- Patterns_all.new(*patterns)
1339
+ class All
1340
+ def initialize(*patterns)
1341
+ @patterns = patterns
1342
+ end
1343
+ def ===(obj)
1344
+ @patterns.all?{|p| p === obj}
1345
+ end
1346
+ def self.[](*patterns)
1347
+ new(*patterns)
1348
+ end
1358
1349
  end
1359
1350
 
1360
1351
  # Perl 6 is supposed to have native support for something like that.
@@ -1362,18 +1353,17 @@ end
1362
1353
  #
1363
1354
  # Usage:
1364
1355
  # case foo
1365
- # when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
1356
+ # when All[:foo, Any[{:color => 'blue'}, {:color => 'red'}], /Hello/]
1366
1357
  # print foo
1367
1358
  # end
1368
- class Patterns_any
1369
- def initialize(*patterns)
1370
- @patterns = patterns
1371
- end
1372
- def ===(obj)
1373
- @patterns.any?{|p| p === obj}
1374
- end
1375
- end
1376
-
1377
- def any(*patterns)
1378
- Patterns_any.new(*patterns)
1359
+ class Any
1360
+ def initialize(*patterns)
1361
+ @patterns = patterns
1362
+ end
1363
+ def ===(obj)
1364
+ @patterns.any?{|p| p === obj}
1365
+ end
1366
+ def self.[](*patterns)
1367
+ new(*patterns)
1368
+ end
1379
1369
  end