magic-xml 0.2013.04.14 → 0.2016.05.07

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +7 -7
  2. data/lib/magic_xml.rb +1218 -1228
  3. metadata +50 -33
  4. data/test.xml +0 -1
  5. data/tests.rb +0 -836
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
- ---
2
- SHA512:
3
- metadata.gz: 7bd51bf68b4fbc33e90776c1ed25a241940533549758d98110635611ad6e86b0110d57d0b5fd80423d7e8c62bee96c113e80d9c951f6f3d0e79a40d8b2a6ba8f
4
- data.tar.gz: ccf0ccd42e5ed6c43be62935965d02ffcfcf106946205b5f2f069971da9fdf49bbddce2663d38dc38cfc6d71bebb2ec21c5679c44c2de7fa32c8a66544eef668
5
- SHA1:
6
- metadata.gz: 3a3f3eda28edb3476dbdb92e2e7b3387290e6df6
7
- data.tar.gz: 75594336326d79a1b8465717f411141dfdae8e96
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0436ef9cb1270e202381dec66b3fa5f895ac5b6f
4
+ data.tar.gz: 211561626d9196c738ee1fc5c6103101c9521082
5
+ SHA512:
6
+ metadata.gz: f0f4df9bebae54afcc59f60658f2d9914ef71d2326344dbd5c98a7b0ad8f5269966e38cf8dadef229c26c412a6f53674bfd5a307f762f25bae7efcace992a9ef
7
+ data.tar.gz: acf49469ff1336c11624a0a6ce2a12f1b39f9414109965064fdcd62a2115f8ec0d84e12a8b3b2b107f3a9912ebba1235ef824a4da1dcbd6091838ea730e63f4e
data/lib/magic_xml.rb CHANGED
@@ -6,359 +6,351 @@ require 'net/http'
6
6
 
7
7
  # FIXME: Make comment formatting RDoc-friendly. It's not always so now.
8
8
 
9
- # In Ruby 2 Symbol will be a subclass of String, and
10
- # this won't be needed any more. Before then...
11
9
  class Symbol
12
- include Comparable
13
- def <=>(other)
14
- raise ArgumentError.new("comparison of #{self.class} with #{other.class} failed") unless other.is_a? Symbol
15
- to_s <=> other.to_s
16
- end
17
-
18
- alias_method :eqeqeq_before_magic_xml, :===
19
- def ===(*args, &blk)
20
- if args.size >= 1 and args[0].is_a? XML
21
- self == args[0].name
22
- else
23
- eqeqeq_before_magic_xml(*args, &blk)
24
- end
10
+ alias_method :eqeqeq_before_magic_xml, :===
11
+ def ===(*args, &blk)
12
+ if args.size >= 1 and args[0].is_a? XML
13
+ self == args[0].name
14
+ else
15
+ eqeqeq_before_magic_xml(*args, &blk)
25
16
  end
17
+ end
26
18
  end
27
19
 
28
20
  class Hash
29
- alias_method :eqeqeq_before_magic_xml, :===
30
- def ===(*args, &blk)
31
- if args.size >= 1 and args[0].is_a? XML
32
- all?{|k,v| v === args[0][k]}
33
- else
34
- eqeqeq_before_magic_xml(*args, &blk)
35
- end
21
+ alias_method :eqeqeq_before_magic_xml, :===
22
+ def ===(*args, &blk)
23
+ if args.size >= 1 and args[0].is_a? XML
24
+ all?{|k,v| v === args[0][k]}
25
+ else
26
+ eqeqeq_before_magic_xml(*args, &blk)
36
27
  end
28
+ end
37
29
  end
38
30
 
39
31
  class String
40
- # Escape string for output as XML text (< > &)
41
- def xml_escape
42
- replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
43
- gsub(/([<>&])/) { replacements[$1] }
44
- end
45
- # Escape characters for output as XML attribute values (< > & ' ")
46
- def xml_attr_escape
47
- replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
48
- gsub(/([<>&\'\"])/) { replacements[$1] }
49
- end
50
- # Unescape entities
51
- # Supports:
52
- # * Full set of HTML-compatible named entities
53
- # * Decimal entities &#1234;
54
- # * Hex entities &#xA0b1;
55
- def xml_unescape(extra_entities=nil)
56
- @@xhtml_entity_replacements ||= {
57
- 'nbsp' => 160,
58
- 'iexcl' => 161,
59
- 'cent' => 162,
60
- 'pound' => 163,
61
- 'curren' => 164,
62
- 'yen' => 165,
63
- 'brvbar' => 166,
64
- 'sect' => 167,
65
- 'uml' => 168,
66
- 'copy' => 169,
67
- 'ordf' => 170,
68
- 'laquo' => 171,
69
- 'not' => 172,
70
- 'shy' => 173,
71
- 'reg' => 174,
72
- 'macr' => 175,
73
- 'deg' => 176,
74
- 'plusmn' => 177,
75
- 'sup2' => 178,
76
- 'sup3' => 179,
77
- 'acute' => 180,
78
- 'micro' => 181,
79
- 'para' => 182,
80
- 'middot' => 183,
81
- 'cedil' => 184,
82
- 'sup1' => 185,
83
- 'ordm' => 186,
84
- 'raquo' => 187,
85
- 'frac14' => 188,
86
- 'frac12' => 189,
87
- 'frac34' => 190,
88
- 'iquest' => 191,
89
- 'Agrave' => 192,
90
- 'Aacute' => 193,
91
- 'Acirc' => 194,
92
- 'Atilde' => 195,
93
- 'Auml' => 196,
94
- 'Aring' => 197,
95
- 'AElig' => 198,
96
- 'Ccedil' => 199,
97
- 'Egrave' => 200,
98
- 'Eacute' => 201,
99
- 'Ecirc' => 202,
100
- 'Euml' => 203,
101
- 'Igrave' => 204,
102
- 'Iacute' => 205,
103
- 'Icirc' => 206,
104
- 'Iuml' => 207,
105
- 'ETH' => 208,
106
- 'Ntilde' => 209,
107
- 'Ograve' => 210,
108
- 'Oacute' => 211,
109
- 'Ocirc' => 212,
110
- 'Otilde' => 213,
111
- 'Ouml' => 214,
112
- 'times' => 215,
113
- 'Oslash' => 216,
114
- 'Ugrave' => 217,
115
- 'Uacute' => 218,
116
- 'Ucirc' => 219,
117
- 'Uuml' => 220,
118
- 'Yacute' => 221,
119
- 'THORN' => 222,
120
- 'szlig' => 223,
121
- 'agrave' => 224,
122
- 'aacute' => 225,
123
- 'acirc' => 226,
124
- 'atilde' => 227,
125
- 'auml' => 228,
126
- 'aring' => 229,
127
- 'aelig' => 230,
128
- 'ccedil' => 231,
129
- 'egrave' => 232,
130
- 'eacute' => 233,
131
- 'ecirc' => 234,
132
- 'euml' => 235,
133
- 'igrave' => 236,
134
- 'iacute' => 237,
135
- 'icirc' => 238,
136
- 'iuml' => 239,
137
- 'eth' => 240,
138
- 'ntilde' => 241,
139
- 'ograve' => 242,
140
- 'oacute' => 243,
141
- 'ocirc' => 244,
142
- 'otilde' => 245,
143
- 'ouml' => 246,
144
- 'divide' => 247,
145
- 'oslash' => 248,
146
- 'ugrave' => 249,
147
- 'uacute' => 250,
148
- 'ucirc' => 251,
149
- 'uuml' => 252,
150
- 'yacute' => 253,
151
- 'thorn' => 254,
152
- 'yuml' => 255,
153
- 'quot' => 34,
154
- 'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
155
- 'amp' => 38,
156
- 'lt' => 60,
157
- 'gt' => 62,
158
- 'OElig' => 338,
159
- 'oelig' => 339,
160
- 'Scaron' => 352,
161
- 'scaron' => 353,
162
- 'Yuml' => 376,
163
- 'circ' => 710,
164
- 'tilde' => 732,
165
- 'ensp' => 8194,
166
- 'emsp' => 8195,
167
- 'thinsp' => 8201,
168
- 'zwnj' => 8204,
169
- 'zwj' => 8205,
170
- 'lrm' => 8206,
171
- 'rlm' => 8207,
172
- 'ndash' => 8211,
173
- 'mdash' => 8212,
174
- 'lsquo' => 8216,
175
- 'rsquo' => 8217,
176
- 'sbquo' => 8218,
177
- 'ldquo' => 8220,
178
- 'rdquo' => 8221,
179
- 'bdquo' => 8222,
180
- 'dagger' => 8224,
181
- 'Dagger' => 8225,
182
- 'permil' => 8240,
183
- 'lsaquo' => 8249,
184
- 'rsaquo' => 8250,
185
- 'euro' => 8364,
186
- 'fnof' => 402,
187
- 'Alpha' => 913,
188
- 'Beta' => 914,
189
- 'Gamma' => 915,
190
- 'Delta' => 916,
191
- 'Epsilon' => 917,
192
- 'Zeta' => 918,
193
- 'Eta' => 919,
194
- 'Theta' => 920,
195
- 'Iota' => 921,
196
- 'Kappa' => 922,
197
- 'Lambda' => 923,
198
- 'Mu' => 924,
199
- 'Nu' => 925,
200
- 'Xi' => 926,
201
- 'Omicron' => 927,
202
- 'Pi' => 928,
203
- 'Rho' => 929,
204
- 'Sigma' => 931,
205
- 'Tau' => 932,
206
- 'Upsilon' => 933,
207
- 'Phi' => 934,
208
- 'Chi' => 935,
209
- 'Psi' => 936,
210
- 'Omega' => 937,
211
- 'alpha' => 945,
212
- 'beta' => 946,
213
- 'gamma' => 947,
214
- 'delta' => 948,
215
- 'epsilon' => 949,
216
- 'zeta' => 950,
217
- 'eta' => 951,
218
- 'theta' => 952,
219
- 'iota' => 953,
220
- 'kappa' => 954,
221
- 'lambda' => 955,
222
- 'mu' => 956,
223
- 'nu' => 957,
224
- 'xi' => 958,
225
- 'omicron' => 959,
226
- 'pi' => 960,
227
- 'rho' => 961,
228
- 'sigmaf' => 962,
229
- 'sigma' => 963,
230
- 'tau' => 964,
231
- 'upsilon' => 965,
232
- 'phi' => 966,
233
- 'chi' => 967,
234
- 'psi' => 968,
235
- 'omega' => 969,
236
- 'thetasym' => 977,
237
- 'upsih' => 978,
238
- 'piv' => 982,
239
- 'bull' => 8226,
240
- 'hellip' => 8230,
241
- 'prime' => 8242,
242
- 'Prime' => 8243,
243
- 'oline' => 8254,
244
- 'frasl' => 8260,
245
- 'weierp' => 8472,
246
- 'image' => 8465,
247
- 'real' => 8476,
248
- 'trade' => 8482,
249
- 'alefsym' => 8501,
250
- 'larr' => 8592,
251
- 'uarr' => 8593,
252
- 'rarr' => 8594,
253
- 'darr' => 8595,
254
- 'harr' => 8596,
255
- 'crarr' => 8629,
256
- 'lArr' => 8656,
257
- 'uArr' => 8657,
258
- 'rArr' => 8658,
259
- 'dArr' => 8659,
260
- 'hArr' => 8660,
261
- 'forall' => 8704,
262
- 'part' => 8706,
263
- 'exist' => 8707,
264
- 'empty' => 8709,
265
- 'nabla' => 8711,
266
- 'isin' => 8712,
267
- 'notin' => 8713,
268
- 'ni' => 8715,
269
- 'prod' => 8719,
270
- 'sum' => 8721,
271
- 'minus' => 8722,
272
- 'lowast' => 8727,
273
- 'radic' => 8730,
274
- 'prop' => 8733,
275
- 'infin' => 8734,
276
- 'ang' => 8736,
277
- 'and' => 8743,
278
- 'or' => 8744,
279
- 'cap' => 8745,
280
- 'cup' => 8746,
281
- 'int' => 8747,
282
- 'there4' => 8756,
283
- 'sim' => 8764,
284
- 'cong' => 8773,
285
- 'asymp' => 8776,
286
- 'ne' => 8800,
287
- 'equiv' => 8801,
288
- 'le' => 8804,
289
- 'ge' => 8805,
290
- 'sub' => 8834,
291
- 'sup' => 8835,
292
- 'nsub' => 8836,
293
- 'sube' => 8838,
294
- 'supe' => 8839,
295
- 'oplus' => 8853,
296
- 'otimes' => 8855,
297
- 'perp' => 8869,
298
- 'sdot' => 8901,
299
- 'lceil' => 8968,
300
- 'rceil' => 8969,
301
- 'lfloor' => 8970,
302
- 'rfloor' => 8971,
303
- 'lang' => 9001,
304
- 'rang' => 9002,
305
- 'loz' => 9674,
306
- 'spades' => 9824,
307
- 'clubs' => 9827,
308
- 'hearts' => 9829,
309
- 'diams' => 9830,
310
- }
311
- gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
312
- if $1 then
313
- v = @@xhtml_entity_replacements[$1]
314
- # Nonstandard entity
315
- unless v
316
- if extra_entities.is_a? Proc
317
- v = extra_entities.call($1)
318
- # Well, we expect a Hash here, but any container will do.
319
- # As long as it's not a nil.
320
- elsif extra_entities
321
- v = extra_entities[$1]
322
- end
323
- end
324
- raise "Unknown escape #{$1}" unless v
325
- elsif $2
326
- v = $2.to_i
327
- else
328
- v = $3.hex
329
- end
330
- # v can be a String or an Integer
331
- if v.is_a? String then v else [v].pack('U') end
332
- }
333
- end
334
- def xml_parse
335
- XML.parse(self)
336
- end
32
+ # Escape string for output as XML text (< > &)
33
+ def xml_escape
34
+ replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
35
+ gsub(/([<>&])/) { replacements[$1] }
36
+ end
37
+ # Escape characters for output as XML attribute values (< > & ' ")
38
+ def xml_attr_escape
39
+ replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
40
+ gsub(/([<>&\'\"])/) { replacements[$1] }
41
+ end
42
+ # Unescape entities
43
+ # Supports:
44
+ # * Full set of HTML-compatible named entities
45
+ # * Decimal entities &#1234;
46
+ # * Hex entities &#xA0b1;
47
+ def xml_unescape(extra_entities=nil)
48
+ @@xhtml_entity_replacements ||= {
49
+ 'nbsp' => 160,
50
+ 'iexcl' => 161,
51
+ 'cent' => 162,
52
+ 'pound' => 163,
53
+ 'curren' => 164,
54
+ 'yen' => 165,
55
+ 'brvbar' => 166,
56
+ 'sect' => 167,
57
+ 'uml' => 168,
58
+ 'copy' => 169,
59
+ 'ordf' => 170,
60
+ 'laquo' => 171,
61
+ 'not' => 172,
62
+ 'shy' => 173,
63
+ 'reg' => 174,
64
+ 'macr' => 175,
65
+ 'deg' => 176,
66
+ 'plusmn' => 177,
67
+ 'sup2' => 178,
68
+ 'sup3' => 179,
69
+ 'acute' => 180,
70
+ 'micro' => 181,
71
+ 'para' => 182,
72
+ 'middot' => 183,
73
+ 'cedil' => 184,
74
+ 'sup1' => 185,
75
+ 'ordm' => 186,
76
+ 'raquo' => 187,
77
+ 'frac14' => 188,
78
+ 'frac12' => 189,
79
+ 'frac34' => 190,
80
+ 'iquest' => 191,
81
+ 'Agrave' => 192,
82
+ 'Aacute' => 193,
83
+ 'Acirc' => 194,
84
+ 'Atilde' => 195,
85
+ 'Auml' => 196,
86
+ 'Aring' => 197,
87
+ 'AElig' => 198,
88
+ 'Ccedil' => 199,
89
+ 'Egrave' => 200,
90
+ 'Eacute' => 201,
91
+ 'Ecirc' => 202,
92
+ 'Euml' => 203,
93
+ 'Igrave' => 204,
94
+ 'Iacute' => 205,
95
+ 'Icirc' => 206,
96
+ 'Iuml' => 207,
97
+ 'ETH' => 208,
98
+ 'Ntilde' => 209,
99
+ 'Ograve' => 210,
100
+ 'Oacute' => 211,
101
+ 'Ocirc' => 212,
102
+ 'Otilde' => 213,
103
+ 'Ouml' => 214,
104
+ 'times' => 215,
105
+ 'Oslash' => 216,
106
+ 'Ugrave' => 217,
107
+ 'Uacute' => 218,
108
+ 'Ucirc' => 219,
109
+ 'Uuml' => 220,
110
+ 'Yacute' => 221,
111
+ 'THORN' => 222,
112
+ 'szlig' => 223,
113
+ 'agrave' => 224,
114
+ 'aacute' => 225,
115
+ 'acirc' => 226,
116
+ 'atilde' => 227,
117
+ 'auml' => 228,
118
+ 'aring' => 229,
119
+ 'aelig' => 230,
120
+ 'ccedil' => 231,
121
+ 'egrave' => 232,
122
+ 'eacute' => 233,
123
+ 'ecirc' => 234,
124
+ 'euml' => 235,
125
+ 'igrave' => 236,
126
+ 'iacute' => 237,
127
+ 'icirc' => 238,
128
+ 'iuml' => 239,
129
+ 'eth' => 240,
130
+ 'ntilde' => 241,
131
+ 'ograve' => 242,
132
+ 'oacute' => 243,
133
+ 'ocirc' => 244,
134
+ 'otilde' => 245,
135
+ 'ouml' => 246,
136
+ 'divide' => 247,
137
+ 'oslash' => 248,
138
+ 'ugrave' => 249,
139
+ 'uacute' => 250,
140
+ 'ucirc' => 251,
141
+ 'uuml' => 252,
142
+ 'yacute' => 253,
143
+ 'thorn' => 254,
144
+ 'yuml' => 255,
145
+ 'quot' => 34,
146
+ 'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
147
+ 'amp' => 38,
148
+ 'lt' => 60,
149
+ 'gt' => 62,
150
+ 'OElig' => 338,
151
+ 'oelig' => 339,
152
+ 'Scaron' => 352,
153
+ 'scaron' => 353,
154
+ 'Yuml' => 376,
155
+ 'circ' => 710,
156
+ 'tilde' => 732,
157
+ 'ensp' => 8194,
158
+ 'emsp' => 8195,
159
+ 'thinsp' => 8201,
160
+ 'zwnj' => 8204,
161
+ 'zwj' => 8205,
162
+ 'lrm' => 8206,
163
+ 'rlm' => 8207,
164
+ 'ndash' => 8211,
165
+ 'mdash' => 8212,
166
+ 'lsquo' => 8216,
167
+ 'rsquo' => 8217,
168
+ 'sbquo' => 8218,
169
+ 'ldquo' => 8220,
170
+ 'rdquo' => 8221,
171
+ 'bdquo' => 8222,
172
+ 'dagger' => 8224,
173
+ 'Dagger' => 8225,
174
+ 'permil' => 8240,
175
+ 'lsaquo' => 8249,
176
+ 'rsaquo' => 8250,
177
+ 'euro' => 8364,
178
+ 'fnof' => 402,
179
+ 'Alpha' => 913,
180
+ 'Beta' => 914,
181
+ 'Gamma' => 915,
182
+ 'Delta' => 916,
183
+ 'Epsilon' => 917,
184
+ 'Zeta' => 918,
185
+ 'Eta' => 919,
186
+ 'Theta' => 920,
187
+ 'Iota' => 921,
188
+ 'Kappa' => 922,
189
+ 'Lambda' => 923,
190
+ 'Mu' => 924,
191
+ 'Nu' => 925,
192
+ 'Xi' => 926,
193
+ 'Omicron' => 927,
194
+ 'Pi' => 928,
195
+ 'Rho' => 929,
196
+ 'Sigma' => 931,
197
+ 'Tau' => 932,
198
+ 'Upsilon' => 933,
199
+ 'Phi' => 934,
200
+ 'Chi' => 935,
201
+ 'Psi' => 936,
202
+ 'Omega' => 937,
203
+ 'alpha' => 945,
204
+ 'beta' => 946,
205
+ 'gamma' => 947,
206
+ 'delta' => 948,
207
+ 'epsilon' => 949,
208
+ 'zeta' => 950,
209
+ 'eta' => 951,
210
+ 'theta' => 952,
211
+ 'iota' => 953,
212
+ 'kappa' => 954,
213
+ 'lambda' => 955,
214
+ 'mu' => 956,
215
+ 'nu' => 957,
216
+ 'xi' => 958,
217
+ 'omicron' => 959,
218
+ 'pi' => 960,
219
+ 'rho' => 961,
220
+ 'sigmaf' => 962,
221
+ 'sigma' => 963,
222
+ 'tau' => 964,
223
+ 'upsilon' => 965,
224
+ 'phi' => 966,
225
+ 'chi' => 967,
226
+ 'psi' => 968,
227
+ 'omega' => 969,
228
+ 'thetasym' => 977,
229
+ 'upsih' => 978,
230
+ 'piv' => 982,
231
+ 'bull' => 8226,
232
+ 'hellip' => 8230,
233
+ 'prime' => 8242,
234
+ 'Prime' => 8243,
235
+ 'oline' => 8254,
236
+ 'frasl' => 8260,
237
+ 'weierp' => 8472,
238
+ 'image' => 8465,
239
+ 'real' => 8476,
240
+ 'trade' => 8482,
241
+ 'alefsym' => 8501,
242
+ 'larr' => 8592,
243
+ 'uarr' => 8593,
244
+ 'rarr' => 8594,
245
+ 'darr' => 8595,
246
+ 'harr' => 8596,
247
+ 'crarr' => 8629,
248
+ 'lArr' => 8656,
249
+ 'uArr' => 8657,
250
+ 'rArr' => 8658,
251
+ 'dArr' => 8659,
252
+ 'hArr' => 8660,
253
+ 'forall' => 8704,
254
+ 'part' => 8706,
255
+ 'exist' => 8707,
256
+ 'empty' => 8709,
257
+ 'nabla' => 8711,
258
+ 'isin' => 8712,
259
+ 'notin' => 8713,
260
+ 'ni' => 8715,
261
+ 'prod' => 8719,
262
+ 'sum' => 8721,
263
+ 'minus' => 8722,
264
+ 'lowast' => 8727,
265
+ 'radic' => 8730,
266
+ 'prop' => 8733,
267
+ 'infin' => 8734,
268
+ 'ang' => 8736,
269
+ 'and' => 8743,
270
+ 'or' => 8744,
271
+ 'cap' => 8745,
272
+ 'cup' => 8746,
273
+ 'int' => 8747,
274
+ 'there4' => 8756,
275
+ 'sim' => 8764,
276
+ 'cong' => 8773,
277
+ 'asymp' => 8776,
278
+ 'ne' => 8800,
279
+ 'equiv' => 8801,
280
+ 'le' => 8804,
281
+ 'ge' => 8805,
282
+ 'sub' => 8834,
283
+ 'sup' => 8835,
284
+ 'nsub' => 8836,
285
+ 'sube' => 8838,
286
+ 'supe' => 8839,
287
+ 'oplus' => 8853,
288
+ 'otimes' => 8855,
289
+ 'perp' => 8869,
290
+ 'sdot' => 8901,
291
+ 'lceil' => 8968,
292
+ 'rceil' => 8969,
293
+ 'lfloor' => 8970,
294
+ 'rfloor' => 8971,
295
+ 'lang' => 9001,
296
+ 'rang' => 9002,
297
+ 'loz' => 9674,
298
+ 'spades' => 9824,
299
+ 'clubs' => 9827,
300
+ 'hearts' => 9829,
301
+ 'diams' => 9830,
302
+ }
303
+ gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
304
+ if $1 then
305
+ v = @@xhtml_entity_replacements[$1]
306
+ # Nonstandard entity
307
+ unless v
308
+ if extra_entities.is_a? Proc
309
+ v = extra_entities.call($1)
310
+ # Well, we expect a Hash here, but any container will do.
311
+ # As long as it's not a nil.
312
+ elsif extra_entities
313
+ v = extra_entities[$1]
314
+ end
315
+ end
316
+ raise "Unknown escape #{$1}" unless v
317
+ elsif $2
318
+ v = $2.to_i
319
+ else
320
+ v = $3.hex
321
+ end
322
+ # v can be a String or an Integer
323
+ if v.is_a? String then v else [v].pack('U') end
324
+ }
325
+ end
326
+ def xml_parse
327
+ XML.parse(self)
328
+ end
337
329
  end
338
330
 
339
331
  class File
340
- def xml_parse
341
- XML.parse(self)
342
- end
332
+ def xml_parse
333
+ XML.parse(self)
334
+ end
343
335
  end
344
336
 
345
337
  class Array
346
- # children of any element
347
- def children(*args, &blk)
348
- res = []
349
- each{|c|
350
- res += c.children(*args, &blk) if c.is_a? XML
351
- }
352
- res
353
- end
354
- # descendants of any element
355
- def descendants(*args, &blk)
356
- res = []
357
- each{|c|
358
- res += c.descendants(*args, &blk) if c.is_a? XML
359
- }
360
- res
361
- end
338
+ # children of any element
339
+ def children(*args, &blk)
340
+ res = []
341
+ each{|c|
342
+ res += c.children(*args, &blk) if c.is_a? XML
343
+ }
344
+ res
345
+ end
346
+ # descendants of any element
347
+ def descendants(*args, &blk)
348
+ res = []
349
+ each{|c|
350
+ res += c.descendants(*args, &blk) if c.is_a? XML
351
+ }
352
+ res
353
+ end
362
354
  end
363
355
 
364
356
  # Methods of Enumerable.
@@ -372,942 +364,942 @@ end
372
364
  #
373
365
  # FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
374
366
  class XML
375
- include Enumerable
376
- # Default any? is ok
377
- # Default all? is ok
378
-
379
- # Iterate over children, possibly with a selector
380
- def each(*selector, &blk)
381
- children(*selector, &blk)
382
- self
383
- end
367
+ include Enumerable
368
+ # Default any? is ok
369
+ # Default all? is ok
384
370
 
385
- # Sort XML children of XML element.
386
- def sort_by(*args, &blk)
387
- self.dup{ @contents = @contents.select{|c| c.is_a? XML}.sort_by(*args, &blk) }
388
- end
371
+ # Iterate over children, possibly with a selector
372
+ def each(*selector, &blk)
373
+ children(*selector, &blk)
374
+ self
375
+ end
389
376
 
390
- # Sort children of XML element.
391
- def children_sort_by(*args, &blk)
392
- self.dup{ @contents = @contents.sort_by(*args, &blk) }
393
- end
377
+ # Sort XML children of XML element.
378
+ def sort_by(*args, &blk)
379
+ self.dup{ @contents = @contents.select{|c| c.is_a? XML}.sort_by(*args, &blk) }
380
+ end
394
381
 
395
- # Sort children of XML element.
396
- #
397
- # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
398
- # Use sort_by instead.
399
- #
400
- # Unless you define your own XML#<=> operator, or do something equally weird.
401
- def sort(*args, &blk)
402
- self.dup{ @contents = @contents.sort(*args, &blk) }
403
- end
382
+ # Sort children of XML element.
383
+ def children_sort_by(*args, &blk)
384
+ self.dup{ @contents = @contents.sort_by(*args, &blk) }
385
+ end
386
+
387
+ # Sort children of XML element.
388
+ #
389
+ # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
390
+ # Use sort_by instead.
391
+ #
392
+ # Unless you define your own XML#<=> operator, or do something equally weird.
393
+ def sort(*args, &blk)
394
+ self.dup{ @contents = @contents.sort(*args, &blk) }
395
+ end
404
396
 
405
- #collect/map
406
- #detect/find
407
- #each_cons
408
- #each_slice
409
- #each_with_index
410
- #to_a
411
- #entries
412
- #enum_cons
413
- #enum_slice
414
- #enum
415
- # grep
416
- # include?/member?
417
- # inject
418
- # max/min
419
- # max_by/min_by - Ruby 1.9
420
- # partition
421
- # reject
422
- # sort
423
- # sort_by
424
- # to_set
425
- # zip
426
- # And Enumerable::Enumerator-generating methods
397
+ #collect/map
398
+ #detect/find
399
+ #each_cons
400
+ #each_slice
401
+ #each_with_index
402
+ #to_a
403
+ #entries
404
+ #enum_cons
405
+ #enum_slice
406
+ #enum
407
+ # grep
408
+ # include?/member?
409
+ # inject
410
+ # max/min
411
+ # max_by/min_by - Ruby 1.9
412
+ # partition
413
+ # reject
414
+ # sort
415
+ # sort_by
416
+ # to_set
417
+ # zip
418
+ # And Enumerable::Enumerator-generating methods
427
419
  end
428
420
 
429
421
  # Class methods
430
422
  class XML
431
- # XML.foo! == xml!(:foo)
432
- # XML.foo == xml(:foo)
433
- def self.method_missing(meth, *args, &blk)
434
- if meth.to_s =~ /^(.*)!$/
435
- xml!($1.to_sym, *args, &blk)
436
- else
437
- XML.new(meth, *args, &blk)
438
- end
423
+ # XML.foo! == xml!(:foo)
424
+ # XML.foo == xml(:foo)
425
+ def self.method_missing(meth, *args, &blk)
426
+ if meth.to_s =~ /^(.*)!$/
427
+ xml!($1.to_sym, *args, &blk)
428
+ else
429
+ XML.new(meth, *args, &blk)
439
430
  end
431
+ end
440
432
 
441
- # Read file and parse
442
- def self.from_file(file)
443
- file = File.open(file) if file.is_a? String
444
- parse(file)
433
+ # Read file and parse
434
+ def self.from_file(file)
435
+ file = File.open(file) if file.is_a? String
436
+ parse(file)
437
+ end
438
+
439
+ # Fetch URL and parse
440
+ # Supported:
441
+ # http://.../
442
+ # https://.../
443
+ # file:foo.xml
444
+ # string:<foo/>
445
+ def self.from_url(url)
446
+ if url =~ /^string:(.*)$/m
447
+ parse($1)
448
+ elsif url =~ /^file:(.*)$/m
449
+ from_file($1)
450
+ elsif url =~ /^http(s?):/
451
+ ssl = ($1 == "s")
452
+ # No, seriously - Ruby needs something better than net/http
453
+ # Something that groks basic auth and queries and redirects automatically:
454
+ # HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
455
+ # URI parsing must go inside the library, client programs
456
+ # should have nothing to do with it
457
+
458
+ # net/http is really inconvenient to use here
459
+ u = URI.parse(url)
460
+ # You're not seeing this:
461
+ if u.query then
462
+ path = u.path + "?" + u.query
463
+ else
464
+ path = u.path
465
+ end
466
+ req = Net::HTTP::Get.new(path)
467
+ if u.userinfo
468
+ username, passwd = u.userinfo.split(/:/,2)
469
+ req.basic_auth username, passwd
470
+ end
471
+ if ssl
472
+ # NOTE: You need libopenssl-ruby installed
473
+ # if you want to use HTTPS. Ubuntu is broken
474
+ # as it doesn't provide it in the default packages.
475
+ require 'net/https'
476
+ http = Net::HTTP.new(u.host, u.port)
477
+ http.use_ssl = true
478
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
479
+ else
480
+ http = Net::HTTP.new(u.host, u.port)
481
+ end
482
+
483
+ res = http.start {|http_conn| http_conn.request(req) }
484
+ # TODO: Throw a more meaningful exception
485
+ parse(res.body)
486
+ else
487
+ raise "URL protocol #{url} not supported (http, https, file, string are supported)"
445
488
  end
489
+ end
446
490
 
447
- # Fetch URL and parse
448
- # Supported:
449
- # http://.../
450
- # https://.../
451
- # file:foo.xml
452
- # string:<foo/>
453
- def self.from_url(url)
454
- if url =~ /^string:(.*)$/m
455
- parse($1)
456
- elsif url =~ /^file:(.*)$/m
457
- from_file($1)
458
- elsif url =~ /^http(s?):/
459
- ssl = ($1 == "s")
460
- # No, seriously - Ruby needs something better than net/http
461
- # Something that groks basic auth and queries and redirects automatically:
462
- # HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
463
- # URI parsing must go inside the library, client programs
464
- # should have nothing to do with it
465
-
466
- # net/http is really inconvenient to use here
467
- u = URI.parse(url)
468
- # You're not seeing this:
469
- if u.query then
470
- path = u.path + "?" + u.query
471
- else
472
- path = u.path
473
- end
474
- req = Net::HTTP::Get.new(path)
475
- if u.userinfo
476
- username, passwd = u.userinfo.split(/:/,2)
477
- req.basic_auth username, passwd
478
- end
479
- if ssl
480
- # NOTE: You need libopenssl-ruby installed
481
- # if you want to use HTTPS. Ubuntu is broken
482
- # as it doesn't provide it in the default packages.
483
- require 'net/https'
484
- http = Net::HTTP.new(u.host, u.port)
485
- http.use_ssl = true
486
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
487
- else
488
- http = Net::HTTP.new(u.host, u.port)
489
- end
490
-
491
- res = http.start {|http| http.request(req) }
492
- # TODO: Throw a more meaningful exception
493
- parse(res.body)
494
- else
495
- raise "URL protocol #{url} not supported (http, https, file, string are supported)"
496
- end
491
+ # Like CDuce load_xml
492
+ # The path can be:
493
+ # * file handler
494
+ # * URL (a string with :)
495
+ # * file name (a string without :)
496
+ def self.load(obj)
497
+ if obj.is_a? String
498
+ if obj.include? ":"
499
+ from_url(obj)
500
+ else
501
+ from_file(obj)
502
+ end
503
+ else
504
+ parse(obj)
497
505
  end
506
+ end
498
507
 
499
- # Like CDuce load_xml
500
- # The path can be:
501
- # * file handler
502
- # * URL (a string with :)
503
- # * file name (a string without :)
504
- def self.load(obj)
505
- if obj.is_a? String
506
- if obj.include? ":"
507
- from_url(obj)
508
- else
509
- from_file(obj)
508
+ # Parse XML in mixed stream/tree mode
509
+ # Basically the idea is that every time we get start element,
510
+ # we ask the block what to do about it.
511
+ # If it wants a tree below it, it should call e.tree
512
+ # If a tree was requested, elements below the current one
513
+ # are *not* processed. If it wasn't, they are.
514
+ #
515
+ # For example:
516
+ # <foo><bar/></foo><foo2/>
517
+ # yield <foo> ... </foo>
518
+ # .complete! called
519
+ # process <foo2> next
520
+ #
521
+ # But:
522
+ # <foo><bar/></foo><foo2/>
523
+ # yield <foo> ... </foo>
524
+ # .complete! not called
525
+ # process <bar> next
526
+ #
527
+ # FIXME: yielded values are not reusable for now
528
+ # FIXME: make more object-oriented
529
+ def self.parse_as_twigs(stream)
530
+ parser = REXML::Parsers::BaseParser.new stream
531
+ # We don't really need to keep the stack ;-)
532
+ stack = []
533
+ while true
534
+ event = parser.pull
535
+ case event[0]
536
+ when :start_element
537
+ # Now the evil part evil
538
+ attrs = {}
539
+ event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
540
+ node = XML.new(event[1].to_sym, attrs, *event[3..-1])
541
+
542
+ # I can't say it's superelegant
543
+ class <<node
544
+ attr_accessor :do_complete
545
+ def complete!
546
+ if @do_complete
547
+ @do_complete.call
548
+ @do_complete = nil
510
549
  end
511
- else
512
- parse(obj)
550
+ end
513
551
  end
514
- end
552
+ node.do_complete = proc{
553
+ parse_subtree(node, parser)
554
+ }
515
555
 
516
- # Parse XML in mixed stream/tree mode
517
- # Basically the idea is that every time we get start element,
518
- # we ask the block what to do about it.
519
- # If it wants a tree below it, it should call e.tree
520
- # If a tree was requested, elements below the current one
521
- # are *not* processed. If it wasn't, they are.
522
- #
523
- # For example:
524
- # <foo><bar/></foo><foo2/>
525
- # yield <foo> ... </foo>
526
- # .complete! called
527
- # process <foo2> next
528
- #
529
- # But:
530
- # <foo><bar/></foo><foo2/>
531
- # yield <foo> ... </foo>
532
- # .complete! not called
533
- # process <bar> next
534
- #
535
- # FIXME: yielded values are not reusable for now
536
- # FIXME: make more object-oriented
537
- def self.parse_as_twigs(stream)
538
- parser = REXML::Parsers::BaseParser.new stream
539
- # We don't really need to keep the stack ;-)
540
- stack = []
541
- while true
542
- event = parser.pull
543
- case event[0]
544
- when :start_element
545
- # Now the evil part evil
546
- attrs = {}
547
- event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
548
- node = XML.new(event[1].to_sym, attrs, *event[3..-1])
549
-
550
- # I can't say it's superelegant
551
- class <<node
552
- attr_accessor :do_complete
553
- def complete!
554
- if @do_complete
555
- @do_complete.call
556
- @do_complete = nil
557
- end
558
- end
559
- end
560
- node.do_complete = proc{
561
- parse_subtree(node, parser)
562
- }
563
-
564
- yield(node)
565
- if node.do_complete
566
- stack.push node
567
- node.do_complete = nil # It's too late, complete! shouldn't do anything now
568
- end
569
- when :end_element
570
- stack.pop
571
- when :end_document
572
- return
573
- else
574
- # FIXME: Do the right thing.
575
- # For now, ignore *everything* else
576
- # This is totally incorrect, user might want to
577
- # see text, comments and stuff like that anyway
578
- end
556
+ yield(node)
557
+ if node.do_complete
558
+ stack.push node
559
+ node.do_complete = nil # It's too late, complete! shouldn't do anything now
579
560
  end
561
+ when :end_element
562
+ stack.pop
563
+ when :end_document
564
+ return
565
+ else
566
+ # FIXME: Do the right thing.
567
+ # For now, ignore *everything* else
568
+ # This is totally incorrect, user might want to
569
+ # see text, comments and stuff like that anyway
570
+ end
580
571
  end
581
-
582
- # Basically it's a copy of self.parse, ugly ...
583
- def self.parse_subtree(start_node, parser)
584
- stack = [start_node]
585
- res = nil
586
- while true
587
- event = parser.pull
588
- case event[0]
589
- when :start_element
590
- attrs = {}
591
- event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
592
- stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
593
- if stack.size == 1
594
- res = stack[0]
595
- else
596
- stack[-2] << stack[-1]
597
- end
598
- when :end_element
599
- stack.pop
600
- return if stack == []
601
- # Needs unescaping
602
- when :text
603
- # Ignore whitespace
604
- if stack.size == 0
605
- next if event[1] !~ /\S/
606
- raise "Non-whitespace text out of document root"
607
- end
608
- stack[-1] << event[1].xml_unescape
609
- # CDATA is already unescaped
610
- when :cdata
611
- if stack.size == 0
612
- raise "CDATA out of the document root"
613
- end
614
- stack[-1] << event[1]
615
- when :end_document
616
- raise "Parse error: end_document inside a subtree, tags are not balanced"
617
- when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
618
- # Positivery ignore
619
- when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
620
- # Ignore ???
621
- #print "Ignored XML event #{event[0]} when parsing\n"
622
- else
623
- # Huh ? What's that ?
624
- #print "Unknown XML event #{event[0]} when parsing\n"
625
- end
626
- end
627
- res
572
+ end
628
573
 
574
+ # Basically it's a copy of self.parse, ugly ...
575
+ def self.parse_subtree(start_node, parser)
576
+ stack = [start_node]
577
+ res = nil
578
+ while true
579
+ event = parser.pull
580
+ case event[0]
581
+ when :start_element
582
+ attrs = {}
583
+ event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
584
+ stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
585
+ if stack.size == 1
586
+ res = stack[0]
587
+ else
588
+ stack[-2] << stack[-1]
589
+ end
590
+ when :end_element
591
+ stack.pop
592
+ return if stack == []
593
+ # Needs unescaping
594
+ when :text
595
+ # Ignore whitespace
596
+ if stack.size == 0
597
+ next if event[1] !~ /\S/
598
+ raise "Non-whitespace text out of document root"
599
+ end
600
+ stack[-1] << event[1].xml_unescape
601
+ # CDATA is already unescaped
602
+ when :cdata
603
+ if stack.size == 0
604
+ raise "CDATA out of the document root"
605
+ end
606
+ stack[-1] << event[1]
607
+ when :end_document
608
+ raise "Parse error: end_document inside a subtree, tags are not balanced"
609
+ when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
610
+ # Positivery ignore
611
+ when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
612
+ # Ignore ???
613
+ #print "Ignored XML event #{event[0]} when parsing\n"
614
+ else
615
+ # Huh ? What's that ?
616
+ #print "Unknown XML event #{event[0]} when parsing\n"
617
+ end
629
618
  end
619
+ res
630
620
 
631
- # Parse XML using REXML. Available options:
632
- # * :extra_entities => Proc or Hash (default = nil)
633
- # * :remove_pretty_printing => true/false (default = false)
634
- # * :comments => true/false (default = false)
635
- # * :pi => true/false (default = false)
636
- # * :normalize => true/false (default = false) - normalize
637
- # * :multiple_roots => true/false (default=false) - document
638
- # can have any number of roots (instread of one).
639
- # Return all in an array instead of root/nil.
640
- # Also include non-elements (String/PI/Comment) in the return set !!!
641
- #
642
- # FIXME: :comments/:pi will break everything
643
- # if there are comments/PIs outside document root.
644
- # Now PIs are outside the document root more often than not,
645
- # so we're pretty much screwed here.
646
- #
647
- # FIXME: Integrate all kinds of parse, and make them support extra options
648
- #
649
- # FIXME: Benchmark normalize!
650
- #
651
- # FIXME: Benchmark dup-based Enumerable methods
652
- #
653
- # FIXME: Make it possible to include bogus XML_Document superparent,
654
- # and to make it support out-of-root PIs/Comments
655
- def self.parse(stream, options={})
656
- extra_entities = options[:extra_entities]
657
-
658
- parser = REXML::Parsers::BaseParser.new stream
659
- stack = [[]]
660
-
661
- while true
662
- event = parser.pull
663
- case event[0]
664
- when :start_element
665
- attrs = {}
666
- event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
667
- stack << XML.new(event[1].to_sym, attrs, event[3..-1])
668
- stack[-2] << stack[-1]
669
- when :end_element
670
- stack.pop
671
- # Needs unescaping
672
- when :text
673
- e = event[1].xml_unescape(extra_entities)
674
- # Either inside root or in multi-root mode
675
- if stack.size > 1 or options[:multiple_roots]
676
- stack[-1] << e
677
- elsif event[1] !~ /\S/
678
- # Ignore out-of-root whitespace in single-root mode
679
- else
680
- raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
681
- end
682
- # CDATA is already unescaped
683
- when :cdata
684
- e = event[1]
685
- if stack.size > 1 or options[:multiple_roots]
686
- stack[-1] << e
687
- else
688
- raise "CDATA out of the document root"
689
- end
690
- when :comment
691
- next unless options[:comments]
692
- e = XML_Comment.new(event[1])
693
- if stack.size > 1 or options[:multiple_roots]
694
- stack[-1] << e
695
- else
696
- # FIXME: Ugly !
697
- raise "Comments out of the document root"
698
- end
699
- when :processing_instruction
700
- # FIXME: Real PI node
701
- next unless options[:pi]
702
- e = XML_PI.new(event[1], event[2])
703
- if stack.size > 1 or options[:multiple_roots]
704
- stack[-1] << e
705
- else
706
- # FIXME: Ugly !
707
- raise "Processing instruction out of the document root"
708
- end
709
- when :end_document
710
- break
711
- when :xmldecl,:start_doctype,:end_doctype,:elementdecl
712
- # Positivery ignore
713
- when :externalentity,:entity,:attlistdecl,:notationdecl
714
- # Ignore ???
715
- #print "Ignored XML event #{event[0]} when parsing\n"
716
- else
717
- # Huh ? What's that ?
718
- #print "Unknown XML event #{event[0]} when parsing\n"
719
- end
621
+ end
622
+
623
+ # Parse XML using REXML. Available options:
624
+ # * :extra_entities => Proc or Hash (default = nil)
625
+ # * :remove_pretty_printing => true/false (default = false)
626
+ # * :comments => true/false (default = false)
627
+ # * :pi => true/false (default = false)
628
+ # * :normalize => true/false (default = false) - normalize
629
+ # * :multiple_roots => true/false (default=false) - document
630
+ # can have any number of roots (instead of one).
631
+ # Return all in an array instead of root/nil.
632
+ # Also include non-elements (String/PI/Comment) in the return set !!!
633
+ #
634
+ # FIXME: :comments/:pi will break everything
635
+ # if there are comments/PIs outside document root.
636
+ # Now PIs are outside the document root more often than not,
637
+ # so we're pretty much screwed here.
638
+ #
639
+ # FIXME: Integrate all kinds of parse, and make them support extra options
640
+ #
641
+ # FIXME: Benchmark normalize!
642
+ #
643
+ # FIXME: Benchmark dup-based Enumerable methods
644
+ #
645
+ # FIXME: Make it possible to include bogus XML_Document superparent,
646
+ # and to make it support out-of-root PIs/Comments
647
+ def self.parse(stream, options={})
648
+ extra_entities = options[:extra_entities]
649
+
650
+ parser = REXML::Parsers::BaseParser.new stream
651
+ stack = [[]]
652
+
653
+ while true
654
+ event = parser.pull
655
+ case event[0]
656
+ when :start_element
657
+ attrs = {}
658
+ event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
659
+ stack << XML.new(event[1].to_sym, attrs, event[3..-1])
660
+ stack[-2] << stack[-1]
661
+ when :end_element
662
+ stack.pop
663
+ # Needs unescaping
664
+ when :text
665
+ e = event[1].xml_unescape(extra_entities)
666
+ # Either inside root or in multi-root mode
667
+ if stack.size > 1 or options[:multiple_roots]
668
+ stack[-1] << e
669
+ elsif event[1] !~ /\S/
670
+ # Ignore out-of-root whitespace in single-root mode
671
+ else
672
+ raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
720
673
  end
721
- roots = stack[0]
722
-
723
- roots.each{|root| root.remove_pretty_printing!} if options[:remove_pretty_printing]
724
- # :remove_pretty_printing does :normalize anyway
725
- roots.each{|root| root.normalize!} if options[:normalize]
726
- if options[:multiple_roots]
727
- roots
674
+ # CDATA is already unescaped
675
+ when :cdata
676
+ e = event[1]
677
+ if stack.size > 1 or options[:multiple_roots]
678
+ stack[-1] << e
728
679
  else
729
- roots[0]
680
+ raise "CDATA out of the document root"
730
681
  end
682
+ when :comment
683
+ next unless options[:comments]
684
+ e = XML_Comment.new(event[1])
685
+ if stack.size > 1 or options[:multiple_roots]
686
+ stack[-1] << e
687
+ else
688
+ # FIXME: Ugly !
689
+ raise "Comments out of the document root"
690
+ end
691
+ when :processing_instruction
692
+ # FIXME: Real PI node
693
+ next unless options[:pi]
694
+ e = XML_PI.new(event[1], event[2])
695
+ if stack.size > 1 or options[:multiple_roots]
696
+ stack[-1] << e
697
+ else
698
+ # FIXME: Ugly !
699
+ raise "Processing instruction out of the document root"
700
+ end
701
+ when :end_document
702
+ break
703
+ when :xmldecl,:start_doctype,:end_doctype,:elementdecl
704
+ # Positivery ignore
705
+ when :externalentity,:entity,:attlistdecl,:notationdecl
706
+ # Ignore ???
707
+ #print "Ignored XML event #{event[0]} when parsing\n"
708
+ else
709
+ # Huh ? What's that ?
710
+ #print "Unknown XML event #{event[0]} when parsing\n"
711
+ end
731
712
  end
713
+ roots = stack[0]
732
714
 
733
- # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
734
- def self.parse_sequence(stream, options={})
735
- o = options.dup
736
- o[:multiple_roots] = true
737
- parse(stream, o)
715
+ roots.each{|root| root.remove_pretty_printing!} if options[:remove_pretty_printing]
716
+ # :remove_pretty_printing does :normalize anyway
717
+ roots.each{|root| root.normalize!} if options[:normalize]
718
+ if options[:multiple_roots]
719
+ roots
720
+ else
721
+ roots[0]
738
722
  end
723
+ end
739
724
 
740
- # Renormalize a string containing XML document
741
- def self.renormalize(stream)
742
- parse(stream).to_s
743
- end
725
+ # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
726
+ def self.parse_sequence(stream, options={})
727
+ o = options.dup
728
+ o[:multiple_roots] = true
729
+ parse(stream, o)
730
+ end
744
731
 
745
- # Renormalize a string containing a sequence of XML documents
746
- # and strings
747
- # XMLrenormalize_sequence("<hello />, <world></world>!") =>
748
- # "<hello/>, <world/>!"
749
- def self.renormalize_sequence(stream)
750
- parse_sequence(stream).join
751
- end
732
+ # Renormalize a string containing XML document
733
+ def self.renormalize(stream)
734
+ parse(stream).to_s
735
+ end
736
+
737
+ # Renormalize a string containing a sequence of XML documents
738
+ # and strings
739
+ # XML.renormalize_sequence("<hello />, <world></world>!") =>
740
+ # "<hello/>, <world/>!"
741
+ def self.renormalize_sequence(stream)
742
+ parse_sequence(stream).join
743
+ end
752
744
  end
753
745
 
754
746
  # Instance methods (other than those of Enumerable)
755
747
  class XML
756
- attr_accessor :name, :attrs, :contents
757
-
758
- # initialize can be run in many ways
759
- # * XML.new
760
- # * XML.new(:tag_symbol)
761
- # * XML.new(:tag_symbol, {attributes})
762
- # * XML.new(:tag_symbol, "children", "more", XML.new(...))
763
- # * XML.new(:tag_symbol, {attributes}, "and", "children")
764
- # * XML.new(:tag_symbol) { monadic code }
765
- # * XML.new(:tag_symbol, {attributes}) { monadic code }
766
- #
767
- # Or even:
768
- # * XML.new(:tag_symbol, "children") { and some monadic code }
769
- # * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
770
- # But typically you won't be mixing these two style
771
- #
772
- # Attribute values can will be converted to strings
773
- def initialize(*args, &blk)
774
- @name = nil
775
- @attrs = {}
776
- @contents = []
777
- @name = args.shift if args.size != 0
778
- if args.size != 0 and args[0].is_a? Hash
779
- args.shift.each{|k,v|
780
- # Do automatic conversion here
781
- # This also assures that the hashes are *not* shared
782
- self[k] = v
783
- }
784
- end
785
- # Expand Arrays passed as arguments
786
- self << args
787
- # FIXME: We'd rather not have people say @name = :foo there :-)
788
- if blk
789
- instance_eval(&blk)
790
- end
748
+ attr_accessor :name, :attrs, :contents
749
+
750
+ # initialize can be run in many ways
751
+ # * XML.new
752
+ # * XML.new(:tag_symbol)
753
+ # * XML.new(:tag_symbol, {attributes})
754
+ # * XML.new(:tag_symbol, "children", "more", XML.new(...))
755
+ # * XML.new(:tag_symbol, {attributes}, "and", "children")
756
+ # * XML.new(:tag_symbol) { monadic code }
757
+ # * XML.new(:tag_symbol, {attributes}) { monadic code }
758
+ #
759
+ # Or even:
760
+ # * XML.new(:tag_symbol, "children") { and some monadic code }
761
+ # * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
762
+ # But typically you won't be mixing these two styles
763
+ #
764
+ # Attribute values will be converted to strings
765
+ def initialize(*args, &blk)
766
+ @name = nil
767
+ @attrs = {}
768
+ @contents = []
769
+ @name = args.shift if args.size != 0
770
+ if args.size != 0 and args[0].is_a? Hash
771
+ args.shift.each{|k,v|
772
+ # Do automatic conversion here
773
+ # This also assures that the hashes are *not* shared
774
+ self[k] = v
775
+ }
776
+ end
777
+ # Expand Arrays passed as arguments
778
+ self << args
779
+ # FIXME: We'd rather not have people say @name = :foo there :-)
780
+ if blk
781
+ instance_eval(&blk)
791
782
  end
783
+ end
792
784
 
793
- # Convert to a well-formatted XML
794
- def to_s
795
- "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
796
- if @contents.size == 0
797
- "/>"
798
- else
799
- ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.to_s end}.join + "</#{name}>"
800
- end
785
+ # Convert to a well-formatted XML
786
+ def to_s
787
+ "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
788
+ if @contents.size == 0
789
+ "/>"
790
+ else
791
+ ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.to_s end}.join + "</#{name}>"
801
792
  end
793
+ end
802
794
 
803
- # Convert to a well-formatted XML, but without children information.
804
- # This is a reasonable format for irb and debugging.
805
- # If you want to see a few levels of children, call inspect(2) and so on
806
- def inspect(include_children=0)
807
- "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
808
- if @contents.size == 0
809
- "/>"
810
- elsif include_children == 0
811
- ">...</#{name}>"
812
- else
813
- ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.inspect(include_children-1) end}.join + "</#{name}>"
814
- end
795
+ # Convert to a well-formatted XML, but without children information.
796
+ # This is a reasonable format for irb and debugging.
797
+ # If you want to see a few levels of children, call inspect(2) and so on
798
+ def inspect(include_children=0)
799
+ "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
800
+ if @contents.size == 0
801
+ "/>"
802
+ elsif include_children == 0
803
+ ">...</#{name}>"
804
+ else
805
+ ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.inspect(include_children-1) end}.join + "</#{name}>"
815
806
  end
807
+ end
816
808
 
817
- # Read attributes.
818
- # Also works with pseudoattributes:
819
- # img[:@x] == img.child(:x).text # or nil if there isn't any.
820
- def [](key)
821
- if key.to_s[0] == ?@
822
- tag = key.to_s[1..-1].to_sym
823
- c = child(tag)
824
- if c
825
- c.text
826
- else
827
- nil
828
- end
829
- else
830
- @attrs[key]
831
- end
809
+ # Read attributes.
810
+ # Also works with pseudoattributes:
811
+ # img[:@x] == img.child(:x).text # or nil if there isn't any.
812
+ def [](key)
813
+ if key.to_s[0] == ?@
814
+ tag = key.to_s[1..-1].to_sym
815
+ c = child(tag)
816
+ if c
817
+ c.text
818
+ else
819
+ nil
820
+ end
821
+ else
822
+ @attrs[key]
832
823
  end
824
+ end
833
825
 
834
- # Set attributes.
835
- # Value is automatically converted to String, so you can say:
836
- # img[:x] = 200
837
- # Also works with pseudoattributes:
838
- # foo[:@bar] = "x"
839
- def []=(key, value)
840
- if key.to_s[0] == ?@
841
- tag = key.to_s[1..-1].to_sym
842
- c = child(tag)
843
- if c
844
- c.contents = [value.to_s]
845
- else
846
- self << XML.new(tag, value.to_s)
847
- end
848
- else
849
- @attrs[key] = value.to_s
850
- end
826
+ # Set attributes.
827
+ # Value is automatically converted to String, so you can say:
828
+ # img[:x] = 200
829
+ # Also works with pseudoattributes:
830
+ # foo[:@bar] = "x"
831
+ def []=(key, value)
832
+ if key.to_s[0] == ?@
833
+ tag = key.to_s[1..-1].to_sym
834
+ c = child(tag)
835
+ if c
836
+ c.contents = [value.to_s]
837
+ else
838
+ self << XML.new(tag, value.to_s)
839
+ end
840
+ else
841
+ @attrs[key] = value.to_s
851
842
  end
843
+ end
852
844
 
853
- # Add children.
854
- # Possible uses:
855
- # * Add single element
856
- # self << xml(...)
857
- # self << "foo"
858
- # Add nothing:
859
- # self << nil
860
- # Add multiple elements (also works recursively):
861
- # self << [a, b, c]
862
- # self << [a, [b, c], d]
863
- def <<(cnt)
864
- if cnt.nil?
865
- # skip
866
- elsif cnt.is_a? Array
867
- cnt.each{|elem| self << elem}
868
- else
869
- @contents << cnt
870
- end
871
- self
845
+ # Add children.
846
+ # Possible uses:
847
+ # * Add single element
848
+ # self << xml(...)
849
+ # self << "foo"
850
+ # Add nothing:
851
+ # self << nil
852
+ # Add multiple elements (also works recursively):
853
+ # self << [a, b, c]
854
+ # self << [a, [b, c], d]
855
+ def <<(cnt)
856
+ if cnt.nil?
857
+ # skip
858
+ elsif cnt.is_a? Array
859
+ cnt.each{|elem| self << elem}
860
+ else
861
+ @contents << cnt
872
862
  end
863
+ self
864
+ end
873
865
 
874
- # Equality test, works as if XMLs were normalized, so:
875
- # XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
876
- def ==(x)
877
- return false unless x.is_a? XML
878
- return false unless name == x.name and attrs == x.attrs
879
- # Now the hard part, strings can be split in different ways
880
- # empty string children are possible etc.
881
- self_i = 0
882
- othr_i = 0
883
- while self_i != contents.size or othr_i != x.contents.size
884
- # Ignore ""s
885
- if contents[self_i].is_a? String and contents[self_i] == ""
886
- self_i += 1
887
- next
888
- end
889
- if x.contents[othr_i].is_a? String and x.contents[othr_i] == ""
890
- othr_i += 1
891
- next
892
- end
866
+ # Equality test, works as if XMLs were normalized, so:
867
+ # XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
868
+ def ==(x)
869
+ return false unless x.is_a? XML
870
+ return false unless name == x.name and attrs == x.attrs
871
+ # Now the hard part, strings can be split in different ways
872
+ # empty string children are possible etc.
873
+ self_i = 0
874
+ othr_i = 0
875
+ while self_i != contents.size or othr_i != x.contents.size
876
+ # Ignore ""s
877
+ if contents[self_i].is_a? String and contents[self_i] == ""
878
+ self_i += 1
879
+ next
880
+ end
881
+ if x.contents[othr_i].is_a? String and x.contents[othr_i] == ""
882
+ othr_i += 1
883
+ next
884
+ end
893
885
 
894
- # If one is finished and the other contains non-empty elements,
895
- # they are not equal
896
- return false if self_i == contents.size or othr_i == x.contents.size
897
-
898
- # Are they both Strings ?
899
- # Strings can be divided in different ways, and calling normalize!
900
- # here would be rather expensive, so let's use this complicated
901
- # algorithm
902
- if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
903
- a = contents[self_i]
904
- b = x.contents[othr_i]
905
- self_i += 1
906
- othr_i += 1
907
- while a != "" or b != ""
908
- if a == b
909
- a = ""
910
- b = ""
911
- elsif a.size > b.size and a[0, b.size] == b
912
- a = a[b.size..-1]
913
- if x.contents[othr_i].is_a? String
914
- b = x.contents[othr_i]
915
- othr_i += 1
916
- next
917
- end
918
- elsif b.size > a.size and b[0, a.size] == a
919
- b = b[a.size..-1]
920
- if contents[self_i].is_a? String
921
- a = contents[self_i]
922
- self_i += 1
923
- next
924
- end
925
- else
926
- return false
927
- end
928
- end
929
- next
930
- end
886
+ # If one is finished and the other contains non-empty elements,
887
+ # they are not equal
888
+ return false if self_i == contents.size or othr_i == x.contents.size
931
889
 
932
- # OK, so at least one of them is not a String.
933
- # Hopefully they're either both XMLs or one is an XML and the
934
- # other is a String. It is also possible that contents contains
935
- # something illegal, but we aren't catching that,
936
- # so xml(:foo, Garbage.new) is going to at least equal itself.
937
- # And we aren't, because xml(:foo, Garbage.new) == xml(:bar, Garbage.new)
938
- # is going to return an honest false, and incoherent sanity
939
- # check is worse than no sanity check.
940
- #
941
- # Oh yeah, they can be XML_PI or XML_Comment. In such case, this
942
- # is ok.
943
- return false unless contents[self_i] == x.contents[othr_i]
944
- self_i += 1
945
- othr_i += 1
890
+ # Are they both Strings ?
891
+ # Strings can be divided in different ways, and calling normalize!
892
+ # here would be rather expensive, so let's use this complicated
893
+ # algorithm
894
+ if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
895
+ a = contents[self_i]
896
+ b = x.contents[othr_i]
897
+ self_i += 1
898
+ othr_i += 1
899
+ while a != "" or b != ""
900
+ if a == b
901
+ a = ""
902
+ b = ""
903
+ elsif a.size > b.size and a[0, b.size] == b
904
+ a = a[b.size..-1]
905
+ if x.contents[othr_i].is_a? String
906
+ b = x.contents[othr_i]
907
+ othr_i += 1
908
+ next
909
+ end
910
+ elsif b.size > a.size and b[0, a.size] == a
911
+ b = b[a.size..-1]
912
+ if contents[self_i].is_a? String
913
+ a = contents[self_i]
914
+ self_i += 1
915
+ next
916
+ end
917
+ else
918
+ return false
919
+ end
946
920
  end
947
- return true
948
- end
921
+ next
922
+ end
949
923
 
950
- alias_method :real_method_missing, :method_missing
951
- # Define all foo!-methods for monadic interface, so you can write:
952
- #
953
- def method_missing(meth, *args, &blk)
954
- if meth.to_s =~ /^(.*)!$/
955
- self << XML.new($1.to_sym, *args, &blk)
956
- else
957
- real_method_missing(meth, *args, &blk)
958
- end
924
+ # OK, so at least one of them is not a String.
925
+ # Hopefully they're either both XMLs or one is an XML and the
926
+ # other is a String. It is also possible that contents contains
927
+ # something illegal, but we aren't catching that,
928
+ # so xml(:foo, Garbage.new) is going to at least equal itself.
929
+ # And we aren't, because xml(:foo, Garbage.new) == xml(:bar, Garbage.new)
930
+ # is going to return an honest false, and incoherent sanity
931
+ # check is worse than no sanity check.
932
+ #
933
+ # Oh yeah, they can be XML_PI or XML_Comment. In such case, this
934
+ # is ok.
935
+ return false unless contents[self_i] == x.contents[othr_i]
936
+ self_i += 1
937
+ othr_i += 1
959
938
  end
939
+ return true
940
+ end
960
941
 
961
- # Make monadic interface more "official"
962
- # * node.exec! { foo!; bar! }
963
- # is equivalent to
964
- # * node << xml(:foo) << xml(:bar)
965
- def exec!(&blk)
966
- instance_eval(&blk)
942
+ alias_method :real_method_missing, :method_missing
943
+ # Define all foo!-methods for monadic interface, so you can write:
944
+ # node.foo!(*args) { ... } # same as: node << XML.new(:foo, *args) { ... }
945
+ def method_missing(meth, *args, &blk)
946
+ if meth.to_s =~ /^(.*)!$/
947
+ self << XML.new($1.to_sym, *args, &blk)
948
+ else
949
+ real_method_missing(meth, *args, &blk)
967
950
  end
951
+ end
968
952
 
969
- # Select a subtree
970
- # NOTE: Uses object_id of the start/end tags !
971
- # They have to be the same, not just identical !
972
- # <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
973
- # returns
974
- # <foo><b/><c/></foo>
975
- # start and end and their descendants are not included in
976
- # the result tree.
977
- # Either start or end can be nil.
978
- # * If both start and end are nil, return whole tree.
979
- # * If start is nil, return subtree up to range_end.
980
- # * If start is not inside the tree, return nil.
981
- # * If end is nil, return subtree from start
982
- # * If end is not inside the tree, return subtree from start.
983
- # * If end is before or below start, or they're the same node, the result is unspecified.
984
- # * if end comes directly after start, or as first node when start==nil, return path reaching there.
985
- def range(range_start, range_end, end_reached_cb=nil)
986
- if range_start == nil
987
- result = XML.new(name, attrs)
988
- else
989
- result = nil
990
- end
991
- @contents.each {|c|
992
- # end reached !
993
- if range_end and c.object_id == range_end.object_id
994
- end_reached_cb.call if end_reached_cb
995
- break
996
- end
997
- # start reached !
998
- if range_start and c.object_id == range_start.object_id
999
- result = XML.new(name, attrs)
1000
- next
1001
- end
1002
- if result # We already started
1003
- if c.is_a? XML
1004
- break_me = false
1005
- result.add! c.range(nil, range_end, lambda{ break_me = true })
1006
- if break_me
1007
- end_reached_cb.call if end_reached_cb
1008
- break
1009
- end
1010
- else # String/XML_PI/XML_Comment
1011
- result.add! c
1012
- end
1013
- else
1014
- # Strings/XML_PI/XML_Comment obviously cannot start a range
1015
- if c.is_a? XML
1016
- break_me = false
1017
- r = c.range(range_start, range_end, lambda{ break_me = true })
1018
- if r
1019
- # start reached !
1020
- result = XML.new(name, attrs, r)
1021
- end
1022
- if break_me
1023
- # end reached !
1024
- end_reached_cb.call if end_reached_cb
1025
- break
1026
- end
1027
- end
1028
- end
1029
- }
1030
- return result
1031
- end
953
+ # Make monadic interface more "official"
954
+ # * node.exec! { foo!; bar! }
955
+ # is equivalent to
956
+ # * node << xml(:foo) << xml(:bar)
957
+ def exec!(&blk)
958
+ instance_eval(&blk)
959
+ end
1032
960
 
1033
- # XML#subsequence is similar to XML#range, but instead of
1034
- # trimmed subtree in returns a list of elements
1035
- # The same elements are included in both cases, but here
1036
- # we do not include any parents !
1037
- #
1038
- # <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
1039
- # <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
1040
- #
1041
- # <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
1042
- # <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
1043
- #
1044
- # <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
1045
- # <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
1046
- #
1047
- # And we return [], not nil if nothing matches
1048
- def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
1049
- result = []
1050
- start_seen = range_start.nil?
1051
- @contents.each{|c|
1052
- if range_end and range_end.object_id == c.object_id
1053
- end_seen_cb.call if end_seen_cb
1054
- break
1055
- end
1056
- if range_start and range_start.object_id == c.object_id
1057
- start_seen = true
1058
- start_seen_cb.call if start_seen_cb
1059
- next
1060
- end
1061
- if start_seen
1062
- if c.is_a? XML
1063
- break_me = false
1064
- result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
1065
- break if break_me
1066
- else # String/XML_PI/XML_Comment
1067
- result << c
1068
- end
1069
- else
1070
- # String/XML_PI/XML_Comment cannot start a subsequence
1071
- if c.is_a? XML
1072
- break_me = false
1073
- result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
1074
- break if break_me
1075
- end
1076
- end
1077
- }
1078
- # Include starting tag if it was right from the range_start
1079
- # Otherwise, return just the raw sequence
1080
- result = [XML.new(@name, @attrs, result)] if range_start == nil
1081
- return result
961
+ # Select a subtree
962
+ # NOTE: Uses object_id of the start/end tags !
963
+ # They have to be the very same objects, not merely equal ones !
964
+ # <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
965
+ # returns
966
+ # <foo><b/><c/></foo>
967
+ # start and end and their descendants are not included in
968
+ # the result tree.
969
+ # Either start or end can be nil.
970
+ # * If both start and end are nil, return whole tree.
971
+ # * If start is nil, return subtree up to range_end.
972
+ # * If start is not inside the tree, return nil.
973
+ # * If end is nil, return subtree from start
974
+ # * If end is not inside the tree, return subtree from start.
975
+ # * If end is before or below start, or they're the same node, the result is unspecified.
976
+ # * if end comes directly after start, or as first node when start==nil, return path reaching there.
977
+ def range(range_start, range_end, end_reached_cb=nil)
978
+ if range_start == nil
979
+ result = XML.new(name, attrs)
980
+ else
981
+ result = nil
1082
982
  end
983
+ @contents.each {|c|
984
+ # end reached !
985
+ if range_end and c.object_id == range_end.object_id
986
+ end_reached_cb.call if end_reached_cb
987
+ break
988
+ end
989
+ # start reached !
990
+ if range_start and c.object_id == range_start.object_id
991
+ result = XML.new(name, attrs)
992
+ next
993
+ end
994
+ if result # We already started
995
+ if c.is_a? XML
996
+ break_me = false
997
+ result.add! c.range(nil, range_end, lambda{ break_me = true })
998
+ if break_me
999
+ end_reached_cb.call if end_reached_cb
1000
+ break
1001
+ end
1002
+ else # String/XML_PI/XML_Comment
1003
+ result.add! c
1004
+ end
1005
+ else
1006
+ # Strings/XML_PI/XML_Comment obviously cannot start a range
1007
+ if c.is_a? XML
1008
+ break_me = false
1009
+ r = c.range(range_start, range_end, lambda{ break_me = true })
1010
+ if r
1011
+ # start reached !
1012
+ result = XML.new(name, attrs, r)
1013
+ end
1014
+ if break_me
1015
+ # end reached !
1016
+ end_reached_cb.call if end_reached_cb
1017
+ break
1018
+ end
1019
+ end
1020
+ end
1021
+ }
1022
+ return result
1023
+ end
1083
1024
 
1084
- # =~ for a few reasonable patterns
1085
- def =~(pattern)
1086
- if pattern.is_a? Symbol
1087
- @name == pattern
1088
- elsif pattern.is_a? Regexp
1089
- rv = text =~ pattern
1090
- else # Hash, Pattern_any, Pattern_all
1091
- pattern === self
1025
+ # XML#subsequence is similar to XML#range, but instead of
1026
+ # trimmed subtree it returns a list of elements
1027
+ # The same elements are included in both cases, but here
1028
+ # we do not include any parents !
1029
+ #
1030
+ # <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
1031
+ # <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
1032
+ #
1033
+ # <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
1034
+ # <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
1035
+ #
1036
+ # <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
1037
+ # <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
1038
+ #
1039
+ # And we return [], not nil if nothing matches
1040
+ def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
1041
+ result = []
1042
+ start_seen = range_start.nil?
1043
+ @contents.each{|c|
1044
+ if range_end and range_end.object_id == c.object_id
1045
+ end_seen_cb.call if end_seen_cb
1046
+ break
1047
+ end
1048
+ if range_start and range_start.object_id == c.object_id
1049
+ start_seen = true
1050
+ start_seen_cb.call if start_seen_cb
1051
+ next
1052
+ end
1053
+ if start_seen
1054
+ if c.is_a? XML
1055
+ break_me = false
1056
+ result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
1057
+ break if break_me
1058
+ else # String/XML_PI/XML_Comment
1059
+ result << c
1092
1060
  end
1093
- end
1094
-
1095
- # Get rid of pretty-printing whitespace. Also normalizes the XML.
1096
- def remove_pretty_printing!(exceptions=nil)
1097
- normalize!
1098
- real_remove_pretty_printing!(exceptions)
1099
- normalize!
1100
- end
1061
+ else
1062
+ # String/XML_PI/XML_Comment cannot start a subsequence
1063
+ if c.is_a? XML
1064
+ break_me = false
1065
+ result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
1066
+ break if break_me
1067
+ end
1068
+ end
1069
+ }
1070
+ # Include starting tag if it was right from the range_start
1071
+ # Otherwise, return just the raw sequence
1072
+ result = [XML.new(@name, @attrs, result)] if range_start == nil
1073
+ return result
1074
+ end
1101
1075
 
1102
- # normalize! is already recursive, so only one call at top level is needed.
1103
- # This helper method lets us avoid extra calls to normalize!.
1104
- def real_remove_pretty_printing!(exceptions=nil)
1105
- return if exceptions and exceptions.include? @name
1106
- each{|c|
1107
- if c.is_a? String
1108
- c.sub!(/^\s+/, "")
1109
- c.sub!(/\s+$/, "")
1110
- c.gsub!(/\s+/, " ")
1111
- elsif c.is_a? XML_PI or c.is_a? XML_Comment
1112
- else
1113
- c.real_remove_pretty_printing!(exceptions)
1114
- end
1115
- }
1076
+ # =~ for a few reasonable patterns
1077
+ def =~(pattern)
1078
+ if pattern.is_a? Symbol
1079
+ @name == pattern
1080
+ elsif pattern.is_a? Regexp
1081
+ text =~ pattern
1082
+ else # Hash, Pattern_any, Pattern_all
1083
+ pattern === self
1116
1084
  end
1085
+ end
1117
1086
 
1118
- protected :real_remove_pretty_printing!
1087
+ # Get rid of pretty-printing whitespace. Also normalizes the XML.
1088
+ def remove_pretty_printing!(exceptions=nil)
1089
+ normalize!
1090
+ real_remove_pretty_printing!(exceptions)
1091
+ normalize!
1092
+ end
1119
1093
 
1120
- # Add pretty-printing whitespace. Also normalizes the XML.
1121
- def add_pretty_printing!
1122
- normalize!
1123
- real_add_pretty_printing!
1124
- normalize!
1125
- end
1126
-
1127
- def real_add_pretty_printing!(indent = "")
1128
- return if @contents.empty?
1129
- each{|c|
1130
- if c.is_a? XML
1131
- c.real_add_pretty_printing!(indent+" ")
1132
- elsif c.is_a? String
1133
- c.gsub!(/\n\s*/, "\n#{indent} ")
1134
- end
1135
- }
1136
- @contents = @contents.inject([]){|children, c| children + ["\n#{indent} ", c]}+["\n#{indent}"]
1137
- end
1094
+ # normalize! is already recursive, so only one call at top level is needed.
1095
+ # This helper method lets us avoid extra calls to normalize!.
1096
+ def real_remove_pretty_printing!(exceptions=nil)
1097
+ return if exceptions and exceptions.include? @name
1098
+ each{|c|
1099
+ if c.is_a? String
1100
+ c.sub!(/^\s+/, "")
1101
+ c.sub!(/\s+$/, "")
1102
+ c.gsub!(/\s+/, " ")
1103
+ elsif c.is_a? XML_PI or c.is_a? XML_Comment
1104
+ else
1105
+ c.real_remove_pretty_printing!(exceptions)
1106
+ end
1107
+ }
1108
+ end
1138
1109
 
1139
- protected :real_add_pretty_printing!
1140
-
1141
- alias_method :raw_dup, :dup
1142
- # This is not a trivial method - first it does a *deep* copy,
1143
- # second it takes a block which is instance_eval'ed,
1144
- # so you can do things like:
1145
- # * node.dup{ @name = :foo }
1146
- # * node.dup{ self[:color] = "blue" }
1147
- def dup(&blk)
1148
- new_obj = self.raw_dup
1149
- # Attr values stay shared - ugly
1150
- new_obj.attrs = new_obj.attrs.dup
1151
- new_obj.contents = new_obj.contents.map{|c| c.dup}
1152
-
1153
- new_obj.instance_eval(&blk) if blk
1154
- return new_obj
1155
- end
1110
+ protected :real_remove_pretty_printing!
1156
1111
 
1112
+ # Add pretty-printing whitespace. Also normalizes the XML.
1113
+ def add_pretty_printing!
1114
+ normalize!
1115
+ real_add_pretty_printing!
1116
+ normalize!
1117
+ end
1157
1118
 
1158
- # Add some String children (all attributes get to_s'ed)
1159
- def text!(*args)
1160
- args.each{|s| self << s.to_s}
1161
- end
1162
- # Add XML child
1163
- def xml!(*args, &blk)
1164
- @contents << XML.new(*args, &blk)
1165
- end
1119
+ def real_add_pretty_printing!(indent = "")
1120
+ return if @contents.empty?
1121
+ each{|c|
1122
+ if c.is_a? XML
1123
+ c.real_add_pretty_printing!(indent+" ")
1124
+ elsif c.is_a? String
1125
+ c.gsub!(/\n\s*/, "\n#{indent} ")
1126
+ end
1127
+ }
1128
+ @contents = @contents.inject([]){|children, c| children + ["\n#{indent} ", c]}+["\n#{indent}"]
1129
+ end
1166
1130
 
1167
- alias_method :add!, :<<
1168
-
1169
- # Normalization means joining strings
1170
- # and getting rid of ""s, recursively
1171
- def normalize!
1172
- new_contents = []
1173
- @contents.each{|c|
1174
- if c.is_a? String
1175
- next if c == ""
1176
- if new_contents[-1].is_a? String
1177
- new_contents[-1] += c
1178
- next
1179
- end
1180
- else
1181
- c.normalize!
1182
- end
1183
- new_contents.push c
1184
- }
1185
- @contents = new_contents
1186
- end
1131
+ protected :real_add_pretty_printing!
1187
1132
 
1188
- # Return text below the node, stripping all XML tags,
1189
- # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1190
- # returns "Hello, world!"
1191
- def text
1192
- res = ""
1193
- @contents.each{|c|
1194
- if c.is_a? XML
1195
- res << c.text
1196
- elsif c.is_a? String
1197
- res << c
1198
- end # Ignore XML_PI/XML_Comment
1199
- }
1200
- res
1201
- end
1133
+ alias_method :raw_dup, :dup
1134
+ # This is not a trivial method - first it does a *deep* copy,
1135
+ # second it takes a block which is instance_eval'ed,
1136
+ # so you can do things like:
1137
+ # * node.dup{ @name = :foo }
1138
+ # * node.dup{ self[:color] = "blue" }
1139
+ def dup(&blk)
1140
+ new_obj = self.raw_dup
1141
+ # Attr values stay shared - ugly
1142
+ new_obj.attrs = new_obj.attrs.dup
1143
+ new_obj.contents = new_obj.contents.map{|c| c.dup}
1202
1144
 
1203
- # Equivalent to node.children(pat, *rest)[0]
1204
- # Returns nil if there aren't any matching children
1205
- def child(pat=nil, *rest)
1206
- children(pat, *rest) {|c|
1207
- return c
1208
- }
1209
- return nil
1210
- end
1145
+ new_obj.instance_eval(&blk) if blk
1146
+ return new_obj
1147
+ end
1211
1148
 
1212
- # Equivalent to node.descendants(pat, *rest)[0]
1213
- # Returns nil if there aren't any matching descendants
1214
- def descendant(pat=nil, *rest)
1215
- descendants(pat, *rest) {|c|
1216
- return c
1217
- }
1218
- return nil
1219
- end
1220
1149
 
1221
- # XML#children(pattern, more_patterns)
1222
- # Return all children of a node with tags matching tag.
1223
- # Also:
1224
- # * children(:a, :b) == children(:a).children(:b)
1225
- # * children(:a, :*, :c) == children(:a).descendants(:c)
1226
- def children(pat=nil, *rest, &blk)
1227
- return descendants(*rest, &blk) if pat == :*
1228
- res = []
1229
- @contents.each{|c|
1230
- if pat.nil? or pat === c
1231
- if rest == []
1232
- res << c
1233
- yield c if block_given?
1234
- else
1235
- res += c.children(*rest, &blk)
1236
- end
1237
- end
1238
- }
1239
- res
1240
- end
1241
-
1242
- # * XML#descendants
1243
- # * XML#descendants(pattern)
1244
- # * XML#descendants(pattern, more_patterns)
1245
- #
1246
- # Return all descendants of a node matching the pattern.
1247
- # If pattern==nil, simply return all descendants.
1248
- # Optionally run a block on each of them if a block was given.
1249
- # If pattern==nil, also match Strings !
1250
- def descendants(pat=nil, *rest, &blk)
1251
- res = []
1252
- @contents.each{|c|
1253
- if pat.nil? or pat === c
1254
- if rest == []
1255
- res << c
1256
- yield c if block_given?
1257
- else
1258
- res += c.children(*rest, &blk)
1259
- end
1260
- end
1261
- if c.is_a? XML
1262
- res += c.descendants(pat, *rest, &blk)
1263
- end
1264
- }
1265
- res
1266
- end
1267
-
1268
- # Change elements based on pattern
1269
- def deep_map(pat, &blk)
1270
- if self =~ pat
1271
- yield self
1150
+ # Add some String children (all attributes get to_s'ed)
1151
+ def text!(*args)
1152
+ args.each{|s| self << s.to_s}
1153
+ end
1154
+ # Add XML child
1155
+ def xml!(*args, &blk)
1156
+ @contents << XML.new(*args, &blk)
1157
+ end
1158
+
1159
+ alias_method :add!, :<<
1160
+
1161
+ # Normalization means joining strings
1162
+ # and getting rid of ""s, recursively
1163
+ def normalize!
1164
+ new_contents = []
1165
+ @contents.each{|c|
1166
+ if c.is_a? String
1167
+ next if c == ""
1168
+ if new_contents[-1].is_a? String
1169
+ new_contents[-1] += c
1170
+ next
1171
+ end
1172
+ else
1173
+ c.normalize!
1174
+ end
1175
+ new_contents.push c
1176
+ }
1177
+ @contents = new_contents
1178
+ end
1179
+
1180
+ # Return text below the node, stripping all XML tags,
1181
+ # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1182
+ # returns "Hello, world!"
1183
+ def text
1184
+ res = ""
1185
+ @contents.each{|c|
1186
+ if c.is_a? XML
1187
+ res << c.text
1188
+ elsif c.is_a? String
1189
+ res << c
1190
+ end # Ignore XML_PI/XML_Comment
1191
+ }
1192
+ res
1193
+ end
1194
+
1195
+ # Equivalent to node.children(pat, *rest)[0]
1196
+ # Returns nil if there aren't any matching children
1197
+ def child(pat=nil, *rest)
1198
+ children(pat, *rest) {|c|
1199
+ return c
1200
+ }
1201
+ return nil
1202
+ end
1203
+
1204
+ # Equivalent to node.descendants(pat, *rest)[0]
1205
+ # Returns nil if there aren't any matching descendants
1206
+ def descendant(pat=nil, *rest)
1207
+ descendants(pat, *rest) {|c|
1208
+ return c
1209
+ }
1210
+ return nil
1211
+ end
1212
+
1213
+ # XML#children(pattern, more_patterns)
1214
+ # Return all children of a node with tags matching tag.
1215
+ # Also:
1216
+ # * children(:a, :b) == children(:a).children(:b)
1217
+ # * children(:a, :*, :c) == children(:a).descendants(:c)
1218
+ def children(pat=nil, *rest, &blk)
1219
+ return descendants(*rest, &blk) if pat == :*
1220
+ res = []
1221
+ @contents.each{|c|
1222
+ if pat.nil? or pat === c
1223
+ if rest == []
1224
+ res << c
1225
+ yield c if block_given?
1272
1226
  else
1273
- r = XML.new(self.name, self.attrs)
1274
- each{|c|
1275
- if c.is_a? XML
1276
- r << c.deep_map(pat, &blk)
1277
- else
1278
- r << c
1279
- end
1280
- }
1281
- r
1227
+ res += c.children(*rest, &blk)
1282
1228
  end
1283
- end
1229
+ end
1230
+ }
1231
+ res
1232
+ end
1284
1233
 
1285
- # FIXME: do we want a shallow or a deep copy here ?
1286
- # Map children, but leave the name/attributes
1287
- def map(pat=nil)
1288
- r = XML.new(self.name, self.attrs)
1289
- each{|c|
1290
- if !pat || (c.is_a?(XML) && c =~ pat)
1291
- r << yield(c)
1292
- else
1293
- r << c
1294
- end
1295
- }
1296
- r
1234
+ # * XML#descendants
1235
+ # * XML#descendants(pattern)
1236
+ # * XML#descendants(pattern, more_patterns)
1237
+ #
1238
+ # Return all descendants of a node matching the pattern.
1239
+ # If pattern==nil, simply return all descendants.
1240
+ # Optionally run a block on each of them if a block was given.
1241
+ # If pattern==nil, also match Strings !
1242
+ def descendants(pat=nil, *rest, &blk)
1243
+ res = []
1244
+ @contents.each{|c|
1245
+ if pat.nil? or pat === c
1246
+ if rest == []
1247
+ res << c
1248
+ yield c if block_given?
1249
+ else
1250
+ res += c.children(*rest, &blk)
1251
+ end
1252
+ end
1253
+ if c.is_a? XML
1254
+ res += c.descendants(pat, *rest, &blk)
1255
+ end
1256
+ }
1257
+ res
1258
+ end
1259
+
1260
+ # Change elements based on pattern
1261
+ def deep_map(pat, &blk)
1262
+ if self =~ pat
1263
+ yield self
1264
+ else
1265
+ r = XML.new(self.name, self.attrs)
1266
+ each{|c|
1267
+ if c.is_a? XML
1268
+ r << c.deep_map(pat, &blk)
1269
+ else
1270
+ r << c
1271
+ end
1272
+ }
1273
+ r
1297
1274
  end
1275
+ end
1276
+
1277
+ # FIXME: do we want a shallow or a deep copy here ?
1278
+ # Map children, but leave the name/attributes
1279
+ def map(pat=nil)
1280
+ r = XML.new(self.name, self.attrs)
1281
+ each{|c|
1282
+ if !pat || (c.is_a?(XML) && c =~ pat)
1283
+ r << yield(c)
1284
+ else
1285
+ r << c
1286
+ end
1287
+ }
1288
+ r
1289
+ end
1298
1290
  end
1299
1291
 
1300
1292
  # FIXME: Is this even sane ?
1301
1293
  # * What about escaping and all that stuff ?
1302
1294
  # * Rest of the code assumes that everything is either XML or String
1303
1295
  class XML_PI
1304
- def initialize(c, t)
1305
- @c = c
1306
- @t = t
1307
- end
1308
- def to_s
1309
- "<?#{@c}#{@t}?>"
1310
- end
1296
+ def initialize(c, t)
1297
+ @c = c
1298
+ @t = t
1299
+ end
1300
+ def to_s
1301
+ "<?#{@c}#{@t}?>"
1302
+ end
1311
1303
  end
1312
1304
 
1313
1305
  # FIXME: Is this even sane ?
@@ -1315,25 +1307,25 @@ end
1315
1307
  # * Rest of the code assumes that everything is either XML or String
1316
1308
  # * There are some limitations on where one can put -s in the comment. Do not overdo.
1317
1309
  class XML_Comment
1318
- def initialize(c)
1319
- @c = c
1320
- end
1321
- def to_s
1322
- "<!--#{@c}-->"
1323
- end
1310
+ def initialize(c)
1311
+ @c = c
1312
+ end
1313
+ def to_s
1314
+ "<!--#{@c}-->"
1315
+ end
1324
1316
  end
1325
1317
 
1326
1318
  # Syntactic sugar for XML.new
1327
1319
  def xml(*args, &blk)
1328
- XML.new(*args, &blk)
1320
+ XML.new(*args, &blk)
1329
1321
  end
1330
1322
 
1331
1323
  # xml! in XML { ... } - context adds node to parent
1332
1324
  # xml! in main context prints the argument (and returns it anyway)
1333
1325
  def xml!(*args, &blk)
1334
- node = xml(*args, &blk)
1335
- print node
1336
- node
1326
+ node = xml(*args, &blk)
1327
+ print node
1328
+ node
1337
1329
  end
1338
1330
 
1339
1331
  # Perl 6 is supposed to have native support for something like that.
@@ -1341,20 +1333,19 @@ end
1341
1333
  #
1342
1334
  # Usage:
1343
1335
  # case foo
1344
- # when all(:foo, {:color => 'blue'}, /Hello/)
1336
+ # when All[:foo, {:color => 'blue'}, /Hello/]
1345
1337
  # print foo
1346
1338
  # end
1347
- class Patterns_all
1348
- def initialize(*patterns)
1349
- @patterns = patterns
1350
- end
1351
- def ===(obj)
1352
- @patterns.all?{|p| p === obj}
1353
- end
1354
- end
1355
-
1356
- def all(*patterns)
1357
- Patterns_all.new(*patterns)
1339
+ class All
1340
+ def initialize(*patterns)
1341
+ @patterns = patterns
1342
+ end
1343
+ def ===(obj)
1344
+ @patterns.all?{|p| p === obj}
1345
+ end
1346
+ def self.[](*patterns)
1347
+ new(*patterns)
1348
+ end
1358
1349
  end
1359
1350
 
1360
1351
  # Perl 6 is supposed to have native support for something like that.
@@ -1362,18 +1353,17 @@ end
1362
1353
  #
1363
1354
  # Usage:
1364
1355
  # case foo
1365
- # when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
1356
+ # when All[:foo, Any[{:color => 'blue'}, {:color => 'red'}], /Hello/]
1366
1357
  # print foo
1367
1358
  # end
1368
- class Patterns_any
1369
- def initialize(*patterns)
1370
- @patterns = patterns
1371
- end
1372
- def ===(obj)
1373
- @patterns.any?{|p| p === obj}
1374
- end
1375
- end
1376
-
1377
- def any(*patterns)
1378
- Patterns_any.new(*patterns)
1359
+ class Any
1360
+ def initialize(*patterns)
1361
+ @patterns = patterns
1362
+ end
1363
+ def ===(obj)
1364
+ @patterns.any?{|p| p === obj}
1365
+ end
1366
+ def self.[](*patterns)
1367
+ new(*patterns)
1368
+ end
1379
1369
  end