magic_xml 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (287) hide show
  1. data/README +22 -0
  2. data/Rakefile +52 -0
  3. data/VERSION +1 -0
  4. data/doc/classes/Array.html +148 -0
  5. data/doc/classes/File.html +113 -0
  6. data/doc/classes/Hash.html +117 -0
  7. data/doc/classes/Patterns_all.html +145 -0
  8. data/doc/classes/Patterns_any.html +145 -0
  9. data/doc/classes/String.html +470 -0
  10. data/doc/classes/Symbol.html +145 -0
  11. data/doc/classes/XML.html +1881 -0
  12. data/doc/classes/XML_Comment.html +148 -0
  13. data/doc/classes/XML_PI.html +145 -0
  14. data/doc/classes/XML_Tests.html +1727 -0
  15. data/doc/files/magic_xml_rb.html +186 -0
  16. data/doc/files/simple_examples/xml_hello_f_rb.html +88 -0
  17. data/doc/files/simple_examples/xml_hello_m_rb.html +88 -0
  18. data/doc/files/simple_examples/xml_list_f_rb.html +88 -0
  19. data/doc/files/simple_examples/xml_list_m_rb.html +88 -0
  20. data/doc/files/tests_rb.html +94 -0
  21. data/doc/files/xquery_use_cases/parts/q1_rb.html +117 -0
  22. data/doc/files/xquery_use_cases/rdb/q10_rb.html +88 -0
  23. data/doc/files/xquery_use_cases/rdb/q11_rb.html +88 -0
  24. data/doc/files/xquery_use_cases/rdb/q12_rb.html +88 -0
  25. data/doc/files/xquery_use_cases/rdb/q13_rb.html +88 -0
  26. data/doc/files/xquery_use_cases/rdb/q14_rb.html +88 -0
  27. data/doc/files/xquery_use_cases/rdb/q15_rb.html +88 -0
  28. data/doc/files/xquery_use_cases/rdb/q16_rb.html +88 -0
  29. data/doc/files/xquery_use_cases/rdb/q17_rb.html +88 -0
  30. data/doc/files/xquery_use_cases/rdb/q18_rb.html +88 -0
  31. data/doc/files/xquery_use_cases/rdb/q1_rb.html +88 -0
  32. data/doc/files/xquery_use_cases/rdb/q2_rb.html +88 -0
  33. data/doc/files/xquery_use_cases/rdb/q3_rb.html +88 -0
  34. data/doc/files/xquery_use_cases/rdb/q4_rb.html +88 -0
  35. data/doc/files/xquery_use_cases/rdb/q5_rb.html +88 -0
  36. data/doc/files/xquery_use_cases/rdb/q6_rb.html +88 -0
  37. data/doc/files/xquery_use_cases/rdb/q7_rb.html +88 -0
  38. data/doc/files/xquery_use_cases/rdb/q8_rb.html +88 -0
  39. data/doc/files/xquery_use_cases/rdb/q9_rb.html +88 -0
  40. data/doc/files/xquery_use_cases/seq/q1_rb.html +88 -0
  41. data/doc/files/xquery_use_cases/seq/q2_rb.html +88 -0
  42. data/doc/files/xquery_use_cases/seq/q3_rb.html +88 -0
  43. data/doc/files/xquery_use_cases/seq/q4_rb.html +88 -0
  44. data/doc/files/xquery_use_cases/seq/q5_rb.html +88 -0
  45. data/doc/files/xquery_use_cases/sgml/q10_rb.html +88 -0
  46. data/doc/files/xquery_use_cases/sgml/q1_rb.html +88 -0
  47. data/doc/files/xquery_use_cases/sgml/q2_rb.html +88 -0
  48. data/doc/files/xquery_use_cases/sgml/q3_rb.html +88 -0
  49. data/doc/files/xquery_use_cases/sgml/q4_rb.html +88 -0
  50. data/doc/files/xquery_use_cases/sgml/q5_rb.html +88 -0
  51. data/doc/files/xquery_use_cases/sgml/q6_rb.html +88 -0
  52. data/doc/files/xquery_use_cases/sgml/q7_rb.html +88 -0
  53. data/doc/files/xquery_use_cases/sgml/q8a_rb.html +88 -0
  54. data/doc/files/xquery_use_cases/sgml/q8b_rb.html +88 -0
  55. data/doc/files/xquery_use_cases/sgml/q9_rb.html +88 -0
  56. data/doc/files/xquery_use_cases/solution_sizes_rb.html +88 -0
  57. data/doc/files/xquery_use_cases/string/q1_rb.html +88 -0
  58. data/doc/files/xquery_use_cases/string/q2_rb.html +93 -0
  59. data/doc/files/xquery_use_cases/string/q4_rb.html +88 -0
  60. data/doc/files/xquery_use_cases/string/q5_rb.html +88 -0
  61. data/doc/files/xquery_use_cases/test_driver_rb.html +92 -0
  62. data/doc/files/xquery_use_cases/tree/q1_rb.html +111 -0
  63. data/doc/files/xquery_use_cases/tree/q2_rb.html +88 -0
  64. data/doc/files/xquery_use_cases/tree/q3_rb.html +88 -0
  65. data/doc/files/xquery_use_cases/tree/q4_rb.html +88 -0
  66. data/doc/files/xquery_use_cases/tree/q5_rb.html +88 -0
  67. data/doc/files/xquery_use_cases/tree/q6_rb.html +113 -0
  68. data/doc/files/xquery_use_cases/xmp/q10_rb.html +88 -0
  69. data/doc/files/xquery_use_cases/xmp/q11_rb.html +88 -0
  70. data/doc/files/xquery_use_cases/xmp/q12_rb.html +88 -0
  71. data/doc/files/xquery_use_cases/xmp/q1_rb.html +88 -0
  72. data/doc/files/xquery_use_cases/xmp/q2_rb.html +88 -0
  73. data/doc/files/xquery_use_cases/xmp/q3_rb.html +88 -0
  74. data/doc/files/xquery_use_cases/xmp/q4_rb.html +88 -0
  75. data/doc/files/xquery_use_cases/xmp/q5_rb.html +92 -0
  76. data/doc/files/xquery_use_cases/xmp/q6_rb.html +88 -0
  77. data/doc/files/xquery_use_cases/xmp/q7_rb.html +88 -0
  78. data/doc/files/xquery_use_cases/xmp/q8_rb.html +88 -0
  79. data/doc/files/xquery_use_cases/xmp/q9_rb.html +88 -0
  80. data/doc/fr_class_index.html +56 -0
  81. data/doc/fr_file_index.html +110 -0
  82. data/doc/fr_method_index.html +159 -0
  83. data/doc/index.html +26 -0
  84. data/doc/rdoc-style.css +175 -0
  85. data/lib/magic_xml.rb +1400 -0
  86. data/simple_examples/README +14 -0
  87. data/simple_examples/xml_hello_f.rb +32 -0
  88. data/simple_examples/xml_hello_m.rb +32 -0
  89. data/simple_examples/xml_list_f.rb +36 -0
  90. data/simple_examples/xml_list_m.rb +36 -0
  91. data/test/helper.rb +9 -0
  92. data/test/test_magic_xml.rb +855 -0
  93. data/xquery_use_cases/README +17 -0
  94. data/xquery_use_cases/parts/README +12 -0
  95. data/xquery_use_cases/parts/partlist.xml +13 -0
  96. data/xquery_use_cases/parts/q1.out +16 -0
  97. data/xquery_use_cases/parts/q1.rb +38 -0
  98. data/xquery_use_cases/parts/q1.xquery +18 -0
  99. data/xquery_use_cases/rdb/README +50 -0
  100. data/xquery_use_cases/rdb/bids.xml +81 -0
  101. data/xquery_use_cases/rdb/items.xml +57 -0
  102. data/xquery_use_cases/rdb/q1.out +10 -0
  103. data/xquery_use_cases/rdb/q1.rb +31 -0
  104. data/xquery_use_cases/rdb/q1.xquery +14 -0
  105. data/xquery_use_cases/rdb/q10.out +27 -0
  106. data/xquery_use_cases/rdb/q10.rb +37 -0
  107. data/xquery_use_cases/rdb/q10.xquery +15 -0
  108. data/xquery_use_cases/rdb/q11.out +7 -0
  109. data/xquery_use_cases/rdb/q11.rb +38 -0
  110. data/xquery_use_cases/rdb/q11.xquery +15 -0
  111. data/xquery_use_cases/rdb/q12.out +12 -0
  112. data/xquery_use_cases/rdb/q12.rb +42 -0
  113. data/xquery_use_cases/rdb/q12.xquery +28 -0
  114. data/xquery_use_cases/rdb/q13.out +32 -0
  115. data/xquery_use_cases/rdb/q13.rb +45 -0
  116. data/xquery_use_cases/rdb/q13.xquery +15 -0
  117. data/xquery_use_cases/rdb/q14.out +14 -0
  118. data/xquery_use_cases/rdb/q14.rb +42 -0
  119. data/xquery_use_cases/rdb/q14.xquery +14 -0
  120. data/xquery_use_cases/rdb/q15.out +5 -0
  121. data/xquery_use_cases/rdb/q15.rb +31 -0
  122. data/xquery_use_cases/rdb/q15.xquery +9 -0
  123. data/xquery_use_cases/rdb/q16.out +35 -0
  124. data/xquery_use_cases/rdb/q16.rb +35 -0
  125. data/xquery_use_cases/rdb/q16.xquery +17 -0
  126. data/xquery_use_cases/rdb/q17.out +1 -0
  127. data/xquery_use_cases/rdb/q17.rb +35 -0
  128. data/xquery_use_cases/rdb/q17.xquery +11 -0
  129. data/xquery_use_cases/rdb/q18.out +32 -0
  130. data/xquery_use_cases/rdb/q18.rb +40 -0
  131. data/xquery_use_cases/rdb/q18.xquery +19 -0
  132. data/xquery_use_cases/rdb/q2.out +22 -0
  133. data/xquery_use_cases/rdb/q2.rb +36 -0
  134. data/xquery_use_cases/rdb/q2.xquery +14 -0
  135. data/xquery_use_cases/rdb/q3.out +8 -0
  136. data/xquery_use_cases/rdb/q3.rb +34 -0
  137. data/xquery_use_cases/rdb/q3.xquery +16 -0
  138. data/xquery_use_cases/rdb/q4.out +14 -0
  139. data/xquery_use_cases/rdb/q4.rb +31 -0
  140. data/xquery_use_cases/rdb/q4.xquery +11 -0
  141. data/xquery_use_cases/rdb/q5.out +12 -0
  142. data/xquery_use_cases/rdb/q5.rb +46 -0
  143. data/xquery_use_cases/rdb/q5.xquery +25 -0
  144. data/xquery_use_cases/rdb/q6.out +14 -0
  145. data/xquery_use_cases/rdb/q6.rb +38 -0
  146. data/xquery_use_cases/rdb/q6.xquery +15 -0
  147. data/xquery_use_cases/rdb/q7.out +1 -0
  148. data/xquery_use_cases/rdb/q7.rb +30 -0
  149. data/xquery_use_cases/rdb/q7.xquery +10 -0
  150. data/xquery_use_cases/rdb/q8.out +1 -0
  151. data/xquery_use_cases/rdb/q8.rb +23 -0
  152. data/xquery_use_cases/rdb/q8.xquery +8 -0
  153. data/xquery_use_cases/rdb/q9.out +22 -0
  154. data/xquery_use_cases/rdb/q9.rb +32 -0
  155. data/xquery_use_cases/rdb/q9.xquery +16 -0
  156. data/xquery_use_cases/rdb/users.xml +25 -0
  157. data/xquery_use_cases/seq/README +12 -0
  158. data/xquery_use_cases/seq/q1.out +1 -0
  159. data/xquery_use_cases/seq/q1.rb +25 -0
  160. data/xquery_use_cases/seq/q1.xquery +2 -0
  161. data/xquery_use_cases/seq/q2.out +2 -0
  162. data/xquery_use_cases/seq/q2.rb +25 -0
  163. data/xquery_use_cases/seq/q2.xquery +2 -0
  164. data/xquery_use_cases/seq/q3.out +2 -0
  165. data/xquery_use_cases/seq/q3.rb +26 -0
  166. data/xquery_use_cases/seq/q3.xquery +3 -0
  167. data/xquery_use_cases/seq/q4.out +0 -0
  168. data/xquery_use_cases/seq/q4.rb +27 -0
  169. data/xquery_use_cases/seq/q4.xquery +4 -0
  170. data/xquery_use_cases/seq/q5.out +5 -0
  171. data/xquery_use_cases/seq/q5.rb +29 -0
  172. data/xquery_use_cases/seq/q5.xquery +10 -0
  173. data/xquery_use_cases/seq/report1.xml +40 -0
  174. data/xquery_use_cases/sgml/README +53 -0
  175. data/xquery_use_cases/sgml/q1.out +44 -0
  176. data/xquery_use_cases/sgml/q1.rb +23 -0
  177. data/xquery_use_cases/sgml/q1.xquery +5 -0
  178. data/xquery_use_cases/sgml/q10.out +1 -0
  179. data/xquery_use_cases/sgml/q10.rb +28 -0
  180. data/xquery_use_cases/sgml/q10.xquery +7 -0
  181. data/xquery_use_cases/sgml/q2.out +26 -0
  182. data/xquery_use_cases/sgml/q2.rb +23 -0
  183. data/xquery_use_cases/sgml/q2.xquery +5 -0
  184. data/xquery_use_cases/sgml/q3.out +6 -0
  185. data/xquery_use_cases/sgml/q3.rb +28 -0
  186. data/xquery_use_cases/sgml/q3.xquery +7 -0
  187. data/xquery_use_cases/sgml/q4.out +4 -0
  188. data/xquery_use_cases/sgml/q4.rb +25 -0
  189. data/xquery_use_cases/sgml/q4.xquery +5 -0
  190. data/xquery_use_cases/sgml/q5.out +3 -0
  191. data/xquery_use_cases/sgml/q5.rb +23 -0
  192. data/xquery_use_cases/sgml/q5.xquery +5 -0
  193. data/xquery_use_cases/sgml/q6.out +1 -0
  194. data/xquery_use_cases/sgml/q6.rb +27 -0
  195. data/xquery_use_cases/sgml/q6.xquery +6 -0
  196. data/xquery_use_cases/sgml/q7.out +1 -0
  197. data/xquery_use_cases/sgml/q7.rb +27 -0
  198. data/xquery_use_cases/sgml/q7.xquery +7 -0
  199. data/xquery_use_cases/sgml/q8a.out +34 -0
  200. data/xquery_use_cases/sgml/q8a.rb +27 -0
  201. data/xquery_use_cases/sgml/q8a.xquery +5 -0
  202. data/xquery_use_cases/sgml/q8b.out +26 -0
  203. data/xquery_use_cases/sgml/q8b.rb +32 -0
  204. data/xquery_use_cases/sgml/q8b.xquery +5 -0
  205. data/xquery_use_cases/sgml/q9.out +9 -0
  206. data/xquery_use_cases/sgml/q9.rb +29 -0
  207. data/xquery_use_cases/sgml/q9.xquery +6 -0
  208. data/xquery_use_cases/sgml/sgml.xml +101 -0
  209. data/xquery_use_cases/solution_sizes.rb +48 -0
  210. data/xquery_use_cases/string/README +29 -0
  211. data/xquery_use_cases/string/company-data.xml +20 -0
  212. data/xquery_use_cases/string/q1.out +4 -0
  213. data/xquery_use_cases/string/q1.rb +25 -0
  214. data/xquery_use_cases/string/q1.xquery +1 -0
  215. data/xquery_use_cases/string/q2.out +13 -0
  216. data/xquery_use_cases/string/q2.rb +32 -0
  217. data/xquery_use_cases/string/q2.xquery +23 -0
  218. data/xquery_use_cases/string/q4.out +50 -0
  219. data/xquery_use_cases/string/q4.rb +34 -0
  220. data/xquery_use_cases/string/q4.xquery +14 -0
  221. data/xquery_use_cases/string/q5.out +12 -0
  222. data/xquery_use_cases/string/q5.rb +33 -0
  223. data/xquery_use_cases/string/q5.xquery +8 -0
  224. data/xquery_use_cases/string/string.xml +82 -0
  225. data/xquery_use_cases/test_driver.rb +60 -0
  226. data/xquery_use_cases/tree/README +23 -0
  227. data/xquery_use_cases/tree/book.xml +50 -0
  228. data/xquery_use_cases/tree/q1.out +23 -0
  229. data/xquery_use_cases/tree/q1.rb +31 -0
  230. data/xquery_use_cases/tree/q1.xquery +14 -0
  231. data/xquery_use_cases/tree/q2.out +11 -0
  232. data/xquery_use_cases/tree/q2.rb +27 -0
  233. data/xquery_use_cases/tree/q2.xquery +10 -0
  234. data/xquery_use_cases/tree/q3.out +2 -0
  235. data/xquery_use_cases/tree/q3.rb +26 -0
  236. data/xquery_use_cases/tree/q3.xquery +2 -0
  237. data/xquery_use_cases/tree/q4.out +1 -0
  238. data/xquery_use_cases/tree/q4.rb +23 -0
  239. data/xquery_use_cases/tree/q4.xquery +5 -0
  240. data/xquery_use_cases/tree/q5.out +9 -0
  241. data/xquery_use_cases/tree/q5.rb +30 -0
  242. data/xquery_use_cases/tree/q5.xquery +8 -0
  243. data/xquery_use_cases/tree/q6.out +30 -0
  244. data/xquery_use_cases/tree/q6.rb +35 -0
  245. data/xquery_use_cases/tree/q6.xquery +21 -0
  246. data/xquery_use_cases/xmp/README +41 -0
  247. data/xquery_use_cases/xmp/bib.xml +35 -0
  248. data/xquery_use_cases/xmp/books.xml +15 -0
  249. data/xquery_use_cases/xmp/prices.xml +32 -0
  250. data/xquery_use_cases/xmp/q1.out +8 -0
  251. data/xquery_use_cases/xmp/q1.rb +29 -0
  252. data/xquery_use_cases/xmp/q1.xquery +10 -0
  253. data/xquery_use_cases/xmp/q10.out +11 -0
  254. data/xquery_use_cases/xmp/q10.rb +36 -0
  255. data/xquery_use_cases/xmp/q10.xquery +11 -0
  256. data/xquery_use_cases/xmp/q11.out +35 -0
  257. data/xquery_use_cases/xmp/q11.rb +37 -0
  258. data/xquery_use_cases/xmp/q11.xquery +18 -0
  259. data/xquery_use_cases/xmp/q12.out +6 -0
  260. data/xquery_use_cases/xmp/q12.rb +35 -0
  261. data/xquery_use_cases/xmp/q12.xquery +20 -0
  262. data/xquery_use_cases/xmp/q2.out +37 -0
  263. data/xquery_use_cases/xmp/q2.rb +30 -0
  264. data/xquery_use_cases/xmp/q2.xquery +12 -0
  265. data/xquery_use_cases/xmp/q3.out +34 -0
  266. data/xquery_use_cases/xmp/q3.rb +27 -0
  267. data/xquery_use_cases/xmp/q3.xquery +10 -0
  268. data/xquery_use_cases/xmp/q4.out +31 -0
  269. data/xquery_use_cases/xmp/q4.rb +44 -0
  270. data/xquery_use_cases/xmp/q4.xquery +21 -0
  271. data/xquery_use_cases/xmp/q5.out +17 -0
  272. data/xquery_use_cases/xmp/q5.rb +38 -0
  273. data/xquery_use_cases/xmp/q5.xquery +13 -0
  274. data/xquery_use_cases/xmp/q6.out +28 -0
  275. data/xquery_use_cases/xmp/q6.rb +33 -0
  276. data/xquery_use_cases/xmp/q6.xquery +19 -0
  277. data/xquery_use_cases/xmp/q7.out +8 -0
  278. data/xquery_use_cases/xmp/q7.rb +30 -0
  279. data/xquery_use_cases/xmp/q7.xquery +12 -0
  280. data/xquery_use_cases/xmp/q8.out +7 -0
  281. data/xquery_use_cases/xmp/q8.rb +29 -0
  282. data/xquery_use_cases/xmp/q8.xquery +9 -0
  283. data/xquery_use_cases/xmp/q9.out +4 -0
  284. data/xquery_use_cases/xmp/q9.rb +29 -0
  285. data/xquery_use_cases/xmp/q9.xquery +7 -0
  286. data/xquery_use_cases/xmp/reviews.xml +24 -0
  287. metadata +342 -0
data/lib/magic_xml.rb ADDED
@@ -0,0 +1,1400 @@
1
+ #Copyright (c) 2006-2007 Tomasz Wegrzanowski <Tomasz.Wegrzanowski@gmail.com>
2
+ #
3
+ #Permission is hereby granted, free of charge, to any person obtaining a
4
+ #copy of this software and associated documentation files (the "Software"),
5
+ #to deal in the Software without restriction, including without limitation
6
+ #the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
+ #and/or sell copies of the Software, and to permit persons to whom the
8
+ #Software is furnished to do so, subject to the following conditions:
9
+ #
10
+ #The above copyright notice and this permission notice shall be included in
11
+ #all copies or substantial portions of the Software.
12
+ #
13
+ #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16
+ #THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
17
+ #OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18
+ #ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19
+ #DEALINGS IN THE SOFTWARE.
20
+
21
+ # Needed for parsing
22
+
23
+ require 'rexml/parsers/baseparser'
24
+ # Needed for fetching XMLs from the Internet
25
+ require 'uri'
26
+ require 'net/http'
27
+
28
+ # FIXME: Make comment formatting RDoc-friendly. It's not always so now.
29
+
30
+ # In Ruby 2 Symbol will be a subclass of String, and
31
+ # this won't be needed any more. Before then...
32
# Extensions that make symbols orderable and let a symbol match an
# XML element by its tag name (e.g. in case/when).
class Symbol
  include Comparable

  # Compare two symbols by their string form.
  # Raises ArgumentError if +other+ is not a Symbol.
  def <=>(other)
    return to_s <=> other.to_s if other.is_a? Symbol

    raise ArgumentError.new("comparison of #{self.class} with #{other.class} failed")
  end

  # Keep the stock Symbol#=== reachable for everything that is not XML.
  alias_method :eqeqeq_before_magic_xml, :===

  # Case equality. When the first argument is an XML object, the symbol
  # matches if it equals that object's name; any other call is passed
  # through to the original Symbol#===.
  def ===(*args, &blk)
    subject = args[0]
    is_xml = (args.size >= 1 and subject.is_a? XML)
    return eqeqeq_before_magic_xml(*args, &blk) unless is_xml

    self == subject.name
  end
end
48
+
49
# Lets a Hash act as a pattern over an XML object: every value of the hash
# is matched (with ===) against what the XML object returns for that key.
class Hash
  # Keep the stock Hash#=== reachable for everything that is not XML.
  alias_method :eqeqeq_before_magic_xml, :===

  # Case equality. When the first argument is an XML object, returns true
  # only if each value === target[key]; an empty hash therefore matches
  # any XML object. Any other call goes to the original Hash#===.
  def ===(*args, &blk)
    target = args[0]
    is_xml = (args.size >= 1 and target.is_a? XML)
    return eqeqeq_before_magic_xml(*args, &blk) unless is_xml

    each { |key, pattern| return false unless pattern === target[key] }
    true
  end
end
59
+
60
class String
  # Escape string for output as XML text (< > &)
  def xml_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
    gsub(/[<>&]/) { |ch| replacements[ch] }
  end

  # Escape characters for output as XML attribute values (< > & ' ")
  def xml_attr_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
    gsub(/[<>&'"]/) { |ch| replacements[ch] }
  end

  # Unescape entities
  # Supports:
  # * Full set of HTML-compatible named entities
  # * Decimal entities &#1234;
  # * Hex entities &#xA0b1;
  #
  # extra_entities is consulted only for names missing from the built-in
  # table. It may be a Proc (called with the entity name) or any object
  # responding to [] (indexed with the entity name); the result may be a
  # String (used as is) or an Integer code point.
  #
  # Raises RuntimeError ("Unknown escape NAME") for a named entity that
  # neither the table nor extra_entities resolves.
  def xml_unescape(extra_entities=nil)
    @@xhtml_entity_replacements ||= {
      'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163,
      'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167,
      'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171,
      'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175,
      'deg' => 176, 'plusmn' => 177, 'sup2' => 178, 'sup3' => 179,
      'acute' => 180, 'micro' => 181, 'para' => 182, 'middot' => 183,
      'cedil' => 184, 'sup1' => 185, 'ordm' => 186, 'raquo' => 187,
      'frac14' => 188, 'frac12' => 189, 'frac34' => 190, 'iquest' => 191,
      'Agrave' => 192, 'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195,
      'Auml' => 196, 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
      'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203,
      'Igrave' => 204, 'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207,
      'ETH' => 208, 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
      'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214, 'times' => 215,
      'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219,
      'Uuml' => 220, 'Yacute' => 221, 'THORN' => 222, 'szlig' => 223,
      'agrave' => 224, 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
      'auml' => 228, 'aring' => 229, 'aelig' => 230, 'ccedil' => 231,
      'egrave' => 232, 'eacute' => 233, 'ecirc' => 234, 'euml' => 235,
      'igrave' => 236, 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
      'eth' => 240, 'ntilde' => 241, 'ograve' => 242, 'oacute' => 243,
      'ocirc' => 244, 'otilde' => 245, 'ouml' => 246, 'divide' => 247,
      'oslash' => 248, 'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251,
      'uuml' => 252, 'yacute' => 253, 'thorn' => 254, 'yuml' => 255,
      'quot' => 34,
      'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
      'amp' => 38, 'lt' => 60, 'gt' => 62,
      'OElig' => 338, 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353,
      'Yuml' => 376, 'circ' => 710, 'tilde' => 732,
      'ensp' => 8194, 'emsp' => 8195, 'thinsp' => 8201, 'zwnj' => 8204,
      'zwj' => 8205, 'lrm' => 8206, 'rlm' => 8207,
      'ndash' => 8211, 'mdash' => 8212, 'lsquo' => 8216, 'rsquo' => 8217,
      'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221, 'bdquo' => 8222,
      'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240,
      'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364, 'fnof' => 402,
      'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916,
      'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920,
      'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924,
      'Nu' => 925, 'Xi' => 926, 'Omicron' => 927, 'Pi' => 928,
      'Rho' => 929, 'Sigma' => 931, 'Tau' => 932, 'Upsilon' => 933,
      'Phi' => 934, 'Chi' => 935, 'Psi' => 936, 'Omega' => 937,
      'alpha' => 945, 'beta' => 946, 'gamma' => 947, 'delta' => 948,
      'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 'theta' => 952,
      'iota' => 953, 'kappa' => 954, 'lambda' => 955, 'mu' => 956,
      'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960,
      'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964,
      'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968,
      'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982,
      'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243,
      'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465,
      'real' => 8476, 'trade' => 8482, 'alefsym' => 8501,
      'larr' => 8592, 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595,
      'harr' => 8596, 'crarr' => 8629,
      'lArr' => 8656, 'uArr' => 8657, 'rArr' => 8658, 'dArr' => 8659,
      'hArr' => 8660,
      'forall' => 8704, 'part' => 8706, 'exist' => 8707, 'empty' => 8709,
      'nabla' => 8711, 'isin' => 8712, 'notin' => 8713, 'ni' => 8715,
      'prod' => 8719, 'sum' => 8721, 'minus' => 8722, 'lowast' => 8727,
      'radic' => 8730, 'prop' => 8733, 'infin' => 8734, 'ang' => 8736,
      'and' => 8743, 'or' => 8744, 'cap' => 8745, 'cup' => 8746,
      'int' => 8747, 'there4' => 8756, 'sim' => 8764, 'cong' => 8773,
      'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 'le' => 8804,
      'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
      'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
      'perp' => 8869, 'sdot' => 8901,
      'lceil' => 8968, 'rceil' => 8969, 'lfloor' => 8970, 'rfloor' => 8971,
      'lang' => 9001, 'rang' => 9002, 'loz' => 9674,
      'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829, 'diams' => 9830,
    }
    # Entity names may contain digits after the first letter (sup2, frac12,
    # there4, ...); a letters-only pattern would silently skip those.
    gsub(/&(?:([a-zA-Z][a-zA-Z0-9]*)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
      name, decimal, hexadecimal = $1, $2, $3
      if name
        v = @@xhtml_entity_replacements[name]
        # Nonstandard entity
        unless v
          if extra_entities.is_a? Proc
            v = extra_entities.call(name)
          # Well, we expect a Hash here, but any container will do.
          # As long as it's not a nil.
          elsif extra_entities
            v = extra_entities[name]
          end
        end
        raise "Unknown escape #{name}" unless v
      elsif decimal
        v = decimal.to_i
      else
        v = hexadecimal.hex
      end
      # v can be a String or an Integer
      if v.is_a? String then v else [v].pack('U') end
    }
  end

  # Parse this string as XML; delegates to XML.parse.
  def xml_parse
    XML.parse(self)
  end
end
359
+
360
class File
  # Parse the contents of this file object as XML.
  # Hands the File itself to XML.parse and returns whatever that returns.
  def xml_parse
    XML.parse self
  end
end
365
+
366
# Helpers for arrays that hold XML objects mixed with other values
# (non-XML members are skipped).
class Array
  # Concatenated children of every XML member, in array order.
  # Arguments and block are forwarded unchanged to each member's #children.
  def children(*args, &blk)
    xml_members = select { |member| member.is_a? XML }
    xml_members.inject([]) { |found, node| found + node.children(*args, &blk) }
  end

  # Concatenated descendants of every XML member, in array order.
  # Arguments and block are forwarded unchanged to each member's #descendants.
  def descendants(*args, &blk)
    xml_members = select { |member| member.is_a? XML }
    xml_members.inject([]) { |found, node| found + node.descendants(*args, &blk) }
  end
end
384
+
385
+ # Methods of Enumerable.
386
+ # It is not easy to design good methods, because XML
387
+ # is not really "a container", it just acts as one sometimes.
388
+ # Generally:
389
+ # * Methods that return nil should work
390
+ # * Methods that return an element should work
391
+ # * Methods that return a container should return XML container, not Array
392
+ # * Conversion methods should convert
393
+ #
394
+ # FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
395
class XML
  include Enumerable
  # The default any? and all? from Enumerable are fine as they are.

  # Iterate over children, possibly with a selector.
  # All arguments and the block go straight to #children; returns self.
  def each(*selector, &blk)
    children(*selector, &blk)
    self
  end

  # Copy of this element whose contents are only its XML children,
  # ordered by the given block (non-XML contents are dropped).
  # NOTE(review): relies on #dup accepting a block that runs with the
  # copy's instance variables in scope - confirm against XML#dup.
  def sort_by(*args, &blk)
    self.dup do
      xml_only = @contents.select { |child| child.is_a? XML }
      @contents = xml_only.sort_by(*args, &blk)
    end
  end

  # Copy of this element with all of its contents (XML or not)
  # ordered by the given block.
  def children_sort_by(*args, &blk)
    self.dup do
      @contents = @contents.sort_by(*args, &blk)
    end
  end

  # Copy of this element with its contents sorted via Array#sort.
  #
  # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
  # Use sort_by instead.
  #
  # Unless you define your own XML#<=> operator, or do something equally weird.
  def sort(*args, &blk)
    self.dup do
      @contents = @contents.sort(*args, &blk)
    end
  end

  #collect/map
  #detect/find
  #each_cons
  #each_slice
  #each_with_index
  #to_a
  #entries
  #enum_cons
  #enum_slice
  #enum
  # grep
  # include?/member?
  # inject
  # max/min
  # max_by/min_by - Ruby 1.9
  # partition
  # reject
  # sort
  # sort_by
  # to_set
  # zip
  # And Enumerable::Enumerator-generating methods
end
449
+
450
+ # Class methods
451
+ class XML
452
+ # XML.foo! == xml!(:foo)
453
+ # XML.foo == xml(:foo)
454
# Turn unknown class-level calls into element constructors:
# a name ending in "!" is forwarded to xml! with the "!" stripped,
# any other name builds XML.new(name, ...).
def self.method_missing(meth, *args, &blk)
  bang_target = meth.to_s[/^(.*)!$/, 1]
  return XML.new(meth, *args, &blk) unless bang_target

  xml!(bang_target.to_sym, *args, &blk)
end
461
+
462
+ # Read file and parse
463
# Read file and parse.
# +file+ is either a path (String) or an already opened IO-like object.
# A path is opened here and closed again once parsing has finished, so no
# file handle is leaked; an object passed in by the caller is handed to
# parse as is and left open for the caller to manage.
def self.from_file(file)
  return parse(file) unless file.is_a? String

  File.open(file) { |io| parse(io) }
end
467
+
468
+ # Fetch URL and parse
469
+ # Supported:
470
+ # http://.../
471
+ # https://.../
472
+ # file:foo.xml
473
+ # string:<foo/>
474
# Fetch URL and parse.
# Supported:
#   http://.../
#   https://.../
#   file:foo.xml
#   string:<foo/>
#
# The scheme is recognised only at the very start of +url+ (\A), so text on
# a later line of the string cannot select a different branch.
# Raises RuntimeError for any other scheme.
def self.from_url(url)
  if url =~ /\Astring:(.*)\z/m
    parse($1)
  elsif url =~ /\Afile:(.*)\z/m
    from_file($1)
  elsif url =~ /\Ahttp(s?):/
    ssl = ($1 == "s")

    uri = URI.parse(url)
    # request_uri is the path plus "?query" when present, and is "/" for a
    # URL without a path - Net::HTTP rejects an empty request path.
    req = Net::HTTP::Get.new(uri.request_uri)
    if uri.userinfo
      username, passwd = uri.userinfo.split(/:/, 2)
      req.basic_auth username, passwd
    end

    http = Net::HTTP.new(uri.host, uri.port)
    if ssl
      # NOTE: You need libopenssl-ruby installed
      # if you want to use HTTPS.
      require 'net/https'
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off here, so the
      # server's identity is not checked. Kept as is to avoid breaking
      # existing callers; consider making this configurable.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end

    res = http.start { |connection| connection.request(req) }
    # TODO: Throw a more meaningful exception
    parse(res.body)
  else
    raise "URL protocol #{url} not supported (http, https, file, string are supported)"
  end
end
519
+
520
+ # Like CDuce load_xml
521
+ # The path can be:
522
+ # * file handler
523
+ # * URL (a string with :)
524
+ # * file name (a string without :)
525
# Load XML from whatever +obj+ is:
# * a String containing ":" is treated as a URL (from_url)
# * any other String is treated as a file name (from_file)
# * anything else (e.g. a file handle) is passed directly to parse
def self.load(obj)
  return parse(obj) unless obj.is_a? String

  obj.include?(":") ? from_url(obj) : from_file(obj)
end
536
+
537
+ # Parse XML in mixed stream/tree mode
538
+ # Basically the idea is that every time we get start element,
539
+ # we ask the block what to do about it.
540
+ # If it wants a tree below it, it should call e.tree
541
+ # If a tree was requested, elements below the current one
542
+ # are *not* processed. If it wasn't, they are.
543
+ #
544
+ # For example:
545
+ # <foo><bar/></foo><foo2/>
546
+ # yield <foo> ... </foo>
547
+ # .complete! called
548
+ # process <foo2> next
549
+ #
550
+ # But:
551
+ # <foo><bar/></foo><foo2/>
552
+ # yield <foo> ... </foo>
553
+ # .complete! not called
554
+ # process <bar> next
555
+ #
556
+ # FIXME: yielded values are not reusable for now
557
+ # FIXME: make more object-oriented
558
# Stream over +stream+ with REXML's pull parser and yield a fresh XML node
# (name and unescaped attributes only) for every start tag.
# Each yielded node has a one-shot complete! method: calling it inside the
# block runs parse_subtree for that node on the same parser, so the events
# up to the node's end tag are consumed there and not yielded here.
# After the block returns, complete! no longer does anything.
# Text, comments and all other events are ignored; returns nil at
# end of document.
def self.parse_as_twigs(stream)
  parser = REXML::Parsers::BaseParser.new stream
  # Nodes yielded without complete!; kept only so end tags have a partner.
  pending = []
  loop do
    kind, tag, raw_attrs, *rest = parser.pull
    if kind == :start_element
      attrs = raw_attrs.inject({}) do |acc, (key, value)|
        acc[key.to_sym] = value.xml_unescape
        acc
      end
      node = XML.new(tag.to_sym, attrs, *rest)

      # Per-node singleton methods: the callback slot and its trigger.
      class << node
        attr_accessor :do_complete

        def complete!
          if @do_complete
            @do_complete.call
            @do_complete = nil
          end
        end
      end
      node.do_complete = proc { parse_subtree(node, parser) }

      yield(node)
      if node.do_complete
        pending.push node
        node.do_complete = nil # It's too late, complete! shouldn't do anything now
      end
    elsif kind == :end_element
      pending.pop
    elsif kind == :end_document
      return
    end
    # FIXME: Do the right thing.
    # Every other event is ignored for now, although the user might
    # want to see text, comments and the like.
  end
end
602
+
603
+ # Basically it's a copy of self.parse, ugly ...
604
def self.parse_subtree(start_node, parser)
  # Consume events from +parser+ until the element represented by
  # +start_node+ is closed, attaching everything found in between
  # to +start_node+ (elements, text and CDATA; other events are ignored).
  #
  # * start_node - already-created XML node whose start tag was just read
  # * parser     - pull parser; must respond to #pull and return event arrays
  #
  # Returns nil. Raises RuntimeError if the document ends before
  # start_node is closed.
  #
  # The stack always holds start_node at the bottom, so it is never empty
  # while events are being processed; we return the moment it empties.
  stack = [start_node]
  loop do
    event = parser.pull
    case event[0]
    when :start_element
      attrs = {}
      event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape }
      node = XML.new(event[1].to_sym, attrs, *event[3..-1])
      stack[-1] << node
      stack << node
    when :end_element
      stack.pop
      return if stack.empty?
    when :text
      # Text needs unescaping
      stack[-1] << event[1].xml_unescape
    when :cdata
      # CDATA is already unescaped
      stack[-1] << event[1]
    when :end_document
      raise "Parse error: end_document inside a subtree, tags are not balanced"
    end
    # Every other event (declarations, doctype, PIs, comments, entities,
    # unknown events) is deliberately ignored here.
  end
end
651
+
652
+ # Parse XML using REXML. Available options:
653
+ # * :extra_entities => Proc or Hash (default = nil)
654
+ # * :remove_pretty_printing => true/false (default = false)
655
+ # * :comments => true/false (default = false)
656
+ # * :pi => true/false (default = false)
657
+ # * :normalize => true/false (default = false) - normalize
658
+ # * :multiple_roots => true/false (default=false) - document
659
+ # can have any number of roots (instead of one).
660
+ # Return all in an array instead of root/nil.
661
+ # Also include non-elements (String/PI/Comment) in the return set !!!
662
+ #
663
+ # FIXME: :comments/:pi will break everything
664
+ # if there are comments/PIs outside document root.
665
+ # Now PIs are outside the document root more often than not,
666
+ # so we're pretty much screwed here.
667
+ #
668
+ # FIXME: Integrate all kinds of parse, and make them support extra options
669
+ #
670
+ # FIXME: Benchmark normalize!
671
+ #
672
+ # FIXME: Benchmark dup-based Enumerable methods
673
+ #
674
+ # FIXME: Make it possible to include bogus XML_Document superparent,
675
+ # and to make it support out-of-root PIs/Comments
676
def self.parse(stream, options={})
  # Build an XML tree from +stream+ by pulling events from
  # REXML::Parsers::BaseParser.
  #
  # stack[0] is a plain Array collecting top-level nodes; every deeper
  # entry is an element that is currently open.
  #
  # Returns the array of top-level nodes when options[:multiple_roots]
  # is set, otherwise its first element (nil if there was none).
  # Raises RuntimeError for non-whitespace text, CDATA, comments or PIs
  # found outside the root in single-root mode.
  extra_entities = options[:extra_entities]

  parser = REXML::Parsers::BaseParser.new stream
  stack = [[]]

  # Append item to the innermost open node, or fail with +complaint+
  # when we are outside the root and not in multi-root mode.
  place = lambda { |item, complaint|
    raise complaint unless stack.size > 1 || options[:multiple_roots]
    stack[-1] << item
  }

  until (event = parser.pull)[0] == :end_document
    case event[0]
    when :start_element
      attrs = {}
      event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
      element = XML.new(event[1].to_sym, attrs, event[3..-1])
      stack[-1] << element
      stack << element
    when :end_element
      stack.pop
    when :text
      # Text needs unescaping
      unescaped = event[1].xml_unescape(extra_entities)
      if stack.size > 1 || options[:multiple_roots]
        stack[-1] << unescaped
      elsif event[1] =~ /\S/
        raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
      end
      # Out-of-root whitespace in single-root mode is silently dropped
    when :cdata
      # CDATA is already unescaped
      place.call(event[1], "CDATA out of the document root")
    when :comment
      next unless options[:comments]
      # FIXME: Ugly !
      place.call(XML_Comment.new(event[1]), "Comments out of the document root")
    when :processing_instruction
      # FIXME: Real PI node
      next unless options[:pi]
      # FIXME: Ugly !
      place.call(XML_PI.new(event[1], event[2]), "Processing instruction out of the document root")
    end
    # Declarations, doctype, entity/notation events and anything unknown
    # are ignored.
  end

  roots = stack[0]
  if options[:remove_pretty_printing]
    roots.each { |root| root.remove_pretty_printing! }
  end
  # :remove_pretty_printing does :normalize anyway
  if options[:normalize]
    roots.each { |root| root.normalize! }
  end
  options[:multiple_roots] ? roots : roots[0]
end
753
+
754
+ # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
755
def self.parse_sequence(stream, options={})
  # Same as parse, but always in multi-root mode. The caller's options
  # hash is left untouched; a merged copy is passed on.
  parse(stream, options.merge(:multiple_roots => true))
end
760
+
761
+ # Renormalize a string containing XML document
762
def self.renormalize(stream)
  # Parse the document and serialize it back with to_s.
  document = parse(stream)
  document.to_s
end
765
+
766
+ # Renormalize a string containing a sequence of XML documents
767
+ # and strings
768
+ # XML.renormalize_sequence("<hello />, <world></world>!") =>
769
+ # "<hello/>, <world/>!"
770
def self.renormalize_sequence(stream)
  # Parse a sequence of documents and strings, then serialize every
  # top-level item with to_s and concatenate the results.
  #
  # join is used rather than Array#to_s: on Ruby >= 1.9 Array#to_s is
  # an alias of inspect and would produce "[...]" instead of the
  # concatenated markup shown in the documentation.
  parse_sequence(stream).join
end
773
+ end
774
+
775
+ # Instance methods (other than those of Enumerable)
776
+ class XML
777
+ attr_accessor :name, :attrs, :contents
778
+
779
+ # initialize can be run in many ways
780
+ # * XML.new
781
+ # * XML.new(:tag_symbol)
782
+ # * XML.new(:tag_symbol, {attributes})
783
+ # * XML.new(:tag_symbol, "children", "more", XML.new(...))
784
+ # * XML.new(:tag_symbol, {attributes}, "and", "children")
785
+ # * XML.new(:tag_symbol) { monadic code }
786
+ # * XML.new(:tag_symbol, {attributes}) { monadic code }
787
+ #
788
+ # Or even:
789
+ # * XML.new(:tag_symbol, "children") { and some monadic code }
790
+ # * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
791
+ # But typically you won't be mixing these two style
792
+ #
793
+ # Attribute values will be converted to strings
794
def initialize(*args, &blk)
  # Positional arguments are, in order: the tag name, an optional Hash
  # of attributes, then any number of children. An optional block is
  # instance_eval'ed on the new node.
  @name = nil
  @attrs = {}
  @contents = []
  @name = args.shift unless args.empty?
  # args.first is nil when nothing is left, so this is false then
  if args.first.is_a? Hash
    # Going through []= converts values to strings and guarantees the
    # caller's hash is *not* shared with this node
    args.shift.each_pair { |k, v| self[k] = v }
  end
  # << expands (nested) Arrays and skips nils
  self << args
  # FIXME: We'd rather not have people say @name = :foo there :-)
  instance_eval(&blk) if blk
end
813
+
814
+ # Convert to a well-formatted XML
815
def to_s
  # Serialize the node: attributes sorted by key, self-closing tag when
  # there are no children, String children escaped, others via to_s.
  attr_text = @attrs.sort.map { |k, v| " #{k}='#{v.xml_attr_escape}'" }.join
  return "<#{@name}#{attr_text}/>" if @contents.size == 0
  body = @contents.map { |x| x.is_a?(String) ? x.xml_escape : x.to_s }.join
  "<#{@name}#{attr_text}>#{body}</#{name}>"
end
823
+
824
+ # Convert to a well-formatted XML, but without children information.
825
+ # This is a reasonable format for irb and debugging.
826
+ # If you want to see a few levels of children, call inspect(2) and so on
827
def inspect(include_children=0)
  # Debug-friendly rendering of the node.
  #
  # * include_children - how many levels of children to expand;
  #   at level 0 non-empty contents are shown as "...".
  #
  # String children are escaped, XML children are inspected one level
  # shallower, and any other child (e.g. XML_PI / XML_Comment, which do
  # not accept a depth argument in their inspect) is rendered with to_s.
  open_tag = "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join
  if @contents.size == 0
    open_tag + "/>"
  elsif include_children == 0
    open_tag + ">...</#{name}>"
  else
    body = @contents.map{|x|
      if x.is_a? String
        x.xml_escape
      elsif x.is_a? XML
        x.inspect(include_children-1)
      else
        x.to_s
      end
    }.join
    open_tag + ">" + body + "</#{name}>"
  end
end
837
+
838
+ # Read attributes.
839
+ # Also works with pseudoattributes:
840
+ # img[:@x] == img.child(:x).text # or nil if there isn't any.
841
def [](key)
  # Plain keys read from the attribute hash. Keys whose string form
  # starts with "@" are pseudoattributes: the text of the first child
  # with that tag name, or nil if there is no such child.
  key_s = key.to_s
  return @attrs[key] unless key_s[0] == ?@
  c = child(key_s[1..-1].to_sym)
  c ? c.text : nil
end
854
+
855
+ # Set attributes.
856
+ # Value is automatically converted to String, so you can say:
857
+ # img[:x] = 200
858
+ # Also works with pseudoattributes:
859
+ # foo[:@bar] = "x"
860
def []=(key, value)
  # Plain keys store value.to_s in the attribute hash. Keys whose string
  # form starts with "@" are pseudoattributes: the first child with that
  # tag gets its contents replaced by [value.to_s], or a new child
  # holding value.to_s is appended when none exists.
  key_s = key.to_s
  return @attrs[key] = value.to_s unless key_s[0] == ?@
  tag = key_s[1..-1].to_sym
  existing = child(tag)
  if existing
    existing.contents = [value.to_s]
  else
    self << XML.new(tag, value.to_s)
  end
end
873
+
874
+ # Add children.
875
+ # Possible uses:
876
+ # * Add single element
877
+ # self << xml(...)
878
+ # self << "foo"
879
+ # Add nothing:
880
+ # self << nil
881
+ # Add multiple elements (also works recursively):
882
+ # self << [a, b, c]
883
+ # self << [a, [b, c], d]
884
def <<(cnt)
  # Append a child. nil is ignored, Arrays are expanded recursively,
  # anything else is pushed onto @contents as-is. Returns self so
  # calls can be chained.
  case cnt
  when nil
    # nothing to add
  when Array
    cnt.each { |elem| self << elem }
  else
    @contents << cnt
  end
  self
end
894
+
895
+ # Equality test, works as if XMLs were normalized, so:
896
+ # XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
897
def ==(x)
  # Equality as if both trees were normalized: adjacent String children
  # are treated as one string and empty strings are ignored, so
  #   XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
  #
  # Returns false for non-XML arguments or when name/attrs differ.
  # Non-String children (XML, XML_PI, XML_Comment, anything else) are
  # compared pairwise with their own ==.
  #
  # The previous hand-rolled prefix matching kept comparing a stale
  # prefix once one side ran out of strings, so e.g. "abab" compared
  # equal to "ab". Building the merged child lists without touching
  # either tree avoids that and keeps both objects unmodified.
  return false unless x.is_a? XML
  return false unless name == x.name and attrs == x.attrs

  # Returns a new list with runs of Strings joined and ""s dropped.
  canonical = lambda { |nodes|
    merged = []
    nodes.each { |c|
      if !c.is_a?(String)
        merged << c
      elsif c != ""
        if merged[-1].is_a? String
          merged[-1] << c      # merged[-1] is always our own copy
        else
          merged << c.dup      # copy, so the child itself is never mutated
        end
      end
    }
    merged
  }

  canonical.call(contents) == canonical.call(x.contents)
end
970
+
971
alias_method :real_method_missing, :method_missing
# Monadic interface: any unknown method whose name ends in "!" appends
# a new child element named after the method (without the "!"), built
# from the given arguments and block. Everything else is passed on to
# the previous method_missing.
def method_missing(meth, *args, &blk)
  bang = /^(.*)!$/.match(meth.to_s)
  return real_method_missing(meth, *args, &blk) unless bang
  self << XML.new(bang[1].to_sym, *args, &blk)
end
981
+
982
+ # Make monadic interface more "official"
983
+ # * node.exec! { foo!; bar! }
984
+ # is equivalent to
985
+ # * node << xml(:foo) << xml(:bar)
986
def exec!(&blk)
  # Run the block with self set to this node and return its result.
  instance_eval(&blk)
end
989
+
990
+ # Select a subtree
991
+ # NOTE: Uses object_id of the start/end tags !
992
+ # They have to be the same, not just identical !
993
+ # <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
994
+ # returns
995
+ # <foo><b/><c/></foo>
996
+ # start and end and their descendants are not included in
997
+ # the result tree.
998
+ # Either start or end can be nil.
999
+ # * If both start and end are nil, return whole tree.
1000
+ # * If start is nil, return subtree up to range_end.
1001
+ # * If start is not inside the tree, return nil.
1002
+ # * If end is nil, return subtree from start
1003
+ # * If end is not inside the tree, return subtree from start.
1004
+ # * If end is before or below start, or they're the same node, the result is unspecified.
1005
+ # * if end comes directly after start, or as first node when start==nil, return path reaching there.
1006
def range(range_start, range_end, end_reached_cb=nil)
  # Build a trimmed copy of this subtree containing what lies between
  # range_start and range_end (both matched by object identity).
  #
  # * end_reached_cb - optional callable invoked when range_end is met
  #   here or further down; used by recursive calls to tell the caller
  #   to stop.
  #
  # Returns the new tree, or nil when the range never started here.

  # With no explicit start the range begins at this node itself.
  result = range_start.nil? ? XML.new(name, attrs) : nil
  signal_end = lambda { end_reached_cb.call if end_reached_cb }

  @contents.each do |c|
    # end reached !
    if range_end and c.object_id == range_end.object_id
      signal_end.call
      break
    end
    # start reached !
    if range_start and c.object_id == range_start.object_id
      result = XML.new(name, attrs)
      next
    end

    unless c.is_a? XML
      # String/XML_PI/XML_Comment: copied once started, and obviously
      # cannot start a range themselves
      result.add! c if result
      next
    end

    end_inside = false
    mark_end = lambda { end_inside = true }
    if result
      # We already started
      result.add! c.range(nil, range_end, mark_end)
    else
      sub = c.range(range_start, range_end, mark_end)
      # A non-nil subtree means the start was found below c
      result = XML.new(name, attrs, sub) if sub
    end
    if end_inside
      signal_end.call
      break
    end
  end
  result
end
1053
+
1054
+ # XML#subsequence is similar to XML#range, but instead of
1055
+ # trimmed subtree it returns a list of elements
1056
+ # The same elements are included in both cases, but here
1057
+ # we do not include any parents !
1058
+ #
1059
+ # <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
1060
+ # <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
1061
+ #
1062
+ # <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
1063
+ # <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
1064
+ #
1065
+ # <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
1066
+ # <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
1067
+ #
1068
+ # And we return [], not nil if nothing matches
1069
def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
  # Collect the nodes lying between range_start and range_end (both
  # matched by object identity) as a flat list, without enclosing parents.
  #
  # * start_seen_cb / end_seen_cb - optional callables invoked when the
  #   start / end node is met in this node or anywhere below it. They
  #   let recursive calls report back to their callers.
  #
  # Returns an Array ([] if nothing matches). When range_start is nil the
  # result is wrapped in a copy of this node.
  #
  # Start/end notifications coming from nested calls are forwarded to
  # this call's own callbacks (as XML#range does for its end callback);
  # otherwise ancestors more than one level up never learn that the
  # range began or ended.
  result = []
  start_seen = range_start.nil?
  @contents.each{|c|
    if range_end and range_end.object_id == c.object_id
      end_seen_cb.call if end_seen_cb
      break
    end
    if range_start and range_start.object_id == c.object_id
      start_seen = true
      start_seen_cb.call if start_seen_cb
      next
    end
    if c.is_a? XML
      end_inside = false
      on_end = lambda { end_inside = true }
      if start_seen
        result += c.subsequence(nil, range_end, nil, on_end)
      else
        # The start may be somewhere below c
        on_start = lambda {
          start_seen = true
          start_seen_cb.call if start_seen_cb
        }
        result += c.subsequence(range_start, range_end, on_start, on_end)
      end
      if end_inside
        end_seen_cb.call if end_seen_cb
        break
      end
    elsif start_seen
      # String/XML_PI/XML_Comment: included once started, and cannot
      # start a subsequence themselves
      result << c
    end
  }
  # Include starting tag if it was right from the range_start
  # Otherwise, return just the raw sequence
  result = [XML.new(@name, @attrs, result)] if range_start.nil?
  result
end
1104
+
1105
+ # =~ for a few reasonable patterns
1106
def =~(pattern)
  # Match this node against a pattern:
  # * Symbol - compared with the tag name
  # * Regexp - matched against the node's text
  # * anything else (Hash, Patterns_any, Patterns_all, ...) - pattern === self
  case pattern
  when Symbol then @name == pattern
  when Regexp then text =~ pattern
  else pattern === self
  end
end
1115
+
1116
+ # Get rid of pretty-printing whitespace. Also normalizes the XML.
1117
def remove_pretty_printing!(exceptions=nil)
  # Normalize, strip pretty-printing whitespace everywhere except inside
  # elements whose name is listed in +exceptions+, then normalize again.
  # Returns whatever the final normalize! returns.
  normalize!
  real_remove_pretty_printing!(exceptions)
  normalize!
end
1122
+
1123
+ # normalize! is already recursive, so only one call at top level is needed.
1124
+ # This helper method lets us avoid extra calls to normalize!.
1125
def real_remove_pretty_printing!(exceptions=nil)
  # Recursive worker for remove_pretty_printing!: strips leading and
  # trailing whitespace from every String child (in place) and collapses
  # inner whitespace runs to a single space.
  #
  # * exceptions - optional collection of tag names; elements with such
  #   a name are left untouched, together with everything below them.
  #
  # \A and \z anchor to the whole string. The line anchors ^ and $ made
  # sub! trim only the first line boundary it found, which could leave
  # whitespace at the very end of multi-line text.
  return if exceptions and exceptions.include? @name
  each{|c|
    if c.is_a? String
      c.sub!(/\A\s+/, "")
      c.sub!(/\s+\z/, "")
      c.gsub!(/\s+/, " ")
    elsif c.is_a? XML_PI or c.is_a? XML_Comment
      # PIs and comments carry no pretty-printing to remove
    else
      c.real_remove_pretty_printing!(exceptions)
    end
  }
end
1138
+
1139
+ protected :real_remove_pretty_printing!
1140
+
1141
+ # Add pretty-printing whitespace. Also normalizes the XML.
1142
def add_pretty_printing!
  # Normalize, insert pretty-printing whitespace, then normalize again.
  # Returns whatever the final normalize! returns.
  normalize!
  real_add_pretty_printing!
  normalize!
end
1147
+
1148
def real_add_pretty_printing!(indent = "")
  # Recursive worker for add_pretty_printing!. Does nothing for nodes
  # without children. Otherwise XML children are processed one indent
  # level deeper, newlines inside String children are re-indented in
  # place, and a newline+indent string is inserted before every child
  # plus a closing newline+indent at the end.
  return if @contents.empty?
  each do |c|
    if c.is_a? XML
      c.real_add_pretty_printing!(indent+" ")
    elsif c.is_a? String
      c.gsub!(/\n\s*/, "\n#{indent} ")
    end
  end
  padded = []
  # A fresh separator string per child, since children may be mutated later
  @contents.each { |c| padded << "\n#{indent} " << c }
  padded << "\n#{indent}"
  @contents = padded
end
1159
+
1160
+ protected :real_add_pretty_printing!
1161
+
1162
alias_method :raw_dup, :dup
# Deep copy: the attribute hash is duplicated (its values stay shared)
# and every child is dup'ed in turn. An optional block is instance_eval'ed
# on the copy before it is returned, so you can write:
# * node.dup{ @name = :foo }
# * node.dup{ self[:color] = "blue" }
def dup(&blk)
  copy = raw_dup
  # Attr values stay shared - ugly
  copy.attrs = copy.attrs.dup
  copy.contents = copy.contents.map { |child| child.dup }
  copy.instance_eval(&blk) if blk
  copy
end
1177
+
1178
+
1179
+ # Add some String children (all attributes get to_s'ed)
1180
def text!(*args)
  # Append each argument, converted with to_s, as a child.
  # Returns the argument list.
  args.each do |piece|
    self << piece.to_s
  end
end
1183
+ # Add XML child
1184
def xml!(*args, &blk)
  # Build a new XML node from the arguments/block and push it straight
  # onto @contents. Returns @contents.
  child_node = XML.new(*args, &blk)
  @contents << child_node
end
1187
+
1188
+ alias_method :add!, :<<
1189
+
1190
+ # Normalization means joining strings
1191
+ # and getting rid of ""s, recursively
1192
def normalize!
  # Join adjacent String children and drop empty strings, recursively.
  # Replaces @contents with the normalized list and returns it.
  #
  # Non-String children are normalized only if they respond to
  # normalize!; XML_PI and XML_Comment as defined in this file have no
  # such method and are kept as they are instead of raising NoMethodError.
  new_contents = []
  @contents.each{|c|
    if c.is_a? String
      next if c == ""
      if new_contents[-1].is_a? String
        # += builds a new string, so the original child is not mutated
        new_contents[-1] += c
        next
      end
    elsif c.respond_to? :normalize!
      c.normalize!
    end
    new_contents.push c
  }
  @contents = new_contents
end
1208
+
1209
+ # Return text below the node, stripping all XML tags,
1210
+ # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1211
+ # returns "Hello, world!"
1212
def text
  # Concatenate all text below this node: String children directly,
  # XML children via their own text. Other children (XML_PI,
  # XML_Comment, ...) are ignored.
  @contents.inject("") do |acc, c|
    if c.is_a? XML
      acc << c.text
    elsif c.is_a? String
      acc << c
    else
      acc
    end
  end
end
1223
+
1224
+ # Equivalent to node.children(pat, *rest)[0]
1225
+ # Returns nil if there aren't any matching children
1226
def child(pat=nil, *rest)
  # First match that children(pat, *rest) yields, or nil when it
  # yields nothing. Returning from inside the block stops the search
  # at the first hit.
  children(pat, *rest) { |found| return found }
  nil
end
1232
+
1233
+ # Equivalent to node.descendants(pat, *rest)[0]
1234
+ # Returns nil if there aren't any matching descendants
1235
def descendant(pat=nil, *rest)
  # First match that descendants(pat, *rest) yields, or nil when it
  # yields nothing. Returning from inside the block stops the search
  # at the first hit.
  descendants(pat, *rest) { |found| return found }
  nil
end
1241
+
1242
+ # XML#children(pattern, more_patterns)
1243
+ # Return all children of a node with tags matching tag.
1244
+ # Also:
1245
+ # * children(:a, :b) == children(:a).children(:b)
1246
+ # * children(:a, :*, :c) == children(:a).descendants(:c)
1247
def children(pat=nil, *rest, &blk)
  # Children matching +pat+ (via pat === child; nil matches everything).
  # With further patterns in +rest+, the search continues one level down
  # from each matching child. A leading :* switches to a descendants
  # search with the remaining patterns.
  #
  # Returns the matches as an Array; final matches are also yielded
  # when a block is given.
  return descendants(*rest, &blk) if pat == :*
  res = []
  @contents.each do |c|
    next unless pat.nil? || pat === c
    if rest == []
      res << c
      yield c if block_given?
    else
      res += c.children(*rest, &blk)
    end
  end
  res
end
1262
+
1263
+ # * XML#descendants
1264
+ # * XML#descendants(pattern)
1265
+ # * XML#descendants(pattern, more_patterns)
1266
+ #
1267
+ # Return all descendants of a node matching the pattern.
1268
+ # If pattern==nil, simply return all descendants.
1269
+ # Optionally run a block on each of them if a block was given.
1270
+ # If pattern==nil, also match Strings !
1271
def descendants(pat=nil, *rest, &blk)
  # Like children, but the search also continues into every XML child,
  # so matches at any depth are found. nil matches everything,
  # including Strings.
  #
  # Returns the matches as an Array in document order; final matches
  # are also yielded when a block is given.
  res = []
  @contents.each do |c|
    if pat.nil? || pat === c
      if rest == []
        res << c
        yield c if block_given?
      else
        res += c.children(*rest, &blk)
      end
    end
    res += c.descendants(pat, *rest, &blk) if c.is_a? XML
  end
  res
end
1288
+
1289
+ # Change elements based on pattern
1290
def deep_map(pat, &blk)
  # If this node matches +pat+ (via =~), return the block's result for
  # it. Otherwise return a copy of the node (same name and attributes)
  # whose XML children have been deep_map'ed in turn; other children
  # are carried over unchanged.
  return yield(self) if self =~ pat
  r = XML.new(self.name, self.attrs)
  each { |c| r << (c.is_a?(XML) ? c.deep_map(pat, &blk) : c) }
  r
end
1305
+
1306
+ # FIXME: do we want a shallow or a deep copy here ?
1307
+ # Map children, but leave the name/attributes
1308
def map(pat=nil)
  # Return a new node with the same name and attributes whose children
  # are the block's results for each child. When +pat+ is given, only
  # children matching it (via =~) go through the block; the others are
  # carried over unchanged.
  r = XML.new(self.name, self.attrs)
  each { |c| r << ((!pat || c =~ pat) ? yield(c) : c) }
  r
end
1319
+ end
1320
+
1321
+ # FIXME: Is this even sane ?
1322
+ # * What about escaping and all that stuff ?
1323
+ # * Rest of the code assumes that everything is either XML or String
1324
class XML_PI
  # Processing instruction node. Stores the two values it is given and
  # renders them back to back between "<?" and "?>"; no escaping is done.
  def initialize(target, content)
    @c = target
    @t = content
  end

  def to_s
    "<?" + @c.to_s + @t.to_s + "?>"
  end
end
1333
+
1334
+ # FIXME: Is this even sane ?
1335
+ # * What about escaping and all that stuff ?
1336
+ # * Rest of the code assumes that everything is either XML or String
1337
+ # * There are some limitations on where one can put -s in the comment. Do not overdo.
1338
class XML_Comment
  # Comment node. Stores the comment body and renders it between
  # "<!--" and "-->"; no escaping or validation is done.
  def initialize(body)
    @c = body
  end

  def to_s
    "<!--" + @c.to_s + "-->"
  end
end
1345
+ end
1346
+
1347
+ # Syntactic sugar for XML.new
1348
def xml(*args, &blk)
  # Shorthand for XML.new with the same arguments and block.
  node = XML.new(*args, &blk)
  node
end
1351
+
1352
+ # xml! in XML { ... } - context adds node to parent
1353
+ # xml! in main context prints the argument (and returns it anyway)
1354
def xml!(*args, &blk)
  # Build a node with xml(...), print it, and return it.
  built = xml(*args, &blk)
  print built
  built
end
1359
+
1360
+ # Perl 6 is supposed to have native support for something like that.
1361
+ # Constructor takes multiple patterns. The object matches if they all match.
1362
+ #
1363
+ # Usage:
1364
+ # case foo
1365
+ # when all(:foo, {:color => 'blue'}, /Hello/)
1366
+ # print foo
1367
+ # end
1368
class Patterns_all
  # Composite pattern: matches an object only when every one of the
  # given patterns matches it (via ===). With no patterns it matches
  # everything.
  def initialize(*patterns)
    @patterns = patterns
  end

  def ===(obj)
    @patterns.each { |pattern| return false unless pattern === obj }
    true
  end
end
1376
+
1377
def all(*patterns)
  # Shorthand for Patterns_all.new(*patterns).
  combined = Patterns_all.new(*patterns)
  combined
end
1380
+
1381
+ # Perl 6 is supposed to have native support for something like that.
1382
+ # Constructor takes multiple patterns. The object matches if any of them matches.
1383
+ #
1384
+ # Usage:
1385
+ # case foo
1386
+ # when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
1387
+ # print foo
1388
+ # end
1389
class Patterns_any
  # Composite pattern: matches an object when at least one of the given
  # patterns matches it (via ===). With no patterns it matches nothing.
  def initialize(*patterns)
    @patterns = patterns
  end

  def ===(obj)
    @patterns.each { |pattern| return true if pattern === obj }
    false
  end
end
1397
+
1398
def any(*patterns)
  # Shorthand for Patterns_any.new(*patterns).
  combined = Patterns_any.new(*patterns)
  combined
end