magic_xml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. data/README +22 -0
  2. data/Rakefile +52 -0
  3. data/VERSION +1 -0
  4. data/doc/classes/Array.html +148 -0
  5. data/doc/classes/File.html +113 -0
  6. data/doc/classes/Hash.html +117 -0
  7. data/doc/classes/Patterns_all.html +145 -0
  8. data/doc/classes/Patterns_any.html +145 -0
  9. data/doc/classes/String.html +470 -0
  10. data/doc/classes/Symbol.html +145 -0
  11. data/doc/classes/XML.html +1881 -0
  12. data/doc/classes/XML_Comment.html +148 -0
  13. data/doc/classes/XML_PI.html +145 -0
  14. data/doc/classes/XML_Tests.html +1727 -0
  15. data/doc/files/magic_xml_rb.html +186 -0
  16. data/doc/files/simple_examples/xml_hello_f_rb.html +88 -0
  17. data/doc/files/simple_examples/xml_hello_m_rb.html +88 -0
  18. data/doc/files/simple_examples/xml_list_f_rb.html +88 -0
  19. data/doc/files/simple_examples/xml_list_m_rb.html +88 -0
  20. data/doc/files/tests_rb.html +94 -0
  21. data/doc/files/xquery_use_cases/parts/q1_rb.html +117 -0
  22. data/doc/files/xquery_use_cases/rdb/q10_rb.html +88 -0
  23. data/doc/files/xquery_use_cases/rdb/q11_rb.html +88 -0
  24. data/doc/files/xquery_use_cases/rdb/q12_rb.html +88 -0
  25. data/doc/files/xquery_use_cases/rdb/q13_rb.html +88 -0
  26. data/doc/files/xquery_use_cases/rdb/q14_rb.html +88 -0
  27. data/doc/files/xquery_use_cases/rdb/q15_rb.html +88 -0
  28. data/doc/files/xquery_use_cases/rdb/q16_rb.html +88 -0
  29. data/doc/files/xquery_use_cases/rdb/q17_rb.html +88 -0
  30. data/doc/files/xquery_use_cases/rdb/q18_rb.html +88 -0
  31. data/doc/files/xquery_use_cases/rdb/q1_rb.html +88 -0
  32. data/doc/files/xquery_use_cases/rdb/q2_rb.html +88 -0
  33. data/doc/files/xquery_use_cases/rdb/q3_rb.html +88 -0
  34. data/doc/files/xquery_use_cases/rdb/q4_rb.html +88 -0
  35. data/doc/files/xquery_use_cases/rdb/q5_rb.html +88 -0
  36. data/doc/files/xquery_use_cases/rdb/q6_rb.html +88 -0
  37. data/doc/files/xquery_use_cases/rdb/q7_rb.html +88 -0
  38. data/doc/files/xquery_use_cases/rdb/q8_rb.html +88 -0
  39. data/doc/files/xquery_use_cases/rdb/q9_rb.html +88 -0
  40. data/doc/files/xquery_use_cases/seq/q1_rb.html +88 -0
  41. data/doc/files/xquery_use_cases/seq/q2_rb.html +88 -0
  42. data/doc/files/xquery_use_cases/seq/q3_rb.html +88 -0
  43. data/doc/files/xquery_use_cases/seq/q4_rb.html +88 -0
  44. data/doc/files/xquery_use_cases/seq/q5_rb.html +88 -0
  45. data/doc/files/xquery_use_cases/sgml/q10_rb.html +88 -0
  46. data/doc/files/xquery_use_cases/sgml/q1_rb.html +88 -0
  47. data/doc/files/xquery_use_cases/sgml/q2_rb.html +88 -0
  48. data/doc/files/xquery_use_cases/sgml/q3_rb.html +88 -0
  49. data/doc/files/xquery_use_cases/sgml/q4_rb.html +88 -0
  50. data/doc/files/xquery_use_cases/sgml/q5_rb.html +88 -0
  51. data/doc/files/xquery_use_cases/sgml/q6_rb.html +88 -0
  52. data/doc/files/xquery_use_cases/sgml/q7_rb.html +88 -0
  53. data/doc/files/xquery_use_cases/sgml/q8a_rb.html +88 -0
  54. data/doc/files/xquery_use_cases/sgml/q8b_rb.html +88 -0
  55. data/doc/files/xquery_use_cases/sgml/q9_rb.html +88 -0
  56. data/doc/files/xquery_use_cases/solution_sizes_rb.html +88 -0
  57. data/doc/files/xquery_use_cases/string/q1_rb.html +88 -0
  58. data/doc/files/xquery_use_cases/string/q2_rb.html +93 -0
  59. data/doc/files/xquery_use_cases/string/q4_rb.html +88 -0
  60. data/doc/files/xquery_use_cases/string/q5_rb.html +88 -0
  61. data/doc/files/xquery_use_cases/test_driver_rb.html +92 -0
  62. data/doc/files/xquery_use_cases/tree/q1_rb.html +111 -0
  63. data/doc/files/xquery_use_cases/tree/q2_rb.html +88 -0
  64. data/doc/files/xquery_use_cases/tree/q3_rb.html +88 -0
  65. data/doc/files/xquery_use_cases/tree/q4_rb.html +88 -0
  66. data/doc/files/xquery_use_cases/tree/q5_rb.html +88 -0
  67. data/doc/files/xquery_use_cases/tree/q6_rb.html +113 -0
  68. data/doc/files/xquery_use_cases/xmp/q10_rb.html +88 -0
  69. data/doc/files/xquery_use_cases/xmp/q11_rb.html +88 -0
  70. data/doc/files/xquery_use_cases/xmp/q12_rb.html +88 -0
  71. data/doc/files/xquery_use_cases/xmp/q1_rb.html +88 -0
  72. data/doc/files/xquery_use_cases/xmp/q2_rb.html +88 -0
  73. data/doc/files/xquery_use_cases/xmp/q3_rb.html +88 -0
  74. data/doc/files/xquery_use_cases/xmp/q4_rb.html +88 -0
  75. data/doc/files/xquery_use_cases/xmp/q5_rb.html +92 -0
  76. data/doc/files/xquery_use_cases/xmp/q6_rb.html +88 -0
  77. data/doc/files/xquery_use_cases/xmp/q7_rb.html +88 -0
  78. data/doc/files/xquery_use_cases/xmp/q8_rb.html +88 -0
  79. data/doc/files/xquery_use_cases/xmp/q9_rb.html +88 -0
  80. data/doc/fr_class_index.html +56 -0
  81. data/doc/fr_file_index.html +110 -0
  82. data/doc/fr_method_index.html +159 -0
  83. data/doc/index.html +26 -0
  84. data/doc/rdoc-style.css +175 -0
  85. data/lib/magic_xml.rb +1400 -0
  86. data/simple_examples/README +14 -0
  87. data/simple_examples/xml_hello_f.rb +32 -0
  88. data/simple_examples/xml_hello_m.rb +32 -0
  89. data/simple_examples/xml_list_f.rb +36 -0
  90. data/simple_examples/xml_list_m.rb +36 -0
  91. data/test/helper.rb +9 -0
  92. data/test/test_magic_xml.rb +855 -0
  93. data/xquery_use_cases/README +17 -0
  94. data/xquery_use_cases/parts/README +12 -0
  95. data/xquery_use_cases/parts/partlist.xml +13 -0
  96. data/xquery_use_cases/parts/q1.out +16 -0
  97. data/xquery_use_cases/parts/q1.rb +38 -0
  98. data/xquery_use_cases/parts/q1.xquery +18 -0
  99. data/xquery_use_cases/rdb/README +50 -0
  100. data/xquery_use_cases/rdb/bids.xml +81 -0
  101. data/xquery_use_cases/rdb/items.xml +57 -0
  102. data/xquery_use_cases/rdb/q1.out +10 -0
  103. data/xquery_use_cases/rdb/q1.rb +31 -0
  104. data/xquery_use_cases/rdb/q1.xquery +14 -0
  105. data/xquery_use_cases/rdb/q10.out +27 -0
  106. data/xquery_use_cases/rdb/q10.rb +37 -0
  107. data/xquery_use_cases/rdb/q10.xquery +15 -0
  108. data/xquery_use_cases/rdb/q11.out +7 -0
  109. data/xquery_use_cases/rdb/q11.rb +38 -0
  110. data/xquery_use_cases/rdb/q11.xquery +15 -0
  111. data/xquery_use_cases/rdb/q12.out +12 -0
  112. data/xquery_use_cases/rdb/q12.rb +42 -0
  113. data/xquery_use_cases/rdb/q12.xquery +28 -0
  114. data/xquery_use_cases/rdb/q13.out +32 -0
  115. data/xquery_use_cases/rdb/q13.rb +45 -0
  116. data/xquery_use_cases/rdb/q13.xquery +15 -0
  117. data/xquery_use_cases/rdb/q14.out +14 -0
  118. data/xquery_use_cases/rdb/q14.rb +42 -0
  119. data/xquery_use_cases/rdb/q14.xquery +14 -0
  120. data/xquery_use_cases/rdb/q15.out +5 -0
  121. data/xquery_use_cases/rdb/q15.rb +31 -0
  122. data/xquery_use_cases/rdb/q15.xquery +9 -0
  123. data/xquery_use_cases/rdb/q16.out +35 -0
  124. data/xquery_use_cases/rdb/q16.rb +35 -0
  125. data/xquery_use_cases/rdb/q16.xquery +17 -0
  126. data/xquery_use_cases/rdb/q17.out +1 -0
  127. data/xquery_use_cases/rdb/q17.rb +35 -0
  128. data/xquery_use_cases/rdb/q17.xquery +11 -0
  129. data/xquery_use_cases/rdb/q18.out +32 -0
  130. data/xquery_use_cases/rdb/q18.rb +40 -0
  131. data/xquery_use_cases/rdb/q18.xquery +19 -0
  132. data/xquery_use_cases/rdb/q2.out +22 -0
  133. data/xquery_use_cases/rdb/q2.rb +36 -0
  134. data/xquery_use_cases/rdb/q2.xquery +14 -0
  135. data/xquery_use_cases/rdb/q3.out +8 -0
  136. data/xquery_use_cases/rdb/q3.rb +34 -0
  137. data/xquery_use_cases/rdb/q3.xquery +16 -0
  138. data/xquery_use_cases/rdb/q4.out +14 -0
  139. data/xquery_use_cases/rdb/q4.rb +31 -0
  140. data/xquery_use_cases/rdb/q4.xquery +11 -0
  141. data/xquery_use_cases/rdb/q5.out +12 -0
  142. data/xquery_use_cases/rdb/q5.rb +46 -0
  143. data/xquery_use_cases/rdb/q5.xquery +25 -0
  144. data/xquery_use_cases/rdb/q6.out +14 -0
  145. data/xquery_use_cases/rdb/q6.rb +38 -0
  146. data/xquery_use_cases/rdb/q6.xquery +15 -0
  147. data/xquery_use_cases/rdb/q7.out +1 -0
  148. data/xquery_use_cases/rdb/q7.rb +30 -0
  149. data/xquery_use_cases/rdb/q7.xquery +10 -0
  150. data/xquery_use_cases/rdb/q8.out +1 -0
  151. data/xquery_use_cases/rdb/q8.rb +23 -0
  152. data/xquery_use_cases/rdb/q8.xquery +8 -0
  153. data/xquery_use_cases/rdb/q9.out +22 -0
  154. data/xquery_use_cases/rdb/q9.rb +32 -0
  155. data/xquery_use_cases/rdb/q9.xquery +16 -0
  156. data/xquery_use_cases/rdb/users.xml +25 -0
  157. data/xquery_use_cases/seq/README +12 -0
  158. data/xquery_use_cases/seq/q1.out +1 -0
  159. data/xquery_use_cases/seq/q1.rb +25 -0
  160. data/xquery_use_cases/seq/q1.xquery +2 -0
  161. data/xquery_use_cases/seq/q2.out +2 -0
  162. data/xquery_use_cases/seq/q2.rb +25 -0
  163. data/xquery_use_cases/seq/q2.xquery +2 -0
  164. data/xquery_use_cases/seq/q3.out +2 -0
  165. data/xquery_use_cases/seq/q3.rb +26 -0
  166. data/xquery_use_cases/seq/q3.xquery +3 -0
  167. data/xquery_use_cases/seq/q4.out +0 -0
  168. data/xquery_use_cases/seq/q4.rb +27 -0
  169. data/xquery_use_cases/seq/q4.xquery +4 -0
  170. data/xquery_use_cases/seq/q5.out +5 -0
  171. data/xquery_use_cases/seq/q5.rb +29 -0
  172. data/xquery_use_cases/seq/q5.xquery +10 -0
  173. data/xquery_use_cases/seq/report1.xml +40 -0
  174. data/xquery_use_cases/sgml/README +53 -0
  175. data/xquery_use_cases/sgml/q1.out +44 -0
  176. data/xquery_use_cases/sgml/q1.rb +23 -0
  177. data/xquery_use_cases/sgml/q1.xquery +5 -0
  178. data/xquery_use_cases/sgml/q10.out +1 -0
  179. data/xquery_use_cases/sgml/q10.rb +28 -0
  180. data/xquery_use_cases/sgml/q10.xquery +7 -0
  181. data/xquery_use_cases/sgml/q2.out +26 -0
  182. data/xquery_use_cases/sgml/q2.rb +23 -0
  183. data/xquery_use_cases/sgml/q2.xquery +5 -0
  184. data/xquery_use_cases/sgml/q3.out +6 -0
  185. data/xquery_use_cases/sgml/q3.rb +28 -0
  186. data/xquery_use_cases/sgml/q3.xquery +7 -0
  187. data/xquery_use_cases/sgml/q4.out +4 -0
  188. data/xquery_use_cases/sgml/q4.rb +25 -0
  189. data/xquery_use_cases/sgml/q4.xquery +5 -0
  190. data/xquery_use_cases/sgml/q5.out +3 -0
  191. data/xquery_use_cases/sgml/q5.rb +23 -0
  192. data/xquery_use_cases/sgml/q5.xquery +5 -0
  193. data/xquery_use_cases/sgml/q6.out +1 -0
  194. data/xquery_use_cases/sgml/q6.rb +27 -0
  195. data/xquery_use_cases/sgml/q6.xquery +6 -0
  196. data/xquery_use_cases/sgml/q7.out +1 -0
  197. data/xquery_use_cases/sgml/q7.rb +27 -0
  198. data/xquery_use_cases/sgml/q7.xquery +7 -0
  199. data/xquery_use_cases/sgml/q8a.out +34 -0
  200. data/xquery_use_cases/sgml/q8a.rb +27 -0
  201. data/xquery_use_cases/sgml/q8a.xquery +5 -0
  202. data/xquery_use_cases/sgml/q8b.out +26 -0
  203. data/xquery_use_cases/sgml/q8b.rb +32 -0
  204. data/xquery_use_cases/sgml/q8b.xquery +5 -0
  205. data/xquery_use_cases/sgml/q9.out +9 -0
  206. data/xquery_use_cases/sgml/q9.rb +29 -0
  207. data/xquery_use_cases/sgml/q9.xquery +6 -0
  208. data/xquery_use_cases/sgml/sgml.xml +101 -0
  209. data/xquery_use_cases/solution_sizes.rb +48 -0
  210. data/xquery_use_cases/string/README +29 -0
  211. data/xquery_use_cases/string/company-data.xml +20 -0
  212. data/xquery_use_cases/string/q1.out +4 -0
  213. data/xquery_use_cases/string/q1.rb +25 -0
  214. data/xquery_use_cases/string/q1.xquery +1 -0
  215. data/xquery_use_cases/string/q2.out +13 -0
  216. data/xquery_use_cases/string/q2.rb +32 -0
  217. data/xquery_use_cases/string/q2.xquery +23 -0
  218. data/xquery_use_cases/string/q4.out +50 -0
  219. data/xquery_use_cases/string/q4.rb +34 -0
  220. data/xquery_use_cases/string/q4.xquery +14 -0
  221. data/xquery_use_cases/string/q5.out +12 -0
  222. data/xquery_use_cases/string/q5.rb +33 -0
  223. data/xquery_use_cases/string/q5.xquery +8 -0
  224. data/xquery_use_cases/string/string.xml +82 -0
  225. data/xquery_use_cases/test_driver.rb +60 -0
  226. data/xquery_use_cases/tree/README +23 -0
  227. data/xquery_use_cases/tree/book.xml +50 -0
  228. data/xquery_use_cases/tree/q1.out +23 -0
  229. data/xquery_use_cases/tree/q1.rb +31 -0
  230. data/xquery_use_cases/tree/q1.xquery +14 -0
  231. data/xquery_use_cases/tree/q2.out +11 -0
  232. data/xquery_use_cases/tree/q2.rb +27 -0
  233. data/xquery_use_cases/tree/q2.xquery +10 -0
  234. data/xquery_use_cases/tree/q3.out +2 -0
  235. data/xquery_use_cases/tree/q3.rb +26 -0
  236. data/xquery_use_cases/tree/q3.xquery +2 -0
  237. data/xquery_use_cases/tree/q4.out +1 -0
  238. data/xquery_use_cases/tree/q4.rb +23 -0
  239. data/xquery_use_cases/tree/q4.xquery +5 -0
  240. data/xquery_use_cases/tree/q5.out +9 -0
  241. data/xquery_use_cases/tree/q5.rb +30 -0
  242. data/xquery_use_cases/tree/q5.xquery +8 -0
  243. data/xquery_use_cases/tree/q6.out +30 -0
  244. data/xquery_use_cases/tree/q6.rb +35 -0
  245. data/xquery_use_cases/tree/q6.xquery +21 -0
  246. data/xquery_use_cases/xmp/README +41 -0
  247. data/xquery_use_cases/xmp/bib.xml +35 -0
  248. data/xquery_use_cases/xmp/books.xml +15 -0
  249. data/xquery_use_cases/xmp/prices.xml +32 -0
  250. data/xquery_use_cases/xmp/q1.out +8 -0
  251. data/xquery_use_cases/xmp/q1.rb +29 -0
  252. data/xquery_use_cases/xmp/q1.xquery +10 -0
  253. data/xquery_use_cases/xmp/q10.out +11 -0
  254. data/xquery_use_cases/xmp/q10.rb +36 -0
  255. data/xquery_use_cases/xmp/q10.xquery +11 -0
  256. data/xquery_use_cases/xmp/q11.out +35 -0
  257. data/xquery_use_cases/xmp/q11.rb +37 -0
  258. data/xquery_use_cases/xmp/q11.xquery +18 -0
  259. data/xquery_use_cases/xmp/q12.out +6 -0
  260. data/xquery_use_cases/xmp/q12.rb +35 -0
  261. data/xquery_use_cases/xmp/q12.xquery +20 -0
  262. data/xquery_use_cases/xmp/q2.out +37 -0
  263. data/xquery_use_cases/xmp/q2.rb +30 -0
  264. data/xquery_use_cases/xmp/q2.xquery +12 -0
  265. data/xquery_use_cases/xmp/q3.out +34 -0
  266. data/xquery_use_cases/xmp/q3.rb +27 -0
  267. data/xquery_use_cases/xmp/q3.xquery +10 -0
  268. data/xquery_use_cases/xmp/q4.out +31 -0
  269. data/xquery_use_cases/xmp/q4.rb +44 -0
  270. data/xquery_use_cases/xmp/q4.xquery +21 -0
  271. data/xquery_use_cases/xmp/q5.out +17 -0
  272. data/xquery_use_cases/xmp/q5.rb +38 -0
  273. data/xquery_use_cases/xmp/q5.xquery +13 -0
  274. data/xquery_use_cases/xmp/q6.out +28 -0
  275. data/xquery_use_cases/xmp/q6.rb +33 -0
  276. data/xquery_use_cases/xmp/q6.xquery +19 -0
  277. data/xquery_use_cases/xmp/q7.out +8 -0
  278. data/xquery_use_cases/xmp/q7.rb +30 -0
  279. data/xquery_use_cases/xmp/q7.xquery +12 -0
  280. data/xquery_use_cases/xmp/q8.out +7 -0
  281. data/xquery_use_cases/xmp/q8.rb +29 -0
  282. data/xquery_use_cases/xmp/q8.xquery +9 -0
  283. data/xquery_use_cases/xmp/q9.out +4 -0
  284. data/xquery_use_cases/xmp/q9.rb +29 -0
  285. data/xquery_use_cases/xmp/q9.xquery +7 -0
  286. data/xquery_use_cases/xmp/reviews.xml +24 -0
  287. metadata +342 -0
data/lib/magic_xml.rb ADDED
@@ -0,0 +1,1400 @@
1
+ #Copyright (c) 2006-2007 Tomasz Wegrzanowski <Tomasz.Wegrzanowski@gmail.com>
2
+ #
3
+ #Permission is hereby granted, free of charge, to any person obtaining a
4
+ #copy of this software and associated documentation files (the "Software"),
5
+ #to deal in the Software without restriction, including without limitation
6
+ #the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
+ #and/or sell copies of the Software, and to permit persons to whom the
8
+ #Software is furnished to do so, subject to the following conditions:
9
+ #
10
+ #The above copyright notice and this permission notice shall be included in
11
+ #all copies or substantial portions of the Software.
12
+ #
13
+ #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16
+ #THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
17
+ #OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18
+ #ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19
+ #DEALINGS IN THE SOFTWARE.
20
+
21
+ # Needed for parsing
22
+
23
+ require 'rexml/parsers/baseparser'
24
+ # Needed for fetching XMLs from the Internet
25
+ require 'uri'
26
+ require 'net/http'
27
+
28
+ # FIXME: Make comment formatting RDoc-friendly. It's not always so now.
29
+
30
+ # In Ruby 2 Symbol will be a subclass of String, and
31
+ # this won't be needed any more. Before then...
32
class Symbol
  include Comparable

  # Order symbols by their string representation.
  # Comparing with anything that is not a Symbol raises ArgumentError.
  def <=>(other)
    unless other.is_a? Symbol
      raise ArgumentError.new("comparison of #{self.class} with #{other.class} failed")
    end
    to_s <=> other.to_s
  end

  # Keep the stock Symbol#=== reachable under another name.
  alias_method :eqeqeq_before_magic_xml, :===

  # Case equality extended for XML nodes: a symbol matches an XML
  # object whose name equals the symbol. Every other argument is
  # handed to the original Symbol#===.
  def ===(*args, &blk)
    subject = args[0]
    return self == subject.name if args.size >= 1 && subject.is_a?(XML)
    eqeqeq_before_magic_xml(*args, &blk)
  end
end
48
+
49
class Hash
  # Keep the stock Hash#=== reachable under another name.
  alias_method :eqeqeq_before_magic_xml, :===

  # Case equality extended for XML nodes: the hash matches an XML object
  # when every value of the hash is === to what the node returns for
  # the corresponding key (node[key]). An empty hash matches any node.
  # Every other argument is handed to the original Hash#===.
  def ===(*args, &blk)
    node = args[0]
    return eqeqeq_before_magic_xml(*args, &blk) unless args.size >= 1 && node.is_a?(XML)
    all? { |key, pattern| pattern === node[key] }
  end
end
59
+
60
class String
  # Escape string for output as XML text (< > &).
  def xml_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;"}
    gsub(/[<>&]/) { |char| replacements[char] }
  end

  # Escape characters for output as XML attribute values (< > & ' ").
  def xml_attr_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
    gsub(/[<>&'"]/) { |char| replacements[char] }
  end

  # Unescape entities.
  # Supports:
  # * Full set of HTML-compatible named entities
  # * Decimal entities &#1234;
  # * Hex entities &#xA0b1;
  #
  # extra_entities is consulted only for names missing from the built-in
  # table. It may be a Proc (called with the entity name) or any object
  # responding to [] (typically a Hash). The value found may be a String
  # (inserted as is) or an Integer code point.
  #
  # Raises RuntimeError ("Unknown escape NAME") when a named entity
  # cannot be resolved.
  def xml_unescape(extra_entities=nil)
    # Built lazily on first use and shared through a class variable.
    @@xhtml_entity_replacements ||= {
      # Latin-1 range
      'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163,
      'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167,
      'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171,
      'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175,
      'deg' => 176, 'plusmn' => 177, 'sup2' => 178, 'sup3' => 179,
      'acute' => 180, 'micro' => 181, 'para' => 182, 'middot' => 183,
      'cedil' => 184, 'sup1' => 185, 'ordm' => 186, 'raquo' => 187,
      'frac14' => 188, 'frac12' => 189, 'frac34' => 190, 'iquest' => 191,
      'Agrave' => 192, 'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195,
      'Auml' => 196, 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
      'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203,
      'Igrave' => 204, 'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207,
      'ETH' => 208, 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
      'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214, 'times' => 215,
      'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219,
      'Uuml' => 220, 'Yacute' => 221, 'THORN' => 222, 'szlig' => 223,
      'agrave' => 224, 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
      'auml' => 228, 'aring' => 229, 'aelig' => 230, 'ccedil' => 231,
      'egrave' => 232, 'eacute' => 233, 'ecirc' => 234, 'euml' => 235,
      'igrave' => 236, 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
      'eth' => 240, 'ntilde' => 241, 'ograve' => 242, 'oacute' => 243,
      'ocirc' => 244, 'otilde' => 245, 'ouml' => 246, 'divide' => 247,
      'oslash' => 248, 'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251,
      'uuml' => 252, 'yacute' => 253, 'thorn' => 254, 'yuml' => 255,
      # Markup-significant and internationalization characters
      'quot' => 34,
      'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
      'amp' => 38, 'lt' => 60, 'gt' => 62,
      'OElig' => 338, 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353,
      'Yuml' => 376, 'circ' => 710, 'tilde' => 732,
      'ensp' => 8194, 'emsp' => 8195, 'thinsp' => 8201,
      'zwnj' => 8204, 'zwj' => 8205, 'lrm' => 8206, 'rlm' => 8207,
      'ndash' => 8211, 'mdash' => 8212,
      'lsquo' => 8216, 'rsquo' => 8217, 'sbquo' => 8218,
      'ldquo' => 8220, 'rdquo' => 8221, 'bdquo' => 8222,
      'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240,
      'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364,
      # Symbols and Greek letters
      'fnof' => 402,
      'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916,
      'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920,
      'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924,
      'Nu' => 925, 'Xi' => 926, 'Omicron' => 927, 'Pi' => 928,
      'Rho' => 929, 'Sigma' => 931, 'Tau' => 932, 'Upsilon' => 933,
      'Phi' => 934, 'Chi' => 935, 'Psi' => 936, 'Omega' => 937,
      'alpha' => 945, 'beta' => 946, 'gamma' => 947, 'delta' => 948,
      'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 'theta' => 952,
      'iota' => 953, 'kappa' => 954, 'lambda' => 955, 'mu' => 956,
      'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960,
      'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964,
      'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968,
      'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982,
      'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243,
      'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465,
      'real' => 8476, 'trade' => 8482, 'alefsym' => 8501,
      'larr' => 8592, 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595,
      'harr' => 8596, 'crarr' => 8629,
      'lArr' => 8656, 'uArr' => 8657, 'rArr' => 8658, 'dArr' => 8659,
      'hArr' => 8660,
      'forall' => 8704, 'part' => 8706, 'exist' => 8707, 'empty' => 8709,
      'nabla' => 8711, 'isin' => 8712, 'notin' => 8713, 'ni' => 8715,
      'prod' => 8719, 'sum' => 8721, 'minus' => 8722, 'lowast' => 8727,
      'radic' => 8730, 'prop' => 8733, 'infin' => 8734, 'ang' => 8736,
      'and' => 8743, 'or' => 8744, 'cap' => 8745, 'cup' => 8746,
      'int' => 8747, 'there4' => 8756, 'sim' => 8764, 'cong' => 8773,
      'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 'le' => 8804,
      'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
      'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
      'perp' => 8869, 'sdot' => 8901,
      'lceil' => 8968, 'rceil' => 8969, 'lfloor' => 8970, 'rfloor' => 8971,
      'lang' => 9001, 'rang' => 9002, 'loz' => 9674,
      'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829, 'diams' => 9830,
    }
    # Entity names may contain digits after the first letter (sup2, frac12,
    # there4, ...). A letters-only pattern would never match those table
    # entries and would leave them in the text undecoded.
    gsub(/&(?:([a-zA-Z][a-zA-Z0-9]*)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
      name, decimal, hex = $1, $2, $3
      if name
        v = @@xhtml_entity_replacements[name]
        # Nonstandard entity
        unless v
          if extra_entities.is_a? Proc
            v = extra_entities.call(name)
          # Well, we expect a Hash here, but any container will do.
          # As long as it's not a nil.
          elsif extra_entities
            v = extra_entities[name]
          end
        end
        raise "Unknown escape #{name}" unless v
      elsif decimal
        v = decimal.to_i
      else
        v = hex.hex
      end
      # v can be a String or an Integer (a code point, encoded as UTF-8)
      v.is_a?(String) ? v : [v].pack('U')
    }
  end

  # Parse this string as XML by handing it to XML.parse.
  def xml_parse
    XML.parse(self)
  end
end
359
+
360
class File
  # Hand this File object to XML.parse and return whatever it returns.
  def xml_parse
    XML.parse self
  end
end
365
+
366
class Array
  # Children of every XML element in the array, concatenated in array
  # order. Entries that are not XML objects are skipped. Arguments and
  # block are passed through to each element's own #children.
  def children(*args, &blk)
    inject([]) do |found, item|
      item.is_a?(XML) ? found + item.children(*args, &blk) : found
    end
  end

  # Descendants of every XML element in the array, concatenated in array
  # order. Entries that are not XML objects are skipped. Arguments and
  # block are passed through to each element's own #descendants.
  def descendants(*args, &blk)
    inject([]) do |found, item|
      item.is_a?(XML) ? found + item.descendants(*args, &blk) : found
    end
  end
end
384
+
385
+ # Methods of Enumerable.
386
+ # It is not easy to design good methods, because XML
387
+ # is not really "a container", it just acts as one sometimes.
388
+ # Generally:
389
+ # * Methods that return nil should work
390
+ # * Methods that return an element should work
391
+ # * Methods that return a container should return XML container, not Array
392
+ # * Conversion methods should convert
393
+ #
394
+ # FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
395
class XML
  include Enumerable
  # The default any? and all? provided by Enumerable are fine as they are.

  # Iterate over children, possibly with a selector.
  # Selector and block are forwarded to #children; returns self.
  def each(*selector, &blk)
    children(*selector, &blk)
    self
  end

  # Copy of this element holding only its XML children, sorted by the
  # given block. Non-XML children are dropped from the copy.
  def sort_by(*args, &blk)
    dup do
      xml_children = @contents.select { |child| child.is_a? XML }
      @contents = xml_children.sort_by(*args, &blk)
    end
  end

  # Copy of this element with all its children (XML or not) sorted
  # by the given block.
  def children_sort_by(*args, &blk)
    dup do
      @contents = @contents.sort_by(*args, &blk)
    end
  end

  # Copy of this element with all its children sorted using <=>
  # (or the supplied comparison block).
  #
  # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
  # Use sort_by instead.
  #
  # Unless you define your own XML#<=> operator, or do something equally weird.
  def sort(*args, &blk)
    dup do
      @contents = @contents.sort(*args, &blk)
    end
  end

  # Enumerable methods to review:
  #   collect/map
  #   detect/find
  #   each_cons
  #   each_slice
  #   each_with_index
  #   to_a
  #   entries
  #   enum_cons
  #   enum_slice
  #   enum
  #   grep
  #   include?/member?
  #   inject
  #   max/min
  #   max_by/min_by - Ruby 1.9
  #   partition
  #   reject
  #   sort
  #   sort_by
  #   to_set
  #   zip
  # And Enumerable::Enumerator-generating methods
end
449
+
450
+ # Class methods
451
+ class XML
452
+ # XML.foo! == xml!(:foo)
453
+ # XML.foo == xml(:foo)
454
def self.method_missing(meth, *args, &blk)
  # A trailing "!" selects xml!; the "!" is stripped from the tag name.
  bang = /^(.*)!$/.match(meth.to_s)
  return XML.new(meth, *args, &blk) unless bang
  xml!(bang[1].to_sym, *args, &blk)
end
461
+
462
+ # Read file and parse
463
def self.from_file(file)
  # An already-open handle belongs to the caller: parse it and leave it open.
  return parse(file) unless file.is_a? String
  # A String is a path. Open it in block form so the handle is closed
  # once parsing finishes (or raises) instead of leaking.
  File.open(file) { |io| parse(io) }
end
467
+
468
+ # Fetch URL and parse
469
+ # Supported:
470
+ # http://.../
471
+ # https://.../
472
+ # file:foo.xml
473
+ # string:<foo/>
474
def self.from_url(url)
  if url =~ /^string:(.*)$/m
    # Everything after "string:" is the document itself.
    parse($1)
  elsif url =~ /^file:(.*)$/m
    from_file($1)
  elsif url =~ /^http(s?):/
    ssl = ($1 == "s")
    u = URI.parse(url)
    # request_uri is the path plus the optional "?query", and falls back
    # to "/" when the URL has no path at all (e.g. "http://example.com"),
    # which would otherwise produce an empty request path.
    req = Net::HTTP::Get.new(u.request_uri)
    if u.userinfo
      username, passwd = u.userinfo.split(/:/, 2)
      req.basic_auth username, passwd
    end
    http = Net::HTTP.new(u.host, u.port)
    if ssl
      # Loaded lazily so plain HTTP keeps working without OpenSSL.
      # NOTE: You need libopenssl-ruby installed
      # if you want to use HTTPS.
      require 'net/https'
      http.use_ssl = true
      # Verify the server certificate; skipping verification would let
      # anyone on the network path impersonate the server.
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    end

    res = http.start { |conn| conn.request(req) }
    # TODO: Throw a more meaningful exception.
    # The body is parsed whatever the response status is.
    parse(res.body)
  else
    raise "URL protocol #{url} not supported (http, https, file, string are supported)"
  end
end
519
+
520
+ # Like CDuce load_xml
521
+ # The path can be:
522
+ # * file handler
523
+ # * URL (a string with :)
524
+ # * file name (a string without :)
525
def self.load(obj)
  # Anything that is not a String is treated as a stream and parsed directly.
  return parse(obj) unless obj.is_a? String
  # A colon marks the string as a URL; otherwise it is a file name.
  obj.include?(":") ? from_url(obj) : from_file(obj)
end
536
+
537
+ # Parse XML in mixed stream/tree mode
538
+ # Basically the idea is that every time we get start element,
539
+ # we ask the block what to do about it.
540
+ # If it wants a tree below it, it should call e.tree
541
+ # If a tree was requested, elements below the current one
542
+ # are *not* processed. If it wasn't, they are.
543
+ #
544
+ # For example:
545
+ # <foo><bar/></foo><foo2/>
546
+ # yield <foo> ... </foo>
547
+ # .complete! called
548
+ # process <foo2> next
549
+ #
550
+ # But:
551
+ # <foo><bar/></foo><foo2/>
552
+ # yield <foo> ... </foo>
553
+ # .complete! not called
554
+ # process <bar> next
555
+ #
556
+ # FIXME: yielded values are not reusable for now
557
+ # FIXME: make more object-oriented
558
def self.parse_as_twigs(stream)
  parser = REXML::Parsers::BaseParser.new stream
  # Bookkeeping only; nothing in this method reads what is stored here.
  open_nodes = []
  while true
    event = parser.pull
    case event[0]
    when :start_element
      # Build a childless node for the opening tag, attributes unescaped.
      attrs = {}
      event[2].each do |key, value|
        attrs[key.to_sym] = value.xml_unescape
      end
      node = XML.new(event[1].to_sym, attrs, *event[3..-1])

      # Give this one node a complete! method that reads the rest of its
      # subtree from the parser, at most once.
      class << node
        attr_accessor :do_complete

        def complete!
          return unless @do_complete
          @do_complete.call
          @do_complete = nil
        end
      end
      node.do_complete = proc { parse_subtree(node, parser) }

      yield(node)

      # complete! clears do_complete; if it is still set, the block did not
      # ask for the subtree and the children get yielded one by one.
      next unless node.do_complete
      open_nodes.push node
      node.do_complete = nil # It's too late, complete! shouldn't do anything now
    when :end_element
      open_nodes.pop
    when :end_document
      return
    else
      # FIXME: Do the right thing.
      # For now, ignore *everything* else.
      # This is totally incorrect, user might want to
      # see text, comments and stuff like that anyway.
    end
  end
end
602
+
603
+ # Basically it's a copy of self.parse, ugly ...
604
def self.parse_subtree(start_node, parser)
  # Reads events from +parser+ and appends everything up to the end tag
  # matching +start_node+ (whose start tag the caller has already consumed)
  # to +start_node+. Always returns nil; the result is the mutation of
  # +start_node+. Raises RuntimeError if the document ends first.
  #
  # The stack always holds start_node at the bottom, so the
  # "first element becomes the root" and "content outside the root" cases
  # handled by the document-level parser cannot occur here.
  stack = [start_node]
  while true
    event = parser.pull
    case event[0]
    when :start_element
      attrs = {}
      event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape }
      child = XML.new(event[1].to_sym, attrs, *event[3..-1])
      stack[-1] << child
      stack << child
    when :end_element
      stack.pop
      # Popping start_node itself means its end tag was just consumed.
      return if stack.empty?
    when :text
      # Text needs unescaping
      stack[-1] << event[1].xml_unescape
    when :cdata
      # CDATA is already unescaped
      stack[-1] << event[1]
    when :end_document
      raise "Parse error: end_document inside a subtree, tags are not balanced"
    else
      # Deliberately ignored: :xmldecl, :start_doctype, :end_doctype,
      # :elementdecl, :processing_instruction, :comment, :externalentity,
      # :entity, :attlistdecl, :notationdecl and any unknown event type.
    end
  end
end
651
+
652
# Parse XML using REXML. Available options:
# * :extra_entities => Proc or Hash (default = nil), handed to
#   String#xml_unescape for attribute values and text
# * :remove_pretty_printing => true/false (default = false)
# * :comments => true/false (default = false) - keep comments as XML_Comment
# * :pi => true/false (default = false) - keep processing instructions as XML_PI
# * :normalize => true/false (default = false) - normalize
# * :multiple_roots => true/false (default = false) - document
#   can have any number of roots (instead of one).
#   Return all in an array instead of root/nil.
#   Also include non-elements (String/PI/Comment) in the return set !!!
#
# Returns the root node (nil for a document without one), or the Array of
# top-level nodes in :multiple_roots mode.
# Raises RuntimeError for non-whitespace text, CDATA, kept comments or kept
# PIs outside the root when not in :multiple_roots mode.
#
# FIXME: :comments/:pi will break everything
# if there are comments/PIs outside document root.
# Now PIs are outside the document root more often than not,
# so we're pretty much screwed here.
#
# FIXME: Integrate all kinds of parse, and make them support extra options
#
# FIXME: Benchmark normalize!
#
# FIXME: Benchmark dup-based Enumerable methods
#
# FIXME: Make it possible to include bogus XML_Document superparent,
# and to make it support out-of-root PIs/Comments
def self.parse(stream, options={})
  extra_entities = options[:extra_entities]
  multiple_roots = options[:multiple_roots]

  parser = REXML::Parsers::BaseParser.new stream
  # stack[0] is the Array of top-level nodes, every later entry is an
  # element that is still open.
  stack = [[]]

  # Attach a non-element node to the innermost open container, or complain
  # when it sits outside the root in single-root mode.
  attach = lambda do |node, complaint|
    raise complaint unless stack.size > 1 || multiple_roots
    stack[-1] << node
  end

  loop do
    event = parser.pull
    case event[0]
    when :start_element
      attrs = {}
      event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
      node = XML.new(event[1].to_sym, attrs, event[3..-1])
      stack[-1] << node
      stack << node
    when :end_element
      stack.pop
    when :text
      # Text needs unescaping
      text = event[1].xml_unescape(extra_entities)
      if stack.size > 1 || multiple_roots
        # Either inside root or in multi-root mode
        stack[-1] << text
      elsif event[1] =~ /\S/
        raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
      end
      # Out-of-root whitespace in single-root mode is ignored
    when :cdata
      # CDATA is already unescaped
      attach.call(event[1], "CDATA out of the document root")
    when :comment
      next unless options[:comments]
      # FIXME: Ugly !
      attach.call(XML_Comment.new(event[1]), "Comments out of the document root")
    when :processing_instruction
      # FIXME: Real PI node
      next unless options[:pi]
      # FIXME: Ugly !
      attach.call(XML_PI.new(event[1], event[2]), "Processing instruction out of the document root")
    when :end_document
      break
    else
      # :xmldecl, doctype, declarations, entity events and anything unknown
      # are ignored.
    end
  end
  roots = stack[0]

  # In multi-root mode the top level may also hold Strings, PIs and
  # comments; only element nodes can be cleaned up.
  elements = roots.select { |root| root.is_a? XML }
  elements.each { |root| root.remove_pretty_printing! } if options[:remove_pretty_printing]
  # :remove_pretty_printing does :normalize anyway
  elements.each { |root| root.normalize! } if options[:normalize]

  multiple_roots ? roots : roots[0]
end
753
+
754
# Parse a sequence of top-level nodes.
# Same as XML.parse(stream, options) with :multiple_roots forced to true;
# the caller's options Hash is left untouched.
def self.parse_sequence(stream, options={})
  parse(stream, options.merge(:multiple_roots => true))
end
760
+
761
# Renormalize a string containing an XML document:
# parse it with default options and serialize the result with to_s.
def self.renormalize(stream)
  document = parse(stream)
  document.to_s
end
765
+
766
# Renormalize a string containing a sequence of XML documents
# and strings
#   XML.renormalize_sequence("<hello />, <world></world>!") =>
#   "<hello/>, <world/>!"
#
# The parsed nodes are concatenated with Array#join: Array#to_s only
# concatenated elements on Ruby 1.8, on later versions it is an alias of
# Array#inspect and would return a bracketed, quoted dump instead.
def self.renormalize_sequence(stream)
  parse_sequence(stream).join
end
773
+ end
774
+
775
+ # Instance methods (other than those of Enumerable)
776
+ class XML
777
+ attr_accessor :name, :attrs, :contents
778
+
779
# Build a node. Accepted call shapes:
# * XML.new
# * XML.new(:tag_symbol)
# * XML.new(:tag_symbol, {attributes})
# * XML.new(:tag_symbol, "children", "more", XML.new(...))
# * XML.new(:tag_symbol, {attributes}, "and", "children")
# * XML.new(:tag_symbol) { monadic code }
# * XML.new(:tag_symbol, {attributes}) { monadic code }
#
# Or even:
# * XML.new(:tag_symbol, "children") { and some monadic code }
# * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
# But typically you won't be mixing these two styles.
#
# The first argument becomes the name, a Hash directly after it supplies
# attributes (stored through []=, so values are converted to strings and the
# Hash is never shared), everything else is appended as children (Arrays are
# expanded by <<). The optional block is instance_eval'ed on the new node.
def initialize(*args, &blk)
  @attrs = {}
  @contents = []
  @name = args.empty? ? nil : args.shift
  if args.first.is_a? Hash
    args.shift.each { |key, value| self[key] = value }
  end
  # Expand Arrays passed as arguments
  self << args
  # FIXME: We'd rather not have people say @name = :foo there :-)
  instance_eval(&blk) if blk
end
813
+
814
# Serialize the node and all of its children as XML text.
# Attributes are emitted sorted and single-quoted, String children are
# escaped with xml_escape, every other child is serialized with its own to_s.
# A node without children becomes a self-closing tag.
def to_s
  opening = "<#{@name}" + @attrs.sort.map { |key, value| " #{key}='#{value.xml_attr_escape}'" }.join
  return opening + "/>" if @contents.size == 0

  body = @contents.map { |node| node.is_a?(String) ? node.xml_escape : node.to_s }.join
  opening + ">" + body + "</#{name}>"
end
823
+
824
# Convert to a well-formatted XML, but without children information.
# This is a reasonable format for irb and debugging.
# If you want to see a few levels of children, call inspect(2) and so on.
#
# include_children:: number of child levels to expand (default 0); below
#                    that depth the body is abbreviated to "...".
#
# String children are escaped, XML children are inspected one level
# shallower, and any other child (XML_PI, XML_Comment) is rendered with to_s,
# because its #inspect does not take a depth argument.
def inspect(include_children=0)
  opening = "<#{@name}" + @attrs.sort.map { |k, v| " #{k}='#{v.xml_attr_escape}'" }.join
  if @contents.size == 0
    opening + "/>"
  elsif include_children == 0
    opening + ">...</#{name}>"
  else
    body = @contents.map do |x|
      if x.is_a? String
        x.xml_escape
      elsif x.is_a? XML
        x.inspect(include_children - 1)
      else
        x.to_s
      end
    end
    opening + ">" + body.join + "</#{name}>"
  end
end
837
+
838
# Attribute reader.
# A key starting with "@" is a pseudoattribute and reads a child element:
#   img[:@x] == img.child(:x).text # or nil if there isn't any.
# Any other key is looked up in the attribute Hash as given.
def [](key)
  key_name = key.to_s
  return @attrs[key] unless key_name[0] == ?@

  node = child(key_name[1..-1].to_sym)
  node ? node.text : nil
end
854
+
855
# Attribute writer.
# The value is always stored as a String, so you can say:
#   img[:x] = 200
# A key starting with "@" is a pseudoattribute:
#   foo[:@bar] = "x"
# replaces the contents of the first matching child with the value, or
# appends a new child element holding it when there is none.
def []=(key, value)
  key_name = key.to_s
  if key_name[0] != ?@
    @attrs[key] = value.to_s
  elsif (node = child(key_name[1..-1].to_sym))
    node.contents = [value.to_s]
  else
    self << XML.new(key_name[1..-1].to_sym, value.to_s)
  end
end
873
+
874
# Append children and return self, so calls can be chained.
# * Single element:
#     self << xml(...)
#     self << "foo"
# * Nothing at all:
#     self << nil
# * Several elements; nested Arrays are expanded recursively:
#     self << [a, b, c]
#     self << [a, [b, c], d]
def <<(cnt)
  if cnt.is_a? Array
    cnt.each { |item| self << item }
  elsif !cnt.nil?
    @contents << cnt
  end
  self
end
894
+
895
# Equality test, works as if XMLs were normalized, so:
#   XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
#
# Two nodes are equal when the other object is an XML, names and attributes
# match, and the children match after ignoring empty strings and the way
# adjacent strings happen to be split. Non-String children are compared
# with their own ==.
def ==(x)
  return false unless x.is_a? XML
  return false unless name == x.name and attrs == x.attrs
  # Now the hard part, strings can be split in different ways
  # empty string children are possible etc.
  mine = contents
  theirs = x.contents
  self_i = 0
  othr_i = 0
  while self_i != mine.size or othr_i != theirs.size
    # Ignore ""s
    if mine[self_i].is_a? String and mine[self_i] == ""
      self_i += 1
      next
    end
    if theirs[othr_i].is_a? String and theirs[othr_i] == ""
      othr_i += 1
      next
    end

    # If one is finished and the other contains non-empty elements,
    # they are not equal
    return false if self_i == mine.size or othr_i == theirs.size

    # Are they both Strings ?
    # Strings can be divided in different ways, and calling normalize!
    # here would be rather expensive, so consume matching prefixes instead.
    if mine[self_i].is_a? String and theirs[othr_i].is_a? String
      a = mine[self_i]
      b = theirs[othr_i]
      self_i += 1
      othr_i += 1
      while a != "" or b != ""
        if a == b
          a = ""
          b = ""
        elsif a.size > b.size and a[0, b.size] == b
          a = a[b.size..-1]
          # The rest of a must be matched by a following string on the
          # other side; without one the text runs differ. (Reusing the
          # old b here would make "abab" equal to "ab".)
          return false unless theirs[othr_i].is_a? String
          b = theirs[othr_i]
          othr_i += 1
        elsif b.size > a.size and b[0, a.size] == a
          b = b[a.size..-1]
          return false unless mine[self_i].is_a? String
          a = mine[self_i]
          self_i += 1
        else
          return false
        end
      end
      next
    end

    # At least one of them is not a String: both XMLs, an XML against a
    # String, XML_PI/XML_Comment, or something illegal. No sanity check is
    # attempted, the children's own == decides.
    return false unless mine[self_i] == theirs[othr_i]
    self_i += 1
    othr_i += 1
  end
  true
end
970
+
971
+ alias_method :real_method_missing, :method_missing
972
# Monadic interface: any unknown method whose name ends in "!" appends a
# child element named after the method (without the "!") and returns self:
#   node.foo!(:color => "blue") { bar! }
# builds XML.new(:foo, ...) from the arguments and block and adds it.
# Every other unknown method goes to the original method_missing.
def method_missing(meth, *args, &blk)
  bang = /^(.*)!$/.match(meth.to_s)
  return real_method_missing(meth, *args, &blk) unless bang

  self << XML.new(bang[1].to_sym, *args, &blk)
end
981
+
982
# "Official" entry point of the monadic interface: evaluate the block with
# the node as self.
# * node.exec! { foo!; bar! }
# is equivalent to
# * node << xml(:foo) << xml(:bar)
# Returns the value of the block.
def exec!(&blk)
  self.instance_eval(&blk)
end
989
+
990
# Select a subtree
# NOTE: start/end are matched by object identity !
# They have to be the same objects, not just equal ones !
#   <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
# returns
#   <foo><b/><c/></foo>
# start and end and their descendants are not included in
# the result tree.
# Either start or end can be nil.
# * If both start and end are nil, return whole tree.
# * If start is nil, return subtree up to range_end.
# * If start is not inside the tree, return nil.
# * If end is nil, return subtree from start
# * If end is not inside the tree, return subtree from start.
# * If end is before or below start, or they're the same node, the result is unspecified.
# * if end comes directly after start, or as first node when start==nil, return path reaching there.
#
# end_reached_cb, when given, is called once the end node has been met in
# this node or below it; the recursion uses it to stop the enclosing levels.
def range(range_start, range_end, end_reached_cb=nil)
  # nil until the start node has been passed
  result = range_start.nil? ? XML.new(name, attrs) : nil
  @contents.each do |node|
    # end reached !
    if range_end && node.equal?(range_end)
      end_reached_cb.call if end_reached_cb
      break
    end
    # start reached !
    if range_start && node.equal?(range_start)
      result = XML.new(name, attrs)
      next
    end
    unless node.is_a? XML
      # String/XML_PI/XML_Comment: copied once started, and it obviously
      # cannot start a range
      result.add! node if result
      next
    end

    end_seen = false
    on_end = lambda { end_seen = true }
    if result
      # We already started
      result.add! node.range(nil, range_end, on_end)
    else
      below = node.range(range_start, range_end, on_end)
      # start reached somewhere below !
      result = XML.new(name, attrs, below) if below
    end
    if end_seen
      # end reached somewhere below !
      end_reached_cb.call if end_reached_cb
      break
    end
  end
  result
end
1053
+
1054
# XML#subsequence is similar to XML#range, but instead of a
# trimmed subtree it returns a list of elements.
# The same elements are included in both cases, but here
# we do not include any parents !
#
# <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
# <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
#
# <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
# <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
#
# <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
# <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
#
# And we return [], not nil if nothing matches.
#
# range_start/range_end are matched by object identity; either may be nil.
# start_seen_cb/end_seen_cb are called when the start/end node is met in
# this node or anywhere below it, so that every enclosing level learns
# about it no matter how deeply the node is nested.
def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
  result = []
  start_seen = range_start.nil?
  @contents.each do |c|
    if range_end and range_end.object_id == c.object_id
      end_seen_cb.call if end_seen_cb
      break
    end
    if range_start and range_start.object_id == c.object_id
      start_seen = true
      start_seen_cb.call if start_seen_cb
      next
    end
    unless c.is_a? XML
      # String/XML_PI/XML_Comment: collected once started, and it cannot
      # start a subsequence
      result << c if start_seen
      next
    end

    break_me = false
    on_end = lambda { break_me = true }
    if start_seen
      result += c.subsequence(nil, range_end, nil, on_end)
    else
      # Pass the news upwards as well, the start may be several levels down
      on_start = lambda do
        start_seen = true
        start_seen_cb.call if start_seen_cb
      end
      result += c.subsequence(range_start, range_end, on_start, on_end)
    end
    if break_me
      # The end was met below: the enclosing levels must stop too
      end_seen_cb.call if end_seen_cb
      break
    end
  end
  # Include starting tag if it was right from the range_start
  # Otherwise, return just the raw sequence
  result = [XML.new(@name, @attrs, result)] if range_start == nil
  result
end
1104
+
1105
# Pattern match for a few reasonable kinds of pattern:
# * Symbol - compared with the node name
# * Regexp - matched against the node's text
# * anything else (Hash, Patterns_any, Patterns_all) - asked via pattern === self
def =~(pattern)
  case pattern
  when Symbol then @name == pattern
  when Regexp then text =~ pattern
  else pattern === self
  end
end
1115
+
1116
# Strip pretty-printing whitespace from the whole tree, normalizing it
# before and after.
# exceptions, when given, is forwarded to the recursive helper, which skips
# nodes whose name it includes.
# Returns whatever the final normalize! returns.
def remove_pretty_printing!(exceptions=nil)
  normalize!
  real_remove_pretty_printing! exceptions
  normalize!
end
1122
+
1123
# normalize! is already recursive, so only one call at top level is needed.
# This helper method lets us avoid extra calls to normalize!.
#
# For every String child (modified in place): drop leading and trailing
# whitespace and squeeze every inner whitespace run into one space.
# XML_PI and XML_Comment children are left alone, other children are
# processed recursively. A node whose name is included in +exceptions+ is
# skipped together with everything below it.
#
# The strip patterns are anchored to the whole string (\A, \z): with ^ and $
# a sub! may hit the first line boundary instead, which left trailing
# whitespace behind on multi-line text.
def real_remove_pretty_printing!(exceptions=nil)
  return if exceptions and exceptions.include? @name
  each do |c|
    if c.is_a? String
      c.sub!(/\A\s+/, "")
      c.sub!(/\s+\z/, "")
      c.gsub!(/\s+/, " ")
    elsif c.is_a? XML_PI or c.is_a? XML_Comment
      # nothing to clean
    else
      c.real_remove_pretty_printing!(exceptions)
    end
  end
end
1138
+
1139
+ protected :real_remove_pretty_printing!
1140
+
1141
# Indent the whole tree with pretty-printing whitespace, normalizing it
# before and after.
# Returns whatever the final normalize! returns.
def add_pretty_printing!
  normalize!
  real_add_pretty_printing!()
  normalize!
end
1147
+
1148
# Recursive worker of add_pretty_printing!.
# Does nothing for a node without children. Otherwise XML children are
# indented one level deeper, line breaks inside String children are
# re-indented in place, and finally a line break plus indentation is put in
# front of every child and before the closing tag.
#
# indent:: whitespace prefix of this node's own line (default "").
def real_add_pretty_printing!(indent = "")
  return if @contents.empty?

  each do |node|
    case node
    when XML
      node.real_add_pretty_printing!(indent + " ")
    when String
      node.gsub!(/\n\s*/, "\n#{indent} ")
    end
  end
  # A fresh separator String per child, so that none of them is shared
  spaced = []
  @contents.each { |node| spaced.push("\n#{indent} ", node) }
  @contents = spaced + ["\n#{indent}"]
end
1159
+
1160
+ protected :real_add_pretty_printing!
1161
+
1162
+ alias_method :raw_dup, :dup
1163
# Not a plain dup: the copy gets its own attribute Hash and dup'ed
# children, so it is a *deep* copy (attribute values themselves stay
# shared - ugly). An optional block is instance_eval'ed on the copy,
# so you can do things like:
# * node.dup{ @name = :foo }
# * node.dup{ self[:color] = "blue" }
# Returns the copy.
def dup(&blk)
  raw_dup.tap do |copy|
    copy.attrs = copy.attrs.dup
    copy.contents = copy.contents.map { |child| child.dup }
    copy.instance_eval(&blk) if blk
  end
end
1177
+
1178
+
1179
# Append every argument, converted with to_s, as a String child.
# Returns the argument list.
def text!(*args)
  args.each do |piece|
    self << piece.to_s
  end
end
1183
# Build a new XML node from the arguments and block (exactly as XML.new
# would) and append it as a child. Returns the children Array.
def xml!(*args, &blk)
  @contents.push(XML.new(*args, &blk))
end
1187
+
1188
+ alias_method :add!, :<<
1189
+
1190
# Normalization means joining adjacent strings
# and getting rid of ""s, recursively.
#
# Children that can be normalized themselves (nested XML nodes) are
# normalized in place; any other non-String child, such as XML_PI or
# XML_Comment which have no normalize!, is kept as it is.
# Returns the new children Array.
def normalize!
  merged = []
  @contents.each do |c|
    if c.is_a? String
      next if c == ""
      if merged[-1].is_a? String
        # += builds a new String, the original children are not modified
        merged[-1] += c
        next
      end
    elsif c.respond_to? :normalize!
      c.normalize!
    end
    merged.push c
  end
  @contents = merged
end
1208
+
1209
+ # Return text below the node, stripping all XML tags,
1210
+ # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1211
+ # returns "Hello, world!"
1212
+ def text
1213
+ res = ""
1214
+ @contents.each{|c|
1215
+ if c.is_a? XML
1216
+ res << c.text
1217
+ elsif c.is_a? String
1218
+ res << c
1219
+ end # Ignore XML_PI/XML_Comment
1220
+ }
1221
+ res
1222
+ end
1223
+
1224
# First child matching the patterns, i.e. node.children(pat, *rest)[0],
# but the search stops at the first hit.
# Returns nil if there aren't any matching children.
def child(pat=nil, *rest)
  children(pat, *rest) { |found| return found }
  nil
end
1232
+
1233
# First descendant matching the patterns, i.e.
# node.descendants(pat, *rest)[0], but the search stops at the first hit.
# Returns nil if there aren't any matching descendants.
def descendant(pat=nil, *rest)
  descendants(pat, *rest) { |found| return found }
  nil
end
1241
+
1242
# XML#children(pattern, more_patterns)
# Return all children of the node matching the pattern (pattern === child);
# without a pattern, all children.
# With more patterns the search continues below every matching child:
# * children(:a, :b) == children(:a).children(:b)
# * children(:a, :*, :c) == children(:a).descendants(:c)
# A leading :* hands the remaining patterns over to descendants.
# If a block is given, it is also run on each result.
def children(pat=nil, *rest, &blk)
  return descendants(*rest, &blk) if pat == :*

  found = []
  @contents.each do |node|
    next unless pat.nil? || pat === node
    if rest.empty?
      found << node
      yield node if block_given?
    else
      found.concat(node.children(*rest, &blk))
    end
  end
  found
end
1262
+
1263
# * XML#descendants
# * XML#descendants(pattern)
# * XML#descendants(pattern, more_patterns)
#
# Return all descendants of the node matching the pattern
# (pattern === node), a matching node before the matches found below it.
# If pattern==nil, simply return all descendants - Strings included !
# With more patterns, the search continues with children(*more_patterns)
# below every match.
# Optionally run a block on each of the results if a block was given.
def descendants(pat=nil, *rest, &blk)
  found = []
  @contents.each do |node|
    if pat.nil? || pat === node
      if rest.empty?
        found << node
        yield node if block_given?
      else
        found.concat(node.children(*rest, &blk))
      end
    end
    # Only elements have anything below them
    found.concat(node.descendants(pat, *rest, &blk)) if node.is_a? XML
  end
  found
end
1288
+
1289
# Replace elements matching a pattern, at any depth.
# If the node itself matches (self =~ pat), the result is whatever the block
# returns for it. Otherwise a new node with the same name and attributes is
# returned, in which XML children are replaced by their own deep_map and
# all other children are carried over unchanged.
def deep_map(pat, &blk)
  return yield(self) if self =~ pat

  copy = XML.new(self.name, self.attrs)
  each do |node|
    copy << (node.is_a?(XML) ? node.deep_map(pat, &blk) : node)
  end
  copy
end
1305
+
1306
# FIXME: do we want a shallow or a deep copy here ?
# Map children, but leave the name/attributes:
# returns a new node with the same name and attributes whose children are
# the block's results. With a pattern, only children matching it
# (child =~ pat) go through the block, the others are carried over as they
# are.
def map(pat=nil)
  copy = XML.new(self.name, self.attrs)
  each do |node|
    through_block = !pat || node =~ pat
    copy << (through_block ? yield(node) : node)
  end
  copy
end
1319
+ end
1320
+
1321
# Processing instruction node.
# FIXME: Is this even sane ?
# * What about escaping and all that stuff ?
# * Rest of the code assumes that everything is either XML or String
#
# Two instances built from equal arguments are equal (==, eql?, hash), so
# that trees containing processing instructions can be compared by value,
# e.g. a tree with its deep copy.
class XML_PI
  # c:: first part, printed right after "<?"
  # t:: second part, printed right after +c+ with nothing in between
  def initialize(c, t)
    @c = c
    @t = t
  end

  # Serialize as <?...?>
  def to_s
    "<?#{@c}#{@t}?>"
  end

  def ==(other)
    other.is_a?(XML_PI) && state == other.state
  end
  alias_method :eql?, :==

  def hash
    state.hash
  end

  protected

  # Values that define the identity of the node
  def state
    [@c, @t]
  end
end
1333
+
1334
# Comment node.
# FIXME: Is this even sane ?
# * What about escaping and all that stuff ?
# * Rest of the code assumes that everything is either XML or String
# * There are some limitations on where one can put -s in the comment. Do not overdo.
#
# Two instances holding equal text are equal (==, eql?, hash), so that
# trees containing comments can be compared by value, e.g. a tree with its
# deep copy.
class XML_Comment
  # c:: comment text, printed verbatim between "<!--" and "-->"
  def initialize(c)
    @c = c
  end

  # Serialize as <!--...-->
  def to_s
    "<!--#{@c}-->"
  end

  def ==(other)
    other.is_a?(XML_Comment) && state == other.state
  end
  alias_method :eql?, :==

  def hash
    state.hash
  end

  protected

  # Value that defines the identity of the node
  def state
    @c
  end
end
1346
+
1347
# Syntactic sugar for XML.new: arguments and block are passed on as they are,
# the new node is returned.
def xml(*args, &blk)
  XML.new(*args, &blk)
end
1351
+
1352
# xml! inside an XML { ... } context adds the node to the parent;
# this top-level xml! builds the node with xml, prints it
# and returns it anyway.
def xml!(*args, &blk)
  xml(*args, &blk).tap { |node| print node }
end
1359
+
1360
# Perl 6 is supposed to have native support for something like that.
# Conjunction of patterns: the constructor takes any number of patterns,
# and an object matches (===) if it matches all of them.
# Without any patterns everything matches.
#
# Usage:
#   case foo
#   when all(:foo, {:color => 'blue'}, /Hello/)
#     print foo
#   end
class Patterns_all
  def initialize(*patterns)
    @patterns = patterns
  end

  # true unless one of the patterns rejects obj
  def ===(obj)
    @patterns.each do |pattern|
      return false unless pattern === obj
    end
    true
  end
end
1376
+
1377
# Shortcut for Patterns_all.new: a pattern matching what all of the given
# patterns match.
def all(*patterns)
  Patterns_all.new(*patterns)
end
1380
+
1381
# Perl 6 is supposed to have native support for something like that.
# Alternative of patterns: the constructor takes any number of patterns,
# and an object matches (===) if it matches at least one of them.
# Without any patterns nothing matches.
#
# Usage:
#   case foo
#   when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
#     print foo
#   end
class Patterns_any
  def initialize(*patterns)
    @patterns = patterns
  end

  # true as soon as one of the patterns accepts obj
  def ===(obj)
    @patterns.each do |pattern|
      return true if pattern === obj
    end
    false
  end
end
1397
+
1398
# Shortcut for Patterns_any.new: a pattern matching what at least one of the
# given patterns matches.
def any(*patterns)
  Patterns_any.new(*patterns)
end