magic_xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +22 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/doc/classes/Array.html +148 -0
- data/doc/classes/File.html +113 -0
- data/doc/classes/Hash.html +117 -0
- data/doc/classes/Patterns_all.html +145 -0
- data/doc/classes/Patterns_any.html +145 -0
- data/doc/classes/String.html +470 -0
- data/doc/classes/Symbol.html +145 -0
- data/doc/classes/XML.html +1881 -0
- data/doc/classes/XML_Comment.html +148 -0
- data/doc/classes/XML_PI.html +145 -0
- data/doc/classes/XML_Tests.html +1727 -0
- data/doc/files/magic_xml_rb.html +186 -0
- data/doc/files/simple_examples/xml_hello_f_rb.html +88 -0
- data/doc/files/simple_examples/xml_hello_m_rb.html +88 -0
- data/doc/files/simple_examples/xml_list_f_rb.html +88 -0
- data/doc/files/simple_examples/xml_list_m_rb.html +88 -0
- data/doc/files/tests_rb.html +94 -0
- data/doc/files/xquery_use_cases/parts/q1_rb.html +117 -0
- data/doc/files/xquery_use_cases/rdb/q10_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q11_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q12_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q13_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q14_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q15_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q16_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q17_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q18_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q1_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q2_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q3_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q4_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q5_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q6_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q7_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q8_rb.html +88 -0
- data/doc/files/xquery_use_cases/rdb/q9_rb.html +88 -0
- data/doc/files/xquery_use_cases/seq/q1_rb.html +88 -0
- data/doc/files/xquery_use_cases/seq/q2_rb.html +88 -0
- data/doc/files/xquery_use_cases/seq/q3_rb.html +88 -0
- data/doc/files/xquery_use_cases/seq/q4_rb.html +88 -0
- data/doc/files/xquery_use_cases/seq/q5_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q10_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q1_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q2_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q3_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q4_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q5_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q6_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q7_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q8a_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q8b_rb.html +88 -0
- data/doc/files/xquery_use_cases/sgml/q9_rb.html +88 -0
- data/doc/files/xquery_use_cases/solution_sizes_rb.html +88 -0
- data/doc/files/xquery_use_cases/string/q1_rb.html +88 -0
- data/doc/files/xquery_use_cases/string/q2_rb.html +93 -0
- data/doc/files/xquery_use_cases/string/q4_rb.html +88 -0
- data/doc/files/xquery_use_cases/string/q5_rb.html +88 -0
- data/doc/files/xquery_use_cases/test_driver_rb.html +92 -0
- data/doc/files/xquery_use_cases/tree/q1_rb.html +111 -0
- data/doc/files/xquery_use_cases/tree/q2_rb.html +88 -0
- data/doc/files/xquery_use_cases/tree/q3_rb.html +88 -0
- data/doc/files/xquery_use_cases/tree/q4_rb.html +88 -0
- data/doc/files/xquery_use_cases/tree/q5_rb.html +88 -0
- data/doc/files/xquery_use_cases/tree/q6_rb.html +113 -0
- data/doc/files/xquery_use_cases/xmp/q10_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q11_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q12_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q1_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q2_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q3_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q4_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q5_rb.html +92 -0
- data/doc/files/xquery_use_cases/xmp/q6_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q7_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q8_rb.html +88 -0
- data/doc/files/xquery_use_cases/xmp/q9_rb.html +88 -0
- data/doc/fr_class_index.html +56 -0
- data/doc/fr_file_index.html +110 -0
- data/doc/fr_method_index.html +159 -0
- data/doc/index.html +26 -0
- data/doc/rdoc-style.css +175 -0
- data/lib/magic_xml.rb +1400 -0
- data/simple_examples/README +14 -0
- data/simple_examples/xml_hello_f.rb +32 -0
- data/simple_examples/xml_hello_m.rb +32 -0
- data/simple_examples/xml_list_f.rb +36 -0
- data/simple_examples/xml_list_m.rb +36 -0
- data/test/helper.rb +9 -0
- data/test/test_magic_xml.rb +855 -0
- data/xquery_use_cases/README +17 -0
- data/xquery_use_cases/parts/README +12 -0
- data/xquery_use_cases/parts/partlist.xml +13 -0
- data/xquery_use_cases/parts/q1.out +16 -0
- data/xquery_use_cases/parts/q1.rb +38 -0
- data/xquery_use_cases/parts/q1.xquery +18 -0
- data/xquery_use_cases/rdb/README +50 -0
- data/xquery_use_cases/rdb/bids.xml +81 -0
- data/xquery_use_cases/rdb/items.xml +57 -0
- data/xquery_use_cases/rdb/q1.out +10 -0
- data/xquery_use_cases/rdb/q1.rb +31 -0
- data/xquery_use_cases/rdb/q1.xquery +14 -0
- data/xquery_use_cases/rdb/q10.out +27 -0
- data/xquery_use_cases/rdb/q10.rb +37 -0
- data/xquery_use_cases/rdb/q10.xquery +15 -0
- data/xquery_use_cases/rdb/q11.out +7 -0
- data/xquery_use_cases/rdb/q11.rb +38 -0
- data/xquery_use_cases/rdb/q11.xquery +15 -0
- data/xquery_use_cases/rdb/q12.out +12 -0
- data/xquery_use_cases/rdb/q12.rb +42 -0
- data/xquery_use_cases/rdb/q12.xquery +28 -0
- data/xquery_use_cases/rdb/q13.out +32 -0
- data/xquery_use_cases/rdb/q13.rb +45 -0
- data/xquery_use_cases/rdb/q13.xquery +15 -0
- data/xquery_use_cases/rdb/q14.out +14 -0
- data/xquery_use_cases/rdb/q14.rb +42 -0
- data/xquery_use_cases/rdb/q14.xquery +14 -0
- data/xquery_use_cases/rdb/q15.out +5 -0
- data/xquery_use_cases/rdb/q15.rb +31 -0
- data/xquery_use_cases/rdb/q15.xquery +9 -0
- data/xquery_use_cases/rdb/q16.out +35 -0
- data/xquery_use_cases/rdb/q16.rb +35 -0
- data/xquery_use_cases/rdb/q16.xquery +17 -0
- data/xquery_use_cases/rdb/q17.out +1 -0
- data/xquery_use_cases/rdb/q17.rb +35 -0
- data/xquery_use_cases/rdb/q17.xquery +11 -0
- data/xquery_use_cases/rdb/q18.out +32 -0
- data/xquery_use_cases/rdb/q18.rb +40 -0
- data/xquery_use_cases/rdb/q18.xquery +19 -0
- data/xquery_use_cases/rdb/q2.out +22 -0
- data/xquery_use_cases/rdb/q2.rb +36 -0
- data/xquery_use_cases/rdb/q2.xquery +14 -0
- data/xquery_use_cases/rdb/q3.out +8 -0
- data/xquery_use_cases/rdb/q3.rb +34 -0
- data/xquery_use_cases/rdb/q3.xquery +16 -0
- data/xquery_use_cases/rdb/q4.out +14 -0
- data/xquery_use_cases/rdb/q4.rb +31 -0
- data/xquery_use_cases/rdb/q4.xquery +11 -0
- data/xquery_use_cases/rdb/q5.out +12 -0
- data/xquery_use_cases/rdb/q5.rb +46 -0
- data/xquery_use_cases/rdb/q5.xquery +25 -0
- data/xquery_use_cases/rdb/q6.out +14 -0
- data/xquery_use_cases/rdb/q6.rb +38 -0
- data/xquery_use_cases/rdb/q6.xquery +15 -0
- data/xquery_use_cases/rdb/q7.out +1 -0
- data/xquery_use_cases/rdb/q7.rb +30 -0
- data/xquery_use_cases/rdb/q7.xquery +10 -0
- data/xquery_use_cases/rdb/q8.out +1 -0
- data/xquery_use_cases/rdb/q8.rb +23 -0
- data/xquery_use_cases/rdb/q8.xquery +8 -0
- data/xquery_use_cases/rdb/q9.out +22 -0
- data/xquery_use_cases/rdb/q9.rb +32 -0
- data/xquery_use_cases/rdb/q9.xquery +16 -0
- data/xquery_use_cases/rdb/users.xml +25 -0
- data/xquery_use_cases/seq/README +12 -0
- data/xquery_use_cases/seq/q1.out +1 -0
- data/xquery_use_cases/seq/q1.rb +25 -0
- data/xquery_use_cases/seq/q1.xquery +2 -0
- data/xquery_use_cases/seq/q2.out +2 -0
- data/xquery_use_cases/seq/q2.rb +25 -0
- data/xquery_use_cases/seq/q2.xquery +2 -0
- data/xquery_use_cases/seq/q3.out +2 -0
- data/xquery_use_cases/seq/q3.rb +26 -0
- data/xquery_use_cases/seq/q3.xquery +3 -0
- data/xquery_use_cases/seq/q4.out +0 -0
- data/xquery_use_cases/seq/q4.rb +27 -0
- data/xquery_use_cases/seq/q4.xquery +4 -0
- data/xquery_use_cases/seq/q5.out +5 -0
- data/xquery_use_cases/seq/q5.rb +29 -0
- data/xquery_use_cases/seq/q5.xquery +10 -0
- data/xquery_use_cases/seq/report1.xml +40 -0
- data/xquery_use_cases/sgml/README +53 -0
- data/xquery_use_cases/sgml/q1.out +44 -0
- data/xquery_use_cases/sgml/q1.rb +23 -0
- data/xquery_use_cases/sgml/q1.xquery +5 -0
- data/xquery_use_cases/sgml/q10.out +1 -0
- data/xquery_use_cases/sgml/q10.rb +28 -0
- data/xquery_use_cases/sgml/q10.xquery +7 -0
- data/xquery_use_cases/sgml/q2.out +26 -0
- data/xquery_use_cases/sgml/q2.rb +23 -0
- data/xquery_use_cases/sgml/q2.xquery +5 -0
- data/xquery_use_cases/sgml/q3.out +6 -0
- data/xquery_use_cases/sgml/q3.rb +28 -0
- data/xquery_use_cases/sgml/q3.xquery +7 -0
- data/xquery_use_cases/sgml/q4.out +4 -0
- data/xquery_use_cases/sgml/q4.rb +25 -0
- data/xquery_use_cases/sgml/q4.xquery +5 -0
- data/xquery_use_cases/sgml/q5.out +3 -0
- data/xquery_use_cases/sgml/q5.rb +23 -0
- data/xquery_use_cases/sgml/q5.xquery +5 -0
- data/xquery_use_cases/sgml/q6.out +1 -0
- data/xquery_use_cases/sgml/q6.rb +27 -0
- data/xquery_use_cases/sgml/q6.xquery +6 -0
- data/xquery_use_cases/sgml/q7.out +1 -0
- data/xquery_use_cases/sgml/q7.rb +27 -0
- data/xquery_use_cases/sgml/q7.xquery +7 -0
- data/xquery_use_cases/sgml/q8a.out +34 -0
- data/xquery_use_cases/sgml/q8a.rb +27 -0
- data/xquery_use_cases/sgml/q8a.xquery +5 -0
- data/xquery_use_cases/sgml/q8b.out +26 -0
- data/xquery_use_cases/sgml/q8b.rb +32 -0
- data/xquery_use_cases/sgml/q8b.xquery +5 -0
- data/xquery_use_cases/sgml/q9.out +9 -0
- data/xquery_use_cases/sgml/q9.rb +29 -0
- data/xquery_use_cases/sgml/q9.xquery +6 -0
- data/xquery_use_cases/sgml/sgml.xml +101 -0
- data/xquery_use_cases/solution_sizes.rb +48 -0
- data/xquery_use_cases/string/README +29 -0
- data/xquery_use_cases/string/company-data.xml +20 -0
- data/xquery_use_cases/string/q1.out +4 -0
- data/xquery_use_cases/string/q1.rb +25 -0
- data/xquery_use_cases/string/q1.xquery +1 -0
- data/xquery_use_cases/string/q2.out +13 -0
- data/xquery_use_cases/string/q2.rb +32 -0
- data/xquery_use_cases/string/q2.xquery +23 -0
- data/xquery_use_cases/string/q4.out +50 -0
- data/xquery_use_cases/string/q4.rb +34 -0
- data/xquery_use_cases/string/q4.xquery +14 -0
- data/xquery_use_cases/string/q5.out +12 -0
- data/xquery_use_cases/string/q5.rb +33 -0
- data/xquery_use_cases/string/q5.xquery +8 -0
- data/xquery_use_cases/string/string.xml +82 -0
- data/xquery_use_cases/test_driver.rb +60 -0
- data/xquery_use_cases/tree/README +23 -0
- data/xquery_use_cases/tree/book.xml +50 -0
- data/xquery_use_cases/tree/q1.out +23 -0
- data/xquery_use_cases/tree/q1.rb +31 -0
- data/xquery_use_cases/tree/q1.xquery +14 -0
- data/xquery_use_cases/tree/q2.out +11 -0
- data/xquery_use_cases/tree/q2.rb +27 -0
- data/xquery_use_cases/tree/q2.xquery +10 -0
- data/xquery_use_cases/tree/q3.out +2 -0
- data/xquery_use_cases/tree/q3.rb +26 -0
- data/xquery_use_cases/tree/q3.xquery +2 -0
- data/xquery_use_cases/tree/q4.out +1 -0
- data/xquery_use_cases/tree/q4.rb +23 -0
- data/xquery_use_cases/tree/q4.xquery +5 -0
- data/xquery_use_cases/tree/q5.out +9 -0
- data/xquery_use_cases/tree/q5.rb +30 -0
- data/xquery_use_cases/tree/q5.xquery +8 -0
- data/xquery_use_cases/tree/q6.out +30 -0
- data/xquery_use_cases/tree/q6.rb +35 -0
- data/xquery_use_cases/tree/q6.xquery +21 -0
- data/xquery_use_cases/xmp/README +41 -0
- data/xquery_use_cases/xmp/bib.xml +35 -0
- data/xquery_use_cases/xmp/books.xml +15 -0
- data/xquery_use_cases/xmp/prices.xml +32 -0
- data/xquery_use_cases/xmp/q1.out +8 -0
- data/xquery_use_cases/xmp/q1.rb +29 -0
- data/xquery_use_cases/xmp/q1.xquery +10 -0
- data/xquery_use_cases/xmp/q10.out +11 -0
- data/xquery_use_cases/xmp/q10.rb +36 -0
- data/xquery_use_cases/xmp/q10.xquery +11 -0
- data/xquery_use_cases/xmp/q11.out +35 -0
- data/xquery_use_cases/xmp/q11.rb +37 -0
- data/xquery_use_cases/xmp/q11.xquery +18 -0
- data/xquery_use_cases/xmp/q12.out +6 -0
- data/xquery_use_cases/xmp/q12.rb +35 -0
- data/xquery_use_cases/xmp/q12.xquery +20 -0
- data/xquery_use_cases/xmp/q2.out +37 -0
- data/xquery_use_cases/xmp/q2.rb +30 -0
- data/xquery_use_cases/xmp/q2.xquery +12 -0
- data/xquery_use_cases/xmp/q3.out +34 -0
- data/xquery_use_cases/xmp/q3.rb +27 -0
- data/xquery_use_cases/xmp/q3.xquery +10 -0
- data/xquery_use_cases/xmp/q4.out +31 -0
- data/xquery_use_cases/xmp/q4.rb +44 -0
- data/xquery_use_cases/xmp/q4.xquery +21 -0
- data/xquery_use_cases/xmp/q5.out +17 -0
- data/xquery_use_cases/xmp/q5.rb +38 -0
- data/xquery_use_cases/xmp/q5.xquery +13 -0
- data/xquery_use_cases/xmp/q6.out +28 -0
- data/xquery_use_cases/xmp/q6.rb +33 -0
- data/xquery_use_cases/xmp/q6.xquery +19 -0
- data/xquery_use_cases/xmp/q7.out +8 -0
- data/xquery_use_cases/xmp/q7.rb +30 -0
- data/xquery_use_cases/xmp/q7.xquery +12 -0
- data/xquery_use_cases/xmp/q8.out +7 -0
- data/xquery_use_cases/xmp/q8.rb +29 -0
- data/xquery_use_cases/xmp/q8.xquery +9 -0
- data/xquery_use_cases/xmp/q9.out +4 -0
- data/xquery_use_cases/xmp/q9.rb +29 -0
- data/xquery_use_cases/xmp/q9.xquery +7 -0
- data/xquery_use_cases/xmp/reviews.xml +24 -0
- metadata +342 -0
data/lib/magic_xml.rb
ADDED
|
@@ -0,0 +1,1400 @@
|
|
|
1
|
+
#Copyright (c) 2006-2007 Tomasz Wegrzanowski <Tomasz.Wegrzanowski@gmail.com>
|
|
2
|
+
#
|
|
3
|
+
#Permission is hereby granted, free of charge, to any person obtaining a
|
|
4
|
+
#copy of this software and associated documentation files (the "Software"),
|
|
5
|
+
#to deal in the Software without restriction, including without limitation
|
|
6
|
+
#the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
7
|
+
#and/or sell copies of the Software, and to permit persons to whom the
|
|
8
|
+
#Software is furnished to do so, subject to the following conditions:
|
|
9
|
+
#
|
|
10
|
+
#The above copyright notice and this permission notice shall be included in
|
|
11
|
+
#all copies or substantial portions of the Software.
|
|
12
|
+
#
|
|
13
|
+
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
16
|
+
#THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
17
|
+
#OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
18
|
+
#ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
19
|
+
#DEALINGS IN THE SOFTWARE.
|
|
20
|
+
|
|
21
|
+
# Needed for parsing
|
|
22
|
+
|
|
23
|
+
require 'rexml/parsers/baseparser'
|
|
24
|
+
# Needed for fetching XMLs from the Internet
|
|
25
|
+
require 'uri'
|
|
26
|
+
require 'net/http'
|
|
27
|
+
|
|
28
|
+
# FIXME: Make comment formatting RDoc-friendly. It's not always so now.
|
|
29
|
+
|
|
30
|
+
# In Ruby 2 Symbol will be a subclass of String, and
|
|
31
|
+
# this won't be needed any more. Before then...
|
|
32
|
+
# Symbol extensions: ordering between Symbols and matching of XML
# nodes by element name through the case-equality operator.
class Symbol
  include Comparable

  # Compares two Symbols by their string representation.
  #
  # Raises ArgumentError when +other+ is not a Symbol.
  def <=>(other)
    if other.is_a?(Symbol)
      to_s <=> other.to_s
    else
      raise ArgumentError.new("comparison of #{self.class} with #{other.class} failed")
    end
  end

  # Keep the pre-existing === reachable under a private-looking name.
  alias_method :eqeqeq_before_magic_xml, :===

  # Case equality. When the first argument is an XML node, the result is
  # whether this Symbol equals the node's name; any other call is handed
  # to the original === unchanged.
  def ===(*args, &blk)
    candidate = args[0]
    return eqeqeq_before_magic_xml(*args, &blk) unless args.size >= 1 && candidate.is_a?(XML)
    self == candidate.name
  end
end
|
|
48
|
+
|
|
49
|
+
# Hash extension: a Hash can be used as a pattern against an XML node.
class Hash
  # Keep the pre-existing === reachable under another name.
  alias_method :eqeqeq_before_magic_xml, :===

  # Case equality. When the first argument is an XML node, every
  # key/value pair of this Hash is checked: the value is used as a
  # pattern (via ===) against node[key], and all pairs must match.
  # Any other call is handed to the original === unchanged.
  def ===(*args, &blk)
    node = args.first
    if args.empty? || !node.is_a?(XML)
      eqeqeq_before_magic_xml(*args, &blk)
    else
      all? { |key, pattern| pattern === node[key] }
    end
  end
end
|
|
59
|
+
|
|
60
|
+
# XML-related helpers added to every String.
class String
  # Escape string for output as XML text (< > &).
  #
  # Returns a new String in which "<", ">" and "&" are replaced by
  # &lt;, &gt; and &amp;. The receiver is left untouched.
  def xml_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;"}
    gsub(/[<>&]/) { |char| replacements[char] }
  end

  # Escape characters for output as XML attribute values (< > & ' ").
  #
  # Same as xml_escape, and additionally turns the double quote into
  # &quot; and the single quote into &apos; so the result is safe inside
  # either kind of attribute quoting.
  def xml_attr_escape
    replacements = {
      "<" => "&lt;", ">" => "&gt;", "&" => "&amp;",
      "\"" => "&quot;", "'" => "&apos;"
    }
    gsub(/[<>&'"]/) { |char| replacements[char] }
  end

  # Unescape entities
  # Supports:
  # * Full set of HTML-compatible named entities
  # * Decimal entities &#1234;
  # * Hex entities &#xA0b1;
  #
  # +extra_entities+ is consulted only for named entities missing from
  # the built-in table. It may be a Proc (called with the entity name)
  # or any object answering [] with the entity name (typically a Hash).
  # The value it yields may be a String (inserted verbatim) or an
  # Integer code point.
  #
  # Returns a new String. Raises RuntimeError ("Unknown escape NAME")
  # when a named entity cannot be resolved.
  def xml_unescape(extra_entities=nil)
    # Built lazily on first use and shared by all Strings afterwards.
    @@xhtml_entity_replacements ||= {
      # Latin-1 symbols
      'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163,
      'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167,
      'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171,
      'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175,
      'deg' => 176, 'plusmn' => 177, 'sup2' => 178, 'sup3' => 179,
      'acute' => 180, 'micro' => 181, 'para' => 182, 'middot' => 183,
      'cedil' => 184, 'sup1' => 185, 'ordm' => 186, 'raquo' => 187,
      'frac14' => 188, 'frac12' => 189, 'frac34' => 190, 'iquest' => 191,
      # Latin-1 uppercase letters
      'Agrave' => 192, 'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195,
      'Auml' => 196, 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
      'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203,
      'Igrave' => 204, 'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207,
      'ETH' => 208, 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
      'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214, 'times' => 215,
      'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219,
      'Uuml' => 220, 'Yacute' => 221, 'THORN' => 222, 'szlig' => 223,
      # Latin-1 lowercase letters
      'agrave' => 224, 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
      'auml' => 228, 'aring' => 229, 'aelig' => 230, 'ccedil' => 231,
      'egrave' => 232, 'eacute' => 233, 'ecirc' => 234, 'euml' => 235,
      'igrave' => 236, 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
      'eth' => 240, 'ntilde' => 241, 'ograve' => 242, 'oacute' => 243,
      'ocirc' => 244, 'otilde' => 245, 'ouml' => 246, 'divide' => 247,
      'oslash' => 248, 'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251,
      'uuml' => 252, 'yacute' => 253, 'thorn' => 254, 'yuml' => 255,
      # Markup-significant characters
      'quot' => 34,
      'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
      'amp' => 38, 'lt' => 60, 'gt' => 62,
      # Extended Latin, spacing and punctuation
      'OElig' => 338, 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353,
      'Yuml' => 376, 'circ' => 710, 'tilde' => 732,
      'ensp' => 8194, 'emsp' => 8195, 'thinsp' => 8201, 'zwnj' => 8204,
      'zwj' => 8205, 'lrm' => 8206, 'rlm' => 8207,
      'ndash' => 8211, 'mdash' => 8212, 'lsquo' => 8216, 'rsquo' => 8217,
      'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221, 'bdquo' => 8222,
      'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240,
      'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364,
      'fnof' => 402,
      # Greek uppercase
      'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916,
      'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920,
      'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924,
      'Nu' => 925, 'Xi' => 926, 'Omicron' => 927, 'Pi' => 928,
      'Rho' => 929, 'Sigma' => 931, 'Tau' => 932, 'Upsilon' => 933,
      'Phi' => 934, 'Chi' => 935, 'Psi' => 936, 'Omega' => 937,
      # Greek lowercase
      'alpha' => 945, 'beta' => 946, 'gamma' => 947, 'delta' => 948,
      'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 'theta' => 952,
      'iota' => 953, 'kappa' => 954, 'lambda' => 955, 'mu' => 956,
      'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960,
      'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964,
      'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968,
      'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982,
      # General punctuation and letter-like symbols
      'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243,
      'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465,
      'real' => 8476, 'trade' => 8482, 'alefsym' => 8501,
      # Arrows
      'larr' => 8592, 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595,
      'harr' => 8596, 'crarr' => 8629, 'lArr' => 8656, 'uArr' => 8657,
      'rArr' => 8658, 'dArr' => 8659, 'hArr' => 8660,
      # Mathematical operators
      'forall' => 8704, 'part' => 8706, 'exist' => 8707, 'empty' => 8709,
      'nabla' => 8711, 'isin' => 8712, 'notin' => 8713, 'ni' => 8715,
      'prod' => 8719, 'sum' => 8721, 'minus' => 8722, 'lowast' => 8727,
      'radic' => 8730, 'prop' => 8733, 'infin' => 8734, 'ang' => 8736,
      'and' => 8743, 'or' => 8744, 'cap' => 8745, 'cup' => 8746,
      'int' => 8747, 'there4' => 8756, 'sim' => 8764, 'cong' => 8773,
      'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 'le' => 8804,
      'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
      'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
      'perp' => 8869, 'sdot' => 8901,
      # Technical and miscellaneous symbols
      'lceil' => 8968, 'rceil' => 8969, 'lfloor' => 8970, 'rfloor' => 8971,
      'lang' => 9001, 'rang' => 9002, 'loz' => 9674,
      'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829, 'diams' => 9830,
    }
    gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
      # Grab the match groups right away so nothing executed below
      # (e.g. a user-supplied callback) can interfere with them.
      name, decimal, hexadecimal = $1, $2, $3
      if name
        v = @@xhtml_entity_replacements[name]
        # Nonstandard entity
        unless v
          if extra_entities.is_a? Proc
            v = extra_entities.call(name)
          # Well, we expect a Hash here, but any container will do.
          # As long as it's not a nil.
          elsif extra_entities
            v = extra_entities[name]
          end
        end
        raise "Unknown escape #{name}" unless v
      elsif decimal
        v = decimal.to_i
      else
        v = hexadecimal.hex
      end
      # v can be a String or an Integer (a code point, encoded as UTF-8)
      if v.is_a? String then v else [v].pack('U') end
    }
  end

  # Parse the receiver as an XML document by handing it to XML.parse.
  def xml_parse
    XML.parse(self)
  end
end
|
|
359
|
+
|
|
360
|
+
# XML-related helper added to open File objects.
class File
  # Parse the contents of this file as XML by passing the File object
  # itself to XML.parse, and return whatever XML.parse returns.
  def xml_parse
    XML.parse(self)
  end
end
|
|
365
|
+
|
|
366
|
+
# Array extensions for working with lists of XML nodes.
class Array
  # children of any element
  #
  # Concatenates the result of #children (called with the same arguments
  # and block) for every element that is an XML node. Elements that are
  # not XML nodes are ignored. Returns a new Array.
  def children(*args, &blk)
    inject([]) do |collected, item|
      item.is_a?(XML) ? collected + item.children(*args, &blk) : collected
    end
  end

  # descendants of any element
  #
  # Same as #children, but gathers the result of #descendants of every
  # XML element instead.
  def descendants(*args, &blk)
    inject([]) do |collected, item|
      item.is_a?(XML) ? collected + item.descendants(*args, &blk) : collected
    end
  end
end
|
|
384
|
+
|
|
385
|
+
# Methods of Enumerable.
|
|
386
|
+
# It is not easy to design good methods, because XML
|
|
387
|
+
# is not really "a container", it just acts as one sometimes.
|
|
388
|
+
# Generally:
|
|
389
|
+
# * Methods that return nil should work
|
|
390
|
+
# * Methods that return an element should work
|
|
391
|
+
# * Methods that return a container should return XML container, not Array
|
|
392
|
+
# * Conversion methods should convert
|
|
393
|
+
#
|
|
394
|
+
# FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
|
|
395
|
+
# Enumerable support for XML nodes.
#
# Relies on #children and on a block-accepting #dup, both defined
# outside this part of the class. NOTE(review): the sorting methods
# assume that #dup runs the given block in the context of the copy, so
# that assigning @contents affects the copy only - confirm against the
# definition of XML#dup.
class XML
  include Enumerable
  # Default any? is ok
  # Default all? is ok

  # Iterate over children, possibly with a selector.
  # Selector and block are forwarded to #children; returns self.
  def each(*selector, &blk)
    children(*selector, &blk)
    self
  end

  # Sort XML children of XML element.
  # Children that are not XML nodes are left out of the result.
  def sort_by(*args, &blk)
    dup do
      element_children = @contents.select { |child| child.is_a?(XML) }
      @contents = element_children.sort_by(*args, &blk)
    end
  end

  # Sort children of XML element (all of them, not only XML nodes).
  def children_sort_by(*args, &blk)
    dup do
      @contents = @contents.sort_by(*args, &blk)
    end
  end

  # Sort children of XML element.
  #
  # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
  # Use sort_by instead.
  #
  # Unless you define your own XML#<=> operator, or do something equally weird.
  def sort(*args, &blk)
    dup do
      @contents = @contents.sort(*args, &blk)
    end
  end

  # Author's checklist of Enumerable methods (its purpose is not stated
  # in the source):
  #   collect/map, detect/find, each_cons, each_slice, each_with_index,
  #   to_a, entries, enum_cons, enum_slice, enum, grep, include?/member?,
  #   inject, max/min, max_by/min_by (Ruby 1.9), partition, reject,
  #   sort, sort_by, to_set, zip
  # And Enumerable::Enumerator-generating methods
end
|
|
449
|
+
|
|
450
|
+
# Class methods
|
|
451
|
+
class XML
|
|
452
|
+
# XML.foo! == xml!(:foo)
|
|
453
|
+
# XML.foo == xml(:foo)
|
|
454
|
+
# Turns any unknown class-level call into node construction:
# a name ending in "!" is passed (without the "!") to xml!, every
# other name is passed to XML.new. Arguments and block are forwarded.
def self.method_missing(meth, *args, &blk)
  bang = /^(.*)!$/.match(meth.to_s)
  return XML.new(meth, *args, &blk) unless bang
  xml!(bang[1].to_sym, *args, &blk)
end
|
|
461
|
+
|
|
462
|
+
# Read file and parse
|
|
463
|
+
# Parse XML read from +file+.
#
# +file+ is either a file name (String) or anything parse accepts
# directly, such as an already opened File. When a name is given, the
# file is opened only for the duration of the parse and is closed
# afterwards, even if parsing raises. A handle passed in by the caller
# is left open - closing it stays the caller's job.
#
# Returns the result of parse.
def self.from_file(file)
  return parse(file) unless file.is_a? String
  File.open(file) { |io| parse(io) }
end
|
|
467
|
+
|
|
468
|
+
# Fetch URL and parse
|
|
469
|
+
# Supported:
|
|
470
|
+
# http://.../
|
|
471
|
+
# https://.../
|
|
472
|
+
# file:foo.xml
|
|
473
|
+
# string:<foo/>
|
|
474
|
+
# Fetch the document named by +url+ and parse it.
#
#   string:<foo/>   - the rest of the string is the document itself
#   file:foo.xml    - the rest of the string is a file name (see from_file)
#   http://.../     - fetched with a GET request
#   https://.../    - as http, over TLS with certificate verification
#
# For http(s), user name and password embedded in the URL are sent as
# HTTP basic authentication, and the query string is preserved. The
# response body is parsed regardless of the HTTP status code.
#
# Returns the result of parse. Raises RuntimeError for any other scheme.
def self.from_url(url)
  # \A / \z anchor to the whole string; /m lets the payload span lines.
  if url =~ /\Astring:(.*)\z/m
    parse($1)
  elsif url =~ /\Afile:(.*)\z/m
    from_file($1)
  elsif url =~ /\Ahttp(s?):/
    ssl = ($1 == "s")
    u = URI.parse(url)
    # request_uri is path plus query, and "/" when the URL has no path
    # (an empty request path is rejected by Net::HTTP).
    req = Net::HTTP::Get.new(u.request_uri)
    if u.userinfo
      username, passwd = u.userinfo.split(/:/,2)
      req.basic_auth username, passwd
    end
    http = Net::HTTP.new(u.host, u.port)
    if ssl
      # NOTE: You need Ruby's OpenSSL support installed if you want to
      # use HTTPS, hence the lazy require.
      require 'net/https'
      http.use_ssl = true
      # Verify the server certificate; without verification the
      # connection is open to man-in-the-middle attacks.
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    end

    res = http.start {|connection| connection.request(req) }
    # TODO: Throw a more meaningful exception
    parse(res.body)
  else
    raise "URL protocol #{url} not supported (http, https, file, string are supported)"
  end
end
|
|
519
|
+
|
|
520
|
+
# Like CDuce load_xml
|
|
521
|
+
# The path can be:
|
|
522
|
+
# * file handle
|
|
523
|
+
# * URL (a string with :)
|
|
524
|
+
# * file name (a string without :)
|
|
525
|
+
# Load a document from +obj+, picking the loader by its shape:
# a String containing ":" goes to from_url, any other String goes to
# from_file, and every non-String object goes straight to parse.
def self.load(obj)
  return parse(obj) unless obj.is_a? String
  obj.include?(":") ? from_url(obj) : from_file(obj)
end
|
|
536
|
+
|
|
537
|
+
# Parse XML in mixed stream/tree mode
|
|
538
|
+
# Basically the idea is that every time we get start element,
|
|
539
|
+
# we ask the block what to do about it.
|
|
540
|
+
# If it wants a tree below it, it should call e.tree
|
|
541
|
+
# If a tree was requested, elements below the current one
|
|
542
|
+
# are *not* processed. If it wasn't, they are.
|
|
543
|
+
#
|
|
544
|
+
# For example:
|
|
545
|
+
# <foo><bar/></foo><foo2/>
|
|
546
|
+
# yield <foo> ... </foo>
|
|
547
|
+
# .complete! called
|
|
548
|
+
# process <foo2> next
|
|
549
|
+
#
|
|
550
|
+
# But:
|
|
551
|
+
# <foo><bar/></foo><foo2/>
|
|
552
|
+
# yield <foo> ... </foo>
|
|
553
|
+
# .complete! not called
|
|
554
|
+
# process <bar> next
|
|
555
|
+
#
|
|
556
|
+
# FIXME: yielded values are not reusable for now
|
|
557
|
+
# FIXME: make more object-oriented
|
|
558
|
+
# Pull-parse +stream+ and yield a fresh XML node for every start tag.
#
# The yielded node carries attributes only (names converted to Symbols,
# values passed through xml_unescape). It also gets a singleton method
# complete!: calling it while the block is running hands the node and
# the parser to parse_subtree. Once the block has returned, complete!
# is disarmed and does nothing.
#
# Events other than start tag, end tag and end of document are ignored.
# Returns nil when the end of the document is reached.
def self.parse_as_twigs(stream)
  parser = REXML::Parsers::BaseParser.new stream
  # We don't really need to keep the stack ;-)
  open_nodes = []
  while true
    event = parser.pull
    case event[0]
    when :start_element
      attributes = event[2].inject({}) do |converted, (key, value)|
        converted[key.to_sym] = value.xml_unescape
        converted
      end
      node = XML.new(event[1].to_sym, attributes, *event[3..-1])

      # Per-node hook: the first complete! runs the stored callback,
      # later calls are no-ops.
      class << node
        attr_accessor :do_complete

        def complete!
          return unless @do_complete
          @do_complete.call
          @do_complete = nil
        end
      end
      node.do_complete = proc { parse_subtree(node, parser) }

      yield(node)

      # Still armed means the block did not ask for the subtree.
      if node.do_complete
        open_nodes << node
        node.do_complete = nil # It's too late, complete! shouldn't do anything now
      end
    when :end_element
      open_nodes.pop
    when :end_document
      return
    else
      # FIXME: Do the right thing.
      # For now, ignore *everything* else
      # This is totally incorrect, user might want to
      # see text, comments and stuff like that anyway
    end
  end
end
|
|
602
|
+
|
|
603
|
+
# Basically it's a copy of self.parse, ugly ...
|
|
604
|
+
# Read events from +parser+ until the end tag matching +start_node+ has
# been consumed, attaching everything found below it to +start_node+.
#
# start_node:: XML node whose start tag has already been read
# parser::     REXML pull parser positioned just after that start tag
#
# Returns nil. Raises RuntimeError if the document ends before the
# subtree is closed.
def self.parse_subtree(start_node, parser)
  # The stack always holds start_node at the bottom, so it is never empty
  # while the loop is running.
  stack = [start_node]
  while true
    event = parser.pull
    case event[0]
    when :start_element
      attrs = {}
      event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape }
      child = XML.new(event[1].to_sym, attrs, *event[3..-1])
      stack[-1] << child
      stack << child
    when :end_element
      stack.pop
      # Popping start_node itself means the subtree is complete.
      return if stack.empty?
    when :text
      # Text needs unescaping
      stack[-1] << event[1].xml_unescape
    when :cdata
      # CDATA is already unescaped
      stack[-1] << event[1]
    when :end_document
      raise "Parse error: end_document inside a subtree, tags are not balanced"
    end
    # Every other event (declarations, doctype, comments, PIs, ...) is ignored.
  end
end
|
|
651
|
+
|
|
652
|
+
# Parse XML using REXML. Available options:
|
|
653
|
+
# * :extra_entities => Proc or Hash (default = nil)
|
|
654
|
+
# * :remove_pretty_printing => true/false (default = false)
|
|
655
|
+
# * :comments => true/false (default = false)
|
|
656
|
+
# * :pi => true/false (default = false)
|
|
657
|
+
# * :normalize => true/false (default = false) - normalize
|
|
658
|
+
# * :multiple_roots => true/false (default=false) - document
|
|
659
|
+
# can have any number of roots (instead of one).
|
|
660
|
+
# Return all in an array instead of root/nil.
|
|
661
|
+
# Also include non-elements (String/PI/Comment) in the return set !!!
|
|
662
|
+
#
|
|
663
|
+
# FIXME: :comments/:pi will break everything
|
|
664
|
+
# if there are comments/PIs outside document root.
|
|
665
|
+
# Now PIs are outside the document root more often than not,
|
|
666
|
+
# so we're pretty much screwed here.
|
|
667
|
+
#
|
|
668
|
+
# FIXME: Integrate all kinds of parse, and make them support extra options
|
|
669
|
+
#
|
|
670
|
+
# FIXME: Benchmark normalize!
|
|
671
|
+
#
|
|
672
|
+
# FIXME: Benchmark dup-based Enumerable methods
|
|
673
|
+
#
|
|
674
|
+
# FIXME: Make it possible to include bogus XML_Document superparent,
|
|
675
|
+
# and to make it support out-of-root PIs/Comments
|
|
676
|
+
# Parse +stream+ with REXML's pull parser and build XML nodes.
#
# stream::  anything REXML::Parsers::BaseParser.new accepts
# options:: Hash; the keys read here are :extra_entities, :comments, :pi,
#           :multiple_roots, :remove_pretty_printing and :normalize
#
# Returns the first top-level node (nil if there is none), or the Array of
# all top-level nodes when :multiple_roots is set.
# Raises RuntimeError for non-whitespace text, CDATA, comments or PIs that
# appear outside any element in single-root mode.
def self.parse(stream, options={})
  extra_entities = options[:extra_entities]
  multiple_roots = options[:multiple_roots]

  parser = REXML::Parsers::BaseParser.new stream
  # stack[0] collects the top-level nodes; every further entry is an
  # element that is currently open.
  stack = [[]]

  while true
    event = parser.pull
    case event[0]
    when :start_element
      attrs = {}
      event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
      node = XML.new(event[1].to_sym, attrs, event[3..-1])
      stack[-1] << node
      stack << node
    when :end_element
      stack.pop
    when :text
      # Text needs unescaping
      e = event[1].xml_unescape(extra_entities)
      if stack.size > 1 or multiple_roots
        # Either inside an element or in multi-root mode
        stack[-1] << e
      elsif event[1] =~ /\S/
        raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
      end
      # Out-of-root whitespace in single-root mode is ignored.
    when :cdata
      # CDATA is already unescaped
      if stack.size > 1 or multiple_roots
        stack[-1] << event[1]
      else
        raise "CDATA out of the document root"
      end
    when :comment
      next unless options[:comments]
      e = XML_Comment.new(event[1])
      if stack.size > 1 or multiple_roots
        stack[-1] << e
      else
        # FIXME: Ugly !
        raise "Comments out of the document root"
      end
    when :processing_instruction
      # FIXME: Real PI node
      next unless options[:pi]
      e = XML_PI.new(event[1], event[2])
      if stack.size > 1 or multiple_roots
        stack[-1] << e
      else
        # FIXME: Ugly !
        raise "Processing instruction out of the document root"
      end
    when :end_document
      break
    end
    # All other events (xmldecl, doctype, entity declarations, ...) are ignored.
  end
  roots = stack[0]

  # In multi-root mode the top level may also contain Strings, comments
  # and PIs; only post-process entries that actually support it.
  if options[:remove_pretty_printing]
    roots.each { |root| root.remove_pretty_printing! if root.respond_to?(:remove_pretty_printing!) }
  end
  # :remove_pretty_printing does :normalize anyway
  if options[:normalize]
    roots.each { |root| root.normalize! if root.respond_to?(:normalize!) }
  end
  multiple_roots ? roots : roots[0]
end
|
|
753
|
+
|
|
754
|
+
# Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
|
|
755
|
+
# Parse a sequence of top-level nodes: calls parse with a copy of
# +options+ in which :multiple_roots is forced to true. The caller's
# Hash is left untouched.
def self.parse_sequence(stream, options={})
  parse(stream, options.merge(:multiple_roots => true))
end
|
|
760
|
+
|
|
761
|
+
# Renormalize a string containing XML document
|
|
762
|
+
# Parse +stream+ with default options and return the to_s of the result.
def self.renormalize(stream)
  document = parse(stream)
  document.to_s
end
|
|
765
|
+
|
|
766
|
+
# Renormalize a string containing a sequence of XML documents
|
|
767
|
+
# and strings
|
|
768
|
+
# XML.renormalize_sequence("<hello />, <world></world>!") =>
|
|
769
|
+
# "<hello/>, <world/>!"
|
|
770
|
+
# Parse +stream+ as a sequence (see parse_sequence) and return the
# concatenation of each item's to_s.
#
# Array#to_s only concatenated the elements on Ruby 1.8; on later
# versions it returns the inspect form, so the items are joined
# explicitly here.
# NOTE(review): top-level Strings are emitted as-is, without escaping.
def self.renormalize_sequence(stream)
  parse_sequence(stream).join
end
|
|
773
|
+
end
|
|
774
|
+
|
|
775
|
+
# Instance methods (other than those of Enumerable)
|
|
776
|
+
class XML
|
|
777
|
+
attr_accessor :name, :attrs, :contents
|
|
778
|
+
|
|
779
|
+
# initialize can be run in many ways
|
|
780
|
+
# * XML.new
|
|
781
|
+
# * XML.new(:tag_symbol)
|
|
782
|
+
# * XML.new(:tag_symbol, {attributes})
|
|
783
|
+
# * XML.new(:tag_symbol, "children", "more", XML.new(...))
|
|
784
|
+
# * XML.new(:tag_symbol, {attributes}, "and", "children")
|
|
785
|
+
# * XML.new(:tag_symbol) { monadic code }
|
|
786
|
+
# * XML.new(:tag_symbol, {attributes}) { monadic code }
|
|
787
|
+
#
|
|
788
|
+
# Or even:
|
|
789
|
+
# * XML.new(:tag_symbol, "children") { and some monadic code }
|
|
790
|
+
# * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
|
|
791
|
+
# But typically you won't be mixing these two style
|
|
792
|
+
#
|
|
793
|
+
# Attribute values will be converted to strings
|
|
794
|
+
# Build a node from (name, [attributes Hash], *children) and an optional
# block. The first argument, if any, becomes the name; a Hash in second
# position is copied into the attributes through []=; the remaining
# arguments are appended as children; the block is instance_eval'ed last.
def initialize(*args, &blk)
  @attrs = {}
  @contents = []
  @name = args.empty? ? nil : args.shift
  if args.first.is_a? Hash
    # Going through []= converts the values and keeps the caller's Hash
    # from being shared with this node.
    args.shift.each { |key, value| self[key] = value }
  end
  # << expands (nested) Arrays and skips nils.
  self << args
  # FIXME: We'd rather not have people say @name = :foo there :-)
  instance_eval(&blk) if blk
end
|
|
813
|
+
|
|
814
|
+
# Convert to a well-formatted XML
|
|
815
|
+
# Serialize the node and everything below it. Attributes are written in
# sorted order; String children go through xml_escape, every other child
# is serialized with its own to_s. A node without children is written as
# an empty-element tag.
def to_s
  attr_text = @attrs.sort.map { |key, value| " #{key}='#{value.xml_attr_escape}'" }.join
  opening = "<#{@name}" + attr_text
  return opening + "/>" if @contents.size == 0
  body = @contents.map { |item| item.is_a?(String) ? item.xml_escape : item.to_s }.join
  opening + ">" + body + "</#{name}>"
end
|
|
823
|
+
|
|
824
|
+
# Convert to a well-formatted XML, but without children information.
|
|
825
|
+
# This is a reasonable format for irb and debugging.
|
|
826
|
+
# If you want to see a few levels of children, call inspect(2) and so on
|
|
827
|
+
# Short serialization for irb/debugging.
#
# include_children:: number of child levels to expand (default 0, which
#                    prints "..." in place of the children)
#
# String children are escaped, XML children are inspected one level
# shallower, and any other child (e.g. XML_PI / XML_Comment) is written
# with to_s, because a plain Object#inspect does not accept a depth.
def inspect(include_children=0)
  head = "<#{@name}" + @attrs.sort.map { |k, v| " #{k}='#{v.xml_attr_escape}'" }.join
  return head + "/>" if @contents.size == 0
  return head + ">...</#{name}>" if include_children == 0
  body = @contents.map do |x|
    if x.is_a? String
      x.xml_escape
    elsif x.is_a? XML
      x.inspect(include_children - 1)
    else
      x.to_s
    end
  end.join
  head + ">" + body + "</#{name}>"
end
|
|
837
|
+
|
|
838
|
+
# Read attributes.
|
|
839
|
+
# Also works with pseudoattributes:
|
|
840
|
+
# img[:@x] == img.child(:x).text # or nil if there isn't any.
|
|
841
|
+
# Attribute reader. A key whose name starts with "@" is a pseudoattribute:
# the text of the first child with that tag is returned, or nil if there
# is no such child. Any other key is looked up in the attribute Hash.
def [](key)
  key_name = key.to_s
  return @attrs[key] unless key_name[0] == ?@
  found = child(key_name[1..-1].to_sym)
  found ? found.text : nil
end
|
|
854
|
+
|
|
855
|
+
# Set attributes.
|
|
856
|
+
# Value is automatically converted to String, so you can say:
|
|
857
|
+
# img[:x] = 200
|
|
858
|
+
# Also works with pseudoattributes:
|
|
859
|
+
# foo[:@bar] = "x"
|
|
860
|
+
# Attribute writer; the value is always stored as value.to_s.
# A key whose name starts with "@" is a pseudoattribute: the contents of
# the first child with that tag are replaced by the value, or a new child
# with that tag is appended if none exists.
def []=(key, value)
  key_name = key.to_s
  return (@attrs[key] = value.to_s) unless key_name[0] == ?@

  tag = key_name[1..-1].to_sym
  existing = child(tag)
  if existing
    existing.contents = [value.to_s]
  else
    self << XML.new(tag, value.to_s)
  end
end
|
|
873
|
+
|
|
874
|
+
# Add children.
|
|
875
|
+
# Possible uses:
|
|
876
|
+
# * Add single element
|
|
877
|
+
# self << xml(...)
|
|
878
|
+
# self << "foo"
|
|
879
|
+
# Add nothing:
|
|
880
|
+
# self << nil
|
|
881
|
+
# Add multiple elements (also works recursively):
|
|
882
|
+
# self << [a, b, c]
|
|
883
|
+
# self << [a, [b, c], d]
|
|
884
|
+
# Append children and return self, so calls can be chained.
# nil adds nothing, an Array is expanded recursively, anything else is
# appended as a single child.
def <<(cnt)
  return self if cnt.nil?
  if cnt.is_a? Array
    cnt.each { |item| self << item }
  else
    @contents << cnt
  end
  self
end
|
|
894
|
+
|
|
895
|
+
# Equality test, works as if XMLs were normalized, so:
|
|
896
|
+
# XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
|
|
897
|
+
# Equality test that works as if both nodes were normalized, so
#   XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
#
# Returns false unless +x+ is an XML with the same name and attributes.
# The children are then compared after merging adjacent Strings and
# dropping empty ones; neither node is modified. Non-String children are
# compared with their own ==.
#
# Comparing merged copies avoids the fragment-by-fragment bookkeeping,
# which could treat "abab" as equal to "ab" or loop forever when one
# side ran out of String fragments.
def ==(x)
  return false unless x.is_a? XML
  return false unless name == x.name and attrs == x.attrs

  merge = lambda do |items|
    items.inject([]) do |acc, c|
      if c.is_a? String
        if acc.last.is_a? String
          # Build a new String so the node's own children are not mutated.
          acc[-1] = acc.last + c
        elsif c != ""
          acc << c
        end
      else
        acc << c
      end
      acc
    end
  end

  merge.call(contents) == merge.call(x.contents)
end
|
|
970
|
+
|
|
971
|
+
alias_method :real_method_missing, :method_missing
|
|
972
|
+
# Define all foo!-methods for monadic interface, so you can write:
|
|
973
|
+
#
|
|
974
|
+
# Monadic interface: an unknown method whose name ends in "!" appends a
# new child named after the method (without the "!"), built from the
# given arguments and block. Everything else goes to the original
# method_missing.
def method_missing(meth, *args, &blk)
  match = /^(.*)!$/.match(meth.to_s)
  return real_method_missing(meth, *args, &blk) unless match
  self << XML.new(match[1].to_sym, *args, &blk)
end
|
|
981
|
+
|
|
982
|
+
# Make monadic interface more "official"
|
|
983
|
+
# * node.exec! { foo!; bar! }
|
|
984
|
+
# is equivalent to
|
|
985
|
+
# * node << xml(:foo) << xml(:bar)
|
|
986
|
+
# Evaluate the block with this node as self and return the block's value.
def exec!(&block)
  instance_eval(&block)
end
|
|
989
|
+
|
|
990
|
+
# Select a subtree
|
|
991
|
+
# NOTE: Uses object_id of the start/end tags !
|
|
992
|
+
# They have to be the same, not just identical !
|
|
993
|
+
# <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
|
|
994
|
+
# returns
|
|
995
|
+
# <foo><b/><c/></foo>
|
|
996
|
+
# start and end and their descendants are not included in
|
|
997
|
+
# the result tree.
|
|
998
|
+
# Either start or end can be nil.
|
|
999
|
+
# * If both start and end are nil, return whole tree.
|
|
1000
|
+
# * If start is nil, return subtree up to range_end.
|
|
1001
|
+
# * If start is not inside the tree, return nil.
|
|
1002
|
+
# * If end is nil, return subtree from start
|
|
1003
|
+
# * If end is not inside the tree, return subtree from start.
|
|
1004
|
+
# * If end is before or below start, or they're the same node, the result is unspecified.
|
|
1005
|
+
# * if end comes directly after start, or as first node when start==nil, return path reaching there.
|
|
1006
|
+
# Return a trimmed copy of this subtree holding what lies between
# +range_start+ and +range_end+ (both matched by object_id; either may
# be nil). Returns nil if range_start is given but not found below this
# node. +end_reached_cb+, if given, is called when range_end is found at
# this level or anywhere below it.
def range(range_start, range_end, end_reached_cb=nil)
  # Without a start marker the range begins immediately.
  result = range_start.nil? ? XML.new(name, attrs) : nil
  notify_end = lambda { end_reached_cb.call if end_reached_cb }

  @contents.each do |node|
    if range_end and node.object_id == range_end.object_id
      notify_end.call
      break
    end
    if range_start and node.object_id == range_start.object_id
      # Start found: begin collecting with the next sibling.
      result = XML.new(name, attrs)
      next
    end
    unless node.is_a? XML
      # Strings/XML_PI/XML_Comment cannot start a range; they are only
      # copied once collecting has begun.
      result.add! node if result
      next
    end

    end_inside = false
    mark_end = lambda { end_inside = true }
    if result
      # Already collecting: take the child up to range_end.
      result.add! node.range(nil, range_end, mark_end)
    else
      # Not started yet: the start may be somewhere inside this child.
      sub = node.range(range_start, range_end, mark_end)
      result = XML.new(name, attrs, sub) if sub
    end
    if end_inside
      notify_end.call
      break
    end
  end
  result
end
|
|
1053
|
+
|
|
1054
|
+
# XML#subsequence is similar to XML#range, but instead of
|
|
1055
|
+
# trimmed subtree it returns a list of elements
|
|
1056
|
+
# The same elements are included in both cases, but here
|
|
1057
|
+
# we do not include any parents !
|
|
1058
|
+
#
|
|
1059
|
+
# <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
|
|
1060
|
+
# <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
|
|
1061
|
+
#
|
|
1062
|
+
# <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
|
|
1063
|
+
# <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
|
|
1064
|
+
#
|
|
1065
|
+
# <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
|
|
1066
|
+
# <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
|
|
1067
|
+
#
|
|
1068
|
+
# And we return [], not nil if nothing matches
|
|
1069
|
+
# Like range, but returns a flat Array of the selected nodes instead of
# a trimmed tree (an empty Array if nothing matches).
#
# range_start, range_end:: boundary nodes, matched by object_id; either
#                          may be nil
# start_seen_cb, end_seen_cb:: optional callables, invoked when the
#                              respective boundary is found at this level
#                              or anywhere below it
#
# When range_start is nil the collected nodes are wrapped in a copy of
# this node. The callbacks are forwarded upward from nested calls, so
# boundaries nested more than one level deep also start or stop the
# collection in the outer levels.
def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
  result = []
  start_seen = range_start.nil?
  @contents.each do |c|
    if range_end and range_end.object_id == c.object_id
      end_seen_cb.call if end_seen_cb
      break
    end
    if range_start and range_start.object_id == c.object_id
      start_seen = true
      start_seen_cb.call if start_seen_cb
      next
    end
    unless c.is_a? XML
      # String/XML_PI/XML_Comment cannot start a subsequence; they are
      # only collected once the start has been seen.
      result << c if start_seen
      next
    end

    end_inside = false
    on_end = lambda { end_inside = true }
    if start_seen
      result += c.subsequence(nil, range_end, nil, on_end)
    else
      on_start = lambda {
        start_seen = true
        # Tell our own caller too, so it starts collecting our siblings.
        start_seen_cb.call if start_seen_cb
      }
      result += c.subsequence(range_start, range_end, on_start, on_end)
    end
    if end_inside
      # Tell our own caller too, so it stops after this node.
      end_seen_cb.call if end_seen_cb
      break
    end
  end
  # Include the enclosing tag if we were collecting right from the start;
  # otherwise return just the raw sequence.
  result = [XML.new(@name, @attrs, result)] if range_start.nil?
  result
end
|
|
1104
|
+
|
|
1105
|
+
# =~ for a few reasonable patterns
|
|
1106
|
+
# Match this node against a pattern:
# * Symbol - compared with the node name
# * Regexp - matched against the node's text
# * anything else (Hash, Patterns_any, Patterns_all, ...) - pattern === self
def =~(pattern)
  case pattern
  when Symbol then @name == pattern
  when Regexp then text =~ pattern
  else pattern === self
  end
end
|
|
1115
|
+
|
|
1116
|
+
# Get rid of pretty-printing whitespace. Also normalizes the XML.
|
|
1117
|
+
# Strip pretty-printing whitespace from the whole subtree.
# The tree is normalized before the whitespace pass and again after it;
# +exceptions+ is passed through to real_remove_pretty_printing!.
# Returns whatever the final normalize! returns.
def remove_pretty_printing!(exceptions=nil)
  normalize!
  real_remove_pretty_printing!(exceptions)
  normalize!
end
|
|
1122
|
+
|
|
1123
|
+
# normalize! is already recursive, so only one call at top level is needed.
|
|
1124
|
+
# This helper method lets us avoid extra calls to normalize!.
|
|
1125
|
+
# Recursive worker for remove_pretty_printing!.
#
# exceptions:: optional collection of tag names; a node whose name is
#              included is left untouched together with its subtree
#
# String children are modified in place: leading and trailing whitespace
# is removed and inner whitespace runs become one space. XML_PI and
# XML_Comment children are skipped, other children are processed
# recursively.
def real_remove_pretty_printing!(exceptions=nil)
  return if exceptions and exceptions.include? @name
  each{|c|
    if c.is_a? String
      # \A and \z anchor to the whole string; ^ and $ would match at any
      # line boundary and could leave trailing whitespace behind.
      c.sub!(/\A\s+/, "")
      c.sub!(/\s+\z/, "")
      c.gsub!(/\s+/, " ")
    elsif c.is_a? XML_PI or c.is_a? XML_Comment
      # nothing to strip
    else
      c.real_remove_pretty_printing!(exceptions)
    end
  }
end
|
|
1138
|
+
|
|
1139
|
+
protected :real_remove_pretty_printing!
|
|
1140
|
+
|
|
1141
|
+
# Add pretty-printing whitespace. Also normalizes the XML.
|
|
1142
|
+
# Add pretty-printing whitespace to the whole subtree.
# The tree is normalized before the indentation pass and again after it.
# Returns whatever the final normalize! returns.
def add_pretty_printing!
  normalize!
  real_add_pretty_printing!
  normalize!
end
|
|
1147
|
+
|
|
1148
|
+
# Recursive worker for add_pretty_printing!. +indent+ is the indentation
# of this node; children are indented by one more space. Nodes without
# children are left alone.
def real_add_pretty_printing!(indent = "")
  return if @contents.empty?
  each do |node|
    if node.is_a? String
      # Re-indent continuation lines inside existing text (in place).
      node.gsub!(/\n\s*/, "\n#{indent} ")
    elsif node.is_a? XML
      node.real_add_pretty_printing!(indent+" ")
    end
  end
  # Put a newline plus indentation before every child and before the
  # closing tag. Each separator is a fresh String object.
  padded = []
  @contents.each { |node| padded << "\n#{indent} " << node }
  padded << "\n#{indent}"
  @contents = padded
end
|
|
1159
|
+
|
|
1160
|
+
protected :real_add_pretty_printing!
|
|
1161
|
+
|
|
1162
|
+
alias_method :raw_dup, :dup
|
|
1163
|
+
# This is not a trivial method - first it does a *deep* copy,
|
|
1164
|
+
# second it takes a block which is instance_eval'ed,
|
|
1165
|
+
# so you can do things like:
|
|
1166
|
+
# * node.dup{ @name = :foo }
|
|
1167
|
+
# * node.dup{ self[:color] = "blue" }
|
|
1168
|
+
# Deep copy: the attribute Hash is duplicated and every child is dup'ed
# (XML children therefore recursively). If a block is given it is
# instance_eval'ed on the copy before the copy is returned, e.g.
#   node.dup{ self[:color] = "blue" }
def dup(&blk)
  copy = raw_dup
  # The Hash is copied, but the attribute value objects stay shared.
  copy.attrs = copy.attrs.dup
  copy.contents = copy.contents.map { |item| item.dup }
  copy.instance_eval(&blk) if blk
  copy
end
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
# Add some String children (all attributes get to_s'ed)
|
|
1180
|
+
# Append every argument as a String child (each one is converted with
# to_s). Returns the argument Array.
def text!(*args)
  args.each do |piece|
    self << piece.to_s
  end
end
|
|
1183
|
+
# Add XML child
|
|
1184
|
+
# Build a new XML node from the arguments and block and append it to
# this node's children. Returns the children Array.
def xml!(*args, &blk)
  child_node = XML.new(*args, &blk)
  @contents << child_node
end
|
|
1187
|
+
|
|
1188
|
+
alias_method :add!, :<<
|
|
1189
|
+
|
|
1190
|
+
# Normalization means joining strings
|
|
1191
|
+
# and getting rid of ""s, recursively
|
|
1192
|
+
# Normalize the subtree in place: adjacent String children are joined,
# empty Strings are dropped, and children that respond to normalize!
# are normalized recursively. Children that do not respond to it are
# kept as they are instead of raising NoMethodError.
# Returns the new children Array.
def normalize!
  merged = []
  @contents.each do |c|
    if c.is_a? String
      next if c == ""
      if merged[-1].is_a? String
        # += builds a new String, so the original child is not mutated.
        merged[-1] += c
        next
      end
    elsif c.respond_to?(:normalize!)
      c.normalize!
    end
    merged.push c
  end
  @contents = merged
end
|
|
1208
|
+
|
|
1209
|
+
# Return text below the node, stripping all XML tags,
|
|
1210
|
+
# "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
|
|
1211
|
+
# returns "Hello, world!"
|
|
1212
|
+
# Concatenate all text below this node, dropping the tags:
# String children are taken as they are, XML children contribute their
# own text, and anything else (XML_PI/XML_Comment) is ignored.
def text
  @contents.inject("") do |collected, node|
    case node
    when String then collected << node
    when XML then collected << node.text
    end
    collected
  end
end
|
|
1223
|
+
|
|
1224
|
+
# Equivalent to node.children(pat, *rest)[0]
|
|
1225
|
+
# Returns nil if there aren't any matching children
|
|
1226
|
+
# First node that children(pat, *rest) would yield, or nil if there is
# none. The search stops as soon as the first match is found.
def child(pat=nil, *rest)
  children(pat, *rest) { |found| return found }
  nil
end
|
|
1232
|
+
|
|
1233
|
+
# Equivalent to node.descendants(pat, *rest)[0]
|
|
1234
|
+
# Returns nil if there aren't any matching descendants
|
|
1235
|
+
# First node that descendants(pat, *rest) would yield, or nil if there
# is none. The search stops as soon as the first match is found.
def descendant(pat=nil, *rest)
  descendants(pat, *rest) { |found| return found }
  nil
end
|
|
1241
|
+
|
|
1242
|
+
# XML#children(pattern, more_patterns)
|
|
1243
|
+
# Return all children of a node with tags matching tag.
|
|
1244
|
+
# Also:
|
|
1245
|
+
# * children(:a, :b) == children(:a).children(:b)
|
|
1246
|
+
# * children(:a, :*, :c) == children(:a).descendants(:c)
|
|
1247
|
+
# Return the children matching +pat+ (all children if pat is nil; the
# match is pat === child). With further patterns in +rest+, the search
# continues from each matching child with children(*rest). The pattern
# :* switches to descendants for the remaining patterns. A given block
# is called with every node that ends up in the result.
def children(pat=nil, *rest, &blk)
  return descendants(*rest, &blk) if pat == :*
  matches = []
  @contents.each do |node|
    next unless pat.nil? or pat === node
    if rest.empty?
      matches << node
      yield node if block_given?
    else
      matches.concat node.children(*rest, &blk)
    end
  end
  matches
end
|
|
1262
|
+
|
|
1263
|
+
# * XML#descendants
|
|
1264
|
+
# * XML#descendants(pattern)
|
|
1265
|
+
# * XML#descendants(pattern, more_patterns)
|
|
1266
|
+
#
|
|
1267
|
+
# Return all descendants of a node matching the pattern.
|
|
1268
|
+
# If pattern==nil, simply return all descendants.
|
|
1269
|
+
# Optionally run a block on each of them if a block was given.
|
|
1270
|
+
# If pattern==nil, also match Strings !
|
|
1271
|
+
# Return all nodes below this one matching +pat+ (pat === node), in
# document order; with pat nil every node matches, Strings included.
# With further patterns in +rest+, the search continues from each match
# with children(*rest). A given block is called with every node that
# ends up in the result.
def descendants(pat=nil, *rest, &blk)
  found = []
  @contents.each do |node|
    if pat.nil? or pat === node
      if rest.empty?
        found << node
        yield node if block_given?
      else
        found.concat node.children(*rest, &blk)
      end
    end
    # Only XML nodes have anything below them to search.
    found.concat node.descendants(pat, *rest, &blk) if node.is_a? XML
  end
  found
end
|
|
1288
|
+
|
|
1289
|
+
# Change elements based on pattern
|
|
1290
|
+
# Rebuild the subtree, replacing every node that matches +pat+ (self =~
# pat) by the block's value for it. A matching node is handed to the
# block and its children are not visited; a non-matching node is copied
# (name and attributes) and its children are processed the same way.
# Non-XML children are carried over unchanged.
def deep_map(pat, &blk)
  return yield(self) if self =~ pat
  copy = XML.new(self.name, self.attrs)
  each do |node|
    copy << (node.is_a?(XML) ? node.deep_map(pat, &blk) : node)
  end
  copy
end
|
|
1305
|
+
|
|
1306
|
+
# FIXME: do we want a shallow or a deep copy here ?
|
|
1307
|
+
# Map children, but leave the name/attributes
|
|
1308
|
+
# Return a new node with the same name and attributes whose children are
# the block's values for this node's children. If +pat+ is given, only
# children with child =~ pat go through the block; the others are
# carried over as they are.
# FIXME: do we want a shallow or a deep copy here ?
def map(pat=nil)
  copy = XML.new(self.name, self.attrs)
  each do |node|
    copy << ((!pat || node =~ pat) ? yield(node) : node)
  end
  copy
end
|
|
1319
|
+
end
|
|
1320
|
+
|
|
1321
|
+
# FIXME: Is this even sane ?
|
|
1322
|
+
# * What about escaping and all that stuff ?
|
|
1323
|
+
# * Rest of the code assumes that everything is either XML or String
|
|
1324
|
+
class XML_PI
  # c and t are the two parts written between "<?" and "?>".
  # NOTE(review): parse passes event[1] and event[2] of the REXML
  # processing-instruction event here - presumably target and content.
  def initialize(c, t)
    @c, @t = c, t
  end

  # Serialized form: both parts concatenated with nothing in between.
  def to_s
    "<?" + @c.to_s + @t.to_s + "?>"
  end
end
|
|
1333
|
+
|
|
1334
|
+
# FIXME: Is this even sane ?
|
|
1335
|
+
# * What about escaping and all that stuff ?
|
|
1336
|
+
# * Rest of the code assumes that everything is either XML or String
|
|
1337
|
+
# * There are some limitations on where one can put -s in the comment. Do not overdo.
|
|
1338
|
+
class XML_Comment
  # c is the comment text, stored without any checks or escaping.
  def initialize(c)
    @c = c
  end

  # Serialized form: the text wrapped in comment delimiters.
  def to_s
    "<!--" + @c.to_s + "-->"
  end
end
|
|
1346
|
+
|
|
1347
|
+
# Syntactic sugar for XML.new
|
|
1348
|
+
# Shorthand for XML.new: all arguments and the block are passed through.
def xml(*args, &blk)
  XML.new(*args, &blk)
end
|
|
1351
|
+
|
|
1352
|
+
# xml! in XML { ... } - context adds node to parent
|
|
1353
|
+
# xml! in main context prints the argument (and returns it anyway)
|
|
1354
|
+
# Build a node with xml(...), print it, and return it.
def xml!(*args, &blk)
  xml(*args, &blk).tap { |node| print node }
end
|
|
1359
|
+
|
|
1360
|
+
# Perl 6 is supposed to have native support for something like that.
|
|
1361
|
+
# Constructor takes multiple patterns. The object matches if they all match.
|
|
1362
|
+
#
|
|
1363
|
+
# Usage:
|
|
1364
|
+
# case foo
|
|
1365
|
+
# when all(:foo, {:color => 'blue'}, /Hello/)
|
|
1366
|
+
# print foo
|
|
1367
|
+
# end
|
|
1368
|
+
class Patterns_all
  # Remember the patterns that must all match.
  def initialize(*patterns)
    @patterns = patterns
  end

  # true if pattern === obj holds for every stored pattern (and therefore
  # also when there are no patterns), false otherwise.
  def ===(obj)
    @patterns.each { |pattern| return false unless pattern === obj }
    true
  end
end
|
|
1376
|
+
|
|
1377
|
+
# Shorthand for Patterns_all.new(*patterns).
def all(*patterns)
  Patterns_all.new(*patterns)
end
|
|
1380
|
+
|
|
1381
|
+
# Perl 6 is supposed to have native support for something like that.
|
|
1382
|
+
# Constructor takes multiple patterns. The object matches if any of them matches.
|
|
1383
|
+
#
|
|
1384
|
+
# Usage:
|
|
1385
|
+
# case foo
|
|
1386
|
+
# when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
|
|
1387
|
+
# print foo
|
|
1388
|
+
# end
|
|
1389
|
+
class Patterns_any
  # Remember the patterns of which at least one must match.
  def initialize(*patterns)
    @patterns = patterns
  end

  # true if pattern === obj holds for at least one stored pattern,
  # false otherwise (also when there are no patterns).
  def ===(obj)
    @patterns.each { |pattern| return true if pattern === obj }
    false
  end
end
|
|
1397
|
+
|
|
1398
|
+
# Shorthand for Patterns_any.new(*patterns).
def any(*patterns)
  Patterns_any.new(*patterns)
end
|