axml 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +34 -38
- data/Rakefile +19 -61
- data/lib/axml.rb +30 -367
- data/lib/axml/autoload.rb +106 -0
- data/lib/axml/el.rb +208 -0
- data/lib/axml/libxml.rb +110 -0
- data/lib/axml/traverse.rb +15 -0
- data/lib/axml/xmlparser.rb +114 -0
- data/spec/axml/autoload_spec.rb +18 -0
- data/spec/axml/libxml_spec.rb +13 -0
- data/spec/axml/xmlparser_spec.rb +16 -0
- data/spec/axml_spec.rb +299 -0
- metadata +18 -9
@@ -0,0 +1,106 @@
|
|
1
|
+
|
2
|
+
require 'axml'
|
3
|
+
|
4
|
+
module AXML
|
5
|
+
module Autoload
|
6
|
+
class << self
|
7
|
+
# Selects a parser and records it as the default.
#
# With a name, loads that parser through #parser. With no argument (or nil),
# tries each name in PREFERRED in order and keeps the first one that loads.
# Any message stored in WARN for the chosen parser object is passed to warn.
# On success stores the chosen name in AXML::DEFAULTS[:parser] and returns
# the parser object. If nothing could be loaded, prints the install
# instructions to STDERR and raises a RuntimeError.
def parser!(name=nil)
  parser_name_to_use = name
  parser_obj = nil
  if name.nil?
    PREFERRED.each do |nm|
      # must go through #parser: Kernel#load would try to load a file named nm
      parser_obj = parser(nm)
      if parser_obj
        parser_name_to_use = nm
        break
      end
    end
  else
    parser_obj = parser(name)
  end
  if message = WARN[parser_obj]
    warn message
  end
  if parser_obj
    AXML::DEFAULTS[:parser] = parser_name_to_use
  else
    STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
    STDERR.puts "INSTALL INSTRUCTIONS:"
    STDERR.puts "*****************************************************"
    STDERR.puts install_instructions(:all)
    STDERR.puts "*****************************************************"
    raise RuntimeError, "no parser currently available!"
  end
  parser_obj
end
|
42
|
+
|
43
|
+
# Requires 'axml/<name>' (underscores in the name become path separators)
# and returns the constant that AXML::CLASS_MAPPINGS associates with name,
# looked up inside AXML. Returns nil when the require fails with LoadError
# or when the constant is not defined.
def parser(name)
  feature = "axml/#{name.to_s.gsub('_', '/')}"
  require feature
  klass_name = AXML::CLASS_MAPPINGS[name]
  AXML.const_defined?(klass_name) ? AXML.const_get(klass_name) : nil
rescue LoadError
  nil
end
|
60
|
+
|
61
|
+
# Returns install instructions as a String.
#
# :all        one underlined section per entry of AXML::PREFERRED that has
#             instructions, concatenated in order
# :xmlparser  instructions for the XMLParser bindings
# :libxml     instructions for libxml-ruby
# other       nil
def install_instructions(name)
  if name == :all
    sections = AXML::PREFERRED.map do |nm|
      text = install_instructions(nm)
      if text
        underline = '-' * nm.to_s.size
        "#{underline}\n#{nm}\n#{underline}\n#{text}"
      else
        ""
      end
    end
    return sections.join
  end

  case name
  when :xmlparser
    <<END
debian/ubuntu: sudo apt-get install libxml-parser-ruby1.8

cygwin:
Download the XMLParser module: http://www.yoshidam.net/Ruby.html
Build and install:
ruby extconf.rb --with-expat-lib=/usr/lib \
--with-expat-include=/usr/include
make
make site-install

windows: included in one-click-installer
END
  when :libxml
    <<END
deb/ubuntu: sudo apt-get install libxml-ruby

install as gem:
sudo gem install -r libxml-ruby

for more info: http://libxml.rubyforge.org/install.xml
END
  end
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
data/lib/axml/el.rb
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
|
2
|
+
require 'axml/traverse'
|
3
|
+
|
4
|
+
module AXML ; end
|
5
|
+
|
6
|
+
AXML::El = Struct.new(:parent, :name, :attrs, :text, :children, :array_index)
|
7
|
+
|
8
|
+
class AXML::El
|
9
|
+
include Enumerable
|
10
|
+
include Traverseable
|
11
|
+
|
12
|
+
# The string used for one level of indentation.
# Use AXML::El::Indent.replace to swap it without a constant warning.
Indent = ' '
# Precomputed prefixes for depths 0..29: Indent repeated depth times
# (the first entry is ""). Use AXML::El::Indentation.replace to swap the
# table without a constant warning.
Indentation = Array.new(30) { |depth| Indent * depth }
|
17
|
+
|
18
|
+
# current depth
|
19
|
+
@@depth = 0
|
20
|
+
|
21
|
+
alias_method :content, :text
|
22
|
+
alias_method :content=, :text=
|
23
|
+
alias_method :kids, :children
|
24
|
+
alias_method :kids=, :children=
|
25
|
+
|
26
|
+
# reads one attribute from attrs
def [](attribute_string)
  self.attrs[attribute_string]
end

# writes one attribute into attrs
def []=(attribute_string, value)
  self.attrs[attribute_string] = value
end

# true when text is anything other than nil/false (an empty string counts)
def text?
  text ? true : false
end

# true when there is at least one child
def children?
  !children.empty?
end
|
42
|
+
alias_method :child?, :children?
|
43
|
+
|
44
|
+
# Yields each child in order and returns the children array.
# Without a block, returns an enumerator over the children instead of
# failing on a nil block.
def each(&block)
  children.each(&block)
end
|
49
|
+
|
50
|
+
# Drops the current element from its parent's list of children.
# Delegates to the parent's drop_child so that the node is matched by
# identity (not ==) and the following siblings get their array_index
# renumbered. Returns the removed node, or nil when there is no parent or
# the node is not among the parent's children.
def drop
  return nil if parent.nil?
  parent.drop_child(self)
end

# Removes node (matched by object identity) from children and shifts the
# array_index of every later child down to its new position.
# Returns the removed node, or nil when node is not a child.
def drop_child(node)
  found_index = children.index { |kid| kid.equal?(node) }
  return nil if found_index.nil?
  removed = children.delete_at(found_index)
  (found_index...children.size).each { |i| children[i].array_index = i }
  removed
end
|
69
|
+
|
70
|
+
# characters that must be replaced by an XML entity
EscapeCharsRe = /['"&><]/

# Returns data with the five XML special characters replaced by their
# predefined entities. Non-strings (including nil) and the empty string are
# returned unchanged; any other string is returned as a new object.
# (modified slightly from xmlsimple.rb)
def escape(data)
  return data if !data.is_a?(String) || data == ''
  result = data.dup
  if EscapeCharsRe.match(data)
    # '&' has to go first so the entities added below are not escaped again
    result.gsub!('&', '&amp;')
    result.gsub!('<', '&lt;')
    result.gsub!('>', '&gt;')
    result.gsub!('"', '&quot;')
    result.gsub!("'", '&apos;')
  end
  result
end
|
86
|
+
|
87
|
+
# Serializes this element and its subtree as XML text.
# indent selects the prefix from Indentation for this element; children are
# written one level deeper. Attribute values and text go through escape.
# An element with neither children nor text is written self-closed.
def to_s(indent=0)
  pad = Indentation[indent]
  attr_part =
    if attrs.size > 0
      " " + attrs.map { |key, val| "#{key}=\"#{escape(val)}\"" }.join(" ")
    else
      ""
    end
  out = "#{pad}<#{name}#{attr_part}"
  if children.size > 0
    out << ">"
    out << escape(text) if text?
    out << "\n"
    out << children.map { |kid| kid.to_s(indent + 1) }.join("")
    out << "#{pad}</#{name}>\n"
  elsif text?
    out << ">" << escape(text) << "</#{name}>\n"
  else
    out << "/>\n"
  end
  out
end
|
108
|
+
|
109
|
+
# short summary: name, attributes and number of children
def inspect
  format("<name='%s' attrs='%s' children.size=%s>", name, attrs.inspect, children.size)
end

# the node that follows this one in the parent's children (nil at the end)
def next
  siblings = parent.children
  siblings[array_index + 1]
end

# the first child (equivalent to children.first)
def child
  children[0]
end

# appends node to children after recording its position in array_index
def add_node(node)
  node.array_index = children.size
  children << node
end
|
127
|
+
|
128
|
+
########################################################################
|
129
|
+
# FIND and FIND_FIRST (with a little useful xpath)
|
130
|
+
########################################################################
|
131
|
+
|
132
|
+
# Returns an array of nodes. Accepts the same "axis::name" strings as
# find_first ('child', 'descendant' or 'following-sibling' axis).
# Any other axis yields nil.
def find(string)
  axis, node_name = string.split('::')
  if axis == 'child'
    find_children(node_name)
  elsif axis == 'descendant'
    find_descendants(node_name)
  elsif axis == 'following-sibling'
    find_following_siblings(node_name)
  end
end
|
144
|
+
|
145
|
+
# Returns the first matching node. The string must carry an axis prefix:
# "child::<name>", "descendant::<name>" or "following-sibling::<name>",
# where <name> is the name of the node you seek. Any other axis yields nil.
def find_first(string)
  axis, node_name = string.split('::')
  if axis == 'child'
    find_first_child(node_name)
  elsif axis == 'descendant'
    find_first_descendant(node_name)
  elsif axis == 'following-sibling'
    find_first_following_sibling(node_name)
  end
end
|
159
|
+
|
160
|
+
# Collects, depth first, every descendant whose name equals name.
# collect_descendants is the accumulator shared by the recursive calls; it
# is also the return value.
def find_descendants(name, collect_descendants=[])
  children.each do |kid|
    collect_descendants << kid if kid.name == name
    kid.find_descendants(name, collect_descendants)
  end
  collect_descendants
end
|
167
|
+
|
168
|
+
# Returns the first descendant (depth first, in document order) whose name
# equals name, or nil when there is none.
def find_first_descendant(name)
  each do |child_node|
    return child_node if child_node.name == name
    # keep going with the next sibling when this subtree has no match
    found = child_node.find_first_descendant(name)
    return found if found
  end
  nil
end
|
178
|
+
|
179
|
+
# all direct children whose name equals name
def find_children(name)
  children.find_all { |kid| kid.name == name }
end

# the first direct child whose name equals name, or nil
def find_first_child(name)
  children.detect { |kid| kid.name == name }
end
|
191
|
+
|
192
|
+
# every sibling after this node (in the parent's children) named name
def find_following_siblings(name)
  later = parent.children[(array_index + 1)..-1]
  later.select { |sib| sib.name == name }
end

# the first sibling after this node named name, or nil
def find_first_following_sibling(name)
  later = parent.children[(array_index + 1)..-1]
  later.detect { |sib| sib.name == name }
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
data/lib/axml/libxml.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
|
2
|
+
require 'xml/libxml'
|
3
|
+
require 'axml/traverse'
|
4
|
+
|
5
|
+
module AXML ; end
|
6
|
+
|
7
|
+
# if using AXML::LibXML, LibXML::XML::Node behavior is slightly modified from
|
8
|
+
# the default LibXML behavior:
|
9
|
+
# 1. there are no 'text' nodes. Some nodes have text, some do not
|
10
|
+
# 2. :text? returns true if the node has text (in LibXML this returns true
|
11
|
+
# if the node *is* a text node)
|
12
|
+
# 3. text
|
13
|
+
class AXML::LibXML
|
14
|
+
extend AXML
|
15
|
+
class << self
|
16
|
+
|
17
|
+
# Parses XML read from io and returns the document's root node.
# opts is merged over AXML::DEFAULTS; when the result has :keep_blanks it is
# written to XML::Parser.default_keep_blanks first.
# Beware that threading with different :keep_blanks values could cause
# problems since this is a global Libxml setting.
def parse_io(io, opts={})
  merged = AXML::DEFAULTS.merge(opts)
  if merged.key?(:keep_blanks)
    XML::Parser.default_keep_blanks = merged[:keep_blanks]
  end
  document = XML::Parser.io(io).parse
  document.root
end

# Parses XML held in string and returns the document's root node.
# opts is merged over AXML::DEFAULTS; when the result has :keep_blanks it is
# written to XML::Parser.default_keep_blanks first.
# Beware that threading with different :keep_blanks values could cause
# problems since this is a global Libxml setting.
def parse_string(string, opts={})
  merged = AXML::DEFAULTS.merge(opts)
  if merged.key?(:keep_blanks)
    XML::Parser.default_keep_blanks = merged[:keep_blanks]
  end
  document = XML::Parser.string(string).parse
  document.root
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class LibXML::XML::Node
|
36
|
+
include Traverseable
|
37
|
+
|
38
|
+
alias_method :old_child, :child
|
39
|
+
alias_method :old_text?, :text?
|
40
|
+
alias_method :old_content, :content
|
41
|
+
|
42
|
+
# true when the node's first child (as reported by the original libxml
# child accessor) is a text node; false when there is no child at all
def text?
  first = old_child
  first.nil? ? false : first.old_text?
end
|
45
|
+
|
46
|
+
# the node's content (via the original libxml content accessor), or nil
# when that content is the empty string
def text
  body = old_content
  body == "" ? nil : body
end
|
61
|
+
alias_method :content, :text
|
62
|
+
|
63
|
+
# Returns the first child that is not a text node, or nil when there is
# none. Text nodes are skipped by walking to the following sibling; a text
# node has no children of its own, so descending into it would always end
# the search with nil.
def child
  kid = old_child
  kid = kid.next while kid && kid.old_text?
  kid
end
|
72
|
+
|
73
|
+
# first direct child named name (xpath "child::name" through find_first)
def find_first_child(name)
  find_first('child::' + name.to_s)
end

# all direct children named name (xpath "child::name" through find)
def find_children(name)
  find('child::' + name.to_s)
end

# first descendant named name (xpath "descendant::name" through find_first)
def find_first_descendant(name)
  find_first('descendant::' + name.to_s)
end
|
82
|
+
|
83
|
+
# Full traversal from the initial node. The block is called for every node
# that is not a libxml text node: before its children when type is :pre,
# after them when type is :post. Children are always descended into.
def traverse(type=:pre, &block)
  block.call(self) if type == :pre && !old_text?
  children.each { |kid| kid.traverse(type, &block) }
  block.call(self) if type == :post && !old_text?
end
|
95
|
+
|
96
|
+
|
97
|
+
alias_method :kids, :children
|
98
|
+
alias_method :attrs, :attributes
|
99
|
+
alias_method :drop, :remove!
|
100
|
+
|
101
|
+
end
|
102
|
+
|
103
|
+
class LibXML::XML::Attributes
|
104
|
+
# looks up each given key with [] and returns the results in the same order
def values_at(*args)
  args.collect { |key| self[key] }
end
|
109
|
+
end
|
110
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
# Mixin for tree nodes that respond to #children.
module Traverseable
  # Full traversal from the initial node. Calls the block with every node:
  # before its children when type is :pre (the default), after them when
  # type is :post. With any other type the tree is walked without calling
  # the block.
  def traverse(type=:pre, &block)
    block.call(self) if type == :pre
    children.each { |kid| kid.traverse(type, &block) }
    block.call(self) if type == :post
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'xmlparser'
|
2
|
+
require 'axml/el'
|
3
|
+
|
4
|
+
class AXML::XMLParser
|
5
|
+
extend AXML
|
6
|
+
|
7
|
+
class << self
|
8
|
+
|
9
|
+
# Parses io with a fresh AXML::XMLParser::Parser and returns its root.
# opts[:keep_blanks] == false  switches the parser to dropping blank text
# opts[:text_indices]          a single element name (or a one-element
#                              Array) passed, as a String, to
#                              set_single_text_indices; an Array with more
#                              than one entry raises NotImplementedError
def parse_io(io, opts=DEFAULTS)
  parser = AXML::XMLParser::Parser.new
  parser.set_no_keep_blanks if opts[:keep_blanks] == false
  wanted = opts[:text_indices]
  if wanted
    listed = wanted.is_a?(Array)
    if listed && wanted.size > 1
      raise NotImplementedError, "currently only supports a single element"
    end
    parser.set_single_text_indices((listed ? wanted.first : wanted).to_s)
  end
  parser.parse(io)
  parser.root
end

# same as parse_io; arg is handed to it unchanged
def parse_string(arg, opts=DEFAULTS)
  parse_io(arg, opts)
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class AXML::XMLParser::Parser < XMLParser
|
39
|
+
NotBlankText_re = /[^\s+]+/m
|
40
|
+
|
41
|
+
attr_writer :root
|
42
|
+
|
43
|
+
# returns the first node found in the document (the first child of the
# placeholder element held in @root)
def root
  @root.child
end

# Replaces endElement on this parser instance so that, when an element
# closes, String text with no non-whitespace character is set to nil.
# Text that is not a String (nil, or an Array of indices) is left alone.
def set_no_keep_blanks
  instance_eval do
    def endElement(name)
      text = @cur.text
      # /\S/ asks "any non-whitespace at all?", so text such as "+" is kept
      @cur.text = nil if text.is_a?(String) && text !~ /\S/
      @cur = @cur.parent
    end
  end
end
|
58
|
+
|
59
|
+
# For every occurrence of the element named el_name, stores the text as an
# Array [start_index, num_bytes] (taken from byteIndex) instead of the
# character data itself. All other elements keep collecting their text as a
# String. Works by replacing startElement, character and endElement on this
# parser instance.
def set_single_text_indices(el_name)
  @el_name = el_name
  # silence "method redefined" warnings while the handlers are swapped
  saved_verbose = $VERBOSE
  $VERBOSE = nil
  begin
    instance_eval do
      def startElement(name, attributes)
        text = (name == @el_name) ? [] : ''
        new_el = ::AXML::El.new(@cur, name, attributes, text, [])
        @cur.add_node(new_el)
        @cur = new_el
      end

      def character(data)
        if @cur.text.is_a? Array
          # character data can arrive in several pieces; only the first
          # piece marks the start index
          @cur.text << byteIndex if @cur.text.empty?
        else
          @cur.text << data
        end
      end

      def endElement(name)
        if @cur.text.is_a?(Array) && !@cur.text.empty?
          @cur.text << (byteIndex - @cur.text.first)
        end
        @cur = @cur.parent
      end
    end
  ensure
    $VERBOSE = saved_verbose
  end
end
|
91
|
+
|
92
|
+
# Takes no arguments. Creates a placeholder element named "root" (no
# parent, empty attributes, empty text, no children) and makes it the
# current element; parsed elements are attached beneath it.
def initialize
  @cur = @root = AXML::El.new(nil, "root", {}, '', [])
end
|
98
|
+
|
99
|
+
# opens an element: creates it under the current element (with empty
# String text and no children) and makes it the current element
def startElement(name, attributes)
  node = AXML::El.new(@cur, name, attributes, '', [])
  @cur.add_node(node)
  @cur = node
end

# appends character data to the current element's text
def character(data)
  current = @cur
  current.text << data
end

# closes an element: the parent becomes the current element again
def endElement(name)
  @cur = @cur.parent
end
|
112
|
+
|
113
|
+
end
|
114
|
+
|