axml 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +34 -38
- data/Rakefile +19 -61
- data/lib/axml.rb +30 -367
- data/lib/axml/autoload.rb +106 -0
- data/lib/axml/el.rb +208 -0
- data/lib/axml/libxml.rb +110 -0
- data/lib/axml/traverse.rb +15 -0
- data/lib/axml/xmlparser.rb +114 -0
- data/spec/axml/autoload_spec.rb +18 -0
- data/spec/axml/libxml_spec.rb +13 -0
- data/spec/axml/xmlparser_spec.rb +16 -0
- data/spec/axml_spec.rb +299 -0
- metadata +18 -9
@@ -0,0 +1,106 @@
|
|
1
|
+
|
2
|
+
require 'axml'
|
3
|
+
|
4
|
+
module AXML
  # Locates and loads the XML parser backends that AXML can use.
  module Autoload
    class << self
      # Selects a parser backend and records it as the default.
      #
      # With a +name+, only that backend is tried. With no argument (or nil),
      # each entry of AXML::PREFERRED is tried in order and the first one that
      # loads is used.
      #
      # On success, AXML::DEFAULTS[:parser] is set to the chosen name, any
      # message found in AXML::WARN for the parser object is emitted with
      # +warn+, and the parser object is returned.
      #
      # Raises a RuntimeError (after printing install instructions to STDERR)
      # if no parser could be loaded.
      def parser!(name=nil)
        candidates = name.nil? ? PREFERRED : [name]
        parser_name_to_use = nil
        parser_obj = nil
        candidates.each do |nm|
          # Must be our own #parser: a bare `load` here resolves to Kernel#load,
          # which expects a file path and raises TypeError for a Symbol.
          parser_obj = parser(nm)
          next unless parser_obj
          parser_name_to_use = nm
          break
        end

        message = WARN[parser_obj]
        warn message if message

        unless parser_obj
          STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
          STDERR.puts "INSTALL INSTRUCTIONS:"
          STDERR.puts "*****************************************************"
          STDERR.puts install_instructions(:all)
          STDERR.puts "*****************************************************"
          raise RuntimeError, "no parser currently available!"
        end

        AXML::DEFAULTS[:parser] = parser_name_to_use
        parser_obj
      end

      # Requires the backend file for +name+ ("axml/<name>", with underscores
      # turned into path separators) and returns the constant that
      # AXML::CLASS_MAPPINGS associates with +name+. The returned object should
      # respond to parse_io and parse_string.
      #
      # Returns nil if the file cannot be required, if +name+ has no class
      # mapping, or if the mapped constant is not defined under AXML.
      def parser(name)
        begin
          require "axml/#{name.to_s.gsub('_', '/')}"
        rescue LoadError
          return nil
        end
        const_str = AXML::CLASS_MAPPINGS[name]
        return nil unless const_str
        AXML.const_defined?(const_str) ? AXML.const_get(const_str) : nil
      end

      # Returns install instructions as a String.
      #
      # name:: a backend name (:xmlparser or :libxml), or :all to get a
      #        titled section for every entry of AXML::PREFERRED that has
      #        instructions. Any other name returns nil.
      def install_instructions(name)
        if name == :all
          sections = String.new
          AXML::PREFERRED.each do |nm|
            text = install_instructions(nm)
            next unless text
            rule = '-' * nm.to_s.size
            sections << rule << "\n"
            sections << "#{nm}\n"
            sections << rule << "\n"
            sections << "#{text}"
          end
          sections
        else
          case name
          when :xmlparser
            <<END
debian/ubuntu: sudo apt-get install libxml-parser-ruby1.8

cygwin:
Download the XMLParser module: http://www.yoshidam.net/Ruby.html
Build and install:
ruby extconf.rb --with-expat-lib=/usr/lib \
--with-expat-include=/usr/include
make
make site-install

windows: included in one-click-installer
END
          when :libxml
            <<END
deb/ubuntu: sudo apt-get install libxml-ruby

install as gem:
sudo gem install -r libxml-ruby

for more info: http://libxml.rubyforge.org/install.xml
END
          end
        end
      end
    end
  end
end
|
data/lib/axml/el.rb
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
|
2
|
+
require 'axml/traverse'
|
3
|
+
|
4
|
+
module AXML ; end
|
5
|
+
|
6
|
+
AXML::El = Struct.new(:parent, :name, :attrs, :text, :children, :array_index)
|
7
|
+
|
8
|
+
class AXML::El
|
9
|
+
include Enumerable
|
10
|
+
include Traverseable
|
11
|
+
|
12
|
+
# use AXML::El::Indent.replace to swap without warning
|
13
|
+
# ["", " ", " ", " ", " ", " ", ... ]
|
14
|
+
Indent = ' '
|
15
|
+
# use AXML::El::Indentation.replace to replace w/o warning
|
16
|
+
Indentation = (0...30).to_a.map {|num| Indent*num }
|
17
|
+
|
18
|
+
# current depth
|
19
|
+
@@depth = 0
|
20
|
+
|
21
|
+
alias_method :content, :text
|
22
|
+
alias_method :content=, :text=
|
23
|
+
alias_method :kids, :children
|
24
|
+
alias_method :kids=, :children=
|
25
|
+
|
26
|
+
# Attribute reader: looks +attribute_string+ up in +attrs+ and returns
# whatever that container yields for the key.
def [](attribute_string)
  attrs[attribute_string]
end
|
29
|
+
|
30
|
+
# Attribute writer: stores +value+ under +attribute_string+ in +attrs+.
def []=(attribute_string, value)
  attrs[attribute_string] = value
end
|
33
|
+
|
34
|
+
# True when +text+ is set to anything other than nil or false. An empty
# String therefore still counts as text.
def text?
  text ? true : false
end
|
38
|
+
|
39
|
+
# True when this node has at least one child.
def children?
  children.size > 0
end
|
42
|
+
alias_method :child?, :children?
|
43
|
+
|
44
|
+
# Yields each child node in order (this is what Enumerable builds on).
# Without a block an Enumerator over the children is returned instead of
# failing on a nil block.
def each(&block)
  children.each(&block)
end
|
49
|
+
|
50
|
+
# Drops this element from its parent's list of children and returns it.
#
# Delegates to the parent's drop_child so the node is matched by identity
# and the array_index of the siblings that follow is kept in step.
# Returns nil when the node has no parent.
def drop
  return nil unless parent
  parent.drop_child(self)
end
|
54
|
+
|
55
|
+
# Removes +node+ (matched by object identity, not by value) from
# +children+ and shifts the array_index of every later child down by one.
#
# Returns the removed node, or nil if +node+ is not one of the children.
def drop_child(node)
  position = children.index { |kid| kid.equal?(node) }
  return nil unless position
  removed = children.delete_at(position)
  # everything that used to sit after the removed node moved up one slot
  children[position..-1].each_with_index do |kid, offset|
    kid.array_index = position + offset
  end
  removed
end
|
69
|
+
|
70
|
+
# characters that must be replaced by an entity in XML output
EscapeCharsRe = /['"&><]/

# entity used for each character matched by EscapeCharsRe
EscapeEntities = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  '"' => '&quot;',
  "'" => '&apos;',
}

# Returns +data+ with XML special characters replaced by entities.
#
# Non-String values (including nil) and the empty String are returned
# unchanged; otherwise a new String is returned and +data+ is not modified.
# All characters are replaced in a single pass, so the '&' of an inserted
# entity is never escaped a second time.
def escape(data)
  # modified slightly from xmlsimple.rb
  return data if !data.is_a?(String) || data == ''
  data.gsub(EscapeCharsRe, EscapeEntities)
end
|
86
|
+
|
87
|
+
# Serializes this node and its subtree as XML, one element per line.
#
# indent:: nesting depth, used to pick the leading whitespace. Depths beyond
#          the precomputed Indentation table fall back to Indent * indent so
#          deep nodes keep their indentation.
#
# Attribute values and text are passed through #escape. An element with
# children prints its text (if any) right after the opening tag; an element
# with neither children nor text is written as a self-closing tag.
def to_s(indent=0)
  pad = Indentation[indent] || (Indent * indent)
  attstring = attrs.map { |k, v| " #{k}=\"#{escape(v)}\"" }.join
  string = "#{pad}<#{name}#{attstring}"
  if children.size > 0
    string << ">"
    string << escape(text) if text?
    string << "\n"
    children.each { |child| string << child.to_s(indent + 1) }
    string << "#{pad}</#{name}>\n"
  elsif text?
    string << ">" << escape(text) << "</#{name}>\n"
  else
    string << "/>\n"
  end
  string
end
|
108
|
+
|
109
|
+
# Short one-line summary: name, attributes and number of children (the
# subtree itself is not printed).
def inspect
  "<name='#{name}' attrs='#{attrs.inspect}' children.size=#{children.size}>"
end
|
112
|
+
|
113
|
+
# The next sibling: the entry after this node in the parent's children.
# Returns nil for the last child and for a node without a parent.
def next
  return nil unless parent
  parent.children[array_index + 1]
end
|
117
|
+
|
118
|
+
# The first child (equivalent to children.first); nil when there are none.
def child
  children.first
end
|
122
|
+
|
123
|
+
# Appends +node+ to +children+, first recording the position it will occupy
# in node.array_index. Returns the children array. The node's parent is not
# touched here.
def add_node(node)
  node.array_index = children.size
  children << node
end
|
127
|
+
|
128
|
+
########################################################################
|
129
|
+
# FIND and FIND_FIRST (with a little useful xpath)
|
130
|
+
########################################################################
|
131
|
+
|
132
|
+
# Returns an array of nodes. Accepts the same xpath strings as find_first:
# "child::<name>", "descendant::<name>" or "following-sibling::<name>".
# Any other axis prefix yields nil.
def find(string)
  axis, name = string.split('::')
  case axis
  when 'child'             then find_children(name)
  when 'descendant'        then find_descendants(name)
  when 'following-sibling' then find_following_siblings(name)
  end
end
|
144
|
+
|
145
|
+
# Returns the first matching node, or nil.
#
# Currently must be called with an axis prefix: "child::<name>",
# "descendant::<name>" or "following-sibling::<name>", where <name> is the
# name of the node you seek. Any other prefix yields nil.
def find_first(string)
  axis, name = string.split('::')
  case axis
  when 'child'             then find_first_child(name)
  when 'descendant'        then find_first_descendant(name)
  when 'following-sibling' then find_first_following_sibling(name)
  end
end
|
159
|
+
|
160
|
+
# Collects every descendant named +name+ in document (pre-order) order.
#
# collect_descendants:: accumulator the matches are pushed onto; it is
#                       threaded through the recursion and returned.
def find_descendants(name, collect_descendants=[])
  children.each do |child|
    collect_descendants << child if child.name == name
    child.find_descendants(name, collect_descendants)
  end
  collect_descendants
end
|
167
|
+
|
168
|
+
# Returns the first descendant named +name+ in document (pre-order) order,
# or nil if there is none.
#
# Each child is checked and then searched; a subtree without a match no
# longer ends the search, so later children are still visited.
def find_first_descendant(name)
  children.each do |child_node|
    return child_node if child_node.name == name
    hit = child_node.find_first_descendant(name)
    return hit if hit
  end
  nil
end
|
178
|
+
|
179
|
+
# Returns the direct children named +name+ (possibly an empty array).
def find_children(name)
  children.select { |kid| kid.name == name }
end
|
182
|
+
|
183
|
+
# Returns the first direct child named +name+, or nil if there is none.
def find_first_child(name)
  children.detect { |child_node| child_node.name == name }
end
|
191
|
+
|
192
|
+
# Returns the siblings after this node (by array_index in the parent's
# children) that are named +name+. A node without a parent has no siblings,
# so an empty array is returned.
def find_following_siblings(name)
  return [] unless parent
  parent.children.drop(array_index + 1).select { |sibling| sibling.name == name }
end
|
195
|
+
|
196
|
+
# Returns the first sibling after this node (by array_index in the parent's
# children) that is named +name+, or nil if there is none or the node has
# no parent.
def find_first_following_sibling(name)
  return nil unless parent
  parent.children.drop(array_index + 1).detect { |sibling| sibling.name == name }
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
data/lib/axml/libxml.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
|
2
|
+
require 'xml/libxml'
|
3
|
+
require 'axml/traverse'
|
4
|
+
|
5
|
+
module AXML ; end

# if using AXML::LibXML, LibXML::XML::Node behavior is slightly modified from
# the default LibXML behavior:
#   1. there are no 'text' nodes. Some nodes have text, some do not
#   2. :text? returns true if the node has text (in LibXML this returns true
#      if the node *is* a text node)
#   3. :text (and :content) returns nil instead of an empty string when the
#      node has no content
class AXML::LibXML
  extend AXML
  class << self

    # Parses the XML read from +io+ and returns the document's root node.
    #
    # opts are merged over AXML::DEFAULTS; :keep_blanks is applied to
    # XML::Parser.default_keep_blanks. Beware that threading with different
    # :keep_blanks values could cause problems since this is a global Libxml
    # setting.
    def parse_io(io, opts={})
      apply_keep_blanks(opts)
      XML::Parser.io(io).parse.root
    end

    # Parses the XML in +string+ and returns the document's root node.
    #
    # opts are merged over AXML::DEFAULTS; :keep_blanks is applied to
    # XML::Parser.default_keep_blanks. Beware that threading with different
    # :keep_blanks values could cause problems since this is a global Libxml
    # setting.
    def parse_string(string, opts={})
      apply_keep_blanks(opts)
      XML::Parser.string(string).parse.root
    end

    private

    # Merges +opts+ over AXML::DEFAULTS and, if the result has a :keep_blanks
    # key, pushes that value into the global XML::Parser setting.
    def apply_keep_blanks(opts)
      merged = AXML::DEFAULTS.merge(opts)
      XML::Parser.default_keep_blanks = merged[:keep_blanks] if merged.key?(:keep_blanks)
    end
  end
end
|
34
|
+
|
35
|
+
# Reopens LibXML's node class so it answers the same questions AXML::El does.
# The original LibXML methods stay reachable under their old_* aliases.
class LibXML::XML::Node
  include Traverseable

  alias_method :old_child, :child
  alias_method :old_text?, :text?
  alias_method :old_content, :content

  # True if this node's first raw child is a text node, i.e. the node has
  # text. A node without any child has no text.
  def text?
    first = old_child
    !first.nil? && first.old_text?
  end

  # The node's content, or nil (instead of "") when there is none.
  def text
    cont = old_content
    cont == "" ? nil : cont
  end
  alias_method :content, :text

  # The first child that is not a text node, or nil if there is none.
  # Text nodes have no children of their own, so the search walks along the
  # sibling chain rather than descending into them.
  def child
    kid = old_child
    kid = kid.next while kid && kid.old_text?
    kid
  end

  # First direct child named +name+ (via the xpath child:: axis).
  def find_first_child(name)
    find_first("child::#{name}")
  end

  # All direct children named +name+ (via the xpath child:: axis).
  def find_children(name)
    find("child::#{name}")
  end

  # First descendant named +name+ (via the xpath descendant:: axis).
  def find_first_descendant(name)
    find_first("descendant::#{name}")
  end

  # full traversal from the initial node; the block is never called for
  # LibXML text nodes. type is :pre (node before its children) or :post
  # (node after its children).
  def traverse(type=:pre, &block)
    block.call(self) if type == :pre && !old_text?
    children.each do |child|
      child.traverse(type, &block)
    end
    block.call(self) if type == :post && !old_text?
  end

  alias_method :kids, :children
  alias_method :attrs, :attributes
  alias_method :drop, :remove!

end
|
102
|
+
|
103
|
+
class LibXML::XML::Attributes
  # Hash-style values_at: returns the result of self[arg] for each of +args+,
  # in the order given.
  def values_at(*args)
    args.map { |arg| self[arg] }
  end
end
|
110
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
# Mixin providing a depth-first walk for any node that responds to
# +children+ with a collection of nodes that are themselves traversable.
module Traverseable
  # full traversal from the initial node
  #
  # type:: :pre yields each node before its children, :post yields it after
  #        them. Any other value walks the tree without yielding.
  #
  # Without a block an Enumerator over the same sequence is returned.
  def traverse(type=:pre, &block)
    return to_enum(:traverse, type) unless block
    block.call(self) if type == :pre
    children.each do |child|
      child.traverse(type, &block)
    end
    block.call(self) if type == :post
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'xmlparser'
|
2
|
+
require 'axml/el'
|
3
|
+
|
4
|
+
module AXML
  # Parser backend that builds a tree of AXML::El nodes using
  # AXML::XMLParser::Parser.
  class XMLParser
    extend AXML

    class << self

      # Parses +io+ and returns the first node found in the document.
      #
      # opts are merged over AXML::DEFAULTS (as AXML::LibXML does), so a
      # partial options hash keeps the remaining defaults.
      #
      # :keep_blanks::  when false, whitespace-only text is dropped
      # :text_indices:: an element name (or a one-element Array holding one);
      #                 enables the parser's single-text-indices mode for
      #                 that element
      #
      # Raises NotImplementedError if :text_indices names more than one
      # element.
      def parse_io(io, opts=AXML::DEFAULTS)
        opts = AXML::DEFAULTS.merge(opts)
        parser = AXML::XMLParser::Parser.new
        parser.set_no_keep_blanks if opts[:keep_blanks] == false
        if (ti = opts[:text_indices])
          names = ti.is_a?(Array) ? ti : [ti]
          if names.size > 1
            raise NotImplementedError, "currently only supports a single element"
          end
          parser.set_single_text_indices(names.first.to_s)
        end
        parser.parse(io)
        parser.root
      end

      # Same as parse_io; +arg+ is handed to the parser unchanged.
      def parse_string(arg, opts=AXML::DEFAULTS)
        parse_io(arg, opts)
      end

    end
  end
end
|
37
|
+
|
38
|
+
# Event-driven parser that builds a tree of AXML::El nodes from the
# startElement / character / endElement callbacks.
#
# Two optional modes are switched on with set_no_keep_blanks and
# set_single_text_indices. They are plain instance flags consulted by the
# callbacks, so both can be active at the same time.
class AXML::XMLParser::Parser < ::XMLParser
  # matches text that contains at least one non-whitespace character
  NotBlankText_re = /\S/

  attr_writer :root

  # takes no options; modes are enabled afterwards through the set_* methods
  def initialize
    # synthetic container node; the document's real root becomes its child
    @root = AXML::El.new(nil, "root", {}, '', [])
    @cur = @root
    @drop_blanks = false
    @el_name = nil
  end

  # returns the first node found in the document
  def root
    @root.child
  end

  # From now on, text made up only of whitespace is replaced by nil when its
  # element closes.
  def set_no_keep_blanks
    @drop_blanks = true
  end

  # returns text as an array for each occurence of the specified element:
  # [start_index, num_bytes]
  def set_single_text_indices(el_name)
    @el_name = el_name
  end

  # Opens a new element under the current one and makes it current. Elements
  # tracked by set_single_text_indices start with an Array for their text,
  # all others with an empty String.
  def startElement(name, attributes)
    text = (@el_name && name == @el_name) ? [] : ''
    new_el = AXML::El.new(@cur, name, attributes, text, [])
    @cur.add_node(new_el)
    @cur = new_el
  end

  # Appends character data to the current element. For a tracked element
  # only the byte index of the first chunk is recorded, since character data
  # may arrive in several calls.
  def character(data)
    text = @cur.text
    if text.is_a?(Array)
      text << byteIndex if text.empty?
    else
      text << data
    end
  end

  # Closes the current element and moves back up to its parent. A tracked
  # element gets its byte count appended (nothing is appended if it had no
  # character data); otherwise blank text is dropped when that mode is on.
  def endElement(name)
    text = @cur.text
    if text.is_a?(Array)
      text << (byteIndex - text.first) unless text.empty?
    elsif @drop_blanks && text !~ NotBlankText_re
      @cur.text = nil
    end
    @cur = @cur.parent
  end

end
|
114
|
+
|