axml 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+
2
+ require 'axml'
3
+
4
+ module AXML
5
+ module Autoload
6
+ class << self
7
# Selects the XML parser backend and records it as the default.
#
# With a +name+, loads exactly that parser. With no argument (or nil),
# walks AXML::PREFERRED in order and keeps the first parser that loads.
#
# On success, stores the chosen name in AXML::DEFAULTS[:parser], emits any
# matching message from AXML::WARN through +warn+, and returns the parser
# object. When no parser could be loaded, prints the install instructions
# for every preferred parser to STDERR and raises RuntimeError.
def parser!(name=nil)
  parser_name_to_use = nil
  parser_obj = nil
  if name.nil?
    PREFERRED.each do |nm|
      # Go through #parser, which requires 'axml/<name>' and returns nil
      # when the backend is missing. A bare `load(nm)` resolves to
      # Kernel#load and fails as soon as it is handed a parser name.
      parser_obj = parser(nm)
      if parser_obj
        parser_name_to_use = nm
        break
      end
    end
  else
    parser_name_to_use = name
    parser_obj = parser(name)
  end
  # NOTE(review): WARN is indexed by the parser object here; if AXML::WARN
  # is keyed by parser name instead, this lookup never matches -- confirm
  # against the definition of AXML::WARN.
  if message = WARN[parser_obj]
    warn message
  end
  if parser_obj
    AXML::DEFAULTS[:parser] = parser_name_to_use
  else
    STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
    STDERR.puts "INSTALL INSTRUCTIONS:"
    STDERR.puts "*****************************************************"
    STDERR.puts install_instructions(:all)
    STDERR.puts "*****************************************************"
    raise RuntimeError, "no parser currently available!"
  end
  parser_obj
end
42
+
43
# Tries to require the backend file for +name+ ('axml/' followed by the
# name with every underscore turned into a slash) and then returns the
# constant under AXML that AXML::CLASS_MAPPINGS lists for +name+. The
# returned object is expected to respond to parse_io and parse_string.
#
# Returns nil when the file cannot be required (LoadError) or when the
# mapped constant is not defined after the require.
def parser(name)
  feature = "axml/#{name.to_s.tr('_', '/')}"
  require feature
  klass_name = AXML::CLASS_MAPPINGS[name]
  AXML.const_defined?(klass_name) ? AXML.const_get(klass_name) : nil
rescue LoadError
  nil
end
60
+
61
# Returns human-readable install instructions as a String.
#
# name == :all   -> asks this same method for each entry of AXML::PREFERRED
#                   and concatenates the non-nil answers, each preceded by
#                   the parser name framed above and below by a row of
#                   dashes as long as the name.
# :xmlparser / :libxml -> the corresponding heredoc text below.
# anything else  -> nil (the case statement has no else branch), which is
#                   why the :all branch skips entries whose text is nil.
+ def install_instructions(name)
62
+ if name == :all
63
+ doublets = AXML::PREFERRED.map do |nm|
64
+ [nm, install_instructions(nm)]
65
+ end
66
+ string = ""
67
+ doublets.each do |k,v|
68
+ if v
69
+ string << '-' * k.to_s.size << "\n"
70
+ string << "#{k}\n"
71
+ string << '-' * k.to_s.size << "\n"
72
+ string << "#{v}"
73
+ end
74
+ end
75
+ string
76
+ else
77
+ case name
78
+ when :xmlparser
79
+ string = <<END
80
+ debian/ubuntu: sudo apt-get install libxml-parser-ruby1.8
81
+
82
+ cygwin:
83
+ Download the XMLParser module: http://www.yoshidam.net/Ruby.html
84
+ Build and install:
85
+ ruby extconf.rb --with-expat-lib=/usr/lib \
86
+ --with-expat-include=/usr/include
87
+ make
88
+ make site-install
89
+
90
+ windows: included in one-click-installer
91
+ END
92
+ when :libxml
93
+ string = <<END
94
+ deb/ubuntu: sudo apt-get install libxml-ruby
95
+
96
+ install as gem:
97
+ sudo gem install -r libxml-ruby
98
+
99
+ for more info: http://libxml.rubyforge.org/install.xml
100
+ END
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
106
+ end
data/lib/axml/el.rb ADDED
@@ -0,0 +1,208 @@
1
+
2
+ require 'axml/traverse'
3
+
4
+ module AXML ; end
5
+
6
+ AXML::El = Struct.new(:parent, :name, :attrs, :text, :children, :array_index)
7
+
8
+ class AXML::El
9
+ include Enumerable
10
+ include Traverseable
11
+
12
# Indent is the whitespace unit for one nesting level. Indentation caches
# the prefix for depths 0 through 29 (Indent repeated depth times) and is
# what #to_s indexes by depth.
+ # use AXML::El::Indent.replace to swap without warning
13
+ # ["", " ", " ", " ", " ", " ", ... ]
14
+ Indent = ' '
15
+ # use AXML::El::Indentation.replace to replace w/o warning
16
+ Indentation = (0...30).to_a.map {|num| Indent*num }
17
+
# NOTE(review): @@depth is assigned here but not read anywhere in the
# visible source -- confirm whether anything still uses it.
18
+ # current depth
19
+ @@depth = 0
20
+
# Alternate names for the Struct accessors: content/content= for text/text=
# and kids/kids= for children/children=.
21
+ alias_method :content, :text
22
+ alias_method :content=, :text=
23
+ alias_method :kids, :children
24
+ alias_method :kids=, :children=
25
+
26
# Reads an attribute: returns attrs[attribute_string].
+ def [](attribute_string)
27
+ attrs[attribute_string]
28
+ end
29
+
# Writes an attribute: stores value under attribute_string in attrs.
30
+ def []=(attribute_string, value)
31
+ attrs[attribute_string] = value
32
+ end
33
+
# Returns true when text is neither nil nor false. An empty string still
# counts as having text, because only the truthiness of text is tested.
34
+ # has text?
35
+ def text?
36
+ !!text
37
+ end
38
+
# Returns true when this element has at least one child.
# Also available as child?.
39
+ def children?
40
+ children.size > 0
41
+ end
42
+ alias_method :child?, :children?
43
+
44
# Hands every direct child, in order, to the given block. This is the
# iteration primitive the class's Enumerable methods are built on.
# Returns the children collection.
def each(&block)
  children.each { |kid| block.call(kid) }
end
49
+
50
# Removes this element from its parent's list of children.
#
# Delegates to the parent's #drop_child so that the array_index of every
# later sibling is renumbered. A plain Array#delete would leave those
# indices stale (which #next and the following-sibling finders rely on)
# and would match by Struct equality instead of object identity.
#
# Returns the removed element, or nil when this element has no parent or
# is not among the parent's children.
def drop
  return nil if parent.nil?
  parent.drop_child(self)
end

# Removes +node+ (matched by object identity) from this element's children
# and shifts the array_index of every later child down by one.
#
# Returns the removed node, or nil when +node+ is not a child.
def drop_child(node)
  found_index = nil
  children.each_with_index do |kid, i|
    if found_index
      # Everything after the removed slot moves one position left.
      kid.array_index = i - 1
    elsif kid.equal?(node)
      found_index = i
    end
  end
  children.delete_at(found_index) if found_index
end
69
+
70
EscapeCharsRe = /['"&><]/

# Returns +data+ with the five XML special characters (& < > " ')
# replaced by their entity references.
#
# Anything that is not a String (nil included) and the empty string are
# handed back untouched; every other string yields a new String and the
# argument itself is never modified.
def escape(data)
  # modified slightly from xmlsimple.rb
  return data unless data.is_a?(String)
  return data if data == ''
  entities = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;',
    "'" => '&apos;'
  }
  data.gsub(EscapeCharsRe) { |ch| entities[ch] }
end
86
+
87
# Serializes this element and its subtree as indented XML text.
#
# +indent+ is the nesting depth used to choose the leading whitespace.
# Attribute values and text are passed through #escape.
#
# * with children: opening tag (followed by the element's own text, if
#   any), a newline, each child rendered at depth + 1, then the closing
#   tag on its own indented line
# * no children but text: opening tag, text and closing tag on one line
# * neither: a self-closing tag
#
# Every rendered element ends with a newline.
def to_s(indent=0)
  # The Indentation table only covers a fixed number of depths. Beyond it
  # the lookup is nil, which would silently drop the indentation, so build
  # the prefix directly in that case.
  pad = Indentation[indent] || (Indent * indent)
  attstring = ""
  if attrs.size > 0
    attstring = " " + attrs.collect { |k,v| "#{k}=\"#{escape(v)}\"" }.join(" ")
  end
  string = "#{pad}<#{name}#{attstring}"
  if children.size > 0
    string << ">"
    string << escape(text) if text?
    string << "\n"
    string << children.collect { |child| child.to_s(indent+1) }.join("")
    string << "#{pad}</#{name}>\n"
  elsif text?
    string << ">" << escape(text) << "</#{name}>\n"
  else
    string << "/>\n"
  end
  string
end
108
+
109
# Compact one-line summary: the element name, its attrs (via attrs.inspect)
# and the number of direct children. Text and parent are not shown.
+ def inspect
110
+ "<name='#{name}' attrs='#{attrs.inspect}' children.size=#{children.size}>"
111
+ end
112
+
# Looks up the sibling stored right after this element in parent.children,
# using this element's array_index. Returns nil when there is none.
# Calls parent.children directly, so it needs a non-nil parent.
113
+ # the next node
114
+ def next
115
+ parent.children[array_index+1]
116
+ end
117
+
118
+ # the first child (equivalent to children.first)
119
+ def child
120
+ children.first
121
+ end
122
+
# Appends node to children after setting node.array_index to the position
# it is about to occupy. node.parent is not assigned here.
# Returns the children array (the result of Array#push).
123
+ def add_node(node)
124
+ node.array_index = children.size
125
+ children.push( node )
126
+ end
127
+
128
+ ########################################################################
129
+ # FIND and FIND_FIRST (with a little useful xpath)
130
+ ########################################################################
131
+
132
# Returns an array of nodes selected by a tiny xpath-like +string+ of the
# form "<axis>::<name>". Supported axes are child, descendant and
# following-sibling; any other axis yields nil.
def find(string)
  axis, node_name = string.split('::')
  if axis == 'child'
    find_children(node_name)
  elsif axis == 'descendant'
    find_descendants(node_name)
  elsif axis == 'following-sibling'
    find_following_siblings(node_name)
  end
end
144
+
145
# Returns the first node selected by a tiny xpath-like +string+. The
# string must carry an axis prefix, e.g. "descendant::<name>" or
# "child::<name>", where <name> is the name of the node you seek.
# Supported axes are child, descendant and following-sibling; any other
# axis yields nil.
def find_first(string)
  axis, node_name = string.split('::')
  if axis == 'child'
    find_first_child(node_name)
  elsif axis == 'descendant'
    find_first_descendant(node_name)
  elsif axis == 'following-sibling'
    find_first_following_sibling(node_name)
  end
end
159
+
160
# Collects every descendant named +name+ in depth-first, pre-order
# sequence (a matching child is recorded before its own subtree is
# searched). Matches are appended to +collect_descendants+, which is also
# the return value; callers normally leave it at its default.
def find_descendants(name, collect_descendants=[])
  hits = collect_descendants
  children.each do |kid|
    hits << kid if kid.name == name
    kid.find_descendants(name, hits)
  end
  hits
end
167
+
168
# Depth-first, pre-order search for the first descendant named +name+.
#
# Each child is checked before its own subtree, and the search moves on to
# the next child when a subtree holds no match. (Returning the result of
# the first child's search unconditionally would stop after one branch.)
#
# Returns the matching node, or nil when no descendant has that name.
def find_first_descendant(name)
  each do |child_node|
    return child_node if child_node.name == name
    hit = child_node.find_first_descendant(name)
    return hit if hit
  end
  nil
end
178
+
179
# Returns an Array (possibly empty) of the direct children named +name+,
# in document order.
def find_children(name)
  children.select { |kid| kid.name == name }
end

# Returns the first direct child named +name+, or nil when there is none.
def find_first_child(name)
  each { |kid| return kid if kid.name == name }
  nil
end
191
+
192
# Returns the siblings that come after this element (per its array_index
# within parent.children) and are named +name+, in order.
def find_following_siblings(name)
  later = parent.children[(array_index + 1)..-1]
  later.select { |sib| sib.name == name }
end

# Returns the first sibling after this element that is named +name+, or
# nil when no later sibling matches.
def find_first_following_sibling(name)
  later = parent.children[(array_index + 1)..-1]
  later.detect { |sib| sib.name == name }
end
206
+
207
+ end
208
+
@@ -0,0 +1,110 @@
1
+
2
+ require 'xml/libxml'
3
+ require 'axml/traverse'
4
+
5
+ module AXML ; end
6
+
7
+ # if using AXML::LibXML, LibXML::XML::Node behavior is slightly modified from
8
+ # the default LibXML behavior:
9
+ # 1. there are no 'text' nodes. Some nodes have text, some do not
10
+ # 2. :text? returns true if the node has text (in LibXML this returns true
11
+ # if the node *is* a text node)
12
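+ # 3. :text returns the node's content, or nil when that content is an empty string (:content is aliased to it)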
13
class AXML::LibXML
  extend AXML
  class << self

    # Parses XML read from +io+ and returns the root node of the document.
    #
    # +opts+ is layered over AXML::DEFAULTS. When the merged options have a
    # :keep_blanks key, its value is written to
    # XML::Parser.default_keep_blanks before parsing.
    #
    # beware that threading with different :keep_blanks values could cause
    # problems since this is a global Libxml setting
    def parse_io(io, opts={})
      settings = AXML::DEFAULTS.merge(opts)
      if settings.key?(:keep_blanks)
        XML::Parser.default_keep_blanks = settings[:keep_blanks]
      end
      document = XML::Parser.io(io).parse
      document.root
    end

    # Parses the XML held in +string+ and returns the root node of the
    # document. Options are handled exactly as in parse_io.
    #
    # beware that threading with different :keep_blanks values could cause
    # problems since this is a global Libxml setting
    def parse_string(string, opts={})
      settings = AXML::DEFAULTS.merge(opts)
      if settings.key?(:keep_blanks)
        XML::Parser.default_keep_blanks = settings[:keep_blanks]
      end
      document = XML::Parser.string(string).parse
      document.root
    end
  end
end
34
+
35
# Reopens LibXML's node class so that it answers the same small API as
# AXML::El (text, text?, child, kids, attrs, drop, find_* helpers).
class LibXML::XML::Node
  include Traverseable

  # Keep handles on the stock LibXML methods before they are overridden.
  alias_method :old_child, :child
  alias_method :old_text?, :text?
  alias_method :old_content, :content

  # True when this node's first (raw) child is a text node. Returns false,
  # rather than raising on nil, for a node without any children.
  def text?
    kid = old_child
    kid ? kid.old_text? : false
  end

  # The node's content as reported by LibXML, or nil when that content is
  # the empty string.
  def text
    cont = old_content
    cont == "" ? nil : cont
  end
  alias_method :content, :text

  # The first child that is not a text node, or nil when there is none.
  #
  # Text nodes are skipped by moving to the next sibling. Descending into a
  # text node (which has no children) would make this return nil whenever
  # the first child happens to be text, even if element children follow it.
  def child
    kid = old_child
    kid = kid.next while kid && kid.old_text?
    kid
  end

  # First child element named +name+ (xpath "child::<name>").
  def find_first_child(name)
    find_first("child::#{name}")
  end

  # All child elements named +name+ (xpath "child::<name>").
  def find_children(name)
    find("child::#{name}")
  end

  # First descendant element named +name+ (xpath "descendant::<name>").
  def find_first_descendant(name)
    find_first("descendant::#{name}")
  end

  # full traversal from the initial node
  #
  # Same contract as Traverseable#traverse (:pre visits a node before its
  # children, :post after them), except that raw text nodes are walked but
  # never handed to the block.
  def traverse(type=:pre, &block)
    block.call(self) if type == :pre && !old_text?
    children.each do |kid|
      kid.traverse(type, &block)
    end
    block.call(self) if type == :post && !old_text?
  end

  # AXML::El-compatible names for existing LibXML methods.
  alias_method :kids, :children
  alias_method :attrs, :attributes
  alias_method :drop, :remove!

end
102
+
103
class LibXML::XML::Attributes
  # Hash-like convenience: returns an Array holding self[key] for each of
  # the given keys, in the order the keys were passed.
  def values_at(*args)
    args.map { |key| self[key] }
  end
end
110
+
@@ -0,0 +1,15 @@
1
+
2
# Mixin providing a recursive walk over a tree of nodes. The including
# class must respond to #children, and every child must respond to
# #traverse.
module Traverseable
  # Walks this node and all of its descendants, handing each visited node
  # to the block. +type+ picks the order: :pre calls the block on a node
  # before descending into its children, :post calls it afterwards. With
  # any other +type+ the tree is still walked but the block is never
  # called.
  def traverse(type=:pre, &block)
    block.call(self) if type == :pre
    children.each { |kid| kid.traverse(type, &block) }
    block.call(self) if type == :post
  end
end
@@ -0,0 +1,114 @@
1
+ require 'xmlparser'
2
+ require 'axml/el'
3
+
4
class AXML::XMLParser
  extend AXML

  class << self

    # Parses XML from +io+ with a fresh AXML::XMLParser::Parser and returns
    # the parser's root node.
    #
    # Recognized options:
    # :keep_blanks  - when exactly false, the parser is switched to its
    #                 no-keep-blanks mode
    # :text_indices - an element name, or an Array holding a single name,
    #                 passed (as a String) to set_single_text_indices;
    #                 an Array with more than one entry raises
    #                 NotImplementedError
    #
    # NOTE(review): a caller-supplied +opts+ replaces DEFAULTS wholesale
    # here, whereas AXML::LibXML merges the two -- confirm this difference
    # is intended.
    def parse_io(io, opts=DEFAULTS)
      parser = AXML::XMLParser::Parser.new
      parser.set_no_keep_blanks if opts[:keep_blanks] == false
      if (wanted = opts[:text_indices])
        is_list = wanted.is_a?(Array)
        if is_list && wanted.size > 1
          raise NotImplementedError, "currently only supports a single element"
        end
        parser.set_single_text_indices((is_list ? wanted.first : wanted).to_s)
      end
      parser.parse(io)
      parser.root
    end

    # Parses XML held in the string +arg+; simply forwards to parse_io
    # with the same options.
    def parse_string(arg, opts=DEFAULTS)
      parse_io(arg, opts)
    end

  end
end
37
+
38
# Event-driven builder: receives startElement / character / endElement
# callbacks and assembles a tree of AXML::El nodes underneath a synthetic
# "root" element.
class AXML::XMLParser::Parser < XMLParser
  # Matches any text that has at least one non-whitespace character.
  # (The '+' used to sit inside the negated character class, which made
  # text consisting only of '+' characters count as blank.)
  NotBlankText_re = /\S/m

  attr_writer :root

  # returns the first node found in the document
  def root
    @root.child
  end

  # Switches this parser instance to discard whitespace-only text: on
  # closing an element whose text has no non-blank character, the text is
  # set to nil. Implemented by redefining endElement on this instance.
  def set_no_keep_blanks
    instance_eval do
      def endElement(name)
        unless NotBlankText_re.match(@cur.text)
          @cur.text = nil
        end
        @cur = @cur.parent
      end
    end
  end

  # returns text as an array for each occurence of the specified element: [start_index, num_bytes]
  #
  # Redefines startElement, character and endElement on this instance so
  # that elements named +el_name+ carry byte positions instead of a String.
  # Because endElement is replaced here, it supersedes the one installed by
  # set_no_keep_blanks when both are used on the same instance.
  def set_single_text_indices(el_name)
    @el_name = el_name
    instance_eval do
      def startElement(name, attributes)
        text =
          if name == @el_name ; []
          else ; ''
          end
        new_el = ::AXML::El.new(@cur, name, attributes, text, [])
        @cur.add_node(new_el)
        @cur = new_el
      end

      def character(data)
        if @cur.text.is_a? Array
          # Text can be delivered in several callbacks; only the first
          # one marks where the element's content starts.
          @cur.text << byteIndex if @cur.text.empty?
        else
          @cur.text << data
        end
      end

      # Silence the method-redefinition warning for endElement.
      tmpvar = $VERBOSE ; $VERBOSE = nil
      def endElement(name)
        if @cur.text.is_a? Array
          # An element without character data never recorded a start;
          # record the current position so the length comes out as 0
          # instead of failing on `byteIndex - nil`.
          @cur.text << byteIndex if @cur.text.empty?
          @cur.text << (byteIndex - @cur.text.first)
        end
        @cur = @cur.parent
      end
      $VERBOSE = tmpvar
    end
  end

  # Creates the synthetic "root" element that the document's nodes are
  # attached to and makes it the current insertion point.
  def initialize
    #@keep_blanks = opts[:keep_blanks]
    @root = AXML::El.new(nil, "root", {}, '', [])
    @cur = @root
  end

  # Opens a new element under the current one and descends into it.
  def startElement(name, attributes)
    new_el = AXML::El.new(@cur, name, attributes, '', [])
    @cur.add_node(new_el)
    @cur = new_el
  end

  # Appends character data to the current element's text.
  def character(data)
    @cur.text << data
  end

  # Closes the current element by moving back up to its parent.
  def endElement(name)
    @cur = @cur.parent
  end

end
114
+