axml 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,106 @@
1
+
2
+ require 'axml'
3
+
4
module AXML
  # Locates and loads the parser back-ends that AXML can use and records
  # which one is the current default.
  module Autoload
    class << self
      # If given a name, loads the parser registered under that name. With no
      # argument (or nil), tries each name in AXML::PREFERRED in order and
      # uses the first one that loads.
      #
      # On success stores the chosen name in AXML::DEFAULTS[:parser] and
      # returns the parser object. Any message found in AXML::WARN is
      # emitted with Kernel#warn. Prints install instructions to STDERR and
      # raises a RuntimeError if no parser could be loaded.
      def parser!(name=nil)
        parser_name_to_use = nil
        parser_obj = nil
        if name.nil?
          PREFERRED.each do |nm|
            # must go through our own #parser; Kernel#load would try to
            # load a file literally named after the symbol
            parser_obj = parser(nm)
            if parser_obj
              parser_name_to_use = nm
              break
            end
          end
        else
          parser_name_to_use = name
          parser_obj = parser(name)
        end
        # NOTE(review): WARN is looked up with the parser object (not the
        # parser name) -- confirm that this matches how AXML::WARN is keyed.
        message = WARN[parser_obj]
        warn message if message
        unless parser_obj
          STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
          STDERR.puts "INSTALL INSTRUCTIONS:"
          STDERR.puts "*****************************************************"
          STDERR.puts install_instructions(:all)
          STDERR.puts "*****************************************************"
          raise RuntimeError, "no parser currently available!"
        end
        AXML::DEFAULTS[:parser] = parser_name_to_use
        parser_obj
      end

      # Requires 'axml/<name>' (underscores in the name become directory
      # separators) and returns the constant under AXML that
      # AXML::CLASS_MAPPINGS associates with the name. The returned object
      # should respond_to parse_io and parse_string.
      #
      # Returns nil if the file cannot be required, if the name has no entry
      # in CLASS_MAPPINGS, or if the mapped constant is not defined.
      def parser(name)
        req = "axml/#{name.to_s.gsub('_', '/')}"
        begin
          require req
          const_str = AXML::CLASS_MAPPINGS[name]
          # an unmapped name would make const_defined? raise a TypeError
          return nil if const_str.nil?
          if AXML.const_defined?(const_str)
            AXML.const_get(const_str)
          else
            nil
          end
        rescue LoadError
          nil
        end
      end

      # Returns install instructions (a String) for the named parser, or nil
      # if there are none for that name. With :all, returns one String that
      # concatenates a dashed header plus instructions for every parser in
      # AXML::PREFERRED that has instructions.
      def install_instructions(name)
        if name == :all
          string = ""
          AXML::PREFERRED.each do |nm|
            text = install_instructions(nm)
            next unless text
            underline = '-' * nm.to_s.size
            string << underline << "\n"
            string << "#{nm}\n"
            string << underline << "\n"
            string << "#{text}"
          end
          string
        else
          case name
          when :xmlparser
            <<-END
debian/ubuntu: sudo apt-get install libxml-parser-ruby1.8

cygwin:
Download the XMLParser module: http://www.yoshidam.net/Ruby.html
Build and install:
ruby extconf.rb --with-expat-lib=/usr/lib \
--with-expat-include=/usr/include
make
make site-install

windows: included in one-click-installer
            END
          when :libxml
            <<-END
deb/ubuntu: sudo apt-get install libxml-ruby

install as gem:
sudo gem install -r libxml-ruby

for more info: http://libxml.rubyforge.org/install.xml
            END
          end
        end
      end
    end
  end
end
data/lib/axml/el.rb ADDED
@@ -0,0 +1,208 @@
1
+
2
+ require 'axml/traverse'
3
+
4
+ module AXML ; end
5
+
6
AXML::El = Struct.new(:parent, :name, :attrs, :text, :children, :array_index)

# A plain-Ruby XML element node. Each element knows its parent, its name, its
# attributes (attrs), its text, its child elements (children) and its own
# position inside parent.children (array_index, maintained by add_node and
# drop_child).
class AXML::El
  include Enumerable
  include Traverseable

  # use AXML::El::Indent.replace to swap without warning
  Indent = ' '
  # use AXML::El::Indentation.replace to replace w/o warning
  # ["", Indent, Indent*2, Indent*3, ... ] (30 precomputed levels)
  Indentation = (0...30).to_a.map {|num| Indent*num }

  # current depth
  @@depth = 0

  alias_method :content, :text
  alias_method :content=, :text=
  alias_method :kids, :children
  alias_method :kids=, :children=

  # returns the value of the named attribute
  def [](attribute_string)
    attrs[attribute_string]
  end

  # sets the value of the named attribute
  def []=(attribute_string, value)
    attrs[attribute_string] = value
  end

  # has text? (true for anything but nil/false, so an empty string counts)
  def text?
    !!text
  end

  # true if the element has at least one child element
  def children?
    children.size > 0
  end
  alias_method :child?, :children?

  # yields each child element in order (required by Enumerable)
  def each(&block)
    children.each(&block)
  end

  # drops the current element from the list of its parents children.
  # Delegates to the parent's drop_child so that removal is by identity (not
  # Struct value equality) and the array_index of later siblings stays valid.
  def drop
    parent.drop_child(self)
  end

  # removes the given node (matched by object identity) from children and
  # renumbers the array_index of every child that followed it. Returns the
  # removed node, or nil if the node is not one of the children.
  def drop_child(node)
    found_index = children.index {|kid| kid.equal?(node) }
    return nil if found_index.nil?
    removed = children.delete_at(found_index)
    children.each_with_index do |kid, i|
      kid.array_index = i if i >= found_index
    end
    removed
  end

  EscapeCharsRe = /['"&><]/

  # returns data escaped if necessary. Non-String (including nil) and empty
  # input is returned untouched; otherwise a new escaped String is returned.
  def escape(data)
    # modified slightly from xmlsimple.rb
    return data if !data.is_a?(String) || data == ''
    result = data.dup
    if EscapeCharsRe.match(data)
      # '&' must be replaced first so the entities below are not re-escaped
      result.gsub!('&', '&amp;')
      result.gsub!('<', '&lt;')
      result.gsub!('>', '&gt;')
      result.gsub!('"', '&quot;')
      result.gsub!("'", '&apos;')
    end
    result
  end

  # serializes the element (and, recursively, its children) as XML, one
  # element per line, indented by +indent+ levels.
  def to_s(indent=0)
    attstring = ""
    if attrs.size > 0
      attstring = " " + attrs.collect { |k,v| "#{k}=\"#{escape(v)}\"" }.join(" ")
    end
    # the precomputed table is finite; compute deeper levels on demand
    pad = Indentation[indent] || (Indent * indent)
    string = "#{pad}<#{name}#{attstring}"
    if children.size > 0
      string << ">"
      string << escape(text) if text?
      string << "\n"
      string << children.collect {|child| child.to_s(indent+1) }.join("")
      string << "#{pad}</#{name}>\n"
    elsif text?
      string << ">" << escape(text) << "</#{name}>\n"
    else
      string << "/>\n"
    end
    string
  end

  # short summary; avoids dumping the whole (cyclic) parent/children graph
  def inspect
    "<name='#{name}' attrs='#{attrs.inspect}' children.size=#{children.size}>"
  end

  # the next node (the sibling that follows this one in parent.children)
  def next
    parent.children[array_index+1]
  end

  # the first child (equivalent to children.first)
  def child
    children.first
  end

  # appends node to children and records its position in node.array_index
  def add_node(node)
    node.array_index = children.size
    children.push( node )
  end

  ########################################################################
  # FIND and FIND_FIRST (with a little useful xpath)
  ########################################################################

  # Returns an array of nodes. Accepts same xpath strings as find_first.
  # Returns nil for an axis other than child, descendant or
  # following-sibling.
  def find(string)
    (tp, name) = string.split('::')
    case tp
    when 'child'
      find_children(name)
    when 'descendant'
      find_descendants(name)
    when 'following-sibling'
      find_following_siblings(name)
    end
  end

  # must be called with a child::, descendant:: or following-sibling::
  # string prefix! e.g. "descendant::<name>" and "child::<name>" where <name>
  # is the name of the node you seek. Returns the first match or nil.
  def find_first(string)
    (tp, name) = string.split('::')
    case tp
    when 'child'
      find_first_child(name)
    when 'descendant'
      find_first_descendant(name)
    when 'following-sibling'
      find_first_following_sibling(name)
    end
  end

  # collects (depth first, in document order) every descendant with the
  # given name into collect_descendants and returns that array
  def find_descendants(name, collect_descendants=[])
    children.each do |child|
      collect_descendants.push(child) if child.name == name
      child.find_descendants(name, collect_descendants)
    end
    collect_descendants
  end

  # returns the first descendant (depth first, in document order) with the
  # given name, or nil if there is none
  def find_first_descendant(name)
    children.each do |child_node|
      return child_node if child_node.name == name
      # keep scanning later siblings when this subtree has no match
      found = child_node.find_first_descendant(name)
      return found if found
    end
    nil
  end

  # all direct children with the given name
  def find_children(name)
    children.select {|v| v.name == name }
  end

  # the first direct child with the given name, or nil
  def find_first_child(name)
    children.detect {|child_node| child_node.name == name }
  end

  # all later siblings with the given name
  def find_following_siblings(name)
    parent.children[(array_index+1)..-1].select {|v| v.name == name }
  end

  # the first later sibling with the given name, or nil
  def find_first_following_sibling(name)
    parent.children[(array_index+1)..-1].detect {|sibling| sibling.name == name }
  end

end
208
+
@@ -0,0 +1,110 @@
1
+
2
+ require 'xml/libxml'
3
+ require 'axml/traverse'
4
+
5
module AXML ; end

# if using AXML::LibXML, LibXML::XML::Node behavior is slightly modified from
# the default LibXML behavior:
# 1. there are no 'text' nodes. Some nodes have text, some do not
# 2. :text? returns true if the node has text (in LibXML this returns true
# if the node *is* a text node)
# 3. text returns the node's content, or nil when that content is empty
class AXML::LibXML
  extend AXML
  class << self

    # Parses the XML read from io and returns the document's root node.
    # opts are layered on top of AXML::DEFAULTS.
    #
    # beware that threading with different :keep_blanks values could cause
    # problems since this is a global Libxml setting
    def parse_io(io, opts={})
      apply_keep_blanks(opts)
      document = XML::Parser.io(io).parse
      document.root
    end

    # Parses the XML held in string and returns the document's root node.
    # opts are layered on top of AXML::DEFAULTS.
    #
    # beware that threading with different :keep_blanks values could cause
    # problems since this is a global Libxml setting
    def parse_string(string, opts={})
      apply_keep_blanks(opts)
      document = XML::Parser.string(string).parse
      document.root
    end

    private

    # pushes the effective :keep_blanks option (caller opts over
    # AXML::DEFAULTS) into the global libxml parser setting, if one is set
    def apply_keep_blanks(opts)
      effective = AXML::DEFAULTS.merge(opts)
      return unless effective.key?(:keep_blanks)
      XML::Parser.default_keep_blanks = effective[:keep_blanks]
    end
  end
end
34
+
35
# Reopens libxml's node class so that it answers the same traversal/query
# methods as AXML::El. The original libxml methods stay reachable through
# the old_* aliases.
class LibXML::XML::Node
  include Traverseable

  alias_method :old_child, :child
  alias_method :old_text?, :text?
  alias_method :old_content, :content

  # true if the node's first libxml child is a text node (i.e. the node
  # *has* text); false for a node without children
  def text?
    kid = self.old_child
    !kid.nil? && kid.old_text?
  end

  # the node's content, or nil when the content is the empty string
  def text
    cont = self.old_content
    cont == "" ? nil : cont
  end
  alias_method :content, :text

  # the first child that is not a text node, or nil if there is none.
  # Leading text nodes are skipped by walking the sibling chain; descending
  # into a text node (which has no children) would always give up with nil.
  def child
    kid = self.old_child
    while kid && kid.old_text?
      kid = kid.next
    end
    kid
  end

  # first direct child with the given name (xpath child:: axis)
  def find_first_child(name)
    find_first("child::#{name}")
  end

  # all direct children with the given name (xpath child:: axis)
  def find_children(name)
    find("child::#{name}")
  end

  # first descendant with the given name (xpath descendant:: axis)
  def find_first_descendant(name)
    find_first("descendant::#{name}")
  end

  # full traversal from the initial node. The block is called for every
  # node that is not a libxml text node: before its children for :pre,
  # after them for :post.
  def traverse(type=:pre, &block)
    if type == :pre
      block.call(self) unless self.old_text?
    end
    children.each do |kid|
      kid.traverse(type, &block)
    end
    if type == :post
      block.call(self) unless self.old_text?
    end
  end


  alias_method :kids, :children
  alias_method :attrs, :attributes
  alias_method :drop, :remove!

end
102
+
103
class LibXML::XML::Attributes
  # Hash-like convenience: returns an array holding self[name] for each of
  # the given attribute names, in the order they were given.
  def values_at(*args)
    args.map {|attribute_name| self[attribute_name] }
  end
end
110
+
@@ -0,0 +1,15 @@
1
+
2
# Mixin for tree nodes that respond to #children.
module Traverseable
  # full traversal from the initial node: calls the block with every node,
  # parents before their children for :pre (the default) and children
  # before their parents for :post. Any other type walks the tree without
  # calling the block.
  def traverse(type=:pre, &block)
    block.call(self) if type == :pre
    children.each {|kid| kid.traverse(type, &block) }
    block.call(self) if type == :post
  end
end
@@ -0,0 +1,114 @@
1
+ require 'xmlparser'
2
+ require 'axml/el'
3
+
4
# Front end for the expat based parser (AXML::XMLParser::Parser).
class AXML::XMLParser
  extend AXML

  class << self

    # Parses io with a fresh AXML::XMLParser::Parser and returns the parser's
    # root node.
    #
    # Options read here:
    #   :keep_blanks  => false  switches the parser to no-keep-blanks mode
    #   :text_indices => name (or a one element array holding the name);
    #                    more than one name raises NotImplementedError
    def parse_io(io, opts=DEFAULTS)
      parser = AXML::XMLParser::Parser.new
      parser.set_no_keep_blanks if opts[:keep_blanks] == false

      text_indices = opts[:text_indices]
      if text_indices
        given_as_list = text_indices.is_a?(Array)
        if given_as_list && text_indices.size > 1
          raise NotImplementedError, "currently only supports a single element"
        end
        element = given_as_list ? text_indices.first : text_indices
        parser.set_single_text_indices(element.to_s)
      end

      parser.parse(io)
      parser.root
    end

    # Same as parse_io; the argument is handed to the parser unchanged.
    def parse_string(arg, opts=DEFAULTS)
      parse_io(arg, opts)
    end

  end
end
37
+
38
# Expat (XMLParser) subclass whose event handlers build a tree of AXML::El
# nodes underneath a synthetic "root" element.
class AXML::XMLParser::Parser < XMLParser
  # matches any text holding at least one non-whitespace character
  # (the former /[^\s+]+/ also treated a literal '+' as blank)
  NotBlankText_re = /\S/

  attr_writer :root

  # returns the first node found in the document (the first child of the
  # synthetic root element)
  def root
    @root.child
  end

  # replaces endElement on this parser instance so that the text of an
  # element is set to nil when it holds nothing but whitespace
  def set_no_keep_blanks
    instance_eval do
      def endElement(name)
        unless NotBlankText_re.match(@cur.text)
          @cur.text = nil
        end
        @cur = @cur.parent
      end
    end
  end

  # returns text as an array for each occurence of the specified element:
  # [start_index, num_bytes]. An element of that name with no character data
  # keeps an empty array. All other elements collect their text as usual.
  def set_single_text_indices(el_name)
    @el_name = el_name
    instance_eval do
      def startElement(name, attributes)
        text =
          if name == @el_name ; []
          else ; ''
          end
        new_el = ::AXML::El.new(@cur, name, attributes, text, [])
        @cur.add_node(new_el)
        @cur = new_el
      end

      def character(data)
        if @cur.text.is_a? Array
          # the handler may fire several times for one run of text; only the
          # first call marks the start index
          @cur.text << byteIndex if @cur.text.empty?
        else
          @cur.text << data
        end
      end

      # silence the "method redefined" warning in case set_no_keep_blanks
      # already installed an endElement on this instance
      tmpvar = $VERBOSE ; $VERBOSE = nil
      def endElement(name)
        if @cur.text.is_a?(Array) && !@cur.text.empty?
          @cur.text << (byteIndex - @cur.text.first)
        end
        @cur = @cur.parent
      end
      $VERBOSE = tmpvar
    end
  end

  # creates the synthetic "root" element and makes it the current element
  def initialize
    @root = AXML::El.new(nil, "root", {}, '', [])
    @cur = @root
  end

  # opens a new element as a child of the current one and makes it current
  def startElement(name, attributes)
    new_el = AXML::El.new(@cur, name, attributes, '', [])
    @cur.add_node(new_el)
    @cur = new_el
  end

  # appends character data to the text of the current element
  def character(data)
    @cur.text << data
  end

  # closes the current element; its parent becomes current again
  def endElement(name)
    @cur = @cur.parent
  end

end
114
+