axml 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +79 -49
- data/Rakefile +58 -29
- data/lib/axml.rb +237 -88
- metadata +41 -36
- data/specs/axml_spec.rb +0 -222
- data/specs/spec_helper.rb +0 -57
data/README
CHANGED
@@ -2,78 +2,108 @@ AXML
|
|
2
2
|
====
|
3
3
|
|
4
4
|
AXML - Provides a simple DOM for working with XML (using XMLParser under the
|
5
|
-
hood) that can serve as a drop in replacement for much of the basic libxml
|
6
|
-
functionality (e.g., each, children, child, find_first, find, next). AXML
|
7
|
-
comes from the idea that XML should be AXED (or at least simple to use!)
|
8
|
-
to use).
|
5
|
+
hood) that can serve as a drop in replacement for a subset of basic libxml
|
6
|
+
functionality (e.g., each, children, child, find_first, find, next).
|
9
7
|
|
10
|
-
|
11
|
-
|
8
|
+
'AXML' means 'ax XML' which succinctly describes the occasional feeling of a
|
9
|
+
programmer towards XML or its myriad parsers. AXML won't solve all your
|
10
|
+
problems, but it does make working with XML much less painful.
|
11
|
+
|
12
|
+
Features
|
13
|
+
--------
|
14
|
+
|
15
|
+
* *fast*: it's implemented in XMLParser (expat under the hood)
|
16
|
+
* *lean*: as in 'lines of code' (~220 w/ blank lines) and as in 'memory consumption' (nodes implemented as Struct, children in Array)
|
17
|
+
* *easy to extend*: code your Grandmother could read and understand (if she reads ruby)
|
18
|
+
* *quacks like libxml*: implements a very useful subset of libxml methods for near drop in replacement.
|
12
19
|
|
13
|
-
Parses elements, attributes, and text(content), and nothing more. Should be
|
14
|
-
very easy to extend and modify for special cases. It is roughly as fast as
|
15
|
-
libxml, although it currently reads in the entire document first (however,
|
16
|
-
this is memory efficient - nodes are implemented using Struct).
|
17
20
|
|
18
21
|
Examples
|
19
22
|
--------
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
23
|
+
|
24
|
+
require 'axml' # currently requires 'xmlparser' be installed
|
25
|
+
# Windows: already in one-click-installer
|
26
|
+
# Ubuntu: sudo apt-get install libxml-parser-ruby1.8
|
27
|
+
# Cygwin: see http://mspire.rubyforge.org/tutorial/cygwin_mspire.html
|
28
|
+
|
29
|
+
# a little example xml string to use
|
30
|
+
string_or_io = "
|
31
|
+
<n1>
|
32
|
+
<n2 size='big'>
|
33
|
+
<n3>words here</n3>
|
34
|
+
<n3></n3>
|
35
|
+
</n2>
|
36
|
+
<n2 size='small'>
|
37
|
+
<n3 id='3'></n3>
|
38
|
+
</n2>
|
39
|
+
</n1>
|
40
|
+
"
|
41
|
+
|
42
|
+
### Read a string or io
|
43
|
+
|
44
|
+
n1_node = AXML.parse(string_or_io)
|
45
|
+
|
46
|
+
### Read a file
|
47
|
+
|
48
|
+
n1_node = AXML.parse_file('path/to/file')
|
49
|
+
|
50
|
+
### Access children
|
51
|
+
|
39
52
|
n1_node.children # -> [array]
|
40
53
|
n1_node.each {|child| # do something with child }
|
41
54
|
|
42
|
-
|
55
|
+
### Get attributes and text
|
56
|
+
|
43
57
|
n2_node['size'] == 'big'
|
44
58
|
n3_node = n2_node.child
|
45
59
|
n3_node.text # -> 'words here'
|
46
60
|
n3_node.content # -> [same]
|
47
61
|
|
48
|
-
|
62
|
+
### Traverse nodes with next and child
|
63
|
+
|
49
64
|
n2_node = n1_node.child
|
50
65
|
the_other_n2_node = n2_node.next
|
51
66
|
the_other_n2_node.next = nil
|
52
67
|
|
53
|
-
|
68
|
+
### Does a little xpath
|
69
|
+
|
70
|
+
# find_first (returns the first node)
|
54
71
|
n3_node = n1_node.find_first('descendant::n3')
|
72
|
+
other_n3_node = n3_node.find_first('following-sibling::n3')
|
55
73
|
n1_node.find_first('child::n3') # -> nil
|
74
|
+
|
75
|
+
# find (returns an array)
|
56
76
|
n1_node.find('descendant::n3') # -> [array of all 3 <n3> nodes]
|
57
77
|
n1_node.find('child::n2') # -> [array of 2 <n2> nodes]
|
58
78
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
79
|
+
### Switch to libxml
|
80
|
+
|
81
|
+
This is all it takes to get all of the above code to work under libxml:
|
82
|
+
|
83
|
+
require 'xml/libxml' # instead of: require 'axml'
|
84
|
+
|
85
|
+
# A file
|
86
|
+
REPLACE: n1_node = AXML.parse_file(file)
|
87
|
+
WITH: n1_node = XML::Document.file(file).root # note the .root call on the end!
|
88
|
+
|
89
|
+
# A string
|
90
|
+
REPLACE: n1_node = AXML.parse(string)
|
91
|
+
WITH: n1_node = XML::Parser.string(string).parse.root # note the .root call on the end!
|
92
|
+
|
93
|
+
Voilà! All the above method calls work under libxml
|
94
|
+
|
95
|
+
|
96
|
+
See `specs/axml_spec.rb` for more examples and functionality
|
97
|
+
|
98
|
+
Detailed Description
|
99
|
+
--------------------
|
100
|
+
|
101
|
+
Parses elements, attributes, and text(content), and nothing more. Should be
|
102
|
+
very easy to extend and modify for special cases. It is roughly as fast as
|
103
|
+
libxml, although it currently reads in the entire document first (however,
|
104
|
+
this is memory efficient - nodes are implemented using Struct).
|
74
105
|
|
75
106
|
Installation
|
76
107
|
------------
|
77
108
|
|
78
|
-
gem install axml
|
79
|
-
|
109
|
+
gem install axml
|
data/Rakefile
CHANGED
@@ -4,7 +4,8 @@ require 'rake/rdoctask'
|
|
4
4
|
require 'rake/gempackagetask'
|
5
5
|
require 'rake/clean'
|
6
6
|
require 'fileutils'
|
7
|
-
require 'spec/rake/spectask'
|
7
|
+
#require 'spec/rake/spectask'
|
8
|
+
require 'email_encrypt'
|
8
9
|
|
9
10
|
###############################################
|
10
11
|
# GLOBAL
|
@@ -12,6 +13,7 @@ require 'spec/rake/spectask'
|
|
12
13
|
|
13
14
|
FL = FileList
|
14
15
|
NAME = "axml"
|
16
|
+
FU = FileUtils
|
15
17
|
|
16
18
|
readme = "README"
|
17
19
|
|
@@ -33,6 +35,7 @@ if WIN32
|
|
33
35
|
end
|
34
36
|
end
|
35
37
|
|
38
|
+
|
36
39
|
###############################################
|
37
40
|
# DOC
|
38
41
|
###############################################
|
@@ -42,6 +45,32 @@ Rake::RDocTask.new do |rd|
|
|
42
45
|
rd.options.push( *rdoc_options )
|
43
46
|
end
|
44
47
|
|
48
|
+
task :html_docs do
|
49
|
+
css = 'doc/src/style.css'
|
50
|
+
FU.mkpath 'doc/output'
|
51
|
+
FU.cp css, 'doc/output/'
|
52
|
+
index = 'doc/output/index.html'
|
53
|
+
header = 'doc/src/header'
|
54
|
+
File.open(index, 'w') do |index|
|
55
|
+
index.puts '<html>'
|
56
|
+
index.puts IO.read(header)
|
57
|
+
index.puts '<html><body>'
|
58
|
+
index.puts `bluecloth --fragment #{readme}`
|
59
|
+
|
60
|
+
# add contact info:
|
61
|
+
index.puts '<h2>Contact</h2>'
|
62
|
+
index.puts 'jprince@icmb.utexas.edu'.email_encrypt
|
63
|
+
|
64
|
+
index.puts '</body></html>'
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
desc "create and upload docs to server"
|
69
|
+
task :upload_docs => :html_docs do
|
70
|
+
sh "scp -i ~/.ssh/rubyforge_key -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/axml/"
|
71
|
+
end
|
72
|
+
|
73
|
+
|
45
74
|
###############################################
|
46
75
|
# TESTS
|
47
76
|
###############################################
|
@@ -59,36 +88,36 @@ task :ensure_gem_is_uninstalled do
|
|
59
88
|
end
|
60
89
|
end
|
61
90
|
|
62
|
-
namespace :spec do
|
63
|
-
task :autotest do
|
64
|
-
require './specs/rspec_autotest'
|
65
|
-
RspecAutotest.run
|
66
|
-
end
|
67
|
-
end
|
91
|
+
#namespace :spec do
|
92
|
+
# task :autotest do
|
93
|
+
# require './specs/rspec_autotest'
|
94
|
+
# RspecAutotest.run
|
95
|
+
# end
|
96
|
+
#end
|
68
97
|
|
69
|
-
desc "Run specs"
|
70
|
-
Spec::Rake::SpecTask.new('spec') do |t|
|
71
|
-
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
72
|
-
t.libs = ['lib']
|
73
|
-
t.spec_files = FileList['specs/**/*_spec.rb']
|
74
|
-
end
|
98
|
+
#desc "Run specs"
|
99
|
+
#Spec::Rake::SpecTask.new('spec') do |t|
|
100
|
+
# Rake::Task[:ensure_gem_is_uninstalled].invoke
|
101
|
+
# t.libs = ['lib']
|
102
|
+
# t.spec_files = FileList['specs/**/*_spec.rb']
|
103
|
+
#end
|
75
104
|
|
76
|
-
desc "Run specs and output specdoc"
|
77
|
-
Spec::Rake::SpecTask.new('specl') do |t|
|
78
|
-
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
79
|
-
t.spec_files = FileList['specs/**/*_spec.rb']
|
80
|
-
t.libs = ['lib']
|
81
|
-
t.spec_opts = ['--format', 'specdoc' ]
|
82
|
-
end
|
105
|
+
#desc "Run specs and output specdoc"
|
106
|
+
#Spec::Rake::SpecTask.new('specl') do |t|
|
107
|
+
# Rake::Task[:ensure_gem_is_uninstalled].invoke
|
108
|
+
# t.spec_files = FileList['specs/**/*_spec.rb']
|
109
|
+
# t.libs = ['lib']
|
110
|
+
# t.spec_opts = ['--format', 'specdoc' ]
|
111
|
+
#end
|
83
112
|
|
84
|
-
desc "Run all specs with RCov"
|
85
|
-
Spec::Rake::SpecTask.new('rcov') do |t|
|
86
|
-
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
87
|
-
t.spec_files = FileList['specs/**/*_spec.rb']
|
88
|
-
t.rcov = true
|
89
|
-
t.libs = ['lib']
|
90
|
-
t.rcov_opts = ['--exclude', 'specs']
|
91
|
-
end
|
113
|
+
#desc "Run all specs with RCov"
|
114
|
+
#Spec::Rake::SpecTask.new('rcov') do |t|
|
115
|
+
# Rake::Task[:ensure_gem_is_uninstalled].invoke
|
116
|
+
# t.spec_files = FileList['specs/**/*_spec.rb']
|
117
|
+
# t.rcov = true
|
118
|
+
# t.libs = ['lib']
|
119
|
+
# t.rcov_opts = ['--exclude', 'specs']
|
120
|
+
#end
|
92
121
|
|
93
122
|
#task :spec do
|
94
123
|
# uninstall_gem
|
@@ -154,6 +183,7 @@ def get_description(readme)
|
|
154
183
|
get_section('Description', readme)
|
155
184
|
end
|
156
185
|
|
186
|
+
|
157
187
|
tm = Time.now
|
158
188
|
gemspec = Gem::Specification.new do |t|
|
159
189
|
description = get_description(readme)
|
@@ -185,7 +215,6 @@ task :remove_pkg do
|
|
185
215
|
FileUtils.rm_rf "pkg"
|
186
216
|
end
|
187
217
|
|
188
|
-
|
189
218
|
task :install => [:reinstall]
|
190
219
|
|
191
220
|
desc "uninstalls the package, packages a fresh one, and installs"
|
data/lib/axml.rb
CHANGED
@@ -2,17 +2,35 @@ require 'xmlparser'
|
|
2
2
|
|
3
3
|
class AXML
|
4
4
|
|
5
|
+
NotBlankText_re = /[^\s+]+/m
|
6
|
+
|
5
7
|
def self.parse_file(file)
|
6
8
|
root = nil
|
7
|
-
File.open(file) do |fh|
|
8
|
-
root = parse(fh)
|
9
|
-
end
|
9
|
+
File.open(file) {|fh| root = parse(fh) }
|
10
10
|
root
|
11
11
|
end
|
12
12
|
|
13
13
|
# Returns the root node (as Element) or nodes (as Array)
|
14
|
-
|
14
|
+
# options:
|
15
|
+
# :keep_blanks => *true | false
|
16
|
+
def self.parse(stream, opts={:keep_blanks => false})
|
15
17
|
parser = AXML::XMLParser.new
|
18
|
+
if opts[:keep_blanks] == false
|
19
|
+
parser.set_no_keep_blanks
|
20
|
+
end
|
21
|
+
if ti = opts[:text_indices]
|
22
|
+
if ti.is_a?(Array) && ti.size > 1
|
23
|
+
raise NotImplementedError, "currently only supports a single element"
|
24
|
+
else
|
25
|
+
ti =
|
26
|
+
if ti.is_a?(Array)
|
27
|
+
ti.first.to_s
|
28
|
+
else
|
29
|
+
ti.to_s
|
30
|
+
end
|
31
|
+
parser.set_single_text_indices(ti)
|
32
|
+
end
|
33
|
+
end
|
16
34
|
parser.parse(stream)
|
17
35
|
parser.root
|
18
36
|
end
|
@@ -22,13 +40,16 @@ end
|
|
22
40
|
AXML::El = Struct.new(:parent, :name, :attrs, :text, :children, :array_index)
|
23
41
|
|
24
42
|
class AXML::El
|
25
|
-
@@tabs = ["", " ", " ", " ", " ", " ", " "]
|
26
43
|
include Enumerable
|
27
44
|
|
45
|
+
# use AXML::El::Indent.replace to swap without warning
|
46
|
+
# ["", " ", " ", " ", " ", " ", ... ]
|
47
|
+
Indent = ' '
|
48
|
+
# use AXML::El::Indentation.replace to replace w/o warning
|
49
|
+
Indentation = (0...30).to_a.map {|num| Indent*num }
|
50
|
+
|
51
|
+
# current depth
|
28
52
|
@@depth = 0
|
29
|
-
#attr_accessor :name, :attrs, :text, :children, :parent
|
30
|
-
# keeps track of location in array
|
31
|
-
#attr_accessor :array_index
|
32
53
|
|
33
54
|
alias_method :content, :text
|
34
55
|
alias_method :content=, :text=
|
@@ -39,88 +60,32 @@ class AXML::El
|
|
39
60
|
attrs[attribute_string]
|
40
61
|
end
|
41
62
|
|
42
|
-
|
43
|
-
|
44
|
-
def find(string)
|
45
|
-
(tp, name) = string.split('::')
|
46
|
-
case tp
|
47
|
-
when 'child'
|
48
|
-
find_children(name)
|
49
|
-
when 'descendant'
|
50
|
-
find_descendants(name)
|
51
|
-
when 'following-sibling'
|
52
|
-
find_following_siblings(name)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def find_descendants(name, collect_descendants=[])
|
57
|
-
children.each do |child|
|
58
|
-
collect_descendants.push(child) if child.name == name
|
59
|
-
child.find_descendants(name, collect_descendants)
|
60
|
-
end
|
61
|
-
collect_descendants
|
63
|
+
def []=(attribute_string, value)
|
64
|
+
attrs[attribute_string] = value
|
62
65
|
end
|
63
66
|
|
64
|
-
|
65
|
-
|
67
|
+
# has text?
|
68
|
+
def text?
|
69
|
+
!!text
|
66
70
|
end
|
67
71
|
|
68
|
-
|
69
|
-
|
70
|
-
# "descendant::<name>" and "child::<name>" where <name> is the name of the
|
71
|
-
# node you seek)
|
72
|
-
def find_first(string)
|
73
|
-
(tp, name) = string.split('::')
|
74
|
-
case tp
|
75
|
-
when 'child'
|
76
|
-
find_first_child(name)
|
77
|
-
when 'descendant'
|
78
|
-
find_first_descendant(name)
|
79
|
-
when 'following-sibling'
|
80
|
-
find_first_following_sibling(name)
|
81
|
-
end
|
72
|
+
def children?
|
73
|
+
children.size > 0
|
82
74
|
end
|
75
|
+
alias_method :child?, :children?
|
83
76
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
else
|
89
|
-
return child_node.find_first_descendant(name)
|
90
|
-
end
|
77
|
+
# full traversal from the initial node
|
78
|
+
def traverse(type=:pre, &block)
|
79
|
+
if type == :pre
|
80
|
+
block.call(self)
|
91
81
|
end
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
def find_first_child(name)
|
96
|
-
self.each do |child_node|
|
97
|
-
if child_node.name == name
|
98
|
-
return child_node
|
99
|
-
end
|
82
|
+
children.each do |child|
|
83
|
+
child.traverse(type, &block)
|
100
84
|
end
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
def find_following_siblings(name)
|
105
|
-
parent.children[(array_index+1)..-1].select {|v| v.name == name }
|
106
|
-
end
|
107
|
-
|
108
|
-
def find_first_following_sibling(name)
|
109
|
-
node = nil
|
110
|
-
parent.children[(array_index+1)..-1].each do |sibling|
|
111
|
-
if sibling.name == name
|
112
|
-
node = sibling
|
113
|
-
break
|
114
|
-
end
|
85
|
+
if type == :post
|
86
|
+
block.call(self)
|
115
87
|
end
|
116
|
-
node
|
117
|
-
end
|
118
|
-
|
119
|
-
def children?
|
120
|
-
children.size > 0
|
121
88
|
end
|
122
|
-
alias_method :child?, :children?
|
123
|
-
|
124
89
|
|
125
90
|
def each(&block)
|
126
91
|
children.each do |child|
|
@@ -149,21 +114,44 @@ class AXML::El
|
|
149
114
|
end
|
150
115
|
|
151
116
|
def tabs
|
152
|
-
|
117
|
+
Indentation[@@depth]
|
118
|
+
end
|
119
|
+
|
120
|
+
EscapeCharsRe = /['"&><]/
|
121
|
+
|
122
|
+
# returns data escaped if necessary
|
123
|
+
def escape(data)
|
124
|
+
# modified slightly from xmlsimple.rb
|
125
|
+
return data if !data.is_a?(String) || data.nil? || data == ''
|
126
|
+
result = data.dup
|
127
|
+
if EscapeCharsRe.match(data)
|
128
|
+
result.gsub!('&', '&amp;')
|
129
|
+
result.gsub!('<', '&lt;')
|
130
|
+
result.gsub!('>', '&gt;')
|
131
|
+
result.gsub!('"', '&quot;')
|
132
|
+
result.gsub!("'", '&apos;')
|
133
|
+
end
|
134
|
+
result
|
153
135
|
end
|
154
136
|
|
155
137
|
def to_s
|
156
138
|
attstring = ""
|
157
139
|
if attrs.size > 0
|
158
|
-
attstring = " " + attrs.collect { |k,v| "#{k}=\"#{v}\"" }.join(" ")
|
140
|
+
attstring = " " + attrs.collect { |k,v| "#{k}=\"#{escape(v)}\"" }.join(" ")
|
159
141
|
end
|
160
142
|
string = "#{tabs}<#{name}#{attstring}"
|
161
143
|
if children.size > 0
|
162
|
-
string << "
|
144
|
+
string << ">"
|
145
|
+
if text?
|
146
|
+
string << escape(text)
|
147
|
+
end
|
148
|
+
string << "\n"
|
163
149
|
@@depth += 1
|
164
150
|
string << children.collect {|child| child.to_s }.join("")
|
165
151
|
@@depth -= 1
|
166
152
|
string << "#{tabs}</#{name}>\n"
|
153
|
+
elsif text?
|
154
|
+
string << ">" << escape(text) << "</#{name}>\n"
|
167
155
|
else
|
168
156
|
string << "/>\n"
|
169
157
|
end
|
@@ -189,6 +177,85 @@ class AXML::El
|
|
189
177
|
children.push( node )
|
190
178
|
end
|
191
179
|
|
180
|
+
########################################################################
|
181
|
+
# FIND and FIND_FIRST (with a little useful xpath)
|
182
|
+
########################################################################
|
183
|
+
|
184
|
+
# Returns an array of nodes. Accepts same xpath strings as find_first.
|
185
|
+
def find(string)
|
186
|
+
(tp, name) = string.split('::')
|
187
|
+
case tp
|
188
|
+
when 'child'
|
189
|
+
find_children(name)
|
190
|
+
when 'descendant'
|
191
|
+
find_descendants(name)
|
192
|
+
when 'following-sibling'
|
193
|
+
find_following_siblings(name)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# currently must be called with descendant:: or child:: string prefix! e.g.
|
198
|
+
# "descendant::<name>" and "child::<name>" where <name> is the name of the
|
199
|
+
# node you seek)
|
200
|
+
def find_first(string)
|
201
|
+
(tp, name) = string.split('::')
|
202
|
+
case tp
|
203
|
+
when 'child'
|
204
|
+
find_first_child(name)
|
205
|
+
when 'descendant'
|
206
|
+
find_first_descendant(name)
|
207
|
+
when 'following-sibling'
|
208
|
+
find_first_following_sibling(name)
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
def find_descendants(name, collect_descendants=[])
|
213
|
+
children.each do |child|
|
214
|
+
collect_descendants.push(child) if child.name == name
|
215
|
+
child.find_descendants(name, collect_descendants)
|
216
|
+
end
|
217
|
+
collect_descendants
|
218
|
+
end
|
219
|
+
|
220
|
+
def find_first_descendant(name)
|
221
|
+
self.each do |child_node|
|
222
|
+
if child_node.name == name
|
223
|
+
return child_node
|
224
|
+
else
|
225
|
+
return child_node.find_first_descendant(name)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
return nil
|
229
|
+
end
|
230
|
+
|
231
|
+
def find_children(name)
|
232
|
+
children.select {|v| v.name == name }
|
233
|
+
end
|
234
|
+
|
235
|
+
def find_first_child(name)
|
236
|
+
self.each do |child_node|
|
237
|
+
if child_node.name == name
|
238
|
+
return child_node
|
239
|
+
end
|
240
|
+
end
|
241
|
+
return nil
|
242
|
+
end
|
243
|
+
|
244
|
+
def find_following_siblings(name)
|
245
|
+
parent.children[(array_index+1)..-1].select {|v| v.name == name }
|
246
|
+
end
|
247
|
+
|
248
|
+
def find_first_following_sibling(name)
|
249
|
+
node = nil
|
250
|
+
parent.children[(array_index+1)..-1].each do |sibling|
|
251
|
+
if sibling.name == name
|
252
|
+
node = sibling
|
253
|
+
break
|
254
|
+
end
|
255
|
+
end
|
256
|
+
node
|
257
|
+
end
|
258
|
+
|
192
259
|
end
|
193
260
|
|
194
261
|
class AXML::XMLParser < XMLParser
|
@@ -200,28 +267,110 @@ class AXML::XMLParser < XMLParser
|
|
200
267
|
@root.child
|
201
268
|
end
|
202
269
|
|
270
|
+
def set_no_keep_blanks
|
271
|
+
instance_eval do
|
272
|
+
def endElement(name)
|
273
|
+
unless AXML::NotBlankText_re.match(@cur.text)
|
274
|
+
@cur.text = nil
|
275
|
+
end
|
276
|
+
@cur = @cur.parent
|
277
|
+
end
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
# returns text as an array for each occurrence of the specified element: [start_index, num_bytes]
|
282
|
+
def set_single_text_indices(el_name)
|
283
|
+
@el_name = el_name
|
284
|
+
instance_eval do
|
285
|
+
def startElement(name, attributes)
|
286
|
+
text =
|
287
|
+
if name == @el_name ; []
|
288
|
+
else ; ''
|
289
|
+
end
|
290
|
+
new_el = ::AXML::El.new(@cur, name, attributes, text, [])
|
291
|
+
@cur.add_node(new_el)
|
292
|
+
@cur = new_el
|
293
|
+
end
|
294
|
+
|
295
|
+
def character(data)
|
296
|
+
if @cur.text.is_a? Array
|
297
|
+
@cur.text << byteIndex
|
298
|
+
else
|
299
|
+
@cur.text << data
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
def endElement(name)
|
304
|
+
if @cur.text.is_a? Array
|
305
|
+
@cur.text << (byteIndex - @cur.text.first)
|
306
|
+
end
|
307
|
+
@cur = @cur.parent
|
308
|
+
end
|
309
|
+
end
|
310
|
+
end
|
311
|
+
|
312
|
+
# takes opts from AXML::parse method
|
203
313
|
def initialize
|
314
|
+
#@keep_blanks = opts[:keep_blanks]
|
204
315
|
@root = AXML::El.new(nil, "root", {}, '', [])
|
205
316
|
@cur = @root
|
206
317
|
end
|
207
318
|
|
208
319
|
def startElement(name, attributes)
|
209
320
|
new_el = AXML::El.new(@cur, name, attributes, '', [])
|
210
|
-
# add the new node to the previous parent node
|
211
321
|
@cur.add_node(new_el)
|
212
|
-
# notice the change in @cur node
|
213
322
|
@cur = new_el
|
214
|
-
|
323
|
+
end
|
324
|
+
|
325
|
+
def character(data)
|
326
|
+
@cur.text << data
|
215
327
|
end
|
216
328
|
|
217
329
|
def endElement(name)
|
218
330
|
@cur = @cur.parent
|
219
331
|
end
|
220
332
|
|
333
|
+
|
334
|
+
end
|
335
|
+
|
336
|
+
|
337
|
+
|
338
|
+
=begin
|
339
|
+
|
340
|
+
# This parser stores information about where the peaks information is in the
|
341
|
+
# file
|
342
|
+
# The content of the peaks node is an array where the first member is the
|
343
|
+
# start index and the last member is the number of bytes. All other members
|
344
|
+
# should be ignored.
|
345
|
+
class AXML::XMLParser::LazyPeaks < ::AXML::XMLParser
|
346
|
+
|
347
|
+
def startElement(name, attributes)
|
348
|
+
text =
|
349
|
+
if name == 'peaks' ; []
|
350
|
+
else ; ''
|
351
|
+
end
|
352
|
+
new_el = ::AXML::El.new(@cur, name, attributes, text, [])
|
353
|
+
# add the new node to the previous parent node
|
354
|
+
@cur.add_node(new_el)
|
355
|
+
# notice the change in @cur node
|
356
|
+
@cur = new_el
|
357
|
+
end
|
358
|
+
|
221
359
|
def character(data)
|
222
|
-
@cur.text
|
360
|
+
if @cur.text.is_a? Array
|
361
|
+
@cur.text << byteIndex
|
362
|
+
else
|
363
|
+
@cur.text << data
|
364
|
+
end
|
223
365
|
end
|
224
366
|
|
225
|
-
|
367
|
+
def endElement(name)
|
368
|
+
if @cur.text.is_a? Array
|
369
|
+
@cur.text << (byteIndex - @cur.text.first)
|
370
|
+
end
|
371
|
+
@cur = @cur.parent
|
372
|
+
end
|
226
373
|
|
374
|
+
end
|
227
375
|
|
376
|
+
=end
|
metadata
CHANGED
@@ -1,55 +1,60 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.2
|
3
|
-
specification_version: 1
|
4
2
|
name: axml
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.2
|
7
|
-
date: 2007-10-19 00:00:00 -05:00
|
8
|
-
summary: AXML - Provides a simple DOM for working with XML (using XMLParser under the hood) that can serve as a drop in replacement for much of the basic libxml functionality (e.g., each, children, child, find_first, find, next). AXML comes from the idea that XML should be AXED (or at least simple to use!) to use).
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: jprince@icmb.utexas.edu
|
12
|
-
homepage:
|
13
|
-
rubyforge_project:
|
14
|
-
description: Parses elements, attributes, and text(content), and nothing more. Should be very easy to extend and modify for special cases. It is roughly as fast as libxml, although it currently reads in the entire document first (however, this is memory efficient - nodes are implemented using Struct).
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 0.0.3
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- John Prince
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-01-07 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: jprince@icmb.utexas.edu
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
- LICENSE
|
31
25
|
files:
|
32
26
|
- lib/axml.rb
|
33
27
|
- README
|
34
28
|
- LICENSE
|
35
29
|
- Rakefile
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
- specs/axml_spec.rb
|
30
|
+
has_rdoc: true
|
31
|
+
homepage:
|
32
|
+
post_install_message:
|
40
33
|
rdoc_options:
|
41
34
|
- --main
|
42
35
|
- README
|
43
36
|
- --title
|
44
37
|
- axml
|
45
|
-
|
46
|
-
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
52
|
requirements:
|
53
53
|
- xmlparser is needed right now
|
54
|
-
|
54
|
+
rubyforge_project:
|
55
|
+
rubygems_version: 1.3.1
|
56
|
+
signing_key:
|
57
|
+
specification_version: 2
|
58
|
+
summary: AXML - Provides a simple DOM for working with XML (using XMLParser under the hood) that can serve as a drop in replacement for a subset of basic libxml functionality (e.g., each, children, child, find_first, find, next).
|
59
|
+
test_files: []
|
55
60
|
|
data/specs/axml_spec.rb
DELETED
@@ -1,222 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
2
|
-
|
3
|
-
require 'axml'
|
4
|
-
|
5
|
-
describe AXML do
|
6
|
-
|
7
|
-
before(:each) do
|
8
|
-
@xml_string = <<END
|
9
|
-
<xml>
|
10
|
-
<doc1>
|
11
|
-
<dog name="spot" height="23" weight="13">
|
12
|
-
<flea name="ouchy" height="20" weight="10"/>
|
13
|
-
<flea name="crawly" height="22" weight="9"/>
|
14
|
-
</dog>
|
15
|
-
</doc1>
|
16
|
-
<doc2>
|
17
|
-
<dog name="billy" height="5" weight="3">
|
18
|
-
</dog>
|
19
|
-
</doc2>
|
20
|
-
</xml>
|
21
|
-
END
|
22
|
-
@small_xml_string = <<END
|
23
|
-
<first>
|
24
|
-
<inner1 name="hello">
|
25
|
-
</inner1>
|
26
|
-
<inner2>
|
27
|
-
</inner2>
|
28
|
-
</first>
|
29
|
-
END
|
30
|
-
@xml_large_string = <<END
|
31
|
-
<n1>
|
32
|
-
<n2 id='1'>
|
33
|
-
<n3 id='1'> </n3>
|
34
|
-
<n3 id='2'> </n3>
|
35
|
-
<n3b id='5'></n3b>
|
36
|
-
<n3b id='6'></n3b>
|
37
|
-
</n2>
|
38
|
-
<n2 id='2'>
|
39
|
-
<n3 id='3'> </n3>
|
40
|
-
<n3 id='4'> </n3>
|
41
|
-
</n2>
|
42
|
-
</n1>
|
43
|
-
END
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'reads xml strings with "AXML.parse"' do
|
47
|
-
node = AXML.parse(@small_xml_string)
|
48
|
-
|
49
|
-
node.class.should == AXML::El
|
50
|
-
node.name.should == 'first'
|
51
|
-
node.children.first.name.should == 'inner1'
|
52
|
-
node.kids.first.name.should == 'inner1'
|
53
|
-
end
|
54
|
-
|
55
|
-
it 'reads file handles with "AXML.parse"' do
|
56
|
-
file = File.dirname(__FILE__) + '/axml.tmp'
|
57
|
-
File.open(file, 'w') {|fh| fh.puts @xml_string}
|
58
|
-
fh = File.open(file)
|
59
|
-
root = AXML.parse(fh)
|
60
|
-
fh.close
|
61
|
-
root.name.should == 'xml'
|
62
|
-
root.kids[0].kids.first.attrs["name"].should == 'spot'
|
63
|
-
File.unlink(file) if File.exist? file
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'reads files with "AXML.parse_file"' do
|
67
|
-
file = File.dirname(__FILE__) + '/axml.tmp'
|
68
|
-
File.open(file, 'w') {|fh| fh.puts @xml_string}
|
69
|
-
root = AXML.parse_file(file)
|
70
|
-
root.name.should == 'xml'
|
71
|
-
root.kids[0].kids.first.attrs["name"].should == 'spot'
|
72
|
-
File.unlink(file) if File.exist? file
|
73
|
-
end
|
74
|
-
|
75
|
-
it 'can access subnodes with "children" or "kids"' do
|
76
|
-
node = AXML.parse(@small_xml_string)
|
77
|
-
node.name.should == 'first'
|
78
|
-
node.kids.zip(%w(inner1 inner2)) do |kd,nm|
|
79
|
-
kd.name.should == nm
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
it 'accesses attributes with "[]"' do
|
84
|
-
root = AXML.parse(@xml_string)
|
85
|
-
dog_node = root.kids[0].kids[0]
|
86
|
-
dog_node.name.should == 'dog'
|
87
|
-
dog_node['name'].should == 'spot'
|
88
|
-
dog_node['height'].should == '23'
|
89
|
-
end
|
90
|
-
|
91
|
-
it 'can "find_first_descendant"' do
|
92
|
-
root = AXML.parse(@xml_string)
|
93
|
-
doc1_node = root.find_first_descendant('doc1')
|
94
|
-
doc1_node.name.should == 'doc1'
|
95
|
-
doc1_node.children.size.should == 1
|
96
|
-
doc1_node.find_first_descendant('xml').should == nil
|
97
|
-
doc1_node.find_first_descendant('doc1').should == nil
|
98
|
-
child_node = root.find_first_descendant('flea')
|
99
|
-
child_node.name.should == 'flea'
|
100
|
-
child_node['name'].should == 'ouchy'
|
101
|
-
child_node.find_first_descendant('flea').should == nil
|
102
|
-
end
|
103
|
-
|
104
|
-
it 'can "find_first_child"' do
|
105
|
-
root = AXML.parse(@xml_string)
|
106
|
-
doc1_node = root.find_first_child('doc1')
|
107
|
-
doc1_node.name.should == 'doc1'
|
108
|
-
doc1_node.children.size.should == 1
|
109
|
-
doc1_node.find_first_child('xml').should == nil
|
110
|
-
doc1_node.find_first_child('doc1').should == nil
|
111
|
-
dog_node = doc1_node.find_first_child('dog')
|
112
|
-
dog_node.name.should == 'dog'
|
113
|
-
dog_node['name'].should == 'spot'
|
114
|
-
doc2_node = root.find_first_child('doc2')
|
115
|
-
doc2_node.name.should == 'doc2'
|
116
|
-
doc2_node.children.size.should == 1
|
117
|
-
end
|
118
|
-
|
119
|
-
it 'can "find" with a little xpath' do
|
120
|
-
root = AXML.parse(@xml_large_string)
|
121
|
-
nodes = root.find('child::n2')
|
122
|
-
nodes.size.should == 2
|
123
|
-
nodes.each {|v| v.name.should == 'n2' }
|
124
|
-
nodes.each_with_index {|v,i| v['id'].should == (i+1).to_s }
|
125
|
-
|
126
|
-
nodes = root.find('descendant::n3')
|
127
|
-
nodes.size.should == 4
|
128
|
-
nodes.each {|v| v.name.should == 'n3' }
|
129
|
-
nodes.each_with_index {|v,i| v['id'].should == (i+1).to_s }
|
130
|
-
end
|
131
|
-
|
132
|
-
it 'can do some find_first xpath' do
|
133
|
-
root = AXML.parse(@xml_string)
|
134
|
-
flea_node = root.find_first('descendant::flea')
|
135
|
-
flea_node.name.should == 'flea'
|
136
|
-
flea_node['name'].should == 'ouchy'
|
137
|
-
root.find_first('child::flea').should be_nil
|
138
|
-
root.find_first('child::doc2').name.should == 'doc2'
|
139
|
-
end
|
140
|
-
|
141
|
-
it 'can drop nodes with "drop"' do
|
142
|
-
root = AXML.parse(@xml_string)
|
143
|
-
root.children[0].children[0].drop
|
144
|
-
to_string = <<END
|
145
|
-
<xml>
|
146
|
-
<doc1/>
|
147
|
-
<doc2>
|
148
|
-
<dog name="billy" weight="3" height="5"/>
|
149
|
-
</doc2>
|
150
|
-
</xml>
|
151
|
-
END
|
152
|
-
root.to_s.should == to_string
|
153
|
-
end
|
154
|
-
|
155
|
-
it 'can return "child" node' do
|
156
|
-
root = AXML.parse(@xml_string)
|
157
|
-
doc1 = root.child
|
158
|
-
doc1.name.should == 'doc1'
|
159
|
-
dog = doc1.child
|
160
|
-
dog.name.should == 'dog'
|
161
|
-
dog['name'].should == 'spot'
|
162
|
-
flea = dog.child
|
163
|
-
flea.name.should == 'flea'
|
164
|
-
flea['name'].should == 'ouchy'
|
165
|
-
flea.child.should == nil
|
166
|
-
end
|
167
|
-
|
168
|
-
it 'can return "next" node' do
|
169
|
-
root = AXML.parse(@xml_string)
|
170
|
-
root.next.should == nil
|
171
|
-
doc1 = root.child
|
172
|
-
doc2 = doc1.next
|
173
|
-
doc2.name.should == 'doc2'
|
174
|
-
doc2.next.should == nil
|
175
|
-
dog = doc1.child
|
176
|
-
dog.name.should == 'dog'
|
177
|
-
dog.next.should == nil
|
178
|
-
flea2 = dog.child.next
|
179
|
-
flea2.name.should == 'flea'
|
180
|
-
flea2['name'].should == 'crawly'
|
181
|
-
flea2.next.should == nil
|
182
|
-
end
|
183
|
-
|
184
|
-
it 'can find "following-sibling"' do
|
185
|
-
n1 = AXML.parse(@xml_large_string)
|
186
|
-
n3 = n1.child.child
|
187
|
-
n3.name.should == 'n3'
|
188
|
-
n3id2 = n3.find_first("following-sibling::n3")
|
189
|
-
n3id2['id'].should == '2'
|
190
|
-
n3b = n3.find_first("following-sibling::n3b")
|
191
|
-
n3b['id'].should == '5'
|
192
|
-
n3b.find_first("following-sibling::n3").should == nil
|
193
|
-
end
|
194
|
-
|
195
|
-
it '"to_s" gives xml' do
|
196
|
-
root = AXML.parse(@xml_string)
|
197
|
-
to_string = <<END
|
198
|
-
<xml>
|
199
|
-
<doc1>
|
200
|
-
<dog name="spot" weight="13" height="23">
|
201
|
-
<flea name="ouchy" weight="10" height="20"/>
|
202
|
-
<flea name="crawly" weight="9" height="22"/>
|
203
|
-
</dog>
|
204
|
-
</doc1>
|
205
|
-
<doc2>
|
206
|
-
<dog name="billy" weight="3" height="5"/>
|
207
|
-
</doc2>
|
208
|
-
</xml>
|
209
|
-
END
|
210
|
-
root.to_s.should == to_string
|
211
|
-
end
|
212
|
-
|
213
|
-
it 'can get many attributes at once with values_at (not supported by libxml)' do
|
214
|
-
root = AXML.parse(@xml_string)
|
215
|
-
dog = root.child.child.child
|
216
|
-
exp = %w(ouchy 20 10)
|
217
|
-
reply = dog.attrs.values_at('name', 'height', 'weight')
|
218
|
-
reply.should == exp
|
219
|
-
end
|
220
|
-
|
221
|
-
|
222
|
-
end
|
data/specs/spec_helper.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
|
2
|
-
gem 'rspec'
|
3
|
-
|
4
|
-
|
5
|
-
def xdescribe(*args)
|
6
|
-
puts "#{args.join(' ')}"
|
7
|
-
puts "**SKIPPING**"
|
8
|
-
end
|
9
|
-
|
10
|
-
def Xdescribe(*args)
|
11
|
-
xdescribe(*args)
|
12
|
-
end
|
13
|
-
|
14
|
-
def xit(*args)
|
15
|
-
puts "- SKIPPING: #{args.join(' ')}"
|
16
|
-
end
|
17
|
-
|
18
|
-
def silent(&block)
|
19
|
-
tmp = $VERBOSE ; $VERBOSE = nil
|
20
|
-
block.call
|
21
|
-
$VERBOSE = tmp
|
22
|
-
end
|
23
|
-
|
24
|
-
silent {
|
25
|
-
ROOT_DIR = File.dirname(__FILE__) + '/..'
|
26
|
-
SPEC_DIR = File.dirname(__FILE__)
|
27
|
-
}
|
28
|
-
|
29
|
-
|
30
|
-
class String
|
31
|
-
#alias_method :exist?, exist_as_a_file?
|
32
|
-
#alias_method exist_as_a_file?, exist?
|
33
|
-
def exist?
|
34
|
-
File.exist? self
|
35
|
-
end
|
36
|
-
def exist_as_a_file?
|
37
|
-
File.exist? self
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "a cmdline program", :shared => true do
|
42
|
-
before(:all) do
|
43
|
-
testdir = File.dirname(__FILE__)
|
44
|
-
libdir = testdir + '/../lib'
|
45
|
-
bindir = testdir + '/../bin'
|
46
|
-
progname = "fasta_shaker.rb"
|
47
|
-
@cmd = "ruby -I #{libdir} #{bindir}/#{@progname} "
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'gives usage when called with no args' do
|
51
|
-
reply = `#{@cmd}`
|
52
|
-
reply.should =~ /usage/i
|
53
|
-
end
|
54
|
-
|
55
|
-
end
|
56
|
-
|
57
|
-
|