axml 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README +56 -35
- data/Rakefile +11 -25
- data/lib/axml.rb +28 -5
- data/lib/axml/autoload.rb +66 -68
- data/lib/axml/el.rb +15 -0
- metadata +3 -2
data/README
CHANGED
@@ -1,53 +1,50 @@
|
|
1
|
-
AXML
|
2
|
-
====
|
1
|
+
= {AXML}[http://axml.rubyforge.org]
|
3
2
|
|
4
3
|
AXML - Provides a simple, minimalistic DOM for working with data stored in an
|
5
4
|
XML document. The API is very similar to LibXML, differing slightly in the
|
6
|
-
handling of text nodes. It is designed with very large documents in mind:
|
5
|
+
handling of text nodes. It is designed with very large documents in mind:
|
6
|
+
nodes are represented in memory efficient Struct objects and it works with
|
7
|
+
either XMLParser or LibXML!
|
7
8
|
|
8
9
|
'AXML' literally translates into 'ax XML' which succinctly describes the
|
9
10
|
occasional feeling of a programmer towards XML or its myriad parsers. AXML
|
10
11
|
won't solve all your XML woes, but it does make working with XML much less
|
11
12
|
painful.
|
12
13
|
|
13
|
-
|
14
|
-
--------
|
14
|
+
== Overview
|
15
15
|
|
16
16
|
* *fast*: runs on either XMLParser or LibXML
|
17
|
-
* *lean*: as in 'lines of code' and as in 'memory consumption'
|
18
|
-
* *easy to extend*: code your Grandmother could read and understand (if she reads ruby)
|
19
|
-
* PLOS: implements a useful subset of libxml methods for near drop in replacement.
|
17
|
+
* *lean*: as in 'lines of code' and as in 'memory consumption'
|
20
18
|
|
21
|
-
Examples
|
22
|
-
--------
|
19
|
+
== Examples
|
23
20
|
|
24
21
|
require 'axml'
|
25
22
|
|
26
23
|
# a little example xml string to use
|
27
|
-
|
24
|
+
string = "
|
28
25
|
<n1>
|
29
26
|
<n2 size='big'>
|
30
27
|
<n3>words here</n3>
|
31
28
|
<n3></n3>
|
32
29
|
</n2>
|
33
|
-
<n2 size='small'>
|
34
|
-
<n3 id='3'></n3>
|
30
|
+
<n2 size='small' >
|
31
|
+
<n3 id='3' thinks='out loud'></n3>
|
35
32
|
</n2>
|
36
33
|
</n1>
|
37
34
|
"
|
38
35
|
|
39
|
-
|
36
|
+
=== Read a string, io, or file
|
40
37
|
|
41
|
-
n1_node = AXML.parse(
|
42
|
-
#
|
43
|
-
n1_node = AXML.parse('path/to/file')
|
38
|
+
n1_node = AXML.parse(string) # <- can read xml as string
|
39
|
+
n1_node = AXML.parse(io) # <- can read an io object
|
40
|
+
n1_node = AXML.parse('path/to/file') # <- can read a file
|
44
41
|
|
45
|
-
|
42
|
+
=== Access children
|
46
43
|
|
47
44
|
n1_node.children # -> [array]
|
48
45
|
n1_node.each {|child| # do something with each child }
|
49
46
|
|
50
|
-
|
47
|
+
=== Traverse the whole tree structure
|
51
48
|
|
52
49
|
n1_node.traverse do |node|
|
53
50
|
# pre traversal
|
@@ -55,20 +52,20 @@ Examples
|
|
55
52
|
|
56
53
|
n1_node.traverse(:post) {|node| # post traversal }
|
57
54
|
|
58
|
-
|
55
|
+
=== Get attributes and text
|
59
56
|
|
60
57
|
n2_node['size'] == 'big'
|
61
58
|
n3_node = n2_node.child
|
62
59
|
n3_node.text # -> 'words here'
|
63
|
-
n3_node.content # ->
|
60
|
+
n3_node.content # -> 'words here'
|
64
61
|
|
65
|
-
|
62
|
+
=== Navigate nodes
|
66
63
|
|
67
64
|
n2_node = n1_node.child
|
68
65
|
the_other_n2_node = n2_node.next
|
69
66
|
the_other_n2_node.next = nil
|
70
67
|
|
71
|
-
|
68
|
+
=== Does a little xpath
|
72
69
|
|
73
70
|
# find_first (returns the first node)
|
74
71
|
n3_node = n1_node.find_first('descendant::n3')
|
@@ -81,25 +78,49 @@ Examples
|
|
81
78
|
n1_node.find('descendant::n3') # -> [array of all 3 <n3> nodes]
|
82
79
|
# also callable as find_child and find_descendant
|
83
80
|
|
81
|
+
=== Manipulate tree structure
|
84
82
|
|
85
|
-
|
83
|
+
node.drop # drop the node from its parents
|
84
|
+
## (insert?)
|
86
85
|
|
87
|
-
|
88
|
-
--------------------
|
86
|
+
=== Output
|
89
87
|
|
90
|
-
|
91
|
-
very easy to extend and modify for special cases. It is roughly as fast as
|
92
|
-
libxml, although it currently reads in the entire document first (however,
|
93
|
-
this is memory efficient - nodes are implemented using Struct).
|
88
|
+
XML Output is currently tested only with XMLParser.
|
94
89
|
|
95
|
-
|
96
|
-
|
90
|
+
node.to_s # -> formatted xml
|
91
|
+
node.to_doc # -> with xml header line
|
92
|
+
node.to_doc(filename) # -> written to filename
|
93
|
+
|
94
|
+
See `spec/` dir for more examples and functionality
|
95
|
+
|
96
|
+
== Details
|
97
|
+
|
98
|
+
If using XMLParser, builds nodes out of Struct objects (AXML::El). Currently
|
99
|
+
only parses elements, attributes, and text(content) (no CDATA right now).
|
100
|
+
|
101
|
+
If using LibXML, it uses the underlying LibXML nodes already available. It
|
102
|
+
overrides some methods to treat the text in a text node as the parent node's
|
103
|
+
text attribute.
|
104
|
+
|
105
|
+
== Warnings
|
106
|
+
|
107
|
+
Output of xml (i.e., node#to_s) under LibXML is untested (and probably buggy)
|
108
|
+
since the node text behavior has been modified. Will work it out in future
|
109
|
+
release.
|
110
|
+
|
111
|
+
Doesn't parse CDATA using XMLParser right now.
|
112
|
+
|
113
|
+
== Installation
|
97
114
|
|
98
115
|
gem install axml
|
99
116
|
|
100
|
-
|
101
|
-
|
117
|
+
Can get instructions on installing XMLParser and LibXML by issuing this command:
|
118
|
+
|
119
|
+
ruby -rubygems -e 'require "axml"; puts AXML::Autoload.install_instructions(:all)'
|
120
|
+
|
121
|
+
== See Also
|
102
122
|
|
103
|
-
If you are parsing HTML or complex word processing documents this is not the
|
123
|
+
If you are parsing HTML or complex word processing documents this is not the
|
124
|
+
parser for you. Try something like hpricot or LibXML.
|
104
125
|
|
105
126
|
|
data/Rakefile
CHANGED
@@ -17,10 +17,11 @@ FU = FileUtils
|
|
17
17
|
|
18
18
|
readme = "README"
|
19
19
|
|
20
|
+
rdoc_dir = 'rdoc'
|
20
21
|
rdoc_extra_includes = [readme, "LICENSE"]
|
21
|
-
rdoc_options = ['--main', readme, '--title', NAME]
|
22
|
+
rdoc_options = ['--main', readme, '--title', NAME, '--line-numbers', '--inline-source']
|
22
23
|
|
23
|
-
lib_files = FL["lib
|
24
|
+
lib_files = FL["lib/**/*.rb"]
|
24
25
|
dist_files = lib_files + FL[readme, "LICENSE", "Rakefile", "{specs}/**/*"]
|
25
26
|
changelog = 'CHANGELOG'
|
26
27
|
|
@@ -40,37 +41,22 @@ end
|
|
40
41
|
# DOC
|
41
42
|
###############################################
|
42
43
|
Rake::RDocTask.new do |rd|
|
44
|
+
rd.rdoc_dir = rdoc_dir
|
43
45
|
rd.main = readme
|
44
|
-
rd.rdoc_files.include rdoc_extra_includes
|
46
|
+
rd.rdoc_files.include( rdoc_extra_includes )
|
47
|
+
rd.rdoc_files.include( lib_files.uniq )
|
45
48
|
rd.options.push( *rdoc_options )
|
46
|
-
|
47
|
-
|
48
|
-
task :html_docs do
|
49
|
-
css = 'doc/src/style.css'
|
50
|
-
FU.mkpath 'doc/output'
|
51
|
-
FU.cp css, 'doc/output/'
|
52
|
-
index = 'doc/output/index.html'
|
53
|
-
header = 'doc/src/header'
|
54
|
-
File.open(index, 'w') do |index|
|
55
|
-
index.puts '<html>'
|
56
|
-
index.puts IO.read(header)
|
57
|
-
index.puts '<html><body>'
|
58
|
-
index.puts `bluecloth --fragment #{readme}`
|
59
|
-
|
60
|
-
# add contact info:
|
61
|
-
index.puts '<h2>Contact</h2>'
|
62
|
-
index.puts 'jtprince@gmail.com'.email_encrypt
|
63
|
-
|
64
|
-
index.puts '</body></html>'
|
65
|
-
end
|
49
|
+
cmd = "rdoc -o #{rdoc_dir} #{(lib_files + rdoc_extra_includes).join(" ")} #{rdoc_options.join(" ")}"
|
50
|
+
puts cmd
|
66
51
|
end
|
67
52
|
|
68
53
|
desc "create and upload docs to server"
|
69
|
-
task :upload_docs => :
|
70
|
-
sh "scp -
|
54
|
+
task :upload_docs => [:rdoc] do
|
55
|
+
sh "scp -r #{rdoc_dir}/* jtprince@rubyforge.org:/var/www/gforge-projects/axml/"
|
71
56
|
end
|
72
57
|
|
73
58
|
|
59
|
+
|
74
60
|
###############################################
|
75
61
|
# TESTS
|
76
62
|
###############################################
|
data/lib/axml.rb
CHANGED
@@ -10,30 +10,53 @@ module AXML
|
|
10
10
|
WARN = {:rexml => "Using REXML as parser! This is very slow on large docs!\nCall the method AXML::Autoload.install_instructions for help installing\nsomething FASTER!",
|
11
11
|
}
|
12
12
|
|
13
|
+
# returns the root node
|
13
14
|
# opts:
|
14
|
-
# :parser
|
15
|
+
# :parser => :xmlparser || :libxml
|
16
|
+
# :keep_blanks => false
|
17
|
+
#
|
18
|
+
# arg may be a string of xml, an io object, or a filename
|
19
|
+
# if the first non-blank character of the string is '<' then it is
|
20
|
+
# considered xml. If you want to be sure, you can use parse_file or
|
21
|
+
# parse_string.
|
22
|
+
# Gets the parser with AXML::Autoload.parser! Will set the default parser
|
23
|
+
# to the first one found unless a parser is provided.
|
15
24
|
def parse(arg, opts={})
|
16
25
|
opts = DEFAULTS.merge opts
|
17
26
|
parser = AXML::Autoload.parser!(opts[:parser])
|
18
27
|
method =
|
19
|
-
if arg.is_a?(String)
|
20
|
-
|
28
|
+
if arg.is_a?(String)
|
29
|
+
if xml?(arg)
|
30
|
+
:parse_string
|
31
|
+
else
|
32
|
+
unless File.exist? arg
|
33
|
+
messg = "thinking this is a file: #{arg}\nbut file doesn't exist!"
|
34
|
+
raise RuntimeError, messg
|
35
|
+
end
|
36
|
+
:parse_file
|
37
|
+
end
|
21
38
|
elsif arg.is_a?(IO)
|
22
39
|
:parse_io
|
23
|
-
elsif arg.is_a?(String)
|
24
|
-
:parse_string
|
25
40
|
else
|
26
41
|
raise ArgumentError, "can deal with filenames, Strings, and IO objects.\nDon't know how to work with object of class: #{arg.class}"
|
27
42
|
end
|
28
43
|
parser.send(method, arg, opts)
|
29
44
|
end
|
30
45
|
|
46
|
+
# Returns the root node.
|
47
|
+
# Opens the filename given and calls parse_io on it
|
31
48
|
def parse_file(file, opts={}) # :nodoc:
|
32
49
|
opts = DEFAULTS.merge opts
|
33
50
|
parser = AXML::Autoload.parser!(opts[:parser])
|
34
51
|
File.open(file) {|fh| parser.parse_io(fh, opts) }
|
35
52
|
end
|
36
53
|
|
54
|
+
# returns true if the first nonblank char is a '<'
|
55
|
+
def xml?(string)
|
56
|
+
first_real_char = string.match(/[^\s]/m)
|
57
|
+
!first_real_char.nil? && first_real_char[0] == '<'
|
58
|
+
end
|
59
|
+
|
37
60
|
extend AXML
|
38
61
|
|
39
62
|
end
|
data/lib/axml/autoload.rb
CHANGED
@@ -3,80 +3,79 @@ require 'axml'
|
|
3
3
|
|
4
4
|
module AXML
|
5
5
|
module Autoload
|
6
|
-
class
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
break
|
21
|
-
end
|
6
|
+
# if given a name, loads that parser class; if no arg (or nil), tries
|
7
|
+
# to load a parser from the AXML::PREFERRED list, returning the first
|
8
|
+
# one that works. Sets AXML::DEFAULTS[:parser] with the parser name if
|
9
|
+
# it is available and raises any warnings in AXML::WARN. Raises a
|
10
|
+
# RuntimeError if no parser is found.
|
11
|
+
def self.parser!(name=nil)
|
12
|
+
parser_name_to_use = nil
|
13
|
+
parser_obj = nil
|
14
|
+
if name.nil?
|
15
|
+
PREFERRED.each do |nm|
|
16
|
+
parser_obj = parser(nm)
|
17
|
+
if parser_obj
|
18
|
+
parser_name_to_use = nm
|
19
|
+
break
|
22
20
|
end
|
23
|
-
else
|
24
|
-
parser_name_to_use = name
|
25
|
-
parser_obj = parser(name)
|
26
|
-
end
|
27
|
-
if message = WARN[parser_obj]
|
28
|
-
warn message
|
29
|
-
end
|
30
|
-
if parser_obj
|
31
|
-
AXML::DEFAULTS[:parser] = parser_name_to_use
|
32
|
-
else
|
33
|
-
STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
|
34
|
-
STDERR.puts "INSTALL INSTRUCTIONS:"
|
35
|
-
STDERR.puts "*****************************************************"
|
36
|
-
STDERR.puts install_instructions(:all)
|
37
|
-
STDERR.puts "*****************************************************"
|
38
|
-
raise RuntimeError, "no parser currently available!"
|
39
21
|
end
|
40
|
-
|
22
|
+
else
|
23
|
+
parser_name_to_use = name
|
24
|
+
parser_obj = parser(name)
|
41
25
|
end
|
26
|
+
if message = WARN[parser_obj]
|
27
|
+
warn message
|
28
|
+
end
|
29
|
+
if parser_obj
|
30
|
+
AXML::DEFAULTS[:parser] = parser_name_to_use
|
31
|
+
else
|
32
|
+
STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
|
33
|
+
STDERR.puts "INSTALL INSTRUCTIONS:"
|
34
|
+
STDERR.puts "*****************************************************"
|
35
|
+
STDERR.puts install_instructions(:all)
|
36
|
+
STDERR.puts "*****************************************************"
|
37
|
+
raise RuntimeError, "no parser currently available!"
|
38
|
+
end
|
39
|
+
parser_obj
|
40
|
+
end
|
42
41
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
nil
|
55
|
-
end
|
56
|
-
rescue LoadError
|
42
|
+
# loads the parser (if available) and returns an object that should
|
43
|
+
# respond_to parse_io and parse_string. If the parser is not available
|
44
|
+
# returns nil.
|
45
|
+
def self.parser(name)
|
46
|
+
req = 'axml/' << name.to_s.gsub('_', '/')
|
47
|
+
begin
|
48
|
+
require req
|
49
|
+
const_str = AXML::CLASS_MAPPINGS[name]
|
50
|
+
if AXML.const_defined?(const_str)
|
51
|
+
AXML.const_get(const_str)
|
52
|
+
else
|
57
53
|
nil
|
58
54
|
end
|
55
|
+
rescue LoadError
|
56
|
+
nil
|
59
57
|
end
|
58
|
+
end
|
60
59
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
end
|
60
|
+
def self.install_instructions(name)
|
61
|
+
if name == :all
|
62
|
+
doublets = AXML::PREFERRED.map do |nm|
|
63
|
+
[nm, install_instructions(nm)]
|
64
|
+
end
|
65
|
+
string = ""
|
66
|
+
doublets.each do |k,v|
|
67
|
+
if v
|
68
|
+
string << '-' * k.to_s.size << "\n"
|
69
|
+
string << "#{k}\n"
|
70
|
+
string << '-' * k.to_s.size << "\n"
|
71
|
+
string << "#{v}"
|
74
72
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
73
|
+
end
|
74
|
+
string
|
75
|
+
else
|
76
|
+
case name
|
77
|
+
when :xmlparser
|
78
|
+
string = <<END
|
80
79
|
debian/ubuntu: sudo apt-get install libxml-parser-ruby1.8
|
81
80
|
|
82
81
|
cygwin:
|
@@ -89,8 +88,8 @@ cygwin:
|
|
89
88
|
|
90
89
|
windows: included in one-click-installer
|
91
90
|
END
|
92
|
-
|
93
|
-
|
91
|
+
when :libxml
|
92
|
+
string = <<END
|
94
93
|
deb/ubuntu: sudo apt-get install libxml-ruby
|
95
94
|
|
96
95
|
install as gem:
|
@@ -98,7 +97,6 @@ install as gem:
|
|
98
97
|
|
99
98
|
for more info: http://libxml.rubyforge.org/install.xml
|
100
99
|
END
|
101
|
-
end
|
102
100
|
end
|
103
101
|
end
|
104
102
|
end
|
data/lib/axml/el.rb
CHANGED
@@ -84,7 +84,22 @@ class AXML::El
|
|
84
84
|
result
|
85
85
|
end
|
86
86
|
|
87
|
+
# returns the xml string with the xml header in place
|
88
|
+
# if given a filename, writes it to the file
|
89
|
+
def to_doc(filename=nil)
|
90
|
+
output = '<?xml version="1.0" encoding="UTF-8"?>'
|
91
|
+
output << self.to_s
|
92
|
+
if filename
|
93
|
+
File.open(filename, 'w') do |out|
|
94
|
+
out.print output
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
87
99
|
def to_s(indent=0)
|
100
|
+
if indent.is_a? Hash
|
101
|
+
to_doc
|
102
|
+
end
|
88
103
|
attstring = ""
|
89
104
|
if attrs.size > 0
|
90
105
|
attstring = " " + attrs.collect { |k,v| "#{k}=\"#{escape(v)}\"" }.join(" ")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: axml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -23,7 +23,6 @@ extra_rdoc_files:
|
|
23
23
|
- README
|
24
24
|
- LICENSE
|
25
25
|
files:
|
26
|
-
- lib/axml
|
27
26
|
- lib/axml/el.rb
|
28
27
|
- lib/axml/autoload.rb
|
29
28
|
- lib/axml/libxml.rb
|
@@ -41,6 +40,8 @@ rdoc_options:
|
|
41
40
|
- README
|
42
41
|
- --title
|
43
42
|
- axml
|
43
|
+
- --line-numbers
|
44
|
+
- --inline-source
|
44
45
|
require_paths:
|
45
46
|
- lib
|
46
47
|
required_ruby_version: !ruby/object:Gem::Requirement
|