axml 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +56 -35
- data/Rakefile +11 -25
- data/lib/axml.rb +28 -5
- data/lib/axml/autoload.rb +66 -68
- data/lib/axml/el.rb +15 -0
- metadata +3 -2
data/README
CHANGED
@@ -1,53 +1,50 @@
|
|
1
|
-
AXML
|
2
|
-
====
|
1
|
+
= {AXML}[http://axml.rubyforge.org]
|
3
2
|
|
4
3
|
AXML - Provides a simple, minimalistic DOM for working with data stored in an
|
5
4
|
XML document. The API is very similar to LibXML, differing slightly in the
|
6
|
-
handling of text nodes. It is designed with very large documents in mind:
|
5
|
+
handling of text nodes. It is designed with very large documents in mind:
|
6
|
+
nodes are represented in memory efficient Struct objects and it works with
|
7
|
+
either XMLParser or LibXML!
|
7
8
|
|
8
9
|
'AXML' literally translates into 'ax XML' which succinctly describes the
|
9
10
|
occasional feeling of a programmer towards XML or its myriad parsers. AXML
|
10
11
|
won't solve all your XML woes, but it does make working with XML much less
|
11
12
|
painful.
|
12
13
|
|
13
|
-
|
14
|
-
--------
|
14
|
+
== Overview
|
15
15
|
|
16
16
|
* *fast*: runs on either XMLParser or LibXML
|
17
|
-
* *lean*: as in 'lines of code' and as in 'memory consumption'
|
18
|
-
* *easy to extend*: code your Grandmother could read and understand (if she reads ruby)
|
19
|
-
* PLOS: implements a useful subset of libxml methods for near drop in replacement.
|
17
|
+
* *lean*: as in 'lines of code' and as in 'memory consumption'
|
20
18
|
|
21
|
-
Examples
|
22
|
-
--------
|
19
|
+
== Examples
|
23
20
|
|
24
21
|
require 'axml'
|
25
22
|
|
26
23
|
# a little example xml string to use
|
27
|
-
|
24
|
+
string = "
|
28
25
|
<n1>
|
29
26
|
<n2 size='big'>
|
30
27
|
<n3>words here</n3>
|
31
28
|
<n3></n3>
|
32
29
|
</n2>
|
33
|
-
<n2 size='small'>
|
34
|
-
<n3 id='3'></n3>
|
30
|
+
<n2 size='small' >
|
31
|
+
<n3 id='3' thinks='out loud'></n3>
|
35
32
|
</n2>
|
36
33
|
</n1>
|
37
34
|
"
|
38
35
|
|
39
|
-
|
36
|
+
=== Read a string, io, or file
|
40
37
|
|
41
|
-
n1_node = AXML.parse(
|
42
|
-
#
|
43
|
-
n1_node = AXML.parse('path/to/file')
|
38
|
+
n1_node = AXML.parse(string) # <- can read xml as string
|
39
|
+
n1_node = AXML.parse(io) # <- can read an io object
|
40
|
+
n1_node = AXML.parse('path/to/file') # <- can read a file
|
44
41
|
|
45
|
-
|
42
|
+
=== Access children
|
46
43
|
|
47
44
|
n1_node.children # -> [array]
|
48
45
|
n1_node.each {|child| # do something with each child }
|
49
46
|
|
50
|
-
|
47
|
+
=== Traverse the whole tree structure
|
51
48
|
|
52
49
|
n1_node.traverse do |node|
|
53
50
|
# pre traversal
|
@@ -55,20 +52,20 @@ Examples
|
|
55
52
|
|
56
53
|
n1_node.traverse(:post) {|node| # post traversal }
|
57
54
|
|
58
|
-
|
55
|
+
=== Get attributes and text
|
59
56
|
|
60
57
|
n2_node['size'] == 'big'
|
61
58
|
n3_node = n2_node.child
|
62
59
|
n3_node.text # -> 'words here'
|
63
|
-
n3_node.content # ->
|
60
|
+
n3_node.content # -> 'words here'
|
64
61
|
|
65
|
-
|
62
|
+
=== Navigate nodes
|
66
63
|
|
67
64
|
n2_node = n1_node.child
|
68
65
|
the_other_n2_node = n2_node.next
|
69
66
|
the_other_n2_node.next = nil
|
70
67
|
|
71
|
-
|
68
|
+
=== Does a little xpath
|
72
69
|
|
73
70
|
# find_first (returns the first node)
|
74
71
|
n3_node = n1_node.find_first('descendant::n3')
|
@@ -81,25 +78,49 @@ Examples
|
|
81
78
|
n1_node.find('descendant::n3') # -> [array of all 3 <n3> nodes]
|
82
79
|
# also callable as find_child and find_descendant
|
83
80
|
|
81
|
+
=== Manipulate tree structure
|
84
82
|
|
85
|
-
|
83
|
+
node.drop # drop the node from its parents
|
84
|
+
## (insert?)
|
86
85
|
|
87
|
-
|
88
|
-
--------------------
|
86
|
+
=== Output
|
89
87
|
|
90
|
-
|
91
|
-
very easy to extend and modify for special cases. It is roughly as fast as
|
92
|
-
libxml, although it currently reads in the entire document first (however,
|
93
|
-
this is memory efficient - nodes are implemented using Struct).
|
88
|
+
XML Output is currently tested only with XMLParser.
|
94
89
|
|
95
|
-
|
96
|
-
|
90
|
+
node.to_s # -> formatted xml
|
91
|
+
node.to_doc # -> with xml header line
|
92
|
+
node.to_doc(filename) # -> written to filename
|
93
|
+
|
94
|
+
See `spec/` dir for more examples and functionality
|
95
|
+
|
96
|
+
== Details
|
97
|
+
|
98
|
+
If using XMLParser, builds nodes out of Struct objects (AXML::El). Currently
|
99
|
+
only parses elements, attributes, and text(content) (no CDATA right now).
|
100
|
+
|
101
|
+
If using LibXML, it uses the underlying LibXML nodes already available. It
|
102
|
+
overrides some methods to treat the text in a text node as the parent node's
|
103
|
+
text attribute.
|
104
|
+
|
105
|
+
== Warnings
|
106
|
+
|
107
|
+
Output of xml (i.e., node#to_s) under LibXML is untested (and probably buggy)
|
108
|
+
since the node text behavior has been modified. Will work it out in future
|
109
|
+
release.
|
110
|
+
|
111
|
+
Doesn't parse CDATA using XMLParser right now.
|
112
|
+
|
113
|
+
== Installation
|
97
114
|
|
98
115
|
gem install axml
|
99
116
|
|
100
|
-
|
101
|
-
|
117
|
+
Can get instructions on installing XMLParser and LibXML by issuing this command:
|
118
|
+
|
119
|
+
ruby -rubygems -e 'require "axml"; puts AXML::Autoload.install_instructions(:all)'
|
120
|
+
|
121
|
+
== See Also
|
102
122
|
|
103
|
-
If you are parsing HTML or complex word processing documents this is not the
|
123
|
+
If you are parsing HTML or complex word processing documents this is not the
|
124
|
+
parser for you. Try something like hpricot or LibXML.
|
104
125
|
|
105
126
|
|
data/Rakefile
CHANGED
@@ -17,10 +17,11 @@ FU = FileUtils
|
|
17
17
|
|
18
18
|
readme = "README"
|
19
19
|
|
20
|
+
rdoc_dir = 'rdoc'
|
20
21
|
rdoc_extra_includes = [readme, "LICENSE"]
|
21
|
-
rdoc_options = ['--main', readme, '--title', NAME]
|
22
|
+
rdoc_options = ['--main', readme, '--title', NAME, '--line-numbers', '--inline-source']
|
22
23
|
|
23
|
-
lib_files = FL["lib
|
24
|
+
lib_files = FL["lib/**/*.rb"]
|
24
25
|
dist_files = lib_files + FL[readme, "LICENSE", "Rakefile", "{specs}/**/*"]
|
25
26
|
changelog = 'CHANGELOG'
|
26
27
|
|
@@ -40,37 +41,22 @@ end
|
|
40
41
|
# DOC
|
41
42
|
###############################################
|
42
43
|
Rake::RDocTask.new do |rd|
|
44
|
+
rd.rdoc_dir = rdoc_dir
|
43
45
|
rd.main = readme
|
44
|
-
rd.rdoc_files.include rdoc_extra_includes
|
46
|
+
rd.rdoc_files.include( rdoc_extra_includes )
|
47
|
+
rd.rdoc_files.include( lib_files.uniq )
|
45
48
|
rd.options.push( *rdoc_options )
|
46
|
-
|
47
|
-
|
48
|
-
task :html_docs do
|
49
|
-
css = 'doc/src/style.css'
|
50
|
-
FU.mkpath 'doc/output'
|
51
|
-
FU.cp css, 'doc/output/'
|
52
|
-
index = 'doc/output/index.html'
|
53
|
-
header = 'doc/src/header'
|
54
|
-
File.open(index, 'w') do |index|
|
55
|
-
index.puts '<html>'
|
56
|
-
index.puts IO.read(header)
|
57
|
-
index.puts '<html><body>'
|
58
|
-
index.puts `bluecloth --fragment #{readme}`
|
59
|
-
|
60
|
-
# add contact info:
|
61
|
-
index.puts '<h2>Contact</h2>'
|
62
|
-
index.puts 'jtprince@gmail.com'.email_encrypt
|
63
|
-
|
64
|
-
index.puts '</body></html>'
|
65
|
-
end
|
49
|
+
cmd = "rdoc -o #{rdoc_dir} #{(lib_files + rdoc_extra_includes).join(" ")} #{rdoc_options.join(" ")}"
|
50
|
+
puts cmd
|
66
51
|
end
|
67
52
|
|
68
53
|
desc "create and upload docs to server"
|
69
|
-
task :upload_docs => :
|
70
|
-
sh "scp -
|
54
|
+
task :upload_docs => [:rdoc] do
|
55
|
+
sh "scp -r #{rdoc_dir}/* jtprince@rubyforge.org:/var/www/gforge-projects/axml/"
|
71
56
|
end
|
72
57
|
|
73
58
|
|
59
|
+
|
74
60
|
###############################################
|
75
61
|
# TESTS
|
76
62
|
###############################################
|
data/lib/axml.rb
CHANGED
@@ -10,30 +10,53 @@ module AXML
|
|
10
10
|
WARN = {:rexml => "Using REXML as parser! This is very slow on large docs!\nCall the method AXML::Autoload.install_instructions for help installing\nsomething FASTER!",
|
11
11
|
}
|
12
12
|
|
13
|
+
# returns the root node
|
13
14
|
# opts:
|
14
|
-
# :parser
|
15
|
+
# :parser => :xmlparser || :libxml
|
16
|
+
# :keep_blanks => false
|
17
|
+
#
|
18
|
+
# arg may be a string of xml, an io object, or a filename
|
19
|
+
# if the first non-blank character of the string is '<' then it is
|
20
|
+
# considered xml. If you want to be sure, you can use parse_file or
|
21
|
+
# parse_string.
|
22
|
+
# Gets the parser with AXML::Autoload.parser! Will set the default parser
|
23
|
+
# to the first one found unless a parser is provided.
|
15
24
|
def parse(arg, opts={})
|
16
25
|
opts = DEFAULTS.merge opts
|
17
26
|
parser = AXML::Autoload.parser!(opts[:parser])
|
18
27
|
method =
|
19
|
-
if arg.is_a?(String)
|
20
|
-
|
28
|
+
if arg.is_a?(String)
|
29
|
+
if xml?(arg)
|
30
|
+
:parse_string
|
31
|
+
else
|
32
|
+
unless File.exist? arg
|
33
|
+
messg = "thinking this is a file: #{arg}\nbut file doesn't exist!"
|
34
|
+
raise RuntimeError, messg
|
35
|
+
end
|
36
|
+
:parse_file
|
37
|
+
end
|
21
38
|
elsif arg.is_a?(IO)
|
22
39
|
:parse_io
|
23
|
-
elsif arg.is_a?(String)
|
24
|
-
:parse_string
|
25
40
|
else
|
26
41
|
raise ArgumentError, "can deal with filenames, Strings, and IO objects.\nDon't know how to work with object of class: #{arg.class}"
|
27
42
|
end
|
28
43
|
parser.send(method, arg, opts)
|
29
44
|
end
|
30
45
|
|
46
|
+
# Returns the root node.
|
47
|
+
# Opens the filename given and calls parse_io on it
|
31
48
|
def parse_file(file, opts={}) # :nodoc:
|
32
49
|
opts = DEFAULTS.merge opts
|
33
50
|
parser = AXML::Autoload.parser!(opts[:parser])
|
34
51
|
File.open(file) {|fh| parser.parse_io(fh, opts) }
|
35
52
|
end
|
36
53
|
|
54
|
+
# returns true if the first nonblank char is a '<'
|
55
|
+
def xml?(string)
|
56
|
+
first_real_char = string.match(/[^\s]/m)
|
57
|
+
!first_real_char.nil? && first_real_char[0] == '<'
|
58
|
+
end
|
59
|
+
|
37
60
|
extend AXML
|
38
61
|
|
39
62
|
end
|
data/lib/axml/autoload.rb
CHANGED
@@ -3,80 +3,79 @@ require 'axml'
|
|
3
3
|
|
4
4
|
module AXML
|
5
5
|
module Autoload
|
6
|
-
class
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
break
|
21
|
-
end
|
6
|
+
# if given a name, loads that parser class; if no arg (or nil), tries
|
7
|
+
# to load a parser from the AXML::PREFERRED list, returning the first
|
8
|
+
# one that works. Sets AXML::DEFAULTS[:parser] with the parser name if
|
9
|
+
# it is available and raises any warnings in AXML::WARN. Raises a
|
10
|
+
# RuntimeError if no parser is found.
|
11
|
+
def self.parser!(name=nil)
|
12
|
+
parser_name_to_use = nil
|
13
|
+
parser_obj = nil
|
14
|
+
if name.nil?
|
15
|
+
PREFERRED.each do |nm|
|
16
|
+
parser_obj = parser(nm)
|
17
|
+
if parser_obj
|
18
|
+
parser_name_to_use = nm
|
19
|
+
break
|
22
20
|
end
|
23
|
-
else
|
24
|
-
parser_name_to_use = name
|
25
|
-
parser_obj = parser(name)
|
26
|
-
end
|
27
|
-
if message = WARN[parser_obj]
|
28
|
-
warn message
|
29
|
-
end
|
30
|
-
if parser_obj
|
31
|
-
AXML::DEFAULTS[:parser] = parser_name_to_use
|
32
|
-
else
|
33
|
-
STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
|
34
|
-
STDERR.puts "INSTALL INSTRUCTIONS:"
|
35
|
-
STDERR.puts "*****************************************************"
|
36
|
-
STDERR.puts install_instructions(:all)
|
37
|
-
STDERR.puts "*****************************************************"
|
38
|
-
raise RuntimeError, "no parser currently available!"
|
39
21
|
end
|
40
|
-
|
22
|
+
else
|
23
|
+
parser_name_to_use = name
|
24
|
+
parser_obj = parser(name)
|
41
25
|
end
|
26
|
+
if message = WARN[parser_obj]
|
27
|
+
warn message
|
28
|
+
end
|
29
|
+
if parser_obj
|
30
|
+
AXML::DEFAULTS[:parser] = parser_name_to_use
|
31
|
+
else
|
32
|
+
STDERR.puts "NO PARSERS CURRENTLY AVAILABLE!"
|
33
|
+
STDERR.puts "INSTALL INSTRUCTIONS:"
|
34
|
+
STDERR.puts "*****************************************************"
|
35
|
+
STDERR.puts install_instructions(:all)
|
36
|
+
STDERR.puts "*****************************************************"
|
37
|
+
raise RuntimeError, "no parser currently available!"
|
38
|
+
end
|
39
|
+
parser_obj
|
40
|
+
end
|
42
41
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
nil
|
55
|
-
end
|
56
|
-
rescue LoadError
|
42
|
+
# loads the parser (if available) and returns an object ( that should
|
43
|
+
# respond_to parse_io and parse_string). If the parser is not available
|
44
|
+
# returns nil.
|
45
|
+
def self.parser(name)
|
46
|
+
req = 'axml/' << name.to_s.gsub('_', '/')
|
47
|
+
begin
|
48
|
+
require req
|
49
|
+
const_str = AXML::CLASS_MAPPINGS[name]
|
50
|
+
if AXML.const_defined?(const_str)
|
51
|
+
AXML.const_get(const_str)
|
52
|
+
else
|
57
53
|
nil
|
58
54
|
end
|
55
|
+
rescue LoadError
|
56
|
+
nil
|
59
57
|
end
|
58
|
+
end
|
60
59
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
end
|
60
|
+
def self.install_instructions(name)
|
61
|
+
if name == :all
|
62
|
+
doublets = AXML::PREFERRED.map do |nm|
|
63
|
+
[nm, install_instructions(nm)]
|
64
|
+
end
|
65
|
+
string = ""
|
66
|
+
doublets.each do |k,v|
|
67
|
+
if v
|
68
|
+
string << '-' * k.to_s.size << "\n"
|
69
|
+
string << "#{k}\n"
|
70
|
+
string << '-' * k.to_s.size << "\n"
|
71
|
+
string << "#{v}"
|
74
72
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
73
|
+
end
|
74
|
+
string
|
75
|
+
else
|
76
|
+
case name
|
77
|
+
when :xmlparser
|
78
|
+
string = <<END
|
80
79
|
debian/ubuntu: sudo apt-get install libxml-parser-ruby1.8
|
81
80
|
|
82
81
|
cygwin:
|
@@ -89,8 +88,8 @@ cygwin:
|
|
89
88
|
|
90
89
|
windows: included in one-click-installer
|
91
90
|
END
|
92
|
-
|
93
|
-
|
91
|
+
when :libxml
|
92
|
+
string = <<END
|
94
93
|
deb/ubuntu: sudo apt-get install libxml-ruby
|
95
94
|
|
96
95
|
install as gem:
|
@@ -98,7 +97,6 @@ install as gem:
|
|
98
97
|
|
99
98
|
for more info: http://libxml.rubyforge.org/install.xml
|
100
99
|
END
|
101
|
-
end
|
102
100
|
end
|
103
101
|
end
|
104
102
|
end
|
data/lib/axml/el.rb
CHANGED
@@ -84,7 +84,22 @@ class AXML::El
|
|
84
84
|
result
|
85
85
|
end
|
86
86
|
|
87
|
+
# returns the xml string with the xml header in place
|
88
|
+
# if given a filename, writes it to the file
|
89
|
+
def to_doc(filename=nil)
|
90
|
+
output = '<?xml version="1.0" encoding="UTF-8"?>'
|
91
|
+
output << self.to_s
|
92
|
+
if filename
|
93
|
+
File.open(filename, 'w') do |out|
|
94
|
+
out.print output
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
87
99
|
def to_s(indent=0)
|
100
|
+
if indent.is_a? Hash
|
101
|
+
to_doc
|
102
|
+
end
|
88
103
|
attstring = ""
|
89
104
|
if attrs.size > 0
|
90
105
|
attstring = " " + attrs.collect { |k,v| "#{k}=\"#{escape(v)}\"" }.join(" ")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: axml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -23,7 +23,6 @@ extra_rdoc_files:
|
|
23
23
|
- README
|
24
24
|
- LICENSE
|
25
25
|
files:
|
26
|
-
- lib/axml
|
27
26
|
- lib/axml/el.rb
|
28
27
|
- lib/axml/autoload.rb
|
29
28
|
- lib/axml/libxml.rb
|
@@ -41,6 +40,8 @@ rdoc_options:
|
|
41
40
|
- README
|
42
41
|
- --title
|
43
42
|
- axml
|
43
|
+
- --line-numbers
|
44
|
+
- --inline-source
|
44
45
|
require_paths:
|
45
46
|
- lib
|
46
47
|
required_ruby_version: !ruby/object:Gem::Requirement
|