xml_node_stream 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +30 -0
- data/README.md +139 -0
- data/VERSION +1 -0
- data/lib/xml_node_stream/http_stream.rb +179 -0
- data/lib/xml_node_stream/node.rb +98 -47
- data/lib/xml_node_stream/parser/base.rb +49 -12
- data/lib/xml_node_stream/parser/libxml_parser.rb +36 -9
- data/lib/xml_node_stream/parser/nokogiri_parser.rb +42 -12
- data/lib/xml_node_stream/parser/rexml_parser.rb +35 -8
- data/lib/xml_node_stream/parser.rb +54 -29
- data/lib/xml_node_stream/selector.rb +144 -34
- data/lib/xml_node_stream.rb +18 -5
- data/xml_node_stream.gemspec +39 -0
- metadata +46 -88
- data/README.rdoc +0 -61
- data/Rakefile +0 -44
- data/spec/node_spec.rb +0 -140
- data/spec/parser_spec.rb +0 -148
- data/spec/selector_spec.rb +0 -73
- data/spec/spec_helper.rb +0 -2
- data/spec/test.xml +0 -57
- data/spec/xml_node_stream_spec.rb +0 -11
- /data/{MIT_LICENSE → MIT-LICENSE} +0 -0
metadata
CHANGED
|
@@ -1,67 +1,40 @@
|
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xml_node_stream
|
|
3
|
-
version: !ruby/object:Gem::Version
|
|
4
|
-
|
|
5
|
-
prerelease:
|
|
6
|
-
segments:
|
|
7
|
-
- 1
|
|
8
|
-
- 0
|
|
9
|
-
- 2
|
|
10
|
-
version: 1.0.2
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 2.0.0
|
|
11
5
|
platform: ruby
|
|
12
|
-
authors:
|
|
6
|
+
authors:
|
|
13
7
|
- Brian Durand
|
|
14
|
-
autorequire:
|
|
15
8
|
bindir: bin
|
|
16
9
|
cert_chain: []
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
prerelease: false
|
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
-
none: false
|
|
26
|
-
requirements:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: bundler
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
27
16
|
- - ">="
|
|
28
|
-
- !ruby/object:Gem::Version
|
|
29
|
-
|
|
30
|
-
segments:
|
|
31
|
-
- 2
|
|
32
|
-
- 0
|
|
33
|
-
- 0
|
|
34
|
-
version: 2.0.0
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
35
19
|
type: :development
|
|
36
|
-
version_requirements: *id001
|
|
37
|
-
- !ruby/object:Gem::Dependency
|
|
38
|
-
name: jeweler
|
|
39
20
|
prerelease: false
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
requirements:
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
43
23
|
- - ">="
|
|
44
|
-
- !ruby/object:Gem::Version
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
version: "0"
|
|
49
|
-
type: :development
|
|
50
|
-
version_requirements: *id002
|
|
51
|
-
description:
|
|
52
|
-
email: brian@embellishedvisions.com
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
26
|
+
email:
|
|
27
|
+
- bbdurand@gmail.com
|
|
53
28
|
executables: []
|
|
54
|
-
|
|
55
29
|
extensions: []
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
-
|
|
59
|
-
-
|
|
60
|
-
|
|
61
|
-
-
|
|
62
|
-
- README.rdoc
|
|
63
|
-
- Rakefile
|
|
30
|
+
extra_rdoc_files: []
|
|
31
|
+
files:
|
|
32
|
+
- CHANGELOG.md
|
|
33
|
+
- MIT-LICENSE
|
|
34
|
+
- README.md
|
|
35
|
+
- VERSION
|
|
64
36
|
- lib/xml_node_stream.rb
|
|
37
|
+
- lib/xml_node_stream/http_stream.rb
|
|
65
38
|
- lib/xml_node_stream/node.rb
|
|
66
39
|
- lib/xml_node_stream/parser.rb
|
|
67
40
|
- lib/xml_node_stream/parser/base.rb
|
|
@@ -69,45 +42,30 @@ files:
|
|
|
69
42
|
- lib/xml_node_stream/parser/nokogiri_parser.rb
|
|
70
43
|
- lib/xml_node_stream/parser/rexml_parser.rb
|
|
71
44
|
- lib/xml_node_stream/selector.rb
|
|
72
|
-
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
licenses: []
|
|
81
|
-
|
|
82
|
-
post_install_message:
|
|
45
|
+
- xml_node_stream.gemspec
|
|
46
|
+
homepage: https://github.com/bdurand/xml_node_stream
|
|
47
|
+
licenses:
|
|
48
|
+
- MIT
|
|
49
|
+
metadata:
|
|
50
|
+
homepage_uri: https://github.com/bdurand/xml_node_stream
|
|
51
|
+
source_code_uri: https://github.com/bdurand/xml_node_stream
|
|
52
|
+
changelog_uri: https://github.com/bdurand/xml_node_stream/blob/main/CHANGELOG.md
|
|
83
53
|
rdoc_options: []
|
|
84
|
-
|
|
85
|
-
require_paths:
|
|
54
|
+
require_paths:
|
|
86
55
|
- lib
|
|
87
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
|
88
|
-
|
|
89
|
-
requirements:
|
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
90
58
|
- - ">="
|
|
91
|
-
- !ruby/object:Gem::Version
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
version: "0"
|
|
96
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
|
-
none: false
|
|
98
|
-
requirements:
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0'
|
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
|
+
requirements:
|
|
99
63
|
- - ">="
|
|
100
|
-
- !ruby/object:Gem::Version
|
|
101
|
-
|
|
102
|
-
segments:
|
|
103
|
-
- 0
|
|
104
|
-
version: "0"
|
|
64
|
+
- !ruby/object:Gem::Version
|
|
65
|
+
version: '0'
|
|
105
66
|
requirements: []
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
specification_version: 3
|
|
111
|
-
summary: Simple XML parser wrapper that provides the benefits of stream parsing with the ease of using document nodes.
|
|
67
|
+
rubygems_version: 4.0.3
|
|
68
|
+
specification_version: 4
|
|
69
|
+
summary: Memory-efficient XML parser that reduces memory allocation when parsing large
|
|
70
|
+
XML documents while maintaining a simple, easy-to-use interface.
|
|
112
71
|
test_files: []
|
|
113
|
-
|
data/README.rdoc
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
= XML Node Stream
|
|
2
|
-
|
|
3
|
-
This gem provides a very easy to use XML parser that provides the benefits of both stream parsing (i.e. SAX) and document parsing (i.e. DOM). In addition, it provides a unified parsing language for each of the major Ruby XML parsers (REXML, Nokogiri, and LibXML) so that your code doesn't have to be bound to a particular XML library.
|
|
4
|
-
|
|
5
|
-
== Stream Parsing
|
|
6
|
-
|
|
7
|
-
The primary purpose of this gem is to facilitate parsing large XML files (i.e. several megabytes in size). Often, reading these files into a document structure is not feasible because the whole document must be read into memory. Stream/SAX parsing solves this issue by reading in the file incrementally and providing callbacks for various events. This method can be quite painful to deal with for any sort of complex document structure.
|
|
8
|
-
|
|
9
|
-
This gem attempts to solve both of these issues by combining the best features of both. Parsing is performed by a stream parser which constructs document style nodes and calls back to the application code with these nodes. When your application is done with a node, it can release it to free up memory and keep your heap from bloating.
|
|
10
|
-
|
|
11
|
-
In order to keep the interface simple and universal, only XML elements and text nodes are supported. XML processing instructions and comments will be ignored.
|
|
12
|
-
|
|
13
|
-
== Examples
|
|
14
|
-
|
|
15
|
-
Suppose we have a file with every book in the world in it:
|
|
16
|
-
|
|
17
|
-
<books>
|
|
18
|
-
<book isbn="123456">
|
|
19
|
-
<title>Moby Dick</title>
|
|
20
|
-
<author>Herman Melville</author>
|
|
21
|
-
<categories>
|
|
22
|
-
<category>Fiction</category>
|
|
23
|
-
<category>Adventure</category>
|
|
24
|
-
</categories>
|
|
25
|
-
</book>
|
|
26
|
-
<book isbn="98765643">
|
|
27
|
-
<title>The Decline and Fall of the Roman Empire</title>
|
|
28
|
-
<author>Edward Gibbon</author>
|
|
29
|
-
<categories>
|
|
30
|
-
<category>History</category>
|
|
31
|
-
<category>Ancient</category>
|
|
32
|
-
</categories>
|
|
33
|
-
</book>
|
|
34
|
-
...
|
|
35
|
-
</books>
|
|
36
|
-
|
|
37
|
-
And we want to get them into our Books data model:
|
|
38
|
-
|
|
39
|
-
XmlNodeStream.parse('/tmp/books.xml') do |node|
|
|
40
|
-
if node.path == '/books/book'
|
|
41
|
-
book = Book.new
|
|
42
|
-
book.isbn = node['isbn']
|
|
43
|
-
book.title = node.find('title').value
|
|
44
|
-
book.author = node.find('author/text()')
|
|
45
|
-
book.categories = node.select('categories/category/text()')
|
|
46
|
-
book.save
|
|
47
|
-
node.release!
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
== Releasing Nodes
|
|
52
|
-
|
|
53
|
-
In the above example, what prevents memory bloat when parsing a large document is the call to node.release!. This call will remove the node from the node tree. The general practice is to look for the higher level nodes you are interested in and then release them immediately. If there are nodes you don't care about at all, those can be released immediately as well.
|
|
54
|
-
|
|
55
|
-
A sample 77Mb XML document parsed into Nokogiri consumes over 800Mb of memory. Parsing the same document with XmlNodeStream and releasing top level nodes as they're processed uses less than 1Mb.
|
|
56
|
-
|
|
57
|
-
== XPath
|
|
58
|
-
|
|
59
|
-
You can use a subset of the XPath language to navigate nodes. The only parts of XPath implemented are the paths themselves and the text() function. The text() function is useful for getting the value of a node directly from the find or select methods without having to do a nil check on the nodes. For instance, in the above example we can get the name of an author with node.find('author/text()') instead of node.find('author').value if node.find('author').
|
|
60
|
-
|
|
61
|
-
The rest of the XPath language is not implemented since it is a programming language and there is really no need for it since we already have Ruby at our disposal which is far more powerful than XPath. See the Selector class for details.
|
data/Rakefile
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
require 'rubygems'
|
|
2
|
-
require 'rake'
|
|
3
|
-
require 'rake/rdoctask'
|
|
4
|
-
|
|
5
|
-
desc 'Default: run unit tests.'
|
|
6
|
-
task :default => :test
|
|
7
|
-
|
|
8
|
-
begin
|
|
9
|
-
require 'rspec'
|
|
10
|
-
require 'rspec/core/rake_task'
|
|
11
|
-
desc 'Run the unit tests'
|
|
12
|
-
RSpec::Core::RakeTask.new(:test)
|
|
13
|
-
rescue LoadError
|
|
14
|
-
task :test do
|
|
15
|
-
STDERR.puts "You must have rspec 2.0 installed to run the tests"
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
desc 'Generate documentation for xml_node_stream.'
|
|
20
|
-
Rake::RDocTask.new(:rdoc) do |rdoc|
|
|
21
|
-
rdoc.rdoc_dir = 'rdoc'
|
|
22
|
-
rdoc.options << '--title' << 'XML Node Stream' << '--line-numbers' << '--inline-source' << '--main' << 'README.rdoc'
|
|
23
|
-
rdoc.rdoc_files.include('README.rdoc')
|
|
24
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
begin
|
|
28
|
-
require 'jeweler'
|
|
29
|
-
Jeweler::Tasks.new do |gem|
|
|
30
|
-
gem.name = "xml_node_stream"
|
|
31
|
-
gem.summary = %Q{Simple XML parser wrapper that provides the benefits of stream parsing with the ease of using document nodes.}
|
|
32
|
-
gem.email = "brian@embellishedvisions.com"
|
|
33
|
-
gem.homepage = "http://github.com/bdurand/xml_node_stream"
|
|
34
|
-
gem.authors = ["Brian Durand"]
|
|
35
|
-
gem.files = FileList["lib/**/*", "spec/**/*", "README.rdoc", "Rakefile", "MIT_LICENSE"].to_a
|
|
36
|
-
gem.has_rdoc = true
|
|
37
|
-
gem.extra_rdoc_files = ["README.rdoc", "MIT_LICENSE"]
|
|
38
|
-
gem.add_development_dependency('rspec', '>=2.0.0')
|
|
39
|
-
gem.add_development_dependency('jeweler')
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
Jeweler::GemcutterTasks.new
|
|
43
|
-
rescue LoadError
|
|
44
|
-
end
|
data/spec/node_spec.rb
DELETED
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))
|
|
2
|
-
|
|
3
|
-
describe XmlNodeStream::Node do
|
|
4
|
-
|
|
5
|
-
it "should have a name" do
|
|
6
|
-
node = XmlNodeStream::Node.new("tag")
|
|
7
|
-
node.name.should == "tag"
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should have attributes" do
|
|
11
|
-
node = XmlNodeStream::Node.new("tag")
|
|
12
|
-
node.attributes.should == {}
|
|
13
|
-
node["attr1"].should == nil
|
|
14
|
-
node = XmlNodeStream::Node.new("tag", nil, "attr1" => "val1", "attr2" => "val2")
|
|
15
|
-
node.attributes.should == {"attr1" => "val1", "attr2" => "val2"}
|
|
16
|
-
node["attr1"].should == "val1"
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it "should have a value" do
|
|
20
|
-
node = XmlNodeStream::Node.new("tag")
|
|
21
|
-
node.value.should == nil
|
|
22
|
-
node = XmlNodeStream::Node.new("tag", nil, nil, "value")
|
|
23
|
-
node.value.should == "value"
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
it "should have a parent and children" do
|
|
27
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
28
|
-
parent.parent.should == nil
|
|
29
|
-
parent.children.should == []
|
|
30
|
-
child_1 = XmlNodeStream::Node.new("child", parent)
|
|
31
|
-
child_2 = XmlNodeStream::Node.new("child")
|
|
32
|
-
parent.add_child(child_2)
|
|
33
|
-
parent.children.should == [child_1, child_2]
|
|
34
|
-
child_1.parent.should == parent
|
|
35
|
-
child_2.parent.should == parent
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
it "should be able to remove children" do
|
|
39
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
40
|
-
child_1 = XmlNodeStream::Node.new("child", parent)
|
|
41
|
-
child_2 = XmlNodeStream::Node.new("child", parent)
|
|
42
|
-
parent.children.should == [child_1, child_2]
|
|
43
|
-
parent.remove_child(child_1)
|
|
44
|
-
parent.children.should == [child_2]
|
|
45
|
-
child_1.parent.should == nil
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
it "should release itself from its parent" do
|
|
49
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
50
|
-
child_1 = XmlNodeStream::Node.new("child", parent)
|
|
51
|
-
child_2 = XmlNodeStream::Node.new("child", parent)
|
|
52
|
-
parent.children.should == [child_1, child_2]
|
|
53
|
-
child_1.release!
|
|
54
|
-
parent.children.should == [child_2]
|
|
55
|
-
child_1.parent.should == nil
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
it "should have ancestors" do
|
|
59
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
60
|
-
child = XmlNodeStream::Node.new("child", parent)
|
|
61
|
-
grandchild = XmlNodeStream::Node.new("grandchild", child)
|
|
62
|
-
parent.ancestors.should == []
|
|
63
|
-
child.ancestors.should == [parent]
|
|
64
|
-
grandchild.ancestors.should == [child, parent]
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
it "should have descendants" do
|
|
68
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
69
|
-
child_1 = XmlNodeStream::Node.new("child", parent)
|
|
70
|
-
child_2 = XmlNodeStream::Node.new("child", parent)
|
|
71
|
-
grandchild_1 = XmlNodeStream::Node.new("grandchild", child_1)
|
|
72
|
-
grandchild_2 = XmlNodeStream::Node.new("grandchild", child_1)
|
|
73
|
-
parent.descendants.should == [child_1, child_2, grandchild_1, grandchild_2]
|
|
74
|
-
child_1.descendants.should == [grandchild_1, grandchild_2]
|
|
75
|
-
grandchild_1.descendants.should == []
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
it "should have a root node" do
|
|
79
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
80
|
-
child = XmlNodeStream::Node.new("child", parent)
|
|
81
|
-
grandchild = XmlNodeStream::Node.new("grandchild", child)
|
|
82
|
-
parent.root.should == parent
|
|
83
|
-
child.root.should == parent
|
|
84
|
-
grandchild.root.should == parent
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
it "should have a path" do
|
|
88
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
89
|
-
child = XmlNodeStream::Node.new("child", parent)
|
|
90
|
-
grandchild = XmlNodeStream::Node.new("grandchild", child)
|
|
91
|
-
parent.path.should == "/tag"
|
|
92
|
-
child.path.should == "/tag/child"
|
|
93
|
-
grandchild.path.should == "/tag/child/grandchild"
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
it "should be able to select related nodes using a selector" do
|
|
97
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
98
|
-
child_1 = XmlNodeStream::Node.new("child", parent)
|
|
99
|
-
child_2 = XmlNodeStream::Node.new("child", parent)
|
|
100
|
-
grandchild_1 = XmlNodeStream::Node.new("grandchild", child_1, nil, "val1")
|
|
101
|
-
grandchild_2 = XmlNodeStream::Node.new("grandchild", child_1, nil, "val2")
|
|
102
|
-
parent.select("nothing").should == []
|
|
103
|
-
parent.select("child").should == [child_1, child_2]
|
|
104
|
-
parent.select("child/grandchild").should == [grandchild_1, grandchild_2]
|
|
105
|
-
parent.select("child/grandchild/text()").should == ["val1", "val2"]
|
|
106
|
-
grandchild_1.select("../..").should == [parent]
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
it "should be able to find the first related node using a selector" do
|
|
110
|
-
parent = XmlNodeStream::Node.new("tag")
|
|
111
|
-
child_1 = XmlNodeStream::Node.new("child", parent)
|
|
112
|
-
child_2 = XmlNodeStream::Node.new("child", parent)
|
|
113
|
-
grandchild_1 = XmlNodeStream::Node.new("grandchild", child_1, nil, "val1")
|
|
114
|
-
grandchild_2 = XmlNodeStream::Node.new("grandchild", child_1, nil, "val2")
|
|
115
|
-
parent.find("nothing").should == nil
|
|
116
|
-
parent.find("child").should == child_1
|
|
117
|
-
parent.find("child/grandchild").should == grandchild_1
|
|
118
|
-
parent.find("child/grandchild/text()").should == "val1"
|
|
119
|
-
grandchild_1.find("../..").should == parent
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
it "should append text which strips whitespace from the start and end of the value" do
|
|
123
|
-
node = XmlNodeStream::Node.new("tag")
|
|
124
|
-
node.append(" ")
|
|
125
|
-
node.append(" \t\r\nhello ")
|
|
126
|
-
node.append(" there\n")
|
|
127
|
-
node.finish!
|
|
128
|
-
node.value.should == "hello there"
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
it "should append cdata which preserves all whitespace" do
|
|
132
|
-
node = XmlNodeStream::Node.new("tag")
|
|
133
|
-
node.append_cdata(" ")
|
|
134
|
-
node.append(" \t\r\nhello ")
|
|
135
|
-
node.append_cdata(" there\n")
|
|
136
|
-
node.finish!
|
|
137
|
-
node.value.should == " \t\r\nhello there\n"
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
end
|
data/spec/parser_spec.rb
DELETED
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))
|
|
2
|
-
|
|
3
|
-
describe XmlNodeStream::Parser do
|
|
4
|
-
|
|
5
|
-
before :each do
|
|
6
|
-
@text_xml_path = File.expand_path(File.join(File.dirname(__FILE__), 'test.xml'))
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
it "should parse a document in a string" do
|
|
10
|
-
validate_text_xml(XmlNodeStream::Parser.parse(File.read(@text_xml_path)))
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it "should parse a document in a file path string" do
|
|
14
|
-
validate_text_xml(XmlNodeStream::Parser.parse(@text_xml_path))
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
it "should parse a document in a file path" do
|
|
18
|
-
validate_text_xml(XmlNodeStream::Parser.parse(Pathname.new(@text_xml_path)))
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it "should parse a document in a url string" do
|
|
22
|
-
uri = URI.parse("http://test.host/test.xml")
|
|
23
|
-
URI.should_receive(:parse).with("http://test.host/test.xml").and_return(uri)
|
|
24
|
-
File.open(@text_xml_path) do |stream|
|
|
25
|
-
uri.should_receive(:open).and_return(stream)
|
|
26
|
-
validate_text_xml(XmlNodeStream::Parser.parse("http://test.host/test.xml"))
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
it "should parse a document in a URI" do
|
|
31
|
-
uri = URI.parse("http://test.host/test.xml")
|
|
32
|
-
stream = mock(:stream)
|
|
33
|
-
File.open(@text_xml_path) do |stream|
|
|
34
|
-
uri.should_receive(:open).and_return(stream)
|
|
35
|
-
validate_text_xml(XmlNodeStream::Parser.parse(uri))
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
it "should parse a document in a stream" do
|
|
40
|
-
io = StringIO.new(File.read(@text_xml_path))
|
|
41
|
-
io.should_not_receive(:close)
|
|
42
|
-
validate_text_xml(XmlNodeStream::Parser.parse(io))
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
it "should call a block with each element in a document" do
|
|
46
|
-
nodes = []
|
|
47
|
-
XmlNodeStream::Parser.parse(@text_xml_path) do |node|
|
|
48
|
-
nodes << node.path
|
|
49
|
-
end
|
|
50
|
-
nodes.should == %w(
|
|
51
|
-
/library/authors/author/name
|
|
52
|
-
/library/authors/author
|
|
53
|
-
/library/authors/author/name
|
|
54
|
-
/library/authors/author
|
|
55
|
-
/library/authors/author/name
|
|
56
|
-
/library/authors/author
|
|
57
|
-
/library/authors
|
|
58
|
-
/library/collection/section/book/title
|
|
59
|
-
/library/collection/section/book/author
|
|
60
|
-
/library/collection/section/book/abstract
|
|
61
|
-
/library/collection/section/book/volumes
|
|
62
|
-
/library/collection/section/book
|
|
63
|
-
/library/collection/section
|
|
64
|
-
/library/collection/section/book/title
|
|
65
|
-
/library/collection/section/book/author
|
|
66
|
-
/library/collection/section/book/abstract
|
|
67
|
-
/library/collection/section/book
|
|
68
|
-
/library/collection/section/book/title
|
|
69
|
-
/library/collection/section/book/author
|
|
70
|
-
/library/collection/section/book/abstract
|
|
71
|
-
/library/collection/section/book
|
|
72
|
-
/library/collection/section/book/title
|
|
73
|
-
/library/collection/section/book/alternate_title
|
|
74
|
-
/library/collection/section/book/author
|
|
75
|
-
/library/collection/section/book/abstract
|
|
76
|
-
/library/collection/section/book
|
|
77
|
-
/library/collection/section
|
|
78
|
-
/library/collection
|
|
79
|
-
/library
|
|
80
|
-
)
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
XmlNodeStream::Parser::SUPPORTED_PARSERS.each do |parser_name|
|
|
84
|
-
context "with #{parser_name}" do
|
|
85
|
-
before :all do
|
|
86
|
-
@save_parser_name = XmlNodeStream::Parser.parser_name
|
|
87
|
-
XmlNodeStream::Parser.parser_name = parser_name
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
after :all do
|
|
91
|
-
XmlNodeStream::Parser.parser_name = @save_parser_name
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
it "should parse a document" do
|
|
95
|
-
begin
|
|
96
|
-
validate_text_xml(XmlNodeStream::Parser.parse(@text_xml_path))
|
|
97
|
-
rescue NotImplementedError
|
|
98
|
-
pending("#{parser_name} is not installed for testing")
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def validate_text_xml (root)
|
|
105
|
-
validate(root, :name => "library", :children => ["authors", "collection"])
|
|
106
|
-
|
|
107
|
-
validate(root.children[0], :name => "authors", :children => ["author"] * 3)
|
|
108
|
-
validate(root.children[0].children[0], :name => "author", :attributes => {"id" => "1"}, :children => ["name"])
|
|
109
|
-
validate(root.children[0].children[0].children[0], :name => "name", :value => "Edward Gibbon")
|
|
110
|
-
validate(root.children[0].children[1], :name => "author", :attributes => {"id" => "2"}, :children => ["name"])
|
|
111
|
-
validate(root.children[0].children[1].children[0], :name => "name", :value => "Herman Melville")
|
|
112
|
-
validate(root.children[0].children[2], :name => "author", :attributes => {"id" => "3"}, :children => ["name"])
|
|
113
|
-
validate(root.children[0].children[2].children[0], :name => "name", :value => "Jack London")
|
|
114
|
-
|
|
115
|
-
validate(root.children[1], :name => "collection", :children => ["section"] * 2)
|
|
116
|
-
history = root.children[1].children[0]
|
|
117
|
-
fiction = root.children[1].children[1]
|
|
118
|
-
|
|
119
|
-
validate(history, :name => "section", :attributes => {"id" => "100", "name" => "History"}, :children => ["book"])
|
|
120
|
-
validate(history.children[0], :name => "book", :attributes => {"id" => "1"}, :children => ["title", "author", "abstract", "volumes"])
|
|
121
|
-
validate(history.children[0].children[0], :name => "title", :value => "The Decline & Fall of the Roman Empire")
|
|
122
|
-
validate(history.children[0].children[1], :name => "author", :value => nil, :attributes => {"id" => "1"})
|
|
123
|
-
validate(history.children[0].children[2], :name => "abstract", :value => "History of the fall of Rome.")
|
|
124
|
-
validate(history.children[0].children[3], :name => "volumes", :value => "6")
|
|
125
|
-
|
|
126
|
-
validate(fiction, :name => "section", :attributes => {"id" => "200", "name" => "Fiction"}, :children => ["book"] * 3)
|
|
127
|
-
validate(fiction.children[0], :name => "book", :attributes => {"id" => "2"}, :children => ["title", "author", "abstract"])
|
|
128
|
-
validate(fiction.children[0].children[0], :name => "title", :value => "Call of the Wild")
|
|
129
|
-
validate(fiction.children[0].children[1], :name => "author", :value => nil, :attributes => {"id" => "3"})
|
|
130
|
-
validate(fiction.children[0].children[2], :name => "abstract", :value => "\n A dog goes to Alaska.\n ")
|
|
131
|
-
validate(fiction.children[1], :name => "book", :attributes => {"id" => "3"}, :children => ["title", "author", "abstract"])
|
|
132
|
-
validate(fiction.children[1].children[0], :name => "title", :value => "White Fang")
|
|
133
|
-
validate(fiction.children[1].children[1], :name => "author", :value => nil, :attributes => {"id" => "3"})
|
|
134
|
-
validate(fiction.children[1].children[2], :name => "abstract", :value => "Dogs, wolves, etc.")
|
|
135
|
-
validate(fiction.children[2], :name => "book", :attributes => {"id" => "4"}, :children => ["title", "alternate_title", "author", "abstract"])
|
|
136
|
-
validate(fiction.children[2].children[0], :name => "title", :value => "Moby Dick")
|
|
137
|
-
validate(fiction.children[2].children[1], :name => "alternate_title", :value => "The Whale")
|
|
138
|
-
validate(fiction.children[2].children[2], :name => "author", :value => nil, :attributes => {"id" => "2"})
|
|
139
|
-
validate(fiction.children[2].children[3], :name => "abstract", :value => "A mad captain seeks a mysterious white whale.")
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
def validate (node, options)
|
|
143
|
-
node.name.should == options[:name]
|
|
144
|
-
node.attributes.should == (options[:attributes] || {})
|
|
145
|
-
node.value.should == (options.include?(:value) ? options[:value] : "")
|
|
146
|
-
node.children.collect{|c| c.name}.should == (options[:children] || [])
|
|
147
|
-
end
|
|
148
|
-
end
|
data/spec/selector_spec.rb
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))
|
|
2
|
-
|
|
3
|
-
describe XmlNodeStream::Selector do
|
|
4
|
-
|
|
5
|
-
before :each do
|
|
6
|
-
@root = XmlNodeStream::Node.new("root")
|
|
7
|
-
@child_1 = XmlNodeStream::Node.new("child", @root)
|
|
8
|
-
@child_2 = XmlNodeStream::Node.new("child", @root)
|
|
9
|
-
@grandchild_1 = XmlNodeStream::Node.new("grandchild", @child_1, nil, "val1")
|
|
10
|
-
@grandchild_2 = XmlNodeStream::Node.new("grandchild", @child_1, nil, "val2")
|
|
11
|
-
@grandchild_3 = XmlNodeStream::Node.new("grandchild", @child_2, nil, "val3")
|
|
12
|
-
@grandchild_4 = XmlNodeStream::Node.new("grandchild", @child_2, nil, "val4")
|
|
13
|
-
@great_grandchild = XmlNodeStream::Node.new("grandchild", @grandchild_1, nil, "val1.a")
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
it "should find child nodes with a specified name" do
|
|
17
|
-
selector = XmlNodeStream::Selector.new("child")
|
|
18
|
-
selector.find(@root).should == [@child_1, @child_2]
|
|
19
|
-
selector = XmlNodeStream::Selector.new("./child")
|
|
20
|
-
selector.find(@root).should == [@child_1, @child_2]
|
|
21
|
-
selector = XmlNodeStream::Selector.new("nothing")
|
|
22
|
-
selector.find(@root).should == []
|
|
23
|
-
selector.find(@child_1).should == []
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
it "should find descendant nodes with a specified name" do
|
|
27
|
-
selector = XmlNodeStream::Selector.new(".//grandchild")
|
|
28
|
-
selector.find(@root).should == [@grandchild_1, @grandchild_2, @grandchild_3, @grandchild_4, @great_grandchild]
|
|
29
|
-
selector.find(@child_1).should == [@great_grandchild]
|
|
30
|
-
selector.find(@child_2).should == []
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
it "should find child nodes in a specified hierarchy" do
|
|
34
|
-
selector = XmlNodeStream::Selector.new("child/grandchild")
|
|
35
|
-
selector.find(@root).should == [@grandchild_1, @grandchild_2, @grandchild_3, @grandchild_4]
|
|
36
|
-
selector = XmlNodeStream::Selector.new("child/nothing")
|
|
37
|
-
selector.find(@root).should == []
|
|
38
|
-
selector.find(@child_1).should == []
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
it "should find an node itself" do
|
|
42
|
-
selector = XmlNodeStream::Selector.new(".")
|
|
43
|
-
selector.find(@child_1).should == [@child_1]
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
it "should find a parent node" do
|
|
47
|
-
selector = XmlNodeStream::Selector.new("..")
|
|
48
|
-
selector.find(@child_1).should == [@root]
|
|
49
|
-
selector.find(@root).should == []
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
it "should find an node's value" do
|
|
53
|
-
selector = XmlNodeStream::Selector.new("text()")
|
|
54
|
-
selector.find(@child_1).should == [nil]
|
|
55
|
-
selector.find(@grandchild_1).should == ["val1"]
|
|
56
|
-
selector = XmlNodeStream::Selector.new("child/grandchild/text()")
|
|
57
|
-
selector.find(@root).should == ["val1", "val2", "val3", "val4"]
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
it "should allow wildcards in the hierarchy" do
|
|
61
|
-
selector = XmlNodeStream::Selector.new("*/grandchild")
|
|
62
|
-
selector.find(@root).should == [@grandchild_1, @grandchild_2, @grandchild_3, @grandchild_4]
|
|
63
|
-
selector.find(@child_1).should == [@great_grandchild]
|
|
64
|
-
selector.find(@child_2).should == []
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
it "should find using full paths" do
|
|
68
|
-
selector = XmlNodeStream::Selector.new("/root/child")
|
|
69
|
-
selector.find(@root).should == [@child_1, @child_2]
|
|
70
|
-
selector.find(@grandchild_1).should == [@child_1, @child_2]
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
end
|
data/spec/spec_helper.rb
DELETED