dozuki 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +13 -0
- data/.gitignore +4 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +58 -0
- data/README.rdoc +116 -0
- data/Rakefile +2 -0
- data/dozuki.gemspec +29 -0
- data/features/each_accessor.feature +72 -0
- data/features/exists_accessor.feature +29 -0
- data/features/float_accessor.feature +44 -0
- data/features/get_accessor.feature +68 -0
- data/features/int_accessor.feature +43 -0
- data/features/steps/xml_steps.rb +84 -0
- data/features/string_accessor.feature +41 -0
- data/features/support/env.rb +1 -0
- data/lib/dozuki.rb +3 -0
- data/lib/dozuki/version.rb +3 -0
- data/lib/dozuki/xml.rb +12 -0
- data/lib/dozuki/xml/exceptions.rb +27 -0
- data/lib/dozuki/xml/node.rb +54 -0
- data/lib/dozuki/xml/node_collection.rb +27 -0
- data/lib/dozuki/xml/parser.rb +23 -0
- data/spec/dozuki/xml/node_collection_spec.rb +117 -0
- data/spec/dozuki/xml/node_spec.rb +257 -0
- data/spec/dozuki/xml/parser_spec.rb +78 -0
- data/spec/dozuki/xml_spec.rb +29 -0
- data/spec/spec_helper.rb +2 -0
- metadata +169 -0
data/.autotest
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Autotest.add_hook :initialize do |at|
|
|
2
|
+
root = File.dirname(__FILE__)
|
|
3
|
+
at.add_mapping(%r%^lib/(.*)\.rb$%) { |_, m|
|
|
4
|
+
["spec/#{m[1]}_spec.rb"]
|
|
5
|
+
}
|
|
6
|
+
at.add_mapping(%r%^spec/.*_spec\.rb$%) { |filename|
|
|
7
|
+
filename
|
|
8
|
+
}
|
|
9
|
+
at.add_mapping(%r%^spec/support/.*\.rb$%) { |_|
|
|
10
|
+
Dir[File.join(root, 'spec/**/*_spec.rb')]
|
|
11
|
+
}
|
|
12
|
+
nil
|
|
13
|
+
end
|
data/.gitignore
ADDED
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
dozuki (0.0.1)
|
|
5
|
+
nokogiri
|
|
6
|
+
|
|
7
|
+
GEM
|
|
8
|
+
remote: http://rubygems.org/
|
|
9
|
+
specs:
|
|
10
|
+
ZenTest (4.4.2)
|
|
11
|
+
archive-tar-minitar (0.5.2)
|
|
12
|
+
autotest (4.4.6)
|
|
13
|
+
ZenTest (>= 4.4.1)
|
|
14
|
+
builder (3.0.0)
|
|
15
|
+
columnize (0.3.2)
|
|
16
|
+
cucumber (0.10.0)
|
|
17
|
+
builder (>= 2.1.2)
|
|
18
|
+
diff-lcs (~> 1.1.2)
|
|
19
|
+
gherkin (~> 2.3.2)
|
|
20
|
+
json (~> 1.4.6)
|
|
21
|
+
term-ansicolor (~> 1.0.5)
|
|
22
|
+
diff-lcs (1.1.2)
|
|
23
|
+
gherkin (2.3.3)
|
|
24
|
+
json (~> 1.4.6)
|
|
25
|
+
json (1.4.6)
|
|
26
|
+
linecache19 (0.5.11)
|
|
27
|
+
ruby_core_source (>= 0.1.4)
|
|
28
|
+
nokogiri (1.4.4)
|
|
29
|
+
rspec (2.3.0)
|
|
30
|
+
rspec-core (~> 2.3.0)
|
|
31
|
+
rspec-expectations (~> 2.3.0)
|
|
32
|
+
rspec-mocks (~> 2.3.0)
|
|
33
|
+
rspec-core (2.3.1)
|
|
34
|
+
rspec-expectations (2.3.0)
|
|
35
|
+
diff-lcs (~> 1.1.2)
|
|
36
|
+
rspec-mocks (2.3.0)
|
|
37
|
+
ruby-debug-base19 (0.11.24)
|
|
38
|
+
columnize (>= 0.3.1)
|
|
39
|
+
linecache19 (>= 0.5.11)
|
|
40
|
+
ruby_core_source (>= 0.1.4)
|
|
41
|
+
ruby-debug19 (0.11.6)
|
|
42
|
+
columnize (>= 0.3.1)
|
|
43
|
+
linecache19 (>= 0.5.11)
|
|
44
|
+
ruby-debug-base19 (>= 0.11.19)
|
|
45
|
+
ruby_core_source (0.1.4)
|
|
46
|
+
archive-tar-minitar (>= 0.5.2)
|
|
47
|
+
term-ansicolor (1.0.5)
|
|
48
|
+
|
|
49
|
+
PLATFORMS
|
|
50
|
+
ruby
|
|
51
|
+
|
|
52
|
+
DEPENDENCIES
|
|
53
|
+
autotest
|
|
54
|
+
cucumber
|
|
55
|
+
dozuki!
|
|
56
|
+
nokogiri
|
|
57
|
+
rspec
|
|
58
|
+
ruby-debug19
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
= Dozuki
|
|
2
|
+
|
|
3
|
+
A Nokogiri wrapper that simplifies commonly occurring tasks.
|
|
4
|
+
|
|
5
|
+
== What does it do?
|
|
6
|
+
|
|
7
|
+
Dozuki removes the repetitive tasks from parsing XML documents with XPaths such as:
|
|
8
|
+
|
|
9
|
+
* extracting ints, floats, strings etc.
|
|
10
|
+
* accessing single nodes
|
|
11
|
+
* checking whether an element exists
|
|
12
|
+
* iterating through nodes
|
|
13
|
+
* iterating nodes and extracting ints, floats and strings etc.
|
|
14
|
+
|
|
15
|
+
It's mainly sugar for reducing the amount of chaining on calls like:
|
|
16
|
+
|
|
17
|
+
doc.xpath('/my/xpath').first.to_i
|
|
18
|
+
|
|
19
|
+
== How do I use it?
|
|
20
|
+
|
|
21
|
+
Parsing XML (currently only supports strings):
|
|
22
|
+
|
|
23
|
+
doc = Dozuki::XML.parse(xml_string)
|
|
24
|
+
|
|
25
|
+
This document supports the Dozuki extensions for:
|
|
26
|
+
|
|
27
|
+
=== Extracting a single node
|
|
28
|
+
|
|
29
|
+
The get method takes an xpath and returns the first node that matches the xpath:
|
|
30
|
+
|
|
31
|
+
doc.get('/my/xpath')
|
|
32
|
+
|
|
33
|
+
If the node can't be found then an exception is raised.
|
|
34
|
+
|
|
35
|
+
=== Extracting a single node of a certain type
|
|
36
|
+
|
|
37
|
+
The following methods take the first node that matches the xpath and return the formatted result:
|
|
38
|
+
|
|
39
|
+
doc.string('/my/xpath') # surrounding whitespace stripped
|
|
40
|
+
doc.float('/my/xpath')
|
|
41
|
+
doc.int('/my/xpath')
|
|
42
|
+
|
|
43
|
+
These functions are to replace calls using plain Nokogiri such as:
|
|
44
|
+
|
|
45
|
+
doc.xpath('/my/xpath').first.to_i
|
|
46
|
+
|
|
47
|
+
=== Checking whether an element exists
|
|
48
|
+
|
|
49
|
+
doc.exists?('/my/xpath')
|
|
50
|
+
|
|
51
|
+
=== Iterating through nodes
|
|
52
|
+
|
|
53
|
+
Dozuki also provides a slightly more succinct way to 'each' an xpath:
|
|
54
|
+
|
|
55
|
+
doc.each('/my/xpath') do |node|
|
|
56
|
+
# do something
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
=== Iterating through node text and parsing
|
|
60
|
+
|
|
61
|
+
There are also simple ways to extract formatted text of a series of nodes with an each:
|
|
62
|
+
|
|
63
|
+
doc.each('/my/xpath').as_string do |node|
|
|
64
|
+
# string with surrounding whitespace stripped
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
doc.each('/my/xpath').as_int do |node|
|
|
68
|
+
# int
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
doc.each('/my/xpath').as_float do |node|
|
|
72
|
+
# float
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
== Playing nicely with Nokogiri
|
|
76
|
+
|
|
77
|
+
Dozuki will proxy any calls not recognised onto the underlying Nokogiri structure, including respond_to?, allowing you to treat it like any other Nokogiri document.
|
|
78
|
+
|
|
79
|
+
== More documentation
|
|
80
|
+
|
|
81
|
+
More features are described in the {features}[https://github.com/jamesalmond/dozuki/tree/master/features]
|
|
82
|
+
|
|
83
|
+
== Contributing to Dozuki
|
|
84
|
+
|
|
85
|
+
* Fork the project.
|
|
86
|
+
* Add tests that cover the new feature or bug fix.
|
|
87
|
+
* Make your feature addition or bug fix.
|
|
88
|
+
* Commit; do not mess with the Rakefile, version, or history. (If you want to have your own version, that is fine, but bump the version in a commit by itself in another branch so I can ignore it when I pull.)
|
|
89
|
+
* Send me a pull request. Bonus points for topic branches.
|
|
90
|
+
|
|
91
|
+
== LICENSE:
|
|
92
|
+
|
|
93
|
+
(The MIT License)
|
|
94
|
+
|
|
95
|
+
Copyright (c) 2010:
|
|
96
|
+
|
|
97
|
+
* {James Almond}[http://jamesalmond.com]
|
|
98
|
+
|
|
99
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
100
|
+
a copy of this software and associated documentation files (the
|
|
101
|
+
'Software'), to deal in the Software without restriction, including
|
|
102
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
103
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
104
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
105
|
+
the following conditions:
|
|
106
|
+
|
|
107
|
+
The above copyright notice and this permission notice shall be
|
|
108
|
+
included in all copies or substantial portions of the Software.
|
|
109
|
+
|
|
110
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
111
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
112
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
113
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
114
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
115
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
116
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/dozuki.gemspec
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
3
|
+
require "dozuki/version"
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |s|
|
|
6
|
+
s.name = "dozuki"
|
|
7
|
+
s.version = Dozuki::VERSION
|
|
8
|
+
s.platform = Gem::Platform::RUBY
|
|
9
|
+
s.authors = ["James Almond"]
|
|
10
|
+
s.email = ["james@jamesalmond.com"]
|
|
11
|
+
s.homepage = "https://github.com/jamesalmond/dozuki"
|
|
12
|
+
s.summary = %q{An XPath syntactic sugar wrapper for Nokogiri}
|
|
13
|
+
s.description = %q{A simple way of extracting various elements from an XML document using XPaths}
|
|
14
|
+
|
|
15
|
+
s.rubyforge_project = "dozuki"
|
|
16
|
+
|
|
17
|
+
s.files = `git ls-files`.split("\n")
|
|
18
|
+
s.test_files = `git ls-files -- {spec,features}/* .autotest`.split("\n")
|
|
19
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
|
20
|
+
s.require_paths = ["lib"]
|
|
21
|
+
|
|
22
|
+
s.add_dependency("nokogiri")
|
|
23
|
+
|
|
24
|
+
s.add_development_dependency("rspec")
|
|
25
|
+
s.add_development_dependency("cucumber")
|
|
26
|
+
s.add_development_dependency("ruby-debug19")
|
|
27
|
+
s.add_development_dependency("autotest")
|
|
28
|
+
|
|
29
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Feature: Iterating through nodes
|
|
2
|
+
In order to provide a simpler way of accessing groups of nodes
|
|
3
|
+
As a traverser
|
|
4
|
+
I want to access nodes using the each method and an xpath
|
|
5
|
+
|
|
6
|
+
Scenario: using each to traverse a document
|
|
7
|
+
When I parse the XML:
|
|
8
|
+
"""
|
|
9
|
+
<root>
|
|
10
|
+
<name>St. George's Arms</name>
|
|
11
|
+
<average_price>20.32</average_price>
|
|
12
|
+
<number_of_beers>2</number_of_beers>
|
|
13
|
+
<rooms>
|
|
14
|
+
<room>SINGLE</room>
|
|
15
|
+
<room>Double</room>
|
|
16
|
+
</rooms>
|
|
17
|
+
</root>
|
|
18
|
+
"""
|
|
19
|
+
And I call "each('/root/rooms/room')" on the document and collect the results
|
|
20
|
+
Then the results should contain a node with the text "SINGLE"
|
|
21
|
+
And the results should contain a node with the text "Double"
|
|
22
|
+
|
|
23
|
+
Scenario: using each to traverse a document and getting the string elements
|
|
24
|
+
When I parse the XML:
|
|
25
|
+
"""
|
|
26
|
+
<root>
|
|
27
|
+
<name>St. George's Arms</name>
|
|
28
|
+
<average_price>20.32</average_price>
|
|
29
|
+
<number_of_beers>2</number_of_beers>
|
|
30
|
+
<rooms>
|
|
31
|
+
<room>SINGLE</room>
|
|
32
|
+
<room>Double</room>
|
|
33
|
+
</rooms>
|
|
34
|
+
</root>
|
|
35
|
+
"""
|
|
36
|
+
And I call "each('/root/rooms/room').as_string" on the document and collect the results
|
|
37
|
+
Then the results should contain "SINGLE"
|
|
38
|
+
And the results should contain "Double"
|
|
39
|
+
|
|
40
|
+
Scenario: using each to traverse a document and getting the integer elements
|
|
41
|
+
When I parse the XML:
|
|
42
|
+
"""
|
|
43
|
+
<root>
|
|
44
|
+
<name>St. George's Arms</name>
|
|
45
|
+
<average_price>20.32</average_price>
|
|
46
|
+
<number_of_beers>2</number_of_beers>
|
|
47
|
+
<rooms>
|
|
48
|
+
<room>5</room>
|
|
49
|
+
<room>7</room>
|
|
50
|
+
</rooms>
|
|
51
|
+
</root>
|
|
52
|
+
"""
|
|
53
|
+
And I call "each('/root/rooms/room').as_int" on the document and collect the results
|
|
54
|
+
Then the results should contain 5
|
|
55
|
+
And the results should contain 7
|
|
56
|
+
|
|
57
|
+
Scenario: using each to traverse a document and getting the float elements
|
|
58
|
+
When I parse the XML:
|
|
59
|
+
"""
|
|
60
|
+
<root>
|
|
61
|
+
<name>St. George's Arms</name>
|
|
62
|
+
<average_price>20.32</average_price>
|
|
63
|
+
<number_of_beers>2</number_of_beers>
|
|
64
|
+
<rooms>
|
|
65
|
+
<room>53.50</room>
|
|
66
|
+
<room>799.78</room>
|
|
67
|
+
</rooms>
|
|
68
|
+
</root>
|
|
69
|
+
"""
|
|
70
|
+
And I call "each('/root/rooms/room').as_float" on the document and collect the results
|
|
71
|
+
Then the results should contain 53.50
|
|
72
|
+
And the results should contain 799.78
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Feature: Exists accessor
|
|
2
|
+
In order to easily determine whether a node exists in a document
|
|
3
|
+
As a traverser
|
|
4
|
+
I want to check whether a node exists
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Scenario: the node exists
|
|
8
|
+
When I parse the XML:
|
|
9
|
+
"""
|
|
10
|
+
<root>
|
|
11
|
+
<name>St. George's Arms</name>
|
|
12
|
+
<average_price>20.32</average_price>
|
|
13
|
+
<number_of_beers>2</number_of_beers>
|
|
14
|
+
</root>
|
|
15
|
+
"""
|
|
16
|
+
And I call "exists?('/root/number_of_beers')" on the document
|
|
17
|
+
Then the result should be true
|
|
18
|
+
|
|
19
|
+
Scenario: the node doesn't exist
|
|
20
|
+
When I parse the XML:
|
|
21
|
+
"""
|
|
22
|
+
<root>
|
|
23
|
+
<name>St. George's Arms</name>
|
|
24
|
+
<average_price>20.32</average_price>
|
|
25
|
+
<number_of_beers>2</number_of_beers>
|
|
26
|
+
</root>
|
|
27
|
+
"""
|
|
28
|
+
And I call "exists?('/root/food')" on the document
|
|
29
|
+
Then the result should be false
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Feature: Getting floats from the document
|
|
2
|
+
In order to provide a simpler way of getting floats from a node
|
|
3
|
+
As a traverser
|
|
4
|
+
I want to access nodes using the float method and an xpath
|
|
5
|
+
|
|
6
|
+
Scenario: getting the float of a single node
|
|
7
|
+
When I parse the XML:
|
|
8
|
+
"""
|
|
9
|
+
<root>
|
|
10
|
+
<name>St. George's Arms</name>
|
|
11
|
+
<average_price>20.32</average_price>
|
|
12
|
+
<number_of_beers>2</number_of_beers>
|
|
13
|
+
</root>
|
|
14
|
+
"""
|
|
15
|
+
And I call "float('/root/average_price')" on the document
|
|
16
|
+
Then the result should be 20.32
|
|
17
|
+
|
|
18
|
+
Scenario: getting the float of a single node with whitespace
|
|
19
|
+
When I parse the XML:
|
|
20
|
+
"""
|
|
21
|
+
<root>
|
|
22
|
+
<name>St. George's Arms</name>
|
|
23
|
+
<average_price>
|
|
24
|
+
20.32
|
|
25
|
+
</average_price>
|
|
26
|
+
<number_of_beers>2</number_of_beers>
|
|
27
|
+
</root>
|
|
28
|
+
"""
|
|
29
|
+
And I call "float('/root/average_price')" on the document
|
|
30
|
+
Then the result should be 20.32
|
|
31
|
+
|
|
32
|
+
Scenario: getting a non-existent node
|
|
33
|
+
When I parse the XML:
|
|
34
|
+
"""
|
|
35
|
+
<root>
|
|
36
|
+
<name>St. George's Arms</name>
|
|
37
|
+
<average_price>20.32</average_price>
|
|
38
|
+
<number_of_beers>2</number_of_beers>
|
|
39
|
+
</root>
|
|
40
|
+
"""
|
|
41
|
+
Then calling "float('//something/missing')" on the document should raise a "NotFound" error
|
|
42
|
+
And the error should have the xpath "//something/missing"
|
|
43
|
+
And the error should have a stored node
|
|
44
|
+
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Feature: Getting nodes from the document
|
|
2
|
+
In order to provide a unified interface to the xml objects
|
|
3
|
+
As a traverser
|
|
4
|
+
I want to access nodes using the get method and an xpath
|
|
5
|
+
|
|
6
|
+
Scenario: getting a single node
|
|
7
|
+
When I parse the XML:
|
|
8
|
+
"""
|
|
9
|
+
<root>
|
|
10
|
+
<name>St. George's Arms</name>
|
|
11
|
+
<average_price>20.32</average_price>
|
|
12
|
+
<number_of_beers>2</number_of_beers>
|
|
13
|
+
<rooms>
|
|
14
|
+
<room>SINGLE</room>
|
|
15
|
+
<room>Double</room>
|
|
16
|
+
</rooms>
|
|
17
|
+
</root>
|
|
18
|
+
"""
|
|
19
|
+
And I call "get('//rooms')" on the document
|
|
20
|
+
Then the result should be a "Dozuki::XML::Node"
|
|
21
|
+
And the result should have 2 elements
|
|
22
|
+
|
|
23
|
+
Scenario: getting a single node but accessing it with a block
|
|
24
|
+
When I parse the XML:
|
|
25
|
+
"""
|
|
26
|
+
<root>
|
|
27
|
+
<name>St. George's Arms</name>
|
|
28
|
+
<average_price>20.32</average_price>
|
|
29
|
+
<number_of_beers>2</number_of_beers>
|
|
30
|
+
<rooms>
|
|
31
|
+
<room>SINGLE</room>
|
|
32
|
+
<room>Double</room>
|
|
33
|
+
</rooms>
|
|
34
|
+
</root>
|
|
35
|
+
"""
|
|
36
|
+
And I call "get('//rooms')" on the document with a block
|
|
37
|
+
Then the block parameter should be a "Dozuki::XML::Node"
|
|
38
|
+
And the parameter should have 2 elements
|
|
39
|
+
|
|
40
|
+
Scenario: getting the first of multiple nodes
|
|
41
|
+
When I parse the XML:
|
|
42
|
+
"""
|
|
43
|
+
<root>
|
|
44
|
+
<name>St. George's Arms</name>
|
|
45
|
+
<average_price>20.32</average_price>
|
|
46
|
+
<number_of_beers>2</number_of_beers>
|
|
47
|
+
<rooms>
|
|
48
|
+
<room>SINGLE</room>
|
|
49
|
+
<room>Double</room>
|
|
50
|
+
</rooms>
|
|
51
|
+
</root>
|
|
52
|
+
"""
|
|
53
|
+
And I call "get('//rooms/room')" on the document
|
|
54
|
+
Then the result should have the text "SINGLE"
|
|
55
|
+
|
|
56
|
+
Scenario: getting a non-existent node
|
|
57
|
+
When I parse the XML:
|
|
58
|
+
"""
|
|
59
|
+
<root>
|
|
60
|
+
<name>St. George's Arms</name>
|
|
61
|
+
<average_price>20.32</average_price>
|
|
62
|
+
<number_of_beers>2</number_of_beers>
|
|
63
|
+
</root>
|
|
64
|
+
"""
|
|
65
|
+
Then calling "get('//something/missing')" on the document should raise a "NotFound" error
|
|
66
|
+
And the error should have the xpath "//something/missing"
|
|
67
|
+
And the error should have a stored node
|
|
68
|
+
And the error should have a stored node
|