saxerator 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +20 -4
- data/lib/saxerator.rb +4 -3
- data/lib/saxerator/dsl.rb +8 -4
- data/lib/saxerator/parser/{depth_latch.rb → at_depth_latch.rb} +2 -2
- data/lib/saxerator/parser/{element_name_latch.rb → for_tag_latch.rb} +1 -1
- data/lib/saxerator/parser/with_attribute_latch.rb +25 -0
- data/lib/saxerator/parser/{within_element_latch.rb → within_latch.rb} +1 -1
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/saxerator_spec.rb +19 -7
- metadata +6 -5
data/README.md
CHANGED
@@ -8,15 +8,31 @@ independently.
|
|
8
8
|
|
9
9
|
Each xml chunk is parsed into a JSON-like Ruby Hash structure for consumption.
|
10
10
|
|
11
|
-
Examples
|
12
|
-
--------
|
13
11
|
You can parse any valid xml in 3 simple steps.
|
14
12
|
|
15
13
|
1. Initialize the parser
|
16
|
-
1.
|
17
|
-
1. Perform your work in an
|
14
|
+
1. Specify which tag you care about using a simple DSL
|
15
|
+
1. Perform your work in an `each` block, or using any [Enumerable](http://apidock.com/ruby/Enumerable)
|
18
16
|
method
|
19
17
|
|
18
|
+
The DSL
|
19
|
+
-------
|
20
|
+
The DSL consists of predicates that may be combined to describe which elements the parser should enumerate over.
|
21
|
+
Saxerator will only enumerate over chunks of xml that match all of the combined predicates (see Examples section
|
22
|
+
for added clarity).
|
23
|
+
|
24
|
+
| Predicate | Explanation |
|
25
|
+
|:----------------|:------------|
|
26
|
+
| `all` | Returns the entire document parsed into a hash. Cannot be combined with other predicates
|
27
|
+
| `for_tag(name)` | Elements whose name matches the given `name`
|
28
|
+
| `at_depth(n)` | Elements `n` levels deep inside the root of an xml document. The root element itself is `n = 0`
|
29
|
+
| `within(name)` | Elements nested anywhere within an element with the given `name`
|
30
|
+
| `child_of(name)`| Elements that are direct children of an element with the given `name`
|
31
|
+
| `with_attribute(name, value)` | Elements that have an attribute with the given `name` and `value`. If no `value` is given, matches any element that has an attribute with the specified `name`, whatever its value
|
32
|
+
|
33
|
+
|
34
|
+
Examples
|
35
|
+
--------
|
20
36
|
```ruby
|
21
37
|
parser = Saxerator.parser(File.new("rss.xml"))
|
22
38
|
|
data/lib/saxerator.rb
CHANGED
@@ -7,11 +7,12 @@ require 'saxerator/hash_with_attributes'
|
|
7
7
|
require 'saxerator/xml_node'
|
8
8
|
|
9
9
|
require 'saxerator/parser/accumulator'
|
10
|
-
require 'saxerator/parser/
|
11
|
-
require 'saxerator/parser/
|
12
|
-
require 'saxerator/parser/
|
10
|
+
require 'saxerator/parser/for_tag_latch'
|
11
|
+
require 'saxerator/parser/at_depth_latch'
|
12
|
+
require 'saxerator/parser/within_latch'
|
13
13
|
require 'saxerator/parser/latched_accumulator'
|
14
14
|
require 'saxerator/parser/child_of_latch'
|
15
|
+
require 'saxerator/parser/with_attribute_latch'
|
15
16
|
|
16
17
|
module Saxerator
|
17
18
|
extend self
|
data/lib/saxerator/dsl.rb
CHANGED
@@ -1,19 +1,23 @@
|
|
1
1
|
module Saxerator
|
2
2
|
module DSL
|
3
3
|
def for_tag(tag)
|
4
|
-
specify
|
4
|
+
specify Parser::ForTagLatch.new(tag.to_s)
|
5
5
|
end
|
6
6
|
|
7
7
|
def at_depth(depth)
|
8
|
-
specify
|
8
|
+
specify Parser::AtDepthLatch.new(depth.to_i)
|
9
9
|
end
|
10
10
|
|
11
11
|
def within(tag)
|
12
|
-
specify
|
12
|
+
specify Parser::WithinLatch.new(tag.to_s)
|
13
13
|
end
|
14
14
|
|
15
15
|
def child_of(tag)
|
16
|
-
specify
|
16
|
+
specify Parser::ChildOfLatch.new(tag.to_s)
|
17
|
+
end
|
18
|
+
|
19
|
+
def with_attribute(name, value = nil)
|
20
|
+
specify Parser::WithAttributeLatch.new(name.to_s, !!value ? value.to_s : nil)
|
17
21
|
end
|
18
22
|
|
19
23
|
private
|
@@ -2,10 +2,10 @@ require 'saxerator/parser/document_latch'
|
|
2
2
|
|
3
3
|
module Saxerator
|
4
4
|
module Parser
|
5
|
-
class
|
5
|
+
class AtDepthLatch < DocumentLatch
|
6
6
|
def initialize(depth)
|
7
7
|
@target_depth = depth
|
8
|
-
@current_depth =
|
8
|
+
@current_depth = -1
|
9
9
|
end
|
10
10
|
|
11
11
|
def start_element(_, __)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'saxerator/parser/document_latch'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
module Parser
|
5
|
+
class WithAttributeLatch < DocumentLatch
|
6
|
+
def initialize(name, value)
|
7
|
+
@attr_name = name
|
8
|
+
@attr_value = value
|
9
|
+
end
|
10
|
+
|
11
|
+
def start_element _, attributes
|
12
|
+
attributes = Hash[attributes]
|
13
|
+
if attributes[@attr_name] && (@attr_value.nil? || attributes[@attr_name] == @attr_value)
|
14
|
+
open
|
15
|
+
else
|
16
|
+
close
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def end_element _
|
21
|
+
close
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -43,8 +43,8 @@ describe Saxerator do
|
|
43
43
|
<book>
|
44
44
|
<name>How to eat an airplane</name>
|
45
45
|
<author>
|
46
|
-
<name>Leviticus Alabaster</name>
|
47
|
-
<name>Eunice Diesel</name>
|
46
|
+
<name type="primary">Leviticus Alabaster</name>
|
47
|
+
<name type="foreword">Eunice Diesel</name>
|
48
48
|
</author>
|
49
49
|
</book>
|
50
50
|
<book>
|
@@ -64,7 +64,7 @@ describe Saxerator do
|
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should only parse the requested tag depth" do
|
67
|
-
subject.at_depth(
|
67
|
+
subject.at_depth(2).inject([], :<<).should == [
|
68
68
|
'How to eat an airplane', { 'name' => ['Leviticus Alabaster', 'Eunice Diesel'] },
|
69
69
|
'To wallop a horse in the face', { 'name' => 'Jeanne Clarewood' },
|
70
70
|
'Is our children learning?', { 'name' => 'Hazel Nutt' }
|
@@ -72,7 +72,7 @@ describe Saxerator do
|
|
72
72
|
end
|
73
73
|
|
74
74
|
it "should only parse the requested tag depth and tag" do
|
75
|
-
subject.at_depth(
|
75
|
+
subject.at_depth(2).for_tag(:name).inject([], :<<).should == [
|
76
76
|
'How to eat an airplane',
|
77
77
|
'To wallop a horse in the face',
|
78
78
|
'Is our children learning?'
|
@@ -92,6 +92,17 @@ describe Saxerator do
|
|
92
92
|
'Hazel Nutt'
|
93
93
|
]
|
94
94
|
end
|
95
|
+
|
96
|
+
it "should match tags with the specified attributes" do
|
97
|
+
subject.with_attribute(:type).inject([], :<<).should == [
|
98
|
+
'Leviticus Alabaster',
|
99
|
+
'Eunice Diesel'
|
100
|
+
]
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should match tags with the specified attributes" do
|
104
|
+
subject.with_attribute(:type, :primary).inject([], :<<).should == ['Leviticus Alabaster']
|
105
|
+
end
|
95
106
|
end
|
96
107
|
|
97
108
|
context "with a grand child" do
|
@@ -142,17 +153,18 @@ describe Saxerator do
|
|
142
153
|
end
|
143
154
|
end
|
144
155
|
|
156
|
+
# Verifying the basic parsing behaviors (strings, hashes, arrays, attributes, character entity decoding)
|
145
157
|
context "with a file with nested elements" do
|
146
158
|
let(:xml) { fixture_file('nested_elements.xml') }
|
147
159
|
subject { parser.for_tag(:entry).first }
|
148
160
|
|
149
161
|
specify { subject['title'].should == 'How to eat an airplane' }
|
150
162
|
specify { subject['author'].should == {'name' => 'Soulcutter'} }
|
163
|
+
|
151
164
|
specify { subject['contributor'].should == [{'name' => 'Jane Doe'}, {'name' => 'Leviticus Alabaster'}] }
|
152
|
-
specify { subject['content'].should == "<p>Airplanes are very large — this can present difficulty in digestion.</p>"}
|
153
|
-
specify { subject['content'].attributes['type'].should == 'html' }
|
154
165
|
specify { subject['contributor'][0].attributes['type'].should == 'primary' }
|
155
|
-
|
166
|
+
|
167
|
+
specify { subject['content'].should == "<p>Airplanes are very large — this can present difficulty in digestion.</p>"}
|
156
168
|
end
|
157
169
|
end
|
158
170
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -82,12 +82,13 @@ files:
|
|
82
82
|
- lib/saxerator/full_document.rb
|
83
83
|
- lib/saxerator/hash_with_attributes.rb
|
84
84
|
- lib/saxerator/parser/accumulator.rb
|
85
|
+
- lib/saxerator/parser/at_depth_latch.rb
|
85
86
|
- lib/saxerator/parser/child_of_latch.rb
|
86
|
-
- lib/saxerator/parser/depth_latch.rb
|
87
87
|
- lib/saxerator/parser/document_latch.rb
|
88
|
-
- lib/saxerator/parser/
|
88
|
+
- lib/saxerator/parser/for_tag_latch.rb
|
89
89
|
- lib/saxerator/parser/latched_accumulator.rb
|
90
|
-
- lib/saxerator/parser/
|
90
|
+
- lib/saxerator/parser/with_attribute_latch.rb
|
91
|
+
- lib/saxerator/parser/within_latch.rb
|
91
92
|
- lib/saxerator/string_with_attributes.rb
|
92
93
|
- lib/saxerator/version.rb
|
93
94
|
- lib/saxerator/xml_node.rb
|