saxerator 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -0
- data/lib/saxerator/document.rb +4 -0
- data/lib/saxerator/parser/depth_latch.rb +2 -2
- data/lib/saxerator/parser/latched_accumulator.rb +12 -12
- data/lib/saxerator/parser/within_element_latch.rb +30 -0
- data/lib/saxerator/version.rb +1 -1
- data/lib/saxerator.rb +1 -0
- data/spec/lib/saxerator_spec.rb +24 -5
- metadata +2 -1
data/README.md
CHANGED
@@ -42,6 +42,7 @@ You can combine predicates to isolate just the tags you want.
|
|
42
42
|
```ruby
|
43
43
|
parser.for_tag(:name).each { |x| all_the_names_in_a_document << x }
|
44
44
|
parser.for_tag(:name).at_depth(2).each { |x| names_nested_under_document_root << x }
|
45
|
+
parser.for_tag(:name).within(:author).each { |x| author_names << x }
|
45
46
|
```
|
46
47
|
|
47
48
|
Known Issues
|
data/lib/saxerator/document.rb
CHANGED
@@ -16,6 +16,10 @@ module Saxerator
|
|
16
16
|
Document.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
|
17
17
|
end
|
18
18
|
|
19
|
+
def within(tag)
|
20
|
+
Document.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
|
21
|
+
end
|
22
|
+
|
19
23
|
def each(&block)
|
20
24
|
document = Parser::LatchedAccumulator.new(@config, @latches, block)
|
21
25
|
parser = ::Nokogiri::XML::SAX::Parser.new document
|
data/lib/saxerator/parser/depth_latch.rb
CHANGED
@@ -6,14 +6,14 @@ module Saxerator
|
|
6
6
|
@actual_depth = 0
|
7
7
|
end
|
8
8
|
|
9
|
-
def start_element
|
9
|
+
def start_element(_, __)
|
10
10
|
@actual_depth += 1
|
11
11
|
if @actual_depth == @depth
|
12
12
|
open
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
def end_element
|
16
|
+
def end_element(_)
|
17
17
|
@actual_depth -= 1
|
18
18
|
end
|
19
19
|
end
|
data/lib/saxerator/parser/latched_accumulator.rb
CHANGED
@@ -11,19 +11,19 @@ module Saxerator
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def reset_latches
|
14
|
-
@latches.each
|
14
|
+
@latches.each { |latch| latch.reset }
|
15
15
|
end
|
16
16
|
|
17
17
|
def check_latches_and_passthrough(method, *args)
|
18
18
|
@latches.each { |latch| latch.send(method, *args) }
|
19
|
-
if @latches.all?
|
19
|
+
if @latches.all? { |latch| latch.open? }
|
20
20
|
@accumulator.send(method, *args)
|
21
21
|
else
|
22
22
|
reset_latches
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def xmldecl
|
26
|
+
def xmldecl(version, encoding, standalone)
|
27
27
|
check_latches_and_passthrough(:xmldecl, version, encoding, standalone)
|
28
28
|
end
|
29
29
|
|
@@ -35,39 +35,39 @@ module Saxerator
|
|
35
35
|
check_latches_and_passthrough(:end_document)
|
36
36
|
end
|
37
37
|
|
38
|
-
def start_element
|
38
|
+
def start_element(name, attrs = [])
|
39
39
|
check_latches_and_passthrough(:start_element, name, attrs)
|
40
40
|
end
|
41
41
|
|
42
|
-
def end_element
|
42
|
+
def end_element(name)
|
43
43
|
check_latches_and_passthrough(:end_element, name)
|
44
44
|
end
|
45
45
|
|
46
|
-
def start_element_namespace
|
46
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
47
47
|
check_latches_and_passthrough(:start_element_namespace, name, attrs, prefix, uri, ns)
|
48
48
|
end
|
49
49
|
|
50
|
-
def end_element_namespace
|
50
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
51
51
|
check_latches_and_passthrough(:end_element_namespace, name, prefix, uri)
|
52
52
|
end
|
53
53
|
|
54
|
-
def characters
|
54
|
+
def characters(string)
|
55
55
|
check_latches_and_passthrough(:characters, string)
|
56
56
|
end
|
57
57
|
|
58
|
-
def comment
|
58
|
+
def comment(string)
|
59
59
|
check_latches_and_passthrough(:comment, string)
|
60
60
|
end
|
61
61
|
|
62
|
-
def warning
|
62
|
+
def warning(string)
|
63
63
|
check_latches_and_passthrough(:warning, string)
|
64
64
|
end
|
65
65
|
|
66
|
-
def error
|
66
|
+
def error(string)
|
67
67
|
check_latches_and_passthrough(:error, string)
|
68
68
|
end
|
69
69
|
|
70
|
-
def cdata_block
|
70
|
+
def cdata_block(string)
|
71
71
|
check_latches_and_passthrough(:cdata_block, string)
|
72
72
|
end
|
73
73
|
end
|
data/lib/saxerator/parser/within_element_latch.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Saxerator
|
2
|
+
module Parser
|
3
|
+
class WithinElementLatch < DocumentLatch
|
4
|
+
def initialize(name)
|
5
|
+
@name = name
|
6
|
+
@inner_depth = 0
|
7
|
+
end
|
8
|
+
|
9
|
+
def start_element name, _
|
10
|
+
if @inner_depth == 0
|
11
|
+
if name == @name
|
12
|
+
@inner_depth += 1
|
13
|
+
end
|
14
|
+
else
|
15
|
+
open if @inner_depth == 1
|
16
|
+
@inner_depth += 1
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def end_element _
|
21
|
+
if @inner_depth > 0
|
22
|
+
@inner_depth -= 1
|
23
|
+
close if @inner_depth == 0
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def reset; end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/lib/saxerator.rb
CHANGED
@@ -10,6 +10,7 @@ require 'saxerator/parser/accumulator'
|
|
10
10
|
require 'saxerator/parser/document_latch'
|
11
11
|
require 'saxerator/parser/element_name_latch'
|
12
12
|
require 'saxerator/parser/depth_latch'
|
13
|
+
require 'saxerator/parser/within_element_latch'
|
13
14
|
require 'saxerator/parser/latched_accumulator'
|
14
15
|
|
15
16
|
module Saxerator
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -35,7 +35,7 @@ describe Saxerator do
|
|
35
35
|
context "with a string with an element at multiple depths" do
|
36
36
|
let(:xml) do
|
37
37
|
<<-eos
|
38
|
-
<
|
38
|
+
<publications>
|
39
39
|
<book>
|
40
40
|
<name>How to eat an airplane</name>
|
41
41
|
<author>
|
@@ -49,7 +49,13 @@ describe Saxerator do
|
|
49
49
|
<name>Jeanne Clarewood</name>
|
50
50
|
</author>
|
51
51
|
</book>
|
52
|
-
|
52
|
+
<article>
|
53
|
+
<name>Is our children learning?</name>
|
54
|
+
<author>
|
55
|
+
<name>Hazel Nutt</name>
|
56
|
+
</author>
|
57
|
+
</article>
|
58
|
+
</publication>
|
53
59
|
eos
|
54
60
|
end
|
55
61
|
|
@@ -57,15 +63,28 @@ describe Saxerator do
|
|
57
63
|
results = []
|
58
64
|
subject.at_depth(3).each { |x| results << x }
|
59
65
|
results.should == [
|
60
|
-
'How to eat an airplane', {'name' => ['Leviticus Alabaster', 'Eunice Diesel']},
|
61
|
-
'To wallop a horse in the face', {'name' => 'Jeanne Clarewood'}
|
66
|
+
'How to eat an airplane', { 'name' => ['Leviticus Alabaster', 'Eunice Diesel'] },
|
67
|
+
'To wallop a horse in the face', { 'name' => 'Jeanne Clarewood' },
|
68
|
+
'Is our children learning?', { 'name' => 'Hazel Nutt' }
|
62
69
|
]
|
63
70
|
end
|
64
71
|
|
65
72
|
it "should only parse the requested tag depth and tag" do
|
66
73
|
results = []
|
67
74
|
subject.at_depth(3).for_tag(:name).each { |x| results << x }
|
68
|
-
results.should == ['How to eat an airplane', 'To wallop a horse in the face']
|
75
|
+
results.should == ['How to eat an airplane', 'To wallop a horse in the face', 'Is our children learning?']
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should only parse tags nested inside the specified tag" do
|
79
|
+
results = []
|
80
|
+
subject.within(:article).each { |x| results << x }
|
81
|
+
results.should == ['Is our children learning?', { 'name' => 'Hazel Nutt' }]
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should only parse specified tags nested inside a specified tag" do
|
85
|
+
results = []
|
86
|
+
subject.for_tag(:name).within(:article).each { |x| results << x }
|
87
|
+
results.should == ['Is our children learning?', 'Hazel Nutt' ]
|
69
88
|
end
|
70
89
|
end
|
71
90
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/saxerator/parser/document_latch.rb
|
86
86
|
- lib/saxerator/parser/element_name_latch.rb
|
87
87
|
- lib/saxerator/parser/latched_accumulator.rb
|
88
|
+
- lib/saxerator/parser/within_element_latch.rb
|
88
89
|
- lib/saxerator/string_with_attributes.rb
|
89
90
|
- lib/saxerator/version.rb
|
90
91
|
- lib/saxerator/xml_node.rb
|