saxerator 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +1 -0
- data/lib/saxerator/document.rb +4 -0
- data/lib/saxerator/parser/depth_latch.rb +2 -2
- data/lib/saxerator/parser/latched_accumulator.rb +12 -12
- data/lib/saxerator/parser/within_element_latch.rb +30 -0
- data/lib/saxerator/version.rb +1 -1
- data/lib/saxerator.rb +1 -0
- data/spec/lib/saxerator_spec.rb +24 -5
- metadata +2 -1
data/README.md
CHANGED
@@ -42,6 +42,7 @@ You can combine predicates to isolate just the tags you want.
|
|
42
42
|
```ruby
|
43
43
|
parser.for_tag(:name).each { |x| all_the_names_in_a_document << x }
|
44
44
|
parser.for_tag(:name).at_depth(2).each { |x| names_nested_under_document_root << x }
|
45
|
+
parser.for_tag(:name).within(:author).each { |x| author_names << x }
|
45
46
|
```
|
46
47
|
|
47
48
|
Known Issues
|
data/lib/saxerator/document.rb
CHANGED
@@ -16,6 +16,10 @@ module Saxerator
|
|
16
16
|
Document.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
|
17
17
|
end
|
18
18
|
|
19
|
+
def within(tag)
|
20
|
+
Document.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
|
21
|
+
end
|
22
|
+
|
19
23
|
def each(&block)
|
20
24
|
document = Parser::LatchedAccumulator.new(@config, @latches, block)
|
21
25
|
parser = ::Nokogiri::XML::SAX::Parser.new document
|
data/lib/saxerator/parser/depth_latch.rb
CHANGED
@@ -6,14 +6,14 @@ module Saxerator
|
|
6
6
|
@actual_depth = 0
|
7
7
|
end
|
8
8
|
|
9
|
-
def start_element
|
9
|
+
def start_element(_, __)
|
10
10
|
@actual_depth += 1
|
11
11
|
if @actual_depth == @depth
|
12
12
|
open
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
def end_element
|
16
|
+
def end_element(_)
|
17
17
|
@actual_depth -= 1
|
18
18
|
end
|
19
19
|
end
|
data/lib/saxerator/parser/latched_accumulator.rb
CHANGED
@@ -11,19 +11,19 @@ module Saxerator
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def reset_latches
|
14
|
-
@latches.each
|
14
|
+
@latches.each { |latch| latch.reset }
|
15
15
|
end
|
16
16
|
|
17
17
|
def check_latches_and_passthrough(method, *args)
|
18
18
|
@latches.each { |latch| latch.send(method, *args) }
|
19
|
-
if @latches.all?
|
19
|
+
if @latches.all? { |latch| latch.open? }
|
20
20
|
@accumulator.send(method, *args)
|
21
21
|
else
|
22
22
|
reset_latches
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def xmldecl
|
26
|
+
def xmldecl(version, encoding, standalone)
|
27
27
|
check_latches_and_passthrough(:xmldecl, version, encoding, standalone)
|
28
28
|
end
|
29
29
|
|
@@ -35,39 +35,39 @@ module Saxerator
|
|
35
35
|
check_latches_and_passthrough(:end_document)
|
36
36
|
end
|
37
37
|
|
38
|
-
def start_element
|
38
|
+
def start_element(name, attrs = [])
|
39
39
|
check_latches_and_passthrough(:start_element, name, attrs)
|
40
40
|
end
|
41
41
|
|
42
|
-
def end_element
|
42
|
+
def end_element(name)
|
43
43
|
check_latches_and_passthrough(:end_element, name)
|
44
44
|
end
|
45
45
|
|
46
|
-
def start_element_namespace
|
46
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
47
47
|
check_latches_and_passthrough(:start_element_namespace, name, attrs, prefix, uri, ns)
|
48
48
|
end
|
49
49
|
|
50
|
-
def end_element_namespace
|
50
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
51
51
|
check_latches_and_passthrough(:end_element_namespace, name, prefix, uri)
|
52
52
|
end
|
53
53
|
|
54
|
-
def characters
|
54
|
+
def characters(string)
|
55
55
|
check_latches_and_passthrough(:characters, string)
|
56
56
|
end
|
57
57
|
|
58
|
-
def comment
|
58
|
+
def comment(string)
|
59
59
|
check_latches_and_passthrough(:comment, string)
|
60
60
|
end
|
61
61
|
|
62
|
-
def warning
|
62
|
+
def warning(string)
|
63
63
|
check_latches_and_passthrough(:warning, string)
|
64
64
|
end
|
65
65
|
|
66
|
-
def error
|
66
|
+
def error(string)
|
67
67
|
check_latches_and_passthrough(:error, string)
|
68
68
|
end
|
69
69
|
|
70
|
-
def cdata_block
|
70
|
+
def cdata_block(string)
|
71
71
|
check_latches_and_passthrough(:cdata_block, string)
|
72
72
|
end
|
73
73
|
end
|
data/lib/saxerator/parser/within_element_latch.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Saxerator
|
2
|
+
module Parser
|
3
|
+
class WithinElementLatch < DocumentLatch
|
4
|
+
def initialize(name)
|
5
|
+
@name = name
|
6
|
+
@inner_depth = 0
|
7
|
+
end
|
8
|
+
|
9
|
+
def start_element name, _
|
10
|
+
if @inner_depth == 0
|
11
|
+
if name == @name
|
12
|
+
@inner_depth += 1
|
13
|
+
end
|
14
|
+
else
|
15
|
+
open if @inner_depth == 1
|
16
|
+
@inner_depth += 1
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def end_element _
|
21
|
+
if @inner_depth > 0
|
22
|
+
@inner_depth -= 1
|
23
|
+
close if @inner_depth == 0
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def reset; end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/lib/saxerator.rb
CHANGED
@@ -10,6 +10,7 @@ require 'saxerator/parser/accumulator'
|
|
10
10
|
require 'saxerator/parser/document_latch'
|
11
11
|
require 'saxerator/parser/element_name_latch'
|
12
12
|
require 'saxerator/parser/depth_latch'
|
13
|
+
require 'saxerator/parser/within_element_latch'
|
13
14
|
require 'saxerator/parser/latched_accumulator'
|
14
15
|
|
15
16
|
module Saxerator
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -35,7 +35,7 @@ describe Saxerator do
|
|
35
35
|
context "with a string with an element at multiple depths" do
|
36
36
|
let(:xml) do
|
37
37
|
<<-eos
|
38
|
-
<
|
38
|
+
<publications>
|
39
39
|
<book>
|
40
40
|
<name>How to eat an airplane</name>
|
41
41
|
<author>
|
@@ -49,7 +49,13 @@ describe Saxerator do
|
|
49
49
|
<name>Jeanne Clarewood</name>
|
50
50
|
</author>
|
51
51
|
</book>
|
52
|
-
|
52
|
+
<article>
|
53
|
+
<name>Is our children learning?</name>
|
54
|
+
<author>
|
55
|
+
<name>Hazel Nutt</name>
|
56
|
+
</author>
|
57
|
+
</article>
|
58
|
+
</publication>
|
53
59
|
eos
|
54
60
|
end
|
55
61
|
|
@@ -57,15 +63,28 @@ describe Saxerator do
|
|
57
63
|
results = []
|
58
64
|
subject.at_depth(3).each { |x| results << x }
|
59
65
|
results.should == [
|
60
|
-
'How to eat an airplane', {'name' => ['Leviticus Alabaster', 'Eunice Diesel']},
|
61
|
-
'To wallop a horse in the face', {'name' => 'Jeanne Clarewood'}
|
66
|
+
'How to eat an airplane', { 'name' => ['Leviticus Alabaster', 'Eunice Diesel'] },
|
67
|
+
'To wallop a horse in the face', { 'name' => 'Jeanne Clarewood' },
|
68
|
+
'Is our children learning?', { 'name' => 'Hazel Nutt' }
|
62
69
|
]
|
63
70
|
end
|
64
71
|
|
65
72
|
it "should only parse the requested tag depth and tag" do
|
66
73
|
results = []
|
67
74
|
subject.at_depth(3).for_tag(:name).each { |x| results << x }
|
68
|
-
results.should == ['How to eat an airplane', 'To wallop a horse in the face']
|
75
|
+
results.should == ['How to eat an airplane', 'To wallop a horse in the face', 'Is our children learning?']
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should only parse tags nested inside the specified tag" do
|
79
|
+
results = []
|
80
|
+
subject.within(:article).each { |x| results << x }
|
81
|
+
results.should == ['Is our children learning?', { 'name' => 'Hazel Nutt' }]
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should only parse specified tags nested inside a specified tag" do
|
85
|
+
results = []
|
86
|
+
subject.for_tag(:name).within(:article).each { |x| results << x }
|
87
|
+
results.should == ['Is our children learning?', 'Hazel Nutt' ]
|
69
88
|
end
|
70
89
|
end
|
71
90
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/saxerator/parser/document_latch.rb
|
86
86
|
- lib/saxerator/parser/element_name_latch.rb
|
87
87
|
- lib/saxerator/parser/latched_accumulator.rb
|
88
|
+
- lib/saxerator/parser/within_element_latch.rb
|
88
89
|
- lib/saxerator/string_with_attributes.rb
|
89
90
|
- lib/saxerator/version.rb
|
90
91
|
- lib/saxerator/xml_node.rb
|