saxxy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +13 -0
  5. data/LICENSE +22 -0
  6. data/README.md +117 -0
  7. data/Rakefile +12 -0
  8. data/lib/saxxy.rb +2 -0
  9. data/lib/saxxy/activatable.rb +160 -0
  10. data/lib/saxxy/callbacks/libxml.rb +26 -0
  11. data/lib/saxxy/callbacks/nokogiri.rb +30 -0
  12. data/lib/saxxy/callbacks/ox.rb +66 -0
  13. data/lib/saxxy/callbacks/sax.rb +86 -0
  14. data/lib/saxxy/context.rb +88 -0
  15. data/lib/saxxy/context_tree.rb +85 -0
  16. data/lib/saxxy/event.rb +83 -0
  17. data/lib/saxxy/event_registry.rb +122 -0
  18. data/lib/saxxy/node_action.rb +59 -0
  19. data/lib/saxxy/node_rule.rb +90 -0
  20. data/lib/saxxy/parsers/base.rb +28 -0
  21. data/lib/saxxy/parsers/libxml.rb +52 -0
  22. data/lib/saxxy/parsers/nokogiri.rb +28 -0
  23. data/lib/saxxy/parsers/ox.rb +30 -0
  24. data/lib/saxxy/service.rb +47 -0
  25. data/lib/saxxy/utils/agent.rb +66 -0
  26. data/lib/saxxy/utils/callback_array.rb +27 -0
  27. data/lib/saxxy/utils/helpers.rb +13 -0
  28. data/lib/saxxy/version.rb +3 -0
  29. data/saxxy.gemspec +21 -0
  30. data/spec/saxxy/activatable_spec.rb +344 -0
  31. data/spec/saxxy/callbacks/sax_spec.rb +456 -0
  32. data/spec/saxxy/context_spec.rb +51 -0
  33. data/spec/saxxy/context_tree_spec.rb +68 -0
  34. data/spec/saxxy/event_registry_spec.rb +137 -0
  35. data/spec/saxxy/event_spec.rb +49 -0
  36. data/spec/saxxy/node_action_spec.rb +46 -0
  37. data/spec/saxxy/node_rule_spec.rb +99 -0
  38. data/spec/saxxy/parsers/libxml_spec.rb +104 -0
  39. data/spec/saxxy/parsers/nokogiri_spec.rb +200 -0
  40. data/spec/saxxy/parsers/ox_spec.rb +175 -0
  41. data/spec/saxxy/utils/agent_spec.rb +63 -0
  42. data/spec/spec_helper.rb +28 -0
  43. data/spec/support/agent_macros.rb +24 -0
  44. metadata +155 -0
@@ -0,0 +1,104 @@
# Spec for Saxxy::Parsers::Libxml (data/spec/saxxy/parsers/libxml_spec.rb).
#
# Covers the constructor and an end-to-end run of a small context tree over
# well-formed and malformed HTML. Every integration example is wrapped in
# `pending`; the reason is given in each pending message.
require "spec_helper"
require "saxxy/utils/agent"
require "saxxy/context_tree"

# The parser class is referenced by name (a String) in the group description
# and the group is tagged :not_jruby.
# NOTE(review): presumably because libxml is a C extension that is not
# available on JRuby — confirm against spec_helper's filter configuration.
describe "Saxxy::Parsers::Libxml", :not_jruby do
  # Builds the parser under test, forwarding every argument to the constructor.
  def parser(*args)
    Saxxy::Parsers::Libxml.new(*args)
  end

  describe "#initialize" do
    let(:tree) { Saxxy::ContextTree.new {} }

    # Declared with `subject` rather than `let(:subject)` so the reserved
    # name is defined through the helper RSpec provides for it.
    subject { parser(tree, {foo: :bar}) }

    it "should set the options" do
      subject.options.should == {foo: :bar}
    end

    it "should set the context tree" do
      subject.context_tree.should == tree
    end
  end

  context "integration" do
    # Well-formed document: one div without a class, one span inside it.
    let(:valid) do
      "<html><div><span class='fo'></span></div><div class='f'></div></html>"
    end

    # The first div is never closed.
    let(:not_closed) do
      "<html><div><span class='fo'></span><div class='f'></div></html>"
    end

    # Contains a closing </span> that has no matching opening tag.
    let(:not_opened) do
      "<html><div></span></div><div></div></html>"
    end

    # Counts divs whose class is nil and, under a div, spans whose class
    # matches /foo?/. The callbacks only count, so their arguments are unused.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |_text, _elem, _attrs|
          @counts[:div] += 1
        end
        under("div") do
          on("span", class: /foo?/) do |_text, _elem, _attrs|
            @counts[:span] += 1
          end
        end
      end
    end

    # Parses +string+ with a fresh parser built around +tree+.
    def parse(string, tree)
      parser(tree).parse_string(string)
    end

    before { @counts = { div: 0, span: 0 } }

    # NOTE(review): the examples below rely on the block form of `pending`,
    # which matches the RSpec 2 style `should` syntax used in this file —
    # confirm it is still supported before upgrading RSpec.
    describe "valid html" do
      it "should change the div count" do
        pending("Libxml generates double callbacks") do
          expect { parse(valid, tree) }.to change { @counts[:div] }.from(0).to(1)
        end
      end

      it "should change the span count" do
        pending("Libxml generates double callbacks") do
          expect { parse(valid, tree) }.to change { @counts[:span] }.from(0).to(1)
        end
      end
    end

    describe "not closed div" do
      it "should change the div count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_closed, tree) }.to change { @counts[:div] }.from(0).to(1)
        end
      end

      it "should change the span count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_closed, tree) }.to change { @counts[:span] }.from(0).to(1)
        end
      end
    end

    describe "not opened span" do
      it "should change the div count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_opened, tree) }.to change { @counts[:div] }.from(0).to(2)
        end
      end

      it "should not change the span count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_opened, tree) }.to_not change { @counts[:span] }
        end
      end
    end
  end
end
@@ -0,0 +1,200 @@
# Spec for Saxxy::Parsers::Nokogiri (data/spec/saxxy/parsers/nokogiri_spec.rb).
#
# Covers the constructor, delegation of the parse_* entry points to the
# underlying parser, and two end-to-end scenarios (node counting and text
# aggregation) over well-formed and malformed HTML.
require "spec_helper"
require "saxxy/utils/agent"
require "saxxy/context_tree"

describe Saxxy::Parsers::Nokogiri do
  # Builds the parser under test, forwarding every argument to the constructor.
  def parser(*args)
    Saxxy::Parsers::Nokogiri.new(*args)
  end

  # Parses +string+ with a fresh parser built around +tree+.
  # Shared by the "node count" and "text aggregation" contexts.
  def parse(string, tree)
    parser(tree).parse_string(string)
  end

  describe "#initialize" do
    let(:tree) { Saxxy::ContextTree.new {} }

    # Declared with `subject` rather than `let(:subject)` so the reserved
    # name is defined through the helper RSpec provides for it.
    subject { parser(tree, {foo: :bar}) }

    it "should set the options" do
      subject.options.should == {foo: :bar}
    end

    it "should set the context tree" do
      subject.context_tree.should == tree
    end
  end

  describe "#parse_io" do
    let(:tree) { Saxxy::ContextTree.new {} }

    subject { parser(tree) }

    it "should delegate the call to the underlying parser" do
      obj = Object.new
      # Reuse the process's own stdin object. Wrapping descriptor 0 in a new
      # IO (IO.new(0)) creates a second owner that closes the real stdin when
      # the wrapper is finalized.
      io = $stdin
      subject.stub(new_parser: obj)
      obj.should_receive(:parse_io).with(io, 'UTF-8')
      subject.parse_io(io)
    end
  end

  describe "#parse_file" do
    let(:tree) { Saxxy::ContextTree.new {} }

    subject { parser(tree) }

    it "should delegate the call to the underlying parser" do
      obj = Object.new
      # Pass a genuine path string. File.new(0) only wraps stdin's descriptor,
      # so it carries no usable path and would also close stdin on finalization.
      path = __FILE__
      subject.stub(new_parser: obj)
      obj.should_receive(:parse_file).with(path, 'UTF-8')
      subject.parse_file(path)
    end
  end

  describe "#parse_string" do
    let(:tree) { Saxxy::ContextTree.new {} }

    subject { parser(tree) }

    it "should delegate the call to the underlying parsers' parse_memory" do
      obj = Object.new
      string = ""
      subject.stub(new_parser: obj)
      obj.should_receive(:parse_memory).with(string, 'UTF-8')
      subject.parse_string(string)
    end
  end

  context "node count" do
    # Well-formed document: one div without a class, one span inside it.
    let(:valid) do
      "<html><div><span class='fo'></span></div><div class='f'></div></html>"
    end

    # The first div is never closed.
    let(:not_closed) do
      "<html><div><span class='fo'></span><div class='f'></div></html>"
    end

    # Contains a closing </span> that has no matching opening tag.
    let(:not_opened) do
      "<html><div></span></div><div></div></html>"
    end

    # Counts divs whose class is nil and, under a div, spans whose class
    # matches /foo?/. The callbacks only count, so their arguments are unused.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |_text, _elem, _attrs|
          @counts[:div] += 1
        end
        under("div") do
          on("span", class: /foo?/) do |_text, _elem, _attrs|
            @counts[:span] += 1
          end
        end
      end
    end

    before { @counts = { div: 0, span: 0 } }

    describe "valid html" do
      it "should change the div count" do
        expect { parse(valid, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(valid, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not closed div" do
      it "should change the div count" do
        expect { parse(not_closed, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(not_closed, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not opened span" do
      it "should change the div count" do
        expect { parse(not_opened, tree) }.to change { @counts[:div] }.from(0).to(2)
      end

      it "should not change the span count" do
        expect { parse(not_opened, tree) }.to_not change { @counts[:span] }
      end
    end
  end

  context "text aggregation" do
    # Same shapes as the "node count" fixtures, with a digit of text placed
    # at each position so the collected text reveals what was matched.
    let(:valid) do
      "<html>0<div>1<span class='fo'>2</span>3</div><div class='f'>4</div></html>"
    end

    let(:not_closed) do
      "<html>0<div>1<span class='fo'>2</span>3<div>4</div></html>"
    end

    let(:not_opened) do
      "<html>0<div>1</span>23</div><div>4</div></html>"
    end

    # Appends the text handed to each callback to a per-element accumulator.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |text, _elem, _attrs|
          @texts[:div] = (@texts[:div] || "") + text
        end
        under("div") do
          on("span", class: /foo?/) do |text, _elem, _attrs|
            @texts[:span] = (@texts[:span] || "") + text
          end
        end
      end
    end

    before { @texts = { div: nil, span: nil } }

    describe "valid html" do
      it "should change the div text" do
        expect { parse(valid, tree) }.to change { @texts[:div] }.to("123")
      end

      it "should change the span text" do
        expect { parse(valid, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not closed div" do
      it "should change the div text" do
        expect { parse(not_closed, tree) }.to change { @texts[:div] }.to("41234")
      end

      it "should change the span text" do
        expect { parse(not_closed, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not opened span" do
      it "should change the div text" do
        expect { parse(not_opened, tree) }.to change { @texts[:div] }.to("1234")
      end

      it "should not change the span text" do
        expect { parse(not_opened, tree) }.to_not change { @texts[:span] }
      end
    end
  end
end
@@ -0,0 +1,175 @@
# Spec for Saxxy::Parsers::Ox (data/spec/saxxy/parsers/ox_spec.rb).
#
# Covers the constructor, delegation of the parse_* entry points to
# Ox.sax_parse, and two end-to-end scenarios (node counting and text
# aggregation) over well-formed and malformed HTML.
require "spec_helper"
require "stringio" # StringIO is used below; do not rely on another library loading it
require "saxxy/utils/agent"
require "saxxy/context_tree"

# The group is tagged :not_jruby because Travis CI does not support
# C extensions on JRuby.
describe "Saxxy::Parsers::Ox", :not_jruby do
  # Builds the parser under test, forwarding every argument to the constructor.
  def parser(*args)
    Saxxy::Parsers::Ox.new(*args)
  end

  # Parses +string+ with a fresh parser built around +tree+.
  # Shared by the "node count" and "text aggregation" contexts.
  def parse(string, tree)
    parser(tree).parse_string(string)
  end

  describe "#initialize" do
    let(:tree) { Saxxy::ContextTree.new {} }

    # Declared with `subject` rather than `let(:subject)` so the reserved
    # name is defined through the helper RSpec provides for it.
    subject { parser(tree, {foo: :bar}) }

    it "should set the options" do
      subject.options.should == {foo: :bar}
    end

    it "should set the context tree" do
      subject.context_tree.should == tree
    end
  end

  describe "#parse_*" do
    let(:tree) { Saxxy::ContextTree.new {} }

    subject { parser(tree) }

    it "#parse_io should delegate the call to parse" do
      ::Ox.should_receive(:sax_parse)
      subject.parse_io(StringIO.new(""))
    end

    it "#parse_string should delegate the call to parse" do
      ::Ox.should_receive(:sax_parse)
      subject.parse_string("")
    end
  end

  context "node count" do
    # Well-formed document: one div without a class, one span inside it.
    let(:valid) do
      "<html><div><span class='fo'></span></div><div class='f'></div></html>"
    end

    # The first div is never closed.
    let(:not_closed) do
      "<html><div><span class='fo'></span><div class='f'></div></html>"
    end

    # Contains a closing </span> that has no matching opening tag.
    let(:not_opened) do
      "<html><div></span></div><div></div></html>"
    end

    # Counts divs whose class is nil and, under a div, spans whose class
    # matches /foo?/. The callbacks only count, so their arguments are unused.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |_text, _elem, _attrs|
          @counts[:div] += 1
        end
        under("div") do
          on("span", class: /foo?/) do |_text, _elem, _attrs|
            @counts[:span] += 1
          end
        end
      end
    end

    before { @counts = { div: 0, span: 0 } }

    describe "valid html" do
      it "should change the div count" do
        expect { parse(valid, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(valid, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not closed div" do
      it "should change the div count" do
        expect { parse(not_closed, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(not_closed, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not opened span" do
      it "should change the div count" do
        expect { parse(not_opened, tree) }.to change { @counts[:div] }.from(0).to(2)
      end

      it "should not change the span count" do
        expect { parse(not_opened, tree) }.to_not change { @counts[:span] }
      end
    end
  end

  context "text aggregation" do
    # Same shapes as the "node count" fixtures, with a digit of text placed
    # at each position so the collected text reveals what was matched.
    let(:valid) do
      "<html>0<div>1<span class='fo'>2</span>3</div><div class='f'>4</div></html>"
    end

    let(:not_closed) do
      "<html>0<div>1<span class='fo'>2</span>3<div>4</div></html>"
    end

    let(:not_opened) do
      "<html>0<div>1</span>23</div><div>4</div></html>"
    end

    # Appends the text handed to each callback to a per-element accumulator.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |text, _elem, _attrs|
          @texts[:div] = (@texts[:div] || "") + text
        end
        under("div") do
          on("span", class: /foo?/) do |text, _elem, _attrs|
            @texts[:span] = (@texts[:span] || "") + text
          end
        end
      end
    end

    before { @texts = { div: nil, span: nil } }

    describe "valid html" do
      it "should change the div text" do
        expect { parse(valid, tree) }.to change { @texts[:div] }.to("123")
      end

      it "should change the span text" do
        expect { parse(valid, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not closed div" do
      it "should change the div text" do
        expect { parse(not_closed, tree) }.to change { @texts[:div] }.to("41234")
      end

      it "should change the span text" do
        expect { parse(not_closed, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not opened span" do
      it "should change the div text" do
        expect { parse(not_opened, tree) }.to change { @texts[:div] }.to("1234")
      end

      it "should not change the span text" do
        expect { parse(not_opened, tree) }.to_not change { @texts[:span] }
      end
    end
  end
end