saxxy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +13 -0
  5. data/LICENSE +22 -0
  6. data/README.md +117 -0
  7. data/Rakefile +12 -0
  8. data/lib/saxxy.rb +2 -0
  9. data/lib/saxxy/activatable.rb +160 -0
  10. data/lib/saxxy/callbacks/libxml.rb +26 -0
  11. data/lib/saxxy/callbacks/nokogiri.rb +30 -0
  12. data/lib/saxxy/callbacks/ox.rb +66 -0
  13. data/lib/saxxy/callbacks/sax.rb +86 -0
  14. data/lib/saxxy/context.rb +88 -0
  15. data/lib/saxxy/context_tree.rb +85 -0
  16. data/lib/saxxy/event.rb +83 -0
  17. data/lib/saxxy/event_registry.rb +122 -0
  18. data/lib/saxxy/node_action.rb +59 -0
  19. data/lib/saxxy/node_rule.rb +90 -0
  20. data/lib/saxxy/parsers/base.rb +28 -0
  21. data/lib/saxxy/parsers/libxml.rb +52 -0
  22. data/lib/saxxy/parsers/nokogiri.rb +28 -0
  23. data/lib/saxxy/parsers/ox.rb +30 -0
  24. data/lib/saxxy/service.rb +47 -0
  25. data/lib/saxxy/utils/agent.rb +66 -0
  26. data/lib/saxxy/utils/callback_array.rb +27 -0
  27. data/lib/saxxy/utils/helpers.rb +13 -0
  28. data/lib/saxxy/version.rb +3 -0
  29. data/saxxy.gemspec +21 -0
  30. data/spec/saxxy/activatable_spec.rb +344 -0
  31. data/spec/saxxy/callbacks/sax_spec.rb +456 -0
  32. data/spec/saxxy/context_spec.rb +51 -0
  33. data/spec/saxxy/context_tree_spec.rb +68 -0
  34. data/spec/saxxy/event_registry_spec.rb +137 -0
  35. data/spec/saxxy/event_spec.rb +49 -0
  36. data/spec/saxxy/node_action_spec.rb +46 -0
  37. data/spec/saxxy/node_rule_spec.rb +99 -0
  38. data/spec/saxxy/parsers/libxml_spec.rb +104 -0
  39. data/spec/saxxy/parsers/nokogiri_spec.rb +200 -0
  40. data/spec/saxxy/parsers/ox_spec.rb +175 -0
  41. data/spec/saxxy/utils/agent_spec.rb +63 -0
  42. data/spec/spec_helper.rb +28 -0
  43. data/spec/support/agent_macros.rb +24 -0
  44. metadata +155 -0
@@ -0,0 +1,104 @@
require "spec_helper"
require "saxxy/utils/agent"
require "saxxy/context_tree"


# Specs for the Libxml-backed parser. The group is tagged :not_jruby and the
# class is named by string in the description rather than by constant.
describe "Saxxy::Parsers::Libxml", :not_jruby do

  # Builds the parser under test, forwarding every argument to the constructor.
  def parser(*args)
    Saxxy::Parsers::Libxml.new(*args)
  end


  describe "#initialize" do
    let(:tree) do
      Saxxy::ContextTree.new {}
    end

    let(:subject) do
      parser(tree, { foo: :bar })
    end

    it "should set the options" do
      subject.options.should eq({ foo: :bar })
    end

    it "should set the context tree" do
      subject.context_tree.should eq(tree)
    end
  end


  # End-to-end examples. Every expectation below is wrapped in a `pending`
  # block whose message records why it is not expected to pass yet.
  context "integration" do
    # Reset the counters that the tree's callbacks increment.
    before do
      @counts = { div: 0, span: 0 }
    end

    # Parses +string+ with a fresh parser bound to +tree+.
    def parse(string, tree)
      parser(tree).parse_string(string)
    end

    # Well-formed: a div wrapping span.fo, followed by div.f.
    let(:valid) { "<html><div><span class='fo'></span></div><div class='f'></div></html>" }

    # Same as above, but the first <div> is never closed.
    let(:not_closed) { "<html><div><span class='fo'></span><div class='f'></div></html>" }

    # Contains a </span> that has no matching opening tag.
    let(:not_opened) { "<html><div></span></div><div></div></html>" }

    # One rule for divs registered with `class: nil`, and one for spans under a
    # div whose class matches /foo?/. Each callback bumps its counter.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) { |_text, _elem, _attrs| @counts[:div] += 1 }

        under("div") do
          on("span", class: /foo?/) { |_text, _elem, _attrs| @counts[:span] += 1 }
        end
      end
    end


    describe "valid html" do
      it "should change the div count" do
        pending("Libxml generates double callbacks") do
          expect { parse(valid, tree) }.to change { @counts[:div] }.from(0).to(1)
        end
      end

      it "should change the span count" do
        pending("Libxml generates double callbacks") do
          expect { parse(valid, tree) }.to change { @counts[:span] }.from(0).to(1)
        end
      end
    end

    describe "not closed div" do
      it "should change the div count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_closed, tree) }.to change { @counts[:div] }.from(0).to(1)
        end
      end

      it "should change the span count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_closed, tree) }.to change { @counts[:span] }.from(0).to(1)
        end
      end
    end

    describe "not opened span" do
      it "should change the div count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_opened, tree) }.to change { @counts[:div] }.from(0).to(2)
        end
      end

      it "should not change the span count" do
        pending("Libxml does not handle malformed html") do
          expect { parse(not_opened, tree) }.to_not change { @counts[:span] }
        end
      end
    end

  end

end
@@ -0,0 +1,200 @@
require "spec_helper"
require "saxxy/utils/agent"
require "saxxy/context_tree"


# Specs for Saxxy::Parsers::Nokogiri: construction, forwarding of the parse_*
# entry points to the object returned by #new_parser, and callback behaviour
# on well-formed and malformed markup.
describe Saxxy::Parsers::Nokogiri do

  # Builds the parser under test, forwarding every argument to the constructor.
  def parser(*args)
    Saxxy::Parsers::Nokogiri.new(*args)
  end

  # Parses +string+ with a fresh parser bound to +tree+. Shared by the
  # "node count" and "text aggregation" groups.
  def parse(string, tree)
    parser(tree).parse_string(string)
  end


  describe "#initialize" do
    let(:tree) { Saxxy::ContextTree.new {} }
    let(:subject) { parser(tree, {foo: :bar}) }

    it "should set the options" do
      subject.options.should == {foo: :bar}
    end

    it "should set the context tree" do
      subject.context_tree.should == tree
    end
  end


  describe "#parse_io" do
    let(:tree) { Saxxy::ContextTree.new {} }
    let(:subject) { parser(tree) }

    it "should delegate the call to the underlying parser" do
      obj = Object.new
      # An IO wrapping descriptor 0; the expectation only checks that this
      # same object is handed on together with the 'UTF-8' encoding.
      io = IO.new(0)
      subject.stub(new_parser: obj)
      obj.should_receive(:parse_io).with(io, 'UTF-8')
      subject.parse_io(io)
    end
  end


  describe "#parse_file" do
    let(:tree) { Saxxy::ContextTree.new {} }
    let(:subject) { parser(tree) }

    it "should delegate the call to the underlying parser" do
      obj = Object.new
      # Use a real path string. The previous `File.new(0).path` wrapped a bare
      # descriptor, which has no path: it yields nil on older Rubies and is
      # believed to raise IOError on newer ones (TODO confirm for the Ruby
      # versions this gem targets), so the example never exercised an actual
      # path argument.
      path = __FILE__
      subject.stub(new_parser: obj)
      obj.should_receive(:parse_file).with(path, 'UTF-8')
      subject.parse_file(path)
    end
  end


  describe "#parse_string" do
    let(:tree) { Saxxy::ContextTree.new {} }
    let(:subject) { parser(tree) }

    it "should delegate the call to the underlying parsers' parse_memory" do
      obj = Object.new
      string = ""
      subject.stub(new_parser: obj)
      obj.should_receive(:parse_memory).with(string, 'UTF-8')
      subject.parse_string(string)
    end
  end



  # Counts how many times each registered callback fires.
  context "node count" do
    # Well-formed: a div wrapping span.fo, followed by div.f.
    let(:valid) do
      "<html><div><span class='fo'></span></div><div class='f'></div></html>"
    end

    # The first <div> is never closed.
    let(:not_closed) do
      "<html><div><span class='fo'></span><div class='f'></div></html>"
    end

    # Contains a </span> that has no matching opening tag.
    let(:not_opened) do
      "<html><div></span></div><div></div></html>"
    end

    # One rule for divs registered with `class: nil`, and one for spans under
    # a div whose class matches /foo?/. Each callback bumps its counter.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |text, elem, attrs|
          @counts[:div] += 1
        end
        under("div") do
          on("span", class: /foo?/) do |text, elem, attrs|
            @counts[:span] += 1
          end
        end
      end
    end


    before { @counts = { div: 0, span: 0 } }

    describe "valid html" do
      it "should change the div count" do
        expect { parse(valid, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(valid, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not closed div" do
      it "should change the div count" do
        expect { parse(not_closed, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(not_closed, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not opened span" do
      it "should change the div count" do
        expect { parse(not_opened, tree) }.to change { @counts[:div] }.from(0).to(2)
      end

      it "should not change the span count" do
        expect { parse(not_opened, tree) }.to_not change { @counts[:span] }
      end
    end
  end



  # Concatenates the text argument each callback receives, per tag.
  context "text aggregation" do
    # Same shapes as the "node count" fixtures, with digits as text nodes so
    # the collected text shows which pieces each callback was given.
    let(:valid) do
      "<html>0<div>1<span class='fo'>2</span>3</div><div class='f'>4</div></html>"
    end

    let(:not_closed) do
      "<html>0<div>1<span class='fo'>2</span>3<div>4</div></html>"
    end

    let(:not_opened) do
      "<html>0<div>1</span>23</div><div>4</div></html>"
    end

    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |text, elem, attrs|
          # Start from "" the first time, since the slot is initialised to nil.
          @texts[:div] = (@texts[:div] || "") + text
        end
        under("div") do
          on("span", class: /foo?/) do |text, elem, attrs|
            @texts[:span] = (@texts[:span] || "") + text
          end
        end
      end
    end


    before { @texts = { div: nil, span: nil } }

    describe "valid html" do
      it "should change the div text" do
        expect { parse(valid, tree) }.to change { @texts[:div] }.to("123")
      end

      it "should change the span text" do
        expect { parse(valid, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not closed div" do
      it "should change the div text" do
        expect { parse(not_closed, tree) }.to change { @texts[:div] }.to("41234")
      end

      it "should change the span text" do
        expect { parse(not_closed, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not opened span" do
      it "should change the div text" do
        expect { parse(not_opened, tree) }.to change { @texts[:div] }.to("1234")
      end

      it "should not change the span text" do
        expect { parse(not_opened, tree) }.to_not change { @texts[:span] }
      end
    end
  end

end
@@ -0,0 +1,175 @@
require "spec_helper"
require "saxxy/utils/agent"
require "saxxy/context_tree"


# The group carries the :not_jruby tag because travis-ci does not
# support C extensions for jruby.
describe "Saxxy::Parsers::Ox", :not_jruby do

  # Builds the parser under test, forwarding every argument to the constructor.
  def parser(*args)
    Saxxy::Parsers::Ox.new(*args)
  end

  # Parses +string+ with a fresh parser bound to +tree+. Used by the
  # "node count" and "text aggregation" groups.
  def parse(string, tree)
    parser(tree).parse_string(string)
  end


  describe "#initialize" do
    let(:tree) do
      Saxxy::ContextTree.new {}
    end

    let(:subject) do
      parser(tree, { foo: :bar })
    end

    it "should set the options" do
      subject.options.should eq({ foo: :bar })
    end

    it "should set the context tree" do
      subject.context_tree.should eq(tree)
    end
  end


  # Both entry points are expected to end up calling ::Ox.sax_parse.
  describe "#parse_*" do
    let(:tree) do
      Saxxy::ContextTree.new {}
    end

    let(:subject) do
      parser(tree)
    end

    it "#parse_io should delegate the call to parse" do
      empty_io = StringIO.new("")
      ::Ox.should_receive(:sax_parse)
      subject.parse_io(empty_io)
    end

    it "#parse_string should delegate the call to parse" do
      ::Ox.should_receive(:sax_parse)
      subject.parse_string("")
    end
  end



  # Counts how many times each registered callback fires.
  context "node count" do
    # Reset the counters that the tree's callbacks increment.
    before do
      @counts = { div: 0, span: 0 }
    end

    # Well-formed: a div wrapping span.fo, followed by div.f.
    let(:valid) { "<html><div><span class='fo'></span></div><div class='f'></div></html>" }

    # The first <div> is never closed.
    let(:not_closed) { "<html><div><span class='fo'></span><div class='f'></div></html>" }

    # Contains a </span> that has no matching opening tag.
    let(:not_opened) { "<html><div></span></div><div></div></html>" }

    # One rule for divs registered with `class: nil`, and one for spans under
    # a div whose class matches /foo?/.
    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) { |_text, _elem, _attrs| @counts[:div] += 1 }

        under("div") do
          on("span", class: /foo?/) { |_text, _elem, _attrs| @counts[:span] += 1 }
        end
      end
    end


    describe "valid html" do
      it "should change the div count" do
        expect { parse(valid, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(valid, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not closed div" do
      it "should change the div count" do
        expect { parse(not_closed, tree) }.to change { @counts[:div] }.from(0).to(1)
      end

      it "should change the span count" do
        expect { parse(not_closed, tree) }.to change { @counts[:span] }.from(0).to(1)
      end
    end

    describe "not opened span" do
      it "should change the div count" do
        expect { parse(not_opened, tree) }.to change { @counts[:div] }.from(0).to(2)
      end

      it "should not change the span count" do
        expect { parse(not_opened, tree) }.to_not change { @counts[:span] }
      end
    end
  end


  # Concatenates the text argument each callback receives, per tag.
  context "text aggregation" do
    # Both slots start as nil; the callbacks treat nil as the empty string.
    before do
      @texts = { div: nil, span: nil }
    end

    # Same shapes as the "node count" fixtures, with digits as text nodes.
    let(:valid) { "<html>0<div>1<span class='fo'>2</span>3</div><div class='f'>4</div></html>" }

    let(:not_closed) { "<html>0<div>1<span class='fo'>2</span>3<div>4</div></html>" }

    let(:not_opened) { "<html>0<div>1</span>23</div><div>4</div></html>" }

    let(:tree) do
      Saxxy::ContextTree.new do
        on("div", class: nil) do |text, _elem, _attrs|
          collected = @texts[:div] || ""
          @texts[:div] = collected + text
        end

        under("div") do
          on("span", class: /foo?/) do |text, _elem, _attrs|
            collected = @texts[:span] || ""
            @texts[:span] = collected + text
          end
        end
      end
    end


    describe "valid html" do
      it "should change the div text" do
        expect { parse(valid, tree) }.to change { @texts[:div] }.to("123")
      end

      it "should change the span text" do
        expect { parse(valid, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not closed div" do
      it "should change the div text" do
        expect { parse(not_closed, tree) }.to change { @texts[:div] }.to("41234")
      end

      it "should change the span text" do
        expect { parse(not_closed, tree) }.to change { @texts[:span] }.to("2")
      end
    end

    describe "not opened span" do
      it "should change the div text" do
        expect { parse(not_opened, tree) }.to change { @texts[:div] }.to("1234")
      end

      it "should not change the span text" do
        expect { parse(not_opened, tree) }.to_not change { @texts[:span] }
      end
    end
  end

end