pupa 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75)
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.travis.yml +5 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE +20 -0
  7. data/README.md +52 -0
  8. data/Rakefile +37 -0
  9. data/USAGE +1 -0
  10. data/lib/pupa/errors.rb +30 -0
  11. data/lib/pupa/logger.rb +37 -0
  12. data/lib/pupa/models/base.rb +190 -0
  13. data/lib/pupa/models/concerns/contactable.rb +34 -0
  14. data/lib/pupa/models/concerns/identifiable.rb +26 -0
  15. data/lib/pupa/models/concerns/linkable.rb +26 -0
  16. data/lib/pupa/models/concerns/nameable.rb +34 -0
  17. data/lib/pupa/models/concerns/sourceable.rb +26 -0
  18. data/lib/pupa/models/concerns/timestamps.rb +22 -0
  19. data/lib/pupa/models/contact_detail_list.rb +28 -0
  20. data/lib/pupa/models/membership.rb +37 -0
  21. data/lib/pupa/models/organization.rb +40 -0
  22. data/lib/pupa/models/person.rb +35 -0
  23. data/lib/pupa/models/post.rb +28 -0
  24. data/lib/pupa/processor/client.rb +42 -0
  25. data/lib/pupa/processor/dependency_graph.rb +18 -0
  26. data/lib/pupa/processor/helper.rb +15 -0
  27. data/lib/pupa/processor/middleware/logger.rb +37 -0
  28. data/lib/pupa/processor/middleware/parse_html.rb +16 -0
  29. data/lib/pupa/processor/persistence.rb +80 -0
  30. data/lib/pupa/processor/yielder.rb +50 -0
  31. data/lib/pupa/processor.rb +351 -0
  32. data/lib/pupa/refinements/faraday_middleware.rb +32 -0
  33. data/lib/pupa/refinements/json-schema.rb +36 -0
  34. data/lib/pupa/runner.rb +185 -0
  35. data/lib/pupa/version.rb +3 -0
  36. data/lib/pupa.rb +31 -0
  37. data/pupa.gemspec +34 -0
  38. data/schemas/popolo/contact_detail.json +44 -0
  39. data/schemas/popolo/identifier.json +18 -0
  40. data/schemas/popolo/link.json +19 -0
  41. data/schemas/popolo/membership.json +86 -0
  42. data/schemas/popolo/organization.json +104 -0
  43. data/schemas/popolo/other_name.json +28 -0
  44. data/schemas/popolo/person.json +130 -0
  45. data/schemas/popolo/post.json +78 -0
  46. data/spec/cassettes/31ac91ccad069eefc07d96cfbe66fa66c1b41fcf.yml +56 -0
  47. data/spec/cassettes/4ff54d737afb5d693653752d7bf234a405a80172.yml +48 -0
  48. data/spec/cassettes/898049a22e6ca51dfa2510d9e0e0207a5c396524.yml +54 -0
  49. data/spec/cassettes/ce69ff734ce852d2bfaa482bbf55d7ffb4762e87.yml +26 -0
  50. data/spec/cassettes/da629b01e0836deda8a5540a4e6a08783dd7aef9.yml +46 -0
  51. data/spec/cassettes/e398f35bea86b3d4c87a6934bae1eb7fca8744f9.yml +26 -0
  52. data/spec/logger_spec.rb +4 -0
  53. data/spec/models/base_spec.rb +194 -0
  54. data/spec/models/concerns/contactable_spec.rb +37 -0
  55. data/spec/models/concerns/identifiable_spec.rb +25 -0
  56. data/spec/models/concerns/linkable_spec.rb +25 -0
  57. data/spec/models/concerns/nameable_spec.rb +25 -0
  58. data/spec/models/concerns/sourceable_spec.rb +25 -0
  59. data/spec/models/concerns/timestamps_spec.rb +32 -0
  60. data/spec/models/contact_detail_list_spec.rb +44 -0
  61. data/spec/models/membership_spec.rb +30 -0
  62. data/spec/models/organization_spec.rb +24 -0
  63. data/spec/models/person_spec.rb +24 -0
  64. data/spec/models/post_spec.rb +19 -0
  65. data/spec/processor/client_spec.rb +4 -0
  66. data/spec/processor/dependency_graph_spec.rb +4 -0
  67. data/spec/processor/helper_spec.rb +4 -0
  68. data/spec/processor/middleware/logger_spec.rb +87 -0
  69. data/spec/processor/middleware/parse_html_spec.rb +92 -0
  70. data/spec/processor/persistence_spec.rb +41 -0
  71. data/spec/processor/yielder_spec.rb +55 -0
  72. data/spec/processor_spec.rb +268 -0
  73. data/spec/runner_spec.rb +85 -0
  74. data/spec/spec_helper.rb +17 -0
  75. metadata +342 -0
@@ -0,0 +1,87 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ # @see test/adapters/logger_test.rb in faraday
4
+ describe Pupa::Processor::Middleware::Logger do
5
+ let :io do
6
+ StringIO.new
7
+ end
8
+
9
+ context 'with DEBUG log level' do
10
+ let :logger do
11
+ logger = Logger.new(io)
12
+ logger.level = Logger::DEBUG
13
+ logger
14
+ end
15
+
16
+ let :connection do
17
+ Faraday.new do |connection|
18
+ connection.use Pupa::Processor::Middleware::Logger, logger
19
+ connection.adapter :test do |stubs|
20
+ stubs.get('/hello') { [200, {'Content-Type' => 'text/html'}, 'hello'] }
21
+ end
22
+ end
23
+ end
24
+
25
+ before :each do
26
+ @response = connection.get('/hello', nil, :accept => 'text/html')
27
+ end
28
+
29
+ it 'should still return output' do
30
+ @response.body.should == 'hello'
31
+ end
32
+
33
+ it 'should log the method and URL' do
34
+ io.string.should match('get http:/hello')
35
+ end
36
+
37
+ it 'should log request headers' do
38
+ io.string.should match('Accept: "text/html')
39
+ end
40
+ end
41
+
42
+ context 'with INFO log level' do
43
+ let :logger do
44
+ logger = Logger.new(io)
45
+ logger.level = Logger::INFO
46
+ logger
47
+ end
48
+
49
+ let :connection do
50
+ Faraday.new do |connection|
51
+ connection.use Pupa::Processor::Middleware::Logger, logger
52
+ connection.adapter :test do |stubs|
53
+ stubs.get('/hello') { [200, {'Content-Type' => 'text/html'}, 'hello'] }
54
+ stubs.post('/hello') { [200, {'Content-Type' => 'text/html'}, 'hello'] }
55
+ end
56
+ end
57
+ end
58
+
59
+ context 'with GET request' do
60
+ before :each do
61
+ connection.get('/hello', nil, :accept => 'text/html')
62
+ end
63
+
64
+ it 'should log the method and URL' do
65
+ io.string.should match('get http:/hello')
66
+ end
67
+
68
+ it 'should not log request headers' do
69
+ io.string.should_not match('Accept: "text/html')
70
+ end
71
+ end
72
+
73
+ context 'with POST request' do
74
+ before :each do
75
+ connection.post('/hello', 'foo=bar', :accept => 'text/html')
76
+ end
77
+
78
+ it 'should log the method and URL' do
79
+ io.string.should match('post http:/hello foo=bar')
80
+ end
81
+
82
+ it 'should not log request headers' do
83
+ io.string.should_not match('Accept: "text/html')
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,92 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ # @see spec/helper.rb and spec/parse_xml_spec.rb in faraday_middleware
4
+ describe Pupa::Processor::Middleware::ParseHtml do
5
+ let(:options) { Hash.new }
6
+ let(:headers) { Hash.new }
7
+ let(:middleware) {
8
+ described_class.new(lambda {|env|
9
+ Faraday::Response.new(env)
10
+ }, options)
11
+ }
12
+
13
+ def process(body, content_type = nil, options = {})
14
+ env = {
15
+ :body => body, :request => options,
16
+ :response_headers => Faraday::Utils::Headers.new(headers)
17
+ }
18
+ env[:response_headers]['content-type'] = content_type if content_type
19
+ middleware.call(env)
20
+ end
21
+
22
+ let(:html) { '<html><head><title>foo</title></head><body>bar</body></html>' }
23
+ let(:title) { 'foo' }
24
+ let(:body) { 'bar' }
25
+
26
+ context "no type matching" do
27
+ it "doesn't change nil body" do
28
+ expect(process(nil).body).to be_nil
29
+ end
30
+
31
+ it "turns empty body into nil" do
32
+ expect(process('').body).to be_nil
33
+ end
34
+
35
+ it "parses html body" do
36
+ response = process(html)
37
+ expect(response.body.at_css('title').text).to eq(title)
38
+ expect(response.body.at_css('body').text).to eq(body)
39
+ expect(response.env[:raw_body]).to be_nil
40
+ end
41
+ end
42
+
43
+ context "with preserving raw" do
44
+ let(:options) { {:preserve_raw => true} }
45
+
46
+ it "parses html body" do
47
+ response = process(html)
48
+ expect(response.body.at_css('title').text).to eq(title)
49
+ expect(response.body.at_css('body').text).to eq(body)
50
+ expect(response.env[:raw_body]).to eq(html)
51
+ end
52
+
53
+ it "can opt out of preserving raw" do
54
+ response = process(html, nil, :preserve_raw => false)
55
+ expect(response.env[:raw_body]).to be_nil
56
+ end
57
+ end
58
+
59
+ context "with regexp type matching" do
60
+ let(:options) { {:content_type => /\bhtml$/} }
61
+
62
+ it "parses html body of correct type" do
63
+ response = process(html, 'text/html')
64
+ expect(response.body.at_css('title').text).to eq(title)
65
+ expect(response.body.at_css('body').text).to eq(body)
66
+ end
67
+
68
+ it "ignores html body of incorrect type" do
69
+ response = process(html, 'application/xml')
70
+ expect(response.body).to eq(html)
71
+ end
72
+ end
73
+
74
+ context "with array type matching" do
75
+ let(:options) { {:content_type => %w[a/b c/d]} }
76
+
77
+ it "parses html body of correct type" do
78
+ expect(process(html, 'a/b').body).to be_a(Nokogiri::HTML::Document)
79
+ expect(process(html, 'c/d').body).to be_a(Nokogiri::HTML::Document)
80
+ end
81
+
82
+ it "ignores html body of incorrect type" do
83
+ expect(process(html, 'a/d').body).not_to be_a(Nokogiri::HTML::Document)
84
+ end
85
+ end
86
+
87
+ it "doesn't choke on invalid html" do
88
+ ['{!', '"a"', 'true', 'null', '1'].each do |data|
89
+ expect{ process(data) }.to_not raise_error
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,41 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Pupa::Processor::Persistence do
4
+ before :all do
5
+ Pupa.session = Moped::Session.new(['localhost:27017'], database: 'pupa_test')
6
+ Pupa.session.collections.each(&:drop)
7
+
8
+ Pupa::Processor::Persistence.new(Pupa::Person.new(name: 'existing')).save
9
+
10
+ Pupa.session[:people].insert(_type: 'pupa/person', name: 'non-unique')
11
+ Pupa.session[:people].insert(_type: 'pupa/person', name: 'non-unique')
12
+ end
13
+
14
+ describe '#find' do
15
+ it 'should return nil if no matches' do
16
+ Pupa::Processor::Persistence.find(_type: 'pupa/person', name: 'nonexistent').should == nil
17
+ end
18
+
19
+ it 'should return a document if one match' do
20
+ Pupa::Processor::Persistence.find(_type: 'pupa/person', name: 'existing').should be_a(Hash)
21
+ end
22
+
23
+ it 'should raise an error if many matches' do
24
+ expect{Pupa::Processor::Persistence.find(_type: 'pupa/person', name: 'non-unique')}.to raise_error(Pupa::Errors::TooManyMatches)
25
+ end
26
+ end
27
+
28
+ describe '#save' do
29
+ it 'should insert a document if no matches' do
30
+ Pupa::Processor::Persistence.new(Pupa::Person.new(_id: 'new', name: 'new')).save.should == 'new'
31
+ end
32
+
33
+ it 'should update a document if one match' do
34
+ Pupa::Processor::Persistence.new(Pupa::Person.new(_id: 'existing', name: 'existing')).save.should_not == 'existing'
35
+ end
36
+
37
+ it 'should raise an error if many matches' do
38
+ expect{Pupa::Processor::Persistence.new(Pupa::Person.new(name: 'non-unique')).save}.to raise_error(Pupa::Errors::TooManyMatches)
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,55 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Pupa::Processor::Yielder do
4
+ let :yielder do
5
+ Pupa::Processor::Yielder.new do
6
+ 10.times do |n|
7
+ Fiber.yield(n)
8
+ end
9
+ end
10
+ end
11
+
12
+ let :raiser do
13
+ Pupa::Processor::Yielder.new do
14
+ raise
15
+ end
16
+ end
17
+
18
+ describe '#each' do
19
+ it 'should iterate over the items in the enumeration' do
20
+ array = []
21
+ yielder.each do |n|
22
+ array << n
23
+ end
24
+ array.should == (0..9).to_a
25
+ end
26
+
27
+ it 'should be composable with other iterators' do
28
+ yielder.each.map{|n| n}.should == (0..9).to_a
29
+ end
30
+ end
31
+
32
+ describe '#next' do
33
+ it 'should return the next item in the enumeration' do
34
+ array = []
35
+ 10.times do |n|
36
+ array << yielder.next
37
+ end
38
+ array.should == (0..9).to_a
39
+ end
40
+
41
+ it 'should raise an error if the enumerator is at the end' do
42
+ expect{11.times{yielder.next}}.to raise_error(StopIteration)
43
+ end
44
+ end
45
+
46
+ describe '#to_enum' do
47
+ it 'should return an enumerator' do
48
+ yielder.to_enum.should be_a(Enumerator)
49
+ end
50
+
51
+ it 'should return a lazy enumerator' do
52
+ expect{raiser.to_enum}.to_not raise_error
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,268 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Pupa::Processor do
4
+ class PersonProcessor < Pupa::Processor
5
+ def person
6
+ @person ||= make_person_valid
7
+ end
8
+
9
+ def make_person_valid
10
+ @person = Pupa::Person.new(name: 'foo')
11
+ end
12
+
13
+ def make_person_invalid
14
+ @person = Pupa::Person.new
15
+ end
16
+
17
+ def scrape_people
18
+ Fiber.yield(person)
19
+ end
20
+ end
21
+
22
+ before :all do
23
+ PersonProcessor.add_scraping_task(:people)
24
+ end
25
+
26
+ let :io do
27
+ StringIO.new
28
+ end
29
+
30
+ let :processor do
31
+ PersonProcessor.new('/tmp', level: 'WARN', logdev: io)
32
+ end
33
+
34
+ describe '#get' do
35
+ it 'should send a GET request' do
36
+ processor.get('http://httpbin.org/get', 'foo=bar')['args'].should == {'foo' => 'bar'}
37
+ end
38
+
39
+ it 'should automatically parse the response' do
40
+ processor.get('http://httpbin.org/get').should be_a(Hash)
41
+ end
42
+ end
43
+
44
+ describe '#post' do
45
+ it 'should send a POST request' do
46
+ processor.post('http://httpbin.org/post', 'foo=bar')['form'].should == {'foo' => 'bar'}
47
+ end
48
+
49
+ it 'should automatically parse the response' do
50
+ processor.post('http://httpbin.org/post').should be_a(Hash)
51
+ end
52
+ end
53
+
54
+ describe '#add_scraping_task' do
55
+ it 'should add a scraping task and define a lazy method' do
56
+ PersonProcessor.tasks.should == [:people]
57
+ processor.should respond_to(:people)
58
+ end
59
+ end
60
+
61
+ describe '#dump_scraped_objects' do
62
+ let :path do
63
+ path = "/tmp/person_#{processor.person._id}.json"
64
+ end
65
+
66
+ it 'should not overwrite an existing file' do
67
+ FileUtils.touch(path)
68
+ expect{processor.dump_scraped_objects(:people)}.to raise_error(Pupa::Errors::DuplicateObjectIdError)
69
+ FileUtils.rm(path)
70
+ end
71
+
72
+ it 'should dump a JSON document' do
73
+ processor.dump_scraped_objects(:people)
74
+ File.exist?(path).should == true
75
+ io.string.should_not match('http://popoloproject.com/schemas/person.json')
76
+ end
77
+
78
+ it 'should validate the object' do
79
+ processor.make_person_invalid
80
+ processor.dump_scraped_objects(:people)
81
+ io.string.should match('http://popoloproject.com/schemas/person.json')
82
+ end
83
+ end
84
+
85
+ describe '#import' do
86
+ before :each do
87
+ Pupa.session = Moped::Session.new(['localhost:27017'], database: 'pupa_test')
88
+ Pupa.session.collections.each(&:drop)
89
+ end
90
+
91
+ let :graphable do
92
+ {
93
+ '1' => Pupa::Organization.new({
94
+ _id: '1',
95
+ name: 'Child',
96
+ parent_id: '3',
97
+ }),
98
+ '2' => Pupa::Organization.new({
99
+ _id: '2',
100
+ name: 'Parent',
101
+ }),
102
+ '3' => Pupa::Organization.new({
103
+ _id: '3',
104
+ name: 'Parent',
105
+ }),
106
+ }
107
+ end
108
+
109
+ let :ungraphable do
110
+ {
111
+ '4' => Pupa::Organization.new({
112
+ _id: '4',
113
+ name: 'Child',
114
+ parent: {_type: 'pupa/organization', name: 'Parent'},
115
+ }),
116
+ '5' => Pupa::Organization.new({
117
+ _id: '5',
118
+ name: 'Parent',
119
+ }),
120
+ '6' => Pupa::Organization.new({
121
+ _id: '6',
122
+ name: 'Parent',
123
+ }),
124
+ }
125
+ end
126
+
127
+ it 'should use a dependency graph if possible' do
128
+ processor.should_receive(:load_scraped_objects).and_return(graphable)
129
+
130
+ Pupa::Processor::DependencyGraph.any_instance.should_receive(:tsort).and_return(['2', '1'])
131
+ processor.import
132
+ end
133
+
134
+ it 'should not use a dependency graph if not possible' do
135
+ processor.should_receive(:load_scraped_objects).and_return(ungraphable)
136
+
137
+ Pupa::Processor::DependencyGraph.any_instance.should_not_receive(:tsort)
138
+ processor.import
139
+ end
140
+
141
+ it 'should remove duplicate objects and re-assign foreign keys' do
142
+ processor.should_receive(:load_scraped_objects).and_return(graphable)
143
+
144
+ processor.import
145
+ documents = Pupa.session[:organizations].find.entries
146
+ documents.size.should == 2
147
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => 'pupa/organization', 'name' => 'Parent'}
148
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '2'}
149
+ end
150
+
151
+ it 'should resolve foreign objects' do
152
+ processor.should_receive(:load_scraped_objects).and_return(ungraphable)
153
+
154
+ processor.import
155
+ documents = Pupa.session[:organizations].find.entries
156
+ documents.size.should == 2
157
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '5', '_type' => 'pupa/organization', 'name' => 'Parent'}
158
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '4', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '5'}
159
+ end
160
+
161
+ context 'with existing documents' do
162
+ before :each do
163
+ processor.should_receive(:load_scraped_objects).and_return(graphable)
164
+ processor.import
165
+ end
166
+
167
+ let :resolvable_foreign_key do
168
+ {
169
+ 'a' => Pupa::Organization.new({
170
+ _id: 'a',
171
+ name: 'Child',
172
+ parent_id: 'c',
173
+ }),
174
+ 'b' => Pupa::Organization.new({
175
+ _id: 'b',
176
+ name: 'Parent',
177
+ }),
178
+ 'c' => Pupa::Organization.new({
179
+ _id: 'c',
180
+ name: 'Parent',
181
+ }),
182
+ }
183
+ end
184
+
185
+ let :unresolvable_foreign_key do
186
+ {
187
+ 'a' => Pupa::Organization.new({
188
+ _id: 'a',
189
+ name: 'Child',
190
+ parent: {_type: 'pupa/organization', name: 'Parent'},
191
+ }),
192
+ 'b' => Pupa::Organization.new({
193
+ _id: 'b',
194
+ name: 'Parent',
195
+ }),
196
+ 'c' => Pupa::Organization.new({
197
+ _id: 'c',
198
+ name: 'Child',
199
+ parent_id: 'nonexistent',
200
+ }),
201
+ }
202
+ end
203
+
204
+ let :unresolvable_foreign_object do
205
+ {
206
+ 'a' => Pupa::Organization.new({
207
+ _id: 'a',
208
+ name: 'Child',
209
+ parent: {_type: 'pupa/organization', name: 'Nonexistent'},
210
+ }),
211
+ 'b' => Pupa::Organization.new({
212
+ _id: 'b',
213
+ name: 'Parent',
214
+ }),
215
+ 'c' => Pupa::Organization.new({
216
+ _id: 'c',
217
+ name: 'Child',
218
+ parent_id: 'b',
219
+ }),
220
+ }
221
+ end
222
+
223
+ let :duplicate_documents do
224
+ {
225
+ 'a' => Pupa::Organization.new({
226
+ _id: 'a',
227
+ name: 'Child',
228
+ parent: {_type: 'pupa/organization', name: 'Parent'},
229
+ }),
230
+ 'b' => Pupa::Organization.new({
231
+ _id: 'b',
232
+ name: 'Parent',
233
+ }),
234
+ 'c' => Pupa::Organization.new({
235
+ _id: 'c',
236
+ name: 'Child',
237
+ parent_id: 'b',
238
+ }),
239
+ }
240
+ end
241
+
242
+ it 'should resolve foreign keys' do
243
+ processor.should_receive(:load_scraped_objects).and_return(resolvable_foreign_key)
244
+
245
+ processor.import
246
+ documents = Pupa.session[:organizations].find.entries
247
+ documents.size.should == 2
248
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => 'pupa/organization', 'name' => 'Parent'}
249
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '2'}
250
+ end
251
+
252
+ it 'should raise an error if a foreign key cannot be resolved' do
253
+ processor.should_receive(:load_scraped_objects).and_return(unresolvable_foreign_key)
254
+ expect{processor.import}.to raise_error(Pupa::Errors::UnprocessableEntity)
255
+ end
256
+
257
+ it 'should raise an error if a foreign object cannot be resolved' do
258
+ processor.should_receive(:load_scraped_objects).and_return(unresolvable_foreign_object)
259
+ expect{processor.import}.to raise_error(Pupa::Errors::UnprocessableEntity)
260
+ end
261
+
262
+ it 'should raise an error if a duplicate was inadvertently saved' do
263
+ processor.should_receive(:load_scraped_objects).and_return(duplicate_documents)
264
+ expect{processor.import}.to raise_error(Pupa::Errors::DuplicateDocumentError)
265
+ end
266
+ end
267
+ end
268
+ end
@@ -0,0 +1,85 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Pupa::Runner do
4
+ class TestProcessor < Pupa::Processor
5
+ def scrape_people
6
+ end
7
+
8
+ def scrape_organizations
9
+ end
10
+ end
11
+
12
+ before :all do
13
+ TestProcessor.add_scraping_task(:people)
14
+ TestProcessor.add_scraping_task(:organizations)
15
+ end
16
+
17
+ let :dry_runner do
18
+ runner = Pupa::Runner.new(TestProcessor, level: 'UNKNOWN', dry_run: true)
19
+ runner.add_action(name: 'example', description: 'An example action')
20
+ runner
21
+ end
22
+
23
+ let :runner do
24
+ Pupa::Runner.new(TestProcessor, level: 'UNKNOWN')
25
+ end
26
+
27
+ describe '#initialize' do
28
+ it 'should accept default options' do
29
+ dry_runner.options.level.should_not == 'INFO'
30
+ end
31
+ end
32
+
33
+ describe '#add_action' do
34
+ it 'should add an action' do
35
+ dry_runner.actions.last.to_h.should == {name: 'example', description: 'An example action'}
36
+ end
37
+ end
38
+
39
+ describe '#run' do
40
+ def dry_run(argv = [], **kwargs)
41
+ begin
42
+ dry_runner.run(argv, kwargs)
43
+ rescue SystemExit
44
+ # pass
45
+ end
46
+ end
47
+
48
+ it 'should accept overridden options' do
49
+ dry_run(['--quiet'], level: 'ERROR')
50
+ dry_runner.options.level.should == 'ERROR'
51
+ end
52
+
53
+ it 'should use default actions if none set' do
54
+ dry_run
55
+ dry_runner.options.actions.should == %w(scrape import)
56
+ end
57
+
58
+ it 'should use default tasks if none set' do
59
+ dry_run
60
+ dry_runner.options.tasks.should == %i(people organizations)
61
+ end
62
+
63
+ # Unlike an action, it's not possible for a task to be undefined, because
64
+ # `add_scraping_task` would raise an error first.
65
+ it 'should abort if the action is not defined' do
66
+ expect{dry_runner.run(['--action', 'example'])}.to raise_error(SystemExit, "`example` is not a rspec action. See `rspec --help` for a list of available actions.")
67
+ end
68
+
69
+ it 'should not run any actions on a dry run' do
70
+ expect{dry_runner.run([])}.to raise_error(SystemExit, nil)
71
+ end
72
+
73
+ it 'should run actions' do
74
+ TestProcessor.any_instance.should_receive(:dump_scraped_objects).twice
75
+ TestProcessor.any_instance.should_receive(:import)
76
+ runner.run([])
77
+ end
78
+
79
+ it 'should run tasks' do
80
+ TestProcessor.any_instance.should_receive(:people).and_return([])
81
+ TestProcessor.any_instance.should_receive(:organizations).and_return([])
82
+ runner.run([])
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+
3
+ require 'coveralls'
4
+ Coveralls.wear!
5
+
6
+ require 'rspec'
7
+ require 'vcr'
8
+ require File.dirname(__FILE__) + '/../lib/pupa'
9
+
10
+ VCR.configure do |c|
11
+ c.cassette_library_dir = 'spec/cassettes'
12
+ c.hook_into :faraday
13
+
14
+ c.around_http_request do |request|
15
+ VCR.use_cassette(Digest::SHA1.hexdigest(request.uri + request.body + request.headers.to_s), &request)
16
+ end
17
+ end