pupa 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.travis.yml +5 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE +20 -0
  7. data/README.md +52 -0
  8. data/Rakefile +37 -0
  9. data/USAGE +1 -0
  10. data/lib/pupa/errors.rb +30 -0
  11. data/lib/pupa/logger.rb +37 -0
  12. data/lib/pupa/models/base.rb +190 -0
  13. data/lib/pupa/models/concerns/contactable.rb +34 -0
  14. data/lib/pupa/models/concerns/identifiable.rb +26 -0
  15. data/lib/pupa/models/concerns/linkable.rb +26 -0
  16. data/lib/pupa/models/concerns/nameable.rb +34 -0
  17. data/lib/pupa/models/concerns/sourceable.rb +26 -0
  18. data/lib/pupa/models/concerns/timestamps.rb +22 -0
  19. data/lib/pupa/models/contact_detail_list.rb +28 -0
  20. data/lib/pupa/models/membership.rb +37 -0
  21. data/lib/pupa/models/organization.rb +40 -0
  22. data/lib/pupa/models/person.rb +35 -0
  23. data/lib/pupa/models/post.rb +28 -0
  24. data/lib/pupa/processor/client.rb +42 -0
  25. data/lib/pupa/processor/dependency_graph.rb +18 -0
  26. data/lib/pupa/processor/helper.rb +15 -0
  27. data/lib/pupa/processor/middleware/logger.rb +37 -0
  28. data/lib/pupa/processor/middleware/parse_html.rb +16 -0
  29. data/lib/pupa/processor/persistence.rb +80 -0
  30. data/lib/pupa/processor/yielder.rb +50 -0
  31. data/lib/pupa/processor.rb +351 -0
  32. data/lib/pupa/refinements/faraday_middleware.rb +32 -0
  33. data/lib/pupa/refinements/json-schema.rb +36 -0
  34. data/lib/pupa/runner.rb +185 -0
  35. data/lib/pupa/version.rb +3 -0
  36. data/lib/pupa.rb +31 -0
  37. data/pupa.gemspec +34 -0
  38. data/schemas/popolo/contact_detail.json +44 -0
  39. data/schemas/popolo/identifier.json +18 -0
  40. data/schemas/popolo/link.json +19 -0
  41. data/schemas/popolo/membership.json +86 -0
  42. data/schemas/popolo/organization.json +104 -0
  43. data/schemas/popolo/other_name.json +28 -0
  44. data/schemas/popolo/person.json +130 -0
  45. data/schemas/popolo/post.json +78 -0
  46. data/spec/cassettes/31ac91ccad069eefc07d96cfbe66fa66c1b41fcf.yml +56 -0
  47. data/spec/cassettes/4ff54d737afb5d693653752d7bf234a405a80172.yml +48 -0
  48. data/spec/cassettes/898049a22e6ca51dfa2510d9e0e0207a5c396524.yml +54 -0
  49. data/spec/cassettes/ce69ff734ce852d2bfaa482bbf55d7ffb4762e87.yml +26 -0
  50. data/spec/cassettes/da629b01e0836deda8a5540a4e6a08783dd7aef9.yml +46 -0
  51. data/spec/cassettes/e398f35bea86b3d4c87a6934bae1eb7fca8744f9.yml +26 -0
  52. data/spec/logger_spec.rb +4 -0
  53. data/spec/models/base_spec.rb +194 -0
  54. data/spec/models/concerns/contactable_spec.rb +37 -0
  55. data/spec/models/concerns/identifiable_spec.rb +25 -0
  56. data/spec/models/concerns/linkable_spec.rb +25 -0
  57. data/spec/models/concerns/nameable_spec.rb +25 -0
  58. data/spec/models/concerns/sourceable_spec.rb +25 -0
  59. data/spec/models/concerns/timestamps_spec.rb +32 -0
  60. data/spec/models/contact_detail_list_spec.rb +44 -0
  61. data/spec/models/membership_spec.rb +30 -0
  62. data/spec/models/organization_spec.rb +24 -0
  63. data/spec/models/person_spec.rb +24 -0
  64. data/spec/models/post_spec.rb +19 -0
  65. data/spec/processor/client_spec.rb +4 -0
  66. data/spec/processor/dependency_graph_spec.rb +4 -0
  67. data/spec/processor/helper_spec.rb +4 -0
  68. data/spec/processor/middleware/logger_spec.rb +87 -0
  69. data/spec/processor/middleware/parse_html_spec.rb +92 -0
  70. data/spec/processor/persistence_spec.rb +41 -0
  71. data/spec/processor/yielder_spec.rb +55 -0
  72. data/spec/processor_spec.rb +268 -0
  73. data/spec/runner_spec.rb +85 -0
  74. data/spec/spec_helper.rb +17 -0
  75. metadata +342 -0
@@ -0,0 +1,87 @@
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')

# Checks what the logging middleware writes at the DEBUG and INFO log levels:
# the method and URL are expected at both levels, request headers only at
# DEBUG.
#
# @see test/adapters/logger_test.rb in faraday
describe Pupa::Processor::Middleware::Logger do
  # In-memory log device, so examples can inspect the log output as a string.
  let(:io) { StringIO.new }

  context 'with DEBUG log level' do
    let(:logger) do
      Logger.new(io).tap { |log| log.level = Logger::DEBUG }
    end

    # Connection whose test adapter answers GET /hello without network access.
    let(:connection) do
      Faraday.new do |faraday|
        faraday.use Pupa::Processor::Middleware::Logger, logger
        faraday.adapter(:test) do |stubs|
          stubs.get('/hello') { [200, {'Content-Type' => 'text/html'}, 'hello'] }
        end
      end
    end

    before(:each) do
      @response = connection.get('/hello', nil, :accept => 'text/html')
    end

    it 'should still return output' do
      @response.body.should == 'hello'
    end

    it 'should log the method and URL' do
      io.string.should match('get http:/hello')
    end

    it 'should log request headers' do
      io.string.should match('Accept: "text/html')
    end
  end

  context 'with INFO log level' do
    let(:logger) do
      Logger.new(io).tap { |log| log.level = Logger::INFO }
    end

    # Same stubbed connection as above, with POST /hello answered as well.
    let(:connection) do
      Faraday.new do |faraday|
        faraday.use Pupa::Processor::Middleware::Logger, logger
        faraday.adapter(:test) do |stubs|
          stubs.get('/hello') { [200, {'Content-Type' => 'text/html'}, 'hello'] }
          stubs.post('/hello') { [200, {'Content-Type' => 'text/html'}, 'hello'] }
        end
      end
    end

    context 'with GET request' do
      before(:each) do
        connection.get('/hello', nil, :accept => 'text/html')
      end

      it 'should log the method and URL' do
        io.string.should match('get http:/hello')
      end

      it 'should not log request headers' do
        io.string.should_not match('Accept: "text/html')
      end
    end

    context 'with POST request' do
      before(:each) do
        connection.post('/hello', 'foo=bar', :accept => 'text/html')
      end

      it 'should log the method and URL' do
        io.string.should match('post http:/hello foo=bar')
      end

      it 'should not log request headers' do
        io.string.should_not match('Accept: "text/html')
      end
    end
  end
end
@@ -0,0 +1,92 @@
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')

# Runs the HTML-parsing middleware against hand-built response environments,
# varying the content-type matching and raw-body preservation options.
#
# @see spec/helper.rb and spec/parse_xml_spec.rb in faraday_middleware
describe Pupa::Processor::Middleware::ParseHtml do
  let(:options) { {} }
  let(:headers) { {} }

  # The middleware under test, wrapped around a terminal app that simply turns
  # the environment into a response.
  let(:middleware) do
    app = lambda { |env| Faraday::Response.new(env) }
    described_class.new(app, options)
  end

  # Pushes a body through the middleware and returns the resulting response.
  #
  # @param body [String, nil] the raw response body
  # @param content_type [String, nil] the Content-Type header to set, if any
  # @param options [Hash] per-request options placed under the :request key
  def process(body, content_type = nil, options = {})
    response_headers = Faraday::Utils::Headers.new(headers)
    response_headers['content-type'] = content_type if content_type

    env = {
      :body => body,
      :request => options,
      :response_headers => response_headers,
    }
    middleware.call(env)
  end

  let(:html) { '<html><head><title>foo</title></head><body>bar</body></html>' }
  let(:title) { 'foo' }
  let(:body) { 'bar' }

  context "no type matching" do
    it "doesn't change nil body" do
      expect(process(nil).body).to be_nil
    end

    it "turns empty body into nil" do
      expect(process('').body).to be_nil
    end

    it "parses html body" do
      response = process(html)
      document = response.body
      expect(document.at_css('title').text).to eq(title)
      expect(document.at_css('body').text).to eq(body)
      expect(response.env[:raw_body]).to be_nil
    end
  end

  context "with preserving raw" do
    let(:options) { {:preserve_raw => true} }

    it "parses html body" do
      response = process(html)
      document = response.body
      expect(document.at_css('title').text).to eq(title)
      expect(document.at_css('body').text).to eq(body)
      expect(response.env[:raw_body]).to eq(html)
    end

    it "can opt out of preserving raw" do
      response = process(html, nil, :preserve_raw => false)
      expect(response.env[:raw_body]).to be_nil
    end
  end

  context "with regexp type matching" do
    let(:options) { {:content_type => /\bhtml$/} }

    it "parses html body of correct type" do
      document = process(html, 'text/html').body
      expect(document.at_css('title').text).to eq(title)
      expect(document.at_css('body').text).to eq(body)
    end

    it "ignores html body of incorrect type" do
      response = process(html, 'application/xml')
      expect(response.body).to eq(html)
    end
  end

  context "with array type matching" do
    let(:options) { {:content_type => %w[a/b c/d]} }

    it "parses html body of correct type" do
      expect(process(html, 'a/b').body).to be_a(Nokogiri::HTML::Document)
      expect(process(html, 'c/d').body).to be_a(Nokogiri::HTML::Document)
    end

    it "ignores html body of incorrect type" do
      expect(process(html, 'a/d').body).not_to be_a(Nokogiri::HTML::Document)
    end
  end

  it "doesn't choke on invalid html" do
    ['{!', '"a"', 'true', 'null', '1'].each do |input|
      expect { process(input) }.to_not raise_error
    end
  end
end
@@ -0,0 +1,41 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')

# Exercises lookup and saving against the `pupa_test` database on
# localhost:27017, seeded with one saved person and two raw documents that
# share the same name.
describe Pupa::Processor::Persistence do
  before(:all) do
    Pupa.session = Moped::Session.new(['localhost:27017'], database: 'pupa_test')
    Pupa.session.collections.each(&:drop)

    # One person saved through the class under test.
    Pupa::Processor::Persistence.new(Pupa::Person.new(name: 'existing')).save

    # Two identical documents inserted directly, for the "many matches" cases.
    2.times do
      Pupa.session[:people].insert(_type: 'pupa/person', name: 'non-unique')
    end
  end

  describe '#find' do
    it 'should return nil if no matches' do
      match = described_class.find(_type: 'pupa/person', name: 'nonexistent')
      match.should be_nil
    end

    it 'should return a document if one match' do
      match = described_class.find(_type: 'pupa/person', name: 'existing')
      match.should be_a(Hash)
    end

    it 'should raise an error if many matches' do
      expect {
        described_class.find(_type: 'pupa/person', name: 'non-unique')
      }.to raise_error(Pupa::Errors::TooManyMatches)
    end
  end

  describe '#save' do
    it 'should insert a document if no matches' do
      person = Pupa::Person.new(_id: 'new', name: 'new')
      described_class.new(person).save.should == 'new'
    end

    it 'should update a document if one match' do
      person = Pupa::Person.new(_id: 'existing', name: 'existing')
      # The returned ID is expected to differ from the one passed in.
      described_class.new(person).save.should_not == 'existing'
    end

    it 'should raise an error if many matches' do
      person = Pupa::Person.new(name: 'non-unique')
      expect {
        described_class.new(person).save
      }.to raise_error(Pupa::Errors::TooManyMatches)
    end
  end
end
@@ -0,0 +1,55 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')

# Covers iteration over a yielder whose block hands back the integers 0..9
# through Fiber.yield, plus a yielder whose block raises as soon as it runs.
describe Pupa::Processor::Yielder do
  let(:yielder) do
    Pupa::Processor::Yielder.new do
      10.times { |n| Fiber.yield(n) }
    end
  end

  let(:raiser) do
    Pupa::Processor::Yielder.new { raise }
  end

  describe '#each' do
    it 'should iterate over the items in the enumeration' do
      collected = []
      yielder.each { |item| collected << item }
      collected.should == (0..9).to_a
    end

    it 'should be composable with other iterators' do
      yielder.each.map { |item| item }.should == (0..9).to_a
    end
  end

  describe '#next' do
    it 'should return the next item in the enumeration' do
      collected = Array.new(10) { yielder.next }
      collected.should == (0..9).to_a
    end

    it 'should raise an error if the enumerator is at the end' do
      # Ten items are available; the eleventh call runs off the end.
      expect { 11.times { yielder.next } }.to raise_error(StopIteration)
    end
  end

  describe '#to_enum' do
    it 'should return an enumerator' do
      yielder.to_enum.should be_a(Enumerator)
    end

    it 'should return a lazy enumerator' do
      # Building the enumerator must not run the (raising) block.
      expect { raiser.to_enum }.to_not raise_error
    end
  end
end
@@ -0,0 +1,268 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Covers the processor's HTTP helpers, scraping-task registration, dumping of
# scraped objects to disk and importing them into the `pupa_test` database.
describe Pupa::Processor do
  # Minimal processor with a single `people` task that yields one person.
  # The person can be swapped for a valid or an invalid one by the examples.
  class PersonProcessor < Pupa::Processor
    def person
      @person ||= make_person_valid
    end

    def make_person_valid
      @person = Pupa::Person.new(name: 'foo')
    end

    def make_person_invalid
      @person = Pupa::Person.new
    end

    def scrape_people
      Fiber.yield(person)
    end
  end

  before :all do
    PersonProcessor.add_scraping_task(:people)
  end

  # In-memory log device, so examples can inspect the log output as a string.
  let :io do
    StringIO.new
  end

  let :processor do
    PersonProcessor.new('/tmp', level: 'WARN', logdev: io)
  end

  describe '#get' do
    it 'should send a GET request' do
      processor.get('http://httpbin.org/get', 'foo=bar')['args'].should == {'foo' => 'bar'}
    end

    it 'should automatically parse the response' do
      processor.get('http://httpbin.org/get').should be_a(Hash)
    end
  end

  describe '#post' do
    it 'should send a POST request' do
      processor.post('http://httpbin.org/post', 'foo=bar')['form'].should == {'foo' => 'bar'}
    end

    it 'should automatically parse the response' do
      processor.post('http://httpbin.org/post').should be_a(Hash)
    end
  end

  describe '#add_scraping_task' do
    it 'should add a scraping task and define a lazy method' do
      PersonProcessor.tasks.should == [:people]
      processor.should respond_to(:people)
    end
  end

  describe '#dump_scraped_objects' do
    # Where the processor's current person is expected to be dumped.
    let :path do
      "/tmp/person_#{processor.person._id}.json"
    end

    # Clean up in a hook rather than inline, so the file is removed even when
    # an expectation fails and so that examples which dump a document do not
    # leave it behind in /tmp. `rm_f` tolerates the file not existing.
    after :each do
      FileUtils.rm_f(path)
    end

    it 'should not overwrite an existing file' do
      FileUtils.touch(path)
      expect{processor.dump_scraped_objects(:people)}.to raise_error(Pupa::Errors::DuplicateObjectIdError)
    end

    it 'should dump a JSON document' do
      processor.dump_scraped_objects(:people)
      File.exist?(path).should == true
      io.string.should_not match('http://popoloproject.com/schemas/person.json')
    end

    it 'should validate the object' do
      processor.make_person_invalid
      processor.dump_scraped_objects(:people)
      io.string.should match('http://popoloproject.com/schemas/person.json')
    end
  end

  describe '#import' do
    before :each do
      Pupa.session = Moped::Session.new(['localhost:27017'], database: 'pupa_test')
      Pupa.session.collections.each(&:drop)
    end

    # Objects linked only through foreign keys (`parent_id`).
    let :graphable do
      {
        '1' => Pupa::Organization.new(_id: '1', name: 'Child', parent_id: '3'),
        '2' => Pupa::Organization.new(_id: '2', name: 'Parent'),
        '3' => Pupa::Organization.new(_id: '3', name: 'Parent'),
      }
    end

    # Objects where one link is a foreign object (`parent`) rather than a key.
    let :ungraphable do
      {
        '4' => Pupa::Organization.new(_id: '4', name: 'Child', parent: {_type: 'pupa/organization', name: 'Parent'}),
        '5' => Pupa::Organization.new(_id: '5', name: 'Parent'),
        '6' => Pupa::Organization.new(_id: '6', name: 'Parent'),
      }
    end

    it 'should use a dependency graph if possible' do
      processor.should_receive(:load_scraped_objects).and_return(graphable)

      Pupa::Processor::DependencyGraph.any_instance.should_receive(:tsort).and_return(['2', '1'])
      processor.import
    end

    it 'should not use a dependency graph if not possible' do
      processor.should_receive(:load_scraped_objects).and_return(ungraphable)

      Pupa::Processor::DependencyGraph.any_instance.should_not_receive(:tsort)
      processor.import
    end

    it 'should remove duplicate objects and re-assign foreign keys' do
      processor.should_receive(:load_scraped_objects).and_return(graphable)

      processor.import
      documents = Pupa.session[:organizations].find.entries
      documents.size.should == 2
      documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => 'pupa/organization', 'name' => 'Parent'}
      documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '2'}
    end

    it 'should resolve foreign objects' do
      processor.should_receive(:load_scraped_objects).and_return(ungraphable)

      processor.import
      documents = Pupa.session[:organizations].find.entries
      documents.size.should == 2
      documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '5', '_type' => 'pupa/organization', 'name' => 'Parent'}
      documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '4', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '5'}
    end

    context 'with existing documents' do
      # Import the graphable fixture first, so each example starts with
      # documents already in the database.
      before :each do
        processor.should_receive(:load_scraped_objects).and_return(graphable)
        processor.import
      end

      let :resolvable_foreign_key do
        {
          'a' => Pupa::Organization.new(_id: 'a', name: 'Child', parent_id: 'c'),
          'b' => Pupa::Organization.new(_id: 'b', name: 'Parent'),
          'c' => Pupa::Organization.new(_id: 'c', name: 'Parent'),
        }
      end

      let :unresolvable_foreign_key do
        {
          'a' => Pupa::Organization.new(_id: 'a', name: 'Child', parent: {_type: 'pupa/organization', name: 'Parent'}),
          'b' => Pupa::Organization.new(_id: 'b', name: 'Parent'),
          'c' => Pupa::Organization.new(_id: 'c', name: 'Child', parent_id: 'nonexistent'),
        }
      end

      let :unresolvable_foreign_object do
        {
          'a' => Pupa::Organization.new(_id: 'a', name: 'Child', parent: {_type: 'pupa/organization', name: 'Nonexistent'}),
          'b' => Pupa::Organization.new(_id: 'b', name: 'Parent'),
          'c' => Pupa::Organization.new(_id: 'c', name: 'Child', parent_id: 'b'),
        }
      end

      let :duplicate_documents do
        {
          'a' => Pupa::Organization.new(_id: 'a', name: 'Child', parent: {_type: 'pupa/organization', name: 'Parent'}),
          'b' => Pupa::Organization.new(_id: 'b', name: 'Parent'),
          'c' => Pupa::Organization.new(_id: 'c', name: 'Child', parent_id: 'b'),
        }
      end

      it 'should resolve foreign keys' do
        processor.should_receive(:load_scraped_objects).and_return(resolvable_foreign_key)

        processor.import
        documents = Pupa.session[:organizations].find.entries
        documents.size.should == 2
        documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => 'pupa/organization', 'name' => 'Parent'}
        documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '2'}
      end

      it 'should raise an error if a foreign key cannot be resolved' do
        processor.should_receive(:load_scraped_objects).and_return(unresolvable_foreign_key)
        expect{processor.import}.to raise_error(Pupa::Errors::UnprocessableEntity)
      end

      it 'should raise an error if a foreign object cannot be resolved' do
        processor.should_receive(:load_scraped_objects).and_return(unresolvable_foreign_object)
        expect{processor.import}.to raise_error(Pupa::Errors::UnprocessableEntity)
      end

      it 'should raise an error if a duplicate was inadvertently saved' do
        processor.should_receive(:load_scraped_objects).and_return(duplicate_documents)
        expect{processor.import}.to raise_error(Pupa::Errors::DuplicateDocumentError)
      end
    end
  end
end
@@ -0,0 +1,85 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Covers option handling, action registration and the dispatch of actions and
# tasks, using a processor whose scraping methods do nothing.
describe Pupa::Runner do
  class TestProcessor < Pupa::Processor
    def scrape_people
    end

    def scrape_organizations
    end
  end

  before(:all) do
    TestProcessor.add_scraping_task(:people)
    TestProcessor.add_scraping_task(:organizations)
  end

  # A runner in dry-run mode, with one extra action registered.
  let(:dry_runner) do
    Pupa::Runner.new(TestProcessor, level: 'UNKNOWN', dry_run: true).tap do |instance|
      instance.add_action(name: 'example', description: 'An example action')
    end
  end

  let(:runner) { Pupa::Runner.new(TestProcessor, level: 'UNKNOWN') }

  describe '#initialize' do
    it 'should accept default options' do
      dry_runner.options.level.should_not == 'INFO'
    end
  end

  describe '#add_action' do
    it 'should add an action' do
      expected = {name: 'example', description: 'An example action'}
      dry_runner.actions.last.to_h.should == expected
    end
  end

  describe '#run' do
    # Runs the dry runner and swallows the SystemExit it may raise, so that
    # examples can inspect the runner's options afterwards.
    def dry_run(argv = [], **kwargs)
      dry_runner.run(argv, kwargs)
    rescue SystemExit
      # pass
    end

    it 'should accept overridden options' do
      dry_run(['--quiet'], level: 'ERROR')
      dry_runner.options.level.should == 'ERROR'
    end

    it 'should use default actions if none set' do
      dry_run
      dry_runner.options.actions.should == ['scrape', 'import']
    end

    it 'should use default tasks if none set' do
      dry_run
      dry_runner.options.tasks.should == [:people, :organizations]
    end

    # Unlike an action, it's not possible for a task to be undefined, because
    # `add_scraping_task` would raise an error first.
    it 'should abort if the action is not defined' do
      message = "`example` is not a rspec action. See `rspec --help` for a list of available actions."
      expect { dry_runner.run(['--action', 'example']) }.to raise_error(SystemExit, message)
    end

    it 'should not run any actions on a dry run' do
      expect { dry_runner.run([]) }.to raise_error(SystemExit, nil)
    end

    it 'should run actions' do
      TestProcessor.any_instance.should_receive(:dump_scraped_objects).twice
      TestProcessor.any_instance.should_receive(:import)
      runner.run([])
    end

    it 'should run tasks' do
      TestProcessor.any_instance.should_receive(:people).and_return([])
      TestProcessor.any_instance.should_receive(:organizations).and_return([])
      runner.run([])
    end
  end
end
@@ -0,0 +1,17 @@
require 'rubygems'

# Coverage reporting is started before the library is loaded.
require 'coveralls'
Coveralls.wear!

# Required explicitly: cassette names below are SHA1 digests, and relying on
# another library to have loaded Digest is fragile.
require 'digest/sha1'

require 'rspec'
require 'vcr'
require File.dirname(__FILE__) + '/../lib/pupa'

VCR.configure do |c|
  c.cassette_library_dir = 'spec/cassettes'
  c.hook_into :faraday

  # Wrap every HTTP request in a cassette named after a digest of the
  # request's URI, body and headers.
  c.around_http_request do |request|
    # Interpolation yields the same string as concatenation whenever all three
    # parts are present, so the cassette names are unchanged, but it does not
    # raise a TypeError when the body is nil.
    fingerprint = Digest::SHA1.hexdigest("#{request.uri}#{request.body}#{request.headers}")
    VCR.use_cassette(fingerprint, &request)
  end
end