massive 0.1.0

Files changed (55)
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
data/spec/models/massive/file_job_spec.rb
@@ -0,0 +1,24 @@
+ require "spec_helper"
+
+ describe Massive::FileJob do
+   let(:process) { Massive::FileProcess.new file_attributes: { url: 'http://someurl.com' } }
+   let(:step) { Massive::FileStep.new process: process }
+   let(:job) { Massive::FileJob.new step: step, offset: 1000, limit: 300 }
+
+   it "delegates file to step" do
+     job.file.should eq(step.file)
+   end
+
+   describe "when running through each item" do
+     let(:file) { process.file }
+     let(:block) { Proc.new { } }
+
+     it "yields the process range of the file processor, with its offset and limit" do
+       file.stub_chain(:processor, :process_range)
+           .with({ offset: job.offset, limit: job.limit })
+           .and_yield(block)
+
+       expect { |b| job.each_item(&b) }.to yield_control
+     end
+   end
+ end
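To make the expectations above easier to follow, here is a minimal sketch of a FileJob that would satisfy them, assuming the job has step, offset and limit attributes and the file exposes a processor responding to process_range. This is illustrative only, not the gem's actual lib/massive/file_job.rb.

module Massive
  class FileJob < Job
    # Hypothetical sketch; the real implementation may differ.
    # The spec expects the job to expose its step's file.
    def file
      step.file
    end

    # Iterate only the slice this job owns by delegating to the file
    # processor with the job's own offset and limit.
    def each_item(&block)
      file.processor.process_range(offset: offset, limit: limit, &block)
    end
  end
end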
data/spec/models/massive/file_spec.rb
@@ -0,0 +1,209 @@
+ require "spec_helper"
+
+ describe Massive::File do
+   let(:processor) { double(FileProcessor::CSV) }
+   let(:process) { Massive::FileProcess.new }
+
+   let(:url) { 'http://someurl.com' }
+   let(:encoding) { nil }
+   let(:col_sep) { nil }
+
+   let(:expected_options) do
+     {
+       headers: true,
+       encoding: encoding,
+       col_sep: col_sep
+     }
+   end
+
+   subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep) }
+
+   before { FileProcessor::CSV.stub(:new).with(file.url, expected_options).and_return(processor) }
+
+   describe "#processor" do
+     it "creates a new instance of the CSV file processor, enabling headers but without encoding and separator" do
+       file.processor.should eq(processor)
+     end
+
+     context "when encoding and col_sep are defined" do
+       let(:encoding) { 'iso-8859-1' }
+       let(:col_sep) { ';' }
+
+       it "creates a new instance of the CSV file processor, passing encoding and col_sep" do
+         file.processor.should eq(processor)
+       end
+     end
+
+     describe "when specifying that the file should have no headers" do
+       subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep, use_headers: false) }
+
+       let(:expected_options) do
+         {
+           headers: false,
+           encoding: encoding,
+           col_sep: col_sep
+         }
+       end
+
+       it "creates a new instance of the CSV file processor, passing encoding and col_sep" do
+         file.processor.should eq(processor)
+       end
+     end
+
+     describe "when using Fog" do
+       let(:filename) { 'my-file.txt' }
+       let(:fog_connection) { double(Fog::Storage) }
+       let(:fog_directory) { double('Directory') }
+       let(:fog_file) { double('File') }
+       let(:authenticated_url) { 'http://my-auth.url.com' }
+
+       subject(:file) { Massive::File.new(filename: filename, encoding: encoding, col_sep: col_sep) }
+
+       before do
+         Massive.fog_credentials = { provider: 'AWS', aws_access_key_id: 'some-key', aws_secret_access_key: 'some-secret' }
+         Massive.fog_authenticated_url_expiration = 1.hour
+         Massive.fog_directory = 'my-bucket'
+
+         Fog::Storage.stub(:new).with(Massive.fog_credentials).and_return(fog_connection)
+         fog_connection.stub_chain(:directories, :get).with(Massive.fog_directory).and_return(fog_directory)
+         fog_directory.stub_chain(:files, :get).with(filename).and_return(fog_file)
+         fog_file.stub(:url).with(Time.current.to_i + Massive.fog_authenticated_url_expiration).and_return(authenticated_url)
+       end
+
+       it "creates a new instance of the CSV file processor, pointing its URL to the authenticated fog url" do
+         FileProcessor::CSV.should_receive(:new).with(authenticated_url, expected_options).and_return(processor)
+         file.processor.should eq(processor)
+       end
+     end
+   end
+
+   describe "#gather_info!" do
+     let(:detected_encoding) { 'iso-8859-1' }
+     let(:detected_col_sep) { ';' }
+     let(:total_count) { 1000 }
+     let(:headers) { ['some header', 'other header'] }
+
+     let(:processor) do
+       double(FileProcessor::CSV, {
+         detected_encoding: detected_encoding,
+         col_sep: detected_col_sep,
+         total_count: total_count,
+         shift: true,
+         headers: headers
+       })
+     end
+
+     let(:row) do
+       double(CSV::Row, fields: ['some value', 'other value'])
+     end
+
+     before do
+       processor.stub(:process_range)
+                .with(limit: 3)
+                .and_yield(row)
+                .and_yield(row)
+                .and_yield(row)
+     end
+
+     it "detects the file encoding, and persists it" do
+       file.gather_info!
+       file.reload.encoding.should eq(detected_encoding)
+     end
+
+     it "detects the column separator, and persists it" do
+       file.gather_info!
+       file.reload.col_sep.should eq(detected_col_sep)
+     end
+
+     it "stores the total count, and persists it" do
+       file.gather_info!
+       file.reload.total_count.should eq(total_count)
+     end
+
+     it "stores the headers, and persists them" do
+       file.gather_info!
+       file.reload.headers.should eq(headers)
+     end
+
+     it "stores sample data with 3 rows, and persists it" do
+       file.gather_info!
+       file.reload.sample_data.should eq([row.fields, row.fields, row.fields])
+     end
+
+     context "when file has no headers" do
+       subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep, use_headers: false) }
+
+       let(:expected_options) do
+         {
+           headers: false,
+           encoding: encoding,
+           col_sep: col_sep
+         }
+       end
+
+       let(:processor) do
+         double(FileProcessor::CSV, {
+           detected_encoding: encoding,
+           col_sep: col_sep,
+           total_count: 3,
+           shift: true
+         })
+       end
+
+       let(:row) { ['some value', 'other value'] }
+
+       before do
+         processor.stub(:process_range)
+                  .with(limit: 3)
+                  .and_yield(row)
+                  .and_yield(row)
+                  .and_yield(row)
+       end
+
+       it "does not store the headers" do
+         file.gather_info!
+         file.reload.headers.should be_blank
+       end
+
+       it "stores the raw rows in the sample data" do
+         file.gather_info!
+         file.reload.sample_data.should eq [row, row, row]
+       end
+     end
+
+     context "when file already has gathered info" do
+       before do
+         file.encoding = 'utf-8'
+         file.col_sep = '|'
+         file.total_count = 3000
+         file.headers = ['some other headers']
+         file.sample_data = [['some other values']]
+       end
+
+       it "detects the file encoding, and persists it" do
+         file.gather_info!
+         file.reload.encoding.should eq(detected_encoding)
+       end
+
+       it "detects the column separator, and persists it" do
+         file.gather_info!
+         file.reload.col_sep.should eq(detected_col_sep)
+       end
+
+       it "stores the total count, and persists it" do
+         file.gather_info!
+         file.reload.total_count.should eq(total_count)
+       end
+
+       it "stores the headers, and persists them" do
+         file.gather_info!
+         file.reload.headers.should eq(headers)
+       end
+
+       it "stores sample data with 3 rows, and persists it" do
+         file.gather_info!
+         file.reload.sample_data.should eq([row.fields, row.fields, row.fields])
+       end
+     end
+   end
+ end
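As a reading aid, here is a hedged sketch of a gather_info! method that would satisfy the examples above, assuming Massive::File has encoding, col_sep, total_count, headers, sample_data and use_headers fields. This is illustrative only, not the gem's actual lib/massive/file.rb.

module Massive
  class File
    # Hypothetical sketch; the real implementation may differ.
    # Reads encoding, separator, total count, headers and a 3-row sample
    # from the processor and persists them, overwriting previous values.
    def gather_info!
      proc = processor

      self.encoding    = proc.detected_encoding
      self.col_sep     = proc.col_sep
      self.total_count = proc.total_count
      self.headers     = use_headers ? proc.headers : nil

      self.sample_data = [].tap do |sample|
        proc.process_range(limit: 3) do |row|
          # CSV::Row objects are stored by their field values; raw arrays as-is.
          sample << (row.respond_to?(:fields) ? row.fields : row)
        end
      end

      save
    end
  end
end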
data/spec/models/massive/file_step_spec.rb
@@ -0,0 +1,22 @@
+ require "spec_helper"
+
+ describe Massive::FileStep do
+   let(:process) { Massive::FileProcess.new file_attributes: { url: 'http://someurl.com' } }
+   subject(:step) { Massive::FileStep.new process: process }
+
+   it "delegates file to process" do
+     step.file.should eq(process.file)
+   end
+
+   context "when it is started!" do
+     let(:file) { process.file }
+     let(:total_count) { 1234 }
+
+     before { file.total_count = total_count }
+
+     it "calculates the total count, using the processor total count" do
+       step.start!
+       step.total_count.should eq(total_count)
+     end
+   end
+ end
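For context, a minimal sketch of a FileStep consistent with these two examples, assuming Massive::Step#start! calls a calculate_total_count hook. Illustrative only, not the gem's actual lib/massive/file_step.rb.

module Massive
  class FileStep < Step
    # Hypothetical sketch; the real implementation may differ.
    # Expose the process' file, as the delegation example expects.
    def file
      process.file
    end

    # Assumed hook used by Step#start! to fill in total_count from the file.
    def calculate_total_count
      file.total_count
    end
  end
end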
data/spec/models/massive/job_spec.rb
@@ -0,0 +1,319 @@
+ require "spec_helper"
+
+ describe Massive::Job do
+   include_context "frozen time"
+   include_context "stubbed memory_consumption"
+
+   let(:process) { Massive::Process.new }
+   let(:step) { process.steps.build }
+   subject(:job) { step.jobs.build }
+
+   describe ".perform" do
+     before do
+       Massive::Process.stub(:find_job).with(process.id, step.id, job.id).and_return(job)
+     end
+
+     it "finds the job and calls work on it" do
+       job.should_receive(:work)
+       Massive::Job.perform(process.id, step.id, job.id)
+     end
+   end
+
+   describe ".queue" do
+     it "should be massive_job" do
+       Massive::Job.queue.should eq(:massive_job)
+     end
+   end
+
+   describe "#enqueue" do
+     it "enqueues itself, passing ids as strings" do
+       Resque.should_receive(:enqueue).with(job.class, process.id.to_s, step.id.to_s, job.id.to_s)
+       job.enqueue
+     end
+
+     context "when a subclass redefines calculate_total_count" do
+       subject(:job) { CustomJob.new }
+       before { step.jobs << job }
+
+       it "enqueues itself, passing ids as strings" do
+         Resque.should_receive(:enqueue).with(job.class, process.id.to_s, step.id.to_s, job.id.to_s)
+         job.enqueue
+       end
+     end
+   end
+
+   describe "when creating" do
+     it "enqueues the job" do
+       job.should_receive(:enqueue)
+       job.save
+     end
+   end
+
+   describe "#start!" do
+     it "zeroes the processed items, persisting it" do
+       job.start!
+       job.reload.processed.should be_zero
+     end
+   end
+
+   describe "#finish!" do
+     it "updates the finished_at with the current time, persisting it" do
+       job.finish!
+       job.reload.finished_at.to_i.should eq(now.to_i)
+     end
+
+     it "updates the memory_consumption, persisting it" do
+       job.finish!
+       job.reload.memory_consumption.should eq(current_memory_consumption)
+     end
+
+     it "calls step#complete" do
+       step.should_receive(:complete)
+       job.finish!
+     end
+   end
+
+   describe "#work" do
+     it "starts the job, then runs through each item, and finally finishes the job" do
+       job.should_receive(:start!) do
+         job.should_receive(:each_item) do
+           job.should_receive(:finish!)
+         end
+       end
+
+       job.work
+     end
+
+     context "when it processes one item" do
+       include_context "job processing"
+
+       it "increments the number of processed items by one" do
+         job.work
+         job.reload.processed.should eq(1)
+       end
+
+       it "processes the item" do
+         job.should_receive(:process_each).with(item, 0).once
+         job.work
+       end
+     end
+
+     context "when it processes multiple items" do
+       include_context "job processing"
+
+       before do
+         job.stub(:each_item).and_yield(item, index)
+                             .and_yield(item, index + 1)
+                             .and_yield(item, index + 2)
+       end
+
+       it "increments the number of processed items by the number of items processed" do
+         job.work
+         job.reload.processed.should eq(3)
+       end
+
+       it "processes each one of the items" do
+         job.should_receive(:process_each).with(item, 0).once
+         job.should_receive(:process_each).with(item, 1).once
+         job.should_receive(:process_each).with(item, 2).once
+         job.work
+       end
+
+       it "sends a :progress notification" do
+         step.stub(:notify)
+         step.should_receive(:notify).with(:progress)
+         job.work
+       end
+     end
+
+     context "when it is cancelled" do
+       before { step.stub(:notify) }
+
+       context "before it is started" do
+         before { process.stub(:cancelled?).and_return(true) }
+
+         it "sends a cancelled notification" do
+           step.should_receive(:notify).with(:cancelled)
+           job.work
+         end
+
+         it "sets the step cancelled_at" do
+           job.work
+           step.reload.should be_cancelled_at
+         end
+
+         it "sets the job cancelled_at" do
+           job.work
+           job.reload.should be_cancelled_at
+         end
+       end
+
+       context "while it is processing" do
+         let(:item) { double(:item) }
+         let(:index) { 0 }
+
+         before do
+           job.stub(:each_item).and_yield(item, index)
+                               .and_yield(item, index + 1)
+                               .and_yield(item, index + 2)
+
+           job.stub(:process_each) do
+             process.stub(:cancelled?).and_return(true)
+           end
+
+           Kernel.stub(:sleep)
+         end
+
+         it "sends a cancelled notification" do
+           step.should_receive(:notify).with(:cancelled)
+           job.work
+         end
+
+         it "sets the step cancelled_at" do
+           job.work
+           step.reload.should be_cancelled_at
+         end
+
+         it "sets the job cancelled_at" do
+           job.work
+           job.reload.should be_cancelled_at
+         end
+
+         it "does not retry the processing" do
+           Kernel.should_not_receive(:sleep)
+           job.work
+           job.reload.retries.should be_zero
+         end
+       end
+     end
+
+     shared_examples_for "handles error" do
+       it "re-raises the exception" do
+         expect { job.work }.to raise_error(error)
+       end
+
+       it "sets the step as failed" do
+         begin
+           job.work
+         rescue StandardError, SignalException
+         end
+
+         step.reload.should be_failed
+       end
+
+       it "saves the last error" do
+         begin
+           job.work
+         rescue StandardError, SignalException
+         end
+
+         job.reload.last_error.should eq(error.message)
+       end
+
+       it "sends a :failed notification" do
+         step.stub(:notify)
+         step.should_receive(:notify).with(:failed)
+
+         begin
+           job.work
+         rescue StandardError, SignalException
+         end
+       end
+     end
+
+     context "when an error occurs" do
+       let(:error) { StandardError.new('some-error') }
+
+       context "while starting" do
+         before { job.stub(:start!).and_raise(error) }
+
+         it_should_behave_like "handles error"
+       end
+
+       context "while running through each item" do
+         before { job.stub(:each_item).and_raise(error) }
+
+         it_should_behave_like "handles error"
+       end
+
+       context "while processing each item" do
+         include_context "job processing"
+
+         before { job.stub(:process_each).and_raise(error) }
+
+         it_should_behave_like "handles error"
+
+         it "retries 10 times, with a 2 second interval" do
+           Kernel.should_receive(:sleep).with(retry_interval).exactly(maximum_retries - 1).times
+           job.should_receive(:process_each).exactly(maximum_retries).times.and_raise(error)
+           expect { job.work }.to raise_error(error)
+           job.reload.retries.should eq(maximum_retries)
+         end
+
+         context "when a subclass redefines the retry interval and maximum retries" do
+           subject(:job) { CustomJob.new }
+           before { step.jobs << job }
+
+           it "retries 20 times, with a 5 second interval" do
+             Kernel.should_receive(:sleep).with(retry_interval).exactly(maximum_retries - 1).times
+             job.should_receive(:process_each).exactly(maximum_retries).times.and_raise(error)
+             expect { job.work }.to raise_error(error)
+             job.reload.retries.should eq(maximum_retries)
+           end
+         end
+       end
+
+       context "while finishing" do
+         before { job.stub(:finish!).and_raise(error) }
+
+         it_should_behave_like "handles error"
+       end
+     end
+
+     context "when a system signal is sent" do
+       let(:error) { SignalException.new('TERM') }
+
+       context "while starting" do
+         before { job.stub(:start!).and_raise(error) }
+
+         it_should_behave_like "handles error"
+       end
+
+       context "while running through each item" do
+         before { job.stub(:each_item).and_raise(error) }
+
+         it_should_behave_like "handles error"
+       end
+
+       context "while processing each item" do
+         include_context "job processing"
+
+         before { job.stub(:process_each).and_raise(error) }
+
+         it_should_behave_like "handles error"
+
+         it "does not retry the processing, raising the error immediately" do
+           Kernel.should_not_receive(:sleep)
+           job.should_receive(:process_each).once.and_raise(error)
+           expect { job.work }.to raise_error(error)
+           job.reload.retries.should be_zero
+         end
+       end
+
+       context "while finishing" do
+         before { job.stub(:finish!).and_raise(error) }
+
+         it_should_behave_like "handles error"
+       end
+     end
+   end
+
+   context "on a subclass" do
+     subject(:job) { CustomJob.new }
+     before { step.jobs << job }
+
+     it "properly sets the _type" do
+       job._type.should be_present
+     end
+   end
+ end
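Taken together, the expectations above outline a #work life cycle: start, iterate items with cancellation checks and retries, notify progress, then finish, re-raising failures. A simplified, hedged sketch of that flow follows; it is not the gem's lib/massive/job.rb, and the helper names cancelling, retrying and inc are illustrative stand-ins for whatever the Cancelling and Retry mixins listed above actually provide.

# Hypothetical outline of Massive::Job#work implied by the spec above.
def work
  start!                                  # zeroes processed and persists

  each_item do |item, index|
    cancelling do                         # illustrative: stops and notifies :cancelled when process.cancelled?
      retrying do                         # illustrative: sleeps/retries StandardErrors; signals re-raise at once
        process_each(item, index)
      end
    end

    inc(processed: 1)                     # illustrative counter update
    step.notify(:progress)
  end

  finish!                                 # sets finished_at and memory_consumption, completes the step
rescue StandardError, SignalException => error
  self.last_error = error.message         # persisted so the spec can read it back
  step.notify(:failed)                    # the step is also marked as failed
  raise
end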