massive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
@@ -0,0 +1,24 @@
1
+ require "spec_helper"
2
+
3
+ describe Massive::FileJob do
4
+ let(:process) { Massive::FileProcess.new file_attributes: { url: 'http://someurl.com' } }
5
+ let(:step) { Massive::FileStep.new process: process }
6
+ let(:job) { Massive::FileJob.new step: step, offset: 1000, limit: 300 }
7
+
8
+ it "delegates file to step" do
9
+ step.file.should eq(step.file)
10
+ end
11
+
12
+ describe "when running through each item" do
13
+ let(:file) { process.file }
14
+ let(:block) { Proc.new { } }
15
+
16
+ it "yields the process range of the file processor, with its offset and limit" do
17
+ file.stub_chain(:processor, :process_range)
18
+ .with({ offset: job.offset, limit: job.limit })
19
+ .and_yield(block)
20
+
21
+ expect { |block| job.each_item(&block) }.to yield_control(&block)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,209 @@
# Spec for Massive::File — reconstructed from the diff rendering
# (gutter line numbers and '+' prefixes removed so the file parses as Ruby).
# Uses RSpec 2 `should`/`stub` syntax, consistent with the rest of the suite.
require "spec_helper"

describe Massive::File do
  let(:processor) { double(FileProcessor::CSV) }
  let(:process) { Massive::FileProcess.new }

  let(:url) { 'http://someurl.com' }
  let(:encoding) { nil }
  let(:col_sep) { nil }

  let(:expected_options) do
    {
      headers: true,
      encoding: encoding,
      col_sep: col_sep
    }
  end

  subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep) }

  before { FileProcessor::CSV.stub(:new).with(file.url, expected_options).and_return(processor) }

  describe "#processor" do
    it "creates a new instance of the CSV file processor, enabling headers but without encoding and separator" do
      file.processor.should eq(processor)
    end

    context "when encoding and col_sep are defined" do
      let(:encoding) { 'iso-8859-1' }
      let(:col_sep) { ';' }

      it "creates a new instance of the CSV file processor, passing encoding and col_sep" do
        file.processor.should eq(processor)
      end
    end

    describe "when specifying that the file should have no headers" do
      subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep, use_headers: false) }

      let(:expected_options) do
        {
          headers: false,
          encoding: encoding,
          col_sep: col_sep
        }
      end

      it "creates a new instance of the CSV file processor, passing encoding and col_sep" do
        file.processor.should eq(processor)
      end
    end

    describe "when using Fog" do
      let(:filename) { 'my-file.txt' }
      let(:fog_connection) { double(Fog::Storage) }
      let(:fog_directory) { double('Directory') }
      let(:fog_file) { double('File') }
      let(:authenticated_url) { 'http://my-auth.url.com' }

      subject(:file) { Massive::File.new(filename: filename, encoding: encoding, col_sep: col_sep) }

      before do
        Massive.fog_credentials = { provider: 'AWS', aws_access_key_id: 'some-key', aws_secret_access_key: 'some-secret' }
        Massive.fog_authenticated_url_expiration = 1.hour
        Massive.fog_directory = 'my-bucket'

        Fog::Storage.stub(:new).with(Massive.fog_credentials).and_return(fog_connection)
        fog_connection.stub_chain(:directories, :get).with(Massive.fog_directory).and_return(fog_directory)
        fog_directory.stub_chain(:files, :get).with(filename).and_return(fog_file)
        fog_file.stub(:url).with(Time.current.to_i + Massive.fog_authenticated_url_expiration).and_return(authenticated_url)
      end

      it "creates a new instance of the CSV file processor, pointing its URL to the authenticated fog url" do
        FileProcessor::CSV.should_receive(:new).with(authenticated_url, expected_options).and_return(processor)
        file.processor.should eq(processor)
      end
    end
  end

  describe "#gather_info!" do
    let(:detected_encoding) { 'iso-8859-1' }
    let(:detected_col_sep) { ';' }
    let(:total_count) { 1000 }
    let(:headers) { ['some header', 'other header' ] }

    # Processor double returns the detected metadata gather_info! persists.
    let(:processor) do
      double(FileProcessor::CSV, {
        detected_encoding: detected_encoding,
        col_sep: detected_col_sep,
        total_count: total_count,
        shift: true,
        headers: headers
      })
    end

    let(:row) do
      double(CSV::Row, fields: ['some value', 'other value'])
    end

    before do
      processor.stub(:process_range)
               .with(limit: 3)
               .and_yield(row)
               .and_yield(row)
               .and_yield(row)
    end

    it "detects the file encoding, and persists it" do
      file.gather_info!
      file.reload.encoding.should eq(detected_encoding)
    end

    it "detects the column separator, and persists it" do
      file.gather_info!
      file.reload.col_sep.should eq(detected_col_sep)
    end

    it "stores the total count, and persists it" do
      file.gather_info!
      file.reload.total_count.should eq(total_count)
    end

    it "stores the headers, and persists it" do
      file.gather_info!
      file.reload.headers.should eq(headers)
    end

    it "stores a sample data with 3 rows data, and persists it" do
      file.gather_info!
      file.reload.sample_data.should eq([row.fields, row.fields, row.fields])
    end

    context "when file has no headers" do
      subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep, use_headers: false) }

      let(:expected_options) do
        {
          headers: false,
          encoding: encoding,
          col_sep: col_sep
        }
      end

      # No :headers stub here — a headerless file yields raw row arrays.
      let(:processor) do
        double(FileProcessor::CSV, {
          detected_encoding: encoding,
          col_sep: col_sep,
          total_count: 3,
          shift: true
        })
      end

      let(:row) { ['some value', 'other value'] }

      before do
        processor.stub(:process_range)
                 .with(limit: 3)
                 .and_yield(row)
                 .and_yield(row)
                 .and_yield(row)
      end

      it "do not store the headers" do
        file.gather_info!
        file.reload.headers.should be_blank
      end

      it "store raw row in the sample data" do
        file.gather_info!
        file.reload.sample_data.should eq [row, row, row]
      end
    end

    context "when file already has gathered info" do
      before do
        file.encoding = 'utf-8'
        file.col_sep = '|'
        file.total_count = 3000
        file.headers = ['some other headers']
        file.sample_data = [['some other values']]
      end

      it "detects the file encoding, and persists it" do
        file.gather_info!
        file.reload.encoding.should eq(detected_encoding)
      end

      it "detects the column separator, and persists it" do
        file.gather_info!
        file.reload.col_sep.should eq(detected_col_sep)
      end

      it "stores the total count, and persists it" do
        file.gather_info!
        file.reload.total_count.should eq(total_count)
      end

      it "stores the headers, and persists it" do
        file.gather_info!
        file.reload.headers.should eq(headers)
      end

      it "stores a sample data with 3 rows data, and persists it" do
        file.gather_info!
        file.reload.sample_data.should eq([row.fields, row.fields, row.fields])
      end
    end
  end
end
@@ -0,0 +1,22 @@
# Spec for Massive::FileStep — reconstructed from the diff rendering
# (gutter line numbers and '+' prefixes removed so the file parses as Ruby).
require "spec_helper"

describe Massive::FileStep do
  let(:process) { Massive::FileProcess.new file_attributes: { url: 'http://someurl.com' } }
  subject(:step) { Massive::FileStep.new process: process }

  it "delegates file to process" do
    step.file.should eq(process.file)
  end

  context "when it is started!" do
    let(:file) { process.file }
    let(:total_count) { 1234 }

    before { file.total_count = total_count }

    it "calculates the total count, using the processor total count" do
      step.start!
      step.total_count.should eq(total_count)
    end
  end
end
@@ -0,0 +1,319 @@
# Spec for Massive::Job — reconstructed from the diff rendering
# (gutter line numbers and '+' prefixes removed so the file parses as Ruby).
# NOTE(review): `item`, `index`, `retry_interval` and `maximum_retries` are
# provided by the "job processing" shared context defined elsewhere in the
# suite (spec/shared) — confirm against that file; they are not defined here.
require "spec_helper"

describe Massive::Job do
  include_context "frozen time"
  include_context "stubbed memory_consumption"

  let(:process) { Massive::Process.new }
  let(:step) { process.steps.build }
  subject(:job) { step.jobs.build }

  describe ".perform" do
    before do
      Massive::Process.stub(:find_job).with(process.id, step.id, job.id).and_return(job)
    end

    it "finds the job and calls work on it" do
      job.should_receive(:work)
      Massive::Job.perform(process.id, step.id, job.id)
    end
  end

  describe ".queue" do
    it "should be massive_job" do
      Massive::Job.queue.should eq(:massive_job)
    end
  end

  describe "#enqueue" do
    it "enqueues itself, passing ids as strings" do
      Resque.should_receive(:enqueue).with(job.class, process.id.to_s, step.id.to_s, job.id.to_s)
      job.enqueue
    end

    context "when a subclass redefines calculate_total_count" do
      subject(:job) { CustomJob.new }
      before { step.jobs << job }

      it "enqueues itself, passing ids as strings" do
        Resque.should_receive(:enqueue).with(job.class, process.id.to_s, step.id.to_s, job.id.to_s)
        job.enqueue
      end
    end
  end

  describe "when creating" do
    it "enqueues the job" do
      job.should_receive(:enqueue)
      job.save
    end
  end

  describe "#start!" do
    it "zeroes the processed items, persisting it" do
      job.start!
      job.reload.processed.should be_zero
    end
  end

  describe "#finish!" do
    it "updates the finished_at with the current time, persisting it" do
      # `now` comes from the "frozen time" shared context.
      job.finish!
      job.reload.finished_at.to_i.should eq(now.to_i)
    end

    it "updates the memory_consumption, persisting it" do
      job.finish!
      job.reload.memory_consumption.should eq(current_memory_consumption)
    end

    it "calls step#complete" do
      step.should_receive(:complete)
      job.finish!
    end
  end

  describe "#work" do
    it "starts the job, then runs through each item, and finally finishes the job" do
      # Nested expectations enforce the start! -> each_item -> finish! order.
      job.should_receive(:start!) do
        job.should_receive(:each_item) do
          job.should_receive(:finish!)
        end
      end

      job.work
    end

    context "when it process one item" do
      include_context "job processing"

      it "increments the number of processed items by one" do
        job.work
        job.reload.processed.should eq(1)
      end

      it "process the item" do
        job.should_receive(:process_each).with(item, 0).once
        job.work
      end
    end

    context "when it process multiple itens" do
      include_context "job processing"

      before do
        job.stub(:each_item).and_yield(item, index)
                            .and_yield(item, index + 1)
                            .and_yield(item, index + 2)
      end

      it "increments the number of processed items by the number of items processed" do
        job.work
        job.reload.processed.should eq(3)
      end

      it "process each one of the items" do
        job.should_receive(:process_each).with(item, 0).once
        job.should_receive(:process_each).with(item, 1).once
        job.should_receive(:process_each).with(item, 2).once
        job.work
      end

      it "sends a :progress notification" do
        step.stub(:notify)
        step.should_receive(:notify).with(:progress)
        job.work
      end
    end

    context "when it is cancelled" do
      before { step.stub(:notify) }

      context "before it is started" do
        before { process.stub(:cancelled?).and_return(true) }

        it "sends a cancelled notification" do
          step.should_receive(:notify).with(:cancelled)
          job.work
        end

        it "sets the step cancelled_at" do
          job.work
          step.reload.should be_cancelled_at
        end

        it "sets the job cancelled_at" do
          job.work
          job.reload.should be_cancelled_at
        end
      end

      context "while it is processing" do
        let(:item) { double(:item) }
        let(:index) { 0 }

        before do
          job.stub(:each_item).and_yield(item, index)
                              .and_yield(item, index + 1)
                              .and_yield(item, index + 2)

          # Flip the process to cancelled on the first processed item.
          job.stub(:process_each) do
            process.stub(:cancelled?).and_return(true)
          end

          Kernel.stub(:sleep)
        end

        it "sends a cancelled notification" do
          step.should_receive(:notify).with(:cancelled)
          job.work
        end

        it "sets the step cancelled_at" do
          job.work
          step.reload.should be_cancelled_at
        end

        it "sets the job cancelled_at" do
          job.work
          job.reload.should be_cancelled_at
        end

        it "does not retry the processing" do
          Kernel.should_not_receive(:sleep)
          job.work
          job.reload.retries.should be_zero
        end
      end
    end

    shared_examples_for "handles error" do
      it "re-raises the exception" do
        expect { job.work }.to raise_error(error)
      end

      it "sets the step as failed" do
        begin
          job.work
        rescue StandardError, SignalException
        end

        step.reload.should be_failed
      end

      it "saves the last error" do
        begin
          job.work
        rescue StandardError, SignalException
        end

        job.reload.last_error.should eq(error.message)
      end

      it "sends a :failed notification" do
        step.stub(:notify)
        step.should_receive(:notify).with(:failed)

        begin
          job.work
        rescue StandardError, SignalException
        end
      end
    end

    context "when an error occurs" do
      let(:error) { StandardError.new('some-error') }

      context "while starting" do
        before { job.stub(:start!).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while running through each item" do
        before { job.stub(:each_item).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while processing each item" do
        include_context "job processing"

        before { job.stub(:process_each).and_raise(error) }

        it_should_behave_like "handles error"

        it "retries 10 times, with a 2 second interval" do
          Kernel.should_receive(:sleep).with(retry_interval).exactly(maximum_retries - 1).times
          job.should_receive(:process_each).exactly(maximum_retries).times.and_raise(error)
          expect { job.work }.to raise_error(error)
          job.reload.retries.should eq(maximum_retries)
        end

        context "when a subclass redefines the retry interval and maximum retries" do
          subject(:job) { CustomJob.new }
          before { step.jobs << job }

          it "retries 20 times, with a 5 second interval" do
            Kernel.should_receive(:sleep).with(retry_interval).exactly(maximum_retries - 1).times
            job.should_receive(:process_each).exactly(maximum_retries).times.and_raise(error)
            expect { job.work }.to raise_error(error)
            job.reload.retries.should eq(maximum_retries)
          end
        end
      end

      context "while finishing" do
        before { job.stub(:finish!).and_raise(error) }

        it_should_behave_like "handles error"
      end
    end

    context "when a system signal is sent" do
      let(:error) { SignalException.new('TERM') }

      context "while starting" do
        before { job.stub(:start!).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while running through each item" do
        before { job.stub(:each_item).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while processing each item" do
        include_context "job processing"

        before { job.stub(:process_each).and_raise(error) }

        it_should_behave_like "handles error"

        it "does not retry the processing, raising error immediately" do
          Kernel.should_not_receive(:sleep)
          job.should_receive(:process_each).once.and_raise(error)
          expect { job.work }.to raise_error(error)
          job.reload.retries.should be_zero
        end
      end

      context "while finishing" do
        before { job.stub(:finish!).and_raise(error) }

        it_should_behave_like "handles error"
      end
    end
  end

  context "on a subclass" do
    subject(:job) { CustomJob.new }
    before { step.jobs << job }

    it "properly sets the _type" do
      job._type.should be_present
    end
  end
end