massive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
@@ -0,0 +1,104 @@
1
+ shared_examples_for Massive::Status do
2
+ include_context "frozen time"
3
+
4
+ context "when it has not been started" do
5
+ it { should_not be_started }
6
+ it { should_not be_completed }
7
+ it { should_not be_failed }
8
+ end
9
+
10
+ context "when it has been started" do
11
+ let(:started_at) { 1.minute.ago }
12
+ before { model.started_at = started_at }
13
+
14
+ it { should be_started }
15
+ it { should_not be_completed }
16
+ it { should_not be_failed }
17
+
18
+ context "1 minute ago" do
19
+ context "and it has not been finished yet" do
20
+ it { should be_started }
21
+ it { should_not be_completed }
22
+ it { should_not be_failed }
23
+ end
24
+
25
+ context "and it has been finished 10 seconds ago" do
26
+ let(:finished_at) { 10.seconds.ago }
27
+ before { model.finished_at = finished_at }
28
+
29
+ it { should be_started }
30
+ it { should be_completed }
31
+ it { should_not be_failed }
32
+
33
+ context "and it has failed" do
34
+ let(:failed_at) { 1.minute.ago }
35
+ before { model.failed_at = failed_at }
36
+
37
+ it { should_not be_started }
38
+ it { should_not be_completed }
39
+ it { should be_failed }
40
+ end
41
+ end
42
+ end
43
+
44
+ context "and it has failed" do
45
+ let(:failed_at) { 1.minute.ago }
46
+ before { model.failed_at = failed_at }
47
+
48
+ it { should_not be_started }
49
+ it { should_not be_completed }
50
+ it { should be_failed }
51
+ end
52
+ end
53
+
54
+ describe "#start!" do
55
+ it "updates the started_at with the current time, persisting it" do
56
+ model.start!
57
+ model.reload.started_at.to_i.should eq(now.to_i)
58
+ end
59
+
60
+ it "clears the finished_at, persisting it" do
61
+ model.update_attributes(finished_at: now)
62
+ model.start!
63
+ model.reload.finished_at.should be_nil
64
+ end
65
+
66
+ it "clears the failed_at, persisting it" do
67
+ model.update_attributes(failed_at: now)
68
+ model.start!
69
+ model.reload.failed_at.should be_nil
70
+ end
71
+
72
+ it "zeroes the number of retries, persisting it" do
73
+ model.start!
74
+ model.reload.retries.should be_zero
75
+ end
76
+ end
77
+
78
+ describe "#enqueued?" do
79
+ context "when model is enqueued" do
80
+ before { model.enqueue }
81
+
82
+ its(:enqueued?) { should be_true }
83
+ end
84
+
85
+ context "when model is not enqueued" do
86
+ its(:enqueued?) { should be_false }
87
+ end
88
+ end
89
+ end
90
+
91
+ describe Massive::Step do # runs the shared Massive::Status examples against a step
92
+ let(:process) { Massive::Process.new }
93
+ subject(:model) { process.steps.build } # `model` is the name the shared examples call into
94
+
95
+ it_should_behave_like Massive::Status
96
+ end
97
+
98
+ describe Massive::Job do # runs the shared Massive::Status examples against a job
99
+ let(:process) { Massive::Process.new }
100
+ let(:step) { process.steps.build } # the job under test is built from this step's jobs collection
101
+ subject(:model) { step.jobs.build } # `model` is the name the shared examples call into
102
+
103
+ it_should_behave_like Massive::Status
104
+ end
@@ -0,0 +1,40 @@
1
+ require "spec_helper"
2
+
3
+ describe Massive::StepSerializer do # checks the hash produced when serializing a Massive::Step
4
+ let(:step) { Massive::Step.new }
5
+ subject(:serialized) { described_class.new(step).as_json(root: false) } # as_json is called with root: false
6
+
7
+ it "serializes the id as a string" do
8
+ serialized[:id].should eq(step.id.to_s)
9
+ end
10
+
11
+ [ :created_at, :updated_at, :started_at, :finished_at, :failed_at ].each do |field| # one example is generated per timestamp field
12
+ it "serializes the #{field}" do
13
+ step[field] = 1.minute.ago
14
+ serialized[field].should eq(step[field])
15
+ end
16
+ end
17
+
18
+ it "serializes the last_error" do
19
+ step.last_error = "Some error"
20
+ serialized[:last_error].should eq(step.last_error)
21
+ end
22
+
23
+ it "serializes notifier_id" do
24
+ serialized[:notifier_id].should eq(step.notifier_id) # NOTE(review): notifier_id is never assigned here, so both sides may be nil - confirm this would catch a missing key
25
+ end
26
+
27
+ [ :retries, :memory_consumption, :total_count ].each do |field| # one example is generated per numeric attribute
28
+ it "serializes the #{field}" do
29
+ step[field] = 100
30
+ serialized[field].should eq(step[field])
31
+ end
32
+ end
33
+
34
+ [ :processed, :processed_percentage, :processing_time, :elapsed_time ].each do |field| # these are read through methods on the step, not attributes
35
+ it "serializes the #{field}" do
36
+ step.stub(field).and_return(100) # stub the method so the example only checks the value is passed through
37
+ serialized[field].should eq(step.send(field))
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,490 @@
1
+ require "spec_helper"
2
+
3
+ describe Massive::Step do
4
+ include_context "frozen time"
5
+ include_context "stubbed memory_consumption"
6
+
7
+ let(:process) { Massive::Process.new }
8
+ subject(:step) { process.steps.build }
9
+
10
+ describe ".perform" do
11
+ before do
12
+ Massive::Process.stub(:find_step).with(process.id, step.id).and_return(step)
13
+ end
14
+
15
+ it "finds the step and calls work on it" do
16
+ step.should_receive(:work)
17
+ Massive::Step.perform(process.id, step.id)
18
+ end
19
+ end
20
+
21
+ describe ".queue" do
22
+ it "should be massive_step" do
23
+ Massive::Step.queue.should eq(:massive_step)
24
+ end
25
+ end
26
+
27
+ describe ".calculate_total_count_with" do
28
+ after { Massive::Step.calculates_total_count_with { 0 } }
29
+
30
+ it "defaults to return 0" do
31
+ step.calculate_total_count.should be_zero
32
+ end
33
+
34
+ it "defines the calculate_total_count method, which returns the returned value of the given block" do
35
+ Massive::Step.calculates_total_count_with { 1234 }
36
+ Massive::Step.new.calculate_total_count.should eq(1234)
37
+ end
38
+ end
39
+
40
+ describe "#enqueue" do
41
+ it "enqueues itself, passing ids as strings" do
42
+ Resque.should_receive(:enqueue).with(step.class, step.process.id.to_s, step.id.to_s)
43
+ step.enqueue
44
+ end
45
+
46
+ context "when a subclass redefines calculate_total_count" do
47
+ subject(:step) { CustomStep.new }
48
+ before { process.steps << step }
49
+
50
+ it "enqueues itself, passing ids as strings" do
51
+ Resque.should_receive(:enqueue).with(step.class, step.process.id.to_s, step.id.to_s)
52
+ step.enqueue
53
+ end
54
+ end
55
+ end
56
+
57
+ describe "#start!" do
58
+ it "persists the total_count" do
59
+ step.start!
60
+ step.reload.total_count.should be_present
61
+ end
62
+
63
+ it "sends a :start notification" do
64
+ step.should_receive(:notify).with(:start)
65
+ step.start!
66
+ end
67
+
68
+ context "when total_count is not defined" do
69
+ it "updates it to zero" do
70
+ step.start!
71
+ step.total_count.should be_zero
72
+ end
73
+ end
74
+
75
+ context "when total_count is defined" do
76
+ before { step.total_count = 10 }
77
+
78
+ it "does not change it" do
79
+ expect { step.start! }.to_not change(step, :total_count)
80
+ end
81
+ end
82
+
83
+ context "when a subclass redefines calculate_total_count" do
84
+ subject(:step) { CustomStep.new }
85
+ before { process.steps << step }
86
+
87
+ context "and the total_count is not defined" do
88
+ it "updates it to the return value of calculate_total_count" do
89
+ step.start!
90
+ step.total_count.should eq(step.send(:calculate_total_count))
91
+ end
92
+ end
93
+
94
+ context "when total_count is defined" do
95
+ context "and it is 0" do
96
+ before { step.total_count = 0 }
97
+
98
+ it "does not change it" do
99
+ expect { step.work }.to_not change(step, :total_count)
100
+ end
101
+ end
102
+
103
+ context "and it is 10" do
104
+ before { step.total_count = 10 }
105
+
106
+ it "does not change it" do
107
+ expect { step.work }.to_not change(step, :total_count)
108
+ end
109
+ end
110
+ end
111
+ end
112
+ end
113
+
114
+ describe "#work" do
115
+ it "starts the step, then process it" do
116
+ step.should_receive(:start!) do
117
+ step.should_receive(:process_step)
118
+ end
119
+
120
+ step.work
121
+ end
122
+
123
+ it "calls complete after processing step" do
124
+ step.should_receive(:process_step) do
125
+ step.should_receive(:complete)
126
+ end
127
+
128
+ step.work
129
+ end
130
+ end
131
+
132
+ describe "jobs completion" do
133
+ context "when it is not persisted" do
134
+ it "does not reloads itself" do
135
+ step.should_not_receive(:reload)
136
+ step.completed_all_jobs?
137
+ end
138
+ end
139
+
140
+ context "when it is persisted" do
141
+ before { step.save }
142
+
143
+ it "reloads itself, so that it can get the latest information" do
144
+ step.should_receive(:reload).and_return(step)
145
+ step.completed_all_jobs?
146
+ end
147
+ end
148
+
149
+ context "when there are no jobs" do
150
+ it { should be_completed_all_jobs }
151
+ end
152
+
153
+ context "when there are jobs" do
154
+ let!(:jobs) { step.jobs = 3.times.map { |i| Massive::Job.new } }
155
+
156
+ before do
157
+ jobs.each { |job| job.stub(:completed?).and_return(true) }
158
+ end
159
+
160
+ context "but there is at least one that is not completed" do
161
+ before do
162
+ jobs.each { |job| job.stub(:completed?).and_return(true) }
163
+
164
+ jobs.last.stub(:completed?).and_return(false)
165
+ end
166
+
167
+ it { should_not be_completed_all_jobs }
168
+ end
169
+
170
+ context "and all jobs are completed" do
171
+ before do
172
+ jobs.each { |job| job.stub(:completed?).and_return(true) }
173
+ end
174
+
175
+ it { should be_completed_all_jobs }
176
+ end
177
+ end
178
+ end
179
+
180
+ describe "#complete" do
181
+ context "when there is at least one job that is not completed" do
182
+ before { step.stub(:completed_all_jobs?).and_return(false) }
183
+
184
+ it "does not updates the finished_at" do
185
+ step.complete
186
+ step.finished_at.should be_nil
187
+ end
188
+
189
+ it "does not updates the memory_consumption" do
190
+ step.complete
191
+ step.memory_consumption.should be_zero
192
+ end
193
+
194
+ it "does not persists the step" do
195
+ step.should_not be_persisted
196
+ end
197
+
198
+ it "does not send a :complete notification" do
199
+ step.should_not_receive(:notify).with(:complete)
200
+ step.complete
201
+ end
202
+
203
+ context "when it should not execute next after completion" do
204
+ it "does not enqueues next step of process" do
205
+ process.should_not_receive(:enqueue_next)
206
+ step.complete
207
+ end
208
+ end
209
+
210
+ context "when it should execute next after completion" do
211
+ before { step.execute_next = true }
212
+
213
+ it "does not enqueues next step of process" do
214
+ process.should_not_receive(:enqueue_next)
215
+ step.complete
216
+ end
217
+ end
218
+ end
219
+
220
+ context "when all jobs are completed" do
221
+ let(:lock_key) { step.send(:lock_key_for, :complete) }
222
+
223
+ let(:redis) { Resque.redis }
224
+
225
+ before { step.stub(:completed_all_jobs?).and_return(true) }
226
+
227
+ context "but there is a complete lock for this step" do
228
+ before do
229
+ redis.set(lock_key, 1.minute.from_now)
230
+ end
231
+
232
+ it "does not updates the finished_at" do
233
+ step.complete
234
+ step.finished_at.should be_nil
235
+ end
236
+
237
+ it "does not updates the memory_consumption" do
238
+ step.complete
239
+ step.memory_consumption.should be_zero
240
+ end
241
+
242
+ it "does not persists the step" do
243
+ step.should_not be_persisted
244
+ end
245
+
246
+ it "does not send a :complete notification" do
247
+ step.should_not_receive(:notify).with(:complete)
248
+ step.complete
249
+ end
250
+
251
+ context "when it should not execute next after completion" do
252
+ it "does not enqueues next step of process" do
253
+ process.should_not_receive(:enqueue_next)
254
+ step.complete
255
+ end
256
+ end
257
+
258
+ context "when it should execute next after completion" do
259
+ before { step.execute_next = true }
260
+
261
+ it "does not enqueues next step of process" do
262
+ process.should_not_receive(:enqueue_next)
263
+ step.complete
264
+ end
265
+ end
266
+ end
267
+
268
+ context "but there is no complete lock for this step" do
269
+ it "updates the finished_at with the current time, persisting it" do
270
+ step.complete
271
+ step.reload.finished_at.to_i.should eq(now.to_i)
272
+ end
273
+
274
+ it "updates the memory_consumption, persisting it" do
275
+ step.complete
276
+ step.reload.memory_consumption.should eq(current_memory_consumption)
277
+ end
278
+
279
+ it "sends a :complete notification" do
280
+ step.should_receive(:notify).with(:complete)
281
+ step.complete
282
+ end
283
+
284
+ context "when it should not execute next after completion" do
285
+ it "does not enqueues next step of process" do
286
+ process.should_not_receive(:enqueue_next)
287
+ step.complete
288
+ end
289
+ end
290
+
291
+ context "when it should execute next after completion" do
292
+ before { step.execute_next = true }
293
+
294
+ it "enqueues next step of process" do
295
+ process.should_receive(:enqueue_next)
296
+ step.complete
297
+ end
298
+ end
299
+ end
300
+ end
301
+ end
302
+
303
+ context "#process_step" do
304
+ context "when total_count is zero" do
305
+ before { step.total_count = 0 }
306
+
307
+ it "creates no jobs" do
308
+ step.process_step
309
+ step.jobs.should be_empty
310
+ end
311
+ end
312
+
313
+ context "when total_count is 2000" do
314
+ before { step.total_count = 2000 }
315
+
316
+ let(:limit) { 100 }
317
+
318
+ it "creates 20 jobs, each processing 100 items" do
319
+ step.process_step
320
+ step.jobs.each_with_index do |job, index|
321
+ job.limit.should eq(limit)
322
+ job.offset.should eq(index * limit)
323
+ end
324
+ end
325
+
326
+ it "creates jobs of the Massive::Job class" do
327
+ step.process_step
328
+ step.jobs.each do |job|
329
+ job.should be_an_instance_of(Massive::Job)
330
+ end
331
+ end
332
+
333
+ context "on custom step class" do
334
+ subject(:step) { CustomStep.new }
335
+ before { process.steps << step }
336
+ let(:limit) { 1000 }
337
+
338
+ it "follows redefined limit_ratio, creating 2 jobs, each processing 1000 items" do
339
+ step.process_step
340
+ step.jobs.each_with_index do |job, index|
341
+ job.limit.should eq(limit)
342
+ job.offset.should eq(index * limit)
343
+ end
344
+ end
345
+
346
+ it "creates jobs of the redefined job_class" do
347
+ step.process_step
348
+ step.jobs.each do |job|
349
+ job.should be_an_instance_of(CustomJob)
350
+ end
351
+ end
352
+ end
353
+
354
+ context "on a inherited step, that didn't redefine any configuration" do
355
+ subject(:step) { InheritedStep.new }
356
+ before { process.steps << step }
357
+
358
+ it "follows redefined limit_ratio, creating 2 jobs, each processing 1000 items" do
359
+ step.process_step
360
+ step.jobs.each_with_index do |job, index|
361
+ job.limit.should eq(limit)
362
+ job.offset.should eq(index * limit)
363
+ end
364
+ end
365
+
366
+ it "creates jobs of the Massive::Job" do
367
+ step.process_step
368
+ step.jobs.each do |job|
369
+ job.should be_an_instance_of(Massive::Job)
370
+ end
371
+ end
372
+ end
373
+ end
374
+
375
+ context "when total_count is 3000" do
376
+ before { step.total_count = 3000 }
377
+
378
+ let(:limit) { 1000 }
379
+
380
+ it "creates 3 jobs, each processing 1000 items" do
381
+ step.process_step
382
+ step.jobs.each_with_index do |job, index|
383
+ job.limit.should eq(limit)
384
+ job.offset.should eq(index * limit)
385
+ end
386
+ end
387
+
388
+ context "on custom step class" do
389
+ subject(:step) { CustomStep.new }
390
+ before { process.steps << step }
391
+ let(:limit) { 1500 }
392
+
393
+ it "follows redefined limit_ratio, creating 2 jobs, each processing 1000 items" do
394
+ step.process_step
395
+ step.jobs.each_with_index do |job, index|
396
+ job.limit.should eq(limit)
397
+ job.offset.should eq(index * limit)
398
+ end
399
+ end
400
+
401
+ it "creates jobs of the redefined job_class" do
402
+ step.process_step
403
+ step.jobs.each do |job|
404
+ job.should be_an_instance_of(CustomJob)
405
+ end
406
+ end
407
+ end
408
+
409
+ context "on a inherited step, that didn't redefine any configuration" do
410
+ subject(:step) { InheritedStep.new }
411
+ before { process.steps << step }
412
+
413
+ it "follows redefined limit_ratio, creating 2 jobs, each processing 1000 items" do
414
+ step.process_step
415
+ step.jobs.each_with_index do |job, index|
416
+ job.limit.should eq(limit)
417
+ job.offset.should eq(index * limit)
418
+ end
419
+ end
420
+
421
+ it "creates jobs of the Massive::Job" do
422
+ step.process_step
423
+ step.jobs.each do |job|
424
+ job.should be_an_instance_of(Massive::Job)
425
+ end
426
+ end
427
+ end
428
+ end
429
+ end
430
+
431
+ describe "processed items and time" do
432
+ context "when the step has no jobs" do
433
+ its(:processed) { should be_zero }
434
+ its(:processed_percentage) { should be_zero }
435
+ its(:processing_time) { should be_zero }
436
+ end
437
+
438
+ context "when the step has jobs with processed itens" do
439
+ let!(:jobs) { step.jobs = 3.times.map { |i| Massive::Job.new(processed: 100 * i) } }
440
+ let(:total_processed) { jobs.map(&:processed).sum }
441
+
442
+ its(:processed) { should eq(total_processed) }
443
+
444
+ context "and the total count is zero" do
445
+ its(:processed_percentage) { should be_zero }
446
+ end
447
+
448
+ context "and the total count is greater than zero" do
449
+ before { step.total_count = 1000 }
450
+
451
+ its(:processed_percentage) { should eq(total_processed.to_f / step.total_count) }
452
+ end
453
+ end
454
+
455
+ context "when the step has jobs that have some elapsed time" do
456
+ let!(:jobs) do
457
+ step.jobs = 3.times.map do |i|
458
+ Massive::Job.new.tap { |j| j.stub(:elapsed_time).and_return(100 * i) }
459
+ end
460
+ end
461
+
462
+ let(:total_elapsed_time) { jobs.map(&:elapsed_time).sum }
463
+
464
+ its(:processing_time) { should eq(total_elapsed_time) }
465
+ end
466
+ end
467
+
468
+ context "on a inherited step" do
469
+ subject(:step) { InheritedStep.new }
470
+ before { process.steps << step }
471
+
472
+ it "properly sets the _type" do
473
+ step._type.should be_present
474
+ end
475
+ end
476
+
477
+ describe "#active_model_serializer" do
478
+ its(:active_model_serializer) { should eq Massive::StepSerializer }
479
+
480
+ context "when class inherits from Massive::Step and does not have a serializer" do
481
+ class TestStep < Massive::Step
482
+ end
483
+
484
+ it "returns Massive::StepSerializer" do
485
+ process = TestStep.new
486
+ process.active_model_serializer.should eq Massive::StepSerializer
487
+ end
488
+ end
489
+ end
490
+ end