massive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
@@ -0,0 +1,24 @@
1
+ require "spec_helper"
2
+
3
+ describe Massive::FileJob do
4
+ let(:process) { Massive::FileProcess.new file_attributes: { url: 'http://someurl.com' } }
5
+ let(:step) { Massive::FileStep.new process: process }
6
+ let(:job) { Massive::FileJob.new step: step, offset: 1000, limit: 300 }
7
+
8
+ it "delegates file to step" do
9
+ step.file.should eq(step.file)
10
+ end
11
+
12
+ describe "when running through each item" do
13
+ let(:file) { process.file }
14
+ let(:block) { Proc.new { } }
15
+
16
+ it "yields the process range of the file processor, with its offset and limit" do
17
+ file.stub_chain(:processor, :process_range)
18
+ .with({ offset: job.offset, limit: job.limit })
19
+ .and_yield(block)
20
+
21
+ expect { |block| job.each_item(&block) }.to yield_control(&block)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,209 @@
# Spec for Massive::File — reconstructed from the diff rendering
# (gutter line numbers and '+' prefixes removed so the file parses as Ruby).
# Uses RSpec 2 `should`/`stub` syntax, consistent with the rest of the suite.
require "spec_helper"

describe Massive::File do
  let(:processor) { double(FileProcessor::CSV) }
  let(:process) { Massive::FileProcess.new }

  let(:url) { 'http://someurl.com' }
  let(:encoding) { nil }
  let(:col_sep) { nil }

  let(:expected_options) do
    {
      headers: true,
      encoding: encoding,
      col_sep: col_sep
    }
  end

  subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep) }

  before { FileProcessor::CSV.stub(:new).with(file.url, expected_options).and_return(processor) }

  describe "#processor" do
    it "creates a new instance of the CSV file processor, enabling headers but without encoding and separator" do
      file.processor.should eq(processor)
    end

    context "when encoding and col_sep are defined" do
      let(:encoding) { 'iso-8859-1' }
      let(:col_sep) { ';' }

      it "creates a new instance of the CSV file processor, passing encoding and col_sep" do
        file.processor.should eq(processor)
      end
    end

    describe "when specifying that the file should have no headers" do
      subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep, use_headers: false) }

      let(:expected_options) do
        {
          headers: false,
          encoding: encoding,
          col_sep: col_sep
        }
      end

      it "creates a new instance of the CSV file processor, passing encoding and col_sep" do
        file.processor.should eq(processor)
      end
    end

    describe "when using Fog" do
      let(:filename) { 'my-file.txt' }
      let(:fog_connection) { double(Fog::Storage) }
      let(:fog_directory) { double('Directory') }
      let(:fog_file) { double('File') }
      let(:authenticated_url) { 'http://my-auth.url.com' }

      subject(:file) { Massive::File.new(filename: filename, encoding: encoding, col_sep: col_sep) }

      before do
        Massive.fog_credentials = { provider: 'AWS', aws_access_key_id: 'some-key', aws_secret_access_key: 'some-secret' }
        Massive.fog_authenticated_url_expiration = 1.hour
        Massive.fog_directory = 'my-bucket'

        Fog::Storage.stub(:new).with(Massive.fog_credentials).and_return(fog_connection)
        fog_connection.stub_chain(:directories, :get).with(Massive.fog_directory).and_return(fog_directory)
        fog_directory.stub_chain(:files, :get).with(filename).and_return(fog_file)
        fog_file.stub(:url).with(Time.current.to_i + Massive.fog_authenticated_url_expiration).and_return(authenticated_url)
      end

      it "creates a new instance of the CSV file processor, pointing its URL to the authenticated fog url" do
        FileProcessor::CSV.should_receive(:new).with(authenticated_url, expected_options).and_return(processor)
        file.processor.should eq(processor)
      end
    end
  end

  describe "#gather_info!" do
    let(:detected_encoding) { 'iso-8859-1' }
    let(:detected_col_sep) { ';' }
    let(:total_count) { 1000 }
    let(:headers) { ['some header', 'other header' ] }

    # Processor double returns the detected metadata gather_info! persists.
    let(:processor) do
      double(FileProcessor::CSV, {
        detected_encoding: detected_encoding,
        col_sep: detected_col_sep,
        total_count: total_count,
        shift: true,
        headers: headers
      })
    end

    let(:row) do
      double(CSV::Row, fields: ['some value', 'other value'])
    end

    before do
      processor.stub(:process_range)
               .with(limit: 3)
               .and_yield(row)
               .and_yield(row)
               .and_yield(row)
    end

    it "detects the file encoding, and persists it" do
      file.gather_info!
      file.reload.encoding.should eq(detected_encoding)
    end

    it "detects the column separator, and persists it" do
      file.gather_info!
      file.reload.col_sep.should eq(detected_col_sep)
    end

    it "stores the total count, and persists it" do
      file.gather_info!
      file.reload.total_count.should eq(total_count)
    end

    it "stores the headers, and persists it" do
      file.gather_info!
      file.reload.headers.should eq(headers)
    end

    it "stores a sample data with 3 rows data, and persists it" do
      file.gather_info!
      file.reload.sample_data.should eq([row.fields, row.fields, row.fields])
    end

    context "when file has no headers" do
      subject(:file) { process.file = Massive::File.new(url: url, encoding: encoding, col_sep: col_sep, use_headers: false) }

      let(:expected_options) do
        {
          headers: false,
          encoding: encoding,
          col_sep: col_sep
        }
      end

      # No :headers stub here — a headerless file yields raw row arrays.
      let(:processor) do
        double(FileProcessor::CSV, {
          detected_encoding: encoding,
          col_sep: col_sep,
          total_count: 3,
          shift: true
        })
      end

      let(:row) { ['some value', 'other value'] }

      before do
        processor.stub(:process_range)
                 .with(limit: 3)
                 .and_yield(row)
                 .and_yield(row)
                 .and_yield(row)
      end

      it "do not store the headers" do
        file.gather_info!
        file.reload.headers.should be_blank
      end

      it "store raw row in the sample data" do
        file.gather_info!
        file.reload.sample_data.should eq [row, row, row]
      end
    end

    context "when file already has gathered info" do
      before do
        file.encoding = 'utf-8'
        file.col_sep = '|'
        file.total_count = 3000
        file.headers = ['some other headers']
        file.sample_data = [['some other values']]
      end

      it "detects the file encoding, and persists it" do
        file.gather_info!
        file.reload.encoding.should eq(detected_encoding)
      end

      it "detects the column separator, and persists it" do
        file.gather_info!
        file.reload.col_sep.should eq(detected_col_sep)
      end

      it "stores the total count, and persists it" do
        file.gather_info!
        file.reload.total_count.should eq(total_count)
      end

      it "stores the headers, and persists it" do
        file.gather_info!
        file.reload.headers.should eq(headers)
      end

      it "stores a sample data with 3 rows data, and persists it" do
        file.gather_info!
        file.reload.sample_data.should eq([row.fields, row.fields, row.fields])
      end
    end
  end
end
@@ -0,0 +1,22 @@
# Spec for Massive::FileStep — reconstructed from the diff rendering
# (gutter line numbers and '+' prefixes removed so the file parses as Ruby).
require "spec_helper"

describe Massive::FileStep do
  let(:process) { Massive::FileProcess.new file_attributes: { url: 'http://someurl.com' } }
  subject(:step) { Massive::FileStep.new process: process }

  it "delegates file to process" do
    step.file.should eq(process.file)
  end

  context "when it is started!" do
    let(:file) { process.file }
    let(:total_count) { 1234 }

    before { file.total_count = total_count }

    it "calculates the total count, using the processor total count" do
      step.start!
      step.total_count.should eq(total_count)
    end
  end
end
@@ -0,0 +1,319 @@
# Spec for Massive::Job — reconstructed from the diff rendering
# (gutter line numbers and '+' prefixes removed so the file parses as Ruby).
# NOTE(review): `item`, `index`, `retry_interval` and `maximum_retries` are
# provided by the "job processing" shared context defined elsewhere in the
# suite (spec/shared) — confirm against that file; they are not defined here.
require "spec_helper"

describe Massive::Job do
  include_context "frozen time"
  include_context "stubbed memory_consumption"

  let(:process) { Massive::Process.new }
  let(:step) { process.steps.build }
  subject(:job) { step.jobs.build }

  describe ".perform" do
    before do
      Massive::Process.stub(:find_job).with(process.id, step.id, job.id).and_return(job)
    end

    it "finds the job and calls work on it" do
      job.should_receive(:work)
      Massive::Job.perform(process.id, step.id, job.id)
    end
  end

  describe ".queue" do
    it "should be massive_job" do
      Massive::Job.queue.should eq(:massive_job)
    end
  end

  describe "#enqueue" do
    it "enqueues itself, passing ids as strings" do
      Resque.should_receive(:enqueue).with(job.class, process.id.to_s, step.id.to_s, job.id.to_s)
      job.enqueue
    end

    context "when a subclass redefines calculate_total_count" do
      subject(:job) { CustomJob.new }
      before { step.jobs << job }

      it "enqueues itself, passing ids as strings" do
        Resque.should_receive(:enqueue).with(job.class, process.id.to_s, step.id.to_s, job.id.to_s)
        job.enqueue
      end
    end
  end

  describe "when creating" do
    it "enqueues the job" do
      job.should_receive(:enqueue)
      job.save
    end
  end

  describe "#start!" do
    it "zeroes the processed items, persisting it" do
      job.start!
      job.reload.processed.should be_zero
    end
  end

  describe "#finish!" do
    it "updates the finished_at with the current time, persisting it" do
      # `now` comes from the "frozen time" shared context.
      job.finish!
      job.reload.finished_at.to_i.should eq(now.to_i)
    end

    it "updates the memory_consumption, persisting it" do
      job.finish!
      job.reload.memory_consumption.should eq(current_memory_consumption)
    end

    it "calls step#complete" do
      step.should_receive(:complete)
      job.finish!
    end
  end

  describe "#work" do
    it "starts the job, then runs through each item, and finally finishes the job" do
      # Nested expectations enforce the start! -> each_item -> finish! order.
      job.should_receive(:start!) do
        job.should_receive(:each_item) do
          job.should_receive(:finish!)
        end
      end

      job.work
    end

    context "when it process one item" do
      include_context "job processing"

      it "increments the number of processed items by one" do
        job.work
        job.reload.processed.should eq(1)
      end

      it "process the item" do
        job.should_receive(:process_each).with(item, 0).once
        job.work
      end
    end

    context "when it process multiple itens" do
      include_context "job processing"

      before do
        job.stub(:each_item).and_yield(item, index)
                            .and_yield(item, index + 1)
                            .and_yield(item, index + 2)
      end

      it "increments the number of processed items by the number of items processed" do
        job.work
        job.reload.processed.should eq(3)
      end

      it "process each one of the items" do
        job.should_receive(:process_each).with(item, 0).once
        job.should_receive(:process_each).with(item, 1).once
        job.should_receive(:process_each).with(item, 2).once
        job.work
      end

      it "sends a :progress notification" do
        step.stub(:notify)
        step.should_receive(:notify).with(:progress)
        job.work
      end
    end

    context "when it is cancelled" do
      before { step.stub(:notify) }

      context "before it is started" do
        before { process.stub(:cancelled?).and_return(true) }

        it "sends a cancelled notification" do
          step.should_receive(:notify).with(:cancelled)
          job.work
        end

        it "sets the step cancelled_at" do
          job.work
          step.reload.should be_cancelled_at
        end

        it "sets the job cancelled_at" do
          job.work
          job.reload.should be_cancelled_at
        end
      end

      context "while it is processing" do
        let(:item) { double(:item) }
        let(:index) { 0 }

        before do
          job.stub(:each_item).and_yield(item, index)
                              .and_yield(item, index + 1)
                              .and_yield(item, index + 2)

          # Flip the process to cancelled on the first processed item.
          job.stub(:process_each) do
            process.stub(:cancelled?).and_return(true)
          end

          Kernel.stub(:sleep)
        end

        it "sends a cancelled notification" do
          step.should_receive(:notify).with(:cancelled)
          job.work
        end

        it "sets the step cancelled_at" do
          job.work
          step.reload.should be_cancelled_at
        end

        it "sets the job cancelled_at" do
          job.work
          job.reload.should be_cancelled_at
        end

        it "does not retry the processing" do
          Kernel.should_not_receive(:sleep)
          job.work
          job.reload.retries.should be_zero
        end
      end
    end

    shared_examples_for "handles error" do
      it "re-raises the exception" do
        expect { job.work }.to raise_error(error)
      end

      it "sets the step as failed" do
        begin
          job.work
        rescue StandardError, SignalException
        end

        step.reload.should be_failed
      end

      it "saves the last error" do
        begin
          job.work
        rescue StandardError, SignalException
        end

        job.reload.last_error.should eq(error.message)
      end

      it "sends a :failed notification" do
        step.stub(:notify)
        step.should_receive(:notify).with(:failed)

        begin
          job.work
        rescue StandardError, SignalException
        end
      end
    end

    context "when an error occurs" do
      let(:error) { StandardError.new('some-error') }

      context "while starting" do
        before { job.stub(:start!).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while running through each item" do
        before { job.stub(:each_item).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while processing each item" do
        include_context "job processing"

        before { job.stub(:process_each).and_raise(error) }

        it_should_behave_like "handles error"

        it "retries 10 times, with a 2 second interval" do
          Kernel.should_receive(:sleep).with(retry_interval).exactly(maximum_retries - 1).times
          job.should_receive(:process_each).exactly(maximum_retries).times.and_raise(error)
          expect { job.work }.to raise_error(error)
          job.reload.retries.should eq(maximum_retries)
        end

        context "when a subclass redefines the retry interval and maximum retries" do
          subject(:job) { CustomJob.new }
          before { step.jobs << job }

          it "retries 20 times, with a 5 second interval" do
            Kernel.should_receive(:sleep).with(retry_interval).exactly(maximum_retries - 1).times
            job.should_receive(:process_each).exactly(maximum_retries).times.and_raise(error)
            expect { job.work }.to raise_error(error)
            job.reload.retries.should eq(maximum_retries)
          end
        end
      end

      context "while finishing" do
        before { job.stub(:finish!).and_raise(error) }

        it_should_behave_like "handles error"
      end
    end

    context "when a system signal is sent" do
      let(:error) { SignalException.new('TERM') }

      context "while starting" do
        before { job.stub(:start!).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while running through each item" do
        before { job.stub(:each_item).and_raise(error) }

        it_should_behave_like "handles error"
      end

      context "while processing each item" do
        include_context "job processing"

        before { job.stub(:process_each).and_raise(error) }

        it_should_behave_like "handles error"

        it "does not retry the processing, raising error immediately" do
          Kernel.should_not_receive(:sleep)
          job.should_receive(:process_each).once.and_raise(error)
          expect { job.work }.to raise_error(error)
          job.reload.retries.should be_zero
        end
      end

      context "while finishing" do
        before { job.stub(:finish!).and_raise(error) }

        it_should_behave_like "handles error"
      end
    end
  end

  context "on a subclass" do
    subject(:job) { CustomJob.new }
    before { step.jobs << job }

    it "properly sets the _type" do
      job._type.should be_present
    end
  end
end