bulk-processor 0.6.0 → 0.7.0
- checksums.yaml +4 -4
- data/README.md +23 -1
- data/bulk-processor.gemspec +2 -2
- data/lib/bulk_processor/back_end/active_job/process_csv_job.rb +18 -0
- data/lib/bulk_processor/back_end/active_job/split_csv_job.rb +19 -0
- data/lib/bulk_processor/back_end/active_job.rb +10 -5
- data/lib/bulk_processor/back_end/dynosaur/process_csv_task.rb +26 -0
- data/lib/bulk_processor/back_end/dynosaur/split_csv_task.rb +27 -0
- data/lib/bulk_processor/back_end/dynosaur/tasks.rb +2 -0
- data/lib/bulk_processor/back_end/dynosaur.rb +6 -4
- data/lib/bulk_processor/back_end.rb +1 -1
- data/lib/bulk_processor/config.rb +10 -2
- data/lib/bulk_processor/process_csv.rb +24 -0
- data/lib/bulk_processor/split_csv.rb +46 -0
- data/lib/bulk_processor/version.rb +1 -1
- data/lib/bulk_processor.rb +2 -4
- metadata +30 -26
- data/lib/bulk_processor/job/process_csv.rb +0 -22
- data/lib/bulk_processor/job/split_csv.rb +0 -41
- data/lib/bulk_processor/tasks.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f5d0cde0e86097416d0234ead33d2836f4e08c7a
+  data.tar.gz: d176ea1309c0e618ccf718fd8c4320a2b3948831
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5dfea7cabd9b210116fd21644d34740793f785df799b954c79bdc452f0ec11155575465f0d9ac6bd6b4c1fc2df2a8ca96902c142c99ff15c6a5182761e970f55
+  data.tar.gz: 9b8dc6e9fcd908b6bf0ddc2e6e9c4a36c89ba398ccb11016ed2668c188339b953a4e8c4c58bf8c8025b7ae1b3903170871617fd8e6a808a6d68164ede85af3aa
data/README.md
CHANGED
@@ -30,6 +30,15 @@ Bulk processor requires the following configuration
 
 #### Back end: ActiveJob
 
+Include the `activejob` and back-end queueing gems in your Gemfile, e.g.
+
+```ruby
+# Gemfile
+gem 'activejob'
+gem 'bulk-processor'
+gem 'resque'
+```
+
 ```ruby
 BulkProcessor.back_end = :active_job
 BulkProcessor.queue_adapter = <adapter>
@@ -41,6 +50,14 @@ including `:resque`.
 
 #### Back end: Dynosaur
 
+Include the `dynosaur` gem in your Gemfile, e.g.
+
+```ruby
+# Gemfile
+gem 'dynosaur'
+gem 'resque'
+```
+
 ```ruby
 BulkProcessor.back_end = :dynosaur
 BulkProcessor.heroku.api_key = 'my-heroku-api-key'
@@ -49,7 +66,12 @@ BulkProcessor.heroku.app_name = 'my-heroku-app-name'
 
 ```ruby
 # Rakefile
-require 'bulk_processor/tasks'
+require 'bulk_processor/back_end/dynosaur/tasks'
+
+# If you do not already have an :enivronment rake task, create a no-op one as
+# Dynosaur tasks depend on it.
+task :environment
+end
 ```
 
 #### AWS S3
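The README changes above move the queueing gems out of the gem itself and into the client application's Gemfile. A minimal sketch of what a complete 0.7.0 configuration might look like, assuming a Rails-style initializer (the file path and adapter choice are illustrative):

```ruby
# config/initializers/bulk_processor.rb (illustrative location)
require 'bulk_processor'

BulkProcessor.back_end = :active_job   # or :dynosaur
BulkProcessor.queue_adapter = :resque  # any ActiveJob queue adapter

# Only needed when using the :dynosaur back end:
# BulkProcessor.heroku.api_key = ENV['HEROKU_API_KEY']
# BulkProcessor.heroku.app_name = 'my-heroku-app-name'
```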
data/bulk-processor.gemspec
CHANGED
@@ -21,12 +21,12 @@ success or failure report
   spec.require_paths = ['lib']
   spec.required_ruby_version = '>= 2.1'
 
-  spec.add_runtime_dependency 'activejob', '~> 4'
   spec.add_runtime_dependency 'aws-sdk', '~> 2.1'
-  spec.add_runtime_dependency 'dynosaur', '~> 0.2.1'
   spec.add_runtime_dependency 'rack', '~> 1.5'
 
+  spec.add_development_dependency 'activejob', '~> 4'
   spec.add_development_dependency 'bundler'
+  spec.add_development_dependency 'dynosaur', '~> 0.2.1'
  spec.add_development_dependency 'pry-byebug', '~> 3'
  spec.add_development_dependency 'rake', '~> 10.4'
  spec.add_development_dependency 'rspec', '~> 3.3'
data/lib/bulk_processor/back_end/active_job/process_csv_job.rb
ADDED
@@ -0,0 +1,18 @@
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      # ActiveJob to handle processing the CSV in the background
+      class ProcessCSVJob < ::ActiveJob::Base
+        queue_as 'bulk_processor'
+
+        def perform(processor_class, payload, key)
+          BulkProcessor::ProcessCSV.new(
+            processor_class.constantize,
+            PayloadSerializer.deserialize(payload),
+            key
+          ).perform
+        end
+      end
+    end
+  end
+end
data/lib/bulk_processor/back_end/active_job/split_csv_job.rb
ADDED
@@ -0,0 +1,19 @@
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      # ActiveJob to handle processing the CSV in the background
+      class SplitCSVJob < ::ActiveJob::Base
+        queue_as 'bulk_processor'
+
+        def perform(processor_class, payload, key, num_chunks)
+          BulkProcessor::SplitCSV.new(
+            processor_class.constantize,
+            PayloadSerializer.deserialize(payload),
+            key,
+            num_chunks
+          ).perform
+        end
+      end
+    end
+  end
+end
data/lib/bulk_processor/back_end/active_job.rb
CHANGED
@@ -1,20 +1,25 @@
+require 'active_job'
+
+require_relative 'active_job/process_csv_job'
+require_relative 'active_job/split_csv_job'
+
 class BulkProcessor
   module BackEnd
     # Execute jobs via ActiveJob, e.g. Resque
     class ActiveJob
       def initialize(processor_class:, payload:, key:)
-
-
+        @processor_class = processor_class.name
+        @payload = PayloadSerializer.serialize(payload)
         @key = key
       end
 
       def start
-
+        ActiveJob::ProcessCSVJob.perform_later(processor_class, payload, key)
       end
 
       def split(num_processes)
-
-
+        ActiveJob::SplitCSVJob.perform_later(processor_class, payload, key,
+                                             num_processes)
       end
 
       private
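The back end now hands ActiveJob only primitives: the processor class travels as its name and the payload as a serialized string, and the two jobs above rebuild both before delegating to `ProcessCSV` / `SplitCSV`. A rough illustration of the round trip, with an illustrative payload (not taken from the gem's tests):

```ruby
require 'bulk_processor'

payload    = { 'recipient' => 'admin@example.com' }              # illustrative payload
serialized = BulkProcessor::PayloadSerializer.serialize(payload)

# ActiveJob#start enqueues [processor_class.name, serialized, key]; the jobs
# rebuild the originals with String#constantize and PayloadSerializer.deserialize
# before handing them to ProcessCSV / SplitCSV.
restored = BulkProcessor::PayloadSerializer.deserialize(serialized)
```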
data/lib/bulk_processor/back_end/dynosaur/process_csv_task.rb
ADDED
@@ -0,0 +1,26 @@
+require 'rake'
+
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      class ProcessCSVTask
+        include Rake::DSL
+
+        def install_task
+          namespace :bulk_processor do
+            desc 'Start processing a CSV file'
+            task :start, [:processor_class, :payload, :key] => :environment do |_task, args|
+              BulkProcessor::ProcessCSV.new(
+                args[:processor_class].constantize,
+                PayloadSerializer.deserialize(args[:payload]),
+                args[:key]
+              ).perform
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+BulkProcessor::BackEnd::ActiveJob::ProcessCSVTask.new.install_task
data/lib/bulk_processor/back_end/dynosaur/split_csv_task.rb
ADDED
@@ -0,0 +1,27 @@
+require 'rake'
+
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      class SplitCSVTask
+        include Rake::DSL
+
+        def install_task
+          namespace :bulk_processor do
+            desc 'Split a CSV file and process each piece'
+            task :split, [:processor_class, :payload, :key, :num_chunks] => :environment do |_task, args|
+              BulkProcessor::SplitCSV.new(
+                args[:processor_class].constantize,
+                PayloadSerializer.deserialize(args[:payload]),
+                args[:key],
+                args[:num_chunks].to_i
+              ).perform
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+BulkProcessor::BackEnd::ActiveJob::SplitCSVTask.new.install_task
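Rake task arguments arrive as strings, which is why the Dynosaur back end (next section) passes `num_processes.to_s` and the split task above converts it back with `args[:num_chunks].to_i`. A tiny illustration with made-up values:

```ruby
# Only the String -> Integer round trip matters here; all values are made up.
rake_args  = ['MyCSVProcessor', 'serialized-payload', 'uploads/people.csv', 4.to_s]
num_chunks = rake_args.last.to_i # => 4
```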
data/lib/bulk_processor/back_end/dynosaur.rb
CHANGED
@@ -1,12 +1,14 @@
 require 'dynosaur'
 
+require_relative 'dynosaur/tasks'
+
 class BulkProcessor
   module BackEnd
     # Execute jobs via rake tasks that will spawn a new Heroku dyno
     class Dynosaur
       def initialize(processor_class:, payload:, key:)
-        @processor_class = processor_class
-        @payload = payload
+        @processor_class = processor_class.name
+        @payload = PayloadSerializer.serialize(payload)
         @key = key
         configure_dynosaur
       end
@@ -14,7 +16,7 @@ class BulkProcessor
       def start
         args = {
           task: 'bulk_processor:start',
-          args: [processor_class
+          args: [processor_class, payload, key]
         }
         ::Dynosaur::Process::Heroku.new(args).start
       end
@@ -22,7 +24,7 @@ class BulkProcessor
       def split(num_processes)
         args = {
           task: 'bulk_processor:split',
-          args: [processor_class
+          args: [processor_class, payload, key, num_processes.to_s]
         }
         ::Dynosaur::Process::Heroku.new(args).start
       end
data/lib/bulk_processor/back_end.rb
CHANGED
@@ -4,7 +4,7 @@ class BulkProcessor
     def start(processor_class:, payload:, key:, num_processes: 1)
       back_end = back_end_class.new(
         processor_class: processor_class,
-        payload:
+        payload: payload,
         key: key
       )
       num_processes > 1 ? back_end.split(num_processes) : back_end.start
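`BackEnd.start` is the dispatch point used elsewhere in this diff (e.g. `SplitCSV#perform` below): with `num_processes: 1` it starts a single job, otherwise it splits the file first. A hedged usage sketch; the processor class, payload, and key are hypothetical and assumed to be defined by the host application:

```ruby
BulkProcessor::BackEnd.start(
  processor_class: MyCSVProcessor,                 # hypothetical processor class
  payload: { 'recipient' => 'admin@example.com' }, # hypothetical payload
  key: 'uploads/people.csv',                       # hypothetical file key
  num_processes: 4                                 # > 1 triggers split before processing
)
```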
data/lib/bulk_processor/config.rb
CHANGED
@@ -1,9 +1,17 @@
 class BulkProcessor
   # Store configuration data set by clients
   class Config
-    attr_reader :queue_adapter
+    attr_reader :back_end, :queue_adapter
     attr_writer :file_class
-    attr_accessor :
+    attr_accessor :temp_directory
+
+    def back_end=(back_end)
+      require_relative "back_end/#{back_end}"
+      @back_end = back_end
+    rescue LoadError => error
+      puts error.message
+      raise ArgumentError, "Invalid back-end: #{back_end}"
+    end
 
     def queue_adapter=(adapter)
       ActiveJob::Base.queue_adapter = @queue_adapter = adapter
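The new `back_end=` writer loads the matching back-end file on demand and rejects names it cannot load. A small sketch of that behaviour, assuming the setter is reached through `BulkProcessor.config` (which the gem uses elsewhere in this diff):

```ruby
require 'bulk_processor'

# Loads lib/bulk_processor/back_end/active_job.rb on demand; this only works if
# the activejob gem is in your bundle (see the README changes above).
BulkProcessor.config.back_end = :active_job

# An unknown name (or a back end whose gem is missing) raises ArgumentError.
begin
  BulkProcessor.config.back_end = :sidekiq
rescue ArgumentError => error
  error.message # => "Invalid back-end: sidekiq"
end
```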
data/lib/bulk_processor/process_csv.rb
ADDED
@@ -0,0 +1,24 @@
+class BulkProcessor
+  class ProcessCSV
+    def initialize(processor_class, payload, key)
+      @processor_class = processor_class
+      @payload = payload
+      @key = key
+    end
+
+    def perform
+      file = BulkProcessor.config.file_class.new(key)
+      file.open do |f|
+        csv = CSV.parse(f.read, headers: true)
+        processor = processor_class.new(csv, payload: payload.merge('key' => key))
+        processor.start
+      end
+    ensure
+      file.try(:delete)
+    end
+
+    private
+
+    attr_reader :processor_class, :payload, :key
+  end
+end
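Both the `ProcessCSVJob` ActiveJob and the Dynosaur `bulk_processor:start` rake task reduce to this one call. A sketch of the direct invocation; the processor class, payload, and key are hypothetical and assumed to be supplied by the host application:

```ruby
BulkProcessor::ProcessCSV.new(
  MyCSVProcessor,                                  # hypothetical processor class
  { 'recipient' => 'admin@example.com' },          # hypothetical payload
  'uploads/people.csv'                             # hypothetical file key
).perform                                          # parses the CSV, runs the processor, deletes the file
```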
data/lib/bulk_processor/split_csv.rb
ADDED
@@ -0,0 +1,46 @@
+class BulkProcessor
+  class SplitCSV
+    def initialize(processor_class, payload, key, num_chunks)
+      @processor_class = processor_class
+      @payload = payload
+      @key = key
+      @num_chunks = num_chunks
+    end
+
+    def perform
+      splitter = FileSplitter.new(key: key, row_chunker: row_chunker)
+      keys = splitter.split!
+      keys.each do |key|
+        BackEnd.start(processor_class: processor_class, payload: payload, key: key)
+      end
+    rescue Exception => error
+      handle_error(error)
+      raise
+    ensure
+      BulkProcessor.config.file_class.new(key).delete
+    end
+
+    private
+
+    attr_reader :processor_class, :payload, :key, :num_chunks
+
+    def row_chunker
+      if processor_class.respond_to?(:boundary_column)
+        boundary_column = processor_class.boundary_column
+        RowChunker::Boundary.new(num_chunks, boundary_column: boundary_column)
+      else
+        RowChunker::Balanced.new(num_chunks)
+      end
+    end
+
+    def handle_error(error)
+      if processor_class.respond_to?(:handler_class)
+        handler = processor_class.handler_class.new(
+          payload: payload.merge('key' => key),
+          results: []
+        )
+        handler.fail!(error)
+      end
+    end
+  end
+end
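`SplitCSV` probes the processor class with `respond_to?` for two optional hooks: `boundary_column` switches to boundary-aware chunking, and `handler_class` receives a `fail!` call if splitting raises. A sketch of a processor opting into both; everything here except the two hook method names is hypothetical:

```ruby
# Hypothetical handler: SplitCSV instantiates it with payload:/results: and
# calls #fail!(error) when splitting fails.
class MyFailureHandler
  def initialize(payload:, results:)
    @payload = payload
    @results = results
  end

  def fail!(error)
    warn "bulk processing failed: #{error.message}"
  end
end

# Hypothetical processor exposing the optional class-level hooks.
class MyCSVProcessor
  # Rows sharing this column value are kept in the same chunk.
  def self.boundary_column
    'account_id'
  end

  def self.handler_class
    MyFailureHandler
  end
end
```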
data/lib/bulk_processor.rb
CHANGED
@@ -1,14 +1,12 @@
 require 'bulk_processor/back_end'
-require 'bulk_processor/back_end/active_job'
-require 'bulk_processor/back_end/dynosaur'
 require 'bulk_processor/config'
 require 'bulk_processor/file_splitter'
-require 'bulk_processor/job/process_csv'
-require 'bulk_processor/job/split_csv'
 require 'bulk_processor/payload_serializer'
+require 'bulk_processor/process_csv'
 require 'bulk_processor/row_chunker/balanced'
 require 'bulk_processor/row_chunker/boundary'
 require 'bulk_processor/s3_file'
+require 'bulk_processor/split_csv'
 require 'bulk_processor/stream_encoder'
 require 'bulk_processor/validated_csv'
 require 'bulk_processor/version'
metadata
CHANGED
@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: bulk-processor
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.7.0
 platform: ruby
 authors:
 - Tom Collier, Justin Richard
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-01-
+date: 2016-01-26 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: activejob
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '4'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '4'
 - !ruby/object:Gem::Dependency
   name: aws-sdk
   requirement: !ruby/object:Gem::Requirement
@@ -39,33 +25,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '2.1'
 - !ruby/object:Gem::Dependency
-  name:
+  name: rack
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: '1.5'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: '1.5'
 - !ruby/object:Gem::Dependency
-  name:
+  name: activejob
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '
-  type: :
+        version: '4'
+  type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '
+        version: '4'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -80,6 +66,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: dynosaur
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: pry-byebug
   requirement: !ruby/object:Gem::Requirement
@@ -145,7 +145,12 @@ files:
 - lib/bulk_processor.rb
 - lib/bulk_processor/back_end.rb
 - lib/bulk_processor/back_end/active_job.rb
+- lib/bulk_processor/back_end/active_job/process_csv_job.rb
+- lib/bulk_processor/back_end/active_job/split_csv_job.rb
 - lib/bulk_processor/back_end/dynosaur.rb
+- lib/bulk_processor/back_end/dynosaur/process_csv_task.rb
+- lib/bulk_processor/back_end/dynosaur/split_csv_task.rb
+- lib/bulk_processor/back_end/dynosaur/tasks.rb
 - lib/bulk_processor/config.rb
 - lib/bulk_processor/csv_processor.rb
 - lib/bulk_processor/csv_processor/no_op_handler.rb
@@ -153,14 +158,13 @@ files:
 - lib/bulk_processor/csv_processor/result.rb
 - lib/bulk_processor/csv_processor/row_processor.rb
 - lib/bulk_processor/file_splitter.rb
-- lib/bulk_processor/job/process_csv.rb
-- lib/bulk_processor/job/split_csv.rb
 - lib/bulk_processor/payload_serializer.rb
+- lib/bulk_processor/process_csv.rb
 - lib/bulk_processor/row_chunker/balanced.rb
 - lib/bulk_processor/row_chunker/boundary.rb
 - lib/bulk_processor/s3_file.rb
+- lib/bulk_processor/split_csv.rb
 - lib/bulk_processor/stream_encoder.rb
-- lib/bulk_processor/tasks.rb
 - lib/bulk_processor/validated_csv.rb
 - lib/bulk_processor/version.rb
 homepage:
data/lib/bulk_processor/job/process_csv.rb
DELETED
@@ -1,22 +0,0 @@
-require 'active_job'
-
-class BulkProcessor
-  # ActiveJob to handle processing the CSV in the background
-  module Job
-    class ProcessCSV < ActiveJob::Base
-      queue_as 'bulk_processor'
-
-      def perform(processor_class, payload, key)
-        file = BulkProcessor.config.file_class.new(key)
-        payload = PayloadSerializer.deserialize(payload).merge('key' => key)
-        file.open do |f|
-          csv = CSV.parse(f.read, headers: true)
-          processor = processor_class.constantize.new(csv, payload: payload)
-          processor.start
-        end
-      ensure
-        file.try(:delete)
-      end
-    end
-  end
-end
data/lib/bulk_processor/job/split_csv.rb
DELETED
@@ -1,41 +0,0 @@
-require 'active_job'
-
-class BulkProcessor
-  # ActiveJob to handle processing the CSV in the background
-  module Job
-    class SplitCSV < ActiveJob::Base
-      queue_as 'bulk_processor'
-
-      def perform(processor_class, payload, key, num_chunks)
-        processor_class = processor_class.constantize
-        chunker = row_chunker(processor_class, num_chunks)
-        payload = PayloadSerializer.deserialize(payload)
-        splitter = FileSplitter.new(key: key, row_chunker: chunker)
-        keys = splitter.split!
-        keys.each do |key|
-          BackEnd.start(processor_class: processor_class, payload: payload, key: key)
-        end
-      rescue Exception => error
-        if processor_class.respond_to?(:handler_class)
-          payload = payload.merge('key' => key)
-          handler = processor_class.handler_class.new(payload: payload, results: [])
-          handler.fail!(error)
-        end
-        raise
-      ensure
-        BulkProcessor.config.file_class.new(key).delete
-      end
-
-      private
-
-      def row_chunker(processor_class, num_chunks)
-        if processor_class.respond_to?(:boundary_column)
-          boundary_column = processor_class.boundary_column
-          RowChunker::Boundary.new(num_chunks, boundary_column: boundary_column)
-        else
-          RowChunker::Balanced.new(num_chunks)
-        end
-      end
-    end
-  end
-end
data/lib/bulk_processor/tasks.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
|
3
|
-
class BulkProcessor
|
4
|
-
class Tasks
|
5
|
-
include Rake::DSL
|
6
|
-
|
7
|
-
def install_tasks
|
8
|
-
namespace :bulk_processor do
|
9
|
-
desc 'Start processing a CSV file'
|
10
|
-
task :start, [:processor_class, :payload, :key] => :environment do |_task, args|
|
11
|
-
Job::ProcessCSV.new.perform(
|
12
|
-
args[:processor_class],
|
13
|
-
args[:payload],
|
14
|
-
args[:key]
|
15
|
-
)
|
16
|
-
end
|
17
|
-
|
18
|
-
desc 'Split a CSV file and process each piece'
|
19
|
-
task :split, [:processor_class, :payload, :key, :num_chunks] => :environment do |_task, args|
|
20
|
-
Job::SplitCSV.new.perform(
|
21
|
-
args[:processor_class],
|
22
|
-
args[:payload],
|
23
|
-
args[:key],
|
24
|
-
args[:num_chunks]
|
25
|
-
)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
BulkProcessor::Tasks.new.install_tasks
|