bulk-processor 0.6.0 → 0.7.0
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +23 -1
- data/bulk-processor.gemspec +2 -2
- data/lib/bulk_processor/back_end/active_job/process_csv_job.rb +18 -0
- data/lib/bulk_processor/back_end/active_job/split_csv_job.rb +19 -0
- data/lib/bulk_processor/back_end/active_job.rb +10 -5
- data/lib/bulk_processor/back_end/dynosaur/process_csv_task.rb +26 -0
- data/lib/bulk_processor/back_end/dynosaur/split_csv_task.rb +27 -0
- data/lib/bulk_processor/back_end/dynosaur/tasks.rb +2 -0
- data/lib/bulk_processor/back_end/dynosaur.rb +6 -4
- data/lib/bulk_processor/back_end.rb +1 -1
- data/lib/bulk_processor/config.rb +10 -2
- data/lib/bulk_processor/process_csv.rb +24 -0
- data/lib/bulk_processor/split_csv.rb +46 -0
- data/lib/bulk_processor/version.rb +1 -1
- data/lib/bulk_processor.rb +2 -4
- metadata +30 -26
- data/lib/bulk_processor/job/process_csv.rb +0 -22
- data/lib/bulk_processor/job/split_csv.rb +0 -41
- data/lib/bulk_processor/tasks.rb +0 -32
checksums.yaml
CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f5d0cde0e86097416d0234ead33d2836f4e08c7a
+  data.tar.gz: d176ea1309c0e618ccf718fd8c4320a2b3948831
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5dfea7cabd9b210116fd21644d34740793f785df799b954c79bdc452f0ec11155575465f0d9ac6bd6b4c1fc2df2a8ca96902c142c99ff15c6a5182761e970f55
+  data.tar.gz: 9b8dc6e9fcd908b6bf0ddc2e6e9c4a36c89ba398ccb11016ed2668c188339b953a4e8c4c58bf8c8025b7ae1b3903170871617fd8e6a808a6d68164ede85af3aa
```
data/README.md
CHANGED

````diff
@@ -30,6 +30,15 @@ Bulk processor requires the following configuration
 
 #### Back end: ActiveJob
 
+Include the `activejob` and back-end queueing gems in your Gemfile, e.g.
+
+```ruby
+# Gemfile
+gem 'activejob'
+gem 'bulk-processor'
+gem 'resque'
+```
+
 ```ruby
 BulkProcessor.back_end = :active_job
 BulkProcessor.queue_adapter = <adapter>
@@ -41,6 +50,14 @@ including `:resque`.
 
 #### Back end: Dynosaur
 
+Include the `dynosaur` gem in your Gemfile, e.g.
+
+```ruby
+# Gemfile
+gem 'dynosaur'
+gem 'resque'
+```
+
 ```ruby
 BulkProcessor.back_end = :dynosaur
 BulkProcessor.heroku.api_key = 'my-heroku-api-key'
@@ -49,7 +66,12 @@ BulkProcessor.heroku.app_name = 'my-heroku-app-name'
 
 ```ruby
 # Rakefile
-require 'bulk_processor/tasks'
+require 'bulk_processor/back_end/dynosaur/tasks'
+
+# If you do not already have an :environment rake task, create a no-op one as
+# Dynosaur tasks depend on it.
+task :environment
+end
 ```
 
 #### AWS S3
````
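Taken together, the README additions amount to the following initializer shape for an ActiveJob-backed app. This is a sketch: the `:resque` adapter and the initializer path are illustrative, not mandated by the gem.

```ruby
# config/initializers/bulk_processor.rb (illustrative location)
BulkProcessor.back_end = :active_job   # loads back_end/active_job.rb on demand
BulkProcessor.queue_adapter = :resque  # forwarded to ActiveJob::Base.queue_adapter
```

Note that `activejob` and `dynosaur` become development dependencies of the gem in 0.7.0 (see the gemspec change below), so whichever back end you pick must be declared in your own Gemfile.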
data/bulk-processor.gemspec
CHANGED

```diff
@@ -21,12 +21,12 @@ success or failure report
   spec.require_paths = ['lib']
   spec.required_ruby_version = '>= 2.1'
 
-  spec.add_runtime_dependency 'activejob', '~> 4'
   spec.add_runtime_dependency 'aws-sdk', '~> 2.1'
-  spec.add_runtime_dependency 'dynosaur', '~> 0.2.1'
   spec.add_runtime_dependency 'rack', '~> 1.5'
 
+  spec.add_development_dependency 'activejob', '~> 4'
   spec.add_development_dependency 'bundler'
+  spec.add_development_dependency 'dynosaur', '~> 0.2.1'
   spec.add_development_dependency 'pry-byebug', '~> 3'
   spec.add_development_dependency 'rake', '~> 10.4'
   spec.add_development_dependency 'rspec', '~> 3.3'
```
data/lib/bulk_processor/back_end/active_job/process_csv_job.rb
ADDED

```diff
@@ -0,0 +1,18 @@
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      # ActiveJob to handle processing the CSV in the background
+      class ProcessCSVJob < ::ActiveJob::Base
+        queue_as 'bulk_processor'
+
+        def perform(processor_class, payload, key)
+          BulkProcessor::ProcessCSV.new(
+            processor_class.constantize,
+            PayloadSerializer.deserialize(payload),
+            key
+          ).perform
+        end
+      end
+    end
+  end
+end
```
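The job receives the processor's class name as a string and the payload in serialized form; the enqueue side (in `active_job.rb` below) produces both. A manual enqueue would look roughly like this sketch, with a hypothetical processor name and file key:

```ruby
BulkProcessor::BackEnd::ActiveJob::ProcessCSVJob.perform_later(
  'MyCSVProcessor',                                               # constantized in #perform
  BulkProcessor::PayloadSerializer.serialize('notify' => 'admin@example.com'),
  'uploads/big.csv'                                               # illustrative file key
)
```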
data/lib/bulk_processor/back_end/active_job/split_csv_job.rb
ADDED

```diff
@@ -0,0 +1,19 @@
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      # ActiveJob to handle processing the CSV in the background
+      class SplitCSVJob < ::ActiveJob::Base
+        queue_as 'bulk_processor'
+
+        def perform(processor_class, payload, key, num_chunks)
+          BulkProcessor::SplitCSV.new(
+            processor_class.constantize,
+            PayloadSerializer.deserialize(payload),
+            key,
+            num_chunks
+          ).perform
+        end
+      end
+    end
+  end
+end
```
data/lib/bulk_processor/back_end/active_job.rb
CHANGED

```diff
@@ -1,20 +1,25 @@
+require 'active_job'
+
+require_relative 'active_job/process_csv_job'
+require_relative 'active_job/split_csv_job'
+
 class BulkProcessor
   module BackEnd
     # Execute jobs via ActiveJob, e.g. Resque
     class ActiveJob
       def initialize(processor_class:, payload:, key:)
-        @processor_class = processor_class
-        @payload = payload
+        @processor_class = processor_class.name
+        @payload = PayloadSerializer.serialize(payload)
         @key = key
       end
 
       def start
-
+        ActiveJob::ProcessCSVJob.perform_later(processor_class, payload, key)
       end
 
       def split(num_processes)
-
-
+        ActiveJob::SplitCSVJob.perform_later(processor_class, payload, key,
+                                             num_processes)
       end
 
       private
```
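The significant change here is that the back end now serializes before enqueueing: the queue only ever sees a class-name string and a payload string. `PayloadSerializer`'s exact format isn't shown in this diff; a JSON-based stand-in is enough to illustrate the round trip it performs:

```ruby
require 'json'

# Stand-in for BulkProcessor::PayloadSerializer (its real format is not
# shown in this diff); serialize and deserialize just need to be inverses.
module PayloadSerializerStandIn
  def self.serialize(payload)
    JSON.generate(payload)
  end

  def self.deserialize(string)
    JSON.parse(string)
  end
end

serialized = PayloadSerializerStandIn.serialize('notify' => 'admin@example.com')
# => "{\"notify\":\"admin@example.com\"}" -- a queue-safe string
PayloadSerializerStandIn.deserialize(serialized)
# => {"notify"=>"admin@example.com"}
```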
data/lib/bulk_processor/back_end/dynosaur/process_csv_task.rb
ADDED

```diff
@@ -0,0 +1,26 @@
+require 'rake'
+
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      class ProcessCSVTask
+        include Rake::DSL
+
+        def install_task
+          namespace :bulk_processor do
+            desc 'Start processing a CSV file'
+            task :start, [:processor_class, :payload, :key] => :environment do |_task, args|
+              BulkProcessor::ProcessCSV.new(
+                args[:processor_class].constantize,
+                PayloadSerializer.deserialize(args[:payload]),
+                args[:key]
+              ).perform
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+BulkProcessor::BackEnd::ActiveJob::ProcessCSVTask.new.install_task
```
data/lib/bulk_processor/back_end/dynosaur/split_csv_task.rb
ADDED

```diff
@@ -0,0 +1,27 @@
+require 'rake'
+
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      class SplitCSVTask
+        include Rake::DSL
+
+        def install_task
+          namespace :bulk_processor do
+            desc 'Split a CSV file and process each piece'
+            task :split, [:processor_class, :payload, :key, :num_chunks] => :environment do |_task, args|
+              BulkProcessor::SplitCSV.new(
+                args[:processor_class].constantize,
+                PayloadSerializer.deserialize(args[:payload]),
+                args[:key],
+                args[:num_chunks].to_i
+              ).perform
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+BulkProcessor::BackEnd::ActiveJob::SplitCSVTask.new.install_task
```
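These task files are what the README's updated Rakefile instructions load. In an app without Rails, the wiring is just the following (per the README above; the no-op `:environment` task satisfies the prerequisite both tasks declare):

```ruby
# Rakefile
require 'bulk_processor/back_end/dynosaur/tasks'

# No-op prerequisite for apps that don't define :environment elsewhere
task :environment
```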
data/lib/bulk_processor/back_end/dynosaur.rb
CHANGED

```diff
@@ -1,12 +1,14 @@
 require 'dynosaur'
 
+require_relative 'dynosaur/tasks'
+
 class BulkProcessor
   module BackEnd
     # Execute jobs via rake tasks that will spawn a new Heroku dyno
     class Dynosaur
       def initialize(processor_class:, payload:, key:)
-        @processor_class = processor_class
-        @payload = payload
+        @processor_class = processor_class.name
+        @payload = PayloadSerializer.serialize(payload)
         @key = key
         configure_dynosaur
       end
@@ -14,7 +16,7 @@ class BulkProcessor
       def start
         args = {
           task: 'bulk_processor:start',
-          args: [processor_class
+          args: [processor_class, payload, key]
         }
         ::Dynosaur::Process::Heroku.new(args).start
       end
@@ -22,7 +24,7 @@ class BulkProcessor
       def split(num_processes)
         args = {
           task: 'bulk_processor:split',
-          args: [processor_class
+          args: [processor_class, payload, key, num_processes.to_s]
         }
         ::Dynosaur::Process::Heroku.new(args).start
       end
```
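Everything handed to Dynosaur is a string: `num_processes.to_s` here mirrors `args[:num_chunks].to_i` in the task file above, because rake task arguments arrive as strings. A 4-way split therefore launches roughly the following one-off dyno (values illustrative; the payload string's format is assumed):

```ruby
::Dynosaur::Process::Heroku.new(
  task: 'bulk_processor:split',
  args: ['MyCSVProcessor',                 # hypothetical processor class name
         '{"notify":"admin@example.com"}', # PayloadSerializer output (format assumed)
         'uploads/big.csv',                # illustrative file key
         '4']                              # num_processes, stringified
).start
```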
data/lib/bulk_processor/back_end.rb
CHANGED

```diff
@@ -4,7 +4,7 @@ class BulkProcessor
     def start(processor_class:, payload:, key:, num_processes: 1)
       back_end = back_end_class.new(
         processor_class: processor_class,
-        payload:
+        payload: payload,
         key: key
       )
       num_processes > 1 ? back_end.split(num_processes) : back_end.start
```
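This method is the single entry point both back ends share; the ternary on the last line decides between a straight run and a split. A call like the following sketch (hypothetical processor and key) ends up in `back_end.split(4)`:

```ruby
BulkProcessor::BackEnd.start(
  processor_class: MyCSVProcessor,               # hypothetical CSVProcessor subclass
  payload: { 'notify' => 'admin@example.com' },
  key: 'uploads/big.csv',
  num_processes: 4                               # > 1, so the CSV is split first
)
```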
data/lib/bulk_processor/config.rb
CHANGED

```diff
@@ -1,9 +1,17 @@
 class BulkProcessor
   # Store configuration data set by clients
   class Config
-    attr_reader :queue_adapter
+    attr_reader :back_end, :queue_adapter
     attr_writer :file_class
-    attr_accessor :
+    attr_accessor :temp_directory
+
+    def back_end=(back_end)
+      require_relative "back_end/#{back_end}"
+      @back_end = back_end
+    rescue LoadError => error
+      puts error.message
+      raise ArgumentError, "Invalid back-end: #{back_end}"
+    end
 
     def queue_adapter=(adapter)
       ActiveJob::Base.queue_adapter = @queue_adapter = adapter
```
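The new `back_end=` setter leans on the file layout: the symbol doubles as a file name under `back_end/`, so an unsupported value fails the `require_relative` and is re-raised as an `ArgumentError`:

```ruby
BulkProcessor.back_end = :active_job  # loads back_end/active_job.rb on demand
BulkProcessor.back_end = :sidekiq     # no back_end/sidekiq.rb exists
# => ArgumentError: Invalid back-end: sidekiq
```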
data/lib/bulk_processor/process_csv.rb
ADDED

```diff
@@ -0,0 +1,24 @@
+class BulkProcessor
+  class ProcessCSV
+    def initialize(processor_class, payload, key)
+      @processor_class = processor_class
+      @payload = payload
+      @key = key
+    end
+
+    def perform
+      file = BulkProcessor.config.file_class.new(key)
+      file.open do |f|
+        csv = CSV.parse(f.read, headers: true)
+        processor = processor_class.new(csv, payload: payload.merge('key' => key))
+        processor.start
+      end
+    ensure
+      file.try(:delete)
+    end
+
+    private
+
+    attr_reader :processor_class, :payload, :key
+  end
+end
```
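`ProcessCSV` assumes only three things about `BulkProcessor.config.file_class`: `new(key)`, `#open` yielding an IO-like object, and `#delete` (called through `try`, so ActiveSupport's `Object#try` must be available). A hypothetical in-memory implementation satisfying that contract, handy in tests:

```ruby
require 'stringio'

# Hypothetical stand-in for the gem's S3-backed file class; anything with
# this interface can be assigned as the config's file_class.
class InMemoryFile
  STORE = {} # key => CSV contents as a string

  def initialize(key)
    @key = key
  end

  # Yield an IO-like object, matching how ProcessCSV reads the CSV
  def open
    yield StringIO.new(STORE.fetch(@key))
  end

  def delete
    STORE.delete(@key)
  end
end
```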
data/lib/bulk_processor/split_csv.rb
ADDED

```diff
@@ -0,0 +1,46 @@
+class BulkProcessor
+  class SplitCSV
+    def initialize(processor_class, payload, key, num_chunks)
+      @processor_class = processor_class
+      @payload = payload
+      @key = key
+      @num_chunks = num_chunks
+    end
+
+    def perform
+      splitter = FileSplitter.new(key: key, row_chunker: row_chunker)
+      keys = splitter.split!
+      keys.each do |key|
+        BackEnd.start(processor_class: processor_class, payload: payload, key: key)
+      end
+    rescue Exception => error
+      handle_error(error)
+      raise
+    ensure
+      BulkProcessor.config.file_class.new(key).delete
+    end
+
+    private
+
+    attr_reader :processor_class, :payload, :key, :num_chunks
+
+    def row_chunker
+      if processor_class.respond_to?(:boundary_column)
+        boundary_column = processor_class.boundary_column
+        RowChunker::Boundary.new(num_chunks, boundary_column: boundary_column)
+      else
+        RowChunker::Balanced.new(num_chunks)
+      end
+    end
+
+    def handle_error(error)
+      if processor_class.respond_to?(:handler_class)
+        handler = processor_class.handler_class.new(
+          payload: payload.merge('key' => key),
+          results: []
+        )
+        handler.fail!(error)
+      end
+    end
+  end
+end
```
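Chunker selection is duck-typed: a processor opts into boundary-aware splitting simply by defining `boundary_column`; otherwise rows are divided evenly. A hypothetical opt-in:

```ruby
# Because this class responds to .boundary_column, SplitCSV picks
# RowChunker::Boundary, keeping rows that share an 'account_id' value in the
# same chunk; without the method it would use RowChunker::Balanced.
class AccountCSVProcessor
  def self.boundary_column
    'account_id'
  end
end
```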
data/lib/bulk_processor.rb
CHANGED

```diff
@@ -1,14 +1,12 @@
 require 'bulk_processor/back_end'
-require 'bulk_processor/back_end/active_job'
-require 'bulk_processor/back_end/dynosaur'
 require 'bulk_processor/config'
 require 'bulk_processor/file_splitter'
-require 'bulk_processor/job/process_csv'
-require 'bulk_processor/job/split_csv'
 require 'bulk_processor/payload_serializer'
+require 'bulk_processor/process_csv'
 require 'bulk_processor/row_chunker/balanced'
 require 'bulk_processor/row_chunker/boundary'
 require 'bulk_processor/s3_file'
+require 'bulk_processor/split_csv'
 require 'bulk_processor/stream_encoder'
 require 'bulk_processor/validated_csv'
 require 'bulk_processor/version'
```
metadata
CHANGED

```diff
@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: bulk-processor
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.7.0
 platform: ruby
 authors:
 - Tom Collier, Justin Richard
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-01-
+date: 2016-01-26 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: activejob
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '4'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '4'
 - !ruby/object:Gem::Dependency
   name: aws-sdk
   requirement: !ruby/object:Gem::Requirement
@@ -39,33 +25,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '2.1'
 - !ruby/object:Gem::Dependency
-  name:
+  name: rack
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: '1.5'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: '1.5'
 - !ruby/object:Gem::Dependency
-  name:
+  name: activejob
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '
-  type: :
+        version: '4'
+  type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '
+        version: '4'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -80,6 +66,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: dynosaur
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: pry-byebug
   requirement: !ruby/object:Gem::Requirement
@@ -145,7 +145,12 @@ files:
 - lib/bulk_processor.rb
 - lib/bulk_processor/back_end.rb
 - lib/bulk_processor/back_end/active_job.rb
+- lib/bulk_processor/back_end/active_job/process_csv_job.rb
+- lib/bulk_processor/back_end/active_job/split_csv_job.rb
 - lib/bulk_processor/back_end/dynosaur.rb
+- lib/bulk_processor/back_end/dynosaur/process_csv_task.rb
+- lib/bulk_processor/back_end/dynosaur/split_csv_task.rb
+- lib/bulk_processor/back_end/dynosaur/tasks.rb
 - lib/bulk_processor/config.rb
 - lib/bulk_processor/csv_processor.rb
 - lib/bulk_processor/csv_processor/no_op_handler.rb
@@ -153,14 +158,13 @@ files:
 - lib/bulk_processor/csv_processor/result.rb
 - lib/bulk_processor/csv_processor/row_processor.rb
 - lib/bulk_processor/file_splitter.rb
-- lib/bulk_processor/job/process_csv.rb
-- lib/bulk_processor/job/split_csv.rb
 - lib/bulk_processor/payload_serializer.rb
+- lib/bulk_processor/process_csv.rb
 - lib/bulk_processor/row_chunker/balanced.rb
 - lib/bulk_processor/row_chunker/boundary.rb
 - lib/bulk_processor/s3_file.rb
+- lib/bulk_processor/split_csv.rb
 - lib/bulk_processor/stream_encoder.rb
-- lib/bulk_processor/tasks.rb
 - lib/bulk_processor/validated_csv.rb
 - lib/bulk_processor/version.rb
 homepage:
```
data/lib/bulk_processor/job/process_csv.rb
DELETED

```diff
@@ -1,22 +0,0 @@
-require 'active_job'
-
-class BulkProcessor
-  # ActiveJob to handle processing the CSV in the background
-  module Job
-    class ProcessCSV < ActiveJob::Base
-      queue_as 'bulk_processor'
-
-      def perform(processor_class, payload, key)
-        file = BulkProcessor.config.file_class.new(key)
-        payload = PayloadSerializer.deserialize(payload).merge('key' => key)
-        file.open do |f|
-          csv = CSV.parse(f.read, headers: true)
-          processor = processor_class.constantize.new(csv, payload: payload)
-          processor.start
-        end
-      ensure
-        file.try(:delete)
-      end
-    end
-  end
-end
```
data/lib/bulk_processor/job/split_csv.rb
DELETED

```diff
@@ -1,41 +0,0 @@
-require 'active_job'
-
-class BulkProcessor
-  # ActiveJob to handle processing the CSV in the background
-  module Job
-    class SplitCSV < ActiveJob::Base
-      queue_as 'bulk_processor'
-
-      def perform(processor_class, payload, key, num_chunks)
-        processor_class = processor_class.constantize
-        chunker = row_chunker(processor_class, num_chunks)
-        payload = PayloadSerializer.deserialize(payload)
-        splitter = FileSplitter.new(key: key, row_chunker: chunker)
-        keys = splitter.split!
-        keys.each do |key|
-          BackEnd.start(processor_class: processor_class, payload: payload, key: key)
-        end
-      rescue Exception => error
-        if processor_class.respond_to?(:handler_class)
-          payload = payload.merge('key' => key)
-          handler = processor_class.handler_class.new(payload: payload, results: [])
-          handler.fail!(error)
-        end
-        raise
-      ensure
-        BulkProcessor.config.file_class.new(key).delete
-      end
-
-      private
-
-      def row_chunker(processor_class, num_chunks)
-        if processor_class.respond_to?(:boundary_column)
-          boundary_column = processor_class.boundary_column
-          RowChunker::Boundary.new(num_chunks, boundary_column: boundary_column)
-        else
-          RowChunker::Balanced.new(num_chunks)
-        end
-      end
-    end
-  end
-end
```
data/lib/bulk_processor/tasks.rb
DELETED

```diff
@@ -1,32 +0,0 @@
-require 'rake'
-
-class BulkProcessor
-  class Tasks
-    include Rake::DSL
-
-    def install_tasks
-      namespace :bulk_processor do
-        desc 'Start processing a CSV file'
-        task :start, [:processor_class, :payload, :key] => :environment do |_task, args|
-          Job::ProcessCSV.new.perform(
-            args[:processor_class],
-            args[:payload],
-            args[:key]
-          )
-        end
-
-        desc 'Split a CSV file and process each piece'
-        task :split, [:processor_class, :payload, :key, :num_chunks] => :environment do |_task, args|
-          Job::SplitCSV.new.perform(
-            args[:processor_class],
-            args[:payload],
-            args[:key],
-            args[:num_chunks]
-          )
-        end
-      end
-    end
-  end
-end
-
-BulkProcessor::Tasks.new.install_tasks
```