bulk-processor 0.4.0 → 0.5.0
- checksums.yaml +4 -4
- data/README.md +25 -4
- data/bulk-processor.gemspec +1 -0
- data/lib/bulk_processor.rb +7 -3
- data/lib/bulk_processor/back_end.rb +23 -0
- data/lib/bulk_processor/back_end/active_job.rb +20 -0
- data/lib/bulk_processor/back_end/dynosaur.rb +34 -0
- data/lib/bulk_processor/config.rb +5 -1
- data/lib/bulk_processor/job.rb +1 -0
- data/lib/bulk_processor/tasks.rb +23 -0
- data/lib/bulk_processor/version.rb +1 -1
- metadata +19 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 90877b508066f5c70c78f247b6b29a3110c87c3c
+  data.tar.gz: ab09632f364999e0138f781b86341b5f87333bd3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 101c55e1a96ced37167a87dd8b4f2d8933e4b19461e43331d77534c53d52ce199a438155701395f1d0b628b8857057d36bcf4f3b5966b522e9283361670d2c7c
+  data.tar.gz: bfd29f4c4a09c8ee2fcbeeb73e00891557eabcd0cc1f0ac8c3f9e08b8bf5745a58b6133d223cc843c4995ae86b34f5d9ac0edba9dcea999924fa809184ad6d79
data/README.md
CHANGED
@@ -28,18 +28,39 @@ Or install it yourself as:
 
 Bulk processor requires the following configuration
 
+#### Back end: ActiveJob
+
 ```ruby
+BulkProcessor.back_end = :active_job
 BulkProcessor.queue_adapter = <adapter>
-BulkProcessor.temp_directory = '/tmp'
-BulkProcessor.aws.access_key_id = 'my-aws-access-key'
-BulkProcessor.aws.secret_access_key = 'my-aws-secret'
-BulkProcessor.aws.bucket = 'my-s3-bucket'
 ```
 
 The default queue_adapter is `:inline`, which skips queueing and processes synchronously. Since
 this is backed by ActiveJob, all of the adapters in [ActiveJob::QueueAdapters](http://api.rubyonrails.org/classes/ActiveJob/QueueAdapters.html) are supported,
 including `:resque`.
 
+#### Back end: Dynosaur
+
+```ruby
+BulkProcessor.back_end = :dynosaur
+BulkProcessor.heroku.api_key = 'my-heroku-api-key'
+BulkProcessor.heroku.app_name = 'my-heroku-app-name'
+```
+
+```ruby
+# Rakefile
+require 'bulk_processor/tasks'
+```
+
+#### AWS S3
+
+```ruby
+BulkProcessor.temp_directory = '/tmp'
+BulkProcessor.aws.access_key_id = 'my-aws-access-key'
+BulkProcessor.aws.secret_access_key = 'my-aws-secret'
+BulkProcessor.aws.bucket = 'my-s3-bucket'
+```
+
 The CSV file passed to BulkProcessor will be persisted on AWS S3 so that the job
 can access it. This requires configuring AWS credentials, the S3 bucket in which
 to store the file, and a local temp directory to hold the file locally.
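Taken together, the 0.5.0 README splits configuration into a back-end choice plus S3 settings shared by both back ends. A minimal sketch of a combined initializer, assuming a Rails app keeping it in `config/initializers/bulk_processor.rb` (the path, ENV variable names, and the `:resque` choice are illustrative, not from the gem):

```ruby
# config/initializers/bulk_processor.rb -- illustrative location
require 'bulk_processor'

# Pick exactly one back end.
BulkProcessor.back_end = :active_job
BulkProcessor.queue_adapter = :resque

# S3 settings are required regardless of back end.
BulkProcessor.temp_directory = '/tmp'
BulkProcessor.aws.access_key_id     = ENV['AWS_ACCESS_KEY_ID']
BulkProcessor.aws.secret_access_key = ENV['AWS_SECRET_ACCESS_KEY']
BulkProcessor.aws.bucket            = 'my-s3-bucket'
```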
data/bulk-processor.gemspec
CHANGED
@@ -23,6 +23,7 @@ success or failure report
 
   spec.add_runtime_dependency 'activejob', '~> 4'
   spec.add_runtime_dependency 'aws-sdk', '~> 2.1'
+  spec.add_runtime_dependency 'dynosaur', '~> 0.2.1'
 
   spec.add_development_dependency 'bundler'
   spec.add_development_dependency 'pry-byebug', '~> 3'
data/lib/bulk_processor.rb
CHANGED
@@ -1,3 +1,6 @@
+require 'bulk_processor/back_end'
+require 'bulk_processor/back_end/active_job'
+require 'bulk_processor/back_end/dynosaur'
 require 'bulk_processor/config'
 require 'bulk_processor/job'
 require 'bulk_processor/s3_file'
@@ -43,7 +46,7 @@ class BulkProcessor
     )
 
     if csv.valid?
-
+      start_backend(file_class, encoded_contents)
     else
       errors.concat(csv.errors)
     end
@@ -54,10 +57,11 @@ class BulkProcessor
 
   attr_reader :key, :stream, :processor_class, :payload
 
-  def
+  def start_backend(file_class, contents)
     file = file_class.new(key)
     file.write(contents)
-
+    BackEnd.start(processor_class: processor_class, payload: payload,
+                  file_class: file_class, key: key)
   rescue Exception
     # Clean up the file, which is treated as a lock, if we bail out of here
     # unexpectedly.
data/lib/bulk_processor/back_end.rb
ADDED
@@ -0,0 +1,23 @@
+class BulkProcessor
+  module BackEnd
+    class << self
+      def start(processor_class:, payload:, file_class:, key:)
+        back_end = back_end_class.new(
+          processor_class: processor_class,
+          payload: payload,
+          file_class: file_class,
+          key: key
+        )
+        back_end.start
+      end
+
+      private
+
+      def back_end_class
+        back_end = BulkProcessor.config.back_end
+        classified = back_end.to_s.split('_').collect(&:capitalize).join
+        BulkProcessor::BackEnd.const_get(classified)
+      end
+    end
+  end
+end
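The `back_end_class` lookup is a hand-rolled string classification rather than ActiveSupport's `classify`, so it needs no Rails dependency. A quick illustration of how the configured symbol resolves to a class, using the two back ends this release ships:

```ruby
# :active_job -> "active_job" -> ["Active", "Job"] -> "ActiveJob"
:active_job.to_s.split('_').collect(&:capitalize).join
# => "ActiveJob"  (looked up as BulkProcessor::BackEnd::ActiveJob)

:dynosaur.to_s.split('_').collect(&:capitalize).join
# => "Dynosaur"   (looked up as BulkProcessor::BackEnd::Dynosaur)
```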
data/lib/bulk_processor/back_end/active_job.rb
ADDED
@@ -0,0 +1,20 @@
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      def initialize(processor_class:, payload:, file_class:, key:)
+        @processor_class = processor_class
+        @payload = payload
+        @file_class = file_class
+        @key = key
+      end
+
+      def start
+        Job.perform_later(processor_class.name, payload.to_json, file_class.name, key)
+      end
+
+      private
+
+      attr_reader :processor_class, :payload, :file_class, :key
+    end
+  end
+end
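Note that `start` flattens everything to plain strings before enqueueing, so any ActiveJob adapter can serialize the arguments. A sketch of the equivalent enqueue call, with hypothetical values (`MyCsvProcessor` and the key are stand-ins):

```ruby
# What the ActiveJob back end effectively enqueues:
BulkProcessor::Job.perform_later(
  'MyCsvProcessor',        # processor_class.name
  '{"user_id":1}',         # payload.to_json
  'BulkProcessor::S3File', # file_class.name
  'people.csv'             # key
)
```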
data/lib/bulk_processor/back_end/dynosaur.rb
ADDED
@@ -0,0 +1,34 @@
+require 'dynosaur'
+
+class BulkProcessor
+  module BackEnd
+    class Dynosaur
+      def initialize(processor_class:, payload:, file_class:, key:)
+        @processor_class = processor_class
+        @payload = payload
+        @file_class = file_class
+        @key = key
+        configure_dynosaur
+      end
+
+      def start
+        args = {
+          task: 'bulk_processor:start',
+          args: [processor_class.name, payload.to_json, file_class.name, key]
+        }
+        ::Dynosaur::Process::Heroku.new(args).start
+      end
+
+      private
+
+      attr_reader :processor_class, :payload, :file_class, :key
+
+      def configure_dynosaur
+        ::Dynosaur::Client::HerokuClient.configure do |config|
+          config.api_key = BulkProcessor.config.heroku.api_key
+          config.app_name = BulkProcessor.config.heroku.app_name
+        end
+      end
+    end
+  end
+end
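Instead of enqueueing an ActiveJob, this back end asks Dynosaur to boot a one-off Heroku dyno that runs the `bulk_processor:start` rake task added in `tasks.rb` below. A minimal sketch of driving it directly, where `MyCsvProcessor` and the file key are hypothetical stand-ins:

```ruby
# A sketch only; MyCsvProcessor and 'people.csv' are hypothetical.
back_end = BulkProcessor::BackEnd::Dynosaur.new(
  processor_class: MyCsvProcessor,
  payload: { 'user_id' => 1 },
  file_class: BulkProcessor::S3File,
  key: 'people.csv'
)
back_end.start # boots a one-off dyno running rake bulk_processor:start[...]
```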
data/lib/bulk_processor/config.rb
CHANGED
@@ -2,7 +2,7 @@ class BulkProcessor
   # Store configuration data set by clients
   class Config
     attr_reader :queue_adapter
-    attr_accessor :temp_directory
+    attr_accessor :back_end, :temp_directory
 
     def queue_adapter=(adapter)
       ActiveJob::Base.queue_adapter = @queue_adapter = adapter
@@ -11,5 +11,9 @@ class BulkProcessor
     def aws
       @aws ||= Struct.new(:access_key_id, :secret_access_key, :bucket).new
     end
+
+    def heroku
+      @heroku ||= Struct.new(:api_key, :app_name).new
+    end
   end
 end
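The new `heroku` settings follow the same anonymous-Struct pattern as `aws`, which means a misspelled attribute raises `NoMethodError` rather than being silently ignored. A short sketch, assuming the top-level `BulkProcessor.heroku` delegator shown in the README (values are placeholders):

```ruby
BulkProcessor.heroku.api_key  = ENV['HEROKU_API_KEY']
BulkProcessor.heroku.app_name = 'my-heroku-app-name'
BulkProcessor.heroku.api_keyy = 'oops' # => NoMethodError
```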
data/lib/bulk_processor/job.rb
CHANGED
@@ -7,6 +7,7 @@ class BulkProcessor
 
     def perform(processor_class, payload, file_class, key)
       file = file_class.constantize.new(key)
+      payload = payload.nil? ? nil : JSON.parse(payload)
       file.open do |f|
         csv = CSV.parse(f.read, headers: true)
         processor = processor_class.constantize.new(csv, payload: payload)
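Both back ends now serialize the payload with `to_json` before handing it off, and `Job#perform` parses it back. The payload must therefore be JSON-serializable, and hash keys come back as strings. A quick illustration of the round trip:

```ruby
require 'json'

payload = { user_id: 1 }  # what the caller passes in
wire    = payload.to_json # => '{"user_id":1}'  crosses the queue as a string
JSON.parse(wire)          # => {"user_id"=>1}   what the processor receives
```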
data/lib/bulk_processor/tasks.rb
ADDED
@@ -0,0 +1,23 @@
+require 'rake'
+
+class BulkProcessor
+  class Tasks
+    include Rake::DSL
+
+    def install_tasks
+      namespace :bulk_processor do
+        desc 'Start processing a CSV file'
+        task :start, [:processor_class, :payload, :file_class, :key] => :environment do |_task, args|
+          Job.new.perform(
+            args[:processor_class],
+            args[:payload],
+            args[:file_class],
+            args[:key]
+          )
+        end
+      end
+    end
+  end
+end
+
+BulkProcessor::Tasks.new.install_tasks
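With the Rakefile requiring `bulk_processor/tasks` (as shown in the README above), this is the task the Dynosaur back end's one-off dyno runs. The `:environment` prerequisite assumes a Rails-style task that loads the app. A sketch of invoking it programmatically, with hypothetical argument values:

```ruby
# Equivalent to: rake 'bulk_processor:start[MyCsvProcessor,{"user_id":1},BulkProcessor::S3File,people.csv]'
Rake::Task['bulk_processor:start'].invoke(
  'MyCsvProcessor', '{"user_id":1}', 'BulkProcessor::S3File', 'people.csv'
)
```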
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bulk-processor
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Tom Collier, Justin Richard
@@ -38,6 +38,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.1'
+- !ruby/object:Gem::Dependency
+  name: dynosaur
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -115,6 +129,9 @@ files:
 - bin/setup
 - bulk-processor.gemspec
 - lib/bulk_processor.rb
+- lib/bulk_processor/back_end.rb
+- lib/bulk_processor/back_end/active_job.rb
+- lib/bulk_processor/back_end/dynosaur.rb
 - lib/bulk_processor/config.rb
 - lib/bulk_processor/csv_processor.rb
 - lib/bulk_processor/csv_processor/no_op_handler.rb
@@ -124,6 +141,7 @@ files:
 - lib/bulk_processor/job.rb
 - lib/bulk_processor/s3_file.rb
 - lib/bulk_processor/stream_encoder.rb
+- lib/bulk_processor/tasks.rb
 - lib/bulk_processor/validated_csv.rb
 - lib/bulk_processor/version.rb
 homepage: