bulk-processor 0.4.0 → 0.5.0
- checksums.yaml +4 -4
- data/README.md +25 -4
- data/bulk-processor.gemspec +1 -0
- data/lib/bulk_processor.rb +7 -3
- data/lib/bulk_processor/back_end.rb +23 -0
- data/lib/bulk_processor/back_end/active_job.rb +20 -0
- data/lib/bulk_processor/back_end/dynosaur.rb +34 -0
- data/lib/bulk_processor/config.rb +5 -1
- data/lib/bulk_processor/job.rb +1 -0
- data/lib/bulk_processor/tasks.rb +23 -0
- data/lib/bulk_processor/version.rb +1 -1
- metadata +19 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 90877b508066f5c70c78f247b6b29a3110c87c3c
+  data.tar.gz: ab09632f364999e0138f781b86341b5f87333bd3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 101c55e1a96ced37167a87dd8b4f2d8933e4b19461e43331d77534c53d52ce199a438155701395f1d0b628b8857057d36bcf4f3b5966b522e9283361670d2c7c
+  data.tar.gz: bfd29f4c4a09c8ee2fcbeeb73e00891557eabcd0cc1f0ac8c3f9e08b8bf5745a58b6133d223cc843c4995ae86b34f5d9ac0edba9dcea999924fa809184ad6d79
data/README.md
CHANGED
@@ -28,18 +28,39 @@ Or install it yourself as:
 
 Bulk processor requires the following configuration
 
+#### Back end: ActiveJob
+
 ```ruby
+BulkProcessor.back_end = :active_job
 BulkProcessor.queue_adapter = <adapter>
-BulkProcessor.temp_directory = '/tmp'
-BulkProcessor.aws.access_key_id = 'my-aws-access-key'
-BulkProcessor.aws.secret_access_key = 'my-aws-secret'
-BulkProcessor.aws.bucket = 'my-s3-bucket'
 ```
 
 The default queue_adapter is `:inline`, which skips queueing and processes synchronously. Since
 this is backed by ActiveJob, all of the adapters in [ActiveJob::QueueAdapters]( http://api.rubyonrails.org/classes/ActiveJob/QueueAdapters.html ) are supported,
 including `:resque`.
 
+#### Back end: Dynosaur
+
+```ruby
+BulkProcessor.back_end = :dynosaur
+BulkProcessor.heroku.api_key = 'my-heroku-api-key'
+BulkProcessor.heroku.app_name = 'my-heroku-app-name'
+```
+
+```ruby
+# Rakefile
+require 'bulk_processor/tasks'
+```
+
+#### AWS S3
+
+```ruby
+BulkProcessor.temp_directory = '/tmp'
+BulkProcessor.aws.access_key_id = 'my-aws-access-key'
+BulkProcessor.aws.secret_access_key = 'my-aws-secret'
+BulkProcessor.aws.bucket = 'my-s3-bucket'
+```
+
 The CSV file passed to BulkProcessor will be persisted on AWS S3 so that the job
 can access it. This requires configuring AWS credentials, the S3 bucket in which
 to store the file, and a local temp directory to hold the file locally.
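Read together, the three new README blocks describe one setup that can live in a single boot-time file. A minimal sketch, assuming the Dynosaur back end, a hypothetical `config/initializers/bulk_processor.rb` location, and illustrative environment-variable names in place of hard-coded credentials; it simply mirrors the setters shown above:

```ruby
# config/initializers/bulk_processor.rb -- hypothetical location, not part of the gem
require 'bulk_processor'

# Pick the back end (:active_job or :dynosaur, per the README sections above)
BulkProcessor.back_end = :dynosaur
BulkProcessor.heroku.api_key  = ENV['HEROKU_API_KEY']   # illustrative env var names
BulkProcessor.heroku.app_name = ENV['HEROKU_APP_NAME']

# S3 persistence of the uploaded CSV
BulkProcessor.temp_directory = '/tmp'
BulkProcessor.aws.access_key_id     = ENV['AWS_ACCESS_KEY_ID']
BulkProcessor.aws.secret_access_key = ENV['AWS_SECRET_ACCESS_KEY']
BulkProcessor.aws.bucket            = 'my-s3-bucket'
```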
data/bulk-processor.gemspec
CHANGED
@@ -23,6 +23,7 @@ success or failure report
 
   spec.add_runtime_dependency 'activejob', '~> 4'
   spec.add_runtime_dependency 'aws-sdk', '~> 2.1'
+  spec.add_runtime_dependency 'dynosaur', '~> 0.2.1'
 
   spec.add_development_dependency 'bundler'
   spec.add_development_dependency 'pry-byebug', '~> 3'
data/lib/bulk_processor.rb
CHANGED
@@ -1,3 +1,6 @@
+require 'bulk_processor/back_end'
+require 'bulk_processor/back_end/active_job'
+require 'bulk_processor/back_end/dynosaur'
 require 'bulk_processor/config'
 require 'bulk_processor/job'
 require 'bulk_processor/s3_file'
@@ -43,7 +46,7 @@ class BulkProcessor
     )
 
     if csv.valid?
-
+      start_backend(file_class, encoded_contents)
     else
       errors.concat(csv.errors)
     end
@@ -54,10 +57,11 @@ class BulkProcessor
 
   attr_reader :key, :stream, :processor_class, :payload
 
-  def
+  def start_backend(file_class, contents)
     file = file_class.new(key)
     file.write(contents)
-
+    BackEnd.start(processor_class: processor_class, payload: payload,
+                  file_class: file_class, key: key)
   rescue Exception
     # Clean up the file, which is treated as a lock, if we bail out of here
     # unexpectedly.
data/lib/bulk_processor/back_end.rb
ADDED
@@ -0,0 +1,23 @@
+class BulkProcessor
+  module BackEnd
+    class << self
+      def start(processor_class:, payload:, file_class:, key:)
+        back_end = back_end_class.new(
+          processor_class: processor_class,
+          payload: payload,
+          file_class: file_class,
+          key: key
+        )
+        back_end.start
+      end
+
+      private
+
+      def back_end_class
+        back_end = BulkProcessor.config.back_end
+        classified = back_end.to_s.split('_').collect(&:capitalize).join
+        BulkProcessor::BackEnd.const_get(classified)
+      end
+    end
+  end
+end
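The private back_end_class helper turns the configured symbol into a constant nested under BulkProcessor::BackEnd. The string transformation it relies on is plain Ruby and easy to verify on its own:

```ruby
back_end = :active_job
back_end.to_s.split('_').collect(&:capitalize).join
# => "ActiveJob" -- const_get then resolves BulkProcessor::BackEnd::ActiveJob;
# :dynosaur maps to "Dynosaur" the same way.
```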
data/lib/bulk_processor/back_end/active_job.rb
ADDED
@@ -0,0 +1,20 @@
+class BulkProcessor
+  module BackEnd
+    class ActiveJob
+      def initialize(processor_class:, payload:, file_class:, key:)
+        @processor_class = processor_class
+        @payload = payload
+        @file_class = file_class
+        @key = key
+      end
+
+      def start
+        Job.perform_later(processor_class.name, payload.to_json, file_class.name, key)
+      end
+
+      private
+
+      attr_reader :processor_class, :payload, :file_class, :key
+    end
+  end
+end
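The ActiveJob back end hands the queue nothing but primitives: class names as strings and the payload as JSON, so any ActiveJob queue adapter can serialize the job. A hedged illustration of the enqueue call that start makes, with hypothetical processor, payload, and key values:

```ruby
# Hypothetical values; mirrors the argument shapes used by ActiveJob#start above.
# (Job is BulkProcessor::Job when referenced from outside the namespace.)
Job.perform_later(
  'PersonCSVProcessor',         # processor_class.name
  { 'notify' => true }.to_json, # payload.to_json
  'BulkProcessor::S3File',      # file_class.name
  'people-2016-01-01.csv'       # key
)
```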
data/lib/bulk_processor/back_end/dynosaur.rb
ADDED
@@ -0,0 +1,34 @@
+require 'dynosaur'
+
+class BulkProcessor
+  module BackEnd
+    class Dynosaur
+      def initialize(processor_class:, payload:, file_class:, key:)
+        @processor_class = processor_class
+        @payload = payload
+        @file_class = file_class
+        @key = key
+        configure_dynosaur
+      end
+
+      def start
+        args = {
+          task: 'bulk_processor:start',
+          args: [processor_class.name, payload.to_json, file_class.name, key]
+        }
+        ::Dynosaur::Process::Heroku.new(args).start
+      end
+
+      private
+
+      attr_reader :processor_class, :payload, :file_class, :key
+
+      def configure_dynosaur
+        ::Dynosaur::Client::HerokuClient.configure do |config|
+          config.api_key = BulkProcessor.config.heroku.api_key
+          config.app_name = BulkProcessor.config.heroku.app_name
+        end
+      end
+    end
+  end
+end
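Instead of enqueueing a job, the Dynosaur back end asks Heroku (through the dynosaur gem) to run the gem's new bulk_processor:start rake task with the same four arguments, so the args hash lines up with the task signature defined in lib/bulk_processor/tasks.rb further down. A hedged sketch of that mapping, with hypothetical values:

```ruby
# Hypothetical values; shows how the Dynosaur args map onto the
# bulk_processor:start[:processor_class, :payload, :file_class, :key] task.
args = {
  task: 'bulk_processor:start',
  args: ['PersonCSVProcessor', '{"notify":true}', 'BulkProcessor::S3File', 'people-2016-01-01.csv']
}
# When that task runs it ends up calling:
#   Job.new.perform('PersonCSVProcessor', '{"notify":true}', 'BulkProcessor::S3File', 'people-2016-01-01.csv')
```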
data/lib/bulk_processor/config.rb
CHANGED
@@ -2,7 +2,7 @@ class BulkProcessor
   # Store configuration data set by clients
   class Config
     attr_reader :queue_adapter
-    attr_accessor :temp_directory
+    attr_accessor :back_end, :temp_directory
 
     def queue_adapter=(adapter)
       ActiveJob::Base.queue_adapter = @queue_adapter = adapter
@@ -11,5 +11,9 @@ class BulkProcessor
     def aws
       @aws ||= Struct.new(:access_key_id, :secret_access_key, :bucket).new
     end
+
+    def heroku
+      @heroku ||= Struct.new(:api_key, :app_name).new
+    end
   end
 end
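The new heroku settings follow the same pattern as the existing aws block: a memoized Struct, so a misspelled attribute raises instead of silently writing into an open-ended Hash. A small sketch against the Config object directly, with illustrative values:

```ruby
config = BulkProcessor::Config.new
config.back_end = :dynosaur
config.heroku.api_key  = 'my-heroku-api-key'
config.heroku.app_name = 'my-heroku-app-name'

config.heroku.api_key  # => "my-heroku-api-key"
config.heroku.api_kye  # raises NoMethodError (typo caught by the Struct)
```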
data/lib/bulk_processor/job.rb
CHANGED
@@ -7,6 +7,7 @@ class BulkProcessor
 
     def perform(processor_class, payload, file_class, key)
       file = file_class.constantize.new(key)
+      payload = payload.nil? ? nil : JSON.parse(payload)
       file.open do |f|
         csv = CSV.parse(f.read, headers: true)
         processor = processor_class.constantize.new(csv, payload: payload)
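Both back ends now send the payload across the process boundary as a JSON string (payload.to_json in each back end), so Job#perform symmetrically parses it back into a Hash before building the processor, leaving nil payloads untouched:

```ruby
require 'json'

payload = '{"notify":true}'                        # as received from the queue or rake args
payload = payload.nil? ? nil : JSON.parse(payload) # the new line in Job#perform
# => {"notify"=>true}

payload = nil
payload.nil? ? nil : JSON.parse(payload)           # nil stays nil
# => nil
```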
data/lib/bulk_processor/tasks.rb
ADDED
@@ -0,0 +1,23 @@
+require 'rake'
+
+class BulkProcessor
+  class Tasks
+    include Rake::DSL
+
+    def install_tasks
+      namespace :bulk_processor do
+        desc 'Start processing a CSV file'
+        task :start, [:processor_class, :payload, :file_class, :key] => :environment do |_task, args|
+          Job.new.perform(
+            args[:processor_class],
+            args[:payload],
+            args[:file_class],
+            args[:key]
+          )
+        end
+      end
+    end
+  end
+end
+
+BulkProcessor::Tasks.new.install_tasks
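The task depends on :environment, which Rails applications define, so the README's Rakefile snippet works as-is under Rails. Outside Rails, a sketch of a Rakefile that satisfies the dependency itself (the boot file path is hypothetical):

```ruby
# Rakefile
require 'bulk_processor/tasks'

# bulk_processor:start depends on an :environment task; outside Rails,
# define one that loads your application code.
task :environment do
  require_relative 'config/boot' # hypothetical path to your app's boot code
end
```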
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bulk-processor
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Tom Collier, Justin Richard
@@ -38,6 +38,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.1'
+- !ruby/object:Gem::Dependency
+  name: dynosaur
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -115,6 +129,9 @@ files:
 - bin/setup
 - bulk-processor.gemspec
 - lib/bulk_processor.rb
+- lib/bulk_processor/back_end.rb
+- lib/bulk_processor/back_end/active_job.rb
+- lib/bulk_processor/back_end/dynosaur.rb
 - lib/bulk_processor/config.rb
 - lib/bulk_processor/csv_processor.rb
 - lib/bulk_processor/csv_processor/no_op_handler.rb
@@ -124,6 +141,7 @@ files:
 - lib/bulk_processor/job.rb
 - lib/bulk_processor/s3_file.rb
 - lib/bulk_processor/stream_encoder.rb
+- lib/bulk_processor/tasks.rb
 - lib/bulk_processor/validated_csv.rb
 - lib/bulk_processor/version.rb
 homepage: