bulk-processor 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 81b6cbba22963959c1e355f71f78790cbc0b6592
4
- data.tar.gz: 8859344abc19572527f69ba9259c5b1289bde273
3
+ metadata.gz: 56580f10538cc75b8fbfb8006248905cb0cfeb71
4
+ data.tar.gz: 3ce3bc03cf878836f5a768a3be0fad169bfabdc6
5
5
  SHA512:
6
- metadata.gz: 584baf85c25c5a741eb8392ab068a3e1798a71398dd3f53e0da6e24aca144648fb77c8442865a4f5c134193fbf255f4b5cf55f630bc1915241e1f2d229189ce7
7
- data.tar.gz: 7a5cc788e2b8bc7caf6d4960aa28fd2233661b3e8f5d1b2d90ed4a474ba8885c35ee63fcf077b0b25a1a0c2cb9408ca1c22846105592a7de9a87b9330e04c93f
6
+ metadata.gz: 1c4fa2fdf92ec038c73c6eec886a682e9cb82f8fdb1ef0ade84a0eefa0eb1fffbd29cd510c3c555c6456b5ee7e600c2e52999031169dfbd773a73a593ee543ee
7
+ data.tar.gz: b32c0129b95fa2a44e7d2dca5455bca2a60137aac6d0f97b260b60e90d8c79d28acd4e91a12f9d9a6b14e0a1ac4735d505b31bea182499f14c9c19991fbfa930
data/README.md CHANGED
@@ -34,11 +34,48 @@ The default is `:inline`, which skips queueing and processes synchronously. Sinc
34
34
  this is backed by ActiveJob, all of the adapters in [ActiveJob::QueueAdapters]( http://api.rubyonrails.org/classes/ActiveJob/QueueAdapters.html ),
35
35
  including `:resque`.
36
36
 
37
- You will also need to supply a class for item processing and a class/module for completion handling.
38
- The item processor instance must respond to the following messages:
37
+ You will also need to supply a class for CSV processing. This class must respond to the
38
+ `start` instance method, the `required_columns` and `optional_columns` class methods,
39
+ and have the following signature for initialize:
39
40
 
41
+ ```ruby
42
+ class PetCSVProcessor
43
+ # @return [Array<String>] column headers that must be present
44
+ def self.required_columns
45
+ ['species', 'name', 'age']
46
+ end
47
+
48
+ # @return [Array<String>] column headers that may be present. If a column
49
+ # header is present that is not in 'required_columns' or 'optional_columns',
50
+ # the file will be considered invalid and no rows will be processed.
51
+ def self.optional_columns
52
+ ['favorite_toy', 'talents']
53
+ end
54
+
55
+ def initialize(records, payload:)
56
+ # Assign instance variables and do any other setup
57
+ end
58
+
59
+ def start
60
+ # Process the records
61
+ end
62
+ end
40
63
  ```
41
- class PetItemProcessor
64
+
65
+ To account for a common use case, a base `BulkProcessor::CSVProcessor` class is provided,
66
+ though it must be explicitly required. This base class can be subclassed to build a CSV processor.
67
+ This base class implements the initializer and `#start` methods and returns an empty set for `.optional_columns`.
68
+
69
+ The `#start` method iterates over each record, processes it using a `RowProcessor`,
70
+ accumulates the results, which are passed off to a `Handler`. An example
71
+ implementation could look like:
72
+
73
+ ```ruby
74
+ require 'bulk_processor/csv_processor'
75
+
76
+ class PetCSVProcessor < BulkProcessor::CSVProcessor
77
+ # Note: this must be overridden in a subclass
78
+ #
42
79
  # @return [Array<String>] column headers that must be present
43
80
  def self.required_columns
44
81
  ['species', 'name', 'age']
@@ -51,18 +88,27 @@ class PetItemProcessor
51
88
  ['favorite_toy', 'talents']
52
89
  end
53
90
 
54
- # Instantiate the processor with a single row from the CSV represented by
55
- # a Hash<String, String>
56
- def initialize(record_hash, payload)
57
- @record_hash = record_hash
58
- @payload = payload
59
- @messages = []
60
- @success = false
91
+ # Note: this must be overridden in a subclass
92
+ #
93
+ # @return [RowProcessor] a class that implements the RowProcessor role
94
+ def self.row_processor_class
95
+ PetRowProcessor
96
+ end
97
+
98
+ # @return [Handler] a class that implements the Handler role
99
+ def self.handler_class
100
+ PetHandler
101
+ end
102
+ end
103
+
104
+ class PetRowProcessor
105
+ def initialize(record, payload:)
106
+ # Assign instance variables and do any other setup
61
107
  end
62
108
 
63
109
  # Process the row, e.g. create a new record in the DB, send an email, etc
64
110
  def process!
65
- pet = Pet.new(record_hash)
111
+ pet = Pet.new(record)
66
112
  if pet.save
67
113
  @success = true
68
114
  else
@@ -72,25 +118,17 @@ class PetItemProcessor
72
118
 
73
119
  # @return [true|false] true iff the item was processed completely
74
120
  def success?
75
- @success
121
+ @success == true
76
122
  end
77
123
 
78
124
  # @return [Array<String>] list of messages for this item to pass back to the
79
125
  # completion handler.
80
126
  def messages
81
- @messages
127
+ @messages || []
82
128
  end
83
129
  end
84
- ```
85
130
 
86
- A completion handler must respond to the following messages
87
-
88
- ```ruby
89
- module NotificationHandler
90
- # Handle full or partial processing of records. Unless there was a fatal
91
- # error, all row indexes will be present either successes or errors, but not
92
- # both.
93
- #
131
+ class PetHandler
94
132
  # @param payload [Hash] the payload passed into 'BulkProcessor.process', can
95
133
  # be used to pass metadata around, e.g. the email address to send a
96
134
  # completion report to
@@ -100,25 +138,37 @@ module NotificationHandler
100
138
  # (may be empty), e.g. { 0 => [], 1 => ['pet ID = 22 created'] }
101
139
  # @param errors [Hash<Fixnum, Array<String>>] similar structure to successes,
102
140
  # but rows that were not completed successfully.
141
+ def initialize(payload:, successes:, errors:)
142
+ # Assign instance variables and do any other setup
143
+ end
144
+
145
+ # Notify the owner that their pets were processed
146
+ def complete!
147
+ OwnerMailer.completed(successes, errors)
148
+ end
149
+
150
+ # Notify the owner that processing failed
151
+ #
103
152
  # @param fatal_error [StandardError] if nil, then all rows were processed,
104
153
  # else the error that was raise is passed in here
105
- def self.complete(payload, successes, errors, fatal_error = nil)
106
- if fatal_error
107
- PetProcessorMailer.fail(payload['recipient'], successes, errors, fatal_error)
108
- else
109
- PetProcessorMailer.complete(payload['recipient'], successes, errors)
110
- end
154
+ def fail!(fatal_error)
155
+ OwnerMailer.failed(fatal_error)
111
156
  end
112
157
  end
113
158
  ```
114
159
 
115
- Requesting file processing
160
+ Putting it all together
116
161
 
117
162
  ```ruby
118
- processor = BulkProcessor.new(file_stream, PetItemProcessor, NotificationHandler, {recipient: current_user.email})
119
- if processor.process
163
+ processor = BulkProcessor.new(
164
+ stream: file_stream,
165
+ processor_class: PetCSVProcessor,
166
+ payload: {recipient: current_user.email}
167
+ )
168
+ if processor.start
120
169
  # The job has been enqueued, go get a coffee and wait
121
170
  else
171
+ # Something went wrong, alert the file uploader
122
172
  handle_invalid_file(processor.errors)
123
173
  end
124
174
  ```
@@ -1,4 +1,5 @@
1
1
  class BulkProcessor
2
+ # Store configuration data set by clients
2
3
  class Config
3
4
  attr_reader :queue_adapter
4
5
 
@@ -0,0 +1,98 @@
1
+ require_relative 'no_op_handler'
2
+
3
+ class BulkProcessor
4
+ # An abstract implementation of the CSVProcessor role. Provides
5
+ #
6
+ # * A default implementation of `.optional_columns`, returning []
7
+ # * An initializer that assigns the arguments as instance attributes
8
+ # * An implementation of #start to cover a common use case
9
+ #
10
+ # The common use case covered by this class' implementation of `#start` is
11
+ #
12
+ # 1. Iteratively process each record
13
+ # 2. Accumulate the results (did the processing succeed? what were the error
14
+ # messages?)
15
+ # 3. Send the results to an instance of the Handler role.
16
+ #
17
+ # This class adds 2 required class methods that can be overridden in any
18
+ # subclass
19
+ #
20
+ # * row_processor_class - (required) Returns the class that implements the
21
+ # RowProcessor role to process rows of the CSV
22
+ # * handler_class - (optional) Returns the class that implements the Handler
23
+ # role, which handles results from the completion (or failure) of
24
+ # processing the entire CSV.
25
+ #
26
+ # The `required_columns` method must still be implemented in a subclass
27
+ #
28
+ class CSVProcessor
29
+ # @return [RowProcessor] a class that implements the RowProcessor interface
30
+ def self.row_processor_class
31
+ raise NotImplementedError,
32
+ "#{self.class.name} must implement #{__method__}"
33
+ end
34
+
35
+ # @return [Handler] a class that implements the Handler role
36
+ def self.handler_class
37
+ NoOpHandler
38
+ end
39
+
40
+ # @return [Array<String>] column headers that must be present
41
+ def self.required_columns
42
+ raise NotImplementedError,
43
+ "#{self.class.name} must implement #{__method__}"
44
+ end
45
+
46
+ # @return [Array<String>] column headers that may be present. If a column
47
+ # header is present that is not in 'required_columns' or
48
+ # 'optional_columns', the file will be considered invalid and no rows will
49
+ # be processed.
50
+ def self.optional_columns
51
+ []
52
+ end
53
+
54
+ def initialize(records, payload: {})
55
+ @records = records
56
+ @payload = payload
57
+ @successes = {}
58
+ @errors = {}
59
+ end
60
+
61
+ # Iteratively process each record, accumulate the results, and pass those
62
+ # off to the handler. If an unrescued error is raised for any record,
63
+ # processing will halt for all remaining records and the `#fail!` will be
64
+ # invoked on the handler.
65
+ def start
66
+ records.each_with_index do |record, index|
67
+ processor = row_processor(record)
68
+ processor.process!
69
+ if processor.success?
70
+ successes[index] = processor.messages
71
+ else
72
+ errors[index] = processor.messages
73
+ end
74
+ end
75
+ handler.complete!
76
+ rescue Exception => exception
77
+ handler.fail!(exception)
78
+
79
+ # Swallow any StandardError, since we are already reporting it to the
80
+ # user. However, we must re-raise Exceptions, such as SIGTERMs since they
81
+ # need to be handled at a level above this gem.
82
+ raise unless exception.is_a?(StandardError)
83
+ end
84
+
85
+ private
86
+
87
+ attr_reader :records, :payload, :successes, :errors
88
+
89
+ def handler
90
+ self.class.handler_class.new(payload: payload, successes: successes,
91
+ errors: errors)
92
+ end
93
+
94
+ def row_processor(record)
95
+ self.class.row_processor_class.new(record, payload: payload)
96
+ end
97
+ end
98
+ end
@@ -1,26 +1,13 @@
1
+ require 'active_job'
2
+
1
3
  class BulkProcessor
4
+ # ActiveJob to handle processing the CSV in the background
2
5
  class Job < ActiveJob::Base
3
6
  queue_as 'bulk_processor'
4
7
 
5
- def perform(records, item_proccessor, handler, payload)
6
- item_proccessor_class = item_proccessor.constantize
7
- handler_class = handler.constantize
8
-
9
- successes = {}
10
- failures = {}
11
- records.each_with_index do |record, index|
12
- processor = item_proccessor_class.new(record, payload)
13
- processor.process!
14
- if processor.success?
15
- successes[index] = processor.messages
16
- else
17
- failures[index] = processor.messages
18
- end
19
- end
20
- handler_class.complete(payload, successes, failures, nil)
21
- rescue Exception => exception
22
- handler_class.complete(payload, successes, failures, exception)
23
- raise unless exception.is_a?(StandardError)
8
+ def perform(records, processor_class, payload)
9
+ processor = processor_class.constantize.new(records, payload: payload)
10
+ processor.start
24
11
  end
25
12
  end
26
13
  end
@@ -0,0 +1,12 @@
1
+ class BulkProcessor
2
+ class NoOpHandler
3
+ def initialize(payload:, successes:, errors:)
4
+ end
5
+
6
+ def complete!
7
+ end
8
+
9
+ def fail!(fatal_error)
10
+ end
11
+ end
12
+ end
@@ -1,11 +1,20 @@
1
+ require 'csv'
2
+
1
3
  class BulkProcessor
4
+ # A Wrapper on CSV that validates column headers.
2
5
  class ValidatedCSV
3
6
  PARSING_OPTIONS = { headers: true, header_converters: :downcase }
4
7
  private_constant :PARSING_OPTIONS
5
8
 
9
+ # This cryptic message usually just means that the header row contains a
10
+ # blank field; in Ruby ~> 2.1.5 it is the error message for a NoMethodError
11
+ # raised when parsing a CSV.
6
12
  BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass"
7
13
  private_constant :BAD_HEADERS_ERROR_MSG
8
14
 
15
+ MISSING_COLUMN_MESSAGE = 'Missing or malformed column header, is one of them blank?'
16
+ private_constant :MISSING_COLUMN_MESSAGE
17
+
9
18
  attr_reader :errors, :records
10
19
 
11
20
  def initialize(stream, required_headers, optional_headers)
@@ -15,7 +24,12 @@ class BulkProcessor
15
24
  @errors = []
16
25
  end
17
26
 
27
+ # @return [true|false] true iff:
28
+ # * All required columns are present
29
+ # * No column exists that isn't a required or optional column
30
+ # * No column heading is blank
18
31
  def valid?
32
+ return false if csv.nil?
19
33
  @errors = []
20
34
 
21
35
  if missing_headers.any?
@@ -26,20 +40,17 @@ class BulkProcessor
26
40
  errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}"
27
41
  end
28
42
 
29
- unless csv.headers.all?
30
- errors << 'Missing or malformed column header, is one of them blank?'
31
- end
32
- rescue NoMethodError => error
33
- if error.message == BAD_HEADERS_ERROR_MSG
34
- errors << 'Missing or malformed column header, is one of them blank?'
35
- else
36
- raise error
43
+ if csv.headers.any? { |header| header.nil? || header.strip == '' }
44
+ errors << MISSING_COLUMN_MESSAGE
37
45
  end
38
- ensure
39
- return errors.empty?
46
+
47
+ errors.empty?
40
48
  end
41
49
 
50
+ # @return [Array<Hash<String, String>>] a serializable representation of the
51
+ # CSV that will be passed to the background job.
42
52
  def row_hashes
53
+ return [] unless valid?
43
54
  csv.map(&:to_hash)
44
55
  end
45
56
 
@@ -48,7 +59,15 @@ class BulkProcessor
48
59
  attr_reader :stream, :required_headers, :optional_headers
49
60
 
50
61
  def csv
51
- @csv ||= CSV.parse(stream, PARSING_OPTIONS)
62
+ return @csv if instance_variable_defined?('@csv')
63
+ @csv = CSV.parse(stream, PARSING_OPTIONS)
64
+ rescue NoMethodError => error
65
+ if error.message == BAD_HEADERS_ERROR_MSG
66
+ errors << MISSING_COLUMN_MESSAGE
67
+ @csv = nil
68
+ else
69
+ raise error
70
+ end
52
71
  end
53
72
 
54
73
  def missing_headers
@@ -1,3 +1,3 @@
1
1
  class BulkProcessor
2
- VERSION = '0.1.0'
2
+ VERSION = '0.2.0'
3
3
  end
@@ -1,12 +1,10 @@
1
- require 'active_job'
2
- require 'csv'
3
-
4
1
  require 'bulk_processor/config'
5
2
  require 'bulk_processor/job'
6
3
  require 'bulk_processor/stream_encoder'
7
4
  require 'bulk_processor/validated_csv'
8
5
  require 'bulk_processor/version'
9
6
 
7
+ # Process large CSV files in the background.
10
8
  class BulkProcessor
11
9
  class << self
12
10
  def config
@@ -16,31 +14,34 @@ class BulkProcessor
16
14
  def configure
17
15
  yield config
18
16
  end
19
-
20
17
  end
21
18
 
22
- attr_reader :stream, :item_processor, :handler, :payload, :errors
19
+ attr_reader :errors
23
20
 
24
- def initialize(stream, item_processor, handler, payload = {})
21
+ def initialize(stream:, processor_class:, payload: {})
25
22
  @stream = stream
26
- @item_processor = item_processor
27
- @handler = handler
23
+ @processor_class = processor_class
28
24
  @payload = payload
29
25
  @errors = []
30
26
  end
31
27
 
32
- def process
28
+ # Validate the CSV and enqueue it for processing in the background.
29
+ def start
33
30
  csv = ValidatedCSV.new(
34
31
  StreamEncoder.new(stream).encoded,
35
- item_processor.required_columns,
36
- item_processor.optional_columns
32
+ processor_class.required_columns,
33
+ processor_class.optional_columns
37
34
  )
38
35
 
39
36
  if csv.valid?
40
- Job.perform_later(csv.row_hashes, item_processor.to_s, handler.to_s, payload)
37
+ Job.perform_later(csv.row_hashes, processor_class.name, payload)
41
38
  else
42
39
  @errors = csv.errors
43
40
  end
44
41
  @errors.empty?
45
42
  end
43
+
44
+ private
45
+
46
+ attr_reader :stream, :processor_class, :payload
46
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulk-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Collier, Justin Richard
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-14 00:00:00.000000000 Z
11
+ date: 2016-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activejob
@@ -102,7 +102,9 @@ files:
102
102
  - bulk-processor.gemspec
103
103
  - lib/bulk_processor.rb
104
104
  - lib/bulk_processor/config.rb
105
+ - lib/bulk_processor/csv_processor.rb
105
106
  - lib/bulk_processor/job.rb
107
+ - lib/bulk_processor/no_op_handler.rb
106
108
  - lib/bulk_processor/stream_encoder.rb
107
109
  - lib/bulk_processor/validated_csv.rb
108
110
  - lib/bulk_processor/version.rb
@@ -126,8 +128,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
128
  version: '0'
127
129
  requirements: []
128
130
  rubyforge_project:
129
- rubygems_version: 2.4.3
131
+ rubygems_version: 2.4.5
130
132
  signing_key:
131
133
  specification_version: 4
132
134
  summary: Background process CSV data
133
135
  test_files: []
136
+ has_rdoc: