bulk-processor 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 56580f10538cc75b8fbfb8006248905cb0cfeb71
4
- data.tar.gz: 3ce3bc03cf878836f5a768a3be0fad169bfabdc6
3
+ metadata.gz: d7b6dc445bf46f3f35477510449e5f56aed3f854
4
+ data.tar.gz: ef53879ba52375923ba2b55460b5fb217665d68c
5
5
  SHA512:
6
- metadata.gz: 1c4fa2fdf92ec038c73c6eec886a682e9cb82f8fdb1ef0ade84a0eefa0eb1fffbd29cd510c3c555c6456b5ee7e600c2e52999031169dfbd773a73a593ee543ee
7
- data.tar.gz: b32c0129b95fa2a44e7d2dca5455bca2a60137aac6d0f97b260b60e90d8c79d28acd4e91a12f9d9a6b14e0a1ac4735d505b31bea182499f14c9c19991fbfa930
6
+ metadata.gz: a4b9727d06824b5cf68789a4f3fb00d5b039d079201c33f4f68bfdb9ee720a9517df2e78d12614dcf1f1ca2673e7580dfd914b8963d3b463747026ce4681fec6
7
+ data.tar.gz: 67c87c2517515fd5912d05f494ce2d3454409cdb9417466a112b48be00ab9a4d9cfb1b72fe57f4139403bbdc9980803f5e3140405efdae5c1c1ab54339f95e42
data/README.md CHANGED
@@ -52,12 +52,12 @@ class PetCSVProcessor
52
52
  ['favorite_toy', 'talents']
53
53
  end
54
54
 
55
- def initialize(records, payload:)
55
+ def initialize(csv, payload:)
56
56
  # Assign instance variables and do any other setup
57
57
  end
58
58
 
59
59
  def start
60
- # Process the records
60
+ # Process the CSV
61
61
  end
62
62
  end
63
63
  ```
@@ -66,7 +66,7 @@ To account for a common use case, a base `BulkProcessor::CSVProcessor` class is
66
66
  though it must be explicitly required. This base class can be subclassed to build a CSV processor.
67
67
  This base class implements the initializer and `#start` methods and returns an empty set for `.optional_columns`.
68
68
 
69
- The `#start` method iterates over each record, processes it using a `RowProcessor`,
69
+ The `#start` method iterates over each row, processes it using a `RowProcessor`,
70
70
  accumulates the results, which are passed off to a `Handler`. An example
71
71
  implementation could look like:
72
72
 
@@ -95,56 +95,76 @@ class PetCSVProcessor < BulkProcessor::CSVProcessor
95
95
  PetRowProcessor
96
96
  end
97
97
 
98
+ # @return [PostProcessor] a class that implements the PostProcessor role
99
+ def self.post_processor_class
100
+ PetPostProcessor
101
+ end
102
+
98
103
  # @return [Handler] a class that implements the Handler role
99
104
  def self.handler_class
100
105
  PetHandler
101
106
  end
102
107
  end
108
+ ```
103
109
 
104
- class PetRowProcessor
105
- def initialize(record, payload:)
106
- # Assign instance variables and do any other setup
107
- end
108
-
110
+ ```ruby
111
+ class PetRowProcessor < BulkProcessor::CSVProcessor::RowProcessor
109
112
  # Process the row, e.g. create a new record in the DB, send an email, etc
110
113
  def process!
111
- pet = Pet.new(record)
114
+ pet = Pet.new(row)
112
115
  if pet.save
113
- @success = true
116
+ self.successful = true
114
117
  else
115
- @messages = pet.errors.full_messages
118
+ messages.concat(pet.errors.full_messages)
116
119
  end
117
120
  end
118
121
 
119
- # @return [true|false] true iff the item was processed completely
120
- def success?
121
- @success == true
122
+ # Setting these allow us to identify error messages by these key/values for
123
+ # a row, rather than using the row number
124
+ def primary_keys
125
+ ['species', 'name']
122
126
  end
127
+ end
128
+ ```
123
129
 
124
- # @return [Array<String>] list of messages for this item to pass back to the
125
- # completion handler.
126
- def messages
127
- @messages || []
130
+ ```ruby
131
+ class PetPostProcessor
132
+ attr_reader :results
133
+
134
+ def initialize(row_processors)
135
+ # Assign instance variables and do any other setup
136
+ end
137
+
138
+ def start
139
+ cat_count = 0
140
+ @results = []
141
+ row_processors.each do |row_processor|
142
+ cat_count += 1 if row_processor.cat?
143
+ end
144
+
145
+ if cat_count > 2
146
+ @results << BulkProcessor::CSVProcessor::Result.new(messages: ['Too many cats!'],
147
+ successful: false)
148
+ end
128
149
  end
129
150
  end
151
+ ```
130
152
 
153
+ ```ruby
131
154
  class PetHandler
132
155
  # @param payload [Hash] the payload passed into 'BulkProcessor.process', can
133
156
  # be used to pass metadata around, e.g. the email address to send a
134
157
  # completion report to
135
- # @param successes [Hash<Fixnum, Array<String>>] keys are all successfully
136
- # processed rows, indexed from 0 (row 1 in the CSV is index 0 in this hash)
137
- # The values are arrays of messages the item processor generated for the row
138
- # (may be empty), e.g. { 0 => [], 1 => ['pet ID = 22 created'] }
139
- # @param errors [Hash<Fixnum, Array<String>>] similar structure to successes,
140
- # but rows that were not completed successfully.
141
- def initialize(payload:, successes:, errors:)
158
+ # @param results [Array<BulkProcessor::CSVProcessor::Result>] results
159
+ # for processing the rows (there will be one per row in the CSV plus zero
160
+ # or more from post-processing)
161
+ def initialize(payload:, results:)
142
162
  # Assign instance variables and do any other setup
143
163
  end
144
164
 
145
165
  # Notify the owner that their pets were processed
146
166
  def complete!
147
- OwnerMailer.competed(successes, errors)
167
+ OwnerMailer.completed(results, payload)
148
168
  end
149
169
 
150
170
  # Notify the owner that processing failed
@@ -152,7 +172,7 @@ class PetHandler
152
172
  # @param fatal_error [StandardError] if nil, then all rows were processed,
153
173
  # else the error that was raise is passed in here
154
174
  def fail!(fatal_error)
155
- OwnerMailer.failed(fatal_error)
175
+ OwnerMailer.failed(fatal_error, payload)
156
176
  end
157
177
  end
158
178
  ```
@@ -163,7 +183,7 @@ Putting it all together
163
183
  processor = BulkProcessor.new(
164
184
  stream: file_stream,
165
185
  processor_class: PetCSVProcessor,
166
- payload: {recipient: current_user.email}
186
+ payload: { recipient: current_user.email }
167
187
  )
168
188
  if processor.start
169
189
  # The job has been enqueued, go get a coffee and wait
@@ -173,6 +193,17 @@ else
173
193
  end
174
194
  ```
175
195
 
196
+ ### BulkProcessor::CSVProcessor::Result
197
+
198
+ The result instances passed from BulkProcessor::CSVProcessor to the Handler
199
+ respond to the following messages:
200
+
201
+ * `#messages [Array<String>]` - zero or more messages generated when processing the row
202
+ * `#row_num [Fixnum|nil]` - the CSV row number (starting with 2) or nil if result is from post-processing
203
+ * `#primary_attributes [Hash]` - a set of values that can be used to identify which row the messages are for.
204
+ You must override `#primary_keys` to use this.
205
+ * `#successful?` - true iff the processing happened with no errors
206
+
176
207
  ## Development
177
208
 
178
209
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -0,0 +1,15 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # A null object implementation of the Handler role
4
+ class NoOpHandler
5
+ def initialize(payload:, results:)
6
+ end
7
+
8
+ def complete!
9
+ end
10
+
11
+ def fail!(fatal_error)
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,16 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # A null object implementation of the PostProcessor role
4
+ class NoOpPostProcessor
5
+ def initialize(row_processors)
6
+ end
7
+
8
+ def start
9
+ end
10
+
11
+ def results
12
+ []
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,20 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # A container for messages generated by processing that need to be passed
4
+ # back to the handler.
5
+ class Result
6
+ attr_reader :messages, :primary_attributes, :row_num
7
+
8
+ def initialize(messages:, successful:, row_num: nil, primary_attributes: nil)
9
+ @messages = messages
10
+ @successful = successful
11
+ @row_num = row_num
12
+ @primary_attributes = primary_attributes
13
+ end
14
+
15
+ def successful?
16
+ @successful
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,61 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # An abstract implementation of the RowProcessor role. This class implements
4
+ # `#result` by returning a `Result` instance. To subclass, just implement
5
+ # `#process!` to handle the row.
6
+ #
7
+ # The row will be considered a failure by default. After a row is successfully
8
+ # processed, set `self.successful = true`. If there are any messages that
9
+ # should be passed back to the Handler, add them to the `#messages` array.
10
+ #
11
+ # You can optionally override `#primary_keys` so that the result returned
12
+ # has more natural identifiers than just the row number. For example,
13
+ # setting this to ['species', 'name'] (for the PetRowProcessor example from
14
+ # the README), the result would have `#primary_attributes` like
15
+ #
16
+ # { 'species' => 'dog', 'name' => 'Fido' }
17
+ #
18
+ class RowProcessor
19
+ attr_reader :messages
20
+
21
+ def initialize(row, row_num:, payload:)
22
+ @row = row
23
+ @row_num = row_num
24
+ @payload = payload
25
+ @successful = false
26
+ @messages = []
27
+ end
28
+
29
+ def process!
30
+ raise NotImplementedError,
31
+ "#{self.class.name} must implement #{__method__}"
32
+ end
33
+
34
+ def successful?
35
+ @successful
36
+ end
37
+
38
+ def result
39
+ Result.new(messages: messages, row_num: row_num,
40
+ primary_attributes: primary_attrs, successful: @successful)
41
+ end
42
+
43
+ private
44
+
45
+ attr_reader :row, :row_num, :payload
46
+ attr_writer :successful
47
+
48
+ # Override this with an array of column names that can be used to uniquely
49
+ # identify a row, if you'd prefer to not identify rows by row number
50
+ def primary_keys
51
+ []
52
+ end
53
+
54
+ # @return [Hash<String, String>] the set of primary keys and their values
55
+ # for this row
56
+ def primary_attrs
57
+ row.slice(*primary_keys)
58
+ end
59
+ end
60
+ end
61
+ end
@@ -1,4 +1,7 @@
1
- require_relative 'no_op_handler'
1
+ require_relative 'csv_processor/no_op_handler'
2
+ require_relative 'csv_processor/no_op_post_processor'
3
+ require_relative 'csv_processor/result'
4
+ require_relative 'csv_processor/row_processor'
2
5
 
3
6
  class BulkProcessor
4
7
  # An abstract implementation of the CSVProcessor role. Provides
@@ -9,7 +12,7 @@ class BulkProcessor
9
12
  #
10
13
  # The common use case cover by this class' implementation of `#start` is
11
14
  #
12
- # 1. Iteratively process each record
15
+ # 1. Iteratively process each row
13
16
  # 2. Accumulate the results (did the processing succeed? what were the error
14
17
  # messages?)
15
18
  # 3. Send the results to an instance of the Handler role.
@@ -26,6 +29,12 @@ class BulkProcessor
26
29
  # The `required_columns` method must still be implemented in a subclass
27
30
  #
28
31
  class CSVProcessor
32
+ # Since the first data row in a CSV is row 2, but will have index 0 in
33
+ # the items array, we need to offset the index by 2 when we add a row
34
+ # identifier to all error messages.
35
+ FIRST_ROW_OFFSET = 2
36
+ private_constant :FIRST_ROW_OFFSET
37
+
29
38
  # @return [RowProcessor] a class that implements the RowProcessor interface
30
39
  def self.row_processor_class
31
40
  raise NotImplementedError,
@@ -37,6 +46,11 @@ class BulkProcessor
37
46
  NoOpHandler
38
47
  end
39
48
 
49
+ # @return [PostProcessor] a class that implements the PostProcessor role
50
+ def self.post_processor_class
51
+ NoOpPostProcessor
52
+ end
53
+
40
54
  # @return [Array<String>] column headers that must be present
41
55
  def self.required_columns
42
56
  raise NotImplementedError,
@@ -51,27 +65,22 @@ class BulkProcessor
51
65
  []
52
66
  end
53
67
 
54
- def initialize(records, payload: {})
55
- @records = records
68
+ def initialize(csv, payload: {})
56
69
  @payload = payload
57
- @successes = {}
58
- @errors = {}
70
+ @row_processors = csv.map.with_index(&method(:row_processor))
71
+ @results = []
59
72
  end
60
73
 
61
- # Iteratively process each record, accumulate the results, and pass those
62
- # off to the handler. If an unrescued error is raised for any record,
63
- # processing will halt for all remaining records and the `#fail!` will be
74
+ # Iteratively process each row, accumulate the results, and pass those
75
+ # off to the handler. If an unrescued error is raised for any row,
76
+ # processing will halt for all remaining rows and the `#fail!` will be
64
77
  # invoked on the handler.
65
78
  def start
66
- records.each_with_index do |record, index|
67
- processor = row_processor(record)
79
+ row_processors.each do |processor|
68
80
  processor.process!
69
- if processor.success?
70
- successes[index] = processor.messages
71
- else
72
- errors[index] = processor.messages
73
- end
81
+ results << processor.result
74
82
  end
83
+ post_processes
75
84
  handler.complete!
76
85
  rescue Exception => exception
77
86
  handler.fail!(exception)
@@ -84,15 +93,21 @@ class BulkProcessor
84
93
 
85
94
  private
86
95
 
87
- attr_reader :records, :payload, :successes, :errors
96
+ attr_reader :row_processors, :payload, :results
88
97
 
89
98
  def handler
90
- self.class.handler_class.new(payload: payload, successes: successes,
91
- errors: errors)
99
+ self.class.handler_class.new(payload: payload, results: results)
100
+ end
101
+
102
+ def row_processor(row, index)
103
+ row_num = index + FIRST_ROW_OFFSET
104
+ self.class.row_processor_class.new(row, row_num: row_num, payload: payload)
92
105
  end
93
106
 
94
- def row_processor(record)
95
- self.class.row_processor_class.new(record, payload: payload)
107
+ def post_processes
108
+ post_processor = self.class.post_processor_class.new(row_processors)
109
+ post_processor.start
110
+ results.concat(post_processor.results)
96
111
  end
97
112
  end
98
113
  end
@@ -2,7 +2,7 @@ class BulkProcessor
2
2
  # Force encode a stream into UTF-8 by removing invalid and undefined
3
3
  # characters.
4
4
  class StreamEncoder
5
- ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }
5
+ ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }.freeze
6
6
  private_constant :ENCODING_OPTIONS
7
7
 
8
8
  def initialize(stream)
@@ -3,19 +3,20 @@ require 'csv'
3
3
  class BulkProcessor
4
4
  # A Wrapper on CSV that validates column headers.
5
5
  class ValidatedCSV
6
- PARSING_OPTIONS = { headers: true, header_converters: :downcase }
6
+ PARSING_OPTIONS = { headers: true, header_converters: :downcase }.freeze
7
7
  private_constant :PARSING_OPTIONS
8
8
 
9
9
  # This cryptic message usually just means that the header row contains a
10
10
  # blank field; in ruby ~> 2.1.5 It is the error message for a NoMethodError
11
11
  # raised when parsing a CSV.
12
- BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass"
12
+ BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass".freeze
13
13
  private_constant :BAD_HEADERS_ERROR_MSG
14
14
 
15
- MISSING_COLUMN_MESSAGE = 'Missing or malformed column header, is one of them blank?'
15
+ MISSING_COLUMN_MESSAGE =
16
+ 'Missing or malformed column header, is one of them blank?'.freeze
16
17
  private_constant :MISSING_COLUMN_MESSAGE
17
18
 
18
- attr_reader :errors, :records
19
+ attr_reader :errors
19
20
 
20
21
  def initialize(stream, required_headers, optional_headers)
21
22
  @stream = stream
@@ -33,11 +34,11 @@ class BulkProcessor
33
34
  @errors = []
34
35
 
35
36
  if missing_headers.any?
36
- errors << "Missing required column(s): #{missing_headers.join(', ')}"
37
+ errors << "Missing required column(s): #{missing_headers.join(', ')}".freeze
37
38
  end
38
39
 
39
40
  if extra_headers.any?
40
- errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}"
41
+ errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}".freeze
41
42
  end
42
43
 
43
44
  if csv.headers.any? { |header| header.nil? || header.strip == '' }
@@ -1,3 +1,3 @@
1
1
  class BulkProcessor
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulk-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Collier, Justin Richard
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-15 00:00:00.000000000 Z
11
+ date: 2016-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activejob
@@ -103,8 +103,11 @@ files:
103
103
  - lib/bulk_processor.rb
104
104
  - lib/bulk_processor/config.rb
105
105
  - lib/bulk_processor/csv_processor.rb
106
+ - lib/bulk_processor/csv_processor/no_op_handler.rb
107
+ - lib/bulk_processor/csv_processor/no_op_post_processor.rb
108
+ - lib/bulk_processor/csv_processor/result.rb
109
+ - lib/bulk_processor/csv_processor/row_processor.rb
106
110
  - lib/bulk_processor/job.rb
107
- - lib/bulk_processor/no_op_handler.rb
108
111
  - lib/bulk_processor/stream_encoder.rb
109
112
  - lib/bulk_processor/validated_csv.rb
110
113
  - lib/bulk_processor/version.rb
@@ -1,12 +0,0 @@
1
- class BulkProcessor
2
- class NoOpHandler
3
- def initialize(payload:, successes:, errors:)
4
- end
5
-
6
- def complete!
7
- end
8
-
9
- def fail!(fatal_error)
10
- end
11
- end
12
- end