bulk-processor 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 56580f10538cc75b8fbfb8006248905cb0cfeb71
4
- data.tar.gz: 3ce3bc03cf878836f5a768a3be0fad169bfabdc6
3
+ metadata.gz: d7b6dc445bf46f3f35477510449e5f56aed3f854
4
+ data.tar.gz: ef53879ba52375923ba2b55460b5fb217665d68c
5
5
  SHA512:
6
- metadata.gz: 1c4fa2fdf92ec038c73c6eec886a682e9cb82f8fdb1ef0ade84a0eefa0eb1fffbd29cd510c3c555c6456b5ee7e600c2e52999031169dfbd773a73a593ee543ee
7
- data.tar.gz: b32c0129b95fa2a44e7d2dca5455bca2a60137aac6d0f97b260b60e90d8c79d28acd4e91a12f9d9a6b14e0a1ac4735d505b31bea182499f14c9c19991fbfa930
6
+ metadata.gz: a4b9727d06824b5cf68789a4f3fb00d5b039d079201c33f4f68bfdb9ee720a9517df2e78d12614dcf1f1ca2673e7580dfd914b8963d3b463747026ce4681fec6
7
+ data.tar.gz: 67c87c2517515fd5912d05f494ce2d3454409cdb9417466a112b48be00ab9a4d9cfb1b72fe57f4139403bbdc9980803f5e3140405efdae5c1c1ab54339f95e42
data/README.md CHANGED
@@ -52,12 +52,12 @@ class PetCSVProcessor
52
52
  ['favorite_toy', 'talents']
53
53
  end
54
54
 
55
- def initialize(records, payload:)
55
+ def initialize(csv, payload:)
56
56
  # Assign instance variables and do any other setup
57
57
  end
58
58
 
59
59
  def start
60
- # Process the records
60
+ # Process the CSV
61
61
  end
62
62
  end
63
63
  ```
@@ -66,7 +66,7 @@ To account for a common use case, a base `BulkProcessor::CSVProcessor` class is
66
66
  though it must be explicitly required. This base class can be subclassed to build a CSV processor.
67
67
  This base class implements the initializer and `#start` methods and returns an empty set for `.optional_columns`.
68
68
 
69
- The `#start` method iterates over each record, processes it using a `RowProcessor`,
69
+ The `#start` method iterates over each row, processes it using a `RowProcessor`,
70
70
  accumulates the results, which are passed off to a `Handler`. An example
71
71
  implementation could look like:
72
72
 
@@ -95,56 +95,76 @@ class PetCSVProcessor < BulkProcessor::CSVProcessor
95
95
  PetRowProcessor
96
96
  end
97
97
 
98
+ # @return [PostProcessor] a class that implements the PostProcessor role
99
+ def self.post_processor_class
100
+ PetPostProcessor
101
+ end
102
+
98
103
  # @return [Handler] a class that implements the Handler role
99
104
  def self.handler_class
100
105
  PetHandler
101
106
  end
102
107
  end
108
+ ```
103
109
 
104
- class PetRowProcessor
105
- def initialize(record, payload:)
106
- # Assign instance variables and do any other setup
107
- end
108
-
110
+ ```ruby
111
+ class PetRowProcessor < BulkProcessor::CSVProcessor::RowProcessor
109
112
  # Process the row, e.g. create a new record in the DB, send an email, etc
110
113
  def process!
111
- pet = Pet.new(record)
114
+ pet = Pet.new(row)
112
115
  if pet.save
113
- @success = true
116
+ self.successful = true
114
117
  else
115
- @messages = pet.errors.full_messages
118
+ messages.concat(pet.errors.full_messages)
116
119
  end
117
120
  end
118
121
 
119
- # @return [true|false] true iff the item was processed completely
120
- def success?
121
- @success == true
122
+ # Setting these allows us to identify error messages by these key/values for
123
+ # a row, rather than using the row number
124
+ def primary_keys
125
+ ['species', 'name']
122
126
  end
127
+ end
128
+ ```
123
129
 
124
- # @return [Array<String>] list of messages for this item to pass back to the
125
- # completion handler.
126
- def messages
127
- @messages || []
130
+ ```ruby
131
+ class PetPostProcessor
132
+ attr_reader :results
133
+
134
+ def initialize(row_processors)
135
+ # Assign instance variables and do any other setup
136
+ end
137
+
138
+ def start
139
+ cat_count = 0
140
+ @results = []
141
+ row_processors.each do |row_processor|
142
+ cat_count += 1 if row_processor.cat?
143
+ end
144
+
145
+ if cat_count > 2
146
+ @results << BulkProcessor::CSVProcessor::Result.new(messages: ['Too many cats!'],
147
+ successful: false)
148
+ end
128
149
  end
129
150
  end
151
+ ```
130
152
 
153
+ ```ruby
131
154
  class PetHandler
132
155
  # @param payload [Hash] the payload passed into 'BulkProcessor.process', can
133
156
  # be used to pass metadata around, e.g. the email address to send a
134
157
  # completion report to
135
- # @param successes [Hash<Fixnum, Array<String>>] keys are all successfully
136
- # processed rows, indexed from 0 (row 1 in the CSV is index 0 in this hash)
137
- # The values are arrays of messages the item processor generated for the row
138
- # (may be empty), e.g. { 0 => [], 1 => ['pet ID = 22 created'] }
139
- # @param errors [Hash<Fixnum, Array<String>>] similar structure to successes,
140
- # but rows that were not completed successfully.
141
- def initialize(payload:, successes:, errors:)
158
+ # @param results [Array<BulkProcessor::CSVProcessor::Result>] results
159
+ # for processing the rows (there will be one per row in the CSV plus zero
160
+ # or more from post-processing)
161
+ def initialize(payload:, results:)
142
162
  # Assign instance variables and do any other setup
143
163
  end
144
164
 
145
165
  # Notify the owner that their pets were processed
146
166
  def complete!
147
- OwnerMailer.competed(successes, errors)
167
+ OwnerMailer.completed(results, payload)
148
168
  end
149
169
 
150
170
  # Notify the owner that processing failed
@@ -152,7 +172,7 @@ class PetHandler
152
172
  # @param fatal_error [StandardError] if nil, then all rows were processed,
153
173
  # else the error that was raised is passed in here
154
174
  def fail!(fatal_error)
155
- OwnerMailer.failed(fatal_error)
175
+ OwnerMailer.failed(fatal_error, payload)
156
176
  end
157
177
  end
158
178
  ```
@@ -163,7 +183,7 @@ Putting it all together
163
183
  processor = BulkProcessor.new(
164
184
  stream: file_stream,
165
185
  processor_class: PetCSVProcessor,
166
- payload: {recipient: current_user.email}
186
+ payload: { recipient: current_user.email }
167
187
  )
168
188
  if processor.start
169
189
  # The job has been enqueued, go get a coffee and wait
@@ -173,6 +193,17 @@ else
173
193
  end
174
194
  ```
175
195
 
196
+ ### BulkProcessor::CSVProcessor::Result
197
+
198
+ The result instances passed from BulkProcessor::CSVProcessor to the Handler
199
+ respond to the following messages:
200
+
201
+ * `#messages [Array<String>]` - zero or more messages generated when processing the row
202
+ * `#row_num [Fixnum|nil]` - the CSV row number (starting with 2) or nil if result is from post-processing
203
+ * `#primary_attributes [Hash]` - a set of values that can be used to identify which row the messages are for.
204
+ You must override `#primary_keys` to use this.
205
+ * `#successful?` - true iff the processing happened with no errors
206
+
176
207
  ## Development
177
208
 
178
209
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -0,0 +1,15 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # A null object implementation of the Handler role
4
+ class NoOpHandler
5
+ def initialize(payload:, results:)
6
+ end
7
+
8
+ def complete!
9
+ end
10
+
11
+ def fail!(fatal_error)
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,16 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # A null object implementation of the PostProcessor role
4
+ class NoOpPostProcessor
5
+ def initialize(row_processors)
6
+ end
7
+
8
+ def start
9
+ end
10
+
11
+ def results
12
+ []
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,20 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # A container for messages generated by processing that need to be passed
4
+ # back to the handler.
5
+ class Result
6
+ attr_reader :messages, :primary_attributes, :row_num
7
+
8
+ def initialize(messages:, successful:, row_num: nil, primary_attributes: nil)
9
+ @messages = messages
10
+ @successful = successful
11
+ @row_num = row_num
12
+ @primary_attributes = primary_attributes
13
+ end
14
+
15
+ def successful?
16
+ @successful
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,61 @@
1
+ class BulkProcessor
2
+ class CSVProcessor
3
+ # An abstract implementation of the RowProcessor role. This class implements
4
+ # `#result` by returning a `Result`. To subclass, just implement
5
+ # `#process!` to handle the row.
6
+ #
7
+ # The row will be considered a failure by default. After a row is successfully
8
+ # processed, set `self.successful = true`. If there are any messages that
9
+ # should be passed back to the Handler, add them to the `#messages` array.
10
+ #
11
+ # You can optionally override `#primary_keys` so that the result returned
12
+ # has more natural identifiers than just the row number. For example, by
13
+ # setting this to ['species', 'name'] (for the PetRowProcessor example from
14
+ # the README), the result would have `#primary_attributes` like
15
+ #
16
+ # { 'species' => 'dog', 'name' => 'Fido' }
17
+ #
18
+ class RowProcessor
19
+ attr_reader :messages
20
+
21
+ def initialize(row, row_num:, payload:)
22
+ @row = row
23
+ @row_num = row_num
24
+ @payload = payload
25
+ @successful = false
26
+ @messages = []
27
+ end
28
+
29
+ def process!
30
+ raise NotImplementedError,
31
+ "#{self.class.name} must implement #{__method__}"
32
+ end
33
+
34
+ def successful?
35
+ @successful
36
+ end
37
+
38
+ def result
39
+ Result.new(messages: messages, row_num: row_num,
40
+ primary_attributes: primary_attrs, successful: @successful)
41
+ end
42
+
43
+ private
44
+
45
+ attr_reader :row, :row_num, :payload
46
+ attr_writer :successful
47
+
48
+ # Override this with an array of column names that can be used to uniquely
49
+ # identify a row, if you'd prefer to not identify rows by row number
50
+ def primary_keys
51
+ []
52
+ end
53
+
54
+ # @return [Hash<String, String>] the set of primary keys and their values
55
+ # for this row
56
+ def primary_attrs
57
+ row.slice(*primary_keys)
58
+ end
59
+ end
60
+ end
61
+ end
@@ -1,4 +1,7 @@
1
- require_relative 'no_op_handler'
1
+ require_relative 'csv_processor/no_op_handler'
2
+ require_relative 'csv_processor/no_op_post_processor'
3
+ require_relative 'csv_processor/result'
4
+ require_relative 'csv_processor/row_processor'
2
5
 
3
6
  class BulkProcessor
4
7
  # An abstract implementation of the CSVProcessor role. Provides
@@ -9,7 +12,7 @@ class BulkProcessor
9
12
  #
10
13
  # The common use case covered by this class' implementation of `#start` is
11
14
  #
12
- # 1. Iteratively process each record
15
+ # 1. Iteratively process each row
13
16
  # 2. Accumulate the results (did the processing succeed? what were the error
14
17
  # messages?)
15
18
  # 3. Send the results to an instance of the Handler role.
@@ -26,6 +29,12 @@ class BulkProcessor
26
29
  # The `required_columns` method must still be implemented in a subclass
27
30
  #
28
31
  class CSVProcessor
32
+ # Since the first data row in a CSV is row 2, but will have index 0 in
33
+ # the items array, we need to offset the index by 2 when we add a row
34
+ # identifier to all error messages.
35
+ FIRST_ROW_OFFSET = 2
36
+ private_constant :FIRST_ROW_OFFSET
37
+
29
38
  # @return [RowProcessor] a class that implements the RowProcessor interface
30
39
  def self.row_processor_class
31
40
  raise NotImplementedError,
@@ -37,6 +46,11 @@ class BulkProcessor
37
46
  NoOpHandler
38
47
  end
39
48
 
49
+ # @return [PostProcessor] a class that implements the PostProcessor role
50
+ def self.post_processor_class
51
+ NoOpPostProcessor
52
+ end
53
+
40
54
  # @return [Array<String>] column headers that must be present
41
55
  def self.required_columns
42
56
  raise NotImplementedError,
@@ -51,27 +65,22 @@ class BulkProcessor
51
65
  []
52
66
  end
53
67
 
54
- def initialize(records, payload: {})
55
- @records = records
68
+ def initialize(csv, payload: {})
56
69
  @payload = payload
57
- @successes = {}
58
- @errors = {}
70
+ @row_processors = csv.map.with_index(&method(:row_processor))
71
+ @results = []
59
72
  end
60
73
 
61
- # Iteratively process each record, accumulate the results, and pass those
62
- # off to the handler. If an unrescued error is raised for any record,
63
- # processing will halt for all remaining records and the `#fail!` will be
74
+ # Iteratively process each row, accumulate the results, and pass those
75
+ # off to the handler. If an unrescued error is raised for any row,
76
+ # processing will halt for all remaining rows and the `#fail!` will be
64
77
  # invoked on the handler.
65
78
  def start
66
- records.each_with_index do |record, index|
67
- processor = row_processor(record)
79
+ row_processors.each do |processor|
68
80
  processor.process!
69
- if processor.success?
70
- successes[index] = processor.messages
71
- else
72
- errors[index] = processor.messages
73
- end
81
+ results << processor.result
74
82
  end
83
+ post_processes
75
84
  handler.complete!
76
85
  rescue Exception => exception
77
86
  handler.fail!(exception)
@@ -84,15 +93,21 @@ class BulkProcessor
84
93
 
85
94
  private
86
95
 
87
- attr_reader :records, :payload, :successes, :errors
96
+ attr_reader :row_processors, :payload, :results
88
97
 
89
98
  def handler
90
- self.class.handler_class.new(payload: payload, successes: successes,
91
- errors: errors)
99
+ self.class.handler_class.new(payload: payload, results: results)
100
+ end
101
+
102
+ def row_processor(row, index)
103
+ row_num = index + FIRST_ROW_OFFSET
104
+ self.class.row_processor_class.new(row, row_num: row_num, payload: payload)
92
105
  end
93
106
 
94
- def row_processor(record)
95
- self.class.row_processor_class.new(record, payload: payload)
107
+ def post_processes
108
+ post_processor = self.class.post_processor_class.new(row_processors)
109
+ post_processor.start
110
+ results.concat(post_processor.results)
96
111
  end
97
112
  end
98
113
  end
@@ -2,7 +2,7 @@ class BulkProcessor
2
2
  # Force encode a stream into UTF-8 by removing invalid and undefined
3
3
  # characters.
4
4
  class StreamEncoder
5
- ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }
5
+ ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }.freeze
6
6
  private_constant :ENCODING_OPTIONS
7
7
 
8
8
  def initialize(stream)
@@ -3,19 +3,20 @@ require 'csv'
3
3
  class BulkProcessor
4
4
  # A Wrapper on CSV that validates column headers.
5
5
  class ValidatedCSV
6
- PARSING_OPTIONS = { headers: true, header_converters: :downcase }
6
+ PARSING_OPTIONS = { headers: true, header_converters: :downcase }.freeze
7
7
  private_constant :PARSING_OPTIONS
8
8
 
9
9
  # This cryptic message usually just means that the header row contains a
10
10
  # blank field; in ruby ~> 2.1.5 it is the error message for a NoMethodError
11
11
  # raised when parsing a CSV.
12
- BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass"
12
+ BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass".freeze
13
13
  private_constant :BAD_HEADERS_ERROR_MSG
14
14
 
15
- MISSING_COLUMN_MESSAGE = 'Missing or malformed column header, is one of them blank?'
15
+ MISSING_COLUMN_MESSAGE =
16
+ 'Missing or malformed column header, is one of them blank?'.freeze
16
17
  private_constant :MISSING_COLUMN_MESSAGE
17
18
 
18
- attr_reader :errors, :records
19
+ attr_reader :errors
19
20
 
20
21
  def initialize(stream, required_headers, optional_headers)
21
22
  @stream = stream
@@ -33,11 +34,11 @@ class BulkProcessor
33
34
  @errors = []
34
35
 
35
36
  if missing_headers.any?
36
- errors << "Missing required column(s): #{missing_headers.join(', ')}"
37
+ errors << "Missing required column(s): #{missing_headers.join(', ')}".freeze
37
38
  end
38
39
 
39
40
  if extra_headers.any?
40
- errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}"
41
+ errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}".freeze
41
42
  end
42
43
 
43
44
  if csv.headers.any? { |header| header.nil? || header.strip == '' }
@@ -1,3 +1,3 @@
1
1
  class BulkProcessor
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulk-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Collier, Justin Richard
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-15 00:00:00.000000000 Z
11
+ date: 2016-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activejob
@@ -103,8 +103,11 @@ files:
103
103
  - lib/bulk_processor.rb
104
104
  - lib/bulk_processor/config.rb
105
105
  - lib/bulk_processor/csv_processor.rb
106
+ - lib/bulk_processor/csv_processor/no_op_handler.rb
107
+ - lib/bulk_processor/csv_processor/no_op_post_processor.rb
108
+ - lib/bulk_processor/csv_processor/result.rb
109
+ - lib/bulk_processor/csv_processor/row_processor.rb
106
110
  - lib/bulk_processor/job.rb
107
- - lib/bulk_processor/no_op_handler.rb
108
111
  - lib/bulk_processor/stream_encoder.rb
109
112
  - lib/bulk_processor/validated_csv.rb
110
113
  - lib/bulk_processor/version.rb
@@ -1,12 +0,0 @@
1
- class BulkProcessor
2
- class NoOpHandler
3
- def initialize(payload:, successes:, errors:)
4
- end
5
-
6
- def complete!
7
- end
8
-
9
- def fail!(fatal_error)
10
- end
11
- end
12
- end