bulk-processor 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +59 -28
- data/lib/bulk_processor/csv_processor/no_op_handler.rb +15 -0
- data/lib/bulk_processor/csv_processor/no_op_post_processor.rb +16 -0
- data/lib/bulk_processor/csv_processor/result.rb +20 -0
- data/lib/bulk_processor/csv_processor/row_processor.rb +61 -0
- data/lib/bulk_processor/csv_processor.rb +36 -21
- data/lib/bulk_processor/stream_encoder.rb +1 -1
- data/lib/bulk_processor/validated_csv.rb +7 -6
- data/lib/bulk_processor/version.rb +1 -1
- metadata +6 -3
- data/lib/bulk_processor/no_op_handler.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7b6dc445bf46f3f35477510449e5f56aed3f854
|
4
|
+
data.tar.gz: ef53879ba52375923ba2b55460b5fb217665d68c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a4b9727d06824b5cf68789a4f3fb00d5b039d079201c33f4f68bfdb9ee720a9517df2e78d12614dcf1f1ca2673e7580dfd914b8963d3b463747026ce4681fec6
|
7
|
+
data.tar.gz: 67c87c2517515fd5912d05f494ce2d3454409cdb9417466a112b48be00ab9a4d9cfb1b72fe57f4139403bbdc9980803f5e3140405efdae5c1c1ab54339f95e42
|
data/README.md
CHANGED
@@ -52,12 +52,12 @@ class PetCSVProcessor
|
|
52
52
|
['favorite_toy', 'talents']
|
53
53
|
end
|
54
54
|
|
55
|
-
def initialize(
|
55
|
+
def initialize(csv, payload:)
|
56
56
|
# Assign instance variables and do any other setup
|
57
57
|
end
|
58
58
|
|
59
59
|
def start
|
60
|
-
# Process the
|
60
|
+
# Process the CSV
|
61
61
|
end
|
62
62
|
end
|
63
63
|
```
|
@@ -66,7 +66,7 @@ To account for a common use case, a base `BulkProcessor::CSVProcessor` class is
|
|
66
66
|
though it must be explicitly required. This base class can be subclassed to build a CSV processor.
|
67
67
|
This base class implements the initializer and `#start` methods and returns an empty set for `.optional_columns`.
|
68
68
|
|
69
|
-
The `#start` method iterates over each
|
69
|
+
The `#start` method iterates over each row, processes it using a `RowProcessor`,
|
70
70
|
and accumulates the results, which are passed off to a `Handler`. An example
|
71
71
|
implementation could look like:
|
72
72
|
|
@@ -95,56 +95,76 @@ class PetCSVProcessor < BulkProcessor::CSVProcessor
|
|
95
95
|
PetRowProcessor
|
96
96
|
end
|
97
97
|
|
98
|
+
# @return [PostProcessor] a class that implements the PostProcessor role
|
99
|
+
def self.post_processor_class
|
100
|
+
PetPostProcessor
|
101
|
+
end
|
102
|
+
|
98
103
|
# @return [Handler] a class that implements the Handler role
|
99
104
|
def self.handler_class
|
100
105
|
PetHandler
|
101
106
|
end
|
102
107
|
end
|
108
|
+
```
|
103
109
|
|
104
|
-
|
105
|
-
|
106
|
-
# Assign instance variables and do any other setup
|
107
|
-
end
|
108
|
-
|
110
|
+
```ruby
|
111
|
+
class PetRowProcessor < BulkProcessor::CSVProcessor::RowProcessor
|
109
112
|
# Process the row, e.g. create a new record in the DB, send an email, etc
|
110
113
|
def process!
|
111
|
-
pet = Pet.new(
|
114
|
+
pet = Pet.new(row)
|
112
115
|
if pet.save
|
113
|
-
|
116
|
+
self.successful = true
|
114
117
|
else
|
115
|
-
|
118
|
+
messages.concat(pet.errors.full_messages)
|
116
119
|
end
|
117
120
|
end
|
118
121
|
|
119
|
-
#
|
120
|
-
|
121
|
-
|
122
|
+
# Setting these allows us to identify error messages by these key/values for
|
123
|
+
# a row, rather than using the row number
|
124
|
+
def primary_keys
|
125
|
+
['species', 'name']
|
122
126
|
end
|
127
|
+
end
|
128
|
+
```
|
123
129
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
130
|
+
```ruby
|
131
|
+
class PetPostProcessor
|
132
|
+
attr_reader :results
|
133
|
+
|
134
|
+
def initialize(row_processors)
|
135
|
+
# Assign instance variables and do any other setup
|
136
|
+
end
|
137
|
+
|
138
|
+
def start
|
139
|
+
cat_count = 0
|
140
|
+
@results = []
|
141
|
+
row_processors.each do |row_processor|
|
142
|
+
cat_count += 1 if row_processor.cat?
|
143
|
+
end
|
144
|
+
|
145
|
+
if cat_count > 2
|
146
|
+
@results << BulkProcessor::CSVProcessor::Result.new(messages: ['Too many cats!'],
|
147
|
+
successful: false)
|
148
|
+
end
|
128
149
|
end
|
129
150
|
end
|
151
|
+
```
|
130
152
|
|
153
|
+
```ruby
|
131
154
|
class PetHandler
|
132
155
|
# @param payload [Hash] the payload passed into 'BulkProcessor.process', can
|
133
156
|
# be used to pass metadata around, e.g. the email address to send a
|
134
157
|
# completion report to
|
135
|
-
# @param
|
136
|
-
#
|
137
|
-
#
|
138
|
-
|
139
|
-
# @param errors [Hash<Fixnum, Array<String>>] similar structure to successes,
|
140
|
-
# but rows that were not completed successfully.
|
141
|
-
def initialize(payload:, successes:, errors:)
|
158
|
+
# @param results [Array<BulkProcessor::CSVProcessor::Result>] results
|
159
|
+
# for processing the rows (there will be one per row in the CSV plus zero
|
160
|
+
# or more from post-processing)
|
161
|
+
def initialize(payload:, results:)
|
142
162
|
# Assign instance variables and do any other setup
|
143
163
|
end
|
144
164
|
|
145
165
|
# Notify the owner that their pets were processed
|
146
166
|
def complete!
|
147
|
-
OwnerMailer.
|
167
|
+
OwnerMailer.completed(results, payload)
|
148
168
|
end
|
149
169
|
|
150
170
|
# Notify the owner that processing failed
|
@@ -152,7 +172,7 @@ class PetHandler
|
|
152
172
|
# @param fatal_error [StandardError] if nil, then all rows were processed,
|
153
173
|
# else the error that was raised is passed in here
|
154
174
|
def fail!(fatal_error)
|
155
|
-
OwnerMailer.failed(fatal_error)
|
175
|
+
OwnerMailer.failed(fatal_error, payload)
|
156
176
|
end
|
157
177
|
end
|
158
178
|
```
|
@@ -163,7 +183,7 @@ Putting it all together
|
|
163
183
|
processor = BulkProcessor.new(
|
164
184
|
stream: file_stream,
|
165
185
|
processor_class: PetCSVProcessor,
|
166
|
-
payload: {recipient: current_user.email}
|
186
|
+
payload: { recipient: current_user.email }
|
167
187
|
)
|
168
188
|
if processor.start
|
169
189
|
# The job has been enqueued, go get a coffee and wait
|
@@ -173,6 +193,17 @@ else
|
|
173
193
|
end
|
174
194
|
```
|
175
195
|
|
196
|
+
### BulkProcessor::CSVProcessor::Result
|
197
|
+
|
198
|
+
The result instances passed from BulkProcessor::CSVProcessor to the Handler
|
199
|
+
respond to the following messages:
|
200
|
+
|
201
|
+
* `#messages [Array<String>]` - zero or more messages generated when processing the row
|
202
|
+
* `#row_num [Fixnum|nil]` - the CSV row number (starting with 2) or nil if result is from post-processing
|
203
|
+
* `#primary_attributes [Hash]` - a set of values that can be used to identify which row the messages are for.
|
204
|
+
You must override `#primary_keys` to use this.
|
205
|
+
* `#successful?` - true iff the processing happened with no errors
|
206
|
+
|
176
207
|
## Development
|
177
208
|
|
178
209
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class BulkProcessor
|
2
|
+
class CSVProcessor
|
3
|
+
# A container for messages generated by processing that need to be passed
|
4
|
+
# back to the handler.
|
5
|
+
class Result
|
6
|
+
attr_reader :messages, :primary_attributes, :row_num
|
7
|
+
|
8
|
+
def initialize(messages:, successful:, row_num: nil, primary_attributes: nil)
|
9
|
+
@messages = messages
|
10
|
+
@successful = successful
|
11
|
+
@row_num = row_num
|
12
|
+
@primary_attributes = primary_attributes
|
13
|
+
end
|
14
|
+
|
15
|
+
def successful?
|
16
|
+
@successful
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
class BulkProcessor
|
2
|
+
class CSVProcessor
|
3
|
+
# An abstract implementation of the RowProcessor role. This class implements
|
4
|
+
# `#result` by returning a `Result`. To subclass, just implement
|
5
|
+
# `#process!` to handle the row.
|
6
|
+
#
|
7
|
+
# The row will be considered a failure by default. After a row is successfully
|
8
|
+
# processed, set `self.successful = true`. If there are any messages that
|
9
|
+
# should be passed back to the Handler, add them to the `#messages` array.
|
10
|
+
#
|
11
|
+
# You can optionally override `#primary_keys` so that the result returned
|
12
|
+
# has more natural identifiers than just the row number. For example, by
|
13
|
+
# setting this to ['species', 'name'] (for the PetRowProcessor example from
|
14
|
+
# the README), the result would have `#primary_attributes` like
|
15
|
+
#
|
16
|
+
# { 'species' => 'dog', 'name' => 'Fido' }
|
17
|
+
#
|
18
|
+
class RowProcessor
|
19
|
+
attr_reader :messages
|
20
|
+
|
21
|
+
def initialize(row, row_num:, payload:)
|
22
|
+
@row = row
|
23
|
+
@row_num = row_num
|
24
|
+
@payload = payload
|
25
|
+
@successful = false
|
26
|
+
@messages = []
|
27
|
+
end
|
28
|
+
|
29
|
+
def process!
|
30
|
+
raise NotImplementedError,
|
31
|
+
"#{self.class.name} must implement #{__method__}"
|
32
|
+
end
|
33
|
+
|
34
|
+
def successful?
|
35
|
+
@successful
|
36
|
+
end
|
37
|
+
|
38
|
+
def result
|
39
|
+
Result.new(messages: messages, row_num: row_num,
|
40
|
+
primary_attributes: primary_attrs, successful: @successful)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
attr_reader :row, :row_num, :payload
|
46
|
+
attr_writer :successful
|
47
|
+
|
48
|
+
# Override this with an array of column names that can be used to uniquely
|
49
|
+
# identify a row, if you'd prefer to not identify rows by row number
|
50
|
+
def primary_keys
|
51
|
+
[]
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [Hash<String, String>] the set of primary keys and their values
|
55
|
+
# for this row
|
56
|
+
def primary_attrs
|
57
|
+
row.slice(*primary_keys)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -1,4 +1,7 @@
|
|
1
|
-
require_relative 'no_op_handler'
|
1
|
+
require_relative 'csv_processor/no_op_handler'
|
2
|
+
require_relative 'csv_processor/no_op_post_processor'
|
3
|
+
require_relative 'csv_processor/result'
|
4
|
+
require_relative 'csv_processor/row_processor'
|
2
5
|
|
3
6
|
class BulkProcessor
|
4
7
|
# An abstract implementation of the CSVProcessor role. Provides
|
@@ -9,7 +12,7 @@ class BulkProcessor
|
|
9
12
|
#
|
10
13
|
# The common use case covered by this class' implementation of `#start` is
|
11
14
|
#
|
12
|
-
# 1. Iteratively process each
|
15
|
+
# 1. Iteratively process each row
|
13
16
|
# 2. Accumulate the results (did the processing succeed? what were the error
|
14
17
|
# messages?)
|
15
18
|
# 3. Send the results to an instance of the Handler role.
|
@@ -26,6 +29,12 @@ class BulkProcessor
|
|
26
29
|
# The `required_columns` method must still be implemented in a subclass
|
27
30
|
#
|
28
31
|
class CSVProcessor
|
32
|
+
# Since the first data row in a CSV is row 2, but will have index 0 in
|
33
|
+
# the items array, we need to offset the index by 2 when we add a row
|
34
|
+
# identifier to all error messages.
|
35
|
+
FIRST_ROW_OFFSET = 2
|
36
|
+
private_constant :FIRST_ROW_OFFSET
|
37
|
+
|
29
38
|
# @return [RowProcessor] a class that implements the RowProcessor interface
|
30
39
|
def self.row_processor_class
|
31
40
|
raise NotImplementedError,
|
@@ -37,6 +46,11 @@ class BulkProcessor
|
|
37
46
|
NoOpHandler
|
38
47
|
end
|
39
48
|
|
49
|
+
# @return [PostProcessor] a class that implements the PostProcessor role
|
50
|
+
def self.post_processor_class
|
51
|
+
NoOpPostProcessor
|
52
|
+
end
|
53
|
+
|
40
54
|
# @return [Array<String>] column headers that must be present
|
41
55
|
def self.required_columns
|
42
56
|
raise NotImplementedError,
|
@@ -51,27 +65,22 @@ class BulkProcessor
|
|
51
65
|
[]
|
52
66
|
end
|
53
67
|
|
54
|
-
def initialize(
|
55
|
-
@records = records
|
68
|
+
def initialize(csv, payload: {})
|
56
69
|
@payload = payload
|
57
|
-
@
|
58
|
-
@
|
70
|
+
@row_processors = csv.map.with_index(&method(:row_processor))
|
71
|
+
@results = []
|
59
72
|
end
|
60
73
|
|
61
|
-
# Iteratively process each
|
62
|
-
# off to the handler. If an unrescued error is raised for any
|
63
|
-
# processing will halt for all remaining
|
74
|
+
# Iteratively process each row, accumulate the results, and pass those
|
75
|
+
# off to the handler. If an unrescued error is raised for any row,
|
76
|
+
# processing will halt for all remaining rows and `#fail!` will be
|
64
77
|
# invoked on the handler.
|
65
78
|
def start
|
66
|
-
|
67
|
-
processor = row_processor(record)
|
79
|
+
row_processors.each do |processor|
|
68
80
|
processor.process!
|
69
|
-
|
70
|
-
successes[index] = processor.messages
|
71
|
-
else
|
72
|
-
errors[index] = processor.messages
|
73
|
-
end
|
81
|
+
results << processor.result
|
74
82
|
end
|
83
|
+
post_processes
|
75
84
|
handler.complete!
|
76
85
|
rescue Exception => exception
|
77
86
|
handler.fail!(exception)
|
@@ -84,15 +93,21 @@ class BulkProcessor
|
|
84
93
|
|
85
94
|
private
|
86
95
|
|
87
|
-
attr_reader :
|
96
|
+
attr_reader :row_processors, :payload, :results
|
88
97
|
|
89
98
|
def handler
|
90
|
-
self.class.handler_class.new(payload: payload,
|
91
|
-
|
99
|
+
self.class.handler_class.new(payload: payload, results: results)
|
100
|
+
end
|
101
|
+
|
102
|
+
def row_processor(row, index)
|
103
|
+
row_num = index + FIRST_ROW_OFFSET
|
104
|
+
self.class.row_processor_class.new(row, row_num: row_num, payload: payload)
|
92
105
|
end
|
93
106
|
|
94
|
-
def
|
95
|
-
self.class.
|
107
|
+
def post_processes
|
108
|
+
post_processor = self.class.post_processor_class.new(row_processors)
|
109
|
+
post_processor.start
|
110
|
+
results.concat(post_processor.results)
|
96
111
|
end
|
97
112
|
end
|
98
113
|
end
|
@@ -2,7 +2,7 @@ class BulkProcessor
|
|
2
2
|
# Force encode a stream into UTF-8 by removing invalid and undefined
|
3
3
|
# characters.
|
4
4
|
class StreamEncoder
|
5
|
-
ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }
|
5
|
+
ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }.freeze
|
6
6
|
private_constant :ENCODING_OPTIONS
|
7
7
|
|
8
8
|
def initialize(stream)
|
@@ -3,19 +3,20 @@ require 'csv'
|
|
3
3
|
class BulkProcessor
|
4
4
|
# A Wrapper on CSV that validates column headers.
|
5
5
|
class ValidatedCSV
|
6
|
-
PARSING_OPTIONS = { headers: true, header_converters: :downcase }
|
6
|
+
PARSING_OPTIONS = { headers: true, header_converters: :downcase }.freeze
|
7
7
|
private_constant :PARSING_OPTIONS
|
8
8
|
|
9
9
|
# This cryptic message usually just means that the header row contains a
|
10
10
|
# blank field; in Ruby ~> 2.1.5 it is the error message for a NoMethodError
|
11
11
|
# raised when parsing a CSV.
|
12
|
-
BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass"
|
12
|
+
BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass".freeze
|
13
13
|
private_constant :BAD_HEADERS_ERROR_MSG
|
14
14
|
|
15
|
-
MISSING_COLUMN_MESSAGE =
|
15
|
+
MISSING_COLUMN_MESSAGE =
|
16
|
+
'Missing or malformed column header, is one of them blank?'.freeze
|
16
17
|
private_constant :MISSING_COLUMN_MESSAGE
|
17
18
|
|
18
|
-
attr_reader :errors
|
19
|
+
attr_reader :errors
|
19
20
|
|
20
21
|
def initialize(stream, required_headers, optional_headers)
|
21
22
|
@stream = stream
|
@@ -33,11 +34,11 @@ class BulkProcessor
|
|
33
34
|
@errors = []
|
34
35
|
|
35
36
|
if missing_headers.any?
|
36
|
-
errors << "Missing required column(s): #{missing_headers.join(', ')}"
|
37
|
+
errors << "Missing required column(s): #{missing_headers.join(', ')}".freeze
|
37
38
|
end
|
38
39
|
|
39
40
|
if extra_headers.any?
|
40
|
-
errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}"
|
41
|
+
errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}".freeze
|
41
42
|
end
|
42
43
|
|
43
44
|
if csv.headers.any? { |header| header.nil? || header.strip == '' }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulk-processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Collier, Justin Richard
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activejob
|
@@ -103,8 +103,11 @@ files:
|
|
103
103
|
- lib/bulk_processor.rb
|
104
104
|
- lib/bulk_processor/config.rb
|
105
105
|
- lib/bulk_processor/csv_processor.rb
|
106
|
+
- lib/bulk_processor/csv_processor/no_op_handler.rb
|
107
|
+
- lib/bulk_processor/csv_processor/no_op_post_processor.rb
|
108
|
+
- lib/bulk_processor/csv_processor/result.rb
|
109
|
+
- lib/bulk_processor/csv_processor/row_processor.rb
|
106
110
|
- lib/bulk_processor/job.rb
|
107
|
-
- lib/bulk_processor/no_op_handler.rb
|
108
111
|
- lib/bulk_processor/stream_encoder.rb
|
109
112
|
- lib/bulk_processor/validated_csv.rb
|
110
113
|
- lib/bulk_processor/version.rb
|