csv_model 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,128 @@
1
+ using CSVModel::Extensions
2
+
3
module CSVModel
  # One data row of a CSV document, paired with the header describing its
  # columns and with the options used to find, populate and save the model
  # object that backs the row.
  #
  # #errors, #status and #valid? trigger processing of the row the first time
  # any of them is called (see the wrapper defined after the public methods).
  class Row
    include Utilities::Options

    attr_reader :data, :header, :marked_as_duplicate

    # @param header  object answering #columns, #column_index and
    #                #primary_key_columns
    # @param data    the row's cell values, read by integer position
    #                (presumably an Array -- confirm against the caller)
    # @param options options consulted through Utilities::Options#option
    #                (:dry_run and :model are read by this class)
    def initialize(header, data, options = {})
      @header = header
      @data = data
      @options = options
    end

    # Looks up a cell value.
    #
    # @param value a column key understood by header.column_index, or a
    #              zero-based integer position
    # @return the cell value, or nil when +value+ resolves to neither a known
    #         column nor a non-negative integer
    def index(value)
      position = column_index(value) || value
      # Integer rather than Fixnum: Fixnum was folded into Integer in Ruby 2.4
      # and the constant no longer exists in Ruby 3.2+.
      data[position] if position.is_a?(Integer) && position >= 0
    end
    alias_method :[], :index

    # @return [Array] flattened list holding the duplicate-row message (dry
    #         runs only) followed by the model instance's errors when the
    #         instance is not valid
    def errors
      messages = []
      messages << duplicate_row_error if is_dry_run? && marked_as_duplicate?
      messages << model_instance.errors unless model_instance.valid?
      messages.flatten
    end

    # Identity of the row: the single primary-key value, an array of values
    # for a multi-column primary key, or the whole row data when the header
    # declares no primary key columns.
    def key
      cols = primary_key_columns
      if cols.one?
        index(cols.first.key)
      elsif cols.any?
        cols.map { |col| index(col.key) }
      else
        data
      end
    end

    # @return [Boolean] true once #mark_as_duplicate has been called
    def marked_as_duplicate?
      !!marked_as_duplicate
    end

    # Flags this row as a duplicate of another row.
    def mark_as_duplicate
      @marked_as_duplicate = true
    end

    # @return whatever the wrapped model instance reports as its status
    def status
      model_instance.status
    end

    # @return [Boolean] true when #errors is empty
    def valid?
      errors.empty?
    end

    # Re-define the result-reporting methods so the row is processed before
    # they run. process_row does nothing after its first invocation.
    %i[errors status valid?].each do |method_name|
      original = instance_method(method_name)
      define_method(method_name) do |*args, &block|
        process_row
        original.bind(self).call(*args, &block)
      end
    end

    private

    # Model attribute => cell value for every column in the header (memoized).
    def all_attributes
      @all_attributes ||= column_attributes_with_values(columns)
    end

    def columns
      header.columns
    end

    # Builds a hash mapping each column's model_attribute to this row's value
    # for that column.
    def column_attributes_with_values(cols)
      cols.map { |col| [col.model_attribute, index(col.key)] }.to_h
    end

    def column_index(key)
      header.column_index(key)
    end

    # Message reported for a duplicate row; names the primary key columns
    # when there are any.
    def duplicate_row_error
      names = primary_key_columns.map(&:name)
      names.any? ? "Duplicate #{names.join(', ')}" : "Duplicate row"
    end

    def is_dry_run?
      option(:dry_run, false)
    end

    def model
      option(:model)
    end

    # The object backing this row (memoized): an existing record found via
    # :find_row_model, otherwise a new one from :new_row_model, wrapped in an
    # ObjectWithStatusSnapshot.
    def model_instance
      @model_instance ||= begin
        record = inherit_or_delegate(:find_row_model, key_attributes)
        record ||= inherit_or_delegate(:new_row_model, key_attributes)
        CSVModel::ObjectWithStatusSnapshot.new(record)
      end
    end

    # Attributes used to find or build the model instance (memoized): the
    # primary key columns when declared, otherwise every column.
    def key_attributes
      @key_attributes ||= begin
        cols = primary_key_columns.any? ? primary_key_columns : columns
        column_attributes_with_values(cols)
      end
    end

    def primary_key_columns
      header.primary_key_columns
    end

    # Assigns the row's attributes to the model instance and saves it, once.
    def process_row
      return if @processed
      # The flag is set before the work below, so the row is not processed
      # again even if one of these steps raises.
      @processed = true

      model_instance.assign_attributes(all_attributes)
      model_instance.mark_as_duplicate if marked_as_duplicate?
      model_instance.save(dry_run: is_dry_run?)
    end

    # Tries +method+ on this row first, then on the configured model.
    # NOTE(review): `try` is presumably provided by the CSVModel::Extensions
    # refinement activated at the top of this file -- confirm.
    def inherit_or_delegate(method, *args)
      try(method, *args) || model.try(method, *args)
    end
  end
end
@@ -0,0 +1,15 @@
1
+ using CSVModel::Extensions
2
+
3
module CSVModel
  module Utilities
    # Mixin giving an object uniform read access to its +options+, whether
    # they are stored in something indexable with #[] or in an object that
    # exposes each option as a reader method.
    module Options
      attr_reader :options

      # Reads one option.
      #
      # The value is looked up with options[key] first and, when that yields
      # nil, by calling the method named +key+ on options.
      #
      # @param key     option name
      # @param default value returned when the option is absent (nil)
      # @return the option value; an explicitly supplied +false+ is returned
      #         as-is instead of being replaced by +default+
      #
      # NOTE(review): `try` is presumably provided by the
      # CSVModel::Extensions refinement activated at the top of this file
      # and is assumed to return nil when the receiver cannot handle the
      # call -- confirm.
      def option(key, default = nil)
        value = options.try(:[], key)
        value = options.try(key) if value.nil?
        value.nil? ? default : value
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module CSVModel
  # Version of the csv_model gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.1.0".freeze
end
data/lib/csv_model.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'csv'
2
+ require 'delegate'
3
+
4
+ require 'csv_model/errors'
5
+ require 'csv_model/extensions'
6
+ require 'csv_model/record_status'
7
+ require 'csv_model/utilities'
8
+
9
+ require 'csv_model/column'
10
+ require 'csv_model/row'
11
+ require 'csv_model/header_row'
12
+ require 'csv_model/model'
13
+ require 'csv_model/object_with_status_snapshot'
14
+ require 'csv_model/version'
15
+
16
# Top-level namespace of the csv_model library. It is intentionally empty
# here; this file's `require` statements load the library's components.
module CSVModel; end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
# Specs for CSVModel::Column: primary-key flag, normalized key and the
# model attribute derived from the column name.
describe CSVModel::Column do

  let(:column_name) { "Column One" }
  subject { described_class.new(column_name) }

  describe "#is_primary_key?" do
    it "defaults to false" do
      expect(subject.is_primary_key?).to eq(false)
    end

    it "returns true if options indicate column is a primary key" do
      column = described_class.new(column_name, primary_key: true)
      expect(column.is_primary_key?).to eq(true)
    end
  end

  describe "#key" do
    it "returns the column key" do
      expect(subject.key).to eq("column one")
    end
  end

  describe "#model_attribute" do
    it "returns a symbolized version of the column key" do
      expect(subject.model_attribute).to eq(:column_one)
    end
  end

end
@@ -0,0 +1,278 @@
1
+ require 'spec_helper'
2
+
3
# Specs for CSVModel::HeaderRow: column lookup, header validation
# (duplicate / illegal / missing columns) and primary key resolution.
# The "internals" group exercises non-public helpers through #send.
describe CSVModel::HeaderRow do

  let(:data) { ["Column One"] }
  subject { described_class.new(data) }

  # TODO: should detect invalid options
  # e.g. when primary_key is not a subset of columns
  # raise ArgumentError, 'Message'

  describe "#columns" do
    it "returns an empty array when no columns exist" do
      header = described_class.new([])
      expect(header.columns).to eq([])
    end

    it "returns a column object that describes each column" do
      columns = subject.columns
      expect(columns).to be_an(Array)
      expect(columns.size).to eq(1)

      column = columns.first
      expect(column.name).to eq(data.first)
    end
  end

  describe "#column_index" do
    it "returns nil when a column does not exist" do
      expect(subject.column_index("Non-existent Column")).to eq(nil)
    end

    it "finds the index of a column given its value" do
      expect(subject.column_index("Column One")).to eq(0)
    end

    it "finds the index of a column regardless of capitalization" do
      expect(subject.column_index("column ONE")).to eq(0)
    end

    it "finds the index of a column regardless of leading whitespace" do
      expect(subject.column_index(" Column One")).to eq(0)
    end

    it "finds the index of a column regardless of trailing whitespace" do
      expect(subject.column_index("Column One\t")).to eq(0)
    end

    it "finds the index of a column given its symbolic representation" do
      expect(subject.column_index("Column One".to_sym)).to eq(0)
    end
  end

  describe "#column_count" do
    it "returns 0 when no columns" do
      header = described_class.new([])
      expect(header.column_count).to eq(0)
    end

    it "returns the number of columns" do
      expect(subject.column_count).to eq(1)
    end
  end

  describe "#errors" do
    it "returns an empty array when header is valid" do
      expect(subject.errors).to eq([])
    end

    it "returns duplicate column message when header has duplicate columns" do
      header = described_class.new(data + data)
      expect(header.errors).to eq(["Multiple columns found for Column One, column headings must be unique"])
    end

    it "returns illegal column message when header contains illegal columns" do
      header = described_class.new(data, legal_columns: ["Column Two"])
      expect(header.errors).to eq(["Unknown column Column One"])
    end

    it "returns required column message when header is missing required columns" do
      header = described_class.new(data, required_columns: ["Column Two"])
      expect(header.errors).to eq(["Missing column Column Two"])
    end
  end

  describe "#has_column?" do
    it "returns false when a column does not exist" do
      expect(subject.has_column?("Non-existent Column")).to eq(false)
    end

    it "returns true when a column does exist" do
      expect(subject.has_column?("Column One")).to eq(true)
    end
  end

  describe "#primary_key_columns" do
    it "returns empty array when no primary key columns specified" do
      expect(subject.primary_key_columns).to eq([])
    end

    context "when the row has a single primary key column" do
      subject { described_class.new(data, OpenStruct.new(primary_key: ["Column One"])) }

      it "has a single key column" do
        expect(subject.primary_key_columns.count).to eq(1)
      end

      it "returns the key column" do
        column = subject.primary_key_columns.first
        expect(column.name).to eq("Column One")
      end

      context "even if the column capitalization does not match" do
        subject { described_class.new(data, OpenStruct.new(primary_key: ["column one"])) }

        it "returns the key column" do
          column = subject.primary_key_columns.first
          expect(column.name).to eq("Column One")
        end
      end
    end

    context "when the row has multiple primary key columns" do
      let(:data) { ["Column One", "Column Two"] }
      subject { described_class.new(data, primary_key: data) }

      it "returns the key columns" do
        expect(subject.primary_key_columns.collect { |x| x.name }).to eq(["Column One", "Column Two"])
      end
    end
  end

  describe "#valid?" do
    it "returns true when header is valid" do
      expect(subject.valid?).to eq(true)
    end

    it "returns false when header has duplicate columns" do
      header = described_class.new(data + data)
      expect(header.valid?).to eq(false)
    end

    it "returns false when header contains illegal columns" do
      header = described_class.new(data, legal_columns: ["Column Two"])
      expect(header.valid?).to eq(false)
    end

    it "returns false when header is missing required columns" do
      header = described_class.new(data, required_columns: ["Column Two"])
      expect(header.valid?).to eq(false)
    end
  end

  describe "internals" do
    describe "#duplicate_column_names" do
      it "doesn't respond to duplicate_column_names" do
        expect(subject.respond_to?(:duplicate_column_names)).to eq(false)
      end

      it "returns an empty array when no duplicate columns exist" do
        expect(subject.send(:duplicate_column_names)).to eq([])
      end

      it "returns the duplicate column names when duplicate columns exist" do
        # data is mutated before subject is first built, so the header sees it
        data << data.first
        expect(subject.send(:duplicate_column_names)).to eq(["Column One"])
      end
    end

    describe "#has_duplicate_columns?" do
      it "doesn't respond to has_duplicate_columns?" do
        expect(subject.respond_to?(:has_duplicate_columns?)).to eq(false)
      end

      it "returns false when no duplicate columns exist" do
        expect(subject.send(:has_duplicate_columns?)).to eq(false)
      end

      it "returns true when duplicate columns exist" do
        data << data.first
        expect(subject.send(:has_duplicate_columns?)).to eq(true)
      end
    end

    describe "#has_illegal_columns?" do
      it "doesn't respond to has_illegal_columns?" do
        expect(subject.respond_to?(:has_illegal_columns?)).to eq(false)
      end

      it "returns false when no legal columns specified" do
        expect(subject.send(:has_illegal_columns?)).to eq(false)
      end

      context "with legal columns specified" do
        subject { described_class.new(data, legal_columns: ["Column One"]) }

        it "returns false when no illegal columns are present" do
          expect(subject.send(:has_illegal_columns?)).to eq(false)
        end

        it "returns true when an illegal column is present" do
          data[0] = "Column Two"
          expect(subject.send(:has_illegal_columns?)).to eq(true)
        end
      end
    end

    describe "#has_required_columns?" do
      it "doesn't respond to has_required_columns?" do
        expect(subject.respond_to?(:has_required_columns?)).to eq(false)
      end

      it "returns true when no required columns specified" do
        expect(subject.send(:has_required_columns?)).to eq(true)
      end

      context "with required columns specified" do
        subject { described_class.new(data, required_columns: ["Column One"]) }

        it "returns true when all required columns are present" do
          expect(subject.send(:has_required_columns?)).to eq(true)
        end

        it "returns false when a required column is missing" do
          data[0] = "Column Two"
          expect(subject.send(:has_required_columns?)).to eq(false)
        end
      end
    end

    describe "#illegal_column_names" do
      # The method name was previously misspelled here, which made this
      # visibility check pass regardless of the implementation.
      it "doesn't respond to illegal_column_names" do
        expect(subject.respond_to?(:illegal_column_names)).to eq(false)
      end

      it "returns empty array when no legal columns specified" do
        expect(subject.send(:illegal_column_names)).to eq([])
      end

      context "with legal columns specified" do
        subject { described_class.new(data, OpenStruct.new(legal_columns: ["Column One"])) }

        it "returns empty array when no illegal columns are present" do
          expect(subject.send(:illegal_column_names)).to eq([])
        end

        it "returns array of illegal column names when an illegal column is present" do
          data[0] = "Column Two"
          expect(subject.send(:illegal_column_names)).to eq(["Column Two"])
        end
      end
    end

    describe "#missing_column_names" do
      it "doesn't respond to missing_column_names" do
        expect(subject.respond_to?(:missing_column_names)).to eq(false)
      end

      it "returns empty array when no required columns specified" do
        expect(subject.send(:missing_column_names)).to eq([])
      end

      context "with required columns specified" do
        subject { described_class.new(data, OpenStruct.new(required_columns: ["Column One"])) }

        it "returns empty array when all required columns are present" do
          expect(subject.send(:missing_column_names)).to eq([])
        end

        it "returns array of missing column names when a required column is missing" do
          data[0] = "Column Two"
          expect(subject.send(:missing_column_names)).to eq(["Column One"])
        end
      end
    end
  end

end
@@ -0,0 +1,192 @@
1
+ require 'spec_helper'
2
+
3
# Specs for CSVModel::Model: option propagation to header and rows, row
# counting, structural validation of the raw data, and (under "internals")
# the parsing step including custom header/row classes and duplicate-row
# detection.
describe CSVModel::Model do

  let(:header_row) { ["Column One", "Column Two"] }
  let(:data_row) { ["Value One", "Value Two"] }
  # Tab-separated cells, one line per row, joined with the input record separator
  let(:data) { [header_row.join("\t"), data_row.join("\t")].join($/) }
  subject { described_class.new(data) }

  describe "header" do
    context "with options" do
      let(:options) { { dry_run: true } }
      subject { described_class.new(data, options) }

      it "instantiates header with options" do
        expect(subject.header.options).to eq(options)
      end
    end
  end

  describe "rows" do
    it "has a row for each data row" do
      expect(subject.rows.count).to eq(1)
    end

    context "with options" do
      let(:options) { { dry_run: true } }
      subject { described_class.new(data, options) }

      it "instantiates rows with options" do
        expect(subject.rows.first.options).to eq(options)
      end
    end
  end

  describe "#row_count" do
    it "returns 0 when no data" do
      expect(described_class.new("").row_count).to eq(0)
    end

    it "returns the number of rows" do
      expect(subject.row_count).to eq(1)
    end
  end

  describe "#structure_errors" do
    it "returns an empty array when CSV data has integrity" do
      expect(subject.structure_errors).to eq([])
    end

    it "returns a parser specific error when CSV data is malformed" do
      expect(CSV).to receive(:parse).and_raise(CSV::MalformedCSVError.new("foo"))
      expect(subject.structure_errors).to eq(["The data could not be parsed. Please check for formatting errors: foo"])
    end

    it "returns a parser specific error when CSV data is inconsistent" do
      data_row.shift
      expect(subject.structure_errors).to eq(["Each row should have exactly 2 columns. Error on row 2."])
    end

    it "returns a generic error when CSV data is invalid" do
      expect(CSV).to receive(:parse).and_raise(Exception.new("foo"))
      expect(subject.structure_errors).to eq(["An unexpected error occurred. Please try again or contact support if the issue persists: foo"])
    end

    it "returns header errors when header is invalid" do
      # Replace the second heading with a copy of the first => duplicate column
      header_row.pop
      header_row << header_row.first
      expect(subject.structure_errors).to eq(["Multiple columns found for Column One, column headings must be unique"])
    end

    context "with required columns" do
      let(:options) { OpenStruct.new(required_columns: ["Column Two", "Column Three", "Column Four"]) }
      subject { described_class.new(data, options) }

      it "returns errors for each missing required column" do
        expect(subject.structure_errors).to eq(["Missing column Column Three", "Missing column Column Four"])
      end
    end
  end

  describe "#structure_valid?" do
    it "returns true when CSV data has integrity" do
      expect(subject.structure_valid?).to eq(true)
    end

    it "returns false when CSV data is malformed" do
      expect(CSV).to receive(:parse).and_raise(CSV::MalformedCSVError.new("foo"))
      expect(subject.structure_valid?).to eq(false)
    end

    it "returns false when CSV data is invalid" do
      expect(CSV).to receive(:parse).and_raise(Exception.new("foo"))
      expect(subject.structure_valid?).to eq(false)
    end

    it "returns false when header is invalid" do
      # Drop the first heading and duplicate the remaining one
      header_row.shift
      header_row << header_row.first
      expect(subject.structure_valid?).to eq(false)
    end
  end

  describe "internals" do
    describe "#parse_data" do
      context "with a custom header row class" do
        # Anonymous subclass: avoids leaking a global constant from the spec
        let(:header_class) { Class.new(CSVModel::HeaderRow) }

        subject { described_class.new(data, header_class: header_class) }

        before do
          subject.send(:parse_data)
        end

        it "uses the custom class in parsing" do
          expect(subject.header.class).to eq(header_class)
        end
      end

      context "with a custom row class" do
        # Anonymous subclass: avoids leaking a global constant from the spec
        let(:row_class) { Class.new(CSVModel::Row) }

        subject { described_class.new(data, row_class: row_class) }

        before do
          subject.send(:parse_data)
        end

        it "uses the custom class in parsing" do
          expect(subject.rows.count).to eq(1)
          expect(subject.rows.first.class).to eq(row_class)
        end
      end

      context "with duplicate rows" do
        let(:data) { [header_row.join("\t"), data_row.join("\t"), data_row.join("\t"), data_row.join("\t")].join($/) }

        before do
          subject.send(:parse_data)
        end

        context "with a single column primary key" do
          subject { described_class.new(data, primary_key: [header_row.first]) }

          context "when primary key values are present" do
            it "does not mark the first row as a duplicate" do
              expect(subject.rows.first.marked_as_duplicate?).to eq(false)
            end

            it "marks subsequent instances as duplicates" do
              rows = subject.rows
              rows.shift
              rows.each { |row| expect(row.marked_as_duplicate?).to eq(true) }
            end
          end

          context "when no primary key values are present" do
            let(:data_row) { ["", ""] }

            it "does not mark any row as a duplicate" do
              subject.rows.each { |row| expect(row.marked_as_duplicate?).to eq(false) }
            end
          end
        end

        # NOTE(review): this context does not configure a primary key at all;
        # it runs against the default subject. Presumably a subject built with
        # `primary_key: header_row` was intended -- confirm before changing.
        context "with a compound primary key" do
          context "when primary key values are present" do
            it "does not mark the first row as a duplicate" do
              expect(subject.rows.first.marked_as_duplicate?).to eq(false)
            end

            it "marks subsequent instances as duplicates" do
              rows = subject.rows
              rows.shift
              rows.each { |row| expect(row.marked_as_duplicate?).to eq(true) }
            end
          end

          context "when no primary key values are present" do
            let(:data_row) { ["", ""] }

            it "does not mark any row as a duplicate" do
              subject.rows.each { |row| expect(row.marked_as_duplicate?).to eq(false) }
            end
          end
        end

      end
    end
  end
end