csv_model 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ using CSVModel::Extensions
2
+
3
module CSVModel
  # One data row of a parsed document, paired with the header object that
  # describes its columns.
  #
  # The public #errors, #status and #valid? methods are wrapped (see the
  # +define_method+ loop below) so that each of them runs #process_row first.
  # #process_row assigns the row's values to the backing model instance and
  # saves it, at most once per row.
  class Row
    include Utilities::Options

    attr_reader :data, :header, :marked_as_duplicate

    # @param header [#columns, #column_index, #primary_key_columns]
    #   object describing this row's columns
    # @param data [Object] cell values, read positionally via +data[index]+
    #   (presumably an Array -- confirm against the caller)
    # @param options [Object] settings read through Utilities::Options#option;
    #   this class reads +:dry_run+ and +:model+
    def initialize(header, data, options = {})
      @header = header
      @data = data
      @options = options
    end

    # Fetches a cell by column key or by position.
    #
    # +value+ is first resolved through the header; when the header yields
    # nothing for it, +value+ itself is used as the position.
    #
    # @param value [Object] a column key understood by the header, or an index
    # @return [Object, nil] the cell, or nil when the resolved position is not
    #   a non-negative Integer
    def index(value)
      position = column_index(value) || value
      # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and
      # removed in 3.2; on older Rubies every Fixnum is also an Integer.
      data[position] if position.is_a?(Integer) && position >= 0
    end
    alias_method :[], :index

    # Collects the problems found for this row.
    #
    # A duplicate-row message is included only on dry runs for rows marked as
    # duplicates; the model instance's own errors are included when it is not
    # valid.
    #
    # @return [Array] flattened list of errors (empty when there are none)
    def errors
      found = []
      found << duplicate_row_error if is_dry_run? && marked_as_duplicate?
      found << model_instance.errors unless model_instance.valid?
      found.flatten
    end

    # Value identifying this row.
    #
    # @return [Object] the single primary-key cell when the header has exactly
    #   one primary-key column, an Array of cells when it has several, and the
    #   whole +data+ object when it has none
    def key
      key_columns = primary_key_columns
      return index(key_columns.first.key) if key_columns.one?
      return key_columns.collect { |column| index(column.key) } if key_columns.any?

      data
    end

    # @return [Boolean] whether #mark_as_duplicate has been called on this row
    def marked_as_duplicate?
      !!marked_as_duplicate
    end

    # Flags this row as a duplicate.
    def mark_as_duplicate
      @marked_as_duplicate = true
    end

    # @return [Object] whatever the wrapped model instance reports as its status
    def status
      model_instance.status
    end

    # @return [Boolean] true when #errors is empty
    def valid?
      errors.empty?
    end

    # Wrap the three methods above so that each call processes the row first.
    # The original implementations are captured as UnboundMethods and re-bound
    # to the receiver on every call.
    [:errors, :status, :valid?].each do |method_name|
      original = instance_method(method_name)
      define_method(method_name) do |*args, &block|
        process_row
        original.bind(self).call(*args, &block)
      end
    end

    private

    # Model attributes for every column of the header, memoized.
    def all_attributes
      @all_attributes ||= column_attributes_with_values(columns)
    end

    def columns
      header.columns
    end

    # Builds a Hash mapping each column's +model_attribute+ to this row's cell
    # for that column.
    def column_attributes_with_values(cols)
      cols.each_with_object({}) do |column, attributes|
        attributes[column.model_attribute] = index(column.key)
      end
    end

    def column_index(key)
      header.column_index(key)
    end

    # Message used for duplicate rows; names the primary-key columns when the
    # header has any.
    def duplicate_row_error
      names = primary_key_columns.collect { |column| column.name }
      names.any? ? "Duplicate #{names.join(', ')}" : "Duplicate row"
    end

    def is_dry_run?
      option(:dry_run, false)
    end

    def model
      option(:model)
    end

    # Finds (via +find_row_model+) or builds (via +new_row_model+) the object
    # backing this row and wraps it in an ObjectWithStatusSnapshot. Memoized.
    def model_instance
      @model_instance ||= begin
        record = inherit_or_delegate(:find_row_model, key_attributes)
        record ||= inherit_or_delegate(:new_row_model, key_attributes)
        CSVModel::ObjectWithStatusSnapshot.new(record)
      end
    end

    # Attributes used to look up or build the model instance: the primary-key
    # columns when the header has any, otherwise every column. Memoized; the
    # column list is only computed on the first call.
    def key_attributes
      @key_attributes ||= begin
        key_columns = primary_key_columns
        column_attributes_with_values(key_columns.any? ? key_columns : columns)
      end
    end

    def primary_key_columns
      header.primary_key_columns
    end

    # Pushes this row into the model instance. Guarded by +@processed+ so the
    # work happens only once, no matter how many wrapped methods are called.
    def process_row
      return if @processed
      @processed = true

      model_instance.assign_attributes(all_attributes)
      model_instance.mark_as_duplicate if marked_as_duplicate?
      model_instance.save(dry_run: is_dry_run?)
    end

    # Tries +method+ on this row first and falls back to the +:model+ option.
    # +try+ is not defined in this class; presumably it is supplied by the
    # CSVModel::Extensions refinement activated at the top of the file.
    def inherit_or_delegate(method, *args)
      try(method, *args) || model.try(method, *args)
    end
  end
end
@@ -0,0 +1,15 @@
1
+ using CSVModel::Extensions
2
+
3
module CSVModel
  module Utilities
    # Mixin that exposes an +options+ reader and a keyed lookup with defaults.
    # The including class is expected to set +@options+ itself.
    module Options
      attr_reader :options

      # Looks up +key+ in +options+.
      #
      # Indexed access (+options[key]+) is tried first, then a reader method
      # named +key+; both go through +try+, which is not defined here
      # (presumably it comes from the CSVModel::Extensions refinement).
      #
      # Only +nil+ counts as "not set": an explicitly stored +false+ is
      # returned as-is instead of being replaced by +default+.
      #
      # @param key [Symbol] option name
      # @param default [Object] value returned when the option is not set
      # @return [Object] the stored option or +default+
      def option(key, default = nil)
        value = options.try(:[], key)
        value = options.try(key) if value.nil?
        value.nil? ? default : value
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module CSVModel
  # Release version of the gem. Frozen so the constant cannot be mutated in
  # place by callers.
  VERSION = "0.1.0".freeze
end
data/lib/csv_model.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'csv'
2
+ require 'delegate'
3
+
4
+ require 'csv_model/errors'
5
+ require 'csv_model/extensions'
6
+ require 'csv_model/record_status'
7
+ require 'csv_model/utilities'
8
+
9
+ require 'csv_model/column'
10
+ require 'csv_model/row'
11
+ require 'csv_model/header_row'
12
+ require 'csv_model/model'
13
+ require 'csv_model/object_with_status_snapshot'
14
+ require 'csv_model/version'
15
+
16
# Root namespace of the csv_model library. Intentionally empty in this file.
module CSVModel; end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
# Specs for CSVModel::Column: primary-key flag, normalized key and the
# symbolized model attribute derived from the column name.
describe CSVModel::Column do

  let(:column_name) { "Column One" }
  let(:subject) { described_class.new(column_name) }

  describe "#is_primary_key?" do
    it "defaults to false" do
      expect(subject.is_primary_key?).to eq(false)
    end

    # The description previously said "returns false" while asserting true.
    it "returns true if options indicate column is a primary key" do
      column = described_class.new(column_name, primary_key: true)
      expect(column.is_primary_key?).to eq(true)
    end
  end

  describe "#key" do
    it "returns the column key" do
      expect(subject.key).to eq("column one")
    end
  end

  describe "#model_attribute" do
    it "returns a symbolized version of the column key" do
      expect(subject.model_attribute).to eq(:column_one)
    end
  end

end
@@ -0,0 +1,278 @@
1
+ require 'spec_helper'
2
+
3
# Specs for CSVModel::HeaderRow: column lookup, structural validation
# (duplicate / illegal / missing columns) and primary-key column selection.
#
# Several examples mutate +data+ before touching +subject+; this works because
# +let+ is lazy, so the header is only built from the already-mutated array.
describe CSVModel::HeaderRow do

  let(:data) { ["Column One"] }
  let(:subject) { described_class.new(data) }

  # TODO: should detect invalid options
  # e.g. when primary_key is not a subset of columns
  #   raise ArgumentError, 'Message'

  describe "#columns" do
    it "returns an empty array when no columns exist" do
      subject = described_class.new([])
      expect(subject.columns).to eq([])
    end

    it "returns a column object that describes each column" do
      columns = subject.columns
      expect(columns).to be_an(Array)
      expect(columns.size).to eq(1)

      column = columns.first
      expect(column.name).to eq(data.first)
    end
  end

  describe "#column_index" do
    it "returns nil when a column does not exist" do
      expect(subject.column_index("Non-existent Column")).to eq(nil)
    end

    it "finds the index of a column given its value" do
      expect(subject.column_index("Column One")).to eq(0)
    end

    it "finds the index of a column regardless of capitalization" do
      expect(subject.column_index("column ONE")).to eq(0)
    end

    it "finds the index of a column regardless of leading whitespace" do
      expect(subject.column_index(" Column One")).to eq(0)
    end

    it "finds the index of a column regardless of trailing whitespace" do
      expect(subject.column_index("Column One\t")).to eq(0)
    end

    it "finds the index of a column given its symbolic representation" do
      expect(subject.column_index("Column One".to_sym)).to eq(0)
    end
  end

  describe "#column_count" do
    it "returns 0 when no columns" do
      subject = described_class.new([])
      expect(subject.column_count).to eq(0)
    end

    it "returns the number of columns" do
      expect(subject.column_count).to eq(1)
    end
  end

  describe "#errors" do
    it "returns an empty array when header is valid" do
      expect(subject.errors).to eq([])
    end

    it "returns duplicate column message when header has duplicate columns" do
      subject = described_class.new(data + data)
      expect(subject.errors).to eq(["Multiple columns found for Column One, column headings must be unique"])
    end

    it "returns illegal column message when header contains illegal columns" do
      subject = described_class.new(data, legal_columns: ["Column Two"])
      expect(subject.errors).to eq(["Unknown column Column One"])
    end

    it "returns required column message when header is missing required columns" do
      subject = described_class.new(data, required_columns: ["Column Two"])
      expect(subject.errors).to eq(["Missing column Column Two"])
    end
  end

  describe "#has_column?" do
    it "returns false when a column does not exist" do
      expect(subject.has_column?("Non-existent Column")).to eq(false)
    end

    it "returns true when a column does exist" do
      expect(subject.has_column?("Column One")).to eq(true)
    end
  end

  describe "#primary_key_columns" do
    it "returns empty array when no primary key columns specified" do
      expect(subject.primary_key_columns).to eq([])
    end

    context "when the row has a single primary key column" do
      let(:subject) { described_class.new(data, OpenStruct.new(primary_key: ["Column One"])) }

      it "has a single key column" do
        expect(subject.primary_key_columns.count).to eq(1)
      end

      it "returns the key column" do
        column = subject.primary_key_columns.first
        expect(column.name).to eq("Column One")
      end

      context "even if the column capitalization does not match" do
        let(:subject) { described_class.new(data, OpenStruct.new(primary_key: ["column one"])) }

        it "returns the key column" do
          column = subject.primary_key_columns.first
          expect(column.name).to eq("Column One")
        end
      end
    end

    context "when the row has multiple primary key columns" do
      let(:data) { ["Column One", "Column Two"] }
      let(:subject) { described_class.new(data, primary_key: data) }

      it "returns the key columns" do
        expect(subject.primary_key_columns.collect { |x| x.name }).to eq(["Column One", "Column Two"])
      end
    end
  end

  describe "#valid?" do
    it "returns true when header is valid" do
      expect(subject.valid?).to eq(true)
    end

    it "returns false when header has duplicate columns" do
      subject = described_class.new(data + data)
      expect(subject.valid?).to eq(false)
    end

    it "returns false when header contains illegal columns" do
      subject = described_class.new(data, legal_columns: ["Column Two"])
      expect(subject.valid?).to eq(false)
    end

    it "returns false when header is missing required columns" do
      subject = described_class.new(data, required_columns: ["Column Two"])
      expect(subject.valid?).to eq(false)
    end
  end

  # Non-public helpers: each group first checks that the method is not part of
  # the public interface, then exercises it through +send+.
  describe "internals" do
    describe "#duplicate_column_names" do
      it "doesn't respond to duplicate_column_names" do
        expect(subject.respond_to?(:duplicate_column_names)).to eq(false)
      end

      it "returns an empty array when no duplicate columns exist" do
        expect(subject.send(:duplicate_column_names)).to eq([])
      end

      it "returns the duplicate column names when duplicate columns exist" do
        data << data.first
        expect(subject.send(:duplicate_column_names)).to eq(["Column One"])
      end
    end

    describe "#has_duplicate_columns?" do
      it "doesn't respond to has_duplicate_columns?" do
        expect(subject.respond_to?(:has_duplicate_columns?)).to eq(false)
      end

      it "returns false when no duplicate columns exist" do
        expect(subject.send(:has_duplicate_columns?)).to eq(false)
      end

      it "returns true when duplicate columns exist" do
        data << data.first
        expect(subject.send(:has_duplicate_columns?)).to eq(true)
      end
    end

    describe "#has_illegal_columns?" do
      it "doesn't respond to has_illegal_columns?" do
        expect(subject.respond_to?(:has_illegal_columns?)).to eq(false)
      end

      it "returns false when no legal columns specified" do
        expect(subject.send(:has_illegal_columns?)).to eq(false)
      end

      context "with legal columns specified" do
        let(:subject) { described_class.new(data, legal_columns: ["Column One"]) }

        it "returns false when no illegal columns are present" do
          expect(subject.send(:has_illegal_columns?)).to eq(false)
        end

        it "returns true when an illegal column is present" do
          data[0] = "Column Two"
          expect(subject.send(:has_illegal_columns?)).to eq(true)
        end
      end
    end

    describe "#has_required_columns?" do
      it "doesn't respond to has_required_columns?" do
        expect(subject.respond_to?(:has_required_columns?)).to eq(false)
      end

      it "returns true when no required columns specified" do
        expect(subject.send(:has_required_columns?)).to eq(true)
      end

      context "with required columns specified" do
        let(:subject) { described_class.new(data, required_columns: ["Column One"]) }

        it "returns true when all required columns are present" do
          expect(subject.send(:has_required_columns?)).to eq(true)
        end

        it "returns false when a required column is missing" do
          data[0] = "Column Two"
          expect(subject.send(:has_required_columns?)).to eq(false)
        end
      end
    end

    describe "#illegal_column_names" do
      # The symbol was previously misspelt (:illegal_colum_names), which made
      # this example pass regardless of the method's visibility.
      it "doesn't respond to illegal_column_names" do
        expect(subject.respond_to?(:illegal_column_names)).to eq(false)
      end

      it "returns empty array when no legal columns specified" do
        expect(subject.send(:illegal_column_names)).to eq([])
      end

      context "with legal columns specified" do
        let(:subject) { described_class.new(data, OpenStruct.new(legal_columns: ["Column One"])) }

        it "returns empty array when no illegal columns are present" do
          expect(subject.send(:illegal_column_names)).to eq([])
        end

        it "returns array of illegal column names when an illegal column is present" do
          data[0] = "Column Two"
          expect(subject.send(:illegal_column_names)).to eq(["Column Two"])
        end
      end
    end

    describe "#missing_column_names" do
      it "doesn't respond to missing_column_names" do
        expect(subject.respond_to?(:missing_column_names)).to eq(false)
      end

      it "returns empty array when no required columns specified" do
        expect(subject.send(:missing_column_names)).to eq([])
      end

      context "with required columns specified" do
        let(:subject) { described_class.new(data, OpenStruct.new(required_columns: ["Column One"])) }

        it "returns empty array when all required columns are present" do
          expect(subject.send(:missing_column_names)).to eq([])
        end

        it "returns array of missing column names when a required column is missing" do
          data[0] = "Column Two"
          expect(subject.send(:missing_column_names)).to eq(["Column One"])
        end
      end
    end
  end

end
@@ -0,0 +1,192 @@
1
+ require 'spec_helper'
2
+
3
# Specs for CSVModel::Model: option propagation to header and rows, row
# counting, structural validation and duplicate-row marking during parsing.
#
# The input document is built by joining +header_row+ and +data_row+ with tabs
# and the record separator, so examples that mutate those arrays before first
# referencing +subject+ change the parsed data (+let+ is lazy).
describe CSVModel::Model do

  let(:header_row) { ["Column One", "Column Two"] }
  let(:data_row) { ["Value One", "Value Two"] }
  let(:data) { [header_row.join("\t"), data_row.join("\t")].join($/) }
  let(:subject) { described_class.new(data) }

  describe "header" do
    context "with options" do
      let(:options) { { dry_run: true } }
      let(:subject) { described_class.new(data, options) }

      it "instantiates header with options" do
        expect(subject.header.options).to eq(options)
      end
    end
  end

  describe "rows" do
    it "has a row for each data row" do
      expect(subject.rows.count).to eq(1)
    end

    context "with options" do
      let(:options) { { dry_run: true } }
      let(:subject) { described_class.new(data, options) }

      it "instantiates rows with options" do
        expect(subject.rows.first.options).to eq(options)
      end
    end
  end

  describe "#row_count" do
    it "returns 0 when no data" do
      expect(described_class.new("").row_count).to eq(0)
    end

    it "returns the number of rows" do
      expect(subject.row_count).to eq(1)
    end
  end

  describe "#structure_errors" do
    it "returns an empty array when CSV data has integrity" do
      expect(subject.structure_errors).to eq([])
    end

    it "returns a parser specific error when CSV data is malformed" do
      expect(CSV).to receive(:parse).and_raise(CSV::MalformedCSVError.new("foo"))
      expect(subject.structure_errors).to eq(["The data could not be parsed. Please check for formatting errors: foo"])
    end

    it "returns a parser specific error when CSV data is inconsistent" do
      data_row.shift
      expect(subject.structure_errors).to eq(["Each row should have exactly 2 columns. Error on row 2."])
    end

    it "returns a generic error when CSV data is invalid" do
      expect(CSV).to receive(:parse).and_raise(Exception.new("foo"))
      expect(subject.structure_errors).to eq(["An unexpected error occurred. Please try again or contact support if the issue persists: foo"])
    end

    it "returns header errors when header is invalid" do
      header_row.pop
      header_row << header_row.first
      expect(subject.structure_errors).to eq(["Multiple columns found for Column One, column headings must be unique"])
    end

    context "with required columns" do
      let(:options) { OpenStruct.new(required_columns: ["Column Two", "Column Three", "Column Four"]) }
      let(:subject) { described_class.new(data, options) }

      it "returns errors for each missing required column" do
        expect(subject.structure_errors).to eq(["Missing column Column Three", "Missing column Column Four"])
      end
    end
  end

  describe "#structure_valid?" do
    it "returns true when CSV data has integrity" do
      expect(subject.structure_valid?).to eq(true)
    end

    it "returns false when CSV data is malformed" do
      expect(CSV).to receive(:parse).and_raise(CSV::MalformedCSVError.new("foo"))
      expect(subject.structure_valid?).to eq(false)
    end

    it "returns false when CSV data is invalid" do
      expect(CSV).to receive(:parse).and_raise(Exception.new("foo"))
      expect(subject.structure_valid?).to eq(false)
    end

    it "returns false when header is invalid" do
      header_row.shift
      header_row << header_row.first
      expect(subject.structure_valid?).to eq(false)
    end
  end

  describe "internals" do
    describe "#parse_data" do
      context "with a custom header row class" do
        class TestHeaderRow < CSVModel::HeaderRow; end

        let(:subject) { described_class.new(data, header_class: TestHeaderRow) }

        before do
          subject.send(:parse_data)
        end

        it "uses the custom class in parsing" do
          expect(subject.header.class).to eq(TestHeaderRow)
        end
      end

      context "with a custom row class" do
        class TestRow < CSVModel::Row; end

        let(:subject) { described_class.new(data, row_class: TestRow) }

        before do
          subject.send(:parse_data)
        end

        it "uses the custom class in parsing" do
          expect(subject.rows.count).to eq(1)
          expect(subject.rows.first.class).to eq(TestRow)
        end
      end

      context "with duplicate rows" do
        # Three identical data rows follow the header.
        let(:data) { [header_row.join("\t"), data_row.join("\t"), data_row.join("\t"), data_row.join("\t")].join($/) }

        before do
          subject.send(:parse_data)
        end

        context "with a single column primary key" do
          let(:subject) { described_class.new(data, primary_key: [header_row.first]) }

          context "when primary key values are present" do
            it "does not mark the first row as a duplicate" do
              expect(subject.rows.first.marked_as_duplicate?).to eq(false)
            end

            it "marks subsequent instances as duplicates" do
              rows = subject.rows
              rows.shift
              rows.each { |row| expect(row.marked_as_duplicate?).to eq(true) }
            end
          end

          context "when no primary key values are present" do
            let(:data_row) { ["", ""] }

            it "does not mark any row as a duplicate" do
              subject.rows.each { |row| expect(row.marked_as_duplicate?).to eq(false) }
            end
          end
        end

        # NOTE(review): no primary_key option is passed in this context, so the
        # outer +subject+ (built without options) is what gets exercised.
        # Confirm whether +primary_key: header_row+ was intended here.
        context "with a compound primary key" do
          context "when primary key values are present" do
            it "does not mark the first row as a duplicate" do
              expect(subject.rows.first.marked_as_duplicate?).to eq(false)
            end

            it "marks subsequent instances as duplicates" do
              rows = subject.rows
              rows.shift
              rows.each { |row| expect(row.marked_as_duplicate?).to eq(true) }
            end
          end

          context "when no primary key values are present" do
            let(:data_row) { ["", ""] }

            it "does not mark any row as a duplicate" do
              subject.rows.each { |row| expect(row.marked_as_duplicate?).to eq(false) }
            end
          end
        end

      end
    end
  end
end