csvlint 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.gitattributes +2 -0
- data/CHANGELOG.md +19 -1
- data/README.md +15 -1
- data/bin/csvlint +13 -3
- data/csvlint.gemspec +1 -0
- data/features/csvupload.feature +5 -5
- data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
- data/features/fixtures/inconsistent-line-endings.csv +0 -0
- data/features/fixtures/invalid_many_rows.csv +0 -0
- data/features/fixtures/valid_many_rows.csv +0 -0
- data/features/information.feature +4 -4
- data/features/step_definitions/csv_options_steps.rb +5 -0
- data/features/validation_errors.feature +1 -1
- data/features/validation_info.feature +6 -6
- data/lib/csvlint.rb +1 -1
- data/lib/csvlint/csvw/number_format.rb +1 -1
- data/lib/csvlint/field.rb +10 -4
- data/lib/csvlint/validate.rb +326 -219
- data/lib/csvlint/version.rb +1 -1
- data/spec/csvw/number_format_spec.rb +14 -0
- data/spec/validator_spec.rb +450 -146
- metadata +21 -3
- data/lib/csvlint/wrapped_io.rb +0 -21
data/lib/csvlint/version.rb
CHANGED
data/spec/csvw/number_format_spec.rb
CHANGED
@@ -300,6 +300,20 @@ describe Csvlint::Csvw::NumberFormat do
|
|
300
300
|
expect(format.parse("12.345,67,8")).to eql(nil)
|
301
301
|
end
|
302
302
|
|
303
|
+
it "should parse numbers that match 0.###,### correctly" do
|
304
|
+
format = Csvlint::Csvw::NumberFormat.new("0.###,###")
|
305
|
+
expect(format.parse("1")).to eq(1)
|
306
|
+
expect(format.parse("12.3")).to eql(12.3)
|
307
|
+
expect(format.parse("12.34")).to eql(12.34)
|
308
|
+
expect(format.parse("12.345")).to eq(12.345)
|
309
|
+
expect(format.parse("12.3456")).to eql(nil)
|
310
|
+
expect(format.parse("12.345,6")).to eql(12.3456)
|
311
|
+
expect(format.parse("12.34,56")).to eql(nil)
|
312
|
+
expect(format.parse("12.345,67")).to eq(12.34567)
|
313
|
+
expect(format.parse("12.345,678")).to eql(12.345678)
|
314
|
+
expect(format.parse("12.345,67,8")).to eql(nil)
|
315
|
+
end
|
316
|
+
|
303
317
|
it "should parse numbers that match 0.000,### correctly" do
|
304
318
|
format = Csvlint::Csvw::NumberFormat.new("0.000,###")
|
305
319
|
expect(format.parse("1")).to eq(nil)
|
data/spec/validator_spec.rb
CHANGED
@@ -1,14 +1,101 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Csvlint::Validator do
|
4
|
-
|
4
|
+
|
5
5
|
before do
|
6
6
|
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
|
7
7
|
stub_request(:get, "http://example.com/.well-known/csvm").to_return(:status => 404)
|
8
8
|
stub_request(:get, "http://example.com/example.csv-metadata.json").to_return(:status => 404)
|
9
9
|
stub_request(:get, "http://example.com/csv-metadata.json").to_return(:status => 404)
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
|
+
it "should validate from a URL" do
|
13
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
14
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
15
|
+
|
16
|
+
expect(validator.valid?).to eql(true)
|
17
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
18
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(3)
|
19
|
+
expect(validator.data.size).to eql(3)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should validate from a file path" do
|
23
|
+
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
24
|
+
|
25
|
+
expect(validator.valid?).to eql(true)
|
26
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
27
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(3)
|
28
|
+
expect(validator.data.size).to eql(3)
|
29
|
+
end
|
30
|
+
|
31
|
+
context "multi line CSV validation with included schema" do
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
context "single line row validation with included schema" do
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
context "validation with multiple lines: " do
|
40
|
+
|
41
|
+
# TODO multiple lines permits testing of warnings
|
42
|
+
# TODO need more assertions in each test IE @formats
|
43
|
+
# TODO the phrasing of col_counts if only consulting specs might be confusing
|
44
|
+
# TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
|
45
|
+
# TODO ^- -> and its less the size of col_counts than the homogeneity of its contents which is important
|
46
|
+
|
47
|
+
it ".each() -> parse_contents method validates a well formed CSV" do
|
48
|
+
# when invoking parse contents
|
49
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
|
50
|
+
|
51
|
+
validator = Csvlint::Validator.new(data)
|
52
|
+
|
53
|
+
expect(validator.valid?).to eql(true)
|
54
|
+
# TODO would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total
|
55
|
+
# TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
|
56
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
57
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(4)
|
58
|
+
expect(validator.data.size).to eql(4)
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
it ".each() -> `parse_contents` parses malformed CSV and catches unclosed quote" do
|
63
|
+
# doesn't build warnings because check_consistency isn't invoked
|
64
|
+
# TODO below is trailing whitespace but is interpreted as an unclosed quote
|
65
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
|
66
|
+
|
67
|
+
validator = Csvlint::Validator.new(data)
|
68
|
+
|
69
|
+
expect(validator.valid?).to eql(false)
|
70
|
+
expect(validator.errors.first.type).to eql(:unclosed_quote)
|
71
|
+
expect(validator.errors.count).to eql(1)
|
72
|
+
end
|
73
|
+
|
74
|
+
it ".each() -> `parse_contents` parses malformed CSV and catches whitespace and edge case" do
|
75
|
+
# when this data gets passed the header it rescues a whitespace error, resulting in the header row being discarded
|
76
|
+
# TODO - check if this is an edge case, currently passing because it requires advice on how to specify
|
77
|
+
data = StringIO.new(" \"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"Foo\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
|
78
|
+
|
79
|
+
validator = Csvlint::Validator.new(data)
|
80
|
+
|
81
|
+
expect(validator.valid?).to eql(false)
|
82
|
+
expect(validator.errors.first.type).to eql(:whitespace)
|
83
|
+
expect(validator.errors.count).to eql(2)
|
84
|
+
end
|
85
|
+
|
86
|
+
it "handles line breaks within a cell" do
|
87
|
+
data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"e\",\"this is\r\nvalid\"\r\n\"a\",\"b\",\"c\"")
|
88
|
+
validator = Csvlint::Validator.new(data)
|
89
|
+
expect(validator.valid?).to eql(true)
|
90
|
+
end
|
91
|
+
|
92
|
+
it "handles multiple line breaks within a cell" do
|
93
|
+
data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"this is\r\n valid\",\"as is this\r\n too\"")
|
94
|
+
validator = Csvlint::Validator.new(data)
|
95
|
+
expect(validator.valid?).to eql(true)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
12
99
|
context "csv dialect" do
|
13
100
|
it "should provide sensible defaults for CSV parsing" do
|
14
101
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
@@ -20,7 +107,7 @@ describe Csvlint::Validator do
|
|
20
107
|
:skip_blanks => false
|
21
108
|
})
|
22
109
|
end
|
23
|
-
|
110
|
+
|
24
111
|
it "should map CSV DDF to correct values" do
|
25
112
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
26
113
|
opts = validator.dialect_to_csv_options( {
|
@@ -33,67 +120,145 @@ describe Csvlint::Validator do
|
|
33
120
|
:row_sep => "\n",
|
34
121
|
:quote_char => "'",
|
35
122
|
:skip_blanks => false
|
36
|
-
})
|
123
|
+
})
|
124
|
+
end
|
125
|
+
|
126
|
+
it ".each() -> `validate` to pass input in streaming fashion" do
|
127
|
+
# warnings are built when validate is used to call all three methods
|
128
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
|
129
|
+
validator = Csvlint::Validator.new(data)
|
130
|
+
|
131
|
+
expect(validator.valid?).to eql(true)
|
132
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
133
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(4)
|
134
|
+
expect(validator.data.size).to eql(4)
|
135
|
+
expect(validator.info_messages.count).to eql(1)
|
37
136
|
end
|
38
|
-
|
137
|
+
|
138
|
+
it ".each() -> `validate` parses malformed CSV, populates errors, warnings & info_msgs,invokes finish()" do
|
139
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"two\",\"3\"\r\n\"3\",\"2\", \"1\"")
|
140
|
+
|
141
|
+
validator = Csvlint::Validator.new(data)
|
142
|
+
|
143
|
+
expect(validator.valid?).to eql(false)
|
144
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
145
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(4)
|
146
|
+
expect(validator.data.size).to eql(5)
|
147
|
+
expect(validator.info_messages.count).to eql(1)
|
148
|
+
expect(validator.errors.count).to eql(1)
|
149
|
+
expect(validator.errors.first.type).to eql(:whitespace)
|
150
|
+
expect(validator.warnings.count).to eql(1)
|
151
|
+
expect(validator.warnings.first.type).to eql(:inconsistent_values)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "File.open.each_line -> `validate` passes a valid csv" do
|
155
|
+
filename = 'valid_many_rows.csv'
|
156
|
+
file = File.join(File.expand_path(Dir.pwd), "features", "fixtures", filename)
|
157
|
+
validator = Csvlint::Validator.new(File.new(file))
|
158
|
+
|
159
|
+
expect(validator.valid?).to eql(true)
|
160
|
+
expect(validator.info_messages.size).to eql(1)
|
161
|
+
expect(validator.info_messages.first.type).to eql(:assumed_header)
|
162
|
+
expect(validator.info_messages.first.category).to eql(:structure)
|
163
|
+
end
|
164
|
+
|
39
165
|
end
|
40
|
-
|
41
|
-
context "
|
42
|
-
|
43
|
-
|
44
|
-
|
166
|
+
|
167
|
+
context "with a single row" do
|
168
|
+
|
169
|
+
it "validates correctly" do
|
170
|
+
stream = "\"a\",\"b\",\"c\"\r\n"
|
171
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
|
172
|
+
expect(validator.valid?).to eql(true)
|
45
173
|
end
|
46
|
-
|
47
|
-
it "
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
validator
|
52
|
-
expect(
|
53
|
-
opts = {
|
54
|
-
"header" => false
|
55
|
-
}
|
56
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
|
57
|
-
expect( validator.header? ).to eql(false)
|
174
|
+
|
175
|
+
it "checks for non rfc line breaks" do
|
176
|
+
stream = "\"a\",\"b\",\"c\"\n"
|
177
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), {"header" => false})
|
178
|
+
expect(validator.valid?).to eql(true)
|
179
|
+
expect(validator.info_messages.count).to eq(1)
|
180
|
+
expect(validator.info_messages.first.type).to eql(:nonrfc_line_breaks)
|
58
181
|
end
|
59
182
|
|
60
|
-
it "
|
61
|
-
|
62
|
-
validator = Csvlint::Validator.new("
|
63
|
-
|
64
|
-
expect(
|
183
|
+
it "checks for blank rows" do
|
184
|
+
data = StringIO.new('"","",')
|
185
|
+
validator = Csvlint::Validator.new(data, "header" => false)
|
186
|
+
|
187
|
+
expect(validator.valid?).to eql(false)
|
188
|
+
expect(validator.errors.count).to eq(1)
|
189
|
+
expect(validator.errors.first.type).to eql(:blank_rows)
|
65
190
|
end
|
66
191
|
|
67
|
-
it "
|
68
|
-
|
69
|
-
validator = Csvlint::Validator.new(
|
70
|
-
expect(
|
71
|
-
expect( validator.errors.size ).to eql(0)
|
192
|
+
it "returns the content of the string with the error" do
|
193
|
+
stream = "\"\",\"\",\"\"\r\n"
|
194
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
|
195
|
+
expect(validator.errors.first.content).to eql("\"\",\"\",\"\"\r\n")
|
72
196
|
end
|
73
197
|
|
74
|
-
it "
|
75
|
-
|
76
|
-
validator = Csvlint::Validator.new(
|
77
|
-
|
78
|
-
expect( validator.
|
198
|
+
it "should presume a header unless told otherwise" do
|
199
|
+
stream = "1,2,3\r\n"
|
200
|
+
validator = Csvlint::Validator.new(StringIO.new(stream))
|
201
|
+
|
202
|
+
expect( validator.valid? ).to eql(true)
|
79
203
|
expect( validator.info_messages.size ).to eql(1)
|
80
204
|
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
205
|
+
expect( validator.info_messages.first.category).to eql(:structure)
|
81
206
|
end
|
82
207
|
|
83
|
-
it "
|
84
|
-
|
85
|
-
validator = Csvlint::Validator.new(
|
86
|
-
|
87
|
-
expect(
|
88
|
-
expect(
|
89
|
-
expect( validator.errors[1].type).to eql(:undeclared_header)
|
90
|
-
expect( validator.info_messages.size ).to eql(0)
|
208
|
+
it "should evaluate the row as 'row 2' when stipulated" do
|
209
|
+
stream = "1,2,3\r\n"
|
210
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
|
211
|
+
validator.validate
|
212
|
+
expect(validator.valid?).to eql(true)
|
213
|
+
expect(validator.info_messages.size).to eql(0)
|
91
214
|
end
|
92
215
|
|
93
216
|
end
|
94
|
-
|
217
|
+
|
218
|
+
context "it returns the correct error from ERROR_MATCHES" do
|
219
|
+
|
220
|
+
it "checks for unclosed quotes" do
|
221
|
+
stream = "\"a,\"b\",\"c\"\n"
|
222
|
+
validator = Csvlint::Validator.new(StringIO.new(stream))
|
223
|
+
expect(validator.valid?).to eql(false)
|
224
|
+
expect(validator.errors.count).to eq(1)
|
225
|
+
expect(validator.errors.first.type).to eql(:unclosed_quote)
|
226
|
+
end
|
227
|
+
|
228
|
+
|
229
|
+
# TODO stray quotes is not covered in any spec in this library
|
230
|
+
# it "checks for stray quotes" do
|
231
|
+
# stream = "\"a\",“b“,\"c\"" "\r\n"
|
232
|
+
# validator = Csvlint::Validator.new(stream)
|
233
|
+
# validator.validate # implicitly invokes parse_contents(stream)
|
234
|
+
# expect(validator.valid?).to eql(false)
|
235
|
+
# expect(validator.errors.count).to eq(1)
|
236
|
+
# expect(validator.errors.first.type).to eql(:stray_quote)
|
237
|
+
# end
|
238
|
+
|
239
|
+
it "checks for whitespace" do
|
240
|
+
stream = " \"a\",\"b\",\"c\"\r\n"
|
241
|
+
validator = Csvlint::Validator.new(StringIO.new(stream))
|
242
|
+
|
243
|
+
expect(validator.valid?).to eql(false)
|
244
|
+
expect(validator.errors.count).to eq(1)
|
245
|
+
expect(validator.errors.first.type).to eql(:whitespace)
|
246
|
+
end
|
247
|
+
|
248
|
+
it "returns line break errors if incorrectly specified" do
|
249
|
+
# TODO the logic for catching this error message is very esoteric
|
250
|
+
stream = "\"a\",\"b\",\"c\"\n"
|
251
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
|
252
|
+
expect(validator.valid?).to eql(false)
|
253
|
+
expect(validator.errors.count).to eq(1)
|
254
|
+
expect(validator.errors.first.type).to eql(:line_breaks)
|
255
|
+
end
|
256
|
+
|
257
|
+
end
|
258
|
+
|
95
259
|
context "when validating headers" do
|
96
|
-
|
260
|
+
|
261
|
+
it "should warn if column names aren't unique" do
|
97
262
|
data = StringIO.new( "minimum, minimum" )
|
98
263
|
validator = Csvlint::Validator.new(data)
|
99
264
|
validator.reset
|
@@ -106,210 +271,349 @@ describe Csvlint::Validator do
|
|
106
271
|
it "should warn if column names are blank" do
|
107
272
|
data = StringIO.new( "minimum," )
|
108
273
|
validator = Csvlint::Validator.new(data)
|
109
|
-
|
274
|
+
|
110
275
|
expect( validator.validate_header(["minimum", ""]) ).to eql(true)
|
111
276
|
expect( validator.warnings.size ).to eql(1)
|
112
277
|
expect( validator.warnings.first.type).to eql(:empty_column_name)
|
113
278
|
expect( validator.warnings.first.category).to eql(:schema)
|
114
279
|
end
|
115
|
-
|
280
|
+
|
116
281
|
it "should include info message about missing header when we have assumed a header" do
|
117
282
|
data = StringIO.new( "1,2,3\r\n" )
|
118
283
|
validator = Csvlint::Validator.new(data)
|
119
|
-
|
120
284
|
expect( validator.valid? ).to eql(true)
|
121
285
|
expect( validator.info_messages.size ).to eql(1)
|
122
286
|
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
123
287
|
expect( validator.info_messages.first.category).to eql(:structure)
|
124
|
-
end
|
288
|
+
end
|
125
289
|
|
126
290
|
it "should not include info message about missing header when we are told about the header" do
|
127
291
|
data = StringIO.new( "1,2,3\r\n" )
|
128
|
-
validator = Csvlint::Validator.new(data, "header"=>false)
|
129
|
-
|
292
|
+
validator = Csvlint::Validator.new(data, "header" => false)
|
130
293
|
expect( validator.valid? ).to eql(true)
|
131
294
|
expect( validator.info_messages.size ).to eql(0)
|
132
295
|
end
|
133
|
-
|
134
|
-
it "should be an error if we have assumed a header, there is no dialect and there's no content-type" do
|
135
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
136
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
137
|
-
expect( validator.valid? ).to eql(false)
|
138
|
-
end
|
139
|
-
|
140
|
-
it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
|
141
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
142
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
143
|
-
expect( validator.valid? ).to eql(true)
|
144
|
-
end
|
145
|
-
|
146
|
-
it "should be valid if we have a dialect and the data is from the web" do
|
147
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
148
|
-
#header defaults to true in csv dialect, so this is valid
|
149
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", {})
|
150
|
-
expect( validator.valid? ).to eql(true)
|
151
|
-
|
152
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
153
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
|
154
|
-
expect( validator.valid? ).to eql(true)
|
155
|
-
|
156
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
157
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
|
158
|
-
expect( validator.valid? ).to eql(true)
|
159
|
-
end
|
160
|
-
|
161
296
|
end
|
162
|
-
|
297
|
+
|
163
298
|
context "build_formats" do
|
164
|
-
|
299
|
+
|
165
300
|
{
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
301
|
+
:string => "foo",
|
302
|
+
:numeric => "1",
|
303
|
+
:uri => "http://www.example.com",
|
304
|
+
:dateTime_iso8601 => "2013-01-01T13:00:00Z",
|
305
|
+
:date_db => "2013-01-01",
|
306
|
+
:dateTime_hms => "13:00:00"
|
172
307
|
}.each do |type, content|
|
173
308
|
it "should return the format of #{type} correctly" do
|
174
309
|
row = [content]
|
175
|
-
|
310
|
+
|
176
311
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
177
312
|
validator.build_formats(row)
|
178
313
|
formats = validator.instance_variable_get("@formats")
|
179
|
-
|
314
|
+
|
180
315
|
expect(formats[0].keys.first).to eql type
|
181
316
|
end
|
182
317
|
end
|
183
|
-
|
318
|
+
|
184
319
|
it "treats floats and ints the same" do
|
185
320
|
row = ["12", "3.1476"]
|
186
|
-
|
321
|
+
|
187
322
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
188
323
|
validator.build_formats(row)
|
189
324
|
formats = validator.instance_variable_get("@formats")
|
190
|
-
|
325
|
+
|
191
326
|
expect(formats[0].keys.first).to eql :numeric
|
192
327
|
expect(formats[1].keys.first).to eql :numeric
|
193
328
|
end
|
194
|
-
|
329
|
+
|
195
330
|
it "should ignore blank arrays" do
|
196
331
|
row = []
|
197
|
-
|
332
|
+
|
198
333
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
199
334
|
validator.build_formats(row)
|
200
|
-
|
335
|
+
|
336
|
+
formats = validator.instance_variable_get("@formats")
|
201
337
|
expect(formats).to eql []
|
202
338
|
end
|
203
|
-
|
339
|
+
|
204
340
|
it "should work correctly for single columns" do
|
205
341
|
rows = [
|
206
342
|
["foo"],
|
207
343
|
["bar"],
|
208
344
|
["baz"]
|
209
|
-
|
210
|
-
|
345
|
+
]
|
346
|
+
|
211
347
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
212
|
-
|
348
|
+
|
213
349
|
rows.each_with_index do |row, i|
|
214
350
|
validator.build_formats(row)
|
215
351
|
end
|
216
|
-
|
352
|
+
|
217
353
|
formats = validator.instance_variable_get("@formats")
|
218
|
-
|
219
354
|
expect(formats).to eql [{:string => 3}]
|
220
355
|
end
|
221
|
-
|
356
|
+
|
222
357
|
it "should return formats correctly if a row is blank" do
|
223
358
|
rows = [
|
224
359
|
[],
|
225
|
-
["foo","1","$2345"]
|
226
|
-
|
227
|
-
|
360
|
+
["foo", "1", "$2345"]
|
361
|
+
]
|
362
|
+
|
228
363
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
229
|
-
|
364
|
+
|
230
365
|
rows.each_with_index do |row, i|
|
231
366
|
validator.build_formats(row)
|
232
367
|
end
|
233
|
-
|
234
|
-
formats = validator.instance_variable_get("@formats")
|
235
|
-
|
368
|
+
|
369
|
+
formats = validator.instance_variable_get("@formats")
|
370
|
+
|
236
371
|
expect(formats).to eql [
|
237
372
|
{:string => 1},
|
238
373
|
{:numeric => 1},
|
239
374
|
{:string => 1},
|
240
375
|
]
|
241
376
|
end
|
242
|
-
|
377
|
+
|
243
378
|
end
|
244
|
-
|
379
|
+
|
380
|
+
context "csv dialect" do
|
381
|
+
it "should provide sensible defaults for CSV parsing" do
|
382
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
383
|
+
opts = validator.instance_variable_get("@csv_options")
|
384
|
+
expect(opts).to include({
|
385
|
+
:col_sep => ",",
|
386
|
+
:row_sep => :auto,
|
387
|
+
:quote_char => '"',
|
388
|
+
:skip_blanks => false
|
389
|
+
})
|
390
|
+
end
|
391
|
+
|
392
|
+
it "should map CSV DDF to correct values" do
|
393
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
394
|
+
opts = validator.dialect_to_csv_options({
|
395
|
+
"lineTerminator" => "\n",
|
396
|
+
"delimiter" => "\t",
|
397
|
+
"quoteChar" => "'"
|
398
|
+
})
|
399
|
+
expect(opts).to include({
|
400
|
+
:col_sep => "\t",
|
401
|
+
:row_sep => "\n",
|
402
|
+
:quote_char => "'",
|
403
|
+
:skip_blanks => false
|
404
|
+
})
|
405
|
+
end
|
406
|
+
|
407
|
+
end
|
408
|
+
|
245
409
|
context "check_consistency" do
|
246
|
-
|
410
|
+
|
247
411
|
it "should return a warning if columns have inconsistent values" do
|
248
412
|
formats = [
|
249
413
|
{:string => 3},
|
250
414
|
{:string => 2, :numeric => 1},
|
251
415
|
{:numeric => 3},
|
252
|
-
|
253
|
-
|
416
|
+
]
|
417
|
+
|
254
418
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
255
419
|
validator.instance_variable_set("@formats", formats)
|
256
420
|
validator.check_consistency
|
257
|
-
|
258
|
-
warnings = validator.instance_variable_get("@warnings")
|
421
|
+
|
422
|
+
warnings = validator.instance_variable_get("@warnings")
|
259
423
|
warnings.delete_if { |h| h.type != :inconsistent_values }
|
260
|
-
|
424
|
+
|
261
425
|
expect(warnings.count).to eql 1
|
262
426
|
end
|
263
|
-
|
427
|
+
|
264
428
|
end
|
265
|
-
|
429
|
+
|
430
|
+
#TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
|
431
|
+
context "when detecting headers" do
|
432
|
+
it "should default to expecting a header" do
|
433
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
434
|
+
expect( validator.header? ).to eql(true)
|
435
|
+
end
|
436
|
+
|
437
|
+
it "should look in CSV options to detect header" do
|
438
|
+
opts = {
|
439
|
+
"header" => true
|
440
|
+
}
|
441
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
|
442
|
+
expect( validator.header? ).to eql(true)
|
443
|
+
opts = {
|
444
|
+
"header" => false
|
445
|
+
}
|
446
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
|
447
|
+
expect( validator.header? ).to eql(false)
|
448
|
+
end
|
449
|
+
|
450
|
+
it "should look in content-type for header=absent" do
|
451
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=absent"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
452
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
453
|
+
expect( validator.header? ).to eql(false)
|
454
|
+
expect( validator.errors.size ).to eql(0)
|
455
|
+
end
|
456
|
+
|
457
|
+
it "should look in content-type for header=present" do
|
458
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=present"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
459
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
460
|
+
expect( validator.header? ).to eql(true)
|
461
|
+
expect( validator.errors.size ).to eql(0)
|
462
|
+
end
|
463
|
+
|
464
|
+
it "assume header present if not specified in content type" do
|
465
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
466
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
467
|
+
expect( validator.header? ).to eql(true)
|
468
|
+
expect( validator.errors.size ).to eql(0)
|
469
|
+
expect( validator.info_messages.size ).to eql(1)
|
470
|
+
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
471
|
+
end
|
472
|
+
|
473
|
+
it "give wrong content type error if content type is wrong" do
|
474
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/html"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
475
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
476
|
+
expect( validator.header? ).to eql(true)
|
477
|
+
expect( validator.errors.size ).to eql(1)
|
478
|
+
expect( validator.errors[0].type).to eql(:wrong_content_type)
|
479
|
+
end
|
480
|
+
|
481
|
+
end
|
482
|
+
|
483
|
+
context "when validating headers" do
|
484
|
+
it "should warn if column names aren't unique" do
|
485
|
+
data = StringIO.new( "minimum, minimum" )
|
486
|
+
validator = Csvlint::Validator.new(data)
|
487
|
+
expect( validator.warnings.size ).to eql(1)
|
488
|
+
expect( validator.warnings.first.type).to eql(:duplicate_column_name)
|
489
|
+
expect( validator.warnings.first.category).to eql(:schema)
|
490
|
+
end
|
491
|
+
|
492
|
+
it "should warn if column names are blank" do
|
493
|
+
data = StringIO.new( "minimum," )
|
494
|
+
validator = Csvlint::Validator.new(data)
|
495
|
+
|
496
|
+
expect( validator.validate_header(["minimum", ""]) ).to eql(true)
|
497
|
+
expect( validator.warnings.size ).to eql(1)
|
498
|
+
expect( validator.warnings.first.type).to eql(:empty_column_name)
|
499
|
+
expect( validator.warnings.first.category).to eql(:schema)
|
500
|
+
end
|
501
|
+
|
502
|
+
it "should include info message about missing header when we have assumed a header" do
|
503
|
+
data = StringIO.new( "1,2,3\r\n" )
|
504
|
+
validator = Csvlint::Validator.new(data)
|
505
|
+
|
506
|
+
expect( validator.valid? ).to eql(true)
|
507
|
+
expect( validator.info_messages.size ).to eql(1)
|
508
|
+
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
509
|
+
expect( validator.info_messages.first.category).to eql(:structure)
|
510
|
+
end
|
511
|
+
|
512
|
+
it "should not include info message about missing header when we are told about the header" do
|
513
|
+
data = StringIO.new( "1,2,3\r\n" )
|
514
|
+
validator = Csvlint::Validator.new(data, "header"=>false)
|
515
|
+
expect( validator.valid? ).to eql(true)
|
516
|
+
expect( validator.info_messages.size ).to eql(0)
|
517
|
+
end
|
518
|
+
|
519
|
+
it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
|
520
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
521
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
522
|
+
expect( validator.valid? ).to eql(true)
|
523
|
+
end
|
524
|
+
|
525
|
+
it "should be valid if we have a dialect and the data is from the web" do
|
526
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
527
|
+
#header defaults to true in csv dialect, so this is valid
|
528
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {})
|
529
|
+
expect( validator.valid? ).to eql(true)
|
530
|
+
|
531
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
532
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
|
533
|
+
expect( validator.valid? ).to eql(true)
|
534
|
+
|
535
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
536
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
|
537
|
+
expect( validator.valid? ).to eql(true)
|
538
|
+
end
|
539
|
+
|
540
|
+
end
|
541
|
+
|
266
542
|
context "accessing metadata" do
|
267
|
-
|
543
|
+
|
268
544
|
before :all do
|
269
545
|
stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')))
|
270
546
|
stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(:status => 404)
|
271
547
|
end
|
272
|
-
|
273
|
-
it "can get line break symbol" do
|
274
548
|
|
549
|
+
it "can get line break symbol" do
|
275
550
|
validator = Csvlint::Validator.new("http://example.com/crlf.csv")
|
276
551
|
expect(validator.line_breaks).to eql "\r\n"
|
277
|
-
|
278
552
|
end
|
279
|
-
|
553
|
+
|
280
554
|
end
|
281
|
-
|
555
|
+
|
282
556
|
it "should give access to the complete CSV data file" do
|
283
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
284
|
-
:headers=>{"Content-Type" => "text/csv; header=present"},
|
557
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
558
|
+
:headers=>{"Content-Type" => "text/csv; header=present"},
|
285
559
|
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
286
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
560
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
287
561
|
expect( validator.valid? ).to eql(true)
|
288
562
|
data = validator.data
|
289
|
-
|
563
|
+
|
564
|
+
expect( data.count ).to eql 3
|
290
565
|
expect( data[0] ).to eql ['Foo','Bar','Baz']
|
291
566
|
expect( data[2] ).to eql ['3','2','1']
|
292
567
|
end
|
293
|
-
|
568
|
+
|
569
|
+
it "should count the total number of rows read" do
|
570
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
571
|
+
:headers=>{"Content-Type" => "text/csv; header=present"},
|
572
|
+
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
573
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
574
|
+
expect(validator.row_count).to eq(3)
|
575
|
+
end
|
576
|
+
|
294
577
|
it "should limit number of lines read" do
|
295
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
296
|
-
:headers=>{"Content-Type" => "text/csv; header=present"},
|
578
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
579
|
+
:headers=>{"Content-Type" => "text/csv; header=present"},
|
297
580
|
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
298
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv",
|
581
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
|
299
582
|
expect( validator.valid? ).to eql(true)
|
300
583
|
data = validator.data
|
301
|
-
expect( data.count ).to eql 2
|
584
|
+
expect( data.count ).to eql 2
|
302
585
|
expect( data[0] ).to eql ['Foo','Bar','Baz']
|
303
586
|
end
|
304
|
-
|
305
|
-
it "should follow redirects to SSL" do
|
306
|
-
stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
|
307
|
-
stub_request(:get, "http://example.com/redirect-metadata.json").to_return(:status => 404)
|
308
|
-
stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
|
309
|
-
:headers=>{"Content-Type" => "text/csv; header=present"},
|
310
|
-
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
311
587
|
|
312
|
-
|
313
|
-
|
588
|
+
context "with a lambda" do
|
589
|
+
|
590
|
+
it "should call a lambda for each line" do
|
591
|
+
@count = 0
|
592
|
+
mylambda = lambda { |row| @count = @count + 1 }
|
593
|
+
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')), {}, nil, { lambda: mylambda })
|
594
|
+
expect(@count).to eq(3)
|
595
|
+
end
|
596
|
+
|
597
|
+
it "reports back the status of each line" do
|
598
|
+
@results = []
|
599
|
+
mylambda = lambda { |row| @results << row.current_line }
|
600
|
+
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')), {}, nil, { lambda: mylambda })
|
601
|
+
expect(@results.count).to eq(3)
|
602
|
+
expect(@results[0]).to eq(1)
|
603
|
+
expect(@results[1]).to eq(2)
|
604
|
+
expect(@results[2]).to eq(3)
|
605
|
+
end
|
606
|
+
|
314
607
|
end
|
315
|
-
|
608
|
+
|
609
|
+
# Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
|
610
|
+
# it "should follow redirects to SSL" do
|
611
|
+
# stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
|
612
|
+
# stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
|
613
|
+
# :headers=>{"Content-Type" => "text/csv; header=present"},
|
614
|
+
# :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
615
|
+
#
|
616
|
+
# validator = Csvlint::Validator.new("http://example.com/redirect")
|
617
|
+
# expect( validator.valid? ).to eql(true)
|
618
|
+
# end
|
619
|
+
end
|