csvlint 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.gitattributes +2 -0
- data/CHANGELOG.md +19 -1
- data/README.md +15 -1
- data/bin/csvlint +13 -3
- data/csvlint.gemspec +1 -0
- data/features/csvupload.feature +5 -5
- data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
- data/features/fixtures/inconsistent-line-endings.csv +0 -0
- data/features/fixtures/invalid_many_rows.csv +0 -0
- data/features/fixtures/valid_many_rows.csv +0 -0
- data/features/information.feature +4 -4
- data/features/step_definitions/csv_options_steps.rb +5 -0
- data/features/validation_errors.feature +1 -1
- data/features/validation_info.feature +6 -6
- data/lib/csvlint.rb +1 -1
- data/lib/csvlint/csvw/number_format.rb +1 -1
- data/lib/csvlint/field.rb +10 -4
- data/lib/csvlint/validate.rb +326 -219
- data/lib/csvlint/version.rb +1 -1
- data/spec/csvw/number_format_spec.rb +14 -0
- data/spec/validator_spec.rb +450 -146
- metadata +21 -3
- data/lib/csvlint/wrapped_io.rb +0 -21
data/spec/csvw/number_format_spec.rb
CHANGED
@@ -300,6 +300,20 @@ describe Csvlint::Csvw::NumberFormat do
|
|
300
300
|
expect(format.parse("12.345,67,8")).to eql(nil)
|
301
301
|
end
|
302
302
|
|
303
|
+
it "should parse numbers that match 0.###,### correctly" do
|
304
|
+
format = Csvlint::Csvw::NumberFormat.new("0.###,###")
|
305
|
+
expect(format.parse("1")).to eq(1)
|
306
|
+
expect(format.parse("12.3")).to eql(12.3)
|
307
|
+
expect(format.parse("12.34")).to eql(12.34)
|
308
|
+
expect(format.parse("12.345")).to eq(12.345)
|
309
|
+
expect(format.parse("12.3456")).to eql(nil)
|
310
|
+
expect(format.parse("12.345,6")).to eql(12.3456)
|
311
|
+
expect(format.parse("12.34,56")).to eql(nil)
|
312
|
+
expect(format.parse("12.345,67")).to eq(12.34567)
|
313
|
+
expect(format.parse("12.345,678")).to eql(12.345678)
|
314
|
+
expect(format.parse("12.345,67,8")).to eql(nil)
|
315
|
+
end
|
316
|
+
|
303
317
|
it "should parse numbers that match 0.000,### correctly" do
|
304
318
|
format = Csvlint::Csvw::NumberFormat.new("0.000,###")
|
305
319
|
expect(format.parse("1")).to eq(nil)
|
data/spec/validator_spec.rb
CHANGED
@@ -1,14 +1,101 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Csvlint::Validator do
|
4
|
-
|
4
|
+
|
5
5
|
before do
|
6
6
|
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
|
7
7
|
stub_request(:get, "http://example.com/.well-known/csvm").to_return(:status => 404)
|
8
8
|
stub_request(:get, "http://example.com/example.csv-metadata.json").to_return(:status => 404)
|
9
9
|
stub_request(:get, "http://example.com/csv-metadata.json").to_return(:status => 404)
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
|
+
it "should validate from a URL" do
|
13
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
14
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
15
|
+
|
16
|
+
expect(validator.valid?).to eql(true)
|
17
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
18
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(3)
|
19
|
+
expect(validator.data.size).to eql(3)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should validate from a file path" do
|
23
|
+
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
24
|
+
|
25
|
+
expect(validator.valid?).to eql(true)
|
26
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
27
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(3)
|
28
|
+
expect(validator.data.size).to eql(3)
|
29
|
+
end
|
30
|
+
|
31
|
+
context "multi line CSV validation with included schema" do
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
context "single line row validation with included schema" do
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
context "validation with multiple lines: " do
|
40
|
+
|
41
|
+
# TODO multiple lines permits testing of warnings
|
42
|
+
# TODO need more assertions in each test IE @formats
|
43
|
+
# TODO the phrasing of col_counts if only consulting specs might be confusing
|
44
|
+
# TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
|
45
|
+
# TODO ^- -> and its less the size of col_counts than the homogeneity of its contents which is important
|
46
|
+
|
47
|
+
it ".each() -> parse_contents method validates a well formed CSV" do
|
48
|
+
# when invoking parse contents
|
49
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
|
50
|
+
|
51
|
+
validator = Csvlint::Validator.new(data)
|
52
|
+
|
53
|
+
expect(validator.valid?).to eql(true)
|
54
|
+
# TODO would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total
|
55
|
+
# TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
|
56
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
57
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(4)
|
58
|
+
expect(validator.data.size).to eql(4)
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
it ".each() -> `parse_contents` parses malformed CSV and catches unclosed quote" do
|
63
|
+
# doesn't build warnings because check_consistency isn't invoked
|
64
|
+
# TODO below is trailing whitespace but is interpreted as an unclosed quote
|
65
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
|
66
|
+
|
67
|
+
validator = Csvlint::Validator.new(data)
|
68
|
+
|
69
|
+
expect(validator.valid?).to eql(false)
|
70
|
+
expect(validator.errors.first.type).to eql(:unclosed_quote)
|
71
|
+
expect(validator.errors.count).to eql(1)
|
72
|
+
end
|
73
|
+
|
74
|
+
it ".each() -> `parse_contents` parses malformed CSV and catches whitespace and edge case" do
|
75
|
+
# when this data gets passed the header it rescues a whitespace error, resulting in the header row being discarded
|
76
|
+
# TODO - check if this is an edge case, currently passing because it requires advice on how to specify
|
77
|
+
data = StringIO.new(" \"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"Foo\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
|
78
|
+
|
79
|
+
validator = Csvlint::Validator.new(data)
|
80
|
+
|
81
|
+
expect(validator.valid?).to eql(false)
|
82
|
+
expect(validator.errors.first.type).to eql(:whitespace)
|
83
|
+
expect(validator.errors.count).to eql(2)
|
84
|
+
end
|
85
|
+
|
86
|
+
it "handles line breaks within a cell" do
|
87
|
+
data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"e\",\"this is\r\nvalid\"\r\n\"a\",\"b\",\"c\"")
|
88
|
+
validator = Csvlint::Validator.new(data)
|
89
|
+
expect(validator.valid?).to eql(true)
|
90
|
+
end
|
91
|
+
|
92
|
+
it "handles multiple line breaks within a cell" do
|
93
|
+
data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"this is\r\n valid\",\"as is this\r\n too\"")
|
94
|
+
validator = Csvlint::Validator.new(data)
|
95
|
+
expect(validator.valid?).to eql(true)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
12
99
|
context "csv dialect" do
|
13
100
|
it "should provide sensible defaults for CSV parsing" do
|
14
101
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
@@ -20,7 +107,7 @@ describe Csvlint::Validator do
|
|
20
107
|
:skip_blanks => false
|
21
108
|
})
|
22
109
|
end
|
23
|
-
|
110
|
+
|
24
111
|
it "should map CSV DDF to correct values" do
|
25
112
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
26
113
|
opts = validator.dialect_to_csv_options( {
|
@@ -33,67 +120,145 @@ describe Csvlint::Validator do
|
|
33
120
|
:row_sep => "\n",
|
34
121
|
:quote_char => "'",
|
35
122
|
:skip_blanks => false
|
36
|
-
})
|
123
|
+
})
|
124
|
+
end
|
125
|
+
|
126
|
+
it ".each() -> `validate` to pass input in streaming fashion" do
|
127
|
+
# warnings are built when validate is used to call all three methods
|
128
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
|
129
|
+
validator = Csvlint::Validator.new(data)
|
130
|
+
|
131
|
+
expect(validator.valid?).to eql(true)
|
132
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
133
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(4)
|
134
|
+
expect(validator.data.size).to eql(4)
|
135
|
+
expect(validator.info_messages.count).to eql(1)
|
37
136
|
end
|
38
|
-
|
137
|
+
|
138
|
+
it ".each() -> `validate` parses malformed CSV, populates errors, warnings & info_msgs,invokes finish()" do
|
139
|
+
data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"two\",\"3\"\r\n\"3\",\"2\", \"1\"")
|
140
|
+
|
141
|
+
validator = Csvlint::Validator.new(data)
|
142
|
+
|
143
|
+
expect(validator.valid?).to eql(false)
|
144
|
+
expect(validator.instance_variable_get("@expected_columns")).to eql(3)
|
145
|
+
expect(validator.instance_variable_get("@col_counts").count).to eql(4)
|
146
|
+
expect(validator.data.size).to eql(5)
|
147
|
+
expect(validator.info_messages.count).to eql(1)
|
148
|
+
expect(validator.errors.count).to eql(1)
|
149
|
+
expect(validator.errors.first.type).to eql(:whitespace)
|
150
|
+
expect(validator.warnings.count).to eql(1)
|
151
|
+
expect(validator.warnings.first.type).to eql(:inconsistent_values)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "File.open.each_line -> `validate` passes a valid csv" do
|
155
|
+
filename = 'valid_many_rows.csv'
|
156
|
+
file = File.join(File.expand_path(Dir.pwd), "features", "fixtures", filename)
|
157
|
+
validator = Csvlint::Validator.new(File.new(file))
|
158
|
+
|
159
|
+
expect(validator.valid?).to eql(true)
|
160
|
+
expect(validator.info_messages.size).to eql(1)
|
161
|
+
expect(validator.info_messages.first.type).to eql(:assumed_header)
|
162
|
+
expect(validator.info_messages.first.category).to eql(:structure)
|
163
|
+
end
|
164
|
+
|
39
165
|
end
|
40
|
-
|
41
|
-
context "
|
42
|
-
|
43
|
-
|
44
|
-
|
166
|
+
|
167
|
+
context "with a single row" do
|
168
|
+
|
169
|
+
it "validates correctly" do
|
170
|
+
stream = "\"a\",\"b\",\"c\"\r\n"
|
171
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
|
172
|
+
expect(validator.valid?).to eql(true)
|
45
173
|
end
|
46
|
-
|
47
|
-
it "
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
validator
|
52
|
-
expect(
|
53
|
-
opts = {
|
54
|
-
"header" => false
|
55
|
-
}
|
56
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
|
57
|
-
expect( validator.header? ).to eql(false)
|
174
|
+
|
175
|
+
it "checks for non rfc line breaks" do
|
176
|
+
stream = "\"a\",\"b\",\"c\"\n"
|
177
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), {"header" => false})
|
178
|
+
expect(validator.valid?).to eql(true)
|
179
|
+
expect(validator.info_messages.count).to eq(1)
|
180
|
+
expect(validator.info_messages.first.type).to eql(:nonrfc_line_breaks)
|
58
181
|
end
|
59
182
|
|
60
|
-
it "
|
61
|
-
|
62
|
-
validator = Csvlint::Validator.new("
|
63
|
-
|
64
|
-
expect(
|
183
|
+
it "checks for blank rows" do
|
184
|
+
data = StringIO.new('"","",')
|
185
|
+
validator = Csvlint::Validator.new(data, "header" => false)
|
186
|
+
|
187
|
+
expect(validator.valid?).to eql(false)
|
188
|
+
expect(validator.errors.count).to eq(1)
|
189
|
+
expect(validator.errors.first.type).to eql(:blank_rows)
|
65
190
|
end
|
66
191
|
|
67
|
-
it "
|
68
|
-
|
69
|
-
validator = Csvlint::Validator.new(
|
70
|
-
expect(
|
71
|
-
expect( validator.errors.size ).to eql(0)
|
192
|
+
it "returns the content of the string with the error" do
|
193
|
+
stream = "\"\",\"\",\"\"\r\n"
|
194
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
|
195
|
+
expect(validator.errors.first.content).to eql("\"\",\"\",\"\"\r\n")
|
72
196
|
end
|
73
197
|
|
74
|
-
it "
|
75
|
-
|
76
|
-
validator = Csvlint::Validator.new(
|
77
|
-
|
78
|
-
expect( validator.
|
198
|
+
it "should presume a header unless told otherwise" do
|
199
|
+
stream = "1,2,3\r\n"
|
200
|
+
validator = Csvlint::Validator.new(StringIO.new(stream))
|
201
|
+
|
202
|
+
expect( validator.valid? ).to eql(true)
|
79
203
|
expect( validator.info_messages.size ).to eql(1)
|
80
204
|
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
205
|
+
expect( validator.info_messages.first.category).to eql(:structure)
|
81
206
|
end
|
82
207
|
|
83
|
-
it "
|
84
|
-
|
85
|
-
validator = Csvlint::Validator.new(
|
86
|
-
|
87
|
-
expect(
|
88
|
-
expect(
|
89
|
-
expect( validator.errors[1].type).to eql(:undeclared_header)
|
90
|
-
expect( validator.info_messages.size ).to eql(0)
|
208
|
+
it "should evaluate the row as 'row 2' when stipulated" do
|
209
|
+
stream = "1,2,3\r\n"
|
210
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
|
211
|
+
validator.validate
|
212
|
+
expect(validator.valid?).to eql(true)
|
213
|
+
expect(validator.info_messages.size).to eql(0)
|
91
214
|
end
|
92
215
|
|
93
216
|
end
|
94
|
-
|
217
|
+
|
218
|
+
context "it returns the correct error from ERROR_MATCHES" do
|
219
|
+
|
220
|
+
it "checks for unclosed quotes" do
|
221
|
+
stream = "\"a,\"b\",\"c\"\n"
|
222
|
+
validator = Csvlint::Validator.new(StringIO.new(stream))
|
223
|
+
expect(validator.valid?).to eql(false)
|
224
|
+
expect(validator.errors.count).to eq(1)
|
225
|
+
expect(validator.errors.first.type).to eql(:unclosed_quote)
|
226
|
+
end
|
227
|
+
|
228
|
+
|
229
|
+
# TODO stray quotes is not covered in any spec in this library
|
230
|
+
# it "checks for stray quotes" do
|
231
|
+
# stream = "\"a\",“b“,\"c\"" "\r\n"
|
232
|
+
# validator = Csvlint::Validator.new(stream)
|
233
|
+
# validator.validate # implicitly invokes parse_contents(stream)
|
234
|
+
# expect(validator.valid?).to eql(false)
|
235
|
+
# expect(validator.errors.count).to eq(1)
|
236
|
+
# expect(validator.errors.first.type).to eql(:stray_quote)
|
237
|
+
# end
|
238
|
+
|
239
|
+
it "checks for whitespace" do
|
240
|
+
stream = " \"a\",\"b\",\"c\"\r\n"
|
241
|
+
validator = Csvlint::Validator.new(StringIO.new(stream))
|
242
|
+
|
243
|
+
expect(validator.valid?).to eql(false)
|
244
|
+
expect(validator.errors.count).to eq(1)
|
245
|
+
expect(validator.errors.first.type).to eql(:whitespace)
|
246
|
+
end
|
247
|
+
|
248
|
+
it "returns line break errors if incorrectly specified" do
|
249
|
+
# TODO the logic for catching this error message is very esoteric
|
250
|
+
stream = "\"a\",\"b\",\"c\"\n"
|
251
|
+
validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
|
252
|
+
expect(validator.valid?).to eql(false)
|
253
|
+
expect(validator.errors.count).to eq(1)
|
254
|
+
expect(validator.errors.first.type).to eql(:line_breaks)
|
255
|
+
end
|
256
|
+
|
257
|
+
end
|
258
|
+
|
95
259
|
context "when validating headers" do
|
96
|
-
|
260
|
+
|
261
|
+
it "should warn if column names aren't unique" do
|
97
262
|
data = StringIO.new( "minimum, minimum" )
|
98
263
|
validator = Csvlint::Validator.new(data)
|
99
264
|
validator.reset
|
@@ -106,210 +271,349 @@ describe Csvlint::Validator do
|
|
106
271
|
it "should warn if column names are blank" do
|
107
272
|
data = StringIO.new( "minimum," )
|
108
273
|
validator = Csvlint::Validator.new(data)
|
109
|
-
|
274
|
+
|
110
275
|
expect( validator.validate_header(["minimum", ""]) ).to eql(true)
|
111
276
|
expect( validator.warnings.size ).to eql(1)
|
112
277
|
expect( validator.warnings.first.type).to eql(:empty_column_name)
|
113
278
|
expect( validator.warnings.first.category).to eql(:schema)
|
114
279
|
end
|
115
|
-
|
280
|
+
|
116
281
|
it "should include info message about missing header when we have assumed a header" do
|
117
282
|
data = StringIO.new( "1,2,3\r\n" )
|
118
283
|
validator = Csvlint::Validator.new(data)
|
119
|
-
|
120
284
|
expect( validator.valid? ).to eql(true)
|
121
285
|
expect( validator.info_messages.size ).to eql(1)
|
122
286
|
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
123
287
|
expect( validator.info_messages.first.category).to eql(:structure)
|
124
|
-
end
|
288
|
+
end
|
125
289
|
|
126
290
|
it "should not include info message about missing header when we are told about the header" do
|
127
291
|
data = StringIO.new( "1,2,3\r\n" )
|
128
|
-
validator = Csvlint::Validator.new(data, "header"=>false)
|
129
|
-
|
292
|
+
validator = Csvlint::Validator.new(data, "header" => false)
|
130
293
|
expect( validator.valid? ).to eql(true)
|
131
294
|
expect( validator.info_messages.size ).to eql(0)
|
132
295
|
end
|
133
|
-
|
134
|
-
it "should be an error if we have assumed a header, there is no dialect and there's no content-type" do
|
135
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
136
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
137
|
-
expect( validator.valid? ).to eql(false)
|
138
|
-
end
|
139
|
-
|
140
|
-
it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
|
141
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
142
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
143
|
-
expect( validator.valid? ).to eql(true)
|
144
|
-
end
|
145
|
-
|
146
|
-
it "should be valid if we have a dialect and the data is from the web" do
|
147
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
148
|
-
#header defaults to true in csv dialect, so this is valid
|
149
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", {})
|
150
|
-
expect( validator.valid? ).to eql(true)
|
151
|
-
|
152
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
153
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
|
154
|
-
expect( validator.valid? ).to eql(true)
|
155
|
-
|
156
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
157
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
|
158
|
-
expect( validator.valid? ).to eql(true)
|
159
|
-
end
|
160
|
-
|
161
296
|
end
|
162
|
-
|
297
|
+
|
163
298
|
context "build_formats" do
|
164
|
-
|
299
|
+
|
165
300
|
{
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
301
|
+
:string => "foo",
|
302
|
+
:numeric => "1",
|
303
|
+
:uri => "http://www.example.com",
|
304
|
+
:dateTime_iso8601 => "2013-01-01T13:00:00Z",
|
305
|
+
:date_db => "2013-01-01",
|
306
|
+
:dateTime_hms => "13:00:00"
|
172
307
|
}.each do |type, content|
|
173
308
|
it "should return the format of #{type} correctly" do
|
174
309
|
row = [content]
|
175
|
-
|
310
|
+
|
176
311
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
177
312
|
validator.build_formats(row)
|
178
313
|
formats = validator.instance_variable_get("@formats")
|
179
|
-
|
314
|
+
|
180
315
|
expect(formats[0].keys.first).to eql type
|
181
316
|
end
|
182
317
|
end
|
183
|
-
|
318
|
+
|
184
319
|
it "treats floats and ints the same" do
|
185
320
|
row = ["12", "3.1476"]
|
186
|
-
|
321
|
+
|
187
322
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
188
323
|
validator.build_formats(row)
|
189
324
|
formats = validator.instance_variable_get("@formats")
|
190
|
-
|
325
|
+
|
191
326
|
expect(formats[0].keys.first).to eql :numeric
|
192
327
|
expect(formats[1].keys.first).to eql :numeric
|
193
328
|
end
|
194
|
-
|
329
|
+
|
195
330
|
it "should ignore blank arrays" do
|
196
331
|
row = []
|
197
|
-
|
332
|
+
|
198
333
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
199
334
|
validator.build_formats(row)
|
200
|
-
|
335
|
+
|
336
|
+
formats = validator.instance_variable_get("@formats")
|
201
337
|
expect(formats).to eql []
|
202
338
|
end
|
203
|
-
|
339
|
+
|
204
340
|
it "should work correctly for single columns" do
|
205
341
|
rows = [
|
206
342
|
["foo"],
|
207
343
|
["bar"],
|
208
344
|
["baz"]
|
209
|
-
|
210
|
-
|
345
|
+
]
|
346
|
+
|
211
347
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
212
|
-
|
348
|
+
|
213
349
|
rows.each_with_index do |row, i|
|
214
350
|
validator.build_formats(row)
|
215
351
|
end
|
216
|
-
|
352
|
+
|
217
353
|
formats = validator.instance_variable_get("@formats")
|
218
|
-
|
219
354
|
expect(formats).to eql [{:string => 3}]
|
220
355
|
end
|
221
|
-
|
356
|
+
|
222
357
|
it "should return formats correctly if a row is blank" do
|
223
358
|
rows = [
|
224
359
|
[],
|
225
|
-
["foo","1","$2345"]
|
226
|
-
|
227
|
-
|
360
|
+
["foo", "1", "$2345"]
|
361
|
+
]
|
362
|
+
|
228
363
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
229
|
-
|
364
|
+
|
230
365
|
rows.each_with_index do |row, i|
|
231
366
|
validator.build_formats(row)
|
232
367
|
end
|
233
|
-
|
234
|
-
formats = validator.instance_variable_get("@formats")
|
235
|
-
|
368
|
+
|
369
|
+
formats = validator.instance_variable_get("@formats")
|
370
|
+
|
236
371
|
expect(formats).to eql [
|
237
372
|
{:string => 1},
|
238
373
|
{:numeric => 1},
|
239
374
|
{:string => 1},
|
240
375
|
]
|
241
376
|
end
|
242
|
-
|
377
|
+
|
243
378
|
end
|
244
|
-
|
379
|
+
|
380
|
+
context "csv dialect" do
|
381
|
+
it "should provide sensible defaults for CSV parsing" do
|
382
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
383
|
+
opts = validator.instance_variable_get("@csv_options")
|
384
|
+
expect(opts).to include({
|
385
|
+
:col_sep => ",",
|
386
|
+
:row_sep => :auto,
|
387
|
+
:quote_char => '"',
|
388
|
+
:skip_blanks => false
|
389
|
+
})
|
390
|
+
end
|
391
|
+
|
392
|
+
it "should map CSV DDF to correct values" do
|
393
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
394
|
+
opts = validator.dialect_to_csv_options({
|
395
|
+
"lineTerminator" => "\n",
|
396
|
+
"delimiter" => "\t",
|
397
|
+
"quoteChar" => "'"
|
398
|
+
})
|
399
|
+
expect(opts).to include({
|
400
|
+
:col_sep => "\t",
|
401
|
+
:row_sep => "\n",
|
402
|
+
:quote_char => "'",
|
403
|
+
:skip_blanks => false
|
404
|
+
})
|
405
|
+
end
|
406
|
+
|
407
|
+
end
|
408
|
+
|
245
409
|
context "check_consistency" do
|
246
|
-
|
410
|
+
|
247
411
|
it "should return a warning if columns have inconsistent values" do
|
248
412
|
formats = [
|
249
413
|
{:string => 3},
|
250
414
|
{:string => 2, :numeric => 1},
|
251
415
|
{:numeric => 3},
|
252
|
-
|
253
|
-
|
416
|
+
]
|
417
|
+
|
254
418
|
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
255
419
|
validator.instance_variable_set("@formats", formats)
|
256
420
|
validator.check_consistency
|
257
|
-
|
258
|
-
warnings = validator.instance_variable_get("@warnings")
|
421
|
+
|
422
|
+
warnings = validator.instance_variable_get("@warnings")
|
259
423
|
warnings.delete_if { |h| h.type != :inconsistent_values }
|
260
|
-
|
424
|
+
|
261
425
|
expect(warnings.count).to eql 1
|
262
426
|
end
|
263
|
-
|
427
|
+
|
264
428
|
end
|
265
|
-
|
429
|
+
|
430
|
+
#TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
|
431
|
+
context "when detecting headers" do
|
432
|
+
it "should default to expecting a header" do
|
433
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
434
|
+
expect( validator.header? ).to eql(true)
|
435
|
+
end
|
436
|
+
|
437
|
+
it "should look in CSV options to detect header" do
|
438
|
+
opts = {
|
439
|
+
"header" => true
|
440
|
+
}
|
441
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
|
442
|
+
expect( validator.header? ).to eql(true)
|
443
|
+
opts = {
|
444
|
+
"header" => false
|
445
|
+
}
|
446
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
|
447
|
+
expect( validator.header? ).to eql(false)
|
448
|
+
end
|
449
|
+
|
450
|
+
it "should look in content-type for header=absent" do
|
451
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=absent"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
452
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
453
|
+
expect( validator.header? ).to eql(false)
|
454
|
+
expect( validator.errors.size ).to eql(0)
|
455
|
+
end
|
456
|
+
|
457
|
+
it "should look in content-type for header=present" do
|
458
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=present"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
459
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
460
|
+
expect( validator.header? ).to eql(true)
|
461
|
+
expect( validator.errors.size ).to eql(0)
|
462
|
+
end
|
463
|
+
|
464
|
+
it "assume header present if not specified in content type" do
|
465
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
466
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
467
|
+
expect( validator.header? ).to eql(true)
|
468
|
+
expect( validator.errors.size ).to eql(0)
|
469
|
+
expect( validator.info_messages.size ).to eql(1)
|
470
|
+
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
471
|
+
end
|
472
|
+
|
473
|
+
it "give wrong content type error if content type is wrong" do
|
474
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/html"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
475
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
476
|
+
expect( validator.header? ).to eql(true)
|
477
|
+
expect( validator.errors.size ).to eql(1)
|
478
|
+
expect( validator.errors[0].type).to eql(:wrong_content_type)
|
479
|
+
end
|
480
|
+
|
481
|
+
end
|
482
|
+
|
483
|
+
context "when validating headers" do
|
484
|
+
it "should warn if column names aren't unique" do
|
485
|
+
data = StringIO.new( "minimum, minimum" )
|
486
|
+
validator = Csvlint::Validator.new(data)
|
487
|
+
expect( validator.warnings.size ).to eql(1)
|
488
|
+
expect( validator.warnings.first.type).to eql(:duplicate_column_name)
|
489
|
+
expect( validator.warnings.first.category).to eql(:schema)
|
490
|
+
end
|
491
|
+
|
492
|
+
it "should warn if column names are blank" do
|
493
|
+
data = StringIO.new( "minimum," )
|
494
|
+
validator = Csvlint::Validator.new(data)
|
495
|
+
|
496
|
+
expect( validator.validate_header(["minimum", ""]) ).to eql(true)
|
497
|
+
expect( validator.warnings.size ).to eql(1)
|
498
|
+
expect( validator.warnings.first.type).to eql(:empty_column_name)
|
499
|
+
expect( validator.warnings.first.category).to eql(:schema)
|
500
|
+
end
|
501
|
+
|
502
|
+
it "should include info message about missing header when we have assumed a header" do
|
503
|
+
data = StringIO.new( "1,2,3\r\n" )
|
504
|
+
validator = Csvlint::Validator.new(data)
|
505
|
+
|
506
|
+
expect( validator.valid? ).to eql(true)
|
507
|
+
expect( validator.info_messages.size ).to eql(1)
|
508
|
+
expect( validator.info_messages.first.type).to eql(:assumed_header)
|
509
|
+
expect( validator.info_messages.first.category).to eql(:structure)
|
510
|
+
end
|
511
|
+
|
512
|
+
it "should not include info message about missing header when we are told about the header" do
|
513
|
+
data = StringIO.new( "1,2,3\r\n" )
|
514
|
+
validator = Csvlint::Validator.new(data, "header"=>false)
|
515
|
+
expect( validator.valid? ).to eql(true)
|
516
|
+
expect( validator.info_messages.size ).to eql(0)
|
517
|
+
end
|
518
|
+
|
519
|
+
it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
|
520
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
521
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
522
|
+
expect( validator.valid? ).to eql(true)
|
523
|
+
end
|
524
|
+
|
525
|
+
it "should be valid if we have a dialect and the data is from the web" do
|
526
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
527
|
+
#header defaults to true in csv dialect, so this is valid
|
528
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {})
|
529
|
+
expect( validator.valid? ).to eql(true)
|
530
|
+
|
531
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
532
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
|
533
|
+
expect( validator.valid? ).to eql(true)
|
534
|
+
|
535
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
536
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
|
537
|
+
expect( validator.valid? ).to eql(true)
|
538
|
+
end
|
539
|
+
|
540
|
+
end
|
541
|
+
|
266
542
|
context "accessing metadata" do
|
267
|
-
|
543
|
+
|
268
544
|
before :all do
|
269
545
|
stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')))
|
270
546
|
stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(:status => 404)
|
271
547
|
end
|
272
|
-
|
273
|
-
it "can get line break symbol" do
|
274
548
|
|
549
|
+
it "can get line break symbol" do
|
275
550
|
validator = Csvlint::Validator.new("http://example.com/crlf.csv")
|
276
551
|
expect(validator.line_breaks).to eql "\r\n"
|
277
|
-
|
278
552
|
end
|
279
|
-
|
553
|
+
|
280
554
|
end
|
281
|
-
|
555
|
+
|
282
556
|
it "should give access to the complete CSV data file" do
|
283
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
284
|
-
:headers=>{"Content-Type" => "text/csv; header=present"},
|
557
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
558
|
+
:headers=>{"Content-Type" => "text/csv; header=present"},
|
285
559
|
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
286
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
560
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
287
561
|
expect( validator.valid? ).to eql(true)
|
288
562
|
data = validator.data
|
289
|
-
|
563
|
+
|
564
|
+
expect( data.count ).to eql 3
|
290
565
|
expect( data[0] ).to eql ['Foo','Bar','Baz']
|
291
566
|
expect( data[2] ).to eql ['3','2','1']
|
292
567
|
end
|
293
|
-
|
568
|
+
|
569
|
+
it "should count the total number of rows read" do
|
570
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
571
|
+
:headers=>{"Content-Type" => "text/csv; header=present"},
|
572
|
+
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
573
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv")
|
574
|
+
expect(validator.row_count).to eq(3)
|
575
|
+
end
|
576
|
+
|
294
577
|
it "should limit number of lines read" do
|
295
|
-
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
296
|
-
:headers=>{"Content-Type" => "text/csv; header=present"},
|
578
|
+
stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
|
579
|
+
:headers=>{"Content-Type" => "text/csv; header=present"},
|
297
580
|
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
298
|
-
validator = Csvlint::Validator.new("http://example.com/example.csv",
|
581
|
+
validator = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
|
299
582
|
expect( validator.valid? ).to eql(true)
|
300
583
|
data = validator.data
|
301
|
-
expect( data.count ).to eql 2
|
584
|
+
expect( data.count ).to eql 2
|
302
585
|
expect( data[0] ).to eql ['Foo','Bar','Baz']
|
303
586
|
end
|
304
|
-
|
305
|
-
it "should follow redirects to SSL" do
|
306
|
-
stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
|
307
|
-
stub_request(:get, "http://example.com/redirect-metadata.json").to_return(:status => 404)
|
308
|
-
stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
|
309
|
-
:headers=>{"Content-Type" => "text/csv; header=present"},
|
310
|
-
:body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
311
587
|
|
312
|
-
|
313
|
-
|
588
|
+
context "with a lambda" do
|
589
|
+
|
590
|
+
it "should call a lambda for each line" do
|
591
|
+
@count = 0
|
592
|
+
mylambda = lambda { |row| @count = @count + 1 }
|
593
|
+
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')), {}, nil, { lambda: mylambda })
|
594
|
+
expect(@count).to eq(3)
|
595
|
+
end
|
596
|
+
|
597
|
+
it "reports back the status of each line" do
|
598
|
+
@results = []
|
599
|
+
mylambda = lambda { |row| @results << row.current_line }
|
600
|
+
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')), {}, nil, { lambda: mylambda })
|
601
|
+
expect(@results.count).to eq(3)
|
602
|
+
expect(@results[0]).to eq(1)
|
603
|
+
expect(@results[1]).to eq(2)
|
604
|
+
expect(@results[2]).to eq(3)
|
605
|
+
end
|
606
|
+
|
314
607
|
end
|
315
|
-
|
608
|
+
|
609
|
+
# Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
|
610
|
+
# it "should follow redirects to SSL" do
|
611
|
+
# stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
|
612
|
+
# stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
|
613
|
+
# :headers=>{"Content-Type" => "text/csv; header=present"},
|
614
|
+
# :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
|
615
|
+
#
|
616
|
+
# validator = Csvlint::Validator.new("http://example.com/redirect")
|
617
|
+
# expect( validator.valid? ).to eql(true)
|
618
|
+
# end
|
619
|
+
end
|