csvlint 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  module Csvlint
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -300,6 +300,20 @@ describe Csvlint::Csvw::NumberFormat do
300
300
  expect(format.parse("12.345,67,8")).to eql(nil)
301
301
  end
302
302
 
303
+ it "should parse numbers that match 0.###,### correctly" do
304
+ format = Csvlint::Csvw::NumberFormat.new("0.###,###")
305
+ expect(format.parse("1")).to eq(1)
306
+ expect(format.parse("12.3")).to eql(12.3)
307
+ expect(format.parse("12.34")).to eql(12.34)
308
+ expect(format.parse("12.345")).to eq(12.345)
309
+ expect(format.parse("12.3456")).to eql(nil)
310
+ expect(format.parse("12.345,6")).to eql(12.3456)
311
+ expect(format.parse("12.34,56")).to eql(nil)
312
+ expect(format.parse("12.345,67")).to eq(12.34567)
313
+ expect(format.parse("12.345,678")).to eql(12.345678)
314
+ expect(format.parse("12.345,67,8")).to eql(nil)
315
+ end
316
+
303
317
  it "should parse numbers that match 0.000,### correctly" do
304
318
  format = Csvlint::Csvw::NumberFormat.new("0.000,###")
305
319
  expect(format.parse("1")).to eq(nil)
@@ -1,14 +1,101 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Csvlint::Validator do
4
-
4
+
5
5
  before do
6
6
  stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
7
7
  stub_request(:get, "http://example.com/.well-known/csvm").to_return(:status => 404)
8
8
  stub_request(:get, "http://example.com/example.csv-metadata.json").to_return(:status => 404)
9
9
  stub_request(:get, "http://example.com/csv-metadata.json").to_return(:status => 404)
10
10
  end
11
-
11
+
12
+ it "should validate from a URL" do
13
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
14
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
15
+
16
+ expect(validator.valid?).to eql(true)
17
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
18
+ expect(validator.instance_variable_get("@col_counts").count).to eql(3)
19
+ expect(validator.data.size).to eql(3)
20
+ end
21
+
22
+ it "should validate from a file path" do
23
+ validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
24
+
25
+ expect(validator.valid?).to eql(true)
26
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
27
+ expect(validator.instance_variable_get("@col_counts").count).to eql(3)
28
+ expect(validator.data.size).to eql(3)
29
+ end
30
+
31
+ context "multi line CSV validation with included schema" do
32
+
33
+ end
34
+
35
+ context "single line row validation with included schema" do
36
+
37
+ end
38
+
39
+ context "validation with multiple lines: " do
40
+
41
+ # TODO multiple lines permits testing of warnings
42
+ # TODO need more assertions in each test IE @formats
43
+ # TODO the phrasing of col_counts if only consulting specs might be confusing
44
+ # TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
45
+ # TODO ^- -> and its less the size of col_counts than the homogeneity of its contents which is important
46
+
47
+ it ".each() -> parse_contents method validates a well formed CSV" do
48
+ # when invoking parse contents
49
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
50
+
51
+ validator = Csvlint::Validator.new(data)
52
+
53
+ expect(validator.valid?).to eql(true)
54
+ # TODO would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total
55
+ # TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
56
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
57
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
58
+ expect(validator.data.size).to eql(4)
59
+
60
+ end
61
+
62
+ it ".each() -> `parse_contents` parses malformed CSV and catches unclosed quote" do
63
+ # doesn't build warnings because check_consistency isn't invoked
64
+ # TODO below is trailing whitespace but is interpreted as an unclosed quote
65
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
66
+
67
+ validator = Csvlint::Validator.new(data)
68
+
69
+ expect(validator.valid?).to eql(false)
70
+ expect(validator.errors.first.type).to eql(:unclosed_quote)
71
+ expect(validator.errors.count).to eql(1)
72
+ end
73
+
74
+ it ".each() -> `parse_contents` parses malformed CSV and catches whitespace and edge case" do
75
+ # when this data gets passed the header it rescues a whitespace error, resulting in the header row being discarded
76
+ # TODO - check if this is an edge case, currently passing because it requires advice on how to specify
77
+ data = StringIO.new(" \"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"Foo\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
78
+
79
+ validator = Csvlint::Validator.new(data)
80
+
81
+ expect(validator.valid?).to eql(false)
82
+ expect(validator.errors.first.type).to eql(:whitespace)
83
+ expect(validator.errors.count).to eql(2)
84
+ end
85
+
86
+ it "handles line breaks within a cell" do
87
+ data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"e\",\"this is\r\nvalid\"\r\n\"a\",\"b\",\"c\"")
88
+ validator = Csvlint::Validator.new(data)
89
+ expect(validator.valid?).to eql(true)
90
+ end
91
+
92
+ it "handles multiple line breaks within a cell" do
93
+ data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"this is\r\n valid\",\"as is this\r\n too\"")
94
+ validator = Csvlint::Validator.new(data)
95
+ expect(validator.valid?).to eql(true)
96
+ end
97
+ end
98
+
12
99
  context "csv dialect" do
13
100
  it "should provide sensible defaults for CSV parsing" do
14
101
  validator = Csvlint::Validator.new("http://example.com/example.csv")
@@ -20,7 +107,7 @@ describe Csvlint::Validator do
20
107
  :skip_blanks => false
21
108
  })
22
109
  end
23
-
110
+
24
111
  it "should map CSV DDF to correct values" do
25
112
  validator = Csvlint::Validator.new("http://example.com/example.csv")
26
113
  opts = validator.dialect_to_csv_options( {
@@ -33,67 +120,145 @@ describe Csvlint::Validator do
33
120
  :row_sep => "\n",
34
121
  :quote_char => "'",
35
122
  :skip_blanks => false
36
- })
123
+ })
124
+ end
125
+
126
+ it ".each() -> `validate` to pass input in streaming fashion" do
127
+ # warnings are built when validate is used to call all three methods
128
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
129
+ validator = Csvlint::Validator.new(data)
130
+
131
+ expect(validator.valid?).to eql(true)
132
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
133
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
134
+ expect(validator.data.size).to eql(4)
135
+ expect(validator.info_messages.count).to eql(1)
37
136
  end
38
-
137
+
138
+ it ".each() -> `validate` parses malformed CSV, populates errors, warnings & info_msgs,invokes finish()" do
139
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"two\",\"3\"\r\n\"3\",\"2\", \"1\"")
140
+
141
+ validator = Csvlint::Validator.new(data)
142
+
143
+ expect(validator.valid?).to eql(false)
144
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
145
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
146
+ expect(validator.data.size).to eql(5)
147
+ expect(validator.info_messages.count).to eql(1)
148
+ expect(validator.errors.count).to eql(1)
149
+ expect(validator.errors.first.type).to eql(:whitespace)
150
+ expect(validator.warnings.count).to eql(1)
151
+ expect(validator.warnings.first.type).to eql(:inconsistent_values)
152
+ end
153
+
154
+ it "File.open.each_line -> `validate` passes a valid csv" do
155
+ filename = 'valid_many_rows.csv'
156
+ file = File.join(File.expand_path(Dir.pwd), "features", "fixtures", filename)
157
+ validator = Csvlint::Validator.new(File.new(file))
158
+
159
+ expect(validator.valid?).to eql(true)
160
+ expect(validator.info_messages.size).to eql(1)
161
+ expect(validator.info_messages.first.type).to eql(:assumed_header)
162
+ expect(validator.info_messages.first.category).to eql(:structure)
163
+ end
164
+
39
165
  end
40
-
41
- context "when detecting headers" do
42
- it "should default to expecting a header" do
43
- validator = Csvlint::Validator.new("http://example.com/example.csv")
44
- expect( validator.header? ).to eql(true)
166
+
167
+ context "with a single row" do
168
+
169
+ it "validates correctly" do
170
+ stream = "\"a\",\"b\",\"c\"\r\n"
171
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
172
+ expect(validator.valid?).to eql(true)
45
173
  end
46
-
47
- it "should look in CSV options to detect header" do
48
- opts = {
49
- "header" => true
50
- }
51
- validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
52
- expect( validator.header? ).to eql(true)
53
- opts = {
54
- "header" => false
55
- }
56
- validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
57
- expect( validator.header? ).to eql(false)
174
+
175
+ it "checks for non rfc line breaks" do
176
+ stream = "\"a\",\"b\",\"c\"\n"
177
+ validator = Csvlint::Validator.new(StringIO.new(stream), {"header" => false})
178
+ expect(validator.valid?).to eql(true)
179
+ expect(validator.info_messages.count).to eq(1)
180
+ expect(validator.info_messages.first.type).to eql(:nonrfc_line_breaks)
58
181
  end
59
182
 
60
- it "should look in content-type for header=absent" do
61
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=absent"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
62
- validator = Csvlint::Validator.new("http://example.com/example.csv")
63
- expect( validator.header? ).to eql(false)
64
- expect( validator.errors.size ).to eql(0)
183
+ it "checks for blank rows" do
184
+ data = StringIO.new('"","",')
185
+ validator = Csvlint::Validator.new(data, "header" => false)
186
+
187
+ expect(validator.valid?).to eql(false)
188
+ expect(validator.errors.count).to eq(1)
189
+ expect(validator.errors.first.type).to eql(:blank_rows)
65
190
  end
66
191
 
67
- it "should look in content-type for header=present" do
68
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=present"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
69
- validator = Csvlint::Validator.new("http://example.com/example.csv")
70
- expect( validator.header? ).to eql(true)
71
- expect( validator.errors.size ).to eql(0)
192
+ it "returns the content of the string with the error" do
193
+ stream = "\"\",\"\",\"\"\r\n"
194
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
195
+ expect(validator.errors.first.content).to eql("\"\",\"\",\"\"\r\n")
72
196
  end
73
197
 
74
- it "assume header present if not specified in content type" do
75
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
76
- validator = Csvlint::Validator.new("http://example.com/example.csv")
77
- expect( validator.header? ).to eql(true)
78
- expect( validator.errors.size ).to eql(0)
198
+ it "should presume a header unless told otherwise" do
199
+ stream = "1,2,3\r\n"
200
+ validator = Csvlint::Validator.new(StringIO.new(stream))
201
+
202
+ expect( validator.valid? ).to eql(true)
79
203
  expect( validator.info_messages.size ).to eql(1)
80
204
  expect( validator.info_messages.first.type).to eql(:assumed_header)
205
+ expect( validator.info_messages.first.category).to eql(:structure)
81
206
  end
82
207
 
83
- it "give undeclared header error if content type is wrong" do
84
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/html"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
85
- validator = Csvlint::Validator.new("http://example.com/example.csv")
86
- expect( validator.header? ).to eql(true)
87
- expect( validator.errors.size ).to eql(2)
88
- expect( validator.errors[0].type).to eql(:wrong_content_type)
89
- expect( validator.errors[1].type).to eql(:undeclared_header)
90
- expect( validator.info_messages.size ).to eql(0)
208
+ it "should evaluate the row as 'row 2' when stipulated" do
209
+ stream = "1,2,3\r\n"
210
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
211
+ validator.validate
212
+ expect(validator.valid?).to eql(true)
213
+ expect(validator.info_messages.size).to eql(0)
91
214
  end
92
215
 
93
216
  end
94
-
217
+
218
+ context "it returns the correct error from ERROR_MATCHES" do
219
+
220
+ it "checks for unclosed quotes" do
221
+ stream = "\"a,\"b\",\"c\"\n"
222
+ validator = Csvlint::Validator.new(StringIO.new(stream))
223
+ expect(validator.valid?).to eql(false)
224
+ expect(validator.errors.count).to eq(1)
225
+ expect(validator.errors.first.type).to eql(:unclosed_quote)
226
+ end
227
+
228
+
229
+ # TODO stray quotes is not covered in any spec in this library
230
+ # it "checks for stray quotes" do
231
+ # stream = "\"a\",“b“,\"c\"" "\r\n"
232
+ # validator = Csvlint::Validator.new(stream)
233
+ # validator.validate # implicitly invokes parse_contents(stream)
234
+ # expect(validator.valid?).to eql(false)
235
+ # expect(validator.errors.count).to eq(1)
236
+ # expect(validator.errors.first.type).to eql(:stray_quote)
237
+ # end
238
+
239
+ it "checks for whitespace" do
240
+ stream = " \"a\",\"b\",\"c\"\r\n"
241
+ validator = Csvlint::Validator.new(StringIO.new(stream))
242
+
243
+ expect(validator.valid?).to eql(false)
244
+ expect(validator.errors.count).to eq(1)
245
+ expect(validator.errors.first.type).to eql(:whitespace)
246
+ end
247
+
248
+ it "returns line break errors if incorrectly specified" do
249
+ # TODO the logic for catching this error message is very esoteric
250
+ stream = "\"a\",\"b\",\"c\"\n"
251
+ validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
252
+ expect(validator.valid?).to eql(false)
253
+ expect(validator.errors.count).to eq(1)
254
+ expect(validator.errors.first.type).to eql(:line_breaks)
255
+ end
256
+
257
+ end
258
+
95
259
  context "when validating headers" do
96
- it "should warn if column names aren't unique" do
260
+
261
+ it "should warn if column names aren't unique" do
97
262
  data = StringIO.new( "minimum, minimum" )
98
263
  validator = Csvlint::Validator.new(data)
99
264
  validator.reset
@@ -106,210 +271,349 @@ describe Csvlint::Validator do
106
271
  it "should warn if column names are blank" do
107
272
  data = StringIO.new( "minimum," )
108
273
  validator = Csvlint::Validator.new(data)
109
-
274
+
110
275
  expect( validator.validate_header(["minimum", ""]) ).to eql(true)
111
276
  expect( validator.warnings.size ).to eql(1)
112
277
  expect( validator.warnings.first.type).to eql(:empty_column_name)
113
278
  expect( validator.warnings.first.category).to eql(:schema)
114
279
  end
115
-
280
+
116
281
  it "should include info message about missing header when we have assumed a header" do
117
282
  data = StringIO.new( "1,2,3\r\n" )
118
283
  validator = Csvlint::Validator.new(data)
119
-
120
284
  expect( validator.valid? ).to eql(true)
121
285
  expect( validator.info_messages.size ).to eql(1)
122
286
  expect( validator.info_messages.first.type).to eql(:assumed_header)
123
287
  expect( validator.info_messages.first.category).to eql(:structure)
124
- end
288
+ end
125
289
 
126
290
  it "should not include info message about missing header when we are told about the header" do
127
291
  data = StringIO.new( "1,2,3\r\n" )
128
- validator = Csvlint::Validator.new(data, "header"=>false)
129
-
292
+ validator = Csvlint::Validator.new(data, "header" => false)
130
293
  expect( validator.valid? ).to eql(true)
131
294
  expect( validator.info_messages.size ).to eql(0)
132
295
  end
133
-
134
- it "should be an error if we have assumed a header, there is no dialect and there's no content-type" do
135
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
136
- validator = Csvlint::Validator.new("http://example.com/example.csv")
137
- expect( validator.valid? ).to eql(false)
138
- end
139
-
140
- it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
141
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
142
- validator = Csvlint::Validator.new("http://example.com/example.csv")
143
- expect( validator.valid? ).to eql(true)
144
- end
145
-
146
- it "should be valid if we have a dialect and the data is from the web" do
147
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
148
- #header defaults to true in csv dialect, so this is valid
149
- validator = Csvlint::Validator.new("http://example.com/example.csv", {})
150
- expect( validator.valid? ).to eql(true)
151
-
152
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
153
- validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
154
- expect( validator.valid? ).to eql(true)
155
-
156
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
157
- validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
158
- expect( validator.valid? ).to eql(true)
159
- end
160
-
161
296
  end
162
-
297
+
163
298
  context "build_formats" do
164
-
299
+
165
300
  {
166
- :string => "foo",
167
- :numeric => "1",
168
- :uri => "http://www.example.com",
169
- :dateTime_iso8601 => "2013-01-01T13:00:00Z",
170
- :date_db => "2013-01-01",
171
- :dateTime_hms => "13:00:00"
301
+ :string => "foo",
302
+ :numeric => "1",
303
+ :uri => "http://www.example.com",
304
+ :dateTime_iso8601 => "2013-01-01T13:00:00Z",
305
+ :date_db => "2013-01-01",
306
+ :dateTime_hms => "13:00:00"
172
307
  }.each do |type, content|
173
308
  it "should return the format of #{type} correctly" do
174
309
  row = [content]
175
-
310
+
176
311
  validator = Csvlint::Validator.new("http://example.com/example.csv")
177
312
  validator.build_formats(row)
178
313
  formats = validator.instance_variable_get("@formats")
179
-
314
+
180
315
  expect(formats[0].keys.first).to eql type
181
316
  end
182
317
  end
183
-
318
+
184
319
  it "treats floats and ints the same" do
185
320
  row = ["12", "3.1476"]
186
-
321
+
187
322
  validator = Csvlint::Validator.new("http://example.com/example.csv")
188
323
  validator.build_formats(row)
189
324
  formats = validator.instance_variable_get("@formats")
190
-
325
+
191
326
  expect(formats[0].keys.first).to eql :numeric
192
327
  expect(formats[1].keys.first).to eql :numeric
193
328
  end
194
-
329
+
195
330
  it "should ignore blank arrays" do
196
331
  row = []
197
-
332
+
198
333
  validator = Csvlint::Validator.new("http://example.com/example.csv")
199
334
  validator.build_formats(row)
200
- formats = validator.instance_variable_get("@formats")
335
+
336
+ formats = validator.instance_variable_get("@formats")
201
337
  expect(formats).to eql []
202
338
  end
203
-
339
+
204
340
  it "should work correctly for single columns" do
205
341
  rows = [
206
342
  ["foo"],
207
343
  ["bar"],
208
344
  ["baz"]
209
- ]
210
-
345
+ ]
346
+
211
347
  validator = Csvlint::Validator.new("http://example.com/example.csv")
212
-
348
+
213
349
  rows.each_with_index do |row, i|
214
350
  validator.build_formats(row)
215
351
  end
216
-
352
+
217
353
  formats = validator.instance_variable_get("@formats")
218
-
219
354
  expect(formats).to eql [{:string => 3}]
220
355
  end
221
-
356
+
222
357
  it "should return formats correctly if a row is blank" do
223
358
  rows = [
224
359
  [],
225
- ["foo","1","$2345"]
226
- ]
227
-
360
+ ["foo", "1", "$2345"]
361
+ ]
362
+
228
363
  validator = Csvlint::Validator.new("http://example.com/example.csv")
229
-
364
+
230
365
  rows.each_with_index do |row, i|
231
366
  validator.build_formats(row)
232
367
  end
233
-
234
- formats = validator.instance_variable_get("@formats")
235
-
368
+
369
+ formats = validator.instance_variable_get("@formats")
370
+
236
371
  expect(formats).to eql [
237
372
  {:string => 1},
238
373
  {:numeric => 1},
239
374
  {:string => 1},
240
375
  ]
241
376
  end
242
-
377
+
243
378
  end
244
-
379
+
380
+ context "csv dialect" do
381
+ it "should provide sensible defaults for CSV parsing" do
382
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
383
+ opts = validator.instance_variable_get("@csv_options")
384
+ expect(opts).to include({
385
+ :col_sep => ",",
386
+ :row_sep => :auto,
387
+ :quote_char => '"',
388
+ :skip_blanks => false
389
+ })
390
+ end
391
+
392
+ it "should map CSV DDF to correct values" do
393
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
394
+ opts = validator.dialect_to_csv_options({
395
+ "lineTerminator" => "\n",
396
+ "delimiter" => "\t",
397
+ "quoteChar" => "'"
398
+ })
399
+ expect(opts).to include({
400
+ :col_sep => "\t",
401
+ :row_sep => "\n",
402
+ :quote_char => "'",
403
+ :skip_blanks => false
404
+ })
405
+ end
406
+
407
+ end
408
+
245
409
  context "check_consistency" do
246
-
410
+
247
411
  it "should return a warning if columns have inconsistent values" do
248
412
  formats = [
249
413
  {:string => 3},
250
414
  {:string => 2, :numeric => 1},
251
415
  {:numeric => 3},
252
- ]
253
-
416
+ ]
417
+
254
418
  validator = Csvlint::Validator.new("http://example.com/example.csv")
255
419
  validator.instance_variable_set("@formats", formats)
256
420
  validator.check_consistency
257
-
258
- warnings = validator.instance_variable_get("@warnings")
421
+
422
+ warnings = validator.instance_variable_get("@warnings")
259
423
  warnings.delete_if { |h| h.type != :inconsistent_values }
260
-
424
+
261
425
  expect(warnings.count).to eql 1
262
426
  end
263
-
427
+
264
428
  end
265
-
429
+
430
+ #TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
431
+ context "when detecting headers" do
432
+ it "should default to expecting a header" do
433
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
434
+ expect( validator.header? ).to eql(true)
435
+ end
436
+
437
+ it "should look in CSV options to detect header" do
438
+ opts = {
439
+ "header" => true
440
+ }
441
+ validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
442
+ expect( validator.header? ).to eql(true)
443
+ opts = {
444
+ "header" => false
445
+ }
446
+ validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
447
+ expect( validator.header? ).to eql(false)
448
+ end
449
+
450
+ it "should look in content-type for header=absent" do
451
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=absent"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
452
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
453
+ expect( validator.header? ).to eql(false)
454
+ expect( validator.errors.size ).to eql(0)
455
+ end
456
+
457
+ it "should look in content-type for header=present" do
458
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=present"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
459
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
460
+ expect( validator.header? ).to eql(true)
461
+ expect( validator.errors.size ).to eql(0)
462
+ end
463
+
464
+ it "assume header present if not specified in content type" do
465
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
466
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
467
+ expect( validator.header? ).to eql(true)
468
+ expect( validator.errors.size ).to eql(0)
469
+ expect( validator.info_messages.size ).to eql(1)
470
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
471
+ end
472
+
473
+ it "give wrong content type error if content type is wrong" do
474
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/html"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
475
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
476
+ expect( validator.header? ).to eql(true)
477
+ expect( validator.errors.size ).to eql(1)
478
+ expect( validator.errors[0].type).to eql(:wrong_content_type)
479
+ end
480
+
481
+ end
482
+
483
+ context "when validating headers" do
484
+ it "should warn if column names aren't unique" do
485
+ data = StringIO.new( "minimum, minimum" )
486
+ validator = Csvlint::Validator.new(data)
487
+ expect( validator.warnings.size ).to eql(1)
488
+ expect( validator.warnings.first.type).to eql(:duplicate_column_name)
489
+ expect( validator.warnings.first.category).to eql(:schema)
490
+ end
491
+
492
+ it "should warn if column names are blank" do
493
+ data = StringIO.new( "minimum," )
494
+ validator = Csvlint::Validator.new(data)
495
+
496
+ expect( validator.validate_header(["minimum", ""]) ).to eql(true)
497
+ expect( validator.warnings.size ).to eql(1)
498
+ expect( validator.warnings.first.type).to eql(:empty_column_name)
499
+ expect( validator.warnings.first.category).to eql(:schema)
500
+ end
501
+
502
+ it "should include info message about missing header when we have assumed a header" do
503
+ data = StringIO.new( "1,2,3\r\n" )
504
+ validator = Csvlint::Validator.new(data)
505
+
506
+ expect( validator.valid? ).to eql(true)
507
+ expect( validator.info_messages.size ).to eql(1)
508
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
509
+ expect( validator.info_messages.first.category).to eql(:structure)
510
+ end
511
+
512
+ it "should not include info message about missing header when we are told about the header" do
513
+ data = StringIO.new( "1,2,3\r\n" )
514
+ validator = Csvlint::Validator.new(data, "header"=>false)
515
+ expect( validator.valid? ).to eql(true)
516
+ expect( validator.info_messages.size ).to eql(0)
517
+ end
518
+
519
+ it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
520
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
521
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
522
+ expect( validator.valid? ).to eql(true)
523
+ end
524
+
525
+ it "should be valid if we have a dialect and the data is from the web" do
526
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
527
+ #header defaults to true in csv dialect, so this is valid
528
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {})
529
+ expect( validator.valid? ).to eql(true)
530
+
531
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
532
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
533
+ expect( validator.valid? ).to eql(true)
534
+
535
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
536
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
537
+ expect( validator.valid? ).to eql(true)
538
+ end
539
+
540
+ end
541
+
266
542
  context "accessing metadata" do
267
-
543
+
268
544
  before :all do
269
545
  stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')))
270
546
  stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(:status => 404)
271
547
  end
272
-
273
- it "can get line break symbol" do
274
548
 
549
+ it "can get line break symbol" do
275
550
  validator = Csvlint::Validator.new("http://example.com/crlf.csv")
276
551
  expect(validator.line_breaks).to eql "\r\n"
277
-
278
552
  end
279
-
553
+
280
554
  end
281
-
555
+
282
556
  it "should give access to the complete CSV data file" do
283
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
284
- :headers=>{"Content-Type" => "text/csv; header=present"},
557
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
558
+ :headers=>{"Content-Type" => "text/csv; header=present"},
285
559
  :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
286
- validator = Csvlint::Validator.new("http://example.com/example.csv")
560
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
287
561
  expect( validator.valid? ).to eql(true)
288
562
  data = validator.data
289
- expect( data.count ).to eql 4
563
+
564
+ expect( data.count ).to eql 3
290
565
  expect( data[0] ).to eql ['Foo','Bar','Baz']
291
566
  expect( data[2] ).to eql ['3','2','1']
292
567
  end
293
-
568
+
569
+ it "should count the total number of rows read" do
570
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
571
+ :headers=>{"Content-Type" => "text/csv; header=present"},
572
+ :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
573
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
574
+ expect(validator.row_count).to eq(3)
575
+ end
576
+
294
577
  it "should limit number of lines read" do
295
- stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
296
- :headers=>{"Content-Type" => "text/csv; header=present"},
578
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
579
+ :headers=>{"Content-Type" => "text/csv; header=present"},
297
580
  :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
298
- validator = Csvlint::Validator.new("http://example.com/example.csv", nil, nil, limit_lines: 2)
581
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
299
582
  expect( validator.valid? ).to eql(true)
300
583
  data = validator.data
301
- expect( data.count ).to eql 2
584
+ expect( data.count ).to eql 2
302
585
  expect( data[0] ).to eql ['Foo','Bar','Baz']
303
586
  end
304
-
305
- it "should follow redirects to SSL" do
306
- stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
307
- stub_request(:get, "http://example.com/redirect-metadata.json").to_return(:status => 404)
308
- stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
309
- :headers=>{"Content-Type" => "text/csv; header=present"},
310
- :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
311
587
 
312
- validator = Csvlint::Validator.new("http://example.com/redirect")
313
- expect( validator.valid? ).to eql(true)
588
+ context "with a lambda" do
589
+
590
+ it "should call a lambda for each line" do
591
+ @count = 0
592
+ mylambda = lambda { |row| @count = @count + 1 }
593
+ validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')), {}, nil, { lambda: mylambda })
594
+ expect(@count).to eq(3)
595
+ end
596
+
597
+ it "reports back the status of each line" do
598
+ @results = []
599
+ mylambda = lambda { |row| @results << row.current_line }
600
+ validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')), {}, nil, { lambda: mylambda })
601
+ expect(@results.count).to eq(3)
602
+ expect(@results[0]).to eq(1)
603
+ expect(@results[1]).to eq(2)
604
+ expect(@results[2]).to eq(3)
605
+ end
606
+
314
607
  end
315
- end
608
+
609
+ # Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
610
+ # it "should follow redirects to SSL" do
611
+ # stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
612
+ # stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
613
+ # :headers=>{"Content-Type" => "text/csv; header=present"},
614
+ # :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
615
+ #
616
+ # validator = Csvlint::Validator.new("http://example.com/redirect")
617
+ # expect( validator.valid? ).to eql(true)
618
+ # end
619
+ end