csvlint 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +22 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +214 -0
  8. data/Rakefile +17 -0
  9. data/bin/create_schema +32 -0
  10. data/bin/csvlint +52 -0
  11. data/csvlint.gemspec +39 -0
  12. data/features/check_format.feature +46 -0
  13. data/features/csv_options.feature +35 -0
  14. data/features/fixtures/cr-line-endings.csv +1 -0
  15. data/features/fixtures/crlf-line-endings.csv +3 -0
  16. data/features/fixtures/inconsistent-line-endings.csv +2 -0
  17. data/features/fixtures/invalid-byte-sequence.csv +24 -0
  18. data/features/fixtures/lf-line-endings.csv +3 -0
  19. data/features/fixtures/spreadsheet.xls +0 -0
  20. data/features/fixtures/title-row.csv +4 -0
  21. data/features/fixtures/valid.csv +3 -0
  22. data/features/fixtures/windows-line-endings.csv +2 -0
  23. data/features/information.feature +22 -0
  24. data/features/parse_csv.feature +90 -0
  25. data/features/schema_validation.feature +63 -0
  26. data/features/sources.feature +18 -0
  27. data/features/step_definitions/csv_options_steps.rb +19 -0
  28. data/features/step_definitions/information_steps.rb +13 -0
  29. data/features/step_definitions/parse_csv_steps.rb +30 -0
  30. data/features/step_definitions/schema_validation_steps.rb +7 -0
  31. data/features/step_definitions/sources_steps.rb +7 -0
  32. data/features/step_definitions/validation_errors_steps.rb +43 -0
  33. data/features/step_definitions/validation_info_steps.rb +18 -0
  34. data/features/step_definitions/validation_warnings_steps.rb +46 -0
  35. data/features/support/env.rb +30 -0
  36. data/features/support/webmock.rb +1 -0
  37. data/features/validation_errors.feature +151 -0
  38. data/features/validation_info.feature +24 -0
  39. data/features/validation_warnings.feature +74 -0
  40. data/lib/csvlint.rb +13 -0
  41. data/lib/csvlint/error_collector.rb +43 -0
  42. data/lib/csvlint/error_message.rb +15 -0
  43. data/lib/csvlint/field.rb +102 -0
  44. data/lib/csvlint/schema.rb +69 -0
  45. data/lib/csvlint/types.rb +113 -0
  46. data/lib/csvlint/validate.rb +253 -0
  47. data/lib/csvlint/version.rb +3 -0
  48. data/lib/csvlint/wrapped_io.rb +39 -0
  49. data/spec/field_spec.rb +247 -0
  50. data/spec/schema_spec.rb +149 -0
  51. data/spec/spec_helper.rb +20 -0
  52. data/spec/validator_spec.rb +279 -0
  53. metadata +367 -0
@@ -0,0 +1,35 @@
1
+ Feature: CSV options
2
+
3
+ Scenario: Successfully parse a valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ 'Foo';'Bar';'Baz'
7
+ '1';'2';'3'
8
+ '3';'2';'1'
9
+ """
10
+ And I set the delimiter to ";"
11
+ And I set quotechar to "'"
12
+ And it is stored at the url "http://example.com/example1.csv"
13
+ When I ask if the CSV is valid
14
+ Then I should get the value of true
15
+
16
+ Scenario: Warn if options seem to return invalid data
17
+ Given I have a CSV with the following content:
18
+ """
19
+ 'Foo';'Bar';'Baz'
20
+ '1';'2';'3'
21
+ '3';'2';'1'
22
+ """
23
+ And I set the delimiter to ","
24
+ And I set quotechar to """
25
+ And it is stored at the url "http://example.com/example1.csv"
26
+ And I ask if there are warnings
27
+ Then there should be 1 warnings
28
+ And that warning should have the type "check_options"
29
+
30
+ Scenario: Use esoteric line endings
31
+ Given I have a CSV file called "windows-line-endings.csv"
32
+ And it is stored at the url "http://example.com/example1.csv"
33
+ When I ask if the CSV is valid
34
+ Then I should get the value of true
35
+
@@ -0,0 +1 @@
1
+ "Foo","Bar","Baz"
@@ -0,0 +1,3 @@
1
+ "Foo","Bsr","Baz"
2
+ "Biff","Baff","Boff"
3
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,2 @@
1
+ "Foo","Bsr","Baz"
2
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,24 @@
1
+ "Data","Dependencia Origem","Hist�rico","Data do Balancete","N�mero do documento","Valor",
2
+ "10/31/2012","","Saldo Anterior","","0","100.00",
3
+ "11/01/2012","0000-9","Transfer�ncia on line - 01/11 4885 256620-6 XXXXXXXXXXXXX","","224885000256620","100.00",
4
+ "11/01/2012","","Dep�sito COMPE - 033 0502 27588602104 XXXXXXXXXXXXXX","","101150","100.00",
5
+ "11/01/2012","","Proventos","","496774","1000.00",
6
+ "11/01/2012","","Benef�cio","","496775","100.00",
7
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 09:45 XXXXXXXXXXX","","135102","-1.00",
8
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 09:48 XXXXXXXXXXX","","235338","-10.00",
9
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 12:35 XXXXXXXX","","345329","-10.00",
10
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 23:57 XXXXXXXXXXXXXXXX","","686249","-10.00",
11
+ "11/01/2012","0000-0","Saque com cart�o - 01/11 13:17 XXXXXXXXXXXXXXXX","","11317296267021","-10.00",
12
+ "11/01/2012","","Pagto conta telefone - VIVO DF","","110101","-100.00",
13
+ "11/01/2012","","Cobran�a de I.O.F.","","391100701","-1.00",
14
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 16:57 XXXXXXXXXXXX","","161057","-10.00",
15
+ "11/05/2012","0000-0","Compra com Cart�o - 03/11 18:57 XXXXXXXXXXXXXXX","","168279","-10.00",
16
+ "11/05/2012","0000-0","Compra com Cart�o - 05/11 12:32 XXXXXXXXXXXXXXXXX","","245166","-10.00",
17
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 17:18 XXXXXXXXXXXXX","","262318","-1.00",
18
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 22:46 XXXXXXXXXXX","","382002","-100.00",
19
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 23:19 XXXXXXXXXXX","","683985","-1.00",
20
+ "11/05/2012","0000-0","Compra com Cart�o - 03/11 01:19 XXXXXXXXXXXXXXXX","","704772","-10.00",
21
+ "11/05/2012","0000-0","Compra com Cart�o - 03/11 11:08 XXXXXXXX","","840112","-1.00",
22
+ "11/05/2012","0000-0","Saque com cart�o - 05/11 19:24 XXXXXXXXXXXXXXXXX","","51924256267021","-10.00",
23
+ "11/05/2012","0000-0","Transfer�ncia on line - 05/11 4885 256620-6 XXXXXXXXXXXXX","","224885000256620","-100.00",
24
+ "11/05/2012","","Pagamento de T�tulo - XXXXXXXXXXXXXXXXXXX","","110501","-100.00",
@@ -0,0 +1,3 @@
1
+ "Foo","Bsr","Baz"
2
+ "Biff","Baff","Boff"
3
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,4 @@
1
+ "This is a title row",,
2
+ "Foo","Bsr","Baz"
3
+ "Biff","Baff","Boff"
4
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,3 @@
1
+ "Foo","Bar","Baz"
2
+ "1","2","3"
3
+ "3","2","1"
@@ -0,0 +1,2 @@
1
+ a,b,c
2
+ d,e,f
@@ -0,0 +1,22 @@
1
+ Feature: Return information
2
+
3
+ Background:
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "abc","2","3"
7
+ """
8
+ And it is encoded as "utf-8"
9
+ And the content type is "text/csv"
10
+ And it is stored at the url "http://example.com/example1.csv?query=true"
11
+
12
+ Scenario: Return encoding
13
+ Then the "encoding" should be "utf-8"
14
+
15
+ Scenario: Return content type
16
+ Then the "content_type" should be "text/csv"
17
+
18
+ Scenario: Return extension
19
+ Then the "extension" should be ".csv"
20
+
21
+ Scenario: Return meta
22
+ Then the metadata content type should be "text/csv; charset=utf-8"
@@ -0,0 +1,90 @@
1
+ Feature: Parse CSV
2
+
3
+ Scenario: Successfully parse a valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a CSV with newlines in quoted fields
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "a","b","c"
18
+ "d","e","this is
19
+ valid"
20
+ "a","b","c"
21
+ """
22
+ And it is stored at the url "http://example.com/example1.csv"
23
+ When I ask if the CSV is valid
24
+ Then I should get the value of true
25
+
26
+ Scenario: Successfully parse a CSV with multiple newlines in quoted fields
27
+ Given I have a CSV with the following content:
28
+ """
29
+ "a","b","c"
30
+ "d","this is
31
+ valid","as is this
32
+ too"
33
+ """
34
+ And it is stored at the url "http://example.com/example1.csv"
35
+ When I ask if the CSV is valid
36
+ Then I should get the value of true
37
+
38
+ Scenario: Successfully report an invalid CSV
39
+ Given I have a CSV with the following content:
40
+ """
41
+ "Foo", "Bar" , "Baz
42
+ """
43
+ And it is stored at the url "http://example.com/example1.csv"
44
+ When I ask if the CSV is valid
45
+ Then I should get the value of false
46
+
47
+ Scenario: Successfully report a CSV with incorrect quoting
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "Foo","Bar","Baz
51
+ """
52
+ And it is stored at the url "http://example.com/example1.csv"
53
+ When I ask if the CSV is valid
54
+ Then I should get the value of false
55
+
56
+ Scenario: Successfully report a CSV with incorrect whitespace
57
+ Given I have a CSV with the following content:
58
+ """
59
+ "Foo","Bar", "Baz"
60
+ """
61
+ And it is stored at the url "http://example.com/example1.csv"
62
+ When I ask if the CSV is valid
63
+ Then I should get the value of false
64
+
65
+ Scenario: Successfully report a CSV with ragged rows
66
+ Given I have a CSV with the following content:
67
+ """
68
+ "col1","col2","col2"
69
+ "1","2","3"
70
+ "4","5"
71
+ """
72
+ And it is stored at the url "http://example.com/example1.csv"
73
+ When I ask if the CSV is valid
74
+ Then I should get the value of false
75
+
76
+ Scenario: Don't class blank values as inconsistencies
77
+ Given I have a CSV with the following content:
78
+ """
79
+ "col1","col2","col3"
80
+ "1","2","3"
81
+ "4","5","6"
82
+ "","7","8"
83
+ "9","10","11"
84
+ "","12","13"
85
+ "","14","15"
86
+ "16","17","18"
87
+ """
88
+ And it is stored at the url "http://example.com/example1.csv"
89
+ When I ask if there are warnings
90
+ Then there should be 0 warnings
@@ -0,0 +1,63 @@
1
+ Feature: Schema Validation
2
+
3
+ Scenario: Valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Bob","1234","bob@example.org"
7
+ "Alice","5","alice@example.com"
8
+ """
9
+ And it is stored at the url "http://example.com/example1.csv"
10
+ And I have a schema with the following content:
11
+ """
12
+ {
13
+ "fields": [
14
+ { "name": "Name", "constraints": { "required": true } },
15
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
16
+ { "name": "Email", "constraints": { "required": true } }
17
+ ]
18
+ }
19
+ """
20
+ When I ask if there are errors
21
+ Then there should be 0 error
22
+
23
+ Scenario: Schema invalid CSV
24
+ Given I have a CSV with the following content:
25
+ """
26
+ "Bob","1234","bob@example.org"
27
+ "Alice","5","alice@example.com"
28
+ """
29
+ And it is stored at the url "http://example.com/example1.csv"
30
+ And I have a schema with the following content:
31
+ """
32
+ {
33
+ "fields": [
34
+ { "name": "Name", "constraints": { "required": true } },
35
+ { "name": "Id", "constraints": { "required": true, "minLength": 3 } },
36
+ { "name": "Email", "constraints": { "required": true } }
37
+ ]
38
+ }
39
+ """
40
+ When I ask if there are errors
41
+ Then there should be 1 error
42
+
43
+ Scenario: CSV with incorrect header
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "name","id","contact"
47
+ "Bob","1234","bob@example.org"
48
+ "Alice","5","alice@example.com"
49
+ """
50
+ And it is stored at the url "http://example.com/example1.csv"
51
+ And I have a schema with the following content:
52
+ """
53
+ {
54
+ "fields": [
55
+ { "name": "name", "constraints": { "required": true } },
56
+ { "name": "id", "constraints": { "required": true, "minLength": 3 } },
57
+ { "name": "email", "constraints": { "required": true } }
58
+ ]
59
+ }
60
+ """
61
+ When I ask if there are warnings
62
+ Then there should be 1 warnings
63
+
@@ -0,0 +1,18 @@
1
+ Feature: Parse CSV from Different Sources
2
+
3
+ Scenario: Successfully parse a valid CSV from a StringIO
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is parsed as a StringIO
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a valid CSV from a File
15
+ Given I parse a CSV file called "valid.csv"
16
+ When I ask if the CSV is valid
17
+ Then I should get the value of true
18
+
@@ -0,0 +1,19 @@
1
+ Given(/^I set the delimiter to "(.*?)"$/) do |delimiter|
2
+ @csv_options ||= default_csv_options
3
+ @csv_options["delimiter"] = delimiter
4
+ end
5
+
6
+ Given(/^I set quotechar to "(.*?)"$/) do |doublequote|
7
+ @csv_options ||= default_csv_options
8
+ @csv_options["quoteChar"] = doublequote
9
+ end
10
+
11
+ Given(/^I set the line endings to linefeed$/) do
12
+ @csv_options ||= default_csv_options
13
+ @csv_options["lineTerminator"] = "\n"
14
+ end
15
+
16
+ Given(/^I set header to "(.*?)"$/) do |boolean|
17
+ @csv_options ||= default_csv_options
18
+ @csv_options["header"] = boolean == "true"
19
+ end
@@ -0,0 +1,13 @@
1
+ Given(/^the content type is "(.*?)"$/) do |arg1|
2
+ @content_type = "text/csv"
3
+ end
4
+
5
+ Then(/^the "(.*?)" should be "(.*?)"$/) do |type, encoding|
6
+ validator = Csvlint::Validator.new( @url, default_csv_options )
7
+ validator.send(type.to_sym).should == encoding
8
+ end
9
+
10
+ Then(/^the metadata content type should be "(.*?)"$/) do |content_type|
11
+ validator = Csvlint::Validator.new( @url, default_csv_options )
12
+ validator.headers['content-type'].should == content_type
13
+ end
@@ -0,0 +1,30 @@
1
+ Given(/^I have a CSV with the following content:$/) do |string|
2
+ @csv = string
3
+ end
4
+
5
+ Given(/^it is stored at the url "(.*?)"$/) do |url|
6
+ @url = url
7
+ content_type = @content_type || "text/csv"
8
+ charset = @encoding || "UTF-8"
9
+ stub_request(:get, url).to_return(:status => 200, :body => @csv, :headers => {"Content-Type" => "#{content_type}; charset=#{charset}"})
10
+ end
11
+
12
+ Given(/^it is stored at the url "(.*?)" with no character set$/) do |url|
13
+ @url = url
14
+ content_type = @content_type || "text/csv"
15
+ stub_request(:get, url).to_return(:status => 200, :body => @csv, :headers => {"Content-Type" => "#{content_type}"})
16
+ end
17
+
18
+ When(/^I ask if the CSV is valid$/) do
19
+ @csv_options ||= default_csv_options
20
+ @validator = Csvlint::Validator.new( @url, @csv_options )
21
+ @valid = @validator.valid?
22
+ end
23
+
24
+ Then(/^I should get the value of true$/) do
25
+ expect( @valid ).to be(true)
26
+ end
27
+
28
+ Then(/^I should get the value of false$/) do
29
+ expect( @valid ).to be(false)
30
+ end
@@ -0,0 +1,7 @@
1
+ Given(/^I have a schema with the following content:$/) do |json|
2
+ @schema_json = json
3
+ end
4
+
5
+ Given(/^the schema is stored at the url "(.*?)"$/) do |schema_url|
6
+ @schema_url = schema_url
7
+ end
@@ -0,0 +1,7 @@
1
+ Given(/^it is parsed as a StringIO$/) do
2
+ @url = StringIO.new(@csv)
3
+ end
4
+
5
+ Given(/^I parse a CSV file called "(.*?)"$/) do |filename|
6
+ @url = File.new( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
7
+ end
@@ -0,0 +1,43 @@
1
+ When(/^I ask if there are errors$/) do
2
+ @csv_options ||= default_csv_options
3
+
4
+ if @schema_json
5
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
6
+ end
7
+
8
+ @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
9
+ @errors = @validator.errors
10
+ end
11
+
12
+ Then(/^there should be (\d+) error$/) do |count|
13
+ @errors.count.should == count.to_i
14
+ end
15
+
16
+ Then(/^that error should have the type "(.*?)"$/) do |type|
17
+ @errors.first.type.should == type.to_sym
18
+ end
19
+
20
+ Then(/^that error should have the row "(.*?)"$/) do |row|
21
+ @errors.first.row.should == row.to_i
22
+ end
23
+
24
+ Then(/^that error should have the column "(.*?)"$/) do |column|
25
+ @errors.first.column.should == column.to_i
26
+ end
27
+
28
+ Then(/^that error should have the content "(.*)"$/) do |content|
29
+ @errors.first.content.chomp.should == content.chomp
30
+ end
31
+
32
+ Then(/^that error should have no content$/) do
33
+ @errors.first.content.should == nil
34
+ end
35
+
36
+ Given(/^I have a CSV that doesn't exist$/) do
37
+ @url = "http//www.example.com/fake-csv.csv"
38
+ stub_request(:get, @url).to_return(:status => 404)
39
+ end
40
+
41
+ Then(/^there should be no "(.*?)" errors$/) do |type|
42
+ @errors.each do |error| error.type.should_not == type.to_sym end
43
+ end
@@ -0,0 +1,18 @@
1
+ Given(/^I ask if there are info messages$/) do
2
+ @csv_options ||= default_csv_options
3
+
4
+ if @schema_json
5
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
6
+ end
7
+
8
+ @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
9
+ @info_messages = @validator.info_messages
10
+ end
11
+
12
+ Then(/^there should be (\d+) info messages?$/) do |num|
13
+ @info_messages.count.should == num.to_i
14
+ end
15
+
16
+ Then(/^that message should have the type "(.*?)"$/) do |msg_type|
17
+ @info_messages.first.type.should == msg_type.to_sym
18
+ end
@@ -0,0 +1,46 @@
1
+ Given(/^it is encoded as "(.*?)"$/) do |encoding|
2
+ @csv = @csv.encode(encoding)
3
+ @encoding = encoding
4
+ end
5
+
6
+ Given(/^I set an encoding header of "(.*?)"$/) do |encoding|
7
+ @encoding = encoding
8
+ end
9
+
10
+ Given(/^I do not set an encoding header$/) do
11
+ @encoding = nil
12
+ end
13
+
14
+ Given(/^I have a CSV file called "(.*?)"$/) do |filename|
15
+ @csv = File.read( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
16
+ end
17
+
18
+ When(/^I ask if there are warnings$/) do
19
+ @csv_options ||= default_csv_options
20
+ if @schema_json
21
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
22
+ end
23
+
24
+ @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
25
+ @warnings = @validator.warnings
26
+ end
27
+
28
+ Then(/^there should be (\d+) warnings$/) do |count|
29
+ @warnings.count.should == count.to_i
30
+ end
31
+
32
+ Given(/^the content type is set to "(.*?)"$/) do |type|
33
+ @content_type = type
34
+ end
35
+
36
+ Then(/^that warning should have the row "(.*?)"$/) do |row|
37
+ @warnings.first.row.should == row.to_i
38
+ end
39
+
40
+ Then(/^that warning should have the column "(.*?)"$/) do |column|
41
+ @warnings.first.column.should == column.to_i
42
+ end
43
+
44
+ Then(/^that warning should have the type "(.*?)"$/) do |type|
45
+ @warnings.first.type.should == type.to_sym
46
+ end