csvlint 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +22 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +214 -0
  8. data/Rakefile +17 -0
  9. data/bin/create_schema +32 -0
  10. data/bin/csvlint +52 -0
  11. data/csvlint.gemspec +39 -0
  12. data/features/check_format.feature +46 -0
  13. data/features/csv_options.feature +35 -0
  14. data/features/fixtures/cr-line-endings.csv +1 -0
  15. data/features/fixtures/crlf-line-endings.csv +3 -0
  16. data/features/fixtures/inconsistent-line-endings.csv +2 -0
  17. data/features/fixtures/invalid-byte-sequence.csv +24 -0
  18. data/features/fixtures/lf-line-endings.csv +3 -0
  19. data/features/fixtures/spreadsheet.xls +0 -0
  20. data/features/fixtures/title-row.csv +4 -0
  21. data/features/fixtures/valid.csv +3 -0
  22. data/features/fixtures/windows-line-endings.csv +2 -0
  23. data/features/information.feature +22 -0
  24. data/features/parse_csv.feature +90 -0
  25. data/features/schema_validation.feature +63 -0
  26. data/features/sources.feature +18 -0
  27. data/features/step_definitions/csv_options_steps.rb +19 -0
  28. data/features/step_definitions/information_steps.rb +13 -0
  29. data/features/step_definitions/parse_csv_steps.rb +30 -0
  30. data/features/step_definitions/schema_validation_steps.rb +7 -0
  31. data/features/step_definitions/sources_steps.rb +7 -0
  32. data/features/step_definitions/validation_errors_steps.rb +43 -0
  33. data/features/step_definitions/validation_info_steps.rb +18 -0
  34. data/features/step_definitions/validation_warnings_steps.rb +46 -0
  35. data/features/support/env.rb +30 -0
  36. data/features/support/webmock.rb +1 -0
  37. data/features/validation_errors.feature +151 -0
  38. data/features/validation_info.feature +24 -0
  39. data/features/validation_warnings.feature +74 -0
  40. data/lib/csvlint.rb +13 -0
  41. data/lib/csvlint/error_collector.rb +43 -0
  42. data/lib/csvlint/error_message.rb +15 -0
  43. data/lib/csvlint/field.rb +102 -0
  44. data/lib/csvlint/schema.rb +69 -0
  45. data/lib/csvlint/types.rb +113 -0
  46. data/lib/csvlint/validate.rb +253 -0
  47. data/lib/csvlint/version.rb +3 -0
  48. data/lib/csvlint/wrapped_io.rb +39 -0
  49. data/spec/field_spec.rb +247 -0
  50. data/spec/schema_spec.rb +149 -0
  51. data/spec/spec_helper.rb +20 -0
  52. data/spec/validator_spec.rb +279 -0
  53. metadata +367 -0
@@ -0,0 +1,35 @@
1
+ Feature: CSV options
2
+
3
+ Scenario: Successfully parse a valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ 'Foo';'Bar';'Baz'
7
+ '1';'2';'3'
8
+ '3';'2';'1'
9
+ """
10
+ And I set the delimiter to ";"
11
+ And I set quotechar to "'"
12
+ And it is stored at the url "http://example.com/example1.csv"
13
+ When I ask if the CSV is valid
14
+ Then I should get the value of true
15
+
16
+ Scenario: Warn if options seem to return invalid data
17
+ Given I have a CSV with the following content:
18
+ """
19
+ 'Foo';'Bar';'Baz'
20
+ '1';'2';'3'
21
+ '3';'2';'1'
22
+ """
23
+ And I set the delimiter to ","
24
+ And I set quotechar to """
25
+ And it is stored at the url "http://example.com/example1.csv"
26
+ When I ask if there are warnings
27
+ Then there should be 1 warnings
28
+ And that warning should have the type "check_options"
29
+
30
+ Scenario: Use esoteric line endings
31
+ Given I have a CSV file called "windows-line-endings.csv"
32
+ And it is stored at the url "http://example.com/example1.csv"
33
+ When I ask if the CSV is valid
34
+ Then I should get the value of true
35
+
@@ -0,0 +1 @@
1
+ "Foo","Bar","Baz"
@@ -0,0 +1,3 @@
1
+ "Foo","Bsr","Baz"
2
+ "Biff","Baff","Boff"
3
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,2 @@
1
+ "Foo","Bsr","Baz"
2
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,24 @@
1
+ "Data","Dependencia Origem","Hist�rico","Data do Balancete","N�mero do documento","Valor",
2
+ "10/31/2012","","Saldo Anterior","","0","100.00",
3
+ "11/01/2012","0000-9","Transfer�ncia on line - 01/11 4885 256620-6 XXXXXXXXXXXXX","","224885000256620","100.00",
4
+ "11/01/2012","","Dep�sito COMPE - 033 0502 27588602104 XXXXXXXXXXXXXX","","101150","100.00",
5
+ "11/01/2012","","Proventos","","496774","1000.00",
6
+ "11/01/2012","","Benef�cio","","496775","100.00",
7
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 09:45 XXXXXXXXXXX","","135102","-1.00",
8
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 09:48 XXXXXXXXXXX","","235338","-10.00",
9
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 12:35 XXXXXXXX","","345329","-10.00",
10
+ "11/01/2012","0000-0","Compra com Cart�o - 01/11 23:57 XXXXXXXXXXXXXXXX","","686249","-10.00",
11
+ "11/01/2012","0000-0","Saque com cart�o - 01/11 13:17 XXXXXXXXXXXXXXXX","","11317296267021","-10.00",
12
+ "11/01/2012","","Pagto conta telefone - VIVO DF","","110101","-100.00",
13
+ "11/01/2012","","Cobran�a de I.O.F.","","391100701","-1.00",
14
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 16:57 XXXXXXXXXXXX","","161057","-10.00",
15
+ "11/05/2012","0000-0","Compra com Cart�o - 03/11 18:57 XXXXXXXXXXXXXXX","","168279","-10.00",
16
+ "11/05/2012","0000-0","Compra com Cart�o - 05/11 12:32 XXXXXXXXXXXXXXXXX","","245166","-10.00",
17
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 17:18 XXXXXXXXXXXXX","","262318","-1.00",
18
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 22:46 XXXXXXXXXXX","","382002","-100.00",
19
+ "11/05/2012","0000-0","Compra com Cart�o - 02/11 23:19 XXXXXXXXXXX","","683985","-1.00",
20
+ "11/05/2012","0000-0","Compra com Cart�o - 03/11 01:19 XXXXXXXXXXXXXXXX","","704772","-10.00",
21
+ "11/05/2012","0000-0","Compra com Cart�o - 03/11 11:08 XXXXXXXX","","840112","-1.00",
22
+ "11/05/2012","0000-0","Saque com cart�o - 05/11 19:24 XXXXXXXXXXXXXXXXX","","51924256267021","-10.00",
23
+ "11/05/2012","0000-0","Transfer�ncia on line - 05/11 4885 256620-6 XXXXXXXXXXXXX","","224885000256620","-100.00",
24
+ "11/05/2012","","Pagamento de T�tulo - XXXXXXXXXXXXXXXXXXX","","110501","-100.00",
@@ -0,0 +1,3 @@
1
+ "Foo","Bsr","Baz"
2
+ "Biff","Baff","Boff"
3
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,4 @@
1
+ "This is a title row",,
2
+ "Foo","Bsr","Baz"
3
+ "Biff","Baff","Boff"
4
+ "Qux","Teaspoon","Doge"
@@ -0,0 +1,3 @@
1
+ "Foo","Bar","Baz"
2
+ "1","2","3"
3
+ "3","2","1"
@@ -0,0 +1,2 @@
1
+ a,b,c
2
+ d,e,f
@@ -0,0 +1,22 @@
1
+ Feature: Return information
2
+
3
+ Background:
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "abc","2","3"
7
+ """
8
+ And it is encoded as "utf-8"
9
+ And the content type is "text/csv"
10
+ And it is stored at the url "http://example.com/example1.csv?query=true"
11
+
12
+ Scenario: Return encoding
13
+ Then the "encoding" should be "utf-8"
14
+
15
+ Scenario: Return content type
16
+ Then the "content_type" should be "text/csv"
17
+
18
+ Scenario: Return extension
19
+ Then the "extension" should be ".csv"
20
+
21
+ Scenario: Return meta
22
+ Then the metadata content type should be "text/csv; charset=utf-8"
@@ -0,0 +1,90 @@
1
+ Feature: Parse CSV
2
+
3
+ Scenario: Successfully parse a valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a CSV with newlines in quoted fields
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "a","b","c"
18
+ "d","e","this is
19
+ valid"
20
+ "a","b","c"
21
+ """
22
+ And it is stored at the url "http://example.com/example1.csv"
23
+ When I ask if the CSV is valid
24
+ Then I should get the value of true
25
+
26
+ Scenario: Successfully parse a CSV with multiple newlines in quoted fields
27
+ Given I have a CSV with the following content:
28
+ """
29
+ "a","b","c"
30
+ "d","this is
31
+ valid","as is this
32
+ too"
33
+ """
34
+ And it is stored at the url "http://example.com/example1.csv"
35
+ When I ask if the CSV is valid
36
+ Then I should get the value of true
37
+
38
+ Scenario: Successfully report an invalid CSV
39
+ Given I have a CSV with the following content:
40
+ """
41
+ "Foo", "Bar" , "Baz
42
+ """
43
+ And it is stored at the url "http://example.com/example1.csv"
44
+ When I ask if the CSV is valid
45
+ Then I should get the value of false
46
+
47
+ Scenario: Successfully report a CSV with incorrect quoting
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "Foo","Bar","Baz
51
+ """
52
+ And it is stored at the url "http://example.com/example1.csv"
53
+ When I ask if the CSV is valid
54
+ Then I should get the value of false
55
+
56
+ Scenario: Successfully report a CSV with incorrect whitespace
57
+ Given I have a CSV with the following content:
58
+ """
59
+ "Foo","Bar", "Baz"
60
+ """
61
+ And it is stored at the url "http://example.com/example1.csv"
62
+ When I ask if the CSV is valid
63
+ Then I should get the value of false
64
+
65
+ Scenario: Successfully report a CSV with ragged rows
66
+ Given I have a CSV with the following content:
67
+ """
68
+ "col1","col2","col2"
69
+ "1","2","3"
70
+ "4","5"
71
+ """
72
+ And it is stored at the url "http://example.com/example1.csv"
73
+ When I ask if the CSV is valid
74
+ Then I should get the value of false
75
+
76
+ Scenario: Don't class blank values as inconsistencies
77
+ Given I have a CSV with the following content:
78
+ """
79
+ "col1","col2","col3"
80
+ "1","2","3"
81
+ "4","5","6"
82
+ "","7","8"
83
+ "9","10","11"
84
+ "","12","13"
85
+ "","14","15"
86
+ "16","17","18"
87
+ """
88
+ And it is stored at the url "http://example.com/example1.csv"
89
+ When I ask if there are warnings
90
+ Then there should be 0 warnings
@@ -0,0 +1,63 @@
1
+ Feature: Schema Validation
2
+
3
+ Scenario: Valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Bob","1234","bob@example.org"
7
+ "Alice","5","alice@example.com"
8
+ """
9
+ And it is stored at the url "http://example.com/example1.csv"
10
+ And I have a schema with the following content:
11
+ """
12
+ {
13
+ "fields": [
14
+ { "name": "Name", "constraints": { "required": true } },
15
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
16
+ { "name": "Email", "constraints": { "required": true } }
17
+ ]
18
+ }
19
+ """
20
+ When I ask if there are errors
21
+ Then there should be 0 error
22
+
23
+ Scenario: Schema invalid CSV
24
+ Given I have a CSV with the following content:
25
+ """
26
+ "Bob","1234","bob@example.org"
27
+ "Alice","5","alice@example.com"
28
+ """
29
+ And it is stored at the url "http://example.com/example1.csv"
30
+ And I have a schema with the following content:
31
+ """
32
+ {
33
+ "fields": [
34
+ { "name": "Name", "constraints": { "required": true } },
35
+ { "name": "Id", "constraints": { "required": true, "minLength": 3 } },
36
+ { "name": "Email", "constraints": { "required": true } }
37
+ ]
38
+ }
39
+ """
40
+ When I ask if there are errors
41
+ Then there should be 1 error
42
+
43
+ Scenario: CSV with incorrect header
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "name","id","contact"
47
+ "Bob","1234","bob@example.org"
48
+ "Alice","5","alice@example.com"
49
+ """
50
+ And it is stored at the url "http://example.com/example1.csv"
51
+ And I have a schema with the following content:
52
+ """
53
+ {
54
+ "fields": [
55
+ { "name": "name", "constraints": { "required": true } },
56
+ { "name": "id", "constraints": { "required": true, "minLength": 3 } },
57
+ { "name": "email", "constraints": { "required": true } }
58
+ ]
59
+ }
60
+ """
61
+ When I ask if there are warnings
62
+ Then there should be 1 warnings
63
+
@@ -0,0 +1,18 @@
1
+ Feature: Parse CSV from Different Sources
2
+
3
+ Scenario: Successfully parse a valid CSV from a StringIO
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is parsed as a StringIO
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a valid CSV from a File
15
+ Given I parse a CSV file called "valid.csv"
16
+ When I ask if the CSV is valid
17
+ Then I should get the value of true
18
+
@@ -0,0 +1,19 @@
1
# Step definitions that override individual CSV dialect options.
#
# Each step lazily seeds @csv_options from default_csv_options (a helper that
# is not defined in this file) and then overrides exactly one key.

# Sets the "delimiter" option to the quoted value from the step text.
Given(/^I set the delimiter to "(.*?)"$/) do |separator|
  (@csv_options ||= default_csv_options)["delimiter"] = separator
end

# Sets the "quoteChar" option to the quoted value from the step text.
Given(/^I set quotechar to "(.*?)"$/) do |quote_char|
  (@csv_options ||= default_csv_options)["quoteChar"] = quote_char
end

# Forces the "lineTerminator" option to a single linefeed character.
Given(/^I set the line endings to linefeed$/) do
  (@csv_options ||= default_csv_options)["lineTerminator"] = "\n"
end

# Sets the "header" option to true only when the step text is exactly "true";
# any other text yields false.
Given(/^I set header to "(.*?)"$/) do |flag|
  (@csv_options ||= default_csv_options)["header"] = (flag == "true")
end
@@ -0,0 +1,13 @@
1
# Step definitions for checking the information a validator reports about a
# CSV source.

# Remembers the content type named in the step so that a later
# "it is stored at the url" step can serve it. The captured value is used
# rather than a hard-coded "text/csv", so the step does what its text says.
Given(/^the content type is "(.*?)"$/) do |content_type|
  @content_type = content_type
end

# Builds a fresh validator for @url and compares one of its attributes with the
# expected text. The attribute name comes straight from the feature file, which
# is trusted test input.
Then(/^the "(.*?)" should be "(.*?)"$/) do |attribute, expected|
  validator = Csvlint::Validator.new(@url, default_csv_options)
  validator.send(attribute.to_sym).should == expected
end

# Builds a fresh validator for @url and checks the 'content-type' entry of its
# headers against the expected text.
Then(/^the metadata content type should be "(.*?)"$/) do |content_type|
  validator = Csvlint::Validator.new(@url, default_csv_options)
  validator.headers['content-type'].should == content_type
end
@@ -0,0 +1,30 @@
1
# Step definitions for supplying CSV content, stubbing the URL it is served
# from, and asking the validator whether it is valid.

# Stores the doc string from the feature file as the CSV body.
Given(/^I have a CSV with the following content:$/) do |content|
  @csv = content
end

# Stubs a GET for the URL that returns @csv with a Content-Type header that
# includes a charset. Falls back to "text/csv" and "UTF-8" when earlier steps
# have not set @content_type or @encoding.
Given(/^it is stored at the url "(.*?)"$/) do |url|
  @url = url
  media_type = @content_type || "text/csv"
  charset = @encoding || "UTF-8"
  response = {
    :status => 200,
    :body => @csv,
    :headers => { "Content-Type" => "#{media_type}; charset=#{charset}" }
  }
  stub_request(:get, url).to_return(response)
end

# Same as the step above, but the Content-Type header carries no charset.
Given(/^it is stored at the url "(.*?)" with no character set$/) do |url|
  @url = url
  media_type = @content_type || "text/csv"
  response = {
    :status => 200,
    :body => @csv,
    :headers => { "Content-Type" => "#{media_type}" }
  }
  stub_request(:get, url).to_return(response)
end

# Builds a validator for @url (using default options unless an earlier step
# set some) and records the result of valid?.
When(/^I ask if the CSV is valid$/) do
  @csv_options ||= default_csv_options
  @validator = Csvlint::Validator.new(@url, @csv_options)
  @valid = @validator.valid?
end

# Expects the recorded validity to be exactly true.
Then(/^I should get the value of true$/) do
  expect(@valid).to be(true)
end

# Expects the recorded validity to be exactly false.
Then(/^I should get the value of false$/) do
  expect(@valid).to be(false)
end
@@ -0,0 +1,7 @@
1
# Step definitions that capture schema input for later validation steps.

# Keeps the raw JSON doc string; other steps parse it when they build a schema.
Given(/^I have a schema with the following content:$/) do |schema_source|
  @schema_json = schema_source
end

# Remembers the URL the schema is said to live at.
Given(/^the schema is stored at the url "(.*?)"$/) do |location|
  @schema_url = location
end
@@ -0,0 +1,7 @@
1
# Step definitions that hand the validator a non-URL source. Both steps store
# the source in @url, the same variable the URL-based steps use.

# Wraps the current CSV text in a StringIO.
Given(/^it is parsed as a StringIO$/) do
  @url = StringIO.new(@csv)
end

# Opens a fixture file, located relative to this step file's directory, and
# stores the open File object as the source.
Given(/^I parse a CSV file called "(.*?)"$/) do |fixture_name|
  steps_dir = File.dirname(__FILE__)
  fixture_path = File.join(steps_dir, "..", "fixtures", fixture_name)
  @url = File.new(fixture_path)
end
@@ -0,0 +1,43 @@
1
# Step definitions for collecting validation errors and asserting on them.

# Builds a validator for @url, with a schema when schema JSON was supplied,
# and records its errors. The fallback schema URI no longer carries a stray
# trailing space.
When(/^I ask if there are errors$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    @schema = Csvlint::Schema.from_json_table(@schema_url || "http://example.org", JSON.parse(@schema_json))
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @errors = @validator.errors
end

# Accepts both "1 error" and "2 errors" so features can be phrased naturally;
# existing steps written with the singular form still match.
Then(/^there should be (\d+) errors?$/) do |count|
  @errors.count.should == count.to_i
end

# The following steps all inspect the first recorded error only.
Then(/^that error should have the type "(.*?)"$/) do |type|
  @errors.first.type.should == type.to_sym
end

Then(/^that error should have the row "(.*?)"$/) do |row|
  @errors.first.row.should == row.to_i
end

Then(/^that error should have the column "(.*?)"$/) do |column|
  @errors.first.column.should == column.to_i
end

# Trailing newlines are stripped from both sides before comparing.
Then(/^that error should have the content "(.*)"$/) do |content|
  @errors.first.content.chomp.should == content.chomp
end

Then(/^that error should have no content$/) do
  @errors.first.content.should be_nil
end

# Stubs a 404 for a well-formed URL. The original literal was missing the
# colon after the scheme ("http//..."), so the stub was registered for a
# malformed URL.
Given(/^I have a CSV that doesn't exist$/) do
  @url = "http://www.example.com/fake-csv.csv"
  stub_request(:get, @url).to_return(:status => 404)
end

# Passes when none of the recorded errors has the given type.
Then(/^there should be no "(.*?)" errors$/) do |type|
  @errors.each { |error| error.type.should_not == type.to_sym }
end
@@ -0,0 +1,18 @@
1
# Step definitions for collecting validator info messages and asserting on them.

# Builds a validator for @url, with a schema when schema JSON was supplied,
# and records its info messages. The fallback schema URI no longer carries a
# stray trailing space.
Given(/^I ask if there are info messages$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    @schema = Csvlint::Schema.from_json_table(@schema_url || "http://example.org", JSON.parse(@schema_json))
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @info_messages = @validator.info_messages
end

# Matches both "1 info message" and "2 info messages".
Then(/^there should be (\d+) info messages?$/) do |num|
  @info_messages.count.should == num.to_i
end

# Inspects the first recorded info message only.
Then(/^that message should have the type "(.*?)"$/) do |msg_type|
  @info_messages.first.type.should == msg_type.to_sym
end
@@ -0,0 +1,46 @@
1
# Step definitions for encoding and content-type setup, and for collecting
# validation warnings and asserting on them.

# Re-encodes the CSV body and remembers the encoding name, which the
# "it is stored at the url" step uses as the charset it advertises.
Given(/^it is encoded as "(.*?)"$/) do |encoding|
  @csv = @csv.encode(encoding)
  @encoding = encoding
end

# Sets only the advertised encoding; the CSV body itself is left untouched.
Given(/^I set an encoding header of "(.*?)"$/) do |encoding|
  @encoding = encoding
end

# Clears any previously remembered encoding.
Given(/^I do not set an encoding header$/) do
  @encoding = nil
end

# Loads a fixture file, located relative to this step file's directory, as the
# CSV body.
Given(/^I have a CSV file called "(.*?)"$/) do |filename|
  @csv = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename))
end

# Builds a validator for @url, with a schema when schema JSON was supplied,
# and records its warnings. The fallback schema URI no longer carries a stray
# trailing space.
When(/^I ask if there are warnings$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    @schema = Csvlint::Schema.from_json_table(@schema_url || "http://example.org", JSON.parse(@schema_json))
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @warnings = @validator.warnings
end

Then(/^there should be (\d+) warnings$/) do |count|
  @warnings.count.should == count.to_i
end

# Remembers the content type for a later "it is stored at the url" step.
Given(/^the content type is set to "(.*?)"$/) do |type|
  @content_type = type
end

# The following steps all inspect the first recorded warning only.
Then(/^that warning should have the row "(.*?)"$/) do |row|
  @warnings.first.row.should == row.to_i
end

Then(/^that warning should have the column "(.*?)"$/) do |column|
  @warnings.first.column.should == column.to_i
end

Then(/^that warning should have the type "(.*?)"$/) do |type|
  @warnings.first.type.should == type.to_sym
end