wjordan213-csvlint 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,22 @@
1
+ Feature: Return information
2
+
3
+ Background:
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "abc","2","3"
7
+ """
8
+ And it is encoded as "utf-8"
9
+ And the content type is "text/csv"
10
+ And it is stored at the url "http://example.com/example1.csv?query=true"
11
+
12
+ Scenario: Return encoding
13
+ Then the "encoding" should be "UTF-8"
14
+
15
+ Scenario: Return content type
16
+ Then the "content_type" should be "text/csv; charset=utf-8"
17
+
18
+ Scenario: Return extension
19
+ Then the "extension" should be ".csv"
20
+
21
+ Scenario: Return meta
22
+ Then the metadata content type should be "text/csv; charset=utf-8"
@@ -0,0 +1,90 @@
1
+ Feature: Parse CSV
2
+
3
+ Scenario: Successfully parse a valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a CSV with newlines in quoted fields
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "a","b","c"
18
+ "d","e","this is
19
+ valid"
20
+ "a","b","c"
21
+ """
22
+ And it is stored at the url "http://example.com/example1.csv"
23
+ When I ask if the CSV is valid
24
+ Then I should get the value of true
25
+
26
+ Scenario: Successfully parse a CSV with multiple newlines in quoted fields
27
+ Given I have a CSV with the following content:
28
+ """
29
+ "a","b","c"
30
+ "d","this is
31
+ valid","as is this
32
+ too"
33
+ """
34
+ And it is stored at the url "http://example.com/example1.csv"
35
+ When I ask if the CSV is valid
36
+ Then I should get the value of true
37
+
38
+ Scenario: Successfully report an invalid CSV
39
+ Given I have a CSV with the following content:
40
+ """
41
+ "Foo", "Bar" , "Baz
42
+ """
43
+ And it is stored at the url "http://example.com/example1.csv"
44
+ When I ask if the CSV is valid
45
+ Then I should get the value of false
46
+
47
+ Scenario: Successfully report a CSV with incorrect quoting
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "Foo","Bar","Baz
51
+ """
52
+ And it is stored at the url "http://example.com/example1.csv"
53
+ When I ask if the CSV is valid
54
+ Then I should get the value of false
55
+
56
+ Scenario: Successfully report a CSV with incorrect whitespace
57
+ Given I have a CSV with the following content:
58
+ """
59
+ "Foo","Bar", "Baz"
60
+ """
61
+ And it is stored at the url "http://example.com/example1.csv"
62
+ When I ask if the CSV is valid
63
+ Then I should get the value of false
64
+
65
+ Scenario: Successfully report a CSV with ragged rows
66
+ Given I have a CSV with the following content:
67
+ """
68
+ "col1","col2","col2"
69
+ "1","2","3"
70
+ "4","5"
71
+ """
72
+ And it is stored at the url "http://example.com/example1.csv"
73
+ When I ask if the CSV is valid
74
+ Then I should get the value of false
75
+
76
+ Scenario: Don't class blank values as inconsistencies
77
+ Given I have a CSV with the following content:
78
+ """
79
+ "col1","col2","col3"
80
+ "1","2","3"
81
+ "4","5","6"
82
+ "","7","8"
83
+ "9","10","11"
84
+ "","12","13"
85
+ "","14","15"
86
+ "16","17","18"
87
+ """
88
+ And it is stored at the url "http://example.com/example1.csv"
89
+ When I ask if there are warnings
90
+ Then there should be 0 warnings
@@ -0,0 +1,105 @@
1
+ Feature: Schema Validation
2
+
3
+ Scenario: Valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Bob","1234","bob@example.org"
7
+ "Alice","5","alice@example.com"
8
+ """
9
+ And it is stored at the url "http://example.com/example1.csv"
10
+ And I have a schema with the following content:
11
+ """
12
+ {
13
+ "fields": [
14
+ { "name": "Name", "constraints": { "required": true } },
15
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
16
+ { "name": "Email", "constraints": { "required": true } }
17
+ ]
18
+ }
19
+ """
20
+ When I ask if there are errors
21
+ Then there should be 0 error
22
+
23
+ Scenario: Schema invalid CSV
24
+ Given I have a CSV with the following content:
25
+ """
26
+ "Bob","1234","bob@example.org"
27
+ "Alice","5","alice@example.com"
28
+ """
29
+ And it is stored at the url "http://example.com/example1.csv"
30
+ And I have a schema with the following content:
31
+ """
32
+ {
33
+ "fields": [
34
+ { "name": "Name", "constraints": { "required": true } },
35
+ { "name": "Id", "constraints": { "required": true, "minLength": 3 } },
36
+ { "name": "Email", "constraints": { "required": true } }
37
+ ]
38
+ }
39
+ """
40
+ When I ask if there are errors
41
+ Then there should be 1 error
42
+
43
+ Scenario: CSV with incorrect header
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "name","id","contact"
47
+ "Bob","1234","bob@example.org"
48
+ "Alice","5","alice@example.com"
49
+ """
50
+ And it is stored at the url "http://example.com/example1.csv"
51
+ And I have a schema with the following content:
52
+ """
53
+ {
54
+ "fields": [
55
+ { "name": "name", "constraints": { "required": true } },
56
+ { "name": "id", "constraints": { "required": true, "minLength": 3 } },
57
+ { "name": "email", "constraints": { "required": true } }
58
+ ]
59
+ }
60
+ """
61
+ When I ask if there are warnings
62
+ Then there should be 1 warnings
63
+
64
+ Scenario: Schema with valid regex
65
+ Given I have a CSV with the following content:
66
+ """
67
+ "firstname","id","email"
68
+ "Bob","1234","bob@example.org"
69
+ "Alice","5","alice@example.com"
70
+ """
71
+ And it is stored at the url "http://example.com/example1.csv"
72
+ And I have a schema with the following content:
73
+ """
74
+ {
75
+ "fields": [
76
+ { "name": "Name", "constraints": { "required": true, "pattern": "^[A-Za-z0-9_]*$" } },
77
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
78
+ { "name": "Email", "constraints": { "required": true } }
79
+ ]
80
+ }
81
+ """
82
+ When I ask if there are errors
83
+ Then there should be 0 error
84
+
85
+ Scenario: Schema with invalid regex
86
+ Given I have a CSV with the following content:
87
+ """
88
+ "firstname","id","email"
89
+ "Bob","1234","bob@example.org"
90
+ "Alice","5","alice@example.com"
91
+ """
92
+ And it is stored at the url "http://example.com/example1.csv"
93
+ And I have a schema with the following content:
94
+ """
95
+ {
96
+ "fields": [
97
+ { "name": "Name", "constraints": { "required": true, "pattern": "((" } },
98
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
99
+ { "name": "Email", "constraints": { "required": true } }
100
+ ]
101
+ }
102
+ """
103
+ When I ask if there are errors
104
+ Then there should be 1 error
105
+ And that error should have the type "invalid_regex"
@@ -0,0 +1,17 @@
1
+ Feature: Parse CSV from Different Sources
2
+
3
+ Scenario: Successfully parse a valid CSV from a StringIO
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is parsed as a StringIO
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a valid CSV from a File
15
+ Given I parse a file called "valid.csv"
16
+ When I ask if the CSV is valid
17
+ Then I should get the value of true
@@ -0,0 +1,11 @@
# Step definitions used by the command-line scenarios to control STDIN and to
# watch STDERR.

# Makes STDIN.read return the contents of the file at the given path.
Given(/^I have stubbed stdin to contain "(.*?)"$/) do |path|
  stdin_contents = File.read(path)
  expect(STDIN).to receive(:read).and_return(stdin_contents)
end

# Makes STDIN.read return nil.
Given(/^I have stubbed stdin to contain nothing$/) do
  expect(STDIN).to receive(:read).and_return(nil)
end

# Registers a negative message expectation for STDERR.puts.
Then(/^nothing should be outputted to STDERR$/) do
  expect(STDERR).not_to receive(:puts)
end
@@ -0,0 +1,24 @@
# Step definitions that override individual CSV dialect options.
# Every step initialises @csv_options from default_csv_options on first use and
# then overwrites exactly one key.

# Sets the "delimiter" option to the quoted value from the scenario.
Given(/^I set the delimiter to "(.*?)"$/) do |separator|
  (@csv_options ||= default_csv_options)["delimiter"] = separator
end

# Sets the "quoteChar" option to the quoted value from the scenario.
Given(/^I set quotechar to "(.*?)"$/) do |quote_char|
  (@csv_options ||= default_csv_options)["quoteChar"] = quote_char
end

# Sets the "lineTerminator" option to a line feed.
Given(/^I set the line endings to linefeed$/) do
  (@csv_options ||= default_csv_options)["lineTerminator"] = "\n"
end

# Sets the "lineTerminator" option to a carriage return.
Given(/^I set the line endings to carriage return$/) do
  (@csv_options ||= default_csv_options)["lineTerminator"] = "\r"
end

# Sets the "header" option to true only when the scenario text is exactly "true".
Given(/^I set header to "(.*?)"$/) do |flag|
  (@csv_options ||= default_csv_options)["header"] = (flag == "true")
end
@@ -0,0 +1,13 @@
# Step definitions for checking what a validator reports about a CSV source
# (attributes such as encoding, content type and extension, plus the headers).

# Records the content type named in the scenario in @content_type.
Given(/^the content type is "(.*?)"$/) do |content_type|
  # Use the captured value; it was previously ignored in favour of a
  # hard-coded "text/csv".
  @content_type = content_type
end

# Builds a validator for @url and compares the named attribute with the
# expected value. The attribute name comes straight from the scenario text.
Then(/^the "(.*?)" should be "(.*?)"$/) do |attribute, expected|
  validator = Csvlint::Validator.new( @url, default_csv_options )
  expect( validator.send(attribute.to_sym) ).to eq( expected )
end

# Builds a validator for @url and checks the 'content-type' entry of its headers.
Then(/^the metadata content type should be "(.*?)"$/) do |content_type|
  validator = Csvlint::Validator.new( @url, default_csv_options )
  expect( validator.headers['content-type'] ).to eq( content_type )
end
@@ -0,0 +1,42 @@
# Step definitions for supplying CSV content, serving it from a stubbed URL and
# asking the validator whether it is valid.

# Stores the scenario's doc string as the CSV body.
Given(/^I have a CSV with the following content:$/) do |doc_string|
  @csv = doc_string.to_s
end

# Remembers a Link header value (typed as CSVW metadata) for the next stubbed response.
Given(/^it has a Link header holding "(.*?)"$/) do |target|
  @link = "#{target}; type=\"application/csvm+json\""
end

# Serves @csv from the given URL with a Content-Type that carries a charset,
# and answers 404 for the three metadata locations derived from that URL.
Given(/^it is stored at the url "(.*?)"$/) do |url|
  @url = url
  media_type = @content_type || "text/csv"
  encoding = @encoding || "UTF-8"
  response_headers = { "Content-Type" => "#{media_type}; charset=#{encoding}" }
  response_headers["Link"] = @link if @link
  stub_request(:get, url).to_return(status: 200, body: @csv, headers: response_headers)
  stub_request(:get, URI.join(url, '/.well-known/csvm')).to_return(status: 404)
  stub_request(:get, url + '-metadata.json').to_return(status: 404)
  stub_request(:get, URI.join(url, 'csv-metadata.json')).to_return(status: 404)
end

# Same as the step above, but the Content-Type header has no charset parameter
# and no Link header is sent.
Given(/^it is stored at the url "(.*?)" with no character set$/) do |url|
  @url = url
  media_type = @content_type || "text/csv"
  response_headers = { "Content-Type" => "#{media_type}" }
  stub_request(:get, url).to_return(status: 200, body: @csv, headers: response_headers)
  stub_request(:get, URI.join(url, '/.well-known/csvm')).to_return(status: 404)
  stub_request(:get, url + '-metadata.json').to_return(status: 404)
  stub_request(:get, URI.join(url, 'csv-metadata.json')).to_return(status: 404)
end

# Validates @url with the current (or default) CSV options and keeps the verdict.
When(/^I ask if the CSV is valid$/) do
  @csv_options ||= default_csv_options
  @validator = Csvlint::Validator.new(@url, @csv_options)
  @valid = @validator.valid?
end

# Passes only when the stored verdict is exactly true.
Then(/^I should get the value of true$/) do
  expect(@valid).to be(true)
end

# Passes only when the stored verdict is exactly false.
Then(/^I should get the value of false$/) do
  expect(@valid).to be(false)
end
@@ -0,0 +1,33 @@
# Step definitions for supplying schemas / CSVW metadata and for stubbing the
# URLs they (and other fixture files) are served from.

# Stores the doc string as a JSON Table Schema document.
Given(/^I have a schema with the following content:$/) do |json|
  @schema_type = :json_table
  @schema_json = json
end

# Stores the doc string as a CSVW metadata document.
Given(/^I have metadata with the following content:$/) do |json|
  @schema_type = :csvw_metadata
  @schema_json = json
end

# Loads a CSVW metadata document from the fixtures directory.
Given(/^I have a metadata file called "([^"]*)"$/) do |filename|
  @schema_type = :csvw_metadata
  @schema_json = File.read( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
end

# Serves the previously stored schema/metadata JSON from the given URL.
# The first capture only makes the step read naturally; it is not used.
Given(/^the (schema|metadata) is stored at the url "(.*?)"$/) do |_schema_type, schema_url|
  @schema_url = schema_url
  stub_request(:get, @schema_url).to_return(:status => 200, :body => @schema_json.to_str)
end

# Serves the doc string from the given URL with a 200 status.
Given(/^there is a file at "(.*?)" with the content:$/) do |url, content|
  stub_request(:get, url).to_return(:status => 200, :body => content.to_str)
end

# Serves a fixture file from the given URL. Files ending in ".csv" are sent as
# text/csv; everything else is sent as CSVW metadata JSON.
Given(/^I have a file called "(.*?)" at the url "(.*?)"$/) do |filename, url|
  content = File.read( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
  # A literal suffix check: the former /.csv$/ pattern also matched names such
  # as "foocsv" because the dot was unescaped.
  content_type = filename.end_with?(".csv") ? "text/csv" : "application/csvm+json"
  stub_request(:get, url).to_return(:status => 200, :body => content, :headers => {"Content-Type" => "#{content_type}; charset=UTF-8"})
end

# Makes the given URL answer with a 404.
Given(/^there is no file at the url "(.*?)"$/) do |url|
  stub_request(:get, url).to_return(:status => 404)
end
@@ -0,0 +1,7 @@
# Step definitions that hand the validator a non-URL source. The source is
# stored in @url, the same variable the URL-based steps use.

# Wraps the stored CSV text in a StringIO.
Given(/^it is parsed as a StringIO$/) do
  @url = StringIO.new(@csv)
end

# Opens the named file from the fixtures directory as a File object.
Given(/^I parse a file called "(.*?)"$/) do |fixture_name|
  fixtures_dir = File.join(File.dirname(__FILE__), "..", "fixtures")
  @url = File.new(File.join(fixtures_dir, fixture_name))
end
@@ -0,0 +1,90 @@
# Step definitions for running validation (optionally against a schema) and
# asserting on the errors that come back.

# Validates @url, building @schema first when schema JSON has been supplied.
# The schema is loaded relative to @schema_url, or to "http://example.org" when
# the scenario did not store it at a URL. Results are left in @errors.
When(/^I ask if there are errors$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    # The fallback URL previously carried a trailing space.
    base_url = @schema_url || "http://example.org"
    json = JSON.parse(@schema_json)
    @schema =
      if @schema_type == :json_table
        Csvlint::Schema.from_json_table( base_url, json )
      else
        Csvlint::Schema.from_csvw_metadata( base_url, json )
      end
  end

  @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
  @errors = @validator.errors
end

# Runs CSVW validation. With no @url, every table listed in the schema is
# validated and the errors/warnings are concatenated; otherwise only @url is
# validated. JSON parse failures and metadata errors are captured as the sole
# entry of @errors instead of aborting the step.
When(/^I carry out CSVW validation$/) do
  @csv_options ||= default_csv_options

  begin
    if @schema_json
      json = JSON.parse(@schema_json)
      # The fallback URL previously carried a trailing space.
      base_url = @schema_url || "http://example.org"
      @schema =
        if @schema_type == :json_table
          Csvlint::Schema.from_json_table( base_url, json )
        else
          Csvlint::Schema.from_csvw_metadata( base_url, json )
        end
    end

    if @url.nil?
      @errors = []
      @warnings = []
      @schema.tables.keys.each do |table_url|
        validator = Csvlint::Validator.new( table_url, @csv_options, @schema )
        @errors += validator.errors
        @warnings += validator.warnings
      end
    else
      validator = Csvlint::Validator.new( @url, @csv_options, @schema )
      @errors = validator.errors
      @warnings = validator.warnings
    end
  rescue JSON::ParserError => e
    @errors = [e]
  rescue Csvlint::Csvw::MetadataError => e
    @errors = [e]
  end
end

Then(/^there should be errors$/) do
  # This step is only used for CSVW testing. :invalid_encoding and :line_breaks
  # messages are discarded first because they would otherwise mask a lack of
  # real errors. The exact-class check leaves captured exceptions in place.
  @errors.delete_if { |e| e.instance_of?(Csvlint::ErrorMessage) && [:invalid_encoding, :line_breaks].include?(e.type) }
  expect( @errors.count ).to be > 0
end

Then(/^there should not be errors$/) do
  expect( @errors.count ).to eq(0)
end

# NOTE: the singular "error" wording is part of the step pattern for any count.
Then(/^there should be (\d+) error$/) do |count|
  expect( @errors.count ).to eq( count.to_i )
end

# The "that error" steps below all inspect the first entry of @errors.
Then(/^that error should have the type "(.*?)"$/) do |type|
  expect( @errors.first.type ).to eq( type.to_sym )
end

Then(/^that error should have the row "(.*?)"$/) do |row|
  expect( @errors.first.row ).to eq( row.to_i )
end

Then(/^that error should have the column "(.*?)"$/) do |column|
  expect( @errors.first.column ).to eq( column.to_i )
end

# Trailing line terminators are ignored on both sides of the comparison.
Then(/^that error should have the content "(.*)"$/) do |content|
  expect( @errors.first.content.chomp ).to eq( content.chomp )
end

Then(/^that error should have no content$/) do
  expect( @errors.first.content ).to be_nil
end

# Points @url at an address that answers 404.
Given(/^I have a CSV that doesn't exist$/) do
  # The scheme separator was previously missing ("http//..."), which made this
  # a relative reference rather than an HTTP URL.
  @url = "http://www.example.com/fake-csv.csv"
  stub_request(:get, @url).to_return(:status => 404)
  # With a well-formed URL, metadata locations derived from it become
  # resolvable, so stub them as missing too rather than leaving them unstubbed.
  # NOTE(review): presumably the validator probes these locations - confirm
  # against Csvlint::Validator.
  stub_request(:get, URI.join(@url, '/.well-known/csvm')).to_return(:status => 404)
  stub_request(:get, @url + '-metadata.json').to_return(:status => 404)
  stub_request(:get, URI.join(@url, 'csv-metadata.json')).to_return(:status => 404)
end

# Passes when none of the collected errors has the given type.
Then(/^there should be no "(.*?)" errors$/) do |type|
  @errors.each { |error| expect( error.type ).to_not eq( type.to_sym ) }
end