csvlint 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +7 -1
  3. data/CHANGELOG.md +19 -1
  4. data/README.md +93 -36
  5. data/bin/csvlint +68 -27
  6. data/csvlint.gemspec +2 -0
  7. data/features/csvw_schema_validation.feature +127 -0
  8. data/features/fixtures/spreadsheet.xlsx +0 -0
  9. data/features/sources.feature +3 -4
  10. data/features/step_definitions/parse_csv_steps.rb +13 -1
  11. data/features/step_definitions/schema_validation_steps.rb +27 -1
  12. data/features/step_definitions/sources_steps.rb +1 -1
  13. data/features/step_definitions/validation_errors_steps.rb +48 -1
  14. data/features/step_definitions/validation_info_steps.rb +5 -1
  15. data/features/step_definitions/validation_warnings_steps.rb +15 -1
  16. data/features/support/load_tests.rb +114 -0
  17. data/features/validation_errors.feature +12 -24
  18. data/features/validation_warnings.feature +18 -6
  19. data/lib/csvlint.rb +10 -0
  20. data/lib/csvlint/csvw/column.rb +359 -0
  21. data/lib/csvlint/csvw/date_format.rb +182 -0
  22. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  23. data/lib/csvlint/csvw/number_format.rb +211 -0
  24. data/lib/csvlint/csvw/property_checker.rb +761 -0
  25. data/lib/csvlint/csvw/table.rb +204 -0
  26. data/lib/csvlint/csvw/table_group.rb +165 -0
  27. data/lib/csvlint/schema.rb +40 -23
  28. data/lib/csvlint/validate.rb +142 -19
  29. data/lib/csvlint/version.rb +1 -1
  30. data/spec/csvw/column_spec.rb +112 -0
  31. data/spec/csvw/date_format_spec.rb +49 -0
  32. data/spec/csvw/number_format_spec.rb +403 -0
  33. data/spec/csvw/table_group_spec.rb +143 -0
  34. data/spec/csvw/table_spec.rb +90 -0
  35. data/spec/schema_spec.rb +27 -1
  36. data/spec/spec_helper.rb +0 -1
  37. data/spec/validator_spec.rb +16 -10
  38. metadata +53 -2
@@ -1,5 +1,5 @@
1
1
  Feature: Parse CSV from Different Sources
2
-
2
+
3
3
  Scenario: Successfully parse a valid CSV from a StringIO
4
4
  Given I have a CSV with the following content:
5
5
  """
@@ -10,9 +10,8 @@ Feature: Parse CSV from Different Sources
10
10
  And it is parsed as a StringIO
11
11
  When I ask if the CSV is valid
12
12
  Then I should get the value of true
13
-
13
+
14
14
  Scenario: Successfully parse a valid CSV from a File
15
- Given I parse a CSV file called "valid.csv"
15
+ Given I parse a file called "valid.csv"
16
16
  When I ask if the CSV is valid
17
17
  Then I should get the value of true
18
-
@@ -2,17 +2,29 @@ Given(/^I have a CSV with the following content:$/) do |string|
2
2
  @csv = string.to_s
3
3
  end
4
4
 
5
+ Given(/^it has a Link header holding "(.*?)"$/) do |link|
6
+ @link = "#{link}; type=\"application/csvm+json\""
7
+ end
8
+
5
9
  Given(/^it is stored at the url "(.*?)"$/) do |url|
6
10
  @url = url
7
11
  content_type = @content_type || "text/csv"
8
12
  charset = @encoding || "UTF-8"
9
- stub_request(:get, url).to_return(:status => 200, :body => @csv, :headers => {"Content-Type" => "#{content_type}; charset=#{charset}"})
13
+ headers = {"Content-Type" => "#{content_type}; charset=#{charset}"}
14
+ headers["Link"] = @link if @link
15
+ stub_request(:get, url).to_return(:status => 200, :body => @csv, :headers => headers)
16
+ stub_request(:get, URI.join(url, '/.well-known/csvm')).to_return(:status => 404)
17
+ stub_request(:get, url + '-metadata.json').to_return(:status => 404)
18
+ stub_request(:get, URI.join(url, 'csv-metadata.json')).to_return(:status => 404)
10
19
  end
11
20
 
12
21
  Given(/^it is stored at the url "(.*?)" with no character set$/) do |url|
13
22
  @url = url
14
23
  content_type = @content_type || "text/csv"
15
24
  stub_request(:get, url).to_return(:status => 200, :body => @csv, :headers => {"Content-Type" => "#{content_type}"})
25
+ stub_request(:get, URI.join(url, '/.well-known/csvm')).to_return(:status => 404)
26
+ stub_request(:get, url + '-metadata.json').to_return(:status => 404)
27
+ stub_request(:get, URI.join(url, 'csv-metadata.json')).to_return(:status => 404)
16
28
  end
17
29
 
18
30
  When(/^I ask if the CSV is valid$/) do
@@ -1,7 +1,33 @@
1
1
  Given(/^I have a schema with the following content:$/) do |json|
2
+ @schema_type = :json_table
2
3
  @schema_json = json
3
4
  end
4
5
 
5
- Given(/^the schema is stored at the url "(.*?)"$/) do |schema_url|
6
+ Given(/^I have metadata with the following content:$/) do |json|
7
+ @schema_type = :csvw_metadata
8
+ @schema_json = json
9
+ end
10
+
11
+ Given(/^I have a metadata file called "([^"]*)"$/) do |filename|
12
+ @schema_type = :csvw_metadata
13
+ @schema_json = File.read( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
14
+ end
15
+
16
+ Given(/^the (schema|metadata) is stored at the url "(.*?)"$/) do |schema_type,schema_url|
6
17
  @schema_url = schema_url
18
+ stub_request(:get, @schema_url).to_return(:status => 200, :body => @schema_json.to_str)
19
+ end
20
+
21
+ Given(/^there is a file at "(.*?)" with the content:$/) do |url, content|
22
+ stub_request(:get, url).to_return(:status => 200, :body => content.to_str)
23
+ end
24
+
25
+ Given(/^I have a file called "(.*?)" at the url "(.*?)"$/) do |filename,url|
26
+ content = File.read( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
27
+ content_type = filename =~ /.csv$/ ? "text/csv" : "application/csvm+json"
28
+ stub_request(:get, url).to_return(:status => 200, :body => content, :headers => {"Content-Type" => "#{content_type}; charset=UTF-8"})
29
+ end
30
+
31
+ Given(/^there is no file at the url "(.*?)"$/) do |url|
32
+ stub_request(:get, url).to_return(:status => 404)
7
33
  end
@@ -2,6 +2,6 @@ Given(/^it is parsed as a StringIO$/) do
2
2
  @url = StringIO.new(@csv)
3
3
  end
4
4
 
5
- Given(/^I parse a CSV file called "(.*?)"$/) do |filename|
5
+ Given(/^I parse a file called "(.*?)"$/) do |filename|
6
6
  @url = File.new( File.join( File.dirname(__FILE__), "..", "fixtures", filename ) )
7
7
  end
@@ -2,13 +2,60 @@ When(/^I ask if there are errors$/) do
2
2
  @csv_options ||= default_csv_options
3
3
 
4
4
  if @schema_json
5
- @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
5
+ if @schema_type == :json_table
6
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
7
+ else
8
+ @schema = Csvlint::Schema.from_csvw_metadata( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
9
+ end
6
10
  end
7
11
 
8
12
  @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
9
13
  @errors = @validator.errors
10
14
  end
11
15
 
16
+ When(/^I carry out CSVW validation$/) do
17
+ @csv_options ||= default_csv_options
18
+
19
+ begin
20
+ if @schema_json
21
+ json = JSON.parse(@schema_json)
22
+ if @schema_type == :json_table
23
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", json )
24
+ else
25
+ @schema = Csvlint::Schema.from_csvw_metadata( @schema_url || "http://example.org ", json )
26
+ end
27
+ end
28
+
29
+ if @url.nil?
30
+ @errors = []
31
+ @warnings = []
32
+ @schema.tables.keys.each do |table_url|
33
+ validator = Csvlint::Validator.new( table_url, @csv_options, @schema )
34
+ @errors += validator.errors
35
+ @warnings += validator.warnings
36
+ end
37
+ else
38
+ validator = Csvlint::Validator.new( @url, @csv_options, @schema )
39
+ @errors = validator.errors
40
+ @warnings = validator.warnings
41
+ end
42
+ rescue JSON::ParserError => e
43
+ @errors = [e]
44
+ rescue Csvlint::Csvw::MetadataError => e
45
+ @errors = [e]
46
+ end
47
+ end
48
+
49
+ Then(/^there should be errors$/) do
50
+ # this test is only used for CSVW testing; :invalid_encoding masks lack of real errors
51
+ @errors.delete_if { |e| e.instance_of?(Csvlint::ErrorMessage) && [:invalid_encoding, :line_breaks].include?(e.type) }
52
+ expect( @errors.count ).to be > 0
53
+ end
54
+
55
+ Then(/^there should not be errors$/) do
56
+ expect( @errors.count ).to eq(0)
57
+ end
58
+
12
59
  Then(/^there should be (\d+) error$/) do |count|
13
60
  expect( @errors.count ).to eq( count.to_i )
14
61
  end
@@ -2,7 +2,11 @@ Given(/^I ask if there are info messages$/) do
2
2
  @csv_options ||= default_csv_options
3
3
 
4
4
  if @schema_json
5
- @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
5
+ if @schema_type == :json_table
6
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
7
+ else
8
+ @schema = Csvlint::Schema.from_csvw_metadata( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
9
+ end
6
10
  end
7
11
 
8
12
  @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
@@ -18,13 +18,27 @@ end
18
18
  When(/^I ask if there are warnings$/) do
19
19
  @csv_options ||= default_csv_options
20
20
  if @schema_json
21
- @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
21
+ if @schema_type == :json_table
22
+ @schema = Csvlint::Schema.from_json_table( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
23
+ else
24
+ @schema = Csvlint::Schema.from_csvw_metadata( @schema_url || "http://example.org ", JSON.parse(@schema_json) )
25
+ end
22
26
  end
23
27
 
24
28
  @validator = Csvlint::Validator.new( @url, @csv_options, @schema )
25
29
  @warnings = @validator.warnings
26
30
  end
27
31
 
32
+ Then(/^there should be warnings$/) do
33
+ expect( @warnings.count ).to be > 0
34
+ end
35
+
36
+ Then(/^there should not be warnings$/) do
37
+ # this test is only used for CSVW testing, and :inconsistent_values warnings don't count in CSVW
38
+ @warnings.delete_if { |w| [:inconsistent_values, :check_options].include?(w.type) }
39
+ expect( @warnings.count ).to eq(0)
40
+ end
41
+
28
42
  Then(/^there should be (\d+) warnings$/) do |count|
29
43
  expect( @warnings.count ).to eq( count.to_i )
30
44
  end
@@ -0,0 +1,114 @@
1
+ require 'json'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
+ BASE_URI = "http://w3c.github.io/csvw/tests/"
6
+ BASE_PATH = File.join(File.dirname(__FILE__), "..", "fixtures", "csvw")
7
+ FEATURE_FILE_PATH = File.join(File.dirname(__FILE__), "..", "csvw_validation_tests.feature")
8
+ SCRIPT_FILE_PATH = File.join(File.dirname(__FILE__), "..", "..", "bin", "run-csvw-tests")
9
+
10
+ Dir.mkdir(BASE_PATH) unless Dir.exist?(BASE_PATH)
11
+
12
+ def cache_file(filename)
13
+ file = File.join(BASE_PATH, filename)
14
+ uri = URI.join(BASE_URI, filename)
15
+ unless File.exist?(file)
16
+ if filename.include? "/"
17
+ levels = filename.split("/")[0..-2]
18
+ for i in 0..levels.length
19
+ dir = File.join(BASE_PATH, levels[0..i].join("/"))
20
+ Dir.mkdir(dir) unless Dir.exist?(dir)
21
+ end
22
+ end
23
+ STDERR.puts("storing #{file} locally")
24
+ File.open(file, 'wb') do |f|
25
+ f.puts open(uri, 'rb').read
26
+ end
27
+ end
28
+ return uri, file
29
+ end
30
+
31
+ File.open(SCRIPT_FILE_PATH, 'w') do |file|
32
+ File.chmod(0755, SCRIPT_FILE_PATH)
33
+ manifest = JSON.parse( open("http://w3c.github.io/csvw/tests/manifest-validation.jsonld").read )
34
+ manifest["entries"].each do |entry|
35
+ type = "valid"
36
+ case entry["type"]
37
+ when "csvt:WarningValidationTest"
38
+ type = "warnings"
39
+ when "csvt:NegativeValidationTest"
40
+ type = "errors"
41
+ end
42
+ file.puts "echo \"#{entry["id"].split("#")[-1]}: #{entry["name"].gsub("`", "'")}\""
43
+ file.puts "echo \"#{type}: #{entry["comment"].gsub("\"", "\\\"").gsub("`", "'")}\""
44
+ if entry["action"].end_with?(".json")
45
+ file.puts "csvlint --schema=features/fixtures/csvw/#{entry["action"]}"
46
+ elsif entry["option"] && entry["option"]["metadata"]
47
+ file.puts "csvlint features/fixtures/csvw/#{entry["action"]} --schema=features/fixtures/csvw/#{entry["option"]["metadata"]}"
48
+ else
49
+ file.puts "csvlint features/fixtures/csvw/#{entry["action"]}"
50
+ end
51
+ file.puts "echo"
52
+ end
53
+ end unless File.exist? SCRIPT_FILE_PATH
54
+
55
+ File.open(FEATURE_FILE_PATH, 'w') do |file|
56
+ file.puts "# Auto-generated file based on standard validation CSVW tests from http://w3c.github.io/csvw/tests/manifest-validation.jsonld"
57
+ file.puts ""
58
+
59
+ manifest = JSON.parse( open("http://w3c.github.io/csvw/tests/manifest-validation.jsonld").read )
60
+
61
+ file.puts "Feature: #{manifest["label"]}"
62
+ file.puts ""
63
+
64
+ manifest["entries"].each do |entry|
65
+ action_uri, action_file = cache_file(entry["action"])
66
+ metadata = nil
67
+ provided_files = []
68
+ missing_files = []
69
+ file.puts "\t# #{entry["id"]}"
70
+ file.puts "\t# #{entry["comment"]}"
71
+ file.puts "\tScenario: #{entry["id"]} #{entry["name"].gsub("<", "less than")}"
72
+ if entry["action"].end_with?(".json")
73
+ file.puts "\t\tGiven I have a metadata file called \"csvw/#{entry["action"]}\""
74
+ file.puts "\t\tAnd the metadata is stored at the url \"#{action_uri}\""
75
+ else
76
+ file.puts "\t\tGiven I have a CSV file called \"csvw/#{entry["action"]}\""
77
+ file.puts "\t\tAnd it has a Link header holding \"#{entry["httpLink"]}\"" if entry["httpLink"]
78
+ file.puts "\t\tAnd it is stored at the url \"#{action_uri}\""
79
+ if entry["option"] && entry["option"]["metadata"]
80
+ # no need to store the file here, as it will be listed in the 'implicit' list, which all get stored
81
+ metadata = URI.join(BASE_URI, entry["option"]["metadata"])
82
+ file.puts "\t\tAnd I have a metadata file called \"csvw/#{entry["option"]["metadata"]}\""
83
+ file.puts "\t\tAnd the metadata is stored at the url \"#{metadata}\""
84
+ end
85
+ provided_files << action_uri.to_s
86
+ missing_files = [
87
+ URI.join(action_uri, '/.well-known/csvm').to_s,
88
+ "#{action_uri}-metadata.json",
89
+ URI.join(action_uri, 'csv-metadata.json').to_s
90
+ ]
91
+ end
92
+ entry["implicit"].each do |implicit|
93
+ implicit_uri, implicit_file = cache_file(implicit)
94
+ provided_files << implicit_uri.to_s
95
+ unless implicit_uri == metadata
96
+ file.puts "\t\tAnd I have a file called \"csvw/#{implicit}\" at the url \"#{implicit_uri}\""
97
+ end
98
+ end if entry["implicit"]
99
+ missing_files.each do |uri|
100
+ file.puts "\t\tAnd there is no file at the url \"#{uri}\"" unless provided_files.include? uri
101
+ end
102
+ file.puts "\t\tWhen I carry out CSVW validation"
103
+ if entry["type"] == "csvt:WarningValidationTest"
104
+ file.puts "\t\tThen there should not be errors"
105
+ file.puts "\t\tAnd there should be warnings"
106
+ elsif entry["type"] == "csvt:NegativeValidationTest"
107
+ file.puts "\t\tThen there should be errors"
108
+ else
109
+ file.puts "\t\tThen there should not be errors"
110
+ file.puts "\t\tAnd there should not be warnings"
111
+ end
112
+ file.puts "\t"
113
+ end
114
+ end unless File.exist? FEATURE_FILE_PATH
@@ -26,7 +26,7 @@ Feature: Get validation errors
26
26
  And that error should have the type "unclosed_quote"
27
27
  And that error should have the row "2"
28
28
  And that error should have the content ""Foo","Bar","Baz"
29
-
29
+
30
30
  Scenario: Successfully report a CSV with incorrect whitespace
31
31
  Given I have a CSV with the following content:
32
32
  """
@@ -39,7 +39,7 @@ Feature: Get validation errors
39
39
  And that error should have the type "whitespace"
40
40
  And that error should have the row "2"
41
41
  And that error should have the content ""Foo","Bar", "Baz""
42
-
42
+
43
43
  Scenario: Successfully report a CSV with blank rows
44
44
  Given I have a CSV with the following content:
45
45
  """
@@ -83,40 +83,28 @@ Feature: Get validation errors
83
83
  Then there should be 1 error
84
84
  And that error should have the type "blank_rows"
85
85
  And that error should have the row "3"
86
-
86
+
87
87
  Scenario: Report invalid Encoding
88
88
  Given I have a CSV file called "invalid-byte-sequence.csv"
89
89
  And I set an encoding header of "UTF-8"
90
90
  And it is stored at the url "http://example.com/example1.csv"
91
91
  When I ask if there are errors
92
- Then there should be 1 error
92
+ Then there should be 1 error
93
93
  And that error should have the type "invalid_encoding"
94
-
94
+
95
95
  Scenario: Correctly handle different encodings
96
96
  Given I have a CSV file called "invalid-byte-sequence.csv"
97
- And I set an encoding header of "ISO-8859-1"
97
+ And I set an encoding header of "ISO-8859-1"
98
98
  And it is stored at the url "http://example.com/example1.csv"
99
99
  When I ask if there are errors
100
- Then there should be no "content_encoding" errors
101
-
100
+ Then there should be no "content_encoding" errors
101
+
102
102
  Scenario: Report invalid file
103
-
104
103
  Given I have a CSV file called "spreadsheet.xls"
105
104
  And it is stored at the url "http://example.com/example1.csv"
106
105
  When I ask if there are errors
107
- Then there should be 1 error
108
- And that error should have the type "invalid_encoding"
109
-
110
- Scenario: Incorrect content type
111
- Given I have a CSV with the following content:
112
- """
113
- "abc","2","3"
114
- """
115
- And the content type is set to "application/excel"
116
- And it is stored at the url "http://example.com/example1.xls"
117
- And I ask if there are errors
118
106
  Then there should be 1 error
119
- And that error should have the type "wrong_content_type"
107
+ And that error should have the type "invalid_encoding"
120
108
 
121
109
  Scenario: Incorrect extension
122
110
  Given I have a CSV with the following content:
@@ -128,13 +116,13 @@ Feature: Get validation errors
128
116
  And I ask if there are errors
129
117
  Then there should be 1 error
130
118
  And that error should have the type "wrong_content_type"
131
-
119
+
132
120
  Scenario: Handles urls that 404
133
121
  Given I have a CSV that doesn't exist
134
122
  When I ask if there are errors
135
123
  Then there should be 1 error
136
124
  And that error should have the type "not_found"
137
-
125
+
138
126
  Scenario: Incorrect line endings specified in settings
139
127
  Given I have a CSV file called "cr-line-endings.csv"
140
128
  And I set the line endings to linefeed
@@ -142,7 +130,7 @@ Feature: Get validation errors
142
130
  And I ask if there are errors
143
131
  Then there should be 1 error
144
132
  And that error should have the type "line_breaks"
145
-
133
+
146
134
  Scenario: inconsistent line endings in file cause an error
147
135
  Given I have a CSV file called "inconsistent-line-endings.csv"
148
136
  And it is stored at the url "http://example.com/example1.csv"
@@ -10,18 +10,18 @@ Feature: Validation warnings
10
10
  And it is stored at the url "http://example.com/example1.csv"
11
11
  When I ask if there are warnings
12
12
  Then there should be 0 warnings
13
-
13
+
14
14
  Scenario: ISO-8859-1 Encoding
15
15
  Given I have a CSV with the following content:
16
16
  """
17
17
  "col1","col2","col3"
18
18
  "1","2","3"
19
19
  """
20
- And it is encoded as "iso-8859-1"
20
+ And it is encoded as "iso-8859-1"
21
21
  And it is stored at the url "http://example.com/example1.csv"
22
22
  When I ask if there are warnings
23
23
  Then there should be 1 warnings
24
-
24
+
25
25
  Scenario: Correct content type
26
26
  Given I have a CSV with the following content:
27
27
  """
@@ -32,7 +32,7 @@ Feature: Validation warnings
32
32
  And it is stored at the url "http://example.com/example1.csv"
33
33
  And I ask if there are warnings
34
34
  Then there should be 0 warnings
35
-
35
+
36
36
  Scenario: No extension
37
37
  Given I have a CSV with the following content:
38
38
  """
@@ -43,7 +43,7 @@ Feature: Validation warnings
43
43
  And it is stored at the url "http://example.com/example1"
44
44
  And I ask if there are warnings
45
45
  Then there should be 0 warnings
46
-
46
+
47
47
  Scenario: Allow query params after extension
48
48
  Given I have a CSV with the following content:
49
49
  """
@@ -54,7 +54,7 @@ Feature: Validation warnings
54
54
  And it is stored at the url "http://example.com/example1.csv?query=param"
55
55
  And I ask if there are warnings
56
56
  Then there should be 0 warnings
57
-
57
+
58
58
  Scenario: User doesn't supply encoding
59
59
  Given I have a CSV with the following content:
60
60
  """
@@ -72,3 +72,15 @@ Feature: Validation warnings
72
72
  And I ask if there are warnings
73
73
  Then there should be 1 warnings
74
74
  And that warning should have the type "title_row"
75
+
76
+ Scenario: catch excel warnings
77
+ Given I parse a file called "spreadsheet.xls"
78
+ And I ask if there are warnings
79
+ Then there should be 1 warnings
80
+ And that warning should have the type "excel"
81
+
82
+ Scenario: catch excel warnings
83
+ Given I parse a file called "spreadsheet.xlsx"
84
+ And I ask if there are warnings
85
+ Then there should be 1 warnings
86
+ And that warning should have the type "excel"