wjordan213.csvlint 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,22 @@
1
# Step definitions for asserting on the validator's informational messages.

# Builds a schema (when the scenario supplied schema JSON), runs the validator
# against @url and stores the resulting info messages in @info_messages.
Given(/^I ask if there are info messages$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    # Fallback URL must not carry stray whitespace: "http://example.org " is
    # not a valid URI.
    schema_url = @schema_url || "http://example.org"
    schema_doc = JSON.parse(@schema_json)
    @schema =
      if @schema_type == :json_table
        Csvlint::Schema.from_json_table(schema_url, schema_doc)
      else
        Csvlint::Schema.from_csvw_metadata(schema_url, schema_doc)
      end
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @info_messages = @validator.info_messages
end

# Checks the exact number of info messages collected by the step above.
Then(/^there should be (\d+) info messages?$/) do |num|
  expect(@info_messages.count).to eq(num.to_i)
end

# Passes when at least one collected info message has the given type.
Then(/^one of the messages should have the type "(.*?)"$/) do |msg_type|
  expect(@info_messages.find { |message| message.type == msg_type.to_sym }).to be_present
end
@@ -0,0 +1,60 @@
1
# Step definitions for configuring the CSV under test and asserting on the
# warnings the validator produces.

# Re-encodes the in-memory CSV and remembers the encoding that was used.
Given(/^it is encoded as "(.*?)"$/) do |encoding|
  @csv = @csv.encode(encoding)
  @encoding = encoding
end

# Records an encoding to advertise without touching the CSV content.
Given(/^I set an encoding header of "(.*?)"$/) do |encoding|
  @encoding = encoding
end

# Clears any previously recorded encoding.
Given(/^I do not set an encoding header$/) do
  @encoding = nil
end

# Loads the CSV content from a file in features/fixtures.
Given(/^I have a CSV file called "(.*?)"$/) do |filename|
  @csv = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename))
end

# Builds a schema (when the scenario supplied schema JSON), runs the validator
# against @url and stores the resulting warnings in @warnings.
When(/^I ask if there are warnings$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    # Fallback URL must not carry stray whitespace: "http://example.org " is
    # not a valid URI.
    schema_url = @schema_url || "http://example.org"
    schema_doc = JSON.parse(@schema_json)
    @schema =
      if @schema_type == :json_table
        Csvlint::Schema.from_json_table(schema_url, schema_doc)
      else
        Csvlint::Schema.from_csvw_metadata(schema_url, schema_doc)
      end
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @warnings = @validator.warnings
end

Then(/^there should be warnings$/) do
  expect(@warnings.count).to be > 0
end

Then(/^there should not be warnings$/) do
  # This step is only used for CSVW testing, where :inconsistent_values and
  # :check_options warnings don't count, so they are dropped before counting.
  # Note that delete_if removes them from @warnings in place.
  ignored_types = [:inconsistent_values, :check_options]
  @warnings.delete_if { |warning| ignored_types.include?(warning.type) }
  expect(@warnings.count).to eq(0)
end

Then(/^there should be (\d+) warnings$/) do |count|
  expect(@warnings.count).to eq(count.to_i)
end

# Records the content type for the scenario.
Given(/^the content type is set to "(.*?)"$/) do |type|
  @content_type = type
end

# The following three steps inspect the first collected warning only.
Then(/^that warning should have the row "(.*?)"$/) do |row|
  expect(@warnings.first.row).to eq(row.to_i)
end

Then(/^that warning should have the column "(.*?)"$/) do |column|
  expect(@warnings.first.column).to eq(column.to_i)
end

Then(/^that warning should have the type "(.*?)"$/) do |type|
  expect(@warnings.first.type).to eq(type.to_sym)
end
@@ -0,0 +1,56 @@
1
+ require 'aruba'
2
+ require 'aruba/in_process'
3
+ require 'aruba/cucumber'
4
+
5
+ require 'csvlint/cli'
6
+
7
module Csvlint
  # Runs the Thor-based command line app inside the current process with
  # injectable streams and kernel, so a test harness can capture output and
  # the exit code without spawning a subprocess.
  class CliRunner
    # Allow everything fun to be injected from the outside while defaulting to
    # normal implementations.
    #
    # @param argv [Array<String>] command line arguments (not modified)
    # @param stdin [IO] stream installed as $stdin while the app runs
    # @param stdout [IO] stream installed as $stdout while the app runs
    # @param stderr [IO] stream installed as $stderr while the app runs
    # @param kernel [#exit] receives the final exit code
    def initialize(argv, stdin = STDIN, stdout = STDOUT, stderr = STDERR, kernel = Kernel)
      @argv, @stdin, @stdout, @stderr, @kernel = argv, stdin, stdout, stderr, kernel
    end

    # Runs `validate` with the injected arguments and hands the resulting exit
    # code to the injected kernel: 0 on success, 1 when a StandardError
    # escapes the app, or the status carried by a SystemExit.
    def execute!
      # Remember the streams that are active right now so exactly those
      # objects are put back afterwards; resetting to STDOUT/STDERR/STDIN
      # would clobber any redirection the caller had installed.
      previous_stderr, previous_stdin, previous_stdout = $stderr, $stdin, $stdout

      exit_code = begin
        # Thor accesses these streams directly rather than letting them be
        # injected, so we replace them...
        $stderr = @stderr
        $stdin = @stdin
        $stdout = @stdout

        # Run our normal Thor app the way we know and love.
        Csvlint::Cli.start(@argv.dup.unshift("validate"))

        # Thor::Base#start does not have a return value, assume success if no
        # exception is raised.
        0
      rescue StandardError => e
        # The ruby interpreter would pipe this to STDERR and exit 1 in the
        # case of an unhandled exception.
        report_failure(e)
        1
      rescue SystemExit => e
        e.status
      ensure
        # ...then we put the streams back.
        $stderr = previous_stderr
        $stdin = previous_stdin
        $stdout = previous_stdout
      end

      # Proxy our exit code back to the injected kernel.
      @kernel.exit(exit_code)
    end

    private

    # Writes an interpreter-style error report to the injected stderr without
    # mutating the exception's backtrace; tolerates a missing backtrace.
    def report_failure(error)
      location, *callers = Array(error.backtrace)
      @stderr.puts("#{location}: #{error.message} (#{error.class})")
      @stderr.puts(callers.map { |frame| "\tfrom #{frame}" }.join("\n"))
    end
  end
end
54
+
55
+ Aruba.process = Aruba::Processes::InProcess
56
+ Aruba.process.main_class = Csvlint::CliRunner
@@ -0,0 +1,26 @@
1
+ require 'coveralls'
2
+ Coveralls.wear_merged!('test_frameworks')
3
+
4
+ $:.unshift File.join( File.dirname(__FILE__), "..", "..", "lib")
5
+
6
+ require 'rspec/expectations'
7
+ require 'cucumber/rspec/doubles'
8
+ require 'csvlint'
9
+ require 'pry'
10
+
11
+ require 'spork'
12
+
13
+ Spork.each_run do
14
+ require 'csvlint'
15
+ end
16
+
17
# World object for the Cucumber scenarios; its methods are what the step
# definitions call as bare helpers.
class CustomWorld
  # Baseline CSV options used when a scenario has not configured any.
  #
  # @return [Hash] a new, empty hash on every call, so callers may mutate it
  def default_csv_options
    {}
  end
end
23
+
24
+ World do
25
+ CustomWorld.new
26
+ end
@@ -0,0 +1,114 @@
1
+ require 'json'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
+ BASE_URI = "http://w3c.github.io/csvw/tests/"
6
+ BASE_PATH = File.join(File.dirname(__FILE__), "..", "fixtures", "csvw")
7
+ FEATURE_FILE_PATH = File.join(File.dirname(__FILE__), "..", "csvw_validation_tests.feature")
8
+ SCRIPT_FILE_PATH = File.join(File.dirname(__FILE__), "..", "..", "bin", "run-csvw-tests")
9
+
10
+ Dir.mkdir(BASE_PATH) unless Dir.exist?(BASE_PATH)
11
+
12
# Makes sure a file from the CSVW test suite is available in the local
# fixture cache, downloading it when no local copy exists yet.
#
# The remote location is +filename+ resolved against BASE_URI and the local
# location is +filename+ under BASE_PATH.
#
# @param filename [String] "/"-separated path relative to the test suite root
# @return [Array] two elements: the remote URI and the local file path
def cache_file(filename)
  file = File.join(BASE_PATH, filename)
  uri = URI.join(BASE_URI, filename)
  unless File.exist?(file)
    # Create each intermediate directory in turn (none when filename has no "/").
    dir = BASE_PATH
    filename.split("/")[0..-2].each do |segment|
      dir = File.join(dir, segment)
      Dir.mkdir(dir) unless Dir.exist?(dir)
    end
    STDERR.puts("storing #{file} locally")
    # Download before creating the local file so a failed request cannot leave
    # an empty file behind that would later be mistaken for a cached copy.
    # URI#open (from open-uri) is used because Kernel#open no longer accepts
    # URIs on Ruby 3.
    content = uri.open('rb', &:read)
    # write, not puts: puts would append a newline to content lacking one,
    # making the cached fixture differ from the published file.
    File.open(file, 'wb') { |f| f.write(content) }
  end
  return uri, file
end
30
+
31
# Generates the bin/run-csvw-tests shell script (one csvlint invocation per
# entry of the W3C CSVW validation manifest) unless it already exists.
unless File.exist?(SCRIPT_FILE_PATH)
  # Fetch and parse the manifest before creating the script, so a failed
  # download cannot leave behind an empty file that blocks regeneration.
  # URI#read comes from open-uri; Kernel#open no longer accepts URLs on Ruby 3.
  manifest = JSON.parse( URI.parse("http://w3c.github.io/csvw/tests/manifest-validation.jsonld").read )

  # Makes text safe to embed in a double-quoted `echo` argument: escapes double
  # quotes and swaps backticks for single quotes. nil becomes an empty string.
  echo_text = lambda { |text| text.to_s.gsub("\"", "\\\"").gsub("`", "'") }

  File.open(SCRIPT_FILE_PATH, 'w') do |file|
    manifest["entries"].each do |entry|
      type =
        case entry["type"]
        when "csvt:WarningValidationTest" then "warnings"
        when "csvt:NegativeValidationTest" then "errors"
        else "valid"
        end

      file.puts "echo \"#{entry["id"].split("#")[-1]}: #{echo_text.call(entry["name"])}\""
      file.puts "echo \"#{type}: #{echo_text.call(entry["comment"])}\""
      if entry["action"].end_with?(".json")
        # The action itself is a metadata document: validate it as the schema.
        file.puts "csvlint --schema=features/fixtures/csvw/#{entry["action"]}"
      elsif entry["option"] && entry["option"]["metadata"]
        file.puts "csvlint features/fixtures/csvw/#{entry["action"]} --schema=features/fixtures/csvw/#{entry["option"]["metadata"]}"
      else
        file.puts "csvlint features/fixtures/csvw/#{entry["action"]}"
      end
      file.puts "echo"
    end
  end

  File.chmod(0755, SCRIPT_FILE_PATH)
end
54
+
55
# Generates features/csvw_validation_tests.feature (one scenario per entry of
# the W3C CSVW validation manifest) unless it already exists. Every file an
# entry refers to is fetched into the local fixture cache via cache_file.
unless File.exist?(FEATURE_FILE_PATH)
  # Fetch and parse the manifest before creating the feature file.
  # URI#read comes from open-uri; Kernel#open no longer accepts URLs on Ruby 3.
  manifest = JSON.parse( URI.parse("http://w3c.github.io/csvw/tests/manifest-validation.jsonld").read )

  generated = false
  begin
    File.open(FEATURE_FILE_PATH, 'w') do |file|
      file.puts "# Auto-generated file based on standard validation CSVW tests from http://w3c.github.io/csvw/tests/manifest-validation.jsonld"
      file.puts ""
      file.puts "Feature: #{manifest["label"]}"
      file.puts ""

      manifest["entries"].each do |entry|
        action_uri, = cache_file(entry["action"])
        metadata = nil
        provided_files = []
        missing_files = []
        file.puts "\t# #{entry["id"]}"
        file.puts "\t# #{entry["comment"]}"
        file.puts "\tScenario: #{entry["id"]} #{entry["name"].gsub("<", "less than")}"
        if entry["action"].end_with?(".json")
          file.puts "\t\tGiven I have a metadata file called \"csvw/#{entry["action"]}\""
          file.puts "\t\tAnd the metadata is stored at the url \"#{action_uri}\""
        else
          file.puts "\t\tGiven I have a CSV file called \"csvw/#{entry["action"]}\""
          file.puts "\t\tAnd it has a Link header holding \"#{entry["httpLink"]}\"" if entry["httpLink"]
          file.puts "\t\tAnd it is stored at the url \"#{action_uri}\""
          if entry["option"] && entry["option"]["metadata"]
            # no need to store the file here, as it will be listed in the 'implicit' list, which all get stored
            metadata = URI.join(BASE_URI, entry["option"]["metadata"])
            file.puts "\t\tAnd I have a metadata file called \"csvw/#{entry["option"]["metadata"]}\""
            file.puts "\t\tAnd the metadata is stored at the url \"#{metadata}\""
          end
          provided_files << action_uri.to_s
          # Candidate metadata URLs for this CSV; each one is declared absent
          # below unless the entry actually provides a file at that URL.
          missing_files = [
            URI.join(action_uri, '/.well-known/csvm').to_s,
            "#{action_uri}-metadata.json",
            URI.join(action_uri, 'csv-metadata.json').to_s
          ]
        end
        # Array() turns a missing "implicit" list into an empty one.
        Array(entry["implicit"]).each do |implicit|
          implicit_uri, = cache_file(implicit)
          provided_files << implicit_uri.to_s
          # The explicit metadata file already has its own steps above.
          unless implicit_uri == metadata
            file.puts "\t\tAnd I have a file called \"csvw/#{implicit}\" at the url \"#{implicit_uri}\""
          end
        end
        missing_files.each do |uri|
          file.puts "\t\tAnd there is no file at the url \"#{uri}\"" unless provided_files.include? uri
        end
        file.puts "\t\tWhen I carry out CSVW validation"
        if entry["type"] == "csvt:WarningValidationTest"
          file.puts "\t\tThen there should not be errors"
          file.puts "\t\tAnd there should be warnings"
        elsif entry["type"] == "csvt:NegativeValidationTest"
          file.puts "\t\tThen there should be errors"
        else
          file.puts "\t\tThen there should not be errors"
          file.puts "\t\tAnd there should not be warnings"
        end
        file.puts "\t"
      end
    end
    generated = true
  ensure
    # A failure part-way through (e.g. a fixture download error) would leave a
    # truncated feature file that the exist? guard would never regenerate, so
    # remove it.
    File.delete(FEATURE_FILE_PATH) if !generated && File.exist?(FEATURE_FILE_PATH)
  end
end
@@ -0,0 +1 @@
1
+ require 'webmock/cucumber'
@@ -0,0 +1,147 @@
1
+ Feature: Get validation errors
2
+
3
+ Scenario: CSV with ragged rows
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "1","2","3"
8
+ "4","5"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are errors
12
+ Then there should be 1 error
13
+ And that error should have the type "ragged_rows"
14
+ And that error should have the row "3"
15
+ And that error should have the content ""4","5""
16
+
17
+ Scenario: CSV with incorrect quoting
18
+ Given I have a CSV with the following content:
19
+ """
20
+ "col1","col2","col3"
21
+ "Foo","Bar","Baz
22
+ """
23
+ And it is stored at the url "http://example.com/example1.csv"
24
+ When I ask if there are errors
25
+ Then there should be 1 error
26
+ And that error should have the type "unclosed_quote"
27
+ And that error should have the row "2"
28
+ And that error should have the content ""Foo","Bar","Baz"
29
+
30
+ Scenario: Successfully report a CSV with incorrect whitespace
31
+ Given I have a CSV with the following content:
32
+ """
33
+ "col1","col2","col3"
34
+ "Foo","Bar", "Baz"
35
+ """
36
+ And it is stored at the url "http://example.com/example1.csv"
37
+ When I ask if there are errors
38
+ Then there should be 1 error
39
+ And that error should have the type "whitespace"
40
+ And that error should have the row "2"
41
+ And that error should have the content ""Foo","Bar", "Baz""
42
+
43
+ Scenario: Successfully report a CSV with blank rows
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "col1","col2","col3"
47
+ "Foo","Bar","Baz"
48
+ "","",
49
+ "Baz","Bar","Foo"
50
+ """
51
+ And it is stored at the url "http://example.com/example1.csv"
52
+ When I ask if there are errors
53
+ Then there should be 1 error
54
+ And that error should have the type "blank_rows"
55
+ And that error should have the row "3"
56
+ And that error should have the content ""","","
57
+
58
+ Scenario: Successfully report a CSV with multiple trailing empty rows
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "Foo","Bar","Baz"
63
+ "Foo","Bar","Baz"
64
+
65
+
66
+ """
67
+ And it is stored at the url "http://example.com/example1.csv"
68
+ When I ask if there are errors
69
+ Then there should be 1 error
70
+ And that error should have the type "blank_rows"
71
+ And that error should have the row "4"
72
+
73
+ Scenario: Successfully report a CSV with an empty row
74
+ Given I have a CSV with the following content:
75
+ """
76
+ "col1","col2","col3"
77
+ "Foo","Bar","Baz"
78
+
79
+ "Foo","Bar","Baz"
80
+ """
81
+ And it is stored at the url "http://example.com/example1.csv"
82
+ When I ask if there are errors
83
+ Then there should be 1 error
84
+ And that error should have the type "blank_rows"
85
+ And that error should have the row "3"
86
+
87
+ Scenario: Report invalid Encoding
88
+ Given I have a CSV file called "invalid-byte-sequence.csv"
89
+ And I set an encoding header of "UTF-8"
90
+ And it is stored at the url "http://example.com/example1.csv"
91
+ When I ask if there are errors
92
+ Then there should be 1 error
93
+ And that error should have the type "invalid_encoding"
94
+
95
+ Scenario: Correctly handle different encodings
96
+ Given I have a CSV file called "invalid-byte-sequence.csv"
97
+ And I set an encoding header of "ISO-8859-1"
98
+ And it is stored at the url "http://example.com/example1.csv"
99
+ When I ask if there are errors
100
+ Then there should be no "content_encoding" errors
101
+
102
+ Scenario: Report invalid file
103
+ Given I have a CSV file called "spreadsheet.xls"
104
+ And it is stored at the url "http://example.com/example1.csv"
105
+ When I ask if there are errors
106
+ Then there should be 1 error
107
+ And that error should have the type "invalid_encoding"
108
+
109
+ Scenario: Incorrect extension
110
+ Given I have a CSV with the following content:
111
+ """
112
+ "abc","2","3"
113
+ """
114
+ And the content type is set to "application/excel"
115
+ And it is stored at the url "http://example.com/example1.csv"
116
+ And I ask if there are errors
117
+ Then there should be 1 error
118
+ And that error should have the type "wrong_content_type"
119
+
120
+ Scenario: Handles urls that 404
121
+ Given I have a CSV that doesn't exist
122
+ When I ask if there are errors
123
+ Then there should be 1 error
124
+ And that error should have the type "not_found"
125
+
126
+ Scenario: Incorrect line endings specified in settings
127
+ Given I have a CSV file called "cr-line-endings.csv"
128
+ And I set the line endings to linefeed
129
+ And it is stored at the url "http://example.com/example1.csv"
130
+ And I ask if there are errors
131
+ Then there should be 1 error
132
+ And that error should have the type "line_breaks"
133
+
134
+ Scenario: inconsistent line endings in file cause an error
135
+ Given I have a CSV file called "inconsistent-line-endings.csv"
136
+ And it is stored at the url "http://example.com/example1.csv"
137
+ And I ask if there are errors
138
+ Then there should be 1 error
139
+ And that error should have the type "line_breaks"
140
+
141
+
142
+ Scenario: inconsistent line endings with unquoted fields in file cause an error
143
+ Given I have a CSV file called "inconsistent-line-endings-unquoted.csv"
144
+ And it is stored at the url "http://example.com/example1.csv"
145
+ And I ask if there are errors
146
+ Then there should be 1 error
147
+ And that error should have the type "line_breaks"
@@ -0,0 +1,16 @@
1
+ Feature: Get validation information messages
2
+
3
+ Scenario: LF line endings in file give an info message
4
+ Given I have a CSV file called "lf-line-endings.csv"
5
+ And it is stored at the url "http://example.com/example1.csv"
6
+ And I set header to "true"
7
+ And I ask if there are info messages
8
+ Then there should be 1 info messages
9
+ And one of the messages should have the type "nonrfc_line_breaks"
10
+
11
+ Scenario: CRLF line endings in file produces no info messages
12
+ Given I have a CSV file called "crlf-line-endings.csv"
13
+ And it is stored at the url "http://example.com/example1.csv"
14
+ And I set header to "true"
15
+ And I ask if there are info messages
16
+ Then there should be 0 info messages