wjordan213-csvlint 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,22 @@
1
# Builds a validator for the CSV under test and records its info messages.
#
# Reads @url, @csv_options, @schema_json, @schema_type and @schema_url
# (presumably populated by earlier steps in the scenario) and stores the
# results in @schema, @validator and @info_messages.
Given(/^I ask if there are info messages$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    # Fallback location for schemas that were not given an explicit URL.
    # The literal must not carry trailing whitespace, otherwise it is not a
    # valid URI for anything that later parses or joins against it.
    schema_url = @schema_url || "http://example.org"
    schema_doc = JSON.parse(@schema_json)

    @schema =
      if @schema_type == :json_table
        Csvlint::Schema.from_json_table(schema_url, schema_doc)
      else
        Csvlint::Schema.from_csvw_metadata(schema_url, schema_doc)
      end
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @info_messages = @validator.info_messages
end
15
+
16
# Asserts that the validator collected exactly the stated number of info messages.
Then(/^there should be (\d+) info messages?$/) do |num|
  expected_total = num.to_i
  expect(@info_messages.count).to eq(expected_total)
end

# Asserts that some collected info message has the given type.
Then(/^one of the messages should have the type "(.*?)"$/) do |msg_type|
  wanted_type = msg_type.to_sym
  matching_message = @info_messages.find { |message| message.type == wanted_type }
  expect(matching_message).to be_present
end
@@ -0,0 +1,60 @@
1
# Re-encodes the CSV content held in @csv and remembers the encoding name.
Given(/^it is encoded as "(.*?)"$/) do |encoding|
  reencoded = @csv.encode(encoding)
  @csv = reencoded
  @encoding = encoding
end

# Records the encoding to advertise, without touching the CSV content.
Given(/^I set an encoding header of "(.*?)"$/) do |encoding|
  @encoding = encoding
end

# Clears any previously recorded encoding.
Given(/^I do not set an encoding header$/) do
  @encoding = nil
end

# Loads the named file from the fixtures directory (../fixtures relative to
# this file's directory) into @csv.
Given(/^I have a CSV file called "(.*?)"$/) do |filename|
  fixtures_dir = File.join(File.dirname(__FILE__), "..", "fixtures")
  @csv = File.read(File.join(fixtures_dir, filename))
end
17
+
18
# Builds a validator for the CSV under test and records its warnings.
#
# Reads @url, @csv_options, @schema_json, @schema_type and @schema_url
# (presumably populated by earlier steps in the scenario) and stores the
# results in @schema, @validator and @warnings.
When(/^I ask if there are warnings$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    # Fallback location for schemas that were not given an explicit URL.
    # The literal must not carry trailing whitespace, otherwise it is not a
    # valid URI for anything that later parses or joins against it.
    schema_url = @schema_url || "http://example.org"
    schema_doc = JSON.parse(@schema_json)

    @schema =
      if @schema_type == :json_table
        Csvlint::Schema.from_json_table(schema_url, schema_doc)
      else
        Csvlint::Schema.from_csvw_metadata(schema_url, schema_doc)
      end
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @warnings = @validator.warnings
end
31
+
32
# Asserts that at least one warning was collected.
Then(/^there should be warnings$/) do
  warning_total = @warnings.count
  expect(warning_total).to be > 0
end

# Asserts that no warnings remain once the ignorable types are discarded.
Then(/^there should not be warnings$/) do
  # this test is only used for CSVW testing, and :inconsistent_values warnings don't count in CSVW
  ignorable_types = [:inconsistent_values, :check_options]
  @warnings.delete_if { |warning| ignorable_types.include?(warning.type) }
  expect(@warnings.count).to eq(0)
end

# Asserts the exact number of collected warnings.
Then(/^there should be (\d+) warnings$/) do |count|
  expected_total = count.to_i
  expect(@warnings.count).to eq(expected_total)
end

# Records the content type to use for the CSV under test.
Given(/^the content type is set to "(.*?)"$/) do |type|
  @content_type = type
end

# Asserts the row number reported by the first warning.
Then(/^that warning should have the row "(.*?)"$/) do |row|
  first_warning = @warnings.first
  expect(first_warning.row).to eq(row.to_i)
end

# Asserts the column number reported by the first warning.
Then(/^that warning should have the column "(.*?)"$/) do |column|
  first_warning = @warnings.first
  expect(first_warning.column).to eq(column.to_i)
end

# Asserts the type (compared as a symbol) of the first warning.
Then(/^that warning should have the type "(.*?)"$/) do |type|
  first_warning = @warnings.first
  expect(first_warning.type).to eq(type.to_sym)
end
@@ -0,0 +1,56 @@
1
+ require 'aruba'
2
+ require 'aruba/in_process'
3
+ require 'aruba/cucumber'
4
+
5
+ require 'csvlint/cli'
6
+
7
module Csvlint
  # Runs the csvlint command line (Csvlint::Cli) inside the current process
  # with injectable standard streams and an injectable Kernel, so a test
  # harness can capture output and the exit status without spawning a
  # subprocess.
  class CliRunner
    # Allow everything fun to be injected from the outside while defaulting to normal implementations.
    #
    # @param argv [Array<String>] command-line arguments (without the "validate" sub-command)
    # @param stdin [IO] stream installed as $stdin while the CLI runs
    # @param stdout [IO] stream installed as $stdout while the CLI runs
    # @param stderr [IO] stream installed as $stderr while the CLI runs
    # @param kernel [#exit] object that receives the final exit status
    def initialize(argv, stdin = STDIN, stdout = STDOUT, stderr = STDERR, kernel = Kernel)
      @argv, @stdin, @stdout, @stderr, @kernel = argv, stdin, stdout, stderr, kernel
    end

    # Runs `validate` with the injected arguments and hands the resulting
    # status to the injected kernel's #exit: 0 on success, 1 when a
    # StandardError escapes the CLI, or the status of a SystemExit raised by it.
    def execute!
      # Remember what the global streams pointed at so they can be restored
      # exactly, even if the caller had already redirected them.
      saved_stdin, saved_stdout, saved_stderr = $stdin, $stdout, $stderr

      exit_code = begin
        # Thor accesses these streams directly rather than letting them be injected, so we replace them...
        $stderr = @stderr
        $stdin = @stdin
        $stdout = @stdout

        # Run the Thor app; argv is duplicated so the caller's array is not modified.
        Csvlint::Cli.start(@argv.dup.unshift("validate"))

        # Thor::Base#start does not have a return value, assume success if no exception is raised.
        0
      rescue StandardError => e
        report_failure(e)
        1
      rescue SystemExit => e
        e.status
      ensure
        # TODO: reset app state / free resources here if the CLI ever keeps any.

        # ...then we put the streams back the way we found them.
        $stderr = saved_stderr
        $stdin = saved_stdin
        $stdout = saved_stdout
      end

      # Proxy our exit code back to the injected kernel.
      @kernel.exit(exit_code)
    end

    private

    # Writes the error to the injected stderr in the same layout the ruby
    # interpreter uses for an unhandled exception. Works on a copy of the
    # backtrace (and tolerates a missing one) so the exception is left intact.
    def report_failure(error)
      frames = Array(error.backtrace).dup
      @stderr.puts("#{frames.shift}: #{error.message} (#{error.class})")
      @stderr.puts(frames.map { |frame| "\tfrom #{frame}" }.join("\n"))
    end
  end
end
54
+
55
# Select Aruba's InProcess process class and register Csvlint::CliRunner as its
# main_class. NOTE(review): presumably this makes Aruba run CLI scenarios by
# instantiating CliRunner inside the test process — confirm against the Aruba docs.
+ Aruba.process = Aruba::Processes::InProcess
56
+ Aruba.process.main_class = Csvlint::CliRunner
@@ -0,0 +1,26 @@
1
# Coverage reporting is set up first. NOTE(review): presumably it has to be
# started before the library under test is loaded — confirm against Coveralls docs.
+ require 'coveralls'
2
+ Coveralls.wear_merged!('test_frameworks')
3
+
4
# Put the gem's own lib directory (../../lib relative to this file's directory)
# at the front of the load path so `require 'csvlint'` resolves to it.
+ $:.unshift File.join( File.dirname(__FILE__), "..", "..", "lib")
5
+
6
+ require 'rspec/expectations'
7
+ require 'cucumber/rspec/doubles'
8
+ require 'csvlint'
9
+ require 'pry'
10
+
11
+ require 'spork'
12
+
13
# The library is required again inside the block handed to Spork.each_run.
# NOTE(review): presumably so each Spork run picks up the library — confirm.
+ Spork.each_run do
14
+ require 'csvlint'
15
+ end
16
+
17
# Supplies shared defaults to the step definitions.
class CustomWorld
  # @return [Hash] a new, empty options hash on every call
  def default_csv_options
    {}
  end
end
23
+
24
# Hand Cucumber a CustomWorld instance as the world object.
World { CustomWorld.new }
@@ -0,0 +1,114 @@
1
require 'fileutils'
require 'json'
require 'open-uri'
require 'uri'
4
+
5
# Remote root of the W3C CSVW test suite; test file names are resolved against it.
+ BASE_URI = "http://w3c.github.io/csvw/tests/"
6
# Local directory for downloaded test-suite files (../fixtures/csvw relative to this file's directory).
+ BASE_PATH = File.join(File.dirname(__FILE__), "..", "fixtures", "csvw")
7
# Location of the generated Cucumber feature file (one directory above this file's directory).
+ FEATURE_FILE_PATH = File.join(File.dirname(__FILE__), "..", "csvw_validation_tests.feature")
8
# Location of the generated shell script (../../bin/run-csvw-tests relative to this file's directory).
+ SCRIPT_FILE_PATH = File.join(File.dirname(__FILE__), "..", "..", "bin", "run-csvw-tests")
9
+
10
# Create the cache directory when this file is loaded. Dir.mkdir is not
# recursive, so the parent fixtures directory must already exist.
+ Dir.mkdir(BASE_PATH) unless Dir.exist?(BASE_PATH)
11
+
12
+ def cache_file(filename)
13
+ file = File.join(BASE_PATH, filename)
14
+ uri = URI.join(BASE_URI, filename)
15
+ unless File.exist?(file)
16
+ if filename.include? "/"
17
+ levels = filename.split("/")[0..-2]
18
+ for i in 0..levels.length
19
+ dir = File.join(BASE_PATH, levels[0..i].join("/"))
20
+ Dir.mkdir(dir) unless Dir.exist?(dir)
21
+ end
22
+ end
23
+ STDERR.puts("storing #{file} locally")
24
+ File.open(file, 'wb') do |f|
25
+ f.puts open(uri, 'rb').read
26
+ end
27
+ end
28
+ return uri, file
29
+ end
30
+
31
# Generates the shell script at SCRIPT_FILE_PATH from the W3C CSVW validation
# manifest, unless the script already exists. For every manifest entry the
# script echoes the test id/name and the expected outcome, then runs csvlint
# against the cached fixture.
unless File.exist?(SCRIPT_FILE_PATH)
  # Fetch and parse the manifest before creating the script: if the download
  # fails no empty script is left behind to suppress regeneration. The URI is
  # read through open-uri's URI#read rather than Kernel#open, which does not
  # open URLs on current Ruby versions.
  manifest = JSON.parse(URI.parse("http://w3c.github.io/csvw/tests/manifest-validation.jsonld").read)

  # Makes free text safe inside a double-quoted `echo` argument: double
  # quotes are backslash-escaped and backticks are turned into apostrophes.
  quote_for_echo = lambda { |text| text.gsub("\"", "\\\"").gsub("`", "'") }

  script_lines = manifest["entries"].flat_map do |entry|
    expectation =
      case entry["type"]
      when "csvt:WarningValidationTest" then "warnings"
      when "csvt:NegativeValidationTest" then "errors"
      else "valid"
      end

    action = entry["action"]
    metadata = entry["option"] && entry["option"]["metadata"]
    command =
      if action.end_with?(".json")
        # The action itself is a metadata document.
        "csvlint --schema=features/fixtures/csvw/#{action}"
      elsif metadata
        "csvlint features/fixtures/csvw/#{action} --schema=features/fixtures/csvw/#{metadata}"
      else
        "csvlint features/fixtures/csvw/#{action}"
      end

    [
      "echo \"#{entry["id"].split("#")[-1]}: #{quote_for_echo.call(entry["name"])}\"",
      "echo \"#{expectation}: #{quote_for_echo.call(entry["comment"])}\"",
      command,
      "echo"
    ]
  end

  File.open(SCRIPT_FILE_PATH, 'w') do |file|
    # Make the generated script executable.
    File.chmod(0755, SCRIPT_FILE_PATH)
    script_lines.each { |line| file.puts line }
  end
end
54
+
55
# Generates the Cucumber feature file at FEATURE_FILE_PATH from the W3C CSVW
# validation manifest, unless it already exists. Every manifest entry becomes
# one scenario, and every file an entry refers to is fetched into the local
# cache through cache_file.
unless File.exist?(FEATURE_FILE_PATH)
  # The manifest is read through open-uri's URI#read rather than Kernel#open,
  # which does not open URLs on current Ruby versions.
  manifest = JSON.parse(URI.parse("http://w3c.github.io/csvw/tests/manifest-validation.jsonld").read)

  # The whole feature is assembled in memory and written in one go at the end,
  # so a failed download part-way through cannot leave a truncated feature
  # file that would suppress regeneration on the next run.
  feature_lines = [
    "# Auto-generated file based on standard validation CSVW tests from http://w3c.github.io/csvw/tests/manifest-validation.jsonld",
    "",
    "Feature: #{manifest["label"]}",
    ""
  ]

  manifest["entries"].each do |entry|
    action = entry["action"]
    action_uri, = cache_file(action)
    metadata = nil
    provided_files = []
    missing_files = []

    feature_lines << "\t# #{entry["id"]}"
    feature_lines << "\t# #{entry["comment"]}"
    feature_lines << "\tScenario: #{entry["id"]} #{entry["name"].gsub("<", "less than")}"

    if action.end_with?(".json")
      feature_lines << "\t\tGiven I have a metadata file called \"csvw/#{action}\""
      feature_lines << "\t\tAnd the metadata is stored at the url \"#{action_uri}\""
    else
      feature_lines << "\t\tGiven I have a CSV file called \"csvw/#{action}\""
      feature_lines << "\t\tAnd it has a Link header holding \"#{entry["httpLink"]}\"" if entry["httpLink"]
      feature_lines << "\t\tAnd it is stored at the url \"#{action_uri}\""
      if entry["option"] && entry["option"]["metadata"]
        # no need to store the file here, as it will be listed in the 'implicit' list, which all get stored
        metadata = URI.join(BASE_URI, entry["option"]["metadata"])
        feature_lines << "\t\tAnd I have a metadata file called \"csvw/#{entry["option"]["metadata"]}\""
        feature_lines << "\t\tAnd the metadata is stored at the url \"#{metadata}\""
      end
      provided_files << action_uri.to_s
      # Candidate metadata locations derived from the CSV's URL; each gets a
      # "no file" step below unless the entry provides a file at that URL.
      missing_files = [
        URI.join(action_uri, '/.well-known/csvm').to_s,
        "#{action_uri}-metadata.json",
        URI.join(action_uri, 'csv-metadata.json').to_s
      ]
    end

    # Entries without an "implicit" list simply contribute no extra files.
    Array(entry["implicit"]).each do |implicit|
      implicit_uri, = cache_file(implicit)
      provided_files << implicit_uri.to_s
      unless implicit_uri == metadata
        feature_lines << "\t\tAnd I have a file called \"csvw/#{implicit}\" at the url \"#{implicit_uri}\""
      end
    end

    missing_files.each do |uri|
      feature_lines << "\t\tAnd there is no file at the url \"#{uri}\"" unless provided_files.include?(uri)
    end

    feature_lines << "\t\tWhen I carry out CSVW validation"
    case entry["type"]
    when "csvt:WarningValidationTest"
      feature_lines << "\t\tThen there should not be errors"
      feature_lines << "\t\tAnd there should be warnings"
    when "csvt:NegativeValidationTest"
      feature_lines << "\t\tThen there should be errors"
    else
      feature_lines << "\t\tThen there should not be errors"
      feature_lines << "\t\tAnd there should not be warnings"
    end
    feature_lines << "\t"
  end

  File.open(FEATURE_FILE_PATH, 'w') do |file|
    feature_lines.each { |line| file.puts line }
  end
end
@@ -0,0 +1 @@
1
+ require 'webmock/cucumber'
@@ -0,0 +1,147 @@
1
+ Feature: Get validation errors
2
+
3
+ Scenario: CSV with ragged rows
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "1","2","3"
8
+ "4","5"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are errors
12
+ Then there should be 1 error
13
+ And that error should have the type "ragged_rows"
14
+ And that error should have the row "3"
15
+ And that error should have the content ""4","5""
16
+
17
+ Scenario: CSV with incorrect quoting
18
+ Given I have a CSV with the following content:
19
+ """
20
+ "col1","col2","col3"
21
+ "Foo","Bar","Baz
22
+ """
23
+ And it is stored at the url "http://example.com/example1.csv"
24
+ When I ask if there are errors
25
+ Then there should be 1 error
26
+ And that error should have the type "unclosed_quote"
27
+ And that error should have the row "2"
28
+ And that error should have the content ""Foo","Bar","Baz"
29
+
30
+ Scenario: Successfully report a CSV with incorrect whitespace
31
+ Given I have a CSV with the following content:
32
+ """
33
+ "col1","col2","col3"
34
+ "Foo","Bar", "Baz"
35
+ """
36
+ And it is stored at the url "http://example.com/example1.csv"
37
+ When I ask if there are errors
38
+ Then there should be 1 error
39
+ And that error should have the type "whitespace"
40
+ And that error should have the row "2"
41
+ And that error should have the content ""Foo","Bar", "Baz""
42
+
43
+ Scenario: Successfully report a CSV with blank rows
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "col1","col2","col3"
47
+ "Foo","Bar","Baz"
48
+ "","",
49
+ "Baz","Bar","Foo"
50
+ """
51
+ And it is stored at the url "http://example.com/example1.csv"
52
+ When I ask if there are errors
53
+ Then there should be 1 error
54
+ And that error should have the type "blank_rows"
55
+ And that error should have the row "3"
56
+ And that error should have the content ""","","
57
+
58
+ Scenario: Successfully report a CSV with multiple trailing empty rows
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "Foo","Bar","Baz"
63
+ "Foo","Bar","Baz"
64
+
65
+
66
+ """
67
+ And it is stored at the url "http://example.com/example1.csv"
68
+ When I ask if there are errors
69
+ Then there should be 1 error
70
+ And that error should have the type "blank_rows"
71
+ And that error should have the row "4"
72
+
73
+ Scenario: Successfully report a CSV with an empty row
74
+ Given I have a CSV with the following content:
75
+ """
76
+ "col1","col2","col3"
77
+ "Foo","Bar","Baz"
78
+
79
+ "Foo","Bar","Baz"
80
+ """
81
+ And it is stored at the url "http://example.com/example1.csv"
82
+ When I ask if there are errors
83
+ Then there should be 1 error
84
+ And that error should have the type "blank_rows"
85
+ And that error should have the row "3"
86
+
87
+ Scenario: Report invalid Encoding
88
+ Given I have a CSV file called "invalid-byte-sequence.csv"
89
+ And I set an encoding header of "UTF-8"
90
+ And it is stored at the url "http://example.com/example1.csv"
91
+ When I ask if there are errors
92
+ Then there should be 1 error
93
+ And that error should have the type "invalid_encoding"
94
+
95
+ Scenario: Correctly handle different encodings
96
+ Given I have a CSV file called "invalid-byte-sequence.csv"
97
+ And I set an encoding header of "ISO-8859-1"
98
+ And it is stored at the url "http://example.com/example1.csv"
99
+ When I ask if there are errors
100
+ Then there should be no "content_encoding" errors
101
+
102
+ Scenario: Report invalid file
103
+ Given I have a CSV file called "spreadsheet.xls"
104
+ And it is stored at the url "http://example.com/example1.csv"
105
+ When I ask if there are errors
106
+ Then there should be 1 error
107
+ And that error should have the type "invalid_encoding"
108
+
109
+ Scenario: Incorrect extension
110
+ Given I have a CSV with the following content:
111
+ """
112
+ "abc","2","3"
113
+ """
114
+ And the content type is set to "application/excel"
115
+ And it is stored at the url "http://example.com/example1.csv"
116
+ When I ask if there are errors
117
+ Then there should be 1 error
118
+ And that error should have the type "wrong_content_type"
119
+
120
+ Scenario: Handles urls that 404
121
+ Given I have a CSV that doesn't exist
122
+ When I ask if there are errors
123
+ Then there should be 1 error
124
+ And that error should have the type "not_found"
125
+
126
+ Scenario: Incorrect line endings specified in settings
127
+ Given I have a CSV file called "cr-line-endings.csv"
128
+ And I set the line endings to linefeed
129
+ And it is stored at the url "http://example.com/example1.csv"
130
+ When I ask if there are errors
131
+ Then there should be 1 error
132
+ And that error should have the type "line_breaks"
133
+
134
+ Scenario: inconsistent line endings in file cause an error
135
+ Given I have a CSV file called "inconsistent-line-endings.csv"
136
+ And it is stored at the url "http://example.com/example1.csv"
137
+ When I ask if there are errors
138
+ Then there should be 1 error
139
+ And that error should have the type "line_breaks"
140
+
141
+
142
+ Scenario: inconsistent line endings with unquoted fields in file cause an error
143
+ Given I have a CSV file called "inconsistent-line-endings-unquoted.csv"
144
+ And it is stored at the url "http://example.com/example1.csv"
145
+ When I ask if there are errors
146
+ Then there should be 1 error
147
+ And that error should have the type "line_breaks"
@@ -0,0 +1,16 @@
1
+ Feature: Get validation information messages
2
+
3
+ Scenario: LF line endings in file give an info message
4
+ Given I have a CSV file called "lf-line-endings.csv"
5
+ And it is stored at the url "http://example.com/example1.csv"
6
+ And I set header to "true"
7
+ When I ask if there are info messages
8
+ Then there should be 1 info message
9
+ And one of the messages should have the type "nonrfc_line_breaks"
10
+
11
+ Scenario: CRLF line endings in file produces no info messages
12
+ Given I have a CSV file called "crlf-line-endings.csv"
13
+ And it is stored at the url "http://example.com/example1.csv"
14
+ And I set header to "true"
15
+ When I ask if there are info messages
16
+ Then there should be 0 info messages