wjordan213-csvlint 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,22 @@
1
+ Feature: Return information
2
+
3
+ Background:
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "abc","2","3"
7
+ """
8
+ And it is encoded as "utf-8"
9
+ And the content type is "text/csv"
10
+ And it is stored at the url "http://example.com/example1.csv?query=true"
11
+
12
+ Scenario: Return encoding
13
+ Then the "encoding" should be "UTF-8"
14
+
15
+ Scenario: Return content type
16
+ Then the "content_type" should be "text/csv; charset=utf-8"
17
+
18
+ Scenario: Return extension
19
+ Then the "extension" should be ".csv"
20
+
21
+ Scenario: Return meta
22
+ Then the metadata content type should be "text/csv; charset=utf-8"
@@ -0,0 +1,90 @@
1
+ Feature: Parse CSV
2
+
3
+ Scenario: Successfully parse a valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a CSV with newlines in quoted fields
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "a","b","c"
18
+ "d","e","this is
19
+ valid"
20
+ "a","b","c"
21
+ """
22
+ And it is stored at the url "http://example.com/example1.csv"
23
+ When I ask if the CSV is valid
24
+ Then I should get the value of true
25
+
26
+ Scenario: Successfully parse a CSV with multiple newlines in quoted fields
27
+ Given I have a CSV with the following content:
28
+ """
29
+ "a","b","c"
30
+ "d","this is
31
+ valid","as is this
32
+ too"
33
+ """
34
+ And it is stored at the url "http://example.com/example1.csv"
35
+ When I ask if the CSV is valid
36
+ Then I should get the value of true
37
+
38
+ Scenario: Successfully report an invalid CSV
39
+ Given I have a CSV with the following content:
40
+ """
41
+ "Foo", "Bar" , "Baz
42
+ """
43
+ And it is stored at the url "http://example.com/example1.csv"
44
+ When I ask if the CSV is valid
45
+ Then I should get the value of false
46
+
47
+ Scenario: Successfully report a CSV with incorrect quoting
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "Foo","Bar","Baz
51
+ """
52
+ And it is stored at the url "http://example.com/example1.csv"
53
+ When I ask if the CSV is valid
54
+ Then I should get the value of false
55
+
56
+ Scenario: Successfully report a CSV with incorrect whitespace
57
+ Given I have a CSV with the following content:
58
+ """
59
+ "Foo","Bar", "Baz"
60
+ """
61
+ And it is stored at the url "http://example.com/example1.csv"
62
+ When I ask if the CSV is valid
63
+ Then I should get the value of false
64
+
65
+ Scenario: Successfully report a CSV with ragged rows
66
+ Given I have a CSV with the following content:
67
+ """
68
+ "col1","col2","col2"
69
+ "1","2","3"
70
+ "4","5"
71
+ """
72
+ And it is stored at the url "http://example.com/example1.csv"
73
+ When I ask if the CSV is valid
74
+ Then I should get the value of false
75
+
76
+ Scenario: Don't class blank values as inconsistencies
77
+ Given I have a CSV with the following content:
78
+ """
79
+ "col1","col2","col3"
80
+ "1","2","3"
81
+ "4","5","6"
82
+ "","7","8"
83
+ "9","10","11"
84
+ "","12","13"
85
+ "","14","15"
86
+ "16","17","18"
87
+ """
88
+ And it is stored at the url "http://example.com/example1.csv"
89
+ When I ask if there are warnings
90
+ Then there should be 0 warnings
@@ -0,0 +1,105 @@
1
+ Feature: Schema Validation
2
+
3
+ Scenario: Valid CSV
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Bob","1234","bob@example.org"
7
+ "Alice","5","alice@example.com"
8
+ """
9
+ And it is stored at the url "http://example.com/example1.csv"
10
+ And I have a schema with the following content:
11
+ """
12
+ {
13
+ "fields": [
14
+ { "name": "Name", "constraints": { "required": true } },
15
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
16
+ { "name": "Email", "constraints": { "required": true } }
17
+ ]
18
+ }
19
+ """
20
+ When I ask if there are errors
21
+ Then there should be 0 error
22
+
23
+ Scenario: Schema invalid CSV
24
+ Given I have a CSV with the following content:
25
+ """
26
+ "Bob","1234","bob@example.org"
27
+ "Alice","5","alice@example.com"
28
+ """
29
+ And it is stored at the url "http://example.com/example1.csv"
30
+ And I have a schema with the following content:
31
+ """
32
+ {
33
+ "fields": [
34
+ { "name": "Name", "constraints": { "required": true } },
35
+ { "name": "Id", "constraints": { "required": true, "minLength": 3 } },
36
+ { "name": "Email", "constraints": { "required": true } }
37
+ ]
38
+ }
39
+ """
40
+ When I ask if there are errors
41
+ Then there should be 1 error
42
+
43
+ Scenario: CSV with incorrect header
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "name","id","contact"
47
+ "Bob","1234","bob@example.org"
48
+ "Alice","5","alice@example.com"
49
+ """
50
+ And it is stored at the url "http://example.com/example1.csv"
51
+ And I have a schema with the following content:
52
+ """
53
+ {
54
+ "fields": [
55
+ { "name": "name", "constraints": { "required": true } },
56
+ { "name": "id", "constraints": { "required": true, "minLength": 3 } },
57
+ { "name": "email", "constraints": { "required": true } }
58
+ ]
59
+ }
60
+ """
61
+ When I ask if there are warnings
62
+ Then there should be 1 warnings
63
+
64
+ Scenario: Schema with valid regex
65
+ Given I have a CSV with the following content:
66
+ """
67
+ "firstname","id","email"
68
+ "Bob","1234","bob@example.org"
69
+ "Alice","5","alice@example.com"
70
+ """
71
+ And it is stored at the url "http://example.com/example1.csv"
72
+ And I have a schema with the following content:
73
+ """
74
+ {
75
+ "fields": [
76
+ { "name": "Name", "constraints": { "required": true, "pattern": "^[A-Za-z0-9_]*$" } },
77
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
78
+ { "name": "Email", "constraints": { "required": true } }
79
+ ]
80
+ }
81
+ """
82
+ When I ask if there are errors
83
+ Then there should be 0 error
84
+
85
+ Scenario: Schema with invalid regex
86
+ Given I have a CSV with the following content:
87
+ """
88
+ "firstname","id","email"
89
+ "Bob","1234","bob@example.org"
90
+ "Alice","5","alice@example.com"
91
+ """
92
+ And it is stored at the url "http://example.com/example1.csv"
93
+ And I have a schema with the following content:
94
+ """
95
+ {
96
+ "fields": [
97
+ { "name": "Name", "constraints": { "required": true, "pattern": "((" } },
98
+ { "name": "Id", "constraints": { "required": true, "minLength": 1 } },
99
+ { "name": "Email", "constraints": { "required": true } }
100
+ ]
101
+ }
102
+ """
103
+ When I ask if there are errors
104
+ Then there should be 1 error
105
+ And that error should have the type "invalid_regex"
@@ -0,0 +1,17 @@
1
+ Feature: Parse CSV from Different Sources
2
+
3
+ Scenario: Successfully parse a valid CSV from a StringIO
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "Foo","Bar","Baz"
7
+ "1","2","3"
8
+ "3","2","1"
9
+ """
10
+ And it is parsed as a StringIO
11
+ When I ask if the CSV is valid
12
+ Then I should get the value of true
13
+
14
+ Scenario: Successfully parse a valid CSV from a File
15
+ Given I parse a file called "valid.csv"
16
+ When I ask if the CSV is valid
17
+ Then I should get the value of true
@@ -0,0 +1,11 @@
1
# Step definitions for CLI scenarios that stub the standard streams.

# Expects STDIN.read to be called and makes it return the contents of the
# file named in the step text.
Given(/^I have stubbed stdin to contain "(.*?)"$/) do |file|
  stdin_contents = File.read(file)
  expect(STDIN).to receive(:read).and_return(stdin_contents)
end

# Expects STDIN.read to be called and makes it return nil.
Given(/^I have stubbed stdin to contain nothing$/) do
  expect(STDIN).to receive(:read).and_return(nil)
end

# Registers a negative message expectation on STDERR.puts.
# NOTE(review): the expectation is set up when this Then step runs, so it
# presumably cannot see writes made by earlier steps -- confirm step ordering.
Then(/^nothing should be outputted to STDERR$/) do
  expect(STDERR).not_to receive(:puts)
end
@@ -0,0 +1,24 @@
1
# Step definitions that tweak the CSV dialect options used by a scenario.
# Each step seeds @csv_options from default_csv_options (defined outside this
# file) on first use, then overrides a single key.

# Overrides the field delimiter.
Given(/^I set the delimiter to "(.*?)"$/) do |separator|
  (@csv_options ||= default_csv_options)["delimiter"] = separator
end

# Overrides the quote character.
Given(/^I set quotechar to "(.*?)"$/) do |quote_char|
  (@csv_options ||= default_csv_options)["quoteChar"] = quote_char
end

# Forces the line terminator to LF.
Given(/^I set the line endings to linefeed$/) do
  (@csv_options ||= default_csv_options)["lineTerminator"] = "\n"
end

# Forces the line terminator to CR.
Given(/^I set the line endings to carriage return$/) do
  (@csv_options ||= default_csv_options)["lineTerminator"] = "\r"
end

# Sets the header flag: true only when the step text is exactly "true".
Given(/^I set header to "(.*?)"$/) do |flag|
  header_enabled = (flag == "true")
  (@csv_options ||= default_csv_options)["header"] = header_enabled
end
@@ -0,0 +1,13 @@
1
# Step definitions for scenarios that inspect information reported by the
# validator (encoding, content type, extension, response headers).

# Records the content type named in the step text in @content_type, so the
# scenario controls the value instead of always getting "text/csv".
Given(/^the content type is "(.*?)"$/) do |content_type|
  @content_type = content_type
end

# Builds a validator for @url and checks that the reader named by the first
# capture (e.g. "encoding", "extension") returns the expected string.
Then(/^the "(.*?)" should be "(.*?)"$/) do |attribute, expected|
  validator = Csvlint::Validator.new(@url, default_csv_options)
  expect(validator.send(attribute.to_sym)).to eq(expected)
end

# Builds a validator for @url and checks the 'content-type' entry of its
# headers.
Then(/^the metadata content type should be "(.*?)"$/) do |content_type|
  validator = Csvlint::Validator.new(@url, default_csv_options)
  expect(validator.headers['content-type']).to eq(content_type)
end
@@ -0,0 +1,42 @@
1
# Step definitions for supplying CSV content, serving it from a stubbed URL,
# and asking the validator whether it is valid.

# Stores the scenario's doc string as the CSV body.
Given(/^I have a CSV with the following content:$/) do |string|
  @csv = string.to_s
end

# Remembers a Link header value, tagged with the CSVW metadata media type.
Given(/^it has a Link header holding "(.*?)"$/) do |link|
  @link = %(#{link}; type="application/csvm+json")
end

# Stubs a 200 response for the URL with @csv as the body and a Content-Type
# that includes a charset (defaults: text/csv, UTF-8). A Link header is added
# when one was set. The three metadata lookups are stubbed to return 404.
Given(/^it is stored at the url "(.*?)"$/) do |url|
  @url = url
  media_type = @content_type || "text/csv"
  charset = @encoding || "UTF-8"

  response_headers = { "Content-Type" => "#{media_type}; charset=#{charset}" }
  response_headers["Link"] = @link if @link

  stub_request(:get, url).to_return(status: 200, body: @csv, headers: response_headers)

  metadata_urls = [
    URI.join(url, '/.well-known/csvm'),
    url + '-metadata.json',
    URI.join(url, 'csv-metadata.json')
  ]
  metadata_urls.each do |metadata_url|
    stub_request(:get, metadata_url).to_return(status: 404)
  end
end

# Same as the step above, but the Content-Type header has no charset and no
# Link header is sent.
Given(/^it is stored at the url "(.*?)" with no character set$/) do |url|
  @url = url
  media_type = @content_type || "text/csv"

  response_headers = { "Content-Type" => media_type.to_s }
  stub_request(:get, url).to_return(status: 200, body: @csv, headers: response_headers)

  metadata_urls = [
    URI.join(url, '/.well-known/csvm'),
    url + '-metadata.json',
    URI.join(url, 'csv-metadata.json')
  ]
  metadata_urls.each do |metadata_url|
    stub_request(:get, metadata_url).to_return(status: 404)
  end
end

# Runs the validator against @url and keeps both the validator and its
# verdict for later steps.
When(/^I ask if the CSV is valid$/) do
  @csv_options ||= default_csv_options
  @validator = Csvlint::Validator.new(@url, @csv_options)
  @valid = @validator.valid?
end

# The verdict must be exactly true.
Then(/^I should get the value of true$/) do
  expect(@valid).to be(true)
end

# The verdict must be exactly false.
Then(/^I should get the value of false$/) do
  expect(@valid).to be(false)
end
@@ -0,0 +1,33 @@
1
# Step definitions for supplying schemas / CSVW metadata and stubbing the
# URLs they are served from.

# Stores the doc string as a JSON Table Schema.
Given(/^I have a schema with the following content:$/) do |json|
  @schema_type = :json_table
  @schema_json = json
end

# Stores the doc string as CSVW metadata.
Given(/^I have metadata with the following content:$/) do |json|
  @schema_type = :csvw_metadata
  @schema_json = json
end

# Loads CSVW metadata from a file in the sibling "fixtures" directory.
Given(/^I have a metadata file called "([^"]*)"$/) do |filename|
  @schema_type = :csvw_metadata
  @schema_json = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename))
end

# Serves the previously stored schema/metadata JSON at the given URL.
# The first capture ("schema" or "metadata") only makes the step read
# naturally; it is not used.
Given(/^the (schema|metadata) is stored at the url "(.*?)"$/) do |_schema_type, schema_url|
  @schema_url = schema_url
  stub_request(:get, @schema_url).to_return(:status => 200, :body => @schema_json.to_str)
end

# Serves the doc string at the given URL.
Given(/^there is a file at "(.*?)" with the content:$/) do |url, content|
  stub_request(:get, url).to_return(:status => 200, :body => content.to_str)
end

# Serves a fixture file at the given URL. Files whose name ends in ".csv" are
# sent as text/csv; everything else is sent as CSVW metadata JSON.
Given(/^I have a file called "(.*?)" at the url "(.*?)"$/) do |filename, url|
  content = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename))
  # Literal suffix check: the regex /.csv$/ also matched names such as
  # "datacsv", because its dot was unescaped.
  content_type = filename.end_with?(".csv") ? "text/csv" : "application/csvm+json"
  stub_request(:get, url).to_return(:status => 200, :body => content, :headers => {"Content-Type" => "#{content_type}; charset=UTF-8"})
end

# Makes the given URL respond with 404.
Given(/^there is no file at the url "(.*?)"$/) do |url|
  stub_request(:get, url).to_return(:status => 404)
end
@@ -0,0 +1,7 @@
1
# Step definitions that hand the validator a non-URL source. The source is
# stored in @url, which the validation steps pass to Csvlint::Validator.

# Wraps the scenario's CSV text in a StringIO.
Given(/^it is parsed as a StringIO$/) do
  csv_stream = StringIO.new(@csv)
  @url = csv_stream
end

# Opens a file from the sibling "fixtures" directory. The handle is left
# open here; this step does not close it.
Given(/^I parse a file called "(.*?)"$/) do |filename|
  fixture_path = File.join(File.dirname(__FILE__), "..", "fixtures", filename)
  @url = File.new(fixture_path)
end
@@ -0,0 +1,90 @@
1
# Step definitions that run validation and make assertions about the
# resulting errors.

# Builds a schema from @schema_json (when present), runs the validator
# against @url and stores its errors in @errors.
When(/^I ask if there are errors$/) do
  @csv_options ||= default_csv_options

  if @schema_json
    # NOTE(review): the fallback URL literal ends with a space; it looks like
    # a typo but is kept as-is -- confirm before changing.
    if @schema_type == :json_table
      @schema = Csvlint::Schema.from_json_table(@schema_url || "http://example.org ", JSON.parse(@schema_json))
    else
      @schema = Csvlint::Schema.from_csvw_metadata(@schema_url || "http://example.org ", JSON.parse(@schema_json))
    end
  end

  @validator = Csvlint::Validator.new(@url, @csv_options, @schema)
  @errors = @validator.errors
end

# Runs CSVW validation. With no @url, every table listed in the schema is
# validated and the errors/warnings are concatenated; otherwise only @url is
# validated. A JSON parse failure or metadata error becomes the sole entry of
# @errors (@warnings is not assigned in that case).
When(/^I carry out CSVW validation$/) do
  @csv_options ||= default_csv_options

  begin
    if @schema_json
      json = JSON.parse(@schema_json)
      # NOTE(review): same trailing-space fallback URL as above -- confirm.
      if @schema_type == :json_table
        @schema = Csvlint::Schema.from_json_table(@schema_url || "http://example.org ", json)
      else
        @schema = Csvlint::Schema.from_csvw_metadata(@schema_url || "http://example.org ", json)
      end
    end

    if @url.nil?
      @errors = []
      @warnings = []
      @schema.tables.keys.each do |table_url|
        validator = Csvlint::Validator.new(table_url, @csv_options, @schema)
        @errors += validator.errors
        @warnings += validator.warnings
      end
    else
      validator = Csvlint::Validator.new(@url, @csv_options, @schema)
      @errors = validator.errors
      @warnings = validator.warnings
    end
  rescue JSON::ParserError, Csvlint::Csvw::MetadataError => e
    @errors = [e]
  end
end

Then(/^there should be errors$/) do
  # this test is only used for CSVW testing; :invalid_encoding masks lack of real errors
  @errors.delete_if { |e| e.instance_of?(Csvlint::ErrorMessage) && [:invalid_encoding, :line_breaks].include?(e.type) }
  expect(@errors.count).to be > 0
end

Then(/^there should not be errors$/) do
  expect(@errors.count).to eq(0)
end

Then(/^there should be (\d+) error$/) do |count|
  expect(@errors.count).to eq(count.to_i)
end

# The steps below inspect only the first recorded error.

Then(/^that error should have the type "(.*?)"$/) do |type|
  expect(@errors.first.type).to eq(type.to_sym)
end

Then(/^that error should have the row "(.*?)"$/) do |row|
  expect(@errors.first.row).to eq(row.to_i)
end

Then(/^that error should have the column "(.*?)"$/) do |column|
  expect(@errors.first.column).to eq(column.to_i)
end

# Compares content with one trailing line terminator removed from each side.
Then(/^that error should have the content "(.*)"$/) do |content|
  expect(@errors.first.content.chomp).to eq(content.chomp)
end

Then(/^that error should have no content$/) do
  expect(@errors.first.content).to be_nil
end

# Points @url at an address stubbed to return 404.
# NOTE(review): the URL literal has no colon after "http"; kept byte-identical
# because changing it may alter what the scenario exercises -- confirm intent.
Given(/^I have a CSV that doesn't exist$/) do
  @url = "http//www.example.com/fake-csv.csv"
  stub_request(:get, @url).to_return(:status => 404)
end

# No recorded error may have the given type. Written with expect syntax, like
# the other steps in this file, instead of the legacy `should_not`.
Then(/^there should be no "(.*?)" errors$/) do |type|
  @errors.each do |error|
    expect(error.type).not_to eq(type.to_sym)
  end
end