csvlint 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +22 -0
- data/.travis.yml +10 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +214 -0
- data/Rakefile +17 -0
- data/bin/create_schema +32 -0
- data/bin/csvlint +52 -0
- data/csvlint.gemspec +39 -0
- data/features/check_format.feature +46 -0
- data/features/csv_options.feature +35 -0
- data/features/fixtures/cr-line-endings.csv +1 -0
- data/features/fixtures/crlf-line-endings.csv +3 -0
- data/features/fixtures/inconsistent-line-endings.csv +2 -0
- data/features/fixtures/invalid-byte-sequence.csv +24 -0
- data/features/fixtures/lf-line-endings.csv +3 -0
- data/features/fixtures/spreadsheet.xls +0 -0
- data/features/fixtures/title-row.csv +4 -0
- data/features/fixtures/valid.csv +3 -0
- data/features/fixtures/windows-line-endings.csv +2 -0
- data/features/information.feature +22 -0
- data/features/parse_csv.feature +90 -0
- data/features/schema_validation.feature +63 -0
- data/features/sources.feature +18 -0
- data/features/step_definitions/csv_options_steps.rb +19 -0
- data/features/step_definitions/information_steps.rb +13 -0
- data/features/step_definitions/parse_csv_steps.rb +30 -0
- data/features/step_definitions/schema_validation_steps.rb +7 -0
- data/features/step_definitions/sources_steps.rb +7 -0
- data/features/step_definitions/validation_errors_steps.rb +43 -0
- data/features/step_definitions/validation_info_steps.rb +18 -0
- data/features/step_definitions/validation_warnings_steps.rb +46 -0
- data/features/support/env.rb +30 -0
- data/features/support/webmock.rb +1 -0
- data/features/validation_errors.feature +151 -0
- data/features/validation_info.feature +24 -0
- data/features/validation_warnings.feature +74 -0
- data/lib/csvlint.rb +13 -0
- data/lib/csvlint/error_collector.rb +43 -0
- data/lib/csvlint/error_message.rb +15 -0
- data/lib/csvlint/field.rb +102 -0
- data/lib/csvlint/schema.rb +69 -0
- data/lib/csvlint/types.rb +113 -0
- data/lib/csvlint/validate.rb +253 -0
- data/lib/csvlint/version.rb +3 -0
- data/lib/csvlint/wrapped_io.rb +39 -0
- data/spec/field_spec.rb +247 -0
- data/spec/schema_spec.rb +149 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/validator_spec.rb +279 -0
- metadata +367 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
# Cucumber support bootstrap: puts the gem's lib directory on the load path,
# starts coverage reporting, then loads the library under test.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "..", "lib")

require 'simplecov'
require 'simplecov-rcov'
require 'rspec/expectations'
require 'coveralls'
require 'pry'

Coveralls.wear_merged!

SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter
SimpleCov.start

# Load the library only after coverage has been started; code required
# before SimpleCov.start is not tracked in the coverage report.
require 'csvlint'

require 'spork'

# Kept so the library is (re)loaded for each Spork run.
Spork.each_run do
  require 'csvlint'
end
|
20
|
+
|
21
|
+
# World object shared by the Cucumber step definitions.
class CustomWorld
  # Baseline CSV options for a scenario: a new, empty Hash on every call.
  def default_csv_options
    {}
  end
end
|
27
|
+
|
28
|
+
# Register CustomWorld as the object scenarios run against.
World { CustomWorld.new }
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'webmock/cucumber'
|
@@ -0,0 +1,151 @@
|
|
1
|
+
Feature: Get validation errors
|
2
|
+
|
3
|
+
Scenario: CSV with ragged rows
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"col1","col2","col3"
|
7
|
+
"1","2","3"
|
8
|
+
"4","5"
|
9
|
+
"""
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I ask if there are errors
|
12
|
+
Then there should be 1 error
|
13
|
+
And that error should have the type "ragged_rows"
|
14
|
+
And that error should have the row "3"
|
15
|
+
And that error should have the content ""4","5""
|
16
|
+
|
17
|
+
Scenario: CSV with incorrect quoting
|
18
|
+
Given I have a CSV with the following content:
|
19
|
+
"""
|
20
|
+
"col1","col2","col3"
|
21
|
+
"Foo","Bar","Baz
|
22
|
+
"""
|
23
|
+
And it is stored at the url "http://example.com/example1.csv"
|
24
|
+
When I ask if there are errors
|
25
|
+
Then there should be 1 error
|
26
|
+
And that error should have the type "unclosed_quote"
|
27
|
+
And that error should have the row "2"
|
28
|
+
And that error should have the content ""Foo","Bar","Baz"
|
29
|
+
|
30
|
+
Scenario: Successfully report a CSV with incorrect whitespace
|
31
|
+
Given I have a CSV with the following content:
|
32
|
+
"""
|
33
|
+
"col1","col2","col3"
|
34
|
+
"Foo","Bar", "Baz"
|
35
|
+
"""
|
36
|
+
And it is stored at the url "http://example.com/example1.csv"
|
37
|
+
When I ask if there are errors
|
38
|
+
Then there should be 1 error
|
39
|
+
And that error should have the type "whitespace"
|
40
|
+
And that error should have the row "2"
|
41
|
+
And that error should have the content ""Foo","Bar", "Baz""
|
42
|
+
|
43
|
+
Scenario: Successfully report a CSV with blank rows
|
44
|
+
Given I have a CSV with the following content:
|
45
|
+
"""
|
46
|
+
"col1","col2","col3"
|
47
|
+
"Foo","Bar","Baz"
|
48
|
+
"","",
|
49
|
+
"Baz","Bar","Foo"
|
50
|
+
"""
|
51
|
+
And it is stored at the url "http://example.com/example1.csv"
|
52
|
+
When I ask if there are errors
|
53
|
+
Then there should be 1 error
|
54
|
+
And that error should have the type "blank_rows"
|
55
|
+
And that error should have the row "3"
|
56
|
+
And that error should have the content ""","","
|
57
|
+
|
58
|
+
Scenario: Successfully report a CSV with multiple trailing empty rows
|
59
|
+
Given I have a CSV with the following content:
|
60
|
+
"""
|
61
|
+
"col1","col2","col3"
|
62
|
+
"Foo","Bar","Baz"
|
63
|
+
"Foo","Bar","Baz"
|
64
|
+
|
65
|
+
|
66
|
+
"""
|
67
|
+
And it is stored at the url "http://example.com/example1.csv"
|
68
|
+
When I ask if there are errors
|
69
|
+
Then there should be 1 error
|
70
|
+
And that error should have the type "blank_rows"
|
71
|
+
And that error should have the row "4"
|
72
|
+
|
73
|
+
Scenario: Successfully report a CSV with an empty row
|
74
|
+
Given I have a CSV with the following content:
|
75
|
+
"""
|
76
|
+
"col1","col2","col3"
|
77
|
+
"Foo","Bar","Baz"
|
78
|
+
|
79
|
+
"Foo","Bar","Baz"
|
80
|
+
"""
|
81
|
+
And it is stored at the url "http://example.com/example1.csv"
|
82
|
+
When I ask if there are errors
|
83
|
+
Then there should be 1 error
|
84
|
+
And that error should have the type "blank_rows"
|
85
|
+
And that error should have the row "3"
|
86
|
+
|
87
|
+
Scenario: Report invalid Encoding
|
88
|
+
Given I have a CSV file called "invalid-byte-sequence.csv"
|
89
|
+
And I set an encoding header of "UTF-8"
|
90
|
+
And it is stored at the url "http://example.com/example1.csv"
|
91
|
+
When I ask if there are errors
|
92
|
+
Then there should be 1 error
|
93
|
+
And that error should have the type "invalid_encoding"
|
94
|
+
|
95
|
+
Scenario: Correctly handle different encodings
|
96
|
+
Given I have a CSV file called "invalid-byte-sequence.csv"
|
97
|
+
And I set an encoding header of "ISO-8859-1"
|
98
|
+
And it is stored at the url "http://example.com/example1.csv"
|
99
|
+
When I ask if there are errors
|
100
|
+
Then there should be no "content_encoding" errors
|
101
|
+
|
102
|
+
Scenario: Report invalid file
|
103
|
+
|
104
|
+
Given I have a CSV file called "spreadsheet.xls"
|
105
|
+
And it is stored at the url "http://example.com/example1.csv"
|
106
|
+
When I ask if there are errors
|
107
|
+
Then there should be 1 error
|
108
|
+
And that error should have the type "invalid_encoding"
|
109
|
+
|
110
|
+
Scenario: Incorrect content type
|
111
|
+
Given I have a CSV with the following content:
|
112
|
+
"""
|
113
|
+
"abc","2","3"
|
114
|
+
"""
|
115
|
+
And the content type is set to "application/excel"
|
116
|
+
And it is stored at the url "http://example.com/example1.xls"
|
117
|
+
And I ask if there are errors
|
118
|
+
Then there should be 1 error
|
119
|
+
And that error should have the type "wrong_content_type"
|
120
|
+
|
121
|
+
Scenario: Incorrect extension
|
122
|
+
Given I have a CSV with the following content:
|
123
|
+
"""
|
124
|
+
"abc","2","3"
|
125
|
+
"""
|
126
|
+
And the content type is set to "application/excel"
|
127
|
+
And it is stored at the url "http://example.com/example1.csv"
|
128
|
+
And I ask if there are errors
|
129
|
+
Then there should be 1 error
|
130
|
+
And that error should have the type "wrong_content_type"
|
131
|
+
|
132
|
+
Scenario: Handles urls that 404
|
133
|
+
Given I have a CSV that doesn't exist
|
134
|
+
When I ask if there are errors
|
135
|
+
Then there should be 1 error
|
136
|
+
And that error should have the type "not_found"
|
137
|
+
|
138
|
+
Scenario: Incorrect line endings specified in settings
|
139
|
+
Given I have a CSV file called "cr-line-endings.csv"
|
140
|
+
And I set the line endings to linefeed
|
141
|
+
And it is stored at the url "http://example.com/example1.csv"
|
142
|
+
And I ask if there are errors
|
143
|
+
Then there should be 1 error
|
144
|
+
And that error should have the type "line_breaks"
|
145
|
+
|
146
|
+
Scenario: inconsistent line endings in file cause an error
|
147
|
+
Given I have a CSV file called "inconsistent-line-endings.csv"
|
148
|
+
And it is stored at the url "http://example.com/example1.csv"
|
149
|
+
And I ask if there are errors
|
150
|
+
Then there should be 1 error
|
151
|
+
And that error should have the type "line_breaks"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: Get validation information messages
|
2
|
+
|
3
|
+
Scenario: LF line endings in file give an info message
|
4
|
+
Given I have a CSV file called "lf-line-endings.csv"
|
5
|
+
And it is stored at the url "http://example.com/example1.csv"
|
6
|
+
And I set header to "true"
|
7
|
+
And I ask if there are info messages
|
8
|
+
Then there should be 1 info message
|
9
|
+
And that message should have the type "nonrfc_line_breaks"
|
10
|
+
|
11
|
+
Scenario: CR line endings in file give an info message
|
12
|
+
Given I have a CSV file called "cr-line-endings.csv"
|
13
|
+
And it is stored at the url "http://example.com/example1.csv"
|
14
|
+
And I set header to "true"
|
15
|
+
And I ask if there are info messages
|
16
|
+
Then there should be 1 info message
|
17
|
+
And that message should have the type "nonrfc_line_breaks"
|
18
|
+
|
19
|
+
Scenario: CRLF line endings in file produces no info messages
|
20
|
+
Given I have a CSV file called "crlf-line-endings.csv"
|
21
|
+
And it is stored at the url "http://example.com/example1.csv"
|
22
|
+
And I set header to "true"
|
23
|
+
And I ask if there are info messages
|
24
|
+
Then there should be 0 info messages
|
@@ -0,0 +1,74 @@
|
|
1
|
+
Feature: Validation warnings
|
2
|
+
|
3
|
+
Scenario: UTF-8 Encoding
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"col1","col2","col3"
|
7
|
+
"abc","2","3"
|
8
|
+
"""
|
9
|
+
And it is encoded as "utf-8"
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I ask if there are warnings
|
12
|
+
Then there should be 0 warnings
|
13
|
+
|
14
|
+
Scenario: ISO-8859-1 Encoding
|
15
|
+
Given I have a CSV with the following content:
|
16
|
+
"""
|
17
|
+
"col1","col2","col3"
|
18
|
+
"1","2","3"
|
19
|
+
"""
|
20
|
+
And it is encoded as "iso-8859-1"
|
21
|
+
And it is stored at the url "http://example.com/example1.csv"
|
22
|
+
When I ask if there are warnings
|
23
|
+
Then there should be 1 warnings
|
24
|
+
|
25
|
+
Scenario: Correct content type
|
26
|
+
Given I have a CSV with the following content:
|
27
|
+
"""
|
28
|
+
"col1","col2","col3"
|
29
|
+
"abc","2","3"
|
30
|
+
"""
|
31
|
+
And the content type is set to "text/csv"
|
32
|
+
And it is stored at the url "http://example.com/example1.csv"
|
33
|
+
And I ask if there are warnings
|
34
|
+
Then there should be 0 warnings
|
35
|
+
|
36
|
+
Scenario: No extension
|
37
|
+
Given I have a CSV with the following content:
|
38
|
+
"""
|
39
|
+
"col1","col2","col3"
|
40
|
+
"abc","2","3"
|
41
|
+
"""
|
42
|
+
And the content type is set to "text/csv"
|
43
|
+
And it is stored at the url "http://example.com/example1"
|
44
|
+
And I ask if there are warnings
|
45
|
+
Then there should be 0 warnings
|
46
|
+
|
47
|
+
Scenario: Allow query params after extension
|
48
|
+
Given I have a CSV with the following content:
|
49
|
+
"""
|
50
|
+
"col1","col2","col3"
|
51
|
+
"abc","2","3"
|
52
|
+
"""
|
53
|
+
And the content type is set to "text/csv"
|
54
|
+
And it is stored at the url "http://example.com/example1.csv?query=param"
|
55
|
+
And I ask if there are warnings
|
56
|
+
Then there should be 0 warnings
|
57
|
+
|
58
|
+
Scenario: User doesn't supply encoding
|
59
|
+
Given I have a CSV with the following content:
|
60
|
+
"""
|
61
|
+
"col1","col2","col3"
|
62
|
+
"abc","2","3"
|
63
|
+
"""
|
64
|
+
And it is stored at the url "http://example.com/example1.csv" with no character set
|
65
|
+
When I ask if there are warnings
|
66
|
+
Then there should be 1 warnings
|
67
|
+
And that warning should have the type "no_encoding"
|
68
|
+
|
69
|
+
Scenario: Title rows
|
70
|
+
Given I have a CSV file called "title-row.csv"
|
71
|
+
And it is stored at the url "http://example.com/example1.csv"
|
72
|
+
And I ask if there are warnings
|
73
|
+
Then there should be 1 warnings
|
74
|
+
And that warning should have the type "title_row"
|
data/lib/csvlint.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "csvlint/version"
|
2
|
+
require 'csv'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'mime/types'
|
5
|
+
require 'tempfile'
|
6
|
+
|
7
|
+
require 'csvlint/types'
|
8
|
+
require 'csvlint/error_message'
|
9
|
+
require 'csvlint/error_collector'
|
10
|
+
require 'csvlint/validate'
|
11
|
+
require 'csvlint/wrapped_io'
|
12
|
+
require 'csvlint/field'
|
13
|
+
require 'csvlint/schema'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Csvlint

  # Mixin that gives the including object three message collections
  # (errors, warnings and info messages), a builder method for each, and
  # helpers to query and clear them.
  module ErrorCollector

    # Names of the message collections managed by this mixin.
    MESSAGE_LEVELS = [
      :errors,
      :warnings,
      :info_messages
    ].freeze

    # Wraps the supplied details in a Csvlint::ErrorMessage.
    def build_message(type, category, row, column, content, constraints)
      Csvlint::ErrorMessage.new({
        :type => type,
        :category => category,
        :row => row,
        :column => column,
        :content => content,
        :constraints => constraints
      })
    end

    MESSAGE_LEVELS.each do |level|
      ivar = "@#{level}"

      # Reader for the collection. The array is created on first access so
      # that valid? and the build_* methods also work when the including
      # object has not called reset yet.
      define_method level do
        current = instance_variable_defined?(ivar) ? instance_variable_get(ivar) : nil
        current || instance_variable_set(ivar, [])
      end

      # build_errors / build_warnings / build_info_messages: append a new
      # message to the matching collection. Only +type+ is mandatory.
      define_method "build_#{level}" do |type, category = nil, row = nil, column = nil, content = nil, constraints = {}|
        public_send(level) << build_message(type, category, row, column, content, constraints)
      end
    end

    # True when no errors have been recorded (warnings and info messages
    # do not affect validity).
    def valid?
      errors.empty?
    end

    # Replaces every collection with a new empty array.
    def reset
      MESSAGE_LEVELS.each do |level|
        instance_variable_set("@#{level}", [])
      end
    end

  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Csvlint

  # A single validation message. Every key of the hash passed to the
  # constructor is stored as an instance variable of the same name; the
  # readers below expose the expected ones.
  class ErrorMessage

    attr_reader :type, :category, :row, :column, :content, :constraints

    # params - key/value pairs copied onto the instance, one instance
    #          variable per key.
    def initialize(params)
      params.each do |attribute, setting|
        ivar_name = "@#{attribute}".to_sym
        instance_variable_set(ivar_name, setting)
      end
    end

  end

end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# Set backs the uniqueness check below; require it explicitly instead of
# relying on another library having loaded it first.
require 'set'

module Csvlint

  # One field (column) of a schema together with its constraints.
  # validate_column checks a single cell value against those constraints and
  # records any failures through the ErrorCollector mixin.
  class Field
    include Csvlint::ErrorCollector
    include Csvlint::Types

    attr_reader :name, :constraints, :title, :description

    # name        - the field name
    # constraints - Hash of constraint name => setting; nil is treated as {}
    # title       - optional title
    # description - optional description
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      @uniques = Set.new
      @title = title
      @description = description
      reset
    end

    # Validates one cell value. Messages from any previous call are cleared
    # first, while the set of values seen for the "unique" constraint is
    # kept across calls.
    #
    # value  - the cell content (may be nil)
    # row    - row number recorded on any error
    # column - column number recorded on any error
    #
    # Returns true when the value produced no errors.
    def validate_column(value, row=nil, column=nil)
      reset
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      validate_range(parsed, row, column) unless parsed.nil?
      valid?
    end

    private

    # Checks the "required", "minLength" and "maxLength" constraints.
    def validate_length(value, row, column)
      if constraints["required"] == true
        build_errors(:missing_value, :schema, row, column, value,
          { "required" => true }) if value.nil? || value.length == 0
      end
      if constraints["minLength"]
        build_errors(:min_length, :schema, row, column, value,
          { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
      end
      if constraints["maxLength"]
        build_errors(:max_length, :schema, row, column, value,
          { "maxLength" => constraints["maxLength"] }) if !value.nil? && value.length > constraints["maxLength"]
      end
    end

    # Checks the "pattern" and "unique" constraints.
    def validate_values(value, row, column)
      if constraints["pattern"]
        build_errors(:pattern, :schema, row, column, value,
          { "pattern" => constraints["pattern"] }) if !value.nil? && !value.match(constraints["pattern"])
      end
      if constraints["unique"] == true
        if @uniques.include? value
          build_errors(:unique, :schema, row, column, value, { "unique" => true })
        else
          @uniques << value
        end
      end
    end

    # Converts the value according to the "type" constraint. Empty strings
    # are not type-checked.
    #
    # Returns the converted value, or nil when there is no type constraint,
    # the value is empty, or conversion failed (an :invalid_type error is
    # recorded in that last case).
    def validate_type(value, row, column)
      return nil unless constraints["type"] && value != ""

      parsed = convert_to_type(value)
      if parsed.nil?
        failed = { "type" => constraints["type"] }
        failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
        build_errors(:invalid_type, :schema, row, column, value, failed)
      end
      parsed
    end

    # Checks an already converted value against "minimum" and "maximum".
    def validate_range(value, row, column)
      #TODO: we're ignoring issues with converting ranges to actual types, maybe we
      #should generate a warning? The schema is invalid
      if constraints["minimum"]
        minimum_value = convert_to_type(constraints["minimum"])
        if minimum_value
          build_errors(:below_minimum, :schema, row, column, value,
            { "minimum" => constraints["minimum"] }) unless value >= minimum_value
        end
      end
      if constraints["maximum"]
        maximum_value = convert_to_type(constraints["maximum"])
        if maximum_value
          build_errors(:above_maximum, :schema, row, column, value,
            { "maximum" => constraints["maximum"] }) unless value <= maximum_value
        end
      end
    end

    # Runs the converter registered in TYPE_VALIDATIONS for the "type"
    # constraint (presumably provided by Csvlint::Types - confirm).
    #
    # Returns the converted value, or nil when no converter is registered or
    # the converter rejects the value.
    def convert_to_type(value)
      parsed = nil
      tv = TYPE_VALIDATIONS[constraints["type"]]
      if tv
        begin
          parsed = tv.call value, constraints
        rescue ArgumentError, TypeError
          # ArgumentError: malformed input. TypeError: the converter was
          # handed something it cannot coerce at all (e.g. a nil cell or a
          # non-String schema bound). Both mean "not convertible"; without
          # the TypeError rescue such input would abort validation.
        end
      end
      parsed
    end
  end
end
|