csvlint 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +22 -0
- data/.travis.yml +10 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +214 -0
- data/Rakefile +17 -0
- data/bin/create_schema +32 -0
- data/bin/csvlint +52 -0
- data/csvlint.gemspec +39 -0
- data/features/check_format.feature +46 -0
- data/features/csv_options.feature +35 -0
- data/features/fixtures/cr-line-endings.csv +1 -0
- data/features/fixtures/crlf-line-endings.csv +3 -0
- data/features/fixtures/inconsistent-line-endings.csv +2 -0
- data/features/fixtures/invalid-byte-sequence.csv +24 -0
- data/features/fixtures/lf-line-endings.csv +3 -0
- data/features/fixtures/spreadsheet.xls +0 -0
- data/features/fixtures/title-row.csv +4 -0
- data/features/fixtures/valid.csv +3 -0
- data/features/fixtures/windows-line-endings.csv +2 -0
- data/features/information.feature +22 -0
- data/features/parse_csv.feature +90 -0
- data/features/schema_validation.feature +63 -0
- data/features/sources.feature +18 -0
- data/features/step_definitions/csv_options_steps.rb +19 -0
- data/features/step_definitions/information_steps.rb +13 -0
- data/features/step_definitions/parse_csv_steps.rb +30 -0
- data/features/step_definitions/schema_validation_steps.rb +7 -0
- data/features/step_definitions/sources_steps.rb +7 -0
- data/features/step_definitions/validation_errors_steps.rb +43 -0
- data/features/step_definitions/validation_info_steps.rb +18 -0
- data/features/step_definitions/validation_warnings_steps.rb +46 -0
- data/features/support/env.rb +30 -0
- data/features/support/webmock.rb +1 -0
- data/features/validation_errors.feature +151 -0
- data/features/validation_info.feature +24 -0
- data/features/validation_warnings.feature +74 -0
- data/lib/csvlint.rb +13 -0
- data/lib/csvlint/error_collector.rb +43 -0
- data/lib/csvlint/error_message.rb +15 -0
- data/lib/csvlint/field.rb +102 -0
- data/lib/csvlint/schema.rb +69 -0
- data/lib/csvlint/types.rb +113 -0
- data/lib/csvlint/validate.rb +253 -0
- data/lib/csvlint/version.rb +3 -0
- data/lib/csvlint/wrapped_io.rb +39 -0
- data/spec/field_spec.rb +247 -0
- data/spec/schema_spec.rb +149 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/validator_spec.rb +279 -0
- metadata +367 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
# Cucumber support file: puts the gem's lib/ directory on the load path,
# starts coverage reporting, then loads the library under test.
$:.unshift File.join( File.dirname(__FILE__), "..", "..", "lib")

require 'simplecov'
require 'simplecov-rcov'
require 'rspec/expectations'
require 'coveralls'
require 'pry'

# Coverage has to be running BEFORE any csvlint code is required; files that
# are loaded earlier are not tracked by SimpleCov.
Coveralls.wear_merged!

# NOTE(review): this assignment presumably replaces whatever formatter
# Coveralls installed just above - confirm that is intended.
SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter
SimpleCov.start

# Load the library only now, so that its files are included in coverage.
require 'csvlint'

require 'spork'

Spork.each_run do
  require 'csvlint'
end
|
20
|
+
|
21
|
+
# Helper object that carries defaults shared by the step definitions.
class CustomWorld
  # Baseline CSV options for a scenario.
  #
  # @return [Hash] a new, empty hash on every call
  def default_csv_options
    Hash.new
  end
end
|
27
|
+
|
28
|
+
# Build a new CustomWorld whenever the World block is evaluated.
World { CustomWorld.new }
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'webmock/cucumber'
|
@@ -0,0 +1,151 @@
|
|
1
|
+
Feature: Get validation errors
|
2
|
+
|
3
|
+
Scenario: CSV with ragged rows
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"col1","col2","col3"
|
7
|
+
"1","2","3"
|
8
|
+
"4","5"
|
9
|
+
"""
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I ask if there are errors
|
12
|
+
Then there should be 1 error
|
13
|
+
And that error should have the type "ragged_rows"
|
14
|
+
And that error should have the row "3"
|
15
|
+
And that error should have the content ""4","5""
|
16
|
+
|
17
|
+
Scenario: CSV with incorrect quoting
|
18
|
+
Given I have a CSV with the following content:
|
19
|
+
"""
|
20
|
+
"col1","col2","col3"
|
21
|
+
"Foo","Bar","Baz
|
22
|
+
"""
|
23
|
+
And it is stored at the url "http://example.com/example1.csv"
|
24
|
+
When I ask if there are errors
|
25
|
+
Then there should be 1 error
|
26
|
+
And that error should have the type "unclosed_quote"
|
27
|
+
And that error should have the row "2"
|
28
|
+
And that error should have the content ""Foo","Bar","Baz"
|
29
|
+
|
30
|
+
Scenario: Successfully report a CSV with incorrect whitespace
|
31
|
+
Given I have a CSV with the following content:
|
32
|
+
"""
|
33
|
+
"col1","col2","col3"
|
34
|
+
"Foo","Bar", "Baz"
|
35
|
+
"""
|
36
|
+
And it is stored at the url "http://example.com/example1.csv"
|
37
|
+
When I ask if there are errors
|
38
|
+
Then there should be 1 error
|
39
|
+
And that error should have the type "whitespace"
|
40
|
+
And that error should have the row "2"
|
41
|
+
And that error should have the content ""Foo","Bar", "Baz""
|
42
|
+
|
43
|
+
Scenario: Successfully report a CSV with blank rows
|
44
|
+
Given I have a CSV with the following content:
|
45
|
+
"""
|
46
|
+
"col1","col2","col3"
|
47
|
+
"Foo","Bar","Baz"
|
48
|
+
"","",
|
49
|
+
"Baz","Bar","Foo"
|
50
|
+
"""
|
51
|
+
And it is stored at the url "http://example.com/example1.csv"
|
52
|
+
When I ask if there are errors
|
53
|
+
Then there should be 1 error
|
54
|
+
And that error should have the type "blank_rows"
|
55
|
+
And that error should have the row "3"
|
56
|
+
And that error should have the content ""","","
|
57
|
+
|
58
|
+
Scenario: Successfully report a CSV with multiple trailing empty rows
|
59
|
+
Given I have a CSV with the following content:
|
60
|
+
"""
|
61
|
+
"col1","col2","col3"
|
62
|
+
"Foo","Bar","Baz"
|
63
|
+
"Foo","Bar","Baz"
|
64
|
+
|
65
|
+
|
66
|
+
"""
|
67
|
+
And it is stored at the url "http://example.com/example1.csv"
|
68
|
+
When I ask if there are errors
|
69
|
+
Then there should be 1 error
|
70
|
+
And that error should have the type "blank_rows"
|
71
|
+
And that error should have the row "4"
|
72
|
+
|
73
|
+
Scenario: Successfully report a CSV with an empty row
|
74
|
+
Given I have a CSV with the following content:
|
75
|
+
"""
|
76
|
+
"col1","col2","col3"
|
77
|
+
"Foo","Bar","Baz"
|
78
|
+
|
79
|
+
"Foo","Bar","Baz"
|
80
|
+
"""
|
81
|
+
And it is stored at the url "http://example.com/example1.csv"
|
82
|
+
When I ask if there are errors
|
83
|
+
Then there should be 1 error
|
84
|
+
And that error should have the type "blank_rows"
|
85
|
+
And that error should have the row "3"
|
86
|
+
|
87
|
+
Scenario: Report invalid Encoding
|
88
|
+
Given I have a CSV file called "invalid-byte-sequence.csv"
|
89
|
+
And I set an encoding header of "UTF-8"
|
90
|
+
And it is stored at the url "http://example.com/example1.csv"
|
91
|
+
When I ask if there are errors
|
92
|
+
Then there should be 1 error
|
93
|
+
And that error should have the type "invalid_encoding"
|
94
|
+
|
95
|
+
Scenario: Correctly handle different encodings
|
96
|
+
Given I have a CSV file called "invalid-byte-sequence.csv"
|
97
|
+
And I set an encoding header of "ISO-8859-1"
|
98
|
+
And it is stored at the url "http://example.com/example1.csv"
|
99
|
+
When I ask if there are errors
|
100
|
+
Then there should be no "content_encoding" errors
|
101
|
+
|
102
|
+
Scenario: Report invalid file
|
103
|
+
|
104
|
+
Given I have a CSV file called "spreadsheet.xls"
|
105
|
+
And it is stored at the url "http://example.com/example1.csv"
|
106
|
+
When I ask if there are errors
|
107
|
+
Then there should be 1 error
|
108
|
+
And that error should have the type "invalid_encoding"
|
109
|
+
|
110
|
+
Scenario: Incorrect content type
|
111
|
+
Given I have a CSV with the following content:
|
112
|
+
"""
|
113
|
+
"abc","2","3"
|
114
|
+
"""
|
115
|
+
And the content type is set to "application/excel"
|
116
|
+
And it is stored at the url "http://example.com/example1.xls"
|
117
|
+
And I ask if there are errors
|
118
|
+
Then there should be 1 error
|
119
|
+
And that error should have the type "wrong_content_type"
|
120
|
+
|
121
|
+
Scenario: Incorrect extension
|
122
|
+
Given I have a CSV with the following content:
|
123
|
+
"""
|
124
|
+
"abc","2","3"
|
125
|
+
"""
|
126
|
+
And the content type is set to "application/excel"
|
127
|
+
And it is stored at the url "http://example.com/example1.csv"
|
128
|
+
And I ask if there are errors
|
129
|
+
Then there should be 1 error
|
130
|
+
And that error should have the type "wrong_content_type"
|
131
|
+
|
132
|
+
Scenario: Handles urls that 404
|
133
|
+
Given I have a CSV that doesn't exist
|
134
|
+
When I ask if there are errors
|
135
|
+
Then there should be 1 error
|
136
|
+
And that error should have the type "not_found"
|
137
|
+
|
138
|
+
Scenario: Incorrect line endings specified in settings
|
139
|
+
Given I have a CSV file called "cr-line-endings.csv"
|
140
|
+
And I set the line endings to linefeed
|
141
|
+
And it is stored at the url "http://example.com/example1.csv"
|
142
|
+
And I ask if there are errors
|
143
|
+
Then there should be 1 error
|
144
|
+
And that error should have the type "line_breaks"
|
145
|
+
|
146
|
+
Scenario: inconsistent line endings in file cause an error
|
147
|
+
Given I have a CSV file called "inconsistent-line-endings.csv"
|
148
|
+
And it is stored at the url "http://example.com/example1.csv"
|
149
|
+
And I ask if there are errors
|
150
|
+
Then there should be 1 error
|
151
|
+
And that error should have the type "line_breaks"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: Get validation information messages
|
2
|
+
|
3
|
+
Scenario: LF line endings in file give an info message
|
4
|
+
Given I have a CSV file called "lf-line-endings.csv"
|
5
|
+
And it is stored at the url "http://example.com/example1.csv"
|
6
|
+
And I set header to "true"
|
7
|
+
And I ask if there are info messages
|
8
|
+
Then there should be 1 info message
|
9
|
+
And that message should have the type "nonrfc_line_breaks"
|
10
|
+
|
11
|
+
Scenario: CR line endings in file give an info message
|
12
|
+
Given I have a CSV file called "cr-line-endings.csv"
|
13
|
+
And it is stored at the url "http://example.com/example1.csv"
|
14
|
+
And I set header to "true"
|
15
|
+
And I ask if there are info messages
|
16
|
+
Then there should be 1 info message
|
17
|
+
And that message should have the type "nonrfc_line_breaks"
|
18
|
+
|
19
|
+
Scenario: CRLF line endings in file produces no info messages
|
20
|
+
Given I have a CSV file called "crlf-line-endings.csv"
|
21
|
+
And it is stored at the url "http://example.com/example1.csv"
|
22
|
+
And I set header to "true"
|
23
|
+
And I ask if there are info messages
|
24
|
+
Then there should be 0 info messages
|
@@ -0,0 +1,74 @@
|
|
1
|
+
Feature: Validation warnings
|
2
|
+
|
3
|
+
Scenario: UTF-8 Encoding
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"col1","col2","col3"
|
7
|
+
"abc","2","3"
|
8
|
+
"""
|
9
|
+
And it is encoded as "utf-8"
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I ask if there are warnings
|
12
|
+
Then there should be 0 warnings
|
13
|
+
|
14
|
+
Scenario: ISO-8859-1 Encoding
|
15
|
+
Given I have a CSV with the following content:
|
16
|
+
"""
|
17
|
+
"col1","col2","col3"
|
18
|
+
"1","2","3"
|
19
|
+
"""
|
20
|
+
And it is encoded as "iso-8859-1"
|
21
|
+
And it is stored at the url "http://example.com/example1.csv"
|
22
|
+
When I ask if there are warnings
|
23
|
+
Then there should be 1 warnings
|
24
|
+
|
25
|
+
Scenario: Correct content type
|
26
|
+
Given I have a CSV with the following content:
|
27
|
+
"""
|
28
|
+
"col1","col2","col3"
|
29
|
+
"abc","2","3"
|
30
|
+
"""
|
31
|
+
And the content type is set to "text/csv"
|
32
|
+
And it is stored at the url "http://example.com/example1.csv"
|
33
|
+
And I ask if there are warnings
|
34
|
+
Then there should be 0 warnings
|
35
|
+
|
36
|
+
Scenario: No extension
|
37
|
+
Given I have a CSV with the following content:
|
38
|
+
"""
|
39
|
+
"col1","col2","col3"
|
40
|
+
"abc","2","3"
|
41
|
+
"""
|
42
|
+
And the content type is set to "text/csv"
|
43
|
+
And it is stored at the url "http://example.com/example1"
|
44
|
+
And I ask if there are warnings
|
45
|
+
Then there should be 0 warnings
|
46
|
+
|
47
|
+
Scenario: Allow query params after extension
|
48
|
+
Given I have a CSV with the following content:
|
49
|
+
"""
|
50
|
+
"col1","col2","col3"
|
51
|
+
"abc","2","3"
|
52
|
+
"""
|
53
|
+
And the content type is set to "text/csv"
|
54
|
+
And it is stored at the url "http://example.com/example1.csv?query=param"
|
55
|
+
And I ask if there are warnings
|
56
|
+
Then there should be 0 warnings
|
57
|
+
|
58
|
+
Scenario: User doesn't supply encoding
|
59
|
+
Given I have a CSV with the following content:
|
60
|
+
"""
|
61
|
+
"col1","col2","col3"
|
62
|
+
"abc","2","3"
|
63
|
+
"""
|
64
|
+
And it is stored at the url "http://example.com/example1.csv" with no character set
|
65
|
+
When I ask if there are warnings
|
66
|
+
Then there should be 1 warnings
|
67
|
+
And that warning should have the type "no_encoding"
|
68
|
+
|
69
|
+
Scenario: Title rows
|
70
|
+
Given I have a CSV file called "title-row.csv"
|
71
|
+
And it is stored at the url "http://example.com/example1.csv"
|
72
|
+
And I ask if there are warnings
|
73
|
+
Then there should be 1 warnings
|
74
|
+
And that warning should have the type "title_row"
|
data/lib/csvlint.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "csvlint/version"
|
2
|
+
require 'csv'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'mime/types'
|
5
|
+
require 'tempfile'
|
6
|
+
|
7
|
+
require 'csvlint/types'
|
8
|
+
require 'csvlint/error_message'
|
9
|
+
require 'csvlint/error_collector'
|
10
|
+
require 'csvlint/validate'
|
11
|
+
require 'csvlint/wrapped_io'
|
12
|
+
require 'csvlint/field'
|
13
|
+
require 'csvlint/schema'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Csvlint

  # Mixin that gives the including class three message buckets (errors,
  # warnings and info messages), a builder method for each bucket, and
  # helpers to query and clear them.
  module ErrorCollector

    # Names of the message buckets. Each name is also the reader method and
    # the suffix of the matching build_* method. Frozen so the list cannot be
    # altered after the methods below have been generated from it.
    MESSAGE_LEVELS = [
      :errors,
      :warnings,
      :info_messages
    ].freeze

    # Wraps the given details in a Csvlint::ErrorMessage.
    #
    # @param type        identifier of the problem (e.g. :ragged_rows)
    # @param category    grouping for the problem (e.g. :schema)
    # @param row         row the message refers to, or nil
    # @param column      column the message refers to, or nil
    # @param content     offending content, or nil
    # @param constraints constraints that were violated
    # @return [Csvlint::ErrorMessage]
    def build_message(type, category, row, column, content, constraints)
      Csvlint::ErrorMessage.new({
        :type => type,
        :category => category,
        :row => row,
        :column => column,
        :content => content,
        :constraints => constraints
      })
    end

    MESSAGE_LEVELS.each do |level|
      ivar = "@#{level}"

      # Reader for the bucket. The array is created on first access so that
      # valid? and the build_* methods also work when the including class has
      # not called reset yet (previously that raised NoMethodError on nil).
      define_method(level) do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end

      # build_errors / build_warnings / build_info_messages: append a new
      # message to the matching bucket and return the bucket.
      define_method "build_#{level}" do |type, category = nil, row = nil, column = nil, content = nil, constraints = {}|
        bucket = instance_variable_get(ivar) || instance_variable_set(ivar, [])
        bucket << build_message(type, category, row, column, content, constraints)
      end
    end

    # @return [Boolean] true when no errors have been recorded
    def valid?
      errors.empty?
    end

    # Replaces every bucket with a new empty array.
    def reset
      MESSAGE_LEVELS.each do |level|
        instance_variable_set("@#{level}", [])
      end
    end

  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Csvlint

  # Read-only record describing one validation message.
  class ErrorMessage

    attr_reader :type, :category, :row, :column, :content, :constraints

    # Copies every key/value pair of +params+ into an instance variable
    # named after the key.
    #
    # @param params [Hash] attribute name => value
    def initialize(params)
      params.each { |attribute, setting| instance_variable_set(:"@#{attribute}", setting) }
    end

  end

end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# Set is used for the uniqueness check below; it is not loaded automatically
# on older Rubies, so require it explicitly.
require 'set'

module Csvlint

  # One column definition of a schema: a name, optional title/description and
  # a hash of constraints that individual cell values are checked against.
  # Problems are recorded through the Csvlint::ErrorCollector build_* methods.
  class Field
    include Csvlint::ErrorCollector
    include Csvlint::Types

    attr_reader :name, :constraints, :title, :description

    # @param name        column name
    # @param constraints [Hash, nil] constraint name => setting, using string
    #                    keys such as "required", "minLength", "pattern",
    #                    "unique", "type", "minimum"; nil is treated as {}
    # @param title       optional title
    # @param description optional description
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      @uniques = Set.new
      @title = title
      @description = description
      reset
    end

    # Checks one cell value against every constraint of this field.
    # Messages from a previous call are cleared first; the values remembered
    # for the "unique" constraint are kept across calls.
    #
    # @param value  the cell content (String, or nil for an absent value)
    # @param row    row number passed through to the error messages
    # @param column column number passed through to the error messages
    # @return [Boolean] true when the value produced no errors
    def validate_column(value, row=nil, column=nil)
      reset
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      validate_range(parsed, row, column) unless parsed.nil?
      valid?
    end

    private

    # Applies the "required", "minLength" and "maxLength" constraints.
    # A nil value counts as missing and as shorter than any minLength.
    def validate_length(value, row, column)
      if constraints["required"] == true
        build_errors(:missing_value, :schema, row, column, value,
          { "required" => true }) if value.nil? || value.length == 0
      end
      if constraints["minLength"]
        build_errors(:min_length, :schema, row, column, value,
          { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
      end
      if constraints["maxLength"]
        build_errors(:max_length, :schema, row, column, value,
          { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
      end
    end

    # Applies the "pattern" and "unique" constraints.
    def validate_values(value, row, column)
      if constraints["pattern"]
        build_errors(:pattern, :schema, row, column, value,
          { "pattern" => constraints["pattern"] } ) if !value.nil? && !value.match( constraints["pattern"] )
      end
      if constraints["unique"] == true
        if @uniques.include? value
          build_errors(:unique, :schema, row, column, value, { "unique" => true })
        else
          @uniques << value
        end
      end
    end

    # Converts the value to the type named by the "type" constraint.
    #
    # Absent values (nil or "") are not type checked; missing data is the
    # job of the "required" constraint, and validate_length likewise treats
    # nil and "" the same way.
    #
    # @return the converted value, or nil when there is nothing to convert
    #         or the conversion failed (an :invalid_type error is recorded
    #         in the latter case)
    def validate_type(value, row, column)
      return nil unless constraints["type"]
      return nil if value.nil? || value == ""

      parsed = convert_to_type(value)
      return parsed unless parsed.nil?

      failed = { "type" => constraints["type"] }
      failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
      build_errors(:invalid_type, :schema, row, column, value, failed)
      nil
    end

    # Applies the "minimum" and "maximum" constraints to an already
    # converted value.
    def validate_range(value, row, column)
      #TODO: we're ignoring issues with converting ranges to actual types, maybe we
      #should generate a warning? The schema is invalid
      if constraints["minimum"]
        minimumValue = convert_to_type( constraints["minimum"] )
        if minimumValue
          build_errors(:below_minimum, :schema, row, column, value,
            { "minimum" => constraints["minimum"] }) unless value >= minimumValue
        end
      end
      if constraints["maximum"]
        maximumValue = convert_to_type( constraints["maximum"] )
        if maximumValue
          build_errors(:above_maximum, :schema, row, column, value,
            { "maximum" => constraints["maximum"] }) unless value <= maximumValue
        end
      end
    end

    # Runs the converter registered in TYPE_VALIDATIONS for the "type"
    # constraint (the constant is presumably supplied by Csvlint::Types -
    # confirm).
    #
    # @return the converted value, or nil when no converter is registered or
    #         the converter rejected the value
    def convert_to_type(value)
      tv = TYPE_VALIDATIONS[constraints["type"]]
      return nil unless tv

      begin
        tv.call value, constraints
      rescue ArgumentError, TypeError
        # TypeError is what conversions such as Kernel#Integer raise for
        # non-string input (e.g. nil); treat it like any other failed
        # conversion instead of aborting the whole validation run.
        nil
      end
    end
  end
end
|