csvlint 0.4.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/push.yml +35 -0
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/CHANGELOG.md +38 -0
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +13 -10
- data/Rakefile +7 -7
- data/bin/create_schema +2 -2
- data/csvlint.gemspec +19 -22
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +10 -9
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +109 -105
- data/features/support/webmock.rb +3 -1
- data/lib/csvlint/cli.rb +136 -142
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +35 -43
- data/lib/csvlint/validate.rb +173 -151
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +131 -141
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -203
- metadata +41 -85
- data/.travis.yml +0 -37
data/lib/csvlint/field.rb
CHANGED
@@ -1,196 +1,208 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Field
|
4
3
|
include Csvlint::ErrorCollector
|
5
4
|
|
6
5
|
attr_reader :name, :constraints, :title, :description
|
7
6
|
|
8
|
-
def initialize(name, constraints={}, title=nil, description=nil)
|
7
|
+
def initialize(name, constraints = {}, title = nil, description = nil)
|
9
8
|
@name = name
|
10
9
|
@constraints = constraints || {}
|
11
10
|
@uniques = Set.new
|
12
11
|
@title = title
|
13
12
|
@description = description
|
13
|
+
@regex = nil
|
14
14
|
reset
|
15
15
|
end
|
16
16
|
|
17
|
-
def validate_column(value, row=nil, column=nil, all_errors=[])
|
17
|
+
def validate_column(value, row = nil, column = nil, all_errors = [])
|
18
18
|
reset
|
19
|
-
unless all_errors.any?{|error| ((error.type == :invalid_regex) && (error.column == column))}
|
19
|
+
unless all_errors.any? { |error| ((error.type == :invalid_regex) && (error.column == column)) }
|
20
20
|
validate_regex(value, row, column, all_errors)
|
21
21
|
end
|
22
22
|
validate_length(value, row, column)
|
23
23
|
validate_values(value, row, column)
|
24
24
|
parsed = validate_type(value, row, column)
|
25
|
-
validate_range(parsed, row, column) if parsed
|
26
|
-
|
25
|
+
validate_range(parsed, row, column) if !parsed.nil?
|
26
|
+
valid?
|
27
27
|
end
|
28
28
|
|
29
29
|
private
|
30
|
-
|
31
|
-
|
30
|
+
|
31
|
+
def validate_length(value, row, column)
|
32
|
+
if constraints["required"] == true
|
33
|
+
if value.nil? || value.length == 0
|
32
34
|
build_errors(:missing_value, :schema, row, column, value,
|
33
|
-
{
|
35
|
+
{"required" => true})
|
34
36
|
end
|
35
|
-
|
37
|
+
end
|
38
|
+
if constraints["minLength"]
|
39
|
+
if value.nil? || value.length < constraints["minLength"]
|
36
40
|
build_errors(:min_length, :schema, row, column, value,
|
37
|
-
{
|
41
|
+
{"minLength" => constraints["minLength"]})
|
38
42
|
end
|
39
|
-
|
40
|
-
|
41
|
-
|
43
|
+
end
|
44
|
+
if constraints["maxLength"]
|
45
|
+
if !value.nil? && value.length > constraints["maxLength"]
|
46
|
+
build_errors(:max_length, :schema, row, column, value,
|
47
|
+
{"maxLength" => constraints["maxLength"]})
|
42
48
|
end
|
43
49
|
end
|
50
|
+
end
|
44
51
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
52
|
+
def validate_regex(value, row, column, all_errors)
|
53
|
+
pattern = constraints["pattern"]
|
54
|
+
if pattern
|
55
|
+
begin
|
56
|
+
if !value.nil? && !value.match(@regex ||= Regexp.new(pattern))
|
50
57
|
build_errors(:pattern, :schema, row, column, value,
|
51
|
-
|
52
|
-
rescue RegexpError
|
53
|
-
build_regex_error(value, row, column, pattern, all_errors)
|
58
|
+
{"pattern" => constraints["pattern"]})
|
54
59
|
end
|
60
|
+
rescue RegexpError
|
61
|
+
build_regex_error(value, row, column, pattern, all_errors)
|
55
62
|
end
|
56
63
|
end
|
64
|
+
end
|
57
65
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
def build_regex_error(value, row, column, pattern, all_errors)
|
67
|
+
return if @regex_error_exists
|
68
|
+
build_errors(:invalid_regex, :schema, nil, column, "#{name}: Constraints: Pattern: #{pattern}",
|
69
|
+
{"pattern" => constraints["pattern"]})
|
70
|
+
@regex_error_exists = true
|
71
|
+
end
|
64
72
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end
|
73
|
+
def validate_values(value, row, column)
|
74
|
+
# If a pattern exists, raise an invalid regex error if it is not in
|
75
|
+
# valid regex form, else, if the value of the relevant field in the csv
|
76
|
+
# does not match the given regex pattern in the schema, raise a
|
77
|
+
# pattern error.
|
78
|
+
if constraints["unique"] == true
|
79
|
+
if @uniques.include? value
|
80
|
+
build_errors(:unique, :schema, row, column, value, {"unique" => true})
|
81
|
+
else
|
82
|
+
@uniques << value
|
76
83
|
end
|
77
84
|
end
|
85
|
+
end
|
78
86
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
end
|
88
|
-
return parsed
|
87
|
+
def validate_type(value, row, column)
|
88
|
+
if constraints["type"] && value != ""
|
89
|
+
parsed = convert_to_type(value)
|
90
|
+
if parsed.nil?
|
91
|
+
failed = {"type" => constraints["type"]}
|
92
|
+
failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
|
93
|
+
build_errors(:invalid_type, :schema, row, column, value, failed)
|
94
|
+
return nil
|
89
95
|
end
|
90
|
-
return
|
96
|
+
return parsed
|
91
97
|
end
|
98
|
+
nil
|
99
|
+
end
|
92
100
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
101
|
+
def validate_range(value, row, column)
|
102
|
+
# TODO: we're ignoring issues with converting ranges to actual types, maybe we
|
103
|
+
# should generate a warning? The schema is invalid
|
104
|
+
if constraints["minimum"]
|
105
|
+
minimumValue = convert_to_type(constraints["minimum"])
|
106
|
+
if minimumValue
|
107
|
+
unless value >= minimumValue
|
99
108
|
build_errors(:below_minimum, :schema, row, column, value,
|
100
|
-
{
|
109
|
+
{"minimum" => constraints["minimum"]})
|
101
110
|
end
|
102
111
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
112
|
+
end
|
113
|
+
if constraints["maximum"]
|
114
|
+
maximumValue = convert_to_type(constraints["maximum"])
|
115
|
+
if maximumValue
|
116
|
+
unless value <= maximumValue
|
106
117
|
build_errors(:above_maximum, :schema, row, column, value,
|
107
|
-
|
118
|
+
{"maximum" => constraints["maximum"]})
|
108
119
|
end
|
109
120
|
end
|
110
121
|
end
|
122
|
+
end
|
111
123
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
124
|
+
def convert_to_type(value)
|
125
|
+
parsed = nil
|
126
|
+
tv = TYPE_VALIDATIONS[constraints["type"]]
|
127
|
+
if tv
|
128
|
+
begin
|
129
|
+
parsed = tv.call value, constraints
|
130
|
+
rescue ArgumentError
|
120
131
|
end
|
121
|
-
return parsed
|
122
132
|
end
|
133
|
+
parsed
|
134
|
+
end
|
123
135
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
136
|
+
TYPE_VALIDATIONS = {
|
137
|
+
"http://www.w3.org/2001/XMLSchema#string" => lambda { |value, constraints| value },
|
138
|
+
"http://www.w3.org/2001/XMLSchema#int" => lambda { |value, constraints| Integer value },
|
139
|
+
"http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, constraints| Integer value },
|
140
|
+
"http://www.w3.org/2001/XMLSchema#float" => lambda { |value, constraints| Float value },
|
141
|
+
"http://www.w3.org/2001/XMLSchema#double" => lambda { |value, constraints| Float value },
|
142
|
+
"http://www.w3.org/2001/XMLSchema#anyURI" => lambda do |value, constraints|
|
143
|
+
begin
|
144
|
+
u = URI.parse value
|
145
|
+
raise ArgumentError unless u.is_a?(URI::HTTP) || u.is_a?(URI::HTTPS)
|
146
|
+
rescue URI::InvalidURIError
|
147
|
+
raise ArgumentError
|
148
|
+
end
|
149
|
+
u
|
150
|
+
end,
|
151
|
+
"http://www.w3.org/2001/XMLSchema#boolean" => lambda do |value, constraints|
|
152
|
+
return true if ["true", "1"].include? value
|
153
|
+
return false if ["false", "0"].include? value
|
154
|
+
raise ArgumentError
|
155
|
+
end,
|
156
|
+
"http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda do |value, constraints|
|
157
|
+
i = Integer value
|
158
|
+
raise ArgumentError unless i <= 0
|
159
|
+
i
|
160
|
+
end,
|
161
|
+
"http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda do |value, constraints|
|
162
|
+
i = Integer value
|
163
|
+
raise ArgumentError unless i < 0
|
164
|
+
i
|
165
|
+
end,
|
166
|
+
"http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda do |value, constraints|
|
167
|
+
i = Integer value
|
168
|
+
raise ArgumentError unless i >= 0
|
169
|
+
i
|
170
|
+
end,
|
171
|
+
"http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda do |value, constraints|
|
172
|
+
i = Integer value
|
173
|
+
raise ArgumentError unless i > 0
|
174
|
+
i
|
175
|
+
end,
|
176
|
+
"http://www.w3.org/2001/XMLSchema#dateTime" => lambda do |value, constraints|
|
177
|
+
date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
|
178
|
+
d = DateTime.strptime(value, date_pattern)
|
179
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
180
|
+
d
|
181
|
+
end,
|
182
|
+
"http://www.w3.org/2001/XMLSchema#date" => lambda do |value, constraints|
|
183
|
+
date_pattern = constraints["datePattern"] || "%Y-%m-%d"
|
184
|
+
d = Date.strptime(value, date_pattern)
|
185
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
186
|
+
d
|
187
|
+
end,
|
188
|
+
"http://www.w3.org/2001/XMLSchema#time" => lambda do |value, constraints|
|
189
|
+
date_pattern = constraints["datePattern"] || "%H:%M:%S"
|
190
|
+
d = DateTime.strptime(value, date_pattern)
|
191
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
192
|
+
d
|
193
|
+
end,
|
194
|
+
"http://www.w3.org/2001/XMLSchema#gYear" => lambda do |value, constraints|
|
195
|
+
date_pattern = constraints["datePattern"] || "%Y"
|
196
|
+
d = Date.strptime(value, date_pattern)
|
197
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
198
|
+
d
|
199
|
+
end,
|
200
|
+
"http://www.w3.org/2001/XMLSchema#gYearMonth" => lambda do |value, constraints|
|
201
|
+
date_pattern = constraints["datePattern"] || "%Y-%m"
|
202
|
+
d = Date.strptime(value, date_pattern)
|
203
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
204
|
+
d
|
205
|
+
end
|
206
|
+
}
|
195
207
|
end
|
196
208
|
end
|
data/lib/csvlint/schema.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Schema
|
4
|
-
|
5
3
|
include Csvlint::ErrorCollector
|
6
4
|
|
7
5
|
attr_reader :uri, :fields, :title, :description
|
8
6
|
|
9
|
-
def initialize(uri, fields=[], title=nil, description=nil)
|
7
|
+
def initialize(uri, fields = [], title = nil, description = nil)
|
10
8
|
@uri = uri
|
11
9
|
@fields = fields
|
12
10
|
@title = title
|
@@ -15,20 +13,19 @@ module Csvlint
|
|
15
13
|
end
|
16
14
|
|
17
15
|
class << self
|
18
|
-
|
19
16
|
extend Gem::Deprecate
|
20
17
|
|
21
18
|
def from_json_table(uri, json)
|
22
19
|
fields = []
|
23
|
-
json["fields"]
|
24
|
-
fields << Csvlint::Field.new(
|
25
|
-
field_desc["title"], field_desc["description"]
|
26
|
-
end
|
27
|
-
|
20
|
+
json["fields"]&.each do |field_desc|
|
21
|
+
fields << Csvlint::Field.new(field_desc["name"], field_desc["constraints"],
|
22
|
+
field_desc["title"], field_desc["description"])
|
23
|
+
end
|
24
|
+
Schema.new(uri, fields, json["title"], json["description"])
|
28
25
|
end
|
29
26
|
|
30
27
|
def from_csvw_metadata(uri, json)
|
31
|
-
|
28
|
+
Csvlint::Csvw::TableGroup.from_json(uri, json)
|
32
29
|
end
|
33
30
|
|
34
31
|
# Deprecated method signature
|
@@ -38,70 +35,65 @@ module Csvlint
|
|
38
35
|
deprecate :load_from_json, :load_from_uri, 2018, 1
|
39
36
|
|
40
37
|
def load_from_uri(uri, output_errors = true)
|
41
|
-
load_from_string(uri, open(uri).read, output_errors)
|
38
|
+
load_from_string(uri, URI.open(uri).read, output_errors)
|
42
39
|
rescue OpenURI::HTTPError, Errno::ENOENT => e
|
43
40
|
raise e
|
44
41
|
end
|
45
42
|
|
46
43
|
def load_from_string(uri, string, output_errors = true)
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
return Schema.from_json_table(uri,json)
|
54
|
-
end
|
55
|
-
rescue TypeError => e
|
56
|
-
# NO IDEA what this was even trying to do - SP 20160526
|
57
|
-
|
58
|
-
rescue Csvlint::Csvw::MetadataError => e
|
59
|
-
raise e
|
60
|
-
rescue => e
|
61
|
-
if output_errors === true
|
62
|
-
STDERR.puts e.class
|
63
|
-
STDERR.puts e.message
|
64
|
-
STDERR.puts e.backtrace
|
65
|
-
end
|
66
|
-
return Schema.new(nil, [], "malformed", "malformed")
|
44
|
+
json = JSON.parse(string)
|
45
|
+
if json["@context"]
|
46
|
+
uri = "file:#{File.expand_path(uri)}" unless /^http(s)?/.match?(uri.to_s)
|
47
|
+
Schema.from_csvw_metadata(uri, json)
|
48
|
+
else
|
49
|
+
Schema.from_json_table(uri, json)
|
67
50
|
end
|
51
|
+
rescue TypeError => e
|
52
|
+
# NO IDEA what this was even trying to do - SP 20160526
|
53
|
+
rescue Csvlint::Csvw::MetadataError => e
|
54
|
+
raise e
|
55
|
+
rescue => e
|
56
|
+
if output_errors === true
|
57
|
+
warn e.class
|
58
|
+
warn e.message
|
59
|
+
warn e.backtrace
|
60
|
+
end
|
61
|
+
Schema.new(nil, [], "malformed", "malformed")
|
68
62
|
end
|
69
|
-
|
70
63
|
end
|
71
64
|
|
72
|
-
def validate_header(header, source_url=nil, validate=true)
|
65
|
+
def validate_header(header, source_url = nil, validate = true)
|
73
66
|
reset
|
74
67
|
|
75
|
-
found_header = header.to_csv(:
|
76
|
-
expected_header = @fields.map{ |f| f.name }.to_csv(:
|
68
|
+
found_header = header.to_csv(row_sep: "")
|
69
|
+
expected_header = @fields.map { |f| f.name }.to_csv(row_sep: "")
|
77
70
|
if found_header != expected_header
|
78
71
|
build_warnings(:malformed_header, :schema, 1, nil, found_header, "expectedHeader" => expected_header)
|
79
72
|
end
|
80
|
-
|
73
|
+
valid?
|
81
74
|
end
|
82
75
|
|
83
|
-
def validate_row(values, row=nil, all_errors=[], source_url=nil, validate=true)
|
76
|
+
def validate_row(values, row = nil, all_errors = [], source_url = nil, validate = true)
|
84
77
|
reset
|
85
78
|
if values.length < fields.length
|
86
79
|
fields[values.size..-1].each_with_index do |field, i|
|
87
|
-
build_warnings(:missing_column, :schema, row, values.size+i+1)
|
80
|
+
build_warnings(:missing_column, :schema, row, values.size + i + 1)
|
88
81
|
end
|
89
82
|
end
|
90
83
|
if values.length > fields.length
|
91
84
|
values[fields.size..-1].each_with_index do |data_column, i|
|
92
|
-
build_warnings(:extra_column, :schema, row, fields.size+i+1)
|
85
|
+
build_warnings(:extra_column, :schema, row, fields.size + i + 1)
|
93
86
|
end
|
94
87
|
end
|
95
88
|
|
96
|
-
fields.each_with_index do |field,i|
|
89
|
+
fields.each_with_index do |field, i|
|
97
90
|
value = values[i] || ""
|
98
|
-
result = field.validate_column(value, row, i+1, all_errors)
|
91
|
+
result = field.validate_column(value, row, i + 1, all_errors)
|
99
92
|
@errors += fields[i].errors
|
100
93
|
@warnings += fields[i].warnings
|
101
94
|
end
|
102
95
|
|
103
|
-
|
96
|
+
valid?
|
104
97
|
end
|
105
|
-
|
106
98
|
end
|
107
99
|
end
|