csvlint 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +4 -0
- data/.github/workflows/push.yml +14 -2
- data/.pre-commit-hooks.yaml +5 -0
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/CHANGELOG.md +84 -32
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +30 -9
- data/Rakefile +7 -7
- data/csvlint.gemspec +14 -16
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +6 -6
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +107 -103
- data/features/support/webmock.rb +2 -2
- data/lib/csvlint/cli.rb +133 -130
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +34 -42
- data/lib/csvlint/validate.rb +161 -143
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +129 -139
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -190
- metadata +23 -55
data/lib/csvlint/field.rb
CHANGED
@@ -1,196 +1,208 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Field
|
4
3
|
include Csvlint::ErrorCollector
|
5
4
|
|
6
5
|
attr_reader :name, :constraints, :title, :description
|
7
6
|
|
8
|
-
def initialize(name, constraints={}, title=nil, description=nil)
|
7
|
+
def initialize(name, constraints = {}, title = nil, description = nil)
|
9
8
|
@name = name
|
10
9
|
@constraints = constraints || {}
|
11
10
|
@uniques = Set.new
|
12
11
|
@title = title
|
13
12
|
@description = description
|
13
|
+
@regex = nil
|
14
14
|
reset
|
15
15
|
end
|
16
16
|
|
17
|
-
def validate_column(value, row=nil, column=nil, all_errors=[])
|
17
|
+
def validate_column(value, row = nil, column = nil, all_errors = [])
|
18
18
|
reset
|
19
|
-
unless all_errors.any?{|error| ((error.type == :invalid_regex) && (error.column == column))}
|
19
|
+
unless all_errors.any? { |error| ((error.type == :invalid_regex) && (error.column == column)) }
|
20
20
|
validate_regex(value, row, column, all_errors)
|
21
21
|
end
|
22
22
|
validate_length(value, row, column)
|
23
23
|
validate_values(value, row, column)
|
24
24
|
parsed = validate_type(value, row, column)
|
25
|
-
validate_range(parsed, row, column) if parsed
|
26
|
-
|
25
|
+
validate_range(parsed, row, column) if !parsed.nil?
|
26
|
+
valid?
|
27
27
|
end
|
28
28
|
|
29
29
|
private
|
30
|
-
|
31
|
-
|
30
|
+
|
31
|
+
def validate_length(value, row, column)
|
32
|
+
if constraints["required"] == true
|
33
|
+
if value.nil? || value.length == 0
|
32
34
|
build_errors(:missing_value, :schema, row, column, value,
|
33
|
-
{
|
35
|
+
{"required" => true})
|
34
36
|
end
|
35
|
-
|
37
|
+
end
|
38
|
+
if constraints["minLength"]
|
39
|
+
if value.nil? || value.length < constraints["minLength"]
|
36
40
|
build_errors(:min_length, :schema, row, column, value,
|
37
|
-
{
|
41
|
+
{"minLength" => constraints["minLength"]})
|
38
42
|
end
|
39
|
-
|
40
|
-
|
41
|
-
|
43
|
+
end
|
44
|
+
if constraints["maxLength"]
|
45
|
+
if !value.nil? && value.length > constraints["maxLength"]
|
46
|
+
build_errors(:max_length, :schema, row, column, value,
|
47
|
+
{"maxLength" => constraints["maxLength"]})
|
42
48
|
end
|
43
49
|
end
|
50
|
+
end
|
44
51
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
52
|
+
def validate_regex(value, row, column, all_errors)
|
53
|
+
pattern = constraints["pattern"]
|
54
|
+
if pattern
|
55
|
+
begin
|
56
|
+
if !value.nil? && !value.match(@regex ||= Regexp.new(pattern))
|
50
57
|
build_errors(:pattern, :schema, row, column, value,
|
51
|
-
|
52
|
-
rescue RegexpError
|
53
|
-
build_regex_error(value, row, column, pattern, all_errors)
|
58
|
+
{"pattern" => constraints["pattern"]})
|
54
59
|
end
|
60
|
+
rescue RegexpError
|
61
|
+
build_regex_error(value, row, column, pattern, all_errors)
|
55
62
|
end
|
56
63
|
end
|
64
|
+
end
|
57
65
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
def build_regex_error(value, row, column, pattern, all_errors)
|
67
|
+
return if @regex_error_exists
|
68
|
+
build_errors(:invalid_regex, :schema, nil, column, "#{name}: Constraints: Pattern: #{pattern}",
|
69
|
+
{"pattern" => constraints["pattern"]})
|
70
|
+
@regex_error_exists = true
|
71
|
+
end
|
64
72
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end
|
73
|
+
def validate_values(value, row, column)
|
74
|
+
# If a pattern exists, raise an invalid regex error if it is not in
|
75
|
+
# valid regex form, else, if the value of the relevant field in the csv
|
76
|
+
# does not match the given regex pattern in the schema, raise a
|
77
|
+
# pattern error.
|
78
|
+
if constraints["unique"] == true
|
79
|
+
if @uniques.include? value
|
80
|
+
build_errors(:unique, :schema, row, column, value, {"unique" => true})
|
81
|
+
else
|
82
|
+
@uniques << value
|
76
83
|
end
|
77
84
|
end
|
85
|
+
end
|
78
86
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
end
|
88
|
-
return parsed
|
87
|
+
def validate_type(value, row, column)
|
88
|
+
if constraints["type"] && value != ""
|
89
|
+
parsed = convert_to_type(value)
|
90
|
+
if parsed.nil?
|
91
|
+
failed = {"type" => constraints["type"]}
|
92
|
+
failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
|
93
|
+
build_errors(:invalid_type, :schema, row, column, value, failed)
|
94
|
+
return nil
|
89
95
|
end
|
90
|
-
return
|
96
|
+
return parsed
|
91
97
|
end
|
98
|
+
nil
|
99
|
+
end
|
92
100
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
101
|
+
def validate_range(value, row, column)
|
102
|
+
# TODO: we're ignoring issues with converting ranges to actual types, maybe we
|
103
|
+
# should generate a warning? The schema is invalid
|
104
|
+
if constraints["minimum"]
|
105
|
+
minimumValue = convert_to_type(constraints["minimum"])
|
106
|
+
if minimumValue
|
107
|
+
unless value >= minimumValue
|
99
108
|
build_errors(:below_minimum, :schema, row, column, value,
|
100
|
-
{
|
109
|
+
{"minimum" => constraints["minimum"]})
|
101
110
|
end
|
102
111
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
112
|
+
end
|
113
|
+
if constraints["maximum"]
|
114
|
+
maximumValue = convert_to_type(constraints["maximum"])
|
115
|
+
if maximumValue
|
116
|
+
unless value <= maximumValue
|
106
117
|
build_errors(:above_maximum, :schema, row, column, value,
|
107
|
-
|
118
|
+
{"maximum" => constraints["maximum"]})
|
108
119
|
end
|
109
120
|
end
|
110
121
|
end
|
122
|
+
end
|
111
123
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
124
|
+
def convert_to_type(value)
|
125
|
+
parsed = nil
|
126
|
+
tv = TYPE_VALIDATIONS[constraints["type"]]
|
127
|
+
if tv
|
128
|
+
begin
|
129
|
+
parsed = tv.call value, constraints
|
130
|
+
rescue ArgumentError
|
120
131
|
end
|
121
|
-
return parsed
|
122
132
|
end
|
133
|
+
parsed
|
134
|
+
end
|
123
135
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
136
|
+
TYPE_VALIDATIONS = {
|
137
|
+
"http://www.w3.org/2001/XMLSchema#string" => lambda { |value, constraints| value },
|
138
|
+
"http://www.w3.org/2001/XMLSchema#int" => lambda { |value, constraints| Integer value },
|
139
|
+
"http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, constraints| Integer value },
|
140
|
+
"http://www.w3.org/2001/XMLSchema#float" => lambda { |value, constraints| Float value },
|
141
|
+
"http://www.w3.org/2001/XMLSchema#double" => lambda { |value, constraints| Float value },
|
142
|
+
"http://www.w3.org/2001/XMLSchema#anyURI" => lambda do |value, constraints|
|
143
|
+
begin
|
144
|
+
u = URI.parse value
|
145
|
+
raise ArgumentError unless u.is_a?(URI::HTTP) || u.is_a?(URI::HTTPS)
|
146
|
+
rescue URI::InvalidURIError
|
147
|
+
raise ArgumentError
|
148
|
+
end
|
149
|
+
u
|
150
|
+
end,
|
151
|
+
"http://www.w3.org/2001/XMLSchema#boolean" => lambda do |value, constraints|
|
152
|
+
return true if ["true", "1"].include? value
|
153
|
+
return false if ["false", "0"].include? value
|
154
|
+
raise ArgumentError
|
155
|
+
end,
|
156
|
+
"http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda do |value, constraints|
|
157
|
+
i = Integer value
|
158
|
+
raise ArgumentError unless i <= 0
|
159
|
+
i
|
160
|
+
end,
|
161
|
+
"http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda do |value, constraints|
|
162
|
+
i = Integer value
|
163
|
+
raise ArgumentError unless i < 0
|
164
|
+
i
|
165
|
+
end,
|
166
|
+
"http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda do |value, constraints|
|
167
|
+
i = Integer value
|
168
|
+
raise ArgumentError unless i >= 0
|
169
|
+
i
|
170
|
+
end,
|
171
|
+
"http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda do |value, constraints|
|
172
|
+
i = Integer value
|
173
|
+
raise ArgumentError unless i > 0
|
174
|
+
i
|
175
|
+
end,
|
176
|
+
"http://www.w3.org/2001/XMLSchema#dateTime" => lambda do |value, constraints|
|
177
|
+
date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
|
178
|
+
d = DateTime.strptime(value, date_pattern)
|
179
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
180
|
+
d
|
181
|
+
end,
|
182
|
+
"http://www.w3.org/2001/XMLSchema#date" => lambda do |value, constraints|
|
183
|
+
date_pattern = constraints["datePattern"] || "%Y-%m-%d"
|
184
|
+
d = Date.strptime(value, date_pattern)
|
185
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
186
|
+
d
|
187
|
+
end,
|
188
|
+
"http://www.w3.org/2001/XMLSchema#time" => lambda do |value, constraints|
|
189
|
+
date_pattern = constraints["datePattern"] || "%H:%M:%S"
|
190
|
+
d = DateTime.strptime(value, date_pattern)
|
191
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
192
|
+
d
|
193
|
+
end,
|
194
|
+
"http://www.w3.org/2001/XMLSchema#gYear" => lambda do |value, constraints|
|
195
|
+
date_pattern = constraints["datePattern"] || "%Y"
|
196
|
+
d = Date.strptime(value, date_pattern)
|
197
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
198
|
+
d
|
199
|
+
end,
|
200
|
+
"http://www.w3.org/2001/XMLSchema#gYearMonth" => lambda do |value, constraints|
|
201
|
+
date_pattern = constraints["datePattern"] || "%Y-%m"
|
202
|
+
d = Date.strptime(value, date_pattern)
|
203
|
+
raise ArgumentError unless d.strftime(date_pattern) == value
|
204
|
+
d
|
205
|
+
end
|
206
|
+
}
|
195
207
|
end
|
196
208
|
end
|
data/lib/csvlint/schema.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Schema
|
4
|
-
|
5
3
|
include Csvlint::ErrorCollector
|
6
4
|
|
7
5
|
attr_reader :uri, :fields, :title, :description
|
8
6
|
|
9
|
-
def initialize(uri, fields=[], title=nil, description=nil)
|
7
|
+
def initialize(uri, fields = [], title = nil, description = nil)
|
10
8
|
@uri = uri
|
11
9
|
@fields = fields
|
12
10
|
@title = title
|
@@ -15,20 +13,19 @@ module Csvlint
|
|
15
13
|
end
|
16
14
|
|
17
15
|
class << self
|
18
|
-
|
19
16
|
extend Gem::Deprecate
|
20
17
|
|
21
18
|
def from_json_table(uri, json)
|
22
19
|
fields = []
|
23
|
-
json["fields"]
|
24
|
-
fields << Csvlint::Field.new(
|
25
|
-
field_desc["title"], field_desc["description"]
|
26
|
-
end
|
27
|
-
|
20
|
+
json["fields"]&.each do |field_desc|
|
21
|
+
fields << Csvlint::Field.new(field_desc["name"], field_desc["constraints"],
|
22
|
+
field_desc["title"], field_desc["description"])
|
23
|
+
end
|
24
|
+
Schema.new(uri, fields, json["title"], json["description"])
|
28
25
|
end
|
29
26
|
|
30
27
|
def from_csvw_metadata(uri, json)
|
31
|
-
|
28
|
+
Csvlint::Csvw::TableGroup.from_json(uri, json)
|
32
29
|
end
|
33
30
|
|
34
31
|
# Deprecated method signature
|
@@ -44,64 +41,59 @@ module Csvlint
|
|
44
41
|
end
|
45
42
|
|
46
43
|
def load_from_string(uri, string, output_errors = true)
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
return Schema.from_json_table(uri,json)
|
54
|
-
end
|
55
|
-
rescue TypeError => e
|
56
|
-
# NO IDEA what this was even trying to do - SP 20160526
|
57
|
-
|
58
|
-
rescue Csvlint::Csvw::MetadataError => e
|
59
|
-
raise e
|
60
|
-
rescue => e
|
61
|
-
if output_errors === true
|
62
|
-
STDERR.puts e.class
|
63
|
-
STDERR.puts e.message
|
64
|
-
STDERR.puts e.backtrace
|
65
|
-
end
|
66
|
-
return Schema.new(nil, [], "malformed", "malformed")
|
44
|
+
json = JSON.parse(string)
|
45
|
+
if json["@context"]
|
46
|
+
uri = "file:#{File.expand_path(uri)}" unless /^http(s)?/.match?(uri.to_s)
|
47
|
+
Schema.from_csvw_metadata(uri, json)
|
48
|
+
else
|
49
|
+
Schema.from_json_table(uri, json)
|
67
50
|
end
|
51
|
+
rescue TypeError => e
|
52
|
+
# NO IDEA what this was even trying to do - SP 20160526
|
53
|
+
rescue Csvlint::Csvw::MetadataError => e
|
54
|
+
raise e
|
55
|
+
rescue => e
|
56
|
+
if output_errors === true
|
57
|
+
warn e.class
|
58
|
+
warn e.message
|
59
|
+
warn e.backtrace
|
60
|
+
end
|
61
|
+
Schema.new(nil, [], "malformed", "malformed")
|
68
62
|
end
|
69
|
-
|
70
63
|
end
|
71
64
|
|
72
|
-
def validate_header(header, source_url=nil, validate=true)
|
65
|
+
def validate_header(header, source_url = nil, validate = true)
|
73
66
|
reset
|
74
67
|
|
75
|
-
found_header = header.to_csv(:
|
76
|
-
expected_header = @fields.map{ |f| f.name }.to_csv(:
|
68
|
+
found_header = header.to_csv(row_sep: "")
|
69
|
+
expected_header = @fields.map { |f| f.name }.to_csv(row_sep: "")
|
77
70
|
if found_header != expected_header
|
78
71
|
build_warnings(:malformed_header, :schema, 1, nil, found_header, "expectedHeader" => expected_header)
|
79
72
|
end
|
80
|
-
|
73
|
+
valid?
|
81
74
|
end
|
82
75
|
|
83
|
-
def validate_row(values, row=nil, all_errors=[], source_url=nil, validate=true)
|
76
|
+
def validate_row(values, row = nil, all_errors = [], source_url = nil, validate = true)
|
84
77
|
reset
|
85
78
|
if values.length < fields.length
|
86
79
|
fields[values.size..-1].each_with_index do |field, i|
|
87
|
-
build_warnings(:missing_column, :schema, row, values.size+i+1)
|
80
|
+
build_warnings(:missing_column, :schema, row, values.size + i + 1)
|
88
81
|
end
|
89
82
|
end
|
90
83
|
if values.length > fields.length
|
91
84
|
values[fields.size..-1].each_with_index do |data_column, i|
|
92
|
-
build_warnings(:extra_column, :schema, row, fields.size+i+1)
|
85
|
+
build_warnings(:extra_column, :schema, row, fields.size + i + 1)
|
93
86
|
end
|
94
87
|
end
|
95
88
|
|
96
|
-
fields.each_with_index do |field,i|
|
89
|
+
fields.each_with_index do |field, i|
|
97
90
|
value = values[i] || ""
|
98
|
-
result = field.validate_column(value, row, i+1, all_errors)
|
91
|
+
result = field.validate_column(value, row, i + 1, all_errors)
|
99
92
|
@errors += fields[i].errors
|
100
93
|
@warnings += fields[i].warnings
|
101
94
|
end
|
102
95
|
|
103
|
-
|
96
|
+
valid?
|
104
97
|
end
|
105
|
-
|
106
98
|
end
|
107
99
|
end
|