csvlint 1.0.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +4 -0
  3. data/.github/workflows/push.yml +14 -2
  4. data/.pre-commit-hooks.yaml +5 -0
  5. data/.ruby-version +1 -1
  6. data/.standard_todo.yml +43 -0
  7. data/CHANGELOG.md +84 -32
  8. data/Dockerfile +16 -0
  9. data/Gemfile +2 -2
  10. data/README.md +30 -9
  11. data/Rakefile +7 -7
  12. data/csvlint.gemspec +14 -16
  13. data/docker_notes_for_windows.txt +20 -0
  14. data/features/step_definitions/cli_steps.rb +11 -11
  15. data/features/step_definitions/information_steps.rb +4 -4
  16. data/features/step_definitions/parse_csv_steps.rb +11 -11
  17. data/features/step_definitions/schema_validation_steps.rb +10 -10
  18. data/features/step_definitions/sources_steps.rb +1 -1
  19. data/features/step_definitions/validation_errors_steps.rb +19 -19
  20. data/features/step_definitions/validation_info_steps.rb +9 -9
  21. data/features/step_definitions/validation_warnings_steps.rb +11 -11
  22. data/features/support/aruba.rb +6 -6
  23. data/features/support/earl_formatter.rb +39 -39
  24. data/features/support/env.rb +10 -11
  25. data/features/support/load_tests.rb +107 -103
  26. data/features/support/webmock.rb +2 -2
  27. data/lib/csvlint/cli.rb +133 -130
  28. data/lib/csvlint/csvw/column.rb +279 -280
  29. data/lib/csvlint/csvw/date_format.rb +90 -92
  30. data/lib/csvlint/csvw/metadata_error.rb +1 -3
  31. data/lib/csvlint/csvw/number_format.rb +40 -32
  32. data/lib/csvlint/csvw/property_checker.rb +714 -717
  33. data/lib/csvlint/csvw/table.rb +49 -52
  34. data/lib/csvlint/csvw/table_group.rb +24 -23
  35. data/lib/csvlint/error_collector.rb +2 -0
  36. data/lib/csvlint/error_message.rb +0 -1
  37. data/lib/csvlint/field.rb +153 -141
  38. data/lib/csvlint/schema.rb +34 -42
  39. data/lib/csvlint/validate.rb +161 -143
  40. data/lib/csvlint/version.rb +1 -1
  41. data/lib/csvlint.rb +22 -23
  42. data/spec/csvw/column_spec.rb +15 -16
  43. data/spec/csvw/date_format_spec.rb +5 -7
  44. data/spec/csvw/number_format_spec.rb +2 -4
  45. data/spec/csvw/table_group_spec.rb +103 -105
  46. data/spec/csvw/table_spec.rb +71 -73
  47. data/spec/field_spec.rb +116 -121
  48. data/spec/schema_spec.rb +129 -139
  49. data/spec/spec_helper.rb +6 -6
  50. data/spec/validator_spec.rb +167 -190
  51. metadata +23 -55
data/lib/csvlint/field.rb CHANGED
@@ -1,196 +1,208 @@
1
1
  module Csvlint
2
-
3
2
  class Field
4
3
  include Csvlint::ErrorCollector
5
4
 
6
5
  attr_reader :name, :constraints, :title, :description
7
6
 
8
- def initialize(name, constraints={}, title=nil, description=nil)
7
+ def initialize(name, constraints = {}, title = nil, description = nil)
9
8
  @name = name
10
9
  @constraints = constraints || {}
11
10
  @uniques = Set.new
12
11
  @title = title
13
12
  @description = description
13
+ @regex = nil
14
14
  reset
15
15
  end
16
16
 
17
- def validate_column(value, row=nil, column=nil, all_errors=[])
17
+ def validate_column(value, row = nil, column = nil, all_errors = [])
18
18
  reset
19
- unless all_errors.any?{|error| ((error.type == :invalid_regex) && (error.column == column))}
19
+ unless all_errors.any? { |error| ((error.type == :invalid_regex) && (error.column == column)) }
20
20
  validate_regex(value, row, column, all_errors)
21
21
  end
22
22
  validate_length(value, row, column)
23
23
  validate_values(value, row, column)
24
24
  parsed = validate_type(value, row, column)
25
- validate_range(parsed, row, column) if parsed != nil
26
- return valid?
25
+ validate_range(parsed, row, column) if !parsed.nil?
26
+ valid?
27
27
  end
28
28
 
29
29
  private
30
- def validate_length(value, row, column)
31
- if constraints["required"] == true
30
+
31
+ def validate_length(value, row, column)
32
+ if constraints["required"] == true
33
+ if value.nil? || value.length == 0
32
34
  build_errors(:missing_value, :schema, row, column, value,
33
- { "required" => true }) if value.nil? || value.length == 0
35
+ {"required" => true})
34
36
  end
35
- if constraints["minLength"]
37
+ end
38
+ if constraints["minLength"]
39
+ if value.nil? || value.length < constraints["minLength"]
36
40
  build_errors(:min_length, :schema, row, column, value,
37
- { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
41
+ {"minLength" => constraints["minLength"]})
38
42
  end
39
- if constraints["maxLength"]
40
- build_errors(:max_length, :schema, row, column, value,
41
- { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
43
+ end
44
+ if constraints["maxLength"]
45
+ if !value.nil? && value.length > constraints["maxLength"]
46
+ build_errors(:max_length, :schema, row, column, value,
47
+ {"maxLength" => constraints["maxLength"]})
42
48
  end
43
49
  end
50
+ end
44
51
 
45
- def validate_regex(value, row, column, all_errors)
46
- pattern = constraints["pattern"]
47
- if pattern
48
- begin
49
- Regexp.new(pattern)
52
+ def validate_regex(value, row, column, all_errors)
53
+ pattern = constraints["pattern"]
54
+ if pattern
55
+ begin
56
+ if !value.nil? && !value.match(@regex ||= Regexp.new(pattern))
50
57
  build_errors(:pattern, :schema, row, column, value,
51
- { "pattern" => constraints["pattern"] } ) if !value.nil? && !value.match( constraints["pattern"] )
52
- rescue RegexpError
53
- build_regex_error(value, row, column, pattern, all_errors)
58
+ {"pattern" => constraints["pattern"]})
54
59
  end
60
+ rescue RegexpError
61
+ build_regex_error(value, row, column, pattern, all_errors)
55
62
  end
56
63
  end
64
+ end
57
65
 
58
- def build_regex_error(value, row, column, pattern, all_errors)
59
- return if @regex_error_exists
60
- build_errors(:invalid_regex, :schema, nil, column, ("#{name}: Constraints: Pattern: #{pattern}"),
61
- { "pattern" => constraints["pattern"] })
62
- @regex_error_exists = true
63
- end
66
+ def build_regex_error(value, row, column, pattern, all_errors)
67
+ return if @regex_error_exists
68
+ build_errors(:invalid_regex, :schema, nil, column, "#{name}: Constraints: Pattern: #{pattern}",
69
+ {"pattern" => constraints["pattern"]})
70
+ @regex_error_exists = true
71
+ end
64
72
 
65
- def validate_values(value, row, column)
66
- # If a pattern exists, raise an invalid regex error if it is not in
67
- # valid regex form, else, if the value of the relevant field in the csv
68
- # does not match the given regex pattern in the schema, raise a
69
- # pattern error.
70
- if constraints["unique"] == true
71
- if @uniques.include? value
72
- build_errors(:unique, :schema, row, column, value, { "unique" => true })
73
- else
74
- @uniques << value
75
- end
73
+ def validate_values(value, row, column)
74
+ # If a pattern exists, raise an invalid regex error if it is not in
75
+ # valid regex form, else, if the value of the relevant field in the csv
76
+ # does not match the given regex pattern in the schema, raise a
77
+ # pattern error.
78
+ if constraints["unique"] == true
79
+ if @uniques.include? value
80
+ build_errors(:unique, :schema, row, column, value, {"unique" => true})
81
+ else
82
+ @uniques << value
76
83
  end
77
84
  end
85
+ end
78
86
 
79
- def validate_type(value, row, column)
80
- if constraints["type"] && value != ""
81
- parsed = convert_to_type(value)
82
- if parsed == nil
83
- failed = { "type" => constraints["type"] }
84
- failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
85
- build_errors(:invalid_type, :schema, row, column, value, failed)
86
- return nil
87
- end
88
- return parsed
87
+ def validate_type(value, row, column)
88
+ if constraints["type"] && value != ""
89
+ parsed = convert_to_type(value)
90
+ if parsed.nil?
91
+ failed = {"type" => constraints["type"]}
92
+ failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
93
+ build_errors(:invalid_type, :schema, row, column, value, failed)
94
+ return nil
89
95
  end
90
- return nil
96
+ return parsed
91
97
  end
98
+ nil
99
+ end
92
100
 
93
- def validate_range(value, row, column)
94
- #TODO: we're ignoring issues with converting ranges to actual types, maybe we
95
- #should generate a warning? The schema is invalid
96
- if constraints["minimum"]
97
- minimumValue = convert_to_type( constraints["minimum"] )
98
- if minimumValue
101
+ def validate_range(value, row, column)
102
+ # TODO: we're ignoring issues with converting ranges to actual types, maybe we
103
+ # should generate a warning? The schema is invalid
104
+ if constraints["minimum"]
105
+ minimumValue = convert_to_type(constraints["minimum"])
106
+ if minimumValue
107
+ unless value >= minimumValue
99
108
  build_errors(:below_minimum, :schema, row, column, value,
100
- { "minimum" => constraints["minimum"] }) unless value >= minimumValue
109
+ {"minimum" => constraints["minimum"]})
101
110
  end
102
111
  end
103
- if constraints["maximum"]
104
- maximumValue = convert_to_type( constraints["maximum"] )
105
- if maximumValue
112
+ end
113
+ if constraints["maximum"]
114
+ maximumValue = convert_to_type(constraints["maximum"])
115
+ if maximumValue
116
+ unless value <= maximumValue
106
117
  build_errors(:above_maximum, :schema, row, column, value,
107
- { "maximum" => constraints["maximum"] }) unless value <= maximumValue
118
+ {"maximum" => constraints["maximum"]})
108
119
  end
109
120
  end
110
121
  end
122
+ end
111
123
 
112
- def convert_to_type(value)
113
- parsed = nil
114
- tv = TYPE_VALIDATIONS[constraints["type"]]
115
- if tv
116
- begin
117
- parsed = tv.call value, constraints
118
- rescue ArgumentError
119
- end
124
+ def convert_to_type(value)
125
+ parsed = nil
126
+ tv = TYPE_VALIDATIONS[constraints["type"]]
127
+ if tv
128
+ begin
129
+ parsed = tv.call value, constraints
130
+ rescue ArgumentError
120
131
  end
121
- return parsed
122
132
  end
133
+ parsed
134
+ end
123
135
 
124
- TYPE_VALIDATIONS = {
125
- 'http://www.w3.org/2001/XMLSchema#string' => lambda { |value, constraints| value },
126
- 'http://www.w3.org/2001/XMLSchema#int' => lambda { |value, constraints| Integer value },
127
- 'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| Integer value },
128
- 'http://www.w3.org/2001/XMLSchema#float' => lambda { |value, constraints| Float value },
129
- 'http://www.w3.org/2001/XMLSchema#double' => lambda { |value, constraints| Float value },
130
- 'http://www.w3.org/2001/XMLSchema#anyURI' => lambda do |value, constraints|
131
- begin
132
- u = URI.parse value
133
- raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
134
- rescue URI::InvalidURIError
135
- raise ArgumentError
136
- end
137
- u
138
- end,
139
- 'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
140
- return true if ['true', '1'].include? value
141
- return false if ['false', '0'].include? value
142
- raise ArgumentError
143
- end,
144
- 'http://www.w3.org/2001/XMLSchema#nonPositiveInteger' => lambda do |value, constraints|
145
- i = Integer value
146
- raise ArgumentError unless i <= 0
147
- i
148
- end,
149
- 'http://www.w3.org/2001/XMLSchema#negativeInteger' => lambda do |value, constraints|
150
- i = Integer value
151
- raise ArgumentError unless i < 0
152
- i
153
- end,
154
- 'http://www.w3.org/2001/XMLSchema#nonNegativeInteger' => lambda do |value, constraints|
155
- i = Integer value
156
- raise ArgumentError unless i >= 0
157
- i
158
- end,
159
- 'http://www.w3.org/2001/XMLSchema#positiveInteger' => lambda do |value, constraints|
160
- i = Integer value
161
- raise ArgumentError unless i > 0
162
- i
163
- end,
164
- 'http://www.w3.org/2001/XMLSchema#dateTime' => lambda do |value, constraints|
165
- date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
166
- d = DateTime.strptime(value, date_pattern)
167
- raise ArgumentError unless d.strftime(date_pattern) == value
168
- d
169
- end,
170
- 'http://www.w3.org/2001/XMLSchema#date' => lambda do |value, constraints|
171
- date_pattern = constraints["datePattern"] || "%Y-%m-%d"
172
- d = Date.strptime(value, date_pattern)
173
- raise ArgumentError unless d.strftime(date_pattern) == value
174
- d
175
- end,
176
- 'http://www.w3.org/2001/XMLSchema#time' => lambda do |value, constraints|
177
- date_pattern = constraints["datePattern"] || "%H:%M:%S"
178
- d = DateTime.strptime(value, date_pattern)
179
- raise ArgumentError unless d.strftime(date_pattern) == value
180
- d
181
- end,
182
- 'http://www.w3.org/2001/XMLSchema#gYear' => lambda do |value, constraints|
183
- date_pattern = constraints["datePattern"] || "%Y"
184
- d = Date.strptime(value, date_pattern)
185
- raise ArgumentError unless d.strftime(date_pattern) == value
186
- d
187
- end,
188
- 'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
189
- date_pattern = constraints["datePattern"] || "%Y-%m"
190
- d = Date.strptime(value, date_pattern)
191
- raise ArgumentError unless d.strftime(date_pattern) == value
192
- d
193
- end,
194
- }
136
+ TYPE_VALIDATIONS = {
137
+ "http://www.w3.org/2001/XMLSchema#string" => lambda { |value, constraints| value },
138
+ "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, constraints| Integer value },
139
+ "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, constraints| Integer value },
140
+ "http://www.w3.org/2001/XMLSchema#float" => lambda { |value, constraints| Float value },
141
+ "http://www.w3.org/2001/XMLSchema#double" => lambda { |value, constraints| Float value },
142
+ "http://www.w3.org/2001/XMLSchema#anyURI" => lambda do |value, constraints|
143
+ begin
144
+ u = URI.parse value
145
+ raise ArgumentError unless u.is_a?(URI::HTTP) || u.is_a?(URI::HTTPS)
146
+ rescue URI::InvalidURIError
147
+ raise ArgumentError
148
+ end
149
+ u
150
+ end,
151
+ "http://www.w3.org/2001/XMLSchema#boolean" => lambda do |value, constraints|
152
+ return true if ["true", "1"].include? value
153
+ return false if ["false", "0"].include? value
154
+ raise ArgumentError
155
+ end,
156
+ "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda do |value, constraints|
157
+ i = Integer value
158
+ raise ArgumentError unless i <= 0
159
+ i
160
+ end,
161
+ "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda do |value, constraints|
162
+ i = Integer value
163
+ raise ArgumentError unless i < 0
164
+ i
165
+ end,
166
+ "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda do |value, constraints|
167
+ i = Integer value
168
+ raise ArgumentError unless i >= 0
169
+ i
170
+ end,
171
+ "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda do |value, constraints|
172
+ i = Integer value
173
+ raise ArgumentError unless i > 0
174
+ i
175
+ end,
176
+ "http://www.w3.org/2001/XMLSchema#dateTime" => lambda do |value, constraints|
177
+ date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
178
+ d = DateTime.strptime(value, date_pattern)
179
+ raise ArgumentError unless d.strftime(date_pattern) == value
180
+ d
181
+ end,
182
+ "http://www.w3.org/2001/XMLSchema#date" => lambda do |value, constraints|
183
+ date_pattern = constraints["datePattern"] || "%Y-%m-%d"
184
+ d = Date.strptime(value, date_pattern)
185
+ raise ArgumentError unless d.strftime(date_pattern) == value
186
+ d
187
+ end,
188
+ "http://www.w3.org/2001/XMLSchema#time" => lambda do |value, constraints|
189
+ date_pattern = constraints["datePattern"] || "%H:%M:%S"
190
+ d = DateTime.strptime(value, date_pattern)
191
+ raise ArgumentError unless d.strftime(date_pattern) == value
192
+ d
193
+ end,
194
+ "http://www.w3.org/2001/XMLSchema#gYear" => lambda do |value, constraints|
195
+ date_pattern = constraints["datePattern"] || "%Y"
196
+ d = Date.strptime(value, date_pattern)
197
+ raise ArgumentError unless d.strftime(date_pattern) == value
198
+ d
199
+ end,
200
+ "http://www.w3.org/2001/XMLSchema#gYearMonth" => lambda do |value, constraints|
201
+ date_pattern = constraints["datePattern"] || "%Y-%m"
202
+ d = Date.strptime(value, date_pattern)
203
+ raise ArgumentError unless d.strftime(date_pattern) == value
204
+ d
205
+ end
206
+ }
195
207
  end
196
208
  end
@@ -1,12 +1,10 @@
1
1
  module Csvlint
2
-
3
2
  class Schema
4
-
5
3
  include Csvlint::ErrorCollector
6
4
 
7
5
  attr_reader :uri, :fields, :title, :description
8
6
 
9
- def initialize(uri, fields=[], title=nil, description=nil)
7
+ def initialize(uri, fields = [], title = nil, description = nil)
10
8
  @uri = uri
11
9
  @fields = fields
12
10
  @title = title
@@ -15,20 +13,19 @@ module Csvlint
15
13
  end
16
14
 
17
15
  class << self
18
-
19
16
  extend Gem::Deprecate
20
17
 
21
18
  def from_json_table(uri, json)
22
19
  fields = []
23
- json["fields"].each do |field_desc|
24
- fields << Csvlint::Field.new( field_desc["name"] , field_desc["constraints"],
25
- field_desc["title"], field_desc["description"] )
26
- end if json["fields"]
27
- return Schema.new( uri , fields, json["title"], json["description"] )
20
+ json["fields"]&.each do |field_desc|
21
+ fields << Csvlint::Field.new(field_desc["name"], field_desc["constraints"],
22
+ field_desc["title"], field_desc["description"])
23
+ end
24
+ Schema.new(uri, fields, json["title"], json["description"])
28
25
  end
29
26
 
30
27
  def from_csvw_metadata(uri, json)
31
- return Csvlint::Csvw::TableGroup.from_json(uri, json)
28
+ Csvlint::Csvw::TableGroup.from_json(uri, json)
32
29
  end
33
30
 
34
31
  # Deprecated method signature
@@ -44,64 +41,59 @@ module Csvlint
44
41
  end
45
42
 
46
43
  def load_from_string(uri, string, output_errors = true)
47
- begin
48
- json = JSON.parse( string )
49
- if json["@context"]
50
- uri = "file:#{File.expand_path(uri)}" unless uri.to_s =~ /^http(s)?/
51
- return Schema.from_csvw_metadata(uri,json)
52
- else
53
- return Schema.from_json_table(uri,json)
54
- end
55
- rescue TypeError => e
56
- # NO IDEA what this was even trying to do - SP 20160526
57
-
58
- rescue Csvlint::Csvw::MetadataError => e
59
- raise e
60
- rescue => e
61
- if output_errors === true
62
- STDERR.puts e.class
63
- STDERR.puts e.message
64
- STDERR.puts e.backtrace
65
- end
66
- return Schema.new(nil, [], "malformed", "malformed")
44
+ json = JSON.parse(string)
45
+ if json["@context"]
46
+ uri = "file:#{File.expand_path(uri)}" unless /^http(s)?/.match?(uri.to_s)
47
+ Schema.from_csvw_metadata(uri, json)
48
+ else
49
+ Schema.from_json_table(uri, json)
67
50
  end
51
+ rescue TypeError => e
52
+ # NO IDEA what this was even trying to do - SP 20160526
53
+ rescue Csvlint::Csvw::MetadataError => e
54
+ raise e
55
+ rescue => e
56
+ if output_errors === true
57
+ warn e.class
58
+ warn e.message
59
+ warn e.backtrace
60
+ end
61
+ Schema.new(nil, [], "malformed", "malformed")
68
62
  end
69
-
70
63
  end
71
64
 
72
- def validate_header(header, source_url=nil, validate=true)
65
+ def validate_header(header, source_url = nil, validate = true)
73
66
  reset
74
67
 
75
- found_header = header.to_csv(:row_sep => '')
76
- expected_header = @fields.map{ |f| f.name }.to_csv(:row_sep => '')
68
+ found_header = header.to_csv(row_sep: "")
69
+ expected_header = @fields.map { |f| f.name }.to_csv(row_sep: "")
77
70
  if found_header != expected_header
78
71
  build_warnings(:malformed_header, :schema, 1, nil, found_header, "expectedHeader" => expected_header)
79
72
  end
80
- return valid?
73
+ valid?
81
74
  end
82
75
 
83
- def validate_row(values, row=nil, all_errors=[], source_url=nil, validate=true)
76
+ def validate_row(values, row = nil, all_errors = [], source_url = nil, validate = true)
84
77
  reset
85
78
  if values.length < fields.length
86
79
  fields[values.size..-1].each_with_index do |field, i|
87
- build_warnings(:missing_column, :schema, row, values.size+i+1)
80
+ build_warnings(:missing_column, :schema, row, values.size + i + 1)
88
81
  end
89
82
  end
90
83
  if values.length > fields.length
91
84
  values[fields.size..-1].each_with_index do |data_column, i|
92
- build_warnings(:extra_column, :schema, row, fields.size+i+1)
85
+ build_warnings(:extra_column, :schema, row, fields.size + i + 1)
93
86
  end
94
87
  end
95
88
 
96
- fields.each_with_index do |field,i|
89
+ fields.each_with_index do |field, i|
97
90
  value = values[i] || ""
98
- result = field.validate_column(value, row, i+1, all_errors)
91
+ result = field.validate_column(value, row, i + 1, all_errors)
99
92
  @errors += fields[i].errors
100
93
  @warnings += fields[i].warnings
101
94
  end
102
95
 
103
- return valid?
96
+ valid?
104
97
  end
105
-
106
98
  end
107
99
  end