csvlint 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +4 -0
  3. data/.github/workflows/push.yml +14 -2
  4. data/.pre-commit-hooks.yaml +5 -0
  5. data/.ruby-version +1 -1
  6. data/.standard_todo.yml +43 -0
  7. data/CHANGELOG.md +84 -32
  8. data/Dockerfile +16 -0
  9. data/Gemfile +2 -2
  10. data/README.md +30 -9
  11. data/Rakefile +7 -7
  12. data/csvlint.gemspec +14 -16
  13. data/docker_notes_for_windows.txt +20 -0
  14. data/features/step_definitions/cli_steps.rb +11 -11
  15. data/features/step_definitions/information_steps.rb +4 -4
  16. data/features/step_definitions/parse_csv_steps.rb +11 -11
  17. data/features/step_definitions/schema_validation_steps.rb +10 -10
  18. data/features/step_definitions/sources_steps.rb +1 -1
  19. data/features/step_definitions/validation_errors_steps.rb +19 -19
  20. data/features/step_definitions/validation_info_steps.rb +9 -9
  21. data/features/step_definitions/validation_warnings_steps.rb +11 -11
  22. data/features/support/aruba.rb +6 -6
  23. data/features/support/earl_formatter.rb +39 -39
  24. data/features/support/env.rb +10 -11
  25. data/features/support/load_tests.rb +107 -103
  26. data/features/support/webmock.rb +2 -2
  27. data/lib/csvlint/cli.rb +133 -130
  28. data/lib/csvlint/csvw/column.rb +279 -280
  29. data/lib/csvlint/csvw/date_format.rb +90 -92
  30. data/lib/csvlint/csvw/metadata_error.rb +1 -3
  31. data/lib/csvlint/csvw/number_format.rb +40 -32
  32. data/lib/csvlint/csvw/property_checker.rb +714 -717
  33. data/lib/csvlint/csvw/table.rb +49 -52
  34. data/lib/csvlint/csvw/table_group.rb +24 -23
  35. data/lib/csvlint/error_collector.rb +2 -0
  36. data/lib/csvlint/error_message.rb +0 -1
  37. data/lib/csvlint/field.rb +153 -141
  38. data/lib/csvlint/schema.rb +34 -42
  39. data/lib/csvlint/validate.rb +161 -143
  40. data/lib/csvlint/version.rb +1 -1
  41. data/lib/csvlint.rb +22 -23
  42. data/spec/csvw/column_spec.rb +15 -16
  43. data/spec/csvw/date_format_spec.rb +5 -7
  44. data/spec/csvw/number_format_spec.rb +2 -4
  45. data/spec/csvw/table_group_spec.rb +103 -105
  46. data/spec/csvw/table_spec.rb +71 -73
  47. data/spec/field_spec.rb +116 -121
  48. data/spec/schema_spec.rb +129 -139
  49. data/spec/spec_helper.rb +6 -6
  50. data/spec/validator_spec.rb +167 -190
  51. metadata +23 -55
data/lib/csvlint/field.rb CHANGED
@@ -1,196 +1,208 @@
1
1
  module Csvlint
2
-
3
2
  class Field
4
3
  include Csvlint::ErrorCollector
5
4
 
6
5
  attr_reader :name, :constraints, :title, :description
7
6
 
8
- def initialize(name, constraints={}, title=nil, description=nil)
7
+ def initialize(name, constraints = {}, title = nil, description = nil)
9
8
  @name = name
10
9
  @constraints = constraints || {}
11
10
  @uniques = Set.new
12
11
  @title = title
13
12
  @description = description
13
+ @regex = nil
14
14
  reset
15
15
  end
16
16
 
17
- def validate_column(value, row=nil, column=nil, all_errors=[])
17
+ def validate_column(value, row = nil, column = nil, all_errors = [])
18
18
  reset
19
- unless all_errors.any?{|error| ((error.type == :invalid_regex) && (error.column == column))}
19
+ unless all_errors.any? { |error| ((error.type == :invalid_regex) && (error.column == column)) }
20
20
  validate_regex(value, row, column, all_errors)
21
21
  end
22
22
  validate_length(value, row, column)
23
23
  validate_values(value, row, column)
24
24
  parsed = validate_type(value, row, column)
25
- validate_range(parsed, row, column) if parsed != nil
26
- return valid?
25
+ validate_range(parsed, row, column) if !parsed.nil?
26
+ valid?
27
27
  end
28
28
 
29
29
  private
30
- def validate_length(value, row, column)
31
- if constraints["required"] == true
30
+
31
+ def validate_length(value, row, column)
32
+ if constraints["required"] == true
33
+ if value.nil? || value.length == 0
32
34
  build_errors(:missing_value, :schema, row, column, value,
33
- { "required" => true }) if value.nil? || value.length == 0
35
+ {"required" => true})
34
36
  end
35
- if constraints["minLength"]
37
+ end
38
+ if constraints["minLength"]
39
+ if value.nil? || value.length < constraints["minLength"]
36
40
  build_errors(:min_length, :schema, row, column, value,
37
- { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
41
+ {"minLength" => constraints["minLength"]})
38
42
  end
39
- if constraints["maxLength"]
40
- build_errors(:max_length, :schema, row, column, value,
41
- { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
43
+ end
44
+ if constraints["maxLength"]
45
+ if !value.nil? && value.length > constraints["maxLength"]
46
+ build_errors(:max_length, :schema, row, column, value,
47
+ {"maxLength" => constraints["maxLength"]})
42
48
  end
43
49
  end
50
+ end
44
51
 
45
- def validate_regex(value, row, column, all_errors)
46
- pattern = constraints["pattern"]
47
- if pattern
48
- begin
49
- Regexp.new(pattern)
52
+ def validate_regex(value, row, column, all_errors)
53
+ pattern = constraints["pattern"]
54
+ if pattern
55
+ begin
56
+ if !value.nil? && !value.match(@regex ||= Regexp.new(pattern))
50
57
  build_errors(:pattern, :schema, row, column, value,
51
- { "pattern" => constraints["pattern"] } ) if !value.nil? && !value.match( constraints["pattern"] )
52
- rescue RegexpError
53
- build_regex_error(value, row, column, pattern, all_errors)
58
+ {"pattern" => constraints["pattern"]})
54
59
  end
60
+ rescue RegexpError
61
+ build_regex_error(value, row, column, pattern, all_errors)
55
62
  end
56
63
  end
64
+ end
57
65
 
58
- def build_regex_error(value, row, column, pattern, all_errors)
59
- return if @regex_error_exists
60
- build_errors(:invalid_regex, :schema, nil, column, ("#{name}: Constraints: Pattern: #{pattern}"),
61
- { "pattern" => constraints["pattern"] })
62
- @regex_error_exists = true
63
- end
66
+ def build_regex_error(value, row, column, pattern, all_errors)
67
+ return if @regex_error_exists
68
+ build_errors(:invalid_regex, :schema, nil, column, "#{name}: Constraints: Pattern: #{pattern}",
69
+ {"pattern" => constraints["pattern"]})
70
+ @regex_error_exists = true
71
+ end
64
72
 
65
- def validate_values(value, row, column)
66
- # If a pattern exists, raise an invalid regex error if it is not in
67
- # valid regex form, else, if the value of the relevant field in the csv
68
- # does not match the given regex pattern in the schema, raise a
69
- # pattern error.
70
- if constraints["unique"] == true
71
- if @uniques.include? value
72
- build_errors(:unique, :schema, row, column, value, { "unique" => true })
73
- else
74
- @uniques << value
75
- end
73
+ def validate_values(value, row, column)
74
+ # If a pattern exists, raise an invalid regex error if it is not in
75
+ # valid regex form, else, if the value of the relevant field in the csv
76
+ # does not match the given regex pattern in the schema, raise a
77
+ # pattern error.
78
+ if constraints["unique"] == true
79
+ if @uniques.include? value
80
+ build_errors(:unique, :schema, row, column, value, {"unique" => true})
81
+ else
82
+ @uniques << value
76
83
  end
77
84
  end
85
+ end
78
86
 
79
- def validate_type(value, row, column)
80
- if constraints["type"] && value != ""
81
- parsed = convert_to_type(value)
82
- if parsed == nil
83
- failed = { "type" => constraints["type"] }
84
- failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
85
- build_errors(:invalid_type, :schema, row, column, value, failed)
86
- return nil
87
- end
88
- return parsed
87
+ def validate_type(value, row, column)
88
+ if constraints["type"] && value != ""
89
+ parsed = convert_to_type(value)
90
+ if parsed.nil?
91
+ failed = {"type" => constraints["type"]}
92
+ failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
93
+ build_errors(:invalid_type, :schema, row, column, value, failed)
94
+ return nil
89
95
  end
90
- return nil
96
+ return parsed
91
97
  end
98
+ nil
99
+ end
92
100
 
93
- def validate_range(value, row, column)
94
- #TODO: we're ignoring issues with converting ranges to actual types, maybe we
95
- #should generate a warning? The schema is invalid
96
- if constraints["minimum"]
97
- minimumValue = convert_to_type( constraints["minimum"] )
98
- if minimumValue
101
+ def validate_range(value, row, column)
102
+ # TODO: we're ignoring issues with converting ranges to actual types, maybe we
103
+ # should generate a warning? The schema is invalid
104
+ if constraints["minimum"]
105
+ minimumValue = convert_to_type(constraints["minimum"])
106
+ if minimumValue
107
+ unless value >= minimumValue
99
108
  build_errors(:below_minimum, :schema, row, column, value,
100
- { "minimum" => constraints["minimum"] }) unless value >= minimumValue
109
+ {"minimum" => constraints["minimum"]})
101
110
  end
102
111
  end
103
- if constraints["maximum"]
104
- maximumValue = convert_to_type( constraints["maximum"] )
105
- if maximumValue
112
+ end
113
+ if constraints["maximum"]
114
+ maximumValue = convert_to_type(constraints["maximum"])
115
+ if maximumValue
116
+ unless value <= maximumValue
106
117
  build_errors(:above_maximum, :schema, row, column, value,
107
- { "maximum" => constraints["maximum"] }) unless value <= maximumValue
118
+ {"maximum" => constraints["maximum"]})
108
119
  end
109
120
  end
110
121
  end
122
+ end
111
123
 
112
- def convert_to_type(value)
113
- parsed = nil
114
- tv = TYPE_VALIDATIONS[constraints["type"]]
115
- if tv
116
- begin
117
- parsed = tv.call value, constraints
118
- rescue ArgumentError
119
- end
124
+ def convert_to_type(value)
125
+ parsed = nil
126
+ tv = TYPE_VALIDATIONS[constraints["type"]]
127
+ if tv
128
+ begin
129
+ parsed = tv.call value, constraints
130
+ rescue ArgumentError
120
131
  end
121
- return parsed
122
132
  end
133
+ parsed
134
+ end
123
135
 
124
- TYPE_VALIDATIONS = {
125
- 'http://www.w3.org/2001/XMLSchema#string' => lambda { |value, constraints| value },
126
- 'http://www.w3.org/2001/XMLSchema#int' => lambda { |value, constraints| Integer value },
127
- 'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| Integer value },
128
- 'http://www.w3.org/2001/XMLSchema#float' => lambda { |value, constraints| Float value },
129
- 'http://www.w3.org/2001/XMLSchema#double' => lambda { |value, constraints| Float value },
130
- 'http://www.w3.org/2001/XMLSchema#anyURI' => lambda do |value, constraints|
131
- begin
132
- u = URI.parse value
133
- raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
134
- rescue URI::InvalidURIError
135
- raise ArgumentError
136
- end
137
- u
138
- end,
139
- 'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
140
- return true if ['true', '1'].include? value
141
- return false if ['false', '0'].include? value
142
- raise ArgumentError
143
- end,
144
- 'http://www.w3.org/2001/XMLSchema#nonPositiveInteger' => lambda do |value, constraints|
145
- i = Integer value
146
- raise ArgumentError unless i <= 0
147
- i
148
- end,
149
- 'http://www.w3.org/2001/XMLSchema#negativeInteger' => lambda do |value, constraints|
150
- i = Integer value
151
- raise ArgumentError unless i < 0
152
- i
153
- end,
154
- 'http://www.w3.org/2001/XMLSchema#nonNegativeInteger' => lambda do |value, constraints|
155
- i = Integer value
156
- raise ArgumentError unless i >= 0
157
- i
158
- end,
159
- 'http://www.w3.org/2001/XMLSchema#positiveInteger' => lambda do |value, constraints|
160
- i = Integer value
161
- raise ArgumentError unless i > 0
162
- i
163
- end,
164
- 'http://www.w3.org/2001/XMLSchema#dateTime' => lambda do |value, constraints|
165
- date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
166
- d = DateTime.strptime(value, date_pattern)
167
- raise ArgumentError unless d.strftime(date_pattern) == value
168
- d
169
- end,
170
- 'http://www.w3.org/2001/XMLSchema#date' => lambda do |value, constraints|
171
- date_pattern = constraints["datePattern"] || "%Y-%m-%d"
172
- d = Date.strptime(value, date_pattern)
173
- raise ArgumentError unless d.strftime(date_pattern) == value
174
- d
175
- end,
176
- 'http://www.w3.org/2001/XMLSchema#time' => lambda do |value, constraints|
177
- date_pattern = constraints["datePattern"] || "%H:%M:%S"
178
- d = DateTime.strptime(value, date_pattern)
179
- raise ArgumentError unless d.strftime(date_pattern) == value
180
- d
181
- end,
182
- 'http://www.w3.org/2001/XMLSchema#gYear' => lambda do |value, constraints|
183
- date_pattern = constraints["datePattern"] || "%Y"
184
- d = Date.strptime(value, date_pattern)
185
- raise ArgumentError unless d.strftime(date_pattern) == value
186
- d
187
- end,
188
- 'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
189
- date_pattern = constraints["datePattern"] || "%Y-%m"
190
- d = Date.strptime(value, date_pattern)
191
- raise ArgumentError unless d.strftime(date_pattern) == value
192
- d
193
- end,
194
- }
136
+ TYPE_VALIDATIONS = {
137
+ "http://www.w3.org/2001/XMLSchema#string" => lambda { |value, constraints| value },
138
+ "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, constraints| Integer value },
139
+ "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, constraints| Integer value },
140
+ "http://www.w3.org/2001/XMLSchema#float" => lambda { |value, constraints| Float value },
141
+ "http://www.w3.org/2001/XMLSchema#double" => lambda { |value, constraints| Float value },
142
+ "http://www.w3.org/2001/XMLSchema#anyURI" => lambda do |value, constraints|
143
+ begin
144
+ u = URI.parse value
145
+ raise ArgumentError unless u.is_a?(URI::HTTP) || u.is_a?(URI::HTTPS)
146
+ rescue URI::InvalidURIError
147
+ raise ArgumentError
148
+ end
149
+ u
150
+ end,
151
+ "http://www.w3.org/2001/XMLSchema#boolean" => lambda do |value, constraints|
152
+ return true if ["true", "1"].include? value
153
+ return false if ["false", "0"].include? value
154
+ raise ArgumentError
155
+ end,
156
+ "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda do |value, constraints|
157
+ i = Integer value
158
+ raise ArgumentError unless i <= 0
159
+ i
160
+ end,
161
+ "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda do |value, constraints|
162
+ i = Integer value
163
+ raise ArgumentError unless i < 0
164
+ i
165
+ end,
166
+ "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda do |value, constraints|
167
+ i = Integer value
168
+ raise ArgumentError unless i >= 0
169
+ i
170
+ end,
171
+ "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda do |value, constraints|
172
+ i = Integer value
173
+ raise ArgumentError unless i > 0
174
+ i
175
+ end,
176
+ "http://www.w3.org/2001/XMLSchema#dateTime" => lambda do |value, constraints|
177
+ date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
178
+ d = DateTime.strptime(value, date_pattern)
179
+ raise ArgumentError unless d.strftime(date_pattern) == value
180
+ d
181
+ end,
182
+ "http://www.w3.org/2001/XMLSchema#date" => lambda do |value, constraints|
183
+ date_pattern = constraints["datePattern"] || "%Y-%m-%d"
184
+ d = Date.strptime(value, date_pattern)
185
+ raise ArgumentError unless d.strftime(date_pattern) == value
186
+ d
187
+ end,
188
+ "http://www.w3.org/2001/XMLSchema#time" => lambda do |value, constraints|
189
+ date_pattern = constraints["datePattern"] || "%H:%M:%S"
190
+ d = DateTime.strptime(value, date_pattern)
191
+ raise ArgumentError unless d.strftime(date_pattern) == value
192
+ d
193
+ end,
194
+ "http://www.w3.org/2001/XMLSchema#gYear" => lambda do |value, constraints|
195
+ date_pattern = constraints["datePattern"] || "%Y"
196
+ d = Date.strptime(value, date_pattern)
197
+ raise ArgumentError unless d.strftime(date_pattern) == value
198
+ d
199
+ end,
200
+ "http://www.w3.org/2001/XMLSchema#gYearMonth" => lambda do |value, constraints|
201
+ date_pattern = constraints["datePattern"] || "%Y-%m"
202
+ d = Date.strptime(value, date_pattern)
203
+ raise ArgumentError unless d.strftime(date_pattern) == value
204
+ d
205
+ end
206
+ }
195
207
  end
196
208
  end
data/lib/csvlint/schema.rb CHANGED
@@ -1,12 +1,10 @@
1
1
  module Csvlint
2
-
3
2
  class Schema
4
-
5
3
  include Csvlint::ErrorCollector
6
4
 
7
5
  attr_reader :uri, :fields, :title, :description
8
6
 
9
- def initialize(uri, fields=[], title=nil, description=nil)
7
+ def initialize(uri, fields = [], title = nil, description = nil)
10
8
  @uri = uri
11
9
  @fields = fields
12
10
  @title = title
@@ -15,20 +13,19 @@ module Csvlint
15
13
  end
16
14
 
17
15
  class << self
18
-
19
16
  extend Gem::Deprecate
20
17
 
21
18
  def from_json_table(uri, json)
22
19
  fields = []
23
- json["fields"].each do |field_desc|
24
- fields << Csvlint::Field.new( field_desc["name"] , field_desc["constraints"],
25
- field_desc["title"], field_desc["description"] )
26
- end if json["fields"]
27
- return Schema.new( uri , fields, json["title"], json["description"] )
20
+ json["fields"]&.each do |field_desc|
21
+ fields << Csvlint::Field.new(field_desc["name"], field_desc["constraints"],
22
+ field_desc["title"], field_desc["description"])
23
+ end
24
+ Schema.new(uri, fields, json["title"], json["description"])
28
25
  end
29
26
 
30
27
  def from_csvw_metadata(uri, json)
31
- return Csvlint::Csvw::TableGroup.from_json(uri, json)
28
+ Csvlint::Csvw::TableGroup.from_json(uri, json)
32
29
  end
33
30
 
34
31
  # Deprecated method signature
@@ -44,64 +41,59 @@ module Csvlint
44
41
  end
45
42
 
46
43
  def load_from_string(uri, string, output_errors = true)
47
- begin
48
- json = JSON.parse( string )
49
- if json["@context"]
50
- uri = "file:#{File.expand_path(uri)}" unless uri.to_s =~ /^http(s)?/
51
- return Schema.from_csvw_metadata(uri,json)
52
- else
53
- return Schema.from_json_table(uri,json)
54
- end
55
- rescue TypeError => e
56
- # NO IDEA what this was even trying to do - SP 20160526
57
-
58
- rescue Csvlint::Csvw::MetadataError => e
59
- raise e
60
- rescue => e
61
- if output_errors === true
62
- STDERR.puts e.class
63
- STDERR.puts e.message
64
- STDERR.puts e.backtrace
65
- end
66
- return Schema.new(nil, [], "malformed", "malformed")
44
+ json = JSON.parse(string)
45
+ if json["@context"]
46
+ uri = "file:#{File.expand_path(uri)}" unless /^http(s)?/.match?(uri.to_s)
47
+ Schema.from_csvw_metadata(uri, json)
48
+ else
49
+ Schema.from_json_table(uri, json)
67
50
  end
51
+ rescue TypeError => e
52
+ # NO IDEA what this was even trying to do - SP 20160526
53
+ rescue Csvlint::Csvw::MetadataError => e
54
+ raise e
55
+ rescue => e
56
+ if output_errors === true
57
+ warn e.class
58
+ warn e.message
59
+ warn e.backtrace
60
+ end
61
+ Schema.new(nil, [], "malformed", "malformed")
68
62
  end
69
-
70
63
  end
71
64
 
72
- def validate_header(header, source_url=nil, validate=true)
65
+ def validate_header(header, source_url = nil, validate = true)
73
66
  reset
74
67
 
75
- found_header = header.to_csv(:row_sep => '')
76
- expected_header = @fields.map{ |f| f.name }.to_csv(:row_sep => '')
68
+ found_header = header.to_csv(row_sep: "")
69
+ expected_header = @fields.map { |f| f.name }.to_csv(row_sep: "")
77
70
  if found_header != expected_header
78
71
  build_warnings(:malformed_header, :schema, 1, nil, found_header, "expectedHeader" => expected_header)
79
72
  end
80
- return valid?
73
+ valid?
81
74
  end
82
75
 
83
- def validate_row(values, row=nil, all_errors=[], source_url=nil, validate=true)
76
+ def validate_row(values, row = nil, all_errors = [], source_url = nil, validate = true)
84
77
  reset
85
78
  if values.length < fields.length
86
79
  fields[values.size..-1].each_with_index do |field, i|
87
- build_warnings(:missing_column, :schema, row, values.size+i+1)
80
+ build_warnings(:missing_column, :schema, row, values.size + i + 1)
88
81
  end
89
82
  end
90
83
  if values.length > fields.length
91
84
  values[fields.size..-1].each_with_index do |data_column, i|
92
- build_warnings(:extra_column, :schema, row, fields.size+i+1)
85
+ build_warnings(:extra_column, :schema, row, fields.size + i + 1)
93
86
  end
94
87
  end
95
88
 
96
- fields.each_with_index do |field,i|
89
+ fields.each_with_index do |field, i|
97
90
  value = values[i] || ""
98
- result = field.validate_column(value, row, i+1, all_errors)
91
+ result = field.validate_column(value, row, i + 1, all_errors)
99
92
  @errors += fields[i].errors
100
93
  @warnings += fields[i].warnings
101
94
  end
102
95
 
103
- return valid?
96
+ valid?
104
97
  end
105
-
106
98
  end
107
99
  end