csvlint 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +4 -0
  3. data/.github/workflows/push.yml +14 -2
  4. data/.pre-commit-hooks.yaml +5 -0
  5. data/.ruby-version +1 -1
  6. data/.standard_todo.yml +43 -0
  7. data/CHANGELOG.md +84 -32
  8. data/Dockerfile +16 -0
  9. data/Gemfile +2 -2
  10. data/README.md +30 -9
  11. data/Rakefile +7 -7
  12. data/csvlint.gemspec +14 -16
  13. data/docker_notes_for_windows.txt +20 -0
  14. data/features/step_definitions/cli_steps.rb +11 -11
  15. data/features/step_definitions/information_steps.rb +4 -4
  16. data/features/step_definitions/parse_csv_steps.rb +11 -11
  17. data/features/step_definitions/schema_validation_steps.rb +10 -10
  18. data/features/step_definitions/sources_steps.rb +1 -1
  19. data/features/step_definitions/validation_errors_steps.rb +19 -19
  20. data/features/step_definitions/validation_info_steps.rb +9 -9
  21. data/features/step_definitions/validation_warnings_steps.rb +11 -11
  22. data/features/support/aruba.rb +6 -6
  23. data/features/support/earl_formatter.rb +39 -39
  24. data/features/support/env.rb +10 -11
  25. data/features/support/load_tests.rb +107 -103
  26. data/features/support/webmock.rb +2 -2
  27. data/lib/csvlint/cli.rb +133 -130
  28. data/lib/csvlint/csvw/column.rb +279 -280
  29. data/lib/csvlint/csvw/date_format.rb +90 -92
  30. data/lib/csvlint/csvw/metadata_error.rb +1 -3
  31. data/lib/csvlint/csvw/number_format.rb +40 -32
  32. data/lib/csvlint/csvw/property_checker.rb +714 -717
  33. data/lib/csvlint/csvw/table.rb +49 -52
  34. data/lib/csvlint/csvw/table_group.rb +24 -23
  35. data/lib/csvlint/error_collector.rb +2 -0
  36. data/lib/csvlint/error_message.rb +0 -1
  37. data/lib/csvlint/field.rb +153 -141
  38. data/lib/csvlint/schema.rb +34 -42
  39. data/lib/csvlint/validate.rb +161 -143
  40. data/lib/csvlint/version.rb +1 -1
  41. data/lib/csvlint.rb +22 -23
  42. data/spec/csvw/column_spec.rb +15 -16
  43. data/spec/csvw/date_format_spec.rb +5 -7
  44. data/spec/csvw/number_format_spec.rb +2 -4
  45. data/spec/csvw/table_group_spec.rb +103 -105
  46. data/spec/csvw/table_spec.rb +71 -73
  47. data/spec/field_spec.rb +116 -121
  48. data/spec/schema_spec.rb +129 -139
  49. data/spec/spec_helper.rb +6 -6
  50. data/spec/validator_spec.rb +167 -190
  51. metadata +23 -55
@@ -1,7 +1,6 @@
1
1
  module Csvlint
2
2
  module Csvw
3
3
  class Table
4
-
5
4
  include Csvlint::ErrorCollector
6
5
 
7
6
  attr_reader :columns, :dialect, :table_direction, :foreign_keys, :foreign_key_references, :id, :notes, :primary_key, :row_title_columns, :schema, :suppress_output, :transformations, :url, :annotations
@@ -26,13 +25,13 @@ module Csvlint
26
25
  @annotations = annotations
27
26
  reset
28
27
  @warnings += warnings
29
- @errors += columns.map{|c| c.errors}.flatten
30
- @warnings += columns.map{|c| c.warnings}.flatten
28
+ @errors += columns.map { |c| c.errors }.flatten
29
+ @warnings += columns.map { |c| c.warnings }.flatten
31
30
  end
32
31
 
33
32
  def validate_header(headers, strict)
34
33
  reset
35
- headers.each_with_index do |header,i|
34
+ headers.each_with_index do |header, i|
36
35
  if columns[i]
37
36
  columns[i].validate_header(header, strict)
38
37
  @errors += columns[i].errors
@@ -43,26 +42,28 @@ module Csvlint
43
42
  build_warnings(:malformed_header, :schema, 1, nil, header, nil)
44
43
  end
45
44
  end # unless columns.empty?
46
- return valid?
45
+ valid?
47
46
  end
48
47
 
49
- def validate_row(values, row=nil, validate=false)
48
+ def validate_row(values, row = nil, validate = false)
50
49
  reset
51
- values.each_with_index do |value,i|
52
- column = columns[i]
53
- if column
54
- v = column.validate(value, row)
55
- values[i] = v
56
- @errors += column.errors
57
- @warnings += column.warnings
58
- else
59
- build_errors(:too_many_values, :schema, row, nil, value, nil)
50
+ unless columns.empty?
51
+ values.each_with_index do |value, i|
52
+ column = columns[i]
53
+ if column
54
+ v = column.validate(value, row)
55
+ values[i] = v
56
+ @errors += column.errors
57
+ @warnings += column.warnings
58
+ else
59
+ build_errors(:too_many_values, :schema, row, nil, value, nil)
60
+ end
60
61
  end
61
- end unless columns.empty?
62
+ end
62
63
  if validate
63
64
  unless @primary_key.nil?
64
65
  key = @primary_key.map { |column| column.validate(values[column.number - 1], row) }
65
- colnum = if primary_key.length == 1 then primary_key[0].number else nil end
66
+ colnum = (primary_key.length == 1) ? primary_key[0].number : nil
66
67
  build_errors(:duplicate_key, :schema, row, colnum, key.join(","), @primary_key_values[key]) if @primary_key_values.include?(key)
67
68
  @primary_key_values[key] = row
68
69
  end
@@ -70,7 +71,7 @@ module Csvlint
70
71
  # so that later we can check whether those foreign keys reference these values
71
72
  @foreign_key_references.each do |foreign_key|
72
73
  referenced_columns = foreign_key["referenced_columns"]
73
- key = referenced_columns.map{ |column| column.validate(values[column.number - 1], row) }
74
+ key = referenced_columns.map { |column| column.validate(values[column.number - 1], row) }
74
75
  known_values = @foreign_key_reference_values[foreign_key] = @foreign_key_reference_values[foreign_key] || {}
75
76
  known_values[key] = known_values[key] || []
76
77
  known_values[key] << row
@@ -80,12 +81,12 @@ module Csvlint
80
81
  # we might not have parsed those other tables
81
82
  @foreign_keys.each do |foreign_key|
82
83
  referencing_columns = foreign_key["referencing_columns"]
83
- key = referencing_columns.map{ |column| column.validate(values[column.number - 1], row) }
84
+ key = referencing_columns.map { |column| column.validate(values[column.number - 1], row) }
84
85
  known_values = @foreign_key_values[foreign_key] = @foreign_key_values[foreign_key] || []
85
86
  known_values << key unless known_values.include?(key)
86
87
  end
87
88
  end
88
- return valid?
89
+ valid?
89
90
  end
90
91
 
91
92
  def validate_foreign_keys
@@ -97,43 +98,43 @@ module Csvlint
97
98
  @errors += remote_table.errors unless remote_table == self
98
99
  @warnings += remote_table.warnings unless remote_table == self
99
100
  end
100
- return valid?
101
+ valid?
101
102
  end
102
103
 
103
104
  def validate_foreign_key_references(foreign_key, remote_url, remote)
104
105
  reset
105
106
  local = @foreign_key_reference_values[foreign_key]
106
- context = { "from" => { "url" => remote_url.to_s.split("/")[-1], "columns" => foreign_key["columnReference"] }, "to" => { "url" => @url.to_s.split("/")[-1], "columns" => foreign_key["reference"]["columnReference"] }}
107
- colnum = if foreign_key["referencing_columns"].length == 1 then foreign_key["referencing_columns"][0].number else nil end
108
- remote.each_with_index do |r,i|
107
+ context = {"from" => {"url" => remote_url.to_s.split("/")[-1], "columns" => foreign_key["columnReference"]}, "to" => {"url" => @url.to_s.split("/")[-1], "columns" => foreign_key["reference"]["columnReference"]}}
108
+ colnum = (foreign_key["referencing_columns"].length == 1) ? foreign_key["referencing_columns"][0].number : nil
109
+ remote.each_with_index do |r, i|
109
110
  if local[r]
110
- build_errors(:multiple_matched_rows, :schema, i+1, colnum, r, context) if local[r].length > 1
111
+ build_errors(:multiple_matched_rows, :schema, i + 1, colnum, r, context) if local[r].length > 1
111
112
  else
112
- build_errors(:unmatched_foreign_key_reference, :schema, i+1, colnum, r, context)
113
+ build_errors(:unmatched_foreign_key_reference, :schema, i + 1, colnum, r, context)
113
114
  end
114
115
  end
115
- return valid?
116
+ valid?
116
117
  end
117
118
 
118
- def self.from_json(table_desc, base_url=nil, lang="und", common_properties={}, inherited_properties={})
119
+ def self.from_json(table_desc, base_url = nil, lang = "und", common_properties = {}, inherited_properties = {})
119
120
  annotations = {}
120
121
  warnings = []
121
122
  columns = []
122
123
  table_properties = common_properties.clone
123
124
  inherited_properties = inherited_properties.clone
124
125
 
125
- table_desc.each do |property,value|
126
- if property =="@type"
127
- raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].@type"), "@type of table is not 'Table'" unless value == 'Table'
126
+ table_desc.each do |property, value|
127
+ if property == "@type"
128
+ raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].@type"), "@type of table is not 'Table'" unless value == "Table"
128
129
  else
129
130
  v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
130
- warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
131
+ warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
131
132
  if type == :annotation
132
133
  annotations[property] = v
133
134
  elsif type == :table || type == :common
134
135
  table_properties[property] = v
135
136
  elsif type == :column
136
- warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
137
+ warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, property.to_s, nil)
137
138
  else
138
139
  inherited_properties[property] = v
139
140
  end
@@ -143,37 +144,35 @@ module Csvlint
143
144
  table_schema = table_properties["tableSchema"] || inherited_properties["tableSchema"]
144
145
  column_names = []
145
146
  foreign_keys = []
146
- primary_key = nil
147
147
  if table_schema
148
148
  unless table_schema["columns"].instance_of? Array
149
149
  table_schema["columns"] = []
150
150
  warnings << Csvlint::ErrorMessage.new(:invalid_value, :metadata, nil, nil, "columns", nil)
151
151
  end
152
152
 
153
- table_schema.each do |p,v|
153
+ table_schema.each do |p, v|
154
154
  unless ["columns", "primaryKey", "foreignKeys", "rowTitles"].include? p
155
155
  inherited_properties[p] = v
156
156
  end
157
157
  end
158
158
 
159
159
  virtual_columns = false
160
- table_schema["columns"].each_with_index do |column_desc,i|
160
+ table_schema["columns"].each_with_index do |column_desc, i|
161
161
  if column_desc.instance_of? Hash
162
- column = Csvlint::Csvw::Column.from_json(i+1, column_desc, base_url, lang, inherited_properties)
162
+ column = Csvlint::Csvw::Column.from_json(i + 1, column_desc, base_url, lang, inherited_properties)
163
163
  raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns[#{i}].virtual"), "virtual columns before non-virtual column #{column.name || i}" if virtual_columns && !column.virtual
164
- virtual_columns = virtual_columns || column.virtual
164
+ virtual_columns ||= column.virtual
165
165
  raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns"), "multiple columns named #{column.name}" if column_names.include? column.name
166
166
  column_names << column.name unless column.name.nil?
167
167
  columns << column
168
168
  else
169
- warnings << Csvlint::ErrorMessage.new(:invalid_column_description, :metadata, nil, nil, "#{column_desc}", nil)
169
+ warnings << Csvlint::ErrorMessage.new(:invalid_column_description, :metadata, nil, nil, column_desc.to_s, nil)
170
170
  end
171
171
  end
172
172
 
173
- primary_key = table_schema["primaryKey"]
174
173
  primary_key_columns = []
175
174
  primary_key_valid = true
176
- primary_key.each do |reference|
175
+ table_schema["primaryKey"]&.each do |reference|
177
176
  i = column_names.index(reference)
178
177
  if i
179
178
  primary_key_columns << columns[i]
@@ -181,10 +180,10 @@ module Csvlint
181
180
  warnings << Csvlint::ErrorMessage.new(:invalid_column_reference, :metadata, nil, nil, "primaryKey: #{reference}", nil)
182
181
  primary_key_valid = false
183
182
  end
184
- end if primary_key
183
+ end
185
184
 
186
185
  foreign_keys = table_schema["foreignKeys"]
187
- foreign_keys.each_with_index do |foreign_key, i|
186
+ foreign_keys&.each_with_index do |foreign_key, i|
188
187
  foreign_key_columns = []
189
188
  foreign_key["columnReference"].each do |reference|
190
189
  i = column_names.index(reference)
@@ -192,33 +191,31 @@ module Csvlint
192
191
  foreign_key_columns << columns[i]
193
192
  end
194
193
  foreign_key["referencing_columns"] = foreign_key_columns
195
- end if foreign_keys
194
+ end
196
195
 
197
196
  row_titles = table_schema["rowTitles"]
198
197
  row_title_columns = []
199
- row_titles.each_with_index do |row_title|
198
+ row_titles&.each do |row_title|
200
199
  i = column_names.index(row_title)
201
200
  raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.rowTitles[#{i}]"), "rowTitles references non-existant column" unless i
202
201
  row_title_columns << columns[i]
203
- end if row_titles
202
+ end
204
203
 
205
204
  end
206
205
 
207
- return self.new(table_properties["url"],
206
+ new(table_properties["url"],
208
207
  id: table_properties["@id"],
209
208
  columns: columns,
210
209
  dialect: table_properties["dialect"],
211
210
  foreign_keys: foreign_keys || [],
212
211
  notes: table_properties["notes"] || [],
213
- primary_key: primary_key_valid && !primary_key_columns.empty? ? primary_key_columns : nil,
212
+ primary_key: (primary_key_valid && !primary_key_columns.empty?) ? primary_key_columns : nil,
214
213
  row_title_columns: row_title_columns,
215
214
  schema: table_schema ? table_schema["@id"] : nil,
216
- suppress_output: table_properties["suppressOutput"] ? table_properties["suppressOutput"] : false,
215
+ suppress_output: table_properties["suppressOutput"] || false,
217
216
  annotations: annotations,
218
- warnings: warnings
219
- )
217
+ warnings: warnings)
220
218
  end
221
-
222
219
  end
223
220
  end
224
221
  end
@@ -1,7 +1,6 @@
1
1
  module Csvlint
2
2
  module Csvw
3
3
  class TableGroup
4
-
5
4
  include Csvlint::ErrorCollector
6
5
 
7
6
  attr_reader :url, :id, :tables, :notes, :annotations
@@ -13,11 +12,11 @@ module Csvlint
13
12
  @notes = notes
14
13
  @annotations = annotations
15
14
  @validated_tables = {}
16
- @tables.each { |t,v| @validated_tables[t] = false }
15
+ @tables.each { |t, v| @validated_tables[t] = false }
17
16
  reset
18
17
  @warnings += warnings
19
- @errors += @tables.map{|url,table| table.errors}.flatten
20
- @warnings += @tables.map{|url,table| table.warnings}.flatten
18
+ @errors += @tables.map { |url, table| table.errors }.flatten
19
+ @warnings += @tables.map { |url, table| table.warnings }.flatten
21
20
  end
22
21
 
23
22
  def validate_header(header, table_url, strict)
@@ -27,10 +26,10 @@ module Csvlint
27
26
  table.validate_header(header, strict)
28
27
  @errors += table.errors
29
28
  @warnings += table.warnings
30
- return valid?
29
+ valid?
31
30
  end
32
31
 
33
- def validate_row(values, row=nil, all_errors=[], table_url, validate)
32
+ def validate_row(values, row = nil, all_errors = [], table_url, validate)
34
33
  reset
35
34
  table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
36
35
  @validated_tables[table_url] = true
@@ -38,19 +37,19 @@ module Csvlint
38
37
  table.validate_row(values, row, validate)
39
38
  @errors += table.errors
40
39
  @warnings += table.warnings
41
- return valid?
40
+ valid?
42
41
  end
43
42
 
44
43
  def validate_foreign_keys
45
44
  reset
46
45
  unless @validated_tables.has_value?(false)
47
- @tables.each do |table_url,table|
46
+ @tables.each do |table_url, table|
48
47
  table.validate_foreign_keys
49
48
  @errors += table.errors
50
49
  @warnings += table.warnings
51
50
  end
52
51
  end
53
- return valid?
52
+ valid?
54
53
  end
55
54
 
56
55
  def self.from_json(url, json)
@@ -64,7 +63,7 @@ module Csvlint
64
63
 
65
64
  context = json["@context"]
66
65
  if context.instance_of?(Array) && context[1]
67
- context[1].each do |property,value|
66
+ context[1].each do |property, value|
68
67
  v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
69
68
  if warning.nil? || warning.empty?
70
69
  if type == :context
@@ -75,20 +74,22 @@ module Csvlint
75
74
  end
76
75
  else
77
76
  raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})" unless ["@base", "@language"].include?(property)
78
- warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "@context: #{property}: #{value}", nil) }
77
+ warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "@context: #{property}: #{value}", nil) }
79
78
  end
80
79
  end
81
80
  end
82
81
  json.delete("@context")
83
82
 
84
- if json["url"]
85
- json = { "tables" => [ json ] }
86
- end unless json["tables"]
83
+ unless json["tables"]
84
+ if json["url"]
85
+ json = {"tables" => [json]}
86
+ end
87
+ end
87
88
 
88
- json.each do |property,value|
89
+ json.each do |property, value|
89
90
  unless VALID_PROPERTIES.include? property
90
91
  v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
91
- warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
92
+ warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
92
93
  if type == :annotation
93
94
  annotations[property] = v
94
95
  elsif type == :common
@@ -96,14 +97,14 @@ module Csvlint
96
97
  elsif type == :inherited
97
98
  inherited_properties[property] = v
98
99
  else
99
- warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
100
+ warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, property.to_s, nil)
100
101
  end
101
102
  end
102
103
  end
103
104
 
104
105
  id = common_properties["@id"]
105
106
 
106
- raise Csvlint::Csvw::MetadataError.new("$.@type"), "@type of table group is not 'TableGroup'" if json["@type"] && json["@type"] != 'TableGroup'
107
+ raise Csvlint::Csvw::MetadataError.new("$.@type"), "@type of table group is not 'TableGroup'" if json["@type"] && json["@type"] != "TableGroup"
107
108
 
108
109
  raise Csvlint::Csvw::MetadataError.new("$"), "no tables property" unless json["tables"]
109
110
  raise Csvlint::Csvw::MetadataError.new("$.tables"), "empty tables property" if json["tables"].empty?
@@ -121,12 +122,12 @@ module Csvlint
121
122
  table = Csvlint::Csvw::Table.from_json(table_desc, base_url, lang, common_properties, inherited_properties)
122
123
  tables[table_url] = table
123
124
  else
124
- warnings << Csvlint::ErrorMessage.new(:invalid_table_description, :metadata, nil, nil, "#{table_desc}", nil)
125
+ warnings << Csvlint::ErrorMessage.new(:invalid_table_description, :metadata, nil, nil, table_desc.to_s, nil)
125
126
  end
126
127
  end
127
128
 
128
129
  tables.each do |table_url, table|
129
- table.foreign_keys.each_with_index do |foreign_key,i|
130
+ table.foreign_keys.each_with_index do |foreign_key, i|
130
131
  reference = foreign_key["reference"]
131
132
  if reference["resource"]
132
133
  resource = URI.join(base_url, reference["resource"]).to_s
@@ -134,7 +135,7 @@ module Csvlint
134
135
  raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.resource"), "foreign key references table that does not exist (#{resource})" if referenced_table.nil?
135
136
  else
136
137
  schema_url = URI.join(base_url, reference["schemaReference"]).to_s
137
- referenced_tables = tables.values.select{ |table| table.schema == schema_url }
138
+ referenced_tables = tables.values.select { |table| table.schema == schema_url }
138
139
  referenced_table = referenced_tables[0]
139
140
  raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.schemaReference"), "foreign key references schema that is not used (#{schema_url})" if referenced_table.nil?
140
141
  end
@@ -154,12 +155,12 @@ module Csvlint
154
155
  end
155
156
  end
156
157
 
157
- return self.new(base_url, id: id, tables: tables, notes: common_properties["notes"] || [], annotations: annotations, warnings: warnings)
158
+ new(base_url, id: id, tables: tables, notes: common_properties["notes"] || [], annotations: annotations, warnings: warnings)
158
159
  end
159
160
 
160
161
  private
161
- VALID_PROPERTIES = ['tables', 'notes', '@type']
162
162
 
163
+ VALID_PROPERTIES = ["tables", "notes", "@type"]
163
164
  end
164
165
  end
165
166
  end
@@ -5,10 +5,12 @@ module Csvlint
5
5
  def build_errors(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
6
6
  @errors << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
7
7
  end
8
+
8
9
  # Creates a validation warning
9
10
  def build_warnings(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
10
11
  @warnings << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
11
12
  end
13
+
12
14
  # Creates a validation information message
13
15
  def build_info_messages(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
14
16
  @info_messages << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
@@ -9,7 +9,6 @@ module Csvlint
9
9
  @column = column
10
10
  @content = content
11
11
  @constraints = constraints
12
-
13
12
  end
14
13
  end
15
14
  end