csvlint 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +4 -0
  3. data/.github/workflows/push.yml +14 -2
  4. data/.ruby-version +1 -1
  5. data/.standard_todo.yml +43 -0
  6. data/Dockerfile +16 -0
  7. data/Gemfile +2 -2
  8. data/README.md +9 -9
  9. data/Rakefile +7 -7
  10. data/csvlint.gemspec +14 -16
  11. data/docker_notes_for_windows.txt +20 -0
  12. data/features/step_definitions/cli_steps.rb +11 -11
  13. data/features/step_definitions/information_steps.rb +4 -4
  14. data/features/step_definitions/parse_csv_steps.rb +11 -11
  15. data/features/step_definitions/schema_validation_steps.rb +10 -10
  16. data/features/step_definitions/sources_steps.rb +1 -1
  17. data/features/step_definitions/validation_errors_steps.rb +19 -19
  18. data/features/step_definitions/validation_info_steps.rb +9 -9
  19. data/features/step_definitions/validation_warnings_steps.rb +11 -11
  20. data/features/support/aruba.rb +6 -6
  21. data/features/support/earl_formatter.rb +39 -39
  22. data/features/support/env.rb +10 -11
  23. data/features/support/load_tests.rb +107 -103
  24. data/features/support/webmock.rb +2 -2
  25. data/lib/csvlint/cli.rb +133 -130
  26. data/lib/csvlint/csvw/column.rb +279 -280
  27. data/lib/csvlint/csvw/date_format.rb +90 -92
  28. data/lib/csvlint/csvw/metadata_error.rb +1 -3
  29. data/lib/csvlint/csvw/number_format.rb +40 -32
  30. data/lib/csvlint/csvw/property_checker.rb +714 -717
  31. data/lib/csvlint/csvw/table.rb +49 -52
  32. data/lib/csvlint/csvw/table_group.rb +24 -23
  33. data/lib/csvlint/error_collector.rb +2 -0
  34. data/lib/csvlint/error_message.rb +0 -1
  35. data/lib/csvlint/field.rb +153 -141
  36. data/lib/csvlint/schema.rb +34 -42
  37. data/lib/csvlint/validate.rb +161 -143
  38. data/lib/csvlint/version.rb +1 -1
  39. data/lib/csvlint.rb +22 -23
  40. data/spec/csvw/column_spec.rb +15 -16
  41. data/spec/csvw/date_format_spec.rb +5 -7
  42. data/spec/csvw/number_format_spec.rb +2 -4
  43. data/spec/csvw/table_group_spec.rb +103 -105
  44. data/spec/csvw/table_spec.rb +71 -73
  45. data/spec/field_spec.rb +116 -121
  46. data/spec/schema_spec.rb +129 -139
  47. data/spec/spec_helper.rb +6 -6
  48. data/spec/validator_spec.rb +167 -190
  49. metadata +22 -55
@@ -5,7 +5,7 @@ module Csvlint
5
5
 
6
6
  attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :default_name, :titles, :value_url, :virtual, :annotations
7
7
 
8
- def initialize(number, name, id: nil, about_url: nil, datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" }, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, default_name: nil, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
8
+ def initialize(number, name, id: nil, about_url: nil, datatype: {"@id" => "http://www.w3.org/2001/XMLSchema#string"}, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, default_name: nil, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
9
9
  @number = number
10
10
  @name = name
11
11
  @id = id
@@ -30,18 +30,18 @@ module Csvlint
30
30
  @warnings += warnings
31
31
  end
32
32
 
33
- def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
33
+ def self.from_json(number, column_desc, base_url = nil, lang = "und", inherited_properties = {})
34
34
  annotations = {}
35
35
  warnings = []
36
36
  column_properties = {}
37
37
  inherited_properties = inherited_properties.clone
38
38
 
39
- column_desc.each do |property,value|
39
+ column_desc.each do |property, value|
40
40
  if property == "@type"
41
- raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != 'Column'
41
+ raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != "Column"
42
42
  else
43
43
  v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
44
- warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
44
+ warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
45
45
  if type == :annotation
46
46
  annotations[property] = v
47
47
  elsif type == :common || type == :column
@@ -54,9 +54,9 @@ module Csvlint
54
54
  end
55
55
  end
56
56
 
57
- return self.new(number, column_properties["name"],
57
+ new(number, column_properties["name"],
58
58
  id: column_properties["@id"],
59
- datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
59
+ datatype: inherited_properties["datatype"] || {"@id" => "http://www.w3.org/2001/XMLSchema#string"},
60
60
  lang: inherited_properties["lang"] || "und",
61
61
  null: inherited_properties["null"] || [""],
62
62
  default: inherited_properties["default"] || "",
@@ -68,35 +68,34 @@ module Csvlint
68
68
  ordered: inherited_properties["ordered"] || false,
69
69
  default_name: column_properties["titles"] && column_properties["titles"][lang] ? column_properties["titles"][lang][0] : nil,
70
70
  titles: column_properties["titles"],
71
- suppress_output: column_properties["suppressOutput"] ? column_properties["suppressOutput"] : false,
71
+ suppress_output: column_properties["suppressOutput"] || false,
72
72
  virtual: column_properties["virtual"] || false,
73
73
  annotations: annotations,
74
- warnings: warnings
75
- )
74
+ warnings: warnings)
76
75
  end
77
76
 
78
77
  def validate_header(header, strict)
79
78
  reset
80
79
  if strict || @titles
81
- valid_headers = @titles ? @titles.map{ |l,v| v if Column.languages_match(l, lang) }.flatten : []
80
+ valid_headers = @titles ? @titles.map { |l, v| v if Column.languages_match(l, lang) }.flatten : []
82
81
  unless valid_headers.include? header
83
82
  if strict
84
- build_errors(:invalid_header, :schema, 1, @number, header, @titles)
83
+ build_errors(:invalid_header, :schema, 1, @number, header, @titles)
85
84
  else
86
85
  build_warnings(:invalid_header, :schema, 1, @number, header, @titles)
87
86
  end
88
87
  end
89
88
  end
90
- return valid?
89
+ valid?
91
90
  end
92
91
 
93
- def validate(string_value, row=nil)
92
+ def validate(string_value, row = nil)
94
93
  reset
95
- string_value = string_value || @default
94
+ string_value ||= @default
96
95
  if null.include? string_value
97
96
  validate_required(nil, row)
98
- values = nil
99
- return values
97
+ nil
98
+
100
99
  else
101
100
  string_values = @separator.nil? ? [string_value] : string_value.split(@separator)
102
101
  values = []
@@ -108,300 +107,300 @@ module Csvlint
108
107
  invalid = !validate_format(value, row) || invalid
109
108
  invalid = !validate_length(value, row) || invalid
110
109
  invalid = !validate_value(value, row) || invalid
111
- values << (invalid ? { :invalid => s } : value)
110
+ values << (invalid ? {invalid: s} : value)
112
111
  else
113
112
  build_errors(warning, :schema, row, @number, s, @datatype)
114
- values << { :invalid => s }
113
+ values << {invalid: s}
115
114
  end
116
115
  end
117
- values = (values && @separator.nil?) ? values[0] : values
118
- return values
116
+ values && @separator.nil? ? values[0] : values
117
+
119
118
  end
120
119
  end
121
120
 
122
121
  private
123
- class << self
124
122
 
125
- def create_date_parser(type, warning)
126
- return lambda { |value, format|
127
- format = Csvlint::Csvw::DateFormat.new(nil, type) if format.nil?
128
- v = format.parse(value)
129
- return nil, warning if v.nil?
130
- return v, nil
131
- }
132
- end
133
-
134
- def create_regexp_based_parser(regexp, warning)
135
- return lambda { |value, format|
136
- return nil, warning unless value =~ regexp
137
- return value, nil
138
- }
139
- end
140
-
141
- def languages_match(l1, l2)
142
- return true if l1 == l2 || l1 == "und" || l2 == "und"
143
- return true if l1 =~ Regexp.new("^#{l2}-") || l2 =~ Regexp.new("^#{l1}-")
144
- return false
145
- end
123
+ class << self
124
+ def create_date_parser(type, warning)
125
+ lambda { |value, format|
126
+ format = Csvlint::Csvw::DateFormat.new(nil, type) if format.nil?
127
+ v = format.parse(value)
128
+ return nil, warning if v.nil?
129
+ return v, nil
130
+ }
146
131
  end
147
132
 
148
- def validate_required(value, row)
149
- if @required && value.nil?
150
- build_errors(:required, :schema, row, number, value, { "required" => @required })
151
- return false
152
- end
153
- return true
133
+ def create_regexp_based_parser(regexp, warning)
134
+ lambda { |value, format|
135
+ return nil, warning unless value&.match?(regexp)
136
+ return value, nil
137
+ }
154
138
  end
155
139
 
156
- def validate_length(value, row)
157
- valid = true
158
- if datatype["length"] || datatype["minLength"] || datatype["maxLength"]
159
- length = value.length
160
- length = value.gsub(/==?$/,"").length * 3 / 4 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#base64Binary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#base64Binary"
161
- length = value.length / 2 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#hexBinary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#hexBinary"
162
-
163
- if datatype["minLength"] && length < datatype["minLength"]
164
- build_errors(:min_length, :schema, row, number, value, { "minLength" => datatype["minLength"] })
165
- valid = false
166
- end
167
- if datatype["maxLength"] && length > datatype["maxLength"]
168
- build_errors(:max_length, :schema, row, number, value, { "maxLength" => datatype["maxLength"] })
169
- valid = false
170
- end
171
- if datatype["length"] && length != datatype["length"]
172
- build_errors(:length, :schema, row, number, value, { "length" => datatype["length"] })
173
- valid = false
174
- end
175
- end
176
- return valid
140
+ def languages_match(l1, l2)
141
+ return true if l1 == l2 || l1 == "und" || l2 == "und"
142
+ return true if l1 =~ Regexp.new("^#{l2}-") || l2 =~ Regexp.new("^#{l1}-")
143
+ false
177
144
  end
145
+ end
178
146
 
179
- def validate_format(value, row)
180
- if datatype["format"]
181
- unless DATATYPE_FORMAT_VALIDATION[datatype["base"]].call(value, datatype["format"])
182
- build_errors(:format, :schema, row, number, value, { "format" => datatype["format"] })
183
- return false
184
- end
185
- end
186
- return true
147
+ def validate_required(value, row)
148
+ if @required && value.nil?
149
+ build_errors(:required, :schema, row, number, value, {"required" => @required})
150
+ return false
187
151
  end
152
+ true
153
+ end
154
+
155
+ def validate_length(value, row)
156
+ valid = true
157
+ if datatype["length"] || datatype["minLength"] || datatype["maxLength"]
158
+ length = value.length
159
+ length = value.gsub(/==?$/, "").length * 3 / 4 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#base64Binary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#base64Binary"
160
+ length = value.length / 2 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#hexBinary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#hexBinary"
188
161
 
189
- def validate_value(value, row)
190
- valid = true
191
- if datatype["minInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] < datatype["minInclusive"][:dateTime]) : (value < datatype["minInclusive"]))
192
- build_errors(:min_inclusive, :schema, row, number, value, { "minInclusive" => datatype["minInclusive"] })
162
+ if datatype["minLength"] && length < datatype["minLength"]
163
+ build_errors(:min_length, :schema, row, number, value, {"minLength" => datatype["minLength"]})
193
164
  valid = false
194
165
  end
195
- if datatype["maxInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] > datatype["maxInclusive"][:dateTime]) : (value > datatype["maxInclusive"]))
196
- build_errors(:max_inclusive, :schema, row, number, value, { "maxInclusive" => datatype["maxInclusive"] })
166
+ if datatype["maxLength"] && length > datatype["maxLength"]
167
+ build_errors(:max_length, :schema, row, number, value, {"maxLength" => datatype["maxLength"]})
197
168
  valid = false
198
169
  end
199
- if datatype["minExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] <= datatype["minExclusive"][:dateTime]) : (value <= datatype["minExclusive"]))
200
- build_errors(:min_exclusive, :schema, row, number, value, { "minExclusive" => datatype["minExclusive"] })
170
+ if datatype["length"] && length != datatype["length"]
171
+ build_errors(:length, :schema, row, number, value, {"length" => datatype["length"]})
201
172
  valid = false
202
173
  end
203
- if datatype["maxExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] >= datatype["maxExclusive"][:dateTime]) : (value >= datatype["maxExclusive"]))
204
- build_errors(:max_exclusive, :schema, row, number, value, { "maxExclusive" => datatype["maxExclusive"] })
205
- valid = false
174
+ end
175
+ valid
176
+ end
177
+
178
+ def validate_format(value, row)
179
+ if datatype["format"]
180
+ unless DATATYPE_FORMAT_VALIDATION[datatype["base"]].call(value, datatype["format"])
181
+ build_errors(:format, :schema, row, number, value, {"format" => datatype["format"]})
182
+ return false
206
183
  end
207
- return valid
208
184
  end
185
+ true
186
+ end
187
+
188
+ def validate_value(value, row)
189
+ valid = true
190
+ if datatype["minInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] < datatype["minInclusive"][:dateTime]) : (value < datatype["minInclusive"]))
191
+ build_errors(:min_inclusive, :schema, row, number, value, {"minInclusive" => datatype["minInclusive"]})
192
+ valid = false
193
+ end
194
+ if datatype["maxInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] > datatype["maxInclusive"][:dateTime]) : (value > datatype["maxInclusive"]))
195
+ build_errors(:max_inclusive, :schema, row, number, value, {"maxInclusive" => datatype["maxInclusive"]})
196
+ valid = false
197
+ end
198
+ if datatype["minExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] <= datatype["minExclusive"][:dateTime]) : (value <= datatype["minExclusive"]))
199
+ build_errors(:min_exclusive, :schema, row, number, value, {"minExclusive" => datatype["minExclusive"]})
200
+ valid = false
201
+ end
202
+ if datatype["maxExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] >= datatype["maxExclusive"][:dateTime]) : (value >= datatype["maxExclusive"]))
203
+ build_errors(:max_exclusive, :schema, row, number, value, {"maxExclusive" => datatype["maxExclusive"]})
204
+ valid = false
205
+ end
206
+ valid
207
+ end
209
208
 
210
- REGEXP_VALIDATION = lambda { |value, format| value =~ format }
209
+ REGEXP_VALIDATION = lambda { |value, format| value =~ format }
211
210
 
212
- NO_ADDITIONAL_VALIDATION = lambda { |value, format| true }
211
+ NO_ADDITIONAL_VALIDATION = lambda { |value, format| true }
213
212
 
214
- DATATYPE_FORMAT_VALIDATION = {
215
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => REGEXP_VALIDATION,
216
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => REGEXP_VALIDATION,
217
- "http://www.w3.org/ns/csvw#JSON" => REGEXP_VALIDATION,
218
- "http://www.w3.org/2001/XMLSchema#anyAtomicType" => REGEXP_VALIDATION,
219
- "http://www.w3.org/2001/XMLSchema#anyURI" => REGEXP_VALIDATION,
220
- "http://www.w3.org/2001/XMLSchema#base64Binary" => REGEXP_VALIDATION,
221
- "http://www.w3.org/2001/XMLSchema#boolean" => NO_ADDITIONAL_VALIDATION,
222
- "http://www.w3.org/2001/XMLSchema#date" => NO_ADDITIONAL_VALIDATION,
223
- "http://www.w3.org/2001/XMLSchema#dateTime" => NO_ADDITIONAL_VALIDATION,
224
- "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => NO_ADDITIONAL_VALIDATION,
225
- "http://www.w3.org/2001/XMLSchema#decimal" => NO_ADDITIONAL_VALIDATION,
226
- "http://www.w3.org/2001/XMLSchema#integer" => NO_ADDITIONAL_VALIDATION,
227
- "http://www.w3.org/2001/XMLSchema#long" => NO_ADDITIONAL_VALIDATION,
228
- "http://www.w3.org/2001/XMLSchema#int" => NO_ADDITIONAL_VALIDATION,
229
- "http://www.w3.org/2001/XMLSchema#short" => NO_ADDITIONAL_VALIDATION,
230
- "http://www.w3.org/2001/XMLSchema#byte" => NO_ADDITIONAL_VALIDATION,
231
- "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => NO_ADDITIONAL_VALIDATION,
232
- "http://www.w3.org/2001/XMLSchema#positiveInteger" => NO_ADDITIONAL_VALIDATION,
233
- "http://www.w3.org/2001/XMLSchema#unsignedLong" => NO_ADDITIONAL_VALIDATION,
234
- "http://www.w3.org/2001/XMLSchema#unsignedInt" => NO_ADDITIONAL_VALIDATION,
235
- "http://www.w3.org/2001/XMLSchema#unsignedShort" => NO_ADDITIONAL_VALIDATION,
236
- "http://www.w3.org/2001/XMLSchema#unsignedByte" => NO_ADDITIONAL_VALIDATION,
237
- "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => NO_ADDITIONAL_VALIDATION,
238
- "http://www.w3.org/2001/XMLSchema#negativeInteger" => NO_ADDITIONAL_VALIDATION,
239
- "http://www.w3.org/2001/XMLSchema#double" => NO_ADDITIONAL_VALIDATION,
240
- "http://www.w3.org/2001/XMLSchema#duration" => REGEXP_VALIDATION,
241
- "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => REGEXP_VALIDATION,
242
- "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => REGEXP_VALIDATION,
243
- "http://www.w3.org/2001/XMLSchema#float" => NO_ADDITIONAL_VALIDATION,
244
- "http://www.w3.org/2001/XMLSchema#gDay" => NO_ADDITIONAL_VALIDATION,
245
- "http://www.w3.org/2001/XMLSchema#gMonth" => NO_ADDITIONAL_VALIDATION,
246
- "http://www.w3.org/2001/XMLSchema#gMonthDay" => NO_ADDITIONAL_VALIDATION,
247
- "http://www.w3.org/2001/XMLSchema#gYear" => NO_ADDITIONAL_VALIDATION,
248
- "http://www.w3.org/2001/XMLSchema#gYearMonth" => NO_ADDITIONAL_VALIDATION,
249
- "http://www.w3.org/2001/XMLSchema#hexBinary" => REGEXP_VALIDATION,
250
- "http://www.w3.org/2001/XMLSchema#QName" => REGEXP_VALIDATION,
251
- "http://www.w3.org/2001/XMLSchema#string" => REGEXP_VALIDATION,
252
- "http://www.w3.org/2001/XMLSchema#normalizedString" => REGEXP_VALIDATION,
253
- "http://www.w3.org/2001/XMLSchema#token" => REGEXP_VALIDATION,
254
- "http://www.w3.org/2001/XMLSchema#language" => REGEXP_VALIDATION,
255
- "http://www.w3.org/2001/XMLSchema#Name" => REGEXP_VALIDATION,
256
- "http://www.w3.org/2001/XMLSchema#NMTOKEN" => REGEXP_VALIDATION,
257
- "http://www.w3.org/2001/XMLSchema#time" => NO_ADDITIONAL_VALIDATION
258
- }
213
+ DATATYPE_FORMAT_VALIDATION = {
214
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => REGEXP_VALIDATION,
215
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => REGEXP_VALIDATION,
216
+ "http://www.w3.org/ns/csvw#JSON" => REGEXP_VALIDATION,
217
+ "http://www.w3.org/2001/XMLSchema#anyAtomicType" => REGEXP_VALIDATION,
218
+ "http://www.w3.org/2001/XMLSchema#anyURI" => REGEXP_VALIDATION,
219
+ "http://www.w3.org/2001/XMLSchema#base64Binary" => REGEXP_VALIDATION,
220
+ "http://www.w3.org/2001/XMLSchema#boolean" => NO_ADDITIONAL_VALIDATION,
221
+ "http://www.w3.org/2001/XMLSchema#date" => NO_ADDITIONAL_VALIDATION,
222
+ "http://www.w3.org/2001/XMLSchema#dateTime" => NO_ADDITIONAL_VALIDATION,
223
+ "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => NO_ADDITIONAL_VALIDATION,
224
+ "http://www.w3.org/2001/XMLSchema#decimal" => NO_ADDITIONAL_VALIDATION,
225
+ "http://www.w3.org/2001/XMLSchema#integer" => NO_ADDITIONAL_VALIDATION,
226
+ "http://www.w3.org/2001/XMLSchema#long" => NO_ADDITIONAL_VALIDATION,
227
+ "http://www.w3.org/2001/XMLSchema#int" => NO_ADDITIONAL_VALIDATION,
228
+ "http://www.w3.org/2001/XMLSchema#short" => NO_ADDITIONAL_VALIDATION,
229
+ "http://www.w3.org/2001/XMLSchema#byte" => NO_ADDITIONAL_VALIDATION,
230
+ "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => NO_ADDITIONAL_VALIDATION,
231
+ "http://www.w3.org/2001/XMLSchema#positiveInteger" => NO_ADDITIONAL_VALIDATION,
232
+ "http://www.w3.org/2001/XMLSchema#unsignedLong" => NO_ADDITIONAL_VALIDATION,
233
+ "http://www.w3.org/2001/XMLSchema#unsignedInt" => NO_ADDITIONAL_VALIDATION,
234
+ "http://www.w3.org/2001/XMLSchema#unsignedShort" => NO_ADDITIONAL_VALIDATION,
235
+ "http://www.w3.org/2001/XMLSchema#unsignedByte" => NO_ADDITIONAL_VALIDATION,
236
+ "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => NO_ADDITIONAL_VALIDATION,
237
+ "http://www.w3.org/2001/XMLSchema#negativeInteger" => NO_ADDITIONAL_VALIDATION,
238
+ "http://www.w3.org/2001/XMLSchema#double" => NO_ADDITIONAL_VALIDATION,
239
+ "http://www.w3.org/2001/XMLSchema#duration" => REGEXP_VALIDATION,
240
+ "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => REGEXP_VALIDATION,
241
+ "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => REGEXP_VALIDATION,
242
+ "http://www.w3.org/2001/XMLSchema#float" => NO_ADDITIONAL_VALIDATION,
243
+ "http://www.w3.org/2001/XMLSchema#gDay" => NO_ADDITIONAL_VALIDATION,
244
+ "http://www.w3.org/2001/XMLSchema#gMonth" => NO_ADDITIONAL_VALIDATION,
245
+ "http://www.w3.org/2001/XMLSchema#gMonthDay" => NO_ADDITIONAL_VALIDATION,
246
+ "http://www.w3.org/2001/XMLSchema#gYear" => NO_ADDITIONAL_VALIDATION,
247
+ "http://www.w3.org/2001/XMLSchema#gYearMonth" => NO_ADDITIONAL_VALIDATION,
248
+ "http://www.w3.org/2001/XMLSchema#hexBinary" => REGEXP_VALIDATION,
249
+ "http://www.w3.org/2001/XMLSchema#QName" => REGEXP_VALIDATION,
250
+ "http://www.w3.org/2001/XMLSchema#string" => REGEXP_VALIDATION,
251
+ "http://www.w3.org/2001/XMLSchema#normalizedString" => REGEXP_VALIDATION,
252
+ "http://www.w3.org/2001/XMLSchema#token" => REGEXP_VALIDATION,
253
+ "http://www.w3.org/2001/XMLSchema#language" => REGEXP_VALIDATION,
254
+ "http://www.w3.org/2001/XMLSchema#Name" => REGEXP_VALIDATION,
255
+ "http://www.w3.org/2001/XMLSchema#NMTOKEN" => REGEXP_VALIDATION,
256
+ "http://www.w3.org/2001/XMLSchema#time" => NO_ADDITIONAL_VALIDATION
257
+ }
259
258
 
260
- TRIM_VALUE = lambda { |value, format| return value.strip, nil }
261
- ALL_VALUES_VALID = lambda { |value, format| return value, nil }
259
+ TRIM_VALUE = lambda { |value, format| return value.strip, nil }
260
+ ALL_VALUES_VALID = lambda { |value, format| return value, nil }
262
261
 
263
- NUMERIC_PARSER = lambda { |value, format, integer=false|
264
- format = Csvlint::Csvw::NumberFormat.new(nil, nil, ".", integer) if format.nil?
265
- v = format.parse(value)
266
- return nil, :invalid_number if v.nil?
267
- return v, nil
268
- }
262
+ NUMERIC_PARSER = lambda { |value, format, integer = false|
263
+ format = Csvlint::Csvw::NumberFormat.new(nil, nil, ".", integer) if format.nil?
264
+ v = format.parse(value)
265
+ return nil, :invalid_number if v.nil?
266
+ return v, nil
267
+ }
269
268
 
270
- DATATYPE_PARSER = {
271
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => TRIM_VALUE,
272
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => TRIM_VALUE,
273
- "http://www.w3.org/ns/csvw#JSON" => TRIM_VALUE,
274
- "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
275
- "http://www.w3.org/2001/XMLSchema#anyURI" => TRIM_VALUE,
276
- "http://www.w3.org/2001/XMLSchema#base64Binary" => TRIM_VALUE,
277
- "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
278
- if format.nil?
279
- return true, nil if ["true", "1"].include? value
280
- return false, nil if ["false", "0"].include? value
281
- else
282
- return true, nil if value == format[0]
283
- return false, nil if value == format[1]
284
- end
285
- return value, :invalid_boolean
286
- },
287
- "http://www.w3.org/2001/XMLSchema#date" =>
288
- create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
289
- "http://www.w3.org/2001/XMLSchema#dateTime" =>
290
- create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
291
- "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
292
- create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
293
- "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
294
- return nil, :invalid_decimal if value =~ /(E|e|^(NaN|INF|-INF)$)/
295
- return NUMERIC_PARSER.call(value, format)
296
- },
297
- "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
298
- v, w = NUMERIC_PARSER.call(value, format, true)
299
- return v, :invalid_integer unless w.nil?
300
- return nil, :invalid_integer unless v.kind_of? Integer
301
- return v, w
302
- },
303
- "http://www.w3.org/2001/XMLSchema#long" => lambda { |value, format|
304
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
305
- return v, :invalid_long unless w.nil?
306
- return nil, :invalid_long unless v <= 9223372036854775807 && v >= -9223372036854775808
307
- return v, w
308
- },
309
- "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, format|
310
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
311
- return v, :invalid_int unless w.nil?
312
- return nil, :invalid_int unless v <= 2147483647 && v >= -2147483648
313
- return v, w
314
- },
315
- "http://www.w3.org/2001/XMLSchema#short" => lambda { |value, format|
316
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
317
- return v, :invalid_short unless w.nil?
318
- return nil, :invalid_short unless v <= 32767 && v >= -32768
319
- return v, w
320
- },
321
- "http://www.w3.org/2001/XMLSchema#byte" => lambda { |value, format|
322
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
323
- return v, :invalid_byte unless w.nil?
324
- return nil, :invalid_byte unless v <= 127 && v >= -128
325
- return v, w
326
- },
327
- "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda { |value, format|
328
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
329
- return v, :invalid_nonNegativeInteger unless w.nil?
330
- return nil, :invalid_nonNegativeInteger unless v >= 0
331
- return v, w
332
- },
333
- "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda { |value, format|
334
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
335
- return v, :invalid_positiveInteger unless w.nil?
336
- return nil, :invalid_positiveInteger unless v > 0
337
- return v, w
338
- },
339
- "http://www.w3.org/2001/XMLSchema#unsignedLong" => lambda { |value, format|
340
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
341
- return v, :invalid_unsignedLong unless w.nil?
342
- return nil, :invalid_unsignedLong unless v <= 18446744073709551615
343
- return v, w
344
- },
345
- "http://www.w3.org/2001/XMLSchema#unsignedInt" => lambda { |value, format|
346
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
347
- return v, :invalid_unsignedInt unless w.nil?
348
- return nil, :invalid_unsignedInt unless v <= 4294967295
349
- return v, w
350
- },
351
- "http://www.w3.org/2001/XMLSchema#unsignedShort" => lambda { |value, format|
352
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
353
- return v, :invalid_unsignedShort unless w.nil?
354
- return nil, :invalid_unsignedShort unless v <= 65535
355
- return v, w
356
- },
357
- "http://www.w3.org/2001/XMLSchema#unsignedByte" => lambda { |value, format|
358
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
359
- return v, :invalid_unsignedByte unless w.nil?
360
- return nil, :invalid_unsignedByte unless v <= 255
361
- return v, w
362
- },
363
- "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda { |value, format|
364
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
365
- return v, :invalid_nonPositiveInteger unless w.nil?
366
- return nil, :invalid_nonPositiveInteger unless v <= 0
367
- return v, w
368
- },
369
- "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda { |value, format|
370
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
371
- return v, :invalid_negativeInteger unless w.nil?
372
- return nil, :invalid_negativeInteger unless v < 0
373
- return v, w
374
- },
375
- "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
376
- # regular expressions here taken from XML Schema datatypes spec
377
- "http://www.w3.org/2001/XMLSchema#duration" =>
378
- create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
379
- "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
380
- create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
381
- "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
382
- create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
383
- "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
384
- "http://www.w3.org/2001/XMLSchema#gDay" =>
385
- create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
386
- "http://www.w3.org/2001/XMLSchema#gMonth" =>
387
- create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
388
- "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
389
- create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
390
- "http://www.w3.org/2001/XMLSchema#gYear" =>
391
- create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
392
- "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
393
- create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
394
- "http://www.w3.org/2001/XMLSchema#hexBinary" => TRIM_VALUE,
395
- "http://www.w3.org/2001/XMLSchema#QName" => TRIM_VALUE,
396
- "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
397
- "http://www.w3.org/2001/XMLSchema#normalizedString" => TRIM_VALUE,
398
- "http://www.w3.org/2001/XMLSchema#token" => TRIM_VALUE,
399
- "http://www.w3.org/2001/XMLSchema#language" => TRIM_VALUE,
400
- "http://www.w3.org/2001/XMLSchema#Name" => TRIM_VALUE,
401
- "http://www.w3.org/2001/XMLSchema#NMTOKEN" => TRIM_VALUE,
402
- "http://www.w3.org/2001/XMLSchema#time" =>
403
- create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
404
- }
269
+ DATATYPE_PARSER = {
270
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => TRIM_VALUE,
271
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => TRIM_VALUE,
272
+ "http://www.w3.org/ns/csvw#JSON" => TRIM_VALUE,
273
+ "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
274
+ "http://www.w3.org/2001/XMLSchema#anyURI" => TRIM_VALUE,
275
+ "http://www.w3.org/2001/XMLSchema#base64Binary" => TRIM_VALUE,
276
+ "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
277
+ if format.nil?
278
+ return true, nil if ["true", "1"].include? value
279
+ return false, nil if ["false", "0"].include? value
280
+ else
281
+ return true, nil if value == format[0]
282
+ return false, nil if value == format[1]
283
+ end
284
+ return value, :invalid_boolean
285
+ },
286
+ "http://www.w3.org/2001/XMLSchema#date" =>
287
+ create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
288
+ "http://www.w3.org/2001/XMLSchema#dateTime" =>
289
+ create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
290
+ "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
291
+ create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
292
+ "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
293
+ return nil, :invalid_decimal if /(E|e|^(NaN|INF|-INF)$)/.match?(value)
294
+ return NUMERIC_PARSER.call(value, format)
295
+ },
296
+ "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
297
+ v, w = NUMERIC_PARSER.call(value, format, true)
298
+ return v, :invalid_integer unless w.nil?
299
+ return nil, :invalid_integer unless v.is_a? Integer
300
+ return v, w
301
+ },
302
+ "http://www.w3.org/2001/XMLSchema#long" => lambda { |value, format|
303
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
304
+ return v, :invalid_long unless w.nil?
305
+ return nil, :invalid_long unless v <= 9223372036854775807 && v >= -9223372036854775808
306
+ return v, w
307
+ },
308
+ "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, format|
309
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
310
+ return v, :invalid_int unless w.nil?
311
+ return nil, :invalid_int unless v <= 2147483647 && v >= -2147483648
312
+ return v, w
313
+ },
314
+ "http://www.w3.org/2001/XMLSchema#short" => lambda { |value, format|
315
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
316
+ return v, :invalid_short unless w.nil?
317
+ return nil, :invalid_short unless v <= 32767 && v >= -32768
318
+ return v, w
319
+ },
320
+ "http://www.w3.org/2001/XMLSchema#byte" => lambda { |value, format|
321
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
322
+ return v, :invalid_byte unless w.nil?
323
+ return nil, :invalid_byte unless v <= 127 && v >= -128
324
+ return v, w
325
+ },
326
+ "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda { |value, format|
327
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
328
+ return v, :invalid_nonNegativeInteger unless w.nil?
329
+ return nil, :invalid_nonNegativeInteger unless v >= 0
330
+ return v, w
331
+ },
332
+ "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda { |value, format|
333
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
334
+ return v, :invalid_positiveInteger unless w.nil?
335
+ return nil, :invalid_positiveInteger unless v > 0
336
+ return v, w
337
+ },
338
+ "http://www.w3.org/2001/XMLSchema#unsignedLong" => lambda { |value, format|
339
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
340
+ return v, :invalid_unsignedLong unless w.nil?
341
+ return nil, :invalid_unsignedLong unless v <= 18446744073709551615
342
+ return v, w
343
+ },
344
+ "http://www.w3.org/2001/XMLSchema#unsignedInt" => lambda { |value, format|
345
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
346
+ return v, :invalid_unsignedInt unless w.nil?
347
+ return nil, :invalid_unsignedInt unless v <= 4294967295
348
+ return v, w
349
+ },
350
+ "http://www.w3.org/2001/XMLSchema#unsignedShort" => lambda { |value, format|
351
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
352
+ return v, :invalid_unsignedShort unless w.nil?
353
+ return nil, :invalid_unsignedShort unless v <= 65535
354
+ return v, w
355
+ },
356
+ "http://www.w3.org/2001/XMLSchema#unsignedByte" => lambda { |value, format|
357
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
358
+ return v, :invalid_unsignedByte unless w.nil?
359
+ return nil, :invalid_unsignedByte unless v <= 255
360
+ return v, w
361
+ },
362
+ "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda { |value, format|
363
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
364
+ return v, :invalid_nonPositiveInteger unless w.nil?
365
+ return nil, :invalid_nonPositiveInteger unless v <= 0
366
+ return v, w
367
+ },
368
+ "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda { |value, format|
369
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
370
+ return v, :invalid_negativeInteger unless w.nil?
371
+ return nil, :invalid_negativeInteger unless v < 0
372
+ return v, w
373
+ },
374
+ "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
375
+ # regular expressions here taken from XML Schema datatypes spec
376
+ "http://www.w3.org/2001/XMLSchema#duration" =>
377
+ create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
378
+ "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
379
+ create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
380
+ "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
381
+ create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
382
+ "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
383
+ "http://www.w3.org/2001/XMLSchema#gDay" =>
384
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
385
+ "http://www.w3.org/2001/XMLSchema#gMonth" =>
386
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
387
+ "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
388
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
389
+ "http://www.w3.org/2001/XMLSchema#gYear" =>
390
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
391
+ "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
392
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
393
+ "http://www.w3.org/2001/XMLSchema#hexBinary" => TRIM_VALUE,
394
+ "http://www.w3.org/2001/XMLSchema#QName" => TRIM_VALUE,
395
+ "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
396
+ "http://www.w3.org/2001/XMLSchema#normalizedString" => TRIM_VALUE,
397
+ "http://www.w3.org/2001/XMLSchema#token" => TRIM_VALUE,
398
+ "http://www.w3.org/2001/XMLSchema#language" => TRIM_VALUE,
399
+ "http://www.w3.org/2001/XMLSchema#Name" => TRIM_VALUE,
400
+ "http://www.w3.org/2001/XMLSchema#NMTOKEN" => TRIM_VALUE,
401
+ "http://www.w3.org/2001/XMLSchema#time" =>
402
+ create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
403
+ }
405
404
  end
406
405
  end
407
406
  end