csvlint 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +4 -0
  3. data/.github/workflows/push.yml +14 -2
  4. data/.ruby-version +1 -1
  5. data/.standard_todo.yml +43 -0
  6. data/Dockerfile +16 -0
  7. data/Gemfile +2 -2
  8. data/README.md +9 -9
  9. data/Rakefile +7 -7
  10. data/csvlint.gemspec +14 -16
  11. data/docker_notes_for_windows.txt +20 -0
  12. data/features/step_definitions/cli_steps.rb +11 -11
  13. data/features/step_definitions/information_steps.rb +4 -4
  14. data/features/step_definitions/parse_csv_steps.rb +11 -11
  15. data/features/step_definitions/schema_validation_steps.rb +10 -10
  16. data/features/step_definitions/sources_steps.rb +1 -1
  17. data/features/step_definitions/validation_errors_steps.rb +19 -19
  18. data/features/step_definitions/validation_info_steps.rb +9 -9
  19. data/features/step_definitions/validation_warnings_steps.rb +11 -11
  20. data/features/support/aruba.rb +6 -6
  21. data/features/support/earl_formatter.rb +39 -39
  22. data/features/support/env.rb +10 -11
  23. data/features/support/load_tests.rb +107 -103
  24. data/features/support/webmock.rb +2 -2
  25. data/lib/csvlint/cli.rb +133 -130
  26. data/lib/csvlint/csvw/column.rb +279 -280
  27. data/lib/csvlint/csvw/date_format.rb +90 -92
  28. data/lib/csvlint/csvw/metadata_error.rb +1 -3
  29. data/lib/csvlint/csvw/number_format.rb +40 -32
  30. data/lib/csvlint/csvw/property_checker.rb +714 -717
  31. data/lib/csvlint/csvw/table.rb +49 -52
  32. data/lib/csvlint/csvw/table_group.rb +24 -23
  33. data/lib/csvlint/error_collector.rb +2 -0
  34. data/lib/csvlint/error_message.rb +0 -1
  35. data/lib/csvlint/field.rb +153 -141
  36. data/lib/csvlint/schema.rb +34 -42
  37. data/lib/csvlint/validate.rb +161 -143
  38. data/lib/csvlint/version.rb +1 -1
  39. data/lib/csvlint.rb +22 -23
  40. data/spec/csvw/column_spec.rb +15 -16
  41. data/spec/csvw/date_format_spec.rb +5 -7
  42. data/spec/csvw/number_format_spec.rb +2 -4
  43. data/spec/csvw/table_group_spec.rb +103 -105
  44. data/spec/csvw/table_spec.rb +71 -73
  45. data/spec/field_spec.rb +116 -121
  46. data/spec/schema_spec.rb +129 -139
  47. data/spec/spec_helper.rb +6 -6
  48. data/spec/validator_spec.rb +167 -190
  49. metadata +22 -55
@@ -5,7 +5,7 @@ module Csvlint
5
5
 
6
6
  attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :default_name, :titles, :value_url, :virtual, :annotations
7
7
 
8
- def initialize(number, name, id: nil, about_url: nil, datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" }, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, default_name: nil, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
8
+ def initialize(number, name, id: nil, about_url: nil, datatype: {"@id" => "http://www.w3.org/2001/XMLSchema#string"}, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, default_name: nil, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
9
9
  @number = number
10
10
  @name = name
11
11
  @id = id
@@ -30,18 +30,18 @@ module Csvlint
30
30
  @warnings += warnings
31
31
  end
32
32
 
33
- def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
33
+ def self.from_json(number, column_desc, base_url = nil, lang = "und", inherited_properties = {})
34
34
  annotations = {}
35
35
  warnings = []
36
36
  column_properties = {}
37
37
  inherited_properties = inherited_properties.clone
38
38
 
39
- column_desc.each do |property,value|
39
+ column_desc.each do |property, value|
40
40
  if property == "@type"
41
- raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != 'Column'
41
+ raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != "Column"
42
42
  else
43
43
  v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
44
- warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
44
+ warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
45
45
  if type == :annotation
46
46
  annotations[property] = v
47
47
  elsif type == :common || type == :column
@@ -54,9 +54,9 @@ module Csvlint
54
54
  end
55
55
  end
56
56
 
57
- return self.new(number, column_properties["name"],
57
+ new(number, column_properties["name"],
58
58
  id: column_properties["@id"],
59
- datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
59
+ datatype: inherited_properties["datatype"] || {"@id" => "http://www.w3.org/2001/XMLSchema#string"},
60
60
  lang: inherited_properties["lang"] || "und",
61
61
  null: inherited_properties["null"] || [""],
62
62
  default: inherited_properties["default"] || "",
@@ -68,35 +68,34 @@ module Csvlint
68
68
  ordered: inherited_properties["ordered"] || false,
69
69
  default_name: column_properties["titles"] && column_properties["titles"][lang] ? column_properties["titles"][lang][0] : nil,
70
70
  titles: column_properties["titles"],
71
- suppress_output: column_properties["suppressOutput"] ? column_properties["suppressOutput"] : false,
71
+ suppress_output: column_properties["suppressOutput"] || false,
72
72
  virtual: column_properties["virtual"] || false,
73
73
  annotations: annotations,
74
- warnings: warnings
75
- )
74
+ warnings: warnings)
76
75
  end
77
76
 
78
77
  def validate_header(header, strict)
79
78
  reset
80
79
  if strict || @titles
81
- valid_headers = @titles ? @titles.map{ |l,v| v if Column.languages_match(l, lang) }.flatten : []
80
+ valid_headers = @titles ? @titles.map { |l, v| v if Column.languages_match(l, lang) }.flatten : []
82
81
  unless valid_headers.include? header
83
82
  if strict
84
- build_errors(:invalid_header, :schema, 1, @number, header, @titles)
83
+ build_errors(:invalid_header, :schema, 1, @number, header, @titles)
85
84
  else
86
85
  build_warnings(:invalid_header, :schema, 1, @number, header, @titles)
87
86
  end
88
87
  end
89
88
  end
90
- return valid?
89
+ valid?
91
90
  end
92
91
 
93
- def validate(string_value, row=nil)
92
+ def validate(string_value, row = nil)
94
93
  reset
95
- string_value = string_value || @default
94
+ string_value ||= @default
96
95
  if null.include? string_value
97
96
  validate_required(nil, row)
98
- values = nil
99
- return values
97
+ nil
98
+
100
99
  else
101
100
  string_values = @separator.nil? ? [string_value] : string_value.split(@separator)
102
101
  values = []
@@ -108,300 +107,300 @@ module Csvlint
108
107
  invalid = !validate_format(value, row) || invalid
109
108
  invalid = !validate_length(value, row) || invalid
110
109
  invalid = !validate_value(value, row) || invalid
111
- values << (invalid ? { :invalid => s } : value)
110
+ values << (invalid ? {invalid: s} : value)
112
111
  else
113
112
  build_errors(warning, :schema, row, @number, s, @datatype)
114
- values << { :invalid => s }
113
+ values << {invalid: s}
115
114
  end
116
115
  end
117
- values = (values && @separator.nil?) ? values[0] : values
118
- return values
116
+ values && @separator.nil? ? values[0] : values
117
+
119
118
  end
120
119
  end
121
120
 
122
121
  private
123
- class << self
124
122
 
125
- def create_date_parser(type, warning)
126
- return lambda { |value, format|
127
- format = Csvlint::Csvw::DateFormat.new(nil, type) if format.nil?
128
- v = format.parse(value)
129
- return nil, warning if v.nil?
130
- return v, nil
131
- }
132
- end
133
-
134
- def create_regexp_based_parser(regexp, warning)
135
- return lambda { |value, format|
136
- return nil, warning unless value =~ regexp
137
- return value, nil
138
- }
139
- end
140
-
141
- def languages_match(l1, l2)
142
- return true if l1 == l2 || l1 == "und" || l2 == "und"
143
- return true if l1 =~ Regexp.new("^#{l2}-") || l2 =~ Regexp.new("^#{l1}-")
144
- return false
145
- end
123
+ class << self
124
+ def create_date_parser(type, warning)
125
+ lambda { |value, format|
126
+ format = Csvlint::Csvw::DateFormat.new(nil, type) if format.nil?
127
+ v = format.parse(value)
128
+ return nil, warning if v.nil?
129
+ return v, nil
130
+ }
146
131
  end
147
132
 
148
- def validate_required(value, row)
149
- if @required && value.nil?
150
- build_errors(:required, :schema, row, number, value, { "required" => @required })
151
- return false
152
- end
153
- return true
133
+ def create_regexp_based_parser(regexp, warning)
134
+ lambda { |value, format|
135
+ return nil, warning unless value&.match?(regexp)
136
+ return value, nil
137
+ }
154
138
  end
155
139
 
156
- def validate_length(value, row)
157
- valid = true
158
- if datatype["length"] || datatype["minLength"] || datatype["maxLength"]
159
- length = value.length
160
- length = value.gsub(/==?$/,"").length * 3 / 4 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#base64Binary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#base64Binary"
161
- length = value.length / 2 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#hexBinary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#hexBinary"
162
-
163
- if datatype["minLength"] && length < datatype["minLength"]
164
- build_errors(:min_length, :schema, row, number, value, { "minLength" => datatype["minLength"] })
165
- valid = false
166
- end
167
- if datatype["maxLength"] && length > datatype["maxLength"]
168
- build_errors(:max_length, :schema, row, number, value, { "maxLength" => datatype["maxLength"] })
169
- valid = false
170
- end
171
- if datatype["length"] && length != datatype["length"]
172
- build_errors(:length, :schema, row, number, value, { "length" => datatype["length"] })
173
- valid = false
174
- end
175
- end
176
- return valid
140
+ def languages_match(l1, l2)
141
+ return true if l1 == l2 || l1 == "und" || l2 == "und"
142
+ return true if l1 =~ Regexp.new("^#{l2}-") || l2 =~ Regexp.new("^#{l1}-")
143
+ false
177
144
  end
145
+ end
178
146
 
179
- def validate_format(value, row)
180
- if datatype["format"]
181
- unless DATATYPE_FORMAT_VALIDATION[datatype["base"]].call(value, datatype["format"])
182
- build_errors(:format, :schema, row, number, value, { "format" => datatype["format"] })
183
- return false
184
- end
185
- end
186
- return true
147
+ def validate_required(value, row)
148
+ if @required && value.nil?
149
+ build_errors(:required, :schema, row, number, value, {"required" => @required})
150
+ return false
187
151
  end
152
+ true
153
+ end
154
+
155
+ def validate_length(value, row)
156
+ valid = true
157
+ if datatype["length"] || datatype["minLength"] || datatype["maxLength"]
158
+ length = value.length
159
+ length = value.gsub(/==?$/, "").length * 3 / 4 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#base64Binary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#base64Binary"
160
+ length = value.length / 2 if datatype["@id"] == "http://www.w3.org/2001/XMLSchema#hexBinary" || datatype["base"] == "http://www.w3.org/2001/XMLSchema#hexBinary"
188
161
 
189
- def validate_value(value, row)
190
- valid = true
191
- if datatype["minInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] < datatype["minInclusive"][:dateTime]) : (value < datatype["minInclusive"]))
192
- build_errors(:min_inclusive, :schema, row, number, value, { "minInclusive" => datatype["minInclusive"] })
162
+ if datatype["minLength"] && length < datatype["minLength"]
163
+ build_errors(:min_length, :schema, row, number, value, {"minLength" => datatype["minLength"]})
193
164
  valid = false
194
165
  end
195
- if datatype["maxInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] > datatype["maxInclusive"][:dateTime]) : (value > datatype["maxInclusive"]))
196
- build_errors(:max_inclusive, :schema, row, number, value, { "maxInclusive" => datatype["maxInclusive"] })
166
+ if datatype["maxLength"] && length > datatype["maxLength"]
167
+ build_errors(:max_length, :schema, row, number, value, {"maxLength" => datatype["maxLength"]})
197
168
  valid = false
198
169
  end
199
- if datatype["minExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] <= datatype["minExclusive"][:dateTime]) : (value <= datatype["minExclusive"]))
200
- build_errors(:min_exclusive, :schema, row, number, value, { "minExclusive" => datatype["minExclusive"] })
170
+ if datatype["length"] && length != datatype["length"]
171
+ build_errors(:length, :schema, row, number, value, {"length" => datatype["length"]})
201
172
  valid = false
202
173
  end
203
- if datatype["maxExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] >= datatype["maxExclusive"][:dateTime]) : (value >= datatype["maxExclusive"]))
204
- build_errors(:max_exclusive, :schema, row, number, value, { "maxExclusive" => datatype["maxExclusive"] })
205
- valid = false
174
+ end
175
+ valid
176
+ end
177
+
178
+ def validate_format(value, row)
179
+ if datatype["format"]
180
+ unless DATATYPE_FORMAT_VALIDATION[datatype["base"]].call(value, datatype["format"])
181
+ build_errors(:format, :schema, row, number, value, {"format" => datatype["format"]})
182
+ return false
206
183
  end
207
- return valid
208
184
  end
185
+ true
186
+ end
187
+
188
+ def validate_value(value, row)
189
+ valid = true
190
+ if datatype["minInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] < datatype["minInclusive"][:dateTime]) : (value < datatype["minInclusive"]))
191
+ build_errors(:min_inclusive, :schema, row, number, value, {"minInclusive" => datatype["minInclusive"]})
192
+ valid = false
193
+ end
194
+ if datatype["maxInclusive"] && ((value.is_a? Hash) ? (value[:dateTime] > datatype["maxInclusive"][:dateTime]) : (value > datatype["maxInclusive"]))
195
+ build_errors(:max_inclusive, :schema, row, number, value, {"maxInclusive" => datatype["maxInclusive"]})
196
+ valid = false
197
+ end
198
+ if datatype["minExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] <= datatype["minExclusive"][:dateTime]) : (value <= datatype["minExclusive"]))
199
+ build_errors(:min_exclusive, :schema, row, number, value, {"minExclusive" => datatype["minExclusive"]})
200
+ valid = false
201
+ end
202
+ if datatype["maxExclusive"] && ((value.is_a? Hash) ? (value[:dateTime] >= datatype["maxExclusive"][:dateTime]) : (value >= datatype["maxExclusive"]))
203
+ build_errors(:max_exclusive, :schema, row, number, value, {"maxExclusive" => datatype["maxExclusive"]})
204
+ valid = false
205
+ end
206
+ valid
207
+ end
209
208
 
210
- REGEXP_VALIDATION = lambda { |value, format| value =~ format }
209
+ REGEXP_VALIDATION = lambda { |value, format| value =~ format }
211
210
 
212
- NO_ADDITIONAL_VALIDATION = lambda { |value, format| true }
211
+ NO_ADDITIONAL_VALIDATION = lambda { |value, format| true }
213
212
 
214
- DATATYPE_FORMAT_VALIDATION = {
215
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => REGEXP_VALIDATION,
216
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => REGEXP_VALIDATION,
217
- "http://www.w3.org/ns/csvw#JSON" => REGEXP_VALIDATION,
218
- "http://www.w3.org/2001/XMLSchema#anyAtomicType" => REGEXP_VALIDATION,
219
- "http://www.w3.org/2001/XMLSchema#anyURI" => REGEXP_VALIDATION,
220
- "http://www.w3.org/2001/XMLSchema#base64Binary" => REGEXP_VALIDATION,
221
- "http://www.w3.org/2001/XMLSchema#boolean" => NO_ADDITIONAL_VALIDATION,
222
- "http://www.w3.org/2001/XMLSchema#date" => NO_ADDITIONAL_VALIDATION,
223
- "http://www.w3.org/2001/XMLSchema#dateTime" => NO_ADDITIONAL_VALIDATION,
224
- "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => NO_ADDITIONAL_VALIDATION,
225
- "http://www.w3.org/2001/XMLSchema#decimal" => NO_ADDITIONAL_VALIDATION,
226
- "http://www.w3.org/2001/XMLSchema#integer" => NO_ADDITIONAL_VALIDATION,
227
- "http://www.w3.org/2001/XMLSchema#long" => NO_ADDITIONAL_VALIDATION,
228
- "http://www.w3.org/2001/XMLSchema#int" => NO_ADDITIONAL_VALIDATION,
229
- "http://www.w3.org/2001/XMLSchema#short" => NO_ADDITIONAL_VALIDATION,
230
- "http://www.w3.org/2001/XMLSchema#byte" => NO_ADDITIONAL_VALIDATION,
231
- "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => NO_ADDITIONAL_VALIDATION,
232
- "http://www.w3.org/2001/XMLSchema#positiveInteger" => NO_ADDITIONAL_VALIDATION,
233
- "http://www.w3.org/2001/XMLSchema#unsignedLong" => NO_ADDITIONAL_VALIDATION,
234
- "http://www.w3.org/2001/XMLSchema#unsignedInt" => NO_ADDITIONAL_VALIDATION,
235
- "http://www.w3.org/2001/XMLSchema#unsignedShort" => NO_ADDITIONAL_VALIDATION,
236
- "http://www.w3.org/2001/XMLSchema#unsignedByte" => NO_ADDITIONAL_VALIDATION,
237
- "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => NO_ADDITIONAL_VALIDATION,
238
- "http://www.w3.org/2001/XMLSchema#negativeInteger" => NO_ADDITIONAL_VALIDATION,
239
- "http://www.w3.org/2001/XMLSchema#double" => NO_ADDITIONAL_VALIDATION,
240
- "http://www.w3.org/2001/XMLSchema#duration" => REGEXP_VALIDATION,
241
- "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => REGEXP_VALIDATION,
242
- "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => REGEXP_VALIDATION,
243
- "http://www.w3.org/2001/XMLSchema#float" => NO_ADDITIONAL_VALIDATION,
244
- "http://www.w3.org/2001/XMLSchema#gDay" => NO_ADDITIONAL_VALIDATION,
245
- "http://www.w3.org/2001/XMLSchema#gMonth" => NO_ADDITIONAL_VALIDATION,
246
- "http://www.w3.org/2001/XMLSchema#gMonthDay" => NO_ADDITIONAL_VALIDATION,
247
- "http://www.w3.org/2001/XMLSchema#gYear" => NO_ADDITIONAL_VALIDATION,
248
- "http://www.w3.org/2001/XMLSchema#gYearMonth" => NO_ADDITIONAL_VALIDATION,
249
- "http://www.w3.org/2001/XMLSchema#hexBinary" => REGEXP_VALIDATION,
250
- "http://www.w3.org/2001/XMLSchema#QName" => REGEXP_VALIDATION,
251
- "http://www.w3.org/2001/XMLSchema#string" => REGEXP_VALIDATION,
252
- "http://www.w3.org/2001/XMLSchema#normalizedString" => REGEXP_VALIDATION,
253
- "http://www.w3.org/2001/XMLSchema#token" => REGEXP_VALIDATION,
254
- "http://www.w3.org/2001/XMLSchema#language" => REGEXP_VALIDATION,
255
- "http://www.w3.org/2001/XMLSchema#Name" => REGEXP_VALIDATION,
256
- "http://www.w3.org/2001/XMLSchema#NMTOKEN" => REGEXP_VALIDATION,
257
- "http://www.w3.org/2001/XMLSchema#time" => NO_ADDITIONAL_VALIDATION
258
- }
213
+ DATATYPE_FORMAT_VALIDATION = {
214
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => REGEXP_VALIDATION,
215
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => REGEXP_VALIDATION,
216
+ "http://www.w3.org/ns/csvw#JSON" => REGEXP_VALIDATION,
217
+ "http://www.w3.org/2001/XMLSchema#anyAtomicType" => REGEXP_VALIDATION,
218
+ "http://www.w3.org/2001/XMLSchema#anyURI" => REGEXP_VALIDATION,
219
+ "http://www.w3.org/2001/XMLSchema#base64Binary" => REGEXP_VALIDATION,
220
+ "http://www.w3.org/2001/XMLSchema#boolean" => NO_ADDITIONAL_VALIDATION,
221
+ "http://www.w3.org/2001/XMLSchema#date" => NO_ADDITIONAL_VALIDATION,
222
+ "http://www.w3.org/2001/XMLSchema#dateTime" => NO_ADDITIONAL_VALIDATION,
223
+ "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => NO_ADDITIONAL_VALIDATION,
224
+ "http://www.w3.org/2001/XMLSchema#decimal" => NO_ADDITIONAL_VALIDATION,
225
+ "http://www.w3.org/2001/XMLSchema#integer" => NO_ADDITIONAL_VALIDATION,
226
+ "http://www.w3.org/2001/XMLSchema#long" => NO_ADDITIONAL_VALIDATION,
227
+ "http://www.w3.org/2001/XMLSchema#int" => NO_ADDITIONAL_VALIDATION,
228
+ "http://www.w3.org/2001/XMLSchema#short" => NO_ADDITIONAL_VALIDATION,
229
+ "http://www.w3.org/2001/XMLSchema#byte" => NO_ADDITIONAL_VALIDATION,
230
+ "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => NO_ADDITIONAL_VALIDATION,
231
+ "http://www.w3.org/2001/XMLSchema#positiveInteger" => NO_ADDITIONAL_VALIDATION,
232
+ "http://www.w3.org/2001/XMLSchema#unsignedLong" => NO_ADDITIONAL_VALIDATION,
233
+ "http://www.w3.org/2001/XMLSchema#unsignedInt" => NO_ADDITIONAL_VALIDATION,
234
+ "http://www.w3.org/2001/XMLSchema#unsignedShort" => NO_ADDITIONAL_VALIDATION,
235
+ "http://www.w3.org/2001/XMLSchema#unsignedByte" => NO_ADDITIONAL_VALIDATION,
236
+ "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => NO_ADDITIONAL_VALIDATION,
237
+ "http://www.w3.org/2001/XMLSchema#negativeInteger" => NO_ADDITIONAL_VALIDATION,
238
+ "http://www.w3.org/2001/XMLSchema#double" => NO_ADDITIONAL_VALIDATION,
239
+ "http://www.w3.org/2001/XMLSchema#duration" => REGEXP_VALIDATION,
240
+ "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => REGEXP_VALIDATION,
241
+ "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => REGEXP_VALIDATION,
242
+ "http://www.w3.org/2001/XMLSchema#float" => NO_ADDITIONAL_VALIDATION,
243
+ "http://www.w3.org/2001/XMLSchema#gDay" => NO_ADDITIONAL_VALIDATION,
244
+ "http://www.w3.org/2001/XMLSchema#gMonth" => NO_ADDITIONAL_VALIDATION,
245
+ "http://www.w3.org/2001/XMLSchema#gMonthDay" => NO_ADDITIONAL_VALIDATION,
246
+ "http://www.w3.org/2001/XMLSchema#gYear" => NO_ADDITIONAL_VALIDATION,
247
+ "http://www.w3.org/2001/XMLSchema#gYearMonth" => NO_ADDITIONAL_VALIDATION,
248
+ "http://www.w3.org/2001/XMLSchema#hexBinary" => REGEXP_VALIDATION,
249
+ "http://www.w3.org/2001/XMLSchema#QName" => REGEXP_VALIDATION,
250
+ "http://www.w3.org/2001/XMLSchema#string" => REGEXP_VALIDATION,
251
+ "http://www.w3.org/2001/XMLSchema#normalizedString" => REGEXP_VALIDATION,
252
+ "http://www.w3.org/2001/XMLSchema#token" => REGEXP_VALIDATION,
253
+ "http://www.w3.org/2001/XMLSchema#language" => REGEXP_VALIDATION,
254
+ "http://www.w3.org/2001/XMLSchema#Name" => REGEXP_VALIDATION,
255
+ "http://www.w3.org/2001/XMLSchema#NMTOKEN" => REGEXP_VALIDATION,
256
+ "http://www.w3.org/2001/XMLSchema#time" => NO_ADDITIONAL_VALIDATION
257
+ }
259
258
 
260
- TRIM_VALUE = lambda { |value, format| return value.strip, nil }
261
- ALL_VALUES_VALID = lambda { |value, format| return value, nil }
259
+ TRIM_VALUE = lambda { |value, format| return value.strip, nil }
260
+ ALL_VALUES_VALID = lambda { |value, format| return value, nil }
262
261
 
263
- NUMERIC_PARSER = lambda { |value, format, integer=false|
264
- format = Csvlint::Csvw::NumberFormat.new(nil, nil, ".", integer) if format.nil?
265
- v = format.parse(value)
266
- return nil, :invalid_number if v.nil?
267
- return v, nil
268
- }
262
+ NUMERIC_PARSER = lambda { |value, format, integer = false|
263
+ format = Csvlint::Csvw::NumberFormat.new(nil, nil, ".", integer) if format.nil?
264
+ v = format.parse(value)
265
+ return nil, :invalid_number if v.nil?
266
+ return v, nil
267
+ }
269
268
 
270
- DATATYPE_PARSER = {
271
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => TRIM_VALUE,
272
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => TRIM_VALUE,
273
- "http://www.w3.org/ns/csvw#JSON" => TRIM_VALUE,
274
- "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
275
- "http://www.w3.org/2001/XMLSchema#anyURI" => TRIM_VALUE,
276
- "http://www.w3.org/2001/XMLSchema#base64Binary" => TRIM_VALUE,
277
- "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
278
- if format.nil?
279
- return true, nil if ["true", "1"].include? value
280
- return false, nil if ["false", "0"].include? value
281
- else
282
- return true, nil if value == format[0]
283
- return false, nil if value == format[1]
284
- end
285
- return value, :invalid_boolean
286
- },
287
- "http://www.w3.org/2001/XMLSchema#date" =>
288
- create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
289
- "http://www.w3.org/2001/XMLSchema#dateTime" =>
290
- create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
291
- "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
292
- create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
293
- "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
294
- return nil, :invalid_decimal if value =~ /(E|e|^(NaN|INF|-INF)$)/
295
- return NUMERIC_PARSER.call(value, format)
296
- },
297
- "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
298
- v, w = NUMERIC_PARSER.call(value, format, true)
299
- return v, :invalid_integer unless w.nil?
300
- return nil, :invalid_integer unless v.kind_of? Integer
301
- return v, w
302
- },
303
- "http://www.w3.org/2001/XMLSchema#long" => lambda { |value, format|
304
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
305
- return v, :invalid_long unless w.nil?
306
- return nil, :invalid_long unless v <= 9223372036854775807 && v >= -9223372036854775808
307
- return v, w
308
- },
309
- "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, format|
310
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
311
- return v, :invalid_int unless w.nil?
312
- return nil, :invalid_int unless v <= 2147483647 && v >= -2147483648
313
- return v, w
314
- },
315
- "http://www.w3.org/2001/XMLSchema#short" => lambda { |value, format|
316
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
317
- return v, :invalid_short unless w.nil?
318
- return nil, :invalid_short unless v <= 32767 && v >= -32768
319
- return v, w
320
- },
321
- "http://www.w3.org/2001/XMLSchema#byte" => lambda { |value, format|
322
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
323
- return v, :invalid_byte unless w.nil?
324
- return nil, :invalid_byte unless v <= 127 && v >= -128
325
- return v, w
326
- },
327
- "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda { |value, format|
328
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
329
- return v, :invalid_nonNegativeInteger unless w.nil?
330
- return nil, :invalid_nonNegativeInteger unless v >= 0
331
- return v, w
332
- },
333
- "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda { |value, format|
334
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
335
- return v, :invalid_positiveInteger unless w.nil?
336
- return nil, :invalid_positiveInteger unless v > 0
337
- return v, w
338
- },
339
- "http://www.w3.org/2001/XMLSchema#unsignedLong" => lambda { |value, format|
340
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
341
- return v, :invalid_unsignedLong unless w.nil?
342
- return nil, :invalid_unsignedLong unless v <= 18446744073709551615
343
- return v, w
344
- },
345
- "http://www.w3.org/2001/XMLSchema#unsignedInt" => lambda { |value, format|
346
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
347
- return v, :invalid_unsignedInt unless w.nil?
348
- return nil, :invalid_unsignedInt unless v <= 4294967295
349
- return v, w
350
- },
351
- "http://www.w3.org/2001/XMLSchema#unsignedShort" => lambda { |value, format|
352
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
353
- return v, :invalid_unsignedShort unless w.nil?
354
- return nil, :invalid_unsignedShort unless v <= 65535
355
- return v, w
356
- },
357
- "http://www.w3.org/2001/XMLSchema#unsignedByte" => lambda { |value, format|
358
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
359
- return v, :invalid_unsignedByte unless w.nil?
360
- return nil, :invalid_unsignedByte unless v <= 255
361
- return v, w
362
- },
363
- "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda { |value, format|
364
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
365
- return v, :invalid_nonPositiveInteger unless w.nil?
366
- return nil, :invalid_nonPositiveInteger unless v <= 0
367
- return v, w
368
- },
369
- "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda { |value, format|
370
- v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
371
- return v, :invalid_negativeInteger unless w.nil?
372
- return nil, :invalid_negativeInteger unless v < 0
373
- return v, w
374
- },
375
- "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
376
- # regular expressions here taken from XML Schema datatypes spec
377
- "http://www.w3.org/2001/XMLSchema#duration" =>
378
- create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
379
- "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
380
- create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
381
- "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
382
- create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
383
- "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
384
- "http://www.w3.org/2001/XMLSchema#gDay" =>
385
- create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
386
- "http://www.w3.org/2001/XMLSchema#gMonth" =>
387
- create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
388
- "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
389
- create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
390
- "http://www.w3.org/2001/XMLSchema#gYear" =>
391
- create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
392
- "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
393
- create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
394
- "http://www.w3.org/2001/XMLSchema#hexBinary" => TRIM_VALUE,
395
- "http://www.w3.org/2001/XMLSchema#QName" => TRIM_VALUE,
396
- "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
397
- "http://www.w3.org/2001/XMLSchema#normalizedString" => TRIM_VALUE,
398
- "http://www.w3.org/2001/XMLSchema#token" => TRIM_VALUE,
399
- "http://www.w3.org/2001/XMLSchema#language" => TRIM_VALUE,
400
- "http://www.w3.org/2001/XMLSchema#Name" => TRIM_VALUE,
401
- "http://www.w3.org/2001/XMLSchema#NMTOKEN" => TRIM_VALUE,
402
- "http://www.w3.org/2001/XMLSchema#time" =>
403
- create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
404
- }
269
+ DATATYPE_PARSER = {
270
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => TRIM_VALUE,
271
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => TRIM_VALUE,
272
+ "http://www.w3.org/ns/csvw#JSON" => TRIM_VALUE,
273
+ "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
274
+ "http://www.w3.org/2001/XMLSchema#anyURI" => TRIM_VALUE,
275
+ "http://www.w3.org/2001/XMLSchema#base64Binary" => TRIM_VALUE,
276
+ "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
277
+ if format.nil?
278
+ return true, nil if ["true", "1"].include? value
279
+ return false, nil if ["false", "0"].include? value
280
+ else
281
+ return true, nil if value == format[0]
282
+ return false, nil if value == format[1]
283
+ end
284
+ return value, :invalid_boolean
285
+ },
286
+ "http://www.w3.org/2001/XMLSchema#date" =>
287
+ create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
288
+ "http://www.w3.org/2001/XMLSchema#dateTime" =>
289
+ create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
290
+ "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
291
+ create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
292
+ "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
293
+ return nil, :invalid_decimal if /(E|e|^(NaN|INF|-INF)$)/.match?(value)
294
+ return NUMERIC_PARSER.call(value, format)
295
+ },
296
+ "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
297
+ v, w = NUMERIC_PARSER.call(value, format, true)
298
+ return v, :invalid_integer unless w.nil?
299
+ return nil, :invalid_integer unless v.is_a? Integer
300
+ return v, w
301
+ },
302
+ "http://www.w3.org/2001/XMLSchema#long" => lambda { |value, format|
303
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
304
+ return v, :invalid_long unless w.nil?
305
+ return nil, :invalid_long unless v <= 9223372036854775807 && v >= -9223372036854775808
306
+ return v, w
307
+ },
308
+ "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, format|
309
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
310
+ return v, :invalid_int unless w.nil?
311
+ return nil, :invalid_int unless v <= 2147483647 && v >= -2147483648
312
+ return v, w
313
+ },
314
+ "http://www.w3.org/2001/XMLSchema#short" => lambda { |value, format|
315
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
316
+ return v, :invalid_short unless w.nil?
317
+ return nil, :invalid_short unless v <= 32767 && v >= -32768
318
+ return v, w
319
+ },
320
+ "http://www.w3.org/2001/XMLSchema#byte" => lambda { |value, format|
321
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
322
+ return v, :invalid_byte unless w.nil?
323
+ return nil, :invalid_byte unless v <= 127 && v >= -128
324
+ return v, w
325
+ },
326
+ "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda { |value, format|
327
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
328
+ return v, :invalid_nonNegativeInteger unless w.nil?
329
+ return nil, :invalid_nonNegativeInteger unless v >= 0
330
+ return v, w
331
+ },
332
+ "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda { |value, format|
333
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
334
+ return v, :invalid_positiveInteger unless w.nil?
335
+ return nil, :invalid_positiveInteger unless v > 0
336
+ return v, w
337
+ },
338
+ "http://www.w3.org/2001/XMLSchema#unsignedLong" => lambda { |value, format|
339
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
340
+ return v, :invalid_unsignedLong unless w.nil?
341
+ return nil, :invalid_unsignedLong unless v <= 18446744073709551615
342
+ return v, w
343
+ },
344
+ "http://www.w3.org/2001/XMLSchema#unsignedInt" => lambda { |value, format|
345
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
346
+ return v, :invalid_unsignedInt unless w.nil?
347
+ return nil, :invalid_unsignedInt unless v <= 4294967295
348
+ return v, w
349
+ },
350
+ "http://www.w3.org/2001/XMLSchema#unsignedShort" => lambda { |value, format|
351
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
352
+ return v, :invalid_unsignedShort unless w.nil?
353
+ return nil, :invalid_unsignedShort unless v <= 65535
354
+ return v, w
355
+ },
356
+ "http://www.w3.org/2001/XMLSchema#unsignedByte" => lambda { |value, format|
357
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
358
+ return v, :invalid_unsignedByte unless w.nil?
359
+ return nil, :invalid_unsignedByte unless v <= 255
360
+ return v, w
361
+ },
362
+ "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda { |value, format|
363
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
364
+ return v, :invalid_nonPositiveInteger unless w.nil?
365
+ return nil, :invalid_nonPositiveInteger unless v <= 0
366
+ return v, w
367
+ },
368
+ "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda { |value, format|
369
+ v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
370
+ return v, :invalid_negativeInteger unless w.nil?
371
+ return nil, :invalid_negativeInteger unless v < 0
372
+ return v, w
373
+ },
374
+ "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
375
+ # regular expressions here taken from XML Schema datatypes spec
376
+ "http://www.w3.org/2001/XMLSchema#duration" =>
377
+ create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
378
+ "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
379
+ create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
380
+ "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
381
+ create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
382
+ "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
383
+ "http://www.w3.org/2001/XMLSchema#gDay" =>
384
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
385
+ "http://www.w3.org/2001/XMLSchema#gMonth" =>
386
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
387
+ "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
388
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
389
+ "http://www.w3.org/2001/XMLSchema#gYear" =>
390
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
391
+ "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
392
+ create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
393
+ "http://www.w3.org/2001/XMLSchema#hexBinary" => TRIM_VALUE,
394
+ "http://www.w3.org/2001/XMLSchema#QName" => TRIM_VALUE,
395
+ "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
396
+ "http://www.w3.org/2001/XMLSchema#normalizedString" => TRIM_VALUE,
397
+ "http://www.w3.org/2001/XMLSchema#token" => TRIM_VALUE,
398
+ "http://www.w3.org/2001/XMLSchema#language" => TRIM_VALUE,
399
+ "http://www.w3.org/2001/XMLSchema#Name" => TRIM_VALUE,
400
+ "http://www.w3.org/2001/XMLSchema#NMTOKEN" => TRIM_VALUE,
401
+ "http://www.w3.org/2001/XMLSchema#time" =>
402
+ create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
403
+ }
405
404
  end
406
405
  end
407
406
  end