csvlint 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +7 -1
  3. data/CHANGELOG.md +19 -1
  4. data/README.md +93 -36
  5. data/bin/csvlint +68 -27
  6. data/csvlint.gemspec +2 -0
  7. data/features/csvw_schema_validation.feature +127 -0
  8. data/features/fixtures/spreadsheet.xlsx +0 -0
  9. data/features/sources.feature +3 -4
  10. data/features/step_definitions/parse_csv_steps.rb +13 -1
  11. data/features/step_definitions/schema_validation_steps.rb +27 -1
  12. data/features/step_definitions/sources_steps.rb +1 -1
  13. data/features/step_definitions/validation_errors_steps.rb +48 -1
  14. data/features/step_definitions/validation_info_steps.rb +5 -1
  15. data/features/step_definitions/validation_warnings_steps.rb +15 -1
  16. data/features/support/load_tests.rb +114 -0
  17. data/features/validation_errors.feature +12 -24
  18. data/features/validation_warnings.feature +18 -6
  19. data/lib/csvlint.rb +10 -0
  20. data/lib/csvlint/csvw/column.rb +359 -0
  21. data/lib/csvlint/csvw/date_format.rb +182 -0
  22. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  23. data/lib/csvlint/csvw/number_format.rb +211 -0
  24. data/lib/csvlint/csvw/property_checker.rb +761 -0
  25. data/lib/csvlint/csvw/table.rb +204 -0
  26. data/lib/csvlint/csvw/table_group.rb +165 -0
  27. data/lib/csvlint/schema.rb +40 -23
  28. data/lib/csvlint/validate.rb +142 -19
  29. data/lib/csvlint/version.rb +1 -1
  30. data/spec/csvw/column_spec.rb +112 -0
  31. data/spec/csvw/date_format_spec.rb +49 -0
  32. data/spec/csvw/number_format_spec.rb +403 -0
  33. data/spec/csvw/table_group_spec.rb +143 -0
  34. data/spec/csvw/table_spec.rb +90 -0
  35. data/spec/schema_spec.rb +27 -1
  36. data/spec/spec_helper.rb +0 -1
  37. data/spec/validator_spec.rb +16 -10
  38. metadata +53 -2
@@ -8,10 +8,20 @@ require 'active_support/core_ext/date/conversions'
8
8
  require 'active_support/core_ext/time/conversions'
9
9
  require 'mime/types'
10
10
  require 'open_uri_redirections'
11
+ require 'uri_template'
11
12
 
12
13
  require 'csvlint/error_message'
13
14
  require 'csvlint/error_collector'
14
15
  require 'csvlint/validate'
15
16
  require 'csvlint/wrapped_io'
16
17
  require 'csvlint/field'
18
+
19
+ require 'csvlint/csvw/metadata_error'
20
+ require 'csvlint/csvw/number_format'
21
+ require 'csvlint/csvw/date_format'
22
+ require 'csvlint/csvw/property_checker'
23
+ require 'csvlint/csvw/column'
24
+ require 'csvlint/csvw/table'
25
+ require 'csvlint/csvw/table_group'
26
+
17
27
  require 'csvlint/schema'
@@ -0,0 +1,359 @@
1
+ module Csvlint
2
+ module Csvw
3
+ class Column
4
+ include Csvlint::ErrorCollector
5
+
6
+ attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :titles, :value_url, :virtual, :annotations
7
+
8
# Builds a column description from already-normalised property values.
#
# number and name are positional; every other CSVW column property is an
# optional keyword with the default shown in the signature. source_number
# falls back to number when it is not supplied.
#
# `reset` is not defined in this class - presumably it comes from
# Csvlint::ErrorCollector and initialises @warnings (TODO confirm); the
# warnings passed in are appended after that call.
def initialize(number, name, id: nil, about_url: nil, datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" }, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
  # identity
  @number, @name, @id = number, name, id
  @source_number = source_number || number

  # URL templates
  @about_url, @property_url, @value_url = about_url, property_url, value_url

  # value interpretation
  @datatype, @default, @lang, @null = datatype, default, lang, null
  @ordered, @required, @separator = ordered, required, separator

  # presentation
  @suppress_output, @text_direction, @titles = suppress_output, text_direction, titles
  @virtual, @annotations = virtual, annotations

  reset
  @warnings += warnings
end
31
+
32
# Builds a Column from the JSON description of a column.
#
# Each property of column_desc is passed through
# Csvw::PropertyChecker.check_property, which yields the checked value, any
# warning(s) and the kind of property. Properties are then sorted into
# annotations, column properties or inherited properties; a property of any
# other kind is recorded as an :invalid_property warning.
#
# Raises Csvlint::Csvw::MetadataError when "@type" is present and is not
# 'Column'. The inherited_properties argument is cloned, so the caller's hash
# is not modified.
def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
  annotations = {}
  warnings = []
  column_properties = {}
  inherited_properties = inherited_properties.clone

  column_desc.each do |property, value|
    if property == "@type"
      raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != 'Column'
      next
    end

    checked, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)

    unless warning.nil? || warning.empty?
      Array(warning).each do |w|
        warnings << Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil)
      end
    end

    case type
    when :annotation
      annotations[property] = checked
    when :common, :column
      column_properties[property] = checked
    when :inherited
      inherited_properties[property] = checked
    else
      warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "column: #{property}", nil)
    end
  end

  new(
    number,
    column_properties["name"],
    id: column_properties["@id"],
    datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
    lang: inherited_properties["lang"] || "und",
    null: inherited_properties["null"] || [""],
    # NOTE(review): read from the raw description, not from the checked value
    property_url: column_desc["propertyUrl"],
    required: inherited_properties["required"] || false,
    separator: inherited_properties["separator"],
    titles: column_properties["titles"],
    virtual: column_properties["virtual"] || false,
    annotations: annotations,
    warnings: warnings
  )
end
70
+
71
# Checks a header cell against this column's titles.
#
# Only titles whose language matches the column language (according to
# Column.languages_match) are acceptable. An :invalid_header error is
# recorded when the header is not one of them.
#
# Titles in non-matching languages are dropped entirely rather than left as
# nil placeholders; otherwise a missing (nil) header would be accepted
# whenever at least one title language did not match.
#
# Returns the result of valid? after the check.
def validate_header(header)
  reset
  valid_headers = @titles ? @titles.map{ |l,v| v if Column.languages_match(l, lang) }.flatten.compact : []
  build_errors(:invalid_header, :schema, 1, @number, header, @titles) unless valid_headers.include? header
  return valid?
end
77
+
78
# Validates one cell value against this column.
#
# The string (nil is treated as "") is parsed first. A null cell (parse
# returns nil) is only checked against the required constraint. Otherwise
# each parsed value goes through the required, format, length and range
# checks.
#
# Those checks are skipped when parsing itself reported a problem: parse
# hands back the raw string for anything it could not convert, and comparing
# such a string with typed bounds (e.g. "abc" < 5) raises ArgumentError
# instead of producing a validation error. The parse error is already
# recorded at that point.
#
# Returns the result of valid? once all checks have run.
def validate(string_value, row=nil)
  reset
  values = parse(string_value || "", row)
  # STDERR.puts "#{name} - #{string_value.inspect} - #{values.inspect}"
  if values.nil?
    validate_required(values, row)
  elsif valid?
    values.each do |value|
      validate_required(value, row)
      validate_format(value, row)
      validate_length(value, row)
      validate_value(value, row)
    end
  end
  return valid?
end
91
+
92
# Converts a raw cell string into a list of typed values.
#
# Returns nil when the string is one of the column's null markers. Otherwise
# the string is split on the column separator (if any) and each piece is
# handed to the parser registered in DATATYPE_PARSER for the datatype's
# "base" (or "@id" when there is no base), together with the datatype's
# "format".
#
# A piece the parser rejects is reported through build_errors and kept in
# the result as the original string.
def parse(string_value, row=nil)
  return nil if null.include? string_value

  pieces = @separator.nil? ? [string_value] : string_value.split(@separator)
  pieces.map do |piece|
    parsed, problem = DATATYPE_PARSER[@datatype["base"] || @datatype["@id"]].call(piece, @datatype["format"])
    if problem.nil?
      parsed
    else
      build_errors(problem, :schema, row, @number, piece, @datatype)
      piece
    end
  end
end
107
+
108
+ private
109
+ class << self
110
+
111
# Class-level factory for date/time parsers.
#
# The returned lambda takes (value, format). When no format is given it uses
# a Csvlint::Csvw::DateFormat built for the given datatype URI. It returns
# [parsed, nil] on success, or [nil, warning] when the format's parse
# returns nil.
def create_date_parser(type, warning)
  lambda { |value, format|
    date_format = format.nil? ? Csvlint::Csvw::DateFormat.new(nil, type) : format
    parsed = date_format.parse(value)
    parsed.nil? ? [nil, warning] : [parsed, nil]
  }
end
119
+
120
# Class-level factory for parsers that accept a value only when it matches a
# regular expression.
#
# The returned lambda takes (value, format) and returns [value, nil] when the
# value matches, or [nil, warning] otherwise; format is ignored.
#
# The expression is wrapped in \A(?:...)\z so that the WHOLE value has to
# match. The duration expressions passed in by this class come from the XML
# Schema datatypes spec, where patterns apply to the complete lexical value;
# applied unanchored they would accept strings such as "xxP1Yjunk".
def create_regexp_based_parser(regexp, warning)
  whole_value = Regexp.new("\\A(?:#{regexp.source})\\z", regexp.options)
  return lambda { |value, format|
    return nil, warning unless value =~ whole_value
    return value, nil
  }
end
126
+
127
# Class-level helper: do two language tags match?
#
# Tags match when they are equal, when either is "und", or when one is a
# prefix of the other followed by "-" (so "en" matches "en-GB").
#
# The prefix test uses plain string comparison. Interpolating the tags into
# a Regexp treats them as patterns: a tag such as "*" raises RegexpError and
# "e." would match "en-GB".
def languages_match(l1, l2)
  return true if l1 == l2 || l1 == "und" || l2 == "und"
  first, second = l1.to_s, l2.to_s
  return first.start_with?("#{second}-") || second.start_with?("#{first}-")
end
132
+ end
133
+
134
# Records a :required error when the column is required and the value is nil.
def validate_required(value, row)
  return unless @required && value.nil?
  build_errors(:required, :schema, row, number, value, { "required" => @required })
end
137
+
138
# Checks the value against the datatype's "length", "minLength" and
# "maxLength" constraints; does nothing when none of them is set.
#
# The length is value.length, except that binary types are measured in
# decoded bytes: base64Binary as three quarters of the unpadded character
# count, hexBinary as half the character count.
def validate_length(value, row)
  exact   = datatype["length"]
  minimum = datatype["minLength"]
  maximum = datatype["maxLength"]
  return unless exact || minimum || maximum

  type_ids = [datatype["@id"], datatype["base"]]
  length = value.length
  if type_ids.any? { |id| id == "http://www.w3.org/2001/XMLSchema#base64Binary" }
    length = value.gsub(/==?$/,"").length * 3 / 4
  end
  if type_ids.any? { |id| id == "http://www.w3.org/2001/XMLSchema#hexBinary" }
    length = value.length / 2
  end

  build_errors(:min_length, :schema, row, number, value, { "minLength" => minimum }) if minimum && length < minimum
  build_errors(:max_length, :schema, row, number, value, { "maxLength" => maximum }) if maximum && length > maximum
  build_errors(:length, :schema, row, number, value, { "length" => exact }) if exact && length != exact
end
149
+
150
# Checks the value against the datatype's "format", when one is set, using
# the validator registered in DATATYPE_FORMAT_VALIDATION. A :format error is
# recorded when the validator returns a falsy result.
#
# The validator is looked up by the datatype's "base", falling back to its
# "@id" - the same rule parse uses to pick a parser. Without the fallback a
# datatype given only as "@id" plus "format" fails with NoMethodError on nil.
def validate_format(value, row)
  format = datatype["format"]
  return unless format

  validator = DATATYPE_FORMAT_VALIDATION[datatype["base"] || datatype["@id"]]
  build_errors(:format, :schema, row, number, value, { "format" => format }) unless validator.call(value, format)
end
155
+
156
# Checks the value against the datatype's range constraints (minInclusive,
# maxInclusive, minExclusive, maxExclusive), recording one error per
# violated bound. Constraints that are not set are skipped.
def validate_value(value, row)
  checks = [
    ["minInclusive", :min_inclusive, lambda { |bound| value < bound }],
    ["maxInclusive", :max_inclusive, lambda { |bound| value > bound }],
    ["minExclusive", :min_exclusive, lambda { |bound| value <= bound }],
    ["maxExclusive", :max_exclusive, lambda { |bound| value >= bound }]
  ]

  outcome = nil
  checks.each do |key, error, violated|
    bound = datatype[key]
    outcome = (build_errors(error, :schema, row, number, value, { key => bound }) if bound && violated.call(bound))
  end
  outcome
end
162
+
163
# Format check for datatypes whose "format" is a regular expression: truthy
# when the value matches it.
REGEXP_VALIDATION = ->(value, format) { value =~ format }

# Format check for datatypes that need nothing beyond parsing: always true.
NO_ADDITIONAL_VALIDATION = ->(value, format) { true }

# Maps each supported datatype URI to the lambda used by validate_format to
# check a value against the datatype's "format".
DATATYPE_FORMAT_VALIDATION = begin
  rdf  = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  csvw = "http://www.w3.org/ns/csvw#"
  xsd  = "http://www.w3.org/2001/XMLSchema#"

  {
    "#{rdf}XMLLiteral"         => REGEXP_VALIDATION,
    "#{rdf}HTML"               => REGEXP_VALIDATION,
    "#{csvw}JSON"              => REGEXP_VALIDATION,
    "#{xsd}anyAtomicType"      => REGEXP_VALIDATION,
    "#{xsd}anyURI"             => REGEXP_VALIDATION,
    "#{xsd}base64Binary"       => REGEXP_VALIDATION,
    "#{xsd}boolean"            => NO_ADDITIONAL_VALIDATION,
    "#{xsd}date"               => NO_ADDITIONAL_VALIDATION,
    "#{xsd}dateTime"           => NO_ADDITIONAL_VALIDATION,
    "#{xsd}dateTimeStamp"      => NO_ADDITIONAL_VALIDATION,
    "#{xsd}decimal"            => NO_ADDITIONAL_VALIDATION,
    "#{xsd}integer"            => NO_ADDITIONAL_VALIDATION,
    "#{xsd}long"               => NO_ADDITIONAL_VALIDATION,
    "#{xsd}int"                => NO_ADDITIONAL_VALIDATION,
    "#{xsd}short"              => NO_ADDITIONAL_VALIDATION,
    "#{xsd}byte"               => NO_ADDITIONAL_VALIDATION,
    "#{xsd}nonNegativeInteger" => NO_ADDITIONAL_VALIDATION,
    "#{xsd}positiveInteger"    => NO_ADDITIONAL_VALIDATION,
    "#{xsd}unsignedLong"       => NO_ADDITIONAL_VALIDATION,
    "#{xsd}unsignedInt"        => NO_ADDITIONAL_VALIDATION,
    "#{xsd}unsignedShort"      => NO_ADDITIONAL_VALIDATION,
    "#{xsd}unsignedByte"       => NO_ADDITIONAL_VALIDATION,
    "#{xsd}nonPositiveInteger" => NO_ADDITIONAL_VALIDATION,
    "#{xsd}negativeInteger"    => NO_ADDITIONAL_VALIDATION,
    "#{xsd}double"             => NO_ADDITIONAL_VALIDATION,
    "#{xsd}duration"           => REGEXP_VALIDATION,
    "#{xsd}dayTimeDuration"    => REGEXP_VALIDATION,
    "#{xsd}yearMonthDuration"  => REGEXP_VALIDATION,
    "#{xsd}float"              => NO_ADDITIONAL_VALIDATION,
    "#{xsd}gDay"               => NO_ADDITIONAL_VALIDATION,
    "#{xsd}gMonth"             => NO_ADDITIONAL_VALIDATION,
    "#{xsd}gMonthDay"          => NO_ADDITIONAL_VALIDATION,
    "#{xsd}gYear"              => NO_ADDITIONAL_VALIDATION,
    "#{xsd}gYearMonth"         => NO_ADDITIONAL_VALIDATION,
    "#{xsd}hexBinary"          => REGEXP_VALIDATION,
    "#{xsd}QName"              => REGEXP_VALIDATION,
    "#{xsd}string"             => REGEXP_VALIDATION,
    "#{xsd}normalizedString"   => REGEXP_VALIDATION,
    "#{xsd}token"              => REGEXP_VALIDATION,
    "#{xsd}language"           => REGEXP_VALIDATION,
    "#{xsd}Name"               => REGEXP_VALIDATION,
    "#{xsd}NMTOKEN"            => REGEXP_VALIDATION,
    "#{xsd}time"               => NO_ADDITIONAL_VALIDATION
  }
end
212
+
213
# Parser for datatypes with no lexical checks: hands the value back
# unchanged with no warning.
ALL_VALUES_VALID = ->(value, format) { [value, nil] }

# Parser shared by the numeric datatypes. Uses the supplied format, or a
# default Csvlint::Csvw::NumberFormat when none is given, and reports
# :invalid_number when that format's parse returns nil.
NUMERIC_PARSER = lambda do |value, format|
  number_format = format.nil? ? Csvlint::Csvw::NumberFormat.new() : format
  parsed = number_format.parse(value)
  parsed.nil? ? [nil, :invalid_number] : [parsed, nil]
end

# Maps each supported datatype URI to a lambda taking (value, format) and
# returning [parsed_value, warning]; warning is nil on success.
DATATYPE_PARSER = begin
  rdf  = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  csvw = "http://www.w3.org/ns/csvw#"
  xsd  = "http://www.w3.org/2001/XMLSchema#"

  # Date/time parser for the XML Schema type with the given local name.
  date_parser = lambda { |local_name, warning| create_date_parser("#{xsd}#{local_name}", warning) }

  # Parser for an integer subtype: delegates to the parser registered for the
  # base type (looked up at call time), relabels any warning with this type's
  # own symbol, then applies the type's range restriction.
  restricted = lambda { |base, warning, acceptable|
    base_uri = "#{xsd}#{base}"
    lambda { |value, format|
      v, w = DATATYPE_PARSER[base_uri].call(value, format)
      return v, warning unless w.nil?
      return nil, warning unless acceptable.call(v)
      return v, w
    }
  }

  {
    "#{rdf}XMLLiteral"    => ALL_VALUES_VALID,
    "#{rdf}HTML"          => ALL_VALUES_VALID,
    "#{csvw}JSON"         => ALL_VALUES_VALID,
    "#{xsd}anyAtomicType" => ALL_VALUES_VALID,
    "#{xsd}anyURI"        => ALL_VALUES_VALID,
    "#{xsd}base64Binary"  => ALL_VALUES_VALID,
    # Without a format the XML Schema literals are accepted; with one, the
    # format's first element means true and its second means false.
    "#{xsd}boolean" => lambda { |value, format|
      if format.nil?
        return true, nil if ["true", "1"].include? value
        return false, nil if ["false", "0"].include? value
      else
        return true, nil if value == format[0]
        return false, nil if value == format[1]
      end
      return value, :invalid_boolean
    },
    "#{xsd}date"          => date_parser.call("date", :invalid_date),
    "#{xsd}dateTime"      => date_parser.call("dateTime", :invalid_date_time),
    "#{xsd}dateTimeStamp" => date_parser.call("dateTimeStamp", :invalid_date_time_stamp),
    # Decimals may not use exponent notation or the special float values.
    "#{xsd}decimal" => lambda { |value, format|
      return nil, :invalid_decimal if value =~ /(E|^(NaN|INF|-INF)$)/
      return NUMERIC_PARSER.call(value, format)
    },
    "#{xsd}integer" => lambda { |value, format|
      v, w = NUMERIC_PARSER.call(value, format)
      return v, :invalid_integer unless w.nil?
      return nil, :invalid_integer unless v.kind_of? Integer
      return v, w
    },
    "#{xsd}long"  => restricted.call("integer", :invalid_long, lambda { |v| v <= 9223372036854775807 && v >= -9223372036854775808 }),
    "#{xsd}int"   => restricted.call("integer", :invalid_int, lambda { |v| v <= 2147483647 && v >= -2147483648 }),
    "#{xsd}short" => restricted.call("integer", :invalid_short, lambda { |v| v <= 32767 && v >= -32768 }),
    "#{xsd}byte"  => restricted.call("integer", :invalid_byte, lambda { |v| v <= 127 && v >= -128 }),
    "#{xsd}nonNegativeInteger" => restricted.call("integer", :invalid_nonNegativeInteger, lambda { |v| v >= 0 }),
    "#{xsd}positiveInteger"    => restricted.call("integer", :invalid_positiveInteger, lambda { |v| v > 0 }),
    "#{xsd}unsignedLong"  => restricted.call("nonNegativeInteger", :invalid_unsignedLong, lambda { |v| v <= 18446744073709551615 }),
    "#{xsd}unsignedInt"   => restricted.call("nonNegativeInteger", :invalid_unsignedInt, lambda { |v| v <= 4294967295 }),
    "#{xsd}unsignedShort" => restricted.call("nonNegativeInteger", :invalid_unsignedShort, lambda { |v| v <= 65535 }),
    "#{xsd}unsignedByte"  => restricted.call("nonNegativeInteger", :invalid_unsignedByte, lambda { |v| v <= 255 }),
    "#{xsd}nonPositiveInteger" => restricted.call("integer", :invalid_nonPositiveInteger, lambda { |v| v <= 0 }),
    "#{xsd}negativeInteger"    => restricted.call("integer", :invalid_negativeInteger, lambda { |v| v < 0 }),
    "#{xsd}double" => NUMERIC_PARSER,
    # regular expressions here taken from XML Schema datatypes spec
    "#{xsd}duration" =>
      create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
    "#{xsd}dayTimeDuration" =>
      create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
    "#{xsd}yearMonthDuration" =>
      create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
    "#{xsd}float"      => NUMERIC_PARSER,
    "#{xsd}gDay"       => date_parser.call("gDay", :invalid_gDay),
    "#{xsd}gMonth"     => date_parser.call("gMonth", :invalid_gMonth),
    "#{xsd}gMonthDay"  => date_parser.call("gMonthDay", :invalid_gMonthDay),
    "#{xsd}gYear"      => date_parser.call("gYear", :invalid_gYear),
    "#{xsd}gYearMonth" => date_parser.call("gYearMonth", :invalid_gYearMonth),
    "#{xsd}hexBinary"        => ALL_VALUES_VALID,
    "#{xsd}QName"            => ALL_VALUES_VALID,
    "#{xsd}string"           => ALL_VALUES_VALID,
    "#{xsd}normalizedString" => ALL_VALUES_VALID,
    "#{xsd}token"            => ALL_VALUES_VALID,
    "#{xsd}language"         => ALL_VALUES_VALID,
    "#{xsd}Name"             => ALL_VALUES_VALID,
    "#{xsd}NMTOKEN"          => ALL_VALUES_VALID,
    "#{xsd}time" => date_parser.call("time", :invalid_time)
  }
end
357
+ end
358
+ end
359
+ end
@@ -0,0 +1,182 @@
1
module Csvlint
  module Csvw
    # Matches and parses date/time strings, either against a date pattern
    # built from the field symbols listed in FIELDS (e.g. "dd/MM/yyyy",
    # "HH:mm:ss.SSS") or, when no pattern is given, against the default
    # lexical form of an XML Schema date/time datatype.
    #
    # All expressions are anchored with \A and \z so the whole value has to
    # match, and "." in a pattern is a literal separator.
    class DateFormat

      attr_reader :pattern

      # pattern  - date pattern string, or nil to use the default form of
      #            datatype.
      # datatype - XML Schema datatype URI; only used when pattern is nil.
      #
      # Raises Csvlint::Csvw::DateFormatError when the pattern still contains
      # date field letters after the symbols known to FIELDS (and any run of
      # "S") have been removed.
      def initialize(pattern, datatype=nil)
        @pattern = pattern

        if @pattern.nil?
          @regexp = DEFAULT_REGEXP[datatype]
          @type = datatype
          return
        end

        # Non-destructive gsub, so frozen pattern strings are accepted too.
        leftover = FIELDS.keys.sort_by { |f| -f.length }.inject(@pattern.gsub(/S+/, "")) do |text, field|
          text.gsub(field, "")
        end
        raise Csvw::DateFormatError, "unrecognised date field symbols in date format" if leftover =~ /[GyYuUrQqMLlwWdDFgEecahHKkjJmsSAzZOvVXx]/

        if (@regexp = DATE_PATTERN_REGEXP[@pattern])
          @type = "http://www.w3.org/2001/XMLSchema#date"
        elsif (@regexp = TIME_PATTERN_REGEXP[@pattern])
          @type = "http://www.w3.org/2001/XMLSchema#time"
        else
          @type = "http://www.w3.org/2001/XMLSchema#dateTime"
          @regexp = DATE_TIME_PATTERN_REGEXP[@pattern] || build_custom_regexp
        end
      end

      # True when the value matches this format, false otherwise.
      def match(value)
        value =~ @regexp ? true : false
      end

      # Parses a value according to this format.
      #
      # Returns nil when the value does not match, or when it matches but is
      # not a real calendar date or time (Date.new / DateTime.new raise
      # ArgumentError). Date formats yield a Date and dateTime formats a
      # DateTime. Every other type yields a Hash of the named fields that
      # were present: integers, except "second" (Float) and "timezone"
      # (normalised to "+hh:mm" / "-hh:mm").
      def parse(value)
        found = @regexp.match(value)
        return nil if found.nil?
        # STDERR.puts(@regexp)
        # STDERR.puts(value)
        # STDERR.puts(found.inspect)
        case @type
        when "http://www.w3.org/2001/XMLSchema#date"
          begin
            return Date.new(found["year"].to_i, found["month"].to_i, found["day"].to_i)
          rescue ArgumentError
            return nil
          end
        when "http://www.w3.org/2001/XMLSchema#dateTime"
          begin
            return DateTime.new(found["year"].to_i, found["month"].to_i, found["day"].to_i, found["hour"].to_i, found["minute"].to_i, (found.names.include?("second") ? found["second"].to_f : 0), found.names.include?("timezone") && found["timezone"] ? found["timezone"] : '')
          rescue ArgumentError
            return nil
          end
        else
          fields = {}
          found.names.each do |field|
            next if found[field].nil?
            case field
            when "timezone"
              tz = found["timezone"]
              tz = "+00:00" if tz == 'Z'
              tz += ':00' if tz.length == 3
              tz = "#{tz[0..2]}:#{tz[3..4]}" unless tz =~ /:/
              fields["timezone"] = tz
            when "second"
              fields["second"] = found["second"].to_f
            else
              fields[field] = found[field].to_i
            end
          end
          return fields
        end
      end

      private

      # Builds the expression for a pattern that is not in one of the
      # predefined tables, and sets @type from the fields it contains
      # ("HH" => time, "yyyy" => date, both => dateTime).
      def build_custom_regexp
        has_time = @pattern =~ /HH/
        has_date = @pattern =~ /yyyy/
        @type = "http://www.w3.org/2001/XMLSchema#date" if has_date && !has_time
        @type = "http://www.w3.org/2001/XMLSchema#time" if has_time && !has_date
        @type = "http://www.w3.org/2001/XMLSchema#dateTime" if has_time && has_date

        # "." in a date pattern is a literal separator, not "any character".
        regexp = @pattern.gsub(".") { "\\." }

        regexp = regexp.sub("HH", FIELDS["HH"].to_s)
        regexp = regexp.sub("mm", FIELDS["mm"].to_s)

        # The number of "S" symbols caps the number of fractional digits.
        # The run is taken from the seconds field itself, so a timezone
        # after it ("ss.SSSXXX") neither blocks the substitution nor
        # inflates the count.
        fraction = @pattern[/ss\.(S+)/, 1]
        if fraction
          regexp = regexp.sub(/ss\\\.S+/) { "(?<second>#{FIELDS["ss"]}(\\.[0-9]{1,#{fraction.length}})?)" }
        else
          regexp = regexp.sub("ss") { "(?<second>#{FIELDS["ss"]})" }
        end

        if has_date
          regexp = regexp.sub("yyyy", FIELDS["yyyy"].to_s)
          regexp = regexp.sub("MM", FIELDS["MM"].to_s)
          regexp = regexp.sub("M", FIELDS["M"].to_s)
          regexp = regexp.sub("dd", FIELDS["dd"].to_s)
          # Only a "d" followed by a separator is a day field; the lookahead
          # keeps the "d" in inserted group names such as (?<second> intact.
          # A backslash counts as a separator because dots are escaped above.
          regexp = regexp.sub(/d(?=[-T \/\\.])/, FIELDS["d"].to_s)
        end

        regexp = regexp.sub("XXX", FIELDS["XXX"].to_s)
        regexp = regexp.sub("XX", FIELDS["XX"].to_s)
        regexp = regexp.sub("X", FIELDS["X"].to_s)
        regexp = regexp.sub("xxx", FIELDS["xxx"].to_s)
        regexp = regexp.sub("xx", FIELDS["xx"].to_s)
        # Skip the "x:" of the "(?-mix:" prefix of already-inserted fields.
        regexp = regexp.sub(/x(?!:)/, FIELDS["x"].to_s)

        Regexp.new("\\A#{regexp}\\z")
      end

      # Expression fragments for the supported date field symbols. The
      # constants below stay publicly readable, as before.
      FIELDS = {
        "yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
        "MM" => /(?<month>0[1-9]|1[0-2])/,
        "M" => /(?<month>[1-9]|1[0-2])/,
        "dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
        "d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
        "HH" => /(?<hour>[01][0-9]|2[0-3])/,
        "mm" => /(?<minute>[0-5][0-9])/,
        "ss" => /([0-6][0-9])/,
        "X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
        "XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
        "XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
        "x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
        "xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
        "xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
      }

      # Compiles one predefined pattern: field symbols become their FIELDS
      # fragment ("ss" wrapped in the named "second" group) and everything
      # else is matched literally.
      compile = lambda { |pattern|
        source = pattern.scan(/yyyy|MM|dd|HH|mm|ss|M|d|./).map { |token|
          if token == "ss"
            "(?<second>#{FIELDS["ss"]})"
          elsif FIELDS.key?(token)
            FIELDS[token].to_s
          else
            Regexp.escape(token)
          end
        }.join
        Regexp.new("\\A#{source}\\z")
      }

      DATE_PATTERN_REGEXP = %w[
        yyyy-MM-dd yyyyMMdd
        dd-MM-yyyy d-M-yyyy MM-dd-yyyy M-d-yyyy
        dd/MM/yyyy d/M/yyyy MM/dd/yyyy M/d/yyyy
        dd.MM.yyyy d.M.yyyy MM.dd.yyyy M.d.yyyy
      ].each_with_object({}) { |pattern, table| table[pattern] = compile.call(pattern) }

      TIME_PATTERN_REGEXP = %w[
        HH:mm:ss HHmmss HH:mm HHmm
      ].each_with_object({}) { |pattern, table| table[pattern] = compile.call(pattern) }

      DATE_TIME_PATTERN_REGEXP = %w[
        yyyy-MM-ddTHH:mm:ss yyyy-MM-ddTHH:mm
      ].each_with_object({}) { |pattern, table| table[pattern] = compile.call(pattern) }

      # Default lexical forms, used when no pattern is given.
      DEFAULT_REGEXP = begin
        year, month, day = FIELDS["yyyy"], FIELDS["MM"], FIELDS["dd"]
        zone = FIELDS["XXX"]
        # "\\." keeps the fractional-second separator a literal dot.
        clock = "#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\\.[0-9]+)?)"

        {
          "http://www.w3.org/2001/XMLSchema#date" =>
            Regexp.new("\\A#{year}-#{month}-#{day}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#dateTime" =>
            Regexp.new("\\A#{year}-#{month}-#{day}T#{clock}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
            Regexp.new("\\A#{year}-#{month}-#{day}T#{clock}#{zone}\\z"),
          "http://www.w3.org/2001/XMLSchema#gDay" =>
            Regexp.new("\\A---#{day}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#gMonth" =>
            Regexp.new("\\A--#{month}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
            Regexp.new("\\A--#{month}-#{day}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#gYear" =>
            Regexp.new("\\A#{year}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
            Regexp.new("\\A#{year}-#{month}#{zone}?\\z"),
          "http://www.w3.org/2001/XMLSchema#time" =>
            Regexp.new("\\A#{clock}#{zone}?\\z")
        }
      end

    end

    # Raised when a date pattern contains unsupported field symbols.
    class DateFormatError < StandardError

    end
  end
end