csvlint 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +7 -1
  3. data/CHANGELOG.md +19 -1
  4. data/README.md +93 -36
  5. data/bin/csvlint +68 -27
  6. data/csvlint.gemspec +2 -0
  7. data/features/csvw_schema_validation.feature +127 -0
  8. data/features/fixtures/spreadsheet.xlsx +0 -0
  9. data/features/sources.feature +3 -4
  10. data/features/step_definitions/parse_csv_steps.rb +13 -1
  11. data/features/step_definitions/schema_validation_steps.rb +27 -1
  12. data/features/step_definitions/sources_steps.rb +1 -1
  13. data/features/step_definitions/validation_errors_steps.rb +48 -1
  14. data/features/step_definitions/validation_info_steps.rb +5 -1
  15. data/features/step_definitions/validation_warnings_steps.rb +15 -1
  16. data/features/support/load_tests.rb +114 -0
  17. data/features/validation_errors.feature +12 -24
  18. data/features/validation_warnings.feature +18 -6
  19. data/lib/csvlint.rb +10 -0
  20. data/lib/csvlint/csvw/column.rb +359 -0
  21. data/lib/csvlint/csvw/date_format.rb +182 -0
  22. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  23. data/lib/csvlint/csvw/number_format.rb +211 -0
  24. data/lib/csvlint/csvw/property_checker.rb +761 -0
  25. data/lib/csvlint/csvw/table.rb +204 -0
  26. data/lib/csvlint/csvw/table_group.rb +165 -0
  27. data/lib/csvlint/schema.rb +40 -23
  28. data/lib/csvlint/validate.rb +142 -19
  29. data/lib/csvlint/version.rb +1 -1
  30. data/spec/csvw/column_spec.rb +112 -0
  31. data/spec/csvw/date_format_spec.rb +49 -0
  32. data/spec/csvw/number_format_spec.rb +403 -0
  33. data/spec/csvw/table_group_spec.rb +143 -0
  34. data/spec/csvw/table_spec.rb +90 -0
  35. data/spec/schema_spec.rb +27 -1
  36. data/spec/spec_helper.rb +0 -1
  37. data/spec/validator_spec.rb +16 -10
  38. metadata +53 -2
@@ -8,10 +8,20 @@ require 'active_support/core_ext/date/conversions'
8
8
  require 'active_support/core_ext/time/conversions'
9
9
  require 'mime/types'
10
10
  require 'open_uri_redirections'
11
+ require 'uri_template'
11
12
 
12
13
  require 'csvlint/error_message'
13
14
  require 'csvlint/error_collector'
14
15
  require 'csvlint/validate'
15
16
  require 'csvlint/wrapped_io'
16
17
  require 'csvlint/field'
18
+
19
+ require 'csvlint/csvw/metadata_error'
20
+ require 'csvlint/csvw/number_format'
21
+ require 'csvlint/csvw/date_format'
22
+ require 'csvlint/csvw/property_checker'
23
+ require 'csvlint/csvw/column'
24
+ require 'csvlint/csvw/table'
25
+ require 'csvlint/csvw/table_group'
26
+
17
27
  require 'csvlint/schema'
@@ -0,0 +1,359 @@
1
+ module Csvlint
2
+ module Csvw
3
+ class Column
4
+ include Csvlint::ErrorCollector
5
+
6
+ attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :titles, :value_url, :virtual, :annotations
7
+
8
# Builds a column description.
#
# number        - position of the column within its table.
# name          - the column's name (may be nil).
# keyword args  - one per CSVW column property exposed through the
#                 attr_readers; each falls back to the default given here.
# warnings      - messages gathered while the description was being read;
#                 they are appended to this column's own warnings after the
#                 collector state has been reset.
def initialize(number, name,
               id: nil,
               about_url: nil,
               datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
               default: "",
               lang: "und",
               null: [""],
               ordered: false,
               property_url: nil,
               required: false,
               separator: nil,
               source_number: nil,
               suppress_output: false,
               text_direction: :inherit,
               titles: {},
               value_url: nil,
               virtual: false,
               annotations: [],
               warnings: [])
  # identity
  @number, @name, @id = number, name, id
  # the source column defaults to this column's own position
  @source_number = source_number || number

  # value interpretation
  @datatype, @default, @lang, @null = datatype, default, lang, null
  @required, @separator, @ordered = required, separator, ordered

  # URL templates and presentation
  @about_url, @property_url, @value_url = about_url, property_url, value_url
  @suppress_output, @text_direction = suppress_output, text_direction
  @titles, @virtual, @annotations = titles, virtual, annotations

  # reset comes from the included error collector; warnings are added afterwards
  reset
  @warnings += warnings
end
31
+
32
# Creates a Column from its JSON description.
#
# number               - position of the column.
# column_desc          - Hash of property name => raw value.
# base_url, lang       - handed to Csvw::PropertyChecker.check_property.
# inherited_properties - properties inherited from the enclosing
#                        description; cloned, so the caller's hash is
#                        never modified.
#
# Each property is checked and sorted by the type the checker reports
# (annotation, common/column, inherited); anything else becomes an
# :invalid_property warning. Raises Csvlint::Csvw::MetadataError when
# "@type" is present and is not 'Column'.
def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
  annotations = {}
  warnings = []
  column_properties = {}
  inherited_properties = inherited_properties.clone

  column_desc.each do |property, value|
    if property == "@type"
      unless value == 'Column'
        raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'"
      end
      next
    end

    checked, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
    unless warning.nil? || warning.empty?
      warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) }
    end

    case type
    when :annotation
      annotations[property] = checked
    when :common, :column
      column_properties[property] = checked
    when :inherited
      inherited_properties[property] = checked
    else
      warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "column: #{property}", nil)
    end
  end

  # NOTE(review): propertyUrl is read from the raw description rather than
  # from the checked properties like its neighbours - confirm this is intended.
  options = {
    id: column_properties["@id"],
    datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
    lang: inherited_properties["lang"] || "und",
    null: inherited_properties["null"] || [""],
    property_url: column_desc["propertyUrl"],
    required: inherited_properties["required"] || false,
    separator: inherited_properties["separator"],
    titles: column_properties["titles"],
    virtual: column_properties["virtual"] || false,
    annotations: annotations,
    warnings: warnings
  }
  new(number, column_properties["name"], **options)
end
70
+
71
# Checks a header cell against this column's titles.
#
# header - the header text read from the CSV file (may be nil for an
#          empty cell).
#
# Only titles whose language matches the column's language (see
# Column.languages_match) are acceptable. An :invalid_header error is
# recorded when the header is not one of them, including when the column
# has no titles at all.
#
# Returns the collector's valid? result after the check.
def validate_header(header)
  reset
  # Select before flattening: mapping non-matching languages to nil would
  # leave nil entries in the list and make a nil header look valid.
  matching_titles = (@titles || {}).select { |language, _| Column.languages_match(language, lang) }
  valid_headers = matching_titles.values.flatten
  build_errors(:invalid_header, :schema, 1, @number, header, @titles) unless valid_headers.include? header
  valid?
end
77
+
78
# Validates one cell of this column.
#
# string_value - raw cell text; nil is treated as the empty string.
# row          - row number used in any reported error.
#
# The text is parsed into a list of values. Each value goes through the
# required, format, length and value checks, in that order. When parsing
# yields nil (the cell holds one of the column's null markers) only the
# required check runs.
#
# Returns the collector's valid? result.
def validate(string_value, row=nil)
  reset
  values = parse(string_value || "", row)

  if values.nil?
    validate_required(values, row)
  else
    values.each do |value|
      validate_required(value, row)
      validate_format(value, row)
      validate_length(value, row)
      validate_value(value, row)
    end
  end

  valid?
end
91
+
92
# Converts raw cell text into a list of typed values.
#
# Returns nil when the text is one of the column's null markers.
# Otherwise the text is split on the separator (when one is set) and each
# piece is passed to the parser registered in DATATYPE_PARSER for the
# datatype's "base" (or "@id" when there is no base), together with the
# datatype's "format". A piece the parser rejects is reported through
# build_errors and kept in the result as the original string.
def parse(string_value, row=nil)
  return nil if null.include? string_value

  pieces = @separator.nil? ? [string_value] : string_value.split(@separator)
  pieces.map do |piece|
    value, warning = DATATYPE_PARSER[@datatype["base"] || @datatype["@id"]].call(piece, @datatype["format"])
    next value if warning.nil?

    build_errors(warning, :schema, row, @number, piece, @datatype)
    piece
  end
end
107
+
108
+ private
109
class << self

  # Builds a parser lambda for a date/time datatype.
  #
  # type    - datatype URI used to create a default
  #           Csvlint::Csvw::DateFormat when the column declares no format.
  # warning - symbol returned alongside nil when the value cannot be parsed.
  #
  # The lambda takes (value, format) and returns [parsed_value, nil] on
  # success or [nil, warning] on failure.
  def create_date_parser(type, warning)
    lambda { |value, format|
      format = Csvlint::Csvw::DateFormat.new(nil, type) if format.nil?
      parsed = format.parse(value)
      return nil, warning if parsed.nil?
      return parsed, nil
    }
  end

  # Builds a parser lambda that accepts a value only when the WHOLE value
  # matches regexp.
  #
  # The expression is wrapped in \A...\z once, up front, so expressions
  # written without anchors cannot accept values that merely contain a
  # valid fragment (for example "xxP1Yzz" for a duration).
  #
  # The lambda takes (value, format) and returns [value, nil] on success
  # or [nil, warning] on failure; format is ignored.
  def create_regexp_based_parser(regexp, warning)
    whole_value = /\A(?:#{regexp})\z/
    lambda { |value, format|
      return nil, warning unless value =~ whole_value
      return value, nil
    }
  end

  # Tells whether two language tags are compatible: they are equal, either
  # one is "und" (undetermined), or one is a prefix of the other followed
  # by "-" (e.g. "en" and "en-GB").
  #
  # The prefix test is a plain string comparison, so characters that are
  # special in regular expressions are never interpreted.
  def languages_match(l1, l2)
    return true if l1 == l2 || l1 == "und" || l2 == "und"
    first, second = l1.to_s, l2.to_s
    first.start_with?("#{second}-") || second.start_with?("#{first}-")
  end
end
133
+
134
# Records a :required error when the column is required and the value is
# missing (nil).
def validate_required(value, row)
  return unless @required && value.nil?

  build_errors(:required, :schema, row, number, value, { "required" => @required })
end
137
+
138
# Checks a value against the datatype's "length", "minLength" and
# "maxLength" constraints; does nothing when none of them is set.
#
# The measured length is the value's own length, except that
#   * base64Binary values are measured as text-without-trailing-"=" * 3 / 4
#   * hexBinary values are measured as text length / 2
# (the datatype is recognised through either its "@id" or its "base";
# hexBinary wins if both binary types are named).
def validate_length(value, row)
  exact, minimum, maximum = datatype["length"], datatype["minLength"], datatype["maxLength"]
  return unless exact || minimum || maximum

  declared_types = [datatype["@id"], datatype["base"]]
  length =
    if declared_types.include?("http://www.w3.org/2001/XMLSchema#hexBinary")
      value.length / 2
    elsif declared_types.include?("http://www.w3.org/2001/XMLSchema#base64Binary")
      value.gsub(/==?$/,"").length * 3 / 4
    else
      value.length
    end

  build_errors(:min_length, :schema, row, number, value, { "minLength" => minimum }) if minimum && length < minimum
  build_errors(:max_length, :schema, row, number, value, { "maxLength" => maximum }) if maximum && length > maximum
  build_errors(:length, :schema, row, number, value, { "length" => exact }) if exact && length != exact
end
149
+
150
# Checks a value against the datatype's "format", when one is declared.
#
# The validator is looked up in DATATYPE_FORMAT_VALIDATION by the
# datatype's "base", falling back to its "@id" - the same key that parse
# uses to find the parser - so a datatype given only by "@id" is handled
# instead of failing on a nil lookup. A datatype with no registered
# validator has no additional format rule and is accepted.
#
# Records a :format error when the validator rejects the value.
def validate_format(value, row)
  format = datatype["format"]
  return unless format

  validator = DATATYPE_FORMAT_VALIDATION[datatype["base"] || datatype["@id"]]
  return if validator.nil? || validator.call(value, format)

  build_errors(:format, :schema, row, number, value, { "format" => format })
end
155
+
156
# Checks a value against the datatype's range constraints.
#
# Each row below names a constraint key, the error reported for it, and
# the comparison that signals a violation (value <comparison> limit).
# Constraints that are not set are skipped; checks run in the listed order.
def validate_value(value, row)
  [
    ["minInclusive", :min_inclusive, :<],
    ["maxInclusive", :max_inclusive, :>],
    ["minExclusive", :min_exclusive, :<=],
    ["maxExclusive", :max_exclusive, :>=]
  ].each do |constraint, error, violation|
    limit = datatype[constraint]
    next unless limit && value.public_send(violation, limit)

    build_errors(error, :schema, row, number, value, { constraint => limit })
  end
end
162
+
163
# Format check for datatypes whose "format" is a regular expression:
# truthy when the value matches it.
REGEXP_VALIDATION = ->(value, format) { value =~ format }

# Format check for datatypes that need nothing beyond parsing: always true.
NO_ADDITIONAL_VALIDATION = ->(value, format) { true }

# Datatype URI => format check used by validate_format.
# Each row is [namespace, check, local names]; rows are expanded in order.
DATATYPE_FORMAT_VALIDATION = [
  ["http://www.w3.org/1999/02/22-rdf-syntax-ns#", REGEXP_VALIDATION,
   %w[XMLLiteral HTML]],
  ["http://www.w3.org/ns/csvw#", REGEXP_VALIDATION,
   %w[JSON]],
  ["http://www.w3.org/2001/XMLSchema#", REGEXP_VALIDATION,
   %w[anyAtomicType anyURI base64Binary]],
  ["http://www.w3.org/2001/XMLSchema#", NO_ADDITIONAL_VALIDATION,
   %w[boolean date dateTime dateTimeStamp decimal integer long int short byte
      nonNegativeInteger positiveInteger unsignedLong unsignedInt unsignedShort
      unsignedByte nonPositiveInteger negativeInteger double]],
  ["http://www.w3.org/2001/XMLSchema#", REGEXP_VALIDATION,
   %w[duration dayTimeDuration yearMonthDuration]],
  ["http://www.w3.org/2001/XMLSchema#", NO_ADDITIONAL_VALIDATION,
   %w[float gDay gMonth gMonthDay gYear gYearMonth]],
  ["http://www.w3.org/2001/XMLSchema#", REGEXP_VALIDATION,
   %w[hexBinary QName string normalizedString token language Name NMTOKEN]],
  ["http://www.w3.org/2001/XMLSchema#", NO_ADDITIONAL_VALIDATION,
   %w[time]]
].each_with_object({}) do |(namespace, check, local_names), table|
  local_names.each { |local_name| table[namespace + local_name] = check }
end
212
+
213
# Parser for datatypes with no lexical restrictions: the text is the value.
ALL_VALUES_VALID = ->(value, format) { [value, nil] }

# Parser shared by the numeric datatypes. Uses the supplied number format,
# or a default Csvlint::Csvw::NumberFormat when none is declared.
# Returns [number, nil] or [nil, :invalid_number].
NUMERIC_PARSER = lambda { |value, format|
  number_format = format.nil? ? Csvlint::Csvw::NumberFormat.new() : format
  number = number_format.parse(value)
  number.nil? ? [nil, :invalid_number] : [number, nil]
}

# Builds the parser for an integer subtype.
#
# base_type - URI of the datatype whose parser runs first (looked up in
#             DATATYPE_PARSER at call time, so the table can refer to itself).
# warning   - symbol reported for any failure of this subtype.
# lower, upper - inclusive bounds; nil means unbounded on that side.
#
# A failure of the base parser is re-labelled with this subtype's warning
# (keeping whatever value the base returned); an out-of-range number
# yields [nil, warning].
BOUNDED_INTEGER_PARSER = lambda { |base_type, warning, lower, upper|
  lambda { |value, format|
    v, w = DATATYPE_PARSER[base_type].call(value, format)
    return v, warning unless w.nil?
    return nil, warning if (lower && v < lower) || (upper && v > upper)
    return v, w
  }
}

# Datatype URI => parser taking (string, format) and returning
# [value, nil] on success or [value_or_nil, warning_symbol] on failure.
DATATYPE_PARSER = {
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => ALL_VALUES_VALID,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => ALL_VALUES_VALID,
  "http://www.w3.org/ns/csvw#JSON" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyURI" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#base64Binary" => ALL_VALUES_VALID,
  # Without a format the XML Schema literals apply; with one, format[0] is
  # the text for true and format[1] the text for false.
  "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
    truthy, falsy = format.nil? ? [["true", "1"], ["false", "0"]] : [[format[0]], [format[1]]]
    return true, nil if truthy.include? value
    return false, nil if falsy.include? value
    return value, :invalid_boolean
  },
  "http://www.w3.org/2001/XMLSchema#date" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
  "http://www.w3.org/2001/XMLSchema#dateTime" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
  # decimals may not use exponent notation or the special float values
  "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
    if value =~ /(E|^(NaN|INF|-INF)$)/
      [nil, :invalid_decimal]
    else
      NUMERIC_PARSER.call(value, format)
    end
  },
  "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
    number, problem = NUMERIC_PARSER.call(value, format)
    if !problem.nil?
      [number, :invalid_integer]
    elsif number.kind_of? Integer
      [number, problem]
    else
      [nil, :invalid_integer]
    end
  },
  "http://www.w3.org/2001/XMLSchema#long" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_long, -9223372036854775808, 9223372036854775807),
  "http://www.w3.org/2001/XMLSchema#int" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_int, -2147483648, 2147483647),
  "http://www.w3.org/2001/XMLSchema#short" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_short, -32768, 32767),
  "http://www.w3.org/2001/XMLSchema#byte" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_byte, -128, 127),
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_nonNegativeInteger, 0, nil),
  "http://www.w3.org/2001/XMLSchema#positiveInteger" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_positiveInteger, 1, nil),
  "http://www.w3.org/2001/XMLSchema#unsignedLong" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedLong, nil, 18446744073709551615),
  "http://www.w3.org/2001/XMLSchema#unsignedInt" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedInt, nil, 4294967295),
  "http://www.w3.org/2001/XMLSchema#unsignedShort" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedShort, nil, 65535),
  "http://www.w3.org/2001/XMLSchema#unsignedByte" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedByte, nil, 255),
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_nonPositiveInteger, nil, 0),
  "http://www.w3.org/2001/XMLSchema#negativeInteger" =>
    BOUNDED_INTEGER_PARSER.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_negativeInteger, nil, -1),
  "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
  # regular expressions here taken from XML Schema datatypes spec
  "http://www.w3.org/2001/XMLSchema#duration" =>
    create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
    create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
  # NOTE(review): reports :invalid_duration rather than a
  # yearMonthDuration-specific warning - confirm whether that is intended.
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
    create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
  "http://www.w3.org/2001/XMLSchema#gDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
  "http://www.w3.org/2001/XMLSchema#gMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
  "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
  "http://www.w3.org/2001/XMLSchema#gYear" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
  "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
  "http://www.w3.org/2001/XMLSchema#hexBinary" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#QName" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#normalizedString" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#token" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#language" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#Name" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#time" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
}
357
+ end
358
+ end
359
+ end
@@ -0,0 +1,182 @@
1
+ module Csvlint
2
+ module Csvw
3
# Recognises and parses date/time strings, either in the default lexical
# form of an XML Schema date/time datatype or in a pattern built from the
# field symbols listed in FIELDS (yyyy, MM, M, dd, d, HH, mm, ss, optional
# fractional seconds written "ss.SSS", and the timezone markers X/XX/XXX and
# x/xx/xxx).
#
# Every generated expression is anchored with \A...\z, so a value must match
# in full, and every character of a pattern that is not a field symbol is
# matched literally.
class DateFormat

  DATE_TYPE = "http://www.w3.org/2001/XMLSchema#date".freeze
  TIME_TYPE = "http://www.w3.org/2001/XMLSchema#time".freeze
  DATE_TIME_TYPE = "http://www.w3.org/2001/XMLSchema#dateTime".freeze

  # Field symbol => expression matching that field. The named groups are
  # what parse reads the components from.
  FIELDS = {
    "yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
    "MM" => /(?<month>0[1-9]|1[0-2])/,
    "M" => /(?<month>[1-9]|1[0-2])/,
    "dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
    "d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
    "HH" => /(?<hour>[01][0-9]|2[0-3])/,
    "mm" => /(?<minute>[0-5][0-9])/,
    "ss" => /([0-6][0-9])/,
    "X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
    "XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
    "XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
    "x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
    "xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
    "xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
  }.freeze

  # Splits a pattern into field symbols (longest alternative first) and
  # single literal characters.
  PATTERN_TOKEN = /yyyy|MM|M|dd|d|HH|mm|ss\.S+|ss|XXX|XX|X|xxx|xx|x|./m

  # Compiles a date/time pattern into an anchored Regexp.
  #
  # Field symbols become the matching FIELDS expression. "ss" becomes a
  # "second" group; "ss.SSS" additionally allows an optional fraction of at
  # most as many digits as there are S symbols. Everything else is escaped
  # and matched literally.
  def self.compile_pattern(pattern)
    source = pattern.scan(PATTERN_TOKEN).map do |token|
      if token.start_with?("ss")
        # token is "ss" or "ss." followed by the S run
        fraction = token.length > 2 ? "(\\.[0-9]{1,#{token.length - 3}})?" : ""
        "(?<second>#{FIELDS["ss"]}#{fraction})"
      elsif FIELDS.key?(token)
        FIELDS[token].to_s
      else
        Regexp.escape(token)
      end
    end.join
    Regexp.new("\\A#{source}\\z")
  end

  # Pre-compiled expressions for the well-known date patterns.
  DATE_PATTERN_REGEXP = %w[
    yyyy-MM-dd yyyyMMdd
    dd-MM-yyyy d-M-yyyy MM-dd-yyyy M-d-yyyy
    dd/MM/yyyy d/M/yyyy MM/dd/yyyy M/d/yyyy
    dd.MM.yyyy d.M.yyyy MM.dd.yyyy M.d.yyyy
  ].each_with_object({}) { |pattern, table| table[pattern] = compile_pattern(pattern) }.freeze

  # Pre-compiled expressions for the well-known time patterns.
  TIME_PATTERN_REGEXP = %w[
    HH:mm:ss HHmmss HH:mm HHmm
  ].each_with_object({}) { |pattern, table| table[pattern] = compile_pattern(pattern) }.freeze

  # Pre-compiled expressions for the well-known date-time patterns.
  DATE_TIME_PATTERN_REGEXP = %w[
    yyyy-MM-ddTHH:mm:ss yyyy-MM-ddTHH:mm
  ].each_with_object({}) { |pattern, table| table[pattern] = compile_pattern(pattern) }.freeze

  # Datatype URI => expression for that datatype's default lexical form
  # (used when no pattern is given). The timezone is optional everywhere
  # except for dateTimeStamp.
  DEFAULT_REGEXP = {
    "http://www.w3.org/2001/XMLSchema#date" =>
      /\A#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#dateTime" =>
      /\A#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\.[0-9]+)?)#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
      /\A#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\.[0-9]+)?)#{FIELDS["XXX"]}\z/,
    "http://www.w3.org/2001/XMLSchema#gDay" =>
      /\A---#{FIELDS["dd"]}#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#gMonth" =>
      /\A--#{FIELDS["MM"]}#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
      /\A--#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#gYear" =>
      /\A#{FIELDS["yyyy"]}#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
      /\A#{FIELDS["yyyy"]}-#{FIELDS["MM"]}#{FIELDS["XXX"]}?\z/,
    "http://www.w3.org/2001/XMLSchema#time" =>
      /\A#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\.[0-9]+)?)#{FIELDS["XXX"]}?\z/
  }.freeze

  attr_reader :pattern

  # pattern  - a date/time pattern, or nil to use the default lexical form
  #            of datatype.
  # datatype - datatype URI; only consulted when pattern is nil.
  #
  # Raises Csvw::DateFormatError when the pattern contains date field
  # symbols that are not supported.
  def initialize(pattern, datatype=nil)
    @pattern = pattern

    if @pattern.nil?
      @regexp = DEFAULT_REGEXP[datatype]
      @type = datatype
      return
    end

    reject_unrecognised_symbols(@pattern)

    @regexp, @type =
      if DATE_PATTERN_REGEXP.key?(@pattern)
        [DATE_PATTERN_REGEXP[@pattern], DATE_TYPE]
      elsif TIME_PATTERN_REGEXP.key?(@pattern)
        [TIME_PATTERN_REGEXP[@pattern], TIME_TYPE]
      elsif DATE_TIME_PATTERN_REGEXP.key?(@pattern)
        [DATE_TIME_PATTERN_REGEXP[@pattern], DATE_TIME_TYPE]
      else
        [self.class.compile_pattern(@pattern), custom_pattern_type(@pattern)]
      end
  end

  # Returns true when value is written in this format, false otherwise.
  def match(value)
    value =~ @regexp ? true : false
  end

  # Parses value.
  #
  # Returns nil when the value does not match the format or names an
  # impossible date (such as 30 February). Otherwise returns
  #   * a Date for date formats that capture year, month and day,
  #   * a DateTime for date-time formats that capture year to minute,
  #   * for everything else, a Hash of the captured components: "second" as
  #     a Float, "timezone" as "+hh:mm"/"-hh:mm", the rest as Integers.
  def parse(value)
    match = @regexp.match(value)
    return nil if match.nil?

    captured = match.names
    case @type
    when DATE_TYPE
      return build_date(match) if (%w[year month day] - captured).empty?
    when DATE_TIME_TYPE
      return build_date_time(match) if (%w[year month day hour minute] - captured).empty?
    end
    # Formats that do not capture everything a Date/DateTime needs are
    # reported component by component instead of failing on a missing group.
    extract_fields(match)
  end

  private

  # Raises when, after removing every supported field symbol, the pattern
  # still contains a letter that is a date field symbol.
  def reject_unrecognised_symbols(pattern)
    remainder = FIELDS.keys.sort_by { |field| -field.length }.reduce(pattern.gsub(/S+/, "")) do |rest, field|
      rest.gsub(field, "")
    end
    raise Csvw::DateFormatError, "unrecognised date field symbols in date format" if remainder =~ /[GyYuUrQqMLlwWdDFgEecahHKkjJmsSAzZOvVXx]/
  end

  # Classifies a pattern that is not one of the well-known ones by the
  # fields it contains: year and hour => date-time, year only => date,
  # anything else => time (parsed into a Hash of components).
  def custom_pattern_type(pattern)
    return TIME_TYPE unless pattern.include?("yyyy")
    pattern.include?("HH") ? DATE_TIME_TYPE : DATE_TYPE
  end

  def build_date(match)
    Date.new(match["year"].to_i, match["month"].to_i, match["day"].to_i)
  rescue ArgumentError
    nil
  end

  def build_date_time(match)
    captured = match.names
    second = captured.include?("second") ? match["second"].to_f : 0
    # no captured timezone means a zero offset
    offset = captured.include?("timezone") && match["timezone"] ? match["timezone"] : 0
    DateTime.new(match["year"].to_i, match["month"].to_i, match["day"].to_i, match["hour"].to_i, match["minute"].to_i, second, offset)
  rescue ArgumentError
    nil
  end

  def extract_fields(match)
    match.names.each_with_object({}) do |field, components|
      text = match[field]
      next if text.nil?

      components[field] =
        case field
        when "timezone"
          tz = text
          tz = "+00:00" if tz == 'Z'
          tz += ':00' if tz.length == 3                          # "+hh"   -> "+hh:00"
          tz = "#{tz[0..2]}:#{tz[3..4]}" unless tz =~ /:/        # "+hhmm" -> "+hh:mm"
          tz
        when "second"
          text.to_f
        else
          text.to_i
        end
    end
  end

end
177
+
178
# Raised by DateFormat when a date/time pattern contains field symbols
# that are not recognised.
class DateFormatError < StandardError; end
181
+ end
182
+ end