rdf-tabular 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -40,6 +40,8 @@ module RDF::Tabular
40
40
  # @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
41
41
  # @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
42
42
  # @option options [Boolean] :noProv do not output optional provenance information
43
+ # @option options [Array] :errors
44
+ # array for placing errors found when processing metadata. If not set, and validating, errors are output to `$stderr`
43
45
  # @option options [Array] :warnings
44
46
  # array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
45
47
  # @option options [Array<Hash>] :fks_referencing_table
@@ -0,0 +1,324 @@
1
+ # encoding: UTF-8
2
+ module RDF::Tabular
3
+ ##
4
+ # Utilities for parsing UAX35 dates and numbers.
5
+ #
6
+ # @see http://www.unicode.org/reports/tr35
7
+ module UAX35
8
+
9
##
# Match a value against a UAX35 date/time format and normalize it.
#
# The format is split into an optional trailing timezone picture (a run of
# `x` or `X`, optionally preceded by whitespace), a date picture and a time
# picture. Each picture must be one of the literal forms recognized below.
# The receiver must respond to `base`; when it is `:time`, a single picture
# is treated as a time picture rather than a date picture.
#
# @param [String] format UAX35 picture string; if nil, `value` is returned unchanged
# @param [String] value lexical value to match; nil is treated as ""
# @return [String, nil] XMLSchema version of value, or nil if value does not match format
# @raise [ArgumentError] if the date, time or timezone picture is not recognized
def parse_uax35_date(format, value)
  return value unless format
  value ||= ""

  # Extract tz info
  tz_format = nil
  if md = format.match(/^(.*[dyms])+(\s*[xX]+)$/)
    format, tz_format = md[1], md[2]
  end

  date_format, time_format = format.split(' ')
  # For a time datatype the only picture present describes the time
  date_format, time_format = nil, date_format if self.base.to_sym == :time

  # Extract date, if specified
  date_part = case date_format
  when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
  when 'yyyyMMdd'   then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
  when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
  when 'd-M-yyyy'   then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
  when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
  when 'M-d-yyyy'   then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
  when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
  when 'd/M/yyyy'   then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
  when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
  when 'M/d/yyyy'   then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
  when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
  when 'd.M.yyyy'   then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
  when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
  when 'M.d.yyyy'   then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
  when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
  when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
  when /yyyy-MM-ddTHH:mm:ss\.S+/
    md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
    # The number of S characters is the maximum number of fractional digits
    num_ms = date_format.match(/S+/).to_s.length
    md if md && md[:ms].length <= num_ms
  else
    raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
    nil
  end

  # Forward past date part
  if date_part
    value = value[date_part.to_s.length..-1]
    value = value.lstrip if value.start_with?(' ')
  end

  # Extract time, if specified. This runs even when the date did not match
  # so that an unrecognized time picture is always reported.
  time_part = case time_format
  when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
  when 'HHmmss'   then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
  when 'HH:mm'    then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
  when 'HHmm'     then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
  when /HH:mm:ss\.S+/
    md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
    num_ms = time_format.match(/S+/).to_s.length
    md if md && md[:ms].length <= num_ms
  else
    raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
    nil
  end

  # If there's a date_format but no date_part, match fails
  return nil if date_format && date_part.nil?

  # If there's a time_format but no time_part, match fails
  return nil if time_format && time_part.nil?

  # Forward past time part
  value = value[time_part.to_s.length..-1] if time_part

  # Use datetime match for time
  time_part = date_part if date_part && date_part.names.include?("hr")

  # If there's a timezone, it may optionally start with whitespace
  value = value.lstrip if tz_format.to_s.start_with?(' ')
  tz_part = case tz_format.to_s.lstrip
  when 'x'    then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)$/)
  when 'X'    then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)|(?<z>Z))$/)
  # The alternation with `$` previously sat outside the group, so this
  # picture matched any remainder; it now requires a +hhmm/-hhmm offset.
  when 'xx'   then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))$/)
  when 'XX'   then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))|(?<z>Z))$/)
  when 'xxx'  then value.match(/^(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))$/)
  when 'XXX'  then value.match(/^(?:(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))|(?<z>Z))$/)
  else
    raise ArgumentError, "unrecognized timezone format #{tz_format.to_s.lstrip}" if tz_format
    nil
  end

  # If there's a tz_format but no tz_part, match fails
  return nil if tz_format && tz_part.nil?

  # Compose normalized value
  vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
  vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part

  # Add milliseconds, if matched
  vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?

  value = [vd, vt].compact.join('T')
  if tz_part
    # Only the upper-case pictures define a `z` group; asking a MatchData
    # for an undefined group name raises IndexError, so check first.
    zulu = tz_part.names.include?("z") && tz_part[:z]
    value += zulu ? "Z" : ("%s:%02d" % [tz_part[:hr], tz_part[:mi].to_i])
  end
  value
end
118
+
119
##
# Match a value against a UAX35 number pattern and normalize it.
#
# The value is upcased, matched against the regular expression produced by
# {#build_number_re}, and on success returned with whitespace and grouping
# characters removed and the decimal character replaced by ".".
#
# @param [String] pattern UAX35 number pattern, passed through to {#build_number_re}
# @param [String] value lexical value to match; nil is treated as ""
# @param [String] groupChar grouping separator used in pattern and value
# @param [String] decimalChar decimal separator used in pattern and value
# @return [String, nil] normalized version of value, or nil if value does not match
# @raise [ArgumentError] if pattern is not valid
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
  value ||= ""

  re = build_number_re(pattern, groupChar, decimalChar)

  # Upcase so that the exponent marker and NAN/INF compare case-insensitively
  value = value.upcase

  # The generated expression is anchored with ^ and $, which match at line
  # boundaries; insist that the match covers the entire value so that a
  # multi-line value with one matching line is rejected.
  md = re.match(value)
  return nil unless md && md[0] == value

  # Remove internal spaces and grouping, and normalize the decimal separator
  value.
    gsub(/\s+/, '').
    gsub(groupChar, '').
    gsub(decimalChar, '.')
end
154
+
155
# Build a regular expression from the provided pattern to match value, after suitable modifications
#
# The pattern is split into a literal prefix, a numeric part built from `#`
# (optional digit), `0` (required digit), groupChar, decimalChar and `E`, and
# a literal suffix. The returned expression has the named groups `prefix`,
# `numeric_part` and `suffix`. An optional sign is accepted after the prefix
# unless the prefix itself contains `+` or `-`.
#
# @param [String] pattern UAX35 number pattern; if nil a permissive default expression is returned
# @param [String] groupChar
# @param [String] decimalChar
# @return [Regexp] Regular expression matching value
# @raise [ArgumentError] if format is not valid
def build_number_re(pattern, groupChar, decimalChar)
  # pattern must be composed of only 0, #, decimalChar, groupChar, E, %, and ‰
  ge = Regexp.escape groupChar
  de = Regexp.escape decimalChar

  default_pattern = /^
    ([+-]?
     [\d#{ge}]+
     (#{de}[\d#{ge}]+
       ([Ee][+-]?\d+)?
     )?[%‰]?
    |NAN|INF|-INF)
  $/x

  return default_pattern if pattern.nil?
  numeric_pattern = /
    # Mantissa
    (\#|#{ge})*
    (0|#{ge})*
    # Fractional
    (?:#{de}
      (0|#{ge})*
      (\#|#{ge})*
      # Exponent
      (E
        [+-]?
        (?:\#|#{ge})*
        (?:0|#{ge})*
      )?
    )?
  /x

  legal_number_pattern = /^(?<prefix>[^\#0]*)(?<numeric_part>#{numeric_pattern})(?<suffix>.*)$/x

  match = legal_number_pattern.match(pattern)
  raise ArgumentError, "unrecognized number pattern #{pattern}" if match.nil? || match["numeric_part"].empty?

  prefix, numeric_part, suffix = match["prefix"], match["numeric_part"], match["suffix"]
  prefix = Regexp.escape prefix unless prefix.empty?
  prefix += "[+-]?" unless prefix =~ /[+-]/
  suffix = Regexp.escape suffix unless suffix.empty?

  # Split on decimalChar and E
  parts = numeric_part.split("E")
  mantissa_part, exponent_part = parts[0], (parts[1] || '')

  mantissa_parts = mantissa_part.split(decimalChar)
  raise ArgumentError, "Multiple decimal separators in #{pattern}" if mantissa_parts.length > 2
  integer_part, fractional_part = mantissa_parts[0], mantissa_parts[1] || ''

  # "min" counts only required (0) digits, "all"/"max" counts # digits too
  min_integer_digits = integer_part.gsub(groupChar, '').gsub('#', '').length
  all_integer_digits = integer_part.gsub(groupChar, '').length
  min_fractional_digits = fractional_part.gsub(groupChar, '').gsub('#', '').length
  max_fractional_digits = fractional_part.gsub(groupChar, '').length
  exponent_sign = exponent_part[0] if exponent_part =~ /^[+-]/
  min_exponent_digits = exponent_part.sub(/[+-]/, '').gsub("#", "").length
  max_exponent_digits = exponent_part.sub(/[+-]/, '').length

  # Decide this before the loops below count the digit totals down to zero
  fraction_optional = max_fractional_digits > 0 && min_fractional_digits == 0

  # Group sizes are read from the groups after the first separator
  integer_parts = integer_part.split(groupChar)[1..-1]
  primary_grouping_size = integer_parts[-1].to_s.length
  secondary_grouping_size = integer_parts.length <= 1 ? primary_grouping_size : integer_parts[-2].length

  # Fractional group size is read from the group before the first separator
  fractional_parts = fractional_part.split(groupChar)[0..-2]
  fractional_grouping_size = fractional_parts[0].to_s.length

  # Construct regular expression for integer part
  integer_str = if primary_grouping_size == 0
    all_integer_digits > min_integer_digits ? "\\d{#{min_integer_digits},}" : "\\d{#{min_integer_digits}}"
  else
    # These number of groupings must be there
    integer_parts = []
    # Free digit slots left in the current (rightmost) group. When no digit
    # is required the whole primary group is still free; starting at zero
    # here produced an expression that only matched separator-terminated
    # groups for patterns such as "#,###".
    integer_rem = primary_grouping_size
    while min_integer_digits > 0
      sz = [primary_grouping_size, min_integer_digits].min
      integer_rem = primary_grouping_size - sz
      integer_parts << "\\d{#{sz}}"
      min_integer_digits -= primary_grouping_size
      all_integer_digits -= primary_grouping_size
      primary_grouping_size = secondary_grouping_size
    end
    required_digits = integer_parts.reverse.join(ge)

    if all_integer_digits > 0
      # Add digits up to end of group creating
      # (?:(?:\d)?)\d)? ...
      integer_parts = []
      while integer_rem > 0
        integer_parts << '\d'
        integer_rem -= 1
      end

      # If secondary_grouping_size is not primary_grouping_size, add digits up to secondary_grouping_size
      if secondary_grouping_size != primary_grouping_size
        primary_grouping_size = secondary_grouping_size
        integer_rem = primary_grouping_size - 1
        integer_parts << '\d' + ge

        while integer_rem > 0
          integer_parts << '\d'
          integer_rem -= 1
        end
      end

      # Allow repeated separated groups
      if integer_parts.empty?
        opt_digits = "(?:\\d{1,#{primary_grouping_size}}#{ge})?(?:\\d{#{primary_grouping_size}}#{ge})*"
      else
        integer_parts[-1] = "(?:\\d{1,#{primary_grouping_size}}#{ge})?(?:\\d{#{primary_grouping_size}}#{ge})*#{integer_parts[-1]}"
        # Nest from the leftmost digit outwards so each digit is only
        # allowed when every digit to its right is present
        opt_digits = integer_parts.reverse.inject("") {|memo, part| "(?:#{memo}#{part})?"}
      end

      opt_digits + required_digits
    else
      required_digits
    end
  end

  # Construct regular expression for fractional part
  fractional_str = if max_fractional_digits > 0
    if fractional_grouping_size == 0
      min_fractional_digits == max_fractional_digits ? "\\d{#{max_fractional_digits}}" : "\\d{#{min_fractional_digits},#{max_fractional_digits}}"
    else
      # These number of groupings must be there
      fractional_parts = []
      # Free digit slots left in the current group. With no required digits
      # the first group is entirely free; starting at zero here put a group
      # separator in front of the first fractional digit.
      fractional_rem = fractional_grouping_size
      while min_fractional_digits > 0
        sz = [fractional_grouping_size, min_fractional_digits].min
        fractional_rem = fractional_grouping_size - sz
        fractional_parts << "\\d{#{sz}}"
        max_fractional_digits -= sz
        min_fractional_digits -= sz
      end
      required_digits = fractional_parts.join(ge)

      # If max digits fill within existing group
      fractional_parts = []
      while max_fractional_digits > 0
        fractional_parts << (fractional_rem == 0 ? ge + '\d' : '\d')
        max_fractional_digits -= 1
        fractional_rem = (fractional_rem - 1) % fractional_grouping_size
      end

      opt_digits = fractional_parts.reverse.inject("") {|memo, part| "(?:#{part}#{memo})?"}
      required_digits + opt_digits
    end
  end.to_s
  fractional_str = de + fractional_str unless fractional_str.empty?
  # With no required fractional digit the decimal separator is optional too
  fractional_str = "(?:#{fractional_str})?" if fraction_optional

  # Exponent pattern; nil (interpolated as "") when the pattern has no exponent digits
  exponent_str = if max_exponent_digits > 0
    sign_str = exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'
    if max_exponent_digits == min_exponent_digits
      "E#{sign_str}\\d{#{max_exponent_digits}}"
    else
      "E#{sign_str}\\d{#{min_exponent_digits},#{max_exponent_digits}}"
    end
  end

  Regexp.new("^(?<prefix>#{prefix})(?<numeric_part>#{integer_str}#{fractional_str}#{exponent_str})(?<suffix>#{suffix})$")
end
323
+ end
324
+ end
@@ -1,4 +1,4 @@
1
- # coding: utf-8
1
+ # encoding: UTF-8
2
2
  $:.unshift "."
3
3
  require 'spec_helper'
4
4
 
@@ -308,9 +308,10 @@ describe RDF::Tabular::Metadata do
308
308
 
309
309
  its(:type) {is_expected.to eql :Schema}
310
310
 
311
- it "is invalid if referenced column does not exist" do
311
+ it "is valid if referenced column does not exist" do
312
312
  subject[:columns] = []
313
- expect(subject).not_to be_valid
313
+ expect(subject).to be_valid
314
+ expect(subject.warnings).not_to be_empty
314
315
  end
315
316
 
316
317
  it "is valid with multiple names" do
@@ -322,13 +323,14 @@ describe RDF::Tabular::Metadata do
322
323
  expect(v).to be_valid
323
324
  end
324
325
 
325
- it "is invalid with multiple names if any column missing" do
326
+ it "is valid with multiple names if any column missing" do
326
327
  v = described_class.new({
327
328
  "columns" => [column],
328
329
  "primaryKey" => [column["name"], column2["name"]]},
329
330
  base: RDF::URI("http://example.org/base",
330
331
  debug: @debug))
331
- expect(v).not_to be_valid
332
+ expect(v).to be_valid
333
+ expect(v.warnings).not_to be_empty
332
334
  end
333
335
  end
334
336
 
@@ -369,10 +371,10 @@ describe RDF::Tabular::Metadata do
369
371
  "columnReference" => ["b1", "b2"]
370
372
  }
371
373
  },
372
- "references single column with tableSchema" => {
374
+ "references single column with schemaReference" => {
373
375
  "columnReference" => "a1",
374
376
  "reference" => {
375
- "tableSchema" => "b_s",
377
+ "schemaReference" => "b_s",
376
378
  "columnReference" => "b1"
377
379
  }
378
380
  }
@@ -454,7 +456,7 @@ describe RDF::Tabular::Metadata do
454
456
  specify {is_expected.to be_valid}
455
457
  it_behaves_like("inherited properties", false)
456
458
  it_behaves_like("common properties")
457
- its(:type) {is_expected.to eql :Transformation}
459
+ its(:type) {is_expected.to eql :Template}
458
460
 
459
461
  {
460
462
  source: {
@@ -772,13 +774,13 @@ describe RDF::Tabular::Metadata do
772
774
  {
773
775
  ":type TableGroup" => [{}, {type: :TableGroup}, RDF::Tabular::TableGroup],
774
776
  ":type Table" => [{}, {type: :Table}, RDF::Tabular::Table],
775
- ":type Transformation" => [{}, {type: :Transformation}, RDF::Tabular::Transformation],
777
+ ":type Template" => [{}, {type: :Template}, RDF::Tabular::Transformation],
776
778
  ":type Schema" => [{}, {type: :Schema}, RDF::Tabular::Schema],
777
779
  ":type Column" => [{}, {type: :Column}, RDF::Tabular::Column],
778
780
  ":type Dialect" => [{}, {type: :Dialect}, RDF::Tabular::Dialect],
779
781
  "@type TableGroup" => [{"@type" => "TableGroup"}, RDF::Tabular::TableGroup],
780
782
  "@type Table" => [{"@type" => "Table"}, RDF::Tabular::Table],
781
- "@type Transformation" => [{"@type" => "Transformation"}, RDF::Tabular::Transformation],
783
+ "@type Template" => [{"@type" => "Template"}, RDF::Tabular::Transformation],
782
784
  "@type Schema" => [{"@type" => "Schema"}, RDF::Tabular::Schema],
783
785
  "@type Column" => [{"@type" => "Column"}, RDF::Tabular::Column],
784
786
  "@type Dialect" => [{"@type" => "Dialect"}, RDF::Tabular::Dialect],
@@ -1058,12 +1060,6 @@ describe RDF::Tabular::Metadata do
1058
1060
  format: {"pattern" => '000'},
1059
1061
  value: "123"
1060
1062
  },
1061
- "decimal with wrong pattern" => {
1062
- base: "decimal",
1063
- format: {"pattern" => '0000'},
1064
- value: "123",
1065
- errors: [/123 does not match pattern/]
1066
- },
1067
1063
  "decimal with explicit groupChar" => {
1068
1064
  base: "decimal",
1069
1065
  format: {"groupChar" => ";"},
@@ -1148,20 +1144,20 @@ describe RDF::Tabular::Metadata do
1148
1144
  "valid boolean Y|N N" => {base: "boolean", value: "N", format: "Y|N", result: "false"},
1149
1145
 
1150
1146
  # Dates
1151
- "validate date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"},
1152
- "validate date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"},
1153
- "validate date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"},
1154
- "validate date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"},
1155
- "validate date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"},
1156
- "validate date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"},
1157
- "validate date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"},
1158
- "validate date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"},
1159
- "validate date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"},
1160
- "validate date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"},
1161
- "validate date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"},
1162
- "validate date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"},
1163
- "validate date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"},
1164
- "validate date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"},
1147
+ "valid date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"},
1148
+ "valid date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"},
1149
+ "valid date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"},
1150
+ "valid date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"},
1151
+ "valid date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"},
1152
+ "valid date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"},
1153
+ "valid date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"},
1154
+ "valid date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"},
1155
+ "valid date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"},
1156
+ "valid date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"},
1157
+ "valid date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"},
1158
+ "valid date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"},
1159
+ "valid date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"},
1160
+ "valid date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"},
1165
1161
 
1166
1162
  # Times
1167
1163
  "valid time HH:mm:ss.S" => {base: "time", value: "15:02:37.1", format: "HH:mm:ss.S", result: "15:02:37.1"},
@@ -1188,11 +1184,16 @@ describe RDF::Tabular::Metadata do
1188
1184
 
1189
1185
  # Timezones
1190
1186
  "valid w/TZ yyyy-MM-ddX" => {base: "date", value: "2015-03-22Z", format: "yyyy-MM-ddX", result: "2015-03-22Z"},
1191
- "valid w/TZ dd.MM.yyyy XXXXX" => {base: "date", value: "22.03.2015 Z", format: "dd.MM.yyyy XXXXX", result: "2015-03-22Z"},
1192
- "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", result: "15:02:37-05:00"},
1193
- "valid w/TZ HHmm XX" => {base: "time", value: "1502 +08:00", format: "HHmm XX", result: "15:02:00+08:00"},
1187
+ "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssX", result: "15:02:37-05:00"},
1188
+ "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +0800", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"},
1189
+ "valid w/TZ HHmm XX" => {base: "time", value: "1502 +0800", format: "HHmm XX", result: "15:02:00+08:00"},
1190
+ "valid w/TZ yyyy-MM-dd HH:mm:ss XX" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 -0800", format: "yyyy-MM-dd HH:mm:ss XX", result: "2015-03-15T15:02:37-08:00"},
1191
+ "valid w/TZ HHmm XXX" => {base: "time", value: "1502 +08:00", format: "HHmm XXX", result: "15:02:00+08:00"},
1194
1192
  "valid w/TZ yyyy-MM-ddTHH:mm:ssXXX" => {base: "dateTime", value: "2015-03-15T15:02:37-05:00", format: "yyyy-MM-ddTHH:mm:ssXXX", result: "2015-03-15T15:02:37-05:00"},
1195
- "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +08:00", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"},
1193
+ "invalid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", errors: ["15:02:37-05:00 does not match format HH:mm:ssX"]},
1194
+ "invalid w/TZ HH:mm:ssXX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssXX", errors: ["15:02:37-05 does not match format HH:mm:ssXX"]},
1195
+
1196
+ # Other date-like things
1196
1197
  "valid gDay" => {base: "gDay", value: "---31"},
1197
1198
  "valid gMonth" => {base: "gMonth", value: "--02"},
1198
1199
  "valid gMonthDay" => {base: "gMonthDay", value: "--02-21"},
@@ -1216,7 +1217,43 @@ describe RDF::Tabular::Metadata do
1216
1217
  "valid anyAtomicType" => {base: "anyAtomicType", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.anyAtomicType)},
1217
1218
  "valid anyURI" => {base: "anyURI", value: "http://example.com/", result: RDF::Literal("http://example.com/", datatype: RDF::XSD.anyURI)},
1218
1219
  "valid base64Binary" => {base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)},
1220
+ "base64Binary with matching length:" => {
1221
+ base: "base64Binary",
1222
+ value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
1223
+ length: 45,
1224
+ result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)
1225
+ },
1226
+ "base64Binary with wrong maxLength:" => {
1227
+ base: "base64Binary",
1228
+ value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
1229
+ maxLength: 1,
1230
+ errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not <= 1"]
1231
+ },
1232
+ "base64Binary with wrong minLength" => {
1233
+ base: "base64Binary",
1234
+ value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
1235
+ minLength: 50,
1236
+ errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not >= 50"]
1237
+ },
1219
1238
  "valid hexBinary" => {base: "hexBinary", value: "0FB7", result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)},
1239
+ "hexBinary with matching length:" => {
1240
+ base: "hexBinary",
1241
+ value: "0FB7",
1242
+ length: 2,
1243
+ result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)
1244
+ },
1245
+ "hexBinary with wrong maxLength:" => {
1246
+ base: "hexBinary",
1247
+ value: "0FB7",
1248
+ maxLength: 1,
1249
+ errors: ["decoded 0FB7 has length 2 not <= 1"]
1250
+ },
1251
+ "hexBinary with wrong minLength" => {
1252
+ base: "hexBinary",
1253
+ value: "0FB7",
1254
+ minLength: 4,
1255
+ errors: ["decoded 0FB7 has length 2 not >= 4"]
1256
+ },
1220
1257
  "valid QName" => {base: "QName", value: "foo:bar", result: RDF::Literal("foo:bar", datatype: RDF::XSD.QName)},
1221
1258
  "valid normalizedString" => {base: "normalizedString", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.normalizedString)},
1222
1259
  "valid token" => {base: "token", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.token)},
@@ -1244,7 +1281,7 @@ describe RDF::Tabular::Metadata do
1244
1281
  }
1245
1282
  let(:md) {
1246
1283
  RDF::Tabular::Table.new({
1247
- url: "http://example.com/table.csv",
1284
+ url: "http://example.com/table.csv",
1248
1285
  dialect: {header: false},
1249
1286
  tableSchema: {
1250
1287
  columns: [{
@@ -1293,51 +1330,99 @@ describe RDF::Tabular::Metadata do
1293
1330
  end
1294
1331
  end
1295
1332
 
1296
- describe "#build_number_re" do
1297
- subject {RDF::Tabular::Datatype.new({})}
1333
+ context "Number formats" do
1298
1334
  {
1299
- '#,##0.##' => /^\d{1,}\.\d{,2}$/,
1300
- '#,##0.###' => /^\d{1,}\.\d{,3}$/,
1301
- '###0.#####' => /^\d{1,}\.\d{,5}$/,
1302
- '###0.0000#' => /^\d{1,}\.\d{4,5}$/,
1303
- '00000.0000' => /^\d{5}\.\d{4}$/,
1304
-
1305
- '0' => /^\d{1}$/,
1306
- '00' => /^\d{2}$/,
1307
- '#' => /^\d*$/,
1308
- '##' => /^\d*$/,
1309
-
1310
- '.0' => /^\.\d{1}$/,
1311
- '.00' => /^\.\d{2}$/,
1312
- '.#' => /^\.\d{,1}$/,
1313
- '.##' => /^\.\d{,2}$/,
1314
-
1315
- '+0' => /^+\d{1}$/,
1316
- '-0' => /^-\d{1}$/,
1317
- '%0' => /^%\d{1}$/,
1318
- '0' => /^‰\d{1}$/,
1319
- '0%' => /^\d{1}%$/,
1320
- '0‰' => /^\d{1}‰$/,
1321
- '0.0%' => /^\d{1}\.\d{1}%$/,
1322
-
1323
- '#0.0#E#0' => /^\d{1,}\.\d{1,2}E\d{1,2}$/,
1324
- '#0.0#E+#' => /^\d{1,}\.\d{1,2}E+\d{,1}$/,
1325
- '#0.0#E-00' => /^\d{1,}\.\d{1,2}E-\d{2}$/,
1326
- '#0.0#E#0%' => /^\d{1,}\.\d{1,2}E\d{1,2}%$/,
1327
- }.each do |pattern,regexp|
1328
- it "generates #{regexp} for #{pattern}" do
1329
- expect(subject.build_number_re(pattern, ",", ".")).to eql regexp
1330
- end
1331
- end
1335
+ '0' => {valid: %w(1 -1 +1), invalid: %w(12 1.2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1})(?<suffix>)$/},
1336
+ '00' => {valid: %w(12), invalid: %w(1 123 1,2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{2})(?<suffix>)$/},
1337
+ '#' => {valid: %w(1 12 123), invalid: %w(1.2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{0,})(?<suffix>)$/},
1338
+ '##' => {re: /^(?<prefix>[+-]?)(?<numeric_part>\d{0,})(?<suffix>)$/},
1339
+ '#0' => {re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,})(?<suffix>)$/},
1340
+
1341
+ '0.0' => {valid: %w(1.1 -1.1), invalid: %w(12.1 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{1})(?<suffix>)$/},
1342
+ '0.00' => {valid: %w(1.12 +1.12), invalid: %w(12.12 1.1 1.123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{2})(?<suffix>)$/},
1343
+ '0.#' => {valid: %w(1 1.1), invalid: %w(12.1 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}(?:\.\d{0,1})?)(?<suffix>)$/},
1344
+ '0.##' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}(?:\.\d{0,2})?)(?<suffix>)$/},
1345
+
1346
+ '+0' => {valid: %w(+1), invalid: %w(1 -1 +10), base: "decimal", re: /^(?<prefix>\+)(?<numeric_part>\d{1})(?<suffix>)$/},
1347
+ '-0' => {valid: %w(-1), invalid: %w(1 +1 -10), base: "decimal", re: /^(?<prefix>\-)(?<numeric_part>\d{1})(?<suffix>)$/},
1348
+ '%000' => {valid: %w(%123 %+123 %-123), invalid: %w(%12 %1234 123%), base: "decimal", re: /^(?<prefix>%[+-]?)(?<numeric_part>\d{3})(?<suffix>)$/},
1349
+ '‰000' => {valid: %w(‰123 ‰+123 ‰-123), invalid: %w(‰12 ‰1234 123‰), base: "decimal", re: /^(?<prefix>‰[+-]?)(?<numeric_part>\d{3})(?<suffix>)$/},
1350
+ '000%' => {valid: %w(123% +123% -123%), invalid: %w(12% 1234% %123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3})(?<suffix>%)$/},
1351
+ '000‰' => {valid: %w(123‰ +123‰ -123‰), invalid: %w(12‰ 1234‰ ‰123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3})(?<suffix>‰)$/},
1352
+ '000.0%' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3}\.\d{1})(?<suffix>%)$/},
1353
+
1354
+ '###0.#####' => {valid: %w(1 1.1 12345.12345), invalid: %w(1,234.1 1.123456), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}(?:\.\d{0,5})?)(?<suffix>)$/},
1355
+ '###0.0000#' => {valid: %w(1.1234 1.12345 12345.12345), invalid: %w(1,234.1234 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{4,5})(?<suffix>)$/},
1356
+ '00000.0000' => {valid: %w(12345.1234), invalid: %w(1.2 1,234.123,4), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{5}\.\d{4})(?<suffix>)$/},
1357
+
1358
+ '#0.0#E#0' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?<suffix>)$/},
1359
+ '#0.0#E+#0' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E\+\d{1,2})(?<suffix>)$/},
1360
+ '#0.0#E#0%' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2}%)(?<suffix>)$/},
1361
+ '#0.0#E#0%' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?<suffix>%)$/},
1362
+
1363
+ # Grouping
1364
+ '#,##,##0' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d)?\d{1})(?<suffix>)$/},
1365
+ '#,##,#00' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{2})(?<suffix>)$/},
1366
+ '#,##,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:\d{1,2},)?(?:\d{2},)*\d{3})(?<suffix>)$/},
1367
+ '#,#0,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{1},\d{3})(?<suffix>)$/},
1368
+ '#,00,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:\d{1,2},)?(?:\d{2},)*\d{2},\d{3})(?<suffix>)$/},
1369
+ '0,00,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1},\d{2},\d{3})(?<suffix>)$/},
1370
+
1371
+ '0.0##,###' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{1}(?:\d(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)?)(?<suffix>)$/},
1372
+ '0.00#,###' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{2}(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)(?<suffix>)$/},
1373
+ '0.000,###' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3}(?:,\d(?:\d(?:\d)?)?)?)(?<suffix>)$/},
1374
+ '0.000,0##' => {base: "decimal", re:/^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{1}(?:\d(?:\d)?)?)(?<suffix>)$/},
1375
+ '0.000,00#' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{2}(?:\d)?)(?<suffix>)$/},
1376
+ '0.000,000' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{3})(?<suffix>)$/},
1377
+
1378
+ # Jeni's
1379
+ '##0' => {valid: %w(1 12 123 1234), invalid: %w(1,234 123.4), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,})(?<suffix>)$/},
1380
+ '#,#00' => {valid: %w(12 123 1,234 1,234,567), invalid: %w(1 1234 12,34 12,34,567), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,3},)?(?:\d{3},)*\d)?\d{2})(?<suffix>)$/},
1381
+ '#0.#' => {valid: %w(1 1.2 1234.5), invalid: %w(12.34 1,234.5), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}(?:\.\d{0,1})?)(?<suffix>)$/},
1382
+ '#0.0#,#' => {valid: %w(12.3 12.34 12.34,5), invalid: %w(1 12.345 12.34,56,7 12.34,567), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1}(?:\d(?:,\d)?)?)(?<suffix>)$/},
1383
+ }.each do |pattern, props|
1384
+ context pattern do
1385
+ subject {RDF::Tabular::Datatype.new({})}
1386
+ describe "#build_number_re" do
1387
+ it "generates #{props[:re]} for #{pattern}" do
1388
+ expect(subject.build_number_re(pattern, ",", ".")).to eql props[:re]
1389
+ end if props[:re].is_a?(Regexp)
1390
+
1391
+ it "recognizes bad pattern #{pattern}" do
1392
+ expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError)
1393
+ end if props[:re] == ArgumentError
1394
+ end
1332
1395
 
1333
- %W{
1334
- +%0
1335
- 0#
1336
- 0E0
1337
- 0-
1338
- }.each do |pattern|
1339
- it "recognizes bad pattern #{pattern}" do
1340
- expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError)
1396
+ describe "Metadata" do
1397
+ let(:md) {
1398
+ RDF::Tabular::Table.new({
1399
+ url: "http://example.com/table.csv",
1400
+ dialect: {header: false},
1401
+ tableSchema: {
1402
+ columns: [{
1403
+ name: "name",
1404
+ datatype: {"base" => props[:base], "format" => {"pattern" => pattern}}
1405
+ }]
1406
+ }
1407
+ }, debug: @debug)
1408
+ }
1409
+ describe "valid" do
1410
+ Array(props[:valid]).each do |num|
1411
+ it "for #{num}" do
1412
+ cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first
1413
+ expect(cell).to be_valid
1414
+ end
1415
+ end
1416
+ end
1417
+ describe "invalid" do
1418
+ Array(props[:invalid]).each do |num|
1419
+ it "for #{num}" do
1420
+ cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first
1421
+ expect(cell).not_to be_valid
1422
+ end
1423
+ end
1424
+ end
1425
+ end
1341
1426
  end
1342
1427
  end
1343
1428
  end