rdf-tabular 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,6 +40,8 @@ module RDF::Tabular
40
40
  # @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
41
41
  # @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
42
42
  # @option options [Boolean] :noProv do not output optional provenance information
43
+ # @option options [Array] :errors
44
+ # array for placing errors found when processing metadata. If not set, and validating, errors are output to `$stderr`
43
45
  # @option options [Array] :warnings
44
46
  # array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
45
47
  # @option options [Array<Hash>] :fks_referencing_table
@@ -0,0 +1,324 @@
1
+ # encoding: UTF-8
2
+ module RDF::Tabular
3
+ ##
4
+ # Utilities for parsing UAX35 dates and numbers.
5
+ #
6
+ # @see http://www.unicode.org/reports/tr35
7
+ module UAX35
8
+
9
##
# Match a value against a UAX35 date/time format and normalize it to its
# XMLSchema lexical form.
#
# The format is decomposed into an optional date part, an optional time part
# and an optional trailing timezone part (a run of `x` or `X`, possibly
# preceded by whitespace). Every part present in the format must match the
# corresponding portion of the value.
#
# @param [String] format UAX35 date/time format; if nil, value is returned unchanged
# @param [String] value lexical value to match; nil is treated as the empty string
# @return [String, nil] XMLSchema version of value, or nil if value does not match format
# @raise [ArgumentError] if the date, time or timezone format is not recognized
def parse_uax35_date(format, value)
  return value unless format
  value ||= ""

  # Extract tz info: a trailing run of x/X following a date/time field letter
  tz_format = nil
  if md = format.match(/^(.*[dyms])+(\s*[xX]+)$/)
    format, tz_format = md[1], md[2]
  end

  date_format, time_format = format.split(' ')
  # When the datatype base is :time, the only component is a time format
  date_format, time_format = nil, date_format if self.base.to_sym == :time

  # Extract date, if specified
  date_part = case date_format
  when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
  when 'yyyyMMdd'   then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
  when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
  when 'd-M-yyyy'   then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
  when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
  when 'M-d-yyyy'   then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
  when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
  when 'd/M/yyyy'   then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
  when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
  when 'M/d/yyyy'   then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
  when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
  when 'd.M.yyyy'   then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
  when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
  when 'M.d.yyyy'   then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
  when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
  when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
  when /yyyy-MM-ddTHH:mm:ss\.S+/
    md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
    # The value may not carry more fractional digits than the format has S's
    num_ms = date_format.match(/S+/).to_s.length
    md if md && md[:ms].length <= num_ms
  else
    raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
    nil
  end

  # Forward past date part, and past a space separating it from the time
  if date_part
    value = value[date_part.to_s.length..-1]
    value = value.lstrip if value.start_with?(' ')
  end

  # Extract time, if specified
  time_part = case time_format
  when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
  when 'HHmmss'   then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
  when 'HH:mm'    then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
  when 'HHmm'     then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
  when /HH:mm:ss\.S+/
    md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
    num_ms = time_format.match(/S+/).to_s.length
    md if md && md[:ms].length <= num_ms
  else
    raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
    nil
  end

  # If there's a date_format but no date_part, match fails
  return nil if date_format && date_part.nil?

  # If there's a time_format but no time_part, match fails
  return nil if time_format && time_part.nil?

  # Forward past time part
  value = value[time_part.to_s.length..-1] if time_part

  # Combined date-time formats capture the time fields in the date match
  time_part = date_part if date_part && date_part.names.include?("hr")

  # If there's a timezone, it may optionally start with whitespace
  value = value.lstrip if tz_format.to_s.start_with?(' ')
  tz_part = case tz_format.to_s.lstrip
  when 'x'   then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)$/)
  when 'X'   then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)|(?<z>Z))$/)
  # Anchored at both ends like its siblings; a stray `|$` alternative used to accept any remainder
  when 'xx'  then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))$/)
  when 'XX'  then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))|(?<z>Z))$/)
  when 'xxx' then value.match(/^(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))$/)
  when 'XXX' then value.match(/^(?:(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))|(?<z>Z))$/)
  else
    raise ArgumentError, "unrecognized timezone format #{tz_format.to_s.lstrip}" if tz_format
    nil
  end

  # If there's a tz_format but no tz_part, match fails
  return nil if tz_format && tz_part.nil?

  # Compose normalized value
  vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
  vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part

  # Add fractional seconds, if matched
  vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?

  value = [vd, vt].compact.join('T')
  if tz_part
    # Lowercase (x) formats define no `z` group, and MatchData#[] raises
    # IndexError for an unknown group name, so check before reading it.
    zulu = tz_part.names.include?("z") && tz_part[:z]
    value += zulu ? "Z" : ("%s:%02d" % [tz_part[:hr], tz_part[:mi].to_i])
  end
  value
end
118
+
119
##
# Match a value against a UAX35 number pattern and normalize it.
#
# The value is upcased and tested against the expression produced by
# build_number_re. On success, whitespace and grouping separators are
# removed and the decimal separator is replaced by ".".
#
# @param [String] pattern number pattern handed to build_number_re
# @param [String] value lexical value to match; nil is treated as the empty string
# @param [String] groupChar grouping separator
# @param [String] decimalChar decimal separator
# @return [String, nil] normalized version of value, or nil if value does not match
# @raise [ArgumentError] if build_number_re rejects the pattern
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
  value ||= ""
  matcher = build_number_re(pattern, groupChar, decimalChar)

  # Matching is done on the upcased form
  candidate = value.upcase
  return nil unless matcher.match(candidate)

  # Strip whitespace and grouping, then normalize the decimal separator
  without_spaces = candidate.gsub(/\s+/, '')
  without_groups = without_spaces.gsub(groupChar, '')
  without_groups.gsub(decimalChar, '.')
end
154
+
155
+ # Build a regular expression, from the provided number pattern, that a value must match. Returns a permissive default expression if pattern is nil.
156
+ #
157
+ # @param [String] pattern number pattern; may be nil
158
+ # @param [String] groupChar grouping separator used in pattern and value
159
+ # @param [String] decimalChar decimal separator used in pattern and value
160
+ # @return [Regexp] Regular expression matching value; when built from a pattern it has named groups prefix, numeric_part and suffix
161
+ # @raise [ArgumentError] if pattern has no numeric part, or has multiple decimal separators
162
+ def build_number_re(pattern, groupChar, decimalChar)
163
+ # The numeric part of pattern is composed of 0, #, decimalChar, groupChar and E; text before it is a prefix and text after it is a suffix (e.g. +, -, %, or ‰)
164
+ ge = Regexp.escape groupChar
165
+ de = Regexp.escape decimalChar
166
+
167
+ default_pattern = /^
168
+ ([+-]?
169
+ [\d#{ge}]+
170
+ (#{de}[\d#{ge}]+
171
+ ([Ee][+-]?\d+)?
172
+ )?[%‰]?
173
+ |NAN|INF|-INF)
174
+ $/x
175
+
176
+ return default_pattern if pattern.nil?
177
+ numeric_pattern = /
178
+ # Mantissa
179
+ (\#|#{ge})*
180
+ (0|#{ge})*
181
+ # Fractional
182
+ (?:#{de}
183
+ (0|#{ge})*
184
+ (\#|#{ge})*
185
+ # Exponent
186
+ (E
187
+ [+-]?
188
+ (?:\#|#{ge})*
189
+ (?:0|#{ge})*
190
+ )?
191
+ )?
192
+ /x
193
+
194
+ legal_number_pattern = /^(?<prefix>[^\#0]*)(?<numeric_part>#{numeric_pattern})(?<suffix>.*)$/x
195
+
196
+ match = legal_number_pattern.match(pattern)
197
+ raise ArgumentError, "unrecognized number pattern #{pattern}" if match["numeric_part"].empty?
198
+
199
+ prefix, numeric_part, suffix = match["prefix"], match["numeric_part"], match["suffix"]
200
+ prefix = Regexp.escape prefix unless prefix.empty?
201
+ prefix += "[+-]?" unless prefix =~ /[+-]/ # allow an optional sign unless the prefix already contains one
202
+ suffix = Regexp.escape suffix unless suffix.empty?
203
+
204
+ # Split numeric part into mantissa and exponent on E, then the mantissa into integer and fractional parts on decimalChar
205
+ parts = numeric_part.split("E")
206
+ mantissa_part, exponent_part = parts[0], (parts[1] || '')
207
+
208
+ mantissa_parts = mantissa_part.split(decimalChar)
209
+ raise ArgumentError, "Multiple decimal separators in #{pattern}" if mantissa_parts.length > 2
210
+ integer_part, fractional_part = mantissa_parts[0], mantissa_parts[1] || ''
211
+
212
+ min_integer_digits = integer_part.gsub(groupChar, '').gsub('#', '').length
213
+ all_integer_digits = integer_part.gsub(groupChar, '').length
214
+ min_fractional_digits = fractional_part.gsub(groupChar, '').gsub('#', '').length
215
+ max_fractional_digits = fractional_part.gsub(groupChar, '').length
216
+ exponent_sign = exponent_part[0] if exponent_part =~ /^[+-]/
217
+ min_exponent_digits = exponent_part.sub(/[+-]/, '').gsub("#", "").length
218
+ max_exponent_digits = exponent_part.sub(/[+-]/, '').length
219
+
220
+ integer_parts = integer_part.split(groupChar)[1..-1] # groups after the first separator; the last one gives the primary grouping size
221
+ primary_grouping_size = integer_parts[-1].to_s.length
222
+ secondary_grouping_size = integer_parts.length <= 1 ? primary_grouping_size : integer_parts[-2].length
223
+
224
+ fractional_parts = fractional_part.split(groupChar)[0..-2] # groups before the last separator; the first one gives the fractional grouping size
225
+ fractional_grouping_size = fractional_parts[0].to_s.length
226
+
227
+ # Construct regular expression for integer part
228
+ integer_str = if primary_grouping_size == 0
229
+ all_integer_digits > min_integer_digits ? "\\d{#{min_integer_digits},}" : "\\d{#{min_integer_digits}}"
230
+ else
231
+ # Groups of required digits (from 0 placeholders), collected from the rightmost group leftwards and reversed when joined
232
+ integer_parts = []
233
+ integer_rem = 0
234
+ while min_integer_digits > 0
235
+ sz = [primary_grouping_size, min_integer_digits].min
236
+ integer_rem = primary_grouping_size - sz
237
+ integer_parts << "\\d{#{sz}}"
238
+ min_integer_digits -= primary_grouping_size
239
+ all_integer_digits -= primary_grouping_size
240
+ primary_grouping_size = secondary_grouping_size
241
+ end
242
+ required_digits = integer_parts.reverse.join(ge)
243
+
244
+ if all_integer_digits > 0
245
+ # Add digits up to end of group creating
246
+ # (?:(?:\d)?)\d)? ...
247
+ integer_parts = []
248
+ while integer_rem > 0
249
+ integer_parts << '\d'
250
+ integer_rem -= 1
251
+ end
252
+
253
+ # If secondary_grouping_size is not primary_grouping_size, add digits up to secondary_grouping_size
254
+ if secondary_grouping_size != primary_grouping_size
255
+ primary_grouping_size = secondary_grouping_size
256
+ integer_rem = primary_grouping_size - 1
257
+ integer_parts << '\d' + ge
258
+
259
+ while integer_rem > 0
260
+ integer_parts << '\d'
261
+ integer_rem -= 1
262
+ end
263
+ end
264
+
265
+ # Allow repeated separated groups
266
+ if integer_parts.empty?
267
+ opt_digits = "(?:\\d{1,#{primary_grouping_size}}#{ge})?(?:\\d{#{primary_grouping_size}}#{ge})*"
268
+ else
269
+ integer_parts[-1] = "(?:\\d{1,#{primary_grouping_size}}#{ge})?(?:\\d{#{primary_grouping_size}}#{ge})*#{integer_parts[-1]}"
270
+ opt_digits = integer_parts.reverse.inject("") {|memo, part| "(?:#{memo}#{part})?"}
271
+ end
272
+
273
+ opt_digits + required_digits
274
+ else
275
+ required_digits
276
+ end
277
+ end
278
+
279
+ # Construct regular expression for fractional part
280
+ fractional_str = if max_fractional_digits > 0
281
+ if fractional_grouping_size == 0
282
+ min_fractional_digits == max_fractional_digits ? "\\d{#{max_fractional_digits}}" : "\\d{#{min_fractional_digits},#{max_fractional_digits}}"
283
+ else
284
+ # Groups of required digits (from 0 placeholders), in left-to-right order
285
+ fractional_parts = []
286
+ fractional_rem = 0
287
+ while min_fractional_digits > 0
288
+ sz = [fractional_grouping_size, min_fractional_digits].min
289
+ fractional_rem = fractional_grouping_size - sz
290
+ fractional_parts << "\\d{#{sz}}"
291
+ max_fractional_digits -= sz
292
+ min_fractional_digits -= sz
293
+ end
294
+ required_digits = fractional_parts.join(ge)
295
+
296
+ # Optional digits up to the maximum, with a group separator before each digit that starts a new group
297
+ fractional_parts = []
298
+ while max_fractional_digits > 0
299
+ fractional_parts << (fractional_rem == 0 ? ge + '\d' : '\d')
300
+ max_fractional_digits -= 1
301
+ fractional_rem = (fractional_rem - 1) % fractional_grouping_size
302
+ end
303
+
304
+ opt_digits = fractional_parts.reverse.inject("") {|memo, part| "(?:#{part}#{memo})?"}
305
+ required_digits + opt_digits
306
+ end
307
+ end.to_s
308
+ fractional_str = de + fractional_str unless fractional_str.empty?
309
+ fractional_str = "(?:#{fractional_str})?" if max_fractional_digits > 0 && min_fractional_digits == 0
310
+
311
+ # Exponent pattern; uses the sign given in the pattern, otherwise an optional sign
312
+ exponent_str = case
313
+ when max_exponent_digits > 0 && max_exponent_digits == min_exponent_digits
314
+ "E#{exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'}\\d{#{max_exponent_digits}}"
315
+ when max_exponent_digits > 0
316
+ "E#{exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'}\\d{#{min_exponent_digits},#{max_exponent_digits}}"
317
+ when min_exponent_digits > 0 # NOTE(review): appears unreachable, since min_exponent_digits cannot exceed max_exponent_digits - confirm
318
+ "E#{exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'}\\d{#{min_exponent_digits},#{max_exponent_digits}}"
319
+ end
320
+
321
+ Regexp.new("^(?<prefix>#{prefix})(?<numeric_part>#{integer_str}#{fractional_str}#{exponent_str})(?<suffix>#{suffix})$")
323
+ end
324
+ end
@@ -1,4 +1,4 @@
1
- # coding: utf-8
1
+ # encoding: UTF-8
2
2
  $:.unshift "."
3
3
  require 'spec_helper'
4
4
 
@@ -308,9 +308,10 @@ describe RDF::Tabular::Metadata do
308
308
 
309
309
  its(:type) {is_expected.to eql :Schema}
310
310
 
311
- it "is invalid if referenced column does not exist" do
311
+ it "is valid if referenced column does not exist" do
312
312
  subject[:columns] = []
313
- expect(subject).not_to be_valid
313
+ expect(subject).to be_valid
314
+ expect(subject.warnings).not_to be_empty
314
315
  end
315
316
 
316
317
  it "is valid with multiple names" do
@@ -322,13 +323,14 @@ describe RDF::Tabular::Metadata do
322
323
  expect(v).to be_valid
323
324
  end
324
325
 
325
- it "is invalid with multiple names if any column missing" do
326
+ it "is valid with multiple names if any column missing" do
326
327
  v = described_class.new({
327
328
  "columns" => [column],
328
329
  "primaryKey" => [column["name"], column2["name"]]},
329
330
  base: RDF::URI("http://example.org/base",
330
331
  debug: @debug))
331
- expect(v).not_to be_valid
332
+ expect(v).to be_valid
333
+ expect(v.warnings).not_to be_empty
332
334
  end
333
335
  end
334
336
 
@@ -369,10 +371,10 @@ describe RDF::Tabular::Metadata do
369
371
  "columnReference" => ["b1", "b2"]
370
372
  }
371
373
  },
372
- "references single column with tableSchema" => {
374
+ "references single column with schemaReference" => {
373
375
  "columnReference" => "a1",
374
376
  "reference" => {
375
- "tableSchema" => "b_s",
377
+ "schemaReference" => "b_s",
376
378
  "columnReference" => "b1"
377
379
  }
378
380
  }
@@ -454,7 +456,7 @@ describe RDF::Tabular::Metadata do
454
456
  specify {is_expected.to be_valid}
455
457
  it_behaves_like("inherited properties", false)
456
458
  it_behaves_like("common properties")
457
- its(:type) {is_expected.to eql :Transformation}
459
+ its(:type) {is_expected.to eql :Template}
458
460
 
459
461
  {
460
462
  source: {
@@ -772,13 +774,13 @@ describe RDF::Tabular::Metadata do
772
774
  {
773
775
  ":type TableGroup" => [{}, {type: :TableGroup}, RDF::Tabular::TableGroup],
774
776
  ":type Table" => [{}, {type: :Table}, RDF::Tabular::Table],
775
- ":type Transformation" => [{}, {type: :Transformation}, RDF::Tabular::Transformation],
777
+ ":type Template" => [{}, {type: :Template}, RDF::Tabular::Transformation],
776
778
  ":type Schema" => [{}, {type: :Schema}, RDF::Tabular::Schema],
777
779
  ":type Column" => [{}, {type: :Column}, RDF::Tabular::Column],
778
780
  ":type Dialect" => [{}, {type: :Dialect}, RDF::Tabular::Dialect],
779
781
  "@type TableGroup" => [{"@type" => "TableGroup"}, RDF::Tabular::TableGroup],
780
782
  "@type Table" => [{"@type" => "Table"}, RDF::Tabular::Table],
781
- "@type Transformation" => [{"@type" => "Transformation"}, RDF::Tabular::Transformation],
783
+ "@type Template" => [{"@type" => "Template"}, RDF::Tabular::Transformation],
782
784
  "@type Schema" => [{"@type" => "Schema"}, RDF::Tabular::Schema],
783
785
  "@type Column" => [{"@type" => "Column"}, RDF::Tabular::Column],
784
786
  "@type Dialect" => [{"@type" => "Dialect"}, RDF::Tabular::Dialect],
@@ -1058,12 +1060,6 @@ describe RDF::Tabular::Metadata do
1058
1060
  format: {"pattern" => '000'},
1059
1061
  value: "123"
1060
1062
  },
1061
- "decimal with wrong pattern" => {
1062
- base: "decimal",
1063
- format: {"pattern" => '0000'},
1064
- value: "123",
1065
- errors: [/123 does not match pattern/]
1066
- },
1067
1063
  "decimal with explicit groupChar" => {
1068
1064
  base: "decimal",
1069
1065
  format: {"groupChar" => ";"},
@@ -1148,20 +1144,20 @@ describe RDF::Tabular::Metadata do
1148
1144
  "valid boolean Y|N N" => {base: "boolean", value: "N", format: "Y|N", result: "false"},
1149
1145
 
1150
1146
  # Dates
1151
- "validate date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"},
1152
- "validate date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"},
1153
- "validate date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"},
1154
- "validate date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"},
1155
- "validate date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"},
1156
- "validate date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"},
1157
- "validate date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"},
1158
- "validate date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"},
1159
- "validate date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"},
1160
- "validate date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"},
1161
- "validate date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"},
1162
- "validate date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"},
1163
- "validate date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"},
1164
- "validate date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"},
1147
+ "valid date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"},
1148
+ "valid date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"},
1149
+ "valid date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"},
1150
+ "valid date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"},
1151
+ "valid date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"},
1152
+ "valid date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"},
1153
+ "valid date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"},
1154
+ "valid date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"},
1155
+ "valid date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"},
1156
+ "valid date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"},
1157
+ "valid date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"},
1158
+ "valid date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"},
1159
+ "valid date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"},
1160
+ "valid date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"},
1165
1161
 
1166
1162
  # Times
1167
1163
  "valid time HH:mm:ss.S" => {base: "time", value: "15:02:37.1", format: "HH:mm:ss.S", result: "15:02:37.1"},
@@ -1188,11 +1184,16 @@ describe RDF::Tabular::Metadata do
1188
1184
 
1189
1185
  # Timezones
1190
1186
  "valid w/TZ yyyy-MM-ddX" => {base: "date", value: "2015-03-22Z", format: "yyyy-MM-ddX", result: "2015-03-22Z"},
1191
- "valid w/TZ dd.MM.yyyy XXXXX" => {base: "date", value: "22.03.2015 Z", format: "dd.MM.yyyy XXXXX", result: "2015-03-22Z"},
1192
- "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", result: "15:02:37-05:00"},
1193
- "valid w/TZ HHmm XX" => {base: "time", value: "1502 +08:00", format: "HHmm XX", result: "15:02:00+08:00"},
1187
+ "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssX", result: "15:02:37-05:00"},
1188
+ "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +0800", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"},
1189
+ "valid w/TZ HHmm XX" => {base: "time", value: "1502 +0800", format: "HHmm XX", result: "15:02:00+08:00"},
1190
+ "valid w/TZ yyyy-MM-dd HH:mm:ss XX" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 -0800", format: "yyyy-MM-dd HH:mm:ss XX", result: "2015-03-15T15:02:37-08:00"},
1191
+ "valid w/TZ HHmm XXX" => {base: "time", value: "1502 +08:00", format: "HHmm XXX", result: "15:02:00+08:00"},
1194
1192
  "valid w/TZ yyyy-MM-ddTHH:mm:ssXXX" => {base: "dateTime", value: "2015-03-15T15:02:37-05:00", format: "yyyy-MM-ddTHH:mm:ssXXX", result: "2015-03-15T15:02:37-05:00"},
1195
- "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +08:00", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"},
1193
+ "invalid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", errors: ["15:02:37-05:00 does not match format HH:mm:ssX"]},
1194
+ "invalid w/TZ HH:mm:ssXX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssXX", errors: ["15:02:37-05 does not match format HH:mm:ssXX"]},
1195
+
1196
+ # Other date-like things
1196
1197
  "valid gDay" => {base: "gDay", value: "---31"},
1197
1198
  "valid gMonth" => {base: "gMonth", value: "--02"},
1198
1199
  "valid gMonthDay" => {base: "gMonthDay", value: "--02-21"},
@@ -1216,7 +1217,43 @@ describe RDF::Tabular::Metadata do
1216
1217
  "valid anyAtomicType" => {base: "anyAtomicType", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.anyAtomicType)},
1217
1218
  "valid anyURI" => {base: "anyURI", value: "http://example.com/", result: RDF::Literal("http://example.com/", datatype: RDF::XSD.anyURI)},
1218
1219
  "valid base64Binary" => {base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)},
1220
+ "base64Binary with matching length:" => {
1221
+ base: "base64Binary",
1222
+ value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
1223
+ length: 45,
1224
+ result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)
1225
+ },
1226
+ "base64Binary with wrong maxLength:" => {
1227
+ base: "base64Binary",
1228
+ value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
1229
+ maxLength: 1,
1230
+ errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not <= 1"]
1231
+ },
1232
+ "base64Binary with wrong minLength" => {
1233
+ base: "base64Binary",
1234
+ value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
1235
+ minLength: 50,
1236
+ errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not >= 50"]
1237
+ },
1219
1238
  "valid hexBinary" => {base: "hexBinary", value: "0FB7", result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)},
1239
+ "hexBinary with matching length:" => {
1240
+ base: "hexBinary",
1241
+ value: "0FB7",
1242
+ length: 2,
1243
+ result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)
1244
+ },
1245
+ "hexBinary with wrong maxLength:" => {
1246
+ base: "hexBinary",
1247
+ value: "0FB7",
1248
+ maxLength: 1,
1249
+ errors: ["decoded 0FB7 has length 2 not <= 1"]
1250
+ },
1251
+ "hexBinary with wrong minLength" => {
1252
+ base: "hexBinary",
1253
+ value: "0FB7",
1254
+ minLength: 4,
1255
+ errors: ["decoded 0FB7 has length 2 not >= 4"]
1256
+ },
1220
1257
  "valid QName" => {base: "QName", value: "foo:bar", result: RDF::Literal("foo:bar", datatype: RDF::XSD.QName)},
1221
1258
  "valid normalizedString" => {base: "normalizedString", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.normalizedString)},
1222
1259
  "valid token" => {base: "token", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.token)},
@@ -1244,7 +1281,7 @@ describe RDF::Tabular::Metadata do
1244
1281
  }
1245
1282
  let(:md) {
1246
1283
  RDF::Tabular::Table.new({
1247
- url: "http://example.com/table.csv",
1284
+ url: "http://example.com/table.csv",
1248
1285
  dialect: {header: false},
1249
1286
  tableSchema: {
1250
1287
  columns: [{
@@ -1293,51 +1330,99 @@ describe RDF::Tabular::Metadata do
1293
1330
  end
1294
1331
  end
1295
1332
 
1296
- describe "#build_number_re" do
1297
- subject {RDF::Tabular::Datatype.new({})}
1333
+ context "Number formats" do
1298
1334
  {
1299
- '#,##0.##' => /^\d{1,}\.\d{,2}$/,
1300
- '#,##0.###' => /^\d{1,}\.\d{,3}$/,
1301
- '###0.#####' => /^\d{1,}\.\d{,5}$/,
1302
- '###0.0000#' => /^\d{1,}\.\d{4,5}$/,
1303
- '00000.0000' => /^\d{5}\.\d{4}$/,
1304
-
1305
- '0' => /^\d{1}$/,
1306
- '00' => /^\d{2}$/,
1307
- '#' => /^\d*$/,
1308
- '##' => /^\d*$/,
1309
-
1310
- '.0' => /^\.\d{1}$/,
1311
- '.00' => /^\.\d{2}$/,
1312
- '.#' => /^\.\d{,1}$/,
1313
- '.##' => /^\.\d{,2}$/,
1314
-
1315
- '+0' => /^+\d{1}$/,
1316
- '-0' => /^-\d{1}$/,
1317
- '%0' => /^%\d{1}$/,
1318
- '0' => /^‰\d{1}$/,
1319
- '0%' => /^\d{1}%$/,
1320
- '0‰' => /^\d{1}‰$/,
1321
- '0.0%' => /^\d{1}\.\d{1}%$/,
1322
-
1323
- '#0.0#E#0' => /^\d{1,}\.\d{1,2}E\d{1,2}$/,
1324
- '#0.0#E+#' => /^\d{1,}\.\d{1,2}E+\d{,1}$/,
1325
- '#0.0#E-00' => /^\d{1,}\.\d{1,2}E-\d{2}$/,
1326
- '#0.0#E#0%' => /^\d{1,}\.\d{1,2}E\d{1,2}%$/,
1327
- }.each do |pattern,regexp|
1328
- it "generates #{regexp} for #{pattern}" do
1329
- expect(subject.build_number_re(pattern, ",", ".")).to eql regexp
1330
- end
1331
- end
1335
+ '0' => {valid: %w(1 -1 +1), invalid: %w(12 1.2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1})(?<suffix>)$/},
1336
+ '00' => {valid: %w(12), invalid: %w(1 123 1,2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{2})(?<suffix>)$/},
1337
+ '#' => {valid: %w(1 12 123), invalid: %w(1.2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{0,})(?<suffix>)$/},
1338
+ '##' => {re: /^(?<prefix>[+-]?)(?<numeric_part>\d{0,})(?<suffix>)$/},
1339
+ '#0' => {re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,})(?<suffix>)$/},
1340
+
1341
+ '0.0' => {valid: %w(1.1 -1.1), invalid: %w(12.1 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{1})(?<suffix>)$/},
1342
+ '0.00' => {valid: %w(1.12 +1.12), invalid: %w(12.12 1.1 1.123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{2})(?<suffix>)$/},
1343
+ '0.#' => {valid: %w(1 1.1), invalid: %w(12.1 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}(?:\.\d{0,1})?)(?<suffix>)$/},
1344
+ '0.##' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}(?:\.\d{0,2})?)(?<suffix>)$/},
1345
+
1346
+ '+0' => {valid: %w(+1), invalid: %w(1 -1 +10), base: "decimal", re: /^(?<prefix>\+)(?<numeric_part>\d{1})(?<suffix>)$/},
1347
+ '-0' => {valid: %w(-1), invalid: %w(1 +1 -10), base: "decimal", re: /^(?<prefix>\-)(?<numeric_part>\d{1})(?<suffix>)$/},
1348
+ '%000' => {valid: %w(%123 %+123 %-123), invalid: %w(%12 %1234 123%), base: "decimal", re: /^(?<prefix>%[+-]?)(?<numeric_part>\d{3})(?<suffix>)$/},
1349
+ '‰000' => {valid: %w(‰123 ‰+123 ‰-123), invalid: %w(‰12 ‰1234 123‰), base: "decimal", re: /^(?<prefix>‰[+-]?)(?<numeric_part>\d{3})(?<suffix>)$/},
1350
+ '000%' => {valid: %w(123% +123% -123%), invalid: %w(12% 1234% %123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3})(?<suffix>%)$/},
1351
+ '000‰' => {valid: %w(123‰ +123‰ -123‰), invalid: %w(12‰ 1234‰ ‰123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3})(?<suffix>‰)$/},
1352
+ '000.0%' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3}\.\d{1})(?<suffix>%)$/},
1353
+
1354
+ '###0.#####' => {valid: %w(1 1.1 12345.12345), invalid: %w(1,234.1 1.123456), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}(?:\.\d{0,5})?)(?<suffix>)$/},
1355
+ '###0.0000#' => {valid: %w(1.1234 1.12345 12345.12345), invalid: %w(1,234.1234 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{4,5})(?<suffix>)$/},
1356
+ '00000.0000' => {valid: %w(12345.1234), invalid: %w(1.2 1,234.123,4), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{5}\.\d{4})(?<suffix>)$/},
1357
+
1358
+ '#0.0#E#0' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?<suffix>)$/},
1359
+ '#0.0#E+#0' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E\+\d{1,2})(?<suffix>)$/},
1360
+ '#0.0#E#0%' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2}%)(?<suffix>)$/},
1361
+ '#0.0#E#0%' => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?<suffix>%)$/},
1362
+
1363
+ # Grouping
1364
+ '#,##,##0' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d)?\d{1})(?<suffix>)$/},
1365
+ '#,##,#00' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{2})(?<suffix>)$/},
1366
+ '#,##,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:\d{1,2},)?(?:\d{2},)*\d{3})(?<suffix>)$/},
1367
+ '#,#0,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{1},\d{3})(?<suffix>)$/},
1368
+ '#,00,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:\d{1,2},)?(?:\d{2},)*\d{2},\d{3})(?<suffix>)$/},
1369
+ '0,00,000' => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1},\d{2},\d{3})(?<suffix>)$/},
1370
+
1371
+ '0.0##,###' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{1}(?:\d(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)?)(?<suffix>)$/},
1372
+ '0.00#,###' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{2}(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)(?<suffix>)$/},
1373
+ '0.000,###' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3}(?:,\d(?:\d(?:\d)?)?)?)(?<suffix>)$/},
1374
+ '0.000,0##' => {base: "decimal", re:/^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{1}(?:\d(?:\d)?)?)(?<suffix>)$/},
1375
+ '0.000,00#' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{2}(?:\d)?)(?<suffix>)$/},
1376
+ '0.000,000' => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{3})(?<suffix>)$/},
1377
+
1378
+ # Jeni's
1379
+ '##0' => {valid: %w(1 12 123 1234), invalid: %w(1,234 123.4), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,})(?<suffix>)$/},
1380
+ '#,#00' => {valid: %w(12 123 1,234 1,234,567), invalid: %w(1 1234 12,34 12,34,567), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,3},)?(?:\d{3},)*\d)?\d{2})(?<suffix>)$/},
1381
+ '#0.#' => {valid: %w(1 1.2 1234.5), invalid: %w(12.34 1,234.5), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}(?:\.\d{0,1})?)(?<suffix>)$/},
1382
+ '#0.0#,#' => {valid: %w(12.3 12.34 12.34,5), invalid: %w(1 12.345 12.34,56,7 12.34,567), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1}(?:\d(?:,\d)?)?)(?<suffix>)$/},
1383
+ }.each do |pattern, props|
1384
+ context pattern do
1385
+ subject {RDF::Tabular::Datatype.new({})}
1386
+ describe "#build_number_re" do
1387
+ it "generates #{props[:re]} for #{pattern}" do
1388
+ expect(subject.build_number_re(pattern, ",", ".")).to eql props[:re]
1389
+ end if props[:re].is_a?(Regexp)
1390
+
1391
+ it "recognizes bad pattern #{pattern}" do
1392
+ expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError)
1393
+ end if props[:re] == ArgumentError
1394
+ end
1332
1395
 
1333
- %W{
1334
- +%0
1335
- 0#
1336
- 0E0
1337
- 0-
1338
- }.each do |pattern|
1339
- it "recognizes bad pattern #{pattern}" do
1340
- expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError)
1396
+ describe "Metadata" do
1397
+ let(:md) {
1398
+ RDF::Tabular::Table.new({
1399
+ url: "http://example.com/table.csv",
1400
+ dialect: {header: false},
1401
+ tableSchema: {
1402
+ columns: [{
1403
+ name: "name",
1404
+ datatype: {"base" => props[:base], "format" => {"pattern" => pattern}}
1405
+ }]
1406
+ }
1407
+ }, debug: @debug)
1408
+ }
1409
+ describe "valid" do
1410
+ Array(props[:valid]).each do |num|
1411
+ it "for #{num}" do
1412
+ cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first
1413
+ expect(cell).to be_valid
1414
+ end
1415
+ end
1416
+ end
1417
+ describe "invalid" do
1418
+ Array(props[:invalid]).each do |num|
1419
+ it "for #{num}" do
1420
+ cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first
1421
+ expect(cell).not_to be_valid
1422
+ end
1423
+ end
1424
+ end
1425
+ end
1341
1426
  end
1342
1427
  end
1343
1428
  end