rdf-tabular 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +8 -1
- data/VERSION +1 -1
- data/etc/earl.ttl +1255 -708
- data/lib/rdf/tabular.rb +12 -11
- data/lib/rdf/tabular/metadata.rb +107 -254
- data/lib/rdf/tabular/reader.rb +2 -0
- data/lib/rdf/tabular/uax35.rb +324 -0
- data/spec/metadata_spec.rb +163 -78
- data/spec/suite_spec.rb +6 -12
- metadata +3 -2
data/lib/rdf/tabular.rb
CHANGED
@@ -12,17 +12,18 @@ module RDF
|
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
14
|
require 'rdf/tabular/utils'
|
15
|
-
autoload :Column,
|
16
|
-
autoload :CSVW,
|
17
|
-
autoload :Dialect,
|
18
|
-
autoload :JSON,
|
19
|
-
autoload :Metadata,
|
20
|
-
autoload :Reader,
|
21
|
-
autoload :Schema,
|
22
|
-
autoload :Table,
|
23
|
-
autoload :TableGroup,
|
24
|
-
autoload :Transformation,
|
25
|
-
autoload :
|
15
|
+
autoload :Column, 'rdf/tabular/metadata'
|
16
|
+
autoload :CSVW, 'rdf/tabular/csvw'
|
17
|
+
autoload :Dialect, 'rdf/tabular/metadata'
|
18
|
+
autoload :JSON, 'rdf/tabular/literal'
|
19
|
+
autoload :Metadata, 'rdf/tabular/metadata'
|
20
|
+
autoload :Reader, 'rdf/tabular/reader'
|
21
|
+
autoload :Schema, 'rdf/tabular/metadata'
|
22
|
+
autoload :Table, 'rdf/tabular/metadata'
|
23
|
+
autoload :TableGroup, 'rdf/tabular/metadata'
|
24
|
+
autoload :Transformation, 'rdf/tabular/metadata'
|
25
|
+
autoload :UAX35, 'rdf/tabular/uax35'
|
26
|
+
autoload :VERSION, 'rdf/tabular/version'
|
26
27
|
|
27
28
|
# Metadata errors detected
|
28
29
|
class Error < RDF::ReaderError; end
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -179,6 +179,7 @@ module RDF::Tabular
|
|
179
179
|
# @return [Metadata]
|
180
180
|
def self.for_input(input, options = {})
|
181
181
|
base = options[:base]
|
182
|
+
warnings = options.fetch(:warnings, [])
|
182
183
|
|
183
184
|
# Use user metadata, if provided
|
184
185
|
metadata = case options[:metadata]
|
@@ -192,14 +193,21 @@ module RDF::Tabular
|
|
192
193
|
# Search for metadata until found
|
193
194
|
|
194
195
|
# load link metadata, if available
|
195
|
-
all_locs = []
|
196
196
|
if !metadata && input.respond_to?(:links) &&
|
197
197
|
link = input.links.find_link(%w(rel describedby))
|
198
198
|
link_loc = RDF::URI(base).join(link.href).to_s
|
199
199
|
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
200
|
-
|
201
|
-
|
202
|
-
|
200
|
+
if md
|
201
|
+
# Metadata must describe file to be useful
|
202
|
+
if md.describes_file?(base)
|
203
|
+
metadata = md
|
204
|
+
else
|
205
|
+
warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
|
206
|
+
if options[:validate] && !options[:warnings]
|
207
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
203
211
|
end
|
204
212
|
|
205
213
|
locs = []
|
@@ -217,8 +225,18 @@ module RDF::Tabular
|
|
217
225
|
metadata ||= begin
|
218
226
|
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
227
|
# Metadata must describe file to be useful
|
220
|
-
|
221
|
-
|
228
|
+
if md
|
229
|
+
# Metadata must describe file to be useful
|
230
|
+
if md.describes_file?(base)
|
231
|
+
md
|
232
|
+
else
|
233
|
+
warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
|
234
|
+
if options[:validate] && !options[:warnings]
|
235
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
236
|
+
end
|
237
|
+
nil
|
238
|
+
end
|
239
|
+
end
|
222
240
|
rescue IOError
|
223
241
|
debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
224
242
|
nil
|
@@ -226,15 +244,6 @@ module RDF::Tabular
|
|
226
244
|
end
|
227
245
|
end
|
228
246
|
|
229
|
-
# If Metadata was found, but no metadata describes the file, issue a warning
|
230
|
-
if !all_locs.empty? && !metadata
|
231
|
-
warnings = options.fetch(:warnings, [])
|
232
|
-
warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
|
233
|
-
if options[:validate] && !options[:warnings]
|
234
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
247
|
# Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
|
239
248
|
metadata = case
|
240
249
|
when metadata then metadata
|
@@ -279,7 +288,7 @@ module RDF::Tabular
|
|
279
288
|
type ||= case
|
280
289
|
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
281
290
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
282
|
-
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :
|
291
|
+
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Template
|
283
292
|
when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
|
284
293
|
when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
|
285
294
|
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
@@ -289,7 +298,7 @@ module RDF::Tabular
|
|
289
298
|
case type.to_s.to_sym
|
290
299
|
when :TableGroup, :"" then RDF::Tabular::TableGroup
|
291
300
|
when :Table then RDF::Tabular::Table
|
292
|
-
when :
|
301
|
+
when :Template then RDF::Tabular::Transformation
|
293
302
|
when :Schema then RDF::Tabular::Schema
|
294
303
|
when :Column then RDF::Tabular::Column
|
295
304
|
when :Dialect then RDF::Tabular::Dialect
|
@@ -329,29 +338,41 @@ module RDF::Tabular
|
|
329
338
|
|
330
339
|
# Get context from input
|
331
340
|
# Optimize by using built-in version of context, and just extract @base, @lang
|
341
|
+
opt_base = @options[:base]
|
342
|
+
opt_base ||= input.base_uri if input.respond_to?(:base_uri)
|
343
|
+
opt_base ||= input.filename if input.respond_to?(:filename)
|
344
|
+
|
332
345
|
@context = case input['@context']
|
333
346
|
when Array
|
334
347
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
335
|
-
LOCAL_CONTEXT.dup
|
348
|
+
c = LOCAL_CONTEXT.dup
|
349
|
+
c.base = RDF::URI(opt_base)
|
350
|
+
obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
|
351
|
+
raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
|
352
|
+
c.parse(obj)
|
336
353
|
when Hash
|
337
|
-
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
338
|
-
LOCAL_CONTEXT.dup
|
339
|
-
|
354
|
+
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
355
|
+
c = LOCAL_CONTEXT.dup
|
356
|
+
c.base = RDF::URI(opt_base)
|
357
|
+
c.parse(input['@context'])
|
358
|
+
when "http://www.w3.org/ns/csvw"
|
359
|
+
LOCAL_CONTEXT.dup
|
360
|
+
c = LOCAL_CONTEXT.dup
|
361
|
+
c.base = RDF::URI(opt_base)
|
362
|
+
c
|
340
363
|
else
|
341
364
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
342
365
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
343
366
|
LOCAL_CONTEXT.dup
|
367
|
+
c = LOCAL_CONTEXT.dup
|
368
|
+
c.base = RDF::URI(opt_base)
|
369
|
+
c
|
344
370
|
end
|
345
371
|
end
|
346
372
|
|
347
373
|
reason = @options.delete(:reason)
|
348
374
|
|
349
|
-
@options[:base]
|
350
|
-
@options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
|
351
|
-
@options[:base] ||= input.filename if input.respond_to?(:filename)
|
352
|
-
@options[:base] = RDF::URI(@options[:base])
|
353
|
-
|
354
|
-
@context.base = @options[:base] if @context
|
375
|
+
@options[:base] = @context ? @context.base : RDF::URI(opt_base)
|
355
376
|
|
356
377
|
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
357
378
|
warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
@@ -541,7 +562,7 @@ module RDF::Tabular
|
|
541
562
|
end
|
542
563
|
|
543
564
|
# Type of this Metadata
|
544
|
-
# @return [:TableGroup, :Table, :
|
565
|
+
# @return [:TableGroup, :Table, :Template, :Schema, :Column]
|
545
566
|
def type; self.class.name.split('::').last.to_sym; end
|
546
567
|
|
547
568
|
# Base URL of metadata
|
@@ -644,6 +665,7 @@ module RDF::Tabular
|
|
644
665
|
end
|
645
666
|
|
646
667
|
if reference.is_a?(Hash)
|
668
|
+
errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
|
647
669
|
ref_cols = reference['columnReference']
|
648
670
|
schema = if reference.has_key?('resource')
|
649
671
|
if reference.has_key?('schemaReference')
|
@@ -802,7 +824,10 @@ module RDF::Tabular
|
|
802
824
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
803
825
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
804
826
|
Array(value).each do |k|
|
805
|
-
|
827
|
+
unless self.columns.any? {|c| c[:name] == k}
|
828
|
+
warn "#{type} has invalid property '#{key}': column reference not found #{k}"
|
829
|
+
object.delete(key)
|
830
|
+
end
|
806
831
|
end
|
807
832
|
when :@context
|
808
833
|
# Skip these
|
@@ -819,10 +844,13 @@ module RDF::Tabular
|
|
819
844
|
when :@type
|
820
845
|
# Must not be a BNode
|
821
846
|
if value.to_s.start_with?("_:")
|
822
|
-
errors << "#{type} has invalid property '
|
847
|
+
errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
|
823
848
|
end
|
824
|
-
|
825
|
-
|
849
|
+
case type
|
850
|
+
when :Transformation
|
851
|
+
errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
|
852
|
+
else
|
853
|
+
errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
|
826
854
|
end
|
827
855
|
when ->(k) {key.to_s.include?(':')}
|
828
856
|
begin
|
@@ -1005,12 +1033,27 @@ module RDF::Tabular
|
|
1005
1033
|
non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
|
1006
1034
|
object_columns = Array(other.tableSchema.columns)
|
1007
1035
|
|
1008
|
-
# Special case, if there is no header, then there are no column definitions, allow this as being
|
1009
|
-
|
1010
|
-
|
1036
|
+
# Special case, if there is no header, then there are no column definitions, allow this as being compatible
|
1037
|
+
if non_virtual_columns.length != object_columns.length && !object_columns.empty?
|
1038
|
+
if @options[:validate]
|
1039
|
+
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1040
|
+
else
|
1041
|
+
warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1042
|
+
|
1043
|
+
# If present, a virtual column MUST appear after all other non-virtual column definitions
|
1044
|
+
raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
|
1045
|
+
virtual_columns = Array(tableSchema.columns).select(&:virtual)
|
1046
|
+
while non_virtual_columns.length < object_columns.length
|
1047
|
+
non_virtual_columns << nil
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
# Create necessary column entries
|
1051
|
+
tableSchema.columns = non_virtual_columns + virtual_columns
|
1052
|
+
end
|
1053
|
+
end
|
1011
1054
|
index = 0
|
1012
1055
|
object_columns.all? do |cb|
|
1013
|
-
ca = non_virtual_columns[index]
|
1056
|
+
ca = non_virtual_columns[index] || Column.new({})
|
1014
1057
|
ta = ca.titles || {}
|
1015
1058
|
tb = cb.titles || {}
|
1016
1059
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
@@ -1141,8 +1184,7 @@ module RDF::Tabular
|
|
1141
1184
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1142
1185
|
raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
|
1143
1186
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1144
|
-
|
1145
|
-
value.delete('@language')
|
1187
|
+
raise Error, "Value object with @language must use valid language: #{value.to_json}"
|
1146
1188
|
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
1147
1189
|
raise Error, "Value object with @type must defined type: #{value.to_json}"
|
1148
1190
|
end
|
@@ -1232,10 +1274,11 @@ module RDF::Tabular
|
|
1232
1274
|
end
|
1233
1275
|
private
|
1234
1276
|
# Options passed to CSV.new based on dialect
|
1277
|
+
# @todo lineTerminators is ignored, as CSV parser uses single string or `:auto`
|
1235
1278
|
def csv_options
|
1236
1279
|
{
|
1237
1280
|
col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
|
1238
|
-
row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
|
1281
|
+
#row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
|
1239
1282
|
quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
|
1240
1283
|
encoding: (is_a?(Dialect) ? self : dialect).encoding
|
1241
1284
|
}
|
@@ -1591,10 +1634,10 @@ module RDF::Tabular
|
|
1591
1634
|
|
1592
1635
|
# Return or create a name for the column from titles, if it exists
|
1593
1636
|
def name
|
1594
|
-
self[:name] || if titles && (ts = titles[context.default_language || 'und'])
|
1637
|
+
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1595
1638
|
n = Array(ts).first
|
1596
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
|
1597
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/)
|
1639
|
+
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1640
|
+
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1598
1641
|
"#{n0}#{n1}"
|
1599
1642
|
end || "_col.#{number}"
|
1600
1643
|
end
|
@@ -1638,6 +1681,10 @@ module RDF::Tabular
|
|
1638
1681
|
DEFAULTS = {}.freeze
|
1639
1682
|
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
1640
1683
|
|
1684
|
+
# Type of this Metadata
|
1685
|
+
# @return [:Template]
|
1686
|
+
def type; :Template; end
|
1687
|
+
|
1641
1688
|
# Getters and Setters
|
1642
1689
|
PROPERTIES.each do |key, type|
|
1643
1690
|
next if [:url].include?(key)
|
@@ -1709,8 +1756,10 @@ module RDF::Tabular
|
|
1709
1756
|
|
1710
1757
|
define_method("#{key}=".to_sym) do |value|
|
1711
1758
|
invalid = case key
|
1712
|
-
when :commentPrefix, :delimiter, :quoteChar
|
1759
|
+
when :commentPrefix, :delimiter, :quoteChar
|
1713
1760
|
"a string" unless value.is_a?(String)
|
1761
|
+
when :lineTerminators
|
1762
|
+
"a string or array of strings" unless Array(value).all? {|e| e.is_a?(String)}
|
1714
1763
|
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
1715
1764
|
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1716
1765
|
when :encoding
|
@@ -1825,6 +1874,8 @@ module RDF::Tabular
|
|
1825
1874
|
end
|
1826
1875
|
|
1827
1876
|
class Datatype < Metadata
|
1877
|
+
include UAX35
|
1878
|
+
|
1828
1879
|
PROPERTIES = {
|
1829
1880
|
:@id => :link,
|
1830
1881
|
:@type => :atomic,
|
@@ -1888,202 +1939,6 @@ module RDF::Tabular
|
|
1888
1939
|
end
|
1889
1940
|
end
|
1890
1941
|
end
|
1891
|
-
|
1892
|
-
##
|
1893
|
-
# Parse the date format (if provided), and match against the value (if provided)
|
1894
|
-
# Otherwise, validate format and raise an error
|
1895
|
-
#
|
1896
|
-
# @param [String] format
|
1897
|
-
# @param [String] value
|
1898
|
-
# @return [String] XMLSchema version of value
|
1899
|
-
# @raise [ArgumentError] if format is not valid, or nil, if value does not match
|
1900
|
-
def parse_uax35_date(format, value)
|
1901
|
-
tz, date_format, time_format = nil, nil, nil
|
1902
|
-
return value unless format
|
1903
|
-
value ||= ""
|
1904
|
-
|
1905
|
-
# Extract tz info
|
1906
|
-
if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
|
1907
|
-
format, tz = md[1], md[2]
|
1908
|
-
end
|
1909
|
-
|
1910
|
-
date_format, time_format = format.split(' ')
|
1911
|
-
date_format, time_format = nil, date_format if self.base.to_sym == :time
|
1912
|
-
|
1913
|
-
# Extract date, of specified
|
1914
|
-
date_part = case date_format
|
1915
|
-
when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
|
1916
|
-
when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
|
1917
|
-
when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
|
1918
|
-
when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
|
1919
|
-
when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
|
1920
|
-
when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
|
1921
|
-
when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
|
1922
|
-
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
1923
|
-
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
|
1924
|
-
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
1925
|
-
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
1926
|
-
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
1927
|
-
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
1928
|
-
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
1929
|
-
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
|
1930
|
-
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1931
|
-
when /yyyy-MM-ddTHH:mm:ss\.S+/
|
1932
|
-
md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1933
|
-
num_ms = date_format.match(/S+/).to_s.length
|
1934
|
-
md if md && md[:ms].length <= num_ms
|
1935
|
-
else
|
1936
|
-
raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
|
1937
|
-
nil
|
1938
|
-
end
|
1939
|
-
|
1940
|
-
# Forward past date part
|
1941
|
-
if date_part
|
1942
|
-
value = value[date_part.to_s.length..-1]
|
1943
|
-
value = value.lstrip if date_part && value.start_with?(' ')
|
1944
|
-
end
|
1945
|
-
|
1946
|
-
# Extract time, of specified
|
1947
|
-
time_part = case time_format
|
1948
|
-
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1949
|
-
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
|
1950
|
-
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1951
|
-
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1952
|
-
when /HH:mm:ss\.S+/
|
1953
|
-
md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1954
|
-
num_ms = time_format.match(/S+/).to_s.length
|
1955
|
-
md if md && md[:ms].length <= num_ms
|
1956
|
-
else
|
1957
|
-
raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
|
1958
|
-
nil
|
1959
|
-
end
|
1960
|
-
|
1961
|
-
# If there's a date_format but no date_part, match fails
|
1962
|
-
return nil if date_format && date_part.nil?
|
1963
|
-
|
1964
|
-
# If there's a time_format but no time_part, match fails
|
1965
|
-
return nil if time_format && time_part.nil?
|
1966
|
-
|
1967
|
-
# Forward past time part
|
1968
|
-
value = value[time_part.to_s.length..-1] if time_part
|
1969
|
-
|
1970
|
-
# Use datetime match for time
|
1971
|
-
time_part = date_part if date_part && date_part.names.include?("hr")
|
1972
|
-
|
1973
|
-
# If there's a timezone, it may optionally start with whitespace
|
1974
|
-
value = value.lstrip if tz.to_s.start_with?(' ')
|
1975
|
-
tz_part = value if tz
|
1976
|
-
|
1977
|
-
# Compose normalized value
|
1978
|
-
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1979
|
-
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
1980
|
-
|
1981
|
-
# Add milliseconds, if matched
|
1982
|
-
vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
|
1983
|
-
|
1984
|
-
value = [vd, vt].compact.join('T')
|
1985
|
-
value += tz_part.to_s
|
1986
|
-
end
|
1987
|
-
|
1988
|
-
##
|
1989
|
-
# Parse the date format (if provided), and match against the value (if provided)
|
1990
|
-
# Otherwise, validate format and raise an error
|
1991
|
-
#
|
1992
|
-
# @param [String] pattern
|
1993
|
-
# @param [String] value
|
1994
|
-
# @param [String] groupChar
|
1995
|
-
# @param [String] decimalChar
|
1996
|
-
# @return [String] XMLSchema version of value or nil, if value does not match
|
1997
|
-
# @raise [ArgumentError] if format is not valid
|
1998
|
-
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
|
1999
|
-
return value if pattern.to_s.empty?
|
2000
|
-
value ||= ""
|
2001
|
-
|
2002
|
-
re = build_number_re(pattern, groupChar, decimalChar)
|
2003
|
-
|
2004
|
-
# Upcase value and remove internal spaces
|
2005
|
-
value = value.upcase.gsub(/\s+/, '')
|
2006
|
-
|
2007
|
-
# Remove groupChar from value
|
2008
|
-
value = value.gsub(groupChar, '')
|
2009
|
-
|
2010
|
-
# Replace decimalChar with "."
|
2011
|
-
value = value.gsub(decimalChar, '.')
|
2012
|
-
|
2013
|
-
if value =~ re
|
2014
|
-
# result re-assembles parts removed from value
|
2015
|
-
value
|
2016
|
-
else
|
2017
|
-
# no match
|
2018
|
-
nil
|
2019
|
-
end
|
2020
|
-
end
|
2021
|
-
|
2022
|
-
# Build a regular expression from the provided pattern to match value, after suitable modifications
|
2023
|
-
#
|
2024
|
-
# @param [String] pattern
|
2025
|
-
# @param [String] groupChar
|
2026
|
-
# @param [String] decimalChar
|
2027
|
-
# @return [Regexp] Regular expression matching value
|
2028
|
-
# @raise [ArgumentError] if format is not valid
|
2029
|
-
def build_number_re(pattern, groupChar, decimalChar)
|
2030
|
-
# pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
|
2031
|
-
legal_number_pattern = /\A
|
2032
|
-
([%‰])?
|
2033
|
-
([+-])?
|
2034
|
-
# Mantissa
|
2035
|
-
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2036
|
-
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2037
|
-
# Fractional
|
2038
|
-
(?:#{decimalChar == '.' ? '\.' : decimalChar}
|
2039
|
-
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2040
|
-
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2041
|
-
# Exponent
|
2042
|
-
(E
|
2043
|
-
[+-]?
|
2044
|
-
(?:\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2045
|
-
(?:0|#{groupChar == '.' ? '\.' : groupChar})*
|
2046
|
-
)?
|
2047
|
-
)?
|
2048
|
-
([%‰])?
|
2049
|
-
\Z/x
|
2050
|
-
|
2051
|
-
unless pattern =~ legal_number_pattern
|
2052
|
-
raise ArgumentError, "unrecognized number pattern #{pattern}"
|
2053
|
-
end
|
2054
|
-
|
2055
|
-
# Remove groupChar from pattern
|
2056
|
-
pattern = pattern.gsub(groupChar, '')
|
2057
|
-
|
2058
|
-
# Replace decimalChar with "."
|
2059
|
-
pattern = pattern.gsub(decimalChar, '.')
|
2060
|
-
|
2061
|
-
# Split on decimalChar and E
|
2062
|
-
parts = pattern.split(/[\.E]/)
|
2063
|
-
|
2064
|
-
# Construct regular expression
|
2065
|
-
mantissa_str = case parts[0]
|
2066
|
-
when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
|
2067
|
-
when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
|
2068
|
-
when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
|
2069
|
-
end
|
2070
|
-
|
2071
|
-
fractional_str = case parts[1]
|
2072
|
-
when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
|
2073
|
-
when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
|
2074
|
-
when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
|
2075
|
-
end
|
2076
|
-
fractional_str = "\\.#{fractional_str}" if fractional_str
|
2077
|
-
|
2078
|
-
exponent_str = case parts[2]
|
2079
|
-
when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
|
2080
|
-
when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
|
2081
|
-
when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
|
2082
|
-
end
|
2083
|
-
exponent_str = "E#{exponent_str}" if exponent_str
|
2084
|
-
|
2085
|
-
Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
|
2086
|
-
end
|
2087
1942
|
end
|
2088
1943
|
|
2089
1944
|
# Wraps each resulting row
|
@@ -2320,25 +2175,23 @@ module RDF::Tabular
|
|
2320
2175
|
pattern = format["pattern"]
|
2321
2176
|
|
2322
2177
|
if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
|
2323
|
-
value_errors << "#{value} does not match pattern #{pattern}"
|
2178
|
+
value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
|
2324
2179
|
end
|
2325
2180
|
|
2326
2181
|
# pattern facet failed
|
2327
2182
|
value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
|
2328
|
-
value = value.gsub(groupChar, '')
|
2183
|
+
value = value.gsub(groupChar || ',', '')
|
2329
2184
|
value = value.sub(decimalChar, '.')
|
2330
2185
|
|
2331
2186
|
# Extract percent or per-mille sign
|
2332
2187
|
percent = permille = false
|
2333
|
-
|
2334
|
-
|
2335
|
-
|
2336
|
-
|
2337
|
-
|
2338
|
-
|
2339
|
-
|
2340
|
-
permille = true
|
2341
|
-
end
|
2188
|
+
case value
|
2189
|
+
when /%/
|
2190
|
+
value = value.sub('%', '')
|
2191
|
+
percent = true
|
2192
|
+
when /‰/
|
2193
|
+
value = value.sub('‰', '')
|
2194
|
+
permille = true
|
2342
2195
|
end
|
2343
2196
|
|
2344
2197
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
@@ -2408,13 +2261,13 @@ module RDF::Tabular
|
|
2408
2261
|
lit = RDF::Literal.new(value)
|
2409
2262
|
else
|
2410
2263
|
if datatype.length && lit.object.length != datatype.length
|
2411
|
-
value_errors << "decoded #{value}
|
2264
|
+
value_errors << "decoded #{value} has length #{lit.object.length} not #{datatype.length}"
|
2412
2265
|
end
|
2413
2266
|
if datatype.minLength && lit.object.length < datatype.minLength
|
2414
|
-
value_errors << "decoded #{value}
|
2267
|
+
value_errors << "decoded #{value} has length #{lit.object.length} not >= #{datatype.minLength}"
|
2415
2268
|
end
|
2416
|
-
if datatype.maxLength && lit.object.length
|
2417
|
-
value_errors << "decoded #{value}
|
2269
|
+
if datatype.maxLength && lit.object.length > datatype.maxLength
|
2270
|
+
value_errors << "decoded #{value} has length #{lit.object.length} not <= #{datatype.maxLength}"
|
2418
2271
|
end
|
2419
2272
|
end
|
2420
2273
|
when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
|