rdf-tabular 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -1
- data/VERSION +1 -1
- data/etc/earl.ttl +1255 -708
- data/lib/rdf/tabular.rb +12 -11
- data/lib/rdf/tabular/metadata.rb +107 -254
- data/lib/rdf/tabular/reader.rb +2 -0
- data/lib/rdf/tabular/uax35.rb +324 -0
- data/spec/metadata_spec.rb +163 -78
- data/spec/suite_spec.rb +6 -12
- metadata +3 -2
data/lib/rdf/tabular.rb
CHANGED
@@ -12,17 +12,18 @@ module RDF
|
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
14
|
require 'rdf/tabular/utils'
|
15
|
-
autoload :Column,
|
16
|
-
autoload :CSVW,
|
17
|
-
autoload :Dialect,
|
18
|
-
autoload :JSON,
|
19
|
-
autoload :Metadata,
|
20
|
-
autoload :Reader,
|
21
|
-
autoload :Schema,
|
22
|
-
autoload :Table,
|
23
|
-
autoload :TableGroup,
|
24
|
-
autoload :Transformation,
|
25
|
-
autoload :
|
15
|
+
autoload :Column, 'rdf/tabular/metadata'
|
16
|
+
autoload :CSVW, 'rdf/tabular/csvw'
|
17
|
+
autoload :Dialect, 'rdf/tabular/metadata'
|
18
|
+
autoload :JSON, 'rdf/tabular/literal'
|
19
|
+
autoload :Metadata, 'rdf/tabular/metadata'
|
20
|
+
autoload :Reader, 'rdf/tabular/reader'
|
21
|
+
autoload :Schema, 'rdf/tabular/metadata'
|
22
|
+
autoload :Table, 'rdf/tabular/metadata'
|
23
|
+
autoload :TableGroup, 'rdf/tabular/metadata'
|
24
|
+
autoload :Transformation, 'rdf/tabular/metadata'
|
25
|
+
autoload :UAX35, 'rdf/tabular/uax35'
|
26
|
+
autoload :VERSION, 'rdf/tabular/version'
|
26
27
|
|
27
28
|
# Metadata errors detected
|
28
29
|
class Error < RDF::ReaderError; end
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -179,6 +179,7 @@ module RDF::Tabular
|
|
179
179
|
# @return [Metadata]
|
180
180
|
def self.for_input(input, options = {})
|
181
181
|
base = options[:base]
|
182
|
+
warnings = options.fetch(:warnings, [])
|
182
183
|
|
183
184
|
# Use user metadata, if provided
|
184
185
|
metadata = case options[:metadata]
|
@@ -192,14 +193,21 @@ module RDF::Tabular
|
|
192
193
|
# Search for metadata until found
|
193
194
|
|
194
195
|
# load link metadata, if available
|
195
|
-
all_locs = []
|
196
196
|
if !metadata && input.respond_to?(:links) &&
|
197
197
|
link = input.links.find_link(%w(rel describedby))
|
198
198
|
link_loc = RDF::URI(base).join(link.href).to_s
|
199
199
|
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
200
|
-
|
201
|
-
|
202
|
-
|
200
|
+
if md
|
201
|
+
# Metadata must describe file to be useful
|
202
|
+
if md.describes_file?(base)
|
203
|
+
metadata = md
|
204
|
+
else
|
205
|
+
warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
|
206
|
+
if options[:validate] && !options[:warnings]
|
207
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
203
211
|
end
|
204
212
|
|
205
213
|
locs = []
|
@@ -217,8 +225,18 @@ module RDF::Tabular
|
|
217
225
|
metadata ||= begin
|
218
226
|
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
227
|
# Metadata must describe file to be useful
|
220
|
-
|
221
|
-
|
228
|
+
if md
|
229
|
+
# Metadata must describe file to be useful
|
230
|
+
if md.describes_file?(base)
|
231
|
+
md
|
232
|
+
else
|
233
|
+
warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
|
234
|
+
if options[:validate] && !options[:warnings]
|
235
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
236
|
+
end
|
237
|
+
nil
|
238
|
+
end
|
239
|
+
end
|
222
240
|
rescue IOError
|
223
241
|
debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
224
242
|
nil
|
@@ -226,15 +244,6 @@ module RDF::Tabular
|
|
226
244
|
end
|
227
245
|
end
|
228
246
|
|
229
|
-
# If Metadata was found, but no metadata describes the file, issue a warning
|
230
|
-
if !all_locs.empty? && !metadata
|
231
|
-
warnings = options.fetch(:warnings, [])
|
232
|
-
warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
|
233
|
-
if options[:validate] && !options[:warnings]
|
234
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
247
|
# Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
|
239
248
|
metadata = case
|
240
249
|
when metadata then metadata
|
@@ -279,7 +288,7 @@ module RDF::Tabular
|
|
279
288
|
type ||= case
|
280
289
|
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
281
290
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
282
|
-
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :
|
291
|
+
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Template
|
283
292
|
when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
|
284
293
|
when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
|
285
294
|
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
@@ -289,7 +298,7 @@ module RDF::Tabular
|
|
289
298
|
case type.to_s.to_sym
|
290
299
|
when :TableGroup, :"" then RDF::Tabular::TableGroup
|
291
300
|
when :Table then RDF::Tabular::Table
|
292
|
-
when :
|
301
|
+
when :Template then RDF::Tabular::Transformation
|
293
302
|
when :Schema then RDF::Tabular::Schema
|
294
303
|
when :Column then RDF::Tabular::Column
|
295
304
|
when :Dialect then RDF::Tabular::Dialect
|
@@ -329,29 +338,41 @@ module RDF::Tabular
|
|
329
338
|
|
330
339
|
# Get context from input
|
331
340
|
# Optimize by using built-in version of context, and just extract @base, @lang
|
341
|
+
opt_base = @options[:base]
|
342
|
+
opt_base ||= input.base_uri if input.respond_to?(:base_uri)
|
343
|
+
opt_base ||= input.filename if input.respond_to?(:filename)
|
344
|
+
|
332
345
|
@context = case input['@context']
|
333
346
|
when Array
|
334
347
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
335
|
-
LOCAL_CONTEXT.dup
|
348
|
+
c = LOCAL_CONTEXT.dup
|
349
|
+
c.base = RDF::URI(opt_base)
|
350
|
+
obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
|
351
|
+
raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
|
352
|
+
c.parse(obj)
|
336
353
|
when Hash
|
337
|
-
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
338
|
-
LOCAL_CONTEXT.dup
|
339
|
-
|
354
|
+
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
355
|
+
c = LOCAL_CONTEXT.dup
|
356
|
+
c.base = RDF::URI(opt_base)
|
357
|
+
c.parse(input['@context'])
|
358
|
+
when "http://www.w3.org/ns/csvw"
|
359
|
+
LOCAL_CONTEXT.dup
|
360
|
+
c = LOCAL_CONTEXT.dup
|
361
|
+
c.base = RDF::URI(opt_base)
|
362
|
+
c
|
340
363
|
else
|
341
364
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
342
365
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
343
366
|
LOCAL_CONTEXT.dup
|
367
|
+
c = LOCAL_CONTEXT.dup
|
368
|
+
c.base = RDF::URI(opt_base)
|
369
|
+
c
|
344
370
|
end
|
345
371
|
end
|
346
372
|
|
347
373
|
reason = @options.delete(:reason)
|
348
374
|
|
349
|
-
@options[:base]
|
350
|
-
@options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
|
351
|
-
@options[:base] ||= input.filename if input.respond_to?(:filename)
|
352
|
-
@options[:base] = RDF::URI(@options[:base])
|
353
|
-
|
354
|
-
@context.base = @options[:base] if @context
|
375
|
+
@options[:base] = @context ? @context.base : RDF::URI(opt_base)
|
355
376
|
|
356
377
|
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
357
378
|
warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
@@ -541,7 +562,7 @@ module RDF::Tabular
|
|
541
562
|
end
|
542
563
|
|
543
564
|
# Type of this Metadata
|
544
|
-
# @return [:TableGroup, :Table, :
|
565
|
+
# @return [:TableGroup, :Table, :Template, :Schema, :Column]
|
545
566
|
def type; self.class.name.split('::').last.to_sym; end
|
546
567
|
|
547
568
|
# Base URL of metadata
|
@@ -644,6 +665,7 @@ module RDF::Tabular
|
|
644
665
|
end
|
645
666
|
|
646
667
|
if reference.is_a?(Hash)
|
668
|
+
errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
|
647
669
|
ref_cols = reference['columnReference']
|
648
670
|
schema = if reference.has_key?('resource')
|
649
671
|
if reference.has_key?('schemaReference')
|
@@ -802,7 +824,10 @@ module RDF::Tabular
|
|
802
824
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
803
825
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
804
826
|
Array(value).each do |k|
|
805
|
-
|
827
|
+
unless self.columns.any? {|c| c[:name] == k}
|
828
|
+
warn "#{type} has invalid property '#{key}': column reference not found #{k}"
|
829
|
+
object.delete(key)
|
830
|
+
end
|
806
831
|
end
|
807
832
|
when :@context
|
808
833
|
# Skip these
|
@@ -819,10 +844,13 @@ module RDF::Tabular
|
|
819
844
|
when :@type
|
820
845
|
# Must not be a BNode
|
821
846
|
if value.to_s.start_with?("_:")
|
822
|
-
errors << "#{type} has invalid property '
|
847
|
+
errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
|
823
848
|
end
|
824
|
-
|
825
|
-
|
849
|
+
case type
|
850
|
+
when :Transformation
|
851
|
+
errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
|
852
|
+
else
|
853
|
+
errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
|
826
854
|
end
|
827
855
|
when ->(k) {key.to_s.include?(':')}
|
828
856
|
begin
|
@@ -1005,12 +1033,27 @@ module RDF::Tabular
|
|
1005
1033
|
non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
|
1006
1034
|
object_columns = Array(other.tableSchema.columns)
|
1007
1035
|
|
1008
|
-
# Special case, if there is no header, then there are no column definitions, allow this as being
|
1009
|
-
|
1010
|
-
|
1036
|
+
# Special case, if there is no header, then there are no column definitions, allow this as being compatible
|
1037
|
+
if non_virtual_columns.length != object_columns.length && !object_columns.empty?
|
1038
|
+
if @options[:validate]
|
1039
|
+
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1040
|
+
else
|
1041
|
+
warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1042
|
+
|
1043
|
+
# If present, a virtual column MUST appear after all other non-virtual column definitions
|
1044
|
+
raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
|
1045
|
+
virtual_columns = Array(tableSchema.columns).select(&:virtual)
|
1046
|
+
while non_virtual_columns.length < object_columns.length
|
1047
|
+
non_virtual_columns << nil
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
# Create necessary column entries
|
1051
|
+
tableSchema.columns = non_virtual_columns + virtual_columns
|
1052
|
+
end
|
1053
|
+
end
|
1011
1054
|
index = 0
|
1012
1055
|
object_columns.all? do |cb|
|
1013
|
-
ca = non_virtual_columns[index]
|
1056
|
+
ca = non_virtual_columns[index] || Column.new({})
|
1014
1057
|
ta = ca.titles || {}
|
1015
1058
|
tb = cb.titles || {}
|
1016
1059
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
@@ -1141,8 +1184,7 @@ module RDF::Tabular
|
|
1141
1184
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1142
1185
|
raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
|
1143
1186
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1144
|
-
|
1145
|
-
value.delete('@language')
|
1187
|
+
raise Error, "Value object with @language must use valid language: #{value.to_json}"
|
1146
1188
|
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
1147
1189
|
raise Error, "Value object with @type must defined type: #{value.to_json}"
|
1148
1190
|
end
|
@@ -1232,10 +1274,11 @@ module RDF::Tabular
|
|
1232
1274
|
end
|
1233
1275
|
private
|
1234
1276
|
# Options passed to CSV.new based on dialect
|
1277
|
+
# @todo lineTerminators is ignored, as CSV parser uses single string or `:auto`
|
1235
1278
|
def csv_options
|
1236
1279
|
{
|
1237
1280
|
col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
|
1238
|
-
row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
|
1281
|
+
#row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
|
1239
1282
|
quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
|
1240
1283
|
encoding: (is_a?(Dialect) ? self : dialect).encoding
|
1241
1284
|
}
|
@@ -1591,10 +1634,10 @@ module RDF::Tabular
|
|
1591
1634
|
|
1592
1635
|
# Return or create a name for the column from titles, if it exists
|
1593
1636
|
def name
|
1594
|
-
self[:name] || if titles && (ts = titles[context.default_language || 'und'])
|
1637
|
+
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1595
1638
|
n = Array(ts).first
|
1596
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
|
1597
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/)
|
1639
|
+
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1640
|
+
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1598
1641
|
"#{n0}#{n1}"
|
1599
1642
|
end || "_col.#{number}"
|
1600
1643
|
end
|
@@ -1638,6 +1681,10 @@ module RDF::Tabular
|
|
1638
1681
|
DEFAULTS = {}.freeze
|
1639
1682
|
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
1640
1683
|
|
1684
|
+
# Type of this Metadata
|
1685
|
+
# @return [:Template]
|
1686
|
+
def type; :Template; end
|
1687
|
+
|
1641
1688
|
# Getters and Setters
|
1642
1689
|
PROPERTIES.each do |key, type|
|
1643
1690
|
next if [:url].include?(key)
|
@@ -1709,8 +1756,10 @@ module RDF::Tabular
|
|
1709
1756
|
|
1710
1757
|
define_method("#{key}=".to_sym) do |value|
|
1711
1758
|
invalid = case key
|
1712
|
-
when :commentPrefix, :delimiter, :quoteChar
|
1759
|
+
when :commentPrefix, :delimiter, :quoteChar
|
1713
1760
|
"a string" unless value.is_a?(String)
|
1761
|
+
when :lineTerminators
|
1762
|
+
"a string or array of strings" unless Array(value).all? {|e| e.is_a?(String)}
|
1714
1763
|
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
1715
1764
|
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1716
1765
|
when :encoding
|
@@ -1825,6 +1874,8 @@ module RDF::Tabular
|
|
1825
1874
|
end
|
1826
1875
|
|
1827
1876
|
class Datatype < Metadata
|
1877
|
+
include UAX35
|
1878
|
+
|
1828
1879
|
PROPERTIES = {
|
1829
1880
|
:@id => :link,
|
1830
1881
|
:@type => :atomic,
|
@@ -1888,202 +1939,6 @@ module RDF::Tabular
|
|
1888
1939
|
end
|
1889
1940
|
end
|
1890
1941
|
end
|
1891
|
-
|
1892
|
-
##
|
1893
|
-
# Parse the date format (if provided), and match against the value (if provided)
|
1894
|
-
# Otherwise, validate format and raise an error
|
1895
|
-
#
|
1896
|
-
# @param [String] format
|
1897
|
-
# @param [String] value
|
1898
|
-
# @return [String] XMLSchema version of value
|
1899
|
-
# @raise [ArgumentError] if format is not valid, or nil, if value does not match
|
1900
|
-
def parse_uax35_date(format, value)
|
1901
|
-
tz, date_format, time_format = nil, nil, nil
|
1902
|
-
return value unless format
|
1903
|
-
value ||= ""
|
1904
|
-
|
1905
|
-
# Extract tz info
|
1906
|
-
if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
|
1907
|
-
format, tz = md[1], md[2]
|
1908
|
-
end
|
1909
|
-
|
1910
|
-
date_format, time_format = format.split(' ')
|
1911
|
-
date_format, time_format = nil, date_format if self.base.to_sym == :time
|
1912
|
-
|
1913
|
-
# Extract date, of specified
|
1914
|
-
date_part = case date_format
|
1915
|
-
when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
|
1916
|
-
when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
|
1917
|
-
when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
|
1918
|
-
when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
|
1919
|
-
when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
|
1920
|
-
when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
|
1921
|
-
when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
|
1922
|
-
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
1923
|
-
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
|
1924
|
-
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
1925
|
-
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
1926
|
-
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
1927
|
-
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
1928
|
-
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
1929
|
-
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
|
1930
|
-
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1931
|
-
when /yyyy-MM-ddTHH:mm:ss\.S+/
|
1932
|
-
md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1933
|
-
num_ms = date_format.match(/S+/).to_s.length
|
1934
|
-
md if md && md[:ms].length <= num_ms
|
1935
|
-
else
|
1936
|
-
raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
|
1937
|
-
nil
|
1938
|
-
end
|
1939
|
-
|
1940
|
-
# Forward past date part
|
1941
|
-
if date_part
|
1942
|
-
value = value[date_part.to_s.length..-1]
|
1943
|
-
value = value.lstrip if date_part && value.start_with?(' ')
|
1944
|
-
end
|
1945
|
-
|
1946
|
-
# Extract time, of specified
|
1947
|
-
time_part = case time_format
|
1948
|
-
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1949
|
-
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
|
1950
|
-
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1951
|
-
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1952
|
-
when /HH:mm:ss\.S+/
|
1953
|
-
md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1954
|
-
num_ms = time_format.match(/S+/).to_s.length
|
1955
|
-
md if md && md[:ms].length <= num_ms
|
1956
|
-
else
|
1957
|
-
raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
|
1958
|
-
nil
|
1959
|
-
end
|
1960
|
-
|
1961
|
-
# If there's a date_format but no date_part, match fails
|
1962
|
-
return nil if date_format && date_part.nil?
|
1963
|
-
|
1964
|
-
# If there's a time_format but no time_part, match fails
|
1965
|
-
return nil if time_format && time_part.nil?
|
1966
|
-
|
1967
|
-
# Forward past time part
|
1968
|
-
value = value[time_part.to_s.length..-1] if time_part
|
1969
|
-
|
1970
|
-
# Use datetime match for time
|
1971
|
-
time_part = date_part if date_part && date_part.names.include?("hr")
|
1972
|
-
|
1973
|
-
# If there's a timezone, it may optionally start with whitespace
|
1974
|
-
value = value.lstrip if tz.to_s.start_with?(' ')
|
1975
|
-
tz_part = value if tz
|
1976
|
-
|
1977
|
-
# Compose normalized value
|
1978
|
-
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1979
|
-
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
1980
|
-
|
1981
|
-
# Add milliseconds, if matched
|
1982
|
-
vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
|
1983
|
-
|
1984
|
-
value = [vd, vt].compact.join('T')
|
1985
|
-
value += tz_part.to_s
|
1986
|
-
end
|
1987
|
-
|
1988
|
-
##
|
1989
|
-
# Parse the date format (if provided), and match against the value (if provided)
|
1990
|
-
# Otherwise, validate format and raise an error
|
1991
|
-
#
|
1992
|
-
# @param [String] pattern
|
1993
|
-
# @param [String] value
|
1994
|
-
# @param [String] groupChar
|
1995
|
-
# @param [String] decimalChar
|
1996
|
-
# @return [String] XMLSchema version of value or nil, if value does not match
|
1997
|
-
# @raise [ArgumentError] if format is not valid
|
1998
|
-
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
|
1999
|
-
return value if pattern.to_s.empty?
|
2000
|
-
value ||= ""
|
2001
|
-
|
2002
|
-
re = build_number_re(pattern, groupChar, decimalChar)
|
2003
|
-
|
2004
|
-
# Upcase value and remove internal spaces
|
2005
|
-
value = value.upcase.gsub(/\s+/, '')
|
2006
|
-
|
2007
|
-
# Remove groupChar from value
|
2008
|
-
value = value.gsub(groupChar, '')
|
2009
|
-
|
2010
|
-
# Replace decimalChar with "."
|
2011
|
-
value = value.gsub(decimalChar, '.')
|
2012
|
-
|
2013
|
-
if value =~ re
|
2014
|
-
# result re-assembles parts removed from value
|
2015
|
-
value
|
2016
|
-
else
|
2017
|
-
# no match
|
2018
|
-
nil
|
2019
|
-
end
|
2020
|
-
end
|
2021
|
-
|
2022
|
-
# Build a regular expression from the provided pattern to match value, after suitable modifications
|
2023
|
-
#
|
2024
|
-
# @param [String] pattern
|
2025
|
-
# @param [String] groupChar
|
2026
|
-
# @param [String] decimalChar
|
2027
|
-
# @return [Regexp] Regular expression matching value
|
2028
|
-
# @raise [ArgumentError] if format is not valid
|
2029
|
-
def build_number_re(pattern, groupChar, decimalChar)
|
2030
|
-
# pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
|
2031
|
-
legal_number_pattern = /\A
|
2032
|
-
([%‰])?
|
2033
|
-
([+-])?
|
2034
|
-
# Mantissa
|
2035
|
-
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2036
|
-
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2037
|
-
# Fractional
|
2038
|
-
(?:#{decimalChar == '.' ? '\.' : decimalChar}
|
2039
|
-
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2040
|
-
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2041
|
-
# Exponent
|
2042
|
-
(E
|
2043
|
-
[+-]?
|
2044
|
-
(?:\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2045
|
-
(?:0|#{groupChar == '.' ? '\.' : groupChar})*
|
2046
|
-
)?
|
2047
|
-
)?
|
2048
|
-
([%‰])?
|
2049
|
-
\Z/x
|
2050
|
-
|
2051
|
-
unless pattern =~ legal_number_pattern
|
2052
|
-
raise ArgumentError, "unrecognized number pattern #{pattern}"
|
2053
|
-
end
|
2054
|
-
|
2055
|
-
# Remove groupChar from pattern
|
2056
|
-
pattern = pattern.gsub(groupChar, '')
|
2057
|
-
|
2058
|
-
# Replace decimalChar with "."
|
2059
|
-
pattern = pattern.gsub(decimalChar, '.')
|
2060
|
-
|
2061
|
-
# Split on decimalChar and E
|
2062
|
-
parts = pattern.split(/[\.E]/)
|
2063
|
-
|
2064
|
-
# Construct regular expression
|
2065
|
-
mantissa_str = case parts[0]
|
2066
|
-
when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
|
2067
|
-
when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
|
2068
|
-
when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
|
2069
|
-
end
|
2070
|
-
|
2071
|
-
fractional_str = case parts[1]
|
2072
|
-
when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
|
2073
|
-
when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
|
2074
|
-
when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
|
2075
|
-
end
|
2076
|
-
fractional_str = "\\.#{fractional_str}" if fractional_str
|
2077
|
-
|
2078
|
-
exponent_str = case parts[2]
|
2079
|
-
when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
|
2080
|
-
when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
|
2081
|
-
when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
|
2082
|
-
end
|
2083
|
-
exponent_str = "E#{exponent_str}" if exponent_str
|
2084
|
-
|
2085
|
-
Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
|
2086
|
-
end
|
2087
1942
|
end
|
2088
1943
|
|
2089
1944
|
# Wraps each resulting row
|
@@ -2320,25 +2175,23 @@ module RDF::Tabular
|
|
2320
2175
|
pattern = format["pattern"]
|
2321
2176
|
|
2322
2177
|
if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
|
2323
|
-
value_errors << "#{value} does not match pattern #{pattern}"
|
2178
|
+
value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
|
2324
2179
|
end
|
2325
2180
|
|
2326
2181
|
# pattern facet failed
|
2327
2182
|
value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
|
2328
|
-
value = value.gsub(groupChar, '')
|
2183
|
+
value = value.gsub(groupChar || ',', '')
|
2329
2184
|
value = value.sub(decimalChar, '.')
|
2330
2185
|
|
2331
2186
|
# Extract percent or per-mille sign
|
2332
2187
|
percent = permille = false
|
2333
|
-
|
2334
|
-
|
2335
|
-
|
2336
|
-
|
2337
|
-
|
2338
|
-
|
2339
|
-
|
2340
|
-
permille = true
|
2341
|
-
end
|
2188
|
+
case value
|
2189
|
+
when /%/
|
2190
|
+
value = value.sub('%', '')
|
2191
|
+
percent = true
|
2192
|
+
when /‰/
|
2193
|
+
value = value.sub('‰', '')
|
2194
|
+
permille = true
|
2342
2195
|
end
|
2343
2196
|
|
2344
2197
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
@@ -2408,13 +2261,13 @@ module RDF::Tabular
|
|
2408
2261
|
lit = RDF::Literal.new(value)
|
2409
2262
|
else
|
2410
2263
|
if datatype.length && lit.object.length != datatype.length
|
2411
|
-
value_errors << "decoded #{value}
|
2264
|
+
value_errors << "decoded #{value} has length #{lit.object.length} not #{datatype.length}"
|
2412
2265
|
end
|
2413
2266
|
if datatype.minLength && lit.object.length < datatype.minLength
|
2414
|
-
value_errors << "decoded #{value}
|
2267
|
+
value_errors << "decoded #{value} has length #{lit.object.length} not >= #{datatype.minLength}"
|
2415
2268
|
end
|
2416
|
-
if datatype.maxLength && lit.object.length
|
2417
|
-
value_errors << "decoded #{value}
|
2269
|
+
if datatype.maxLength && lit.object.length > datatype.maxLength
|
2270
|
+
value_errors << "decoded #{value} has length #{lit.object.length} not <= #{datatype.maxLength}"
|
2418
2271
|
end
|
2419
2272
|
end
|
2420
2273
|
when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
|