rdf-tabular 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,17 +12,18 @@ module RDF
12
12
  module Tabular
13
13
  require 'rdf/tabular/format'
14
14
  require 'rdf/tabular/utils'
15
- autoload :Column, 'rdf/tabular/metadata'
16
- autoload :CSVW, 'rdf/tabular/csvw'
17
- autoload :Dialect, 'rdf/tabular/metadata'
18
- autoload :JSON, 'rdf/tabular/literal'
19
- autoload :Metadata, 'rdf/tabular/metadata'
20
- autoload :Reader, 'rdf/tabular/reader'
21
- autoload :Schema, 'rdf/tabular/metadata'
22
- autoload :Table, 'rdf/tabular/metadata'
23
- autoload :TableGroup, 'rdf/tabular/metadata'
24
- autoload :Transformation, 'rdf/tabular/metadata'
25
- autoload :VERSION, 'rdf/tabular/version'
15
+ autoload :Column, 'rdf/tabular/metadata'
16
+ autoload :CSVW, 'rdf/tabular/csvw'
17
+ autoload :Dialect, 'rdf/tabular/metadata'
18
+ autoload :JSON, 'rdf/tabular/literal'
19
+ autoload :Metadata, 'rdf/tabular/metadata'
20
+ autoload :Reader, 'rdf/tabular/reader'
21
+ autoload :Schema, 'rdf/tabular/metadata'
22
+ autoload :Table, 'rdf/tabular/metadata'
23
+ autoload :TableGroup, 'rdf/tabular/metadata'
24
+ autoload :Transformation, 'rdf/tabular/metadata'
25
+ autoload :UAX35, 'rdf/tabular/uax35'
26
+ autoload :VERSION, 'rdf/tabular/version'
26
27
 
27
28
  # Metadata errors detected
28
29
  class Error < RDF::ReaderError; end
@@ -179,6 +179,7 @@ module RDF::Tabular
179
179
  # @return [Metadata]
180
180
  def self.for_input(input, options = {})
181
181
  base = options[:base]
182
+ warnings = options.fetch(:warnings, [])
182
183
 
183
184
  # Use user metadata, if provided
184
185
  metadata = case options[:metadata]
@@ -192,14 +193,21 @@ module RDF::Tabular
192
193
  # Search for metadata until found
193
194
 
194
195
  # load link metadata, if available
195
- all_locs = []
196
196
  if !metadata && input.respond_to?(:links) &&
197
197
  link = input.links.find_link(%w(rel describedby))
198
198
  link_loc = RDF::URI(base).join(link.href).to_s
199
199
  md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
200
- all_locs << link_loc if md
201
- # Metadata must describe file to be useful
202
- metadata = md if md && md.describes_file?(base)
200
+ if md
201
+ # Metadata must describe file to be useful
202
+ if md.describes_file?(base)
203
+ metadata = md
204
+ else
205
+ warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
206
+ if options[:validate] && !options[:warnings]
207
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
208
+ end
209
+ end
210
+ end
203
211
  end
204
212
 
205
213
  locs = []
@@ -217,8 +225,18 @@ module RDF::Tabular
217
225
  metadata ||= begin
218
226
  md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
219
227
  # Metadata must describe file to be useful
220
- all_locs << loc if md
221
- md if md && md.describes_file?(base)
228
+ if md
229
+ # Metadata must describe file to be useful
230
+ if md.describes_file?(base)
231
+ md
232
+ else
233
+ warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
234
+ if options[:validate] && !options[:warnings]
235
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
236
+ end
237
+ nil
238
+ end
239
+ end
222
240
  rescue IOError
223
241
  debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
224
242
  nil
@@ -226,15 +244,6 @@ module RDF::Tabular
226
244
  end
227
245
  end
228
246
 
229
- # If Metadata was found, but no metadata describes the file, issue a warning
230
- if !all_locs.empty? && !metadata
231
- warnings = options.fetch(:warnings, [])
232
- warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
233
- if options[:validate] && !options[:warnings]
234
- $stderr.puts "Warnings: #{warnings.join("\n")}"
235
- end
236
- end
237
-
238
247
  # Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
239
248
  metadata = case
240
249
  when metadata then metadata
@@ -279,7 +288,7 @@ module RDF::Tabular
279
288
  type ||= case
280
289
  when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
281
290
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
282
- when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
291
+ when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Template
283
292
  when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
284
293
  when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
285
294
  when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
@@ -289,7 +298,7 @@ module RDF::Tabular
289
298
  case type.to_s.to_sym
290
299
  when :TableGroup, :"" then RDF::Tabular::TableGroup
291
300
  when :Table then RDF::Tabular::Table
292
- when :Transformation then RDF::Tabular::Transformation
301
+ when :Template then RDF::Tabular::Transformation
293
302
  when :Schema then RDF::Tabular::Schema
294
303
  when :Column then RDF::Tabular::Column
295
304
  when :Dialect then RDF::Tabular::Dialect
@@ -329,29 +338,41 @@ module RDF::Tabular
329
338
 
330
339
  # Get context from input
331
340
  # Optimize by using built-in version of context, and just extract @base, @lang
341
+ opt_base = @options[:base]
342
+ opt_base ||= input.base_uri if input.respond_to?(:base_uri)
343
+ opt_base ||= input.filename if input.respond_to?(:filename)
344
+
332
345
  @context = case input['@context']
333
346
  when Array
334
347
  warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
335
- LOCAL_CONTEXT.dup.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
348
+ c = LOCAL_CONTEXT.dup
349
+ c.base = RDF::URI(opt_base)
350
+ obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
351
+ raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
352
+ c.parse(obj)
336
353
  when Hash
337
- warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
338
- LOCAL_CONTEXT.dup.parse(input['@context'])
339
- when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT.dup
354
+ warn "Context missing required value 'http://www.w3.org/ns/csvw'"
355
+ c = LOCAL_CONTEXT.dup
356
+ c.base = RDF::URI(opt_base)
357
+ c.parse(input['@context'])
358
+ when "http://www.w3.org/ns/csvw"
359
+ LOCAL_CONTEXT.dup
360
+ c = LOCAL_CONTEXT.dup
361
+ c.base = RDF::URI(opt_base)
362
+ c
340
363
  else
341
364
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
342
365
  warn "Context missing required value 'http://www.w3.org/ns/csvw'"
343
366
  LOCAL_CONTEXT.dup
367
+ c = LOCAL_CONTEXT.dup
368
+ c.base = RDF::URI(opt_base)
369
+ c
344
370
  end
345
371
  end
346
372
 
347
373
  reason = @options.delete(:reason)
348
374
 
349
- @options[:base] ||= @context.base if @context
350
- @options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
351
- @options[:base] ||= input.filename if input.respond_to?(:filename)
352
- @options[:base] = RDF::URI(@options[:base])
353
-
354
- @context.base = @options[:base] if @context
375
+ @options[:base] = @context ? @context.base : RDF::URI(opt_base)
355
376
 
356
377
  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
357
378
  warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
@@ -541,7 +562,7 @@ module RDF::Tabular
541
562
  end
542
563
 
543
564
  # Type of this Metadata
544
- # @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
565
+ # @return [:TableGroup, :Table, :Template, :Schema, :Column]
545
566
  def type; self.class.name.split('::').last.to_sym; end
546
567
 
547
568
  # Base URL of metadata
@@ -644,6 +665,7 @@ module RDF::Tabular
644
665
  end
645
666
 
646
667
  if reference.is_a?(Hash)
668
+ errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
647
669
  ref_cols = reference['columnReference']
648
670
  schema = if reference.has_key?('resource')
649
671
  if reference.has_key?('schemaReference')
@@ -802,7 +824,10 @@ module RDF::Tabular
802
824
  # A column reference property that holds either a single reference to a column description object or an array of references.
803
825
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
804
826
  Array(value).each do |k|
805
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
827
+ unless self.columns.any? {|c| c[:name] == k}
828
+ warn "#{type} has invalid property '#{key}': column reference not found #{k}"
829
+ object.delete(key)
830
+ end
806
831
  end
807
832
  when :@context
808
833
  # Skip these
@@ -819,10 +844,13 @@ module RDF::Tabular
819
844
  when :@type
820
845
  # Must not be a BNode
821
846
  if value.to_s.start_with?("_:")
822
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
847
+ errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
823
848
  end
824
- unless value.to_sym == type
825
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
849
+ case type
850
+ when :Transformation
851
+ errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
852
+ else
853
+ errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
826
854
  end
827
855
  when ->(k) {key.to_s.include?(':')}
828
856
  begin
@@ -1005,12 +1033,27 @@ module RDF::Tabular
1005
1033
  non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
1006
1034
  object_columns = Array(other.tableSchema.columns)
1007
1035
 
1008
- # Special case, if there is no header, then there are no column definitions, allow this as being compatile
1009
- raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}" if
1010
- non_virtual_columns.length != object_columns.length && !object_columns.empty?
1036
+ # Special case, if there is no header, then there are no column definitions, allow this as being compatible
1037
+ if non_virtual_columns.length != object_columns.length && !object_columns.empty?
1038
+ if @options[:validate]
1039
+ raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1040
+ else
1041
+ warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1042
+
1043
+ # If present, a virtual column MUST appear after all other non-virtual column definitions
1044
+ raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
1045
+ virtual_columns = Array(tableSchema.columns).select(&:virtual)
1046
+ while non_virtual_columns.length < object_columns.length
1047
+ non_virtual_columns << nil
1048
+ end
1049
+
1050
+ # Create necessary column entries
1051
+ tableSchema.columns = non_virtual_columns + virtual_columns
1052
+ end
1053
+ end
1011
1054
  index = 0
1012
1055
  object_columns.all? do |cb|
1013
- ca = non_virtual_columns[index]
1056
+ ca = non_virtual_columns[index] || Column.new({})
1014
1057
  ta = ca.titles || {}
1015
1058
  tb = cb.titles || {}
1016
1059
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
@@ -1141,8 +1184,7 @@ module RDF::Tabular
1141
1184
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1142
1185
  raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1143
1186
  elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1144
- warn "Value object with @language must use valid language: #{value.to_json}"
1145
- value.delete('@language')
1187
+ raise Error, "Value object with @language must use valid language: #{value.to_json}"
1146
1188
  elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
1147
1189
  raise Error, "Value object with @type must defined type: #{value.to_json}"
1148
1190
  end
@@ -1232,10 +1274,11 @@ module RDF::Tabular
1232
1274
  end
1233
1275
  private
1234
1276
  # Options passed to CSV.new based on dialect
1277
+ # @todo lineTerminators is ignored, as CSV parser uses single string or `:auto`
1235
1278
  def csv_options
1236
1279
  {
1237
1280
  col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
1238
- row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
1281
+ #row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
1239
1282
  quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
1240
1283
  encoding: (is_a?(Dialect) ? self : dialect).encoding
1241
1284
  }
@@ -1591,10 +1634,10 @@ module RDF::Tabular
1591
1634
 
1592
1635
  # Return or create a name for the column from titles, if it exists
1593
1636
  def name
1594
- self[:name] || if titles && (ts = titles[context.default_language || 'und'])
1637
+ self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
1595
1638
  n = Array(ts).first
1596
- n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
1597
- n1 = URI.encode(n[1..-1], /[^\w\.]/)
1639
+ n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
1640
+ n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
1598
1641
  "#{n0}#{n1}"
1599
1642
  end || "_col.#{number}"
1600
1643
  end
@@ -1638,6 +1681,10 @@ module RDF::Tabular
1638
1681
  DEFAULTS = {}.freeze
1639
1682
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1640
1683
 
1684
+ # Type of this Metadata
1685
+ # @return [:Template]
1686
+ def type; :Template; end
1687
+
1641
1688
  # Getters and Setters
1642
1689
  PROPERTIES.each do |key, type|
1643
1690
  next if [:url].include?(key)
@@ -1709,8 +1756,10 @@ module RDF::Tabular
1709
1756
 
1710
1757
  define_method("#{key}=".to_sym) do |value|
1711
1758
  invalid = case key
1712
- when :commentPrefix, :delimiter, :quoteChar, :lineTerminators
1759
+ when :commentPrefix, :delimiter, :quoteChar
1713
1760
  "a string" unless value.is_a?(String)
1761
+ when :lineTerminators
1762
+ "a string or array of strings" unless Array(value).all? {|e| e.is_a?(String)}
1714
1763
  when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
1715
1764
  "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1716
1765
  when :encoding
@@ -1825,6 +1874,8 @@ module RDF::Tabular
1825
1874
  end
1826
1875
 
1827
1876
  class Datatype < Metadata
1877
+ include UAX35
1878
+
1828
1879
  PROPERTIES = {
1829
1880
  :@id => :link,
1830
1881
  :@type => :atomic,
@@ -1888,202 +1939,6 @@ module RDF::Tabular
1888
1939
  end
1889
1940
  end
1890
1941
  end
1891
-
1892
- ##
1893
- # Parse the date format (if provided), and match against the value (if provided)
1894
- # Otherwise, validate format and raise an error
1895
- #
1896
- # @param [String] format
1897
- # @param [String] value
1898
- # @return [String] XMLSchema version of value
1899
- # @raise [ArgumentError] if format is not valid, or nil, if value does not match
1900
- def parse_uax35_date(format, value)
1901
- tz, date_format, time_format = nil, nil, nil
1902
- return value unless format
1903
- value ||= ""
1904
-
1905
- # Extract tz info
1906
- if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
1907
- format, tz = md[1], md[2]
1908
- end
1909
-
1910
- date_format, time_format = format.split(' ')
1911
- date_format, time_format = nil, date_format if self.base.to_sym == :time
1912
-
1913
- # Extract date, of specified
1914
- date_part = case date_format
1915
- when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
1916
- when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
1917
- when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
1918
- when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
1919
- when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
1920
- when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
1921
- when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
1922
- when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
1923
- when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
1924
- when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
1925
- when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
1926
- when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
1927
- when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
1928
- when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
1929
- when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
1930
- when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
1931
- when /yyyy-MM-ddTHH:mm:ss\.S+/
1932
- md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
1933
- num_ms = date_format.match(/S+/).to_s.length
1934
- md if md && md[:ms].length <= num_ms
1935
- else
1936
- raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
1937
- nil
1938
- end
1939
-
1940
- # Forward past date part
1941
- if date_part
1942
- value = value[date_part.to_s.length..-1]
1943
- value = value.lstrip if date_part && value.start_with?(' ')
1944
- end
1945
-
1946
- # Extract time, of specified
1947
- time_part = case time_format
1948
- when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
1949
- when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
1950
- when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
1951
- when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
1952
- when /HH:mm:ss\.S+/
1953
- md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
1954
- num_ms = time_format.match(/S+/).to_s.length
1955
- md if md && md[:ms].length <= num_ms
1956
- else
1957
- raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
1958
- nil
1959
- end
1960
-
1961
- # If there's a date_format but no date_part, match fails
1962
- return nil if date_format && date_part.nil?
1963
-
1964
- # If there's a time_format but no time_part, match fails
1965
- return nil if time_format && time_part.nil?
1966
-
1967
- # Forward past time part
1968
- value = value[time_part.to_s.length..-1] if time_part
1969
-
1970
- # Use datetime match for time
1971
- time_part = date_part if date_part && date_part.names.include?("hr")
1972
-
1973
- # If there's a timezone, it may optionally start with whitespace
1974
- value = value.lstrip if tz.to_s.start_with?(' ')
1975
- tz_part = value if tz
1976
-
1977
- # Compose normalized value
1978
- vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
1979
- vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
1980
-
1981
- # Add milliseconds, if matched
1982
- vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
1983
-
1984
- value = [vd, vt].compact.join('T')
1985
- value += tz_part.to_s
1986
- end
1987
-
1988
- ##
1989
- # Parse the date format (if provided), and match against the value (if provided)
1990
- # Otherwise, validate format and raise an error
1991
- #
1992
- # @param [String] pattern
1993
- # @param [String] value
1994
- # @param [String] groupChar
1995
- # @param [String] decimalChar
1996
- # @return [String] XMLSchema version of value or nil, if value does not match
1997
- # @raise [ArgumentError] if format is not valid
1998
- def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
1999
- return value if pattern.to_s.empty?
2000
- value ||= ""
2001
-
2002
- re = build_number_re(pattern, groupChar, decimalChar)
2003
-
2004
- # Upcase value and remove internal spaces
2005
- value = value.upcase.gsub(/\s+/, '')
2006
-
2007
- # Remove groupChar from value
2008
- value = value.gsub(groupChar, '')
2009
-
2010
- # Replace decimalChar with "."
2011
- value = value.gsub(decimalChar, '.')
2012
-
2013
- if value =~ re
2014
- # result re-assembles parts removed from value
2015
- value
2016
- else
2017
- # no match
2018
- nil
2019
- end
2020
- end
2021
-
2022
- # Build a regular expression from the provided pattern to match value, after suitable modifications
2023
- #
2024
- # @param [String] pattern
2025
- # @param [String] groupChar
2026
- # @param [String] decimalChar
2027
- # @return [Regexp] Regular expression matching value
2028
- # @raise [ArgumentError] if format is not valid
2029
- def build_number_re(pattern, groupChar, decimalChar)
2030
- # pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
2031
- legal_number_pattern = /\A
2032
- ([%‰])?
2033
- ([+-])?
2034
- # Mantissa
2035
- (\#|#{groupChar == '.' ? '\.' : groupChar})*
2036
- (0|#{groupChar == '.' ? '\.' : groupChar})*
2037
- # Fractional
2038
- (?:#{decimalChar == '.' ? '\.' : decimalChar}
2039
- (0|#{groupChar == '.' ? '\.' : groupChar})*
2040
- (\#|#{groupChar == '.' ? '\.' : groupChar})*
2041
- # Exponent
2042
- (E
2043
- [+-]?
2044
- (?:\#|#{groupChar == '.' ? '\.' : groupChar})*
2045
- (?:0|#{groupChar == '.' ? '\.' : groupChar})*
2046
- )?
2047
- )?
2048
- ([%‰])?
2049
- \Z/x
2050
-
2051
- unless pattern =~ legal_number_pattern
2052
- raise ArgumentError, "unrecognized number pattern #{pattern}"
2053
- end
2054
-
2055
- # Remove groupChar from pattern
2056
- pattern = pattern.gsub(groupChar, '')
2057
-
2058
- # Replace decimalChar with "."
2059
- pattern = pattern.gsub(decimalChar, '.')
2060
-
2061
- # Split on decimalChar and E
2062
- parts = pattern.split(/[\.E]/)
2063
-
2064
- # Construct regular expression
2065
- mantissa_str = case parts[0]
2066
- when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
2067
- when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
2068
- when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
2069
- end
2070
-
2071
- fractional_str = case parts[1]
2072
- when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
2073
- when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
2074
- when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
2075
- end
2076
- fractional_str = "\\.#{fractional_str}" if fractional_str
2077
-
2078
- exponent_str = case parts[2]
2079
- when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
2080
- when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
2081
- when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
2082
- end
2083
- exponent_str = "E#{exponent_str}" if exponent_str
2084
-
2085
- Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
2086
- end
2087
1942
  end
2088
1943
 
2089
1944
  # Wraps each resulting row
@@ -2320,25 +2175,23 @@ module RDF::Tabular
2320
2175
  pattern = format["pattern"]
2321
2176
 
2322
2177
  if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
2323
- value_errors << "#{value} does not match pattern #{pattern}"
2178
+ value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
2324
2179
  end
2325
2180
 
2326
2181
  # pattern facet failed
2327
2182
  value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
2328
- value = value.gsub(groupChar, '') if groupChar
2183
+ value = value.gsub(groupChar || ',', '')
2329
2184
  value = value.sub(decimalChar, '.')
2330
2185
 
2331
2186
  # Extract percent or per-mille sign
2332
2187
  percent = permille = false
2333
- if groupChar
2334
- case value
2335
- when /%/
2336
- value = value.sub('%', '')
2337
- percent = true
2338
- when /‰/
2339
- value = value.sub('‰', '')
2340
- permille = true
2341
- end
2188
+ case value
2189
+ when /%/
2190
+ value = value.sub('%', '')
2191
+ percent = true
2192
+ when /‰/
2193
+ value = value.sub('‰', '')
2194
+ permille = true
2342
2195
  end
2343
2196
 
2344
2197
  lit = RDF::Literal(value, datatype: expanded_dt)
@@ -2408,13 +2261,13 @@ module RDF::Tabular
2408
2261
  lit = RDF::Literal.new(value)
2409
2262
  else
2410
2263
  if datatype.length && lit.object.length != datatype.length
2411
- value_errors << "decoded #{value} does not have length #{datatype.length}"
2264
+ value_errors << "decoded #{value} has length #{lit.object.length} not #{datatype.length}"
2412
2265
  end
2413
2266
  if datatype.minLength && lit.object.length < datatype.minLength
2414
- value_errors << "decoded #{value} does not have length >= #{datatype.length}"
2267
+ value_errors << "decoded #{value} has length #{lit.object.length} not >= #{datatype.minLength}"
2415
2268
  end
2416
- if datatype.maxLength && lit.object.length < datatype.maxLength
2417
- value_errors << "decoded #{value} does not have length <= #{datatype.length}"
2269
+ if datatype.maxLength && lit.object.length > datatype.maxLength
2270
+ value_errors << "decoded #{value} has length #{lit.object.length} not <= #{datatype.maxLength}"
2418
2271
  end
2419
2272
  end
2420
2273
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,