rdf-tabular 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,17 +12,18 @@ module RDF
12
12
  module Tabular
13
13
  require 'rdf/tabular/format'
14
14
  require 'rdf/tabular/utils'
15
- autoload :Column, 'rdf/tabular/metadata'
16
- autoload :CSVW, 'rdf/tabular/csvw'
17
- autoload :Dialect, 'rdf/tabular/metadata'
18
- autoload :JSON, 'rdf/tabular/literal'
19
- autoload :Metadata, 'rdf/tabular/metadata'
20
- autoload :Reader, 'rdf/tabular/reader'
21
- autoload :Schema, 'rdf/tabular/metadata'
22
- autoload :Table, 'rdf/tabular/metadata'
23
- autoload :TableGroup, 'rdf/tabular/metadata'
24
- autoload :Transformation, 'rdf/tabular/metadata'
25
- autoload :VERSION, 'rdf/tabular/version'
15
+ autoload :Column, 'rdf/tabular/metadata'
16
+ autoload :CSVW, 'rdf/tabular/csvw'
17
+ autoload :Dialect, 'rdf/tabular/metadata'
18
+ autoload :JSON, 'rdf/tabular/literal'
19
+ autoload :Metadata, 'rdf/tabular/metadata'
20
+ autoload :Reader, 'rdf/tabular/reader'
21
+ autoload :Schema, 'rdf/tabular/metadata'
22
+ autoload :Table, 'rdf/tabular/metadata'
23
+ autoload :TableGroup, 'rdf/tabular/metadata'
24
+ autoload :Transformation, 'rdf/tabular/metadata'
25
+ autoload :UAX35, 'rdf/tabular/uax35'
26
+ autoload :VERSION, 'rdf/tabular/version'
26
27
 
27
28
  # Metadata errors detected
28
29
  class Error < RDF::ReaderError; end
@@ -179,6 +179,7 @@ module RDF::Tabular
179
179
  # @return [Metadata]
180
180
  def self.for_input(input, options = {})
181
181
  base = options[:base]
182
+ warnings = options.fetch(:warnings, [])
182
183
 
183
184
  # Use user metadata, if provided
184
185
  metadata = case options[:metadata]
@@ -192,14 +193,21 @@ module RDF::Tabular
192
193
  # Search for metadata until found
193
194
 
194
195
  # load link metadata, if available
195
- all_locs = []
196
196
  if !metadata && input.respond_to?(:links) &&
197
197
  link = input.links.find_link(%w(rel describedby))
198
198
  link_loc = RDF::URI(base).join(link.href).to_s
199
199
  md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
200
- all_locs << link_loc if md
201
- # Metadata must describe file to be useful
202
- metadata = md if md && md.describes_file?(base)
200
+ if md
201
+ # Metadata must describe file to be useful
202
+ if md.describes_file?(base)
203
+ metadata = md
204
+ else
205
+ warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
206
+ if options[:validate] && !options[:warnings]
207
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
208
+ end
209
+ end
210
+ end
203
211
  end
204
212
 
205
213
  locs = []
@@ -217,8 +225,18 @@ module RDF::Tabular
217
225
  metadata ||= begin
218
226
  md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
219
227
  # Metadata must describe file to be useful
220
- all_locs << loc if md
221
- md if md && md.describes_file?(base)
228
+ if md
229
+ # Metadata must describe file to be useful
230
+ if md.describes_file?(base)
231
+ md
232
+ else
233
+ warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
234
+ if options[:validate] && !options[:warnings]
235
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
236
+ end
237
+ nil
238
+ end
239
+ end
222
240
  rescue IOError
223
241
  debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
224
242
  nil
@@ -226,15 +244,6 @@ module RDF::Tabular
226
244
  end
227
245
  end
228
246
 
229
- # If Metadata was found, but no metadata describes the file, issue a warning
230
- if !all_locs.empty? && !metadata
231
- warnings = options.fetch(:warnings, [])
232
- warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
233
- if options[:validate] && !options[:warnings]
234
- $stderr.puts "Warnings: #{warnings.join("\n")}"
235
- end
236
- end
237
-
238
247
  # Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
239
248
  metadata = case
240
249
  when metadata then metadata
@@ -279,7 +288,7 @@ module RDF::Tabular
279
288
  type ||= case
280
289
  when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
281
290
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
282
- when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
291
+ when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Template
283
292
  when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
284
293
  when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
285
294
  when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
@@ -289,7 +298,7 @@ module RDF::Tabular
289
298
  case type.to_s.to_sym
290
299
  when :TableGroup, :"" then RDF::Tabular::TableGroup
291
300
  when :Table then RDF::Tabular::Table
292
- when :Transformation then RDF::Tabular::Transformation
301
+ when :Template then RDF::Tabular::Transformation
293
302
  when :Schema then RDF::Tabular::Schema
294
303
  when :Column then RDF::Tabular::Column
295
304
  when :Dialect then RDF::Tabular::Dialect
@@ -329,29 +338,41 @@ module RDF::Tabular
329
338
 
330
339
  # Get context from input
331
340
  # Optimize by using built-in version of context, and just extract @base, @lang
341
+ opt_base = @options[:base]
342
+ opt_base ||= input.base_uri if input.respond_to?(:base_uri)
343
+ opt_base ||= input.filename if input.respond_to?(:filename)
344
+
332
345
  @context = case input['@context']
333
346
  when Array
334
347
  warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
335
- LOCAL_CONTEXT.dup.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
348
+ c = LOCAL_CONTEXT.dup
349
+ c.base = RDF::URI(opt_base)
350
+ obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
351
+ raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
352
+ c.parse(obj)
336
353
  when Hash
337
- warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
338
- LOCAL_CONTEXT.dup.parse(input['@context'])
339
- when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT.dup
354
+ warn "Context missing required value 'http://www.w3.org/ns/csvw'"
355
+ c = LOCAL_CONTEXT.dup
356
+ c.base = RDF::URI(opt_base)
357
+ c.parse(input['@context'])
358
+ when "http://www.w3.org/ns/csvw"
359
+ LOCAL_CONTEXT.dup
360
+ c = LOCAL_CONTEXT.dup
361
+ c.base = RDF::URI(opt_base)
362
+ c
340
363
  else
341
364
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
342
365
  warn "Context missing required value 'http://www.w3.org/ns/csvw'"
343
366
  LOCAL_CONTEXT.dup
367
+ c = LOCAL_CONTEXT.dup
368
+ c.base = RDF::URI(opt_base)
369
+ c
344
370
  end
345
371
  end
346
372
 
347
373
  reason = @options.delete(:reason)
348
374
 
349
- @options[:base] ||= @context.base if @context
350
- @options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
351
- @options[:base] ||= input.filename if input.respond_to?(:filename)
352
- @options[:base] = RDF::URI(@options[:base])
353
-
354
- @context.base = @options[:base] if @context
375
+ @options[:base] = @context ? @context.base : RDF::URI(opt_base)
355
376
 
356
377
  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
357
378
  warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
@@ -541,7 +562,7 @@ module RDF::Tabular
541
562
  end
542
563
 
543
564
  # Type of this Metadata
544
- # @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
565
+ # @return [:TableGroup, :Table, :Template, :Schema, :Column]
545
566
  def type; self.class.name.split('::').last.to_sym; end
546
567
 
547
568
  # Base URL of metadata
@@ -644,6 +665,7 @@ module RDF::Tabular
644
665
  end
645
666
 
646
667
  if reference.is_a?(Hash)
668
+ errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
647
669
  ref_cols = reference['columnReference']
648
670
  schema = if reference.has_key?('resource')
649
671
  if reference.has_key?('schemaReference')
@@ -802,7 +824,10 @@ module RDF::Tabular
802
824
  # A column reference property that holds either a single reference to a column description object or an array of references.
803
825
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
804
826
  Array(value).each do |k|
805
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
827
+ unless self.columns.any? {|c| c[:name] == k}
828
+ warn "#{type} has invalid property '#{key}': column reference not found #{k}"
829
+ object.delete(key)
830
+ end
806
831
  end
807
832
  when :@context
808
833
  # Skip these
@@ -819,10 +844,13 @@ module RDF::Tabular
819
844
  when :@type
820
845
  # Must not be a BNode
821
846
  if value.to_s.start_with?("_:")
822
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
847
+ errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
823
848
  end
824
- unless value.to_sym == type
825
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
849
+ case type
850
+ when :Transformation
851
+ errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
852
+ else
853
+ errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
826
854
  end
827
855
  when ->(k) {key.to_s.include?(':')}
828
856
  begin
@@ -1005,12 +1033,27 @@ module RDF::Tabular
1005
1033
  non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
1006
1034
  object_columns = Array(other.tableSchema.columns)
1007
1035
 
1008
- # Special case, if there is no header, then there are no column definitions, allow this as being compatile
1009
- raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}" if
1010
- non_virtual_columns.length != object_columns.length && !object_columns.empty?
1036
+ # Special case, if there is no header, then there are no column definitions, allow this as being compatible
1037
+ if non_virtual_columns.length != object_columns.length && !object_columns.empty?
1038
+ if @options[:validate]
1039
+ raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1040
+ else
1041
+ warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1042
+
1043
+ # If present, a virtual column MUST appear after all other non-virtual column definitions
1044
+ raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
1045
+ virtual_columns = Array(tableSchema.columns).select(&:virtual)
1046
+ while non_virtual_columns.length < object_columns.length
1047
+ non_virtual_columns << nil
1048
+ end
1049
+
1050
+ # Create necessary column entries
1051
+ tableSchema.columns = non_virtual_columns + virtual_columns
1052
+ end
1053
+ end
1011
1054
  index = 0
1012
1055
  object_columns.all? do |cb|
1013
- ca = non_virtual_columns[index]
1056
+ ca = non_virtual_columns[index] || Column.new({})
1014
1057
  ta = ca.titles || {}
1015
1058
  tb = cb.titles || {}
1016
1059
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
@@ -1141,8 +1184,7 @@ module RDF::Tabular
1141
1184
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1142
1185
  raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1143
1186
  elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1144
- warn "Value object with @language must use valid language: #{value.to_json}"
1145
- value.delete('@language')
1187
+ raise Error, "Value object with @language must use valid language: #{value.to_json}"
1146
1188
  elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
1147
1189
  raise Error, "Value object with @type must defined type: #{value.to_json}"
1148
1190
  end
@@ -1232,10 +1274,11 @@ module RDF::Tabular
1232
1274
  end
1233
1275
  private
1234
1276
  # Options passed to CSV.new based on dialect
1277
+ # @todo lineTerminators is ignored, as CSV parser uses single string or `:auto`
1235
1278
  def csv_options
1236
1279
  {
1237
1280
  col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
1238
- row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
1281
+ #row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
1239
1282
  quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
1240
1283
  encoding: (is_a?(Dialect) ? self : dialect).encoding
1241
1284
  }
@@ -1591,10 +1634,10 @@ module RDF::Tabular
1591
1634
 
1592
1635
  # Return or create a name for the column from titles, if it exists
1593
1636
  def name
1594
- self[:name] || if titles && (ts = titles[context.default_language || 'und'])
1637
+ self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
1595
1638
  n = Array(ts).first
1596
- n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
1597
- n1 = URI.encode(n[1..-1], /[^\w\.]/)
1639
+ n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
1640
+ n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
1598
1641
  "#{n0}#{n1}"
1599
1642
  end || "_col.#{number}"
1600
1643
  end
@@ -1638,6 +1681,10 @@ module RDF::Tabular
1638
1681
  DEFAULTS = {}.freeze
1639
1682
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1640
1683
 
1684
+ # Type of this Metadata
1685
+ # @return [:Template]
1686
+ def type; :Template; end
1687
+
1641
1688
  # Getters and Setters
1642
1689
  PROPERTIES.each do |key, type|
1643
1690
  next if [:url].include?(key)
@@ -1709,8 +1756,10 @@ module RDF::Tabular
1709
1756
 
1710
1757
  define_method("#{key}=".to_sym) do |value|
1711
1758
  invalid = case key
1712
- when :commentPrefix, :delimiter, :quoteChar, :lineTerminators
1759
+ when :commentPrefix, :delimiter, :quoteChar
1713
1760
  "a string" unless value.is_a?(String)
1761
+ when :lineTerminators
1762
+ "a string or array of strings" unless Array(value).all? {|e| e.is_a?(String)}
1714
1763
  when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
1715
1764
  "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1716
1765
  when :encoding
@@ -1825,6 +1874,8 @@ module RDF::Tabular
1825
1874
  end
1826
1875
 
1827
1876
  class Datatype < Metadata
1877
+ include UAX35
1878
+
1828
1879
  PROPERTIES = {
1829
1880
  :@id => :link,
1830
1881
  :@type => :atomic,
@@ -1888,202 +1939,6 @@ module RDF::Tabular
1888
1939
  end
1889
1940
  end
1890
1941
  end
1891
-
1892
- ##
1893
- # Parse the date format (if provided), and match against the value (if provided)
1894
- # Otherwise, validate format and raise an error
1895
- #
1896
- # @param [String] format
1897
- # @param [String] value
1898
- # @return [String] XMLSchema version of value
1899
- # @raise [ArgumentError] if format is not valid, or nil, if value does not match
1900
- def parse_uax35_date(format, value)
1901
- tz, date_format, time_format = nil, nil, nil
1902
- return value unless format
1903
- value ||= ""
1904
-
1905
- # Extract tz info
1906
- if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
1907
- format, tz = md[1], md[2]
1908
- end
1909
-
1910
- date_format, time_format = format.split(' ')
1911
- date_format, time_format = nil, date_format if self.base.to_sym == :time
1912
-
1913
- # Extract date, of specified
1914
- date_part = case date_format
1915
- when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
1916
- when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
1917
- when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
1918
- when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
1919
- when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
1920
- when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
1921
- when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
1922
- when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
1923
- when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
1924
- when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
1925
- when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
1926
- when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
1927
- when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
1928
- when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
1929
- when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
1930
- when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
1931
- when /yyyy-MM-ddTHH:mm:ss\.S+/
1932
- md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
1933
- num_ms = date_format.match(/S+/).to_s.length
1934
- md if md && md[:ms].length <= num_ms
1935
- else
1936
- raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
1937
- nil
1938
- end
1939
-
1940
- # Forward past date part
1941
- if date_part
1942
- value = value[date_part.to_s.length..-1]
1943
- value = value.lstrip if date_part && value.start_with?(' ')
1944
- end
1945
-
1946
- # Extract time, of specified
1947
- time_part = case time_format
1948
- when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
1949
- when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
1950
- when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
1951
- when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
1952
- when /HH:mm:ss\.S+/
1953
- md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
1954
- num_ms = time_format.match(/S+/).to_s.length
1955
- md if md && md[:ms].length <= num_ms
1956
- else
1957
- raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
1958
- nil
1959
- end
1960
-
1961
- # If there's a date_format but no date_part, match fails
1962
- return nil if date_format && date_part.nil?
1963
-
1964
- # If there's a time_format but no time_part, match fails
1965
- return nil if time_format && time_part.nil?
1966
-
1967
- # Forward past time part
1968
- value = value[time_part.to_s.length..-1] if time_part
1969
-
1970
- # Use datetime match for time
1971
- time_part = date_part if date_part && date_part.names.include?("hr")
1972
-
1973
- # If there's a timezone, it may optionally start with whitespace
1974
- value = value.lstrip if tz.to_s.start_with?(' ')
1975
- tz_part = value if tz
1976
-
1977
- # Compose normalized value
1978
- vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
1979
- vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
1980
-
1981
- # Add milliseconds, if matched
1982
- vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
1983
-
1984
- value = [vd, vt].compact.join('T')
1985
- value += tz_part.to_s
1986
- end
1987
-
1988
- ##
1989
- # Parse the date format (if provided), and match against the value (if provided)
1990
- # Otherwise, validate format and raise an error
1991
- #
1992
- # @param [String] pattern
1993
- # @param [String] value
1994
- # @param [String] groupChar
1995
- # @param [String] decimalChar
1996
- # @return [String] XMLSchema version of value or nil, if value does not match
1997
- # @raise [ArgumentError] if format is not valid
1998
- def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
1999
- return value if pattern.to_s.empty?
2000
- value ||= ""
2001
-
2002
- re = build_number_re(pattern, groupChar, decimalChar)
2003
-
2004
- # Upcase value and remove internal spaces
2005
- value = value.upcase.gsub(/\s+/, '')
2006
-
2007
- # Remove groupChar from value
2008
- value = value.gsub(groupChar, '')
2009
-
2010
- # Replace decimalChar with "."
2011
- value = value.gsub(decimalChar, '.')
2012
-
2013
- if value =~ re
2014
- # result re-assembles parts removed from value
2015
- value
2016
- else
2017
- # no match
2018
- nil
2019
- end
2020
- end
2021
-
2022
- # Build a regular expression from the provided pattern to match value, after suitable modifications
2023
- #
2024
- # @param [String] pattern
2025
- # @param [String] groupChar
2026
- # @param [String] decimalChar
2027
- # @return [Regexp] Regular expression matching value
2028
- # @raise [ArgumentError] if format is not valid
2029
- def build_number_re(pattern, groupChar, decimalChar)
2030
- # pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
2031
- legal_number_pattern = /\A
2032
- ([%‰])?
2033
- ([+-])?
2034
- # Mantissa
2035
- (\#|#{groupChar == '.' ? '\.' : groupChar})*
2036
- (0|#{groupChar == '.' ? '\.' : groupChar})*
2037
- # Fractional
2038
- (?:#{decimalChar == '.' ? '\.' : decimalChar}
2039
- (0|#{groupChar == '.' ? '\.' : groupChar})*
2040
- (\#|#{groupChar == '.' ? '\.' : groupChar})*
2041
- # Exponent
2042
- (E
2043
- [+-]?
2044
- (?:\#|#{groupChar == '.' ? '\.' : groupChar})*
2045
- (?:0|#{groupChar == '.' ? '\.' : groupChar})*
2046
- )?
2047
- )?
2048
- ([%‰])?
2049
- \Z/x
2050
-
2051
- unless pattern =~ legal_number_pattern
2052
- raise ArgumentError, "unrecognized number pattern #{pattern}"
2053
- end
2054
-
2055
- # Remove groupChar from pattern
2056
- pattern = pattern.gsub(groupChar, '')
2057
-
2058
- # Replace decimalChar with "."
2059
- pattern = pattern.gsub(decimalChar, '.')
2060
-
2061
- # Split on decimalChar and E
2062
- parts = pattern.split(/[\.E]/)
2063
-
2064
- # Construct regular expression
2065
- mantissa_str = case parts[0]
2066
- when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
2067
- when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
2068
- when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
2069
- end
2070
-
2071
- fractional_str = case parts[1]
2072
- when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
2073
- when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
2074
- when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
2075
- end
2076
- fractional_str = "\\.#{fractional_str}" if fractional_str
2077
-
2078
- exponent_str = case parts[2]
2079
- when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
2080
- when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
2081
- when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
2082
- end
2083
- exponent_str = "E#{exponent_str}" if exponent_str
2084
-
2085
- Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
2086
- end
2087
1942
  end
2088
1943
 
2089
1944
  # Wraps each resulting row
@@ -2320,25 +2175,23 @@ module RDF::Tabular
2320
2175
  pattern = format["pattern"]
2321
2176
 
2322
2177
  if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
2323
- value_errors << "#{value} does not match pattern #{pattern}"
2178
+ value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
2324
2179
  end
2325
2180
 
2326
2181
  # pattern facet failed
2327
2182
  value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
2328
- value = value.gsub(groupChar, '') if groupChar
2183
+ value = value.gsub(groupChar || ',', '')
2329
2184
  value = value.sub(decimalChar, '.')
2330
2185
 
2331
2186
  # Extract percent or per-mille sign
2332
2187
  percent = permille = false
2333
- if groupChar
2334
- case value
2335
- when /%/
2336
- value = value.sub('%', '')
2337
- percent = true
2338
- when /‰/
2339
- value = value.sub('‰', '')
2340
- permille = true
2341
- end
2188
+ case value
2189
+ when /%/
2190
+ value = value.sub('%', '')
2191
+ percent = true
2192
+ when /‰/
2193
+ value = value.sub('‰', '')
2194
+ permille = true
2342
2195
  end
2343
2196
 
2344
2197
  lit = RDF::Literal(value, datatype: expanded_dt)
@@ -2408,13 +2261,13 @@ module RDF::Tabular
2408
2261
  lit = RDF::Literal.new(value)
2409
2262
  else
2410
2263
  if datatype.length && lit.object.length != datatype.length
2411
- value_errors << "decoded #{value} does not have length #{datatype.length}"
2264
+ value_errors << "decoded #{value} has length #{lit.object.length} not #{datatype.length}"
2412
2265
  end
2413
2266
  if datatype.minLength && lit.object.length < datatype.minLength
2414
- value_errors << "decoded #{value} does not have length >= #{datatype.length}"
2267
+ value_errors << "decoded #{value} has length #{lit.object.length} not >= #{datatype.minLength}"
2415
2268
  end
2416
- if datatype.maxLength && lit.object.length < datatype.maxLength
2417
- value_errors << "decoded #{value} does not have length <= #{datatype.length}"
2269
+ if datatype.maxLength && lit.object.length > datatype.maxLength
2270
+ value_errors << "decoded #{value} has length #{lit.object.length} not <= #{datatype.maxLength}"
2418
2271
  end
2419
2272
  end
2420
2273
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,