RubyGems - rdf-tabular - Versions diffs - 0.2.0 → 0.2.1 - Mend

rdf-tabular 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/README.md +8 -1
data/VERSION +1 -1
data/etc/earl.ttl +1255 -708
data/lib/rdf/tabular.rb +12 -11
data/lib/rdf/tabular/metadata.rb +107 -254
data/lib/rdf/tabular/reader.rb +2 -0
data/lib/rdf/tabular/uax35.rb +324 -0
data/spec/metadata_spec.rb +163 -78
data/spec/suite_spec.rb +6 -12
metadata +3 -2

data/lib/rdf/tabular/reader.rb CHANGED

@@ -40,6 +40,8 @@ module RDF::Tabular
     # @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
     # @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
     # @option options [Boolean] :noProv do not output optional provenance information
+    # @option options [Array] :errors
+    #   array for placing errors found when processing metadata. If not set, and validating, errors are output to `$stderr`
     # @option options [Array] :warnings
     #   array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
     # @option options [Array<Hash>] :fks_referencing_table

data/lib/rdf/tabular/uax35.rb ADDED

@@ -0,0 +1,324 @@
+# encoding: UTF-8
+module RDF::Tabular
+  ##
+  # Utilities for parsing UAX35 dates and numbers.
+  #
+  # @see http://www.unicode.org/reports/tr35
+  module UAX35
+    ##
+    # Match value against a UAX35 date/time format and normalize it to XML Schema lexical form.
+    # Returns value unchanged when format is nil. Reads self.base: when it is :time, the first format part is taken as the time format.
+    #
+    # @param [String] format date and/or time pattern (space separated), optionally followed by an x/X timezone pattern
+    # @param [String] value lexical value to match; nil is treated as ""
+    # @return [String, nil] XMLSchema version of value, or nil if value does not match format
+    # @raise [ArgumentError] if the date, time, or timezone part of format is not recognized
+    def parse_uax35_date(format, value)
+      date_format, time_format = nil, nil
+      return value unless format
+      value ||= ""
+      # Split a trailing timezone pattern (x/X run, optionally preceded by whitespace) off the format
+      if md = format.match(/^(.*[dyms])+(\s*[xX]+)$/)
+        format, tz_format = md[1], md[2]
+      end
+      date_format, time_format = format.split(' ')
+      date_format, time_format = nil, date_format if self.base.to_sym == :time
+      # Extract date, if specified (the combined date/time formats containing 'T' are matched here too)
+      date_part = case date_format
+      when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
+      when 'yyyyMMdd'   then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
+      when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
+      when 'd-M-yyyy'   then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
+      when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
+      when 'M-d-yyyy'   then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
+      when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
+      when 'd/M/yyyy'   then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
+      when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
+      when 'M/d/yyyy'   then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
+      when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
+      when 'd.M.yyyy'   then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
+      when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
+      when 'M.d.yyyy'   then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
+      when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
+      when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
+      when /yyyy-MM-ddTHH:mm:ss\.S+/
+        md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
+        num_ms = date_format.match(/S+/).to_s.length
+        md if md && md[:ms].length <= num_ms
+      else
+        raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
+        nil
+      end
+      # Drop the matched date text (and whitespace after it, if the remainder starts with a space) from value
+      if date_part
+        value = value[date_part.to_s.length..-1]
+        value = value.lstrip if date_part && value.start_with?(' ')
+      end
+      # Extract time, if specified
+      time_part = case time_format
+      when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
+      when 'HHmmss'   then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
+      when 'HH:mm'    then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
+      when 'HHmm'     then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
+      when /HH:mm:ss\.S+/
+        md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
+        num_ms = time_format.match(/S+/).to_s.length
+        md if md && md[:ms].length <= num_ms
+      else
+        raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
+        nil
+      end
+      # If there's a date_format but no date_part, match fails
+      return nil if date_format && date_part.nil?
+      # If there's a time_format but no time_part, match fails
+      return nil if time_format && time_part.nil?
+      # Drop the matched time text from value
+      value = value[time_part.to_s.length..-1] if time_part
+      # A datetime match in date_part (one with an hr group) also supplies the time fields
+      time_part = date_part if date_part && date_part.names.include?("hr")
+      # If the timezone pattern starts with a space, skip leading whitespace in value. NOTE(review): in the 'xx' pattern below, `|$` sits outside the group, so it appears to match any value — confirm
+      value = value.lstrip if tz_format.to_s.start_with?(' ')
+      tz_part = case tz_format.to_s.lstrip
+      when 'x'    then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)$/)
+      when 'X'    then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)|(?<z>Z))$/)
+      when 'xx'   then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))|$/)
+      when 'XX'   then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))|(?<z>Z))$/)
+      when 'xxx'  then value.match(/^(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))$/)
+      when 'XXX'  then value.match(/^(?:(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))|(?<z>Z))$/)
+      else
+        raise ArgumentError, "unrecognized timezone format #{tz_format.to_s.lstrip}" if tz_format
+        nil
+      end
+      # If there's a tz_format but no tz_part, match fails
+      return nil if tz_format && tz_part.nil?
+      # Compose normalized value. NOTE(review): tz_part[:z] below is also evaluated for the lowercase 'x' patterns, which define no z group — MatchData#[] raises IndexError for an undefined group name; confirm
+      vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
+      vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
+      # Add fractional seconds (the ms group), if matched
+      vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
+      value = [vd, vt].compact.join('T')
+      value += tz_part[:z] ? "Z" : ("%s:%02d" % [tz_part[:hr], tz_part[:mi].to_i]) if tz_part
+      value
+    end
+    ##
+    # Match value against a UAX35 number pattern (build_number_re supplies a default expression when pattern is nil)
+    # and normalize it: upcased, whitespace and groupChar removed, decimalChar replaced by "."
+    #
+    # @param [String] pattern UAX35 number pattern, or nil
+    # @param [String] value lexical value to match; nil is treated as ""
+    # @param [String] groupChar grouping separator used in pattern and value
+    # @param [String] decimalChar decimal separator used in pattern and value
+    # @return [String] XMLSchema version of value or nil, if value does not match
+    # @raise [ArgumentError] if pattern is not valid (raised from build_number_re)
+    def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
+      value ||= ""
+      re = build_number_re(pattern, groupChar, decimalChar)
+      # Upcase value before matching
+      value = value.upcase
+      #require 'byebug'; byebug unless value.empty?
+      if value =~ re
+        # Normalize: remove whitespace and group separators, use '.' as the decimal separator
+        value = value.
+          upcase.
+          gsub(/\s+/, '').
+          gsub(groupChar, '').
+          gsub(decimalChar, '.')
+        # Return the normalized lexical value
+        value
+      else
+        # no match
+        nil
+      end
+    end
+    # Build a regular expression matching values that conform to the provided UAX35 number pattern.
+    # When pattern is nil, a default expression for grouped decimals with optional exponent, % or ‰ suffix, NAN, INF and -INF is returned.
+    # @param [String] pattern
+    # @param [String] groupChar
+    # @param [String] decimalChar
+    # @return [Regexp] Regular expression matching value
+    # @raise [ArgumentError] if pattern has no numeric part, or has more than one decimal separator
+    def build_number_re(pattern, groupChar, decimalChar)
+      # The numeric part of pattern is composed of 0, #, decimalChar, groupChar and E; separators are escaped for use in regular expressions
+      ge = Regexp.escape groupChar
+      de = Regexp.escape decimalChar
+      default_pattern = /^
+        ([+-]?
+         [\d#{ge}]+
+         (#{de}[\d#{ge}]+
+          ([Ee][+-]?\d+)?
+         )?[%‰]?
+        |NAN|INF|-INF)
+      $/x
+      return default_pattern if pattern.nil?
+      numeric_pattern = /
+        # Mantissa
+        (\#|#{ge})*
+        (0|#{ge})*
+        # Fractional
+        (?:#{de}
+          (0|#{ge})*
+          (\#|#{ge})*
+          # Exponent
+          (E
+            [+-]?
+            (?:\#|#{ge})*
+            (?:0|#{ge})*
+          )?
+        )?
+      /x
+      legal_number_pattern = /^(?<prefix>[^\#0]*)(?<numeric_part>#{numeric_pattern})(?<suffix>.*)$/x
+      match = legal_number_pattern.match(pattern)
+      raise ArgumentError, "unrecognized number pattern #{pattern}" if match["numeric_part"].empty?
+      prefix, numeric_part, suffix = match["prefix"], match["numeric_part"], match["suffix"]
+      prefix = Regexp.escape prefix unless prefix.empty?
+      prefix += "[+-]?" unless prefix =~ /[+-]/
+      suffix = Regexp.escape suffix unless suffix.empty?
+      # Split the numeric part into mantissa and exponent on E, then the mantissa into integer and fraction on decimalChar
+      parts = numeric_part.split("E")
+      mantissa_part, exponent_part = parts[0], (parts[1] || '')
+      mantissa_parts = mantissa_part.split(decimalChar)
+      raise ArgumentError, "Multiple decimal separators in #{pattern}" if mantissa_parts.length > 2
+      integer_part, fractional_part = mantissa_parts[0], mantissa_parts[1] || ''
+      min_integer_digits = integer_part.gsub(groupChar, '').gsub('#', '').length
+      all_integer_digits = integer_part.gsub(groupChar, '').length
+      min_fractional_digits = fractional_part.gsub(groupChar, '').gsub('#', '').length
+      max_fractional_digits = fractional_part.gsub(groupChar, '').length
+      exponent_sign = exponent_part[0] if exponent_part =~ /^[+-]/
+      min_exponent_digits = exponent_part.sub(/[+-]/, '').gsub("#", "").length
+      max_exponent_digits = exponent_part.sub(/[+-]/, '').length
+      integer_parts = integer_part.split(groupChar)[1..-1]
+      primary_grouping_size = integer_parts[-1].to_s.length
+      secondary_grouping_size = integer_parts.length <= 1 ? primary_grouping_size : integer_parts[-2].length
+      fractional_parts = fractional_part.split(groupChar)[0..-2]
+      fractional_grouping_size = fractional_parts[0].to_s.length
+      # Construct regular expression for integer part (no grouping: a plain digit count; otherwise grouped digits)
+      integer_str = if primary_grouping_size == 0
+        all_integer_digits > min_integer_digits ? "\\d{#{min_integer_digits},}" : "\\d{#{min_integer_digits}}"
+      else
+        # Required digits: one \d{n} per group, collected from the rightmost group leftwards and reversed when joined
+        integer_parts = []
+        integer_rem = 0
+        while min_integer_digits > 0
+          sz = [primary_grouping_size, min_integer_digits].min
+          integer_rem = primary_grouping_size - sz
+          integer_parts << "\\d{#{sz}}"
+          min_integer_digits -= primary_grouping_size
+          all_integer_digits -= primary_grouping_size
+          primary_grouping_size = secondary_grouping_size
+        end
+        required_digits = integer_parts.reverse.join(ge)
+        if all_integer_digits > 0
+          # Optional digits filling out the last, partially required group, nested as
+          # (?:(?:\d)?)\d)? ...
+          integer_parts = []
+          while integer_rem > 0
+            integer_parts << '\d'
+            integer_rem -= 1
+          end
+          # If secondary_grouping_size is not primary_grouping_size, add digits up to secondary_grouping_size
+          if secondary_grouping_size != primary_grouping_size
+            primary_grouping_size = secondary_grouping_size
+            integer_rem = primary_grouping_size - 1
+            integer_parts << '\d' + ge
+            while integer_rem > 0
+              integer_parts << '\d'
+              integer_rem -= 1
+            end
+          end
+          # Allow any number of further leading groups, each followed by groupChar; the first may be shorter
+          if integer_parts.empty?
+            opt_digits = "(?:\\d{1,#{primary_grouping_size}}#{ge})?(?:\\d{#{primary_grouping_size}}#{ge})*"
+          else
+            integer_parts[-1] = "(?:\\d{1,#{primary_grouping_size}}#{ge})?(?:\\d{#{primary_grouping_size}}#{ge})*#{integer_parts[-1]}"
+            opt_digits = integer_parts.reverse.inject("") {|memo, part| "(?:#{memo}#{part})?"}
+          end
+          opt_digits + required_digits
+        else
+          required_digits
+        end
+      end
+      # Construct regular expression for fractional part. NOTE(review): the grouped branch decrements max_fractional_digits/min_fractional_digits, which are tested again after this expression — confirm that is intended
+      fractional_str = if max_fractional_digits > 0
+        if fractional_grouping_size == 0
+          min_fractional_digits == max_fractional_digits ? "\\d{#{max_fractional_digits}}" : "\\d{#{min_fractional_digits},#{max_fractional_digits}}"
+        else
+          # Required digits, in groups of at most fractional_grouping_size joined by groupChar
+          fractional_parts = []
+          fractional_rem = 0
+          while min_fractional_digits > 0
+            sz = [fractional_grouping_size, min_fractional_digits].min
+            fractional_rem = fractional_grouping_size - sz
+            fractional_parts << "\\d{#{sz}}"
+            max_fractional_digits -= sz
+            min_fractional_digits -= sz
+          end
+          required_digits = fractional_parts.join(ge)
+          # Remaining optional digits, one at a time; groupChar is prepended whenever the current group is full
+          fractional_parts = []
+          while max_fractional_digits > 0
+            fractional_parts << (fractional_rem == 0 ? ge + '\d' : '\d')
+            max_fractional_digits -= 1
+            fractional_rem = (fractional_rem - 1) % fractional_grouping_size
+          end
+          opt_digits = fractional_parts.reverse.inject("") {|memo, part| "(?:#{part}#{memo})?"}
+          required_digits + opt_digits
+        end
+      end.to_s
+      fractional_str = de + fractional_str unless fractional_str.empty?
+      fractional_str = "(?:#{fractional_str})?" if max_fractional_digits > 0 && min_fractional_digits == 0
+      # Exponent pattern (nil when the pattern has no exponent digits). NOTE(review): the third branch looks unreachable, since max_exponent_digits >= min_exponent_digits — confirm
+      exponent_str = case
+      when max_exponent_digits > 0 && max_exponent_digits == min_exponent_digits
+        "E#{exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'}\\d{#{max_exponent_digits}}"
+      when max_exponent_digits > 0
+        "E#{exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'}\\d{#{min_exponent_digits},#{max_exponent_digits}}"
+      when min_exponent_digits > 0
+        "E#{exponent_sign ? Regexp.escape(exponent_sign) : '[+-]?'}\\d{#{min_exponent_digits},#{max_exponent_digits}}"
+      end
+      Regexp.new("^(?<prefix>#{prefix})(?<numeric_part>#{integer_str}#{fractional_str}#{exponent_str})(?<suffix>#{suffix})$")
+    end
+  end
+end

data/spec/metadata_spec.rb CHANGED

@@ -1,4 +1,4 @@
-# coding: utf-8
+# encoding: UTF-8
 $:.unshift "."
 require 'spec_helper'
@@ -308,9 +308,10 @@ describe RDF::Tabular::Metadata do
       its(:type) {is_expected.to eql :Schema}
-      it "is invalid if referenced column does not exist" do
+      it "is valid if referenced column does not exist" do
         subject[:columns] = []
-        expect(subject).not_to be_valid
+        expect(subject).to be_valid
+        expect(subject.warnings).not_to be_empty
       end
       it "is valid with multiple names" do
@@ -322,13 +323,14 @@ describe RDF::Tabular::Metadata do
         expect(v).to be_valid
       end
-      it "is invalid with multiple names if any column missing" do
+      it "is valid with multiple names if any column missing" do
         v = described_class.new({
           "columns" => [column],
           "primaryKey" => [column["name"], column2["name"]]},
           base: RDF::URI("http://example.org/base",
           debug: @debug))
-        expect(v).not_to be_valid
+        expect(v).to be_valid
+        expect(v.warnings).not_to be_empty
       end
     end
@@ -369,10 +371,10 @@ describe RDF::Tabular::Metadata do
               "columnReference" => ["b1", "b2"]
             }
           },
-          "references single column with tableSchema" => {
+          "references single column with schemaReference" => {
             "columnReference" => "a1",
             "reference" => {
-              "tableSchema" => "b_s",
+              "schemaReference" => "b_s",
               "columnReference" => "b1"
             }
           }
@@ -454,7 +456,7 @@ describe RDF::Tabular::Metadata do
     specify {is_expected.to be_valid}
     it_behaves_like("inherited properties", false)
     it_behaves_like("common properties")
-    its(:type) {is_expected.to eql :Transformation}
+    its(:type) {is_expected.to eql :Template}
     {
       source: {
@@ -772,13 +774,13 @@ describe RDF::Tabular::Metadata do
       {
         ":type TableGroup" => [{}, {type: :TableGroup}, RDF::Tabular::TableGroup],
         ":type Table" => [{}, {type: :Table}, RDF::Tabular::Table],
-        ":type Transformation" => [{}, {type: :Transformation}, RDF::Tabular::Transformation],
+        ":type Template" => [{}, {type: :Template}, RDF::Tabular::Transformation],
         ":type Schema" => [{}, {type: :Schema}, RDF::Tabular::Schema],
         ":type Column" => [{}, {type: :Column}, RDF::Tabular::Column],
         ":type Dialect" => [{}, {type: :Dialect}, RDF::Tabular::Dialect],
         "@type TableGroup" => [{"@type" => "TableGroup"}, RDF::Tabular::TableGroup],
         "@type Table" => [{"@type" => "Table"}, RDF::Tabular::Table],
-        "@type Transformation" => [{"@type" => "Transformation"}, RDF::Tabular::Transformation],
+        "@type Template" => [{"@type" => "Template"}, RDF::Tabular::Transformation],
         "@type Schema" => [{"@type" => "Schema"}, RDF::Tabular::Schema],
         "@type Column" => [{"@type" => "Column"}, RDF::Tabular::Column],
         "@type Dialect" => [{"@type" => "Dialect"}, RDF::Tabular::Dialect],
@@ -1058,12 +1060,6 @@ describe RDF::Tabular::Metadata do
           format: {"pattern" => '000'},
           value: "123"
         },
-        "decimal with wrong pattern" => {
-          base: "decimal",
-          format: {"pattern" => '0000'},
-          value: "123",
-          errors: [/123 does not match pattern/]
-        },
         "decimal with explicit groupChar" => {
           base: "decimal",
           format: {"groupChar" => ";"},
@@ -1148,20 +1144,20 @@ describe RDF::Tabular::Metadata do
         "valid boolean Y|N N" => {base: "boolean", value: "N", format: "Y|N", result: "false"},
         # Dates
-        "validate date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"},
-        "validate date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"},
-        "validate date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"},
-        "validate date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"},
-        "validate date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"},
-        "validate date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"},
-        "validate date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"},
-        "validate date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"},
-        "validate date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"},
-        "validate date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"},
-        "validate date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"},
-        "validate date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"},
-        "validate date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"},
-        "validate date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"},
+        "valid date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"},
+        "valid date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"},
+        "valid date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"},
+        "valid date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"},
+        "valid date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"},
+        "valid date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"},
+        "valid date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"},
+        "valid date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"},
+        "valid date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"},
+        "valid date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"},
+        "valid date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"},
+        "valid date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"},
+        "valid date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"},
+        "valid date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"},
         # Times
         "valid time HH:mm:ss.S" => {base: "time", value: "15:02:37.1", format: "HH:mm:ss.S", result: "15:02:37.1"},
@@ -1188,11 +1184,16 @@ describe RDF::Tabular::Metadata do
         # Timezones
         "valid w/TZ yyyy-MM-ddX" => {base: "date", value: "2015-03-22Z", format: "yyyy-MM-ddX", result: "2015-03-22Z"},
-        "valid w/TZ dd.MM.yyyy XXXXX" => {base: "date", value: "22.03.2015 Z", format: "dd.MM.yyyy XXXXX", result: "2015-03-22Z"},
-        "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", result: "15:02:37-05:00"},
-        "valid w/TZ HHmm XX" => {base: "time", value: "1502 +08:00", format: "HHmm XX", result: "15:02:00+08:00"},
+        "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssX", result: "15:02:37-05:00"},
+        "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +0800", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"},
+        "valid w/TZ HHmm XX" => {base: "time", value: "1502 +0800", format: "HHmm XX", result: "15:02:00+08:00"},
+        "valid w/TZ yyyy-MM-dd HH:mm:ss XX" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 -0800", format: "yyyy-MM-dd HH:mm:ss XX", result: "2015-03-15T15:02:37-08:00"},
+        "valid w/TZ HHmm XXX" => {base: "time", value: "1502 +08:00", format: "HHmm XXX", result: "15:02:00+08:00"},
         "valid w/TZ yyyy-MM-ddTHH:mm:ssXXX" => {base: "dateTime", value: "2015-03-15T15:02:37-05:00", format: "yyyy-MM-ddTHH:mm:ssXXX", result: "2015-03-15T15:02:37-05:00"},
-        "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +08:00", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"},
+        "invalid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", errors: ["15:02:37-05:00 does not match format HH:mm:ssX"]},
+        "invalid w/TZ HH:mm:ssXX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssXX", errors: ["15:02:37-05 does not match format HH:mm:ssXX"]},
+        # Other date-like things
         "valid gDay" => {base: "gDay", value: "---31"},
         "valid gMonth" => {base: "gMonth", value: "--02"},
         "valid gMonthDay" => {base: "gMonthDay", value: "--02-21"},
@@ -1216,7 +1217,43 @@ describe RDF::Tabular::Metadata do
         "valid anyAtomicType" => {base: "anyAtomicType", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.anyAtomicType)},
         "valid anyURI" => {base: "anyURI", value: "http://example.com/", result: RDF::Literal("http://example.com/", datatype: RDF::XSD.anyURI)},
         "valid base64Binary" => {base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)},
+        "base64Binary with matching length:" => {
+          base: "base64Binary",
+          value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
+          length: 45,
+          result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)
+        },
+        "base64Binary with wrong maxLength:" => {
+          base: "base64Binary",
+          value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
+          maxLength: 1,
+          errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not <= 1"]
+        },
+        "base64Binary with wrong minLength" => {
+          base: "base64Binary",
+          value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g",
+          minLength: 50,
+          errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not >= 50"]
+        },
         "valid hexBinary" => {base: "hexBinary", value: "0FB7", result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)},
+        "hexBinary with matching length:" => {
+          base: "hexBinary",
+          value: "0FB7",
+          length: 2,
+          result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)
+        },
+        "hexBinary with wrong maxLength:" => {
+          base: "hexBinary",
+          value: "0FB7",
+          maxLength: 1,
+          errors: ["decoded 0FB7 has length 2 not <= 1"]
+        },
+        "hexBinary with wrong minLength" => {
+          base: "hexBinary",
+          value: "0FB7",
+          minLength: 4,
+          errors: ["decoded 0FB7 has length 2 not >= 4"]
+        },
         "valid QName" => {base: "QName", value: "foo:bar", result: RDF::Literal("foo:bar", datatype: RDF::XSD.QName)},
         "valid normalizedString" => {base: "normalizedString", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.normalizedString)},
         "valid token" => {base: "token", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.token)},
@@ -1244,7 +1281,7 @@ describe RDF::Tabular::Metadata do
           }
           let(:md) {
             RDF::Tabular::Table.new({
-             url: "http://example.com/table.csv",
+              url: "http://example.com/table.csv",
               dialect: {header: false},
               tableSchema: {
                 columns: [{
@@ -1293,51 +1330,99 @@ describe RDF::Tabular::Metadata do
     end
   end
-  describe "#build_number_re" do
-    subject {RDF::Tabular::Datatype.new({})}
+  context "Number formats" do
     {
-      '#,##0.##'   => /^\d{1,}\.\d{,2}$/,
-      '#,##0.###'  => /^\d{1,}\.\d{,3}$/,
-      '###0.#####' => /^\d{1,}\.\d{,5}$/,
-      '###0.0000#' => /^\d{1,}\.\d{4,5}$/,
-      '00000.0000' => /^\d{5}\.\d{4}$/,
-      '0'          => /^\d{1}$/,
-      '00'         => /^\d{2}$/,
-      '#'          => /^\d*$/,
-      '##'         => /^\d*$/,
-      '.0'         => /^\.\d{1}$/,
-      '.00'        => /^\.\d{2}$/,
-      '.#'         => /^\.\d{,1}$/,
-      '.##'        => /^\.\d{,2}$/,
-      '+0'         => /^+\d{1}$/,
-      '-0'         => /^-\d{1}$/,
-      '%0'         => /^%\d{1}$/,
-      '‰0'         => /^‰\d{1}$/,
-      '0%'         => /^\d{1}%$/,
-      '0‰'         => /^\d{1}‰$/,
-      '0.0%'       => /^\d{1}\.\d{1}%$/,
-      '#0.0#E#0'   => /^\d{1,}\.\d{1,2}E\d{1,2}$/,
-      '#0.0#E+#'   => /^\d{1,}\.\d{1,2}E+\d{,1}$/,
-      '#0.0#E-00'  => /^\d{1,}\.\d{1,2}E-\d{2}$/,
-      '#0.0#E#0%'  => /^\d{1,}\.\d{1,2}E\d{1,2}%$/,
-    }.each do |pattern,regexp|
-      it "generates #{regexp} for #{pattern}" do
-        expect(subject.build_number_re(pattern, ",", ".")).to eql regexp
-      end
-    end
+      '0'          => {valid: %w(1 -1 +1), invalid: %w(12 1.2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1})(?<suffix>)$/},
+      '00'         => {valid: %w(12), invalid: %w(1 123 1,2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{2})(?<suffix>)$/},
+      '#'          => {valid: %w(1 12 123), invalid: %w(1.2), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{0,})(?<suffix>)$/},
+      '##'         => {re: /^(?<prefix>[+-]?)(?<numeric_part>\d{0,})(?<suffix>)$/},
+      '#0'         => {re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,})(?<suffix>)$/},
+      '0.0'         => {valid: %w(1.1 -1.1), invalid: %w(12.1 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{1})(?<suffix>)$/},
+      '0.00'        => {valid: %w(1.12 +1.12), invalid: %w(12.12 1.1 1.123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{2})(?<suffix>)$/},
+      '0.#'         => {valid: %w(1 1.1), invalid: %w(12.1 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}(?:\.\d{0,1})?)(?<suffix>)$/},
+      '0.##'        => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}(?:\.\d{0,2})?)(?<suffix>)$/},
+      '+0'         => {valid: %w(+1), invalid: %w(1 -1 +10), base: "decimal", re: /^(?<prefix>\+)(?<numeric_part>\d{1})(?<suffix>)$/},
+      '-0'         => {valid: %w(-1), invalid: %w(1 +1 -10), base: "decimal", re: /^(?<prefix>\-)(?<numeric_part>\d{1})(?<suffix>)$/},
+      '%000'       => {valid: %w(%123 %+123 %-123), invalid: %w(%12 %1234 123%), base: "decimal", re: /^(?<prefix>%[+-]?)(?<numeric_part>\d{3})(?<suffix>)$/},
+      '‰000'       => {valid: %w(‰123 ‰+123 ‰-123), invalid: %w(‰12 ‰1234 123‰), base: "decimal", re: /^(?<prefix>‰[+-]?)(?<numeric_part>\d{3})(?<suffix>)$/},
+      '000%'       => {valid: %w(123% +123% -123%), invalid: %w(12% 1234% %123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3})(?<suffix>%)$/},
+      '000‰'       => {valid: %w(123‰ +123‰ -123‰), invalid: %w(12‰ 1234‰ ‰123), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3})(?<suffix>‰)$/},
+      '000.0%'     => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{3}\.\d{1})(?<suffix>%)$/},
+      '###0.#####' => {valid: %w(1 1.1 12345.12345), invalid: %w(1,234.1 1.123456), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}(?:\.\d{0,5})?)(?<suffix>)$/},
+      '###0.0000#' => {valid: %w(1.1234 1.12345 12345.12345), invalid: %w(1,234.1234 1.12), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{4,5})(?<suffix>)$/},
+      '00000.0000' => {valid: %w(12345.1234), invalid: %w(1.2 1,234.123,4), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{5}\.\d{4})(?<suffix>)$/},
+      '#0.0#E#0'   => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?<suffix>)$/},
+      '#0.0#E+#0'   => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E\+\d{1,2})(?<suffix>)$/},
+      # '#0.0#E#0%' must appear only once: a repeated literal key is overridden by its last occurrence (Ruby warns about the duplicate); '%' is expected in <suffix>, as for '000%' above.
+      '#0.0#E#0%'  => {base: "double", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?<suffix>%)$/},
+      # Grouping
+      '#,##,##0'   => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d)?\d{1})(?<suffix>)$/},
+      '#,##,#00'   => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{2})(?<suffix>)$/},
+      '#,##,000'   => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:\d{1,2},)?(?:\d{2},)*\d{3})(?<suffix>)$/},
+      '#,#0,000'   => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{1},\d{3})(?<suffix>)$/},
+      '#,00,000'   => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:\d{1,2},)?(?:\d{2},)*\d{2},\d{3})(?<suffix>)$/},
+      '0,00,000'   => {base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1},\d{2},\d{3})(?<suffix>)$/},
+      '0.0##,###'  => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{1}(?:\d(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)?)(?<suffix>)$/},
+      '0.00#,###'  => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{2}(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)(?<suffix>)$/},
+      '0.000,###'  => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3}(?:,\d(?:\d(?:\d)?)?)?)(?<suffix>)$/},
+      '0.000,0##'  => {base: "decimal", re:/^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{1}(?:\d(?:\d)?)?)(?<suffix>)$/},
+      '0.000,00#'  => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{2}(?:\d)?)(?<suffix>)$/},
+      '0.000,000'  => {base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1}\.\d{3},\d{3})(?<suffix>)$/},
+      # Jeni's
+      '##0'        => {valid: %w(1 12 123 1234), invalid: %w(1,234 123.4), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,})(?<suffix>)$/},
+      '#,#00'      => {valid: %w(12 123 1,234 1,234,567), invalid: %w(1 1234 12,34 12,34,567), base: "integer", re: /^(?<prefix>[+-]?)(?<numeric_part>(?:(?:\d{1,3},)?(?:\d{3},)*\d)?\d{2})(?<suffix>)$/},
+      '#0.#'       => {valid: %w(1 1.2 1234.5), invalid: %w(12.34 1,234.5), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}(?:\.\d{0,1})?)(?<suffix>)$/},
+      '#0.0#,#'    => {valid: %w(12.3 12.34 12.34,5), invalid: %w(1 12.345 12.34,56,7 12.34,567), base: "decimal", re: /^(?<prefix>[+-]?)(?<numeric_part>\d{1,}\.\d{1}(?:\d(?:,\d)?)?)(?<suffix>)$/},
+    }.each do |pattern, props|
+      context pattern do # one example group per number-format pattern in the table above
+        subject {RDF::Tabular::Datatype.new({})} # empty Datatype, used here only as the receiver of build_number_re
+        describe "#build_number_re" do
+          it "generates #{props[:re]} for #{pattern}" do
+            expect(subject.build_number_re(pattern, ",", ".")).to eql props[:re] # "," and "." are presumably the grouping and decimal characters — confirm against build_number_re
+          end if props[:re].is_a?(Regexp) # example is defined only when the entry carries an expected Regexp
+          it "recognizes bad pattern #{pattern}" do # NOTE(review): no entry in the table above sets re: ArgumentError, so this example is currently never generated
+            expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError)
+          end if props[:re] == ArgumentError # NOTE(review): the removed %W list of bad patterns (+%0, 0#, 0E0, 0-) has no counterpart in the new table — confirm this is intentional
+        end
-    %W{
-      +%0
-      0#
-      0E0
-      0-
-    }.each do |pattern|
-      it "recognizes bad pattern #{pattern}" do
-        expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError)
+        describe "Metadata" do # runs each sample value through a Table whose single column uses this pattern as its datatype format
+          let(:md) {
+            RDF::Tabular::Table.new({
+              url: "http://example.com/table.csv",
+              dialect: {header: false},
+              tableSchema: {
+                columns: [{
+                  name: "name",
+                  datatype: {"base" => props[:base], "format" => {"pattern" => pattern}}
+                }]
+              }
+            }, debug: @debug)
+          }
+          describe "valid" do
+            Array(props[:valid]).each do |num| # Array(nil) is [], so entries without :valid generate no examples
+              it "for #{num}" do
+                cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first # input is a single quoted value on one line; take the first value of the first yielded row
+                expect(cell).to be_valid
+              end
+            end
+          end
+          describe "invalid" do
+            Array(props[:invalid]).each do |num| # likewise, entries without :invalid generate no examples
+              it "for #{num}" do
+                cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first # same extraction as in the "valid" group
+                expect(cell).not_to be_valid
+              end
+            end
+          end
+        end
       end
     end
   end